Initial commit: Vision scanner for shelf/pantry product extraction
This commit is contained in:
100
src/tiling.ts
Normal file
100
src/tiling.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import sharp from "sharp";
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
import * as crypto from "crypto";
|
||||
import { config } from "./config";
|
||||
import { callVision } from "./vision";
|
||||
|
||||
/**
 * A single tile cut out of a larger image.
 *
 * `tileImage` fills both fields from the same encoded bytes, so `base64`
 * is always the base64 text of `buffer`.
 */
export interface TileResult {
  /** Encoded image bytes of the tile (JPEG when produced by `tileImage`). */
  buffer: Buffer;
  /** Base64 encoding of `buffer`; this is what `scanTiles` passes to `callVision`. */
  base64: string;
}
|
||||
|
||||
/** Pixel rectangle of one tile, in the option shape passed to `sharp().extract()`. */
interface TileRegion {
  left: number;
  top: number;
  width: number;
  height: number;
}

/**
 * Compute the extraction rectangle for every grid cell, in row-major order.
 *
 * Each cell is grown by the overlap on all sides and clamped to the image.
 * The last column and last row always reach the image edge, so the pixels
 * left over by `Math.floor(width / cols)` / `Math.floor(height / rows)` are
 * never dropped. Cells that end up with no area are omitted.
 */
function computeTileRegions(
  width: number,
  height: number,
  grid: readonly [number, number],
  overlap: number
): TileRegion[] {
  const [cols, rows] = grid;
  const tileW = Math.floor(width / cols);
  const tileH = Math.floor(height / rows);
  const overlapX = Math.floor(tileW * overlap);
  const overlapY = Math.floor(tileH * overlap);

  const regions: TileRegion[] = [];
  for (let row = 0; row < rows; row++) {
    const top = Math.max(0, row * tileH - overlapY);
    // The final row absorbs the division remainder instead of cutting it off.
    const bottom =
      row + 1 >= rows ? height : Math.min(height, (row + 1) * tileH + overlapY);

    for (let col = 0; col < cols; col++) {
      const left = Math.max(0, col * tileW - overlapX);
      // Same for the final column.
      const right =
        col + 1 >= cols ? width : Math.min(width, (col + 1) * tileW + overlapX);

      // A grid finer than the image yields empty cells; there is nothing to encode for them.
      if (right > left && bottom > top) {
        regions.push({ left, top, width: right - left, height: bottom - top });
      }
    }
  }
  return regions;
}

/**
 * Split an image into a grid of overlapping tiles and encode each tile as JPEG.
 *
 * @param imageBuffer - Encoded source image, read with `sharp`.
 * @param grid - `[columns, rows]` of the tiling grid. Defaults to 2x2.
 * @param overlap - Fraction of a tile's width/height by which each tile is
 *   extended into its neighbours (expected to be >= 0). Defaults to 0.10.
 * @returns One entry per tile in row-major order (left to right, top to
 *   bottom), each holding the JPEG bytes (quality 85) and their base64 text.
 *   Resolves to an empty array when the image reports no width or height.
 *   The tiles together cover the whole image: the last column and row extend
 *   to the image edge. Fewer than `columns * rows` tiles are returned when
 *   the grid is finer than the image, because empty cells are skipped.
 */
export async function tileImage(
  imageBuffer: Buffer,
  grid: [number, number] = [2, 2],
  overlap: number = 0.10
): Promise<TileResult[]> {
  const metadata = await sharp(imageBuffer).metadata();
  const w = metadata.width ?? 0;
  const h = metadata.height ?? 0;

  if (w === 0 || h === 0) return [];

  const regions = computeTileRegions(w, h, grid, overlap);

  // All tiles are extracted and encoded concurrently; Promise.all keeps grid order.
  return Promise.all(
    regions.map(async (region) => {
      const buffer = await sharp(imageBuffer)
        .extract(region)
        .jpeg({ quality: 85 })
        .toBuffer();
      return { buffer, base64: buffer.toString("base64") };
    })
  );
}
|
||||
|
||||
/**
 * Run the vision model over every tile, a limited number at a time.
 *
 * Tiles are processed in consecutive batches of `config.maxConcurrentTiles`;
 * the calls inside a batch run concurrently and the next batch starts only
 * once the whole batch has settled. A configured value that is not at least 1
 * (0, negative, NaN, missing) falls back to one tile at a time, because such
 * a value would otherwise keep the batch loop from advancing.
 *
 * @param tiles - Tiles to scan; each tile's `base64` payload is sent to `callVision`.
 * @param prompt - Prompt passed unchanged with every tile.
 * @returns The non-blank model responses in tile order. A tile whose call
 *   throws is logged with `console.warn` and contributes nothing, and blank
 *   responses are dropped too, so the result may be shorter than `tiles` and
 *   its indices do not map back to tile positions.
 */
export async function scanTiles(
  tiles: TileResult[],
  prompt: string
): Promise<string[]> {
  const configured = Math.floor(config.maxConcurrentTiles);
  // `NaN >= 1` is false, so invalid values land on the fallback as well.
  const batchSize = configured >= 1 ? configured : 1;

  const responses: string[] = [];

  // Process in batches to throttle GPU usage
  for (let start = 0; start < tiles.length; start += batchSize) {
    const batch = tiles.slice(start, start + batchSize);
    const batchResponses = await Promise.all(
      batch.map(async (tile) => {
        try {
          return await callVision(prompt, tile.base64);
        } catch (err) {
          // Best effort: one failed tile must not abort the whole scan.
          console.warn("Vision tile scan failed:", err instanceof Error ? err.message : err);
          return "";
        }
      })
    );
    responses.push(...batchResponses);
  }

  return responses.filter((text) => text.trim().length > 0);
}
|
||||
|
||||
/**
 * Write an uploaded image to `<config.uploadDir>/<scanType>/<scanId>.jpg`.
 *
 * The bytes are written exactly as received; the `.jpg` extension is applied
 * regardless of the buffer's content. The target directory is created when
 * it does not exist yet.
 *
 * @param imageBuffer - Image bytes to store.
 * @param scanType - Sub-directory of the upload directory to store the file in.
 * @returns The generated `scanId` (the first 12 characters of a random UUID,
 *   which includes one hyphen) and the path of the written file.
 * @throws Error when `scanType` would place the file outside
 *   `config.uploadDir` (for example `"../other"`); nothing is created in that case.
 */
export function saveScanImage(
  imageBuffer: Buffer,
  scanType: string
): { scanId: string; filePath: string } {
  const dirPath = path.join(config.uploadDir, scanType);

  // `scanType` becomes part of the path, so make sure it cannot climb out of
  // the upload directory before touching the file system.
  const relativeToRoot = path.relative(
    path.resolve(config.uploadDir),
    path.resolve(dirPath)
  );
  const escapesUploadDir =
    relativeToRoot === ".." ||
    relativeToRoot.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativeToRoot); // e.g. a different drive on Windows
  if (escapesUploadDir) {
    throw new Error(`Invalid scan type: ${JSON.stringify(scanType)}`);
  }

  const scanId = crypto.randomUUID().slice(0, 12);
  fs.mkdirSync(dirPath, { recursive: true });

  const filePath = path.join(dirPath, `${scanId}.jpg`);
  fs.writeFileSync(filePath, imageBuffer);

  return { scanId, filePath };
}
|
||||
Reference in New Issue
Block a user