101 lines
2.8 KiB
TypeScript
101 lines
2.8 KiB
TypeScript
|
|
import sharp from "sharp";
|
||
|
|
import * as fs from "fs";
|
||
|
|
import * as path from "path";
|
||
|
|
import * as crypto from "crypto";
|
||
|
|
import { config } from "./config";
|
||
|
|
import { callVision } from "./vision";
|
||
|
|
|
||
|
|
/**
 * One tile cut out of a source image.
 */
export interface TileResult {
  /** Encoded image bytes of the tile. */
  buffer: Buffer;
  /** The same bytes as `buffer`, base64-encoded. */
  base64: string;
}
|
||
|
|
|
||
|
|
/**
 * Split an image into a `cols x rows` grid of overlapping tiles, each
 * re-encoded as JPEG (quality 85).
 *
 * Every tile is grown by `overlap` (a fraction of the nominal tile size) on
 * each side and then clipped to the image bounds. The last column and last row
 * always extend to the image edge so that pixels left over by the integer
 * division of the image size are not dropped.
 *
 * @param imageBuffer - Encoded source image, read with `sharp`.
 * @param grid - `[cols, rows]`. Values are floored and capped at the image's
 *   pixel dimensions; a value below 1 (or NaN) yields an empty result.
 * @param overlap - Fraction of the nominal tile width/height added on each
 *   side. Negative or non-finite values are treated as 0.
 * @returns Tiles in row-major order (left to right, then top to bottom), or an
 *   empty array when the image dimensions cannot be determined.
 */
export async function tileImage(
  imageBuffer: Buffer,
  grid: [number, number] = [2, 2],
  overlap: number = 0.10
): Promise<TileResult[]> {
  const metadata = await sharp(imageBuffer).metadata();
  const w = metadata.width ?? 0;
  const h = metadata.height ?? 0;

  if (w === 0 || h === 0) return [];

  const requestedCols = Math.floor(grid[0]);
  const requestedRows = Math.floor(grid[1]);
  // Written as `!(x >= 1)` so that NaN is rejected as well.
  if (!(requestedCols >= 1) || !(requestedRows >= 1)) return [];

  // Never ask for more tiles than there are pixels: that would produce
  // zero-sized extract regions.
  const cols = Math.min(requestedCols, w);
  const rows = Math.min(requestedRows, h);
  const tileW = Math.floor(w / cols);
  const tileH = Math.floor(h / rows);

  const overlapRatio = Number.isFinite(overlap) ? Math.max(0, overlap) : 0;
  const overlapX = Math.floor(tileW * overlapRatio);
  const overlapY = Math.floor(tileH * overlapRatio);

  const regions: { left: number; top: number; width: number; height: number }[] = [];

  for (let row = 0; row < rows; row++) {
    const top = Math.max(0, row * tileH - overlapY);
    // The last row absorbs the remainder of h / rows.
    const bottom =
      row === rows - 1 ? h : Math.min(h, (row + 1) * tileH + overlapY);

    for (let col = 0; col < cols; col++) {
      const left = Math.max(0, col * tileW - overlapX);
      // The last column absorbs the remainder of w / cols.
      const right =
        col === cols - 1 ? w : Math.min(w, (col + 1) * tileW + overlapX);

      regions.push({ left, top, width: right - left, height: bottom - top });
    }
  }

  return Promise.all(
    regions.map(async (region) => {
      const buffer = await sharp(imageBuffer)
        .extract(region)
        .jpeg({ quality: 85 })
        .toBuffer();
      return { buffer, base64: buffer.toString("base64") };
    })
  );
}
|
||
|
|
|
||
|
|
/**
 * Send every tile to the vision model, at most `config.maxConcurrentTiles`
 * at a time, and collect the text returned for each.
 *
 * Tiles are processed in consecutive batches; a batch is awaited in full
 * before the next one starts. A tile whose call throws is logged with
 * `console.warn` and treated as empty output rather than failing the scan.
 *
 * @param tiles - Tiles to scan; each tile's `base64` payload is sent.
 * @param prompt - Prompt passed unchanged with every tile.
 * @returns The non-blank outputs, in tile order. Blank and failed results are
 *   dropped, so the array may be shorter than `tiles` and its indices do not
 *   map back to tile positions.
 */
export async function scanTiles(
  tiles: TileResult[],
  prompt: string
): Promise<string[]> {
  // A batch size of 0 or less would never advance the loop index, and a
  // NaN/undefined one would silently skip every tile. Fall back to one tile
  // at a time in those cases. Infinity still means "everything in one batch".
  const configured = Math.floor(Number(config.maxConcurrentTiles));
  const batchSize = configured >= 1 ? configured : 1;

  const results: string[] = [];

  // Process in batches to throttle GPU usage
  for (let start = 0; start < tiles.length; start += batchSize) {
    const batch = tiles.slice(start, start + batchSize);
    const batchResults = await Promise.all(
      batch.map(async (tile) => {
        try {
          return await callVision(prompt, tile.base64);
        } catch (err: unknown) {
          console.warn("Vision tile scan failed:", err instanceof Error ? err.message : err);
          return "";
        }
      })
    );
    results.push(...batchResults);
  }

  return results.filter((text) => text.trim().length > 0);
}
|
||
|
|
|
||
|
|
/**
 * Write an uploaded image to `<config.uploadDir>/<scanType>/<scanId>.jpg`.
 *
 * The directory is created if missing. The buffer is written as-is (no
 * re-encoding) and the file always gets a `.jpg` extension. The file is
 * created exclusively, so an id collision retries with a fresh id instead of
 * overwriting an existing scan.
 *
 * @param imageBuffer - Bytes to write to disk.
 * @param scanType - Sub-directory of the upload directory to store the file in.
 * @returns The generated 12-character `scanId` and the `filePath` written.
 * @throws Error if `scanType` resolves to a location outside
 *   `config.uploadDir`, or if no unused id could be found.
 */
export function saveScanImage(
  imageBuffer: Buffer,
  scanType: string
): { scanId: string; filePath: string } {
  const dirPath = path.join(config.uploadDir, scanType);

  // Reject values such as "../.." that would place the file outside the
  // upload directory. Nested sub-directories remain allowed.
  const relative = path.relative(
    path.resolve(config.uploadDir),
    path.resolve(dirPath)
  );
  if (
    relative === ".." ||
    relative.startsWith(".." + path.sep) ||
    path.isAbsolute(relative)
  ) {
    throw new Error(`Invalid scan type: ${scanType}`);
  }

  fs.mkdirSync(dirPath, { recursive: true });

  const maxAttempts = 5;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const scanId = crypto.randomUUID().slice(0, 12);
    const filePath = path.join(dirPath, `${scanId}.jpg`);
    try {
      // "wx" fails with EEXIST instead of overwriting an existing file.
      fs.writeFileSync(filePath, imageBuffer, { flag: "wx" });
      return { scanId, filePath };
    } catch (err: unknown) {
      const code = (err as { code?: unknown } | null)?.code;
      if (code !== "EEXIST") throw err;
      // Id collision: try again with a new id.
    }
  }

  throw new Error(
    `Could not allocate a unique scan id after ${maxAttempts} attempts`
  );
}
|