Initial commit: Vision scanner for shelf/pantry product extraction
This commit is contained in:
91
src/pantry.ts
Normal file
91
src/pantry.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
import { tileImage, scanTiles, saveScanImage } from "./tiling";
|
||||
import { parseJsonItems, deduplicateItems } from "./parsing";
|
||||
import { matchToKnownProducts } from "./matching";
|
||||
import { geminiIdentifyProducts } from "./gemini";
|
||||
|
||||
/**
 * Instruction text handed to the tile scanner for pantry/fridge/kitchen photos.
 * It asks the model for a bare JSON array whose objects carry the keys
 * "name", "brand", "category" and "quantity_desc".
 */
const PANTRY_PROMPT = [
  "Identify ALL food and grocery products visible in this photo of a pantry, fridge, or kitchen.",
  "For each product, extract:",
  "- Product name (as shown on the label)",
  "- Brand (if visible)",
  "- Category (produce/dairy/meat/seafood/bakery/snacks/beverages/frozen/pantry/condiments/other)",
  '- Approximate quantity (e.g. "2 cans", "1 bottle", "half gallon")',
  "",
  'Return ONLY a JSON array of objects with keys: "name", "brand", "category", "quantity_desc"',
  'Example: [{"name": "Greek Yogurt", "brand": "Fage", "category": "dairy", "quantity_desc": "2 containers"}]',
  "Return ONLY the JSON array, no other text.",
].join("\n");

/**
 * Threshold for the local scan: when it yields fewer items than this,
 * the Gemini fallback is attempted.
 */
const MIN_LOCAL_RESULTS = 2;
|
||||
|
||||
/**
 * One product record in the shape requested by the pantry prompt.
 *
 * Extends `Record<string, unknown>` so that arrays of items remain assignable
 * to the generic `Record<string, unknown>[]` used elsewhere in this module.
 */
interface PantryItem extends Record<string, unknown> {
  /** Product name as reported by the model. */
  name: string;
  /** Brand name; empty string when none was reported. */
  brand: string;
  /** Product category label (e.g. "dairy", "other"). */
  category: string;
  /** Free-text quantity description (e.g. "2 cans"). */
  quantity_desc: string;
}
|
||||
|
||||
/**
 * Converts one raw field value to trimmed text.
 *
 * Non-primitive values (objects, arrays, functions, null) are treated as
 * missing, because stringifying them would yield noise such as
 * "[object Object]". The fallback is applied AFTER trimming, so a
 * whitespace-only value also resolves to the fallback.
 *
 * @param value - Raw field value taken from a parsed model response.
 * @param fallback - Text to use when the value is missing or blank.
 * @returns The trimmed text, or `fallback` when nothing usable remains.
 */
function pantryFieldText(value: unknown, fallback: string): string {
  if (typeof value === "object" || typeof value === "function") return fallback;
  const text = String(value || "").trim();
  return text || fallback;
}

/**
 * Normalizes raw parsed records into well-formed pantry items.
 *
 * Entries that are not objects, or whose name is missing/blank/non-primitive,
 * are dropped. Remaining fields are trimmed; `category` is lower-cased and
 * defaults to "other", `quantity_desc` defaults to "1", `brand` defaults to "".
 *
 * @param items - Records parsed from model output; individual entries may be
 *   malformed at runtime despite the declared type.
 * @returns A new array containing only the cleaned items, in input order.
 */
function cleanPantryItems(items: Record<string, unknown>[]): PantryItem[] {
  const cleaned: PantryItem[] = [];

  for (const item of items) {
    // Parsed JSON can contain nulls or primitives despite the static type.
    if (typeof item !== "object" || item === null) continue;

    const name = pantryFieldText(item.name, "");
    if (!name) continue;

    cleaned.push({
      name,
      brand: pantryFieldText(item.brand, ""),
      category: pantryFieldText(item.category, "other").toLowerCase(),
      quantity_desc: pantryFieldText(item.quantity_desc, "1"),
    });
  }

  return cleaned;
}
|
||||
|
||||
/**
 * Scans a pantry/fridge/kitchen photo and returns the products found in it.
 *
 * Pipeline:
 *  1. Tile the image, scan the tiles with the local model using
 *     `PANTRY_PROMPT`, then parse, clean and de-duplicate the results.
 *  2. If fewer than `MIN_LOCAL_RESULTS` items were found, ask Gemini to
 *     identify products on the full image. Its results go through the same
 *     `cleanPantryItems` normalization as the local path and replace the
 *     local results only when they contain more valid items. A Gemini failure
 *     is logged and the local results are kept.
 *  3. Match the items against known products and attach a positional `index`.
 *
 * @param imageBuffer - Raw bytes of the photo to scan.
 * @returns An object with `scan_id`, `items` (matched items, each with an
 *   `index`), `total_found` and `model_used` ("local" or "gemini").
 * @throws Errors from saving, tiling, local scanning or matching propagate to
 *   the caller; only the Gemini fallback is wrapped in a try/catch.
 */
export async function scanPantryPhoto(
  imageBuffer: Buffer
): Promise<Record<string, unknown>> {
  const { scanId } = saveScanImage(imageBuffer, "pantry");
  let modelUsed = "local";

  // Step 1: Tile and scan with local model
  const tiles = await tileImage(imageBuffer);
  const rawTexts = await scanTiles(tiles, PANTRY_PROMPT);
  let items: Record<string, unknown>[] = cleanPantryItems(parseJsonItems(rawTexts));
  items = deduplicateItems(items);

  // Step 2: Gemini fallback if too few results
  if (items.length < MIN_LOCAL_RESULTS) {
    console.log(`Local model found ${items.length} pantry items, falling back to Gemini`);
    try {
      const fullBase64 = imageBuffer.toString("base64");
      const geminiItems = await geminiIdentifyProducts(fullBase64);

      // Run Gemini output through the same cleaner as the local path so that
      // blank-named entries are dropped (and not counted in the comparison
      // below) and names/brands/categories are trimmed and normalized.
      const normalized = cleanPantryItems(
        geminiItems.map((gi) => ({
          name: gi.name,
          brand: gi.brand,
          category: gi.category,
          quantity_desc: "1",
        }))
      );

      if (normalized.length > items.length) {
        items = normalized;
        modelUsed = "gemini";
      }
    } catch (err) {
      // Best effort: keep whatever the local model produced.
      console.error("Gemini fallback failed for pantry scan:", err instanceof Error ? err.message : err);
    }
  }

  // Step 3: Match against known products (ChromaDB only)
  const matched = await matchToKnownProducts(items);

  // Add index
  const indexed = matched.map((item, i) => ({ ...item, index: i }));

  console.log(`Pantry scan ${scanId}: ${indexed.length} products via ${modelUsed}`);
  return {
    scan_id: scanId,
    items: indexed,
    total_found: indexed.length,
    model_used: modelUsed,
  };
}
|
||||
Reference in New Issue
Block a user