Initial commit: Vision scanner for shelf/pantry product extraction

This commit is contained in:
2026-03-29 21:58:07 -04:00
commit 5de44e7579
19 changed files with 3673 additions and 0 deletions

80
src/gemini.ts Normal file
View File

@@ -0,0 +1,80 @@
import { config } from "./config";
// Module-level cache for the dynamically imported SDK; stays null until the
// first successful getClient() call.
let genaiModule: typeof import("@google/generative-ai") | null = null;

/**
 * Build a Gemini SDK client from the configured API key.
 *
 * The SDK module is loaded with a dynamic import on first use and cached in
 * `genaiModule`; a fresh `GoogleGenerativeAI` instance is constructed on
 * every call.
 *
 * @returns A `GoogleGenerativeAI` client created with `config.geminiApiKey`.
 * @throws Error when `config.geminiApiKey` is empty or unset.
 */
async function getClient() {
  const apiKey = config.geminiApiKey;
  if (!apiKey) {
    throw new Error("GEMINI_API_KEY not configured");
  }

  // Reuse the cached module if present, otherwise import and remember it.
  const sdk = genaiModule ?? (genaiModule = await import("@google/generative-ai"));
  return new sdk.GoogleGenerativeAI(apiKey);
}
/**
 * Send an image plus a text prompt to the configured Gemini model and return
 * the text of its reply.
 *
 * @param prompt - Instruction text sent as the first content part.
 * @param imageBase64 - Base64-encoded image bytes, sent as inline data.
 *   NOTE(review): presumably raw base64 without a `data:` URL prefix —
 *   confirm against callers.
 * @param mimeType - MIME type declared for the image. Defaults to
 *   `"image/jpeg"` (the previously hard-coded value) so existing callers are
 *   unaffected; pass e.g. `"image/png"` for non-JPEG input.
 * @returns The reply text as produced by `result.response.text()`.
 * @throws Error when the API key is not configured (raised by `getClient`);
 *   any error from the SDK call propagates unchanged.
 */
export async function geminiVision(
  prompt: string,
  imageBase64: string,
  mimeType = "image/jpeg"
): Promise<string> {
  const ai = await getClient();
  const model = ai.getGenerativeModel({ model: config.geminiModel });

  const result = await model.generateContent([
    prompt,
    {
      inlineData: {
        mimeType,
        data: imageBase64,
      },
    },
  ]);

  return result.response.text();
}
/**
 * Type guard: true for a non-null object whose `name` property is truthy.
 */
function isNamedRecord(value: unknown): value is Record<string, unknown> {
  return (
    typeof value === "object" &&
    value !== null &&
    Boolean((value as Record<string, unknown>).name)
  );
}

/**
 * Parse the span of `text` from the first `open` to the last `close` as JSON.
 *
 * @returns The parsed value, or `undefined` when the delimiters are missing,
 *   out of order, or the span is not valid JSON. `JSON.parse` never yields
 *   `undefined`, so that value unambiguously means "no parse".
 */
function parseJsonSpan(text: string, open: string, close: string): unknown {
  const start = text.indexOf(open);
  const end = text.lastIndexOf(close);
  if (start === -1 || end <= start) return undefined;
  try {
    return JSON.parse(text.slice(start, end + 1));
  } catch {
    return undefined;
  }
}

/**
 * Identify ALL food/grocery products visible in a photo.
 *
 * Sends a fixed prompt with the image to `geminiVision` and extracts product
 * records from the reply text. The reply is searched for a JSON array first
 * (first `[` to last `]`); if that yields no usable product, a single JSON
 * object (first `{` to last `}`) is tried. Only objects with a truthy `name`
 * are kept; no other field is validated.
 *
 * @param imageBase64 - Base64-encoded image, passed through to `geminiVision`.
 * @returns The product objects found, or `[]` when none could be extracted.
 *   A warning with the first 200 characters of the reply is logged only when
 *   neither a JSON array nor a named object could be parsed.
 * @throws Whatever `geminiVision` throws; parse failures never throw.
 */
export async function geminiIdentifyProducts(
  imageBase64: string
): Promise<Record<string, unknown>[]> {
  const prompt = `Identify ALL food and grocery products visible in this photo. \
There may be one product or many (e.g. a grocery haul, a shelf, a receipt).
Return ONLY a JSON array of objects. Each object must have:
- "name": product name (string)
- "brand": brand name if visible, otherwise "" (string). Produce typically has no brand.
- "category": one of "produce", "dairy", "meat", "seafood", "bakery", "snacks", "beverages", "frozen", "pantry", "condiments", "other" (string)
- "is_organic": "yes" or "no" based on visible labels (string)
Return ONLY the JSON array, no other text.`;

  const raw = await geminiVision(prompt, imageBase64);

  const parsedArray = parseJsonSpan(raw, "[", "]");
  const arrayParsed = Array.isArray(parsedArray);
  const products = arrayParsed ? parsedArray.filter(isNamedRecord) : [];
  if (products.length > 0) return products;

  // The bracket span may have been an array nested inside a single product
  // object (e.g. {"name": "Milk", "tags": ["x"]}); in that case the array
  // holds no products, so still give the single-object form a chance.
  const parsedObject = parseJsonSpan(raw, "{", "}");
  if (isNamedRecord(parsedObject)) return [parsedObject];

  // A well-formed array with no named items (including "[]") is a legitimate
  // "nothing found" answer; only warn when no array could be parsed at all.
  if (!arrayParsed) {
    console.warn(`Failed to parse Gemini product identification: ${raw.slice(0, 200)}`);
  }
  return [];
}