Initial commit: Vision scanner for shelf/pantry product extraction
This commit is contained in:
80
src/gemini.ts
Normal file
80
src/gemini.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import { config } from "./config";
|
||||
|
||||
// Cached handle to the dynamically imported "@google/generative-ai" module.
// Starts as null and is filled in by getClient() the first time it runs, so
// the SDK is only loaded once a client is actually requested.
let genaiModule: typeof import("@google/generative-ai") | null = null;
|
||||
|
||||
/**
 * Create a Gemini SDK client for the configured API key.
 *
 * The "@google/generative-ai" module is imported dynamically the first time
 * this runs and is kept in `genaiModule` for later calls. A new client object
 * is constructed on every call; only the module itself is cached.
 *
 * @returns A `GoogleGenerativeAI` instance built with `config.geminiApiKey`.
 * @throws Error when `config.geminiApiKey` is falsy.
 */
async function getClient() {
  const apiKey = config.geminiApiKey;
  if (!apiKey) throw new Error("GEMINI_API_KEY not configured");

  // Reuse the already-loaded SDK module, or load and remember it now.
  const sdk = genaiModule ?? (genaiModule = await import("@google/generative-ai"));
  return new sdk.GoogleGenerativeAI(apiKey);
}
|
||||
|
||||
/**
 * Send an image + prompt to Gemini vision.
 *
 * The prompt and the image are sent together in a single `generateContent`
 * request to the model named by `config.geminiModel`.
 *
 * @param prompt - Text instruction sent alongside the image.
 * @param imageBase64 - Base64-encoded image, passed through unchanged as
 *   inline data. NOTE(review): presumably raw base64 without a `data:` URL
 *   prefix — confirm against callers.
 * @param mimeType - MIME type declared for the image. Defaults to
 *   "image/jpeg" (the value that used to be hard-coded), so existing
 *   two-argument callers behave exactly as before.
 * @returns The text of the model's response.
 * @throws Error when the API key is not configured (raised by `getClient`);
 *   errors from the SDK call are not caught here and propagate to the caller.
 */
export async function geminiVision(
  prompt: string,
  imageBase64: string,
  mimeType = "image/jpeg"
): Promise<string> {
  const ai = await getClient();
  const model = ai.getGenerativeModel({ model: config.geminiModel });

  const result = await model.generateContent([
    prompt,
    {
      inlineData: {
        mimeType,
        data: imageBase64,
      },
    },
  ]);

  return result.response.text();
}
|
||||
|
||||
/**
 * Identify ALL food/grocery products visible in a photo.
 *
 * Sends a fixed prompt plus the image to `geminiVision` and extracts product
 * objects from the text reply. Parsing is lenient because the model may wrap
 * the JSON in extra text:
 *   1. the span from the first "[" to the last "]" is tried as a JSON array;
 *   2. if that yields no usable products, the span from the first "{" to the
 *      last "}" is tried as a single product object.
 * Only entries that are objects with a truthy `name` are kept. No other
 * field is validated.
 *
 * @param imageBase64 - Base64-encoded image, forwarded to `geminiVision`.
 * @returns The products found; an empty array when none could be extracted.
 * @throws Whatever `geminiVision` throws; parse failures are never thrown.
 */
export async function geminiIdentifyProducts(
  imageBase64: string
): Promise<Record<string, unknown>[]> {
  const prompt = `Identify ALL food and grocery products visible in this photo. \
There may be one product or many (e.g. a grocery haul, a shelf, a receipt).

Return ONLY a JSON array of objects. Each object must have:
- "name": product name (string)
- "brand": brand name if visible, otherwise "" (string). Produce typically has no brand.
- "category": one of "produce", "dairy", "meat", "seafood", "bakery", "snacks", "beverages", "frozen", "pantry", "condiments", "other" (string)
- "is_organic": "yes" or "no" based on visible labels (string)

Return ONLY the JSON array, no other text.`;

  const raw = await geminiVision(prompt, imageBase64);

  const arrayCandidate = parseDelimitedJson(raw, "[", "]");
  const arrayParsed = Array.isArray(arrayCandidate);
  if (Array.isArray(arrayCandidate)) {
    const products = arrayCandidate.filter(isNamedProduct);
    if (products.length > 0) return products;
    // An array parsed but held no products. It may have been an array nested
    // inside a single product object (e.g. {"name": "...", "tags": [...]}),
    // so fall through to the single-object attempt instead of returning [].
  }

  const objectCandidate = parseDelimitedJson(raw, "{", "}");
  if (isNamedProduct(objectCandidate)) return [objectCandidate];

  // A well-formed array without usable products (e.g. "[]") is a valid
  // "nothing found" answer, so only warn when no array could be parsed.
  if (!arrayParsed) {
    console.warn(`Failed to parse Gemini product identification: ${raw.slice(0, 200)}`);
  }
  return [];
}

/** Parse the span from the first `open` to the last `close` in `text` as JSON; undefined if absent or invalid. */
function parseDelimitedJson(text: string, open: string, close: string): unknown {
  const start = text.indexOf(open);
  const end = text.lastIndexOf(close);
  if (start === -1 || end <= start) return undefined;
  try {
    return JSON.parse(text.slice(start, end + 1));
  } catch {
    // JSON.parse never yields undefined, so it is a safe "no result" marker.
    return undefined;
  }
}

/** Type guard: true for a non-null, non-array object that has a truthy `name` property. */
function isNamedProduct(value: unknown): value is Record<string, unknown> {
  return (
    typeof value === "object" &&
    value !== null &&
    !Array.isArray(value) &&
    Boolean((value as Record<string, unknown>).name)
  );
}
|
||||
Reference in New Issue
Block a user