81 lines
2.3 KiB
TypeScript
81 lines
2.3 KiB
TypeScript
|
|
import { config } from "./config";
|
||
|
|
|
||
|
|
// Cached "@google/generative-ai" module. Stays null until getClient() performs
// the dynamic import, after which the loaded module is reused.
let genaiModule: typeof import("@google/generative-ai") | null = null;
|
||
|
|
|
||
|
|
/**
 * Build a Gemini client from the configured API key.
 *
 * The SDK is imported on demand; the loaded module is stored in
 * `genaiModule` so later calls skip the import. A new client instance is
 * constructed on every call.
 *
 * @throws Error when `config.geminiApiKey` is not set.
 */
async function getClient() {
  // Guard first: without a key there is no reason to load the SDK.
  if (!config.geminiApiKey) throw new Error("GEMINI_API_KEY not configured");

  const sdk = genaiModule ?? (genaiModule = await import("@google/generative-ai"));
  return new sdk.GoogleGenerativeAI(config.geminiApiKey);
}
|
||
|
|
|
||
|
|
/**
 * Send an image + prompt to Gemini vision.
 *
 * @param prompt - Text instruction sent alongside the image.
 * @param imageBase64 - Base64-encoded image bytes, sent as inline data.
 * @param mimeType - MIME type declared for the image. Defaults to
 *   "image/jpeg"; pass e.g. "image/png" or "image/webp" for other formats.
 * @returns The text of the model's response.
 * @throws Error when the Gemini API key is not configured (raised by
 *   `getClient`); rejections from the SDK call are propagated unchanged.
 */
export async function geminiVision(
  prompt: string,
  imageBase64: string,
  mimeType: string = "image/jpeg"
): Promise<string> {
  const ai = await getClient();
  const model = ai.getGenerativeModel({ model: config.geminiModel });

  // The request is a two-part message: the text prompt followed by the image.
  const result = await model.generateContent([
    prompt,
    {
      inlineData: {
        mimeType,
        data: imageBase64,
      },
    },
  ]);

  return result.response.text();
}
|
||
|
|
|
||
|
|
/**
 * Identify ALL food/grocery products visible in a photo.
 *
 * Asks Gemini for a JSON array of product objects and extracts it from the
 * reply leniently, since the model may wrap the JSON in prose or code fences:
 *
 * 1. Parse the outermost `[`…`]` span and keep entries that are objects with
 *    a truthy `name`.
 * 2. If that yields nothing, parse the outermost `{`…`}` span as a single
 *    product (covers a bare-object reply, including one that happens to
 *    contain a nested array).
 * 3. If neither yields a product, return an empty list; a warning is logged
 *    only when the reply contained no parseable JSON array at all.
 *
 * @param imageBase64 - Base64-encoded image bytes, forwarded to `geminiVision`.
 * @returns The identified products; empty when none could be extracted.
 */
export async function geminiIdentifyProducts(
  imageBase64: string
): Promise<Record<string, unknown>[]> {
  const prompt = `Identify ALL food and grocery products visible in this photo. \
There may be one product or many (e.g. a grocery haul, a shelf, a receipt).

Return ONLY a JSON array of objects. Each object must have:
- "name": product name (string)
- "brand": brand name if visible, otherwise "" (string). Produce typically has no brand.
- "category": one of "produce", "dairy", "meat", "seafood", "bakery", "snacks", "beverages", "frozen", "pantry", "condiments", "other" (string)
- "is_organic": "yes" or "no" based on visible labels (string)

Return ONLY the JSON array, no other text.`;

  const raw = await geminiVision(prompt, imageBase64);

  // Preferred shape: a JSON array of product objects.
  const parsedArray = parseJsonSpan(raw, "[", "]");
  const products = Array.isArray(parsedArray) ? parsedArray.filter(isNamedRecord) : [];
  if (products.length > 0) return products;

  // Fallback: a single bare object. This runs even when an array span parsed
  // but held no products, because that span may have been a nested field of
  // the one object the model returned.
  const parsedObject = parseJsonSpan(raw, "{", "}");
  if (isNamedRecord(parsedObject)) return [parsedObject];

  // A well-formed array with no usable entries is a legitimate "nothing
  // found" answer, not a parse failure, so it is returned without a warning.
  if (Array.isArray(parsedArray)) return [];

  console.warn(`Failed to parse Gemini product identification: ${raw.slice(0, 200)}`);
  return [];
}

/**
 * Parse the span of `text` from the first `open` to the last `close` as JSON.
 * Returns `undefined` when there is no such span or it is not valid JSON
 * (`JSON.parse` itself never produces `undefined`, so the sentinel is unambiguous).
 */
function parseJsonSpan(text: string, open: string, close: string): unknown {
  const start = text.indexOf(open);
  const end = text.lastIndexOf(close);
  if (start === -1 || end <= start) return undefined;
  try {
    return JSON.parse(text.slice(start, end + 1));
  } catch {
    // Best-effort extraction: malformed JSON simply means "no result here".
    return undefined;
  }
}

/** True when `value` is a non-null object carrying a truthy `name` field. */
function isNamedRecord(value: unknown): value is Record<string, unknown> {
  return (
    typeof value === "object" &&
    value !== null &&
    Boolean((value as Record<string, unknown>).name)
  );
}
|