Initial commit: Vision scanner for shelf/pantry product extraction

This commit is contained in:
2026-03-29 21:58:07 -04:00
commit 5de44e7579
19 changed files with 3673 additions and 0 deletions

84
src/parsing.ts Normal file
View File

@@ -0,0 +1,84 @@
/** Type guard: a plain JSON object (non-null and not an array). */
function isJsonRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse the span of `text` running from the first `open` to the last `close`.
 * Returns `undefined` when the delimiters are missing/misordered or the span
 * is not valid JSON.
 */
function parseDelimitedJson(text: string, open: string, close: string): unknown {
  const start = text.indexOf(open);
  const end = text.lastIndexOf(close);
  if (start === -1 || end <= start) return undefined;
  try {
    return JSON.parse(text.slice(start, end + 1)) as unknown;
  } catch {
    // Best-effort extraction: an unparseable span simply yields nothing.
    return undefined;
  }
}

/** Collect the plain-object elements of a JSON array embedded in `text`. */
function recordsFromArray(text: string): Record<string, unknown>[] {
  const parsed = parseDelimitedJson(text, "[", "]");
  return Array.isArray(parsed) ? parsed.filter(isJsonRecord) : [];
}

/** Collect a single JSON object embedded in `text`, if it has a truthy `name`. */
function recordsFromObject(text: string): Record<string, unknown>[] {
  const parsed = parseDelimitedJson(text, "{", "}");
  return isJsonRecord(parsed) && parsed.name ? [parsed] : [];
}

/**
 * Extract JSON records from raw model output texts and merge them into a
 * single list.
 *
 * Each text may wrap its JSON in extra prose. Two shapes are recognised:
 * - an array (first "[" through last "]"): every plain-object element is kept;
 * - a single object (first "{" through last "}"): kept only when it has a
 *   truthy `name` property.
 *
 * Whichever opening delimiter appears first in the text is tried first, so an
 * object that merely contains an array field (e.g.
 * `{"name": "x", "tags": ["a"]}`) is parsed as an object instead of being
 * mistaken for its inner array. If the first shape yields no records, the
 * other shape is tried. Texts that yield nothing either way are skipped.
 *
 * @param rawTexts Raw model output strings.
 * @returns All extracted records, in the order their texts were given.
 */
export function parseJsonItems(rawTexts: string[]): Record<string, unknown>[] {
  const allItems: Record<string, unknown>[] = [];
  for (const text of rawTexts) {
    const bracketAt = text.indexOf("[");
    const braceAt = text.indexOf("{");
    const objectFirst = braceAt !== -1 && (bracketAt === -1 || braceAt < bracketAt);
    const extractors = objectFirst
      ? [recordsFromObject, recordsFromArray]
      : [recordsFromArray, recordsFromObject];
    for (const extract of extractors) {
      const found = extract(text);
      if (found.length > 0) {
        allItems.push(...found);
        break;
      }
    }
  }
  return allItems;
}
/** Token-overlap ratio above which two names are treated as the same item. */
const DUPLICATE_OVERLAP_THRESHOLD = 0.8;

/**
 * Split a name into its set of lowercase, whitespace-separated tokens.
 * Empty tokens (produced by leading/trailing whitespace or an empty string)
 * are dropped so that padding never changes the resulting set.
 */
function tokenSet(name: string): Set<string> {
  return new Set(
    name
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token.length > 0)
  );
}

/**
 * Remove near-duplicate items based on token overlap (>80% = duplicate).
 *
 * Overlap is the number of shared tokens divided by the larger of the two
 * token counts. The first occurrence of each item is kept; later items that
 * overlap a kept item by more than the threshold are discarded. Items whose
 * `key` value is missing, falsy, or blank are dropped.
 *
 * @param items Records to deduplicate; the input array is not modified.
 * @param key Property holding the name to compare (defaults to "name").
 * @returns The retained items, in their original order.
 */
export function deduplicateItems(
  items: Record<string, unknown>[],
  key: string = "name"
): Record<string, unknown>[] {
  const unique: Record<string, unknown>[] = [];
  // Token sets of the kept items, index-aligned with `unique`, so each kept
  // name is tokenised once rather than on every comparison.
  const keptTokens: Set<string>[] = [];
  for (const item of items) {
    const tokens = tokenSet(String(item[key] || ""));
    if (tokens.size === 0) continue;
    const isDup = keptTokens.some((existing) => {
      let shared = 0;
      for (const token of tokens) {
        if (existing.has(token)) shared++;
      }
      return shared / Math.max(tokens.size, existing.size) > DUPLICATE_OVERLAP_THRESHOLD;
    });
    if (!isDup) {
      unique.push(item);
      keptTokens.push(tokens);
    }
  }
  return unique;
}