Initial commit: Vision scanner for shelf/pantry product extraction
This commit is contained in:
84
src/parsing.ts
Normal file
84
src/parsing.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
 * Type guard for a plain JSON object: non-null, `typeof "object"`, and not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse the substring of `text` that runs from the first `open` character to
 * the last `close` character as JSON.
 *
 * @returns The parsed value, or `undefined` when the delimiters are missing,
 *          out of order, or the enclosed text is not valid JSON.
 */
function parseDelimited(text: string, open: string, close: string): unknown {
  const start = text.indexOf(open);
  const end = text.lastIndexOf(close);
  if (start === -1 || end <= start) return undefined;

  try {
    return JSON.parse(text.slice(start, end + 1)) as unknown;
  } catch {
    // Best-effort extraction: malformed JSON just means "nothing found here".
    return undefined;
  }
}

/**
 * Extract JSON items from raw model output texts and merge them into a single list.
 *
 * For each text, two strategies are tried in order:
 * 1. Parse the span from the first `[` to the last `]` as a JSON array and keep
 *    every element that is a plain object (primitives, `null` and nested arrays
 *    are ignored).
 * 2. If step 1 produced no objects — no brackets, invalid JSON, or an array
 *    without object elements such as a nested `"tags": ["a", "b"]` field —
 *    parse the span from the first `{` to the last `}` as a single object and
 *    keep it only when it has a truthy `name` property.
 *
 * Texts for which neither strategy succeeds contribute nothing; no error is thrown.
 *
 * @param rawTexts - Raw model responses; the JSON may be surrounded by other text.
 * @returns All extracted items, in the order the texts (and their elements) appear.
 */
export function parseJsonItems(rawTexts: string[]): Record<string, unknown>[] {
  const allItems: Record<string, unknown>[] = [];

  for (const text of rawTexts) {
    // Try array first.
    const parsedArray = parseDelimited(text, "[", "]");
    const records: Record<string, unknown>[] = Array.isArray(parsedArray)
      ? parsedArray.filter(isRecord)
      : [];

    if (records.length > 0) {
      for (const record of records) {
        allItems.push(record);
      }
      continue;
    }

    // The array attempt found no objects (it may have matched a nested array
    // inside a single object), so try the text as a single object.
    const parsedObject = parseDelimited(text, "{", "}");
    if (isRecord(parsedObject) && parsedObject.name) {
      allItems.push(parsedObject);
    }
  }

  return allItems;
}
|
||||
|
||||
/**
 * Split a name into a set of lowercase, whitespace-separated tokens.
 *
 * Empty tokens are dropped, so leading or trailing whitespace does not add a
 * spurious `""` token and a blank name yields an empty set.
 */
function tokenSet(name: string): Set<string> {
  return new Set(
    name
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token.length > 0)
  );
}

/**
 * Remove near-duplicate items based on token overlap of their names.
 *
 * Each item's `key` value is converted to a string and tokenized
 * (case-insensitive, whitespace-separated). The overlap between two names is
 * the number of shared tokens divided by the size of the larger token set. An
 * item is dropped when its overlap with any already-kept item is strictly
 * greater than `threshold`; the first occurrence wins.
 *
 * Items whose `key` value is missing, falsy, or contains only whitespace are
 * discarded.
 *
 * @param items - Items to filter; the array and its elements are not modified.
 * @param key - Property holding the name to compare. Defaults to `"name"`.
 * @param threshold - Overlap ratio above which two items count as duplicates.
 *                    Defaults to `0.8`.
 * @returns The kept items (same object references), in their original order.
 */
export function deduplicateItems(
  items: Record<string, unknown>[],
  key: string = "name",
  threshold: number = 0.8
): Record<string, unknown>[] {
  const unique: Record<string, unknown>[] = [];
  // Token sets of the kept items, index-aligned with `unique`, so every name
  // is tokenized exactly once instead of once per comparison.
  const uniqueTokens: Set<string>[] = [];

  for (const item of items) {
    const tokens = tokenSet(String(item[key] || ""));
    // A blank or whitespace-only name has no tokens and cannot be compared.
    if (tokens.size === 0) continue;

    const isDup = uniqueTokens.some((existingTokens) => {
      let shared = 0;
      for (const token of tokens) {
        if (existingTokens.has(token)) shared++;
      }
      return shared / Math.max(tokens.size, existingTokens.size) > threshold;
    });

    if (!isDup) {
      unique.push(item);
      uniqueTokens.push(tokens);
    }
  }

  return unique;
}
|
||||
Reference in New Issue
Block a user