/** Type guard: true for a non-null, non-array object (i.e. a JSON object). */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse the substring of `text` that runs from the first `open` delimiter to
 * the last `close` delimiter.
 *
 * @returns The parsed value, or `undefined` when the delimiters are missing,
 *   out of order, or the enclosed text is not valid JSON.
 */
function parseDelimited(text: string, open: string, close: string): unknown {
  const start = text.indexOf(open);
  const end = text.lastIndexOf(close);
  if (start === -1 || end <= start) return undefined;
  try {
    return JSON.parse(text.slice(start, end + 1)) as unknown;
  } catch {
    // Best-effort extraction: unparseable text simply yields nothing.
    return undefined;
  }
}

/**
 * Extract JSON objects from raw model output texts and merge them into a
 * single list.
 *
 * For each text, the span from the first `[` to the last `]` is tried as a
 * JSON array; every JSON object found directly inside it is collected. If that
 * yields no objects (no array, invalid JSON, or an array of non-objects such
 * as the `tags` list inside a single object), the span from the first `{` to
 * the last `}` is tried as a single object, which is kept only when it has a
 * truthy `name` property.
 *
 * @param rawTexts Raw texts that may contain JSON surrounded by other prose.
 * @returns All extracted objects, in input order. Texts with no usable JSON
 *   contribute nothing; this function never throws on malformed input.
 */
export function parseJsonItems(rawTexts: string[]): Record<string, unknown>[] {
  const allItems: Record<string, unknown>[] = [];

  for (const text of rawTexts) {
    // Try array first.
    const asArray = parseDelimited(text, "[", "]");
    if (Array.isArray(asArray)) {
      let found = false;
      for (const entry of asArray) {
        if (isJsonObject(entry)) {
          allItems.push(entry);
          found = true;
        }
      }
      // Only stop here when the array actually produced records; otherwise
      // the brackets may belong to a field nested inside a single object.
      if (found) continue;
    }

    // Fall back to a single object.
    const asObject = parseDelimited(text, "{", "}");
    if (isJsonObject(asObject) && asObject["name"]) {
      allItems.push(asObject);
    }
  }

  return allItems;
}

/**
 * Split a name into its set of lower-cased, whitespace-separated tokens.
 * Empty tokens (from leading/trailing whitespace) are discarded.
 */
function tokenSet(name: string): Set<string> {
  return new Set(
    name
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token.length > 0)
  );
}

/**
 * Remove near-duplicate items based on token overlap of their `key` field.
 *
 * Two items are duplicates when the number of shared tokens divided by the
 * size of the larger token set is strictly greater than `threshold`. The
 * first occurrence is kept. Items whose `key` value is missing, falsy, or
 * whitespace-only are dropped.
 *
 * @param items Items to deduplicate; the array is not mutated.
 * @param key Property holding the name to compare (default `"name"`).
 * @param threshold Overlap ratio above which an item counts as a duplicate
 *   (default `0.8`, i.e. >80% overlap).
 * @returns The retained items, in their original order.
 */
export function deduplicateItems(
  items: Record<string, unknown>[],
  key: string = "name",
  threshold: number = 0.8
): Record<string, unknown>[] {
  const unique: Record<string, unknown>[] = [];
  // Token sets of the retained items, index-aligned with `unique`, so they
  // are computed once instead of on every comparison.
  const uniqueTokens: Set<string>[] = [];

  for (const item of items) {
    const tokens = tokenSet(String(item[key] || ""));
    if (tokens.size === 0) continue;

    const isDup = uniqueTokens.some((existing) => {
      let shared = 0;
      for (const token of tokens) {
        if (existing.has(token)) shared++;
      }
      return shared / Math.max(tokens.size, existing.size) > threshold;
    });

    if (!isDup) {
      unique.push(item);
      uniqueTokens.push(tokens);
    }
  }

  return unique;
}