/**
 * Extract JSON items from raw model output texts and merge them into one list.
 *
 * Each text is handled independently, in two attempts:
 *  1. The span from the first `[` to the last `]` is parsed as a JSON array and
 *     every plain-object element (not `null`, not a nested array) is collected.
 *  2. If attempt 1 collected nothing (no brackets, invalid JSON, or an array
 *     without object elements), the span from the first `{` to the last `}` is
 *     parsed as a single object. It is collected only if its `name` is truthy.
 *
 * Parsing is best effort: texts that match neither shape are skipped silently.
 *
 * @param rawTexts - Raw model outputs; JSON may be surrounded by other text.
 * @returns The collected objects, in the order they were encountered.
 */
export function parseJsonItems(rawTexts: string[]): Record<string, unknown>[] {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  // Parse text[first `open` .. last `close`]; `undefined` when the delimiters
  // are missing/misordered or the span is not valid JSON.
  const parseSpan = (text: string, open: string, close: string): unknown => {
    const start = text.indexOf(open);
    const end = text.lastIndexOf(close);
    if (start === -1 || end <= start) return undefined;
    try {
      return JSON.parse(text.slice(start, end + 1)) as unknown;
    } catch {
      return undefined;
    }
  };

  const allItems: Record<string, unknown>[] = [];

  for (const text of rawTexts) {
    // Attempt 1: a JSON array of objects.
    const parsedArray = parseSpan(text, "[", "]");
    if (Array.isArray(parsedArray)) {
      const records = (parsedArray as unknown[]).filter(isRecord);
      if (records.length > 0) {
        for (const record of records) {
          allItems.push(record);
        }
        continue;
      }
      // The brackets held no objects (e.g. they belong to a primitive array
      // nested inside a single object), so still try the object form below.
    }

    // Attempt 2: a single JSON object, accepted only when it carries a name.
    const parsedObject = parseSpan(text, "{", "}");
    if (isRecord(parsedObject) && parsedObject.name) {
      allItems.push(parsedObject);
    }
  }

  return allItems;
}

/**
 * Split a name into its set of lowercase, whitespace-separated tokens.
 *
 * Empty tokens (produced by leading/trailing whitespace or an empty string)
 * are discarded, so a blank name yields an empty set.
 */
function tokenSet(name: string): Set<string> {
  return new Set(
    name
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token !== "")
  );
}

/**
 * Remove near-duplicate items based on token overlap (>80% = duplicate).
 *
 * Two items are compared by the lowercase whitespace-separated tokens of their
 * `key` field. Overlap is `|shared tokens| / max(|tokens A|, |tokens B|)`; an
 * item whose overlap with any already-kept item is strictly greater than 0.8
 * is dropped. The first occurrence wins and input order is preserved.
 *
 * Items whose `key` value is falsy or contains only whitespace are dropped.
 *
 * @param items - Items to deduplicate; the array itself is not modified.
 * @param key - Field holding the name to compare (default `"name"`).
 * @returns A new array with the kept items (same object references).
 */
export function deduplicateItems(
  items: Record<string, unknown>[],
  key: string = "name"
): Record<string, unknown>[] {
  const unique: Record<string, unknown>[] = [];
  // Token sets of the kept items, index-aligned with `unique`, so each kept
  // name is tokenized once (and identically to the candidates).
  const keptTokenSets: Set<string>[] = [];

  for (const item of items) {
    const tokens = tokenSet(String(item[key] || ""));
    if (tokens.size === 0) continue; // missing or blank name

    const isDup = keptTokenSets.some((existingTokens) => {
      let shared = 0;
      for (const token of tokens) {
        if (existingTokens.has(token)) shared++;
      }
      return shared / Math.max(tokens.size, existingTokens.size) > 0.8;
    });

    if (!isDup) {
      unique.push(item);
      keptTokenSets.push(tokens);
    }
  }

  return unique;
}