fix: use true Jaccard similarity in wordOverlap (intersection/union)

Replaces the max(|A|, |B|) denominator with |A∪B| = |A| + |B| - |A∩B|,
which is the correct Jaccard formula and avoids inflating similarity
when both name sets have significant unique words.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-04-03 16:25:24 -04:00
parent 3ebbc3732f
commit 8075072c24

View File

@@ -396,9 +396,10 @@ function wordOverlap(a: string, b: string): number {
const setA = new Set(a.split(' ').filter(Boolean));
const setB = new Set(b.split(' ').filter(Boolean));
if (setA.size === 0 || setB.size === 0) return 0;
let common = 0;
for (const w of setA) if (setB.has(w)) common++;
return common / Math.max(setA.size, setB.size);
let intersection = 0;
for (const w of setA) if (setB.has(w)) intersection++;
const union = setA.size + setB.size - intersection;
return intersection / union;
}
/**