fix: use true Jaccard similarity in wordOverlap (intersection/union)
Replaces the max(|A|, |B|) denominator with |A ∪ B| = |A| + |B| − |A ∩ B|, which is the correct Jaccard formula and avoids inflating similarity when both name sets have significant unique words.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -396,9 +396,10 @@ function wordOverlap(a: string, b: string): number {
|
|||||||
const setA = new Set(a.split(' ').filter(Boolean));
|
const setA = new Set(a.split(' ').filter(Boolean));
|
||||||
const setB = new Set(b.split(' ').filter(Boolean));
|
const setB = new Set(b.split(' ').filter(Boolean));
|
||||||
if (setA.size === 0 || setB.size === 0) return 0;
|
if (setA.size === 0 || setB.size === 0) return 0;
|
||||||
let common = 0;
|
let intersection = 0;
|
||||||
for (const w of setA) if (setB.has(w)) common++;
|
for (const w of setA) if (setB.has(w)) intersection++;
|
||||||
return common / Math.max(setA.size, setB.size);
|
const union = setA.size + setB.size - intersection;
|
||||||
|
return intersection / union;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user