feat: add name normalizer and church matcher for HK import
normalizeName strips noise words (church/parish/chapel/etc.), accents, and punctuation for robust name comparison. findMatch uses a word-overlap Jaccard score (threshold 0.4) with an address-prefix fallback for Chinese-named churches where English name overlap may be low. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,8 @@ import {
|
||||
parseScheduleLine,
|
||||
parseWeekdayLine,
|
||||
parseEntry,
|
||||
normalizeName,
|
||||
findMatch,
|
||||
} from './import-hk-parishes.js';
|
||||
|
||||
// ─── Task 2: Entry splitter and name extractor ────────────────────────────────
|
||||
@@ -208,3 +210,35 @@ test('parseEntry extracts names, fields, and schedules from a full entry', () =>
|
||||
const weekday = entry.schedules.filter(s => s.dayOfWeek >= 1 && s.dayOfWeek <= 5);
|
||||
assert.equal(weekday.length, 5);
|
||||
});
|
||||
|
||||
// ─── Task 8: Name normalizer + matcher ───────────────────────────────────────
|
||||
|
||||
// Table-driven check of normalizeName: each row pairs a raw church name with
// the lowercased, noise-word-free string the normalizer is expected to return.
test('normalizeName strips noise words and lowercases', () => {
  const cases = [
    ['HOLY CROSS CHURCH', 'holy cross'],
    ['Our Lady Of Mount Carmel Church', 'mount carmel'],
    ["St. Joseph's Parish", 'joseph'],
    ['Salesian Mass Centre', 'salesian'],
  ];

  // Rows are evaluated in order, so the first failing row aborts the test.
  for (const [rawName, expected] of cases) {
    assert.equal(normalizeName(rawName), expected);
  }
});
|
||||
|
||||
// Verifies that findMatch picks the existing record whose name corresponds to
// the imported name, given two candidate records.
// NOTE(review): both lookups also pass an address identical to the expected
// record's address, so this test may not isolate name matching from the
// address fallback — confirm against findMatch's implementation.
test('findMatch matches by name overlap', () => {
  const holyCross = {
    id: '1',
    name: 'Holy Cross (Sai Wan Ho)',
    address: '1 Holy Cross Path',
    phone: null,
    email: null,
  };
  const stJoseph = {
    id: '2',
    name: 'St Joseph (Central)',
    address: '37 Garden Road',
    phone: null,
    email: null,
  };
  const existing = [holyCross, stJoseph];

  const holyCrossHit = findMatch('HOLY CROSS CHURCH', '1 Holy Cross Path', existing);
  assert.equal(holyCrossHit?.id, '1');

  const stJosephHit = findMatch("St. Joseph's Church", '37 Garden Road', existing);
  assert.equal(stJosephHit?.id, '2');
});
|
||||
|
||||
// The existing record's name leads with Chinese text and its address is a
// prefix of the imported address; the test expects findMatch to still return
// that record.
test('findMatch falls back to address prefix match', () => {
  const mountCarmel = {
    id: '3',
    name: '聖母聖衣堂 (Our Lady of Mount Carmel Wanchai)',
    address: 'No.1, Star Street',
    phone: null,
    email: null,
  };
  const existing = [mountCarmel];

  const importedName = 'Our Lady Of Mount Carmel Church';
  const importedAddress = 'No.1, Star Street, Wan Chai';
  const hit = findMatch(importedName, importedAddress, existing);

  assert.equal(hit?.id, '3');
});
|
||||
|
||||
// An imported church that shares neither name nor address with the only
// existing record must produce no match.
test('findMatch returns null for no match', () => {
  const holyCross = {
    id: '1',
    name: 'Holy Cross (Sai Wan Ho)',
    address: '1 Holy Cross Path',
    phone: null,
    email: null,
  };
  const existing = [holyCross];

  const result = findMatch('Salesian Mass Centre', 'Salesian School, 16 Chai Wan Road', existing);

  assert.equal(result, null);
});
|
||||
|
||||
Reference in New Issue
Block a user