feat: add HK parish parser functions (Tasks 2-6) with tests

Implements entry splitter, name extractor, field extractor, time normalizer,
schedule line parser, and weekday day-prefix parser. All 26 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-04-03 16:06:26 -04:00
parent 9aea12f4b0
commit 328d146201
2 changed files with 447 additions and 0 deletions

View File

@@ -0,0 +1,188 @@
import { test } from 'node:test';
import assert from 'node:assert/strict';
import {
splitEntries,
extractNames,
extractFields,
normalizeTime,
parseScheduleLine,
parseWeekdayLine,
} from './import-hk-parishes.js';
// ─── Task 2: Entry splitter and name extractor ────────────────────────────────

// Fixture: a page heading followed by two entries. Each entry has its name
// lines, then the Path/Close marker pair, then an Address field.
test('splitEntries splits on Path/Close boundary', () => {
  const raw = [
    'HONG KONG CHURCHES',
    '',
    'Parish A',
    'Church A',
    'Path',
    'Close',
    'Address',
    '1 Main St',
    '',
    'Parish B',
    'Church B',
    'Path',
    'Close',
    'Address',
    '2 Side St',
    '', // keeps the trailing newline of the raw text
  ].join('\n');

  const parts = splitEntries(raw);

  assert.equal(parts.length, 2);
  // The n-th entry must contain the n-th church name.
  ['Church A', 'Church B'].forEach((churchName, index) => {
    assert.ok(parts[index].includes(churchName));
  });
});
// Two name lines: the first is expected back as parishName, the second as
// locationName.
test('extractNames returns locationName and parishName', () => {
  const { locationName, parishName } = extractNames('Holy Cross Parish\nHOLY CROSS CHURCH');
  assert.equal(locationName, 'HOLY CROSS CHURCH');
  assert.equal(parishName, 'Holy Cross Parish');
});

// A leading "Share" line and a space-indented duplicate of the location name
// must not end up in either returned field.
test('extractNames strips Share and leading-space artifacts', () => {
  const { locationName, parishName } = extractNames(
    "Share\n Carmelite Monastery\nSt. Anne's Parish\nCarmelite Monastery",
  );
  assert.equal(locationName, 'Carmelite Monastery');
  assert.equal(parishName, "St. Anne's Parish");
});

// With only one name line, that line is the location and there is no parish.
test('extractNames handles single name line', () => {
  const { locationName, parishName } = extractNames('Cathedral Parish');
  assert.equal(locationName, 'Cathedral Parish');
  assert.equal(parishName, null);
});
// ─── Task 3: Field extractor ──────────────────────────────────────────────────

// Full entry body: every labelled field is present, each label on its own line
// followed by its value and a blank line.
test('extractFields parses address, phone, email', () => {
  const body = [
    'Address',
    '1 Holy Cross Path, Shau Kei Wan, Hong Kong',
    '',
    'Phone',
    '8522560-1823',
    '',
    'Fax',
    '8522535-8246',
    '',
    'Email',
    'holycrosshk@gmail.com',
    '',
    'Website',
    'Click Here',
    '',
    'Mass Time',
    '', // keeps the trailing newline of the raw text
  ].join('\n');

  const { address, phone, email } = extractFields(body);

  assert.equal(address, '1 Holy Cross Path, Shau Kei Wan, Hong Kong');
  assert.equal(phone, '(852)2560-1823');
  assert.equal(email, 'holycrosshk@gmail.com');
});

// Only Address is present; the absent fields are expected to come back as null.
test('extractFields handles missing fields gracefully', () => {
  const body = [
    'Address',
    'Salesian School, 16 Chai Wan Road, Hong Kong.',
    '',
    'Mass Time',
    '',
  ].join('\n');

  const { address, phone, email } = extractFields(body);

  assert.equal(address, 'Salesian School, 16 Chai Wan Road, Hong Kong.');
  assert.equal(phone, null);
  assert.equal(email, null);
});

// NOTE(review): the title mentions full-width parens, but the fixture below
// contains no parenthesis characters at all — confirm the fixture matches the
// raw data this case was written for.
test('extractFields strips full-width parens from phone', () => {
  const { phone } = extractFields('Phone\n8522819-5777, 2819-5845\n\n');
  assert.equal(phone, '(852)2819-5777, 2819-5845');
});
// ─── Task 4: Time normalizer ──────────────────────────────────────────────────

// Each test below walks a table of [input, expected] pairs in order; expected
// values are zero-padded 24-hour "HH:MM" strings, or null for non-times.

test('normalizeTime handles am/pm with spaces', () => {
  const cases = [
    ['8:00am', '08:00'],
    ['11:30 am', '11:30'],
    ['6:00pm', '18:00'],
    ['6:30 pm', '18:30'],
  ];
  for (const [input, expected] of cases) {
    assert.equal(normalizeTime(input), expected);
  }
});

test('normalizeTime handles a.m./p.m. format', () => {
  const cases = [
    ['7:00 a.m.', '07:00'],
    ['7:45 a.m.', '07:45'],
    ['6:00 p.m.', '18:00'],
  ];
  for (const [input, expected] of cases) {
    assert.equal(normalizeTime(input), expected);
  }
});

test('normalizeTime handles noon', () => {
  const cases = [
    ['12:00 noon', '12:00'],
    ['12:30 pm', '12:30'], // 12 pm stays in the 12 o'clock hour
  ];
  for (const [input, expected] of cases) {
    assert.equal(normalizeTime(input), expected);
  }
});

test('normalizeTime handles 12:00am as midnight', () => {
  const midnight = normalizeTime('12:00am');
  assert.equal(midnight, '00:00');
});

test('normalizeTime returns null for unrecognised input', () => {
  for (const input of ['Monday', '']) {
    assert.equal(normalizeTime(input), null);
  }
});
// ─── Task 5: Schedule line parser ────────────────────────────────────────────

// parseScheduleLine(line, day) is called with a raw schedule line and a
// day-of-week number; the first test pins the full shape of one returned slot.

test('parseScheduleLine parses single time with language', () => {
  const slots = parseScheduleLine('9:30am (English)', 0);
  const expectedSlot = { dayOfWeek: 0, time: '09:30', language: 'English', notes: null };
  assert.equal(slots.length, 1);
  assert.deepEqual(slots[0], expectedSlot);
});

// A trailing "(Language)" follows several comma-separated times.
test('parseScheduleLine parses multiple comma-separated times', () => {
  const slots = parseScheduleLine('8:00am,10:30 am (Cantonese)', 0);
  assert.equal(slots.length, 2);
  assert.equal(slots[0].time, '08:00');
  assert.equal(slots[1].time, '10:30');
  assert.equal(slots[1].language, 'Cantonese');
});

// The language is still picked up when the ")" is missing.
test('parseScheduleLine handles missing closing paren', () => {
  const slots = parseScheduleLine('9:30 am (Cantonese', 0);
  assert.equal(slots[0].language, 'Cantonese');
});

test('parseScheduleLine defaults language to English when not specified', () => {
  const slots = parseScheduleLine('8:00am', 0);
  assert.equal(slots[0].language, 'English');
});

// Free text between a time and the next comma is expected in `notes`.
test('parseScheduleLine stores embedded note text', () => {
  const line = '9:00 am Sunday School & Family Mass,11:30am (English)';
  const slots = parseScheduleLine(line, 0);
  assert.equal(slots.length, 2);
  assert.equal(slots[0].time, '09:00');
  assert.equal(slots[0].notes, 'Sunday School & Family Mass');
});

// A day name in front of each time must not prevent the times being parsed.
test('parseScheduleLine handles Saturday anticipated format variations', () => {
  const line = 'Saturday 3:45 pm,Saturday 6:30 pm (Cantonese)';
  const slots = parseScheduleLine(line, 6);
  assert.equal(slots.length, 2);
  assert.equal(slots[0].time, '15:45');
  assert.equal(slots[1].time, '18:30');
});

// The day name may also trail the time as "on Saturday".
test('parseScheduleLine handles "on Saturday" suffix format', () => {
  const slots = parseScheduleLine('6:00pm on Saturday (Cantonese)', 6);
  assert.equal(slots.length, 1);
  assert.equal(slots[0].time, '18:00');
  assert.equal(slots[0].language, 'Cantonese');
});

// A "<condition>:" prefix ahead of the time should surface in `notes`.
test('parseScheduleLine handles conditional prefix as notes', () => {
  const slots = parseScheduleLine('5th Sunday of the month: 7:15 am (Tagalog)', 0);
  assert.equal(slots.length, 1);
  assert.equal(slots[0].time, '07:15');
  assert.equal(slots[0].language, 'Tagalog');
  // Optional chaining makes a null `notes` fail the assertion instead of throwing.
  assert.ok(slots[0].notes?.includes('5th Sunday'));
});
// ─── Task 6: Weekday day-prefix parser ───────────────────────────────────────

/**
 * Collects the dayOfWeek of every parsed slot in ascending numeric order.
 *
 * An explicit comparator is used because Array.prototype.sort without one
 * compares elements as strings; that only happens to work while every value
 * is a single digit.
 *
 * @param {Array<{dayOfWeek: number}>} slots - slots returned by parseWeekdayLine
 * @returns {number[]} day numbers sorted ascending
 */
function sortedDays(slots) {
  return slots.map((slot) => slot.dayOfWeek).sort((a, b) => a - b);
}

// Without a day prefix the line applies to Monday through Friday (1-5), and the
// slots are expected back in that order (no sorting here on purpose).
test('parseWeekdayLine no prefix = all weekdays Mon-Fri', () => {
  const results = parseWeekdayLine('7:15 am (Cantonese)');
  assert.equal(results.length, 5);
  assert.ok(results.every((r) => r.time === '07:15'));
  assert.ok(results.every((r) => r.language === 'Cantonese'));
  assert.deepEqual(results.map((r) => r.dayOfWeek), [1, 2, 3, 4, 5]);
});

// Comma-separated dotted abbreviations select exactly the listed days.
test('parseWeekdayLine abbreviation list', () => {
  const results = parseWeekdayLine('Mon., Tue., Thur. 8:00 a.m. (Cantonese)');
  assert.equal(results.length, 3);
  assert.deepEqual(sortedDays(results), [1, 2, 4]);
});

// "X. to Y." expands to the inclusive range of days.
test('parseWeekdayLine abbreviation range Mon. to Sat.', () => {
  const results = parseWeekdayLine('Mon. to Sat. 9:15 am (English)');
  assert.equal(results.length, 6);
  assert.deepEqual(sortedDays(results), [1, 2, 3, 4, 5, 6]);
});

// Full day names with a trailing colon are accepted for ranges as well.
test('parseWeekdayLine full-word range Monday to Friday', () => {
  const results = parseWeekdayLine('Monday to Friday: 12:00 noon (English)');
  assert.equal(results.length, 5);
  assert.ok(results.every((r) => r.time === '12:00'));
});

// "&" may join the last day of a list.
test('parseWeekdayLine ampersand separator', () => {
  const results = parseWeekdayLine('Tue., Thur. & Sat. 9:45 a.m. (Cantonese)');
  assert.equal(results.length, 3);
  assert.deepEqual(sortedDays(results), [2, 4, 6]);
});

// Three comma-separated groups on one line: Mon-Sat (6) + Mon-Fri (5) + Mon-Fri (5) = 16 slots.
test('parseWeekdayLine multiple time groups on one line', () => {
  const results = parseWeekdayLine('Monday to Saturday: 7:45 am,Monday to Friday: 12:00 noon,Monday to Friday: 6:00 pm (English)');
  assert.equal(results.length, 16);
});