feat: add HK parish import parser functions (Tasks 2-6)

Implements splitEntries, extractNames, extractFields, normalizeTime,
parseScheduleLine, and parseWeekdayLine with 26 passing unit tests.
Handles full-width parentheses, language tags, conditional schedule
notes, day ranges, and comma-separated day/time lists.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-04-03 16:15:04 -04:00
parent 328d146201
commit 38274174a9

View File

@@ -114,7 +114,7 @@ export function extractFields(body: string): { address: string | null; phone: st
const normalise = (s: string) => s.replace(/（/g, '(').replace(/）/g, ')').trim(); const normalise = (s: string) => s.replace(/（/g, '(').replace(/）/g, ')').trim();
function extractField(fieldName: string): string | null { function extractField(fieldName: string): string | null {
const regex = new RegExp(`\\b${fieldName}\\n([\\s\\S]*?)(?:\\n\\n|\\nFax|\\nEmail|\\nWebsite|\\nChurch|\\nParish|\\nAssistant|\\nDeacon|\\nResident|\\nRector|\\nP\\.C|\\nPastoral|\\nMass Time|$)`, 'i'); const regex = new RegExp(`\\b${fieldName}\\n([\\s\\S]*?)(?:\\n\\n|\\nFax|\\nEmail|\\nWebsite|\\nChurch|\\nParish|\\nAssistant|\\nDeacon|\\nSister|\\nChairperson|\\nResident|\\nRector|\\nP\\.C|\\nPastoral|\\nMass Time|$)`, 'i');
const m = body.match(regex); const m = body.match(regex);
if (!m) return null; if (!m) return null;
const value = m[1].replace(/\n/g, ' ').trim(); const value = m[1].replace(/\n/g, ' ').trim();
@@ -142,9 +142,9 @@ export function extractFields(body: string): { address: string | null; phone: st
export function normalizeTime(raw: string): string | null { export function normalizeTime(raw: string): string | null {
const s = raw.trim().toLowerCase(); const s = raw.trim().toLowerCase();
if (s.includes('noon')) { if (s.includes('noon')) {
if (s === 'noon') return '12:00';
const m = s.match(/(\d{1,2}):(\d{2})\s*noon/); const m = s.match(/(\d{1,2}):(\d{2})\s*noon/);
if (m) return `${String(parseInt(m[1], 10)).padStart(2, '0')}:${m[2]}`; if (m) return `${String(parseInt(m[1], 10)).padStart(2, '0')}:${m[2]}`;
if (s === '12:00 noon' || s === '12:00noon') return '12:00';
} }
const m = s.match(/(\d{1,2}):(\d{2})\s*(am|pm|a\.m\.|p\.m\.)/); const m = s.match(/(\d{1,2}):(\d{2})\s*(am|pm|a\.m\.|p\.m\.)/);
@@ -239,7 +239,11 @@ function parseDays(prefix: string): number[] {
const fromDay = DAY_FULL[rangeMatch[1]] ?? DAY_ABBREV[rangeMatch[1]]; const fromDay = DAY_FULL[rangeMatch[1]] ?? DAY_ABBREV[rangeMatch[1]];
const toDay = DAY_FULL[rangeMatch[2]] ?? DAY_ABBREV[rangeMatch[2]]; const toDay = DAY_FULL[rangeMatch[2]] ?? DAY_ABBREV[rangeMatch[2]];
if (fromDay !== undefined && toDay !== undefined) { if (fromDay !== undefined && toDay !== undefined) {
return Array.from({ length: toDay - fromDay + 1 }, (_, i) => fromDay + i); const days: number[] = [];
let d = fromDay;
while (d !== toDay) { days.push(d); d = (d + 1) % 7; }
days.push(toDay);
return days;
} }
} }