feat: add HK parish import parser functions (Tasks 2-6)
Implements splitEntries, extractNames, extractFields, normalizeTime, parseScheduleLine, and parseWeekdayLine with 26 passing unit tests. Handles full-width parentheses, language tags, conditional schedule notes, day ranges, and comma-separated day/time lists.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -114,7 +114,7 @@ export function extractFields(body: string): { address: string | null; phone: st
|
|||||||
const normalise = (s: string) => s.replace(/(/g, '(').replace(/)/g, ')').trim();
|
const normalise = (s: string) => s.replace(/(/g, '(').replace(/)/g, ')').trim();
|
||||||
|
|
||||||
function extractField(fieldName: string): string | null {
|
function extractField(fieldName: string): string | null {
|
||||||
const regex = new RegExp(`\\b${fieldName}\\n([\\s\\S]*?)(?:\\n\\n|\\nFax|\\nEmail|\\nWebsite|\\nChurch|\\nParish|\\nAssistant|\\nDeacon|\\nResident|\\nRector|\\nP\\.C|\\nPastoral|\\nMass Time|$)`, 'i');
|
const regex = new RegExp(`\\b${fieldName}\\n([\\s\\S]*?)(?:\\n\\n|\\nFax|\\nEmail|\\nWebsite|\\nChurch|\\nParish|\\nAssistant|\\nDeacon|\\nSister|\\nChairperson|\\nResident|\\nRector|\\nP\\.C|\\nPastoral|\\nMass Time|$)`, 'i');
|
||||||
const m = body.match(regex);
|
const m = body.match(regex);
|
||||||
if (!m) return null;
|
if (!m) return null;
|
||||||
const value = m[1].replace(/\n/g, ' ').trim();
|
const value = m[1].replace(/\n/g, ' ').trim();
|
||||||
@@ -142,9 +142,9 @@ export function extractFields(body: string): { address: string | null; phone: st
|
|||||||
export function normalizeTime(raw: string): string | null {
|
export function normalizeTime(raw: string): string | null {
|
||||||
const s = raw.trim().toLowerCase();
|
const s = raw.trim().toLowerCase();
|
||||||
if (s.includes('noon')) {
|
if (s.includes('noon')) {
|
||||||
|
if (s === 'noon') return '12:00';
|
||||||
const m = s.match(/(\d{1,2}):(\d{2})\s*noon/);
|
const m = s.match(/(\d{1,2}):(\d{2})\s*noon/);
|
||||||
if (m) return `${String(parseInt(m[1], 10)).padStart(2, '0')}:${m[2]}`;
|
if (m) return `${String(parseInt(m[1], 10)).padStart(2, '0')}:${m[2]}`;
|
||||||
if (s === '12:00 noon' || s === '12:00noon') return '12:00';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const m = s.match(/(\d{1,2}):(\d{2})\s*(am|pm|a\.m\.|p\.m\.)/);
|
const m = s.match(/(\d{1,2}):(\d{2})\s*(am|pm|a\.m\.|p\.m\.)/);
|
||||||
@@ -239,7 +239,11 @@ function parseDays(prefix: string): number[] {
|
|||||||
const fromDay = DAY_FULL[rangeMatch[1]] ?? DAY_ABBREV[rangeMatch[1]];
|
const fromDay = DAY_FULL[rangeMatch[1]] ?? DAY_ABBREV[rangeMatch[1]];
|
||||||
const toDay = DAY_FULL[rangeMatch[2]] ?? DAY_ABBREV[rangeMatch[2]];
|
const toDay = DAY_FULL[rangeMatch[2]] ?? DAY_ABBREV[rangeMatch[2]];
|
||||||
if (fromDay !== undefined && toDay !== undefined) {
|
if (fromDay !== undefined && toDay !== undefined) {
|
||||||
return Array.from({ length: toDay - fromDay + 1 }, (_, i) => fromDay + i);
|
const days: number[] = [];
|
||||||
|
let d = fromDay;
|
||||||
|
while (d !== toDay) { days.push(d); d = (d + 1) % 7; }
|
||||||
|
days.push(toDay);
|
||||||
|
return days;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user