feat: add full entry parser for HK parishes

parseEntry composes extractNames, extractFields, parseScheduleLine,
and parseWeekdayLine into a single ParsedEntry. Routes schedule
lines by section header (Sunday/Anticipated/Weekday) and skips
Special Masses and Eucharist Adoration sections.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-04-03 16:18:05 -04:00
parent 38274174a9
commit eedb442e78
2 changed files with 72 additions and 0 deletions

View File

@@ -319,3 +319,53 @@ export function parseWeekdayLine(line: string): ParsedSchedule[] {
return results;
}
// ─── Task 7: Full entry parser ────────────────────────────────────────────────
/**
 * Lower-cased section headers inside the "Mass Time" block whose lines
 * parseEntry ignores instead of turning them into schedules.
 */
const SKIP_SECTIONS: ReadonlySet<string> = new Set<string>([
  'special masses',
  'eucharist adoration',
]);
/**
 * Build a ParsedEntry from one raw entry string.
 *
 * The input is split at the first "\nPath\nClose\n" marker: the text before
 * it goes to extractNames, the text after it is the body handed to
 * extractFields. If the marker is absent, the whole input is treated as the
 * body and extractNames receives an empty string.
 *
 * Schedules come from the body's "Mass Time" block (matched
 * case-insensitively), which runs up to the first "Share" line or the end of
 * the input. Within that block each non-blank line is trimmed and either
 * switches the current section (when it is a known header, compared
 * case-insensitively) or is parsed according to the current section:
 *   - "Sunday Masses"             -> parseScheduleLine(line, 0)
 *   - "Anticipated Sunday Masses" -> parseScheduleLine(line, 6)
 *   - "Weekday Masses"            -> parseWeekdayLine(line)
 *   - headers in SKIP_SECTIONS    -> lines are dropped
 * Lines that appear before any known header are dropped as well.
 *
 * NOTE(review): 0 and 6 are presumably day-of-week indices (Sunday and
 * Saturday) — confirm against parseScheduleLine.
 *
 * @param raw Full raw entry text, including any names that precede the marker.
 * @returns The names, contact fields and collected schedules for the entry.
 */
export function parseEntry(raw: string): ParsedEntry {
  type Section = 'sunday' | 'anticipated' | 'weekday' | 'skip';

  const marker = '\nPath\nClose\n';
  const markerAt = raw.indexOf(marker);
  const hasMarker = markerAt >= 0;
  const pre = hasMarker ? raw.slice(0, markerAt) : '';
  const body = hasMarker ? raw.slice(markerAt + marker.length) : raw;

  const { locationName, parishName } = extractNames(pre);
  const { address, phone, email } = extractFields(body);

  const schedules: ParsedSchedule[] = [];

  // No "Mass Time" block yields an empty text, whose only line is blank and
  // therefore skipped, leaving `schedules` empty.
  const massBlock = /Mass Time\n([\s\S]*?)(?:Share\n|$)/i.exec(body);
  const massText = massBlock?.[1] ?? '';

  let section: Section | null = null;
  for (const rawLine of massText.split('\n')) {
    const text = rawLine.trim();
    if (text === '') continue;

    // A header line only changes state; it never produces a schedule itself.
    const header = text.toLowerCase();
    let headerSection: Section | null = null;
    switch (header) {
      case 'sunday masses':
        headerSection = 'sunday';
        break;
      case 'anticipated sunday masses':
        headerSection = 'anticipated';
        break;
      case 'weekday masses':
        headerSection = 'weekday';
        break;
      default:
        if (SKIP_SECTIONS.has(header)) headerSection = 'skip';
        break;
    }
    if (headerSection !== null) {
      section = headerSection;
      continue;
    }

    switch (section) {
      case 'sunday':
        schedules.push(...parseScheduleLine(text, 0));
        break;
      case 'anticipated':
        schedules.push(...parseScheduleLine(text, 6));
        break;
      case 'weekday':
        schedules.push(...parseWeekdayLine(text));
        break;
      default:
        // Either no header has been seen yet or we are in a skipped section.
        break;
    }
  }

  return { locationName, parishName, address, phone, email, schedules };
}