feat: add full entry parser for HK parishes

parseEntry composes extractNames, extractFields, parseScheduleLine,
and parseWeekdayLine into a single ParsedEntry. Routes schedule
lines by section header (Sunday/Anticipated/Weekday) and skips
Special Masses and Eucharist Adoration sections.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-04-03 16:18:05 -04:00
parent 38274174a9
commit eedb442e78
2 changed files with 72 additions and 0 deletions

View File

@@ -7,6 +7,7 @@ import {
normalizeTime,
parseScheduleLine,
parseWeekdayLine,
parseEntry,
} from './import-hk-parishes.js';
// ─── Task 2: Entry splitter and name extractor ────────────────────────────────
@@ -186,3 +187,24 @@ test('parseWeekdayLine multiple time groups on one line', () => {
const results = parseWeekdayLine('Monday to Saturday: 7:45 am,Monday to Friday: 12:00 noon,Monday to Friday: 6:00 pm (English)');
assert.equal(results.length, 16);
});
// ─── Task 7: Full entry parser ────────────────────────────────────────────────
test('parseEntry extracts names, fields, and schedules from a full entry', () => {
  // Fixture: two name lines, the "Path/Close" marker, labelled contact
  // fields, then a "Mass Time" block with Sunday, Anticipated, Weekday and
  // Special Masses sections.
  const raw = `Holy Cross Parish\nHOLY CROSS CHURCH\nPath\nClose\nAddress\n1 Holy Cross Path, Shau Kei Wan, Hong Kong\n\nPhone\n8522560-1823\n\nEmail\nholycrosshk@gmail.com\n\nWebsite\nClick Here\n\nMass Time\nSunday Masses\n8:00am,9:30am (Cantonese)\n1:00 pm (English)\n\nAnticipated Sunday Masses\nSaturday 3:45 pm,Saturday 6:30 pm (Cantonese)\n\nWeekday Masses\n7:15 am (Cantonese)\n\nSpecial Masses\nSomething irrelevant\n`;

  const { locationName, parishName, address, phone, email, schedules } = parseEntry(raw);

  // Names and contact fields. The raw phone "8522560-1823" is expected to
  // come back as "(852)2560-1823".
  assert.equal(locationName, 'HOLY CROSS CHURCH');
  assert.equal(parishName, 'Holy Cross Parish');
  assert.equal(address, '1 Holy Cross Path, Shau Kei Wan, Hong Kong');
  assert.equal(phone, '(852)2560-1823');
  assert.equal(email, 'holycrosshk@gmail.com');

  // Only the day of each schedule matters for the remaining checks.
  const days = schedules.map(s => s.dayOfWeek);

  // Sunday (day 0): 2 Cantonese + 1 English = 3 entries
  assert.equal(days.filter(d => d === 0).length, 3);

  // Anticipated (Saturday, day 6): 2 entries
  assert.equal(days.filter(d => d === 6).length, 2);

  // Weekday: 5 entries (Mon-Fri, days 1 through 5). The "Special Masses"
  // line must not add any.
  assert.equal(days.filter(d => d >= 1 && d <= 5).length, 5);
});

View File

@@ -319,3 +319,53 @@ export function parseWeekdayLine(line: string): ParsedSchedule[] {
return results;
}
// ─── Task 7: Full entry parser ────────────────────────────────────────────────
/** Lower-cased section headers whose lines are ignored entirely. */
const SKIP_SECTIONS = new Set(['special masses', 'eucharist adoration']);

/** Lower-cased section headers that route the lines below them to a parser. */
const ENTRY_SECTION_BY_HEADER = new Map<string, 'sunday' | 'anticipated' | 'weekday'>([
  ['sunday masses', 'sunday'],
  ['anticipated sunday masses', 'anticipated'],
  ['weekday masses', 'weekday'],
]);

/**
 * Build a ParsedEntry from one raw entry string.
 *
 * The input is split at the first "\nPath\nClose\n" marker. The text before
 * the marker goes to extractNames; the text after it goes to extractFields
 * and is searched for the mass schedule. When the marker is absent, names are
 * extracted from an empty string and the whole input is used as the body.
 *
 * The mass schedule is the text following the first "Mass Time" line
 * (matched case-insensitively), up to the first "Share" line or the end of
 * the input. Within it, each non-blank line is trimmed and handled as follows:
 *   - a recognised header (compared case-insensitively) switches the current
 *     section and is not parsed itself;
 *   - lines seen before any header, or under a skipped section, are dropped;
 *   - "Sunday Masses" lines go to parseScheduleLine with 0;
 *   - "Anticipated Sunday Masses" lines go to parseScheduleLine with 6;
 *   - "Weekday Masses" lines go to parseWeekdayLine.
 * A header that is not recognised does not change the section; it is passed
 * to the current section's parser like any other line.
 *
 * @param raw Full raw entry text, including the name lines before the marker.
 * @returns The extracted names, address, phone, email and all schedules in
 *   the order their lines appear.
 */
export function parseEntry(raw: string): ParsedEntry {
  const marker = '\nPath\nClose\n';
  const splitAt = raw.indexOf(marker);

  let pre = '';
  let body = raw;
  if (splitAt >= 0) {
    pre = raw.slice(0, splitAt);
    body = raw.slice(splitAt + marker.length);
  }

  const { locationName, parishName } = extractNames(pre);
  const { address, phone, email } = extractFields(body);

  // Lazy capture: stops at the first "Share\n", otherwise runs to end of input.
  const massBlock = /Mass Time\n([\s\S]*?)(?:Share\n|$)/i.exec(body);
  const massLines = massBlock ? massBlock[1].split('\n') : [];

  const schedules: ParsedSchedule[] = [];
  let section: 'sunday' | 'anticipated' | 'weekday' | 'skip' | null = null;

  for (const rawLine of massLines) {
    const text = rawLine.trim();
    if (text === '') continue;

    const key = text.toLowerCase();

    // Header lines only switch state; they never produce schedules.
    const routed = ENTRY_SECTION_BY_HEADER.get(key);
    if (routed !== undefined) {
      section = routed;
      continue;
    }
    if (SKIP_SECTIONS.has(key)) {
      section = 'skip';
      continue;
    }

    switch (section) {
      case 'sunday':
        schedules.push(...parseScheduleLine(text, 0));
        break;
      case 'anticipated':
        schedules.push(...parseScheduleLine(text, 6));
        break;
      case 'weekday':
        schedules.push(...parseWeekdayLine(text));
        break;
      default:
        // No header seen yet (null) or inside a skipped section: drop the line.
        break;
    }
  }

  return { locationName, parishName, address, phone, email, schedules };
}