diff --git a/scripts/import-hk-parishes.test.ts b/scripts/import-hk-parishes.test.ts
index 9924e34..40296c2 100644
--- a/scripts/import-hk-parishes.test.ts
+++ b/scripts/import-hk-parishes.test.ts
@@ -7,6 +7,7 @@ import {
   normalizeTime,
   parseScheduleLine,
   parseWeekdayLine,
+  parseEntry,
 } from './import-hk-parishes.js';
 
 // ─── Task 2: Entry splitter and name extractor ────────────────────────────────
@@ -186,3 +187,24 @@ test('parseWeekdayLine multiple time groups on one line', () => {
   const results = parseWeekdayLine('Monday to Saturday: 7:45 am,Monday to Friday: 12:00 noon,Monday to Friday: 6:00 pm (English)');
   assert.equal(results.length, 16);
 });
+
+// ─── Task 7: Full entry parser ────────────────────────────────────────────────
+
+test('parseEntry extracts names, fields, and schedules from a full entry', () => {
+  const raw = `Holy Cross Parish\nHOLY CROSS CHURCH\nPath\nClose\nAddress\n1 Holy Cross Path, Shau Kei Wan, Hong Kong\n\nPhone\n(852)2560-1823\n\nEmail\nholycrosshk@gmail.com\n\nWebsite\nClick Here\n\nMass Time\nSunday Masses\n8:00am,9:30am (Cantonese)\n1:00 pm (English)\n\nAnticipated Sunday Masses\nSaturday 3:45 pm,Saturday 6:30 pm (Cantonese)\n\nWeekday Masses\n7:15 am (Cantonese)\n\nSpecial Masses\nSomething irrelevant\n`;
+  const entry = parseEntry(raw);
+  assert.equal(entry.locationName, 'HOLY CROSS CHURCH');
+  assert.equal(entry.parishName, 'Holy Cross Parish');
+  assert.equal(entry.address, '1 Holy Cross Path, Shau Kei Wan, Hong Kong');
+  assert.equal(entry.phone, '(852)2560-1823');
+  assert.equal(entry.email, 'holycrosshk@gmail.com');
+  // Sunday: 2 Cantonese + 1 English = 3 entries
+  const sunday = entry.schedules.filter(s => s.dayOfWeek === 0);
+  assert.equal(sunday.length, 3);
+  // Anticipated (Saturday): 2 entries
+  const saturday = entry.schedules.filter(s => s.dayOfWeek === 6);
+  assert.equal(saturday.length, 2);
+  // Weekday: 5 entries (Mon–Fri)
+  const weekday = entry.schedules.filter(s => s.dayOfWeek >= 1 && s.dayOfWeek <= 5);
+  assert.equal(weekday.length, 5);
+});
diff --git a/scripts/import-hk-parishes.ts b/scripts/import-hk-parishes.ts
index 2520507..6758e19 100644
--- a/scripts/import-hk-parishes.ts
+++ b/scripts/import-hk-parishes.ts
@@ -319,3 +319,92 @@ export function parseWeekdayLine(line: string): ParsedSchedule[] {
 
   return results;
 }
+
+// ─── Task 7: Full entry parser ────────────────────────────────────────────────
+
+/** Separator between the pre-marker name lines and the entry body. */
+const PRE_BODY_MARKER = '\nPath\nClose\n';
+
+/** Lower-cased section headings whose lines are dropped without parsing. */
+const SKIP_SECTIONS = new Set(['special masses', 'eucharist adoration']);
+
+/** Sections of the "Mass Time" block whose lines produce schedules. */
+type ScheduleSection = 'sunday' | 'anticipated' | 'weekday';
+
+/** Lower-cased section headings mapped to the section they open. */
+const SECTION_BY_HEADING = new Map<string, ScheduleSection>([
+  ['sunday masses', 'sunday'],
+  ['anticipated sunday masses', 'anticipated'],
+  ['weekday masses', 'weekday'],
+]);
+
+/**
+ * Parse a full raw entry string (including pre-marker names) into a ParsedEntry.
+ *
+ * The text before the first PRE_BODY_MARKER is handed to extractNames and the
+ * text after it to extractFields. When the marker is absent, extractNames gets
+ * an empty string and the whole input is treated as the body.
+ *
+ * Schedules are read from the part of the body that follows the first
+ * "Mass Time" + newline (matched case-insensitively), up to the next "Share" +
+ * newline or the end of the input. Blank lines are ignored; every other line is
+ * trimmed and then either switches the current section (whole line compared in
+ * lower case against the known headings) or is parsed as data for that section:
+ *   - "Sunday Masses"             -> parseScheduleLine(line, 0)
+ *   - "Anticipated Sunday Masses" -> parseScheduleLine(line, 6)
+ *   - "Weekday Masses"            -> parseWeekdayLine(line)
+ *   - headings in SKIP_SECTIONS   -> following lines are dropped
+ * Lines seen before any heading are dropped as well.
+ *
+ * NOTE(review): a heading that is in neither table is not recognised as a
+ * heading; it is treated as a data line of whichever section is current.
+ *
+ * @param raw - Raw text of one entry.
+ * @returns The extracted names and contact fields plus all parsed schedules.
+ */
+export function parseEntry(raw: string): ParsedEntry {
+  const markerIdx = raw.indexOf(PRE_BODY_MARKER);
+  const hasMarker = markerIdx >= 0;
+  const pre = hasMarker ? raw.slice(0, markerIdx) : '';
+  const body = hasMarker ? raw.slice(markerIdx + PRE_BODY_MARKER.length) : raw;
+
+  const { locationName, parishName } = extractNames(pre);
+  const { address, phone, email } = extractFields(body);
+
+  const schedules: ParsedSchedule[] = [];
+
+  const massSectionMatch = body.match(/Mass Time\n([\s\S]*?)(?:Share\n|$)/i);
+  if (massSectionMatch) {
+    // null until the first heading is seen; 'skip' while inside an ignored section.
+    let currentSection: ScheduleSection | 'skip' | null = null;
+
+    for (const line of massSectionMatch[1].split('\n')) {
+      const trimmed = line.trim();
+      if (!trimmed) continue;
+
+      const lower = trimmed.toLowerCase();
+
+      // Heading lines only change state; they never produce schedules themselves.
+      const opened = SECTION_BY_HEADING.get(lower);
+      if (opened !== undefined) { currentSection = opened; continue; }
+      if (SKIP_SECTIONS.has(lower)) { currentSection = 'skip'; continue; }
+
+      switch (currentSection) {
+        case 'sunday':
+          schedules.push(...parseScheduleLine(trimmed, 0));
+          break;
+        case 'anticipated':
+          schedules.push(...parseScheduleLine(trimmed, 6));
+          break;
+        case 'weekday':
+          schedules.push(...parseWeekdayLine(trimmed));
+          break;
+        default:
+          // No section opened yet, or a skipped section: drop the line.
+          break;
+      }
+    }
+  }
+
+  return { locationName, parishName, address, phone, email, schedules };
+}