feat: add full entry parser for HK parishes
parseEntry composes extractNames, extractFields, parseScheduleLine, and parseWeekdayLine into a single ParsedEntry. Routes schedule lines by section header (Sunday/Anticipated/Weekday) and skips Special Masses and Eucharist Adoration sections. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import {
|
||||
normalizeTime,
|
||||
parseScheduleLine,
|
||||
parseWeekdayLine,
|
||||
parseEntry,
|
||||
} from './import-hk-parishes.js';
|
||||
|
||||
// ─── Task 2: Entry splitter and name extractor ────────────────────────────────
|
||||
@@ -186,3 +187,24 @@ test('parseWeekdayLine multiple time groups on one line', () => {
|
||||
const results = parseWeekdayLine('Monday to Saturday: 7:45 am,Monday to Friday: 12:00 noon,Monday to Friday: 6:00 pm (English)');
|
||||
assert.equal(results.length, 16);
|
||||
});
|
||||
|
||||
// ─── Task 7: Full entry parser ────────────────────────────────────────────────
|
||||
|
||||
test('parseEntry extracts names, fields, and schedules from a full entry', () => {
  // Fixture: names precede the "Path/Close" marker; the labelled fields and
  // the "Mass Time" block follow it. The trailing "Special Masses" section
  // must not contribute any schedules.
  const raw = `Holy Cross Parish\nHOLY CROSS CHURCH\nPath\nClose\nAddress\n1 Holy Cross Path, Shau Kei Wan, Hong Kong\n\nPhone\n(852)2560-1823\n\nEmail\nholycrosshk@gmail.com\n\nWebsite\nClick Here\n\nMass Time\nSunday Masses\n8:00am,9:30am (Cantonese)\n1:00 pm (English)\n\nAnticipated Sunday Masses\nSaturday 3:45 pm,Saturday 6:30 pm (Cantonese)\n\nWeekday Masses\n7:15 am (Cantonese)\n\nSpecial Masses\nSomething irrelevant\n`;

  const { locationName, parishName, address, phone, email, schedules } = parseEntry(raw);

  // Names and contact fields.
  assert.equal(locationName, 'HOLY CROSS CHURCH');
  assert.equal(parishName, 'Holy Cross Parish');
  assert.equal(address, '1 Holy Cross Path, Shau Kei Wan, Hong Kong');
  assert.equal(phone, '(852)2560-1823');
  assert.equal(email, 'holycrosshk@gmail.com');

  // Schedules are checked by counting entries per day-of-week value.
  const days = schedules.map(s => s.dayOfWeek);

  // Sunday (0): 2 Cantonese + 1 English.
  assert.equal(days.filter(d => d === 0).length, 3);

  // Anticipated Masses land on Saturday (6): 2 entries.
  assert.equal(days.filter(d => d === 6).length, 2);

  // Weekday line without explicit days: one entry for each of days 1..5.
  assert.equal(days.filter(d => d >= 1 && d <= 5).length, 5);
});
|
||||
|
||||
@@ -319,3 +319,53 @@ export function parseWeekdayLine(line: string): ParsedSchedule[] {
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
// ─── Task 7: Full entry parser ────────────────────────────────────────────────
|
||||
|
||||
/**
 * Lower-cased section headers inside the "Mass Time" block whose lines
 * parseEntry ignores instead of turning them into schedules.
 */
const SKIP_SECTIONS = new Set<string>([
  'special masses',
  'eucharist adoration',
]);
|
||||
|
||||
/**
 * Turn one raw entry (names, contact fields and Mass times) into a ParsedEntry.
 *
 * The text before the first "\nPath\nClose\n" marker is handed to
 * extractNames; everything after it is handed to extractFields and scanned
 * for schedules. When the marker is absent, the name text is empty and the
 * whole input is treated as the body.
 *
 * Schedules come from the text that follows "Mass Time\n" (matched
 * case-insensitively) up to the first "Share\n" or the end of the body.
 * Within it, header lines select how the following lines are parsed:
 *   - "Sunday Masses"             -> parseScheduleLine(line, 0)
 *   - "Anticipated Sunday Masses" -> parseScheduleLine(line, 6)
 *   - "Weekday Masses"            -> parseWeekdayLine(line)
 *   - any header in SKIP_SECTIONS -> lines are dropped
 * Headers are compared after trimming and lower-casing. Blank lines and
 * lines that appear before any recognised header are ignored.
 *
 * @param raw Full entry text, including the names that precede the marker.
 * @returns The extracted names and fields together with all parsed schedules.
 */
export function parseEntry(raw: string): ParsedEntry {
  const marker = '\nPath\nClose\n';
  const splitAt = raw.indexOf(marker);

  let pre = '';
  let body = raw;
  if (splitAt >= 0) {
    pre = raw.slice(0, splitAt);
    body = raw.slice(splitAt + marker.length);
  }

  const names = extractNames(pre);
  const fields = extractFields(body);

  const schedules: ParsedSchedule[] = [];

  // No "m" flag: "$" only matches at the very end of the body.
  const massMatch = /Mass Time\n([\s\S]*?)(?:Share\n|$)/i.exec(body);
  const massLines = massMatch ? massMatch[1].split('\n') : [];

  let section: string | null = null;

  for (const rawLine of massLines) {
    const text = rawLine.trim();
    if (text === '') continue;

    const key = text.toLowerCase();

    // A header line only switches the active section; it is never parsed.
    switch (key) {
      case 'sunday masses':
        section = 'sunday';
        continue;
      case 'anticipated sunday masses':
        section = 'anticipated';
        continue;
      case 'weekday masses':
        section = 'weekday';
        continue;
      default:
        if (SKIP_SECTIONS.has(key)) {
          section = 'skip';
          continue;
        }
    }

    // Content line: route it according to the active section.
    switch (section) {
      case 'sunday':
        schedules.push(...parseScheduleLine(text, 0));
        break;
      case 'anticipated':
        schedules.push(...parseScheduleLine(text, 6));
        break;
      case 'weekday':
        schedules.push(...parseWeekdayLine(text));
        break;
      default:
        // Either no header seen yet (null) or inside a skipped section.
        break;
    }
  }

  return {
    locationName: names.locationName,
    parishName: names.parishName,
    address: fields.address,
    phone: fields.phone,
    email: fields.email,
    schedules,
  };
}
|
||||
|
||||
Reference in New Issue
Block a user