feat: add full entry parser for HK parishes
parseEntry composes extractNames, extractFields, parseScheduleLine, and parseWeekdayLine into a single ParsedEntry. It routes schedule lines by section header (Sunday/Anticipated/Weekday) and skips the Special Masses and Eucharist Adoration sections.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import {
|
|||||||
normalizeTime,
|
normalizeTime,
|
||||||
parseScheduleLine,
|
parseScheduleLine,
|
||||||
parseWeekdayLine,
|
parseWeekdayLine,
|
||||||
|
parseEntry,
|
||||||
} from './import-hk-parishes.js';
|
} from './import-hk-parishes.js';
|
||||||
|
|
||||||
// ─── Task 2: Entry splitter and name extractor ────────────────────────────────
|
// ─── Task 2: Entry splitter and name extractor ────────────────────────────────
|
||||||
@@ -186,3 +187,24 @@ test('parseWeekdayLine multiple time groups on one line', () => {
|
|||||||
const results = parseWeekdayLine('Monday to Saturday: 7:45 am,Monday to Friday: 12:00 noon,Monday to Friday: 6:00 pm (English)');
|
const results = parseWeekdayLine('Monday to Saturday: 7:45 am,Monday to Friday: 12:00 noon,Monday to Friday: 6:00 pm (English)');
|
||||||
assert.equal(results.length, 16);
|
assert.equal(results.length, 16);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ─── Task 7: Full entry parser ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
test('parseEntry extracts names, fields, and schedules from a full entry', () => {
  // Fixture: two name lines, the Path/Close marker, contact fields, then a
  // "Mass Time" block with Sunday, Anticipated, Weekday and Special sections.
  const raw = `Holy Cross Parish\nHOLY CROSS CHURCH\nPath\nClose\nAddress\n1 Holy Cross Path, Shau Kei Wan, Hong Kong\n\nPhone\n(852)2560-1823\n\nEmail\nholycrosshk@gmail.com\n\nWebsite\nClick Here\n\nMass Time\nSunday Masses\n8:00am,9:30am (Cantonese)\n1:00 pm (English)\n\nAnticipated Sunday Masses\nSaturday 3:45 pm,Saturday 6:30 pm (Cantonese)\n\nWeekday Masses\n7:15 am (Cantonese)\n\nSpecial Masses\nSomething irrelevant\n`;

  const { locationName, parishName, address, phone, email, schedules } = parseEntry(raw);

  // Names and contact fields.
  assert.equal(locationName, 'HOLY CROSS CHURCH');
  assert.equal(parishName, 'Holy Cross Parish');
  assert.equal(address, '1 Holy Cross Path, Shau Kei Wan, Hong Kong');
  assert.equal(phone, '(852)2560-1823');
  assert.equal(email, 'holycrosshk@gmail.com');

  // Sunday: 2 Cantonese + 1 English = 3 entries
  const sundayCount = schedules.filter(({ dayOfWeek }) => dayOfWeek === 0).length;
  assert.equal(sundayCount, 3);

  // Anticipated (Saturday): 2 entries
  const saturdayCount = schedules.filter(({ dayOfWeek }) => dayOfWeek === 6).length;
  assert.equal(saturdayCount, 2);

  // Weekday: 5 entries (Mon–Fri)
  const weekdayCount = schedules.filter(
    ({ dayOfWeek }) => dayOfWeek >= 1 && dayOfWeek <= 5,
  ).length;
  assert.equal(weekdayCount, 5);
});
|
||||||
|
|||||||
@@ -319,3 +319,53 @@ export function parseWeekdayLine(line: string): ParsedSchedule[] {
|
|||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Task 7: Full entry parser ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Lower-cased section headers whose lines are not parsed as schedules. */
const SKIP_SECTIONS = new Set<string>([
  'special masses',
  'eucharist adoration',
]);
|
||||||
|
|
||||||
|
/**
 * Parse a full raw entry string (including pre-marker names) into a ParsedEntry.
 *
 * The text before the `Path` / `Close` marker lines is handed to `extractNames`;
 * the text after it is handed to `extractFields`. If the marker is absent, the
 * whole input is treated as the body and the names are extracted from an empty
 * string.
 *
 * Schedules come from the block that follows a `Mass Time` line and runs until a
 * `Share` line or the end of the input. Inside that block each non-blank line is
 * either a section header or a schedule line belonging to the most recent header:
 *
 * - `Sunday Masses` lines go to `parseScheduleLine` with day `0`.
 * - `Anticipated Sunday Masses` lines go to `parseScheduleLine` with day `6`.
 * - `Weekday Masses` lines go to `parseWeekdayLine`.
 * - Lines under a header listed in `SKIP_SECTIONS`, and lines that appear before
 *   any recognised header, are ignored.
 *
 * Header matching is case-insensitive and ignores surrounding whitespace.
 * Windows (`\r\n`) line endings are normalised to `\n` before parsing.
 *
 * @param raw - Raw text of a single entry.
 * @returns The extracted names, contact fields, and schedules.
 */
export function parseEntry(raw: string): ParsedEntry {
  // The marker search and the "Mass Time" / "Share" patterns below all match on
  // a bare "\n", so CRLF input would otherwise yield no names and no schedules.
  const text = raw.replace(/\r\n/g, '\n');

  const marker = '\nPath\nClose\n';
  const markerIdx = text.indexOf(marker);
  const hasMarker = markerIdx >= 0;
  const pre = hasMarker ? text.slice(0, markerIdx) : '';
  const body = hasMarker ? text.slice(markerIdx + marker.length) : text;

  const { locationName, parishName } = extractNames(pre);
  const { address, phone, email } = extractFields(body);

  const schedules: ParsedSchedule[] = [];

  // Lazy capture: stop at the first "Share" line, or at end of input if none.
  const massSectionMatch = body.match(/Mass Time\n([\s\S]*?)(?:Share\n|$)/i);
  if (massSectionMatch) {
    const massText = massSectionMatch[1];
    let currentSection: 'sunday' | 'anticipated' | 'weekday' | 'skip' | null = null;

    for (const line of massText.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) continue;

      const lower = trimmed.toLowerCase();

      // Header lines only switch the active section; they carry no schedule data.
      if (lower === 'sunday masses') { currentSection = 'sunday'; continue; }
      if (lower === 'anticipated sunday masses') { currentSection = 'anticipated'; continue; }
      if (lower === 'weekday masses') { currentSection = 'weekday'; continue; }
      if (SKIP_SECTIONS.has(lower)) { currentSection = 'skip'; continue; }

      switch (currentSection) {
        case 'sunday':
          schedules.push(...parseScheduleLine(trimmed, 0));
          break;
        case 'anticipated':
          schedules.push(...parseScheduleLine(trimmed, 6));
          break;
        case 'weekday':
          schedules.push(...parseWeekdayLine(trimmed));
          break;
        default:
          // 'skip' or no header seen yet: drop the line.
          break;
      }
    }
  }

  return { locationName, parishName, address, phone, email, schedules };
}
|
||||||
|
|||||||
Reference in New Issue
Block a user