#!/usr/bin/env tsx
/**
 * Debug why German church has duplicate schedules.
 *
 * Monkey-patches `GenericScraper.prototype.parseSchedules` so that every call
 * logs (1) the sections returned by `findScheduleSections` and (2) the times
 * of the resulting schedules grouped by day of week. It then scrapes one
 * German parish site and prints a short summary.
 */
import { GenericScraper } from '../../src/scrapers/strategies/generic';

/** Display names indexed by numeric day; index 0 is Sunday. */
const dayNames = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];

// Temporarily patch GenericScraper to log sections.
// Bracket notation is used to reach members that are presumably non-public
// on GenericScraper — NOTE(review): confirm against the class definition.
const originalParse = GenericScraper.prototype['parseSchedules'];

GenericScraper.prototype['parseSchedules'] = function (html: string) {
  // Reduce the HTML to lowercase plain text before looking for sections.
  // NOTE(review): presumably mirrors the preprocessing inside the real
  // parseSchedules — confirm, otherwise the logged sections may differ from
  // the ones the scraper actually uses.
  const text = html
    // Drop whole <script>/<style> elements, opening tag included, so their
    // contents are not mistaken for page text.
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    // Replace every remaining tag with a space so adjacent words stay apart.
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();

  // Call findScheduleSections and log result.
  const sections = this['findScheduleSections'](text);
  console.log('\n=== Sections found ===\n');
  sections.forEach((section: any, i: number) => {
    console.log(`Section ${i + 1}: ${dayNames[section.day]} (day ${section.day})`);
    console.log(` Text preview: "${section.text.substring(0, 100)}..."`);
  });
  console.log(`\nTotal sections: ${sections.length}\n`);

  // Continue with normal processing: the untouched original does the real
  // work, so the patch only adds logging and never alters the result.
  const result = originalParse.call(this, html);

  console.log(`\n=== Extracted times per section ===\n`);
  // Group the parsed schedules by their dayOfWeek so duplicates are easy to spot.
  const schedsByDay: Record<number, any[]> = {};
  for (const sched of result) {
    let bucket = schedsByDay[sched.dayOfWeek];
    if (!bucket) {
      bucket = [];
      schedsByDay[sched.dayOfWeek] = bucket;
    }
    bucket.push(sched);
  }
  for (let i = 0; i < 7; i++) {
    const scheds = schedsByDay[i];
    if (scheds) {
      console.log(`${dayNames[i]}: ${scheds.map((s) => s.time).join(', ')}`);
    }
  }

  return result;
};

/**
 * Scrapes the test site with the patched scraper and prints the outcome.
 *
 * The scraper is closed even when scraping throws, so whatever `init()`
 * acquired is released on the failure path as well.
 */
async function testGerman() {
  const url = 'https://www.alterpeter.de/';
  console.log(`Testing: ${url}`);

  const scraper = new GenericScraper();
  await scraper.init();
  try {
    scraper.setCountry('DE');
    const result = await scraper.scrape(url);

    console.log(`\n=== Final Result ===`);
    console.log(`Success: ${result.success}`);
    console.log(`Total schedules: ${result.schedules.length}`);
  } finally {
    await scraper.close();
  }
}

testGerman().catch(console.error);