#!/usr/bin/env tsx
/**
 * Test Polish church with detailed section logging.
 *
 * Wraps GenericScraper's `parseSchedules` so that every call first prints the
 * sections returned by `findScheduleSections`, then runs a scrape against a
 * single Polish parish site and prints the resulting schedule times per day.
 */
import { GenericScraper } from '../../src/scrapers/strategies/generic';

// Temporarily modify GenericScraper to add logging.
// Bracket access is used to reach members that are not part of the public API.
const originalParse = GenericScraper.prototype['parseSchedules'];

/**
 * Logging wrapper around the original `parseSchedules`.
 *
 * Builds a lower-cased plain-text view of the HTML, prints the sections that
 * `findScheduleSections` reports for it, and then delegates to the original
 * implementation with the untouched HTML so the scrape result is unaffected.
 */
GenericScraper.prototype['parseSchedules'] = function (this: GenericScraper, html: string) {
  const text = html
    // Drop whole <script>/<style> elements, including their contents. The
    // opening-tag part of these patterns must be present; without it the
    // pattern would start matching at any '>' and remove unrelated markup.
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    // Replace remaining tags with a space, then collapse whitespace runs.
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();

  // Call findScheduleSections and log result
  const sections = this['findScheduleSections'](text);

  console.log('\n=== Sections found by findScheduleSections() ===\n');
  const dayNames = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];

  // The section shape is declared in the scraper module, not here, so it is
  // left loosely typed; only `day` and `text` are read.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  sections.forEach((section: any, i: number) => {
    console.log(`Section ${i + 1}: ${dayNames[section.day]} (day ${section.day})`);
    console.log(` Text: "${section.text.substring(0, 80)}..."`);
  });

  console.log(`\nTotal sections: ${sections.length}\n`);

  // Continue with normal processing
  return originalParse.call(this, html);
};

/**
 * Scrapes the hard-coded parish URL with country set to 'PL' and prints the
 * success flag, the number of schedules and the times grouped by day of week.
 *
 * The scraper is closed even when scraping or reporting throws.
 */
async function testPolish(): Promise<void> {
  const url = 'http://parafialubojna.pl';

  console.log(`Testing: ${url}`);

  const scraper = new GenericScraper();
  await scraper.init();

  try {
    scraper.setCountry('PL');

    const result = await scraper.scrape(url);

    console.log(`\nFinal result: ${result.success}`);
    console.log(`Schedules: ${result.schedules.length}\n`);

    if (result.schedules.length > 0) {
      // Group schedules by their dayOfWeek value (used below as index 0-6).
      const byDay: Record<number, Array<(typeof result.schedules)[number]>> = {};
      for (const sched of result.schedules) {
        const bucket = byDay[sched.dayOfWeek] ?? [];
        bucket.push(sched);
        byDay[sched.dayOfWeek] = bucket;
      }

      const dayNamesPL = ['Niedziela', 'Poniedziałek', 'Wtorek', 'Środa', 'Czwartek', 'Piątek', 'Sobota'];
      console.log('Parsed schedules by day:');
      for (let i = 0; i < 7; i++) {
        const daySchedules = byDay[i];
        if (daySchedules) {
          console.log(` ${dayNamesPL[i]}: ${daySchedules.map((s) => s.time).join(', ')}`);
        }
      }
    }
  } finally {
    // Release the scraper's resources regardless of how the run ended.
    await scraper.close();
  }
}

testPolish().catch(console.error);