#!/usr/bin/env tsx
/**
 * Debug Polish church in detail.
 *
 * Scrapes one parish site with GenericScraper, prints the text that follows
 * the "msze święte" heading, reports which time formats and Polish day names
 * occur in that snippet, and finally lists the parsed schedules grouped by
 * day of week.
 */
import { GenericScraper } from '../../src/scrapers/strategies/generic';
import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';

/** Heading variants searched for, in priority order (with and without diacritics). */
const SCHEDULE_HEADINGS = ['msze święte', 'msze swiete'] as const;

/** Number of characters printed and analysed after the schedule heading. */
const SNIPPET_LENGTH = 500;

/**
 * Reduces an HTML document to lowercase plain text suitable for substring search.
 *
 * Whole <script> and <style> elements are removed first so their contents do
 * not leak into the text; every remaining tag is replaced by a space and runs
 * of whitespace are collapsed to a single space.
 *
 * @param html Raw HTML markup.
 * @returns Lowercased text with tags removed and whitespace collapsed.
 */
function htmlToSearchableText(html: string): string {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();
}

/**
 * Locates the start of the schedule section.
 *
 * @param text Lowercased page text.
 * @returns Index of the first heading variant present in `text`, or -1 when
 *          none of them occurs.
 */
function findScheduleIndex(text: string): number {
  for (const heading of SCHEDULE_HEADINGS) {
    // Compare against -1 explicitly: indexOf's miss value (-1) is truthy and a
    // hit at position 0 is falsy, so `a || b` cannot express this fallback.
    const index = text.indexOf(heading);
    if (index !== -1) {
      return index;
    }
  }
  return -1;
}

/**
 * Runs the scraper against the hard-coded parish URL and prints diagnostics
 * to stdout.
 *
 * @returns A promise that resolves once the report is printed and the scraper
 *          has been closed; it rejects if init, scraping, or closing fails.
 */
async function debugPolish(): Promise<void> {
  const url = 'http://parafialubojna.pl';
  console.log(`Debugging: ${url}\n`);

  const scraper = new GenericScraper();
  await scraper.init();

  try {
    scraper.setCountry('PL');

    const result = await scraper.scrape(url);

    console.log(`Success: ${result.success}`);
    console.log(`Schedules found: ${result.schedules.length}\n`);

    if (result.rawHtml) {
      const text = htmlToSearchableText(result.rawHtml);

      // Find the schedule section
      const scheduleIndex = findScheduleIndex(text);

      if (scheduleIndex !== -1) {
        const snippet = text.substring(scheduleIndex, scheduleIndex + SNIPPET_LENGTH);
        console.log('Schedule section:');
        console.log(snippet);
        console.log('\n');

        // Test all time pattern matches
        console.log('=== Testing time pattern matches ===\n');

        // Space separator pattern: 1-2 digits, whitespace, exactly 2 digits
        // (the lookahead rejects a third trailing digit).
        const spacePattern = /\b(\d{1,2})\s+(\d{2})(?!\d)/g;
        const spaceMatches = snippet.match(spacePattern);
        console.log('Space-separated times (8 00, 9 30):');
        console.log(spaceMatches ? spaceMatches.join(', ') : 'none');
        console.log('');

        // Colon pattern
        const colonPattern = /\d{1,2}:\d{2}/g;
        const colonMatches = snippet.match(colonPattern);
        console.log('Colon times (8:00, 9:30):');
        console.log(colonMatches ? colonMatches.join(', ') : 'none');
        console.log('');

        // Polish day names
        console.log('=== Polish day names in snippet ===\n');
        const dayConfigs = getDayNamesForCountry('PL');
        const dayPatterns = buildDayPatterns(dayConfigs);

        for (const [dayName, dayNum] of Object.entries(dayPatterns)) {
          if (snippet.includes(dayName)) {
            console.log(`Found: ${dayName} (day ${dayNum})`);
          }
        }
      }
    }

    console.log('\n=== Parsed schedules ===\n');

    // Group schedules by their dayOfWeek value; the element type is derived
    // from the scraper result so it cannot drift from the real schedule shape.
    const byDay: Record<number, typeof result.schedules> = {};
    for (const sched of result.schedules) {
      let bucket = byDay[sched.dayOfWeek];
      if (!bucket) {
        bucket = [];
        byDay[sched.dayOfWeek] = bucket;
      }
      bucket.push(sched);
    }

    // Labels are indexed 0-6 starting with Niedziela (Sunday), matching the
    // order in which the groups are printed below.
    const dayNames = ['Niedziela', 'Poniedziałek', 'Wtorek', 'Środa', 'Czwartek', 'Piątek', 'Sobota'];
    for (let i = 0; i < dayNames.length; i++) {
      const schedulesForDay = byDay[i];
      if (schedulesForDay) {
        console.log(`${dayNames[i]}: ${schedulesForDay.map(s => s.time).join(', ')}`);
      }
    }
  } finally {
    // Close the scraper even when scraping or reporting throws, so a failed
    // debug run does not leave it open.
    await scraper.close();
  }
}

debugPolish().catch(console.error);