#!/usr/bin/env tsx
/**
 * Check the 2 potentially real bugs
 */

import { GenericScraper } from '../../src/scrapers/strategies/generic';

/** Labels used to print a schedule's `dayOfWeek`; the value is used as an index into this list. */
const DAY_LABELS = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'];

/** Maximum number of schedules printed per site. */
const SAMPLE_LIMIT = 5;

/** Number of characters printed after a keyword hit in the raw page text. */
const SNIPPET_LENGTH = 300;

/** Polish schedule keywords, in priority order (all lowercase: the page text is lowercased before searching). */
const SCHEDULE_KEYWORDS = ['msze', 'msza', 'nabożeńst'];

/**
 * Print the success flag, schedule count and error of a scrape result.
 *
 * @param {{ success: unknown, schedules: unknown[], error?: unknown }} result - value returned by `scraper.scrape()`.
 */
function printSummary(result) {
  console.log(`Success: ${result.success}`);
  console.log(`Schedules: ${result.schedules.length}`);
  console.log(`Error: ${result.error || 'none'}\n`);
}

/**
 * Print the first few schedules, one per line.
 *
 * @param {Array<{ dayOfWeek: number, time: unknown, language: unknown, massType: unknown }>} schedules
 */
function printSampleSchedules(schedules) {
  console.log('Sample schedules:');
  for (const schedule of schedules.slice(0, SAMPLE_LIMIT)) {
    const day = DAY_LABELS[schedule.dayOfWeek];
    console.log(` ${day} ${schedule.time} - ${schedule.language} ${schedule.massType}`);
  }
}

/**
 * Reduce an HTML document to lowercase plain text for keyword searching.
 *
 * @param {string} html - raw page markup.
 * @returns {string} text with script/style elements and all tags removed, whitespace collapsed.
 */
function htmlToSearchableText(html) {
  return html
    // Drop whole <script>/<style> elements first so their contents never reach the text.
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();
}

/**
 * Locate the first schedule keyword that occurs in `text`.
 *
 * Keywords are tried in priority order. `indexOf` reports a miss as -1, which
 * is truthy, so the results cannot be combined with `||`; each one is compared
 * against -1 explicitly.
 *
 * @param {string} text - lowercase page text.
 * @returns {number} index of the first keyword that is present, or -1 when none is.
 */
function findScheduleIndex(text) {
  for (const keyword of SCHEDULE_KEYWORDS) {
    const index = text.indexOf(keyword);
    if (index !== -1) {
      return index;
    }
  }
  return -1;
}

/**
 * Scrape the two sites under investigation and print what was found.
 *
 * For the Polish site, when no schedules were extracted but raw HTML is
 * available, a snippet of page text around the first schedule keyword is
 * printed to help diagnose the miss.
 *
 * @returns {Promise<void>} resolves once both checks ran and the scraper was closed.
 */
async function checkRealBugs() {
  const scraper = new GenericScraper();
  await scraper.init();

  // Once init() has succeeded, close() must run even if a scrape throws.
  try {
    console.log('=== 1. Iglesia de San Fernando (trying Spanish page) ===\n');
    scraper.setCountry('ES');
    const spanishUrl = 'https://www.parroquiasanfernandomaspalomas.net/'; // Remove /de/
    const result1 = await scraper.scrape(spanishUrl);

    console.log(`URL: ${spanishUrl}`);
    printSummary(result1);
    if (result1.schedules.length > 0) {
      printSampleSchedules(result1.schedules);
    }

    console.log('\n=== 2. Kościół (Poland) ===\n');
    scraper.setCountry('PL');
    const result2 = await scraper.scrape('http://parafialubojna.pl');

    printSummary(result2);
    if (result2.schedules.length > 0) {
      printSampleSchedules(result2.schedules);
    } else if (result2.rawHtml) {
      const text = htmlToSearchableText(result2.rawHtml);

      // Look for Polish schedule keywords
      const scheduleIndex = findScheduleIndex(text);
      if (scheduleIndex !== -1) {
        const snippet = text.substring(scheduleIndex, scheduleIndex + SNIPPET_LENGTH);
        console.log('Found schedule section:');
        console.log(snippet);
      }
    }
  } finally {
    await scraper.close();
  }
}
// Entry point: run the checks; on failure, log the error and make the process
// exit with a non-zero status so shells and CI can detect it.
checkRealBugs().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});