Files
ScraperControl/scripts/debug/check-2-real-bugs.ts

67 lines
2.2 KiB
TypeScript
Raw Normal View History

#!/usr/bin/env tsx
/**
* Check the 2 potentially real bugs
*/
import { GenericScraper } from '../../src/scrapers/strategies/generic';
/**
 * Minimal shape of a schedule entry as this script reads it.
 * NOTE(review): field types are inferred from how the fields are used below —
 * confirm against the scraper's real schedule type.
 */
interface ScheduleSample {
  dayOfWeek: number;
  time: unknown;
  language: unknown;
  massType: unknown;
}

/** Short day labels, indexed by `dayOfWeek` (index 0 is 'Sun'). */
const DAY_LABELS = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'] as const;

/** Maximum number of schedules printed per site. */
const SAMPLE_LIMIT = 5;

/** Number of characters of page text printed after a keyword hit. */
const SNIPPET_LENGTH = 300;

/** Polish schedule keywords, in priority order (lower-case, matched against lower-cased text). */
const POLISH_SCHEDULE_KEYWORDS = ['msze', 'msza', 'nabożeńst'] as const;

/** Prints up to SAMPLE_LIMIT schedules, one per line, under a 'Sample schedules:' heading. */
function printSampleSchedules(schedules: readonly ScheduleSample[]): void {
  console.log('Sample schedules:');
  for (const s of schedules.slice(0, SAMPLE_LIMIT)) {
    console.log(` ${DAY_LABELS[s.dayOfWeek]} ${s.time} - ${s.language} ${s.massType}`);
  }
}

/** Strips script/style elements and tags from HTML, collapses whitespace, and lower-cases the result. */
function htmlToSearchableText(html: string): string {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();
}

/**
 * Returns the index of the first keyword (in list order) that occurs in `text`,
 * or -1 when none of them occurs.
 *
 * Each result is compared against -1 explicitly: `indexOf` reports "not found"
 * as -1 (truthy) and a match at the very start as 0 (falsy), so chaining the
 * calls with `||` never falls back correctly.
 */
function findFirstKeywordIndex(text: string, keywords: readonly string[]): number {
  for (const keyword of keywords) {
    const index = text.indexOf(keyword);
    if (index !== -1) {
      return index;
    }
  }
  return -1;
}

/**
 * Scrapes the two sites under investigation and prints what the scraper found.
 *
 * For the Polish site, when no schedules were extracted but raw HTML is
 * available, prints a snippet of the page text starting at the first Polish
 * schedule keyword to help diagnose why extraction failed.
 *
 * The scraper is closed even if a scrape call rejects; the rejection still
 * propagates to the caller.
 */
async function checkRealBugs(): Promise<void> {
  const scraper = new GenericScraper();
  await scraper.init();

  try {
    console.log('=== 1. Iglesia de San Fernando (trying Spanish page) ===\n');
    scraper.setCountry('ES');
    // Site root, i.e. with the /de/ path segment removed.
    const spanishUrl = 'https://www.parroquiasanfernandomaspalomas.net/';
    const result1 = await scraper.scrape(spanishUrl);
    console.log(`URL: ${spanishUrl}`);
    console.log(`Success: ${result1.success}`);
    console.log(`Schedules: ${result1.schedules.length}`);
    console.log(`Error: ${result1.error || 'none'}\n`);
    if (result1.schedules.length > 0) {
      printSampleSchedules(result1.schedules);
    }

    console.log('\n=== 2. Kościół (Poland) ===\n');
    scraper.setCountry('PL');
    const result2 = await scraper.scrape('http://parafialubojna.pl');
    console.log(`Success: ${result2.success}`);
    console.log(`Schedules: ${result2.schedules.length}`);
    console.log(`Error: ${result2.error || 'none'}\n`);
    if (result2.schedules.length > 0) {
      printSampleSchedules(result2.schedules);
    } else if (result2.rawHtml) {
      const text = htmlToSearchableText(result2.rawHtml);
      // Look for Polish schedule keywords
      const scheduleIndex = findFirstKeywordIndex(text, POLISH_SCHEDULE_KEYWORDS);
      if (scheduleIndex !== -1) {
        const snippet = text.substring(scheduleIndex, scheduleIndex + SNIPPET_LENGTH);
        console.log('Found schedule section:');
        console.log(snippet);
      }
    }
  } finally {
    // Release the scraper on both the success and the failure path.
    await scraper.close();
  }
}
// Script entry point: run the checks and report any rejection on stderr.
checkRealBugs().catch((err: unknown) => {
  console.error(err);
});