Files
ScraperControl/scripts/debug/test-polish-with-logging.ts

66 lines
2.1 KiB
TypeScript
Raw Normal View History

#!/usr/bin/env tsx
/**
* Test Polish church with detailed section logging
*/
import { GenericScraper } from '../../src/scrapers/strategies/generic';
// Debug-only monkey-patch: wrap GenericScraper's parseSchedules so that every
// call first logs the sections reported by findScheduleSections(), then
// delegates to the original implementation unchanged.
// Both members are reached by string key; presumably they are not part of the
// public API — confirm against GenericScraper before relying on this elsewhere.
const originalParse = GenericScraper.prototype['parseSchedules'];
if (typeof originalParse !== 'function') {
  // Fail fast: without this guard a renamed/removed method would only surface
  // as an opaque TypeError at the very end of the patched function.
  throw new Error(
    'GenericScraper.prototype.parseSchedules is not a function; cannot install debug logging',
  );
}
GenericScraper.prototype['parseSchedules'] = function(html: string) {
  // Maximum number of characters of each section's text shown in the log.
  const previewLength = 80;

  // Reduce the HTML to lowercase plain text: drop <script>/<style> blocks,
  // replace remaining tags with spaces, and collapse whitespace runs.
  // NOTE(review): presumably mirrors the normalization done inside the real
  // parser so findScheduleSections() sees the same input — confirm.
  const text = html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();

  // Call findScheduleSections and log result
  const sections = this['findScheduleSections'](text);
  console.log('\n=== Sections found by findScheduleSections() ===\n');

  // Indexed by section.day, so index 0 is labelled Sunday.
  const dayNames = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];

  // `any` is deliberate: the section shape is declared outside this script and
  // only `day` and `text` are read here.
  sections.forEach((section: any, i: number) => {
    console.log(`Section ${i + 1}: ${dayNames[section.day]} (day ${section.day})`);
    // Only show an ellipsis when the text was actually truncated, so short
    // sections are not misreported as cut off.
    const preview =
      section.text.length > previewLength
        ? `${section.text.substring(0, previewLength)}...`
        : section.text;
    console.log(` Text: "${preview}"`);
  });
  console.log(`\nTotal sections: ${sections.length}\n`);

  // Continue with normal processing
  return originalParse.call(this, html);
};
/**
 * Runs GenericScraper against one hard-coded Polish parish site with the
 * country set to 'PL', then prints the success flag, the schedule count and
 * the scheduled times grouped by day of week (index 0 is printed as
 * "Niedziela", i.e. Sunday).
 *
 * The scraper is always closed once it has been initialised, even when
 * scraping or reporting throws; the error then propagates to the caller.
 *
 * @returns A promise that resolves when the report is printed and the scraper is closed.
 */
async function testPolish(): Promise<void> {
  const url = 'http://parafialubojna.pl';
  console.log(`Testing: ${url}`);

  const scraper = new GenericScraper();
  // init() stays outside the try block so close() is only attempted on a
  // scraper that initialised successfully.
  await scraper.init();
  try {
    scraper.setCountry('PL');
    const result = await scraper.scrape(url);
    console.log(`\nFinal result: ${result.success}`);
    console.log(`Schedules: ${result.schedules.length}\n`);

    if (result.schedules.length > 0) {
      // Bucket the schedules by their dayOfWeek value.
      const byDay: Record<number, typeof result.schedules> = {};
      for (const sched of result.schedules) {
        if (!byDay[sched.dayOfWeek]) byDay[sched.dayOfWeek] = [];
        byDay[sched.dayOfWeek].push(sched);
      }

      const dayNamesPL = ['Niedziela', 'Poniedziałek', 'Wtorek', 'Środa', 'Czwartek', 'Piątek', 'Sobota'];
      console.log('Parsed schedules by day:');
      // Walk days 0..6 in order and skip days that have no schedules.
      for (let i = 0; i < 7; i++) {
        if (byDay[i]) {
          console.log(` ${dayNamesPL[i]}: ${byDay[i].map(s => s.time).join(', ')}`);
        }
      }
    }
  } finally {
    // Previously skipped whenever scrape() threw, leaving the scraper open.
    // Presumably releases whatever init() acquired — confirm in GenericScraper.
    await scraper.close();
  }
}
// Script entry point: run the debug scenario. On failure, log the error and
// mark the process as failed so shells and CI see a non-zero exit status
// instead of a silent exit code 0.
testPolish().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});