#!/usr/bin/env tsx
/**
 * Debug a specific French page to see why scraping failed.
 *
 * Scrapes one hard-coded URL with GenericScraper, prints the scrape outcome,
 * and, when raw HTML is available, prints a flattened text sample plus any
 * French day names and time-like patterns found in it.
 */

import { GenericScraper } from '../../src/scrapers/strategies/generic';

/**
 * Reduce an HTML document to lowercased text with collapsed whitespace.
 *
 * <script> and <style> elements are removed together with their contents so
 * that inline JS/CSS does not pollute the text; every remaining tag is
 * replaced by a single space.
 *
 * @param {string} rawHtml - HTML source of the page.
 * @returns {string} Lowercased text with runs of whitespace collapsed to one space.
 */
function extractVisibleText(rawHtml) {
  return rawHtml
    // The element name must be part of the pattern: without it the match
    // starts at the first '>' in the document and swallows real content.
    .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();
}

/**
 * Scrape the hard-coded URL and print diagnostics to the console.
 *
 * @returns {Promise<void>} Resolves once diagnostics are printed and the
 *   scraper has been closed; rejects if init, scrape or close rejects.
 */
async function debugPage() {
  const url = 'https://www.chemin-neuf.fr/'; // Last failed church

  console.log(`Debugging: ${url}\n`);

  const scraper = new GenericScraper();
  await scraper.init();

  // NOTE(review): init()/close() presumably acquire and release an underlying
  // resource (e.g. a browser) — confirm. The finally block guarantees close()
  // runs even when scraping or the diagnostics below throw.
  try {
    scraper.setCountry('FR');

    const result = await scraper.scrape(url);

    console.log(`Success: ${result.success}`);
    console.log(`Schedules found: ${result.schedules.length}`);
    if (result.error) {
      console.log(`Error: ${result.error}`);
    }

    if (result.rawHtml) {
      const text = extractVisibleText(result.rawHtml);

      console.log('\n=== Page Text Sample (first 2000 chars) ===');
      console.log(text.substring(0, 2000));
      console.log('\n');

      // Check for French day names
      const frenchDays = ['dimanche', 'lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi'];
      console.log('=== French day names found ===');
      for (const day of frenchDays) {
        if (text.includes(day)) {
          console.log(`✓ Found: ${day}`);
        }
      }

      // Check for time patterns
      console.log('\n=== Time patterns (sample) ===');
      // The text is already lowercased, so only the lowercase suffix
      // alternatives (am, pm, uur, h) can actually match here.
      const timeRegex = /\d{1,2}[h:\.]\s*\d{0,2}\s*(?:AM|PM|am|pm|Uhr|uur|h)?/g;
      const times = text.match(timeRegex);
      if (times) {
        console.log(`Found ${times.length} time-like patterns:`);
        console.log(times.slice(0, 20).join(', '));
      } else {
        console.log('No time patterns found');
      }
    }
  } finally {
    await scraper.close();
  }
}

debugPage().catch(console.error);