#!/usr/bin/env tsx
/**
 * Deep dive into Paróquia da Paz parsing bug.
 *
 * Scrapes the parish home page with GenericScraper, flattens the returned
 * raw HTML to lowercase text, and prints where day names, "h"-style times
 * and schedule keywords occur so the parsing problem can be inspected by eye.
 */
import { GenericScraper } from '../../src/scrapers/strategies/generic';

/**
 * Run the scrape and print the diagnostic report to stdout.
 *
 * The scraper is always closed once it has been initialised, even when the
 * scrape or the report generation throws.
 *
 * @returns {Promise<void>} Resolves after the report is printed and the
 *   scraper is closed; rejects with whatever the scraper threw.
 */
async function debugPaz() {
  const url = 'https://www.paroquiadapaz.org.br/';
  console.log(`Debugging: ${url}\n`);

  const scraper = new GenericScraper();
  await scraper.init();

  try {
    scraper.setCountry('BR');
    const result = await scraper.scrape(url);

    console.log(`Success: ${result.success}`);
    console.log(`Schedules: ${result.schedules.length}\n`);

    // Nothing to inspect without the raw page; `finally` still closes the scraper.
    if (!result.rawHtml) {
      return;
    }

    // Drop <script>/<style> elements together with their contents first, so
    // inline JS/CSS does not leak into the text once the remaining tags are
    // stripped. The opening-tag part of each pattern is required: without it
    // the pattern would start matching at any bare ">" and swallow page content.
    const text = result.rawHtml
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .toLowerCase();

    // Find where days appear
    console.log('=== Finding day + time patterns ===\n');
    // Accented and unaccented spellings are both listed.
    const days = ['domingo', 'segunda', 'terça', 'terca', 'quarta', 'quinta', 'sexta', 'sábado', 'sabado'];
    for (const day of days) {
      // Only the first occurrence of each day name is reported.
      const dayIndex = text.indexOf(day);
      if (dayIndex === -1) {
        continue;
      }

      // Show context around the day (100 chars before and 200 after)
      const before = Math.max(0, dayIndex - 100);
      const after = Math.min(text.length, dayIndex + 200);
      const snippet = text.substring(before, after);

      console.log(`${day.toUpperCase()}:`);
      console.log(` Position: ${dayIndex}`);
      console.log(` Context: ...${snippet}...`);
      console.log('');
    }

    // Check for "h" time format specifically
    console.log('\n=== Checking "h" time format ===');
    // Matches 1-2 digits, a literal "h", and optionally two more digits (e.g. "19h", "7h30").
    const hTimeRegex = /(\d{1,2})h(\d{2})?/g;
    const hTimes = text.match(hTimeRegex);
    if (hTimes) {
      console.log(`Found ${hTimes.length} "h" format times:`);
      // Cap the listing at 30 entries to keep the output readable.
      console.log(hTimes.slice(0, 30).join(', '));
    }

    // Look for schedule structure
    console.log('\n=== Looking for schedule structure ===');
    const scheduleKeywords = ['horário', 'horario', 'missa', 'missas', 'santa missa'];
    for (const keyword of scheduleKeywords) {
      const index = text.indexOf(keyword);
      if (index === -1) {
        continue;
      }

      console.log(`\nFound "${keyword}" at position ${index}:`);
      // substring() clamps the end index to the text length, so no explicit bound is needed.
      console.log(text.substring(index, index + 300));
    }
  } finally {
    await scraper.close();
  }
}

debugPaz().catch((error) => {
  console.error(error);
  // Report the failure through the exit status as well as the log.
  process.exitCode = 1;
});