#!/usr/bin/env tsx
/**
 * Debug Paróquia da Paz with added logging.
 *
 * Scrapes the parish home page with GenericScraper, prints the scrape outcome,
 * then replays the comma-separated day-grouping regex by hand against the
 * page text so it is visible which day groups (if any) the pattern captures.
 */
import { GenericScraper } from '../../src/scrapers/strategies/generic';
import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';

/** Page under investigation. */
const TARGET_URL = 'https://www.paroquiadapaz.org.br/';

/** Country code passed both to the scraper and to the day-name lookup. */
const COUNTRY_CODE = 'BR';

/** Literal (lower-cased) phrase used to locate the schedule when the regex finds nothing. */
const SCHEDULE_MARKER = 'segundas, terças';

/**
 * Reduce an HTML document to lower-cased plain text.
 *
 * Whole <script> and <style> elements are dropped first (their bodies are not
 * page text), then every remaining tag becomes a space and whitespace runs are
 * collapsed to a single space.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Lower-cased text with tags removed and whitespace collapsed.
 */
function htmlToSearchableText(html) {
  return html
    // The opening-tag prefix is required: without it the pattern would start
    // matching at any '>' and swallow real page content up to the closing tag.
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();
}

/**
 * Escape regex metacharacters so a string can be embedded literally in a RegExp source.
 *
 * @param {string} value - Text to embed.
 * @returns {string} The text with every metacharacter backslash-escaped.
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Build the day-grouping regex under test.
 *
 * Capture group 1 is a run of two or more day names separated by commas and/or
 * whitespace with an optional conjunction (e|and|et|und|y). Capture group 2 is
 * the text that follows, taken lazily up to the next day name or end of input.
 *
 * NOTE(review): this is meant to be the exact pattern used by the scraper —
 * keep it in sync with that code when either side changes.
 *
 * @param {string[]} dayNames - Day names, longest first so that a longer name
 *   is tried before a shorter one that is its prefix.
 * @returns {RegExp} Global, case-insensitive regex.
 */
function buildDayGroupRegex(dayNames) {
  const allDayNamesPattern = dayNames.map(escapeRegExp).join('|');
  return new RegExp(
    `((?:${allDayNamesPattern})(?:[,\\s]+(?:e|and|et|und|y)?\\s*(?:${allDayNamesPattern}))+)[:\\s]+([^]*?)(?=(?:${allDayNamesPattern})|$)`,
    'gi'
  );
}

/**
 * Print every match of the day-group regex in the text.
 *
 * @param {string} text - Normalised page text.
 * @param {RegExp} dayGroupRegex - Regex from buildDayGroupRegex (must carry the `g` flag).
 * @returns {number} Number of matches printed.
 */
function logDayGroupMatches(text, dayGroupRegex) {
  let matchCount = 0;
  for (const [fullMatch, dayGroup, timeText] of text.matchAll(dayGroupRegex)) {
    matchCount += 1;
    console.log(`Match #${matchCount}:`);
    console.log(` Full match: "${fullMatch.substring(0, 100)}"`);
    console.log(` Day group: "${dayGroup}"`);
    console.log(` Time text: "${timeText.substring(0, 50)}"`);
    console.log('');
  }
  return matchCount;
}

/**
 * Fallback diagnostics for when the regex matched nothing: locate the schedule
 * by a literal marker phrase and report which of the first ten day names occur
 * in the 300 characters starting there.
 *
 * @param {string} text - Normalised page text.
 * @param {string[]} sortedDayNames - Day names, longest first.
 * @returns {void}
 */
function logScheduleSnippet(text, sortedDayNames) {
  const scheduleIndex = text.indexOf(SCHEDULE_MARKER);
  if (scheduleIndex === -1) {
    return;
  }

  const snippet = text.substring(scheduleIndex, scheduleIndex + 300);
  console.log('Found schedule text at position', scheduleIndex);
  console.log('Snippet:', snippet);
  console.log('');

  // Test if individual day names are matching
  console.log('Testing individual day name matches in snippet:');
  for (const dayName of sortedDayNames.slice(0, 10)) {
    if (snippet.includes(dayName)) {
      console.log(` ✓ Found: ${dayName}`);
    }
  }
}

/**
 * Run the day-grouping regex by hand against the scraped HTML and print the findings.
 *
 * @param {string} rawHtml - HTML returned by the scraper.
 * @returns {void}
 */
function analyseDayGrouping(rawHtml) {
  const text = htmlToSearchableText(rawHtml);

  // Test the regex pattern manually
  console.log('=== Testing comma-separated day grouping regex ===\n');

  const dayConfigs = getDayNamesForCountry(COUNTRY_CODE);
  const dayPatterns = buildDayPatterns(dayConfigs);
  const dayNames = Object.keys(dayPatterns);
  // Sort a copy so the "Day patterns" line below keeps the original key order.
  const sortedDayNames = [...dayNames].sort((a, b) => b.length - a.length);

  console.log('Day patterns:', dayNames.join(', '));
  console.log('');

  const dayGroupRegex = buildDayGroupRegex(sortedDayNames);
  console.log('Regex pattern:', `${dayGroupRegex.source.substring(0, 200)}...\n`);

  const matchCount = logDayGroupMatches(text, dayGroupRegex);
  if (matchCount === 0) {
    console.log('No matches found!\n');
    // Try to find the schedule text manually
    logScheduleSnippet(text, sortedDayNames);
  }
}

/**
 * Entry point: scrape the target page, print the result summary and, when raw
 * HTML is available, the manual regex diagnostics.
 *
 * @returns {Promise<void>} Resolves once the scraper has been closed; rejects
 *   with whatever the scraper or the analysis threw.
 */
async function debugPazWithLogging() {
  const url = TARGET_URL;
  console.log(`Debugging: ${url}\n`);

  const scraper = new GenericScraper();
  await scraper.init();

  try {
    scraper.setCountry(COUNTRY_CODE);
    const result = await scraper.scrape(url);

    console.log(`Success: ${result.success}`);
    console.log(`Schedules: ${result.schedules.length}\n`);

    if (result.rawHtml) {
      analyseDayGrouping(result.rawHtml);
    }
  } finally {
    // Always release the scraper, even when scraping or the analysis throws.
    await scraper.close();
  }
}

debugPazWithLogging().catch(console.error);