#!/usr/bin/env tsx
/**
 * Debug which sections are being found
 *
 * Runs two regex passes over a hard-coded sample of a Brazilian parish
 * schedule and prints what each pass captures:
 *   1. groups of several day names sharing one list of times
 *      ("segundas, terças, quartas e sextas-feiras: ...");
 *   2. each configured day name on its own.
 *
 * Day names come from the project's i18n helpers for country code 'BR'.
 */
import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';

// Regex metacharacters that must be backslash-escaped before a day name is
// embedded in a RegExp source string. Only used with String#replace, which
// resets lastIndex for global regexes, so sharing one instance is safe.
const REGEX_METACHARS = /[.*+?^${}()|[\]\\]/g;

// Simulate the exact text from the page.
// Kept on one line so the sample's whitespace is exactly what is shown here.
const scheduleText = ` horário das missas igreja matriz de santo antônio segundas, terças, quartas e sextas-feiras: 16h e 18h. quintas-feiras: 16h e 19h (adoração ao santíssimo – 18h). sábados: 8h, 16h e 18h. domingos: 8h, 11h, 16h, 18h e 20h. `.toLowerCase();

console.log('Text to parse:');
console.log(scheduleText);
console.log('');

// Keys of `dayPatterns` are the day-name strings to search for; the values
// are printed below as "day N" (presumably weekday indices — confirm against
// buildDayPatterns).
const dayPatterns = buildDayPatterns(getDayNamesForCountry('BR'));

// Longest names first, so the alternation tries longer names before shorter ones.
const namesLongestFirst = Object.keys(dayPatterns).sort(
  (left, right) => right.length - left.length,
);
const dayAlternation = namesLongestFirst
  .map((name) => name.replace(REGEX_METACHARS, '\\$&'))
  .join('|');

console.log('=== COMMA-SEPARATED GROUP MATCHING ===\n');

// Capture 1: two or more day names joined by commas/whitespace and an
//            optional conjunction (e|and|et|und|y).
// Capture 2: the text that follows, lazily, up to the next day name or the
//            end of the input.
const dayGroupRegex = new RegExp(
  `((?:${dayAlternation})(?:[,\\s]+(?:e|and|et|und|y)?\\s*(?:${dayAlternation}))+)[:\\s]+([^]*?)(?=(?:${dayAlternation})|$)`,
  'gi',
);

let groupsSeen = 0;
// The 'g' flag makes exec() resume from lastIndex, so each call yields the
// next match until it returns null.
for (
  let hit = dayGroupRegex.exec(scheduleText);
  hit !== null;
  hit = dayGroupRegex.exec(scheduleText)
) {
  groupsSeen += 1;
  console.log(`Match #${groupsSeen}:`);
  console.log(` Day group: "${hit[1]}"`);
  console.log(` Time text: "${hit[2]}"`);
  console.log('');
}

console.log('=== INDIVIDUAL DAY MATCHING ===\n');

Object.entries(dayPatterns).forEach(([dayName, dayIndex]) => {
  const escapedName = dayName.replace(REGEX_METACHARS, '\\$&');
  // The day name must sit at the start of the text or right after
  // whitespace / , ; : and be followed by at least one colon or whitespace.
  const singleDayRegex = new RegExp(
    `(?:^|\\s|[,;:])${escapedName}[:\\s]+([^]*?)(?=${dayAlternation}|$)`,
    'i',
  );

  const found = scheduleText.match(singleDayRegex);
  if (!found) {
    return;
  }

  console.log(`Found ${dayName} (day ${dayIndex}):`);
  // Only the first 100 characters of the captured text are shown.
  console.log(` Time text: "${found[1].slice(0, 100)}"`);
});