#!/usr/bin/env tsx
/**
 * Diagnostic script for a Polish church schedule.
 *
 * Runs the day-section regex against a fixed sample text and prints what gets
 * captured for "niedziela" and "poniedziałek", plus any "H MM"-style times
 * found inside each captured section.
 */
import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';

// Characters that must be backslash-escaped before a string is embedded in a RegExp source.
const REGEX_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g;

// Times written as "<1-2 digits> <whitespace> <2 digits>", e.g. "8 00".
// Only used through String.prototype.match, which restarts a global regex at index 0.
const SPACED_TIME = /\b(\d{1,2})\s+(\d{2})(?!\d)/g;

// Sample text from the page, lower-cased before any matching is done.
const text = `msze święte niedziela i uroczystości: 8 00 , 9 30 (lubojenka), 11 00 , 16 00 w lipcu i sierpniu nie ma mszy popołudniowej!--> dni powszednie: poniedziałek: godz. 8 00 wtorek - sobota: godz. 18 00`.toLowerCase();

console.log('Text:');
console.log(text);
console.log('\n');

// Alternation of every key of the PL day patterns, longest key first, each one regex-escaped.
const dayPatterns = buildDayPatterns(getDayNamesForCountry('PL'));
const allDayNamesPattern = Object.keys(dayPatterns)
  .sort((left, right) => right.length - left.length)
  .map((dayName) => dayName.replace(REGEX_SPECIAL_CHARS, '\\$&'))
  .join('|');

console.log('=== Testing individual day matching ===\n');

// --- niedziela ---------------------------------------------------------------
// The day name must follow start-of-text, whitespace or one of ",;:". The lazy
// `[^]*?` group (any character, newlines included) then runs until the next
// day-name alternative or the end of the text.
const niedzielaSource = 'niedziela'.replace(REGEX_SPECIAL_CHARS, '\\$&');
const niedzielaRegex = new RegExp(
  `(?:^|\\s|[,;:])${niedzielaSource}[:\\s]+([^]*?)(?=${allDayNamesPattern}|$)`,
  'i',
);
const niedzielaHit = niedzielaRegex.exec(text);

if (niedzielaHit === null) {
  console.log(`✗ niedziela NOT matched`);
  console.log('');

  // Fall back to a much simpler pattern to see whether the day name is followed by anything at all.
  const fallbackHit = /niedziela[:\s]+(.{0,100})/i.exec(text);
  if (fallbackHit !== null) {
    console.log(`Simple regex matched: "${fallbackHit[1]}"`);
  }
} else {
  const [wholeMatch, sectionText] = niedzielaHit;

  console.log(`✓ niedziela matched!`);
  console.log(` Full match: "${wholeMatch.slice(0, 100)}"`);
  console.log(` Captured text: "${sectionText.slice(0, 100)}"`);
  console.log('');

  // Check whether times can be pulled out of the captured section.
  const niedzielaTimes = sectionText.match(SPACED_TIME);
  console.log(` Times in captured text: ${niedzielaTimes?.join(', ') ?? 'none'}`);
}

// --- poniedziałek ------------------------------------------------------------
console.log('\n=== Testing poniedziałek ===\n');

const poniedzialekRegex = new RegExp(
  `(?:^|\\s|[,;:])poniedziałek[:\\s]+([^]*?)(?=${allDayNamesPattern}|$)`,
  'i',
);
const poniedzialekHit = poniedzialekRegex.exec(text);

if (poniedzialekHit === null) {
  console.log(`✗ poniedziałek NOT matched`);
} else {
  const sectionText = poniedzialekHit[1];

  console.log(`✓ poniedziałek matched!`);
  console.log(` Captured text: "${sectionText.slice(0, 100)}"`);

  const poniedzialekTimes = sectionText.match(SPACED_TIME);
  console.log(` Times: ${poniedzialekTimes?.join(', ') ?? 'none'}`);
}