Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
71 lines
2.3 KiB
TypeScript
71 lines
2.3 KiB
TypeScript
#!/usr/bin/env tsx
/**
 * Test which sections are being created for Polish church.
 *
 * Debug script: builds the per-day "section" regex from the PL day-name
 * patterns, runs it against a fixed schedule snippet for `niedziela` and
 * `poniedziałek`, and prints what was captured plus any space-separated
 * times ("8 00", "18 00") found inside the capture.
 */

import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';

/**
 * Escapes regex metacharacters so that `value` is matched literally when
 * interpolated into a RegExp source string.
 *
 * @param {string} value - Raw text (e.g. a day name).
 * @returns {string} The text with every metacharacter backslash-escaped.
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Builds the case-insensitive regex that captures the text following a day
 * name, up to (but not including) the next day name or the end of the input.
 *
 * @param {string} dayName - Day name that opens the section; escaped here.
 * @param {string} terminatorAlternation - Already-escaped `a|b|c` alternation
 *   of all day names used as section terminators. May be empty.
 * @returns {RegExp} Regex whose capture group 1 is the section text.
 */
function buildSectionRegex(dayName, terminatorAlternation) {
  // An empty alternation would produce `(?=|$)`; its empty alternative always
  // succeeds, so the lazy capture would stop immediately and always be ''.
  const lookahead = terminatorAlternation ? `(?=${terminatorAlternation}|$)` : '(?=$)';
  return new RegExp(
    `(?:^|\\s|[,;:])${escapeRegExp(dayName)}[:\\s]+([^]*?)${lookahead}`,
    'i'
  );
}

/**
 * Lists the space-separated times ("8 00", "18 00") found in a section.
 *
 * @param {string} section - Captured section text.
 * @returns {string} Comma-separated matches, or 'none' when there are none.
 */
function formatSpaceSeparatedTimes(section) {
  // A fresh literal per call keeps the /g regex free of shared lastIndex state.
  const times = section.match(/\b(\d{1,2})\s+(\d{2})(?!\d)/g);
  return times ? times.join(', ') : 'none';
}

// Exact text from the page
const text = `msze święte niedziela i uroczystości: 8 00 , 9 30 (lubojenka), 11 00 , 16 00 w lipcu i sierpniu nie ma mszy popołudniowej!--> dni powszednie: poniedziałek: godz. 8 00 wtorek - sobota: godz. 18 00`.toLowerCase();

console.log('Text:');
console.log(text);
console.log('\n');

const dayConfigs = getDayNamesForCountry('PL');
const dayPatterns = buildDayPatterns(dayConfigs);
// Longest names first so that a longer name wins over any shorter name that
// is a prefix of it inside the alternation.
const sortedDayNames = Object.keys(dayPatterns).sort((a, b) => b.length - a.length);
const allDayNamesPattern = sortedDayNames.map((name) => escapeRegExp(name)).join('|');

if (sortedDayNames.length === 0) {
  console.warn('No day names returned for PL; sections will run to the end of the text.');
}

console.log('=== Testing individual day matching ===\n');

// Test niedziela specifically
const niedzielaMatch = text.match(buildSectionRegex('niedziela', allDayNamesPattern));
if (niedzielaMatch) {
  console.log(`✓ niedziela matched!`);
  console.log(` Full match: "${niedzielaMatch[0].substring(0, 100)}"`);
  console.log(` Captured text: "${niedzielaMatch[1].substring(0, 100)}"`);
  console.log('');

  // Test if times can be extracted from captured text
  console.log(` Times in captured text: ${formatSpaceSeparatedTimes(niedzielaMatch[1])}`);
} else {
  console.log(`✗ niedziela NOT matched`);
  console.log('');

  // Try simpler regex
  const simpleMatch = text.match(/niedziela[:\s]+(.{0,100})/i);
  if (simpleMatch) {
    console.log(`Simple regex matched: "${simpleMatch[1]}"`);
  }
}

// Test poniedziałek
console.log('\n=== Testing poniedziałek ===\n');

const poniedzialekMatch = text.match(buildSectionRegex('poniedziałek', allDayNamesPattern));
if (poniedzialekMatch) {
  console.log(`✓ poniedziałek matched!`);
  console.log(` Captured text: "${poniedzialekMatch[1].substring(0, 100)}"`);
  console.log(` Times: ${formatSpaceSeparatedTimes(poniedzialekMatch[1])}`);
} else {
  console.log(`✗ poniedziałek NOT matched`);
}