chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (the new source of truth) and restore the local-only files that are not tracked in Gitea: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries.

Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, and miserend importers; the ClaimRequest model; forward geocoding; and a heartbeat healthcheck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
70
scripts/debug/test-polish-sections.ts
Normal file
70
scripts/debug/test-polish-sections.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* Test which day sections are created from a Polish church's Mass schedule text
|
||||
*/
|
||||
|
||||
import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';
|
||||
|
||||
// Sample schedule text, copied from the page under test and lower-cased
// before any matching is attempted.
const text = `msze święte niedziela i uroczystości: 8 00 , 9 30 (lubojenka), 11 00 , 16 00 w lipcu i sierpniu nie ma mszy popołudniowej!--> dni powszednie: poniedziałek: godz. 8 00 wtorek - sobota: godz. 18 00`.toLowerCase();

// Echo the input first so the match results printed later can be read
// against it.
for (const line of ['Text:', text, '\n']) {
  console.log(line);
}

// Keys of the pattern map built for country code 'PL', ordered longest
// first: regex alternation tries alternatives left to right, so a longer key
// is attempted before any shorter key that is a prefix of it.
const polishDayPatterns = buildDayPatterns(getDayNamesForCountry('PL'));
const keysLongestFirst = Object.keys(polishDayPatterns);
keysLongestFirst.sort((left, right) => right.length - left.length);

// Alternation source (`a|b|c`) with every regex metacharacter in each key
// escaped, so the keys are matched literally.
const allDayNamesPattern = keysLongestFirst
  .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
  .join('|');
|
||||
|
||||
console.log('=== Testing individual day matching ===\n');

// Check the section regex for the literal day name "niedziela".
const niedzielaEscaped = 'niedziela'.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const niedzielaRegex = new RegExp(
  [
    // The name must sit at the start of the input or directly after
    // whitespace, a comma, a semicolon or a colon.
    '(?:^|\\s|[,;:])',
    niedzielaEscaped,
    // One or more colons / whitespace characters separate name and content.
    '[:\\s]+',
    // Lazily capture anything (including newlines) as the section content...
    '([^]*?)',
    // ...up to the next alternative from allDayNamesPattern, or end of input.
    `(?=${allDayNamesPattern}|$)`,
  ].join(''),
  'i',
);

const niedzielaMatch = text.match(niedzielaRegex);

if (niedzielaMatch === null) {
  console.log(`✗ niedziela NOT matched`);
  console.log('');

  // Fall back to a looser pattern that takes up to 100 characters after the
  // name, to show whether the name itself is found in the text.
  const looseMatch = text.match(/niedziela[:\s]+(.{0,100})/i);
  if (looseMatch) {
    console.log(`Simple regex matched: "${looseMatch[1]}"`);
  }
} else {
  const [fullMatch, captured] = niedzielaMatch;

  console.log(`✓ niedziela matched!`);
  // Only the first 100 characters are printed to keep the output short.
  console.log(` Full match: "${fullMatch.slice(0, 100)}"`);
  console.log(` Captured text: "${captured.slice(0, 100)}"`);
  console.log('');

  // Look for times written as "H MM" / "HH MM" (digits separated by
  // whitespace) inside the captured section.
  const times = captured.match(/\b(\d{1,2})\s+(\d{2})(?!\d)/g);
  let timesSummary = 'none';
  if (times) {
    timesSummary = times.join(', ');
  }
  console.log(` Times in captured text: ${timesSummary}`);
}
|
||||
|
||||
// Run the same section regex for the literal day name "poniedziałek".
console.log('\n=== Testing poniedziałek ===\n');

// Same shape as the niedziela check: a boundary before the name, then
// colon/whitespace separators, then a lazy capture that ends before the next
// alternative from allDayNamesPattern or at end of input.
const poniedzialekSectionSource = String.raw`(?:^|\s|[,;:])poniedziałek[:\s]+([^]*?)(?=${allDayNamesPattern}|$)`;
const poniedzialekSectionRegex = new RegExp(poniedzialekSectionSource, 'i');

const poniedzialekSection = text.match(poniedzialekSectionRegex);

if (poniedzialekSection === null) {
  console.log(`✗ poniedziałek NOT matched`);
} else {
  const sectionText = poniedzialekSection[1];

  console.log(`✓ poniedziałek matched!`);
  // Only the first 100 characters are printed to keep the output short.
  console.log(` Captured text: "${sectionText.slice(0, 100)}"`);

  // Times written as "H MM" / "HH MM" (digits separated by whitespace).
  const times = sectionText.match(/\b(\d{1,2})\s+(\d{2})(?!\d)/g);
  let timesSummary = 'none';
  if (times) {
    timesSummary = times.join(', ');
  }
  console.log(` Times: ${timesSummary}`);
}
|
||||
Reference in New Issue
Block a user