Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
57 lines
1.8 KiB
TypeScript
57 lines
1.8 KiB
TypeScript
#!/usr/bin/env tsx
/**
 * Debug helper: prints which day sections the schedule regexes pick up.
 *
 * A hard-coded, lower-cased Portuguese mass schedule is scanned twice using
 * the day-name configuration returned for country code 'BR':
 *   1. a global regex that looks for runs of two or more day names followed
 *      by their time text;
 *   2. one regex per known day name, reporting the first hit for each.
 *
 * Everything is written to the console; nothing is returned or persisted.
 */

import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';

// Characters that carry special meaning inside a regular expression. Sharing
// one global regex is safe here because String.prototype.replace resets
// lastIndex before it starts.
const REGEX_METACHARS = /[.*+?^${}()|[\]\\]/g;

// Simulate the exact text from the page
const scheduleText = `
horário das missas igreja matriz de santo antônio
segundas, terças, quartas e sextas-feiras: 16h e 18h.
quintas-feiras: 16h e 19h (adoração ao santíssimo – 18h).
sábados: 8h, 16h e 18h.
domingos: 8h, 11h, 16h, 18h e 20h.
`.toLowerCase();

console.log('Text to parse:');
console.log(scheduleText);
console.log('');

// The keys of `dayPatterns` are used as day names; the values are only
// printed below as the day index.
const dayPatterns = buildDayPatterns(getDayNamesForCountry('BR'));

// Longest names first, so that a longer day name wins over a shorter one
// that is its prefix when both appear in the same alternation.
const namesLongestFirst = Object.keys(dayPatterns);
namesLongestFirst.sort((left, right) => right.length - left.length);

const allDayNamesPattern = namesLongestFirst
  .map((name) => name.replace(REGEX_METACHARS, '\\$&'))
  .join('|');

console.log('=== COMMA-SEPARATED GROUP MATCHING ===\n');

// Capture 1: two or more day names joined by commas/whitespace and an
// optional conjunction. Capture 2: the text that follows, lazily, up to the
// next day name or the end of the input.
const dayGroupRegex = new RegExp(
  `((?:${allDayNamesPattern})(?:[,\\s]+(?:e|and|et|und|y)?\\s*(?:${allDayNamesPattern}))+)[:\\s]+([^]*?)(?=(?:${allDayNamesPattern})|$)`,
  'gi'
);

let groupNumber = 0;
for (const [, dayGroup, timeText] of scheduleText.matchAll(dayGroupRegex)) {
  groupNumber += 1;
  console.log(`Match #${groupNumber}:`);
  console.log(` Day group: "${dayGroup}"`);
  console.log(` Time text: "${timeText}"`);
  console.log('');
}

console.log('=== INDIVIDUAL DAY MATCHING ===\n');

for (const [dayName, dayIndex] of Object.entries(dayPatterns)) {
  const escaped = dayName.replace(REGEX_METACHARS, '\\$&');

  // The day name must sit at the start of the text or right after
  // whitespace / , ; : and its time text runs lazily up to the next day
  // name or the end of the input.
  const singleDayRegex = new RegExp(
    `(?:^|\\s|[,;:])${escaped}[:\\s]+([^]*?)(?=${allDayNamesPattern}|$)`,
    'i'
  );

  const hit = singleDayRegex.exec(scheduleText);
  if (hit === null) {
    continue;
  }

  console.log(`Found ${dayName} (day ${dayIndex}):`);
  // Only the first 100 characters of the captured text are shown.
  console.log(` Time text: "${hit[1].slice(0, 100)}"`);
}