#!/usr/bin/env tsx
/**
 * Debug which sections are being found.
 *
 * Runs two regex passes over a hard-coded sample of schedule text and prints
 * what each pass captures:
 *   1. "comma-separated group" matching - a run of two or more day names
 *      followed by the text up to the next day name (or end of input);
 *   2. "individual day" matching - each configured day name on its own.
 *
 * The script has no inputs and no return value; all output goes to stdout.
 */

import { getDayNamesForCountry, buildDayPatterns } from '../../src/scrapers/i18n/day-names';

// Characters that have special meaning inside a regular expression. Used with
// String.prototype.replace, which starts every global replacement from index
// 0, so sharing one instance between call sites is safe.
const REGEX_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g;

// Simulate the exact text from the page
const scheduleText = `
horário das missas igreja matriz de santo antônio
segundas, terças, quartas e sextas-feiras: 16h e 18h.
quintas-feiras: 16h e 19h (adoração ao santíssimo – 18h).
sábados: 8h, 16h e 18h.
domingos: 8h, 11h, 16h, 18h e 20h.
`.toLowerCase();

console.log('Text to parse:');
console.log(scheduleText);
console.log('');

// NOTE(review): dayPatterns is treated here as a plain object keyed by day
// name; its values are only printed as "day <value>" below - confirm the
// exact shape against buildDayPatterns.
const dayConfigs = getDayNamesForCountry('BR');
const dayPatterns = buildDayPatterns(dayConfigs);

// Longest names first: regex alternation tries alternatives left to right, so
// a longer name is attempted before any shorter name.
const sortedDayNames = Object.keys(dayPatterns).sort((a, b) => b.length - a.length);
const allDayNamesPattern = sortedDayNames
  .map(d => d.replace(REGEX_SPECIAL_CHARS, '\\$&'))
  .join('|');

console.log('=== COMMA-SEPARATED GROUP MATCHING ===\n');

// Capture 1: two or more day names joined by commas/whitespace and an optional
// conjunction. Capture 2: the lazily-matched text that follows, stopping just
// before the next day name or at the end of the input.
const dayGroupRegex = new RegExp(
  `((?:${allDayNamesPattern})(?:[,\\s]+(?:e|and|et|und|y)?\\s*(?:${allDayNamesPattern}))+)[:\\s]+([^]*?)(?=(?:${allDayNamesPattern})|$)`,
  'gi'
);

let groupMatch;
let matchCount = 0;
// The 'g' flag makes exec() resume from lastIndex, so each iteration yields
// the next match until exec() returns null.
while ((groupMatch = dayGroupRegex.exec(scheduleText)) !== null) {
  matchCount++;
  console.log(`Match #${matchCount}:`);
  console.log(` Day group: "${groupMatch[1]}"`);
  console.log(` Time text: "${groupMatch[2]}"`);
  console.log('');
}

console.log('=== INDIVIDUAL DAY MATCHING ===\n');

for (const [dayName, dayIndex] of Object.entries(dayPatterns)) {
  const escaped = dayName.replace(REGEX_SPECIAL_CHARS, '\\$&');
  // The day name must sit at the start of the text or right after whitespace
  // or one of , ; : and must be followed by a colon or whitespace. No 'g'
  // flag, so match() returns only the first occurrence with its capture group.
  const regex = new RegExp(
    `(?:^|\\s|[,;:])${escaped}[:\\s]+([^]*?)(?=${allDayNamesPattern}|$)`,
    'i'
  );
  const match = scheduleText.match(regex);
  if (match) {
    console.log(`Found ${dayName} (day ${dayIndex}):`);
    // Only the first 100 characters of the captured text are printed.
    console.log(` Time text: "${match[1].substring(0, 100)}"`);
  }
}