Files
ScraperControl/scripts/debug/test-time-extraction.ts
Albert 2c51513851 chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored
local-only files: web scrapers, admin dashboard, ChromaDB integration,
debug scripts, and utility libraries that aren't tracked in Gitea.

Gitea master adds: discovermass, buscarmisas-network, hk-parishes,
bohosluzby, kerknet, gottesdienstzeiten, miserend importers,
ClaimRequest model, forward geocoding, heartbeat healthcheck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-12 19:11:22 -04:00

50 lines
1.7 KiB
TypeScript

#!/usr/bin/env tsx
/**
* Debug script: prints which time-extraction regex patterns match each
* sample text, to identify the pattern that produces a bare "00" time.
*/
/** One labelled candidate regex for recognising a time of day in text. */
type TimePattern = { name: string; pattern: RegExp };

// Sample text from a German church.
const testText = "10:00 uhr lateinisches amt";

// Candidate time formats. Each regex carries the `g` flag, which
// String.prototype.matchAll requires in order to enumerate every hit.
const timePatterns: TimePattern[] = [
  {
    // H:MM followed by an AM/PM marker (upper-case, lower-case or dotted).
    name: '12-hour AM/PM',
    pattern: /(\d{1,2}):(\d{2})\s*(AM|PM|am|pm|a\.m\.|p\.m\.)/g,
  },
  {
    // Bare hour plus AM/PM marker; may not start right after ':' or a digit.
    name: '12-hour no minutes',
    pattern: /(?<![:\d])(\d{1,2})\s*(AM|PM|am|pm|a\.m\.|p\.m\.)/g,
  },
  {
    // H:MM that is not followed by AM/PM/am/pm.
    // NOTE(review): the lookahead omits the dotted a.m./p.m. forms that the
    // 12-hour patterns accept — confirm whether that is intentional.
    name: '24-hour colon',
    pattern: /(?<![:\d\w])(\d{1,2}):(\d{2})(?!\s*(AM|PM|am|pm))/g,
  },
  {
    // Hour, an 'h' separator, then optional two-digit minutes (case-insensitive).
    name: 'French/Portuguese h',
    pattern: /(?<![:\d\w])(\d{1,2})\s*h\s*(\d{2})?(?!\w)/gi,
  },
  {
    // H.MM followed by whitespace, end of input, or one of , ; ) ]
    name: 'Italian period',
    pattern: /(?<![:\d\w])(\d{1,2})\.(\d{2})(?=\s|$|,|;|\)|\])/g,
  },
  {
    // Hour with optional ':' or '.' and minutes, then "Uhr" (case-insensitive).
    // No lookbehind guard, so a match may begin right after ':' or a digit.
    name: 'German Uhr (old)',
    pattern: /(\d{1,2})[:\.]?(\d{2})?\s*Uhr/gi,
  },
  {
    // Same as the old variant, but the match may not start after ':' or a digit.
    name: 'German Uhr (fixed)',
    pattern: /(?<![:\d])(\d{1,2})[:\.]?(\d{2})?\s*Uhr/gi,
  },
  {
    // Hour and two-digit minutes separated by whitespace.
    name: 'Polish space',
    pattern: /\b(\d{1,2})\s+(\d{2})(?!\d)/g,
  },
];
/**
 * Prints a match report for one sample text.
 *
 * Logs the text itself, then, for every entry of `timePatterns`, either each
 * match (matched substring and start index) or a "no match" line.
 *
 * @param text - The sample text to run every pattern against.
 */
function reportMatches(text: string): void {
  console.log(`Test text: "${text}"\n`);
  for (const { name, pattern } of timePatterns) {
    const matches = [...text.matchAll(pattern)];
    if (matches.length === 0) {
      console.log(`${name}: no match`);
      continue;
    }
    console.log(`${name}:`);
    for (const match of matches) {
      console.log(` Matched: "${match[0]}" at index ${match.index}`);
    }
  }
}

reportMatches(testText);

// Now test with just "00 uhr"
console.log(`\n${'='.repeat(60)}\n`);
const testText2 = "00 uhr lateinisches";
reportMatches(testText2);