chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
49
scripts/debug/test-time-extraction.ts
Normal file
49
scripts/debug/test-time-extraction.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
 * Debug script: test which pattern is matching the "00" time.
 *
 * Every pattern in `timePatterns` is run against every sample in `testTexts`.
 * For each sample the script prints, per pattern, either the matched
 * substrings with their indices or a "no match" line.
 */

// Test text from German church
const testText = "10:00 uhr lateinisches amt";

// Second sample that starts directly at "00 uhr"
const testText2 = "00 uhr lateinisches";

// Samples are reported in this order; add further strings here to test them.
const testTexts = [testText, testText2];

const timePatterns = [
  { name: '12-hour AM/PM', pattern: /(\d{1,2}):(\d{2})\s*(AM|PM|am|pm|a\.m\.|p\.m\.)/g },
  { name: '12-hour no minutes', pattern: /(?<![:\d])(\d{1,2})\s*(AM|PM|am|pm|a\.m\.|p\.m\.)/g },
  { name: '24-hour colon', pattern: /(?<![:\d\w])(\d{1,2}):(\d{2})(?!\s*(AM|PM|am|pm))/g },
  { name: 'French/Portuguese h', pattern: /(?<![:\d\w])(\d{1,2})\s*h\s*(\d{2})?(?!\w)/gi },
  { name: 'Italian period', pattern: /(?<![:\d\w])(\d{1,2})\.(\d{2})(?=\s|$|,|;|\)|\])/g },
  { name: 'German Uhr (old)', pattern: /(\d{1,2})[:\.]?(\d{2})?\s*Uhr/gi },
  // Same as the old variant plus a leading lookbehind, so the hour digits
  // cannot start directly after ':' or another digit.
  { name: 'German Uhr (fixed)', pattern: /(?<![:\d])(\d{1,2})[:\.]?(\d{2})?\s*Uhr/gi },
  { name: 'Polish space', pattern: /\b(\d{1,2})\s+(\d{2})(?!\d)/g },
];

// Printed between the reports of two consecutive samples.
const sectionSeparator = `\n${'='.repeat(60)}\n`;

// One entry per sample text: the text plus, for every pattern, all its matches.
const reports = testTexts.map((text) => ({
  text,
  results: timePatterns.map(({ name, pattern }) => ({
    name,
    // matchAll iterates over a clone of the regex, so reusing the shared /g
    // patterns across samples carries no lastIndex state from one run to the next.
    matches: [...text.matchAll(pattern)],
  })),
}));

reports.forEach(({ text, results }, position) => {
  if (position > 0) {
    console.log(sectionSeparator);
  }
  console.log(`Test text: "${text}"\n`);

  for (const { name, matches } of results) {
    if (matches.length === 0) {
      console.log(`✗ ${name}: no match`);
      continue;
    }
    console.log(`✓ ${name}:`);
    for (const match of matches) {
      console.log(` Matched: "${match[0]}" at index ${match.index}`);
    }
  }
});
|
||||
Reference in New Issue
Block a user