Files
ScraperControl/scripts/debug/check-german-office-hours.ts

46 lines
1.3 KiB
TypeScript
Raw Permalink Normal View History

#!/usr/bin/env tsx
/**
 * Debug script: scrape a German church website and print the text that follows
 * the first "Montag" and "Sonntag" mentions, to understand its office-hours pattern.
 */
import { GenericScraper } from '../../src/scrapers/strategies/generic';
/**
 * Scrapes https://www.alterpeter.de/ with the country set to 'DE' and prints
 * the text following the first "montag" and "sonntag" occurrences.
 *
 * The raw HTML is first reduced to lower-cased plain text: `<script>` and
 * `<style>` elements are removed, every remaining tag is replaced by a space,
 * and whitespace runs are collapsed to a single space.
 *
 * Nothing is printed when the scrape result has no `rawHtml`, or for a keyword
 * that does not occur in the text.
 *
 * @returns A promise that resolves once the scraper has been closed.
 */
async function checkGerman(): Promise<void> {
  const scraper = new GenericScraper();
  await scraper.init();

  // Once init() has succeeded, always release the scraper — even if scraping
  // or text processing throws — so a failed run does not leave it open.
  try {
    scraper.setCountry('DE');
    const result = await scraper.scrape('https://www.alterpeter.de/');
    if (!result.rawHtml) {
      return;
    }

    const text = result.rawHtml
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .toLowerCase();

    // Prints `length` characters starting at the first occurrence of `keyword`,
    // preceded by `heading` and followed by an empty line.
    const printContext = (keyword: string, length: number, heading: string): void => {
      const start = text.indexOf(keyword);
      if (start === -1) {
        return;
      }
      console.log(heading);
      console.log(text.substring(start, start + length));
      console.log('');
    };

    // Find Monday section
    printContext('montag', 200, '=== Monday (Montag) context ===');
    // Find Sunday section
    printContext('sonntag', 300, '=== Sunday (Sonntag) context ===');
  } finally {
    await scraper.close();
  }
}
// Run the check. On failure, log the error and mark the process as failed so
// that shells and CI see a non-zero exit code instead of a silent success.
checkGerman().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});