Files
ScraperControl/scripts/debug/find-office-hours-pattern.ts
Albert 2c51513851 chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored
local-only files: web scrapers, admin dashboard, ChromaDB integration,
debug scripts, and utility libraries that aren't tracked in Gitea.

Gitea master adds: discovermass, buscarmisas-network, hk-parishes,
bohosluzby, kerknet, gottesdienstzeiten, miserend importers,
ClaimRequest model, forward geocoding, heartbeat healthcheck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-12 19:11:22 -04:00

43 lines
1.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env tsx
import { GenericScraper } from '../../src/scrapers/strategies/generic';
/**
 * Debug helper: scrapes https://www.alterpeter.de/ with a GenericScraper
 * configured for country 'DE' and prints the page text surrounding the
 * office-hours pattern "9.00 12.00".
 *
 * The raw HTML is reduced to lowercase plain text (scripts, styles and tags
 * removed, whitespace collapsed) before searching. If the exact pattern is
 * missing, a few alternative spellings of "9 o'clock" are tried and the
 * context of the first one found is printed instead.
 *
 * Nothing is printed when the scrape result carries no `rawHtml`.
 *
 * @returns {Promise<void>} Resolves once the scraper has been closed.
 *   Rejects with whatever error the scraper calls raise.
 */
async function main() {
  const scraper = new GenericScraper();
  await scraper.init();

  // Everything after a successful init() runs under try/finally so that
  // close() is still awaited when scraping or text processing throws.
  try {
    scraper.setCountry('DE');
    const result = await scraper.scrape('https://www.alterpeter.de/');
    if (!result.rawHtml) {
      return;
    }

    // Flatten the HTML into searchable lowercase text: drop <script> and
    // <style> elements including their content, replace remaining tags with
    // a space, then collapse whitespace runs so patterns separated by markup
    // or newlines become single-space separated.
    const text = result.rawHtml
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .toLowerCase();

    const idx = text.indexOf('9.00 12.00');
    if (idx !== -1) {
      console.log('Context around "9.00 12.00":');
      console.log(text.substring(Math.max(0, idx - 150), idx + 200));
      return;
    }

    console.log('Pattern "9.00 12.00" not found');

    // Try alternative patterns; only the first one that matches is reported.
    const patterns = ['9.00', '9:00', '09:00', '09.00'];
    for (const pattern of patterns) {
      const idx2 = text.indexOf(pattern);
      if (idx2 !== -1) {
        console.log(`\nFound "${pattern}" at position ${idx2}:`);
        console.log(text.substring(Math.max(0, idx2 - 100), idx2 + 150));
        break;
      }
    }
  } finally {
    await scraper.close();
  }
}
// Script entry point: run main() and report any failure. The exit code is set
// to 1 so that a failed run is not reported to the shell/CI as success;
// process.exitCode (rather than process.exit) lets pending output flush.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});