43 lines
1.2 KiB
TypeScript
43 lines
1.2 KiB
TypeScript
|
|
#!/usr/bin/env tsx
|
|||
|
|
import { GenericScraper } from '../../src/scrapers/strategies/generic';
|
|||
|
|
|
|||
|
|
/** Exact opening-hours string this debug run looks for in the page text. */
const TARGET_PATTERN = '9.00 – 12.00';

/**
 * Looser spellings tried, in order, when TARGET_PATTERN is absent.
 * NOTE: any text containing '09.00' / '09:00' also contains '9.00' / '9:00',
 * so the search always stops on an unpadded entry before reaching the
 * zero-padded ones.
 */
const FALLBACK_PATTERNS = ['9.00', '9:00', '09:00', '09.00'];

/**
 * Reduce raw HTML to a single-line, lower-cased text string for searching.
 *
 * Removes <script> and <style> elements together with their contents,
 * replaces every remaining tag with a space, and collapses whitespace runs.
 * HTML entities are NOT decoded.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Lower-cased text with tags stripped.
 */
function htmlToSearchableText(html) {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();
}

/**
 * Scrape https://www.alterpeter.de/ and print the text surrounding the
 * opening-hours pattern "9.00 – 12.00", or, if that is absent, the text
 * around the first fallback time spelling found.
 *
 * The scraper is closed in a `finally` block, so it is released even when
 * `setCountry` or `scrape` throws.
 *
 * @returns {Promise<void>} Resolves once output is printed and the scraper is closed.
 */
async function main() {
  const scraper = new GenericScraper();
  // init() stays outside the try: close() is only attempted on a scraper
  // whose initialisation succeeded.
  await scraper.init();

  try {
    scraper.setCountry('DE');

    const result = await scraper.scrape('https://www.alterpeter.de/');
    if (!result.rawHtml) {
      console.log('No raw HTML returned by scraper');
      return;
    }

    const text = htmlToSearchableText(result.rawHtml);

    const idx = text.indexOf(TARGET_PATTERN);
    if (idx !== -1) {
      console.log('Context around "9.00 – 12.00":');
      console.log(text.substring(Math.max(0, idx - 150), idx + 200));
      return;
    }

    console.log('Pattern "9.00 – 12.00" not found');

    // Try alternative patterns; only the first one that matches is reported.
    for (const pattern of FALLBACK_PATTERNS) {
      const fallbackIdx = text.indexOf(pattern);
      if (fallbackIdx !== -1) {
        console.log(`\nFound "${pattern}" at position ${fallbackIdx}:`);
        console.log(text.substring(Math.max(0, fallbackIdx - 100), fallbackIdx + 150));
        break;
      }
    }
  } finally {
    await scraper.close();
  }
}
|
|||
|
|
|
|||
|
|
// Report any failure and exit non-zero so shells and CI can detect it;
// logging alone would leave the exit code at 0.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|