45 lines
1.4 KiB
TypeScript
45 lines
1.4 KiB
TypeScript
|
|
#!/usr/bin/env tsx
|
||
|
|
import { GenericScraper } from '../../src/scrapers/strategies/generic';
|
||
|
|
|
||
|
|
/**
 * Debug helper: scrapes https://www.alterpeter.de/ with a GenericScraper
 * configured for country 'DE', reduces the returned raw HTML to lowercase
 * plain text, and prints every occurrence of the phrase
 * "montag ... bis ... donnerstag" together with the surrounding text.
 *
 * Nothing is printed when the scrape result has no `rawHtml`.
 *
 * @returns {Promise<void>} Resolves once the report is printed and the
 *   scraper is closed; rejects with the scraper's error if init, scrape or
 *   close fails.
 */
async function main() {
  // Width of the text windows printed around each match.
  const CONTEXT_BEFORE_CHARS = 150;
  const CONTEXT_AFTER_CHARS = 250;

  const scraper = new GenericScraper();
  await scraper.init();

  // From here on the scraper is initialised, so it must be closed even when
  // configuring or scraping throws.
  try {
    scraper.setCountry('DE');

    const result = await scraper.scrape('https://www.alterpeter.de/');
    if (!result.rawHtml) {
      return;
    }

    // Drop <script>/<style> elements including their contents first, so that
    // inline code or CSS cannot produce matches; then strip the remaining
    // tags, collapse whitespace runs and lowercase the text.
    const text = result.rawHtml
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .toLowerCase();

    // Find "montag bis donnerstag" pattern. `[^]*?` lazily matches any
    // characters (including newlines) between the three words.
    const pattern = /montag[^]*?bis[^]*?donnerstag/gi;
    const matches = [...text.matchAll(pattern)];

    console.log(`Found ${matches.length} instances of "montag bis donnerstag":\n`);

    for (const [i, match] of matches.entries()) {
      // `index` is typed as optional; fall back only when it is actually
      // missing rather than on any falsy value.
      const matchIndex = match.index ?? 0;
      const contextBefore = text.slice(
        Math.max(0, matchIndex - CONTEXT_BEFORE_CHARS),
        matchIndex,
      );
      // The "after" window starts at the match, so it includes the match.
      const contextAfter = text.slice(matchIndex, matchIndex + CONTEXT_AFTER_CHARS);

      console.log(`=== Instance ${i + 1} ===`);
      console.log(`Position: ${matchIndex}`);
      console.log(`\nContext BEFORE (${CONTEXT_BEFORE_CHARS} chars):`);
      console.log(`"${contextBefore}"`);
      console.log(`\nContext AFTER (${CONTEXT_AFTER_CHARS} chars):`);
      console.log(`"${contextAfter}"`);
      console.log('');
    }
  } finally {
    await scraper.close();
  }
}
|
||
|
|
|
||
|
|
// Run the script. On failure, log the error and mark the process as failed so
// that shells and CI see a non-zero exit status instead of a silent success.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|