Files
ScraperControl/scripts/debug/test-french-scraper.ts

101 lines
3.2 KiB
TypeScript
Raw Normal View History

#!/usr/bin/env tsx
/**
* Test international scraper against French churches
*/
import { config } from 'dotenv';
config({ path: '.env.local' });
config({ path: '.env' });
import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import { GenericScraper } from '../../src/scrapers/strategies/generic';
// Database wiring for this script: a single pg connection pool, wrapped in
// Prisma's pg driver adapter and handed to the Prisma client. The connection
// string is read from DATABASE_URL in the process environment.
const connectionString = process.env.DATABASE_URL;
const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
/**
 * Smoke-tests the generic scraper against a small sample of French churches.
 *
 * Loads up to five OSM-sourced churches in France that have a website (oldest
 * first), scrapes each site with a `GenericScraper` set to country 'FR',
 * prints the schedules found grouped by day of week, and ends with a
 * success/failure summary.
 *
 * The scraper and the database connections are released in `finally` blocks,
 * so an unexpected error cannot leave them open.
 *
 * @returns A promise that resolves once the report has been printed and all
 *   resources have been released. Per-church scrape errors are reported and
 *   counted rather than rethrown; errors from the database query or scraper
 *   start-up reject the promise.
 */
async function testFrenchScraper(): Promise<void> {
  console.log('Testing French church mass schedule scraping...\n');

  try {
    // Get French churches with websites
    const churches = await prisma.church.findMany({
      where: {
        country: 'FR',
        website: { not: null },
        source: 'osm',
      },
      take: 5,
      orderBy: { createdAt: 'asc' },
    });

    if (churches.length === 0) {
      console.log('No French churches with websites found.');
      return;
    }

    console.log(`Found ${churches.length} French churches to test:\n`);

    // Indexed by dayOfWeek; the table starts at Sunday (Dimanche).
    const dayNames = ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'];
    let successCount = 0;
    let failCount = 0;

    const scraper = new GenericScraper();
    await scraper.init();
    try {
      scraper.setCountry('FR');

      for (const church of churches) {
        console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
        console.log(`Church: ${church.name}`);
        console.log(`City: ${church.city || 'Unknown'}`);
        console.log(`URL: ${church.website}`);
        console.log('');

        // The query excludes null websites, but the column type is still
        // nullable and an empty string would pass the filter; guard instead
        // of asserting non-null.
        const url = church.website;
        if (!url) {
          failCount++;
          console.log('❌ FAILED - No website URL');
          console.log('');
          continue;
        }

        try {
          const result = await scraper.scrape(url);

          if (result.success && result.schedules.length > 0) {
            successCount++;
            console.log(`✅ SUCCESS - Found ${result.schedules.length} schedules\n`);

            // Group by day and show. Integer-like keys of a plain object are
            // enumerated in ascending order, so days print in numeric order.
            const byDay: Record<number, typeof result.schedules> = {};
            for (const sched of result.schedules) {
              (byDay[sched.dayOfWeek] ??= []).push(sched);
            }

            for (const [day, scheds] of Object.entries(byDay)) {
              // Fall back to the raw value if dayOfWeek is outside the table.
              console.log(` ${dayNames[Number(day)] ?? `Day ${day}`}:`);
              for (const s of scheds) {
                console.log(` ${s.time} - ${s.language || 'Unknown'} (${s.massType || 'Mass'})`);
              }
            }
            console.log('');
          } else {
            failCount++;
            // A "successful" scrape with zero schedules carries no error text.
            console.log(`❌ FAILED - ${result.error ?? 'No schedules found'}`);
            console.log('');
          }
        } catch (err: unknown) {
          failCount++;
          const message = err instanceof Error ? err.message : String(err);
          console.log(`❌ ERROR - ${message}`);
          console.log('');
        }
      }
    } finally {
      // Always shut the scraper down once it has been initialised.
      await scraper.close();
    }

    console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
    // churches.length is non-zero here (empty result returned early above).
    console.log(`\nRESULTS: ${successCount}/${churches.length} successful (${((successCount / churches.length) * 100).toFixed(0)}%)`);
    console.log(`Success: ${successCount}, Failed: ${failCount}\n`);
  } finally {
    // Release database resources on every exit path, including errors.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}
// Run the script. If it rejects, log the error and mark the process as failed
// so shells and CI do not see a zero exit status.
testFrenchScraper().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});