101 lines
3.2 KiB
TypeScript
101 lines
3.2 KiB
TypeScript
|
|
#!/usr/bin/env tsx
|
||
|
|
/**
|
||
|
|
* Test international scraper against French churches
|
||
|
|
*/
|
||
|
|
|
||
|
|
import { config } from 'dotenv';
|
||
|
|
// Load environment files via dotenv: `.env.local` is read first, then `.env`.
// NOTE(review): dotenv presumably does not override variables that are already
// set, which would make `.env.local` values take precedence — confirm.
config({ path: '.env.local' });
|
||
|
|
config({ path: '.env' });
|
||
|
|
|
||
|
|
import { Pool } from 'pg';
|
||
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
||
|
|
import { PrismaClient } from '@prisma/client';
|
||
|
|
import { GenericScraper } from '../../src/scrapers/strategies/generic';
|
||
|
|
|
||
|
|
// Database wiring for this script: a `pg` pool built from DATABASE_URL, handed
// to Prisma through the `PrismaPg` driver adapter. `pool` stays a separate
// binding because the script ends it explicitly once it is done.
const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
});
const prisma = new PrismaClient({
  adapter: new PrismaPg(pool),
});
|
||
|
|
|
||
|
|
/**
 * Smoke-tests the generic scraper (configured for country `FR`) against a small
 * sample of French churches and prints what it finds.
 *
 * Selects up to 5 churches with `country: 'FR'`, a non-null `website` and
 * `source: 'osm'`, oldest first. Each website is scraped in turn; the schedules
 * found are printed grouped by day of week, followed by a success/failure summary.
 *
 * The scraper (once initialised), the Prisma client and the pg pool are always
 * released, even when a step throws.
 *
 * @returns A promise that resolves once the run is finished and all resources
 *          have been released. Rejects if querying churches, initialising the
 *          scraper, or releasing a resource fails.
 */
async function testFrenchScraper(): Promise<void> {
  // Index = `dayOfWeek` value (0 = Sunday), French labels for display.
  const dayNames = ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'];
  const separator = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';

  try {
    console.log('Testing French church mass schedule scraping...\n');

    // Get French churches with websites
    const churches = await prisma.church.findMany({
      where: {
        country: 'FR',
        website: { not: null },
        source: 'osm',
      },
      take: 5,
      orderBy: { createdAt: 'asc' },
    });

    if (churches.length === 0) {
      console.log('No French churches with websites found.');
      return; // the outer `finally` still releases the DB connections
    }

    console.log(`Found ${churches.length} French churches to test:\n`);

    let successCount = 0;
    let failCount = 0;

    const scraper = new GenericScraper();
    await scraper.init();
    try {
      scraper.setCountry('FR');

      for (const church of churches) {
        console.log(separator);
        console.log(`Church: ${church.name}`);
        console.log(`City: ${church.city || 'Unknown'}`);
        console.log(`URL: ${church.website}`);
        console.log('');

        // The query filters out null websites, but guard at runtime instead of
        // asserting so an unexpected row is reported rather than crashing the scraper.
        const website = church.website;
        if (!website) {
          failCount++;
          console.log('❌ FAILED - No website URL');
          console.log('');
          continue;
        }

        try {
          const result = await scraper.scrape(website);

          if (result.success && result.schedules.length > 0) {
            successCount++;
            console.log(`✅ SUCCESS - Found ${result.schedules.length} schedules\n`);

            // Group by day; a Map keeps the keys numeric (no string round-trip).
            const byDay = new Map<number, typeof result.schedules>();
            for (const sched of result.schedules) {
              const bucket = byDay.get(sched.dayOfWeek);
              if (bucket) {
                bucket.push(sched);
              } else {
                byDay.set(sched.dayOfWeek, [sched]);
              }
            }

            // Print days in ascending order, with a fallback label for any
            // day index outside 0-6 instead of printing "undefined".
            const days = [...byDay.keys()].sort((a, b) => a - b);
            for (const day of days) {
              console.log(` ${dayNames[day] ?? `Day ${day}`}:`);
              for (const s of byDay.get(day) ?? []) {
                console.log(` ${s.time} - ${s.language || 'Unknown'} (${s.massType || 'Mass'})`);
              }
            }
            console.log('');
          } else {
            failCount++;
            // A "successful" scrape with zero schedules carries no error text.
            console.log(`❌ FAILED - ${result.error || 'No schedules found'}`);
            console.log('');
          }
        } catch (err: unknown) {
          failCount++;
          const message = err instanceof Error ? err.message : String(err);
          console.log(`❌ ERROR - ${message}`);
          console.log('');
        }
      }
    } finally {
      // Only reached after a successful init(), so close() is always paired with it.
      await scraper.close();
    }

    const percent = ((successCount / churches.length) * 100).toFixed(0);
    console.log(separator);
    console.log(`\nRESULTS: ${successCount}/${churches.length} successful (${percent}%)`);
    console.log(`Success: ${successCount}, Failed: ${failCount}\n`);
  } finally {
    // Release DB resources on every path; end the pool even if disconnect throws.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}
|
||
|
|
|
||
|
|
// Script entry point: report any unhandled failure and mark the process as
// failed so shells/CI see a non-zero exit status instead of a silent success.
testFrenchScraper().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|