chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea.

Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
100
scripts/debug/test-french-scraper.ts
Executable file
100
scripts/debug/test-french-scraper.ts
Executable file
@@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* Test international scraper against French churches
|
||||
*/
|
||||
|
||||
import { config } from 'dotenv';
|
||||
config({ path: '.env.local' });
|
||||
config({ path: '.env' });
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
import { GenericScraper } from '../../src/scrapers/strategies/generic';
|
||||
|
||||
// Database handles shared by the whole script: a pg connection pool built
// from DATABASE_URL, wrapped in the Prisma pg adapter, and handed to the
// Prisma client.
const connectionString = process.env.DATABASE_URL;
const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
/**
 * Smoke-tests the generic scraper against a small sample of French churches.
 *
 * Loads up to five OSM-sourced churches in France that have a website
 * (oldest first), scrapes each site, prints the schedules found grouped by
 * weekday, and finishes with a success/failure summary.
 *
 * The scraper is closed and the database handles are released in `finally`
 * blocks, so they are cleaned up even when a step throws.
 *
 * @returns A promise that resolves once the report has been printed and all
 *   resources have been released.
 * @throws Propagates errors from the database query, from scraper start-up
 *   or shutdown, and from disconnecting. Errors thrown while scraping an
 *   individual church are caught, logged, and counted as failures.
 */
async function testFrenchScraper(): Promise<void> {
  console.log('Testing French church mass schedule scraping...\n');

  // French weekday labels indexed by dayOfWeek (index 0 is Sunday).
  const dayNames = ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'];
  const separator = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';

  try {
    // Get French churches with websites
    const churches = await prisma.church.findMany({
      where: {
        country: 'FR',
        website: { not: null },
        source: 'osm',
      },
      take: 5,
      orderBy: { createdAt: 'asc' },
    });

    if (churches.length === 0) {
      console.log('No French churches with websites found.');
      return;
    }

    console.log(`Found ${churches.length} French churches to test:\n`);

    let successCount = 0;
    let failCount = 0;

    const scraper = new GenericScraper();
    // init() stays outside the try so close() only runs on a scraper that
    // was successfully initialised.
    await scraper.init();

    try {
      scraper.setCountry('FR');

      for (const church of churches) {
        console.log(separator);
        console.log(`Church: ${church.name}`);
        console.log(`City: ${church.city || 'Unknown'}`);
        console.log(`URL: ${church.website}`);
        console.log('');

        // The query filters out null websites, but guard explicitly rather
        // than asserting non-null.
        if (!church.website) {
          failCount++;
          console.log('❌ ERROR - Church has no website URL');
          console.log('');
          continue;
        }

        try {
          const result = await scraper.scrape(church.website);

          if (result.success && result.schedules.length > 0) {
            successCount++;
            console.log(`✅ SUCCESS - Found ${result.schedules.length} schedules\n`);

            // Group by day and show. A plain object is used so that
            // Object.entries yields the integer day keys in ascending order.
            const byDay: Record<number, typeof result.schedules> = {};
            for (const sched of result.schedules) {
              (byDay[sched.dayOfWeek] ??= []).push(sched);
            }

            for (const [day, scheds] of Object.entries(byDay)) {
              // Fall back to the raw value if dayOfWeek is outside 0-6.
              const label = dayNames[Number(day)] ?? `Day ${day}`;
              console.log(`  ${label}:`);
              for (const s of scheds) {
                console.log(`    ${s.time} - ${s.language || 'Unknown'} (${s.massType || 'Mass'})`);
              }
            }
            console.log('');
          } else {
            failCount++;
            // The scraper may report no error when it simply found nothing;
            // avoid printing "undefined" in that case.
            console.log(`❌ FAILED - ${result.error ?? 'No schedules found'}`);
            console.log('');
          }
        } catch (err: unknown) {
          failCount++;
          const message = err instanceof Error ? err.message : String(err);
          console.log(`❌ ERROR - ${message}`);
          console.log('');
        }
      }
    } finally {
      await scraper.close();
    }

    const successRate = ((successCount / churches.length) * 100).toFixed(0);
    console.log(separator);
    console.log(`\nRESULTS: ${successCount}/${churches.length} successful (${successRate}%)`);
    console.log(`Success: ${successCount}, Failed: ${failCount}\n`);
  } finally {
    // Release database handles on every exit path, including early return
    // and thrown errors. The pool is ended even if the disconnect fails.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}
|
||||
|
||||
// Run the script. Log any unhandled failure and mark the process as failed
// so shells and CI see a non-zero exit status instead of a silent success.
testFrenchScraper().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user