#!/usr/bin/env tsx
/**
 * Save mass schedules to database using scrapeChurch() service
 */
|
||
|
|
import { config } from 'dotenv';
|
||
|
|
config({ path: '.env.local' });
|
||
|
|
config({ path: '.env' });
|
||
|
|
|
||
|
|
import { scrapeChurch } from '../src/lib/scraper-service';
|
||
|
|
import { prisma } from '../src/lib/db';
|
||
|
|
|
||
|
|
/** Country codes whose churches are scraped, processed in this order. */
const PRIORITY_COUNTRIES: readonly string[] = ['FR', 'DE', 'ES', 'PL', 'BR'];

/** Maximum number of churches scraped per country in one run. Start small to verify it works. */
const CHURCHES_PER_COUNTRY = 5;
|
||
|
|
|
||
|
|
/** Outcome of one scrape attempt for a single church, as recorded by this script. */
interface ScrapeResult {
  /** Database id of the church that was scraped. */
  churchId: string;
  /** Name of the church, copied from the church record. */
  churchName: string;
  /** Country code the church was selected under. */
  country: string;
  /** Whether the scrape reported success. */
  success: boolean;
  /** Number of schedules the scrape reported as created; 0 on failure. */
  schedulesCreated: number;
  /** Failure message; only set when `success` is false. */
  error?: string;
}
|
||
|
|
|
||
|
|
/**
 * Formats `successes / total` as a percentage with one decimal place.
 *
 * Returns '0.0' when `total` is 0 so the summary never prints `NaN%`.
 */
function formatSuccessRate(successes: number, total: number): string {
  return total === 0 ? '0.0' : ((successes / total) * 100).toFixed(1);
}

/**
 * Scrapes one church through scrapeChurch(), writes the outcome to stdout to
 * finish the progress line, and returns it as a ScrapeResult.
 *
 * Never throws: an exception from scrapeChurch() is reported as a failed result.
 */
async function scrapeAndReport(
  church: { id: string; name: string },
  country: string,
): Promise<ScrapeResult> {
  const base = { churchId: church.id, churchName: church.name, country };

  try {
    // Use the scrapeChurch service which saves to database
    const result = await scrapeChurch(church.id);

    if (result.success) {
      process.stdout.write(`✅ ${result.schedulesCreated} schedules saved\n`);
      return { ...base, success: true, schedulesCreated: result.schedulesCreated };
    }

    process.stdout.write(`❌ ${result.error}\n`);
    return { ...base, success: false, schedulesCreated: 0, error: result.error };
  } catch (err: unknown) {
    // Anything can be thrown, so only read `.message` from real Error objects.
    const message = err instanceof Error ? err.message : String(err);
    process.stdout.write(`❌ ERROR: ${message}\n`);
    return { ...base, success: false, schedulesCreated: 0, error: message };
  }
}

/**
 * Scrapes up to CHURCHES_PER_COUNTRY not-yet-scraped churches for each country
 * in PRIORITY_COUNTRIES, then prints a summary, database totals and a small
 * sample of stored schedules.
 *
 * The Prisma connection is closed even when a query fails.
 *
 * @throws Rethrows any error raised by the Prisma queries.
 */
async function saveSchedulesToDb(): Promise<void> {
  try {
    console.log('Starting database save operation...\n');
    console.log(`Target: ${CHURCHES_PER_COUNTRY} churches per country`);
    console.log(`Countries: ${PRIORITY_COUNTRIES.join(', ')}\n`);

    const results: ScrapeResult[] = [];

    for (const country of PRIORITY_COUNTRIES) {
      console.log(`\n${'='.repeat(60)}`);
      console.log(`${country} - Finding churches to scrape...`);
      console.log('='.repeat(60));

      // Get churches with websites that haven't been scraped yet
      const churches = await prisma.church.findMany({
        where: {
          country,
          website: { not: null },
          source: 'osm',
          lastScrapedAt: null, // Only unscraped churches
        },
        take: CHURCHES_PER_COUNTRY,
        orderBy: { createdAt: 'asc' },
      });

      console.log(`Found ${churches.length} churches to scrape\n`);

      // Scrape one church at a time so each progress line is completed by its own outcome.
      for (const [index, church] of churches.entries()) {
        process.stdout.write(
          `[${index + 1}/${churches.length}] ${church.name.substring(0, 40).padEnd(40)} `,
        );
        results.push(await scrapeAndReport(church, country));
      }
    }

    // Totals are derived from the recorded results rather than tracked in separate counters.
    const totalChurches = results.length;
    const totalSuccess = results.filter((result) => result.success).length;
    const totalSchedules = results.reduce((sum, result) => sum + result.schedulesCreated, 0);

    // Final summary
    console.log('\n\n');
    console.log('═'.repeat(80));
    console.log('DATABASE SAVE SUMMARY');
    console.log('═'.repeat(80));
    console.log('');
    console.log(`Total churches processed: ${totalChurches}`);
    console.log(
      `Successful scrapes: ${totalSuccess} (${formatSuccessRate(totalSuccess, totalChurches)}%)`,
    );
    console.log(`Total schedules saved to database: ${totalSchedules}`);
    console.log('');

    // Verify database records
    console.log('Verifying database records...\n');

    const dbScheduleCount = await prisma.massSchedule.count();
    const dbChurchesWithSchedules = await prisma.church.count({
      where: {
        massSchedules: {
          some: {},
        },
      },
    });

    console.log(`✓ Total mass schedules in database: ${dbScheduleCount}`);
    console.log(`✓ Churches with schedules: ${dbChurchesWithSchedules}`);
    console.log('');

    // Show sample of saved schedules
    console.log('Sample of saved schedules:\n');

    const sampleChurches = await prisma.church.findMany({
      where: {
        massSchedules: {
          some: {},
        },
      },
      include: {
        massSchedules: {
          take: 3,
          orderBy: { dayOfWeek: 'asc' },
        },
      },
      take: 3,
    });

    const dayNames = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'];

    for (const church of sampleChurches) {
      console.log(`${church.name} (${church.country}):`);
      for (const schedule of church.massSchedules) {
        // Fall back to a placeholder instead of printing "undefined" for an out-of-range day index.
        const dayName = dayNames[schedule.dayOfWeek] ?? '???';
        console.log(` ${dayName} ${schedule.time} - ${schedule.language} ${schedule.massType || ''}`);
      }
      console.log('');
    }
  } finally {
    // Always release the connection, including when a query above throws.
    await prisma.$disconnect();
  }
}
|
||
|
|
|
||
|
|
// Run the script; log any failure and signal it through the process exit code.
saveSchedulesToDb().catch((err: unknown) => {
  console.error(err);
  // A non-zero exit code lets shells, cron jobs and CI detect that the run failed.
  process.exitCode = 1;
});
|