import { config } from 'dotenv';
import { PrismaClient } from '@prisma/client';
import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';

// Load .env.local first, then .env. By default dotenv does not overwrite
// variables that are already set, so values from .env.local take precedence.
config({ path: '.env.local' });
config({ path: '.env' });

const connectionString = process.env.DATABASE_URL;

if (!connectionString) {
  throw new Error('DATABASE_URL environment variable is not set');
}

const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

// Horizontal rule printed above and below every section title.
const SECTION_RULE = '═══════════════════════════════════════════════════════════════';

// Filter matching churches that have at least one URL recorded
// (either `website` or `massScheduleUrl`).
const HAS_SCRAPABLE_URL = {
  OR: [
    { website: { not: null } },
    { massScheduleUrl: { not: null } },
  ],
};

/**
 * Formats `part / total` as a percentage with one decimal place.
 *
 * @param {number} part - Numerator.
 * @param {number} total - Denominator.
 * @returns {string} The percentage without the `%` sign; `'0.0'` when `total`
 *   is 0, so an empty table does not print `NaN`.
 */
function formatPercent(part, total) {
  if (total === 0) {
    return '0.0';
  }
  return ((part / total) * 100).toFixed(1);
}

/**
 * Prints a section title framed by two horizontal rules.
 *
 * @param {string} title - Section title to print.
 * @returns {void}
 */
function printSection(title) {
  console.log(SECTION_RULE);
  console.log(title);
  console.log(SECTION_RULE);
}

/**
 * Prints a status report of the mass schedule scraper to stdout: church counts
 * per data source, scraping coverage, the ten most recently scraped churches,
 * and how many churches have a URL but were never scraped.
 *
 * On failure the error is logged and `process.exitCode` is set to 1. The
 * Prisma client and the pg pool are closed in every case.
 *
 * @returns {Promise<void>}
 */
async function checkScraperStatus() {
  try {
    console.log('Checking mass schedule scraper status...\n');

    // Cut-off for "recently scraped": 7 days before now.
    const weekAgo = new Date();
    weekAgo.setDate(weekAgo.getDate() - 7);

    // These queries are independent of each other, so run them concurrently.
    const [
      totalChurches,
      churchesWithWebsites,
      churchesScraped,
      totalMassSchedules,
      churchesWithSchedules,
      recentlyScraped,
      bySource,
    ] = await Promise.all([
      prisma.church.count(),
      prisma.church.count({ where: HAS_SCRAPABLE_URL }),
      prisma.church.count({ where: { lastScrapedAt: { not: null } } }),
      prisma.massSchedule.count(),
      prisma.church.count({ where: { massSchedules: { some: {} } } }),
      prisma.church.count({ where: { lastScrapedAt: { gte: weekAgo } } }),
      prisma.church.groupBy({ by: ['source'], _count: { id: true } }),
    ]);

    printSection('CHURCH DATA SOURCES');
    for (const group of bySource) {
      // NOTE(review): the schema is not visible here; if `source` is nullable
      // the group key can be null, so fall back to a label instead of throwing.
      const label = String(group.source ?? 'unknown').padEnd(12);
      const count = String(group._count.id).padStart(7);
      const percent = formatPercent(group._count.id, totalChurches);
      console.log(`${label} | ${count} churches (${percent}%)`);
    }

    console.log('');
    printSection('MASS SCHEDULE SCRAPING STATUS');
    console.log(`Total churches: ${totalChurches.toLocaleString()}`);
    console.log(`Churches with websites: ${churchesWithWebsites.toLocaleString()} (${formatPercent(churchesWithWebsites, totalChurches)}%)`);
    console.log(`Churches ever scraped: ${churchesScraped.toLocaleString()} (${formatPercent(churchesScraped, totalChurches)}%)`);
    console.log(`Churches with mass schedules: ${churchesWithSchedules.toLocaleString()} (${formatPercent(churchesWithSchedules, totalChurches)}%)`);
    console.log(`Total mass schedules: ${totalMassSchedules.toLocaleString()}`);
    console.log('');
    console.log(`Scraped in last 7 days: ${recentlyScraped.toLocaleString()}`);
    console.log('');

    // Average schedules per church (skipped when no church has a schedule).
    if (churchesWithSchedules > 0) {
      const avgSchedules = totalMassSchedules / churchesWithSchedules;
      console.log(`Average schedules per church: ${avgSchedules.toFixed(1)} masses/week`);
      console.log('');
    }

    // The ten churches with the most recent lastScrapedAt.
    const recentSample = await prisma.church.findMany({
      where: { lastScrapedAt: { not: null } },
      select: {
        name: true,
        city: true,
        state: true,
        country: true,
        lastScrapedAt: true,
        website: true,
        source: true,
        _count: { select: { massSchedules: true } },
      },
      orderBy: { lastScrapedAt: 'desc' },
      take: 10,
    });

    printSection('RECENTLY SCRAPED CHURCHES (Last 10)');
    if (recentSample.length === 0) {
      console.log('No churches have been scraped yet.');
    } else {
      recentSample.forEach((church, index) => {
        // Drop missing location parts so no empty ", " segments are printed.
        const location = [church.city, church.state, church.country].filter(Boolean).join(', ');
        console.log(`${index + 1}. ${church.name} (${location})`);
        console.log(` Source: ${church.source}`);
        console.log(` Website: ${church.website || 'None'}`);
        console.log(` Last scraped: ${church.lastScrapedAt?.toLocaleString() || 'Never'}`);
        console.log(` Mass schedules: ${church._count.massSchedules}`);
        console.log('');
      });
    }

    // Churches ready to scrape (have a URL, never scraped).
    const readyToScrape = await prisma.church.count({
      where: { ...HAS_SCRAPABLE_URL, lastScrapedAt: null },
    });

    printSection('SCRAPING POTENTIAL');
    console.log(`Churches ready to scrape: ${readyToScrape.toLocaleString()}`);
    console.log(` (have website, never scraped)`);
    console.log('');
  } catch (error) {
    console.error('Error:', error);
    // Set the exit code instead of calling process.exit(): process.exit() would
    // terminate immediately and skip the cleanup in `finally`.
    process.exitCode = 1;
  } finally {
    // Close the pool even if disconnecting the Prisma client fails.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}

// Cleanup errors thrown from `finally` escape the function; report them rather
// than leaving an unhandled rejection.
checkScraperStatus().catch((error) => {
  console.error('Error:', error);
  process.exitCode = 1;
});