import 'dotenv/config';
import { prisma } from '../src/lib/db';
import { MassTimesScraper, ChurchData } from '../src/lib/masstimes-scraper';

/** Two-letter codes scraped in order: the 50 US states plus DC. */
const TARGET_STATES = [
  'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL',
  'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME',
  'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH',
  'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI',
  'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI',
  'WY',
];

/** Pause between two consecutive states (5 minutes). */
const REST_BETWEEN_STATES_MS = 5 * 60 * 1000;

/** Value that may take part in a mass-schedule de-duplication key. */
type ScheduleKeyPart = string | number | null | undefined;

/** Fields that together identify a mass schedule entry for de-duplication. */
interface MassScheduleKeyFields {
  dayOfWeek?: ScheduleKeyPart;
  time?: ScheduleKeyPart;
  language?: ScheduleKeyPart;
}

/**
 * Removes mass schedule entries that share the same day, time and language.
 *
 * The first occurrence of each `dayOfWeek:time:language` combination wins and
 * the relative order of the surviving entries is preserved.
 *
 * @param schedules Entries to filter; the array itself is not mutated.
 * @returns A new array holding one entry per distinct key.
 */
function deduplicateMassSchedules<T extends MassScheduleKeyFields>(schedules: T[]): T[] {
  const seen = new Map<string, T>();
  for (const s of schedules) {
    const key = `${s.dayOfWeek}:${s.time}:${s.language}`;
    if (!seen.has(key)) {
      seen.set(key, s);
    }
  }
  return Array.from(seen.values());
}

/**
 * Persists one scraped church together with its schedules.
 *
 * The church row is upserted by `masstimesId`; its mass, confession and
 * adoration schedules are then deleted and recreated from `data`, all inside
 * a single transaction.
 *
 * @param data Scraped church record.
 * @param seenIds Ids already saved during this run. The id of `data` is added
 *   only after the transaction succeeds.
 * @returns `true` when the church was written; `false` when it was skipped as
 *   a duplicate or when saving failed (the error is logged, not rethrown).
 */
async function saveChurch(
  data: ChurchData,
  seenIds: Set<ChurchData['masstimesId']>,
): Promise<boolean> {
  if (seenIds.has(data.masstimesId)) {
    console.log(` Skipping duplicate: ${data.name}`);
    return false;
  }

  // Fields written on both the create and the update path.
  const sharedFields = {
    name: data.name,
    address: data.address,
    city: data.city,
    state: data.state,
    zip: data.zip,
    latitude: data.latitude,
    longitude: data.longitude,
    phone: data.phone,
    website: data.website,
    email: data.email,
    pastorName: data.pastorName,
    diocese: data.diocese,
    directions: data.directions,
    wheelchairAccess: data.wheelchairAccess,
    lastScrapedAt: new Date(),
  };

  try {
    await prisma.$transaction(async (tx) => {
      const church = await tx.church.upsert({
        where: { masstimesId: data.masstimesId },
        // NOTE(review): `country` and `scrapeStrategy` are only written on
        // create, never on update — confirm this is intentional.
        create: {
          ...sharedFields,
          masstimesId: data.masstimesId,
          country: data.country,
          scrapeStrategy: 'masstimes',
        },
        update: sharedFields,
      });

      // Replace all schedules wholesale rather than diffing against old rows.
      await tx.massSchedule.deleteMany({ where: { churchId: church.id } });
      await tx.confessionSchedule.deleteMany({ where: { churchId: church.id } });
      await tx.adorationSchedule.deleteMany({ where: { churchId: church.id } });

      if (data.massSchedules.length > 0) {
        // Only mass schedules are de-duplicated before insertion.
        // NOTE(review): presumably the scraper can emit repeated mass entries — confirm.
        await tx.massSchedule.createMany({
          data: deduplicateMassSchedules(data.massSchedules).map((ms) => ({
            churchId: church.id,
            dayOfWeek: ms.dayOfWeek,
            time: ms.time,
            massType: ms.massType,
            language: ms.language,
            notes: ms.notes,
          })),
        });
      }

      if (data.confessionSchedules.length > 0) {
        await tx.confessionSchedule.createMany({
          data: data.confessionSchedules.map((cs) => ({
            churchId: church.id,
            dayOfWeek: cs.dayOfWeek,
            startTime: cs.startTime,
            endTime: cs.endTime,
            notes: cs.notes,
          })),
        });
      }

      if (data.adorationSchedules.length > 0) {
        await tx.adorationSchedule.createMany({
          data: data.adorationSchedules.map((adoration) => ({
            churchId: church.id,
            dayOfWeek: adoration.dayOfWeek,
            startTime: adoration.startTime,
            endTime: adoration.endTime,
            isPerpetual: adoration.isPerpetual,
            notes: adoration.notes,
          })),
        });
      }
    });

    seenIds.add(data.masstimesId);
    console.log(` Saved: ${data.name}`);
    return true;
  } catch (error) {
    console.error(` Error saving ${data.name}:`, error);
    return false;
  }
}

/**
 * Scrapes every state in `TARGET_STATES` sequentially, saves each returned
 * church, and prints a summary.
 *
 * Rests `REST_BETWEEN_STATES_MS` between states (not after the last one).
 * The scraper and the Prisma connection are always released, even when
 * scraping throws.
 */
async function main(): Promise<void> {
  const seenIds = new Set<ChurchData['masstimesId']>();

  console.log('\n' + '='.repeat(70));
  console.log('MASSTIMES.ORG CHURCH SCRAPER (JSON API)');
  console.log('='.repeat(70));
  console.log(`\nTarget states: ${TARGET_STATES.length}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('\n' + '-'.repeat(70));

  const scraper = new MassTimesScraper();
  const stats = { total: 0, saved: 0, skipped: 0, errors: 0 };

  try {
    await scraper.init();
    console.log('Browser initialized\n');

    for (const [i, state] of TARGET_STATES.entries()) {
      console.log(`\n[${'='.repeat(20)}] SCRAPING ${state} [${'='.repeat(20)}]\n`);
      console.log(`State ${i + 1}/${TARGET_STATES.length}: ${state}`);

      const churches = await scraper.scrapeState(state);
      stats.total += churches.length;

      console.log(`\n Saving ${churches.length} churches from ${state} to database...`);
      for (const church of churches) {
        // saveChurch returns false for duplicates as well as failures, so
        // record beforehand which of the two a `false` result will mean.
        const isDuplicate = seenIds.has(church.masstimesId);
        const saved = await saveChurch(church, seenIds);
        if (saved) {
          stats.saved++;
        } else if (isDuplicate) {
          stats.skipped++;
        } else {
          stats.errors++;
        }
      }

      // No point in waiting once the final state has been processed.
      if (i < TARGET_STATES.length - 1) {
        console.log(`\n Resting 5 minutes before next state...\n`);
        await new Promise((resolve) => setTimeout(resolve, REST_BETWEEN_STATES_MS));
      }
    }
  } finally {
    // Nested so the database connection is released even if closing the
    // scraper throws.
    try {
      await scraper.close();
    } finally {
      await prisma.$disconnect();
    }
  }

  console.log('\n' + '='.repeat(70));
  console.log('SUMMARY');
  console.log('='.repeat(70));
  console.log(`Total scraped: ${stats.total}`);
  console.log(`Saved: ${stats.saved}`);
  console.log(`Skipped (duplicates): ${stats.skipped}`);
  console.log(`Errors: ${stats.errors}`);
  console.log('='.repeat(70) + '\n');
}

main().catch((error: unknown) => {
  console.error(error);
  // Signal failure to the calling shell / scheduler.
  process.exitCode = 1;
});