#!/usr/bin/env tsx
/**
 * Import Catholic churches and mass schedules from OrariMesse.it
 *
 * OrariMesse.it is the official CEI (Italian Bishops' Conference) platform for
 * mass times in Italy. It provides a public REST API organized by diocese.
 *
 * Import strategy:
 *   Pass 1: For each diocese, fetch all churches → match against existing DB
 *           records (by ICSC code or proximity+name) → upsert
 *   Pass 2: For churches with active schedules, fetch detail endpoint →
 *           convert 8-day rolling schedule to recurring → replace mass schedules
 *
 * Usage:
 *   npx tsx scripts/import-orarimesse.ts --all
 *   npx tsx scripts/import-orarimesse.ts --diocese roma
 *   npx tsx scripts/import-orarimesse.ts --all --dry-run
 *   npx tsx scripts/import-orarimesse.ts --all --schedules-only
 *   npx tsx scripts/import-orarimesse.ts --all --resume-from napoli
 *   npx tsx scripts/import-orarimesse.ts --all --job-id {uuid}
 */

import dotenv from 'dotenv';
import path from 'path';

// Load environment before anything reads DATABASE_URL; .env.local is loaded first.
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Mask the password portion of the URL before logging it.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);

const pool = new Pool({
  connectionString: dbUrl,
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

import { findDuplicateChurch } from '../src/lib/church-matcher';
import type { ExistingChurch } from '../src/lib/church-matcher';

// ─── Constants ───────────────────────────────────────────────────────────────

const API_BASE = 'https://orarimesse.it/api';
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
/** Pause before diocese-level API requests. */
const DIOCESE_DELAY_MS = 2000;
/** Pause before per-church detail API requests. */
const DETAIL_DELAY_MS = 1000;
/** Abort any single HTTP request that takes longer than this. */
const REQUEST_TIMEOUT_MS = 30000;

// ─── Italian Day Map ─────────────────────────────────────────────────────────

/**
 * Lowercased Italian weekday name → day-of-week index (0 = domenica … 6 = sabato).
 * Both accented and unaccented spellings are accepted. A Map (rather than a
 * plain object) avoids accidental hits on inherited keys such as "constructor".
 */
const ITALIAN_DAY_MAP: ReadonlyMap<string, number> = new Map<string, number>([
  ['domenica', 0],
  ['lunedì', 1],
  ['lunedi', 1],
  ['martedì', 2],
  ['martedi', 2],
  ['mercoledì', 3],
  ['mercoledi', 3],
  ['giovedì', 4],
  ['giovedi', 4],
  ['venerdì', 5],
  ['venerdi', 5],
  ['sabato', 6],
]);

// ─── Types ───────────────────────────────────────────────────────────────────

/** One entry of the diocese list returned by the `getDiocesi` endpoint. */
interface OrariMesseDiocese {
  codice_cei: string;
  title: string;
  slug: string;
  url: string;
  countChiese: number;
}

/** Compact church record as listed inside a diocese response. */
interface OrariMesseChurch {
  idchurch: number;
  address: string;
  name: string;
  conosciutaCome: string;
  isopen: boolean;
  nextmass: string;
  lat: string;
  lon: string;
  sito: string;
  emailLdc: string;
  icsc: string;
  comune: string;
  tipologia: string;
  accessibile: boolean;
}

/** Diocese object returned by the `getListaChiese` endpoint. */
interface OrariMesseDioceseResponse {
  codice_cei: string;
  title: string;
  slug: string;
  countChiese: number;
  listaChiese: OrariMesseChurch[];
}

/** A single mass entry within a day of the detail response. */
interface OrariMesseMass {
  idmass: number;
  time: string;
  noteOrarioMessa: string;
}

/** One calendar day of the rolling schedule (e.g. "Giovedì 26 Febbraio"). */
interface OrariMesseDay {
  day: string;
  mass: OrariMesseMass[];
}

/** Church detail returned by the `getDettaglioMessa` endpoint. */
interface OrariMesseDetail {
  idchurch: number;
  name: string;
  address: string;
  lat: string;
  lon: string;
  icsc: string;
  comune: string;
  diocesi: string;
  parroco: string;
  telefono: string;
  email: string;
  sito: string;
  days: OrariMesseDay[];
}

/** Running counters reported in the final summary and stored on the background job. */
interface ImportStats {
  diocesesProcessed: number;
  churchesFound: number;
  churchesMatched: number;
  churchesCreated: number;
  churchesSkipped: number;
  schedulesProcessed: number;
  massSchedulesCreated: number;
  errors: number;
}

/** Parsed command-line options. */
interface CLIArgs {
  all: boolean;
  diocese?: string;
  dryRun: boolean;
  schedulesOnly: boolean;
  resumeFrom?: string;
  jobId?: string;
}

// ─── Small Utilities ─────────────────────────────────────────────────────────

/** Render a caught value for log output. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * True when `error` looks like a Prisma unique-constraint violation.
 * Checks the Prisma error code (P2002) and falls back to the message text.
 */
function isUniqueConstraintError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  const code = 'code' in error ? (error as { code?: unknown }).code : undefined;
  return code === 'P2002' || error.message.includes('Unique constraint');
}

// ─── API Client ──────────────────────────────────────────────────────────────

/** Number of API requests issued so far (also drives the inter-request delay). */
let requestCount = 0;

/** Resolve after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * GET `${API_BASE}/${endpoint}` with the given query parameters and unwrap the
 * `{ status, code, data }` envelope.
 *
 * Every request except the very first waits `delayMs` beforehand to rate-limit
 * the import. Failures (non-2xx status, envelope not OK, network error,
 * timeout, invalid JSON) are logged and reported as `null`; this never throws.
 *
 * @param endpoint API method name, e.g. `getDiocesi`
 * @param params   query-string parameters
 * @param delayMs  pause before the request, in milliseconds
 * @returns the envelope's `data` on success, otherwise `null`
 */
async function fetchApi<T>(
  endpoint: string,
  params: Record<string, string> = {},
  delayMs: number,
): Promise<T | null> {
  if (requestCount > 0) {
    await delay(delayMs);
  }
  requestCount++;

  const url = new URL(`${API_BASE}/${endpoint}`);
  for (const [key, value] of Object.entries(params)) {
    url.searchParams.set(key, value);
  }

  try {
    const response = await fetch(url.toString(), {
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'application/json',
      },
      // Without a timeout a single stalled connection would hang the whole import.
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
    });

    if (!response.ok) {
      console.error(` HTTP ${response.status} for ${url}`);
      return null;
    }

    const json = await response.json() as { status: boolean; code: string; data: T };
    if (json.status === true && json.code === 'OK') {
      return json.data;
    }

    console.error(` API error for ${url}: ${JSON.stringify(json).substring(0, 200)}`);
    return null;
  } catch (error) {
    console.error(` Fetch error for ${url}: ${describeError(error)}`);
    return null;
  }
}

/** Fetch the list of all dioceses; returns an empty array when the request fails. */
async function fetchDioceses(): Promise<OrariMesseDiocese[]> {
  const data = await fetchApi<OrariMesseDiocese[]>('getDiocesi', {}, DIOCESE_DELAY_MS);
  return data || [];
}

/**
 * Fetch the compact church list of one diocese.
 *
 * @returns the diocese object, or `null` when the request fails or the
 *          response array is empty
 */
async function fetchChurchesInDiocese(slug: string): Promise<OrariMesseDioceseResponse | null> {
  const data = await fetchApi<OrariMesseDioceseResponse[]>(
    'getListaChiese',
    { diocesi: slug, type: 'compact' },
    DIOCESE_DELAY_MS,
  );
  // Response is an array with a single diocese object
  if (data && data.length > 0) {
    return data[0] ?? null;
  }
  return null;
}

/** Fetch the detail record (including the rolling schedule) of one church. */
async function fetchChurchDetail(idchurch: number): Promise<OrariMesseDetail | null> {
  return fetchApi<OrariMesseDetail>(
    'getDettaglioMessa',
    { idchurch: String(idchurch) },
    DETAIL_DELAY_MS,
  );
}

// ─── Day/Time Conversion ─────────────────────────────────────────────────────

/**
 * Map an API day label to a day-of-week index.
 *
 * "Giovedì 26 Febbraio" → first word → lowercase → lookup → 4.
 * The input is trimmed and NFC-normalised so that decomposed accents and
 * stray whitespace do not defeat the lookup.
 *
 * @returns 0 (Sunday) … 6 (Saturday), or `null` for an unrecognised label
 */
function parseItalianDay(dayString: string): number | null {
  const firstWord = dayString.normalize('NFC').trim().split(/\s+/)[0]?.toLowerCase() ?? '';
  return ITALIAN_DAY_MAP.get(firstWord) ?? null;
}

/**
 * Convert an API time to `HH:MM`.
 *
 * "07.00" → "07:00"; a single-digit hour is zero-padded ("7.30" → "07:30") so
 * equal times always produce equal strings. Anything that is not
 * `H.MM`/`HH.MM` (or the same with a colon) is returned trimmed with only its
 * first dot replaced, as before.
 */
function convertTime(time: string): string {
  const trimmed = time.trim();
  const match = /^(\d{1,2})[.:](\d{2})$/.exec(trimmed);
  if (!match) {
    return trimmed.replace('.', ':');
  }
  const [, hours = '', minutes = ''] = match;
  return `${hours.padStart(2, '0')}:${minutes}`;
}

/** One weekly-recurring mass derived from the rolling schedule. */
interface RecurringMass {
  dayOfWeek: number;
  time: string;
  notes: string | null;
}

/**
 * Collapse the API's rolling schedule into a weekly recurring one.
 *
 * The API returns an 8-day rolling window, so the same weekday can appear
 * twice (e.g. Thursday this week and Thursday next week). Entries are
 * de-duplicated by dayOfWeek+time; the first occurrence (and its note) wins.
 * Days with an unrecognised label and masses without a usable time are skipped.
 */
function convertScheduleToRecurring(days: OrariMesseDay[]): RecurringMass[] {
  const seen = new Set<string>();
  const result: RecurringMass[] = [];

  for (const day of days) {
    const dayOfWeek = parseItalianDay(day.day);
    if (dayOfWeek === null) continue;

    // Defensive: the payload is external, so tolerate a missing mass list.
    const masses = Array.isArray(day.mass) ? day.mass : [];
    for (const mass of masses) {
      if (typeof mass.time !== 'string' || mass.time.trim() === '') continue;

      const time = convertTime(mass.time);
      const key = `${dayOfWeek}:${time}`;
      if (seen.has(key)) continue;
      seen.add(key);

      result.push({
        dayOfWeek,
        time,
        notes: mass.noteOrarioMessa || null,
      });
    }
  }

  return result;
}

// ─── Database Operations ─────────────────────────────────────────────────────

/** Load every church with country 'IT', selecting the fields the matcher needs. */
async function loadExistingItalianChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing Italian churches for deduplication...');
  const churches = await prisma.church.findMany({
    where: { country: 'IT' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });
  console.log(`Loaded ${churches.length} existing Italian churches`);
  return churches;
}

// ─── Pass 1: Church Upsert ──────────────────────────────────────────────────

/**
 * Pass 1 for one diocese: match each API church against the known churches and
 * either update the match (filling only missing fields) or create a new record.
 *
 * @param dioceseSlug      slug stored as `diocese` on new or diocese-less records
 * @param churches         churches listed by the API for this diocese
 * @param existingChurches in-memory dedup pool; newly created churches are appended
 * @param idchurchToDbId   filled with API idchurch → DB id for use by Pass 2
 * @param dryRun           when true, only counts are updated and nothing is written
 * @param stats            counters, mutated in place
 * @throws rethrows any database error that is not a unique-constraint violation
 */
async function processChurchesForDiocese(
  dioceseSlug: string,
  churches: OrariMesseChurch[],
  existingChurches: ExistingChurch[],
  idchurchToDbId: Map<number, ExistingChurch['id']>,
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  for (const church of churches) {
    stats.churchesFound++;

    // Parse coordinates; records without usable coordinates cannot be matched or mapped.
    const lat = parseFloat(church.lat);
    const lon = parseFloat(church.lon);
    if (isNaN(lat) || isNaN(lon) || lat === 0 || lon === 0) {
      stats.churchesSkipped++;
      continue;
    }

    // Build candidate for dedup
    const candidate = {
      name: church.name,
      lat,
      lng: lon,
      orarimesseId: church.icsc || undefined,
    };
    const duplicate = findDuplicateChurch(candidate, existingChurches);

    if (dryRun) {
      if (duplicate) {
        stats.churchesMatched++;
        // Track idchurch for Pass 2 even in dry run
        idchurchToDbId.set(church.idchurch, duplicate.id);
      } else {
        stats.churchesCreated++;
      }
      continue;
    }

    if (duplicate) {
      // Update existing church: set orarimesseId, fill missing fields
      stats.churchesMatched++;

      const updateData: Record<string, unknown> = {
        orarimesseId: church.icsc || undefined,
        orarimesseLastSyncedAt: new Date(),
      };
      if (!duplicate.address && church.address) updateData.address = church.address;
      if (!duplicate.website && church.sito) {
        updateData.website = church.sito;
        updateData.hasWebsite = true;
      }

      // Check diocese/city/email on the actual DB record (not in ExistingChurch)
      const dbRecord = await prisma.church.findUnique({
        where: { id: duplicate.id },
        select: { diocese: true, city: true, email: true },
      });
      if (dbRecord && !dbRecord.diocese && dioceseSlug) {
        updateData.diocese = dioceseSlug;
      }
      if (dbRecord && !dbRecord.city && church.comune) {
        updateData.city = church.comune;
      }
      if (dbRecord && !dbRecord.email && church.emailLdc) {
        updateData.email = church.emailLdc;
      }

      try {
        await prisma.church.update({
          where: { id: duplicate.id },
          data: updateData,
        });
      } catch (error) {
        // Unique constraint violation on orarimesseId — another church already has this ICSC
        if (isUniqueConstraintError(error)) {
          stats.churchesSkipped++;
          continue;
        }
        throw error;
      }

      idchurchToDbId.set(church.idchurch, duplicate.id);
    } else {
      // Create new church
      try {
        const newChurch = await prisma.church.create({
          data: {
            name: church.name,
            latitude: lat,
            longitude: lon,
            address: church.address || null,
            city: church.comune || null,
            country: 'IT',
            diocese: dioceseSlug,
            website: church.sito || null,
            email: church.emailLdc || null,
            hasWebsite: !!church.sito,
            orarimesseId: church.icsc || null,
            orarimesseLastSyncedAt: new Date(),
            source: 'orarimesse',
            wheelchairAccess: church.accessibile || false,
          },
        });
        stats.churchesCreated++;

        // Add to in-memory array for within-run dedup
        existingChurches.push({
          id: newChurch.id,
          name: church.name,
          latitude: lat,
          longitude: lon,
          osmId: null,
          baiduId: null,
          masstimesId: null,
          orarimesseId: church.icsc || null,
          massSchedulesPhId: null,
          philmassId: null,
          horariosMisasId: null,
          mszeInfoId: null,
          weekdayMassesId: null,
          messesInfoId: null,
          bohosluzbyId: null,
          miserendId: null,
          kerknetId: null,
          gottesdienstzeitenId: null,
          discovermassId: null,
          source: 'orarimesse',
          website: church.sito || null,
          phone: null,
          address: church.address || null,
        });

        idchurchToDbId.set(church.idchurch, newChurch.id);
      } catch (error) {
        if (isUniqueConstraintError(error)) {
          stats.churchesSkipped++;
          continue;
        }
        throw error;
      }
    }
  }
}

// ─── Pass 2: Mass Schedules ─────────────────────────────────────────────────

/**
 * Pass 2 for one diocese: for every church that reports a next mass and is
 * mapped to a DB record, fetch its detail and replace its mass schedules with
 * the recurring schedule derived from the rolling window.
 *
 * Requests are issued one at a time on purpose (see the delay in `fetchApi`).
 * A failure while saving one church is logged and counted in `stats.errors`;
 * processing continues with the next church.
 *
 * @param churches       churches listed by the API for this diocese
 * @param idchurchToDbId API idchurch → DB id, as built by Pass 1 or schedules-only mode
 * @param dryRun         when true, schedules are fetched and counted but not written
 * @param stats          counters, mutated in place
 */
async function processSchedulesForDiocese(
  churches: OrariMesseChurch[],
  idchurchToDbId: Map<number, ExistingChurch['id']>,
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  // Filter to churches with active schedules
  const churchesWithMass = churches.filter((c) => c.nextmass);
  if (churchesWithMass.length === 0) return;

  console.log(` Pass 2: Fetching schedules for ${churchesWithMass.length} churches with active masses...`);

  for (const church of churchesWithMass) {
    const dbId = idchurchToDbId.get(church.idchurch);
    if (dbId === undefined) continue; // Church not in our DB (skipped in Pass 1)

    const detail = await fetchChurchDetail(church.idchurch);
    if (!detail || !detail.days || detail.days.length === 0) {
      continue;
    }

    stats.schedulesProcessed++;

    const recurring = convertScheduleToRecurring(detail.days);
    if (recurring.length === 0) continue;

    if (dryRun) {
      stats.massSchedulesCreated += recurring.length;
      continue;
    }

    try {
      // Delete + insert + timestamp in one transaction so a church is never left without schedules.
      await prisma.$transaction(async (tx) => {
        // Delete existing mass schedules for this church
        await tx.massSchedule.deleteMany({ where: { churchId: dbId } });

        // Create new recurring schedules
        await tx.massSchedule.createMany({
          data: recurring.map((m) => ({
            churchId: dbId,
            dayOfWeek: m.dayOfWeek,
            time: m.time,
            language: 'Italian',
            notes: m.notes,
          })),
        });

        // Mark church as scraped
        await tx.church.update({
          where: { id: dbId },
          data: { lastScrapedAt: new Date() },
        });
      });
      stats.massSchedulesCreated += recurring.length;
    } catch (error) {
      stats.errors++;
      console.error(` Error saving schedules for idchurch=${church.idchurch}: ${describeError(error)}`);
    }
  }
}

// ─── CLI ─────────────────────────────────────────────────────────────────────

/** Print the usage text. */
function printHelp(): void {
  console.log(`
Usage: npx tsx scripts/import-orarimesse.ts [options]

Options:
  --all                  Import from all 77 dioceses
  --diocese <slug>       Import from a single diocese (e.g. "roma")
  --dry-run              No database writes, just report what would happen
  --schedules-only       Skip Pass 1 (church upsert), only fetch schedules
  --resume-from <slug>   Skip dioceses until reaching this slug
  --job-id <uuid>        Background job tracking ID
  --help, -h             Show this help message

Examples:
  npx tsx scripts/import-orarimesse.ts --diocese roma --dry-run
  npx tsx scripts/import-orarimesse.ts --all
  npx tsx scripts/import-orarimesse.ts --all --schedules-only
  npx tsx scripts/import-orarimesse.ts --all --resume-from napoli
`);
}

/**
 * Parse `process.argv` into {@link CLIArgs}.
 *
 * Exits the process with status 0 after printing help, and with status 1 when
 * an option that takes a value has none, or when neither `--all` nor
 * `--diocese` was given. Unknown arguments are reported and ignored.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = {
    all: false,
    dryRun: false,
    schedulesOnly: false,
  };

  // Read the value following a flag; a missing value or another flag is a usage error
  // (previously `--all --resume-from` silently resumed from nowhere).
  const takeValue = (index: number, flag: string): string => {
    const value = args[index];
    if (value === undefined || value.startsWith('-')) {
      console.error(`Error: ${flag} requires a value`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    switch (arg) {
      case '--all':
        result.all = true;
        break;
      case '--diocese':
        result.diocese = takeValue(++i, arg);
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--schedules-only':
        result.schedulesOnly = true;
        break;
      case '--resume-from':
        result.resumeFrom = takeValue(++i, arg);
        break;
      case '--job-id':
        result.jobId = takeValue(++i, arg);
        break;
      case '--help':
      case '-h':
        printHelp();
        process.exit(0);
        break;
      default:
        console.warn(`Warning: ignoring unknown argument "${arg}"`);
    }
  }

  if (!result.all && !result.diocese) {
    console.error('Error: specify --all or --diocese <slug>');
    process.exit(1);
  }

  return result;
}

// ─── Helpers ─────────────────────────────────────────────────────────────────

/** Format a millisecond duration as "1h 2m 3s", "2m 3s" or "3s". */
function formatDuration(ms: number): string {
  const seconds = Math.floor(ms / 1000);
  const minutes = Math.floor(seconds / 60);
  const hours = Math.floor(minutes / 60);
  if (hours > 0) return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
  if (minutes > 0) return `${minutes}m ${seconds % 60}s`;
  return `${seconds}s`;
}

// ─── Main ────────────────────────────────────────────────────────────────────

/**
 * Entry point: parse arguments, load the dedup pool, walk the requested
 * dioceses (Pass 1 then Pass 2 for each), print a summary and update the
 * optional background job.
 *
 * Usage errors detected after startup set `process.exitCode = 1` and return
 * instead of calling `process.exit`, so the caller's cleanup still runs.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  const startTime = Date.now();

  console.log('\n' + '='.repeat(70));
  console.log('ORARIMESSE.IT IMPORTER');
  console.log('='.repeat(70));
  console.log(`Mode: ${args.all ? 'All dioceses' : `Single diocese: ${args.diocese}`}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  console.log(`Schedules only: ${args.schedulesOnly ? 'YES' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from: ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70) + '\n');

  // Update background job status if provided
  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (error) {
      // Job might not exist yet, that's fine — but leave a trace in the log.
      console.warn(`Could not mark job ${args.jobId} as running: ${describeError(error)}`);
    }
  }

  // Load existing Italian churches for dedup
  const existingChurches = await loadExistingItalianChurches();

  // Fetch diocese list
  console.log('Fetching diocese list from OrariMesse.it...');
  const allDioceses = await fetchDioceses();
  console.log(`Found ${allDioceses.length} dioceses\n`);

  // Filter to requested dioceses
  let diocesesToProcess: OrariMesseDiocese[];
  if (args.diocese) {
    const found = allDioceses.find((d) => d.slug === args.diocese);
    if (!found) {
      console.error(`Diocese "${args.diocese}" not found. Available: ${allDioceses.map((d) => d.slug).join(', ')}`);
      process.exitCode = 1;
      return;
    }
    diocesesToProcess = [found];
  } else {
    diocesesToProcess = allDioceses;
  }

  // Handle --resume-from
  if (args.resumeFrom) {
    const idx = diocesesToProcess.findIndex((d) => d.slug === args.resumeFrom);
    if (idx === -1) {
      console.error(`Resume diocese "${args.resumeFrom}" not found.`);
      process.exitCode = 1;
      return;
    }
    console.log(`Resuming from diocese "${args.resumeFrom}" (skipping ${idx} dioceses)\n`);
    diocesesToProcess = diocesesToProcess.slice(idx);
  }

  const stats: ImportStats = {
    diocesesProcessed: 0,
    churchesFound: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesProcessed: 0,
    massSchedulesCreated: 0,
    errors: 0,
  };

  // An empty list here means the diocese request failed; do not report that as a clean run.
  if (allDioceses.length === 0) {
    stats.errors++;
    console.error('No dioceses returned by the API; nothing to import.');
  }

  // Map OrariMesse idchurch → our DB id (for Pass 2 schedule lookups)
  const idchurchToDbId = new Map<number, ExistingChurch['id']>();

  // Schedules-only mode resolves churches by their stored orarimesseId (ICSC code).
  // Build the lookup once instead of scanning the array for every church.
  const orarimesseIdToDbId = new Map<string, ExistingChurch['id']>();
  if (args.schedulesOnly) {
    console.log('Schedules-only mode: loading existing orarimesseId mappings...');
    const mappedCount = await prisma.church.count({
      where: { orarimesseId: { not: null } },
    });
    for (const existing of existingChurches) {
      // First record wins, matching the previous Array.find behaviour.
      if (existing.orarimesseId && !orarimesseIdToDbId.has(existing.orarimesseId)) {
        orarimesseIdToDbId.set(existing.orarimesseId, existing.id);
      }
    }
    // The idchurch map itself is built during diocese processing since it needs the API's idchurch values
    console.log(`Found ${mappedCount} churches with orarimesseId in DB\n`);
  }

  // Process each diocese
  for (const [i, diocese] of diocesesToProcess.entries()) {
    const elapsed = formatDuration(Date.now() - startTime);
    console.log(`[${i + 1}/${diocesesToProcess.length}] Diocese: ${diocese.title} (${diocese.slug}) [${elapsed} elapsed]`);

    try {
      // Fetch churches in this diocese
      const dioceseData = await fetchChurchesInDiocese(diocese.slug);
      if (!dioceseData || !dioceseData.listaChiese || dioceseData.listaChiese.length === 0) {
        console.log(` No churches found, skipping`);
        stats.diocesesProcessed++;
        continue;
      }

      const churches = dioceseData.listaChiese;
      console.log(` Found ${churches.length} churches (${churches.filter((c) => c.nextmass).length} with active masses)`);

      // Pass 1: Upsert churches
      if (!args.schedulesOnly) {
        const prevMatched = stats.churchesMatched;
        const prevCreated = stats.churchesCreated;
        const prevSkipped = stats.churchesSkipped;

        await processChurchesForDiocese(
          diocese.slug,
          churches,
          existingChurches,
          idchurchToDbId,
          args.dryRun,
          stats,
        );

        const matched = stats.churchesMatched - prevMatched;
        const created = stats.churchesCreated - prevCreated;
        const skipped = stats.churchesSkipped - prevSkipped;
        console.log(` Pass 1: ${matched} matched, ${created} created, ${skipped} skipped`);
      } else {
        // In schedules-only mode, still need to build idchurch → dbId map
        for (const church of churches) {
          if (!church.icsc) continue;
          const existingId = orarimesseIdToDbId.get(church.icsc);
          if (existingId !== undefined) {
            idchurchToDbId.set(church.idchurch, existingId);
          }
        }
      }

      // Pass 2: Import schedules
      const prevSchedules = stats.massSchedulesCreated;
      const prevProcessed = stats.schedulesProcessed;
      await processSchedulesForDiocese(churches, idchurchToDbId, args.dryRun, stats);
      const newSchedules = stats.massSchedulesCreated - prevSchedules;
      // Report this diocese's own count, not the running total across dioceses.
      const processedHere = stats.schedulesProcessed - prevProcessed;
      if (newSchedules > 0) {
        console.log(` Pass 2: ${processedHere} churches processed, ${newSchedules} mass times created`);
      }

      stats.diocesesProcessed++;
    } catch (error) {
      stats.errors++;
      console.error(` ERROR processing diocese ${diocese.slug}: ${describeError(error)}`);
    }
  }

  // Print summary
  const totalTime = Date.now() - startTime;
  console.log('\n' + '='.repeat(70));
  console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
  console.log('='.repeat(70));
  console.log(`Dioceses processed: ${stats.diocesesProcessed}`);
  console.log(`Churches found: ${stats.churchesFound}`);
  console.log(` Matched (existing): ${stats.churchesMatched}`);
  console.log(` Created (new): ${stats.churchesCreated}`);
  console.log(` Skipped: ${stats.churchesSkipped}`);
  console.log(`Schedules processed: ${stats.schedulesProcessed}`);
  console.log(`Mass schedules created: ${stats.massSchedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Total time: ${formatDuration(totalTime)}`);
  console.log(`API requests: ${requestCount}`);
  console.log('='.repeat(70) + '\n');

  // Update background job
  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: {
          status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
          completedAt: new Date(),
          result: JSON.stringify(stats),
        },
      });
    } catch (error) {
      // Best effort: the import itself is done, so only report the failure.
      console.warn(`Could not update job ${args.jobId}: ${describeError(error)}`);
    }
  }
}

main()
  .catch((error: unknown) => {
    console.error('Fatal error:', error);
    // Set the exit code instead of calling process.exit() so the cleanup below still runs.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });