#!/usr/bin/env tsx
/**
 * Import Catholic churches and mass schedules from messes.info (France)
 *
 * messes.info is the official French bishops' conference (CEF) mass schedule
 * database. It exposes a GWT-RPC API returning structured JSON with parish
 * data including name, address, coordinates, diocese, and celebration times.
 *
 * The API requires no authentication. We enumerate all French dioceses using
 * the "community:{diocese_code}" query prefix, which returns all parishes
 * within each diocese.
 *
 * Import strategy:
 *   1. Query each known diocese code (see DIOCESE_CODES) via the GWT-RPC API
 *   2. Parse response: extract localities (churches) + celebrations (mass times)
 *   3. Deduce recurring weekly schedule from date-specific celebration entries
 *   4. Match against existing French churches via church-matcher
 *   5. Upsert churches and mass schedules
 *
 * Usage:
 *   npx tsx scripts/import-messesinfo.ts --all --dry-run
 *   npx tsx scripts/import-messesinfo.ts --all
 *   npx tsx scripts/import-messesinfo.ts --diocese pa --dry-run   # Paris only
 *   npx tsx scripts/import-messesinfo.ts --all --resume-from 20
 */

import dotenv from 'dotenv';
import path from 'path';

// Load environment before anything reads process.env (.env.local wins over .env
// because dotenv does not override variables that are already set).
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Mask the password portion of the URL before logging it.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);

const pool = new Pool({
  connectionString: dbUrl,
  // NOTE(review): certificate verification is disabled for URLs containing "neon" — confirm this is intended.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

import { findDuplicateChurch } from '../src/lib/church-matcher';
import type { ExistingChurch } from '../src/lib/church-matcher';

// ─── Constants ───────────────────────────────────────────────────────────────

const API_URL = 'https://messes.info/gwtRequest';
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
/** Pause between consecutive API requests (politeness delay). */
const REQUEST_DELAY_MS = 3000;
/** Pause before retrying after a 429/503 response or a network/parse failure. */
const RETRY_DELAY_MS = 10000;
/** Total number of attempts per diocese request. */
const MAX_RETRIES = 3;
/**
 * Value sent as the third query parameter of each request.
 * NOTE(review): presumably the page size; results beyond it are not fetched — confirm.
 */
const RESULTS_PER_QUERY = 2000;

// Diocese codes discovered from the API. Each code maps to a diocese in France.
// The query "community:{code}" returns all parishes within that diocese.
// Codes are 1–2 letter abbreviations (e.g., pa=Paris, ly=Lyon, st=Strasbourg).
const DIOCESE_CODES = [
  'a', 'aa', 'ac', 'ad', 'ag', 'al', 'am', 'an', 'ar', 'au', 'av', 'ay',
  'ba', 'bb', 'be', 'bl', 'bm', 'bo', 'br', 'bs', 'bv', 'by',
  'ca', 'cb', 'cc', 'cd', 'ch', 'cl', 'cm', 'cn', 'cr', 'cs',
  'da', 'di', 'dj', 'dn',
  'et', 'ex', 'ey',
  'ft',
  'ga', 'gr',
  'lg', 'lh', 'li', 'lm', 'lp', 'lr', 'ls', 'lu', 'lv', 'ly',
  'ma', 'md', 'me', 'ml', 'mp', 'mt', 'mx',
  'na', 'nc', 'ni', 'nt', 'nv', 'ny',
  'or',
  'pa', 'pm', 'po', 'ps', 'pt',
  'qu',
  're', 'rn', 'ro', 'rv',
  'sl', 'ss', 'st', 'sz',
  'tl', 'to', 'ts', 'tu',
  'va', 'vd', 've', 'vl', 'vv',
];

// ─── Types ───────────────────────────────────────────────────────────────────

/** A church ("locality") as extracted from the API response. */
interface LocalityData {
  idfixe: string;
  name: string;
  address: string | null;
  city: string | null;
  zipcode: string | null;
  latitude: number;
  longitude: number;
  sector: string | null;
  communityId: string | null;
  localityId: string; // e.g. "75/paris-04/saint-louis-en-l-ile"
}

/** A single dated celebration entry linked to a locality. */
interface CelebrationData {
  date: string;
  time: string; // normalized to "HH:MM"
  recurrenceCategory: number;
}

/** A weekly schedule slot; dayOfWeek follows Date#getUTCDay (0 = Sunday). */
interface ParsedSchedule {
  dayOfWeek: number;
  time: string;
}

/** A locality together with every celebration that references it. */
interface LocalityEntry {
  locality: LocalityData;
  celebrations: CelebrationData[];
}

interface ImportStats {
  diocesesProcessed: number;
  localitiesFound: number;
  churchesMatched: number;
  churchesCreated: number;
  churchesSkipped: number;
  schedulesCreated: number;
  errors: number;
}

interface CLIArgs {
  all: boolean;
  dryRun: boolean;
  resumeFrom?: number;
  diocese?: string;
  jobId?: string;
}

/**
 * Raw, unvalidated JSON payload returned by the GWT-RPC endpoint. Its shape is
 * only probed defensively (see parseApiResponse), so it is left untyped.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type GwtPayload = any;

// ─── HTTP Client ─────────────────────────────────────────────────────────────

/** Number of API requests issued so far (reported in the summary). */
let requestCount = 0;

/** Resolve after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Convert MessesInfo time format "18h00", "9h30" or "18h" to "HH:MM" format.
 *
 * Strings already in "H:MM"/"HH:MM" form are zero-padded; anything else is
 * returned unchanged (apart from trimming surrounding whitespace).
 */
function normalizeTime(messesTime: string): string {
  const raw = messesTime.trim();

  // French notation; the minutes part is optional ("18h" means 18:00).
  const french = /^(\d{1,2})h(\d{2})?$/.exec(raw);
  if (french) {
    const hours = french[1] ?? '';
    const minutes = french[2] ?? '00';
    return `${hours.padStart(2, '0')}:${minutes}`;
  }

  // Already in HH:MM format
  const colon = /^(\d{1,2}):(\d{2})$/.exec(raw);
  if (colon) {
    const hours = colon[1] ?? '';
    const minutes = colon[2] ?? '';
    return `${hours.padStart(2, '0')}:${minutes}`;
  }

  return raw;
}

/**
 * Fetch the raw API payload for one diocese.
 *
 * Waits REQUEST_DELAY_MS before every request except the first. Responses with
 * status 429/503 and thrown errors (network failures, invalid JSON) are retried
 * up to MAX_RETRIES attempts with RETRY_DELAY_MS between them; any other
 * non-OK status fails immediately.
 *
 * @returns the parsed JSON body, or null when the request ultimately failed.
 */
async function fetchDioceseData(dioceseCode: string): Promise<GwtPayload | null> {
  if (requestCount > 0) {
    await delay(REQUEST_DELAY_MS);
  }
  requestCount++;

  const body = JSON.stringify({
    F: 'cef.kephas.shared.request.AppRequestFactory',
    I: [{
      // NOTE(review): opaque operation identifier expected by the endpoint — origin not visible here.
      O: 'Bzv0wi60qgwcW5aKiRKrtgNaLKo=',
      // NOTE(review): presumably [query, offset, limit, ?, ?, "lat:lng" reference point, ?] — confirm.
      P: [`community:${dioceseCode}`, 0, RESULTS_PER_QUERY, 1, null, '48.86:2.35', ''],
      R: ['listCelebrationTime.locality'],
    }],
  });

  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    const canRetry = attempt < MAX_RETRIES;
    try {
      const response = await fetch(API_URL, {
        method: 'POST',
        headers: {
          'User-Agent': USER_AGENT,
          'Content-Type': 'application/json',
          'Accept': 'application/json',
        },
        body,
      });

      if (response.status === 503 || response.status === 429) {
        if (canRetry) {
          console.log(` HTTP ${response.status} — retrying in ${RETRY_DELAY_MS / 1000}s (attempt ${attempt}/${MAX_RETRIES})`);
          await delay(RETRY_DELAY_MS);
          continue;
        }
        console.error(` HTTP ${response.status} after ${MAX_RETRIES} attempts`);
        return null;
      }

      if (!response.ok) {
        console.error(` HTTP ${response.status} from API`);
        return null;
      }

      return await response.json();
    } catch (error) {
      if (canRetry) {
        console.log(` Network error — retrying in ${RETRY_DELAY_MS / 1000}s (attempt ${attempt}/${MAX_RETRIES})`);
        await delay(RETRY_DELAY_MS);
        continue;
      }
      console.error(` API error after ${MAX_RETRIES} attempts: ${error instanceof Error ? error.message : error}`);
      return null;
    }
  }

  return null;
}

// ─── Response Parser ─────────────────────────────────────────────────────────

/**
 * Parse the GWT-RPC response into a map of locality idfixe → data.
 *
 * The response O array contains interleaved objects:
 *   - Locality objects: have P.idfixe, P.name, P.address, P.latitude, etc.
 *   - Celebration objects: have P.date, P.time, P.localityId, P.recurrenceCategory
 *   - Metadata object: has P.size, P.sizeLocalities
 *
 * Localities and celebrations are linked by P.localityId matching P.id on localities.
 * When several localities share an idfixe, the first one seen is kept and the
 * celebrations of all of them are collected under it.
 */
function parseApiResponse(data: GwtPayload): Map<string, LocalityEntry> {
  const result = new Map<string, LocalityEntry>();
  if (!data?.O || !Array.isArray(data.O)) return result;

  // First pass: collect all localities by their id
  const localitiesById = new Map<string, LocalityData>();
  for (const obj of data.O) {
    const p = obj?.P;
    if (!p || typeof p !== 'object') continue;
    if (!p.idfixe || !p.name) continue;

    const locality: LocalityData = {
      idfixe: p.idfixe,
      name: p.name,
      address: p.address || null,
      city: p.city || null,
      zipcode: p.zipcode || null,
      // Missing coordinates become 0/0, which processDiocese treats as "no location".
      latitude: p.latitude || 0,
      longitude: p.longitude || 0,
      sector: p.sector || null,
      communityId: p.communityId || null,
      localityId: p.id || '',
    };
    localitiesById.set(p.id, locality);

    // Initialize in result map (dedup by idfixe)
    if (!result.has(p.idfixe)) {
      result.set(p.idfixe, { locality, celebrations: [] });
    }
  }

  // Second pass: collect celebrations and link to localities
  for (const obj of data.O) {
    const p = obj?.P;
    if (!p || typeof p !== 'object') continue;
    if (!p.date || !p.time || !p.localityId) continue;

    const locality = localitiesById.get(p.localityId);
    const entry = locality ? result.get(locality.idfixe) : undefined;
    if (!entry) continue;

    entry.celebrations.push({
      date: p.date,
      time: normalizeTime(String(p.time)),
      recurrenceCategory: p.recurrenceCategory ?? 0,
    });
  }

  return result;
}

// ─── Schedule Deduction ──────────────────────────────────────────────────────

/**
 * Collapse dated celebrations into unique (dayOfWeek, time) slots, preserving
 * first-seen order. Entries whose date cannot be parsed are skipped so that a
 * NaN day of week is never produced.
 */
function collectWeeklySlots(celebrations: CelebrationData[]): ParsedSchedule[] {
  const seen = new Set<string>();
  const slots: ParsedSchedule[] = [];

  for (const celeb of celebrations) {
    // Noon UTC keeps the calendar day stable regardless of the local timezone.
    const dayOfWeek = new Date(`${celeb.date}T12:00:00Z`).getUTCDay();
    if (Number.isNaN(dayOfWeek)) continue;

    const key = `${dayOfWeek}:${celeb.time}`;
    if (seen.has(key)) continue;
    seen.add(key);
    slots.push({ dayOfWeek, time: celeb.time });
  }

  return slots;
}

/**
 * Deduce a recurring weekly schedule from date-specific celebrations.
 *
 * Entries with recurrenceCategory === 1 are used when they yield at least one
 * slot; otherwise every celebration is used as a fallback.
 */
function deduceSchedules(celebrations: CelebrationData[]): ParsedSchedule[] {
  // First pass: weekly recurring entries only (recurrenceCategory=1)
  const weekly = collectWeeklySlots(celebrations.filter((c) => c.recurrenceCategory === 1));
  if (weekly.length > 0) return weekly;

  // Fallback: if no weekly entries, deduce from all
  return collectWeeklySlots(celebrations);
}

// ─── Database Operations ─────────────────────────────────────────────────────

/** Load every church with country 'FR', with the fields church-matcher needs for deduplication. */
async function loadExistingFrenchChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing French churches for deduplication...');
  const churches = await prisma.church.findMany({
    where: { country: 'FR' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });
  console.log(`Loaded ${churches.length} existing French churches`);
  return churches;
}

/**
 * True when `error` is a database unique-constraint violation: either Prisma's
 * documented error code P2002 or the corresponding message text.
 */
function isUniqueConstraintError(error: unknown): boolean {
  if (typeof error === 'object' && error !== null && (error as { code?: unknown }).code === 'P2002') {
    return true;
  }
  return error instanceof Error && error.message.includes('Unique constraint');
}

// ─── Import Logic ────────────────────────────────────────────────────────────

/**
 * Import one diocese: fetch, parse, then match-or-create each locality.
 *
 * In dry-run mode only the counters are updated. Newly created churches are
 * appended to `existingChurches` so later localities can match against them.
 * `stats` is mutated in place.
 *
 * @throws rethrows any non-unique-constraint error raised while updating a
 *   matched church; the caller counts it and moves on to the next diocese.
 */
async function processDiocese(
  dioceseCode: string,
  existingChurches: ExistingChurch[],
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  const data = await fetchDioceseData(dioceseCode);
  if (!data) {
    stats.errors++;
    return;
  }

  // Check for API error
  if (data.S && data.S[0] === false) {
    console.log(` API error for diocese ${dioceseCode}`);
    stats.errors++;
    return;
  }

  const localities = parseApiResponse(data);
  console.log(` Found ${localities.size} unique localities`);
  stats.localitiesFound += localities.size;
  stats.diocesesProcessed++;

  for (const [idfixe, { locality, celebrations }] of localities) {
    // 0/0 means the API supplied no coordinates; such churches cannot be placed or matched.
    if (locality.latitude === 0 && locality.longitude === 0) {
      stats.churchesSkipped++;
      continue;
    }

    const schedules = deduceSchedules(celebrations);
    const candidate = {
      name: locality.name,
      lat: locality.latitude,
      lng: locality.longitude,
      messesInfoId: idfixe,
    };
    const duplicate = findDuplicateChurch(candidate, existingChurches);

    if (dryRun) {
      if (duplicate) {
        stats.churchesMatched++;
      } else {
        stats.churchesCreated++;
      }
      stats.schedulesCreated += schedules.length;
      continue;
    }

    if (duplicate) {
      stats.churchesMatched++;

      // Always link the messes.info id; only fill in address/coordinates that are missing.
      const updateData: {
        messesInfoId: string;
        address?: string;
        latitude?: number;
        longitude?: number;
      } = { messesInfoId: idfixe };
      if (!duplicate.address && locality.address) updateData.address = locality.address;
      if (duplicate.latitude === 0 && duplicate.longitude === 0 && locality.latitude !== 0) {
        updateData.latitude = locality.latitude;
        updateData.longitude = locality.longitude;
      }

      try {
        await prisma.church.update({
          where: { id: duplicate.id },
          data: updateData,
        });
      } catch (error) {
        // Another church already owns this messesInfoId: skip rather than fail.
        if (isUniqueConstraintError(error)) {
          stats.churchesSkipped++;
          continue;
        }
        throw error;
      }

      if (schedules.length > 0) {
        try {
          // Replace the church's schedules atomically so a failure never leaves it empty.
          await prisma.$transaction(async (tx) => {
            await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
            await tx.massSchedule.createMany({
              data: schedules.map((s) => ({
                churchId: duplicate.id,
                dayOfWeek: s.dayOfWeek,
                time: s.time,
                language: 'French',
              })),
            });
            await tx.church.update({
              where: { id: duplicate.id },
              data: { lastScrapedAt: new Date() },
            });
          });
          stats.schedulesCreated += schedules.length;
        } catch (error) {
          stats.errors++;
          console.error(` Error saving schedules for ${idfixe}: ${error instanceof Error ? error.message : error}`);
        }
      }
    } else {
      // Overseas departments (zipcodes 971–976) are stored as FR too, so the
      // country does not depend on the zipcode.
      const country = 'FR';

      try {
        const newChurch = await prisma.church.create({
          data: {
            name: locality.name,
            latitude: locality.latitude,
            longitude: locality.longitude,
            address: locality.address,
            zip: locality.zipcode,
            city: locality.city,
            country,
            diocese: locality.sector || undefined,
            messesInfoId: idfixe,
            source: 'messes-info',
            websiteLanguage: 'fr',
          },
        });
        stats.churchesCreated++;

        // Make the new church visible to the matcher for the rest of this run.
        existingChurches.push({
          id: newChurch.id,
          name: locality.name,
          latitude: locality.latitude,
          longitude: locality.longitude,
          osmId: null,
          baiduId: null,
          masstimesId: null,
          orarimesseId: null,
          massSchedulesPhId: null,
          philmassId: null,
          horariosMisasId: null,
          mszeInfoId: null,
          weekdayMassesId: null,
          messesInfoId: idfixe,
          bohosluzbyId: null,
          miserendId: null,
          kerknetId: null,
          gottesdienstzeitenId: null,
          discovermassId: null,
          source: 'messes-info',
          website: null,
          phone: null,
          address: locality.address,
        });

        if (schedules.length > 0) {
          await prisma.massSchedule.createMany({
            data: schedules.map((s) => ({
              churchId: newChurch.id,
              dayOfWeek: s.dayOfWeek,
              time: s.time,
              language: 'French',
            })),
          });
          await prisma.church.update({
            where: { id: newChurch.id },
            data: { lastScrapedAt: new Date() },
          });
          stats.schedulesCreated += schedules.length;
        }
      } catch (error) {
        if (isUniqueConstraintError(error)) {
          stats.churchesSkipped++;
          continue;
        }
        stats.errors++;
        console.error(` Error creating ${idfixe}: ${error instanceof Error ? error.message : error}`);
      }
    }
  }
}

// ─── CLI ─────────────────────────────────────────────────────────────────────

const HELP_TEXT = `
Usage: npx tsx scripts/import-messesinfo.ts [options]

Options:
  --all               Import all dioceses
  --diocese <code>    Import a single diocese (e.g., pa for Paris)
  --dry-run           No database writes, just report what would happen
  --resume-from <n>   Skip first N dioceses
  --job-id <id>       Background job tracking ID
  --help, -h          Show this help message

Examples:
  npx tsx scripts/import-messesinfo.ts --diocese pa --dry-run
  npx tsx scripts/import-messesinfo.ts --all --dry-run
  npx tsx scripts/import-messesinfo.ts --all
`;

/**
 * Parse process.argv into CLIArgs.
 *
 * Exits with status 0 after printing help, and with status 1 when a flag is
 * missing its value, --resume-from is not a non-negative integer, or neither
 * --all nor --diocese is given. Unrecognized arguments are ignored.
 */
function parseArgs(): CLIArgs {
  const argv = process.argv.slice(2);
  const result: CLIArgs = { all: false, dryRun: false };
  let index = 0;

  // Consume the value following a flag, rejecting a missing value or another flag.
  const takeValue = (flag: string): string => {
    const value = argv[index + 1];
    if (value === undefined || value.startsWith('--')) {
      console.error(`Error: ${flag} requires a value`);
      process.exit(1);
    }
    index++;
    return value;
  };

  for (; index < argv.length; index++) {
    switch (argv[index]) {
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--resume-from': {
        const raw = takeValue('--resume-from');
        // Reject anything that is not a plain non-negative integer; a NaN here
        // would otherwise be ignored and silently restart the whole import.
        if (!/^\d+$/.test(raw)) {
          console.error(`Error: --resume-from expects a non-negative integer, got "${raw}"`);
          process.exit(1);
        }
        result.resumeFrom = Number.parseInt(raw, 10);
        break;
      }
      case '--diocese':
        result.diocese = takeValue('--diocese');
        break;
      case '--job-id':
        result.jobId = takeValue('--job-id');
        break;
      case '--help':
      case '-h':
        console.log(HELP_TEXT);
        process.exit(0);
    }
  }

  if (!result.all && !result.diocese) {
    console.error('Error: specify --all or --diocese <code>');
    process.exit(1);
  }

  return result;
}

/** Format a millisecond duration as "Xh Ym Zs", "Ym Zs" or "Zs". */
function formatDuration(ms: number): string {
  const seconds = Math.floor(ms / 1000);
  const minutes = Math.floor(seconds / 60);
  const hours = Math.floor(minutes / 60);
  if (hours > 0) return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
  if (minutes > 0) return `${minutes}m ${seconds % 60}s`;
  return `${seconds}s`;
}

// ─── Main ────────────────────────────────────────────────────────────────────

/**
 * Entry point: parse arguments, run the import diocese by diocese, print a
 * summary, and (when --job-id is given) record progress on the background job.
 * A failure in one diocese is counted and does not stop the run.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  const startTime = Date.now();

  console.log('\n' + '='.repeat(70));
  console.log('MESSES.INFO (FRANCE) IMPORTER');
  console.log('='.repeat(70));
  console.log(`Mode: ${args.diocese ? `Diocese ${args.diocese}` : 'All dioceses'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from: diocese index ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (error) {
      // Best effort: the job row might not exist, and the import should run regardless.
      console.warn(`Warning: could not mark job ${args.jobId} as running: ${error instanceof Error ? error.message : error}`);
    }
  }

  const stats: ImportStats = {
    diocesesProcessed: 0,
    localitiesFound: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesCreated: 0,
    errors: 0,
  };

  const existingChurches = await loadExistingFrenchChurches();

  let dioceses = args.diocese ? [args.diocese] : [...DIOCESE_CODES];

  if (args.diocese && !DIOCESE_CODES.includes(args.diocese)) {
    console.log(`Warning: diocese "${args.diocese}" not in known list, trying anyway...`);
  }

  // --resume-from only applies to a full run; it is ignored with --diocese.
  if (args.resumeFrom && !args.diocese) {
    dioceses = dioceses.slice(args.resumeFrom);
    console.log(`Resuming from diocese index ${args.resumeFrom} (${dioceses[0] ?? 'nothing left to process'})\n`);
  }

  console.log(`Processing ${dioceses.length} dioceses\n`);

  for (const [i, code] of dioceses.entries()) {
    const elapsed = formatDuration(Date.now() - startTime);
    console.log(`[${i + 1}/${dioceses.length}] Diocese "${code}" [${elapsed} elapsed]`);

    try {
      await processDiocese(code, existingChurches, args.dryRun, stats);
    } catch (error) {
      stats.errors++;
      console.error(` ERROR processing diocese ${code}: ${error instanceof Error ? error.message : error}`);
    }
  }

  const totalTime = Date.now() - startTime;

  console.log('\n' + '='.repeat(70));
  console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
  console.log('='.repeat(70));
  console.log(`Dioceses processed: ${stats.diocesesProcessed}`);
  console.log(`Localities found: ${stats.localitiesFound}`);
  console.log(` Matched (existing): ${stats.churchesMatched}`);
  console.log(` Created (new): ${stats.churchesCreated}`);
  console.log(` Skipped: ${stats.churchesSkipped}`);
  console.log(`Schedules created: ${stats.schedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Total time: ${formatDuration(totalTime)}`);
  console.log(`HTTP requests: ${requestCount}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: {
          status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
          completedAt: new Date(),
          processed: stats.localitiesFound,
          succeeded: stats.churchesCreated + stats.churchesMatched,
          failed: stats.errors,
          itemsFound: stats.schedulesCreated,
        },
      });
    } catch (error) {
      // Best effort: failing to record the outcome must not fail the import itself.
      console.warn(`Warning: could not update job ${args.jobId}: ${error instanceof Error ? error.message : error}`);
    }
  }
}

main()
  .catch((error) => {
    console.error('Fatal error:', error);
    // Set the exit code instead of calling process.exit() so the cleanup below still runs.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });