#!/usr/bin/env tsx
/**
 * Import Catholic churches and mass schedules from miserend.hu (Hungary)
 *
 * miserend.hu is the Hungarian Catholic mass schedule database, maintained by
 * the community with ~5,055 churches (mostly Hungary, some Romania/Slovakia).
 * It publishes a daily-updated SQLite database at:
 *   https://miserend.hu/fajlok/sqlite/miserend_v4.sqlite3
 *
 * The SQLite contains:
 *   - templomok: churches (tid, nev, lat, lng, varos, cim, orszag, megye)
 *   - misek: date-specific mass entries (tid, ido, datumtol, datumig, nyelv)
 *   - kepek: church photos
 *
 * Import strategy:
 *   1. Download the SQLite database
 *   2. Extract all churches with coordinates
 *   3. Deduce weekly recurring schedules from date-specific entries
 *   4. Match against existing churches via church-matcher
 *   5. Upsert churches and mass schedules
 *
 * Usage:
 *   npx tsx scripts/import-miserend.ts --all --dry-run
 *   npx tsx scripts/import-miserend.ts --all
 *   npx tsx scripts/import-miserend.ts --id 37 --dry-run   # Single church
 *   npx tsx scripts/import-miserend.ts --all --resume-from 500
 */

import dotenv from 'dotenv';
import path from 'path';
import fs from 'fs';
import { execFileSync } from 'child_process';

// Environment files are loaded before DATABASE_URL is read below; .env.local is loaded first.
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Mask the password portion of the URL before logging it.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);

const pool = new Pool({
  connectionString: dbUrl,
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

import { findDuplicateChurch } from '../src/lib/church-matcher';
import type { ExistingChurch } from '../src/lib/church-matcher';

// ─── Constants ───────────────────────────────────────────────────────────────

const SQLITE_URL = 'https://miserend.hu/fajlok/sqlite/miserend_v4.sqlite3';
const SQLITE_PATH = '/tmp/miserend_v4.sqlite3';

// Separators emitted by `sqlite3 -ascii`: unit separator between columns,
// record separator after each row. Unlike '|' and '\n' they are not expected
// to occur inside church names or addresses.
const SQLITE_FIELD_SEP = '\x1f';
const SQLITE_ROW_SEP = '\x1e';

// Country code used when `orszag` is empty or not present in COUNTRY_MAP.
const DEFAULT_COUNTRY = 'HU';

// Country mapping from the names used in the miserend data to ISO codes
const COUNTRY_MAP: Record<string, string> = {
  'Magyarország': 'HU',
  'România': 'RO',
  'Slovensko': 'SK',
  'Szlovákia': 'SK',
  'Szerbia-Montenegro': 'RS',
  'Србија': 'RS',
  'Ukrajna': 'UA',
  'Україна': 'UA',
  'Österreich': 'AT',
  'Schweiz/Suisse/Svizzera/Svizra': 'CH',
  'België / Belgique / Belgien': 'BE',
  'Éire / Ireland': 'IE',
  'Россия': 'RU',
};

const HELP_TEXT = `
Usage: npx tsx scripts/import-miserend.ts [options]

Options:
  --all              Import all churches
  --id <id>          Import a single church by miserend ID
  --dry-run          No database writes, just report what would happen
  --resume-from <n>  Skip first N churches
  --job-id <id>      Background job tracking ID
  --help, -h         Show this help message

Examples:
  npx tsx scripts/import-miserend.ts --id 37 --dry-run
  npx tsx scripts/import-miserend.ts --all --dry-run
  npx tsx scripts/import-miserend.ts --all
`;

// ─── Types ───────────────────────────────────────────────────────────────────

interface MiserendChurch {
  tid: number;
  nev: string;
  ismertnev: string | null;
  orszag: string | null;
  megye: string | null;
  varos: string | null;
  cim: string | null;
  lat: number;
  lng: number;
}

interface MiserendMass {
  mid: number;
  tid: number;
  datumtol: number; // MMDD format
  datumig: number;
  ido: string; // HH:MM:SS
  nyelv: string | null;
}

interface ParsedSchedule {
  dayOfWeek: number;
  time: string;
}

interface ImportStats {
  churchesFetched: number;
  churchesMatched: number;
  churchesCreated: number;
  churchesSkipped: number;
  schedulesCreated: number;
  errors: number;
}

interface CLIArgs {
  all: boolean;
  dryRun: boolean;
  resumeFrom?: number;
  churchId?: string;
  jobId?: string;
}

// ─── Small Helpers ───────────────────────────────────────────────────────────

/** Renders an unknown thrown value as a log-friendly string. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Maps a miserend `orszag` value to a country code, falling back to DEFAULT_COUNTRY. */
function countryCodeFor(orszag: string | null): string {
  return orszag ? (COUNTRY_MAP[orszag] || DEFAULT_COUNTRY) : DEFAULT_COUNTRY;
}

/**
 * Returns true when `error` looks like a unique-constraint violation:
 * either the message contains "Unique constraint" or the error carries
 * Prisma's P2002 code.
 */
function isUniqueConstraintError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  if (error.message.includes('Unique constraint')) return true;
  return (error as { code?: unknown }).code === 'P2002';
}

// ─── SQLite Helpers ──────────────────────────────────────────────────────────

/**
 * Runs `query` against the downloaded database through the `sqlite3` CLI in
 * ASCII mode and returns the raw output (fields separated by 0x1F, rows
 * terminated by 0x1E).
 *
 * Failures are best-effort: the error is logged and an empty string is
 * returned so callers treat the query as having no rows.
 */
function sqliteQuery(query: string): string {
  try {
    return execFileSync('sqlite3', ['-ascii', SQLITE_PATH, query], {
      encoding: 'utf-8',
      maxBuffer: 100 * 1024 * 1024, // 100MB
    });
  } catch (error) {
    console.error(`  sqlite3 query failed: ${describeError(error)}`);
    return '';
  }
}

/** Runs `query` and splits the output into rows of column strings. */
function sqliteRows(query: string): string[][] {
  const raw = sqliteQuery(query);
  if (!raw) return [];
  return raw
    .split(SQLITE_ROW_SEP)
    .filter((record) => record.length > 0)
    .map((record) => record.split(SQLITE_FIELD_SEP));
}

/**
 * Downloads the miserend SQLite file to SQLITE_PATH with curl.
 * Throws if curl exits non-zero (including HTTP errors, via `-f`) or times out.
 */
function downloadSqlite(): void {
  console.log('Downloading miserend SQLite database...');
  // -f: fail on HTTP >= 400 instead of saving the error page as the database.
  execFileSync('curl', ['-fsSL', '-o', SQLITE_PATH, SQLITE_URL], { timeout: 120000 });
  const size = fs.statSync(SQLITE_PATH).size;
  console.log(`Downloaded ${(size / 1024 / 1024).toFixed(1)}MB`);
}

/** Loads every church that has non-zero coordinates; rows with unparsable ids or coordinates are dropped. */
function loadChurches(): MiserendChurch[] {
  const rows = sqliteRows(
    "SELECT tid, nev, ismertnev, orszag, megye, varos, cim, lat, lng FROM templomok WHERE lat IS NOT NULL AND lng IS NOT NULL AND lat != 0 AND lng != 0;"
  );

  return rows
    .map((fields): MiserendChurch => {
      const [
        tid = '', nev = '', ismertnev = '', orszag = '', megye = '',
        varos = '', cim = '', lat = '', lng = '',
      ] = fields;
      return {
        tid: Number.parseInt(tid, 10),
        nev,
        ismertnev: ismertnev || null,
        orszag: orszag || null,
        megye: megye || null,
        varos: varos || null,
        cim: cim || null,
        lat: Number.parseFloat(lat),
        lng: Number.parseFloat(lng),
      };
    })
    .filter((c) => !Number.isNaN(c.tid) && !Number.isNaN(c.lat) && !Number.isNaN(c.lng));
}

/** Loads the `misek` rows for one church; rows without a parsable id or a time are dropped. */
function loadMassesForChurch(tid: number): MiserendMass[] {
  // tid is interpolated into SQL text, so only accept a real integer.
  if (!Number.isInteger(tid)) return [];

  const rows = sqliteRows(
    `SELECT mid, tid, datumtol, datumig, ido, nyelv FROM misek WHERE tid=${tid};`
  );

  return rows
    .map((fields): MiserendMass => {
      const [mid = '', tidStr = '', datumtol = '', datumig = '', ido = '', nyelv = ''] = fields;
      return {
        mid: Number.parseInt(mid, 10),
        tid: Number.parseInt(tidStr, 10),
        datumtol: Number.parseInt(datumtol, 10),
        datumig: Number.parseInt(datumig, 10),
        ido,
        nyelv: nyelv || null,
      };
    })
    .filter((m) => !Number.isNaN(m.mid) && m.ido !== '');
}

// ─── Schedule Deduction ──────────────────────────────────────────────────────

/**
 * Extracts a zero-padded "HH:MM" from a time string such as "HH:MM:SS".
 * Returns null when the value does not start with a valid hour:minute pair.
 */
function normalizeTime(ido: string): string | null {
  const match = /^(\d{1,2}):(\d{2})/.exec(ido.trim());
  if (!match) return null;
  const [, hh = '', mm = ''] = match;
  if (Number(hh) > 23 || Number(mm) > 59) return null;
  return `${hh.padStart(2, '0')}:${mm}`;
}

/**
 * Deduce weekly recurring schedule from date-specific mass entries.
 * Each entry has datumtol/datumig in MMDD format (e.g., 104 = Jan 4).
 * We convert each datumtol to a day of week (in the current year) and collect
 * unique day+time combos, in first-seen order.
 *
 * Entries are skipped when the time is missing, malformed or 00:00, or when
 * datumtol is not a real calendar date in the current year (NaN, out of
 * range, or a day such as 0230 that would roll over into the next month).
 */
function deduceSchedules(masses: MiserendMass[]): ParsedSchedule[] {
  const seen = new Set<string>();
  const schedules: ParsedSchedule[] = [];

  // Use current year for date conversion
  const year = new Date().getFullYear();

  for (const mass of masses) {
    const time = normalizeTime(mass.ido);
    if (time === null || time === '00:00') continue;

    // NaN would slip through the range checks below (every comparison with
    // NaN is false) and produce dayOfWeek = NaN, so reject it explicitly.
    const mmdd = mass.datumtol;
    if (!Number.isInteger(mmdd)) continue;

    const month = Math.floor(mmdd / 100);
    const day = mmdd % 100;
    if (month < 1 || month > 12 || day < 1 || day > 31) continue;

    const date = new Date(year, month - 1, day);
    // Date silently rolls invalid days over (Feb 30 -> Mar 2); reject those.
    if (date.getMonth() !== month - 1 || date.getDate() !== day) continue;

    const dayOfWeek = date.getDay(); // 0=Sun, 1=Mon, ..., 6=Sat
    const key = `${dayOfWeek}:${time}`;
    if (!seen.has(key)) {
      seen.add(key);
      schedules.push({ dayOfWeek, time });
    }
  }

  return schedules;
}

// ─── Database Operations ─────────────────────────────────────────────────────

/** Loads the matcher-relevant fields of every stored church in the given countries. */
async function loadExistingChurches(countryCodes: string[]): Promise<ExistingChurch[]> {
  console.log(`Loading existing churches for countries: ${countryCodes.join(', ')}...`);

  const churches = await prisma.church.findMany({
    where: { country: { in: countryCodes } },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });

  console.log(`Loaded ${churches.length} existing churches`);
  return churches;
}

// ─── Import Logic ────────────────────────────────────────────────────────────

/**
 * Imports one miserend church.
 *
 * - Dry run: only counts whether the church would match or be created.
 * - Match found: links the existing church via miserendId (filling in a
 *   missing address) and, when schedules were deduced, replaces its mass
 *   schedules in one transaction.
 * - No match: creates the church, appends it to `existingChurches` so later
 *   candidates can match against it, then inserts its schedules.
 *
 * Unique-constraint violations count as skipped. Other errors on the matched
 * church update are rethrown to the caller; schedule and create errors are
 * logged and counted in `stats.errors`.
 */
async function processChurch(
  church: MiserendChurch,
  existingChurches: ExistingChurch[],
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  const miserendId = String(church.tid);
  const country = countryCodeFor(church.orszag);

  const candidate = {
    name: church.nev,
    lat: church.lat,
    lng: church.lng,
    miserendId,
  };
  const duplicate = findDuplicateChurch(candidate, existingChurches);

  if (dryRun) {
    if (duplicate) {
      stats.churchesMatched++;
    } else {
      stats.churchesCreated++;
    }
    return;
  }

  // Deduce schedules (skipped entirely in dry-run mode above)
  const schedules = deduceSchedules(loadMassesForChurch(church.tid));

  if (duplicate) {
    stats.churchesMatched++;

    const updateData: { miserendId: string; address?: string } = { miserendId };
    if (!duplicate.address && church.cim) updateData.address = church.cim;

    try {
      await prisma.church.update({
        where: { id: duplicate.id },
        data: updateData,
      });
    } catch (error) {
      if (isUniqueConstraintError(error)) {
        stats.churchesSkipped++;
        return;
      }
      throw error;
    }

    if (schedules.length > 0) {
      try {
        // Replace the existing schedules atomically so a failure cannot
        // leave the church without any.
        await prisma.$transaction(async (tx) => {
          await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
          await tx.massSchedule.createMany({
            data: schedules.map((s) => ({
              churchId: duplicate.id,
              dayOfWeek: s.dayOfWeek,
              time: s.time,
              language: 'Hungarian',
            })),
          });
          await tx.church.update({
            where: { id: duplicate.id },
            data: { lastScrapedAt: new Date() },
          });
        });
        stats.schedulesCreated += schedules.length;
      } catch (error) {
        stats.errors++;
        console.error(`  Error saving schedules for ${miserendId}: ${describeError(error)}`);
      }
    }
    return;
  }

  try {
    const newChurch = await prisma.church.create({
      data: {
        name: church.nev,
        latitude: church.lat,
        longitude: church.lng,
        address: church.cim,
        city: church.varos,
        state: church.megye,
        country,
        miserendId,
        source: 'miserend',
        websiteLanguage: 'hu',
      },
    });

    stats.churchesCreated++;

    // Make the new church visible to the matcher for the rest of this run.
    existingChurches.push({
      id: newChurch.id,
      name: church.nev,
      latitude: church.lat,
      longitude: church.lng,
      osmId: null,
      baiduId: null,
      masstimesId: null,
      orarimesseId: null,
      massSchedulesPhId: null,
      philmassId: null,
      horariosMisasId: null,
      mszeInfoId: null,
      weekdayMassesId: null,
      messesInfoId: null,
      bohosluzbyId: null,
      miserendId,
      kerknetId: null,
      gottesdienstzeitenId: null,
      discovermassId: null,
      source: 'miserend',
      website: null,
      phone: null,
      address: church.cim,
    });

    if (schedules.length > 0) {
      await prisma.massSchedule.createMany({
        data: schedules.map((s) => ({
          churchId: newChurch.id,
          dayOfWeek: s.dayOfWeek,
          time: s.time,
          language: 'Hungarian',
        })),
      });
      await prisma.church.update({
        where: { id: newChurch.id },
        data: { lastScrapedAt: new Date() },
      });
      stats.schedulesCreated += schedules.length;
    }
  } catch (error) {
    if (isUniqueConstraintError(error)) {
      stats.churchesSkipped++;
      return;
    }
    stats.errors++;
    console.error(`  Error creating ${miserendId}: ${describeError(error)}`);
  }
}

// ─── CLI ─────────────────────────────────────────────────────────────────────

/**
 * Parses process.argv into CLIArgs.
 * Exits with status 0 after printing help, and with status 1 when neither
 * --all nor --id is given or when --resume-from lacks a non-negative integer.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = { all: false, dryRun: false };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--resume-from': {
        const value = args[++i];
        // A missing or non-numeric value used to become NaN and was then
        // silently ignored; fail loudly instead.
        if (value === undefined || !/^\d+$/.test(value)) {
          console.error('Error: --resume-from requires a non-negative integer');
          process.exit(1);
        }
        result.resumeFrom = Number.parseInt(value, 10);
        break;
      }
      case '--id':
        result.churchId = args[++i];
        break;
      case '--job-id':
        result.jobId = args[++i];
        break;
      case '--help':
      case '-h':
        console.log(HELP_TEXT);
        process.exit(0);
        break;
      default:
        console.warn(`Warning: ignoring unknown argument ${String(args[i])}`);
        break;
    }
  }

  if (!result.all && !result.churchId) {
    console.error('Error: specify --all or --id <id>');
    process.exit(1);
  }

  return result;
}

/** Formats a millisecond duration as "Xh Ym Zs", "Ym Zs" or "Zs". */
function formatDuration(ms: number): string {
  const seconds = Math.floor(ms / 1000);
  const minutes = Math.floor(seconds / 60);
  const hours = Math.floor(minutes / 60);
  if (hours > 0) return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
  if (minutes > 0) return `${minutes}m ${seconds % 60}s`;
  return `${seconds}s`;
}

// ─── Main ────────────────────────────────────────────────────────────────────

/** Downloads the database, selects the churches to process and imports them one by one. */
async function runImport(args: CLIArgs, stats: ImportStats, startTime: number): Promise<void> {
  // Download SQLite database
  downloadSqlite();

  // Load churches from SQLite
  let churches = loadChurches();
  stats.churchesFetched = churches.length;
  console.log(`Found ${churches.length} churches with coordinates in SQLite\n`);

  if (args.churchId) {
    churches = churches.filter((c) => String(c.tid) === args.churchId);
    if (churches.length === 0) {
      console.error(`Church ID ${args.churchId} not found in SQLite database`);
      // Counted as an error so a tracked job is not reported as a clean run.
      stats.errors++;
      return;
    }
  }

  // Get unique country codes from the data
  const countryCodes = [...new Set(churches.map((c) => countryCodeFor(c.orszag)))];
  const existingChurches = await loadExistingChurches(countryCodes);

  if (args.resumeFrom) {
    churches = churches.slice(args.resumeFrom);
    console.log(`Resuming from index ${args.resumeFrom} (${churches.length} remaining)\n`);
  }

  console.log(`Processing ${churches.length} churches\n`);

  for (const [i, church] of churches.entries()) {
    if (i % 200 === 0) {
      const elapsed = formatDuration(Date.now() - startTime);
      console.log(`[${i + 1}/${churches.length}] Processing ${church.nev} (${church.tid}) [${elapsed} elapsed]`);
    }

    try {
      await processChurch(church, existingChurches, args.dryRun, stats);
    } catch (error) {
      stats.errors++;
      console.error(`  ERROR processing church ${church.tid}: ${describeError(error)}`);
    }
  }
}

/** Prints the end-of-run counters. */
function printSummary(stats: ImportStats, dryRun: boolean, totalTimeMs: number): void {
  console.log('\n' + '='.repeat(70));
  console.log(`IMPORT SUMMARY ${dryRun ? '(DRY RUN)' : ''}`);
  console.log('='.repeat(70));
  console.log(`Churches in SQLite:    ${stats.churchesFetched}`);
  console.log(`  Matched (existing):  ${stats.churchesMatched}`);
  console.log(`  Created (new):       ${stats.churchesCreated}`);
  console.log(`  Skipped:             ${stats.churchesSkipped}`);
  console.log(`Schedules created:     ${stats.schedulesCreated}`);
  console.log(`Errors:                ${stats.errors}`);
  console.log(`Total time:            ${formatDuration(totalTimeMs)}`);
  console.log('='.repeat(70) + '\n');
}

/** Best-effort update of the tracked background job with the final counters. */
async function finalizeJob(jobId: string | undefined, stats: ImportStats): Promise<void> {
  if (!jobId) return;
  try {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: {
        status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
        completedAt: new Date(),
        processed: stats.churchesFetched,
        succeeded: stats.churchesCreated + stats.churchesMatched,
        failed: stats.errors,
        itemsFound: stats.schedulesCreated,
      },
    });
  } catch (error) {
    console.warn(`  Could not update background job ${jobId}: ${describeError(error)}`);
  }
}

/**
 * Script entry point: parses CLI arguments, runs the import and records the
 * outcome on the background job (when --job-id was given) even if the import
 * aborts with an error.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  const startTime = Date.now();

  console.log('\n' + '='.repeat(70));
  console.log('MISEREND.HU (HUNGARY) IMPORTER');
  console.log('='.repeat(70));
  console.log(`Mode: ${args.churchId ? `Church ID ${args.churchId}` : 'All churches'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from: church index ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (error) {
      // Job might not exist; tracking is best-effort.
      console.warn(`  Could not mark background job ${args.jobId} as running: ${describeError(error)}`);
    }
  }

  const stats: ImportStats = {
    churchesFetched: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesCreated: 0,
    errors: 0,
  };

  try {
    await runImport(args, stats, startTime);
    printSummary(stats, args.dryRun, Date.now() - startTime);
  } catch (error) {
    stats.errors++;
    throw error;
  } finally {
    // Runs on success and on failure so the job never stays in 'running'.
    await finalizeJob(args.jobId, stats);
  }
}

main()
  .catch((error: unknown) => {
    console.error('Fatal error:', error);
    // Set the exit code instead of calling process.exit(1), which would
    // terminate before the cleanup in .finally() below runs.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });