#!/usr/bin/env tsx
/**
 * Import Catholic churches and mass schedules from bohosluzby.cz (Czech Republic)
 *
 * bohosluzby.cz is the official Czech bishops' conference mass schedule finder.
 * It exposes a JSON API with two main endpoints:
 *   - POST /index.php/apiWeb/allData — returns all churches (clustered by zoom level)
 *   - GET  /index.php/apiWeb/detailById?id={id} — returns mass schedule details
 *
 * The API requires no authentication. We fetch all churches at zoom=7 (covers
 * all of Czech Republic in one request with clustered results), then fetch
 * individual detail pages for mass schedules.
 *
 * Import strategy:
 *   1. Fetch all churches via allData endpoint (zoom=7, centered on Czech Republic)
 *   2. Flatten clustered results to get individual church records
 *   3. For each church, fetch detail to get mass schedules
 *   4. Match against existing Czech churches via church-matcher
 *   5. Upsert churches and mass schedules
 *
 * Usage:
 *   npx tsx scripts/import-bohosluzby.ts --all --dry-run
 *   npx tsx scripts/import-bohosluzby.ts --all
 *   npx tsx scripts/import-bohosluzby.ts --id 10009 --dry-run   # Single church
 *   npx tsx scripts/import-bohosluzby.ts --all --resume-from 500
 */

import dotenv from 'dotenv';
import path from 'path';

// .env.local is loaded first so its values win over .env (dotenv does not
// overwrite variables that are already set).
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Mask the password portion of the URL before logging it.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);

const pool = new Pool({
  connectionString: dbUrl,
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

import { findDuplicateChurch } from '../src/lib/church-matcher';
import type { ExistingChurch } from '../src/lib/church-matcher';

// ─── Constants ───────────────────────────────────────────────────────────────

const BASE_URL = 'https://bohosluzby.cirkev.cz';
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
const REQUEST_DELAY_MS = 500; // Be polite — 0.5s between detail requests
const RETRY_DELAY_MS = 5000;
const MAX_RETRIES = 3;

// Czech Republic center coordinates for the allData request
const CZ_CENTER_LAT = 49.8;
const CZ_CENTER_LNG = 15.5;
const CZ_ZOOM = 7; // Returns all churches clustered into ~7 groups

// ─── Types ───────────────────────────────────────────────────────────────────

interface BohosluzbyChurch {
  id: string;
  name: string;
  street: string | null;
  city: string | null;
  psc: string | null; // zip code
  latitude: number;
  longitude: number;
  type: string; // KOSTEL, KAPLE, etc.
}

interface BohosluzbySchedule {
  dayOfWeek: number; // 0=Sunday, 1=Monday, ...
  time: string; // HH:MM
  language: string;
  type: string; // "mše sv.", "růženec", etc.
  note: string | null;
}

interface ImportStats {
  churchesFetched: number;
  detailsFetched: number;
  churchesMatched: number;
  churchesCreated: number;
  churchesSkipped: number;
  schedulesCreated: number;
  errors: number;
}

interface CLIArgs {
  all: boolean;
  dryRun: boolean;
  resumeFrom?: number;
  churchId?: string;
  jobId?: string;
}

// ─── Small helpers ───────────────────────────────────────────────────────────

/** Render a caught value for logging: the message for Errors, String(value) otherwise. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * True when a caught error looks like a unique-constraint violation.
 * Matches the message text (as before) and additionally the error code
 * `P2002` when the error object carries a `code` property.
 */
function isUniqueConstraintError(error: unknown): boolean {
  if (error instanceof Error && error.message.includes('Unique constraint')) return true;
  return typeof error === 'object' && error !== null && 'code' in error && error.code === 'P2002';
}

// ─── HTTP Client ─────────────────────────────────────────────────────────────

// Number of logical requests issued (retries of the same request are not counted).
let requestCount = 0;

/** Resolve after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Fetch a URL and parse the body as JSON, with throttling and retries.
 *
 * Every call after the first waits REQUEST_DELAY_MS before hitting the network.
 * HTTP 429/503 and thrown errors (network failures, JSON parse failures) are
 * retried up to MAX_RETRIES times with RETRY_DELAY_MS between attempts.
 *
 * @param url      Absolute URL to request.
 * @param options  Extra fetch options; caller headers override the default User-Agent.
 * @returns The parsed JSON body, or `null` on any non-OK status or after the
 *          retries are exhausted. The payload is not validated.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- upstream JSON shape is unvalidated
async function fetchWithRetry(url: string, options: RequestInit = {}): Promise<any> {
  if (requestCount > 0) {
    await delay(REQUEST_DELAY_MS);
  }
  requestCount++;

  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    const canRetry = attempt < MAX_RETRIES;
    try {
      const response = await fetch(url, {
        ...options,
        headers: {
          'User-Agent': USER_AGENT,
          ...options.headers,
        },
      });

      if (response.status === 503 || response.status === 429) {
        if (canRetry) {
          console.log(` HTTP ${response.status} — retrying in ${RETRY_DELAY_MS / 1000}s (attempt ${attempt}/${MAX_RETRIES})`);
          await delay(RETRY_DELAY_MS);
          continue;
        }
        console.error(` HTTP ${response.status} after ${MAX_RETRIES} attempts`);
        return null;
      }

      if (!response.ok) {
        console.error(` HTTP ${response.status} from ${url}`);
        return null;
      }

      return await response.json();
    } catch (error) {
      if (canRetry) {
        console.log(` Network error — retrying in ${RETRY_DELAY_MS / 1000}s (attempt ${attempt}/${MAX_RETRIES})`);
        await delay(RETRY_DELAY_MS);
        continue;
      }
      console.error(` API error after ${MAX_RETRIES} attempts: ${errorMessage(error)}`);
      return null;
    }
  }
  return null;
}

// ─── API Methods ─────────────────────────────────────────────────────────────

/** Convert one raw allData record (cluster head or sub-item) into a BohosluzbyChurch. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw API record, shape not validated
function toChurch(raw: any): BohosluzbyChurch {
  return {
    id: raw.id,
    name: raw.name,
    street: raw.street || null,
    city: raw.city || null,
    psc: raw.psc || null,
    // May yield NaN for missing/garbled values; processChurch skips such records.
    latitude: parseFloat(raw.latitude),
    longitude: parseFloat(raw.longitude),
    type: raw.type || 'KOSTEL',
  };
}

/**
 * Fetch all churches from the allData endpoint.
 * Returns clustered results at zoom=7 — we flatten the clusters to get
 * individual church records with id, name, lat, lng, city, street.
 *
 * @returns The flattened church list, or an empty array when the request fails.
 */
async function fetchAllChurches(): Promise<BohosluzbyChurch[]> {
  console.log('Fetching all churches from allData endpoint...');

  const params = new URLSearchParams();
  params.append('institutionTypes', "'KOSTEL'");
  params.append('latitude', String(CZ_CENTER_LAT));
  params.append('longitude', String(CZ_CENTER_LNG));
  params.append('zoom', String(CZ_ZOOM));

  const data = await fetchWithRetry(`${BASE_URL}/index.php/apiWeb/allData`, {
    method: 'POST',
    body: params,
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
  });

  if (!data) {
    console.error('Failed to fetch allData');
    return [];
  }

  const churches: BohosluzbyChurch[] = [];
  // The response is keyed by the quoted type name exactly as it was sent.
  const rawClusters = data["'KOSTEL'"];
  const clusters = Array.isArray(rawClusters) ? rawClusters : [];

  for (const cluster of clusters) {
    // Add the cluster representative
    churches.push(toChurch(cluster));

    // Add churches from the indices array (sub-items in the cluster)
    if (Array.isArray(cluster.indices)) {
      for (const sub of cluster.indices) {
        churches.push(toChurch(sub));
      }
    }
  }

  console.log(`Fetched ${churches.length} churches from allData`);
  return churches;
}

/**
 * Fetch mass schedule details for a single church.
 * Returns parsed regular mass schedules.
 *
 * Only entries whose service name is empty or contains "mše" are kept. Each
 * digit of `periodic_days` becomes one schedule row; digits outside 1..7 are
 * ignored, and duplicate (day, time) pairs are emitted only once.
 *
 * @param churchId  bohosluzby institution id.
 * @returns Schedules for the church; empty when the request fails or the
 *          response carries no `church` object.
 */
async function fetchChurchDetail(churchId: string): Promise<BohosluzbySchedule[]> {
  const data = await fetchWithRetry(
    `${BASE_URL}/index.php/apiWeb/detailById?id=${encodeURIComponent(churchId)}`,
  );
  if (!data || !data.church) return [];

  const schedules: BohosluzbySchedule[] = [];
  const seen = new Set<string>(); // "day:time" keys already emitted for this church
  const regular = Array.isArray(data.church.regular) ? data.church.regular : [];

  for (const entry of regular) {
    // Only import "mše sv." (Holy Mass) entries
    if (entry.chst_name && !entry.chst_name.includes('mše')) continue;

    const time = entry.cas; // Already in HH:MM format
    if (!time) continue;

    // Parse periodic_days: "12345" = Mon-Fri, "6" = Sat, "7" = Sun
    // Convert to our dayOfWeek: 0=Sun, 1=Mon, ..., 6=Sat
    const periodicDays = String(entry.periodic_days ?? '');
    for (const dayChar of periodicDays) {
      const bohosluzbyDay = Number.parseInt(dayChar, 10);
      // Reject non-digits and out-of-range digits: "0" would otherwise be
      // stored as Sunday and "8"/"9" as invalid weekdays.
      if (!Number.isInteger(bohosluzbyDay) || bohosluzbyDay < 1 || bohosluzbyDay > 7) continue;

      // bohosluzby: 1=Mon, 2=Tue, ..., 6=Sat, 7=Sun
      // Our format: 0=Sun, 1=Mon, ..., 6=Sat
      const dayOfWeek = bohosluzbyDay === 7 ? 0 : bohosluzbyDay;
      const key = `${dayOfWeek}:${time}`;

      // Deduplicate within this church
      if (seen.has(key)) continue;
      seen.add(key);

      schedules.push({
        dayOfWeek,
        time,
        language: entry.chsl_name || 'česky',
        type: entry.chst_name || 'mše sv.',
        note: entry.note || null,
      });
    }
  }

  return schedules;
}

// ─── Database Operations ─────────────────────────────────────────────────────

/** Load every church with country 'CZ', selecting the fields the matcher needs. */
async function loadExistingCzechChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing Czech churches for deduplication...');
  const churches = await prisma.church.findMany({
    where: { country: 'CZ' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });
  console.log(`Loaded ${churches.length} existing Czech churches`);
  return churches;
}

// ─── Import Logic ────────────────────────────────────────────────────────────

/**
 * Build massSchedule rows for a church.
 * The per-entry language reported by the API is not persisted; every row is
 * stored with language 'Czech'.
 */
function toScheduleRows(churchId: string, schedules: BohosluzbySchedule[]) {
  return schedules.map((s) => ({
    churchId,
    dayOfWeek: s.dayOfWeek,
    time: s.time,
    language: 'Czech',
  }));
}

/**
 * Import one church: fetch its schedules, match it against known churches and
 * either update the match or create a new record.
 *
 * In dry-run mode no detail request and no database write is made; only the
 * matched/created counters are updated.
 *
 * @param church            Church record from the bohosluzby API.
 * @param existingChurches  Known churches; newly created ones are appended so
 *                          later iterations can match against them.
 * @param dryRun            When true, report only.
 * @param stats             Counters, mutated in place.
 * @throws Re-throws non-unique-constraint errors from the update of a matched church.
 */
async function processChurch(
  church: BohosluzbyChurch,
  existingChurches: ExistingChurch[],
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  // Skip records without usable coordinates: the (0, 0) placeholder, or
  // NaN/Infinity produced by parseFloat on a missing or garbled value.
  const hasCoordinates =
    Number.isFinite(church.latitude) &&
    Number.isFinite(church.longitude) &&
    !(church.latitude === 0 && church.longitude === 0);
  if (!hasCoordinates) {
    stats.churchesSkipped++;
    return;
  }

  // Fetch mass schedules
  let schedules: BohosluzbySchedule[] = [];
  if (!dryRun) {
    schedules = await fetchChurchDetail(church.id);
    stats.detailsFetched++;
  }

  const candidate = {
    name: church.name,
    lat: church.latitude,
    lng: church.longitude,
    bohosluzbyId: church.id,
  };
  const duplicate = findDuplicateChurch(candidate, existingChurches);

  if (dryRun) {
    if (duplicate) {
      stats.churchesMatched++;
    } else {
      stats.churchesCreated++;
    }
    return;
  }

  if (duplicate) {
    stats.churchesMatched++;

    // Link the existing record to this source; fill the address only when empty.
    const updateData: { bohosluzbyId: string; address?: string } = { bohosluzbyId: church.id };
    if (!duplicate.address && church.street) updateData.address = church.street;

    try {
      await prisma.church.update({
        where: { id: duplicate.id },
        data: updateData,
      });
    } catch (error) {
      if (isUniqueConstraintError(error)) {
        stats.churchesSkipped++;
        return;
      }
      throw error;
    }

    if (schedules.length > 0) {
      try {
        // Replace the church's schedules atomically so a failure cannot leave
        // it with its old rows deleted and no new rows inserted.
        await prisma.$transaction(async (tx) => {
          await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
          await tx.massSchedule.createMany({ data: toScheduleRows(duplicate.id, schedules) });
          await tx.church.update({
            where: { id: duplicate.id },
            data: { lastScrapedAt: new Date() },
          });
        });
        stats.schedulesCreated += schedules.length;
      } catch (error) {
        stats.errors++;
        console.error(` Error saving schedules for ${church.id}: ${errorMessage(error)}`);
      }
    }
    return;
  }

  try {
    const newChurch = await prisma.church.create({
      data: {
        name: church.name,
        latitude: church.latitude,
        longitude: church.longitude,
        address: church.street,
        zip: church.psc,
        city: church.city,
        country: 'CZ',
        bohosluzbyId: church.id,
        source: 'bohosluzby',
        websiteLanguage: 'cs',
      },
    });
    stats.churchesCreated++;

    // Make the new church visible to the matcher for the rest of this run.
    existingChurches.push({
      id: newChurch.id,
      name: church.name,
      latitude: church.latitude,
      longitude: church.longitude,
      osmId: null,
      baiduId: null,
      masstimesId: null,
      orarimesseId: null,
      massSchedulesPhId: null,
      philmassId: null,
      horariosMisasId: null,
      mszeInfoId: null,
      weekdayMassesId: null,
      messesInfoId: null,
      bohosluzbyId: church.id,
      miserendId: null,
      kerknetId: null,
      gottesdienstzeitenId: null,
      source: 'bohosluzby',
      website: null,
      phone: null,
      address: church.street,
    });

    if (schedules.length > 0) {
      await prisma.massSchedule.createMany({ data: toScheduleRows(newChurch.id, schedules) });
      await prisma.church.update({
        where: { id: newChurch.id },
        data: { lastScrapedAt: new Date() },
      });
      stats.schedulesCreated += schedules.length;
    }
  } catch (error) {
    if (isUniqueConstraintError(error)) {
      stats.churchesSkipped++;
      return;
    }
    stats.errors++;
    console.error(` Error creating ${church.id}: ${errorMessage(error)}`);
  }
}

// ─── CLI ─────────────────────────────────────────────────────────────────────

/**
 * Parse process.argv into CLIArgs.
 *
 * Exits the process with code 0 after printing help, and with code 1 when
 * neither --all nor --id is given or when --resume-from is not a
 * non-negative integer. Unknown arguments are ignored.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = { all: false, dryRun: false };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--resume-from': {
        const raw = args[++i];
        const parsed = Number.parseInt(raw ?? '', 10);
        // A missing or non-numeric value used to become NaN, which the caller
        // treated as "no resume" and silently restarted from index 0.
        if (!Number.isInteger(parsed) || parsed < 0) {
          console.error(`Error: --resume-from expects a non-negative integer, got "${raw ?? ''}"`);
          process.exit(1);
        }
        result.resumeFrom = parsed;
        break;
      }
      case '--id':
        result.churchId = args[++i];
        break;
      case '--job-id':
        result.jobId = args[++i];
        break;
      case '--help':
      case '-h':
        console.log(`
Usage: npx tsx scripts/import-bohosluzby.ts [options]

Options:
  --all               Import all churches
  --id <id>           Import a single church by bohosluzby ID
  --dry-run           No database writes, just report what would happen
  --resume-from <n>   Skip first N churches
  --job-id <id>       Background job tracking ID
  --help, -h          Show this help message

Examples:
  npx tsx scripts/import-bohosluzby.ts --id 10009 --dry-run
  npx tsx scripts/import-bohosluzby.ts --all --dry-run
  npx tsx scripts/import-bohosluzby.ts --all
`);
        process.exit(0);
    }
  }

  if (!result.all && !result.churchId) {
    console.error('Error: specify --all or --id <id>');
    process.exit(1);
  }

  return result;
}

/** Format a millisecond duration as "Xh Ym Zs", "Ym Zs" or "Zs" (whole seconds, floored). */
function formatDuration(ms: number): string {
  const seconds = Math.floor(ms / 1000);
  const minutes = Math.floor(seconds / 60);
  const hours = Math.floor(minutes / 60);
  if (hours > 0) return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
  if (minutes > 0) return `${minutes}m ${seconds % 60}s`;
  return `${seconds}s`;
}

// ─── Main ────────────────────────────────────────────────────────────────────

/**
 * Run the import: parse arguments, load existing churches, obtain the source
 * church list (all churches or a single id), process each church sequentially
 * and print a summary. When --job-id is given the matching backgroundJob row
 * is updated at start and at the end on a best-effort basis.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  const startTime = Date.now();

  console.log('\n' + '='.repeat(70));
  console.log('BOHOSLUZBY.CZ (CZECH REPUBLIC) IMPORTER');
  console.log('='.repeat(70));
  console.log(`Mode: ${args.churchId ? `Church ID ${args.churchId}` : 'All churches'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from: church index ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch {
      /* Job might not exist */
    }
  }

  const stats: ImportStats = {
    churchesFetched: 0,
    detailsFetched: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesCreated: 0,
    errors: 0,
  };

  const existingChurches = await loadExistingCzechChurches();

  let churches: BohosluzbyChurch[];

  if (args.churchId) {
    // Single church mode — create a minimal record and fetch detail
    const single: BohosluzbyChurch = {
      id: args.churchId,
      name: `Church ${args.churchId}`,
      street: null,
      city: null,
      psc: null,
      latitude: 0,
      longitude: 0,
      type: 'KOSTEL',
    };

    // Fetch detail to get actual data. If this fails the record keeps (0, 0)
    // coordinates and processChurch will count it as skipped.
    const detail = await fetchWithRetry(
      `${BASE_URL}/index.php/apiWeb/detailById?id=${encodeURIComponent(args.churchId)}`,
    );
    const inst = detail?.church?.institution?.[0];
    if (inst) {
      single.name = inst.name || single.name;
      single.street = inst.street || null;
      single.city = inst.city || null;
      single.latitude = parseFloat(inst.latitude) || 0;
      single.longitude = parseFloat(inst.longitude) || 0;
    }
    churches = [single];
  } else {
    churches = await fetchAllChurches();
  }

  stats.churchesFetched = churches.length;

  if (args.resumeFrom) {
    churches = churches.slice(args.resumeFrom);
    console.log(`Resuming from index ${args.resumeFrom} (${churches.length} remaining)\n`);
  }

  console.log(`Processing ${churches.length} churches\n`);

  // Churches are processed strictly one at a time so the request throttle in
  // fetchWithRetry spaces out calls to the upstream API.
  for (const [i, church] of churches.entries()) {
    if (i % 100 === 0) {
      const elapsed = formatDuration(Date.now() - startTime);
      console.log(`[${i + 1}/${churches.length}] Processing ${church.name} (${church.id}) [${elapsed} elapsed]`);
    }

    try {
      await processChurch(church, existingChurches, args.dryRun, stats);
    } catch (error) {
      stats.errors++;
      console.error(` ERROR processing church ${church.id}: ${errorMessage(error)}`);
    }
  }

  const totalTime = Date.now() - startTime;

  console.log('\n' + '='.repeat(70));
  console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
  console.log('='.repeat(70));
  console.log(`Churches fetched: ${stats.churchesFetched}`);
  console.log(`Details fetched: ${stats.detailsFetched}`);
  console.log(` Matched (existing): ${stats.churchesMatched}`);
  console.log(` Created (new): ${stats.churchesCreated}`);
  console.log(` Skipped: ${stats.churchesSkipped}`);
  console.log(`Schedules created: ${stats.schedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Total time: ${formatDuration(totalTime)}`);
  console.log(`HTTP requests: ${requestCount}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: {
          status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
          completedAt: new Date(),
          processed: stats.churchesFetched,
          succeeded: stats.churchesCreated + stats.churchesMatched,
          failed: stats.errors,
          itemsFound: stats.schedulesCreated,
        },
      });
    } catch {
      /* Ignore */
    }
  }
}

main()
  .catch((error) => {
    console.error('Fatal error:', error);
    // Set the exit code instead of calling process.exit(1): exit() would end
    // the process immediately and skip the cleanup in .finally() below.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });