([\s\S]*?)<\/tr>/gi);
+ if (!rows) return schedules;
+
+ for (const row of rows) {
+ // Extract all | cells
+ const cells = row.match(/ | ([\s\S]*?)<\/td>/gi);
+ if (!cells) continue;
+
+ for (let colIndex = 0; colIndex < cells.length && colIndex < 7; colIndex++) {
+ const dayOfWeek = colIndex; // 0=Sun, 1=Mon, ..., 6=Sat
+
+ // Extract time from 5:00 AM - 6:00 AM
+ const timeMatch = cells[colIndex].match(/\s*(\d{1,2}:\d{2}\s*[AP]M)/i);
+ if (!timeMatch) continue;
+
+ const time = convertTo24Hour(timeMatch[1].trim());
+ if (!time) continue;
+
+ const key = `${dayOfWeek}:${time}`;
+ if (seen.has(key)) continue;
+ seen.add(key);
+
+ schedules.push({ dayOfWeek, time });
+ }
+ }
+
+ return schedules;
+}
+
+/**
+ * Convert a 12-hour clock string to zero-padded 24-hour "HH:MM".
+ *
+ * Examples: "5:00 AM" → "05:00", "6:30 PM" → "18:30", "12:15 AM" → "00:15".
+ *
+ * @param timeStr - Time such as "5:00 AM". The AM/PM marker is case-insensitive
+ *   and may be separated from the digits by optional whitespace.
+ * @returns The 24-hour time, or null when the input does not have the expected
+ *   shape, the hour is above 12, or the minutes are above 59.
+ */
+function convertTo24Hour(timeStr: string): string | null {
+  const match = timeStr.match(/^(\d{1,2}):(\d{2})\s*(AM|PM)$/i);
+  if (!match) return null;
+
+  let hours = parseInt(match[1], 10);
+  const minutes = match[2];
+  const period = match[3].toUpperCase();
+
+  // The pattern alone accepts values that are not real clock times:
+  // "13:00 PM" would become "25:00" and "9:75 AM" would become "09:75".
+  if (hours > 12 || parseInt(minutes, 10) > 59) return null;
+
+  if (period === 'AM' && hours === 12) hours = 0;
+  if (period === 'PM' && hours !== 12) hours += 12;
+
+  return `${String(hours).padStart(2, '0')}:${minutes}`;
+}
+
+/**
+ * Extract the church's coordinates from the inline JavaScript of a map page.
+ *
+ * The page assigns them as quoted strings, e.g.
+ * `ms.ui.church.params.lat = '14.598815'`.
+ *
+ * @param html - Raw HTML of the map page.
+ * @returns The parsed position, or null when either assignment is missing,
+ *   a value is not a finite number, a value is exactly 0, or the pair lies
+ *   outside the valid latitude (±90) / longitude (±180) range.
+ */
+function parseCoordinates(html: string): { lat: number; lng: number } | null {
+  const latMatch = html.match(/ms\.ui\.church\.params\.lat\s*=\s*'([^']+)'/);
+  const lngMatch = html.match(/ms\.ui\.church\.params\.lng\s*=\s*'([^']+)'/);
+
+  if (!latMatch || !lngMatch) return null;
+
+  const lat = parseFloat(latMatch[1]);
+  const lng = parseFloat(lngMatch[1]);
+
+  // Number.isFinite also rejects "Infinity", which an isNaN check lets through.
+  if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null;
+  if (lat === 0 || lng === 0) return null;
+  if (Math.abs(lat) > 90 || Math.abs(lng) > 180) return null;
+
+  return { lat, lng };
+}
+
+// ─── Database Operations ─────────────────────────────────────────────────────
+
+/**
+ * Load every church already stored with country 'PH', selecting the columns
+ * used for duplicate detection (name, coordinates, per-source IDs) and for
+ * deciding which empty contact fields an import may fill in.
+ *
+ * @returns The matching church rows.
+ */
+async function loadExistingPhilippineChurches(): Promise<ExistingChurch[]> {
+  console.log('Loading existing Philippine churches for deduplication...');
+  const churches = await prisma.church.findMany({
+    where: { country: 'PH' },
+    select: {
+      id: true,
+      name: true,
+      latitude: true,
+      longitude: true,
+      osmId: true,
+      baiduId: true,
+      masstimesId: true,
+      orarimesseId: true,
+      massSchedulesPhId: true,
+      philmassId: true,
+      horariosMisasId: true,
+      mszeInfoId: true,
+      weekdayMassesId: true,
+      messesInfoId: true,
+      bohosluzbyId: true,
+      miserendId: true,
+      kerknetId: true,
+      gottesdienstzeitenId: true,
+      discovermassId: true,
+      source: true,
+      website: true,
+      phone: true,
+      address: true,
+    },
+  });
+  console.log(`Loaded ${churches.length} existing Philippine churches`);
+  return churches;
+}
+
+// ─── Import Logic ────────────────────────────────────────────────────────────
+
+/**
+ * Import a single church from the sitemap.
+ *
+ * Fetches and parses the church page, fetches the linked map page for
+ * coordinates, parses the schedule table, then either updates the matching
+ * existing church or creates a new one. Entries without a name or without
+ * coordinates are counted as skipped.
+ *
+ * @param sitemapEntry - Church ID and page URL to import.
+ * @param existingChurches - In-memory dedup list; a newly created church is
+ *   appended so later entries in the same run can match against it.
+ * @param dryRun - When true, only log and count what would happen; no DB writes.
+ * @param skipSchedules - When true, the schedule table is not parsed and no
+ *   mass schedules are written.
+ * @param stats - Running counters, mutated in place.
+ * @throws Re-throws errors from the church update/create calls unless their
+ *   message contains 'Unique constraint' (those are counted as skipped).
+ *   Failures while replacing the schedules of an existing church are logged
+ *   and counted in `stats.errors` instead of being thrown.
+ */
+async function processChurch(
+  sitemapEntry: SitemapChurch,
+  existingChurches: ExistingChurch[],
+  dryRun: boolean,
+  skipSchedules: boolean,
+  stats: ImportStats,
+): Promise<void> {
+  stats.churchesFound++;
+
+  // Fetch church page
+  const churchHtml = await fetchPage(sitemapEntry.url);
+  if (!churchHtml) {
+    stats.errors++;
+    return;
+  }
+
+  const parsed = parseChurchPage(churchHtml);
+  if (!parsed.name) {
+    console.log(` Skipping ${sitemapEntry.id}: no name found`);
+    stats.churchesSkipped++;
+    return;
+  }
+
+  // Fetch coordinates from map page
+  let coords: { lat: number; lng: number } | null = null;
+  if (parsed.mapUrl) {
+    const mapHtml = await fetchPage(parsed.mapUrl);
+    if (mapHtml) {
+      coords = parseCoordinates(mapHtml);
+    }
+  }
+
+  if (!coords) {
+    console.log(` Skipping ${sitemapEntry.id} (${parsed.name}): no coordinates`);
+    stats.churchesSkipped++;
+    return;
+  }
+
+  // Parse schedule (left empty when schedules are skipped)
+  const schedules = skipSchedules ? [] : parseScheduleTable(churchHtml);
+
+  // Build candidate for dedup
+  const candidate = {
+    name: parsed.name,
+    lat: coords.lat,
+    lng: coords.lng,
+    massSchedulesPhId: sitemapEntry.id,
+  };
+
+  const duplicate = findDuplicateChurch(candidate, existingChurches);
+
+  if (dryRun) {
+    // Report and count what a real run would do, then stop before any write.
+    if (duplicate) {
+      stats.churchesMatched++;
+      console.log(` [MATCH] ${sitemapEntry.id}: "${parsed.name}" → existing "${duplicate.name}" (${duplicate.id})`);
+    } else {
+      stats.churchesCreated++;
+      console.log(` [NEW] ${sitemapEntry.id}: "${parsed.name}" at ${coords.lat},${coords.lng}`);
+    }
+    if (schedules.length > 0) {
+      stats.schedulesProcessed++;
+      stats.massSchedulesCreated += schedules.length;
+    }
+    return;
+  }
+
+  if (duplicate) {
+    // Update existing church: always link the source ID, and fill in only
+    // the fields that are currently empty.
+    stats.churchesMatched++;
+    const updateData: Record<string, unknown> = {
+      massSchedulesPhId: sitemapEntry.id,
+    };
+
+    if (!duplicate.address && parsed.address) updateData.address = parsed.address;
+    if (!duplicate.phone && parsed.phone) updateData.phone = parsed.phone;
+
+    // Fill city/state from breadcrumbs; these columns are not part of the
+    // in-memory dedup record, so read them from the database first.
+    const dbRecord = await prisma.church.findUnique({
+      where: { id: duplicate.id },
+      select: { city: true, state: true },
+    });
+    if (dbRecord && !dbRecord.city && parsed.city) updateData.city = parsed.city;
+    if (dbRecord && !dbRecord.state && parsed.region) updateData.state = parsed.region;
+
+    try {
+      await prisma.church.update({
+        where: { id: duplicate.id },
+        data: updateData,
+      });
+    } catch (error) {
+      if (error instanceof Error && error.message.includes('Unique constraint')) {
+        stats.churchesSkipped++;
+        return;
+      }
+      throw error;
+    }
+
+    // Replace mass schedules atomically: delete, insert, stamp lastScrapedAt.
+    if (schedules.length > 0 && !skipSchedules) {
+      try {
+        await prisma.$transaction(async (tx) => {
+          await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
+          await tx.massSchedule.createMany({
+            data: schedules.map((s) => ({
+              churchId: duplicate.id,
+              dayOfWeek: s.dayOfWeek,
+              time: s.time,
+              language: 'English',
+            })),
+          });
+          await tx.church.update({
+            where: { id: duplicate.id },
+            data: { lastScrapedAt: new Date() },
+          });
+        });
+        stats.schedulesProcessed++;
+        stats.massSchedulesCreated += schedules.length;
+      } catch (error) {
+        stats.errors++;
+        console.error(` Error saving schedules for ${sitemapEntry.id}: ${error instanceof Error ? error.message : error}`);
+      }
+    }
+  } else {
+    // Create new church
+    try {
+      const newChurch = await prisma.church.create({
+        data: {
+          name: parsed.name,
+          latitude: coords.lat,
+          longitude: coords.lng,
+          address: parsed.address,
+          city: parsed.city || null,
+          state: parsed.region || null,
+          country: 'PH',
+          phone: parsed.phone,
+          hasWebsite: false,
+          massSchedulesPhId: sitemapEntry.id,
+          source: 'mass-schedules-ph',
+        },
+      });
+      stats.churchesCreated++;
+
+      // Add to in-memory array for within-run dedup
+      existingChurches.push({
+        id: newChurch.id,
+        name: parsed.name,
+        latitude: coords.lat,
+        longitude: coords.lng,
+        osmId: null,
+        baiduId: null,
+        masstimesId: null,
+        orarimesseId: null,
+        massSchedulesPhId: sitemapEntry.id,
+        philmassId: null,
+        horariosMisasId: null,
+        mszeInfoId: null,
+        weekdayMassesId: null,
+        messesInfoId: null,
+        bohosluzbyId: null,
+        miserendId: null,
+        kerknetId: null,
+        gottesdienstzeitenId: null,
+        discovermassId: null,
+        source: 'mass-schedules-ph',
+        website: null,
+        phone: parsed.phone,
+        address: parsed.address,
+      });
+
+      // Create mass schedules
+      if (schedules.length > 0 && !skipSchedules) {
+        await prisma.massSchedule.createMany({
+          data: schedules.map((s) => ({
+            churchId: newChurch.id,
+            dayOfWeek: s.dayOfWeek,
+            time: s.time,
+            language: 'English',
+          })),
+        });
+        await prisma.church.update({
+          where: { id: newChurch.id },
+          data: { lastScrapedAt: new Date() },
+        });
+        stats.schedulesProcessed++;
+        stats.massSchedulesCreated += schedules.length;
+      }
+    } catch (error) {
+      if (error instanceof Error && error.message.includes('Unique constraint')) {
+        stats.churchesSkipped++;
+        return;
+      }
+      throw error;
+    }
+  }
+}
+
+// ─── CLI ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Parse the command-line flags of the mass-schedules importer.
+ *
+ * Prints usage and exits with status 0 for --help / -h. Exits with status 1
+ * when a flag that needs a value has none, when --resume-from is not a
+ * non-negative integer, or when neither --all nor --church-id was given.
+ * Unrecognised arguments are ignored.
+ *
+ * @returns The parsed options.
+ */
+function parseArgs(): CLIArgs {
+  const args = process.argv.slice(2);
+  const result: CLIArgs = {
+    all: false,
+    dryRun: false,
+    skipSchedules: false,
+  };
+
+  let i = 0;
+  // Consume the argument after a flag, failing fast when it is missing or is
+  // itself another flag (previously this silently produced undefined / NaN).
+  const takeValue = (flag: string): string => {
+    const value = args[++i];
+    if (value === undefined || value.startsWith('--')) {
+      console.error(`Error: ${flag} requires a value`);
+      process.exit(1);
+    }
+    return value;
+  };
+
+  for (; i < args.length; i++) {
+    switch (args[i]) {
+      case '--all':
+        result.all = true;
+        break;
+      case '--church-id':
+        result.churchId = takeValue('--church-id');
+        break;
+      case '--dry-run':
+        result.dryRun = true;
+        break;
+      case '--skip-schedules':
+        result.skipSchedules = true;
+        break;
+      case '--resume-from': {
+        const resumeFrom = parseInt(takeValue('--resume-from'), 10);
+        if (Number.isNaN(resumeFrom) || resumeFrom < 0) {
+          console.error('Error: --resume-from must be a non-negative integer');
+          process.exit(1);
+        }
+        result.resumeFrom = resumeFrom;
+        break;
+      }
+      case '--job-id':
+        result.jobId = takeValue('--job-id');
+        break;
+      case '--help':
+      case '-h':
+        console.log(`
+Usage: npx tsx scripts/import-mass-schedules-ph.ts [options]
+
+Options:
+  --all               Import all churches from sitemap
+  --church-id <id>    Import a single church by ID (e.g. "34")
+  --dry-run           No database writes, just report what would happen
+  --skip-schedules    Skip mass schedule import (churches only)
+  --resume-from <id>  Skip churches with ID less than this value
+  --job-id <id>       Background job tracking ID
+  --help, -h          Show this help message
+
+Examples:
+  npx tsx scripts/import-mass-schedules-ph.ts --church-id 34 --dry-run
+  npx tsx scripts/import-mass-schedules-ph.ts --all
+  npx tsx scripts/import-mass-schedules-ph.ts --all --skip-schedules
+  npx tsx scripts/import-mass-schedules-ph.ts --all --resume-from 500
+`);
+        process.exit(0);
+    }
+  }
+
+  if (!result.all && !result.churchId) {
+    console.error('Error: specify --all or --church-id <id>');
+    process.exit(1);
+  }
+
+  return result;
+}
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/**
+ * Render a millisecond span as "Xh Ym Zs", dropping the leading units that
+ * are zero (e.g. "1m 1s", "59s"). Fractions of a second are truncated.
+ */
+function formatDuration(ms: number): string {
+  const totalSeconds = Math.floor(ms / 1000);
+  const totalMinutes = Math.floor(totalSeconds / 60);
+  const wholeHours = Math.floor(totalMinutes / 60);
+
+  if (wholeHours > 0) {
+    return `${wholeHours}h ${totalMinutes % 60}m ${totalSeconds % 60}s`;
+  }
+  if (totalMinutes > 0) {
+    return `${totalMinutes}m ${totalSeconds % 60}s`;
+  }
+  return `${totalSeconds}s`;
+}
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+/**
+ * Entry point of the importer: parse the CLI flags, build the list of
+ * churches to import (one ID, or everything in the sitemap), process them
+ * one at a time, print a summary, and record the outcome on the background
+ * job when a job ID was supplied.
+ */
+async function main() {
+  const args = parseArgs();
+  const startTime = Date.now();
+  const rule = '='.repeat(70);
+  const { jobId, resumeFrom } = args;
+
+  let modeLabel: string;
+  if (args.all) {
+    modeLabel = 'All churches from sitemap';
+  } else {
+    modeLabel = `Single church: ${args.churchId}`;
+  }
+
+  console.log('\n' + rule);
+  console.log('MASS-SCHEDULES.COM (PHILIPPINES) IMPORTER');
+  console.log(rule);
+  console.log(`Mode: ${modeLabel}`);
+  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
+  console.log(`Skip schedules: ${args.skipSchedules ? 'YES' : 'NO'}`);
+  if (resumeFrom) console.log(`Resume from ID: ${resumeFrom}`);
+  console.log(`Time: ${new Date().toISOString()}`);
+  console.log(rule + '\n');
+
+  // Mark the background job as running; a failure here is deliberately
+  // ignored because the job row might not exist yet.
+  if (jobId) {
+    try {
+      await prisma.backgroundJob.update({
+        where: { id: jobId },
+        data: { status: 'running', startedAt: new Date() },
+      });
+    } catch {
+      // Job might not exist yet
+    }
+  }
+
+  // Existing Philippine churches are the dedup baseline for the whole run.
+  const existingChurches = await loadExistingPhilippineChurches();
+
+  let churchesToProcess: SitemapChurch[];
+  if (args.churchId) {
+    // Single-church mode builds the page URL directly and never reads the sitemap.
+    const single: SitemapChurch = {
+      id: args.churchId,
+      slug: 'church',
+      url: `${SITE_BASE}/catholic-church/${args.churchId}/church.html`,
+    };
+    churchesToProcess = [single];
+    console.log(`Single church mode: ID ${args.churchId}\n`);
+  } else {
+    // Full mode takes every church URL listed in the sitemap.
+    churchesToProcess = await fetchChurchUrlsFromSitemap();
+    console.log(`Found ${churchesToProcess.length} unique church URLs in sitemap\n`);
+  }
+
+  // --resume-from drops every church whose numeric ID is below the threshold.
+  if (resumeFrom) {
+    const totalBefore = churchesToProcess.length;
+    churchesToProcess = churchesToProcess.filter((entry) => parseInt(entry.id) >= resumeFrom);
+    console.log(`Resuming from ID ${resumeFrom} (skipping ${totalBefore - churchesToProcess.length} churches)\n`);
+  }
+
+  const stats: ImportStats = {
+    churchesFound: 0,
+    churchesMatched: 0,
+    churchesCreated: 0,
+    churchesSkipped: 0,
+    schedulesProcessed: 0,
+    massSchedulesCreated: 0,
+    errors: 0,
+  };
+
+  // Churches are handled sequentially; one failure is counted and logged
+  // without aborting the rest of the run.
+  for (const [index, church] of churchesToProcess.entries()) {
+    const elapsed = formatDuration(Date.now() - startTime);
+    console.log(`[${index + 1}/${churchesToProcess.length}] Church ID ${church.id} [${elapsed} elapsed]`);
+
+    try {
+      await processChurch(church, existingChurches, args.dryRun, args.skipSchedules, stats);
+    } catch (error) {
+      stats.errors++;
+      console.error(` ERROR processing church ${church.id}: ${error instanceof Error ? error.message : error}`);
+    }
+  }
+
+  // Print summary
+  const totalTime = Date.now() - startTime;
+  const summaryLines = [
+    '\n' + rule,
+    `IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`,
+    rule,
+    `Churches found: ${stats.churchesFound}`,
+    ` Matched (existing): ${stats.churchesMatched}`,
+    ` Created (new): ${stats.churchesCreated}`,
+    ` Skipped: ${stats.churchesSkipped}`,
+    `Schedules processed: ${stats.schedulesProcessed}`,
+    `Mass schedules created: ${stats.massSchedulesCreated}`,
+    `Errors: ${stats.errors}`,
+    `Total time: ${formatDuration(totalTime)}`,
+    `HTTP requests: ${requestCount}`,
+    rule + '\n',
+  ];
+  for (const line of summaryLines) {
+    console.log(line);
+  }
+
+  // Record the final status on the background job; failures are ignored.
+  if (jobId) {
+    let finalStatus = 'completed';
+    if (stats.errors > 0) {
+      finalStatus = 'completed_with_errors';
+    }
+    try {
+      await prisma.backgroundJob.update({
+        where: { id: jobId },
+        data: {
+          status: finalStatus,
+          completedAt: new Date(),
+          result: JSON.stringify(stats),
+        },
+      });
+    } catch {
+      // Ignore
+    }
+  }
+}
+
+// Run the importer. A fatal error is reported through process.exitCode
+// instead of process.exit(): exit() ends the process immediately, so the
+// cleanup in finally() (Prisma disconnect, pool shutdown) would never run.
+main()
+  .catch((error) => {
+    console.error('Fatal error:', error);
+    process.exitCode = 1;
+  })
+  .finally(async () => {
+    await prisma.$disconnect();
+    await pool.end();
+  });
diff --git a/scripts/import-masstimes-api.ts b/scripts/import-masstimes-api.ts
new file mode 100644
index 0000000..bcc8c33
--- /dev/null
+++ b/scripts/import-masstimes-api.ts
@@ -0,0 +1,672 @@
+#!/usr/bin/env tsx
+/**
+ * Import Catholic churches and mass schedules globally from masstimes.org API
+ *
+ * masstimes.org has ~121,000 churches worldwide. This script queries their
+ * geo-search API with a grid of coordinates covering world landmass, then
+ * deduplicates and imports the results.
+ *
+ * API: GET https://masstimes.org/Churchs/?lat={lat}&long={lng}&pg={page}
+ * - Requires Referer header
+ * - Returns 30 results per page within 100-mile (~160km) radius
+ * - Paginate until empty array
+ *
+ * Grid strategy:
+ * - 2.5° latitude spacing (~278km), longitude adjusted for latitude
+ * - Continental bounding boxes to skip oceans
+ * - 100-mile radius means ~322km diameter — 2.5° spacing ensures overlap
+ *
+ * Usage:
+ * npx tsx scripts/import-masstimes-api.ts --all
+ * npx tsx scripts/import-masstimes-api.ts --all --dry-run
+ * npx tsx scripts/import-masstimes-api.ts --region europe
+ * npx tsx scripts/import-masstimes-api.ts --all --skip-us
+ * npx tsx scripts/import-masstimes-api.ts --all --job-id {uuid}
+ */
+
+import dotenv from 'dotenv';
+import path from 'path';
+
+dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
+dotenv.config({ path: path.resolve(process.cwd(), '.env') });
+
+import { Pool } from 'pg';
+import { PrismaPg } from '@prisma/adapter-pg';
+import { PrismaClient } from '@prisma/client';
+
+const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass'; // falls back to this local URL when DATABASE_URL is unset or empty
+console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`); // the ":password@" part of the URL is masked before logging
+const pool = new Pool({
+ connectionString: dbUrl,
+ ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined, // TLS without certificate verification when the URL contains "neon"; no ssl option otherwise
+});
+const adapter = new PrismaPg(pool); // Prisma adapter built on the pg pool above
+const prisma = new PrismaClient({ adapter }); // shared client used by every database call in this script
+
+import { findDuplicateChurch } from '../src/lib/church-matcher';
+import type { ExistingChurch } from '../src/lib/church-matcher';
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+const API_BASE = 'https://masstimes.org/Churchs/'; // geo-search endpoint; fetchPage appends ?lat=…&long=…&pg=…
+const REFERER = 'https://masstimes.org/map'; // sent as the Referer header, which the API requires
+const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)'; // sent as the User-Agent header on every request
+const RATE_LIMIT_MS = 2000; // 2 seconds between requests — respectful rate
+const PAGE_SIZE = 30; // results per API page; a shorter page is treated as the last one
+const LAT_SPACING = 2.5; // degrees (~278km)
+const TARGET_LNG_SPACING_KM = 250; // target spacing in km
+
+// Country name → ISO code mapping for masstimes country names.
+// Keys are lower-case because resolveCountryCode trims and lower-cases the
+// name before the lookup; some countries appear under several spellings.
+const COUNTRY_CODE_MAP: Record<string, string> = {
+  'united states': 'US', 'canada': 'CA', 'mexico': 'MX',
+  'united kingdom': 'GB', 'ireland': 'IE', 'france': 'FR', 'germany': 'DE',
+  'spain': 'ES', 'italy': 'IT', 'portugal': 'PT', 'netherlands': 'NL',
+  'belgium': 'BE', 'luxembourg': 'LU', 'switzerland': 'CH', 'austria': 'AT',
+  'poland': 'PL', 'czech republic': 'CZ', 'czechia': 'CZ', 'slovakia': 'SK',
+  'hungary': 'HU', 'croatia': 'HR', 'slovenia': 'SI', 'romania': 'RO',
+  'bulgaria': 'BG', 'serbia': 'RS', 'bosnia and herzegovina': 'BA',
+  'montenegro': 'ME', 'north macedonia': 'MK', 'albania': 'AL', 'kosovo': 'XK',
+  'greece': 'GR', 'cyprus': 'CY', 'malta': 'MT', 'denmark': 'DK',
+  'sweden': 'SE', 'norway': 'NO', 'finland': 'FI', 'iceland': 'IS',
+  'estonia': 'EE', 'latvia': 'LV', 'lithuania': 'LT',
+  'ukraine': 'UA', 'russia': 'RU', 'belarus': 'BY', 'moldova': 'MD',
+  'georgia': 'GE', 'armenia': 'AM', 'azerbaijan': 'AZ',
+  'turkey': 'TR', 'israel': 'IL', 'jordan': 'JO', 'lebanon': 'LB',
+  'egypt': 'EG', 'morocco': 'MA', 'tunisia': 'TN', 'algeria': 'DZ',
+  'india': 'IN', 'sri lanka': 'LK', 'pakistan': 'PK', 'bangladesh': 'BD',
+  'nepal': 'NP', 'myanmar': 'MM', 'thailand': 'TH', 'vietnam': 'VN',
+  'cambodia': 'KH', 'laos': 'LA', 'malaysia': 'MY', 'singapore': 'SG',
+  'indonesia': 'ID', 'philippines': 'PH', 'china': 'CN', 'japan': 'JP',
+  'south korea': 'KR', 'korea, south': 'KR', 'taiwan': 'TW',
+  'hong kong': 'HK', 'macau': 'MO', 'mongolia': 'MN',
+  'australia': 'AU', 'new zealand': 'NZ', 'fiji': 'FJ',
+  'papua new guinea': 'PG', 'samoa': 'WS', 'tonga': 'TO', 'guam': 'GU',
+  'nigeria': 'NG', 'ghana': 'GH', 'kenya': 'KE', 'tanzania': 'TZ',
+  'uganda': 'UG', 'south africa': 'ZA', 'cameroon': 'CM', 'senegal': 'SN',
+  'ethiopia': 'ET', 'madagascar': 'MG', 'mozambique': 'MZ',
+  'zambia': 'ZM', 'zimbabwe': 'ZW', 'malawi': 'MW', 'rwanda': 'RW',
+  'burundi': 'BI', 'congo, democratic republic of the': 'CD',
+  'congo, republic of the': 'CG', "côte d'ivoire": 'CI', 'ivory coast': 'CI',
+  'burkina faso': 'BF', 'mali': 'ML', 'niger': 'NE', 'chad': 'TD',
+  'central african republic': 'CF', 'gabon': 'GA', 'equatorial guinea': 'GQ',
+  'angola': 'AO', 'namibia': 'NA', 'botswana': 'BW', 'lesotho': 'LS',
+  'eswatini': 'SZ', 'swaziland': 'SZ', 'mauritius': 'MU',
+  'brazil': 'BR', 'argentina': 'AR', 'colombia': 'CO', 'peru': 'PE',
+  'chile': 'CL', 'venezuela': 'VE', 'ecuador': 'EC', 'bolivia': 'BO',
+  'paraguay': 'PY', 'uruguay': 'UY', 'guyana': 'GY', 'suriname': 'SR',
+  'trinidad and tobago': 'TT', 'jamaica': 'JM', 'barbados': 'BB',
+  'bahamas': 'BS', 'bahamas, the': 'BS', 'haiti': 'HT',
+  'dominican republic': 'DO', 'cuba': 'CU', 'puerto rico': 'PR',
+  'guatemala': 'GT', 'honduras': 'HN', 'el salvador': 'SV',
+  'nicaragua': 'NI', 'costa rica': 'CR', 'panama': 'PA', 'belize': 'BZ',
+  'grenada': 'GD', 'saint lucia': 'LC', 'dominica': 'DM',
+  'saint vincent and the grenadines': 'VC', 'antigua and barbuda': 'AG',
+  'saint kitts and nevis': 'KN', 'bermuda': 'BM', 'cayman islands': 'KY',
+  'aruba': 'AW', 'curaçao': 'CW', 'curacao': 'CW',
+  'united arab emirates': 'AE', 'saudi arabia': 'SA', 'qatar': 'QA',
+  'bahrain': 'BH', 'kuwait': 'KW', 'oman': 'OM', 'iraq': 'IQ',
+  'iran': 'IR', 'afghanistan': 'AF',
+  'kazakhstan': 'KZ', 'uzbekistan': 'UZ', 'kyrgyzstan': 'KG',
+  'tajikistan': 'TJ', 'turkmenistan': 'TM',
+  'liechtenstein': 'LI', 'monaco': 'MC', 'andorra': 'AD', 'san marino': 'SM',
+  'vatican city': 'VA', 'holy see (vatican city)': 'VA',
+  'east timor': 'TL', 'timor-leste': 'TL',
+};
+
+// Continental bounding boxes, each a [lat_min, lat_max, lng_min, lng_max]
+// tuple in degrees. A region may list several boxes, and boxes of different
+// regions overlap; generateGridPoints drops the duplicate grid points.
+const REGIONS: Record<string, Array<[number, number, number, number]>> = {
+  'north-america': [[7, 72, -170, -50]],
+  'central-america': [[7, 24, -120, -60]],
+  'south-america': [[-56, 13, -82, -34]],
+  'europe': [[35, 72, -12, 45]],
+  'eastern-europe': [[40, 70, 20, 60]],
+  'africa': [[-36, 38, -20, 55]],
+  'middle-east': [[12, 42, 25, 65]],
+  'south-asia': [[5, 38, 60, 98]],
+  'east-asia': [[18, 55, 95, 150]],
+  'southeast-asia': [[-12, 22, 92, 142]],
+  'oceania': [[-48, -8, 110, 180], [-22, 0, 160, 180]],
+  'central-asia': [[35, 55, 45, 90]],
+};
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+interface MasstimesChurch { // one church record as returned by the masstimes.org geo-search API
+ id: string; // masstimes identifier; stored as masstimesId and used to de-duplicate results across grid points
+ name: string;
+ latitude: string; // numeric string; parsed with parseFloat before use
+ longitude: string; // numeric string; parsed with parseFloat before use
+ church_address_street_address: string;
+ church_address_city_name: string; // stored as city
+ church_address_providence_name: string; // stored as state ("providence" is the API's own field name)
+ church_address_postal_code: string; // stored as zip
+ church_address_country_territory_name: string; // country name; mapped to a code by resolveCountryCode
+ church_address_county: string | null;
+ diocese_name: string;
+ phone_number: string;
+ email: string;
+ url: string; // church website; a non-blank value also sets hasWebsite
+ pastors_name: string;
+ church_worship_times: MasstimesWorshipTime[]; // converted to mass schedules by parseWorshipTimes
+ distance: string;
+ wheel_chair_access: boolean; // stored as wheelchairAccess
+}
+
+interface MasstimesWorshipTime { // one service-time entry attached to a MasstimesChurch
+ day_of_week: string; // looked up (trimmed, lower-cased) in DAY_MAP; entries with an unknown value are dropped
+ time_start: string; // parsed as "HH:MM:SS"; blank or "00:00:00" entries are dropped
+ time_end: string;
+ language: string | null; // blank or null is imported as 'Unknown'
+ service_typename: string; // only 'Weekend' and 'Week Days' entries are imported
+ comment: string; // stored as the schedule's notes when non-blank
+ is_perpetual: boolean;
+}
+
+interface ImportStats { // running counters for one import run
+ gridPoints: number; // grid points visited so far, including those skipped by --resume-from
+ apiRequests: number; // page fetches issued by fetchAllForPoint
+ churchesDiscovered: number; // distinct masstimes IDs seen across all grid points
+ churchesMatched: number; // churches matched to an existing row, by masstimes ID or by findDuplicateChurch
+ churchesCreated: number; // new churches created (in a dry run: churches that would be created)
+ churchesSkipped: number; // writes abandoned because of a unique-constraint conflict
+ massSchedulesCreated: number; // schedule rows created together with new churches
+ errors: number;
+}
+
+interface CLIArgs { // parsed command-line options of the masstimes importer
+ all: boolean; // --all: query every region in REGIONS
+ region?: string; // --region: a single key of REGIONS
+ dryRun: boolean; // --dry-run: no database writes
+ skipUs: boolean; // --skip-us: omit grid points inside the US bounding box
+ resumeFrom: number; // --resume-from: number of leading grid points to skip (0 = start from the beginning)
+ jobId?: string; // --job-id: background job row to update
+}
+
+// ─── CLI ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Parse the command-line flags of the masstimes importer.
+ *
+ * Prints usage and exits with status 0 for --help. Exits with status 1 when
+ * a flag that needs a value has none, when --resume-from is not a
+ * non-negative integer, when neither --all nor --region was given, or when
+ * --region (without --all) names a region that is not in REGIONS.
+ * Unrecognised arguments are ignored.
+ *
+ * @returns The parsed options.
+ */
+function parseArgs(): CLIArgs {
+  const args = process.argv.slice(2);
+  const result: CLIArgs = { all: false, dryRun: false, skipUs: false, resumeFrom: 0 };
+  const regionNames = Object.keys(REGIONS).join(', ');
+
+  let i = 0;
+  // Consume the argument after a flag, failing fast when it is missing or is
+  // itself another flag.
+  const takeValue = (flag: string): string => {
+    const value = args[++i];
+    if (value === undefined || value.startsWith('--')) {
+      console.error(`Error: ${flag} requires a value`);
+      process.exit(1);
+    }
+    return value;
+  };
+
+  for (; i < args.length; i++) {
+    switch (args[i]) {
+      case '--all': result.all = true; break;
+      case '--region': result.region = takeValue('--region'); break;
+      case '--dry-run': result.dryRun = true; break;
+      case '--skip-us': result.skipUs = true; break;
+      case '--resume-from': {
+        // A NaN here would make every `i < resumeFrom` test false and
+        // silently disable resuming, so reject it up front.
+        const resumeFrom = parseInt(takeValue('--resume-from'), 10);
+        if (Number.isNaN(resumeFrom) || resumeFrom < 0) {
+          console.error('Error: --resume-from must be a non-negative integer');
+          process.exit(1);
+        }
+        result.resumeFrom = resumeFrom;
+        break;
+      }
+      case '--job-id': result.jobId = takeValue('--job-id'); break;
+      case '--help':
+        console.log(`Usage: npx tsx scripts/import-masstimes-api.ts [options]
+  --all               Query all regions globally
+  --region <name>     Query specific region: ${regionNames}
+  --skip-us           Skip US grid points (already well-covered)
+  --dry-run           No database writes
+  --resume-from <n>   Skip first N grid points
+  --job-id <id>       Background job tracking`);
+        process.exit(0);
+    }
+  }
+
+  if (!result.all && !result.region) {
+    console.error('Error: specify --all or --region <name>');
+    process.exit(1);
+  }
+
+  // An unknown region would otherwise produce an empty grid and a run that
+  // does nothing; --all ignores --region, so only check when it is used.
+  if (!result.all && result.region !== undefined
+    && !Object.prototype.hasOwnProperty.call(REGIONS, result.region)) {
+    console.error(`Error: unknown region "${result.region}". Valid regions: ${regionNames}`);
+    process.exit(1);
+  }
+
+  return result;
+}
+
+// ─── Grid Generation ─────────────────────────────────────────────────────────
+
+/**
+ * Build the list of coordinates to query by laying a grid over every
+ * bounding box of the requested regions.
+ *
+ * Rows are LAT_SPACING degrees apart. Within a row the longitude step is
+ * widened so that points are about TARGET_LNG_SPACING_KM apart, but never
+ * less than LAT_SPACING degrees. Coordinates are rounded to one decimal and
+ * a point that was already produced (boxes overlap) is emitted only once.
+ *
+ * @param regions - Keys of REGIONS; unknown names are reported and skipped.
+ * @param skipUs - When true, points inside the continental US box are omitted.
+ * @returns The grid points in generation order.
+ */
+function generateGridPoints(regions: string[], skipUs: boolean): Array<{ lat: number; lng: number }> {
+  const grid: Array<{ lat: number; lng: number }> = [];
+  const emitted = new Set();
+  const toTenth = (value: number) => Math.round(value * 10) / 10;
+  // US continental bounding box
+  const insideUsBox = (lat: number, lng: number) =>
+    lat >= 24 && lat <= 50 && lng >= -125 && lng <= -66;
+
+  for (const regionName of regions) {
+    const boxes = REGIONS[regionName];
+    if (!boxes) {
+      console.error(`Unknown region: ${regionName}`);
+      continue;
+    }
+
+    for (const [latMin, latMax, lngMin, lngMax] of boxes) {
+      for (let lat = latMin; lat <= latMax; lat += LAT_SPACING) {
+        // A degree of longitude shrinks with the cosine of the latitude.
+        const kmPerDegreeLng = 111.32 * Math.cos((lat * Math.PI) / 180);
+        let lngStep = LAT_SPACING;
+        if (kmPerDegreeLng > 0) {
+          lngStep = Math.max(LAT_SPACING, TARGET_LNG_SPACING_KM / kmPerDegreeLng);
+        }
+
+        for (let lng = lngMin; lng <= lngMax; lng += lngStep) {
+          const point = { lat: toTenth(lat), lng: toTenth(lng) };
+          const key = `${point.lat},${point.lng}`;
+
+          if (emitted.has(key)) continue;
+          if (skipUs && insideUsBox(point.lat, point.lng)) continue;
+
+          emitted.add(key);
+          grid.push(point);
+        }
+      }
+    }
+  }
+
+  return grid;
+}
+
+// ─── API ─────────────────────────────────────────────────────────────────────
+
+/** Resolve after `ms` milliseconds; used to pause between API requests. */
+async function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+/**
+ * Fetch one page of churches around a coordinate from the geo-search API.
+ *
+ * @param lat - Latitude of the search centre.
+ * @param lng - Longitude of the search centre.
+ * @param page - Page number to request.
+ * @param retriesLeft - How many more times a 429 response may be retried
+ *   after a 30-second back-off. The retry used to recurse without any bound,
+ *   so a persistently rate-limited endpoint kept the script waiting forever.
+ * @returns The churches on that page (an empty array past the last page).
+ * @throws Error on a non-OK response that is not retried (including a 429
+ *   once the retries are used up), or when the body is not a JSON array.
+ */
+async function fetchPage(
+  lat: number,
+  lng: number,
+  page: number,
+  retriesLeft = 3,
+): Promise<MasstimesChurch[]> {
+  const url = `${API_BASE}?lat=${lat}&long=${lng}&pg=${page}`;
+  const response = await fetch(url, {
+    headers: {
+      'Referer': REFERER,
+      'User-Agent': USER_AGENT,
+      'Accept': 'application/json',
+    },
+  });
+
+  if (!response.ok) {
+    if (response.status === 429 && retriesLeft > 0) {
+      console.error(` Rate limited (429) — backing off 30s`);
+      await sleep(30000);
+      return fetchPage(lat, lng, page, retriesLeft - 1);
+    }
+    throw new Error(`HTTP ${response.status} for ${url}`);
+  }
+
+  // Callers read `.length` and spread the result, so fail here with a clear
+  // message if the API ever answers with something that is not an array.
+  const payload: unknown = await response.json();
+  if (!Array.isArray(payload)) {
+    throw new Error(`Unexpected non-array response for ${url}`);
+  }
+  return payload as MasstimesChurch[];
+}
+
+/**
+ * Collect every page of results for one grid point.
+ *
+ * Pages are requested starting at 1 until a page comes back empty or shorter
+ * than PAGE_SIZE, pausing RATE_LIMIT_MS between consecutive pages.
+ *
+ * @param lat - Latitude of the grid point.
+ * @param lng - Longitude of the grid point.
+ * @param stats - Running counters; `apiRequests` is incremented once per page.
+ * @returns All churches returned for the point, in page order.
+ * @throws Whatever fetchPage throws; pages gathered so far are discarded.
+ */
+async function fetchAllForPoint(
+  lat: number,
+  lng: number,
+  stats: ImportStats,
+): Promise<MasstimesChurch[]> {
+  const allChurches: MasstimesChurch[] = [];
+  let page = 1;
+
+  while (true) {
+    stats.apiRequests++;
+    const results = await fetchPage(lat, lng, page);
+    if (results.length === 0) break;
+
+    allChurches.push(...results);
+
+    if (results.length < PAGE_SIZE) break; // last page
+    page++;
+
+    await sleep(RATE_LIMIT_MS);
+  }
+
+  return allChurches;
+}
+
+// ─── Data Conversion ─────────────────────────────────────────────────────────
+
+/**
+ * Translate a masstimes country name into its code via COUNTRY_CODE_MAP.
+ * The name is trimmed and lower-cased first; an empty or unrecognised name
+ * yields the placeholder 'XX'.
+ */
+function resolveCountryCode(countryName: string): string {
+  const fallback = 'XX';
+  if (!countryName) {
+    return fallback;
+  }
+  const normalized = countryName.trim().toLowerCase();
+  const code = COUNTRY_CODE_MAP[normalized];
+  return code ? code : fallback;
+}
+
+// Lower-cased masstimes `day_of_week` value → day indices (0 = Sunday …
+// 6 = Saturday). 'weekdays' expands to Monday through Friday.
+const DAY_MAP: Record<string, number[]> = {
+  'sunday': [0],
+  'monday': [1],
+  'tuesday': [2],
+  'wednesday': [3],
+  'thursday': [4],
+  'friday': [5],
+  'saturday': [6],
+  'weekdays': [1, 2, 3, 4, 5],
+};
+
+/**
+ * Convert masstimes worship-time entries into mass schedule rows.
+ *
+ * Only entries whose service type is 'Weekend' or 'Week Days' are kept.
+ * Entries with a blank or "00:00:00" start time, or with a day that is not a
+ * key of DAY_MAP, are dropped. A day that maps to several indices (such as
+ * 'weekdays') produces one row per index.
+ *
+ * @param times - Worship-time entries of one church.
+ * @returns Schedule rows with "HH:MM" times; `massType` is always null.
+ */
+function parseWorshipTimes(times: MasstimesWorshipTime[]): Array<{
+  dayOfWeek: number;
+  time: string;
+  language: string;
+  notes: string | null;
+  massType: string | null;
+}> {
+  const rows: Array<{
+    dayOfWeek: number;
+    time: string;
+    language: string;
+    notes: string | null;
+    massType: string | null;
+  }> = [];
+
+  for (const entry of times) {
+    // Only import mass services (Weekend = Sun/Sat, Week Days = weekday masses)
+    const isMass = entry.service_typename === 'Weekend' || entry.service_typename === 'Week Days';
+    if (!isMass) continue;
+
+    const start = entry.time_start?.trim();
+    if (!start || start === '00:00:00') continue;
+
+    // "HH:MM:SS" → "HH:MM"; a missing minutes part becomes "00"
+    const [hourPart, minutePart] = start.split(':');
+    const time = `${hourPart.padStart(2, '0')}:${minutePart || '00'}`;
+
+    const language = entry.language?.trim() || 'Unknown';
+    const notes = entry.comment?.trim() || null;
+
+    const matchedDays = DAY_MAP[entry.day_of_week?.trim().toLowerCase()];
+    if (!matchedDays) continue;
+
+    for (const dayOfWeek of matchedDays) {
+      rows.push({ dayOfWeek, time, language, notes, massType: null });
+    }
+  }
+
+  return rows;
+}
+
+// ─── Database ────────────────────────────────────────────────────────────────
+
+/**
+ * Load every stored church (all countries), selecting the columns used for
+ * duplicate detection (name, coordinates, per-source IDs) and for deciding
+ * which empty fields an import may fill in.
+ *
+ * @returns All church rows.
+ */
+async function loadExistingChurches(): Promise<ExistingChurch[]> {
+  console.log('Loading existing churches for deduplication...');
+  const churches = await prisma.church.findMany({
+    select: {
+      id: true,
+      name: true,
+      latitude: true,
+      longitude: true,
+      osmId: true,
+      baiduId: true,
+      masstimesId: true,
+      orarimesseId: true,
+      massSchedulesPhId: true,
+      philmassId: true,
+      horariosMisasId: true,
+      mszeInfoId: true,
+      weekdayMassesId: true,
+      messesInfoId: true,
+      bohosluzbyId: true,
+      miserendId: true,
+      kerknetId: true,
+      gottesdienstzeitenId: true,
+      discovermassId: true,
+      source: true,
+      website: true,
+      phone: true,
+      address: true,
+      country: true,
+    },
+  });
+  console.log(`Loaded ${churches.length} existing churches`);
+  return churches;
+}
+
+/**
+ * Copy the current counters onto the background job row.
+ *
+ * This is best-effort: a failed update is logged and swallowed so that a
+ * progress-reporting problem never aborts the import.
+ *
+ * @param jobId - ID of the background job row to update.
+ * @param stats - Current counters of the run.
+ */
+async function updateJobProgress(jobId: string, stats: ImportStats): Promise<void> {
+  try {
+    await prisma.backgroundJob.update({
+      where: { id: jobId },
+      data: {
+        processed: stats.gridPoints,
+        succeeded: stats.churchesMatched + stats.churchesCreated,
+        failed: stats.errors,
+        itemsFound: stats.churchesDiscovered,
+      },
+    });
+  } catch (err) {
+    console.error(`Failed to update job progress:`, err);
+  }
+}
+
+// ─── Main Import ─────────────────────────────────────────────────────────────
+
+async function main() {
+ const args = parseArgs();
+
+ let regionNames: string[];
+ if (args.all) {
+ regionNames = Object.keys(REGIONS);
+ } else {
+ regionNames = [args.region!];
+ }
+
+ const gridPoints = generateGridPoints(regionNames, args.skipUs);
+
+ console.log(`\n${'='.repeat(70)}`);
+ console.log('MASSTIMES.ORG API GLOBAL IMPORTER');
+ console.log('='.repeat(70));
+ console.log(`Regions: ${regionNames.join(', ')}`);
+ console.log(`Grid points: ${gridPoints.length}`);
+ console.log(`Skip US: ${args.skipUs ? 'YES' : 'NO'}`);
+ console.log(`Dry run: ${args.dryRun ? 'YES' : 'NO'}`);
+ console.log(`Rate limit: ${RATE_LIMIT_MS}ms between requests`);
+ console.log(`Resume from: ${args.resumeFrom || 'start'}`);
+ const estHours = Math.round(gridPoints.length * 2 * RATE_LIMIT_MS / 1000 / 3600 * 10) / 10;
+ console.log(`Est. time: ~${estHours} hours (est. 2 pages/point avg)`);
+ console.log(`Time: ${new Date().toISOString()}`);
+ console.log('='.repeat(70));
+
+ const existingChurches = await loadExistingChurches();
+
+ // Build masstimesId lookup for fast dedup
+ const masstimesIdSet = new Set();
+ for (const c of existingChurches) {
+ if (c.masstimesId) masstimesIdSet.add(c.masstimesId);
+ }
+
+ // Track discovered IDs to deduplicate across grid points
+ const discoveredIds = new Set();
+
+ const stats: ImportStats = {
+ gridPoints: 0,
+ apiRequests: 0,
+ churchesDiscovered: 0,
+ churchesMatched: 0,
+ churchesCreated: 0,
+ churchesSkipped: 0,
+ massSchedulesCreated: 0,
+ errors: 0,
+ };
+
+ let jobId = args.jobId;
+ if (jobId) {
+ await prisma.backgroundJob.update({
+ where: { id: jobId },
+ data: { status: 'running', startedAt: new Date(), totalItems: gridPoints.length },
+ });
+ }
+
+ const startTime = Date.now();
+
+ for (let i = 0; i < gridPoints.length; i++) {
+ const { lat, lng } = gridPoints[i];
+ stats.gridPoints++;
+
+ if (i < args.resumeFrom) continue;
+
+ try {
+ const churches = await fetchAllForPoint(lat, lng, stats);
+
+ if (churches.length > 0) {
+ let newInPoint = 0;
+ for (const mc of churches) {
+ if (discoveredIds.has(mc.id)) continue;
+ discoveredIds.add(mc.id);
+ stats.churchesDiscovered++;
+
+ // Already in DB by masstimesId
+ if (masstimesIdSet.has(mc.id)) {
+ stats.churchesMatched++;
+ continue;
+ }
+
+ const churchLat = parseFloat(mc.latitude);
+ const churchLng = parseFloat(mc.longitude);
+ if (isNaN(churchLat) || isNaN(churchLng) || (churchLat === 0 && churchLng === 0)) continue;
+
+ const country = resolveCountryCode(mc.church_address_country_territory_name);
+ const address = [
+ mc.church_address_street_address,
+ mc.church_address_city_name,
+ mc.church_address_providence_name,
+ mc.church_address_postal_code,
+ ].filter(s => s?.trim()).join(', ').trim() || null;
+
+ // Proximity + name match
+ const candidate = { name: mc.name, lat: churchLat, lng: churchLng };
+ const duplicate = findDuplicateChurch(candidate, existingChurches);
+
+ if (duplicate) {
+ stats.churchesMatched++;
+ if (!args.dryRun) {
+ const updateData: Record = { masstimesId: mc.id };
+ if (!duplicate.phone && mc.phone_number?.trim()) updateData.phone = mc.phone_number.trim();
+ if (!duplicate.website && mc.url?.trim()) {
+ updateData.website = mc.url.trim();
+ updateData.hasWebsite = true;
+ }
+ if (!duplicate.address && address) updateData.address = address;
+ if (duplicate.country === 'XX' && country !== 'XX') updateData.country = country;
+
+ try {
+ await prisma.church.update({ where: { id: duplicate.id }, data: updateData });
+ masstimesIdSet.add(mc.id);
+ } catch (error) {
+ if (error instanceof Error && error.message.includes('Unique constraint')) {
+ stats.churchesSkipped++;
+ } else throw error;
+ }
+ }
+ continue;
+ }
+
+ // Create new church
+ if (!args.dryRun) {
+ const schedules = parseWorshipTimes(mc.church_worship_times || []);
+ try {
+ const newChurch = await prisma.church.create({
+ data: {
+ name: mc.name,
+ latitude: churchLat,
+ longitude: churchLng,
+ address,
+ city: mc.church_address_city_name?.trim() || null,
+ state: mc.church_address_providence_name?.trim() || null,
+ zip: mc.church_address_postal_code?.trim() || null,
+ country,
+ phone: mc.phone_number?.trim() || null,
+ website: mc.url?.trim() || null,
+ email: mc.email?.trim() || null,
+ hasWebsite: !!mc.url?.trim(),
+ masstimesId: mc.id,
+ source: 'masstimes',
+ diocese: mc.diocese_name?.trim() || null,
+ pastorName: mc.pastors_name?.trim() || null,
+ wheelchairAccess: mc.wheel_chair_access || false,
+ massSchedules: schedules.length > 0 ? {
+ create: schedules.map(s => ({
+ dayOfWeek: s.dayOfWeek,
+ time: s.time,
+ language: s.language,
+ notes: s.notes,
+ massType: s.massType,
+ isActive: true,
+ })),
+ } : undefined,
+ },
+ });
+
+ stats.churchesCreated++;
+ stats.massSchedulesCreated += schedules.length;
+ newInPoint++;
+ masstimesIdSet.add(mc.id);
+
+ existingChurches.push({
+ id: newChurch.id, name: mc.name,
+ latitude: churchLat, longitude: churchLng,
+ osmId: null, baiduId: null, masstimesId: mc.id,
+ orarimesseId: null, massSchedulesPhId: null,
+ philmassId: null, horariosMisasId: null,
+ mszeInfoId: null, weekdayMassesId: null,
+ messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null, discovermassId: null,
+ source: 'masstimes', website: mc.url?.trim() || null,
+ phone: mc.phone_number?.trim() || null, address, country,
+ });
+ } catch (error) {
+ if (error instanceof Error && error.message.includes('Unique constraint')) {
+ stats.churchesSkipped++;
+ } else {
+ stats.errors++;
+ console.error(` Error creating ${mc.name}: ${error instanceof Error ? error.message : error}`);
+ }
+ }
+ } else {
+ stats.churchesCreated++;
+ stats.massSchedulesCreated += parseWorshipTimes(mc.church_worship_times || []).length;
+ newInPoint++;
+ }
+ }
+
+ if (newInPoint > 0) {
+ console.log(` Grid ${i + 1}/${gridPoints.length} (${lat},${lng}): ${churches.length} found, ${newInPoint} new`);
+ }
+ }
+
+ await sleep(RATE_LIMIT_MS);
+ } catch (error) {
+ stats.errors++;
+ console.error(` Error at grid ${i + 1} (${lat},${lng}): ${error instanceof Error ? error.message : error}`);
+ await sleep(RATE_LIMIT_MS * 2);
+ }
+
+ // Progress every 50 points
+ if ((i + 1) % 50 === 0 || i === gridPoints.length - 1) {
+ const elapsed = (Date.now() - startTime) / 1000;
+ const rate = elapsed > 0 ? Math.round(stats.apiRequests / elapsed * 3600) : 0;
+ console.log(` Progress: ${i + 1}/${gridPoints.length} grid points, ${stats.churchesDiscovered} discovered, ${stats.churchesCreated} new, ${stats.apiRequests} API calls [${Math.round(elapsed)}s, ~${rate}/hr]`);
+ }
+
+ if (jobId && (i + 1) % 20 === 0) {
+ await updateJobProgress(jobId, stats);
+ }
+ }
+
+ if (jobId) {
+ await updateJobProgress(jobId, stats);
+ await prisma.backgroundJob.update({
+ where: { id: jobId },
+ data: { status: 'completed', completedAt: new Date() },
+ });
+ }
+
+ const elapsed = (Date.now() - startTime) / 1000;
+ console.log(`\n${'='.repeat(70)}`);
+ console.log('MASSTIMES API IMPORT SUMMARY');
+ console.log('='.repeat(70));
+ console.log(`Grid points queried: ${stats.gridPoints}`);
+ console.log(`API requests: ${stats.apiRequests}`);
+ console.log(`Churches discovered: ${stats.churchesDiscovered}`);
+ console.log(`Churches matched: ${stats.churchesMatched} (already in DB)`);
+ console.log(`Churches created: ${stats.churchesCreated}`);
+ console.log(`Churches skipped: ${stats.churchesSkipped} (duplicates)`);
+ console.log(`Mass schedules created: ${stats.massSchedulesCreated}`);
+ console.log(`Errors: ${stats.errors}`);
+ console.log(`Elapsed: ${Math.round(elapsed)}s (${(elapsed / 3600).toFixed(1)}h)`);
+ console.log('='.repeat(70));
+
+ await prisma.$disconnect();
+ await pool.end();
+}
+
+// Entry point: run the import and exit non-zero on any unhandled failure.
+main().catch((error: unknown) => {
+  // A rejection is not guaranteed to be an Error instance; fall back to the raw
+  // value so the log never reads "Fatal error: undefined".
+  const message = error instanceof Error ? error.message : String(error);
+  console.error(`Fatal error: ${message}`);
+  process.exit(1);
+});
diff --git a/scripts/import-messesinfo.ts b/scripts/import-messesinfo.ts
new file mode 100644
index 0000000..974ddac
--- /dev/null
+++ b/scripts/import-messesinfo.ts
@@ -0,0 +1,681 @@
+#!/usr/bin/env tsx
+/**
+ * Import Catholic churches and mass schedules from messes.info (France)
+ *
+ * messes.info is the official French bishops' conference (CEF) mass schedule
+ * database. It exposes a GWT-RPC API returning structured JSON with parish
+ * data including name, address, coordinates, diocese, and celebration times.
+ *
+ * The API requires no authentication. We enumerate all French dioceses using
+ * the "community:{diocese_code}" query prefix, which returns all parishes
+ * within each diocese.
+ *
+ * Import strategy:
+ * 1. Query each of ~93 diocese codes via GWT-RPC API
+ * 2. Parse response: extract localities (churches) + celebrations (mass times)
+ * 3. Deduce recurring weekly schedule from date-specific celebration entries
+ * 4. Match against existing French churches via church-matcher
+ * 5. Upsert churches and mass schedules
+ *
+ * Usage:
+ * npx tsx scripts/import-messesinfo.ts --all --dry-run
+ * npx tsx scripts/import-messesinfo.ts --all
+ * npx tsx scripts/import-messesinfo.ts --diocese pa --dry-run # Paris only
+ * npx tsx scripts/import-messesinfo.ts --all --resume-from 20
+ */
+
+import dotenv from 'dotenv';
+import path from 'path';
+
+// Load environment files before DATABASE_URL is read below. `.env.local` is
+// loaded first. NOTE(review): dotenv normally keeps variables that are already
+// set, which would give `.env.local` precedence over `.env` — confirm.
+dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
+dotenv.config({ path: path.resolve(process.cwd(), '.env') });
+
+import { Pool } from 'pg';
+import { PrismaPg } from '@prisma/adapter-pg';
+import { PrismaClient } from '@prisma/client';
+
+// Falls back to a local development database when DATABASE_URL is unset.
+const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
+// Log the target with the password portion of the URL masked.
+console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
+const pool = new Pool({
+ connectionString: dbUrl,
+ // URLs containing "neon" get SSL with certificate verification disabled.
+ ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
+});
+// Prisma talks to Postgres through the shared pg pool via the driver adapter.
+const adapter = new PrismaPg(pool);
+const prisma = new PrismaClient({ adapter });
+
+import { findDuplicateChurch } from '../src/lib/church-matcher';
+import type { ExistingChurch } from '../src/lib/church-matcher';
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+// Endpoint that fetchDioceseData POSTs its GWT-RPC request to.
+const API_URL = 'https://messes.info/gwtRequest';
+// Sent as the User-Agent header on every API request.
+const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
+// Pause applied before every API request except the first one.
+const REQUEST_DELAY_MS = 3000;
+// Pause before retrying after an HTTP 429/503 response or a network error.
+const RETRY_DELAY_MS = 10000;
+// Maximum number of attempts per diocese request (the first try included).
+const MAX_RETRIES = 3;
+// Passed as the third query parameter of the request — presumably the maximum
+// number of results returned; TODO confirm against the API.
+const RESULTS_PER_QUERY = 2000;
+
+// Diocese codes discovered from the API. Each code maps to a diocese in France.
+// The query "community:{code}" returns all parishes within that diocese.
+// Codes are short lowercase abbreviations, mostly two letters
+// (e.g., pa=Paris, ly=Lyon, st=Strasbourg); 'a' is a single letter.
+const DIOCESE_CODES = [
+ 'a', 'aa', 'ac', 'ad', 'ag', 'al', 'am', 'an', 'ar', 'au', 'av', 'ay',
+ 'ba', 'bb', 'be', 'bl', 'bm', 'bo', 'br', 'bs', 'bv', 'by',
+ 'ca', 'cb', 'cc', 'cd', 'ch', 'cl', 'cm', 'cn', 'cr', 'cs',
+ 'da', 'di', 'dj', 'dn',
+ 'et', 'ex', 'ey',
+ 'ft',
+ 'ga', 'gr',
+ 'lg', 'lh', 'li', 'lm', 'lp', 'lr', 'ls', 'lu', 'lv', 'ly',
+ 'ma', 'md', 'me', 'ml', 'mp', 'mt', 'mx',
+ 'na', 'nc', 'ni', 'nt', 'nv', 'ny',
+ 'or',
+ 'pa', 'pm', 'po', 'ps', 'pt',
+ 'qu',
+ 're', 'rn', 'ro', 'rv',
+ 'sl', 'ss', 'st', 'sz',
+ 'tl', 'to', 'ts', 'tu',
+ 'va', 'vd', 've', 'vl', 'vv',
+];
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+/** One church/locality as extracted from the API response by parseApiResponse. */
+interface LocalityData {
+ // Identifier used to deduplicate localities and stored as messesInfoId.
+ idfixe: string;
+ name: string;
+ address: string | null;
+ city: string | null;
+ zipcode: string | null;
+ // Coordinates; parseApiResponse stores 0 when the API omits a value.
+ latitude: number;
+ longitude: number;
+ // Written to the church's `diocese` column when a new church is created.
+ sector: string | null;
+ communityId: string | null;
+ // The locality's API `id`; celebrations reference it through their localityId.
+ localityId: string; // e.g. "75/paris-04/saint-louis-en-l-ile"
+}
+
+/** One dated celebration entry attached to a locality. */
+interface CelebrationData {
+ // Calendar date string; deduceSchedules appends "T12:00:00Z" to parse it.
+ date: string;
+ time: string; // normalized to "HH:MM"
+ // deduceSchedules treats the value 1 as "weekly recurring"; defaults to 0.
+ recurrenceCategory: number;
+}
+
+/** A weekly schedule slot deduced from dated celebrations. */
+interface ParsedSchedule {
+ // Value of Date#getUTCDay(): 0 = Sunday ... 6 = Saturday.
+ dayOfWeek: number;
+ time: string;
+}
+
+/** Running counters reported in the import summary. */
+interface ImportStats {
+ diocesesProcessed: number;
+ localitiesFound: number;
+ churchesMatched: number;
+ churchesCreated: number;
+ // Localities without coordinates, or rejected by a unique constraint.
+ churchesSkipped: number;
+ schedulesCreated: number;
+ errors: number;
+}
+
+/** Options parsed from the command line by parseArgs. */
+interface CLIArgs {
+ all: boolean;
+ dryRun: boolean;
+ // Number of leading diocese codes to skip; only applied together with --all.
+ resumeFrom?: number;
+ // Single diocese code to import instead of the full list.
+ diocese?: string;
+ // Id of the backgroundJob row to update with status and totals.
+ jobId?: string;
+}
+
+// ─── HTTP Client ─────────────────────────────────────────────────────────────
+
+// Number of API requests issued so far. fetchDioceseData uses it to skip the
+// pacing delay on the first call, and main prints it in the summary.
+let requestCount = 0;
+
+/**
+ * Resolve after the given number of milliseconds.
+ *
+ * @param ms Time to wait, in milliseconds.
+ */
+function delay(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+/**
+ * Convert a messes.info time string to zero-padded "HH:MM".
+ *
+ * Accepted forms: "18h00", "9h30", "18h" (minutes default to "00"), the same
+ * with an upper-case "H", and "9:30" / "18:00". Surrounding whitespace is
+ * ignored.
+ *
+ * @param messesTime Raw time string from the API.
+ * @returns The normalized "HH:MM" string, or the input unchanged when it
+ *   matches none of the accepted forms.
+ */
+function normalizeTime(messesTime: string): string {
+  // Group 1: hours. Group 2: minutes after "h" (optional). Group 3: minutes
+  // after ":" (required, so a dangling "9:" is not treated as a time).
+  const match = messesTime.trim().match(/^(\d{1,2})(?:h(\d{2})?|:(\d{2}))$/i);
+  if (!match) return messesTime;
+
+  const hours = match[1].padStart(2, '0');
+  const minutes = match[2] ?? match[3] ?? '00';
+  return `${hours}:${minutes}`;
+}
+
+/**
+ * Fetch the raw GWT-RPC response for one diocese.
+ *
+ * Waits REQUEST_DELAY_MS before every call except the first, then POSTs the
+ * "community:{code}" query. HTTP 429/503 responses and network failures
+ * (including a timed-out request) are retried up to MAX_RETRIES times with
+ * RETRY_DELAY_MS between attempts.
+ *
+ * @param dioceseCode Diocese code to query.
+ * @returns The parsed JSON payload, or null when the request ultimately fails
+ *   or the server answers with a non-retryable error status.
+ */
+// The payload is an untyped GWT-RPC structure, hence `any`.
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+async function fetchDioceseData(dioceseCode: string): Promise<any> {
+  // Abort a stalled request instead of letting the whole import hang on it.
+  const REQUEST_TIMEOUT_MS = 60000;
+
+  if (requestCount > 0) {
+    await delay(REQUEST_DELAY_MS);
+  }
+  requestCount++;
+
+  const body = JSON.stringify({
+    F: 'cef.kephas.shared.request.AppRequestFactory',
+    I: [{
+      O: 'Bzv0wi60qgwcW5aKiRKrtgNaLKo=',
+      P: [`community:${dioceseCode}`, 0, RESULTS_PER_QUERY, 1, null, '48.86:2.35', ''],
+      R: ['listCelebrationTime.locality'],
+    }],
+  });
+
+  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
+    const canRetry = attempt < MAX_RETRIES;
+    try {
+      const response = await fetch(API_URL, {
+        method: 'POST',
+        headers: {
+          'User-Agent': USER_AGENT,
+          'Content-Type': 'application/json',
+          'Accept': 'application/json',
+        },
+        body,
+        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
+      });
+
+      // Rate limiting / temporary unavailability: back off and try again.
+      if (response.status === 503 || response.status === 429) {
+        if (canRetry) {
+          console.log(` HTTP ${response.status} — retrying in ${RETRY_DELAY_MS / 1000}s (attempt ${attempt}/${MAX_RETRIES})`);
+          await delay(RETRY_DELAY_MS);
+          continue;
+        }
+        console.error(` HTTP ${response.status} after ${MAX_RETRIES} attempts`);
+        return null;
+      }
+
+      // Any other error status is treated as permanent.
+      if (!response.ok) {
+        console.error(` HTTP ${response.status} from API`);
+        return null;
+      }
+
+      return await response.json();
+    } catch (error) {
+      // Reached for network failures, timeouts, and unparseable JSON bodies.
+      if (canRetry) {
+        console.log(` Network error — retrying in ${RETRY_DELAY_MS / 1000}s (attempt ${attempt}/${MAX_RETRIES})`);
+        await delay(RETRY_DELAY_MS);
+        continue;
+      }
+      console.error(` API error after ${MAX_RETRIES} attempts: ${error instanceof Error ? error.message : error}`);
+      return null;
+    }
+  }
+  return null;
+}
+
+// ─── Response Parser ─────────────────────────────────────────────────────────
+
+/**
+ * Parse the GWT-RPC response into a map of locality idfixe → data.
+ *
+ * The response O array contains interleaved objects:
+ * - Locality objects: have P.idfixe, P.name, P.address, P.latitude, etc.
+ * - Celebration objects: have P.date, P.time, P.localityId, P.recurrenceCategory
+ * - Metadata object: has P.size, P.sizeLocalities
+ *
+ * Localities and celebrations are linked by P.localityId matching P.id on localities.
+ * Two passes are used because a celebration may appear before its locality.
+ *
+ * @param data Raw payload returned by the API (untyped).
+ * @returns Map keyed by idfixe; empty when the payload has no O array.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function parseApiResponse(
+  data: any,
+): Map<string, { locality: LocalityData; celebrations: CelebrationData[] }> {
+  const result = new Map<string, { locality: LocalityData; celebrations: CelebrationData[] }>();
+
+  if (!Array.isArray(data?.O)) return result;
+
+  // First pass: collect all localities by their id
+  const localitiesById = new Map<string, LocalityData>();
+  for (const obj of data.O) {
+    // `obj?.` keeps a null/undefined array entry from throwing.
+    const p = obj?.P;
+    if (!p || typeof p !== 'object') continue;
+    if (!p.idfixe || !p.name) continue;
+
+    const locality: LocalityData = {
+      idfixe: p.idfixe,
+      name: p.name,
+      address: p.address || null,
+      city: p.city || null,
+      zipcode: p.zipcode || null,
+      latitude: p.latitude || 0,
+      longitude: p.longitude || 0,
+      sector: p.sector || null,
+      communityId: p.communityId || null,
+      localityId: p.id || '',
+    };
+    localitiesById.set(p.id, locality);
+
+    // Initialize in result map (dedup by idfixe): the first occurrence wins.
+    if (!result.has(p.idfixe)) {
+      result.set(p.idfixe, { locality, celebrations: [] });
+    }
+  }
+
+  // Second pass: collect celebrations and link to localities
+  for (const obj of data.O) {
+    const p = obj?.P;
+    if (!p || typeof p !== 'object') continue;
+    if (!p.date || !p.time || !p.localityId) continue;
+
+    const locality = localitiesById.get(p.localityId);
+    const entry = locality ? result.get(locality.idfixe) : undefined;
+    if (!entry) continue;
+
+    entry.celebrations.push({
+      date: p.date,
+      time: normalizeTime(p.time),
+      recurrenceCategory: p.recurrenceCategory ?? 0,
+    });
+  }
+
+  return result;
+}
+
+// ─── Schedule Deduction ──────────────────────────────────────────────────────
+
+/**
+ * Deduce a weekly schedule (unique weekday + time pairs) from dated
+ * celebration entries.
+ *
+ * Entries flagged as weekly recurring (recurrenceCategory === 1) are preferred;
+ * when they yield nothing, every entry is used instead. Entries whose date
+ * cannot be parsed are ignored rather than producing a NaN weekday.
+ *
+ * @param celebrations Celebration entries of a single locality.
+ * @returns Schedule slots in first-seen order, without duplicates.
+ */
+function deduceSchedules(celebrations: CelebrationData[]): ParsedSchedule[] {
+  const seen = new Set<string>();
+  const schedules: ParsedSchedule[] = [];
+
+  const collect = (celeb: CelebrationData): void => {
+    // Noon UTC keeps the weekday stable whatever the machine's time zone is.
+    const dayOfWeek = new Date(`${celeb.date}T12:00:00Z`).getUTCDay();
+    if (Number.isNaN(dayOfWeek)) return; // unparseable date
+
+    const key = `${dayOfWeek}:${celeb.time}`;
+    if (seen.has(key)) return;
+    seen.add(key);
+    schedules.push({ dayOfWeek, time: celeb.time });
+  };
+
+  // First pass: weekly recurring entries only (recurrenceCategory=1)
+  for (const celeb of celebrations) {
+    if (celeb.recurrenceCategory === 1) collect(celeb);
+  }
+
+  // Fallback: if no weekly entries, deduce from all
+  if (schedules.length === 0) {
+    for (const celeb of celebrations) collect(celeb);
+  }
+
+  return schedules;
+}
+
+// ─── Database Operations ─────────────────────────────────────────────────────
+
+/**
+ * Load every church stored with country 'FR', restricted to the columns the
+ * duplicate matcher and the update logic read.
+ *
+ * @returns The existing French churches used as the in-memory dedup list.
+ */
+async function loadExistingFrenchChurches(): Promise<ExistingChurch[]> {
+  console.log('Loading existing French churches for deduplication...');
+  const churches = await prisma.church.findMany({
+    where: { country: 'FR' },
+    select: {
+      id: true,
+      name: true,
+      latitude: true,
+      longitude: true,
+      // External source identifiers
+      osmId: true,
+      baiduId: true,
+      masstimesId: true,
+      orarimesseId: true,
+      massSchedulesPhId: true,
+      philmassId: true,
+      horariosMisasId: true,
+      mszeInfoId: true,
+      weekdayMassesId: true,
+      messesInfoId: true,
+      bohosluzbyId: true,
+      miserendId: true,
+      kerknetId: true,
+      gottesdienstzeitenId: true,
+      discovermassId: true,
+      // Descriptive fields
+      source: true,
+      website: true,
+      phone: true,
+      address: true,
+    },
+  });
+  console.log(`Loaded ${churches.length} existing French churches`);
+  return churches;
+}
+
+// ─── Import Logic ────────────────────────────────────────────────────────────
+
+/**
+ * Import one diocese: fetch its localities from messes.info, match each one
+ * against the known churches, then either update the match or create a new
+ * church, together with the weekly schedule deduced from its celebrations.
+ *
+ * A failure on one locality is logged and counted in `stats.errors`; the
+ * remaining localities of the diocese are still processed.
+ *
+ * @param dioceseCode Diocese code used in the "community:{code}" query.
+ * @param existingChurches In-memory dedup list; newly created churches are appended to it.
+ * @param dryRun When true, only the counters are updated and nothing is written.
+ * @param stats Running counters, mutated in place.
+ */
+async function processDiocese(
+  dioceseCode: string,
+  existingChurches: ExistingChurch[],
+  dryRun: boolean,
+  stats: ImportStats,
+): Promise<void> {
+  const isUniqueConstraintError = (error: unknown): boolean =>
+    error instanceof Error && error.message.includes('Unique constraint');
+  const describe = (error: unknown): unknown =>
+    error instanceof Error ? error.message : error;
+
+  const data = await fetchDioceseData(dioceseCode);
+  if (!data) {
+    stats.errors++;
+    return;
+  }
+
+  // Check for API error
+  if (data.S && data.S[0] === false) {
+    console.log(` API error for diocese ${dioceseCode}`);
+    stats.errors++;
+    return;
+  }
+
+  const localities = parseApiResponse(data);
+  console.log(` Found ${localities.size} unique localities`);
+  stats.localitiesFound += localities.size;
+  stats.diocesesProcessed++;
+
+  for (const [idfixe, { locality, celebrations }] of localities) {
+    // A locality without coordinates cannot be matched by proximity.
+    if (locality.latitude === 0 && locality.longitude === 0) {
+      stats.churchesSkipped++;
+      continue;
+    }
+
+    const schedules = deduceSchedules(celebrations);
+
+    const candidate = {
+      name: locality.name,
+      lat: locality.latitude,
+      lng: locality.longitude,
+      messesInfoId: idfixe,
+    };
+
+    const duplicate = findDuplicateChurch(candidate, existingChurches);
+
+    if (dryRun) {
+      if (duplicate) {
+        stats.churchesMatched++;
+      } else {
+        stats.churchesCreated++;
+      }
+      stats.schedulesCreated += schedules.length;
+      continue;
+    }
+
+    if (duplicate) {
+      stats.churchesMatched++;
+
+      // Always link the match to messes.info; only fill in missing fields.
+      const updateData: {
+        messesInfoId: string;
+        address?: string;
+        latitude?: number;
+        longitude?: number;
+      } = { messesInfoId: idfixe };
+
+      if (!duplicate.address && locality.address) updateData.address = locality.address;
+      if (duplicate.latitude === 0 && duplicate.longitude === 0 && locality.latitude !== 0) {
+        updateData.latitude = locality.latitude;
+        updateData.longitude = locality.longitude;
+      }
+
+      try {
+        await prisma.church.update({
+          where: { id: duplicate.id },
+          data: updateData,
+        });
+      } catch (error) {
+        if (isUniqueConstraintError(error)) {
+          // Another church already carries this messesInfoId.
+          stats.churchesSkipped++;
+        } else {
+          // Log and move on, as the create path does, instead of aborting the
+          // rest of the diocese.
+          stats.errors++;
+          console.error(` Error updating ${idfixe}: ${describe(error)}`);
+        }
+        continue;
+      }
+
+      if (schedules.length > 0) {
+        try {
+          // Replace the church's schedules atomically.
+          await prisma.$transaction(async (tx) => {
+            await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
+            await tx.massSchedule.createMany({
+              data: schedules.map((s) => ({
+                churchId: duplicate.id,
+                dayOfWeek: s.dayOfWeek,
+                time: s.time,
+                language: 'French',
+              })),
+            });
+            await tx.church.update({
+              where: { id: duplicate.id },
+              data: { lastScrapedAt: new Date() },
+            });
+          });
+          stats.schedulesCreated += schedules.length;
+        } catch (error) {
+          stats.errors++;
+          console.error(` Error saving schedules for ${idfixe}: ${describe(error)}`);
+        }
+      }
+      continue;
+    }
+
+    // No match: create a new church. Overseas departments (zip 971–976) are
+    // stored as FR as well, so the country never depends on the zipcode.
+    const country = 'FR';
+
+    try {
+      const newChurch = await prisma.church.create({
+        data: {
+          name: locality.name,
+          latitude: locality.latitude,
+          longitude: locality.longitude,
+          address: locality.address,
+          zip: locality.zipcode,
+          city: locality.city,
+          country,
+          diocese: locality.sector || undefined,
+          messesInfoId: idfixe,
+          source: 'messes-info',
+          websiteLanguage: 'fr',
+        },
+      });
+      stats.churchesCreated++;
+
+      // Make the new church visible to later duplicate checks in this run.
+      existingChurches.push({
+        id: newChurch.id,
+        name: locality.name,
+        latitude: locality.latitude,
+        longitude: locality.longitude,
+        osmId: null,
+        baiduId: null,
+        masstimesId: null,
+        orarimesseId: null,
+        massSchedulesPhId: null,
+        philmassId: null,
+        horariosMisasId: null,
+        mszeInfoId: null,
+        weekdayMassesId: null,
+        messesInfoId: idfixe,
+        bohosluzbyId: null,
+        miserendId: null,
+        kerknetId: null,
+        gottesdienstzeitenId: null,
+        discovermassId: null,
+        source: 'messes-info',
+        website: null,
+        phone: null,
+        address: locality.address,
+      });
+
+      if (schedules.length > 0) {
+        await prisma.massSchedule.createMany({
+          data: schedules.map((s) => ({
+            churchId: newChurch.id,
+            dayOfWeek: s.dayOfWeek,
+            time: s.time,
+            language: 'French',
+          })),
+        });
+        await prisma.church.update({
+          where: { id: newChurch.id },
+          data: { lastScrapedAt: new Date() },
+        });
+        stats.schedulesCreated += schedules.length;
+      }
+    } catch (error) {
+      if (isUniqueConstraintError(error)) {
+        stats.churchesSkipped++;
+        continue;
+      }
+      stats.errors++;
+      console.error(` Error creating ${idfixe}: ${describe(error)}`);
+    }
+  }
+}
+
+// ─── CLI ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Parse the command-line options of this script.
+ *
+ * Prints the usage text and exits with status 0 on --help / -h. Exits with
+ * status 1 when a flag is missing its value, when --resume-from is not a
+ * non-negative integer, or when neither --all nor --diocese is given.
+ * Unrecognised arguments are ignored.
+ *
+ * @returns The parsed options.
+ */
+function parseArgs(): CLIArgs {
+  const argv = process.argv.slice(2);
+  const result: CLIArgs = { all: false, dryRun: false };
+
+  // Return the value that follows `flag`, or exit when it is missing.
+  const valueFor = (flag: string, index: number): string => {
+    const value = argv[index];
+    if (value === undefined || value.startsWith('--')) {
+      console.error(`Error: ${flag} requires a value`);
+      process.exit(1);
+    }
+    return value;
+  };
+
+  for (let i = 0; i < argv.length; i++) {
+    switch (argv[i]) {
+      case '--all':
+        result.all = true;
+        break;
+      case '--dry-run':
+        result.dryRun = true;
+        break;
+      case '--resume-from': {
+        const raw = valueFor('--resume-from', ++i);
+        const parsed = Number.parseInt(raw, 10);
+        // A NaN here would silently restart the import from the first diocese.
+        if (!Number.isInteger(parsed) || parsed < 0) {
+          console.error(`Error: --resume-from expects a non-negative integer, got "${raw}"`);
+          process.exit(1);
+        }
+        result.resumeFrom = parsed;
+        break;
+      }
+      case '--diocese':
+        result.diocese = valueFor('--diocese', ++i);
+        break;
+      case '--job-id':
+        result.jobId = valueFor('--job-id', ++i);
+        break;
+      case '--help':
+      case '-h':
+        console.log(`
+Usage: npx tsx scripts/import-messesinfo.ts [options]
+
+Options:
+  --all              Import all dioceses
+  --diocese <code>   Import a single diocese (e.g., pa for Paris)
+  --dry-run          No database writes, just report what would happen
+  --resume-from <n>  Skip first N dioceses
+  --job-id <id>      Background job tracking ID
+  --help, -h         Show this help message
+
+Examples:
+  npx tsx scripts/import-messesinfo.ts --diocese pa --dry-run
+  npx tsx scripts/import-messesinfo.ts --all --dry-run
+  npx tsx scripts/import-messesinfo.ts --all
+`);
+        process.exit(0);
+    }
+  }
+
+  if (!result.all && !result.diocese) {
+    console.error('Error: specify --all or --diocese <code>');
+    process.exit(1);
+  }
+
+  return result;
+}
+
+/**
+ * Render a millisecond duration as "Xh Ym Zs", "Ym Zs" or "Zs", dropping the
+ * leading units that are zero.
+ *
+ * @param ms Duration in milliseconds.
+ */
+function formatDuration(ms: number): string {
+  const totalSeconds = Math.floor(ms / 1000);
+  const totalMinutes = Math.floor(totalSeconds / 60);
+  const totalHours = Math.floor(totalMinutes / 60);
+
+  if (totalHours <= 0 && totalMinutes <= 0) {
+    return `${totalSeconds}s`;
+  }
+
+  const secondsPart = `${totalSeconds % 60}s`;
+  if (totalHours <= 0) {
+    return `${totalMinutes}m ${secondsPart}`;
+  }
+  return `${totalHours}h ${totalMinutes % 60}m ${secondsPart}`;
+}
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+/**
+ * Script entry point: parse the CLI options, import the selected dioceses one
+ * at a time, print a summary, and — when --job-id is given — record status and
+ * totals on the matching backgroundJob row.
+ */
+async function main() {
+ const args = parseArgs();
+ const startTime = Date.now();
+
+ console.log('\n' + '='.repeat(70));
+ console.log('MESSES.INFO (FRANCE) IMPORTER');
+ console.log('='.repeat(70));
+ console.log(`Mode: ${args.diocese ? `Diocese ${args.diocese}` : 'All dioceses'}`);
+ console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
+ if (args.resumeFrom) console.log(`Resume from: diocese index ${args.resumeFrom}`);
+ console.log(`Time: ${new Date().toISOString()}`);
+ console.log('='.repeat(70) + '\n');
+
+ // Mark the tracking job as running; a failed update is deliberately ignored.
+ if (args.jobId) {
+ try {
+ await prisma.backgroundJob.update({
+ where: { id: args.jobId },
+ data: { status: 'running', startedAt: new Date() },
+ });
+ } catch { /* Job might not exist */ }
+ }
+
+ const stats: ImportStats = {
+ diocesesProcessed: 0,
+ localitiesFound: 0,
+ churchesMatched: 0,
+ churchesCreated: 0,
+ churchesSkipped: 0,
+ schedulesCreated: 0,
+ errors: 0,
+ };
+
+ // Dedup list shared by all dioceses; processDiocese appends new churches to it.
+ const existingChurches = await loadExistingFrenchChurches();
+
+ let dioceses = args.diocese ? [args.diocese] : [...DIOCESE_CODES];
+
+ // An unknown code is still attempted; it only triggers a warning.
+ if (args.diocese && !DIOCESE_CODES.includes(args.diocese)) {
+ console.log(`Warning: diocese "${args.diocese}" not in known list, trying anyway...`);
+ }
+
+ // --resume-from only applies to a full run; a value of 0 is a no-op.
+ if (args.resumeFrom && !args.diocese) {
+ dioceses = dioceses.slice(args.resumeFrom);
+ console.log(`Resuming from diocese index ${args.resumeFrom} (${dioceses[0]})\n`);
+ }
+
+ console.log(`Processing ${dioceses.length} dioceses\n`);
+
+ for (let i = 0; i < dioceses.length; i++) {
+ const code = dioceses[i];
+ const elapsed = formatDuration(Date.now() - startTime);
+ console.log(`[${i + 1}/${dioceses.length}] Diocese "${code}" [${elapsed} elapsed]`);
+
+ // An exception thrown by one diocese is counted and must not stop the run.
+ try {
+ await processDiocese(code, existingChurches, args.dryRun, stats);
+ } catch (error) {
+ stats.errors++;
+ console.error(` ERROR processing diocese ${code}: ${error instanceof Error ? error.message : error}`);
+ }
+ }
+
+ const totalTime = Date.now() - startTime;
+ console.log('\n' + '='.repeat(70));
+ console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
+ console.log('='.repeat(70));
+ console.log(`Dioceses processed: ${stats.diocesesProcessed}`);
+ console.log(`Localities found: ${stats.localitiesFound}`);
+ console.log(` Matched (existing): ${stats.churchesMatched}`);
+ console.log(` Created (new): ${stats.churchesCreated}`);
+ console.log(` Skipped: ${stats.churchesSkipped}`);
+ console.log(`Schedules created: ${stats.schedulesCreated}`);
+ console.log(`Errors: ${stats.errors}`);
+ console.log(`Total time: ${formatDuration(totalTime)}`);
+ console.log(`HTTP requests: ${requestCount}`);
+ console.log('='.repeat(70) + '\n');
+
+ // Record the final status and totals on the tracking job. `itemsFound` holds
+ // the number of schedules created; a failed update is ignored.
+ if (args.jobId) {
+ try {
+ await prisma.backgroundJob.update({
+ where: { id: args.jobId },
+ data: {
+ status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
+ completedAt: new Date(),
+ processed: stats.localitiesFound,
+ succeeded: stats.churchesCreated + stats.churchesMatched,
+ failed: stats.errors,
+ itemsFound: stats.schedulesCreated,
+ },
+ });
+ } catch { /* Ignore */ }
+ }
+}
+
+// Entry point: run the import, report a fatal error through the exit code, and
+// always release the database connections.
+main()
+  .catch((error) => {
+    console.error('Fatal error:', error);
+    // Set the exit code rather than calling process.exit(): exiting here would
+    // terminate the process before the cleanup in finally() below can run.
+    process.exitCode = 1;
+  })
+  .finally(async () => {
+    await prisma.$disconnect();
+    await pool.end();
+  });
diff --git a/scripts/import-miserend.ts b/scripts/import-miserend.ts
new file mode 100644
index 0000000..591be32
--- /dev/null
+++ b/scripts/import-miserend.ts
@@ -0,0 +1,579 @@
+#!/usr/bin/env tsx
+/**
+ * Import Catholic churches and mass schedules from miserend.hu (Hungary)
+ *
+ * miserend.hu is the Hungarian Catholic mass schedule database, maintained by
+ * the community with ~5,055 churches (mostly Hungary, some Romania/Slovakia).
+ * It publishes a daily-updated SQLite database at:
+ * https://miserend.hu/fajlok/sqlite/miserend_v4.sqlite3
+ *
+ * The SQLite contains:
+ * - templomok: churches (tid, nev, lat, lng, varos, cim, orszag, megye)
+ * - misek: date-specific mass entries (tid, ido, datumtol, datumig, nyelv)
+ * - kepek: church photos
+ *
+ * Import strategy:
+ * 1. Download the SQLite database
+ * 2. Extract all churches with coordinates
+ * 3. Deduce weekly recurring schedules from date-specific entries
+ * 4. Match against existing churches via church-matcher
+ * 5. Upsert churches and mass schedules
+ *
+ * Usage:
+ * npx tsx scripts/import-miserend.ts --all --dry-run
+ * npx tsx scripts/import-miserend.ts --all
+ * npx tsx scripts/import-miserend.ts --id 37 --dry-run # Single church
+ * npx tsx scripts/import-miserend.ts --all --resume-from 500
+ */
+
+import dotenv from 'dotenv';
+import path from 'path';
+import fs from 'fs';
+import { execFileSync } from 'child_process';
+
+// Load environment files before DATABASE_URL is read below. `.env.local` is
+// loaded first. NOTE(review): dotenv normally keeps variables that are already
+// set, which would give `.env.local` precedence over `.env` — confirm.
+dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
+dotenv.config({ path: path.resolve(process.cwd(), '.env') });
+
+import { Pool } from 'pg';
+import { PrismaPg } from '@prisma/adapter-pg';
+import { PrismaClient } from '@prisma/client';
+
+// Falls back to a local development database when DATABASE_URL is unset.
+const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
+// Log the target with the password portion of the URL masked.
+console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
+const pool = new Pool({
+ connectionString: dbUrl,
+ // URLs containing "neon" get SSL with certificate verification disabled.
+ ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
+});
+// Prisma talks to Postgres through the shared pg pool via the driver adapter.
+const adapter = new PrismaPg(pool);
+const prisma = new PrismaClient({ adapter });
+
+import { findDuplicateChurch } from '../src/lib/church-matcher';
+import type { ExistingChurch } from '../src/lib/church-matcher';
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+// Location of the published miserend SQLite dump.
+const SQLITE_URL = 'https://miserend.hu/fajlok/sqlite/miserend_v4.sqlite3';
+// Local path the dump is downloaded to and queried from.
+const SQLITE_PATH = '/tmp/miserend_v4.sqlite3';
+
+// Maps the country name stored in the dump's `orszag` column to an ISO 3166-1
+// alpha-2 code. The keys are the names as written in the data (Hungarian or
+// the country's own language); names that are not listed fall back to 'HU'
+// where the map is consulted.
+const COUNTRY_MAP: Record<string, string> = {
+  'Magyarország': 'HU',
+  'România': 'RO',
+  'Slovensko': 'SK',
+  'Szlovákia': 'SK',
+  'Szerbia-Montenegro': 'RS',
+  'Србија': 'RS',
+  'Ukrajna': 'UA',
+  'Україна': 'UA',
+  'Österreich': 'AT',
+  'Schweiz/Suisse/Svizzera/Svizra': 'CH',
+  'België / Belgique / Belgien': 'BE',
+  'Éire / Ireland': 'IE',
+  'Россия': 'RU',
+};
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+/** One row of the `templomok` (churches) table, as read by loadChurches. */
+interface MiserendChurch {
+ // Church id in the miserend database; stored as miserendId (stringified).
+ tid: number;
+ // Church name.
+ nev: string;
+ // presumably an alternative / commonly known name — TODO confirm.
+ ismertnev: string | null;
+ // Country name; translated to an ISO code through COUNTRY_MAP.
+ orszag: string | null;
+ // Written to the church's `state` column.
+ megye: string | null;
+ // Written to the church's `city` column.
+ varos: string | null;
+ // Written to the church's `address` column.
+ cim: string | null;
+ lat: number;
+ lng: number;
+}
+
+/** One row of the `misek` (masses) table, as read by loadMassesForChurch. */
+interface MiserendMass {
+ mid: number;
+ // Id of the church this mass belongs to.
+ tid: number;
+ datumtol: number; // MMDD format
+ datumig: number;
+ ido: string; // HH:MM:SS
+ // presumably the language of the mass — TODO confirm; not used by the importer code shown here.
+ nyelv: string | null;
+}
+
+/** A weekly schedule slot deduced from mass entries. */
+interface ParsedSchedule {
+ // Value of Date#getDay(): 0 = Sunday ... 6 = Saturday.
+ dayOfWeek: number;
+ // "HH:MM"
+ time: string;
+}
+
+/** Running counters for the import. */
+interface ImportStats {
+ churchesFetched: number;
+ churchesMatched: number;
+ churchesCreated: number;
+ churchesSkipped: number;
+ schedulesCreated: number;
+ errors: number;
+}
+
+/** Options parsed from the command line. */
+interface CLIArgs {
+ all: boolean;
+ dryRun: boolean;
+ resumeFrom?: number;
+ churchId?: string;
+ jobId?: string;
+}
+
+// ─── SQLite Helpers ──────────────────────────────────────────────────────────
+
+/**
+ * Run a SQL statement against the downloaded dump with the `sqlite3` CLI.
+ *
+ * @param query SQL text passed to sqlite3 as a single argument.
+ * @returns The trimmed standard output of sqlite3 (one row per line, as the
+ *   callers parse it), or '' when the command fails. Failures are logged so
+ *   that a missing binary or a corrupt file is not mistaken for an empty table.
+ */
+function sqliteQuery(query: string): string {
+  try {
+    return execFileSync('sqlite3', [SQLITE_PATH, query], {
+      encoding: 'utf-8',
+      maxBuffer: 100 * 1024 * 1024, // 100MB
+    }).trim();
+  } catch (error) {
+    console.error(`sqlite3 query failed: ${error instanceof Error ? error.message : error}`);
+    return '';
+  }
+}
+
+/**
+ * Download the miserend SQLite dump to SQLITE_PATH with curl and log its size.
+ *
+ * Throws when curl exits with a non-zero status or does not finish within
+ * 120 seconds.
+ */
+function downloadSqlite(): void {
+  console.log('Downloading miserend SQLite database...');
+  // -f: fail on HTTP error statuses instead of saving the error page as the
+  //     database; -sS: no progress meter but still print errors; -L: follow
+  //     redirects.
+  execFileSync('curl', ['-fsSL', '-o', SQLITE_PATH, SQLITE_URL], { timeout: 120000 });
+  const size = fs.statSync(SQLITE_PATH).size;
+  console.log(`Downloaded ${(size / 1024 / 1024).toFixed(1)}MB`);
+}
+
+/**
+ * Read every church that has non-zero coordinates from the `templomok` table.
+ *
+ * Each output line of sqlite3 is split on '|'; rows whose id or coordinates do
+ * not parse as numbers are dropped.
+ *
+ * @returns The parsed churches, or an empty array when the query yields nothing.
+ */
+function loadChurches(): MiserendChurch[] {
+  const output = sqliteQuery(
+    "SELECT tid, nev, ismertnev, orszag, megye, varos, cim, lat, lng FROM templomok WHERE lat IS NOT NULL AND lng IS NOT NULL AND lat != 0 AND lng != 0;"
+  );
+  if (!output) return [];
+
+  const churches: MiserendChurch[] = [];
+  for (const row of output.split('\n')) {
+    const fields = row.split('|');
+    const tid = parseInt(fields[0]);
+    const lat = parseFloat(fields[7]);
+    const lng = parseFloat(fields[8]);
+    if (isNaN(tid) || isNaN(lat) || isNaN(lng)) continue;
+
+    churches.push({
+      tid,
+      nev: fields[1] || '',
+      ismertnev: fields[2] || null,
+      orszag: fields[3] || null,
+      megye: fields[4] || null,
+      varos: fields[5] || null,
+      cim: fields[6] || null,
+      lat,
+      lng,
+    });
+  }
+  return churches;
+}
+
+/**
+ * Read the `misek` rows that belong to one church.
+ *
+ * @param tid Church id to filter on.
+ * @returns The parsed mass entries; rows without a numeric `mid` or without a
+ *   time are dropped. Empty when the query yields nothing.
+ */
+function loadMassesForChurch(tid: number): MiserendMass[] {
+  const output = sqliteQuery(
+    `SELECT mid, tid, datumtol, datumig, ido, nyelv FROM misek WHERE tid=${tid};`
+  );
+  if (!output) return [];
+
+  const masses: MiserendMass[] = [];
+  for (const row of output.split('\n')) {
+    const fields = row.split('|');
+    const mid = parseInt(fields[0]);
+    const ido = fields[4] || '';
+    if (isNaN(mid) || !ido) continue;
+
+    masses.push({
+      mid,
+      tid: parseInt(fields[1]),
+      datumtol: parseInt(fields[2]),
+      datumig: parseInt(fields[3]),
+      ido,
+      nyelv: fields[5] || null,
+    });
+  }
+  return masses;
+}
+
+// ─── Schedule Deduction ──────────────────────────────────────────────────────
+
+/**
+ * Deduce weekly recurring schedule from date-specific mass entries.
+ * Each entry has datumtol/datumig in MMDD format (e.g., 104 = Jan 4).
+ * We convert each start date (datumtol) to a day of week in the current year
+ * and collect unique day+time combos.
+ *
+ * Entries are skipped when the time is empty or "00:00", or when datumtol is
+ * not a real calendar date in the current year (e.g. 230, or 229 outside a
+ * leap year) — such values would otherwise roll over into the next month and
+ * yield the weekday of a different date.
+ *
+ * @param masses Mass entries of a single church.
+ * @returns Schedule slots in first-seen order, without duplicates.
+ */
+function deduceSchedules(masses: MiserendMass[]): ParsedSchedule[] {
+  const seen = new Set<string>();
+  const schedules: ParsedSchedule[] = [];
+
+  // Use current year for date conversion
+  const year = new Date().getFullYear();
+
+  for (const mass of masses) {
+    const time = mass.ido.substring(0, 5); // HH:MM from HH:MM:SS
+    if (!time || time === '00:00') continue;
+
+    // Convert MMDD to a Date to get day of week
+    const month = Math.floor(mass.datumtol / 100);
+    const day = mass.datumtol % 100;
+    if (month < 1 || month > 12 || day < 1 || day > 31) continue;
+
+    const date = new Date(year, month - 1, day);
+    // Date normalises impossible days instead of rejecting them; a mismatch
+    // (or NaN, for a non-numeric datumtol) means the input was not a real date.
+    if (date.getMonth() !== month - 1 || date.getDate() !== day) continue;
+
+    const dayOfWeek = date.getDay(); // 0=Sun, 1=Mon, ..., 6=Sat
+
+    const key = `${dayOfWeek}:${time}`;
+    if (!seen.has(key)) {
+      seen.add(key);
+      schedules.push({ dayOfWeek, time });
+    }
+  }
+
+  return schedules;
+}
+
+// ─── Database Operations ─────────────────────────────────────────────────────
+
+/**
+ * Load the churches stored for the given countries, restricted to the columns
+ * the duplicate matcher and the update logic read.
+ *
+ * @param countryCodes Country codes to include (matched against `country`).
+ * @returns The existing churches used as the in-memory dedup list.
+ */
+async function loadExistingChurches(countryCodes: string[]): Promise<ExistingChurch[]> {
+  console.log(`Loading existing churches for countries: ${countryCodes.join(', ')}...`);
+  const churches = await prisma.church.findMany({
+    where: { country: { in: countryCodes } },
+    select: {
+      id: true,
+      name: true,
+      latitude: true,
+      longitude: true,
+      // External source identifiers
+      osmId: true,
+      baiduId: true,
+      masstimesId: true,
+      orarimesseId: true,
+      massSchedulesPhId: true,
+      philmassId: true,
+      horariosMisasId: true,
+      mszeInfoId: true,
+      weekdayMassesId: true,
+      messesInfoId: true,
+      bohosluzbyId: true,
+      miserendId: true,
+      kerknetId: true,
+      gottesdienstzeitenId: true,
+      discovermassId: true,
+      // Descriptive fields
+      source: true,
+      website: true,
+      phone: true,
+      address: true,
+    },
+  });
+  console.log(`Loaded ${churches.length} existing churches`);
+  return churches;
+}
+
+// ─── Import Logic ────────────────────────────────────────────────────────────
+
+ /**
+  * Imports one miserend church: links it to an existing record when the matcher
+  * finds a duplicate, otherwise creates a new church, and stores the mass
+  * schedules deduced from the church's masses.
+  *
+  * In dry-run mode only the matched/created counters are updated; no masses are
+  * loaded and nothing is written.
+  *
+  * @param church - Church row as returned by `loadChurches()`.
+  * @param existingChurches - In-memory dedup list; newly created churches are
+  *   appended to it so later candidates can match against them.
+  * @param dryRun - When true, skip all database writes.
+  * @param stats - Counters mutated in place.
+  * @throws Rethrows any error from linking a duplicate that is not a
+  *   unique-constraint violation.
+  */
+ async function processChurch(
+   church: MiserendChurch,
+   existingChurches: ExistingChurch[],
+   dryRun: boolean,
+   stats: ImportStats,
+ ): Promise<void> {
+   const miserendId = String(church.tid);
+   // Rows whose `orszag` value is missing or absent from COUNTRY_MAP default to 'HU'.
+   const country = church.orszag ? (COUNTRY_MAP[church.orszag] || 'HU') : 'HU';
+
+   const candidate = {
+     name: church.nev,
+     lat: church.lat,
+     lng: church.lng,
+     miserendId,
+   };
+
+   const duplicate = findDuplicateChurch(candidate, existingChurches);
+
+   // Dry run: report what would happen and stop before touching masses or the DB.
+   if (dryRun) {
+     if (duplicate) {
+       stats.churchesMatched++;
+     } else {
+       stats.churchesCreated++;
+     }
+     return;
+   }
+
+   // Deduce schedules
+   const masses = loadMassesForChurch(church.tid);
+   const schedules: ParsedSchedule[] = deduceSchedules(masses);
+
+   if (duplicate) {
+     const updateData: { miserendId: string; address?: string } = { miserendId };
+
+     // Only fill in an address when the existing record has none.
+     if (!duplicate.address && church.cim) updateData.address = church.cim;
+
+     try {
+       await prisma.church.update({
+         where: { id: duplicate.id },
+         data: updateData,
+       });
+     } catch (error) {
+       if (error instanceof Error && error.message.includes('Unique constraint')) {
+         stats.churchesSkipped++;
+         return;
+       }
+       throw error;
+     }
+
+     // Count the match only once the link has actually been written, so a
+     // skipped or failed update is not also reported as matched.
+     stats.churchesMatched++;
+
+     if (schedules.length > 0) {
+       try {
+         // Replace the church's schedules atomically with the freshly deduced set.
+         await prisma.$transaction(async (tx) => {
+           await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
+           await tx.massSchedule.createMany({
+             data: schedules.map((s) => ({
+               churchId: duplicate.id,
+               dayOfWeek: s.dayOfWeek,
+               time: s.time,
+               language: 'Hungarian',
+             })),
+           });
+           await tx.church.update({
+             where: { id: duplicate.id },
+             data: { lastScrapedAt: new Date() },
+           });
+         });
+         stats.schedulesCreated += schedules.length;
+       } catch (error) {
+         stats.errors++;
+         console.error(` Error saving schedules for ${miserendId}: ${error instanceof Error ? error.message : error}`);
+       }
+     }
+     return;
+   }
+
+   try {
+     const newChurch = await prisma.church.create({
+       data: {
+         name: church.nev,
+         latitude: church.lat,
+         longitude: church.lng,
+         address: church.cim,
+         city: church.varos,
+         state: church.megye,
+         country,
+         miserendId,
+         source: 'miserend',
+         websiteLanguage: 'hu',
+       },
+     });
+     stats.churchesCreated++;
+
+     // Make the new church visible to the matcher for the rest of this run.
+     existingChurches.push({
+       id: newChurch.id,
+       name: church.nev,
+       latitude: church.lat,
+       longitude: church.lng,
+       osmId: null,
+       baiduId: null,
+       masstimesId: null,
+       orarimesseId: null,
+       massSchedulesPhId: null,
+       philmassId: null,
+       horariosMisasId: null,
+       mszeInfoId: null,
+       weekdayMassesId: null,
+       messesInfoId: null,
+       bohosluzbyId: null,
+       miserendId,
+       kerknetId: null,
+       gottesdienstzeitenId: null,
+       discovermassId: null,
+       source: 'miserend',
+       website: null,
+       phone: null,
+       address: church.cim,
+     });
+
+     if (schedules.length > 0) {
+       await prisma.massSchedule.createMany({
+         data: schedules.map((s) => ({
+           churchId: newChurch.id,
+           dayOfWeek: s.dayOfWeek,
+           time: s.time,
+           language: 'Hungarian',
+         })),
+       });
+       await prisma.church.update({
+         where: { id: newChurch.id },
+         data: { lastScrapedAt: new Date() },
+       });
+       stats.schedulesCreated += schedules.length;
+     }
+   } catch (error) {
+     if (error instanceof Error && error.message.includes('Unique constraint')) {
+       stats.churchesSkipped++;
+       return;
+     }
+     stats.errors++;
+     console.error(` Error creating ${miserendId}: ${error instanceof Error ? error.message : error}`);
+   }
+ }
+
+// ─── CLI ─────────────────────────────────────────────────────────────────────
+
+ /**
+  * Parses the command-line flags from `process.argv`.
+  *
+  * Prints the usage text and exits with code 0 for `--help`/`-h`. Exits with
+  * code 1 when neither `--all` nor `--id` is given, or when `--resume-from` is
+  * not followed by a non-negative integer.
+  *
+  * @returns The parsed options.
+  */
+ function parseArgs(): CLIArgs {
+   const args = process.argv.slice(2);
+   const result: CLIArgs = { all: false, dryRun: false };
+
+   for (let i = 0; i < args.length; i++) {
+     switch (args[i]) {
+       case '--all':
+         result.all = true;
+         break;
+       case '--dry-run':
+         result.dryRun = true;
+         break;
+       case '--resume-from': {
+         // A missing or non-numeric value used to become NaN, which the caller
+         // treats as "no resume" and silently restarts from the beginning.
+         const rawValue = args[++i];
+         const skipCount = Number.parseInt(rawValue, 10);
+         if (Number.isNaN(skipCount) || skipCount < 0) {
+           console.error(`Error: --resume-from expects a non-negative integer, got: ${rawValue}`);
+           process.exit(1);
+         }
+         result.resumeFrom = skipCount;
+         break;
+       }
+       case '--id':
+         result.churchId = args[++i];
+         break;
+       case '--job-id':
+         result.jobId = args[++i];
+         break;
+       case '--help':
+       case '-h':
+         console.log(`
+Usage: npx tsx scripts/import-miserend.ts [options]
+
+Options:
+ --all Import all churches
+ --id Import a single church by miserend ID
+ --dry-run No database writes, just report what would happen
+ --resume-from Skip first N churches
+ --job-id Background job tracking ID
+ --help, -h Show this help message
+
+Examples:
+ npx tsx scripts/import-miserend.ts --id 37 --dry-run
+ npx tsx scripts/import-miserend.ts --all --dry-run
+ npx tsx scripts/import-miserend.ts --all
+`);
+         process.exit(0);
+     }
+   }
+
+   if (!result.all && !result.churchId) {
+     console.error('Error: specify --all or --id ');
+     process.exit(1);
+   }
+
+   return result;
+ }
+
+ /**
+  * Renders a millisecond duration as "Xh Ym Zs", leaving out the hour part when
+  * it is zero and the minute part when both hours and minutes are zero.
+  *
+  * @param ms - Duration in milliseconds.
+  * @returns The formatted duration, e.g. "1h 2m 5s", "1m 5s" or "59s".
+  */
+ function formatDuration(ms: number): string {
+   const totalSeconds = Math.floor(ms / 1000);
+   const totalMinutes = Math.floor(totalSeconds / 60);
+   const wholeHours = Math.floor(totalMinutes / 60);
+
+   const showHours = wholeHours > 0;
+   const showMinutes = showHours || totalMinutes > 0;
+
+   const parts: string[] = [];
+   if (showHours) parts.push(`${wholeHours}h`);
+   if (showMinutes) parts.push(`${totalMinutes % 60}m`);
+   // Seconds wrap at 60 only when a larger unit is shown alongside them.
+   parts.push(showMinutes ? `${totalSeconds % 60}s` : `${totalSeconds}s`);
+   return parts.join(' ');
+ }
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+ /**
+  * Runs the miserend import: parses CLI flags, downloads and reads the SQLite
+  * data, loads the existing churches for the countries involved, processes each
+  * church, prints a summary and, when `--job-id` was given, records progress on
+  * the background job.
+  *
+  * Per-church failures are counted and logged; they do not stop the run.
+  */
+ async function main(): Promise<void> {
+   const args = parseArgs();
+   const startTime = Date.now();
+
+   console.log('\n' + '='.repeat(70));
+   console.log('MISEREND.HU (HUNGARY) IMPORTER');
+   console.log('='.repeat(70));
+   console.log(`Mode: ${args.churchId ? `Church ID ${args.churchId}` : 'All churches'}`);
+   console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
+   if (args.resumeFrom) console.log(`Resume from: church index ${args.resumeFrom}`);
+   console.log(`Time: ${new Date().toISOString()}`);
+   console.log('='.repeat(70) + '\n');
+
+   if (args.jobId) {
+     try {
+       await prisma.backgroundJob.update({
+         where: { id: args.jobId },
+         data: { status: 'running', startedAt: new Date() },
+       });
+     } catch (error) {
+       // Best effort: the job row might not exist. Log instead of failing the import.
+       console.warn(`Could not mark job ${args.jobId} as running: ${error instanceof Error ? error.message : error}`);
+     }
+   }
+
+   const stats: ImportStats = {
+     churchesFetched: 0,
+     churchesMatched: 0,
+     churchesCreated: 0,
+     churchesSkipped: 0,
+     schedulesCreated: 0,
+     errors: 0,
+   };
+
+   // Writes the final counters to the background job. Shared by every exit path
+   // so the job never stays in 'running' after this function returns normally.
+   const reportJobCompletion = async (): Promise<void> => {
+     if (!args.jobId) return;
+     try {
+       await prisma.backgroundJob.update({
+         where: { id: args.jobId },
+         data: {
+           status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
+           completedAt: new Date(),
+           processed: stats.churchesFetched,
+           succeeded: stats.churchesCreated + stats.churchesMatched,
+           failed: stats.errors,
+           itemsFound: stats.schedulesCreated,
+         },
+       });
+     } catch (error) {
+       console.warn(`Could not record completion for job ${args.jobId}: ${error instanceof Error ? error.message : error}`);
+     }
+   };
+
+   // Download SQLite database
+   downloadSqlite();
+
+   // Load churches from SQLite
+   let churches = loadChurches();
+   stats.churchesFetched = churches.length;
+   console.log(`Found ${churches.length} churches with coordinates in SQLite\n`);
+
+   if (args.churchId) {
+     churches = churches.filter(c => String(c.tid) === args.churchId);
+     if (churches.length === 0) {
+       console.error(`Church ID ${args.churchId} not found in SQLite database`);
+       // Count the failed lookup so the job is closed as 'completed_with_errors'.
+       stats.errors++;
+       await reportJobCompletion();
+       return;
+     }
+   }
+
+   // Get unique country codes from the data
+   const countryCodes = [...new Set(churches.map(c => {
+     return c.orszag ? (COUNTRY_MAP[c.orszag] || 'HU') : 'HU';
+   }))];
+   const existingChurches = await loadExistingChurches(countryCodes);
+
+   if (args.resumeFrom) {
+     churches = churches.slice(args.resumeFrom);
+     console.log(`Resuming from index ${args.resumeFrom} (${churches.length} remaining)\n`);
+   }
+
+   console.log(`Processing ${churches.length} churches\n`);
+
+   for (let i = 0; i < churches.length; i++) {
+     const church = churches[i];
+     // Progress line every 200 churches.
+     if (i % 200 === 0) {
+       const elapsed = formatDuration(Date.now() - startTime);
+       console.log(`[${i + 1}/${churches.length}] Processing ${church.nev} (${church.tid}) [${elapsed} elapsed]`);
+     }
+
+     try {
+       await processChurch(church, existingChurches, args.dryRun, stats);
+     } catch (error) {
+       stats.errors++;
+       console.error(` ERROR processing church ${church.tid}: ${error instanceof Error ? error.message : error}`);
+     }
+   }
+
+   const totalTime = Date.now() - startTime;
+   console.log('\n' + '='.repeat(70));
+   console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
+   console.log('='.repeat(70));
+   console.log(`Churches in SQLite: ${stats.churchesFetched}`);
+   console.log(` Matched (existing): ${stats.churchesMatched}`);
+   console.log(` Created (new): ${stats.churchesCreated}`);
+   console.log(` Skipped: ${stats.churchesSkipped}`);
+   console.log(`Schedules created: ${stats.schedulesCreated}`);
+   console.log(`Errors: ${stats.errors}`);
+   console.log(`Total time: ${formatDuration(totalTime)}`);
+   console.log('='.repeat(70) + '\n');
+
+   await reportJobCompletion();
+ }
+
+ // Script entry point. On a fatal error, set the exit code rather than calling
+ // process.exit(): exiting inside the catch handler would end the process before
+ // the finally handler could disconnect Prisma and close the pg pool.
+ main()
+   .catch((error) => {
+     console.error('Fatal error:', error);
+     process.exitCode = 1;
+   })
+   .finally(async () => {
+     await prisma.$disconnect();
+     await pool.end();
+   });
diff --git a/scripts/import-msze-info.ts b/scripts/import-msze-info.ts
new file mode 100644
index 0000000..f224853
--- /dev/null
+++ b/scripts/import-msze-info.ts
@@ -0,0 +1,746 @@
+#!/usr/bin/env tsx
+/**
+ * Import Catholic churches and mass schedules from msze.info (Poland)
+ *
+ * msze.info is a Polish directory of Catholic parishes with mass schedules.
+ * The site uses numbered sitemaps (Churches1.xml through Churches11.xml)
+ * with ~500 URLs each, containing both /kosciol/{id} (church pages) and
+ * /msze-online/{slug} (livestream pages).
+ *
+ * Import strategy:
+ * 1. Fetch all 11 sitemaps → extract /kosciol/{id} URLs (skip /msze-online/)
+ * 2. For each church: fetch HTML, parse name/address/phone/website/schedule
+ * 3. Extract coordinates from embedded tomtom_codeAddress() JS call
+ * 4. Match against existing PL churches, upsert
+ *
+ * Usage:
+ * npx tsx scripts/import-msze-info.ts --all
+ * npx tsx scripts/import-msze-info.ts --all --dry-run
+ * npx tsx scripts/import-msze-info.ts --all --resume-from 500
+ * npx tsx scripts/import-msze-info.ts --all --job-id {uuid}
+ */
+
+import dotenv from 'dotenv';
+import path from 'path';
+
+dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
+dotenv.config({ path: path.resolve(process.cwd(), '.env') });
+
+import { Pool } from 'pg';
+import { PrismaPg } from '@prisma/adapter-pg';
+import { PrismaClient } from '@prisma/client';
+
+ // Fall back to a local development database when DATABASE_URL is not set.
+ const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
+ // Log the connection target with the password portion of the URL masked.
+ console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
+ const pool = new Pool({
+ connectionString: dbUrl,
+ // URLs containing "neon" connect over TLS without certificate verification.
+ ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
+ });
+ // Prisma talks to the database through the pg pool via the PrismaPg adapter.
+ const adapter = new PrismaPg(pool);
+ const prisma = new PrismaClient({ adapter });
+
+import { findDuplicateChurch } from '../src/lib/church-matcher';
+import type { ExistingChurch } from '../src/lib/church-matcher';
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+ // Base URL that sitemap URLs are built from.
+ const SITE_BASE = 'https://www.msze.info';
+ // Number of numbered sitemaps to fetch (Churches1.xml … Churches11.xml).
+ const SITEMAP_COUNT = 11;
+ // User-Agent header sent with every request made by fetchPage.
+ const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
+ // Default pause, in milliseconds, before each request after the first.
+ const REQUEST_DELAY_MS = 1500;
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+ /**
+  * Church details produced by `parseChurchPage` from one church page's HTML.
+  * Contact and address fields are null when absent; coordinates are required.
+  */
+ interface ParsedChurch {
+ name: string;
+ address: string | null;
+ city: string | null;
+ zip: string | null;
+ phone: string | null;
+ website: string | null;
+ email: string | null;
+ latitude: number;
+ longitude: number;
+ }
+
+ /** One weekly mass slot: a day of the week plus a 24-hour "HH:MM" time. */
+ interface ParsedSchedule {
+ dayOfWeek: number; // 0=Sun, 1=Mon, ..., 6=Sat
+ time: string; // "05:00", "18:30"
+ }
+
+ /**
+  * Numeric counters for an import run.
+  * NOTE(review): the code that increments each counter is outside this excerpt;
+  * confirm the exact meaning of each field against its use.
+  */
+ interface ImportStats {
+ churchesFound: number;
+ churchesMatched: number;
+ churchesCreated: number;
+ churchesSkipped: number;
+ schedulesProcessed: number;
+ massSchedulesCreated: number;
+ errors: number;
+ }
+
+ /**
+  * Parsed command-line options; the fields correspond to the --all, --dry-run,
+  * --resume-from and --job-id flags listed in the usage header of this script.
+  */
+ interface CLIArgs {
+ all: boolean;
+ dryRun: boolean;
+ resumeFrom?: number;
+ jobId?: string;
+ }
+
+// ─── HTTP Client ─────────────────────────────────────────────────────────────
+
+ // Number of fetchPage calls started so far; while it is 0 the delay is skipped.
+ let requestCount = 0;
+
+ /**
+  * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
+  *
+  * @param ms - Time to wait, in milliseconds.
+  */
+ function delay(ms: number): Promise<void> {
+   return new Promise<void>((resolve) => setTimeout(resolve, ms));
+ }
+
+ /**
+  * Fetches a URL as text, pausing before every request except the first one.
+  *
+  * Failures are logged and reported as `null` rather than thrown, so callers
+  * can skip a page and continue.
+  *
+  * @param url - Address to fetch.
+  * @param delayMs - Pause before the request, in milliseconds (default REQUEST_DELAY_MS).
+  * @returns The response body, or `null` on a non-2xx status or a fetch error.
+  */
+ async function fetchPage(url: string, delayMs: number = REQUEST_DELAY_MS): Promise<string | null> {
+   // Throttle: every request after the first waits before going out.
+   if (requestCount > 0) {
+     await delay(delayMs);
+   }
+   requestCount++;
+
+   try {
+     const response = await fetch(url, {
+       headers: {
+         'User-Agent': USER_AGENT,
+         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+       },
+     });
+
+     if (!response.ok) {
+       console.error(` HTTP ${response.status} for ${url}`);
+       return null;
+     }
+
+     return await response.text();
+   } catch (error) {
+     console.error(` Fetch error for ${url}: ${error instanceof Error ? error.message : error}`);
+     return null;
+   }
+ }
+
+// ─── Sitemap Parser ──────────────────────────────────────────────────────────
+
+ /**
+  * Collects the numeric church IDs found in the numbered sitemaps
+  * (Churches1.xml … Churches{SITEMAP_COUNT}.xml).
+  *
+  * Only `/kosciol/{id}` entries are captured. A sitemap that cannot be fetched
+  * is logged and skipped.
+  *
+  * @returns Unique church IDs as strings, sorted in ascending numeric order.
+  */
+ async function fetchChurchUrlsFromSitemaps(): Promise<string[]> {
+   const allIds: string[] = [];
+   const seen = new Set<string>();
+
+   for (let i = 1; i <= SITEMAP_COUNT; i++) {
+     const sitemapUrl = `${SITE_BASE}/sitemap/Churches${i}.xml`;
+     console.log(` Fetching ${sitemapUrl}...`);
+     const xml = await fetchPage(sitemapUrl);
+     if (!xml) {
+       console.error(` Failed to fetch ${sitemapUrl}`);
+       continue;
+     }
+
+     // Extract /kosciol/{id} URLs, skip /msze-online/
+     const locRegex = /https?:\/\/(?:www\.)?msze\.info\/kosciol\/(\d+)<\/loc>/g;
+     for (const match of xml.matchAll(locRegex)) {
+       const id = match[1];
+       // The same church may be listed more than once; keep the first occurrence.
+       if (!seen.has(id)) {
+         seen.add(id);
+         allIds.push(id);
+       }
+     }
+   }
+
+   // Sort numerically for deterministic order
+   allIds.sort((a, b) => parseInt(a, 10) - parseInt(b, 10));
+
+   console.log(`Found ${allIds.length} unique church IDs from ${SITEMAP_COUNT} sitemaps`);
+   return allIds;
+ }
+
+// ─── HTML Parsers ────────────────────────────────────────────────────────────
+
+function parseChurchPage(html: string): ParsedChurch {
+ // Name: from Church Name, City
+ const h1Match = html.match(/ |