#!/usr/bin/env tsx
/**
 * Import Catholic churches and mass schedules globally from masstimes.org API
 *
 * masstimes.org has ~121,000 churches worldwide. This script queries their
 * geo-search API with a grid of coordinates covering world landmass, then
 * deduplicates and imports the results.
 *
 * API: GET https://masstimes.org/Churchs/?lat={lat}&long={lng}&pg={page}
 *   - Requires Referer header
 *   - Returns 30 results per page within 100-mile (~160km) radius
 *   - Paginate until empty array
 *
 * Grid strategy:
 *   - 2.5° latitude spacing (~278km), longitude adjusted for latitude
 *   - Continental bounding boxes to skip oceans
 *   - 100-mile radius means ~322km diameter, so neighbouring search circles
 *     overlap along the grid axes
 *
 * NOTE(review): with ~278km x ~250km cells the centre of a cell is roughly
 * 187km from the nearest query point, which is outside a 161km radius, so
 * churches near cell centres may be missed. A spacing of <= ~225km would close
 * the gaps at the cost of more API requests — confirm before changing.
 *
 * Usage:
 *   npx tsx scripts/import-masstimes-api.ts --all
 *   npx tsx scripts/import-masstimes-api.ts --all --dry-run
 *   npx tsx scripts/import-masstimes-api.ts --region europe
 *   npx tsx scripts/import-masstimes-api.ts --all --skip-us
 *   npx tsx scripts/import-masstimes-api.ts --all --resume-from 200
 *   npx tsx scripts/import-masstimes-api.ts --all --job-id {uuid}
 */

import dotenv from 'dotenv';
import path from 'path';
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Mask the password portion of the URL before logging it.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
const pool = new Pool({
  connectionString: dbUrl,
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

import { findDuplicateChurch } from '../src/lib/church-matcher';
import type { ExistingChurch } from '../src/lib/church-matcher';

// ─── Constants ───────────────────────────────────────────────────────────────

const API_BASE = 'https://masstimes.org/Churchs/';
const REFERER = 'https://masstimes.org/map';
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
const RATE_LIMIT_MS = 2000; // 2 seconds between requests — respectful rate
const PAGE_SIZE = 30;
const LAT_SPACING = 2.5; // degrees (~278km)
const TARGET_LNG_SPACING_KM = 250; // target spacing in km
const RATE_LIMIT_BACKOFF_MS = 30000; // pause after an HTTP 429 before retrying
const MAX_RATE_LIMIT_RETRIES = 3; // give up on a page after this many consecutive 429s
const MAX_PAGES_PER_POINT = 1000; // safety ceiling so a misbehaving API cannot paginate forever

// Country name → ISO code mapping for masstimes country names
const COUNTRY_CODE_MAP: Record<string, string> = {
  'united states': 'US', 'canada': 'CA', 'mexico': 'MX',
  'united kingdom': 'GB', 'ireland': 'IE', 'france': 'FR', 'germany': 'DE',
  'spain': 'ES', 'italy': 'IT', 'portugal': 'PT', 'netherlands': 'NL',
  'belgium': 'BE', 'luxembourg': 'LU', 'switzerland': 'CH', 'austria': 'AT',
  'poland': 'PL', 'czech republic': 'CZ', 'czechia': 'CZ', 'slovakia': 'SK',
  'hungary': 'HU', 'croatia': 'HR', 'slovenia': 'SI', 'romania': 'RO',
  'bulgaria': 'BG', 'serbia': 'RS', 'bosnia and herzegovina': 'BA',
  'montenegro': 'ME', 'north macedonia': 'MK', 'albania': 'AL', 'kosovo': 'XK',
  'greece': 'GR', 'cyprus': 'CY', 'malta': 'MT',
  'denmark': 'DK', 'sweden': 'SE', 'norway': 'NO', 'finland': 'FI', 'iceland': 'IS',
  'estonia': 'EE', 'latvia': 'LV', 'lithuania': 'LT',
  'ukraine': 'UA', 'russia': 'RU', 'belarus': 'BY', 'moldova': 'MD',
  'georgia': 'GE', 'armenia': 'AM', 'azerbaijan': 'AZ', 'turkey': 'TR',
  'israel': 'IL', 'jordan': 'JO', 'lebanon': 'LB', 'egypt': 'EG',
  'morocco': 'MA', 'tunisia': 'TN', 'algeria': 'DZ',
  'india': 'IN', 'sri lanka': 'LK', 'pakistan': 'PK', 'bangladesh': 'BD',
  'nepal': 'NP', 'myanmar': 'MM', 'thailand': 'TH', 'vietnam': 'VN',
  'cambodia': 'KH', 'laos': 'LA', 'malaysia': 'MY', 'singapore': 'SG',
  'indonesia': 'ID', 'philippines': 'PH',
  'china': 'CN', 'japan': 'JP', 'south korea': 'KR', 'korea, south': 'KR',
  'taiwan': 'TW', 'hong kong': 'HK', 'macau': 'MO', 'mongolia': 'MN',
  'australia': 'AU', 'new zealand': 'NZ', 'fiji': 'FJ', 'papua new guinea': 'PG',
  'samoa': 'WS', 'tonga': 'TO', 'guam': 'GU',
  'nigeria': 'NG', 'ghana': 'GH', 'kenya': 'KE', 'tanzania': 'TZ', 'uganda': 'UG',
  'south africa': 'ZA', 'cameroon': 'CM', 'senegal': 'SN', 'ethiopia': 'ET',
  'madagascar': 'MG', 'mozambique': 'MZ', 'zambia': 'ZM', 'zimbabwe': 'ZW',
  'malawi': 'MW', 'rwanda': 'RW', 'burundi': 'BI',
  'congo, democratic republic of the': 'CD', 'congo, republic of the': 'CG',
  "côte d'ivoire": 'CI', 'ivory coast': 'CI', 'burkina faso': 'BF',
  'mali': 'ML', 'niger': 'NE', 'chad': 'TD', 'central african republic': 'CF',
  'gabon': 'GA', 'equatorial guinea': 'GQ', 'angola': 'AO', 'namibia': 'NA',
  'botswana': 'BW', 'lesotho': 'LS', 'eswatini': 'SZ', 'swaziland': 'SZ',
  'mauritius': 'MU',
  'brazil': 'BR', 'argentina': 'AR', 'colombia': 'CO', 'peru': 'PE', 'chile': 'CL',
  'venezuela': 'VE', 'ecuador': 'EC', 'bolivia': 'BO', 'paraguay': 'PY',
  'uruguay': 'UY', 'guyana': 'GY', 'suriname': 'SR',
  'trinidad and tobago': 'TT', 'jamaica': 'JM', 'barbados': 'BB',
  'bahamas': 'BS', 'bahamas, the': 'BS', 'haiti': 'HT', 'dominican republic': 'DO',
  'cuba': 'CU', 'puerto rico': 'PR',
  'guatemala': 'GT', 'honduras': 'HN', 'el salvador': 'SV', 'nicaragua': 'NI',
  'costa rica': 'CR', 'panama': 'PA', 'belize': 'BZ',
  'grenada': 'GD', 'saint lucia': 'LC', 'dominica': 'DM',
  'saint vincent and the grenadines': 'VC', 'antigua and barbuda': 'AG',
  'saint kitts and nevis': 'KN', 'bermuda': 'BM', 'cayman islands': 'KY',
  'aruba': 'AW', 'curaçao': 'CW', 'curacao': 'CW',
  'united arab emirates': 'AE', 'saudi arabia': 'SA', 'qatar': 'QA',
  'bahrain': 'BH', 'kuwait': 'KW', 'oman': 'OM', 'iraq': 'IQ', 'iran': 'IR',
  'afghanistan': 'AF', 'kazakhstan': 'KZ', 'uzbekistan': 'UZ',
  'kyrgyzstan': 'KG', 'tajikistan': 'TJ', 'turkmenistan': 'TM',
  'liechtenstein': 'LI', 'monaco': 'MC', 'andorra': 'AD', 'san marino': 'SM',
  'vatican city': 'VA', 'holy see (vatican city)': 'VA',
  'east timor': 'TL', 'timor-leste': 'TL',
};

// Continental bounding boxes (lat_min, lat_max, lng_min, lng_max)
const REGIONS: Record<string, Array<[number, number, number, number]>> = {
  'north-america': [[7, 72, -170, -50]],
  'central-america': [[7, 24, -120, -60]],
  'south-america': [[-56, 13, -82, -34]],
  'europe': [[35, 72, -12, 45]],
  'eastern-europe': [[40, 70, 20, 60]],
  'africa': [[-36, 38, -20, 55]],
  'middle-east': [[12, 42, 25, 65]],
  'south-asia': [[5, 38, 60, 98]],
  'east-asia': [[18, 55, 95, 150]],
  'southeast-asia': [[-12, 22, 92, 142]],
  'oceania': [[-48, -8, 110, 180], [-22, 0, 160, 180]],
  'central-asia': [[35, 55, 45, 90]],
};

// ─── Types ───────────────────────────────────────────────────────────────────

/** One church record as returned by the masstimes.org search endpoint. */
interface MasstimesChurch {
  id: string;
  name: string;
  latitude: string;
  longitude: string;
  church_address_street_address: string;
  church_address_city_name: string;
  church_address_providence_name: string;
  church_address_postal_code: string;
  church_address_country_territory_name: string;
  church_address_county: string | null;
  diocese_name: string;
  phone_number: string;
  email: string;
  url: string;
  pastors_name: string;
  church_worship_times: MasstimesWorshipTime[];
  distance: string;
  wheel_chair_access: boolean;
}

/** One worship-time entry nested inside a MasstimesChurch. */
interface MasstimesWorshipTime {
  day_of_week: string;
  time_start: string;
  time_end: string;
  language: string | null;
  service_typename: string;
  comment: string;
  is_perpetual: boolean;
}

/** Running counters reported in progress lines, job updates and the summary. */
interface ImportStats {
  gridPoints: number;
  apiRequests: number;
  churchesDiscovered: number;
  churchesMatched: number;
  churchesCreated: number;
  churchesSkipped: number;
  massSchedulesCreated: number;
  errors: number;
}

/** Parsed command-line options. */
interface CLIArgs {
  all: boolean;
  region?: string;
  dryRun: boolean;
  skipUs: boolean;
  resumeFrom: number;
  jobId?: string;
}

/** A mass schedule row derived from a masstimes worship time. */
interface ParsedSchedule {
  dayOfWeek: number;
  time: string;
  language: string;
  notes: string | null;
  massType: string | null;
}

/** Mutable state shared by every church processed during one run. */
interface ImportContext {
  dryRun: boolean;
  stats: ImportStats;
  existingChurches: ExistingChurch[];
  masstimesIdSet: Set<string>;
  discoveredIds: Set<string>;
}

// ─── CLI ─────────────────────────────────────────────────────────────────────

/**
 * Parse process.argv into CLIArgs.
 *
 * Exits the process with status 0 for --help and status 1 when neither --all
 * nor --region is given, when --region names an unknown region, or when
 * --resume-from is not a non-negative integer.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = { all: false, dryRun: false, skipUs: false, resumeFrom: 0 };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--all':
        result.all = true;
        break;
      case '--region':
        result.region = args[++i];
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--skip-us':
        result.skipUs = true;
        break;
      case '--resume-from': {
        const parsed = parseInt(args[++i] ?? '', 10);
        // A NaN here would silently disable resuming, so reject it up front.
        if (Number.isNaN(parsed) || parsed < 0) {
          console.error('Error: --resume-from requires a non-negative integer');
          process.exit(1);
        }
        result.resumeFrom = parsed;
        break;
      }
      case '--job-id':
        result.jobId = args[++i];
        break;
      case '--help':
        console.log(`Usage: npx tsx scripts/import-masstimes-api.ts [options]
  --all               Query all regions globally
  --region <name>     Query specific region: ${Object.keys(REGIONS).join(', ')}
  --skip-us           Skip US grid points (already well-covered)
  --dry-run           No database writes
  --resume-from <N>   Skip first N grid points
  --job-id <uuid>     Background job tracking`);
        process.exit(0);
    }
  }

  if (!result.all && !result.region) {
    console.error('Error: specify --all or --region <name>');
    process.exit(1);
  }

  // --all takes precedence over --region, so only validate the name when it is used.
  if (!result.all && result.region && !Object.keys(REGIONS).includes(result.region)) {
    console.error(`Unknown region: ${result.region}`);
    console.error(`Valid regions: ${Object.keys(REGIONS).join(', ')}`);
    process.exit(1);
  }

  return result;
}

// ─── Grid Generation ─────────────────────────────────────────────────────────

/**
 * Build the list of query coordinates for the given regions.
 *
 * Points are rounded to one decimal place and deduplicated across overlapping
 * bounding boxes. Unknown region names are reported and ignored.
 *
 * @param regions Keys of REGIONS to cover.
 * @param skipUs When true, points inside the continental-US box
 *   (lat 24..50, lng -125..-66) are omitted.
 */
function generateGridPoints(regions: string[], skipUs: boolean): Array<{ lat: number; lng: number }> {
  const points: Array<{ lat: number; lng: number }> = [];
  const seen = new Set<string>();

  for (const regionName of regions) {
    const boxes = REGIONS[regionName];
    if (!boxes) {
      console.error(`Unknown region: ${regionName}`);
      continue;
    }

    for (const [latMin, latMax, lngMin, lngMax] of boxes) {
      for (let lat = latMin; lat <= latMax; lat += LAT_SPACING) {
        // Adjust longitude spacing based on latitude (degrees get narrower)
        const kmPerDegreeLng = 111.32 * Math.cos((lat * Math.PI) / 180);
        const lngSpacing = kmPerDegreeLng > 0
          ? Math.max(LAT_SPACING, TARGET_LNG_SPACING_KM / kmPerDegreeLng)
          : LAT_SPACING;

        for (let lng = lngMin; lng <= lngMax; lng += lngSpacing) {
          const roundedLat = Math.round(lat * 10) / 10;
          const roundedLng = Math.round(lng * 10) / 10;
          const key = `${roundedLat},${roundedLng}`;
          if (seen.has(key)) continue;

          // Skip US continental bounding box if requested
          if (skipUs && roundedLat >= 24 && roundedLat <= 50 && roundedLng >= -125 && roundedLng <= -66) {
            continue;
          }

          seen.add(key);
          points.push({ lat: roundedLat, lng: roundedLng });
        }
      }
    }
  }

  return points;
}

// ─── API ─────────────────────────────────────────────────────────────────────

/** Resolve after `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Fetch one page of search results around a coordinate.
 *
 * On HTTP 429 the request is retried after RATE_LIMIT_BACKOFF_MS, at most
 * MAX_RATE_LIMIT_RETRIES times.
 *
 * @throws Error on any other non-2xx status, when the 429 retries are
 *   exhausted, or when the response body is not a JSON array.
 */
async function fetchPage(lat: number, lng: number, page: number): Promise<MasstimesChurch[]> {
  const url = `${API_BASE}?lat=${lat}&long=${lng}&pg=${page}`;

  for (let attempt = 0; ; attempt++) {
    const response = await fetch(url, {
      headers: {
        'Referer': REFERER,
        'User-Agent': USER_AGENT,
        'Accept': 'application/json',
      },
    });

    if (response.ok) {
      const body: unknown = await response.json();
      // Callers read .length and spread the result, so fail loudly on any other shape.
      if (!Array.isArray(body)) {
        throw new Error(`Unexpected non-array response for ${url}`);
      }
      return body as MasstimesChurch[];
    }

    if (response.status === 429 && attempt < MAX_RATE_LIMIT_RETRIES) {
      console.error(` Rate limited (429) — backing off 30s`);
      await sleep(RATE_LIMIT_BACKOFF_MS);
      continue;
    }

    throw new Error(`HTTP ${response.status} for ${url}`);
  }
}

/**
 * Fetch every page of results for one grid point.
 *
 * Stops at the first page shorter than PAGE_SIZE (including an empty one) or
 * after MAX_PAGES_PER_POINT pages. Increments `stats.apiRequests` once per
 * page requested and sleeps RATE_LIMIT_MS between pages.
 */
async function fetchAllForPoint(
  lat: number,
  lng: number,
  stats: ImportStats,
): Promise<MasstimesChurch[]> {
  const allChurches: MasstimesChurch[] = [];

  for (let page = 1; page <= MAX_PAGES_PER_POINT; page++) {
    stats.apiRequests++;
    const results = await fetchPage(lat, lng, page);
    allChurches.push(...results);
    if (results.length < PAGE_SIZE) return allChurches; // last page
    await sleep(RATE_LIMIT_MS);
  }

  console.error(` Page limit (${MAX_PAGES_PER_POINT}) reached at (${lat},${lng}) — results may be truncated`);
  return allChurches;
}

// ─── Data Conversion ─────────────────────────────────────────────────────────

/** Map a masstimes country name to its ISO code, or 'XX' when unknown or empty. */
function resolveCountryCode(countryName: string): string {
  if (!countryName) return 'XX';
  const lower = countryName.trim().toLowerCase();
  return COUNTRY_CODE_MAP[lower] || 'XX';
}

// Lower-cased masstimes day name → day-of-week numbers (0 = Sunday).
const DAY_MAP: Record<string, number[]> = {
  'sunday': [0],
  'monday': [1],
  'tuesday': [2],
  'wednesday': [3],
  'thursday': [4],
  'friday': [5],
  'saturday': [6],
  'weekdays': [1, 2, 3, 4, 5],
};

/**
 * Convert masstimes worship times into schedule rows.
 *
 * Only 'Weekend' and 'Week Days' service types are kept. Entries with a
 * missing or '00:00:00' start time, or a day name not in DAY_MAP, are dropped.
 * A 'weekdays' entry expands to one row per day, Monday to Friday.
 */
function parseWorshipTimes(times: MasstimesWorshipTime[]): ParsedSchedule[] {
  const schedules: ParsedSchedule[] = [];

  for (const wt of times) {
    // Only import mass services (Weekend = Sun/Sat, Week Days = weekday masses)
    if (wt.service_typename !== 'Weekend' && wt.service_typename !== 'Week Days') {
      continue;
    }

    const timeStr = wt.time_start?.trim();
    if (!timeStr || timeStr === '00:00:00') continue;

    // Parse "HH:MM:SS" → "HH:MM"
    const [hours = '', minutes] = timeStr.split(':');
    const time24 = `${hours.padStart(2, '0')}:${minutes || '00'}`;

    const language = wt.language?.trim() || 'Unknown';
    const notes = wt.comment?.trim() || null;

    const dayKey = wt.day_of_week?.trim().toLowerCase() ?? '';
    const days = DAY_MAP[dayKey];
    if (!days) continue;

    for (const day of days) {
      schedules.push({ dayOfWeek: day, time: time24, language, notes, massType: null });
    }
  }

  return schedules;
}

/** Join the non-blank address parts of a church with ', ', or null if none. */
function buildAddress(mc: MasstimesChurch): string | null {
  return [
    mc.church_address_street_address,
    mc.church_address_city_name,
    mc.church_address_providence_name,
    mc.church_address_postal_code,
  ].filter(s => s?.trim()).join(', ').trim() || null;
}

// ─── Database ────────────────────────────────────────────────────────────────

/** Load the fields of every stored church that deduplication needs. */
async function loadExistingChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing churches for deduplication...');
  const churches = await prisma.church.findMany({
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
      country: true,
    },
  });
  console.log(`Loaded ${churches.length} existing churches`);
  return churches;
}

/** Write the current counters to the background job; failures are logged, not thrown. */
async function updateJobProgress(jobId: string, stats: ImportStats): Promise<void> {
  try {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: {
        processed: stats.gridPoints,
        succeeded: stats.churchesMatched + stats.churchesCreated,
        failed: stats.errors,
        itemsFound: stats.churchesDiscovered,
      },
    });
  } catch (err) {
    console.error(`Failed to update job progress:`, err);
  }
}

/** True when `error` is an Error whose message mentions a unique-constraint violation. */
function isUniqueConstraintError(error: unknown): boolean {
  return error instanceof Error && error.message.includes('Unique constraint');
}

// ─── Main Import ─────────────────────────────────────────────────────────────

/**
 * Import a single church returned by the API.
 *
 * Order of checks: already seen in this run → already linked by masstimesId →
 * invalid coordinates → proximity/name match against existing churches →
 * otherwise create. Updates `ctx.stats` and the dedup collections in place.
 *
 * @returns true when the church was created (or would be, in a dry run).
 */
async function processChurch(mc: MasstimesChurch, ctx: ImportContext): Promise<boolean> {
  const { stats, dryRun, existingChurches, masstimesIdSet, discoveredIds } = ctx;

  // Neighbouring grid points overlap, so the same church is returned repeatedly.
  if (discoveredIds.has(mc.id)) return false;
  discoveredIds.add(mc.id);
  stats.churchesDiscovered++;

  // Already in DB by masstimesId
  if (masstimesIdSet.has(mc.id)) {
    stats.churchesMatched++;
    return false;
  }

  const churchLat = parseFloat(mc.latitude);
  const churchLng = parseFloat(mc.longitude);
  if (isNaN(churchLat) || isNaN(churchLng) || (churchLat === 0 && churchLng === 0)) return false;

  const country = resolveCountryCode(mc.church_address_country_territory_name);
  const address = buildAddress(mc);

  // Proximity + name match
  const candidate = { name: mc.name, lat: churchLat, lng: churchLng };
  const duplicate = findDuplicateChurch(candidate, existingChurches);

  if (duplicate) {
    stats.churchesMatched++;
    if (dryRun) return false;

    // Link the masstimes ID and fill in only the fields the stored row lacks.
    const updateData: {
      masstimesId: string;
      phone?: string;
      website?: string;
      hasWebsite?: boolean;
      address?: string;
      country?: string;
    } = { masstimesId: mc.id };
    if (!duplicate.phone && mc.phone_number?.trim()) updateData.phone = mc.phone_number.trim();
    if (!duplicate.website && mc.url?.trim()) {
      updateData.website = mc.url.trim();
      updateData.hasWebsite = true;
    }
    if (!duplicate.address && address) updateData.address = address;
    if (duplicate.country === 'XX' && country !== 'XX') updateData.country = country;

    try {
      await prisma.church.update({ where: { id: duplicate.id }, data: updateData });
      masstimesIdSet.add(mc.id);
    } catch (error) {
      if (isUniqueConstraintError(error)) {
        stats.churchesSkipped++;
      } else {
        // Count and log like the create path does, so one bad row does not
        // abandon the remaining churches of this grid point.
        stats.errors++;
        console.error(` Error updating ${mc.name}: ${error instanceof Error ? error.message : error}`);
      }
    }
    return false;
  }

  // Create new church
  const schedules = parseWorshipTimes(mc.church_worship_times || []);

  if (dryRun) {
    stats.churchesCreated++;
    stats.massSchedulesCreated += schedules.length;
    return true;
  }

  try {
    const newChurch = await prisma.church.create({
      data: {
        name: mc.name,
        latitude: churchLat,
        longitude: churchLng,
        address,
        city: mc.church_address_city_name?.trim() || null,
        state: mc.church_address_providence_name?.trim() || null,
        zip: mc.church_address_postal_code?.trim() || null,
        country,
        phone: mc.phone_number?.trim() || null,
        website: mc.url?.trim() || null,
        email: mc.email?.trim() || null,
        hasWebsite: !!mc.url?.trim(),
        masstimesId: mc.id,
        source: 'masstimes',
        diocese: mc.diocese_name?.trim() || null,
        pastorName: mc.pastors_name?.trim() || null,
        wheelchairAccess: mc.wheel_chair_access || false,
        massSchedules: schedules.length > 0
          ? {
              create: schedules.map(s => ({
                dayOfWeek: s.dayOfWeek,
                time: s.time,
                language: s.language,
                notes: s.notes,
                massType: s.massType,
                isActive: true,
              })),
            }
          : undefined,
      },
    });

    stats.churchesCreated++;
    stats.massSchedulesCreated += schedules.length;
    masstimesIdSet.add(mc.id);

    // Make the new row visible to later proximity matches in this run.
    existingChurches.push({
      id: newChurch.id,
      name: mc.name,
      latitude: churchLat,
      longitude: churchLng,
      osmId: null,
      baiduId: null,
      masstimesId: mc.id,
      orarimesseId: null,
      massSchedulesPhId: null,
      philmassId: null,
      horariosMisasId: null,
      mszeInfoId: null,
      weekdayMassesId: null,
      messesInfoId: null,
      bohosluzbyId: null,
      miserendId: null,
      kerknetId: null,
      gottesdienstzeitenId: null,
      discovermassId: null,
      source: 'masstimes',
      website: mc.url?.trim() || null,
      phone: mc.phone_number?.trim() || null,
      address,
      country,
    });
    return true;
  } catch (error) {
    if (isUniqueConstraintError(error)) {
      stats.churchesSkipped++;
    } else {
      stats.errors++;
      console.error(` Error creating ${mc.name}: ${error instanceof Error ? error.message : error}`);
    }
    return false;
  }
}

/**
 * Entry point: build the grid, query every point, import the results and
 * print a summary. Database connections are closed on both success and failure.
 */
async function main(): Promise<void> {
  const args = parseArgs();

  // parseArgs guarantees a region is present whenever --all is absent.
  const regionNames = args.all ? Object.keys(REGIONS) : (args.region ? [args.region] : []);
  const gridPoints = generateGridPoints(regionNames, args.skipUs);

  console.log(`\n${'='.repeat(70)}`);
  console.log('MASSTIMES.ORG API GLOBAL IMPORTER');
  console.log('='.repeat(70));
  console.log(`Regions: ${regionNames.join(', ')}`);
  console.log(`Grid points: ${gridPoints.length}`);
  console.log(`Skip US: ${args.skipUs ? 'YES' : 'NO'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES' : 'NO'}`);
  console.log(`Rate limit: ${RATE_LIMIT_MS}ms between requests`);
  console.log(`Resume from: ${args.resumeFrom || 'start'}`);
  const estHours = Math.round(gridPoints.length * 2 * RATE_LIMIT_MS / 1000 / 3600 * 10) / 10;
  console.log(`Est. time: ~${estHours} hours (est. 2 pages/point avg)`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70));

  try {
    const existingChurches = await loadExistingChurches();

    // Build masstimesId lookup for fast dedup
    const masstimesIdSet = new Set<string>();
    for (const c of existingChurches) {
      if (c.masstimesId) masstimesIdSet.add(c.masstimesId);
    }

    const stats: ImportStats = {
      gridPoints: 0,
      apiRequests: 0,
      churchesDiscovered: 0,
      churchesMatched: 0,
      churchesCreated: 0,
      churchesSkipped: 0,
      massSchedulesCreated: 0,
      errors: 0,
    };

    const ctx: ImportContext = {
      dryRun: args.dryRun,
      stats,
      existingChurches,
      masstimesIdSet,
      // Track discovered IDs to deduplicate across grid points
      discoveredIds: new Set<string>(),
    };

    const jobId = args.jobId;
    if (jobId) {
      await prisma.backgroundJob.update({
        where: { id: jobId },
        data: { status: 'running', startedAt: new Date(), totalItems: gridPoints.length },
      });
    }

    const startTime = Date.now();

    for (const [i, { lat, lng }] of gridPoints.entries()) {
      // Skipped points still count so that "processed" lines up with totalItems.
      stats.gridPoints++;
      if (i < args.resumeFrom) continue;

      try {
        const churches = await fetchAllForPoint(lat, lng, stats);

        let newInPoint = 0;
        for (const mc of churches) {
          if (await processChurch(mc, ctx)) newInPoint++;
        }

        if (newInPoint > 0) {
          console.log(` Grid ${i + 1}/${gridPoints.length} (${lat},${lng}): ${churches.length} found, ${newInPoint} new`);
        }

        await sleep(RATE_LIMIT_MS);
      } catch (error) {
        stats.errors++;
        console.error(` Error at grid ${i + 1} (${lat},${lng}): ${error instanceof Error ? error.message : error}`);
        await sleep(RATE_LIMIT_MS * 2);
      }

      // Progress every 50 points
      if ((i + 1) % 50 === 0 || i === gridPoints.length - 1) {
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = elapsed > 0 ? Math.round(stats.apiRequests / elapsed * 3600) : 0;
        console.log(` Progress: ${i + 1}/${gridPoints.length} grid points, ${stats.churchesDiscovered} discovered, ${stats.churchesCreated} new, ${stats.apiRequests} API calls [${Math.round(elapsed)}s, ~${rate}/hr]`);
      }

      if (jobId && (i + 1) % 20 === 0) {
        await updateJobProgress(jobId, stats);
      }
    }

    if (jobId) {
      await updateJobProgress(jobId, stats);
      await prisma.backgroundJob.update({
        where: { id: jobId },
        data: { status: 'completed', completedAt: new Date() },
      });
    }

    const elapsed = (Date.now() - startTime) / 1000;
    console.log(`\n${'='.repeat(70)}`);
    console.log('MASSTIMES API IMPORT SUMMARY');
    console.log('='.repeat(70));
    console.log(`Grid points queried: ${stats.gridPoints}`);
    console.log(`API requests: ${stats.apiRequests}`);
    console.log(`Churches discovered: ${stats.churchesDiscovered}`);
    console.log(`Churches matched: ${stats.churchesMatched} (already in DB)`);
    console.log(`Churches created: ${stats.churchesCreated}`);
    console.log(`Churches skipped: ${stats.churchesSkipped} (duplicates)`);
    console.log(`Mass schedules created: ${stats.massSchedulesCreated}`);
    console.log(`Errors: ${stats.errors}`);
    console.log(`Elapsed: ${Math.round(elapsed)}s (${(elapsed / 3600).toFixed(1)}h)`);
    console.log('='.repeat(70));
  } finally {
    // NOTE(review): on a fatal error the background job is left in 'running';
    // the failure status value is not visible from this file — confirm and set it here.
    await prisma.$disconnect().catch((err: unknown) => console.error('Failed to disconnect Prisma:', err));
    await pool.end().catch((err: unknown) => console.error('Failed to close connection pool:', err));
  }
}

main().catch((error: unknown) => {
  console.error(`Fatal error: ${error instanceof Error ? error.message : String(error)}`);
  process.exit(1);
});