diff --git a/package.json b/package.json index 453f9ba..8318bb6 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,6 @@ "scrape:diocese": "tsx scripts/scrape-diocese-directory.ts", "setup:diocese": "tsx scripts/setup-diocese.ts", "import:gcatholic": "tsx scripts/import-gcatholic.ts", - "import:buscarmisas-network": "tsx scripts/import-buscarmisas-network.ts", "import:orarimesse": "tsx scripts/import-orarimesse.ts", "import:mass-schedules-ph": "tsx scripts/import-mass-schedules-ph.ts", "import:philmass": "tsx scripts/import-philmass.ts", diff --git a/prisma/schema.prisma b/prisma/schema.prisma index d7ea50e..c382713 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -42,9 +42,9 @@ model Church { messesInfoId String? @unique @map("messes_info_id") bohosluzbyId String? @unique @map("bohosluzby_id") miserendId String? @unique @map("miserend_id") - kerknetId String? @unique @map("kerknet_id") - gottesdienstzeitenId String? @unique @map("gottesdienstzeiten_id") discovermassId String? @unique @map("discovermass_id") + gottesdienstzeitenId String? @unique @map("gottesdienstzeiten_id") + kerknetId String? @unique @map("kerknet_id") buscarmisasNetworkId String? @unique @map("buscarmisas_network_id") claimed Boolean @default(false) claimedAt DateTime? @map("claimed_at") @@ -95,9 +95,9 @@ model Church { @@index([messesInfoId]) @@index([bohosluzbyId]) @@index([miserendId]) - @@index([kerknetId]) - @@index([gottesdienstzeitenId]) @@index([discovermassId]) + @@index([gottesdienstzeitenId]) + @@index([kerknetId]) @@index([buscarmisasNetworkId]) @@index([dioceseId]) @@index([claimedByUserId]) diff --git a/scripts/import-baidu-churches.ts b/scripts/import-baidu-churches.ts index b62f6c6..98afd68 100644 --- a/scripts/import-baidu-churches.ts +++ b/scripts/import-baidu-churches.ts @@ -178,7 +178,6 @@ async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: s miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -259,7 +258,6 @@ async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: s miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'baidu', website: baiduChurch.website || null, phone: baiduChurch.phone || null, diff --git a/scripts/import-bohosluzby.ts b/scripts/import-bohosluzby.ts index f7888bd..e768f72 100644 --- a/scripts/import-bohosluzby.ts +++ b/scripts/import-bohosluzby.ts @@ -287,7 +287,6 @@ async function loadExistingCzechChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -415,7 +414,6 @@ async function processChurch( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'bohosluzby', website: null, phone: null, diff --git a/scripts/import-discovermass.ts b/scripts/import-discovermass.ts index ff60184..1b75ede 100644 --- a/scripts/import-discovermass.ts +++ b/scripts/import-discovermass.ts @@ -94,6 +94,7 @@ interface CLIArgs { all: boolean; dryRun: boolean; resumeFrom?: number; + limit?: number; jobId?: string; } @@ -507,6 +508,7 @@ function parseCLIArgs(): CLIArgs { case '--all': result.all = true; break; case '--dry-run': result.dryRun = true; break; case '--resume-from': result.resumeFrom = parseInt(args[++i], 10); break; + case '--limit': result.limit = parseInt(args[++i], 10); break; case '--job-id': result.jobId = args[++i]; break; } } @@ -540,14 +542,25 @@ async function main() { try { const urls = await getAllChurchUrls(); const existingChurches = await loadExistingChurches(); + + // Skip already-imported churches — check discovermassId set in DB + const importedSlugs = new Set( + existingChurches.filter(c => c.discovermassId).map(c => c.discovermassId!) + ); + + // Apply --resume-from first, then filter to unimported, then apply --limit const startIdx = args.resumeFrom ?? 0; - const churchUrls = urls.slice(startIdx); - console.log(`\nProcessing ${churchUrls.length} churches (starting from index ${startIdx})...\n`); + const candidateUrls = urls.slice(startIdx).filter(url => { + const slug = url.replace('https://discovermass.com/church/', '').replace(/\/$/, ''); + return !importedSlugs.has(slug); + }); + const churchUrls = args.limit ? candidateUrls.slice(0, args.limit) : candidateUrls; + + console.log(`\nSitemap total: ${urls.length} | Already imported: ${importedSlugs.size} | This run: ${churchUrls.length}${args.limit ? ` (limit ${args.limit})` : ''}\n`); for (let i = 0; i < churchUrls.length; i++) { const url = churchUrls[i]; - const overallIdx = startIdx + i; - console.log(`[${overallIdx + 1}/${urls.length}] ${url}`); + console.log(`[${i + 1}/${churchUrls.length}] ${url}`); await processChurch(url, existingChurches, args, stats); if (i < churchUrls.length - 1) { await sleep(REQUEST_DELAY_MS); diff --git a/scripts/import-gcatholic.ts b/scripts/import-gcatholic.ts index 8151c16..8d66daf 100644 --- a/scripts/import-gcatholic.ts +++ b/scripts/import-gcatholic.ts @@ -401,7 +401,6 @@ async function loadExistingChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -516,7 +515,6 @@ async function importChurch( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'gcatholic', website: church.website || null, phone: church.phone || null, diff --git a/scripts/import-gottesdienstzeiten.ts b/scripts/import-gottesdienstzeiten.ts index d9ab654..5882bf9 100644 --- a/scripts/import-gottesdienstzeiten.ts +++ b/scripts/import-gottesdienstzeiten.ts @@ -316,7 +316,6 @@ async function loadExistingGermanChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -479,7 +478,6 @@ async function processDiocese( miserendId: null, kerknetId: null, gottesdienstzeitenId: gdzId, - discovermassId: null, source: 'gottesdienstzeiten', website: church.website, phone: church.phone, diff --git a/scripts/import-horariosmisas.ts b/scripts/import-horariosmisas.ts index fb4a53f..3718914 100644 --- a/scripts/import-horariosmisas.ts +++ b/scripts/import-horariosmisas.ts @@ -570,7 +570,6 @@ async function loadExistingSpanishChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -783,7 +782,6 @@ async function processChurch( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'horariosmisas', website: parsed.website, phone: parsed.phone, diff --git a/scripts/import-kerknet.ts b/scripts/import-kerknet.ts index a03167a..32c54e8 100644 --- a/scripts/import-kerknet.ts +++ b/scripts/import-kerknet.ts @@ -343,7 +343,6 @@ async function loadExistingBelgianChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -490,8 +489,6 @@ async function processChurch( bohosluzbyId: null, miserendId: null, kerknetId, - gottesdienstzeitenId: null, - discovermassId: null, source: 'kerknet', website: church.website, phone: null, diff --git a/scripts/import-mass-schedules-ph.ts b/scripts/import-mass-schedules-ph.ts index 827e608..01df073 100644 --- a/scripts/import-mass-schedules-ph.ts +++ b/scripts/import-mass-schedules-ph.ts @@ -290,7 +290,6 @@ async function loadExistingPhilippineChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -465,7 +464,6 @@ async function processChurch( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'mass-schedules-ph', website: null, phone: parsed.phone, diff --git a/scripts/import-masstimes-api.ts b/scripts/import-masstimes-api.ts index bcc8c33..ddd7d74 100644 --- a/scripts/import-masstimes-api.ts +++ b/scripts/import-masstimes-api.ts @@ -398,7 +398,6 @@ async function loadExistingChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -596,7 +595,7 @@ async function main() { orarimesseId: null, massSchedulesPhId: null, philmassId: null, horariosMisasId: null, mszeInfoId: null, weekdayMassesId: null, - messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null, discovermassId: null, + messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null, source: 'masstimes', website: mc.url?.trim() || null, phone: mc.phone_number?.trim() || null, address, country, }); diff --git a/scripts/import-messesinfo.ts b/scripts/import-messesinfo.ts index 974ddac..36cbdd3 100644 --- a/scripts/import-messesinfo.ts +++ b/scripts/import-messesinfo.ts @@ -326,7 +326,6 @@ async function loadExistingFrenchChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -480,7 +479,6 @@ async function processDiocese( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'messes-info', website: null, phone: null, diff --git a/scripts/import-miserend.ts b/scripts/import-miserend.ts index 591be32..70e9823 100644 --- a/scripts/import-miserend.ts +++ b/scripts/import-miserend.ts @@ -240,7 +240,6 @@ async function loadExistingChurches(countryCodes: string[]): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -538,7 +537,6 @@ async function processChurch( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'msze-info', website: parsed.website, phone: parsed.phone, diff --git a/scripts/import-orarimesse.ts b/scripts/import-orarimesse.ts index 54065ac..93fa859 100644 --- a/scripts/import-orarimesse.ts +++ b/scripts/import-orarimesse.ts @@ -283,7 +283,6 @@ async function loadExistingItalianChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -425,7 +424,6 @@ async function processChurchesForDiocese( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'orarimesse', website: church.sito || null, phone: null, diff --git a/scripts/import-osm-churches.ts b/scripts/import-osm-churches.ts index 753acac..40b3b90 100644 --- a/scripts/import-osm-churches.ts +++ b/scripts/import-osm-churches.ts @@ -204,7 +204,6 @@ async function importFromOSM(countryCode: string, dryRun: boolean = false): Prom miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -380,7 +379,6 @@ async function importFromOSM(countryCode: string, dryRun: boolean = false): Prom miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'osm', website: osmChurch.website || null, phone: osmChurch.phone || null, diff --git a/scripts/import-osm-region.ts b/scripts/import-osm-region.ts index a0a7bc0..72a0b3e 100644 --- a/scripts/import-osm-region.ts +++ b/scripts/import-osm-region.ts @@ -152,7 +152,6 @@ async function importFromRegion(countryCode: string, regionName: string, dryRun: miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -256,7 +255,6 @@ async function importFromRegion(countryCode: string, regionName: string, dryRun: miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'osm', website: osmChurch.website || null, phone: osmChurch.phone || null, diff --git a/scripts/import-philmass.ts b/scripts/import-philmass.ts index fe6cc2a..e8e5446 100644 --- a/scripts/import-philmass.ts +++ b/scripts/import-philmass.ts @@ -301,7 +301,6 @@ async function loadExistingPhilippineChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, diff --git a/scripts/import-weekdaymasses.ts b/scripts/import-weekdaymasses.ts index e5af79a..676210d 100644 --- a/scripts/import-weekdaymasses.ts +++ b/scripts/import-weekdaymasses.ts @@ -822,7 +822,6 @@ async function loadExistingChurches(): Promise { miserendId: true, kerknetId: true, gottesdienstzeitenId: true, - discovermassId: true, source: true, website: true, phone: true, @@ -982,7 +981,6 @@ async function importAreaBlocks( miserendId: null, kerknetId: null, gottesdienstzeitenId: null, - discovermassId: null, source: 'weekdaymasses', website: church.website, phone: church.phone, diff --git a/src/lib/church-matcher.ts b/src/lib/church-matcher.ts index 371db0d..6290980 100644 --- a/src/lib/church-matcher.ts +++ b/src/lib/church-matcher.ts @@ -27,8 +27,8 @@ export interface ExistingChurch { miserendId: string | null; kerknetId: string | null; gottesdienstzeitenId: string | null; - discovermassId: string | null; - buscarmisasNetworkId: string | null; + discovermassId?: string | null; + buscarmisasNetworkId?: string | null; source: string; website: string | null; phone: string | null; @@ -138,8 +138,6 @@ export type ChurchCandidate = { miserendId?: string; kerknetId?: string; gottesdienstzeitenId?: string; - discovermassId?: string; - buscarmisasNetworkId?: string; }; /** @@ -149,8 +147,8 @@ export type ChurchCandidate = { * Matching strategy (in priority order): * 1. Exact osmId match * 2. Exact baiduId match - * 3-15. Exact importer ID matches (orarimesse, massSchedulesPh, philmass, horariosMisas, mszeInfo, weekdayMasses, messesInfo, bohosluzby, miserend, kerknet, gottesdienstzeiten, discovermass, buscarmisasNetwork) - * 16. Proximity + name similarity (within 200m + similar name) + * 3-9. Exact importer ID matches (orarimesse, massSchedulesPh, philmass, horariosMisas, mszeInfo, weekdayMasses, messesInfo) + * 10. Proximity + name similarity (within 200m + similar name) */ export function findDuplicateChurch( candidate: ChurchCandidate, @@ -260,23 +258,7 @@ export function findDuplicateChurch( if (gdzMatch) return gdzMatch; } - // Fourteenth pass: exact discovermassId match - if (candidate.discovermassId) { - const match = existingChurches.find( - (church) => church.discovermassId === candidate.discovermassId - ); - if (match) return match; - } - - // Fifteenth pass: exact buscarmisasNetworkId match - if (candidate.buscarmisasNetworkId) { - const match = existingChurches.find( - (church) => church.buscarmisasNetworkId === candidate.buscarmisasNetworkId - ); - if (match) return match; - } - - // Sixteenth pass: proximity + name match (skip if candidate has no real coordinates) + // Fourteenth pass: proximity + name match (skip if candidate has no real coordinates) if (candidate.lat === 0 && candidate.lng === 0) { return null; } diff --git a/src/lib/overpass-client.ts b/src/lib/overpass-client.ts new file mode 100644 index 0000000..b711a5e --- /dev/null +++ b/src/lib/overpass-client.ts @@ -0,0 +1,472 @@ +/** + * Overpass API Client for querying OpenStreetMap data + * Used to import Catholic churches globally + */ + +export interface OSMChurch { + osmId: string; // "node/12345" or "way/67890" + name: string; + lat: number; + lng: number; + address?: string; + city?: string; + state?: string; + zip?: string; + country?: string; // ISO 3166-1 alpha-2 code + phone?: string; + website?: string; + diocese?: string; + wheelchairAccess?: boolean; + serviceTimes?: string; // OSM service_times tag (opening_hours syntax) +} + +// Public Overpass API endpoints for failover +const OVERPASS_ENDPOINTS = [ + 'https://overpass-api.de/api/interpreter', + 'https://overpass.osm.ch/api/interpreter', + 'https://overpass.kumi.systems/api/interpreter', +]; + +// Regional bounding boxes for countries that timeout on area queries +export const COUNTRY_BOUNDING_BOXES: Record> = { + GB: [ + { name: "England South", south: 49.9, west: -5.8, north: 52.5, east: 1.8 }, + { name: "England North + Wales", south: 52.5, west: -5.8, north: 55.8, east: 1.8 }, + { name: "Scotland", south: 55.0, west: -8.0, north: 60.9, east: -0.7 }, + { name: "Northern Ireland", south: 54.0, west: -8.2, north: 55.4, east: -5.4 }, + ], + PL: [ + { name: "North", south: 52.0, west: 14.0, north: 54.9, east: 24.2 }, + { name: "South", south: 49.0, west: 14.0, north: 52.0, east: 24.2 }, + ], + PT: [ + { name: "North", south: 40.0, west: -9.6, north: 42.2, east: -6.0 }, + { name: "South", south: 36.9, west: -9.6, north: 40.0, east: -6.0 }, + ], + IT: [ + { name: "North", south: 44.0, west: 6.6, north: 47.1, east: 13.8 }, + { name: "Central", south: 41.0, west: 9.5, north: 44.0, east: 15.0 }, + { name: "South + Sicily", south: 36.6, west: 12.4, north: 41.0, east: 18.6 }, + { name: "Sardinia", south: 38.8, west: 8.1, north: 41.3, east: 9.9 }, + ], + FR: [ + { name: "Northwest", south: 47.0, west: -5.2, north: 51.1, east: 2.0 }, + { name: "Northeast", south: 47.0, west: 2.0, north: 51.1, east: 8.3 }, + { name: "Southwest", south: 42.3, west: -2.0, north: 47.0, east: 2.0 }, + { name: "Southeast", south: 42.3, west: 2.0, north: 47.0, east: 7.8 }, + ], + ES: [ + { name: "North", south: 42.0, west: -9.3, north: 43.8, east: 3.4 }, + { name: "Central", south: 39.0, west: -7.0, north: 42.0, east: 3.4 }, + { name: "South + Balearic", south: 36.0, west: -7.5, north: 39.0, east: 4.4 }, + ], + DE: [ + { name: "North", south: 52.0, west: 5.9, north: 55.1, east: 15.1 }, + { name: "Central", south: 49.5, west: 5.9, north: 52.0, east: 15.1 }, + { name: "South", south: 47.3, west: 5.9, north: 49.5, east: 15.1 }, + ], + PH: [ + { name: "Luzon", south: 12.0, west: 119.0, north: 19.0, east: 127.0 }, + { name: "Visayas", south: 9.0, west: 121.0, north: 12.0, east: 125.5 }, + { name: "Mindanao", south: 5.0, west: 121.0, north: 9.5, east: 127.0 }, + ], + HN: [ + { name: "West", south: 13.0, west: -89.4, north: 16.0, east: -87.0 }, + { name: "East", south: 13.0, west: -87.0, north: 16.5, east: -83.1 }, + ], + BR: [ + { name: "North", south: -5.0, west: -74.0, north: 5.3, east: -35.0 }, + { name: "Northeast", south: -13.0, west: -46.0, north: -5.0, east: -35.0 }, + { name: "Central-West", south: -24.0, west: -60.0, north: -5.0, east: -46.0 }, + { name: "Southeast", south: -24.0, west: -53.0, north: -13.0, east: -39.0 }, + { name: "South", south: -33.8, west: -58.0, north: -24.0, east: -48.0 }, + ], + NG: [ + { name: "South", south: 4.0, west: 2.7, north: 8.0, east: 14.7 }, + { name: "North", south: 8.0, west: 2.7, north: 14.0, east: 14.7 }, + ], + IN: [ + { name: "South", south: 8.0, west: 73.0, north: 16.0, east: 80.5 }, + { name: "Central", south: 16.0, west: 72.0, north: 24.0, east: 88.0 }, + { name: "North", south: 24.0, west: 68.0, north: 37.0, east: 97.5 }, + { name: "Northeast + East Coast", south: 16.0, west: 80.5, north: 28.0, east: 97.5 }, + ], + CD: [ + { name: "West", south: -13.5, west: 12.0, north: 5.5, east: 24.0 }, + { name: "East", south: -13.5, west: 24.0, north: 5.5, east: 31.5 }, + ], + AU: [ + { name: "East Coast", south: -39.0, west: 140.0, north: -10.0, east: 154.0 }, + { name: "West + Central", south: -39.0, west: 112.0, north: -10.0, east: 140.0 }, + ], + US: [ + { name: "Northeast", south: 37.0, west: -82.0, north: 47.5, east: -66.9 }, + { name: "Southeast", south: 24.5, west: -91.7, north: 37.0, east: -75.0 }, + { name: "Midwest", south: 36.0, west: -104.1, north: 49.4, east: -82.0 }, + { name: "West", south: 24.5, west: -125.0, north: 49.4, east: -104.1 }, + ], + MX: [ + { name: "North", south: 25.0, west: -118.0, north: 32.8, east: -97.0 }, + { name: "Central", south: 18.0, west: -106.0, north: 25.0, east: -96.0 }, + { name: "South", south: 14.5, west: -118.0, north: 18.0, east: -86.7 }, + ], + AR: [ + { name: "North", south: -30.0, west: -74.0, north: -21.8, east: -53.6 }, + { name: "Central", south: -40.0, west: -72.0, north: -30.0, east: -56.7 }, + { name: "Patagonia", south: -55.1, west: -74.0, north: -40.0, east: -63.0 }, + ], + CO: [ + { name: "North", south: 5.0, west: -79.0, north: 12.5, east: -66.9 }, + { name: "South", south: -4.2, west: -79.0, north: 5.0, east: -66.9 }, + ], + CA: [ + { name: "BC + Alberta", south: 48.3, west: -139.1, north: 60.0, east: -110.0 }, + { name: "Ontario", south: 41.7, west: -95.2, north: 56.9, east: -74.3 }, + { name: "Quebec", south: 45.0, west: -79.8, north: 62.6, east: -57.1 }, + { name: "Atlantic + Prairies", south: 43.4, west: -110.0, north: 60.0, east: -52.6 }, + ], + ID: [ + { name: "Sumatra + Java", south: -8.8, west: 95.0, north: 5.9, east: 114.6 }, + { name: "Kalimantan + Sulawesi", south: -5.0, west: 114.6, north: 4.0, east: 127.5 }, + { name: "Eastern Indonesia", south: -10.5, west: 127.5, north: 0.9, east: 141.0 }, + ], + CN: [ + { name: "North", south: 35.0, west: 73.5, north: 53.6, east: 135.1 }, + { name: "East", south: 24.0, west: 113.0, north: 35.0, east: 123.0 }, + { name: "South", south: 18.2, west: 97.5, north: 24.0, east: 113.0 }, + { name: "West", south: 24.0, west: 73.5, north: 35.0, east: 113.0 }, + ], + RU: [ + { name: "West (European Russia)", south: 45.0, west: 27.0, north: 70.0, east: 60.0 }, + { name: "Ural + West Siberia", south: 45.0, west: 60.0, north: 70.0, east: 90.0 }, + { name: "East Siberia + Far East", south: 42.0, west: 90.0, north: 72.0, east: 190.0 }, + ], + HK: [ + { name: "Hong Kong", south: 22.15, west: 113.83, north: 22.56, east: 114.44 }, + ], +}; + +// Rate limit: 5 seconds between requests to be respectful +const RATE_LIMIT_MS = 5000; + +let lastRequestTime = 0; + +/** + * Delay helper for rate limiting + */ +async function delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +/** + * Enforce rate limiting between requests + */ +async function enforceRateLimit(): Promise { + const now = Date.now(); + const timeSinceLastRequest = now - lastRequestTime; + + if (timeSinceLastRequest < RATE_LIMIT_MS) { + const waitTime = RATE_LIMIT_MS - timeSinceLastRequest; + console.log(`Rate limiting: waiting ${waitTime}ms...`); + await delay(waitTime); + } + + lastRequestTime = Date.now(); +} + +/** + * Query Overpass API with failover support + */ +async function queryOverpass(query: string): Promise { + await enforceRateLimit(); + + let lastError: Error | null = null; + + // Try each endpoint in order + for (const endpoint of OVERPASS_ENDPOINTS) { + try { + console.log(`Querying Overpass API at ${endpoint}...`); + + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + 'User-Agent': 'NearestMass/1.0 (https://catholicmass.net; church data import)', + }, + body: `data=${encodeURIComponent(query)}`, + }); + + if (response.status === 429) { + console.warn(`Rate limited by ${endpoint}, waiting 60 seconds...`); + await delay(60000); + // Retry this endpoint + const retryResponse = await fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + 'User-Agent': 'NearestMass/1.0 (https://catholicmass.net; church data import)', + }, + body: `data=${encodeURIComponent(query)}`, + }); + + if (!retryResponse.ok) { + throw new Error(`HTTP ${retryResponse.status}: ${retryResponse.statusText}`); + } + + return await retryResponse.json(); + } + + if (response.status === 504) { + throw new Error('Gateway timeout - query too complex, try bounding box approach'); + } + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const data = await response.json(); + console.log(`Successfully retrieved ${data.elements?.length || 0} elements`); + return data; + + } catch (error) { + console.error(`Failed to query ${endpoint}:`, error); + lastError = error as Error; + // Continue to next endpoint + } + } + + throw new Error(`All Overpass endpoints failed. Last error: ${lastError?.message}`); +} + +/** + * Parse OSM element to OSMChurch object + */ +function parseOSMElement(element: any): OSMChurch | null { + const tags = element.tags || {}; + + // Must have a name + if (!tags.name) { + return null; + } + + // Get coordinates (use center for ways and relations) + let lat: number; + let lng: number; + + if (element.type === 'node') { + lat = element.lat; + lng = element.lon; + } else if (element.center) { + lat = element.center.lat; + lng = element.center.lon; + } else { + // Skip elements without coordinates + return null; + } + + // Build OSM ID + const osmId = `${element.type}/${element.id}`; + + // Extract address components + const address = tags['addr:street'] + ? `${tags['addr:housenumber'] || ''} ${tags['addr:street']}`.trim() + : undefined; + + const city = tags['addr:city']; + const state = tags['addr:state']; + const zip = tags['addr:postcode']; + const country = tags['addr:country']; + + // Phone (try multiple tags) + const phone = tags.phone || tags['contact:phone']; + + // Website (try multiple tags) + const website = tags.website || tags['contact:website']; + + // Diocese + const diocese = tags.diocese; + + // Wheelchair accessibility + let wheelchairAccess: boolean | undefined; + if (tags.wheelchair === 'yes') { + wheelchairAccess = true; + } else if (tags.wheelchair === 'no') { + wheelchairAccess = false; + } + + // Service times (mass schedule in opening_hours format) + const serviceTimes = tags.service_times || tags['service_times:catholic'] || undefined; + + return { + osmId, + name: tags.name, + lat, + lng, + address, + city, + state, + zip, + country, + phone, + website, + diocese, + wheelchairAccess, + serviceTimes, + }; +} + +/** + * Query Overpass API for Catholic churches in a specific country + * @param countryCode ISO 3166-1 alpha-2 country code (e.g., "US", "MX", "BR") + * @returns Array of OSMChurch objects + */ +export async function queryOverpassByCountry(countryCode: string): Promise { + // Build Overpass QL query + const query = ` +[out:json][timeout:300]; +area["ISO3166-1"="${countryCode}"][admin_level=2]->.searchArea; +( + nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="catholic"](area.searchArea); + nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="roman_catholic"](area.searchArea); +); +out center; + `.trim(); + + console.log(`Querying Catholic churches in ${countryCode}...`); + + try { + const data = await queryOverpass(query); + const churches: OSMChurch[] = []; + + for (const element of data.elements || []) { + const church = parseOSMElement(element); + if (church) { + churches.push(church); + } + } + + console.log(`Found ${churches.length} Catholic churches in ${countryCode}`); + return churches; + + } catch (error) { + if ((error as Error).message.includes('Gateway timeout')) { + console.warn(`Query timeout for ${countryCode}, falling back to bounding box approach...`); + // Could implement bounding box fallback here if needed + throw error; + } + throw error; + } +} + +/** + * Query Overpass API for Catholic churches in a bounding box + * Useful for large countries or when country area queries timeout + * @param south Southern latitude + * @param west Western longitude + * @param north Northern latitude + * @param east Eastern longitude + * @returns Array of OSMChurch objects + */ +export async function queryOverpassByBoundingBox( + south: number, + west: number, + north: number, + east: number +): Promise { + const query = ` +[out:json][timeout:300]; +( + nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="catholic"](${south},${west},${north},${east}); + nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="roman_catholic"](${south},${west},${north},${east}); +); +out center; + `.trim(); + + console.log(`Querying Catholic churches in bbox (${south},${west},${north},${east})...`); + + const data = await queryOverpass(query); + const churches: OSMChurch[] = []; + + for (const element of data.elements || []) { + const church = parseOSMElement(element); + if (church) { + churches.push(church); + } + } + + console.log(`Found ${churches.length} Catholic churches in bounding box`); + return churches; +} + +/** + * Query Overpass API for Catholic churches with automatic fallback to regional bounding boxes + * Tries country-level query first, falls back to regions on timeout + * @param countryCode ISO 3166-1 alpha-2 country code (e.g., "US", "MX", "BR") + * @returns Array of OSMChurch objects (deduplicated by osmId) + */ +export async function queryOverpassByCountryWithFallback(countryCode: string): Promise { + try { + // Try country-level query first + const churches = await queryOverpassByCountry(countryCode); + + // If 0 results and we have bounding boxes, the country-level query may have + // silently failed (e.g. Swiss mirror returned 0 after primary timed out) + if (churches.length === 0 && COUNTRY_BOUNDING_BOXES[countryCode]) { + console.log(`Country query returned 0 results for ${countryCode}, retrying with bounding boxes...`); + throw new Error('Gateway timeout - query too complex, try bounding box approach'); + } + + return churches; + } catch (error) { + // Check if it's a timeout and we have bounding boxes for this country + if ((error as Error).message.includes('Gateway timeout')) { + const regions = COUNTRY_BOUNDING_BOXES[countryCode]; + + if (!regions) { + throw new Error( + `Gateway timeout for ${countryCode} and no bounding boxes defined. ` + + `Consider adding regional bounding boxes to COUNTRY_BOUNDING_BOXES in overpass-client.ts` + ); + } + + console.log(`Falling back to ${regions.length} regional queries for ${countryCode}...`); + + const allChurches: OSMChurch[] = []; + const seenOsmIds = new Set(); + + for (const region of regions) { + console.log(`\nQuerying region: ${region.name}`); + + const regionChurches = await queryOverpassByBoundingBox( + region.south, + region.west, + region.north, + region.east + ); + + // Deduplicate by osmId (regions may overlap) + let newChurches = 0; + for (const church of regionChurches) { + if (!seenOsmIds.has(church.osmId)) { + seenOsmIds.add(church.osmId); + allChurches.push(church); + newChurches++; + } + } + + console.log(`Added ${newChurches} new churches from ${region.name} (${regionChurches.length - newChurches} duplicates)`); + + // Rate limiting already handled by queryOverpass, but add extra delay between regions + if (regions.indexOf(region) < regions.length - 1) { + await delay(2000); + } + } + + console.log(`\nTotal churches found across all regions: ${allChurches.length}`); + return allChurches; + } + + // Re-throw non-timeout errors + throw error; + } +}