fix: clean up church-matcher types and add HK OSM bounding box
- Remove discovermassId/buscarmisasNetworkId from findDuplicateChurch match passes (importers now do their own pre-check dedup); restore as optional fields on ExistingChurch to keep type/runtime in sync - Add HK bounding box to COUNTRY_BOUNDING_BOXES; fix silent 0-result fallback when country query returns empty from mirror server - discovermass importer: add --limit flag and skip-already-imported pre-check using importedSlugs set - Import scripts: remove discovermassId from ExistingChurch select/stubs (field not needed in shared matcher context) - Schema: reorder discovermassId/kerknetId/gottesdienstzeitenId fields Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,7 +22,6 @@
|
||||
"scrape:diocese": "tsx scripts/scrape-diocese-directory.ts",
|
||||
"setup:diocese": "tsx scripts/setup-diocese.ts",
|
||||
"import:gcatholic": "tsx scripts/import-gcatholic.ts",
|
||||
"import:buscarmisas-network": "tsx scripts/import-buscarmisas-network.ts",
|
||||
"import:orarimesse": "tsx scripts/import-orarimesse.ts",
|
||||
"import:mass-schedules-ph": "tsx scripts/import-mass-schedules-ph.ts",
|
||||
"import:philmass": "tsx scripts/import-philmass.ts",
|
||||
|
||||
@@ -42,9 +42,9 @@ model Church {
|
||||
messesInfoId String? @unique @map("messes_info_id")
|
||||
bohosluzbyId String? @unique @map("bohosluzby_id")
|
||||
miserendId String? @unique @map("miserend_id")
|
||||
kerknetId String? @unique @map("kerknet_id")
|
||||
gottesdienstzeitenId String? @unique @map("gottesdienstzeiten_id")
|
||||
discovermassId String? @unique @map("discovermass_id")
|
||||
gottesdienstzeitenId String? @unique @map("gottesdienstzeiten_id")
|
||||
kerknetId String? @unique @map("kerknet_id")
|
||||
buscarmisasNetworkId String? @unique @map("buscarmisas_network_id")
|
||||
claimed Boolean @default(false)
|
||||
claimedAt DateTime? @map("claimed_at")
|
||||
@@ -95,9 +95,9 @@ model Church {
|
||||
@@index([messesInfoId])
|
||||
@@index([bohosluzbyId])
|
||||
@@index([miserendId])
|
||||
@@index([kerknetId])
|
||||
@@index([gottesdienstzeitenId])
|
||||
@@index([discovermassId])
|
||||
@@index([gottesdienstzeitenId])
|
||||
@@index([kerknetId])
|
||||
@@index([buscarmisasNetworkId])
|
||||
@@index([dioceseId])
|
||||
@@index([claimedByUserId])
|
||||
|
||||
@@ -178,7 +178,6 @@ async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: s
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -259,7 +258,6 @@ async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: s
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'baidu',
|
||||
website: baiduChurch.website || null,
|
||||
phone: baiduChurch.phone || null,
|
||||
|
||||
@@ -287,7 +287,6 @@ async function loadExistingCzechChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -415,7 +414,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'bohosluzby',
|
||||
website: null,
|
||||
phone: null,
|
||||
|
||||
@@ -94,6 +94,7 @@ interface CLIArgs {
|
||||
all: boolean;
|
||||
dryRun: boolean;
|
||||
resumeFrom?: number;
|
||||
limit?: number;
|
||||
jobId?: string;
|
||||
}
|
||||
|
||||
@@ -507,6 +508,7 @@ function parseCLIArgs(): CLIArgs {
|
||||
case '--all': result.all = true; break;
|
||||
case '--dry-run': result.dryRun = true; break;
|
||||
case '--resume-from': result.resumeFrom = parseInt(args[++i], 10); break;
|
||||
case '--limit': result.limit = parseInt(args[++i], 10); break;
|
||||
case '--job-id': result.jobId = args[++i]; break;
|
||||
}
|
||||
}
|
||||
@@ -540,14 +542,25 @@ async function main() {
|
||||
try {
|
||||
const urls = await getAllChurchUrls();
|
||||
const existingChurches = await loadExistingChurches();
|
||||
|
||||
// Skip already-imported churches — check discovermassId set in DB
|
||||
const importedSlugs = new Set(
|
||||
existingChurches.filter(c => c.discovermassId).map(c => c.discovermassId!)
|
||||
);
|
||||
|
||||
// Apply --resume-from first, then filter to unimported, then apply --limit
|
||||
const startIdx = args.resumeFrom ?? 0;
|
||||
const churchUrls = urls.slice(startIdx);
|
||||
console.log(`\nProcessing ${churchUrls.length} churches (starting from index ${startIdx})...\n`);
|
||||
const candidateUrls = urls.slice(startIdx).filter(url => {
|
||||
const slug = url.replace('https://discovermass.com/church/', '').replace(/\/$/, '');
|
||||
return !importedSlugs.has(slug);
|
||||
});
|
||||
const churchUrls = args.limit ? candidateUrls.slice(0, args.limit) : candidateUrls;
|
||||
|
||||
console.log(`\nSitemap total: ${urls.length} | Already imported: ${importedSlugs.size} | This run: ${churchUrls.length}${args.limit ? ` (limit ${args.limit})` : ''}\n`);
|
||||
|
||||
for (let i = 0; i < churchUrls.length; i++) {
|
||||
const url = churchUrls[i];
|
||||
const overallIdx = startIdx + i;
|
||||
console.log(`[${overallIdx + 1}/${urls.length}] ${url}`);
|
||||
console.log(`[${i + 1}/${churchUrls.length}] ${url}`);
|
||||
await processChurch(url, existingChurches, args, stats);
|
||||
if (i < churchUrls.length - 1) {
|
||||
await sleep(REQUEST_DELAY_MS);
|
||||
|
||||
@@ -401,7 +401,6 @@ async function loadExistingChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -516,7 +515,6 @@ async function importChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'gcatholic',
|
||||
website: church.website || null,
|
||||
phone: church.phone || null,
|
||||
|
||||
@@ -316,7 +316,6 @@ async function loadExistingGermanChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -479,7 +478,6 @@ async function processDiocese(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: gdzId,
|
||||
discovermassId: null,
|
||||
source: 'gottesdienstzeiten',
|
||||
website: church.website,
|
||||
phone: church.phone,
|
||||
|
||||
@@ -570,7 +570,6 @@ async function loadExistingSpanishChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -783,7 +782,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'horariosmisas',
|
||||
website: parsed.website,
|
||||
phone: parsed.phone,
|
||||
|
||||
@@ -343,7 +343,6 @@ async function loadExistingBelgianChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -490,8 +489,6 @@ async function processChurch(
|
||||
bohosluzbyId: null,
|
||||
miserendId: null,
|
||||
kerknetId,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'kerknet',
|
||||
website: church.website,
|
||||
phone: null,
|
||||
|
||||
@@ -290,7 +290,6 @@ async function loadExistingPhilippineChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -465,7 +464,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'mass-schedules-ph',
|
||||
website: null,
|
||||
phone: parsed.phone,
|
||||
|
||||
@@ -398,7 +398,6 @@ async function loadExistingChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -596,7 +595,7 @@ async function main() {
|
||||
orarimesseId: null, massSchedulesPhId: null,
|
||||
philmassId: null, horariosMisasId: null,
|
||||
mszeInfoId: null, weekdayMassesId: null,
|
||||
messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null, discovermassId: null,
|
||||
messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null,
|
||||
source: 'masstimes', website: mc.url?.trim() || null,
|
||||
phone: mc.phone_number?.trim() || null, address, country,
|
||||
});
|
||||
|
||||
@@ -326,7 +326,6 @@ async function loadExistingFrenchChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -480,7 +479,6 @@ async function processDiocese(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'messes-info',
|
||||
website: null,
|
||||
phone: null,
|
||||
|
||||
@@ -240,7 +240,6 @@ async function loadExistingChurches(countryCodes: string[]): Promise<ExistingChu
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -366,7 +365,6 @@ async function processChurch(
|
||||
miserendId,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'miserend',
|
||||
website: null,
|
||||
phone: null,
|
||||
|
||||
@@ -367,7 +367,6 @@ async function loadExistingPolishChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -538,7 +537,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'msze-info',
|
||||
website: parsed.website,
|
||||
phone: parsed.phone,
|
||||
|
||||
@@ -283,7 +283,6 @@ async function loadExistingItalianChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -425,7 +424,6 @@ async function processChurchesForDiocese(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'orarimesse',
|
||||
website: church.sito || null,
|
||||
phone: null,
|
||||
|
||||
@@ -204,7 +204,6 @@ async function importFromOSM(countryCode: string, dryRun: boolean = false): Prom
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -380,7 +379,6 @@ async function importFromOSM(countryCode: string, dryRun: boolean = false): Prom
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'osm',
|
||||
website: osmChurch.website || null,
|
||||
phone: osmChurch.phone || null,
|
||||
|
||||
@@ -152,7 +152,6 @@ async function importFromRegion(countryCode: string, regionName: string, dryRun:
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -256,7 +255,6 @@ async function importFromRegion(countryCode: string, regionName: string, dryRun:
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'osm',
|
||||
website: osmChurch.website || null,
|
||||
phone: osmChurch.phone || null,
|
||||
|
||||
@@ -301,7 +301,6 @@ async function loadExistingPhilippineChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
|
||||
@@ -822,7 +822,6 @@ async function loadExistingChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -982,7 +981,6 @@ async function importAreaBlocks(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'weekdaymasses',
|
||||
website: church.website,
|
||||
phone: church.phone,
|
||||
|
||||
@@ -27,8 +27,8 @@ export interface ExistingChurch {
|
||||
miserendId: string | null;
|
||||
kerknetId: string | null;
|
||||
gottesdienstzeitenId: string | null;
|
||||
discovermassId: string | null;
|
||||
buscarmisasNetworkId: string | null;
|
||||
discovermassId?: string | null;
|
||||
buscarmisasNetworkId?: string | null;
|
||||
source: string;
|
||||
website: string | null;
|
||||
phone: string | null;
|
||||
@@ -138,8 +138,6 @@ export type ChurchCandidate = {
|
||||
miserendId?: string;
|
||||
kerknetId?: string;
|
||||
gottesdienstzeitenId?: string;
|
||||
discovermassId?: string;
|
||||
buscarmisasNetworkId?: string;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -149,8 +147,8 @@ export type ChurchCandidate = {
|
||||
* Matching strategy (in priority order):
|
||||
* 1. Exact osmId match
|
||||
* 2. Exact baiduId match
|
||||
* 3-15. Exact importer ID matches (orarimesse, massSchedulesPh, philmass, horariosMisas, mszeInfo, weekdayMasses, messesInfo, bohosluzby, miserend, kerknet, gottesdienstzeiten, discovermass, buscarmisasNetwork)
|
||||
* 16. Proximity + name similarity (within 200m + similar name)
|
||||
* 3-9. Exact importer ID matches (orarimesse, massSchedulesPh, philmass, horariosMisas, mszeInfo, weekdayMasses, messesInfo)
|
||||
* 10. Proximity + name similarity (within 200m + similar name)
|
||||
*/
|
||||
export function findDuplicateChurch(
|
||||
candidate: ChurchCandidate,
|
||||
@@ -260,23 +258,7 @@ export function findDuplicateChurch(
|
||||
if (gdzMatch) return gdzMatch;
|
||||
}
|
||||
|
||||
// Fourteenth pass: exact discovermassId match
|
||||
if (candidate.discovermassId) {
|
||||
const match = existingChurches.find(
|
||||
(church) => church.discovermassId === candidate.discovermassId
|
||||
);
|
||||
if (match) return match;
|
||||
}
|
||||
|
||||
// Fifteenth pass: exact buscarmisasNetworkId match
|
||||
if (candidate.buscarmisasNetworkId) {
|
||||
const match = existingChurches.find(
|
||||
(church) => church.buscarmisasNetworkId === candidate.buscarmisasNetworkId
|
||||
);
|
||||
if (match) return match;
|
||||
}
|
||||
|
||||
// Sixteenth pass: proximity + name match (skip if candidate has no real coordinates)
|
||||
// Fourteenth pass: proximity + name match (skip if candidate has no real coordinates)
|
||||
if (candidate.lat === 0 && candidate.lng === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
472
src/lib/overpass-client.ts
Normal file
472
src/lib/overpass-client.ts
Normal file
@@ -0,0 +1,472 @@
|
||||
/**
|
||||
* Overpass API Client for querying OpenStreetMap data
|
||||
* Used to import Catholic churches globally
|
||||
*/
|
||||
|
||||
/**
 * A Catholic church record built from a single OpenStreetMap element.
 *
 * Identifier, name and coordinates are always present; every other field is
 * copied from the element's tags only when the tag exists.
 */
export interface OSMChurch {
  /** OSM identifier in `<type>/<id>` form, e.g. "node/12345" or "way/67890". */
  osmId: string;
  name: string;
  lat: number;
  lng: number;
  address?: string;
  city?: string;
  state?: string;
  zip?: string;
  /** ISO 3166-1 alpha-2 code. */
  country?: string;
  phone?: string;
  website?: string;
  diocese?: string;
  wheelchairAccess?: boolean;
  /** OSM `service_times` tag (opening_hours syntax). */
  serviceTimes?: string;
}
|
||||
|
||||
/**
 * Public Overpass API interpreter URLs, listed in the order they are tried
 * when failing over from one server to the next.
 */
const OVERPASS_ENDPOINTS: string[] = [
  'https://overpass-api.de/api/interpreter',
  'https://overpass.osm.ch/api/interpreter',
  'https://overpass.kumi.systems/api/interpreter',
];
|
||||
|
||||
/**
 * Regional bounding boxes for countries that time out on area queries.
 *
 * Keyed by ISO 3166-1 alpha-2 country code. Each region is a
 * (south, west, north, east) box in decimal degrees. Boxes of one country may
 * overlap; callers are expected to deduplicate results by `osmId`.
 *
 * Every box must stay inside the valid coordinate range (latitude -90..90,
 * longitude -180..180) with `west < east`. A country that crosses the
 * antimeridian (see RU) is therefore split into one box ending at 180 and a
 * second one starting at -180 instead of using a longitude above 180.
 */
export const COUNTRY_BOUNDING_BOXES: Record<string, Array<{ name: string; south: number; west: number; north: number; east: number }>> = {
  GB: [
    { name: "England South", south: 49.9, west: -5.8, north: 52.5, east: 1.8 },
    { name: "England North + Wales", south: 52.5, west: -5.8, north: 55.8, east: 1.8 },
    { name: "Scotland", south: 55.0, west: -8.0, north: 60.9, east: -0.7 },
    { name: "Northern Ireland", south: 54.0, west: -8.2, north: 55.4, east: -5.4 },
  ],
  PL: [
    { name: "North", south: 52.0, west: 14.0, north: 54.9, east: 24.2 },
    { name: "South", south: 49.0, west: 14.0, north: 52.0, east: 24.2 },
  ],
  PT: [
    { name: "North", south: 40.0, west: -9.6, north: 42.2, east: -6.0 },
    { name: "South", south: 36.9, west: -9.6, north: 40.0, east: -6.0 },
  ],
  IT: [
    { name: "North", south: 44.0, west: 6.6, north: 47.1, east: 13.8 },
    { name: "Central", south: 41.0, west: 9.5, north: 44.0, east: 15.0 },
    { name: "South + Sicily", south: 36.6, west: 12.4, north: 41.0, east: 18.6 },
    { name: "Sardinia", south: 38.8, west: 8.1, north: 41.3, east: 9.9 },
  ],
  FR: [
    { name: "Northwest", south: 47.0, west: -5.2, north: 51.1, east: 2.0 },
    { name: "Northeast", south: 47.0, west: 2.0, north: 51.1, east: 8.3 },
    { name: "Southwest", south: 42.3, west: -2.0, north: 47.0, east: 2.0 },
    { name: "Southeast", south: 42.3, west: 2.0, north: 47.0, east: 7.8 },
  ],
  ES: [
    { name: "North", south: 42.0, west: -9.3, north: 43.8, east: 3.4 },
    { name: "Central", south: 39.0, west: -7.0, north: 42.0, east: 3.4 },
    { name: "South + Balearic", south: 36.0, west: -7.5, north: 39.0, east: 4.4 },
  ],
  DE: [
    { name: "North", south: 52.0, west: 5.9, north: 55.1, east: 15.1 },
    { name: "Central", south: 49.5, west: 5.9, north: 52.0, east: 15.1 },
    { name: "South", south: 47.3, west: 5.9, north: 49.5, east: 15.1 },
  ],
  PH: [
    { name: "Luzon", south: 12.0, west: 119.0, north: 19.0, east: 127.0 },
    { name: "Visayas", south: 9.0, west: 121.0, north: 12.0, east: 125.5 },
    { name: "Mindanao", south: 5.0, west: 121.0, north: 9.5, east: 127.0 },
  ],
  HN: [
    { name: "West", south: 13.0, west: -89.4, north: 16.0, east: -87.0 },
    { name: "East", south: 13.0, west: -87.0, north: 16.5, east: -83.1 },
  ],
  BR: [
    { name: "North", south: -5.0, west: -74.0, north: 5.3, east: -35.0 },
    { name: "Northeast", south: -13.0, west: -46.0, north: -5.0, east: -35.0 },
    { name: "Central-West", south: -24.0, west: -60.0, north: -5.0, east: -46.0 },
    { name: "Southeast", south: -24.0, west: -53.0, north: -13.0, east: -39.0 },
    { name: "South", south: -33.8, west: -58.0, north: -24.0, east: -48.0 },
  ],
  NG: [
    { name: "South", south: 4.0, west: 2.7, north: 8.0, east: 14.7 },
    { name: "North", south: 8.0, west: 2.7, north: 14.0, east: 14.7 },
  ],
  IN: [
    { name: "South", south: 8.0, west: 73.0, north: 16.0, east: 80.5 },
    { name: "Central", south: 16.0, west: 72.0, north: 24.0, east: 88.0 },
    { name: "North", south: 24.0, west: 68.0, north: 37.0, east: 97.5 },
    { name: "Northeast + East Coast", south: 16.0, west: 80.5, north: 28.0, east: 97.5 },
  ],
  CD: [
    { name: "West", south: -13.5, west: 12.0, north: 5.5, east: 24.0 },
    { name: "East", south: -13.5, west: 24.0, north: 5.5, east: 31.5 },
  ],
  AU: [
    { name: "East Coast", south: -39.0, west: 140.0, north: -10.0, east: 154.0 },
    { name: "West + Central", south: -39.0, west: 112.0, north: -10.0, east: 140.0 },
  ],
  US: [
    { name: "Northeast", south: 37.0, west: -82.0, north: 47.5, east: -66.9 },
    { name: "Southeast", south: 24.5, west: -91.7, north: 37.0, east: -75.0 },
    { name: "Midwest", south: 36.0, west: -104.1, north: 49.4, east: -82.0 },
    { name: "West", south: 24.5, west: -125.0, north: 49.4, east: -104.1 },
  ],
  MX: [
    { name: "North", south: 25.0, west: -118.0, north: 32.8, east: -97.0 },
    { name: "Central", south: 18.0, west: -106.0, north: 25.0, east: -96.0 },
    { name: "South", south: 14.5, west: -118.0, north: 18.0, east: -86.7 },
  ],
  AR: [
    { name: "North", south: -30.0, west: -74.0, north: -21.8, east: -53.6 },
    { name: "Central", south: -40.0, west: -72.0, north: -30.0, east: -56.7 },
    { name: "Patagonia", south: -55.1, west: -74.0, north: -40.0, east: -63.0 },
  ],
  CO: [
    { name: "North", south: 5.0, west: -79.0, north: 12.5, east: -66.9 },
    { name: "South", south: -4.2, west: -79.0, north: 5.0, east: -66.9 },
  ],
  CA: [
    { name: "BC + Alberta", south: 48.3, west: -139.1, north: 60.0, east: -110.0 },
    { name: "Ontario", south: 41.7, west: -95.2, north: 56.9, east: -74.3 },
    { name: "Quebec", south: 45.0, west: -79.8, north: 62.6, east: -57.1 },
    { name: "Atlantic + Prairies", south: 43.4, west: -110.0, north: 60.0, east: -52.6 },
  ],
  ID: [
    { name: "Sumatra + Java", south: -8.8, west: 95.0, north: 5.9, east: 114.6 },
    { name: "Kalimantan + Sulawesi", south: -5.0, west: 114.6, north: 4.0, east: 127.5 },
    { name: "Eastern Indonesia", south: -10.5, west: 127.5, north: 0.9, east: 141.0 },
  ],
  CN: [
    { name: "North", south: 35.0, west: 73.5, north: 53.6, east: 135.1 },
    { name: "East", south: 24.0, west: 113.0, north: 35.0, east: 123.0 },
    { name: "South", south: 18.2, west: 97.5, north: 24.0, east: 113.0 },
    { name: "West", south: 24.0, west: 73.5, north: 35.0, east: 113.0 },
  ],
  RU: [
    { name: "West (European Russia)", south: 45.0, west: 27.0, north: 70.0, east: 60.0 },
    { name: "Ural + West Siberia", south: 45.0, west: 60.0, north: 70.0, east: 90.0 },
    // The far east crosses the antimeridian. Longitude 190 is not a valid
    // coordinate, so the former 90..190 box is expressed as 90..180 plus
    // -180..-170 (the same strip, written with in-range longitudes).
    { name: "East Siberia + Far East", south: 42.0, west: 90.0, north: 72.0, east: 180.0 },
    { name: "Far East (east of antimeridian)", south: 42.0, west: -180.0, north: 72.0, east: -170.0 },
  ],
  HK: [
    { name: "Hong Kong", south: 22.15, west: 113.83, north: 22.56, east: 114.44 },
  ],
};
|
||||
|
||||
/** Minimum gap between consecutive requests, in milliseconds (5 seconds, to be respectful). */
const RATE_LIMIT_MS = 5 * 1000;

/** `Date.now()` value of the most recent request; 0 until the first request is made. */
let lastRequestTime = 0;
|
||||
|
||||
/**
 * Sleep helper used for rate limiting.
 *
 * @param ms Number of milliseconds to wait.
 * @returns A promise that settles once the timer has fired.
 */
async function delay(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Make sure at least RATE_LIMIT_MS milliseconds separate consecutive requests.
 *
 * Sleeps for whatever part of the interval has not yet elapsed since
 * `lastRequestTime`, then stamps `lastRequestTime` with the current time.
 */
async function enforceRateLimit(): Promise<void> {
  const elapsed = Date.now() - lastRequestTime;
  const remaining = RATE_LIMIT_MS - elapsed;

  // A positive remainder means the previous request is still too recent.
  if (remaining > 0) {
    console.log(`Rate limiting: waiting ${remaining}ms...`);
    await delay(remaining);
  }

  lastRequestTime = Date.now();
}
|
||||
|
||||
/**
 * Query the Overpass API with failover support.
 *
 * Waits for the rate limiter, then POSTs the query to each entry of
 * OVERPASS_ENDPOINTS in order and returns the first usable JSON response.
 *
 * Per endpoint:
 * - HTTP 429: wait 60 seconds and retry the same endpoint once. The retried
 *   response then goes through exactly the same checks as a first response.
 * - HTTP 504: treated as a gateway timeout.
 * - Any other non-2xx status: treated as a failure.
 * - HTTP 2xx whose JSON carries a `remark` containing "runtime error"
 *   (how Overpass reports a timed-out or out-of-memory query): treated as a
 *   gateway timeout instead of a silently empty or partial success.
 *
 * A failure on one endpoint is logged and the next endpoint is tried.
 *
 * @param query Overpass QL query text.
 * @returns The parsed JSON response body.
 * @throws Error when every endpoint failed. The message embeds the last
 *   endpoint error, so it contains "Gateway timeout" when that error was a
 *   timeout; callers match on that substring to decide on a bounding-box
 *   fallback.
 */
async function queryOverpass(query: string): Promise<any> {
  await enforceRateLimit();

  // Identical request for every endpoint and for the 429 retry.
  const requestInit = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
      'User-Agent': 'NearestMass/1.0 (https://catholicmass.net; church data import)',
    },
    body: `data=${encodeURIComponent(query)}`,
  };

  let lastError: Error | null = null;

  // Try each endpoint in order
  for (const endpoint of OVERPASS_ENDPOINTS) {
    try {
      console.log(`Querying Overpass API at ${endpoint}...`);

      let response = await fetch(endpoint, requestInit);

      if (response.status === 429) {
        console.warn(`Rate limited by ${endpoint}, waiting 60 seconds...`);
        await delay(60000);
        // Retry this endpoint once; the checks below apply to the retry too.
        response = await fetch(endpoint, requestInit);
      }

      if (response.status === 504) {
        throw new Error('Gateway timeout - query too complex, try bounding box approach');
      }

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const data = await response.json();

      // A runtime error reported inside a 200 response means the element list
      // is empty or incomplete. The message keeps the "Gateway timeout" prefix
      // because callers key their bounding-box fallback on that substring.
      const remark = typeof data?.remark === 'string' ? data.remark : '';
      if (remark.includes('runtime error')) {
        throw new Error(
          `Gateway timeout - query too complex, try bounding box approach (Overpass remark: ${remark})`
        );
      }

      console.log(`Successfully retrieved ${data.elements?.length || 0} elements`);
      return data;
    } catch (error) {
      console.error(`Failed to query ${endpoint}:`, error);
      lastError = error instanceof Error ? error : new Error(String(error));
      // Continue to next endpoint
    }
  }

  throw new Error(`All Overpass endpoints failed. Last error: ${lastError?.message}`);
}
|
||||
|
||||
/**
 * Convert a raw Overpass/OSM element into an OSMChurch.
 *
 * Nodes carry their own `lat`/`lon`; other element types are positioned by
 * their `center` object (present when the query uses `out center`).
 *
 * @param element Raw element from the `elements` array of an Overpass response.
 * @returns The parsed church, or `null` when the element has no `name` tag or
 *   no usable (finite, numeric) coordinates.
 */
function parseOSMElement(element: any): OSMChurch | null {
  const tags = element?.tags || {};

  // Must have a name
  if (!tags.name) {
    return null;
  }

  // Get coordinates (use center for ways and relations)
  const position = element.type === 'node' ? element : element.center;
  const lat = position?.lat;
  const lng = position?.lon;

  // Skip elements without coordinates. This also rejects missing or
  // non-numeric values so a church is never emitted with undefined lat/lng.
  if (
    typeof lat !== 'number' ||
    typeof lng !== 'number' ||
    !Number.isFinite(lat) ||
    !Number.isFinite(lng)
  ) {
    return null;
  }

  // Build OSM ID
  const osmId = `${element.type}/${element.id}`;

  // Street address: house number is optional, hence the trim()
  const address = tags['addr:street']
    ? `${tags['addr:housenumber'] || ''} ${tags['addr:street']}`.trim()
    : undefined;

  // Wheelchair accessibility: only explicit yes/no map to a boolean;
  // any other value (or no tag) leaves the field undefined.
  let wheelchairAccess: boolean | undefined;
  if (tags.wheelchair === 'yes') {
    wheelchairAccess = true;
  } else if (tags.wheelchair === 'no') {
    wheelchairAccess = false;
  }

  return {
    osmId,
    name: tags.name,
    lat,
    lng,
    address,
    city: tags['addr:city'],
    state: tags['addr:state'],
    zip: tags['addr:postcode'],
    country: tags['addr:country'],
    // Phone and website: plain tag first, then the contact:* variant
    phone: tags.phone || tags['contact:phone'],
    website: tags.website || tags['contact:website'],
    diocese: tags.diocese,
    wheelchairAccess,
    // Service times (mass schedule in opening_hours format)
    serviceTimes: tags.service_times || tags['service_times:catholic'] || undefined,
  };
}
|
||||
|
||||
/**
 * Fetch the Catholic churches of one country through a country-area query.
 *
 * Selects nodes, ways and relations tagged as Christian places of worship with
 * denomination "catholic" or "roman_catholic" inside the admin_level=2 area
 * whose ISO3166-1 tag equals the given code.
 *
 * @param countryCode ISO 3166-1 alpha-2 country code (e.g., "US", "MX", "BR")
 * @returns Every element of the response that parses into an OSMChurch
 * @throws Whatever the underlying query throws; a warning is logged first when
 *   the error message mentions "Gateway timeout"
 */
export async function queryOverpassByCountry(countryCode: string): Promise<OSMChurch[]> {
  // Build Overpass QL query
  const query = `
    [out:json][timeout:300];
    area["ISO3166-1"="${countryCode}"][admin_level=2]->.searchArea;
    (
      nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="catholic"](area.searchArea);
      nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="roman_catholic"](area.searchArea);
    );
    out center;
  `.trim();

  console.log(`Querying Catholic churches in ${countryCode}...`);

  try {
    const response = await queryOverpass(query);

    // Elements that cannot be parsed come back as null and are dropped.
    const churches: OSMChurch[] = (response.elements || [])
      .map((rawElement: any) => parseOSMElement(rawElement))
      .filter((parsed: OSMChurch | null) => Boolean(parsed));

    console.log(`Found ${churches.length} Catholic churches in ${countryCode}`);
    return churches;
  } catch (error) {
    const timedOut = (error as Error).message.includes('Gateway timeout');
    if (timedOut) {
      // Could implement bounding box fallback here if needed
      console.warn(`Query timeout for ${countryCode}, falling back to bounding box approach...`);
    }
    // The error is propagated unchanged in either case.
    throw error;
  }
}
|
||||
|
||||
/**
 * Fetch the Catholic churches located inside a latitude/longitude rectangle.
 * Useful for large countries or when a country-area query times out.
 *
 * @param south Southern latitude
 * @param west Western longitude
 * @param north Northern latitude
 * @param east Eastern longitude
 * @returns Every element of the response that parses into an OSMChurch
 */
export async function queryOverpassByBoundingBox(
  south: number,
  west: number,
  north: number,
  east: number
): Promise<OSMChurch[]> {
  // Overpass bbox filter order: south, west, north, east.
  const bbox = `${south},${west},${north},${east}`;

  const query = `
    [out:json][timeout:300];
    (
      nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="catholic"](${bbox});
      nwr["amenity"="place_of_worship"]["religion"="christian"]["denomination"="roman_catholic"](${bbox});
    );
    out center;
  `.trim();

  console.log(`Querying Catholic churches in bbox (${bbox})...`);

  const response = await queryOverpass(query);

  // Keep only the elements that parse into a church.
  const churches = (response.elements || []).reduce((kept: OSMChurch[], rawElement: any) => {
    const parsed = parseOSMElement(rawElement);
    if (parsed) {
      kept.push(parsed);
    }
    return kept;
  }, [] as OSMChurch[]);

  console.log(`Found ${churches.length} Catholic churches in bounding box`);
  return churches;
}
|
||||
|
||||
/**
 * Fetch a country's Catholic churches, switching to the regional bounding
 * boxes of COUNTRY_BOUNDING_BOXES when the country-level query is unusable.
 *
 * The regional path is taken when either
 * - the country query fails with an error whose message contains
 *   "Gateway timeout", or
 * - the country query returns 0 churches although bounding boxes exist for the
 *   country (the query may have silently failed, e.g. a mirror returned 0
 *   after the primary timed out).
 *
 * @param countryCode ISO 3166-1 alpha-2 country code (e.g., "US", "MX", "BR")
 * @returns Array of OSMChurch objects (deduplicated by osmId on the regional path)
 * @throws Error when the query timed out and no bounding boxes are defined for
 *   the country; any non-timeout error is re-thrown unchanged
 */
export async function queryOverpassByCountryWithFallback(countryCode: string): Promise<OSMChurch[]> {
  try {
    // Try country-level query first
    const countryResult = await queryOverpassByCountry(countryCode);

    // A non-empty result, or an empty one with nothing to fall back to, is final.
    if (countryResult.length !== 0 || !COUNTRY_BOUNDING_BOXES[countryCode]) {
      return countryResult;
    }

    console.log(`Country query returned 0 results for ${countryCode}, retrying with bounding boxes...`);
  } catch (error) {
    // Re-throw non-timeout errors
    if (!(error as Error).message.includes('Gateway timeout')) {
      throw error;
    }
  }

  // From here on: the regional fallback.
  const regions = COUNTRY_BOUNDING_BOXES[countryCode];

  if (!regions) {
    throw new Error(
      `Gateway timeout for ${countryCode} and no bounding boxes defined. ` +
      `Consider adding regional bounding boxes to COUNTRY_BOUNDING_BOXES in overpass-client.ts`
    );
  }

  console.log(`Falling back to ${regions.length} regional queries for ${countryCode}...`);

  // Keyed by osmId; insertion order is preserved, and the first occurrence wins
  // (regions may overlap).
  const churchesById = new Map<string, OSMChurch>();

  for (const [position, region] of regions.entries()) {
    console.log(`\nQuerying region: ${region.name}`);

    const regionChurches = await queryOverpassByBoundingBox(
      region.south,
      region.west,
      region.north,
      region.east
    );

    const sizeBefore = churchesById.size;
    for (const church of regionChurches) {
      if (!churchesById.has(church.osmId)) {
        churchesById.set(church.osmId, church);
      }
    }
    const newChurches = churchesById.size - sizeBefore;

    console.log(`Added ${newChurches} new churches from ${region.name} (${regionChurches.length - newChurches} duplicates)`);

    // Rate limiting already handled by queryOverpass, but add extra delay between regions
    const isLastRegion = position >= regions.length - 1;
    if (!isLastRegion) {
      await delay(2000);
    }
  }

  const allChurches = Array.from(churchesById.values());

  console.log(`\nTotal churches found across all regions: ${allChurches.length}`);
  return allChurches;
}
|
||||
Reference in New Issue
Block a user