#!/usr/bin/env tsx
/**
 * Enrich churches that have lat/lng=0 with real coordinates via Nominatim forward geocoding.
 * After this runs, enrich-with-reverse-geocode fills city/state from the new coordinates.
 *
 * Usage:
 *   npx tsx scripts/enrich-with-forward-geocode.ts --country HK --dry-run
 *   npx tsx scripts/enrich-with-forward-geocode.ts --country HK
 *   npx tsx scripts/enrich-with-forward-geocode.ts --limit 10
 *
 * Rate limit: 1 request/second (Nominatim usage policy — mandatory).
 */

import dotenv from 'dotenv';
import path from 'path';

dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import axios from 'axios';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
const pool = new Pool({
  connectionString: dbUrl,
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

const NOMINATIM_SEARCH_URL = 'https://nominatim.openstreetmap.org/search';

/** Pause between Nominatim requests; slightly above 1s to stay within the 1 request/second policy. */
const RATE_LIMIT_MS = 1100;

// Some regions use a different ISO code in OSM than in our DB
const NOMINATIM_COUNTRY_MAP: Record<string, string> = {
  HK: 'cn', // Hong Kong is part of China in OSM
  MO: 'cn', // Macau likewise
};

/** House number at the start of the address or directly after a comma. */
const HOUSE_NUMBER_AFTER_COMMA = /(?:^|,\s*)(\d+[A-Za-z]?(?:\s|,).*)/;

/** House number preceded only by whitespace, e.g. "... School (H.K.) 8 Hoi Chak Street". */
const HOUSE_NUMBER_AFTER_SPACE = /(?:^|\s)(\d+[A-Za-z]?(?:\s|,).*)/;

/** The subset of church columns this script selects and uses. */
interface ChurchRecord {
  id: string;
  name: string;
  address: string;
  country: string;
  city: string | null;
  state: string | null;
}

/** The fields of a Nominatim search hit that this script reads. */
interface NominatimSearchResult {
  lat: string;
  lon: string;
  display_name: string;
  address?: {
    city?: string;
    town?: string;
    village?: string;
    municipality?: string;
    state?: string;
    province?: string;
  };
}

/** Write a message to stdout prefixed with an ISO-8601 timestamp. */
function log(msg: string): void {
  console.log(`[${new Date().toISOString()}] ${msg}`);
}

/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Normalise a raw address before it is sent to Nominatim.
 *
 * Removes one trailing Hong Kong city/region suffix, the first "R.E." marker,
 * a single trailing period, and surrounding whitespace.
 */
function cleanAddress(address: string): string {
  return address
    // Strip trailing city/region suffixes
    .replace(/,?\s*(H\.K\.|HK|Hong Kong|Kowloon|Kln\.|New Territories|N\.T\.|Lantau Island)\.?\s*$/i, '')
    // Strip "R.E." (Religious Education suffix used in HK addresses)
    .replace(/,?\s*R\.E\./i, '')
    .replace(/\.$/, '')
    .trim();
}

/**
 * Fallback: strip any leading non-numeric institution name prefix and floor/unit designators,
 * returning just the street number onwards. Handles patterns like:
 *   "Canossa School (H.K.) 8 Hoi Chak Street"             → "8 Hoi Chak Street"
 *   "G/F., Wai Ming Block, 111 Wing Hong Street"           → "111 Wing Hong Street"
 *   "3/F., Chi Wo Commercial Building, 20 Saigon Street"   → "20 Saigon Street"
 *
 * @param address Raw (uncleaned) address.
 * @returns The cleaned street portion, or `null` when no house number is found or the
 *   candidate is not meaningfully shorter than the input.
 */
function extractStreetAddress(address: string): string | null {
  // Prefer a house number that starts a comma-separated segment. Only when none exists,
  // accept one that merely follows whitespace (the "Canossa School (H.K.) 8 ..." shape),
  // so that inputs with a comma-delimited number keep resolving exactly as before.
  const match = address.match(HOUSE_NUMBER_AFTER_COMMA) ?? address.match(HOUSE_NUMBER_AFTER_SPACE);
  const candidate = match?.[1]?.trim();
  if (!candidate) return null;

  // Must be meaningfully shorter than the full address to be worth retrying
  return candidate.length < address.length * 0.9 ? cleanAddress(candidate) : null;
}

/**
 * Run one Nominatim search request.
 *
 * @param query Free-form address query.
 * @param nominatimCountry Lower-case country code passed as `countrycodes`.
 * @returns The first hit, or `null` when the response has no hits or is not an array.
 * @throws Whatever axios throws on network failure, timeout, or a non-2xx status.
 */
async function nominatimSearch(
  query: string,
  nominatimCountry: string
): Promise<NominatimSearchResult | null> {
  const response = await axios.get<unknown>(NOMINATIM_SEARCH_URL, {
    params: {
      q: query,
      format: 'json',
      limit: 1,
      countrycodes: nominatimCountry,
      addressdetails: 1,
    },
    headers: {
      'User-Agent': 'NearestMass/1.0 (privacy@nearestmass.com)',
      'Accept-Language': 'en',
    },
    timeout: 15000,
  });

  const results = response.data;
  // Anything other than a non-empty array is treated as "no hit".
  if (!Array.isArray(results) || results.length === 0) return null;
  return results[0] as NominatimSearchResult;
}

/**
 * Geocode an address: first the cleaned full address, then (if that finds nothing)
 * only the street-number-onwards portion.
 *
 * @param address Raw address as stored in the database.
 * @param countryCode Country code as stored in the database (mapped via NOMINATIM_COUNTRY_MAP).
 * @returns The hit plus whether the fallback query produced it, or `null` if neither query matched.
 */
async function forwardGeocode(
  address: string,
  countryCode: string
): Promise<{ result: NominatimSearchResult; usedFallback: boolean } | null> {
  const nominatimCountry = NOMINATIM_COUNTRY_MAP[countryCode] ?? countryCode.toLowerCase();

  const cleaned = cleanAddress(address);
  // An address that cleans down to nothing would be sent as an empty query; skip it.
  if (cleaned) {
    const primary = await nominatimSearch(cleaned, nominatimCountry);
    if (primary) return { result: primary, usedFallback: false };
  }

  // Fallback: try just the street-number-onwards portion
  const streetOnly = extractStreetAddress(address);
  if (streetOnly && streetOnly !== cleaned) {
    // respect rate limit between retries (only needed if a request was actually made)
    if (cleaned) await sleep(RATE_LIMIT_MS);
    const fallback = await nominatimSearch(streetOnly, nominatimCountry);
    if (fallback) return { result: fallback, usedFallback: true };
  }

  return null;
}

/**
 * Read the value following `flag` in `args`.
 *
 * @returns The value, or `undefined` when the flag is absent.
 * @throws Error when the flag is present but not followed by a value.
 */
function readFlagValue(args: readonly string[], flag: string): string | undefined {
  const flagIdx = args.indexOf(flag);
  if (flagIdx === -1) return undefined;

  const value = args[flagIdx + 1];
  // A missing value must not silently widen the run (e.g. "--country" alone meaning "all").
  if (!value || value.startsWith('--')) {
    throw new Error(`Missing value for ${flag}`);
  }
  return value;
}

/**
 * Parse the `--limit` value.
 *
 * @returns A positive integer, or `undefined` when no limit was given.
 * @throws Error when the value is not a positive integer.
 */
function parseLimit(raw: string | undefined): number | undefined {
  if (raw === undefined) return undefined;

  const parsed = /^\d+$/.test(raw) ? Number(raw) : NaN;
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`Invalid --limit value "${raw}": expected a positive integer`);
  }
  return parsed;
}

/**
 * Entry point: select churches with latitude=0 and longitude=0 that have an address,
 * geocode each one, and (unless --dry-run) store the coordinates. City and state are
 * written only when the record does not already have them.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  const countryCode = readFlagValue(args, '--country');
  const limit = parseLimit(readFlagValue(args, '--limit'));

  log('============================================================');
  log('Nominatim Forward Geocode Enrichment');
  log('============================================================');
  log(`Country: ${countryCode ?? 'All'}`);
  log(`Limit: ${limit ?? 'No limit'}`);
  log(`Dry run: ${dryRun ? 'Yes' : 'No'}`);
  log('============================================================');

  try {
    // The `address: { not: null }` filter is what justifies the non-null `address` in ChurchRecord.
    const churches = (await prisma.church.findMany({
      where: {
        latitude: 0,
        longitude: 0,
        address: { not: null },
        ...(countryCode ? { country: countryCode } : {}),
      },
      select: { id: true, name: true, address: true, country: true, city: true, state: true },
      orderBy: { createdAt: 'asc' },
      take: limit,
    })) as ChurchRecord[];

    log(`Found ${churches.length} churches with lat/lng=0 and an address\n`);

    const stats = { found: 0, notFound: 0, errors: 0 };

    for (const [index, church] of churches.entries()) {
      try {
        const geocoded = await forwardGeocode(church.address, church.country);

        if (!geocoded) {
          log(` - [NOT FOUND] ${church.name} | ${church.address}`);
          stats.notFound++;
        } else {
          const { result, usedFallback } = geocoded;
          const lat = parseFloat(result.lat);
          const lng = parseFloat(result.lon);
          // Never persist NaN/Infinity coordinates from a malformed response.
          if (!Number.isFinite(lat) || !Number.isFinite(lng)) {
            throw new Error(`Nominatim returned non-numeric coordinates (${result.lat}, ${result.lon})`);
          }

          const city =
            result.address?.city ||
            result.address?.town ||
            result.address?.village ||
            result.address?.municipality ||
            null;
          const state = result.address?.state || result.address?.province || null;

          log(` + [FOUND${usedFallback ? ' (fallback)' : ''}] ${church.name}`);
          log(` ${church.address}`);
          log(` → ${lat}, ${lng}${city ? ` (${city})` : ''}`);

          if (!dryRun) {
            const updateData: { latitude: number; longitude: number; city?: string; state?: string } = {
              latitude: lat,
              longitude: lng,
            };
            // Only fill city/state when the record has none; existing values are kept.
            if (city && !church.city) updateData.city = city;
            if (state && !church.state) updateData.state = state;

            await prisma.church.update({
              where: { id: church.id },
              data: updateData,
            });
          }

          stats.found++;
        }
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        log(` ! [ERROR] ${church.name}: ${message}`);
        stats.errors++;
      }

      // Rate-limit between records; no need to wait after the last one.
      if (index < churches.length - 1) await sleep(RATE_LIMIT_MS);
    }

    log('');
    log('============================================================');
    log('Forward Geocode Summary');
    log('============================================================');
    log(`Found coords: ${stats.found}`);
    log(`Not found: ${stats.notFound}`);
    log(`Errors: ${stats.errors}`);
    log('============================================================');
  } finally {
    // Release DB resources even when the query or the loop throws.
    await prisma.$disconnect();
    await pool.end();
  }
}

main().catch(err => {
  console.error('Fatal error:', err);
  process.exit(1);
});