#!/usr/bin/env tsx
/**
 * Enrich churches that have lat/lng=0 with real coordinates via Nominatim forward geocoding.
 * After this runs, enrich-with-reverse-geocode fills city/state from the new coordinates.
 *
 * Usage:
 *   npx tsx scripts/enrich-with-forward-geocode.ts --country HK --dry-run
 *   npx tsx scripts/enrich-with-forward-geocode.ts --country HK
 *   npx tsx scripts/enrich-with-forward-geocode.ts --limit 10
 *
 * Rate limit: 1 request/second (Nominatim usage policy — mandatory).
 *
 * Provenance: introduced by commit 3cf1465fb6974a19a94e4a55fa693dc29d21f6b7
 * (albertfj114, 2026-04-03, "feat: add Nominatim forward geocoding script";
 * Co-Authored-By: Claude Sonnet 4.6). Addresses have trailing city/country
 * suffixes removed before querying, and HK/MO are mapped to the 'cn'
 * countrycodes value.
 */

import dotenv from 'dotenv';
import path from 'path';
// Load .env.local first so its values take precedence over .env
// (dotenv does not overwrite variables that are already set).
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import axios from 'axios';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
const pool = new Pool({
  connectionString: dbUrl,
  // Connection strings containing "neon" get SSL without certificate verification.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

const NOMINATIM_SEARCH_URL = 'https://nominatim.openstreetmap.org/search';
/** Delay between consecutive Nominatim requests, slightly above the 1 s policy minimum. */
const RATE_LIMIT_MS = 1100;

// Some regions use a different ISO code in OSM than in our DB
const NOMINATIM_COUNTRY_MAP: Record<string, string> = {
  HK: 'cn', // Hong Kong is part of China in OSM
  MO: 'cn', // Macau likewise
};

/** Trailing Hong Kong region/country suffix (with optional leading comma and trailing dot). */
const TRAILING_REGION_SUFFIX = /,?\s*(H\.K\.|HK|Hong Kong|Kowloon|New Territories|N\.T\.)\.?\s*$/i;

/** Subset of the church row that this script reads. */
interface ChurchRecord {
  id: string;
  name: string;
  address: string;
  country: string;
  city: string | null;
  state: string | null;
}

/** Fields of a Nominatim search hit that this script consumes. */
interface NominatimSearchResult {
  lat: string;
  lon: string;
  display_name: string;
  address?: {
    city?: string;
    town?: string;
    village?: string;
    municipality?: string;
    state?: string;
    province?: string;
  };
}

/** Columns written back for a successfully geocoded church. */
interface ChurchCoordinateUpdate {
  latitude: number;
  longitude: number;
  city?: string;
  state?: string;
}

/** Parsed command-line options. */
interface CliOptions {
  dryRun: boolean;
  countryCode: string | undefined;
  limit: number | undefined;
}

/** Writes a message to stdout prefixed with an ISO-8601 timestamp. */
function log(msg: string): void {
  console.log(`[${new Date().toISOString()}] ${msg}`);
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/** Narrows an unknown thrown value to a printable message. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Removes one trailing region/country suffix and a trailing period from an address.
 *
 * @param address Raw address string from the database.
 * @returns The trimmed address; may be an empty string.
 */
function cleanAddress(address: string): string {
  return address
    .replace(TRAILING_REGION_SUFFIX, '')
    .replace(/\.$/, '')
    .trim();
}

/**
 * Looks up an address with the Nominatim search API, restricted to one country.
 *
 * @param address Raw address; it is cleaned with `cleanAddress` before querying.
 * @param countryCode Country code as stored on the church row.
 * @returns The first search hit, or `null` when there is nothing to query or no hit.
 * @throws If the HTTP request fails or the response body is not an array.
 */
async function forwardGeocode(address: string, countryCode: string): Promise<NominatimSearchResult | null> {
  const cleanedAddress = cleanAddress(address);
  // Nothing left to search for once the suffix is removed: skip the request.
  if (cleanedAddress === '') {
    return null;
  }

  const nominatimCountry = NOMINATIM_COUNTRY_MAP[countryCode.toUpperCase()] ?? countryCode.toLowerCase();
  const response = await axios.get<unknown>(NOMINATIM_SEARCH_URL, {
    params: {
      q: cleanedAddress,
      format: 'json',
      limit: 1,
      countrycodes: nominatimCountry,
      addressdetails: 1,
    },
    headers: {
      'User-Agent': 'NearestMass/1.0 (privacy@nearestmass.com)',
      'Accept-Language': 'en',
    },
    timeout: 15000,
  });

  const results = response.data;
  // Surface a non-array payload as an error instead of reporting "not found".
  if (!Array.isArray(results)) {
    throw new Error('Unexpected Nominatim response (expected a JSON array)');
  }
  return results.length > 0 ? (results[0] as NominatimSearchResult) : null;
}

/**
 * Parses `--dry-run`, `--country <code>` and `--limit <n>`.
 *
 * @param args Arguments after the script name.
 * @throws If `--country` has no value or `--limit` is not a positive integer.
 */
function parseCliOptions(args: readonly string[]): CliOptions {
  const dryRun = args.includes('--dry-run');

  let countryCode: string | undefined;
  const countryIdx = args.indexOf('--country');
  if (countryIdx !== -1) {
    const value = args[countryIdx + 1];
    // A missing value would otherwise silently process every country.
    if (value === undefined || value.startsWith('--')) {
      throw new Error('--country requires a value, e.g. --country HK');
    }
    countryCode = value;
  }

  let limit: number | undefined;
  const limitIdx = args.indexOf('--limit');
  if (limitIdx !== -1) {
    const parsed = parseInt(args[limitIdx + 1] ?? '', 10);
    // NaN or a non-positive number must not reach the Prisma `take` option.
    if (!Number.isInteger(parsed) || parsed <= 0) {
      throw new Error('--limit requires a positive integer, e.g. --limit 10');
    }
    limit = parsed;
  }

  return { dryRun, countryCode, limit };
}

/**
 * Entry point: selects churches with latitude 0 and longitude 0 and a non-null address,
 * geocodes each one sequentially, and (unless `--dry-run`) stores the coordinates.
 * City/state from the geocoder are written only when the row has none.
 */
async function main(): Promise<void> {
  const { dryRun, countryCode, limit } = parseCliOptions(process.argv.slice(2));

  log('============================================================');
  log('Nominatim Forward Geocode Enrichment');
  log('============================================================');
  log(`Country: ${countryCode || 'All'}`);
  log(`Limit: ${limit || 'No limit'}`);
  log(`Dry run: ${dryRun ? 'Yes' : 'No'}`);
  log('============================================================');

  try {
    // The `address: { not: null }` filter is what justifies the non-null
    // `address` in ChurchRecord.
    const churches = await prisma.church.findMany({
      where: {
        latitude: 0,
        longitude: 0,
        address: { not: null },
        ...(countryCode ? { country: countryCode } : {}),
      },
      select: { id: true, name: true, address: true, country: true, city: true, state: true },
      orderBy: { createdAt: 'asc' },
      take: limit,
    }) as ChurchRecord[];

    log(`Found ${churches.length} churches with lat/lng=0 and an address\n`);

    const stats = { found: 0, notFound: 0, errors: 0 };

    for (const [index, church] of churches.entries()) {
      try {
        const result = await forwardGeocode(church.address, church.country);

        if (!result) {
          log(` - [NOT FOUND] ${church.name} | ${church.address}`);
          stats.notFound++;
        } else {
          const lat = parseFloat(result.lat);
          const lng = parseFloat(result.lon);
          // Never persist NaN/Infinity coordinates.
          if (!Number.isFinite(lat) || !Number.isFinite(lng)) {
            throw new Error(`Invalid coordinates in response: lat=${result.lat}, lon=${result.lon}`);
          }

          const city = result.address?.city || result.address?.town ||
            result.address?.village || result.address?.municipality || null;
          const state = result.address?.state || result.address?.province || null;

          log(` + [FOUND] ${church.name}`);
          log(` ${church.address}`);
          log(` → ${lat}, ${lng}${city ? ` (${city})` : ''}`);

          if (!dryRun) {
            const updateData: ChurchCoordinateUpdate = { latitude: lat, longitude: lng };
            // Only fill city/state when the row has no value yet.
            if (city && !church.city) updateData.city = city;
            if (state && !church.state) updateData.state = state;

            await prisma.church.update({
              where: { id: church.id },
              data: updateData,
            });
          }

          stats.found++;
        }
      } catch (err: unknown) {
        log(` ! [ERROR] ${church.name}: ${errorMessage(err)}`);
        stats.errors++;
      }

      // Throttle between requests; no need to wait after the final one.
      if (index < churches.length - 1) {
        await sleep(RATE_LIMIT_MS);
      }
    }

    log('');
    log('============================================================');
    log('Forward Geocode Summary');
    log('============================================================');
    log(`Found coords: ${stats.found}`);
    log(`Not found: ${stats.notFound}`);
    log(`Errors: ${stats.errors}`);
    log('============================================================');
  } finally {
    // Release database connections even when the query or loop throws.
    await prisma.$disconnect();
    await pool.end();
  }
}

main().catch(err => {
  console.error('Fatal error:', err);
  process.exit(1);
});