feat: add Nominatim forward geocoding script
Enriches churches with lat/lng=0 using Nominatim search API. Cleans trailing city/country suffixes from addresses before querying. Maps HK/MO to 'cn' countrycodes (OSM treats them as part of China). After this runs, enrich-with-reverse-geocode fills city/state fields. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
188
scripts/enrich-with-forward-geocode.ts
Normal file
188
scripts/enrich-with-forward-geocode.ts
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
#!/usr/bin/env tsx
|
||||||
|
/**
|
||||||
|
* Enrich churches that have lat/lng=0 with real coordinates via Nominatim forward geocoding.
|
||||||
|
* After this runs, enrich-with-reverse-geocode fills city/state from the new coordinates.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npx tsx scripts/enrich-with-forward-geocode.ts --country HK --dry-run
|
||||||
|
* npx tsx scripts/enrich-with-forward-geocode.ts --country HK
|
||||||
|
* npx tsx scripts/enrich-with-forward-geocode.ts --limit 10
|
||||||
|
*
|
||||||
|
* Rate limit: 1 request/second (Nominatim usage policy — mandatory).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import path from 'path';
|
||||||
|
// Load environment files from the current working directory, .env.local first and .env second.
for (const envFile of ['.env.local', '.env']) {
  dotenv.config({ path: path.resolve(process.cwd(), envFile) });
}
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import { PrismaPg } from '@prisma/adapter-pg';
|
||||||
|
import { PrismaClient } from '@prisma/client';
|
||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
// Connection string: DATABASE_URL when it is set and non-empty, otherwise a local database.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';

// URLs containing "neon" connect over TLS without certificate verification;
// every other URL leaves the ssl option undefined.
const sslSetting = dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined;

const pool = new Pool({ connectionString: dbUrl, ssl: sslSetting });

// Prisma talks to the database through the pg pool created above.
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||||
|
|
||||||
|
/** Endpoint of the Nominatim free-form search API. */
const NOMINATIM_SEARCH_URL = 'https://nominatim.openstreetmap.org/search';

/** Pause between consecutive lookups, in milliseconds (slightly more than one second). */
const RATE_LIMIT_MS = 1100;

/**
 * Country codes as stored in our DB, mapped to the code Nominatim expects
 * in `countrycodes` for regions where the two differ.
 */
const NOMINATIM_COUNTRY_MAP: Record<string, string> = {
  // Hong Kong is part of China in OSM
  HK: 'cn',
  // Macau likewise
  MO: 'cn',
};
|
||||||
|
|
||||||
|
/** The subset of church columns this script selects and works with. */
interface ChurchRecord {
  /** Primary key; used to address the row when writing coordinates back. */
  id: string;
  /** Display name; used in log output only. */
  name: string;
  /** Address text that is sent to the geocoder. */
  address: string;
  /** Country code as stored in the DB (e.g. "HK"). */
  country: string;
  /** Existing city value, or null when not yet known. */
  city: string | null;
  /** Existing state value, or null when not yet known. */
  state: string | null;
}
|
||||||
|
|
||||||
|
/** The fields of a Nominatim search hit that this script reads. */
interface NominatimSearchResult {
  /** Latitude as a decimal string. */
  lat: string;
  /** Longitude as a decimal string. */
  lon: string;
  /** Full formatted name of the matched place. */
  display_name: string;
  /** Address breakdown; every component is optional. */
  address?: Partial<
    Record<'city' | 'town' | 'village' | 'municipality' | 'state' | 'province', string>
  >;
}
|
||||||
|
|
||||||
|
/** Print a message to stdout, prefixed with the current time as an ISO-8601 timestamp in brackets. */
function log(msg: string) {
  const stamp = new Date().toISOString();
  console.log('[' + stamp + '] ' + msg);
}
|
||||||
|
|
||||||
|
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms Delay passed to setTimeout.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Prepare a raw address for use as a geocoder query.
 *
 * Steps, in order:
 *   1. drop ONE trailing region suffix (H.K., HK, Hong Kong, Kowloon, New Territories, N.T.;
 *      case-insensitive, with an optional leading comma and optional trailing period),
 *   2. drop one period if the string now ends with one,
 *   3. trim surrounding whitespace.
 *
 * @param address Raw address text.
 * @returns The cleaned address; may be empty if the input was only a suffix.
 */
function cleanAddress(address: string): string {
  const trailingRegion = /,?\s*(H\.K\.|HK|Hong Kong|Kowloon|New Territories|N\.T\.)\.?\s*$/i;
  const withoutRegion = address.replace(trailingRegion, '');
  const withoutFinalDot = withoutRegion.replace(/\.$/, '');
  return withoutFinalDot.trim();
}
|
||||||
|
|
||||||
|
/**
 * Look up coordinates for an address with the Nominatim search API.
 *
 * @param address     Raw address; it is passed through cleanAddress before querying.
 * @param countryCode Country code as stored in the DB. It is translated through
 *                    NOMINATIM_COUNTRY_MAP when an entry exists, otherwise lower-cased,
 *                    and sent as the `countrycodes` filter.
 * @returns The first search hit, or null when the cleaned address is empty, the response
 *          is not an array, or the array is empty.
 * @throws Any error raised by axios (for example network failure or the 15 s timeout).
 */
async function forwardGeocode(address: string, countryCode: string): Promise<NominatimSearchResult | null> {
  const nominatimCountry = NOMINATIM_COUNTRY_MAP[countryCode] ?? countryCode.toLowerCase();
  const cleanedAddress = cleanAddress(address);

  // An address that was nothing but a stripped suffix (e.g. "Hong Kong") leaves nothing
  // to search for; do not spend a request on an empty query.
  if (cleanedAddress === '') {
    return null;
  }

  const response = await axios.get(NOMINATIM_SEARCH_URL, {
    params: {
      q: cleanedAddress,
      format: 'json',
      limit: 1,
      countrycodes: nominatimCountry,
      addressdetails: 1,
    },
    headers: {
      'User-Agent': 'NearestMass/1.0 (privacy@nearestmass.com)',
      'Accept-Language': 'en',
    },
    timeout: 15000,
  });

  // The body is external input: only treat it as a result list if it really is an array.
  const payload: unknown = response.data;
  if (!Array.isArray(payload) || payload.length === 0) {
    return null;
  }
  return payload[0] as NominatimSearchResult;
}
|
||||||
|
|
||||||
|
/**
 * Script entry point: forward-geocode every church whose latitude and longitude are both 0.
 *
 * CLI flags (read from process.argv):
 *   --country <code>  only process churches whose `country` equals <code>
 *   --limit <n>       process at most <n> churches (positive integer)
 *   --dry-run         query and log, but write nothing to the database
 *
 * Churches are processed one at a time, ordered by createdAt ascending, with a pause of
 * RATE_LIMIT_MS after each one. A failure on a single church is logged and counted and
 * does not stop the run. The DB connections are closed at the end of a completed run.
 *
 * @throws Error when --country or --limit is present without a usable value.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  const countryIdx = args.indexOf('--country');
  const limitIdx = args.indexOf('--limit');

  // A bare `--country` (or one followed by another flag) must not silently widen the run
  // to every country, nor use the next flag as the country code.
  let countryCode: string | undefined;
  if (countryIdx !== -1) {
    const rawCountry = args[countryIdx + 1];
    if (!rawCountry || rawCountry.startsWith('--')) {
      throw new Error('--country requires a value, e.g. --country HK');
    }
    countryCode = rawCountry;
  }

  // Reject a missing / non-numeric / zero limit up front instead of handing NaN to the query.
  let limit: number | undefined;
  if (limitIdx !== -1) {
    const rawLimit = args[limitIdx + 1];
    const parsedLimit = rawLimit !== undefined && /^\d+$/.test(rawLimit) ? parseInt(rawLimit, 10) : NaN;
    if (!Number.isInteger(parsedLimit) || parsedLimit <= 0) {
      throw new Error(`--limit requires a positive integer, got: ${rawLimit ?? '(nothing)'}`);
    }
    limit = parsedLimit;
  }

  log('============================================================');
  log('Nominatim Forward Geocode Enrichment');
  log('============================================================');
  log(`Country: ${countryCode || 'All'}`);
  log(`Limit: ${limit ?? 'No limit'}`);
  log(`Dry run: ${dryRun ? 'Yes' : 'No'}`);
  log('============================================================');

  const churches = await prisma.church.findMany({
    where: {
      latitude: 0,
      longitude: 0,
      address: { not: null },
      ...(countryCode ? { country: countryCode } : {}),
    },
    select: { id: true, name: true, address: true, country: true, city: true, state: true },
    orderBy: { createdAt: 'asc' },
    take: limit,
  }) as ChurchRecord[];

  log(`Found ${churches.length} churches with lat/lng=0 and an address\n`);

  const stats = { found: 0, notFound: 0, errors: 0 };

  for (const church of churches) {
    try {
      const result = await forwardGeocode(church.address, church.country);

      if (!result) {
        log(` - [NOT FOUND] ${church.name} | ${church.address}`);
        stats.notFound++;
      } else {
        const lat = parseFloat(result.lat);
        const lng = parseFloat(result.lon);

        // Never persist NaN/Infinity: treat an unparseable hit as an error for this church.
        if (!Number.isFinite(lat) || !Number.isFinite(lng)) {
          throw new Error(`unparseable coordinates in response (lat=${result.lat}, lon=${result.lon})`);
        }

        const city = result.address?.city || result.address?.town ||
          result.address?.village || result.address?.municipality || null;
        const state = result.address?.state || result.address?.province || null;

        log(` + [FOUND] ${church.name}`);
        log(` ${church.address}`);
        log(` → ${lat}, ${lng}${city ? ` (${city})` : ''}`);

        if (!dryRun) {
          // Coordinates are always written; city/state only fill gaps, never overwrite.
          const updateData: Record<string, unknown> = { latitude: lat, longitude: lng };
          if (city && !church.city) updateData.city = city;
          if (state && !church.state) updateData.state = state;

          await prisma.church.update({
            where: { id: church.id },
            data: updateData,
          });
        }

        stats.found++;
      }
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      log(` ! [ERROR] ${church.name}: ${message}`);
      stats.errors++;
    }

    // Pause after every church, including failures, to respect the request rate limit.
    await sleep(RATE_LIMIT_MS);
  }

  log('');
  log('============================================================');
  log('Forward Geocode Summary');
  log('============================================================');
  log(`Found coords: ${stats.found}`);
  log(`Not found: ${stats.notFound}`);
  log(`Errors: ${stats.errors}`);
  log('============================================================');

  await prisma.$disconnect();
  await pool.end();
}
|
||||||
|
|
||||||
|
// Run the script; anything that escapes main() is reported and turns into exit code 1.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|
||||||
Reference in New Issue
Block a user