#!/usr/bin/env tsx
/**
 * Import Catholic churches from a specific region of a country
 *
 * Usage:
 *   npx tsx scripts/import-osm-region.ts --country GB --region "England South"
 *   npx tsx scripts/import-osm-region.ts --country IT --region "North" --dry-run
 */

// Load .env for database connection (before importing anything that uses process.env)
import dotenv from 'dotenv';
import path from 'path';

// Load .env.local first (production Neon URL), then .env (local fallback).
// dotenv does not overwrite variables that are already set, so values from
// .env.local take precedence over .env.
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

// Create a fresh Prisma client for this script (don't use cached pool from lib/db)
import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';

// Mask the password portion of the URL before logging it.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);

const pool = new Pool({
  connectionString: dbUrl,
  // SSL (without certificate verification) is only enabled when the URL mentions "neon".
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

// NOTE(review): these imports intentionally stay below the dotenv calls to keep
// the original statement order; whether that order is honoured at runtime
// depends on how the module is transpiled (CJS vs. hoisted ESM) — confirm.
import { COUNTRY_BOUNDING_BOXES, queryOverpassByBoundingBox, type OSMChurch } from '../src/lib/overpass-client';
import { findDuplicateChurch, mergeChurchData } from '../src/lib/church-matcher';

/** Counters collected during one region import and reported by printSummary. */
interface ImportStats {
  osmChurchesFound: number;
  newChurchesInserted: number;
  existingUpdated: number;
  existingLinked: number;
  churchesWithWebsites: number;
  churchesWithoutWebsites: number;
  errors: number;
}

/**
 * Parse command line arguments
 *
 * Recognises `--country <value>` (upper-cased), `--region <value>` and the
 * `--dry-run` flag. Unknown arguments, and `--country` / `--region` without a
 * following value, are ignored.
 *
 * @returns The parsed options; `country` / `region` are undefined when absent.
 */
function parseArgs(): { country?: string; region?: string; dryRun: boolean } {
  const args = process.argv.slice(2);
  const result = {
    country: undefined as string | undefined,
    region: undefined as string | undefined,
    dryRun: false,
  };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];

    if (flag === '--country' && value) {
      result.country = value.toUpperCase();
      i++; // skip the consumed value
    } else if (flag === '--region' && value) {
      result.region = value;
      i++; // skip the consumed value
    } else if (flag === '--dry-run') {
      result.dryRun = true;
    }
  }

  return result;
}

/**
 * Import churches from a single region
 *
 * Looks up the region's bounding box in COUNTRY_BOUNDING_BOXES, queries
 * Overpass for it and then, for every returned church, either updates the
 * matching database row or inserts a new one. Exits the process with code 1
 * when the country or region is unknown.
 *
 * @param countryCode Key into COUNTRY_BOUNDING_BOXES; also used as the
 *   `country` of new rows when the OSM record has none.
 * @param regionName Exact `name` of a region listed for that country.
 * @param dryRun When true, only lists (up to 10) churches and counts websites;
 *   the database is not touched.
 * @returns The counters gathered during the run. Failures are counted in
 *   `errors` rather than thrown.
 */
async function importFromRegion(
  countryCode: string,
  regionName: string,
  dryRun: boolean = false
): Promise<ImportStats> {
  const stats: ImportStats = {
    osmChurchesFound: 0,
    newChurchesInserted: 0,
    existingUpdated: 0,
    existingLinked: 0,
    churchesWithWebsites: 0,
    churchesWithoutWebsites: 0,
    errors: 0,
  };

  console.log(`\n${'='.repeat(60)}`);
  console.log(`Importing from ${countryCode} - ${regionName}`);
  console.log(`${'='.repeat(60)}\n`);

  // Look up the bounding box
  const regions = COUNTRY_BOUNDING_BOXES[countryCode];
  if (!regions) {
    console.error(`Error: No bounding boxes defined for country ${countryCode}`);
    console.error('Available countries:', Object.keys(COUNTRY_BOUNDING_BOXES).join(', '));
    process.exit(1);
  }

  const region = regions.find(r => r.name === regionName);
  if (!region) {
    console.error(`Error: Region "${regionName}" not found for ${countryCode}`);
    console.error('Available regions:', regions.map(r => r.name).join(', '));
    process.exit(1);
  }

  try {
    // Query Overpass API for this specific region
    console.log(`Querying bounding box: (${region.south}, ${region.west}, ${region.north}, ${region.east})`);
    const osmChurches = await queryOverpassByBoundingBox(region.south, region.west, region.north, region.east);
    stats.osmChurchesFound = osmChurches.length;

    if (osmChurches.length === 0) {
      console.log(`No churches found in ${regionName}`);
      return stats;
    }

    console.log(`Found ${osmChurches.length} Catholic churches in ${regionName}`);

    if (dryRun) {
      console.log('\n[DRY RUN] Would import the following churches:');
      osmChurches.slice(0, 10).forEach((church) => {
        console.log(` - ${church.name} (${church.city || 'unknown city'})`);
        console.log(` OSM ID: ${church.osmId}, Website: ${church.website || 'none'}`);
      });
      if (osmChurches.length > 10) {
        console.log(` ... and ${osmChurches.length - 10} more`);
      }

      // Count websites
      stats.churchesWithWebsites = osmChurches.filter((c) => c.website).length;
      stats.churchesWithoutWebsites = osmChurches.length - stats.churchesWithWebsites;
      return stats;
    }

    // Fetch all existing churches for deduplication
    console.log('Fetching existing churches for deduplication...');
    const existingChurches = await prisma.church.findMany({
      select: {
        id: true,
        name: true,
        latitude: true,
        longitude: true,
        osmId: true,
        baiduId: true,
        masstimesId: true,
        orarimesseId: true,
        massSchedulesPhId: true,
        philmassId: true,
        horariosMisasId: true,
        mszeInfoId: true,
        weekdayMassesId: true,
        messesInfoId: true,
        bohosluzbyId: true,
        miserendId: true,
        kerknetId: true,
        gottesdienstzeitenId: true,
        discovermassId: true,
        source: true,
        website: true,
        phone: true,
        address: true,
      },
    });
    console.log(`Found ${existingChurches.length} existing churches in database`);

    // Process churches one by one (no batch transactions to avoid rollbacks)
    let processed = 0;
    for (const osmChurch of osmChurches) {
      try {
        // Check for duplicate
        const duplicate = findDuplicateChurch(osmChurch, existingChurches);

        if (duplicate) {
          // Same osmId => counted as "updated"; any other match returned by
          // findDuplicateChurch => counted as "linked" (it gains the osmId).
          const matchedByOsmId = duplicate.osmId === osmChurch.osmId;
          const mergedData = mergeChurchData(duplicate, osmChurch);

          // Verify the church exists in the database (not just in our temp list from this run).
          // Only the id is selected: the row is needed as an existence probe, nothing more.
          const existsInDb = await prisma.church.findUnique({
            where: { id: duplicate.id },
            select: { id: true },
          });
          if (existsInDb) {
            await prisma.church.update({
              where: { id: duplicate.id },
              data: mergedData,
            });
          }
          // When the row is missing the church is still counted (treated as already processed).
          if (matchedByOsmId) {
            stats.existingUpdated++;
          } else {
            stats.existingLinked++;
          }
        } else {
          // New church - insert it and capture the real ID
          const newChurch = await prisma.church.create({
            data: {
              name: osmChurch.name,
              latitude: osmChurch.lat,
              longitude: osmChurch.lng,
              address: osmChurch.address,
              city: osmChurch.city,
              state: osmChurch.state,
              zip: osmChurch.zip,
              country: osmChurch.country || countryCode,
              phone: osmChurch.phone,
              website: osmChurch.website,
              diocese: osmChurch.diocese,
              wheelchairAccess: osmChurch.wheelchairAccess ?? false,
              source: 'osm',
              osmId: osmChurch.osmId,
              hasWebsite: !!osmChurch.website,
              osmLastSyncedAt: new Date(),
            },
          });
          stats.newChurchesInserted++;

          // Add to existing churches list for future deduplication in this run (use real DB ID)
          existingChurches.push({
            id: newChurch.id,
            name: osmChurch.name,
            latitude: osmChurch.lat,
            longitude: osmChurch.lng,
            osmId: osmChurch.osmId,
            baiduId: null,
            masstimesId: null,
            orarimesseId: null,
            massSchedulesPhId: null,
            philmassId: null,
            horariosMisasId: null,
            mszeInfoId: null,
            weekdayMassesId: null,
            messesInfoId: null,
            bohosluzbyId: null,
            miserendId: null,
            kerknetId: null,
            gottesdienstzeitenId: null,
            discovermassId: null,
            source: 'osm',
            website: osmChurch.website || null,
            phone: osmChurch.phone || null,
            address: osmChurch.address || null,
          });
        }

        // Reached only when the update/insert above did not throw, so failed
        // churches are not included in the website tallies.
        if (osmChurch.website) {
          stats.churchesWithWebsites++;
        } else {
          stats.churchesWithoutWebsites++;
        }

        processed++;

        // Log progress every 100 churches
        if (processed % 100 === 0) {
          console.log(`Progress: ${processed}/${osmChurches.length} churches processed`);
        }
      } catch (error) {
        // One failing church must not abort the rest of the import.
        console.error(`Error processing church ${osmChurch.name}:`, error);
        stats.errors++;
      }
    }

    console.log(`\nProcessed all ${osmChurches.length} churches from ${regionName}`);
  } catch (error) {
    console.error(`Failed to import from ${regionName}:`, error);
    stats.errors++;
  }

  return stats;
}

/**
 * Print import summary
 *
 * @param countryCode Country code shown in the header.
 * @param regionName Region name shown in the header.
 * @param stats Counters returned by importFromRegion.
 * @param dryRun When true, the insert/update/link and error lines are omitted.
 */
function printSummary(countryCode: string, regionName: string, stats: ImportStats, dryRun: boolean) {
  console.log(`\n${'='.repeat(60)}`);
  console.log(`Import Summary for ${countryCode} - ${regionName} ${dryRun ? '(DRY RUN)' : ''}`);
  console.log(`${'='.repeat(60)}`);
  console.log(`OSM churches found: ${stats.osmChurchesFound}`);
  if (!dryRun) {
    console.log(`New churches inserted: ${stats.newChurchesInserted}`);
    console.log(`Existing churches updated: ${stats.existingUpdated} (matched by osmId)`);
    console.log(`Existing churches linked: ${stats.existingLinked} (matched by proximity)`);
  }
  console.log(`Churches with websites: ${stats.churchesWithWebsites}`);
  console.log(`Churches without websites: ${stats.churchesWithoutWebsites}`);
  if (!dryRun && stats.errors > 0) {
    console.log(`Errors encountered: ${stats.errors}`);
  }
  console.log(`${'='.repeat(60)}\n`);
}

/**
 * Main function
 *
 * Validates the CLI arguments, runs the import for the requested region,
 * prints the summary and always disconnects the Prisma client afterwards.
 */
async function main() {
  const { country, region, dryRun } = parseArgs();

  if (!country || !region) {
    console.error('Error: Must specify both --country and --region ');
    console.error('Usage:');
    console.error(' npx tsx scripts/import-osm-region.ts --country GB --region "England South"');
    console.error(' npx tsx scripts/import-osm-region.ts --country IT --region "North" --dry-run');
    console.error('\nAvailable countries:', Object.keys(COUNTRY_BOUNDING_BOXES).join(', '));
    process.exit(1);
  }

  if (dryRun) {
    console.log('\n*** DRY RUN MODE - No changes will be made to database ***\n');
  }

  try {
    const stats = await importFromRegion(country, region, dryRun);
    printSummary(country, region, stats, dryRun);
  } catch (error) {
    console.error('Fatal error:', error);
    // Set the exit code instead of calling process.exit(1): process.exit()
    // terminates immediately and would skip the disconnect in `finally`.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
}

// main() handles its own errors, but the disconnect in its `finally` can still
// reject; report that instead of leaving an unhandled rejection.
main().catch((error: unknown) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});