#!/usr/bin/env tsx
/**
 * Import Catholic churches from a specific region of a country.
 *
 * Usage:
 *   npx tsx scripts/import-osm-region.ts --country GB --region "England South"
 *   npx tsx scripts/import-osm-region.ts --country IT --region "North" --dry-run
 */
|
||
|
|
|
||
|
|
// Load .env for database connection (before importing anything that uses process.env)
|
||
|
|
import dotenv from 'dotenv';
|
||
|
|
import path from 'path';
|
||
|
|
|
||
|
|
// Load .env.local first (production Neon URL), then .env (local fallback).
// dotenv does not overwrite variables that are already set (unless `override`
// is passed), so values from .env.local win over those in .env.
// NOTE(review): under ES module semantics `import` declarations are hoisted and
// evaluated before these calls, wherever they appear in the file — confirm that
// no imported module reads process.env at load time.
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
|
||
|
|
|
||
|
|
// Create a fresh Prisma client for this script (don't use cached pool from lib/db)
|
||
|
|
import { Pool } from 'pg';
|
||
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
||
|
|
import { PrismaClient } from '@prisma/client';
|
||
|
|
|
||
|
|
/**
 * Return `connectionString` with its password replaced by `***` so it can be
 * written to logs.
 *
 * The string is parsed with the WHATWG URL parser so that passwords containing
 * `:` or `@` are masked in full. If the string has no password it is returned
 * unchanged; if it cannot be parsed as a URL, a best-effort regex mask is
 * applied instead.
 */
function redactDbUrl(connectionString: string): string {
  try {
    const parsed = new URL(connectionString);
    if (parsed.password === '') {
      return connectionString;
    }
    parsed.password = '***';
    return parsed.toString();
  } catch {
    // Not a parseable URL: fall back to masking the first `:secret@` run.
    return connectionString.replace(/:[^:@]+@/, ':***@');
  }
}

// `||` (not `??`) on purpose: an empty DATABASE_URL also falls back to the local default.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
console.log(`Connecting to database: ${redactDbUrl(dbUrl)}`);
|
||
|
|
// Dedicated pg pool and Prisma client for this script, built from `dbUrl`
// rather than reusing the application's cached pool.
const pool = new Pool({
  connectionString: dbUrl,
  // TLS is only configured when the URL contains "neon".
  // NOTE(review): `rejectUnauthorized: false` disables server certificate
  // verification — confirm this is intentional for the hosted database.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||
|
|
|
||
|
|
import { COUNTRY_BOUNDING_BOXES, queryOverpassByBoundingBox, type OSMChurch } from '../src/lib/overpass-client';
|
||
|
|
import { findDuplicateChurch, mergeChurchData } from '../src/lib/church-matcher';
|
||
|
|
|
||
|
|
/** Counters collected while importing one region; returned by `importFromRegion`. */
interface ImportStats {
  /** Number of churches returned by the Overpass query for the region. */
  osmChurchesFound: number;
  /** Churches with no match among existing rows that were created in the database. */
  newChurchesInserted: number;
  /** Matched churches whose stored osmId equals the OSM church's osmId. */
  existingUpdated: number;
  /** Matched churches whose stored osmId differs from the OSM church's osmId. */
  existingLinked: number;
  /** Processed churches that carry a website. */
  churchesWithWebsites: number;
  /** Processed churches that carry no website. */
  churchesWithoutWebsites: number;
  /** Failures: one per church that threw, plus one if the whole region import failed. */
  errors: number;
}
|
||
|
|
|
||
|
|
/**
 * Parse command line arguments from `process.argv`.
 *
 * Recognised flags:
 *   --country <CODE>  country code; upper-cased before being returned
 *   --region <NAME>   region name; returned verbatim
 *   --dry-run         boolean switch
 *
 * A value flag is ignored when it is the last argument or when the next
 * argument is itself a flag (starts with `--`), so `--region --dry-run` leaves
 * `region` undefined and still enables dry-run mode. Unrecognised arguments
 * are skipped.
 *
 * @returns the parsed options; `country` / `region` are undefined when not supplied.
 */
function parseArgs(): { country?: string; region?: string; dryRun: boolean } {
  const args = process.argv.slice(2);

  let country: string | undefined;
  let region: string | undefined;
  let dryRun = false;

  // Value that follows the flag at `index`, or undefined if missing or another flag.
  const valueAfter = (index: number): string | undefined => {
    const next = args[index + 1];
    return next && !next.startsWith('--') ? next : undefined;
  };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];

    if (flag === '--dry-run') {
      dryRun = true;
      continue;
    }
    if (flag !== '--country' && flag !== '--region') {
      continue;
    }

    const value = valueAfter(i);
    if (value === undefined) {
      continue;
    }

    if (flag === '--country') {
      country = value.toUpperCase();
    } else {
      region = value;
    }
    i++; // the value has been consumed
  }

  return { country, region, dryRun };
}
|
||
|
|
|
||
|
|
/**
 * Import churches from a single region.
 *
 * Looks up the region's bounding box in COUNTRY_BOUNDING_BOXES, queries
 * Overpass for it, and then, unless `dryRun` is set, reconciles every returned
 * church with the database one at a time:
 *   - a match from findDuplicateChurch is updated with the merged data and
 *     counted as "updated" (same osmId) or "linked" (different osmId);
 *   - anything else is inserted as a new row with source 'osm'.
 *
 * Terminates the process with exit code 1 if the country or region name is
 * unknown. Any other failure is logged and counted in `stats.errors` rather
 * than thrown.
 *
 * @param countryCode key into COUNTRY_BOUNDING_BOXES; also the fallback `country` for new rows
 * @param regionName  exact (case-sensitive) name of a region defined for that country
 * @param dryRun      when true, only lists up to 10 churches and counts websites; no database access
 * @returns the counters collected during the run
 */
async function importFromRegion(countryCode: string, regionName: string, dryRun: boolean = false): Promise<ImportStats> {
  const stats: ImportStats = {
    osmChurchesFound: 0,
    newChurchesInserted: 0,
    existingUpdated: 0,
    existingLinked: 0,
    churchesWithWebsites: 0,
    churchesWithoutWebsites: 0,
    errors: 0,
  };

  const banner = '='.repeat(60);
  console.log(`\n${banner}`);
  console.log(`Importing from ${countryCode} - ${regionName}`);
  console.log(`${banner}\n`);

  // Look up the bounding box
  const regions = COUNTRY_BOUNDING_BOXES[countryCode];
  if (!regions) {
    console.error(`Error: No bounding boxes defined for country ${countryCode}`);
    console.error('Available countries:', Object.keys(COUNTRY_BOUNDING_BOXES).join(', '));
    process.exit(1);
  }

  const region = regions.find(r => r.name === regionName);
  if (!region) {
    console.error(`Error: Region "${regionName}" not found for ${countryCode}`);
    console.error('Available regions:', regions.map(r => r.name).join(', '));
    process.exit(1);
  }

  try {
    // Query Overpass API for this specific region
    console.log(`Querying bounding box: (${region.south}, ${region.west}, ${region.north}, ${region.east})`);
    const osmChurches = await queryOverpassByBoundingBox(region.south, region.west, region.north, region.east);
    stats.osmChurchesFound = osmChurches.length;

    if (osmChurches.length === 0) {
      console.log(`No churches found in ${regionName}`);
      return stats;
    }

    console.log(`Found ${osmChurches.length} Catholic churches in ${regionName}`);

    if (dryRun) {
      console.log('\n[DRY RUN] Would import the following churches:');
      for (const church of osmChurches.slice(0, 10)) {
        console.log(` - ${church.name} (${church.city || 'unknown city'})`);
        console.log(` OSM ID: ${church.osmId}, Website: ${church.website || 'none'}`);
      }
      if (osmChurches.length > 10) {
        console.log(` ... and ${osmChurches.length - 10} more`);
      }

      // Count websites
      stats.churchesWithWebsites = osmChurches.filter((c) => c.website).length;
      stats.churchesWithoutWebsites = osmChurches.length - stats.churchesWithWebsites;

      return stats;
    }

    // Fetch all existing churches for deduplication
    console.log('Fetching existing churches for deduplication...');
    const existingChurches = await prisma.church.findMany({
      select: {
        id: true,
        name: true,
        latitude: true,
        longitude: true,
        osmId: true,
        baiduId: true,
        masstimesId: true,
        orarimesseId: true,
        massSchedulesPhId: true,
        philmassId: true,
        horariosMisasId: true,
        mszeInfoId: true,
        weekdayMassesId: true,
        messesInfoId: true,
        bohosluzbyId: true,
        miserendId: true,
        kerknetId: true,
        gottesdienstzeitenId: true,
        source: true,
        website: true,
        phone: true,
        address: true,
      },
    });
    console.log(`Found ${existingChurches.length} existing churches in database`);

    // Process churches one by one (no batch transactions to avoid rollbacks)
    let processed = 0;

    for (const osmChurch of osmChurches) {
      try {
        // Check for duplicate
        const duplicate = findDuplicateChurch(osmChurch, existingChurches);

        if (duplicate) {
          // Same osmId => "updated"; any other kind of match => "linked".
          const matchedByOsmId = duplicate.osmId === osmChurch.osmId;
          const mergedData = mergeChurchData(duplicate, osmChurch);

          // Only write if the row is still present in the database.
          const existsInDb = await prisma.church.findUnique({ where: { id: duplicate.id } });
          if (existsInDb) {
            await prisma.church.update({
              where: { id: duplicate.id },
              data: mergedData,
            });
          }

          // The match is counted whether or not a row was written.
          if (matchedByOsmId) {
            stats.existingUpdated++;
          } else {
            stats.existingLinked++;
          }
        } else {
          // New church - insert it and capture the real ID
          const newChurch = await prisma.church.create({
            data: {
              name: osmChurch.name,
              latitude: osmChurch.lat,
              longitude: osmChurch.lng,
              address: osmChurch.address,
              city: osmChurch.city,
              state: osmChurch.state,
              zip: osmChurch.zip,
              country: osmChurch.country || countryCode,
              phone: osmChurch.phone,
              website: osmChurch.website,
              diocese: osmChurch.diocese,
              wheelchairAccess: osmChurch.wheelchairAccess ?? false,
              source: 'osm',
              osmId: osmChurch.osmId,
              hasWebsite: !!osmChurch.website,
              osmLastSyncedAt: new Date(),
            },
          });
          stats.newChurchesInserted++;

          // Add to existing churches list for future deduplication in this run (use real DB ID)
          existingChurches.push({
            id: newChurch.id,
            name: osmChurch.name,
            latitude: osmChurch.lat,
            longitude: osmChurch.lng,
            osmId: osmChurch.osmId,
            baiduId: null,
            masstimesId: null,
            orarimesseId: null,
            massSchedulesPhId: null,
            philmassId: null,
            horariosMisasId: null,
            mszeInfoId: null,
            weekdayMassesId: null,
            messesInfoId: null,
            bohosluzbyId: null,
            miserendId: null,
            kerknetId: null,
            gottesdienstzeitenId: null,
            source: 'osm',
            website: osmChurch.website || null,
            phone: osmChurch.phone || null,
            address: osmChurch.address || null,
          });
        }

        // Reached only when the database work above did not throw.
        if (osmChurch.website) {
          stats.churchesWithWebsites++;
        } else {
          stats.churchesWithoutWebsites++;
        }

        processed++;

        // Log progress every 100 churches
        if (processed % 100 === 0) {
          console.log(`Progress: ${processed}/${osmChurches.length} churches processed`);
        }
      } catch (error) {
        // One bad church must not abort the rest of the region.
        console.error(`Error processing church ${osmChurch.name}:`, error);
        stats.errors++;
      }
    }

    console.log(`\nProcessed all ${osmChurches.length} churches from ${regionName}`);
  } catch (error) {
    console.error(`Failed to import from ${regionName}:`, error);
    stats.errors++;
  }

  return stats;
}
|
||
|
|
|
||
|
|
/**
 * Print the import summary to stdout.
 *
 * Insert / update / link counters are only printed for real runs, since a dry
 * run never touches the database. The error count is printed whenever it is
 * non-zero, including in dry-run mode, so a failed query is not mistaken for
 * an empty region.
 *
 * @param countryCode country code shown in the heading
 * @param regionName  region name shown in the heading
 * @param stats       counters produced by the import
 * @param dryRun      whether the run was a dry run
 */
function printSummary(countryCode: string, regionName: string, stats: ImportStats, dryRun: boolean): void {
  const rule = '='.repeat(60);

  console.log(`\n${rule}`);
  console.log(`Import Summary for ${countryCode} - ${regionName} ${dryRun ? '(DRY RUN)' : ''}`);
  console.log(rule);
  console.log(`OSM churches found: ${stats.osmChurchesFound}`);

  if (!dryRun) {
    console.log(`New churches inserted: ${stats.newChurchesInserted}`);
    console.log(`Existing churches updated: ${stats.existingUpdated} (matched by osmId)`);
    console.log(`Existing churches linked: ${stats.existingLinked} (matched by proximity)`);
  }

  console.log(`Churches with websites: ${stats.churchesWithWebsites}`);
  console.log(`Churches without websites: ${stats.churchesWithoutWebsites}`);

  if (stats.errors > 0) {
    console.log(`Errors encountered: ${stats.errors}`);
  }

  console.log(`${rule}\n`);
}
|
||
|
|
|
||
|
|
/**
 * Script entry point.
 *
 * Parses the command line, requires both --country and --region (printing
 * usage and exiting with code 1 otherwise), runs the import for that region,
 * prints the summary, and always disconnects the Prisma client afterwards.
 */
async function main(): Promise<void> {
  const { country, region, dryRun } = parseArgs();

  if (!country || !region) {
    console.error('Error: Must specify both --country <CODE> and --region <NAME>');
    console.error('Usage:');
    console.error(' npx tsx scripts/import-osm-region.ts --country GB --region "England South"');
    console.error(' npx tsx scripts/import-osm-region.ts --country IT --region "North" --dry-run');
    console.error('\nAvailable countries:', Object.keys(COUNTRY_BOUNDING_BOXES).join(', '));
    process.exit(1);
  }

  if (dryRun) {
    console.log('\n*** DRY RUN MODE - No changes will be made to database ***\n');
  }

  try {
    const stats = await importFromRegion(country, region, dryRun);
    printSummary(country, region, stats, dryRun);
  } catch (error) {
    console.error('Fatal error:', error);
    // Set the exit code instead of calling process.exit(): exiting here would
    // terminate the process before the `finally` block could disconnect.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
}

// Surface any rejection that escapes main() (e.g. a failing disconnect)
// instead of leaving the promise unhandled.
main().catch((error: unknown) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});
|