617 lines
22 KiB
TypeScript
617 lines
22 KiB
TypeScript
|
|
#!/usr/bin/env tsx
|
||
|
|
/**
 * Import Catholic churches from OpenStreetMap.
 *
 * Usage:
 *   npx tsx scripts/import-osm-churches.ts --country US
 *   npx tsx scripts/import-osm-churches.ts --all
 *   npx tsx scripts/import-osm-churches.ts --priority 1
 *   npx tsx scripts/import-osm-churches.ts --all --resume-from IT
 *   npx tsx scripts/import-osm-churches.ts --country MX --dry-run
 *   npx tsx scripts/import-osm-churches.ts --all --sort-by-count
 */
|
||
|
|
|
||
|
|
// Load .env for database connection (before importing anything that uses process.env)
|
||
|
|
import dotenv from 'dotenv';
|
||
|
|
import path from 'path';
|
||
|
|
|
||
|
|
// Environment files in lookup order: .env.local (production Neon URL) is read first,
// then .env acts as the local fallback.
// NOTE(review): relies on dotenv's default of not overriding already-set variables — confirm.
for (const envFile of ['.env.local', '.env']) {
  dotenv.config({ path: path.resolve(process.cwd(), envFile) });
}
|
||
|
|
|
||
|
|
// Create a fresh Prisma client for this script (don't use cached pool from lib/db)
|
||
|
|
import { Pool } from 'pg';
|
||
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
||
|
|
import { PrismaClient } from '@prisma/client';
|
||
|
|
|
||
|
|
// Connection string: DATABASE_URL when set (and non-empty), otherwise a local development database.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';

// Never log credentials. The mask is anchored on the URL authority ("//user:password@") and runs up to
// the last '@' before the path, so passwords containing ':' are hidden completely.
const maskedDbUrl = dbUrl.replace(/\/\/([^:/@]*):[^/?#]*@/, '//$1:***@');
console.log(`Connecting to database: ${maskedDbUrl}`);

// Dedicated pool for this script (not the cached pool from lib/db).
const pool = new Pool({
  connectionString: dbUrl,
  // URLs containing "neon" get TLS with certificate verification disabled; everything else uses pg defaults.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||
|
|
import { queryOverpassByCountryWithFallback, type OSMChurch } from '../src/lib/overpass-client';
|
||
|
|
import { findDuplicateChurch, mergeChurchData } from '../src/lib/church-matcher';
|
||
|
|
import { parseServiceTimes } from '../src/lib/service-times-parser';
|
||
|
|
|
||
|
|
// ISO 3166-1 alpha-2 codes of the countries to import, bucketed into three priority tiers.
// Array order is processing order: `--all` runs priority1, then priority2, then priority3,
// and within a tier the countries run as listed (unless --sort-by-count re-sorts them).
const CATHOLIC_COUNTRIES: Record<'priority1' | 'priority2' | 'priority3', string[]> = {
  // Tier 1: large Catholic populations
  priority1: [
    // North America
    'US', 'MX', 'CA',
    // South America, Central America & the Caribbean
    'BR', 'AR', 'CO', 'PE', 'VE', 'CL', 'EC', 'GT', 'CU', 'BO', 'DO', 'HT',
    'HN', 'PY', 'SV', 'NI', 'CR', 'PA', 'UY', 'GY', 'SR', 'GF',
    // Europe
    'IT', 'FR', 'ES', 'PL', 'DE', 'PT', 'BE', 'CZ', 'AT', 'HU', 'IE', 'HR', 'GB',
    // Asia-Pacific (PH, AU) and Africa (NG, CD)
    'PH', 'AU', 'NG', 'CD',
  ],

  // Tier 2: medium Catholic populations
  priority2: [
    // Rest of Europe
    'NL', 'SK', 'SI', 'LT', 'CH', 'LU', 'MT',
    'UA', 'RO', 'LV', 'BY',
    // Africa
    'AO', 'UG', 'TZ', 'KE', 'CM', 'RW', 'BI', 'MG', 'MW', 'ZM', 'ZW', 'MZ',
    'BJ', 'TG', 'CI', 'BF', 'ML', 'NE', 'SN', 'GN', 'LR', 'SL', 'GH', 'GA',
    'CG', 'CF', 'TD', 'SD', 'ET', 'ER', 'SO',
    // Asia
    'IN', 'TL', 'VN', 'KR', 'JP', 'ID', 'MY', 'SG', 'TH', 'LA', 'KH', 'MM', 'CN', 'LK', 'BD', 'PK',
    // Middle East
    'LB', 'IL', 'PS', 'JO', 'SY', 'IQ',
    // Oceania
    'NZ', 'PG', 'FJ', 'NC', 'PF',
  ],

  // Tier 3: smaller Catholic presence
  priority3: [
    // Caribbean (smaller island nations)
    'BS', 'BB', 'JM', 'TT', 'GD', 'LC', 'VC', 'AG', 'DM', 'KN',
    // Europe: microstates, then Balkans / Eastern Europe
    'MC', 'SM', 'VA', 'LI', 'AD',
    'RS', 'BA', 'MK', 'AL', 'EE',
    // Caucasus + Russia
    'GE', 'AM', 'RU',
    // Africa (rest)
    'NA', 'BW', 'LS', 'SZ', 'MU', 'SC', 'KM', 'CV', 'ST', 'GQ', 'DJ', 'GM',
    // Asia (rest)
    'BT', 'NP', 'AF', 'KZ', 'UZ', 'TM', 'TJ', 'KG', 'MN', 'BN', 'MV',
    // Oceania (rest)
    'WS', 'TO', 'VU', 'SB', 'KI', 'NR', 'TV', 'FM', 'MH', 'PW',
  ],
};
|
||
|
|
|
||
|
|
/** Counters collected while importing one country (also summed across countries for the overall report). */
interface ImportStats {
  /** Number of churches returned by the Overpass query. */
  osmChurchesFound: number;
  /** Churches that had no duplicate and were inserted as new rows. */
  newChurchesInserted: number;
  /** Duplicates whose osmId already equalled the OSM church's osmId. */
  existingUpdated: number;
  /** Duplicates found by the matcher without an equal osmId. */
  existingLinked: number;
  /** Processed OSM churches that carry a website. */
  churchesWithWebsites: number;
  /** Processed OSM churches that carry no website. */
  churchesWithoutWebsites: number;
  /** Churches for which at least one schedule row was created from service_times. */
  churchesWithServiceTimes: number;
  /** Total schedule rows created from service_times. */
  scheduleEntriesCreated: number;
  /** Failures: one per church that threw, plus one if the country import itself failed. */
  errors: number;
}
|
||
|
|
|
||
|
|
/**
 * Parse the command line flags from `process.argv`.
 *
 * Recognized flags:
 *   --country <CODE>      single country (upper-cased)
 *   --all                 every country in every priority tier
 *   --priority <1|2|3>    one priority tier only
 *   --resume-from <CODE>  skip countries before this one (upper-cased)
 *   --dry-run             report only
 *   --sort-by-count       process countries with the fewest existing OSM churches first
 *
 * Arguments that are not recognized here are ignored. A value-taking flag whose value is missing
 * (end of arguments, empty, or another `--flag`) and an invalid --priority both print an error and
 * exit with status 1.
 *
 * @returns the parsed options; `country`, `resumeFrom` and `priority` are `undefined` when not given.
 */
function parseArgs(): { country?: string; all: boolean; dryRun: boolean; resumeFrom?: string; priority?: number; sortByCount: boolean } {
  const args = process.argv.slice(2);
  const result = {
    country: undefined as string | undefined,
    all: false,
    dryRun: false,
    resumeFrom: undefined as string | undefined,
    priority: undefined as number | undefined,
    sortByCount: false,
  };

  // Returns the value that follows the flag at `index`. Refusing values that look like another flag
  // keeps `--country --all` from turning "--all" into a country code.
  const requireValue = (flag: string, index: number): string => {
    const value = args[index + 1];
    if (!value || value.startsWith('--')) {
      console.error(`Error: ${flag} requires a value`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--country':
        result.country = requireValue('--country', i).toUpperCase();
        i++; // skip the consumed value
        break;
      case '--resume-from':
        result.resumeFrom = requireValue('--resume-from', i).toUpperCase();
        i++;
        break;
      case '--priority': {
        // Number() rejects trailing garbage ("2abc") that parseInt would silently accept.
        const priority = Number(requireValue('--priority', i));
        if (!Number.isInteger(priority) || priority < 1 || priority > 3) {
          console.error('Error: --priority must be 1, 2, or 3');
          process.exit(1);
        }
        result.priority = priority;
        i++;
        break;
      }
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--sort-by-count':
        result.sortByCount = true;
        break;
      default:
        // Not one of our flags: ignore it.
        break;
    }
  }

  return result;
}
|
||
|
|
|
||
|
|
/**
 * Import churches from a single country.
 *
 * Queries Overpass for the country's Catholic churches. In a dry run it only prints a preview and
 * fills the "found" and website counters. Otherwise each OSM church is reconciled against the
 * churches already in the database:
 *   - duplicate with the same osmId -> merged data is written back (counted as "updated")
 *   - any other duplicate           -> merged data is written back (counted as "linked";
 *                                      presumably mergeChurchData carries the osmId over — confirm)
 *   - no duplicate                  -> a new church is inserted with source 'osm'
 * Schedules parsed from the OSM service_times tag are created for new churches, and for existing
 * churches only when they have no schedules yet.
 *
 * Churches are processed one at a time (no batch transaction), so one failing church is logged,
 * counted in `errors`, and does not roll back the others. A failure of the import as a whole is
 * likewise logged and counted rather than thrown.
 *
 * @param countryCode country code passed to the Overpass query; also the fallback `country` value of new churches
 * @param dryRun when true, nothing is read from or written to the database
 * @returns the counters gathered during the run
 */
async function importFromOSM(countryCode: string, dryRun: boolean = false): Promise<ImportStats> {
  const stats: ImportStats = {
    osmChurchesFound: 0,
    newChurchesInserted: 0,
    existingUpdated: 0,
    existingLinked: 0,
    churchesWithWebsites: 0,
    churchesWithoutWebsites: 0,
    churchesWithServiceTimes: 0,
    scheduleEntriesCreated: 0,
    errors: 0,
  };

  console.log(`\n${'='.repeat(60)}`);
  console.log(`Importing Catholic churches from ${countryCode}`);
  console.log(`${'='.repeat(60)}\n`);

  try {
    // Query Overpass API (with automatic fallback to regional bounding boxes)
    const osmChurches = await queryOverpassByCountryWithFallback(countryCode);
    stats.osmChurchesFound = osmChurches.length;

    if (osmChurches.length === 0) {
      console.log(`No churches found in ${countryCode}`);
      return stats;
    }

    console.log(`Found ${osmChurches.length} Catholic churches in ${countryCode}`);

    if (dryRun) {
      console.log('\n[DRY RUN] Would import the following churches:');
      osmChurches.slice(0, 10).forEach((church) => {
        console.log(` - ${church.name} (${church.city || 'unknown city'})`);
        console.log(` OSM ID: ${church.osmId}, Website: ${church.website || 'none'}`);
      });
      if (osmChurches.length > 10) {
        console.log(` ... and ${osmChurches.length - 10} more`);
      }

      // Count websites
      stats.churchesWithWebsites = osmChurches.filter((c) => c.website).length;
      stats.churchesWithoutWebsites = osmChurches.length - stats.churchesWithWebsites;

      return stats;
    }

    // Fetch all existing churches for deduplication.
    // For large datasets, this could be narrowed to churches in the same country/region.
    console.log('Fetching existing churches for deduplication...');
    const existingChurches = await prisma.church.findMany({
      select: {
        id: true,
        name: true,
        latitude: true,
        longitude: true,
        osmId: true,
        baiduId: true,
        masstimesId: true,
        orarimesseId: true,
        massSchedulesPhId: true,
        philmassId: true,
        horariosMisasId: true,
        mszeInfoId: true,
        weekdayMassesId: true,
        messesInfoId: true,
        bohosluzbyId: true,
        miserendId: true,
        kerknetId: true,
        gottesdienstzeitenId: true,
        discovermassId: true,
        source: true,
        website: true,
        phone: true,
        address: true,
      },
    });
    console.log(`Found ${existingChurches.length} existing churches in database`);

    type ChurchId = (typeof existingChurches)[number]['id'];

    // Parses a service_times tag and stores the resulting schedule rows for one church.
    // Updates the schedule counters and resolves to true when at least one row was created.
    const importServiceTimes = async (
      churchId: ChurchId,
      serviceTimes: Parameters<typeof parseServiceTimes>[0],
    ): Promise<boolean> => {
      const scheduleEntries = parseServiceTimes(serviceTimes);
      if (scheduleEntries.length === 0) {
        return false;
      }

      await prisma.massSchedule.createMany({
        data: scheduleEntries.map((entry) => ({
          churchId,
          dayOfWeek: entry.dayOfWeek,
          time: entry.time,
          // dayOfWeek 0 is labelled Sunday, 6 Saturday, everything else Daily
          massType: entry.dayOfWeek === 0 ? 'Sunday' : entry.dayOfWeek === 6 ? 'Saturday' : 'Daily',
          language: 'Unknown',
          notes: 'From OSM service_times tag',
        })),
      });
      stats.churchesWithServiceTimes++;
      stats.scheduleEntriesCreated += scheduleEntries.length;
      return true;
    };

    // Process churches one by one (no batch transactions to avoid rollbacks)
    let processed = 0;

    for (const osmChurch of osmChurches) {
      try {
        const duplicate = findDuplicateChurch(osmChurch, existingChurches);

        if (duplicate) {
          // Same osmId -> plain update; any other match -> an existing church being linked to OSM.
          const matchedByOsmId = duplicate.osmId === osmChurch.osmId;
          const mergedData = mergeChurchData(duplicate, osmChurch);

          // Verify the row still exists before updating; only the id is needed for that.
          const existsInDb = await prisma.church.findUnique({
            where: { id: duplicate.id },
            select: { id: true },
          });

          if (existsInDb) {
            await prisma.church.update({
              where: { id: duplicate.id },
              data: mergedData,
            });
          }

          // Counted whether or not the row was still present, as long as nothing threw.
          if (matchedByOsmId) stats.existingUpdated++;
          else stats.existingLinked++;

          // Import service_times only for existing churches that don't have schedules yet
          if (existsInDb && osmChurch.serviceTimes) {
            const existingSchedules = await prisma.massSchedule.count({ where: { churchId: duplicate.id } });
            if (existingSchedules === 0) {
              await importServiceTimes(duplicate.id, osmChurch.serviceTimes);
            }
          }

          if (osmChurch.website) stats.churchesWithWebsites++;
          else stats.churchesWithoutWebsites++;
        } else {
          // New church - insert it and capture the real ID
          const newChurch = await prisma.church.create({
            data: {
              name: osmChurch.name,
              latitude: osmChurch.lat,
              longitude: osmChurch.lng,
              address: osmChurch.address,
              city: osmChurch.city,
              state: osmChurch.state,
              zip: osmChurch.zip,
              country: osmChurch.country || countryCode,
              phone: osmChurch.phone,
              website: osmChurch.website,
              diocese: osmChurch.diocese,
              wheelchairAccess: osmChurch.wheelchairAccess ?? false,
              source: 'osm',
              osmId: osmChurch.osmId,
              hasWebsite: !!osmChurch.website,
              osmLastSyncedAt: new Date(),
            },
          });
          stats.newChurchesInserted++;

          // Register the row for deduplication right away (with its real DB ID): if the schedule
          // import below fails, later OSM entries must still see this church instead of
          // inserting it a second time.
          existingChurches.push({
            id: newChurch.id,
            name: osmChurch.name,
            latitude: osmChurch.lat,
            longitude: osmChurch.lng,
            osmId: osmChurch.osmId,
            baiduId: null,
            masstimesId: null,
            orarimesseId: null,
            massSchedulesPhId: null,
            philmassId: null,
            horariosMisasId: null,
            mszeInfoId: null,
            weekdayMassesId: null,
            messesInfoId: null,
            bohosluzbyId: null,
            miserendId: null,
            kerknetId: null,
            gottesdienstzeitenId: null,
            discovermassId: null,
            source: 'osm',
            website: osmChurch.website || null,
            phone: osmChurch.phone || null,
            address: osmChurch.address || null,
          });

          if (osmChurch.website) stats.churchesWithWebsites++;
          else stats.churchesWithoutWebsites++;

          // Parse service_times tag and create mass schedules
          if (osmChurch.serviceTimes && (await importServiceTimes(newChurch.id, osmChurch.serviceTimes))) {
            // Mark as scraped since we have schedule data
            await prisma.church.update({
              where: { id: newChurch.id },
              data: { lastScrapedAt: new Date() },
            });
          }
        }

        processed++;

        // Log progress every 500 churches
        if (processed % 500 === 0) {
          console.log(`Progress: ${processed}/${osmChurches.length} churches processed`);
        }
      } catch (error) {
        console.error(`Error processing church ${osmChurch.name}:`, error);
        stats.errors++;
      }
    }

    console.log(`\nProcessed all ${osmChurches.length} churches from ${countryCode}`);
  } catch (error) {
    console.error(`Failed to import from ${countryCode}:`, error);
    stats.errors++;
  }

  return stats;
}
|
||
|
|
|
||
|
|
/**
 * Write the per-country import report to stdout.
 *
 * Insert/update/link counts, service_times counts and the error count are only shown for real
 * runs; the latter two only when they are greater than zero.
 *
 * @param countryCode country the statistics belong to
 * @param stats counters produced by the import
 * @param dryRun whether the import ran in dry-run mode
 */
function printSummary(countryCode: string, stats: ImportStats, dryRun: boolean) {
  const divider = '='.repeat(60);
  const wroteToDb = !dryRun;

  // Collect the report first, then emit it line by line.
  const report: string[] = [
    `\n${divider}`,
    `Import Summary for ${countryCode} ${dryRun ? '(DRY RUN)' : ''}`,
    divider,
    `OSM churches found: ${stats.osmChurchesFound}`,
  ];

  if (wroteToDb) {
    report.push(
      `New churches inserted: ${stats.newChurchesInserted}`,
      `Existing churches updated: ${stats.existingUpdated} (matched by osmId)`,
      `Existing churches linked: ${stats.existingLinked} (matched by proximity)`,
    );
  }

  report.push(
    `Churches with websites: ${stats.churchesWithWebsites}`,
    `Churches without websites: ${stats.churchesWithoutWebsites}`,
  );

  if (wroteToDb && stats.churchesWithServiceTimes > 0) {
    report.push(
      `Churches with service_times: ${stats.churchesWithServiceTimes}`,
      `Schedule entries created: ${stats.scheduleEntriesCreated}`,
    );
  }

  if (wroteToDb && stats.errors > 0) {
    report.push(`Errors encountered: ${stats.errors}`);
  }

  report.push(`${divider}\n`);

  for (const line of report) {
    console.log(line);
  }
}
|
||
|
|
|
||
|
|
/**
 * Mark the background job named by `--job-id <id>` as running.
 *
 * Despite the name, no job is ever created here: an existing job row is updated with status
 * 'running' and a fresh `startedAt`.
 *
 * @param args command line arguments (without the node/script prefix)
 * @returns the job id, or `null` when `--job-id` was not passed
 * @throws Error when `--job-id` is present but its value is missing or is another flag;
 *         errors from the database update propagate unchanged
 */
async function createOrResumeJob(args: string[]): Promise<string | null> {
  const flagIndex = args.indexOf('--job-id');
  if (flagIndex === -1) {
    return null;
  }

  const jobId = args[flagIndex + 1];
  // Fail with a clear message instead of sending `id: undefined` (or a flag name) to the database.
  if (!jobId || jobId.startsWith('--')) {
    throw new Error('--job-id requires a value');
  }

  await prisma.backgroundJob.update({
    where: { id: jobId },
    data: { status: 'running', startedAt: new Date() },
  });
  return jobId;
}
|
||
|
|
|
||
|
|
/**
 * Record the final state of a background job.
 *
 * Does nothing when there is no job id. A non-empty `error` stores status 'failed' together with
 * the message; otherwise status 'completed' with a null error. `completedAt` is set either way.
 * A failing database update is logged and swallowed so it cannot mask the outcome of the import.
 *
 * @param jobId id of the job to finish, or `null` when the run is not tied to a job
 * @param error failure description; omit for a successful run
 */
async function completeJob(jobId: string | null, error?: string): Promise<void> {
  if (jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: jobId },
        data: error
          ? { status: 'failed', error, completedAt: new Date() }
          : { status: 'completed', error: null, completedAt: new Date() },
      });
    } catch (updateFailure) {
      console.error(`Failed to update job ${jobId}:`, updateFailure);
    }
  }
}
|
||
|
|
|
||
|
|
/**
 * Script entry point.
 *
 * Validates the command line, marks the optional background job (`--job-id`) as running, imports
 * either one country or a list of countries (all tiers or a single priority tier, optionally
 * sorted by existing OSM church count and/or resumed from a given country), prints the summaries
 * and records the job outcome.
 *
 * Arguments are validated before the job is touched, so an invalid invocation never leaves a job
 * marked as running. Failures inside the run set a non-zero exit code rather than exiting
 * immediately, so the job is recorded as failed and the database connection is always closed.
 */
async function main() {
  const { country, all, dryRun, resumeFrom, priority, sortByCount } = parseArgs();

  if (!country && !all && !priority) {
    console.error('Error: Must specify --country <CODE>, --all, or --priority <1|2|3>');
    console.error('Usage:');
    console.error(' npx tsx scripts/import-osm-churches.ts --country US');
    console.error(' npx tsx scripts/import-osm-churches.ts --all');
    console.error(' npx tsx scripts/import-osm-churches.ts --priority 1');
    console.error(' npx tsx scripts/import-osm-churches.ts --all --resume-from IT');
    console.error(' npx tsx scripts/import-osm-churches.ts --country MX --dry-run');
    console.error(' npx tsx scripts/import-osm-churches.ts --all --sort-by-count');
    process.exit(1);
  }

  if (dryRun) {
    console.log('\n*** DRY RUN MODE - No changes will be made to database ***\n');
  }

  let jobId: string | null = null;

  try {
    // Inside the try block so a failure here is reported and still disconnects the client.
    jobId = await createOrResumeJob(process.argv.slice(2));

    if (country) {
      // Import single country
      const stats = await importFromOSM(country, dryRun);
      printSummary(country, stats, dryRun);
    } else if (all || priority !== undefined) {
      // Import all countries or specific priority
      let allCountries: string[];

      if (priority !== undefined) {
        // Import only specified priority level.
        // Copy the tier so the in-place sort below never reorders the shared constant.
        const priorityKey = `priority${priority}` as keyof typeof CATHOLIC_COUNTRIES;
        allCountries = [...CATHOLIC_COUNTRIES[priorityKey]];
        console.log(`Importing Priority ${priority} countries (${allCountries.length} countries)...\n`);
      } else {
        // Import all priorities
        console.log('Importing all Catholic countries by priority...\n');
        allCountries = [
          ...CATHOLIC_COUNTRIES.priority1,
          ...CATHOLIC_COUNTRIES.priority2,
          ...CATHOLIC_COUNTRIES.priority3,
        ];
      }

      // Sort by existing OSM church count (least first) if requested
      if (sortByCount) {
        console.log('Querying DB for current OSM church counts per country...');
        const countRows = await prisma.$queryRawUnsafe<Array<{ country: string; count: bigint }>>(
          `SELECT country, COUNT(*) as count FROM churches WHERE source = 'osm' AND country IS NOT NULL GROUP BY country`
        );
        const countMap = new Map<string, number>();
        for (const row of countRows) {
          countMap.set(row.country, Number(row.count));
        }

        allCountries.sort((a, b) => (countMap.get(a) || 0) - (countMap.get(b) || 0));

        console.log('Country processing order (least OSM churches first):');
        for (const c of allCountries) {
          console.log(` ${c}: ${countMap.get(c) || 0} existing OSM churches`);
        }
        console.log('');
      }

      // Handle --resume-from flag
      if (resumeFrom) {
        const resumeIndex = allCountries.indexOf(resumeFrom);
        if (resumeIndex === -1) {
          // Thrown (not process.exit) so the job is marked failed and the client is disconnected.
          throw new Error(`Country ${resumeFrom} not found in the list`);
        }
        console.log(`Resuming from ${resumeFrom} (skipping first ${resumeIndex} countries)...\n`);
        allCountries = allCountries.slice(resumeIndex);
      }

      const totalStats: ImportStats = {
        osmChurchesFound: 0,
        newChurchesInserted: 0,
        existingUpdated: 0,
        existingLinked: 0,
        churchesWithWebsites: 0,
        churchesWithoutWebsites: 0,
        churchesWithServiceTimes: 0,
        scheduleEntriesCreated: 0,
        errors: 0,
      };

      for (const [index, countryCode] of allCountries.entries()) {
        const stats = await importFromOSM(countryCode, dryRun);
        printSummary(countryCode, stats, dryRun);

        // Aggregate stats
        for (const key of Object.keys(totalStats) as Array<keyof ImportStats>) {
          totalStats[key] += stats[key];
        }

        // Small delay between countries to be respectful (rate limiting is also in the client);
        // nothing follows the last country, so no need to wait after it.
        if (index < allCountries.length - 1) {
          await new Promise((resolve) => setTimeout(resolve, 2000));
        }
      }

      // Print overall summary
      console.log(`\n${'='.repeat(60)}`);
      console.log(`OVERALL SUMMARY ${dryRun ? '(DRY RUN)' : ''}`);
      console.log(`${'='.repeat(60)}`);
      console.log(`Total countries processed: ${allCountries.length}`);
      console.log(`Total OSM churches found: ${totalStats.osmChurchesFound}`);

      if (!dryRun) {
        console.log(`Total new churches inserted: ${totalStats.newChurchesInserted}`);
        console.log(`Total churches updated: ${totalStats.existingUpdated}`);
        console.log(`Total churches linked: ${totalStats.existingLinked}`);
      }

      console.log(`Total with websites: ${totalStats.churchesWithWebsites}`);
      console.log(`Total without websites: ${totalStats.churchesWithoutWebsites}`);

      if (!dryRun && totalStats.errors > 0) {
        console.log(`Total errors: ${totalStats.errors}`);
      }

      console.log(`${'='.repeat(60)}\n`);
    }

    await completeJob(jobId);
  } catch (error) {
    console.error('Fatal error:', error);
    await completeJob(jobId, String(error));
    // process.exit() here would terminate before the finally block runs; setting the exit code
    // lets the disconnect below happen and still reports failure to the caller.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
}
|
||
|
|
|
||
|
|
// Run the script. main() is async, so attach a handler: anything that escapes it is reported
// and turned into a non-zero exit status instead of being left as a floating promise.
main().catch((error: unknown) => {
  console.error('Unhandled error:', error);
  process.exit(1);
});
|