#!/usr/bin/env tsx
/**
 * Import Catholic churches from Baidu Maps (China)
 *
 * Usage:
 *   npx tsx scripts/import-baidu-churches.ts
 *   npx tsx scripts/import-baidu-churches.ts --dry-run
 *   npx tsx scripts/import-baidu-churches.ts --resume-from-cell 100
 *   npx tsx scripts/import-baidu-churches.ts --job-id <job-id>
 */

import dotenv from 'dotenv';
import path from 'path';

// Load .env.local first, then .env, both resolved from the current working directory.
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });

import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';

const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Mask the password portion of the URL before logging it.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);

const pool = new Pool({
  connectionString: dbUrl,
  // URLs containing "neon" get TLS with certificate verification disabled; others use the driver default.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

import { queryBaiduByGrid, type BaiduChurch } from '../src/lib/baidu-client';
import { findDuplicateChurch, mergeBaiduData, type ExistingChurch } from '../src/lib/church-matcher';

/** Counters accumulated over one import run. */
interface ImportStats {
  /** Number of churches returned by the Baidu grid query. */
  baiduChurchesFound: number;
  /** Churches with no duplicate match that were created. */
  newChurchesInserted: number;
  /** Matched churches whose stored baiduId already equalled the Baidu result's id. */
  existingUpdated: number;
  /** Matched churches whose stored baiduId differed from (or lacked) the Baidu result's id. */
  existingLinked: number;
  /** Churches whose processing threw. */
  errors: number;
}

/**
 * Parse the supported command-line flags from `process.argv`.
 *
 * Recognised flags: `--dry-run`, `--resume-from-cell <n>`, `--job-id <id>`.
 * Unknown arguments, and value flags with no value following them, are ignored.
 *
 * @returns The parsed options; `resumeFromCell` defaults to 0 and `jobId` to undefined.
 * @throws Error if the value given to `--resume-from-cell` does not parse as an integer.
 */
function parseArgs(): { dryRun: boolean; resumeFromCell: number; jobId?: string } {
  const args = process.argv.slice(2);
  const result: { dryRun: boolean; resumeFromCell: number; jobId?: string } = {
    dryRun: false,
    resumeFromCell: 0,
  };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];

    if (flag === '--dry-run') {
      result.dryRun = true;
    } else if (flag === '--resume-from-cell' && value) {
      const cell = parseInt(value, 10);
      // parseInt yields NaN for non-numeric input; fail loudly instead of passing NaN downstream.
      if (Number.isNaN(cell)) {
        throw new Error(`Invalid value for --resume-from-cell: "${value}" (expected an integer)`);
      }
      result.resumeFromCell = cell;
      i++; // skip the consumed value
    } else if (flag === '--job-id' && value) {
      result.jobId = value;
      i++; // skip the consumed value
    }
  }

  return result;
}

/**
 * Mark an existing background job as running.
 *
 * Despite the name, no job is created here: when `jobId` is absent nothing is
 * written and `null` is returned.
 *
 * @param jobId Id of the background job row to update, if any.
 * @returns The same job id, or `null` when none was supplied.
 */
async function createOrResumeJob(jobId?: string): Promise<string | null> {
  if (jobId) {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: { status: 'running', startedAt: new Date() },
    });
    return jobId;
  }
  return null;
}

/**
 * Record the final status of a background job. Failures to write are logged, not thrown.
 *
 * @param jobId Job to update; a `null` id makes this a no-op.
 * @param error When non-empty, the job is marked `failed` with this message; otherwise `completed`.
 */
async function completeJob(jobId: string | null, error?: string): Promise<void> {
  if (!jobId) return;

  try {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: {
        status: error ? 'failed' : 'completed',
        error: error || null,
        completedAt: new Date(),
      },
    });
  } catch (err) {
    console.error(`Failed to update job ${jobId}:`, err);
  }
}

/**
 * Write progress counters to a background job. Failures to write are logged, not thrown,
 * so this function never rejects.
 *
 * @param jobId Job to update; a `null` id makes this a no-op.
 * @param stats Current import counters.
 * @param totalCells Value stored as the job's `totalItems`.
 * @param currentCell Value stored as the job's `processed`.
 */
async function updateJobProgress(
  jobId: string | null,
  stats: ImportStats,
  totalCells: number,
  currentCell: number,
): Promise<void> {
  if (!jobId) return;

  try {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: {
        totalItems: totalCells,
        processed: currentCell,
        succeeded: stats.newChurchesInserted + stats.existingUpdated + stats.existingLinked,
        failed: stats.errors,
        itemsFound: stats.baiduChurchesFound,
      },
    });
  } catch (err) {
    // Non-fatal — just log it
    console.error(`Failed to update job progress:`, err);
  }
}

/**
 * Query Baidu Maps for churches and merge the results into the `church` table.
 *
 * Steps: (1) query the Baidu grid, (2) load existing churches with country 'CN',
 * (3) for each Baidu result either update the matching existing row or create a new one.
 * A failure on one church is logged and counted; it does not abort the run.
 *
 * @param dryRun When true, only the first 20 results are printed and nothing is written to `church`.
 * @param resumeFromCell Passed through to `queryBaiduByGrid`.
 * @param jobId Background job to report progress to, or `null`.
 * @returns The counters for this run.
 * @throws Error if `BAIDU_MAPS_API_KEY` is not set.
 */
async function importFromBaidu(
  dryRun: boolean,
  resumeFromCell: number,
  jobId: string | null,
): Promise<ImportStats> {
  const stats: ImportStats = {
    baiduChurchesFound: 0,
    newChurchesInserted: 0,
    existingUpdated: 0,
    existingLinked: 0,
    errors: 0,
  };

  const apiKey = process.env.BAIDU_MAPS_API_KEY;
  if (!apiKey) {
    throw new Error('Missing BAIDU_MAPS_API_KEY environment variable');
  }

  console.log(`\n${'='.repeat(60)}`);
  console.log(`Importing Catholic churches from Baidu Maps (China)`);
  console.log(`${'='.repeat(60)}\n`);

  // Step 1: Query Baidu API
  console.log('Step 1: Querying Baidu Maps API...');
  const baiduChurches: BaiduChurch[] = await queryBaiduByGrid(
    apiKey,
    (progress) => {
      // Fire-and-forget: updateJobProgress handles its own errors and never rejects.
      void updateJobProgress(jobId, stats, progress.totalCells, progress.cellIndex);
    },
    resumeFromCell,
  );

  stats.baiduChurchesFound = baiduChurches.length;
  console.log(`\nFound ${baiduChurches.length} churches from Baidu Maps`);

  if (baiduChurches.length === 0) {
    console.log('No churches found');
    return stats;
  }

  if (dryRun) {
    console.log('\n[DRY RUN] Would import the following churches:');
    for (const church of baiduChurches.slice(0, 20)) {
      console.log(` - ${church.name} (${church.city || church.province || 'unknown'})`);
      console.log(` Baidu ID: ${church.baiduId}, Coords: ${church.lat.toFixed(4)}, ${church.lng.toFixed(4)}`);
    }
    if (baiduChurches.length > 20) {
      console.log(` ... and ${baiduChurches.length - 20} more`);
    }
    return stats;
  }

  // Step 2: Load existing churches in China for deduplication
  console.log('\nStep 2: Loading existing churches in China for deduplication...');
  const existingChurches: ExistingChurch[] = await prisma.church.findMany({
    where: { country: 'CN' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });
  console.log(`Found ${existingChurches.length} existing churches in China`);

  // Step 3: Process each Baidu church
  console.log('\nStep 3: Processing churches...');
  let processed = 0;

  for (const baiduChurch of baiduChurches) {
    try {
      const candidate = {
        name: baiduChurch.name,
        lat: baiduChurch.lat,
        lng: baiduChurch.lng,
        baiduId: baiduChurch.baiduId,
      };
      const duplicate = findDuplicateChurch(candidate, existingChurches);

      if (duplicate) {
        // Decide which counter applies before merging, in case the merge touches `duplicate`.
        const matchedByBaiduId = duplicate.baiduId === baiduChurch.baiduId;
        const mergedData = mergeBaiduData(duplicate, baiduChurch);
        await prisma.church.update({
          where: { id: duplicate.id },
          data: mergedData,
        });

        if (matchedByBaiduId) {
          // Existing church with matching baiduId — updated
          stats.existingUpdated++;
        } else {
          // Existing church matched some other way — linked with baiduId
          stats.existingLinked++;
        }
      } else {
        // New church — insert it
        const newChurch = await prisma.church.create({
          data: {
            name: baiduChurch.name,
            latitude: baiduChurch.lat,
            longitude: baiduChurch.lng,
            address: baiduChurch.address,
            city: baiduChurch.city,
            state: baiduChurch.province,
            country: 'CN',
            phone: baiduChurch.phone,
            website: baiduChurch.website,
            source: 'baidu',
            baiduId: baiduChurch.baiduId,
            baiduLastSyncedAt: new Date(),
            hasWebsite: !!baiduChurch.website,
          },
        });
        stats.newChurchesInserted++;

        // Add to existing churches list for dedup within this run
        existingChurches.push({
          id: newChurch.id,
          name: baiduChurch.name,
          latitude: baiduChurch.lat,
          longitude: baiduChurch.lng,
          osmId: null,
          baiduId: baiduChurch.baiduId,
          masstimesId: null,
          orarimesseId: null,
          massSchedulesPhId: null,
          philmassId: null,
          horariosMisasId: null,
          mszeInfoId: null,
          weekdayMassesId: null,
          messesInfoId: null,
          bohosluzbyId: null,
          miserendId: null,
          kerknetId: null,
          gottesdienstzeitenId: null,
          discovermassId: null,
          source: 'baidu',
          website: baiduChurch.website || null,
          phone: baiduChurch.phone || null,
          address: baiduChurch.address || null,
        });
      }

      // Only successfully handled churches are counted here; failures go to stats.errors.
      processed++;
      if (processed % 500 === 0) {
        console.log(`Progress: ${processed}/${baiduChurches.length} churches processed`);
        await updateJobProgress(jobId, stats, baiduChurches.length, processed);
      }
    } catch (error) {
      console.error(`Error processing church ${baiduChurch.name} (${baiduChurch.baiduId}):`, error);
      stats.errors++;
    }
  }

  console.log(`\nProcessed all ${baiduChurches.length} churches`);

  // Persist the final counters; the in-loop update only fires on multiples of 500.
  await updateJobProgress(jobId, stats, baiduChurches.length, processed);

  return stats;
}

/**
 * Print a human-readable summary of an import run to stdout.
 *
 * @param stats Counters returned by the import.
 * @param dryRun When true, only the "found" count is printed and the header is tagged as a dry run.
 */
function printSummary(stats: ImportStats, dryRun: boolean): void {
  const rule = '='.repeat(60);

  console.log(`\n${rule}`);
  console.log(`Baidu Import Summary ${dryRun ? '(DRY RUN)' : ''}`);
  console.log(`${rule}`);
  console.log(`Baidu churches found: ${stats.baiduChurchesFound}`);

  if (!dryRun) {
    console.log(`New churches inserted: ${stats.newChurchesInserted}`);
    console.log(`Existing churches updated: ${stats.existingUpdated} (matched by baiduId)`);
    console.log(`Existing churches linked: ${stats.existingLinked} (matched by proximity)`);
    if (stats.errors > 0) {
      console.log(`Errors encountered: ${stats.errors}`);
    }
  }

  console.log(`${rule}\n`);
}

/**
 * Script entry point: parse flags, run the import, record the job outcome,
 * disconnect Prisma, and exit with status 1 if the run failed.
 */
async function main(): Promise<void> {
  const { dryRun, resumeFromCell, jobId: argJobId } = parseArgs();
  let jobId: string | null = null;
  let failed = false;

  try {
    jobId = await createOrResumeJob(argJobId);

    if (dryRun) {
      console.log('\n*** DRY RUN MODE - No changes will be made to database ***\n');
    }

    const stats = await importFromBaidu(dryRun, resumeFromCell, jobId);
    printSummary(stats, dryRun);
    await completeJob(jobId);
  } catch (error) {
    console.error('Fatal error:', error);
    // Fall back to the requested id if the failure happened while marking the job as running.
    await completeJob(jobId ?? argJobId ?? null, String(error));
    failed = true;
  } finally {
    await prisma.$disconnect();
  }

  // Exit only after cleanup: calling process.exit() inside `catch` would skip the `finally` block.
  if (failed) {
    process.exit(1);
  }
}

main().catch((error: unknown) => {
  // Reached only if argument parsing or cleanup itself throws.
  console.error('Fatal error:', error);
  process.exit(1);
});