feat: add discovermassId to church-matcher ExistingChurch and ChurchCandidate

Add discovermassId field to ExistingChurch interface and ChurchCandidate type,
insert a dedicated matching pass in findDuplicateChurch, and update all 15 importer
push blocks plus 16 loadExistingChurches select queries to include the new field.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-03-11 06:52:05 -04:00
parent 2706708c51
commit a046928ed0
17 changed files with 11576 additions and 0 deletions

View File

@@ -0,0 +1,325 @@
#!/usr/bin/env tsx
/**
* Import Catholic churches from Baidu Maps (China)
* Usage:
* npx tsx scripts/import-baidu-churches.ts
* npx tsx scripts/import-baidu-churches.ts --dry-run
* npx tsx scripts/import-baidu-churches.ts --resume-from-cell 100
* npx tsx scripts/import-baidu-churches.ts --job-id <uuid>
*/
import dotenv from 'dotenv';
import path from 'path';
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
const pool = new Pool({
connectionString: dbUrl,
ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
import { queryBaiduByGrid, type BaiduChurch } from '../src/lib/baidu-client';
import { findDuplicateChurch, mergeBaiduData, type ExistingChurch } from '../src/lib/church-matcher';
interface ImportStats {
baiduChurchesFound: number;
newChurchesInserted: number;
existingUpdated: number;
existingLinked: number;
errors: number;
}
function parseArgs(): { dryRun: boolean; resumeFromCell: number; jobId?: string } {
const args = process.argv.slice(2);
const result = {
dryRun: false,
resumeFromCell: 0,
jobId: undefined as string | undefined,
};
for (let i = 0; i < args.length; i++) {
if (args[i] === '--dry-run') {
result.dryRun = true;
} else if (args[i] === '--resume-from-cell' && args[i + 1]) {
result.resumeFromCell = parseInt(args[i + 1], 10);
i++;
} else if (args[i] === '--job-id' && args[i + 1]) {
result.jobId = args[i + 1];
i++;
}
}
return result;
}
async function createOrResumeJob(jobId?: string): Promise<string | null> {
if (jobId) {
await prisma.backgroundJob.update({
where: { id: jobId },
data: { status: 'running', startedAt: new Date() },
});
return jobId;
}
return null;
}
async function completeJob(jobId: string | null, error?: string): Promise<void> {
if (!jobId) return;
try {
await prisma.backgroundJob.update({
where: { id: jobId },
data: {
status: error ? 'failed' : 'completed',
error: error || null,
completedAt: new Date(),
},
});
} catch (err) {
console.error(`Failed to update job ${jobId}:`, err);
}
}
async function updateJobProgress(jobId: string | null, stats: ImportStats, totalCells: number, currentCell: number): Promise<void> {
if (!jobId) return;
try {
await prisma.backgroundJob.update({
where: { id: jobId },
data: {
totalItems: totalCells,
processed: currentCell,
succeeded: stats.newChurchesInserted + stats.existingUpdated + stats.existingLinked,
failed: stats.errors,
itemsFound: stats.baiduChurchesFound,
},
});
} catch (err) {
// Non-fatal — just log it
console.error(`Failed to update job progress:`, err);
}
}
async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: string | null): Promise<ImportStats> {
const stats: ImportStats = {
baiduChurchesFound: 0,
newChurchesInserted: 0,
existingUpdated: 0,
existingLinked: 0,
errors: 0,
};
const apiKey = process.env.BAIDU_MAPS_API_KEY;
if (!apiKey) {
throw new Error('Missing BAIDU_MAPS_API_KEY environment variable');
}
console.log(`\n${'='.repeat(60)}`);
console.log(`Importing Catholic churches from Baidu Maps (China)`);
console.log(`${'='.repeat(60)}\n`);
// Step 1: Query Baidu API
console.log('Step 1: Querying Baidu Maps API...');
const baiduChurches = await queryBaiduByGrid(
apiKey,
(progress) => {
updateJobProgress(jobId, stats, progress.totalCells, progress.cellIndex);
},
resumeFromCell,
);
stats.baiduChurchesFound = baiduChurches.length;
console.log(`\nFound ${baiduChurches.length} churches from Baidu Maps`);
if (baiduChurches.length === 0) {
console.log('No churches found');
return stats;
}
if (dryRun) {
console.log('\n[DRY RUN] Would import the following churches:');
baiduChurches.slice(0, 20).forEach((church) => {
console.log(` - ${church.name} (${church.city || church.province || 'unknown'})`);
console.log(` Baidu ID: ${church.baiduId}, Coords: ${church.lat.toFixed(4)}, ${church.lng.toFixed(4)}`);
});
if (baiduChurches.length > 20) {
console.log(` ... and ${baiduChurches.length - 20} more`);
}
return stats;
}
// Step 2: Load existing churches in China for deduplication
console.log('\nStep 2: Loading existing churches in China for deduplication...');
const existingChurches: ExistingChurch[] = await prisma.church.findMany({
where: { country: 'CN' },
select: {
id: true,
name: true,
latitude: true,
longitude: true,
osmId: true,
baiduId: true,
masstimesId: true,
orarimesseId: true,
massSchedulesPhId: true,
philmassId: true,
horariosMisasId: true,
mszeInfoId: true,
weekdayMassesId: true,
messesInfoId: true,
bohosluzbyId: true,
miserendId: true,
kerknetId: true,
gottesdienstzeitenId: true,
discovermassId: true,
source: true,
website: true,
phone: true,
address: true,
},
});
console.log(`Found ${existingChurches.length} existing churches in China`);
// Step 3: Process each Baidu church
console.log('\nStep 3: Processing churches...');
let processed = 0;
for (const baiduChurch of baiduChurches) {
try {
const candidate = {
name: baiduChurch.name,
lat: baiduChurch.lat,
lng: baiduChurch.lng,
baiduId: baiduChurch.baiduId,
};
const duplicate = findDuplicateChurch(candidate, existingChurches);
if (duplicate && duplicate.baiduId === baiduChurch.baiduId) {
// Existing church with matching baiduId — update it
const mergedData = mergeBaiduData(duplicate, baiduChurch);
await prisma.church.update({
where: { id: duplicate.id },
data: mergedData,
});
stats.existingUpdated++;
} else if (duplicate) {
// Existing church matched by proximity/name — link it with baiduId
const mergedData = mergeBaiduData(duplicate, baiduChurch);
await prisma.church.update({
where: { id: duplicate.id },
data: mergedData,
});
stats.existingLinked++;
} else {
// New church — insert it
const newChurch = await prisma.church.create({
data: {
name: baiduChurch.name,
latitude: baiduChurch.lat,
longitude: baiduChurch.lng,
address: baiduChurch.address,
city: baiduChurch.city,
state: baiduChurch.province,
country: 'CN',
phone: baiduChurch.phone,
website: baiduChurch.website,
source: 'baidu',
baiduId: baiduChurch.baiduId,
baiduLastSyncedAt: new Date(),
hasWebsite: !!baiduChurch.website,
},
});
stats.newChurchesInserted++;
// Add to existing churches list for dedup within this run
existingChurches.push({
id: newChurch.id,
name: baiduChurch.name,
latitude: baiduChurch.lat,
longitude: baiduChurch.lng,
osmId: null,
baiduId: baiduChurch.baiduId,
masstimesId: null,
orarimesseId: null,
massSchedulesPhId: null,
philmassId: null,
horariosMisasId: null,
mszeInfoId: null,
weekdayMassesId: null,
messesInfoId: null,
bohosluzbyId: null,
miserendId: null,
kerknetId: null,
gottesdienstzeitenId: null,
discovermassId: null,
source: 'baidu',
website: baiduChurch.website || null,
phone: baiduChurch.phone || null,
address: baiduChurch.address || null,
});
}
processed++;
if (processed % 500 === 0) {
console.log(`Progress: ${processed}/${baiduChurches.length} churches processed`);
await updateJobProgress(jobId, stats, baiduChurches.length, processed);
}
} catch (error) {
console.error(`Error processing church ${baiduChurch.name} (${baiduChurch.baiduId}):`, error);
stats.errors++;
}
}
console.log(`\nProcessed all ${baiduChurches.length} churches`);
return stats;
}
function printSummary(stats: ImportStats, dryRun: boolean) {
console.log(`\n${'='.repeat(60)}`);
console.log(`Baidu Import Summary ${dryRun ? '(DRY RUN)' : ''}`);
console.log(`${'='.repeat(60)}`);
console.log(`Baidu churches found: ${stats.baiduChurchesFound}`);
if (!dryRun) {
console.log(`New churches inserted: ${stats.newChurchesInserted}`);
console.log(`Existing churches updated: ${stats.existingUpdated} (matched by baiduId)`);
console.log(`Existing churches linked: ${stats.existingLinked} (matched by proximity)`);
}
if (!dryRun && stats.errors > 0) {
console.log(`Errors encountered: ${stats.errors}`);
}
console.log(`${'='.repeat(60)}\n`);
}
async function main() {
const { dryRun, resumeFromCell, jobId: argJobId } = parseArgs();
const jobId = await createOrResumeJob(argJobId);
if (dryRun) {
console.log('\n*** DRY RUN MODE - No changes will be made to database ***\n');
}
try {
const stats = await importFromBaidu(dryRun, resumeFromCell, jobId);
printSummary(stats, dryRun);
await completeJob(jobId);
} catch (error) {
console.error('Fatal error:', error);
await completeJob(jobId, String(error));
process.exit(1);
} finally {
await prisma.$disconnect();
}
}
main();