- Remove discovermassId/buscarmisasNetworkId from findDuplicateChurch match passes (importers now do their own pre-check dedup); restore as optional fields on ExistingChurch to keep type/runtime in sync - Add HK bounding box to COUNTRY_BOUNDING_BOXES; fix silent 0-result fallback when country query returns empty from mirror server - discovermass importer: add --limit flag and skip-already-imported pre-check using importedSlugs set - Import scripts: remove discovermassId from ExistingChurch select/stubs (field not needed in shared matcher context) - Schema: reorder discovermassId/kerknetId/gottesdienstzeitenId fields Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
324 lines
9.8 KiB
TypeScript
324 lines
9.8 KiB
TypeScript
#!/usr/bin/env tsx
|
|
/**
 * Import Catholic churches from Baidu Maps (China)
 *
 * Usage:
 *   npx tsx scripts/import-baidu-churches.ts
 *   npx tsx scripts/import-baidu-churches.ts --dry-run
 *   npx tsx scripts/import-baidu-churches.ts --resume-from-cell 100
 *   npx tsx scripts/import-baidu-churches.ts --job-id <uuid>
 */
|
|
|
|
import dotenv from 'dotenv';
|
|
import path from 'path';
|
|
|
|
// Load environment files in order: `.env.local` first, then `.env`.
// NOTE(review): dotenv by default does not overwrite variables that are already
// set, so values from `.env.local` should win over `.env` — confirm against the
// installed dotenv version.
for (const envFile of ['.env.local', '.env']) {
  dotenv.config({ path: path.resolve(process.cwd(), envFile) });
}
|
|
|
|
import { Pool } from 'pg';
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
|
import { PrismaClient } from '@prisma/client';
|
|
|
|
// Connection string for this run; falls back to a local development database
// when DATABASE_URL is not set.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';

// Hide the password segment (`:secret@`) before echoing the URL to the console.
const maskedDbUrl = dbUrl.replace(/:[^:@]+@/, ':***@');
console.log(`Connecting to database: ${maskedDbUrl}`);

// URLs containing "neon" get TLS with certificate verification turned off;
// every other URL uses the driver's default (no explicit ssl option).
const sslOptions = dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined;

const pool = new Pool({
  connectionString: dbUrl,
  ssl: sslOptions,
});

// Prisma talks to Postgres through the pg driver adapter built on the pool above.
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
|
|
|
import { queryBaiduByGrid, type BaiduChurch } from '../src/lib/baidu-client';
|
|
import { findDuplicateChurch, mergeBaiduData, type ExistingChurch } from '../src/lib/church-matcher';
|
|
|
|
/**
 * Counters collected over a single Baidu import run and reported in the
 * final summary / background-job progress.
 */
interface ImportStats {
  /** Number of churches returned by the Baidu grid query. */
  baiduChurchesFound: number;

  /** Churches that had no match and were inserted as new rows. */
  newChurchesInserted: number;

  /** Matched churches whose stored baiduId equals the incoming one (refreshed). */
  existingUpdated: number;

  /** Matched churches that did not yet carry this baiduId (linked to Baidu). */
  existingLinked: number;

  /** Churches whose processing threw an error. */
  errors: number;
}
|
|
|
|
/**
 * Parse the command-line flags accepted by this script from `process.argv`.
 *
 * Recognised flags:
 *   --dry-run                  enable dry-run mode
 *   --resume-from-cell <n>     grid cell index to resume from (non-negative integer)
 *   --job-id <uuid>            id of an existing background job to report progress to
 *
 * Unknown arguments, and value flags given without a value, are ignored.
 *
 * @returns The parsed options; `resumeFromCell` defaults to 0 and `jobId` to undefined.
 * @throws Error when the value of `--resume-from-cell` is not a non-negative integer.
 */
function parseArgs(): { dryRun: boolean; resumeFromCell: number; jobId?: string } {
  const args = process.argv.slice(2);

  let dryRun = false;
  let resumeFromCell = 0;
  let jobId: string | undefined;

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];

    if (flag === '--dry-run') {
      dryRun = true;
    } else if (flag === '--resume-from-cell' && value) {
      // Reject anything that is not a plain non-negative integer: a bare
      // parseInt would turn "abc" into NaN and accept negative cell indexes.
      const cell = /^\d+$/.test(value) ? Number.parseInt(value, 10) : Number.NaN;
      if (!Number.isSafeInteger(cell)) {
        throw new Error(`Invalid value for --resume-from-cell: "${value}" (expected a non-negative integer)`);
      }
      resumeFromCell = cell;
      i++; // skip the consumed value
    } else if (flag === '--job-id' && value) {
      jobId = value;
      i++; // skip the consumed value
    }
  }

  return { dryRun, resumeFromCell, jobId };
}
|
|
|
|
/**
 * Mark an existing background job as running.
 *
 * Despite the name, no job row is created here: when no id is supplied the
 * function does nothing and the run is simply untracked.
 *
 * @param jobId - Id of the background job to resume, if any.
 * @returns The same job id once it has been marked as running, or null when
 *          no id was given.
 */
async function createOrResumeJob(jobId?: string): Promise<string | null> {
  // Untracked run: nothing to update.
  if (!jobId) {
    return null;
  }

  const startedAt = new Date();
  await prisma.backgroundJob.update({
    where: { id: jobId },
    data: { status: 'running', startedAt },
  });

  return jobId;
}
|
|
|
|
/**
 * Record the final state of a background job.
 *
 * The job is stored as 'failed' when an error message is given and as
 * 'completed' otherwise. A failure to write the record is logged and
 * swallowed so it cannot mask the outcome of the import itself.
 *
 * @param jobId - Job to finalise; nothing happens when it is null/empty.
 * @param error - Error message to store for a failed run.
 */
async function completeJob(jobId: string | null, error?: string): Promise<void> {
  if (jobId) {
    const finalStatus = error ? 'failed' : 'completed';

    try {
      await prisma.backgroundJob.update({
        where: { id: jobId },
        data: {
          status: finalStatus,
          error: error || null,
          completedAt: new Date(),
        },
      });
    } catch (updateError) {
      console.error(`Failed to update job ${jobId}:`, updateError);
    }
  }
}
|
|
|
|
/**
 * Write the current progress counters to the background job record.
 *
 * @param jobId - Job to update; nothing happens when it is null/empty.
 * @param stats - Counters collected so far.
 * @param totalCells - Value stored as the job's total item count.
 * @param currentCell - Value stored as the job's processed count.
 */
async function updateJobProgress(jobId: string | null, stats: ImportStats, totalCells: number, currentCell: number): Promise<void> {
  if (jobId) {
    // Inserted, updated and linked churches all count as successes.
    const succeededCount = stats.newChurchesInserted + stats.existingUpdated + stats.existingLinked;

    try {
      await prisma.backgroundJob.update({
        where: { id: jobId },
        data: {
          totalItems: totalCells,
          processed: currentCell,
          succeeded: succeededCount,
          failed: stats.errors,
          itemsFound: stats.baiduChurchesFound,
        },
      });
    } catch (progressError) {
      // Progress reporting is best-effort: log and carry on with the import.
      console.error('Failed to update job progress:', progressError);
    }
  }
}
|
|
|
|
/**
 * Run the Baidu Maps import: query Baidu, de-duplicate against the churches
 * already stored for China, then update, link or insert each result.
 *
 * @param dryRun - When true, only query Baidu and print a preview; no church
 *                 rows are read or written.
 * @param resumeFromCell - Grid cell index passed through to the Baidu grid query.
 * @param jobId - Background job to report progress to, or null for an untracked run.
 * @returns Counters describing what was found and what was written.
 * @throws Error when the BAIDU_MAPS_API_KEY environment variable is missing.
 */
async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: string | null): Promise<ImportStats> {
  const stats: ImportStats = {
    baiduChurchesFound: 0,
    newChurchesInserted: 0,
    existingUpdated: 0,
    existingLinked: 0,
    errors: 0,
  };

  const apiKey = process.env.BAIDU_MAPS_API_KEY;
  if (!apiKey) {
    throw new Error('Missing BAIDU_MAPS_API_KEY environment variable');
  }

  console.log(`\n${'='.repeat(60)}`);
  console.log(`Importing Catholic churches from Baidu Maps (China)`);
  console.log(`${'='.repeat(60)}\n`);

  // Step 1: Query Baidu API
  console.log('Step 1: Querying Baidu Maps API...');
  const baiduChurches = await queryBaiduByGrid(
    apiKey,
    (progress) => {
      // Fire-and-forget on purpose: updateJobProgress logs and swallows its
      // own failures, so the grid query is never blocked by job bookkeeping.
      void updateJobProgress(jobId, stats, progress.totalCells, progress.cellIndex);
    },
    resumeFromCell,
  );

  stats.baiduChurchesFound = baiduChurches.length;
  console.log(`\nFound ${baiduChurches.length} churches from Baidu Maps`);

  if (baiduChurches.length === 0) {
    console.log('No churches found');
    return stats;
  }

  if (dryRun) {
    // Preview at most the first 20 results.
    console.log('\n[DRY RUN] Would import the following churches:');
    for (const church of baiduChurches.slice(0, 20)) {
      console.log(` - ${church.name} (${church.city || church.province || 'unknown'})`);
      console.log(` Baidu ID: ${church.baiduId}, Coords: ${church.lat.toFixed(4)}, ${church.lng.toFixed(4)}`);
    }
    if (baiduChurches.length > 20) {
      console.log(` ... and ${baiduChurches.length - 20} more`);
    }
    return stats;
  }

  // Step 2: Load existing churches in China for deduplication
  console.log('\nStep 2: Loading existing churches in China for deduplication...');
  const existingChurches: ExistingChurch[] = await prisma.church.findMany({
    where: { country: 'CN' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });
  console.log(`Found ${existingChurches.length} existing churches in China`);

  // Step 3: Process each Baidu church
  console.log('\nStep 3: Processing churches...');
  let processed = 0;

  for (const baiduChurch of baiduChurches) {
    try {
      const candidate = {
        name: baiduChurch.name,
        lat: baiduChurch.lat,
        lng: baiduChurch.lng,
        baiduId: baiduChurch.baiduId,
      };

      const duplicate = findDuplicateChurch(candidate, existingChurches);

      if (duplicate) {
        // Decide which counter applies before merging, in case the merge
        // touches the matched record.
        const alreadyLinked = duplicate.baiduId === baiduChurch.baiduId;

        // The write is the same whether the match came from the baiduId or
        // from another matcher pass; only the statistic differs.
        // NOTE(review): the in-memory entry is not refreshed after linking, so
        // a repeated Baidu record for the same church is counted as "linked"
        // again — confirm whether that matters for the reported stats.
        await prisma.church.update({
          where: { id: duplicate.id },
          data: mergeBaiduData(duplicate, baiduChurch),
        });

        if (alreadyLinked) {
          stats.existingUpdated++;
        } else {
          stats.existingLinked++;
        }
      } else {
        // New church — insert it
        const newChurch = await prisma.church.create({
          data: {
            name: baiduChurch.name,
            latitude: baiduChurch.lat,
            longitude: baiduChurch.lng,
            address: baiduChurch.address,
            city: baiduChurch.city,
            state: baiduChurch.province,
            country: 'CN',
            phone: baiduChurch.phone,
            website: baiduChurch.website,
            source: 'baidu',
            baiduId: baiduChurch.baiduId,
            baiduLastSyncedAt: new Date(),
            hasWebsite: !!baiduChurch.website,
          },
        });
        stats.newChurchesInserted++;

        // Add to existing churches list for dedup within this run
        existingChurches.push({
          id: newChurch.id,
          name: baiduChurch.name,
          latitude: baiduChurch.lat,
          longitude: baiduChurch.lng,
          osmId: null,
          baiduId: baiduChurch.baiduId,
          masstimesId: null,
          orarimesseId: null,
          massSchedulesPhId: null,
          philmassId: null,
          horariosMisasId: null,
          mszeInfoId: null,
          weekdayMassesId: null,
          messesInfoId: null,
          bohosluzbyId: null,
          miserendId: null,
          kerknetId: null,
          gottesdienstzeitenId: null,
          source: 'baidu',
          website: baiduChurch.website || null,
          phone: baiduChurch.phone || null,
          address: baiduChurch.address || null,
        });
      }
    } catch (error) {
      console.error(`Error processing church ${baiduChurch.name} (${baiduChurch.baiduId}):`, error);
      stats.errors++;
    }

    // Count every attempted church, including failed ones, so the progress
    // figure reaches the total even when some records error out.
    processed++;
    if (processed % 500 === 0) {
      console.log(`Progress: ${processed}/${baiduChurches.length} churches processed`);
      await updateJobProgress(jobId, stats, baiduChurches.length, processed);
    }
  }

  // Flush the final counters: the periodic update above only fires on
  // multiples of 500, which would otherwise leave the job record stale.
  await updateJobProgress(jobId, stats, baiduChurches.length, processed);

  console.log(`\nProcessed all ${baiduChurches.length} churches`);
  return stats;
}
|
|
|
|
/**
 * Print the end-of-run summary to the console.
 *
 * Write counters (and the error count, when non-zero) are only shown for
 * real runs; a dry run prints just the number of churches found.
 *
 * @param stats - Counters collected during the import.
 * @param dryRun - Whether the run was a dry run.
 */
function printSummary(stats: ImportStats, dryRun: boolean): void {
  const divider = '='.repeat(60);
  const dryRunTag = dryRun ? '(DRY RUN)' : '';

  console.log(`\n${divider}`);
  console.log(`Baidu Import Summary ${dryRunTag}`);
  console.log(`${divider}`);
  console.log(`Baidu churches found: ${stats.baiduChurchesFound}`);

  if (!dryRun) {
    console.log(`New churches inserted: ${stats.newChurchesInserted}`);
    console.log(`Existing churches updated: ${stats.existingUpdated} (matched by baiduId)`);
    console.log(`Existing churches linked: ${stats.existingLinked} (matched by proximity)`);

    if (stats.errors > 0) {
      console.log(`Errors encountered: ${stats.errors}`);
    }
  }

  console.log(`${divider}\n`);
}
|
|
|
|
/**
 * Script entry point: parse the CLI flags, run the import, record the job
 * outcome and release the database connection.
 *
 * On a fatal error the process exits with code 1, but only after the job has
 * been marked as failed and Prisma has been disconnected.
 */
async function main(): Promise<void> {
  let jobId: string | null = null;
  let failed = false;

  try {
    const { dryRun, resumeFromCell, jobId: argJobId } = parseArgs();

    // Remember the requested id first, so a failure while marking the job as
    // running can still be recorded against it in the catch block.
    jobId = argJobId ?? null;
    jobId = await createOrResumeJob(argJobId);

    if (dryRun) {
      console.log('\n*** DRY RUN MODE - No changes will be made to database ***\n');
    }

    const stats = await importFromBaidu(dryRun, resumeFromCell, jobId);
    printSummary(stats, dryRun);
    await completeJob(jobId);
  } catch (error) {
    failed = true;
    console.error('Fatal error:', error);
    await completeJob(jobId, String(error));
  } finally {
    try {
      await prisma.$disconnect();
    } catch (disconnectError) {
      console.error('Failed to disconnect from database:', disconnectError);
    }
  }

  // Exit only after cleanup: process.exit() terminates immediately, so calling
  // it inside the catch block would skip the finally block above.
  if (failed) {
    process.exit(1);
  }
}
|
|
|
|
// Script entry point: start the import when this file is executed.
main();
|