feat: add discovermassId to church-matcher ExistingChurch and ChurchCandidate

Add discovermassId field to ExistingChurch interface and ChurchCandidate type,
insert a dedicated matching pass in findDuplicateChurch, and update all 15 importer
push blocks plus 16 loadExistingChurches select queries to include the new field.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-03-11 06:52:05 -04:00
parent 2706708c51
commit a046928ed0
17 changed files with 11576 additions and 0 deletions

834
scripts/import-gcatholic.ts Normal file
View File

@@ -0,0 +1,834 @@
#!/usr/bin/env tsx
/**
* Import Catholic churches from GCatholic.org
*
* GCatholic is a comprehensive Catholic directory organized by diocese.
* Each church page includes a Google Plus Code (→ lat/lng), address, phone, website, etc.
* This script discovers churches via country → diocese → church page navigation.
*
* Usage:
* npx tsx scripts/import-gcatholic.ts --country CN
* npx tsx scripts/import-gcatholic.ts --country CN --dry-run
* npx tsx scripts/import-gcatholic.ts --diocese peki0
* npx tsx scripts/import-gcatholic.ts --all
* npx tsx scripts/import-gcatholic.ts --all --limit 100
* npx tsx scripts/import-gcatholic.ts --all --resume-from PL
*/
// Load .env for database connection (before importing anything that uses process.env)
import dotenv from 'dotenv';
import path from 'path';
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
// Create a fresh Prisma client for this script (don't use cached pool from lib/db)
import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
// Connection string comes from DATABASE_URL, with a localhost default when it is unset.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Log the target with the password portion (between ':' and '@') masked out.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
const pool = new Pool({
connectionString: dbUrl,
// SSL is only configured when the URL contains 'neon'; rejectUnauthorized is turned off in that case.
ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
// Prisma is wired to Postgres through the pg adapter built on the pool above.
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
import { findDuplicateChurch } from '../src/lib/church-matcher';
import type { ExistingChurch } from '../src/lib/church-matcher';
// Plus Code decoder
// Loaded with require() (hence the lint suppression) — presumably the package has no ES module/type declarations; TODO confirm.
// eslint-disable-next-line @typescript-eslint/no-require-imports
const { OpenLocationCode } = require('open-location-code');
// Shared decoder instance; parseChurchPage calls olc.decode() to turn a Plus Code into coordinates.
const olc = new OpenLocationCode();
// ─── Constants ───────────────────────────────────────────────────────────────
// Site root; every country, diocese and church page URL is built from it.
const BASE_URL = 'https://www.gcatholic.org';
// Sent as the User-Agent header on every request made by fetchPage.
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
// Default pause between HTTP requests, in milliseconds; the --delay flag overrides it.
const DEFAULT_DELAY_MS = 1500;
// ─── Types ───────────────────────────────────────────────────────────────────
/**
* One church scraped from a single GCatholic church page.
* Built by parseChurchPage; optional fields are undefined when the page does not provide them.
*/
interface GCatholicChurch {
// Digits of the last URL path segment ('' when the URL does not end in a number)
gcatholicId: string;
// Text of the page's <h1>
name: string;
// Text of the page's <h2>, when present
localName?: string;
// Center latitude of the decoded Plus Code
lat: number;
// Center longitude of the decoded Plus Code
lng: number;
// Plain text of the "Address" labeled field
address?: string;
// First segment of the page's <h3> location line
city?: string;
// Region taken from the "zregion" span of the <h3> line
state?: string;
// Two-letter code supplied by the caller, else taken from the first country link on the page
country?: string;
// Plain text of the "Telephone" labeled field
phone?: string;
// href of the "Website" link; kept only when it starts with 'http'
website?: string;
// Plain text of the "Jurisdiction" labeled field
diocese?: string;
// Text of the element following the "Type" label
churchType?: string;
// The Plus Code string the coordinates were decoded from
plusCode: string;
// URL of the page this record was parsed from
sourceUrl: string;
}
/**
* Running counters for an import; the same object is passed down and mutated
* by importDiocese and importChurch.
*/
interface ImportStats {
// Church pages that were fetched and parsed successfully
churchesFound: number;
// Churches inserted (or, in dry-run mode, that would be inserted)
newChurchesCreated: number;
// Churches matched to an existing record and updated (in dry-run mode: matched)
existingChurchesMerged: number;
// Pages that could not be parsed, plus matches that had nothing new to contribute
skipped: number;
// Failed page fetches and exceptions raised while processing a church
errors: number;
// One human-readable message per counted error
errorDetails: string[];
}
/**
* Command-line options as produced by parseArgs.
*/
interface CLIArgs {
// Value of --country, upper-cased
country?: string;
// True when --all was given
all: boolean;
// Value of --diocese (a GCatholic diocese code such as "peki0")
diocese?: string;
// True when --dry-run was given
dryRun: boolean;
// Value of --limit parsed as an integer
limit?: number;
// Value of --delay in milliseconds; DEFAULT_DELAY_MS when the flag is absent
delay: number;
// Value of --resume-from, upper-cased
resumeFrom?: string;
}
// ─── HTTP Fetching ───────────────────────────────────────────────────────────
/** Number of HTTP requests issued so far by fetchPage. */
let requestCount = 0;

/**
 * Download a page as text.
 *
 * Every call except the very first waits `delayMs` before hitting the network.
 *
 * @param url     Absolute URL to fetch.
 * @param delayMs Pause (ms) applied before the request when it is not the first one.
 * @returns The response body, or null on any non-OK status or network failure.
 *          Failures other than 404 are logged to stderr.
 */
async function fetchPage(url: string, delayMs: number): Promise<string | null> {
  // Rate limit: only the first request of the run goes out immediately
  const isFirstRequest = requestCount === 0;
  if (!isFirstRequest) {
    await new Promise((wake) => setTimeout(wake, delayMs));
  }
  requestCount += 1;

  const headers = {
    'User-Agent': USER_AGENT,
    'Accept': 'text/html,application/xhtml+xml',
    'Accept-Language': 'en-US,en;q=0.9',
  };

  try {
    const res = await fetch(url, { headers });
    if (res.ok) {
      return await res.text();
    }
    // A 404 is expected for some pages, so it is the one status that is not logged
    if (res.status !== 404) {
      console.error(` HTTP ${res.status} for ${url}`);
    }
    return null;
  } catch (err) {
    console.error(` Fetch error for ${url}: ${err instanceof Error ? err.message : err}`);
    return null;
  }
}
// ─── HTML Parsing ────────────────────────────────────────────────────────────
/**
 * Collect the country codes linked from the GCatholic dioceses index page.
 * Recognised links look like `country/{ISO2}`, optionally prefixed with `../`
 * or `/dioceses/` and optionally ending in `.htm`.
 *
 * @param delayMs Delay handed to fetchPage.
 * @returns Unique two-letter codes in sorted order; empty when the page cannot be fetched.
 */
async function discoverCountries(delayMs: number): Promise<string[]> {
  console.log('Discovering countries from GCatholic...');
  const page = await fetchPage(`${BASE_URL}/dioceses/`, delayMs);
  if (!page) {
    console.error('Failed to fetch countries page');
    return [];
  }

  // Match links like: href="country/CN" or href="/dioceses/country/CN"
  const linkPattern = /href="(?:\.\.\/|\/dioceses\/)?country\/([A-Z]{2})(?:\.htm)?"/g;
  const unique = new Set<string>();
  for (const hit of page.matchAll(linkPattern)) {
    unique.add(hit[1]);
  }

  const sorted = Array.from(unique).sort();
  console.log(`Found ${sorted.length} countries`);
  return sorted;
}
/**
 * List the dioceses linked from a country page.
 * Recognised links look like `diocese/{code}` (optionally behind `../` and/or
 * `../dioceses/`, optionally ending in `.htm`); the link text is the diocese name.
 *
 * @param countryCode Two-letter country code used to build the page URL.
 * @param delayMs     Delay handed to fetchPage.
 * @returns One entry per distinct code, in page order, keeping the first name seen.
 *          Empty when the page cannot be fetched.
 */
async function discoverDioceses(countryCode: string, delayMs: number): Promise<{ code: string; name: string }[]> {
  const page = await fetchPage(`${BASE_URL}/dioceses/country/${countryCode}.htm`, delayMs);
  if (!page) {
    return [];
  }

  // Match links like: href="../diocese/peki0" or href="../../dioceses/diocese/peki0"
  // The text after the link is the diocese name
  const linkPattern = /href="(?:\.\.\/)?(?:\.\.\/dioceses\/)?diocese\/([a-z0-9]+)(?:\.htm)?"[^>]*>([^<]+)</g;

  // A Map keeps insertion order, so the first occurrence of each code wins
  const nameByCode = new Map<string, string>();
  for (const [, code, rawName] of page.matchAll(linkPattern)) {
    const name = rawName.trim();
    if (!nameByCode.has(code)) {
      nameByCode.set(code, name);
    }
  }

  return Array.from(nameByCode, ([code, name]) => ({ code, name }));
}
/**
 * Collect the church page URLs linked from a diocese page.
 * Recognised links look like `churches/{region}/{id}` behind any number of `../`
 * prefixes, optionally ending in `.htm`.
 *
 * @param dioceseCode GCatholic diocese code used to build the page URL.
 * @param delayMs     Delay handed to fetchPage.
 * @returns Distinct absolute `.htm` URLs in page order; empty when the page cannot be fetched.
 */
async function discoverChurchLinks(dioceseCode: string, delayMs: number): Promise<string[]> {
  const page = await fetchPage(`${BASE_URL}/dioceses/diocese/${dioceseCode}.htm`, delayMs);
  if (!page) {
    return [];
  }

  // Match church links like: href="../../churches/china/46492" or href="../../churches/asia/1893"
  const linkPattern = /href="(?:\.\.\/)*churches\/([a-z0-9-]+\/\d+)(?:\.htm)?"/g;
  const absoluteUrls = Array.from(
    page.matchAll(linkPattern),
    (hit) => `${BASE_URL}/churches/${hit[1]}.htm`,
  );

  // Set construction drops repeats while keeping first-seen order
  return Array.from(new Set(absoluteUrls));
}
/**
 * Parse a single church page and extract structured data.
 *
 * The page must have an `<h1>` name and a Plus Code that decodes to coordinates;
 * everything else is optional.
 *
 * @param html        Raw HTML of the church page.
 * @param url         URL the page was fetched from (also the source of the GCatholic id).
 * @param countryCode Country code to record; when omitted, the first country link
 *                    on the page is used instead.
 * @returns The parsed church, or null when the name or a decodable Plus Code is missing.
 */
function parseChurchPage(html: string, url: string, countryCode?: string): GCatholicChurch | null {
  // Extract church name from <h1>
  const h1Match = html.match(/<h1>([^<]+)<\/h1>/);
  if (!h1Match) return null;
  const name = h1Match[1].trim();

  // Extract local name from <h2>
  const h2Match = html.match(/<h2>([^<]+)<\/h2>/);
  const localName = h2Match ? h2Match[1].trim() : undefined;

  // Extract Plus Code - it's in a link with onclick containing google maps
  // Pattern: onclick="window.open('https://www.google.com/maps/search/?api=1&query=PLUSCODE','_blank')"
  // The Plus Code text is like: >8PFRW9FF+C2<
  let plusCode: string | null = null;

  // Try the onclick pattern first
  const plusCodeOnclickMatch = html.match(/onclick="window\.open\('https:\/\/www\.google\.com\/maps\/search\/\?api=1&(?:amp;)?query=([^']+)'/);
  if (plusCodeOnclickMatch) {
    try {
      plusCode = decodeURIComponent(plusCodeOnclickMatch[1]);
    } catch {
      // decodeURIComponent throws URIError on malformed percent-encoding;
      // treat that as "not found here" so the text fallbacks below still get a chance
      plusCode = null;
    }
  }

  // Fallback: look for Plus Code pattern in text (format: XXXX+XX or longer)
  if (!plusCode) {
    const plusCodeTextMatch = html.match(/title="Plus Code">([A-Z0-9+]+)<\/a>/);
    if (plusCodeTextMatch) {
      plusCode = plusCodeTextMatch[1];
    }
  }

  // Another fallback: look for the code near "Location:" label
  if (!plusCode) {
    const locationMatch = html.match(/Location:.*?>([2-9A-HJ-NP-Z][2-9A-HJ-NP-Z0-9]{3,7}\+[2-9A-HJ-NP-Z0-9]{2,3})</);
    if (locationMatch) {
      plusCode = locationMatch[1];
    }
  }

  if (!plusCode) {
    return null; // Can't geolocate without Plus Code
  }

  // Decode Plus Code to lat/lng
  let lat: number, lng: number;
  try {
    const decoded = olc.decode(plusCode);
    lat = decoded.latitudeCenter;
    lng = decoded.longitudeCenter;
  } catch {
    return null; // Invalid Plus Code
  }

  // Extract GCatholic ID from URL (trailing digits, with or without ".htm")
  const idMatch = url.match(/\/(\d+)(?:\.htm)?$/);
  const gcatholicId = idMatch ? idMatch[1] : '';

  // Extract labeled fields using the consistent <span class="label"> pattern
  const getField = (label: string): string | undefined => {
    // Pattern: <span class="label">Label: </span>TEXT or <a>TEXT</a>
    const escaped = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // 's' flag: the value may span several lines before the closing </p> or <br
    const regex = new RegExp(`<span class="label">${escaped}:?\\s*</span>\\s*(.+?)(?:</p>|<br)`, 's');
    const match = html.match(regex);
    if (!match) return undefined;
    // Strip HTML tags to get plain text
    return match[1].replace(/<[^>]+>/g, '').trim() || undefined;
  };

  // Extract address
  const address = getField('Address');

  // Extract phone
  const phone = getField('Telephone');

  // Extract website URL (it's in an <a> tag)
  let website: string | undefined;
  const websiteMatch = html.match(/<span class="label">Website:?\s*<\/span>\s*<a\s+href="([^"]+)"/);
  if (websiteMatch) {
    website = websiteMatch[1];
    // Ensure it's an external URL
    if (website && !website.startsWith('http')) {
      website = undefined;
    }
  }

  // Extract diocese name
  const diocese = getField('Jurisdiction');

  // Extract church type
  let churchType: string | undefined;
  const typeMatch = html.match(/<span class="label">Type:?\s*<\/span>.*?class="ch[a-z]">([^<]+)/);
  if (typeMatch) {
    churchType = typeMatch[1].trim();
  }

  // Extract country from page (only when the caller did not supply one)
  let country = countryCode;
  if (!country) {
    const countryMatch = html.match(/href="[^"]*country\/([A-Z]{2})(?:\.htm)?"/);
    if (countryMatch) {
      country = countryMatch[1];
    }
  }

  // Extract city from <h3> tag: "City, Region, Country"
  let city: string | undefined;
  let state: string | undefined;
  const h3Match = html.match(/<h3>([^<]+?)(?:,\s*<span class="zregion">([^<]+)<\/span>)?(?:,\s*<a[^>]*class="zcountry"[^>]*>[^<]+<\/a>)?\s*<\/h3>/);
  if (h3Match) {
    // Kept as-is: the city text may contain local-language characters (e.g., "Beijing 北京")
    city = h3Match[1].trim();
    state = h3Match[2]?.trim();
  }

  return {
    gcatholicId,
    name,
    localName,
    lat,
    lng,
    address,
    city,
    state,
    country,
    phone,
    website,
    diocese,
    churchType,
    plusCode,
    sourceUrl: url,
  };
}
// ─── CLI Argument Parsing ────────────────────────────────────────────────────
/**
 * Parse the script's command-line flags from `process.argv`.
 *
 * Recognised flags: --country <ISO2>, --all, --diocese <code>, --dry-run,
 * --limit <n>, --delay <ms>, --resume-from <ISO2>. Unknown arguments are ignored.
 *
 * @returns The parsed options; `delay` defaults to DEFAULT_DELAY_MS.
 * @throws Error when --limit or --delay is missing its value or the value is not a
 *         non-negative integer. Previously such input became NaN, which downstream
 *         meant "no limit" / "no delay" without any warning.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = {
    all: false,
    dryRun: false,
    delay: DEFAULT_DELAY_MS,
  };

  // Accept only plain digit strings so "abc", "", "1.5" and a missing value are all rejected
  const readCount = (flag: string, raw: string | undefined): number => {
    if (raw === undefined || !/^\d+$/.test(raw)) {
      throw new Error(`${flag} requires a non-negative integer, got: ${raw ?? '(nothing)'}`);
    }
    return parseInt(raw, 10);
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--country':
        result.country = args[++i]?.toUpperCase();
        break;
      case '--all':
        result.all = true;
        break;
      case '--diocese':
        result.diocese = args[++i];
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--limit':
        result.limit = readCount('--limit', args[++i]);
        break;
      case '--delay':
        result.delay = readCount('--delay', args[++i]);
        break;
      case '--resume-from':
        result.resumeFrom = args[++i]?.toUpperCase();
        break;
    }
  }
  return result;
}
// ─── Database Operations ─────────────────────────────────────────────────────
/**
 * Read every church row from the database, restricted to the columns the
 * duplicate matcher and the merge step look at (ids from the various sources,
 * name, coordinates, source, website, phone, address).
 *
 * @returns All existing churches, used as the in-memory deduplication set.
 */
async function loadExistingChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing churches for deduplication...');

  const rows = await prisma.church.findMany({
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });

  console.log(`Loaded ${rows.length} existing churches`);
  return rows;
}
/**
 * Import one parsed church: merge it into a matching existing record or create a new one.
 *
 * - Dry run: only logs and counts what would happen; nothing is written.
 * - Duplicate found: fills phone, website, address and diocese only where the existing
 *   record has none. Counts as "merged" when something was written, "skipped" otherwise.
 * - No duplicate: inserts a new row with source 'gcatholic' and appends it to
 *   `existingChurches` so later pages in the same run can match against it.
 *
 * @param church           Church parsed from a GCatholic page.
 * @param existingChurches In-memory deduplication set; mutated when a church is created.
 * @param dryRun           When true, no database writes are performed.
 * @param stats            Counters updated in place.
 */
async function importChurch(
  church: GCatholicChurch,
  existingChurches: ExistingChurch[],
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  // Build a candidate compatible with findDuplicateChurch (expects OSMChurch shape)
  const candidate = {
    osmId: `gcatholic-${church.gcatholicId}`,
    name: church.name,
    lat: church.lat,
    lng: church.lng,
    address: church.address,
    city: church.city,
    state: church.state,
    country: church.country,
    phone: church.phone,
    website: church.website,
    diocese: church.diocese,
  };
  const duplicate = findDuplicateChurch(candidate, existingChurches);

  if (dryRun) {
    if (duplicate) {
      console.log(` [MERGE] ${church.name} → existing: ${duplicate.name} (${duplicate.id})`);
      stats.existingChurchesMerged++;
    } else {
      console.log(` [NEW] ${church.name} (${church.lat.toFixed(4)}, ${church.lng.toFixed(4)})`);
      stats.newChurchesCreated++;
    }
    return;
  }

  if (duplicate) {
    // Merge: fill in missing fields only
    const updateData: Record<string, unknown> = {};
    if (!duplicate.phone && church.phone) updateData.phone = church.phone;
    if (!duplicate.website && church.website) {
      updateData.website = church.website;
      updateData.hasWebsite = true;
    }
    if (!duplicate.address && church.address) updateData.address = church.address;

    // Diocese is not part of the in-memory record, so it has to be checked on the DB row.
    // Skip the round trip entirely when GCatholic has no diocese to offer.
    if (church.diocese) {
      const dbRecord = await prisma.church.findUnique({
        where: { id: duplicate.id },
        select: { diocese: true },
      });
      if (dbRecord && !dbRecord.diocese) {
        updateData.diocese = church.diocese;
      }
    }

    if (Object.keys(updateData).length === 0) {
      stats.skipped++;
      return;
    }

    await prisma.church.update({
      where: { id: duplicate.id },
      data: updateData,
    });

    // Mirror the filled-in values onto the in-memory record. Without this, a later page
    // that matches the same church would still see the fields as empty and overwrite them.
    // NOTE(review): assumes findDuplicateChurch returns the element held in existingChurches.
    if (typeof updateData.phone === 'string') duplicate.phone = updateData.phone;
    if (typeof updateData.website === 'string') duplicate.website = updateData.website;
    if (typeof updateData.address === 'string') duplicate.address = updateData.address;

    stats.existingChurchesMerged++;
    return;
  }

  // Create new church
  const newChurch = await prisma.church.create({
    data: {
      name: church.name,
      latitude: church.lat,
      longitude: church.lng,
      address: church.address,
      city: church.city,
      state: church.state,
      country: church.country,
      phone: church.phone,
      website: church.website,
      hasWebsite: !!church.website,
      source: 'gcatholic',
      diocese: church.diocese,
    },
  });
  stats.newChurchesCreated++;

  // Add to existing list for future dedup within this run
  existingChurches.push({
    id: newChurch.id,
    name: church.name,
    latitude: church.lat,
    longitude: church.lng,
    osmId: null,
    baiduId: null,
    masstimesId: null,
    orarimesseId: null,
    massSchedulesPhId: null,
    philmassId: null,
    horariosMisasId: null,
    mszeInfoId: null,
    weekdayMassesId: null,
    messesInfoId: null,
    bohosluzbyId: null,
    miserendId: null,
    kerknetId: null,
    gottesdienstzeitenId: null,
    discovermassId: null,
    source: 'gcatholic',
    website: church.website || null,
    phone: church.phone || null,
    address: church.address || null,
  });
}
// ─── Import Logic ────────────────────────────────────────────────────────────
/**
 * Import every church page linked from one diocese page.
 *
 * Each page is fetched, parsed and handed to importChurch. Failures on a single page
 * are recorded in `stats` and do not stop the loop. A one-line per-diocese summary
 * is printed at the end, including when the global limit cuts the loop short.
 *
 * @param dioceseCode      GCatholic diocese code.
 * @param dioceseName      Name used in log output.
 * @param countryCode      Country code passed to the page parser (may be undefined).
 * @param existingChurches In-memory deduplication set, shared across the run.
 * @param args             CLI options (`delay` and `dryRun` are used here).
 * @param stats            Counters updated in place.
 * @param globalLimit      Optional shared budget; decremented once per parsed church.
 */
async function importDiocese(
  dioceseCode: string,
  dioceseName: string,
  countryCode: string | undefined,
  existingChurches: ExistingChurch[],
  args: CLIArgs,
  stats: ImportStats,
  globalLimit?: { remaining: number },
): Promise<void> {
  const churchUrls = await discoverChurchLinks(dioceseCode, args.delay);
  if (churchUrls.length === 0) {
    return;
  }
  console.log(` Diocese ${dioceseName} (${dioceseCode}): ${churchUrls.length} church pages found`);

  let dioceseNew = 0;
  let dioceseMerged = 0;
  let dioceseSkipped = 0;
  let dioceseErrors = 0;

  for (const url of churchUrls) {
    // Check global limit; break (not return) so the summary below is still printed
    if (globalLimit && globalLimit.remaining <= 0) {
      console.log(` Limit reached, stopping`);
      break;
    }
    try {
      const html = await fetchPage(url, args.delay);
      if (!html) {
        stats.errors++;
        dioceseErrors++;
        stats.errorDetails.push(`Failed to fetch: ${url}`);
        continue;
      }
      const church = parseChurchPage(html, url, countryCode);
      if (!church) {
        stats.skipped++;
        dioceseSkipped++;
        continue;
      }
      stats.churchesFound++;

      // importChurch reports its outcome only through the shared counters,
      // so the per-diocese tallies are derived from before/after deltas
      const prevNew = stats.newChurchesCreated;
      const prevMerged = stats.existingChurchesMerged;
      const prevSkipped = stats.skipped;
      await importChurch(church, existingChurches, args.dryRun, stats);
      if (stats.newChurchesCreated > prevNew) dioceseNew++;
      if (stats.existingChurchesMerged > prevMerged) dioceseMerged++;
      if (stats.skipped > prevSkipped) dioceseSkipped++;

      if (globalLimit) globalLimit.remaining--;
    } catch (error) {
      stats.errors++;
      dioceseErrors++;
      const msg = error instanceof Error ? error.message : String(error);
      stats.errorDetails.push(`${url}: ${msg}`);
      console.error(` Error processing ${url}: ${msg}`);
    }
  }

  const parts = [`${dioceseNew} new`, `${dioceseMerged} merged`];
  if (dioceseSkipped > 0) parts.push(`${dioceseSkipped} skipped`);
  if (dioceseErrors > 0) parts.push(`${dioceseErrors} errors`);
  console.log(`${parts.join(', ')}`);
}
/**
 * Import all dioceses of one country.
 *
 * Discovers the country's dioceses and runs importDiocese on each in turn,
 * stopping early once the optional global limit is used up.
 *
 * @param countryCode      Two-letter country code.
 * @param existingChurches In-memory deduplication set, shared across the run.
 * @param args             CLI options forwarded to the diocese import.
 * @param globalLimit      Optional shared budget of churches left to process.
 * @returns Counters for this country only.
 */
async function importCountry(
  countryCode: string,
  existingChurches: ExistingChurch[],
  args: CLIArgs,
  globalLimit?: { remaining: number },
): Promise<ImportStats> {
  const stats: ImportStats = {
    churchesFound: 0,
    newChurchesCreated: 0,
    existingChurchesMerged: 0,
    skipped: 0,
    errors: 0,
    errorDetails: [],
  };

  const banner = '='.repeat(60);
  console.log(`\n${banner}`);
  console.log(`Importing from GCatholic: ${countryCode}`);
  console.log(banner);

  // Discover dioceses
  const found = await discoverDioceses(countryCode, args.delay);
  if (found.length === 0) {
    console.log(`No dioceses found for ${countryCode}`);
    return stats;
  }
  console.log(`Found ${found.length} dioceses in ${countryCode}`);

  // Process each diocese until the list or the budget runs out
  for (const { code, name } of found) {
    const budgetSpent = globalLimit ? globalLimit.remaining <= 0 : false;
    if (budgetSpent) break;
    await importDiocese(code, name, countryCode, existingChurches, args, stats, globalLimit);
  }
  return stats;
}
// ─── Summary Printing ────────────────────────────────────────────────────────
/**
 * Print a framed summary of an import's counters to stdout.
 *
 * @param label  Heading shown after "Summary:" (a country code or diocese label).
 * @param stats  Counters to report; the error line appears only when errors > 0.
 * @param dryRun Adds a "(DRY RUN)" marker to the heading when true.
 */
function printSummary(label: string, stats: ImportStats, dryRun: boolean): void {
  const rule = '─'.repeat(60);
  const modeTag = dryRun ? '(DRY RUN)' : '';

  const report: string[] = [
    `\n${rule}`,
    `Summary: ${label} ${modeTag}`,
    rule,
    `Churches found on GCatholic: ${stats.churchesFound}`,
    `New churches created: ${stats.newChurchesCreated}`,
    `Merged with existing: ${stats.existingChurchesMerged}`,
    `Skipped (no data/dup): ${stats.skipped}`,
  ];
  if (stats.errors > 0) {
    report.push(`Errors: ${stats.errors}`);
  }
  report.push(rule);

  // One console.log call per line, as before
  for (const line of report) {
    console.log(line);
  }
}
// ─── Job Tracking ────────────────────────────────────────────────────────────
/**
 * Attach this run to a tracked background job when `--job-id <id>` is present.
 *
 * The job row is marked as 'running' with the current time as its start time.
 *
 * @param args Raw command-line arguments (without the node/script prefix).
 * @returns The job id, or null when no --job-id flag was given.
 * @throws Error when --job-id is the last argument or is followed by another flag,
 *         instead of sending an undefined or bogus id to the database.
 */
async function createOrResumeJob(args: string[]): Promise<string | null> {
  const jobIdIndex = args.indexOf('--job-id');
  if (jobIdIndex === -1) {
    return null;
  }

  const jobId = args[jobIdIndex + 1];
  if (jobId === undefined || jobId === '' || jobId.startsWith('--')) {
    throw new Error('--job-id requires a job id value');
  }

  await prisma.backgroundJob.update({
    where: { id: jobId },
    data: { status: 'running', startedAt: new Date() },
  });
  return jobId;
}
/**
 * Record the final state of a tracked background job.
 *
 * Does nothing when there is no job id. A failure to write the update is logged
 * and swallowed so it cannot mask the outcome of the import itself.
 *
 * @param jobId Job to update, or null when the run is not tracked.
 * @param error Failure description; when given (and non-empty) the job is marked
 *              'failed', otherwise 'completed'.
 */
async function completeJob(jobId: string | null, error?: string): Promise<void> {
  if (!jobId) {
    return;
  }

  const failed = Boolean(error);
  try {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: {
        status: failed ? 'failed' : 'completed',
        error: error || null,
        completedAt: new Date(),
      },
    });
  } catch (updateError) {
    console.error(`Failed to update job ${jobId}:`, updateError);
  }
}
// ─── Main ────────────────────────────────────────────────────────────────────
/** Create a zeroed statistics accumulator. */
function emptyImportStats(): ImportStats {
  return {
    churchesFound: 0,
    newChurchesCreated: 0,
    existingChurchesMerged: 0,
    skipped: 0,
    errors: 0,
    errorDetails: [],
  };
}

/**
 * Script entry point: parse the CLI flags, run the requested import mode
 * (single diocese, single country, or all countries) and print summaries.
 *
 * Failure handling: every failure path marks the tracked job (if any) as failed,
 * sets a non-zero exit code and then falls through to the `finally` block, so the
 * database connections are always closed. `process.exit()` is deliberately not
 * called inside the `try`, because it would terminate the process before the job
 * status update and the cleanup could run.
 */
async function main(): Promise<void> {
  let jobId: string | null = null;

  // Record a failure on the tracked job and make the process exit non-zero after cleanup
  const abort = async (reason: string): Promise<void> => {
    await completeJob(jobId, reason);
    process.exitCode = 1;
  };

  try {
    // Inside the try so a failure here is reported and the pool is still closed
    jobId = await createOrResumeJob(process.argv.slice(2));
    const args = parseArgs();

    if (!args.country && !args.all && !args.diocese) {
      console.error('Error: Must specify --country <ISO2>, --diocese <code>, or --all');
      console.error('Usage:');
      console.error(' npx tsx scripts/import-gcatholic.ts --country CN');
      console.error(' npx tsx scripts/import-gcatholic.ts --country CN --dry-run');
      console.error(' npx tsx scripts/import-gcatholic.ts --diocese peki0');
      console.error(' npx tsx scripts/import-gcatholic.ts --all');
      console.error(' npx tsx scripts/import-gcatholic.ts --all --limit 500');
      console.error(' npx tsx scripts/import-gcatholic.ts --all --resume-from PL');
      await abort('No import target specified');
      return;
    }

    // A NaN limit would silently mean "unlimited" and a NaN delay "no rate limiting"
    if ((args.limit !== undefined && Number.isNaN(args.limit)) || Number.isNaN(args.delay)) {
      console.error('Error: --limit and --delay require numeric values');
      await abort('Invalid numeric argument');
      return;
    }

    if (args.dryRun) {
      console.log('\n*** DRY RUN MODE — no changes will be written to database ***\n');
    }
    console.log(`Delay between requests: ${args.delay}ms`);
    if (args.limit !== undefined) console.log(`Limit: ${args.limit} churches`);

    const existingChurches = await loadExistingChurches();
    // Compare against undefined so that "--limit 0" is honoured instead of meaning "no limit"
    const globalLimit = args.limit !== undefined ? { remaining: args.limit } : undefined;

    if (args.diocese) {
      // Single diocese mode
      const stats = emptyImportStats();
      await importDiocese(args.diocese, args.diocese, args.country, existingChurches, args, stats, globalLimit);
      printSummary(`Diocese ${args.diocese}`, stats, args.dryRun);
    } else if (args.country) {
      // Single country mode
      const stats = await importCountry(args.country, existingChurches, args, globalLimit);
      printSummary(args.country, stats, args.dryRun);
    } else if (args.all) {
      // All countries mode — discover from GCatholic
      let countries = await discoverCountries(args.delay);
      if (countries.length === 0) {
        console.error('Failed to discover countries');
        await abort('Failed to discover countries');
        return;
      }

      // Handle --resume-from
      if (args.resumeFrom) {
        const idx = countries.indexOf(args.resumeFrom);
        if (idx === -1) {
          console.error(`Country ${args.resumeFrom} not found in GCatholic listing`);
          await abort(`Country ${args.resumeFrom} not found in GCatholic listing`);
          return;
        }
        console.log(`Resuming from ${args.resumeFrom} (skipping ${idx} countries)\n`);
        countries = countries.slice(idx);
      }
      console.log(`Will process ${countries.length} countries\n`);

      const totalStats = emptyImportStats();
      let countriesProcessed = 0;
      for (const countryCode of countries) {
        if (globalLimit && globalLimit.remaining <= 0) {
          console.log(`\nGlobal limit reached, stopping.`);
          break;
        }
        const stats = await importCountry(countryCode, existingChurches, args, globalLimit);
        printSummary(countryCode, stats, args.dryRun);

        // Aggregate
        totalStats.churchesFound += stats.churchesFound;
        totalStats.newChurchesCreated += stats.newChurchesCreated;
        totalStats.existingChurchesMerged += stats.existingChurchesMerged;
        totalStats.skipped += stats.skipped;
        totalStats.errors += stats.errors;
        totalStats.errorDetails.push(...stats.errorDetails);
        countriesProcessed++;

        // Small extra delay between countries
        await new Promise((resolve) => setTimeout(resolve, 2000));
      }

      // Overall summary
      console.log(`\n${'='.repeat(60)}`);
      console.log(`OVERALL SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
      console.log(`${'='.repeat(60)}`);
      console.log(`Countries processed: ${countriesProcessed}`);
      console.log(`Total churches found: ${totalStats.churchesFound}`);
      console.log(`Total new churches created: ${totalStats.newChurchesCreated}`);
      console.log(`Total merged with existing: ${totalStats.existingChurchesMerged}`);
      console.log(`Total skipped: ${totalStats.skipped}`);
      if (totalStats.errors > 0) {
        console.log(`Total errors: ${totalStats.errors}`);
      }
      console.log(`Total HTTP requests made: ${requestCount}`);
      console.log(`${'='.repeat(60)}\n`);

      // Cap the detail listing at 50 entries to keep the output readable
      if (totalStats.errorDetails.length > 0 && totalStats.errorDetails.length <= 50) {
        console.log('\nError details:');
        totalStats.errorDetails.forEach((e) => console.log(` - ${e}`));
      } else if (totalStats.errorDetails.length > 50) {
        console.log(`\nFirst 50 errors (of ${totalStats.errorDetails.length}):`);
        totalStats.errorDetails.slice(0, 50).forEach((e) => console.log(` - ${e}`));
      }
    }

    await completeJob(jobId);
  } catch (error) {
    console.error('Fatal error:', error);
    await completeJob(jobId, String(error));
    // exitCode instead of exit(): lets the finally block below run before the process ends
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
    await pool.end();
  }
}

main().catch((error: unknown) => {
  // Only reachable when the cleanup in main's finally block itself fails
  console.error('Fatal error:', error);
  process.exitCode = 1;
});