feat: add discovermassId to church-matcher ExistingChurch and ChurchCandidate
Add discovermassId field to ExistingChurch interface and ChurchCandidate type, insert a dedicated matching pass in findDuplicateChurch, and update all 15 importer push blocks plus 16 loadExistingChurches select queries to include the new field. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
771
scripts/import-orarimesse.ts
Normal file
771
scripts/import-orarimesse.ts
Normal file
@@ -0,0 +1,771 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* Import Catholic churches and mass schedules from OrariMesse.it
|
||||
*
|
||||
* OrariMesse.it is the official CEI (Italian Bishops' Conference) platform for
|
||||
* mass times in Italy. It provides a public REST API organized by diocese.
|
||||
*
|
||||
* Import strategy:
|
||||
* Pass 1: For each diocese, fetch all churches → match against existing DB
|
||||
* records (by ICSC code or proximity+name) → upsert
|
||||
* Pass 2: For churches with active schedules, fetch detail endpoint →
|
||||
* convert 8-day rolling schedule to recurring → replace mass schedules
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx scripts/import-orarimesse.ts --all
|
||||
* npx tsx scripts/import-orarimesse.ts --diocese roma
|
||||
* npx tsx scripts/import-orarimesse.ts --all --dry-run
|
||||
* npx tsx scripts/import-orarimesse.ts --all --schedules-only
|
||||
* npx tsx scripts/import-orarimesse.ts --all --resume-from napoli
|
||||
* npx tsx scripts/import-orarimesse.ts --all --job-id {uuid}
|
||||
*/
|
||||
|
||||
import dotenv from 'dotenv';
|
||||
import path from 'path';
|
||||
|
||||
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
|
||||
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
// Connection string comes from DATABASE_URL; falls back to a local database when it is unset.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Log the target database with the password portion of the URL masked.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
const pool = new Pool({
  connectionString: dbUrl,
  // URLs containing "neon" get TLS with certificate verification disabled; others use the driver default.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
// Prisma runs on top of the shared pg pool through the driver adapter.
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
import { findDuplicateChurch } from '../src/lib/church-matcher';
|
||||
import type { ExistingChurch } from '../src/lib/church-matcher';
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
// Base URL that every API endpoint name is appended to.
const API_BASE = 'https://orarimesse.it/api';
// Sent as the User-Agent header on every API request.
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
// Pause in milliseconds applied before diocese-level requests (diocese list, church list).
const DIOCESE_DELAY_MS = 2000;
// Pause in milliseconds applied before per-church detail requests.
const DETAIL_DELAY_MS = 1000;
|
||||
|
||||
// ─── Italian Day Map ─────────────────────────────────────────────────────────
|
||||
|
||||
// Lowercase Italian weekday names mapped to day numbers (0 = domenica/Sunday … 6 = sabato/Saturday).
// Accented weekdays are listed both with and without the grave accent.
const ITALIAN_DAY_MAP: Record<string, number> = {
  'domenica': 0, 'lunedì': 1, 'lunedi': 1,
  'martedì': 2, 'martedi': 2, 'mercoledì': 3, 'mercoledi': 3,
  'giovedì': 4, 'giovedi': 4, 'venerdì': 5, 'venerdi': 5,
  'sabato': 6,
};
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/** One diocese entry from the `getDiocesi` endpoint. */
interface OrariMesseDiocese {
  codice_cei: string;
  /** Display name, printed in the progress log. */
  title: string;
  /** Identifier passed as the `diocesi` query parameter and matched against --diocese / --resume-from. */
  slug: string;
  url: string;
  /** Presumably the number of churches in the diocese; not read by this script. */
  countChiese: number;
}
|
||||
|
||||
/** One church entry from `listaChiese` in the `getListaChiese` response (requested with type=compact). */
interface OrariMesseChurch {
  /** API-side church id; used to request the detail endpoint and as the key of the idchurch → DB id map. */
  idchurch: number;
  address: string;
  name: string;
  conosciutaCome: string;
  isopen: boolean;
  /** Treated as a truthiness flag: only churches with a non-empty value get their schedule fetched. */
  nextmass: string;
  /** Latitude as a string; parsed with parseFloat before use. */
  lat: string;
  /** Longitude as a string; parsed with parseFloat before use. */
  lon: string;
  /** Website URL; stored as the church website. */
  sito: string;
  /** Email address; stored as the church email. */
  emailLdc: string;
  /** ICSC code; stored as `orarimesseId` and used for matching existing records. */
  icsc: string;
  /** Municipality; stored as the church city. */
  comune: string;
  tipologia: string;
  /** Stored as `wheelchairAccess` on newly created churches. */
  accessibile: boolean;
}
|
||||
|
||||
/** Diocese object from the `getListaChiese` endpoint; the response array is expected to hold exactly one of these. */
interface OrariMesseDioceseResponse {
  codice_cei: string;
  title: string;
  slug: string;
  countChiese: number;
  /** Churches belonging to the diocese. */
  listaChiese: OrariMesseChurch[];
}
|
||||
|
||||
/** A single mass inside one day of the detail schedule. */
interface OrariMesseMass {
  idmass: number;
  /** Start time in dotted form, e.g. "07.00". */
  time: string;
  /** Free-text note; stored as the schedule note when non-empty. */
  noteOrarioMessa: string;
}
|
||||
|
||||
/** One calendar day of the rolling schedule window. */
interface OrariMesseDay {
  /** Italian date label whose first word is the weekday, e.g. "Giovedì 26 Febbraio". */
  day: string;
  /** Masses celebrated on that day. */
  mass: OrariMesseMass[];
}
|
||||
|
||||
/** Payload of the `getDettaglioMessa` endpoint for one church. Only `days` is read by this script. */
interface OrariMesseDetail {
  idchurch: number;
  name: string;
  address: string;
  lat: string;
  lon: string;
  icsc: string;
  comune: string;
  diocesi: string;
  parroco: string;
  telefono: string;
  email: string;
  sito: string;
  /** Day-by-day schedule that gets collapsed into a recurring weekly schedule. */
  days: OrariMesseDay[];
}
|
||||
|
||||
/** Running counters for one importer run; printed in the summary and stored on the background job. */
interface ImportStats {
  diocesesProcessed: number;
  /** Every church seen in the API listings, including ones later skipped. */
  churchesFound: number;
  /** Churches matched to an existing DB record. */
  churchesMatched: number;
  /** Churches inserted as new records (or that would be, in dry-run mode). */
  churchesCreated: number;
  /** Churches dropped for unusable coordinates or a unique-constraint conflict. */
  churchesSkipped: number;
  /** Churches whose detail payload contained at least one schedule day. */
  schedulesProcessed: number;
  /** Recurring mass rows written (or that would be written, in dry-run mode). */
  massSchedulesCreated: number;
  errors: number;
}
|
||||
|
||||
/** Parsed command-line options. */
interface CLIArgs {
  /** --all: process every diocese returned by the API. */
  all: boolean;
  /** --diocese <slug>: process only this diocese. */
  diocese?: string;
  /** --dry-run: report what would happen without writing churches or schedules. */
  dryRun: boolean;
  /** --schedules-only: skip Pass 1 and only refresh schedules for already-linked churches. */
  schedulesOnly: boolean;
  /** --resume-from <slug>: skip dioceses that come before this slug. */
  resumeFrom?: string;
  /** --job-id <uuid>: background job record to update with status and results. */
  jobId?: string;
}
|
||||
|
||||
// ─── API Client ──────────────────────────────────────────────────────────────
|
||||
|
||||
// Number of API requests issued so far; lets fetchApi skip the delay before the first request and is reported in the summary.
let requestCount = 0;
|
||||
|
||||
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms - Time to wait, in milliseconds.
 * @returns A promise that settles once the timer fires.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Perform a rate-limited GET against the OrariMesse.it API and unwrap its response envelope.
 *
 * Every request except the first one of the run is preceded by a pause of `delayMs`.
 * Failures (non-2xx status, an envelope that is not `status: true` / `code: 'OK'`,
 * network errors, timeouts) are logged and reported as `null` instead of throwing.
 *
 * @param endpoint - Endpoint name appended to the API base URL.
 * @param params - Query-string parameters.
 * @param delayMs - Pause in milliseconds applied before the request.
 * @returns The envelope's `data` payload, or `null` on any failure.
 */
async function fetchApi<T>(endpoint: string, params: Record<string, string> = {}, delayMs: number): Promise<T | null> {
  // Upper bound for a single request so one stalled connection cannot hang the whole import.
  const REQUEST_TIMEOUT_MS = 60000;

  if (requestCount > 0) {
    await delay(delayMs);
  }
  requestCount++;

  const url = new URL(`${API_BASE}/${endpoint}`);
  for (const [key, value] of Object.entries(params)) {
    url.searchParams.set(key, value);
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);

  try {
    const response = await fetch(url.toString(), {
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'application/json',
      },
      signal: controller.signal,
    });

    if (!response.ok) {
      console.error(` HTTP ${response.status} for ${url}`);
      return null;
    }

    // The body is external input: tolerate a literal `null` instead of crashing on property access.
    const json = (await response.json()) as { status: boolean; code: string; data: T } | null;
    if (json?.status === true && json.code === 'OK') {
      return json.data;
    }

    console.error(` API error for ${url}: ${JSON.stringify(json).substring(0, 200)}`);
    return null;
  } catch (error) {
    // Covers network failures, invalid JSON and the timeout abort.
    console.error(` Fetch error for ${url}: ${error instanceof Error ? error.message : error}`);
    return null;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Fetch the list of dioceses from the `getDiocesi` endpoint.
 *
 * @returns The diocese list, or an empty array when the request fails.
 */
async function fetchDioceses(): Promise<OrariMesseDiocese[]> {
  const dioceses = await fetchApi<OrariMesseDiocese[]>('getDiocesi', {}, DIOCESE_DELAY_MS);
  if (!dioceses) {
    return [];
  }
  return dioceses;
}
|
||||
|
||||
/**
 * Fetch the compact church listing for one diocese.
 *
 * The endpoint answers with an array that wraps a single diocese object; that
 * object is unwrapped here.
 *
 * @param slug - Diocese slug sent as the `diocesi` query parameter.
 * @returns The diocese object, or `null` when the request fails or the array is empty.
 */
async function fetchChurchesInDiocese(slug: string): Promise<OrariMesseDioceseResponse | null> {
  const query = { diocesi: slug, type: 'compact' };
  const wrapped = await fetchApi<OrariMesseDioceseResponse[]>('getListaChiese', query, DIOCESE_DELAY_MS);

  if (!wrapped || wrapped.length === 0) {
    return null;
  }
  return wrapped[0];
}
|
||||
|
||||
/**
 * Fetch the detail payload (including the day-by-day schedule) for one church.
 *
 * @param idchurch - API-side church id.
 * @returns The detail object, or `null` when the request fails.
 */
async function fetchChurchDetail(idchurch: number): Promise<OrariMesseDetail | null> {
  const query = { idchurch: String(idchurch) };
  const detail = await fetchApi<OrariMesseDetail>('getDettaglioMessa', query, DETAIL_DELAY_MS);
  return detail;
}
|
||||
|
||||
// ─── Day/Time Conversion ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Extract the weekday number from an Italian date label such as "Giovedì 26 Febbraio".
 *
 * The first whitespace-separated word is lowercased and looked up in ITALIAN_DAY_MAP.
 * The label is normalized to NFC first so an "ì" sent as "i" + combining accent still
 * matches, and surrounding whitespace is ignored.
 *
 * @param dayString - Date label whose first word is the weekday name.
 * @returns The day number from ITALIAN_DAY_MAP, or `null` when the first word is not a known weekday.
 */
function parseItalianDay(dayString: string): number | null {
  const firstWord = dayString.normalize('NFC').trim().split(/\s+/)[0]?.toLowerCase() ?? '';

  // Own-property check: a plain index lookup would return inherited members for words like "constructor".
  if (!Object.prototype.hasOwnProperty.call(ITALIAN_DAY_MAP, firstWord)) {
    return null;
  }
  return ITALIAN_DAY_MAP[firstWord] ?? null;
}
|
||||
|
||||
/**
 * Convert an API time such as "07.00" into "HH:MM" form ("07:00").
 *
 * Surrounding whitespace is dropped and a single-digit hour is zero-padded so the
 * same mass time always yields the same string (it is part of the dedup key).
 * Input that does not look like `H.MM` / `HH.MM` (dot or colon) falls back to
 * replacing the first dot with a colon.
 *
 * @param time - Time string from the API.
 * @returns The normalized time string.
 */
function convertTime(time: string): string {
  const match = /^(\d{1,2})[.:](\d{2})$/.exec(time.trim());
  if (!match) {
    // Unrecognized shape: keep the legacy transformation rather than guessing.
    return time.replace('.', ':');
  }
  const [, hours = '', minutes = ''] = match;
  return `${hours.padStart(2, '0')}:${minutes}`;
}
|
||||
|
||||
/** One entry of the recurring weekly schedule derived from the API's rolling window. */
interface RecurringMass {
  /** Day number as produced by parseItalianDay (0 = Sunday … 6 = Saturday). */
  dayOfWeek: number;
  /** Start time as produced by convertTime. */
  time: string;
  /** Note attached to the mass, or null when the API note is empty. */
  notes: string | null;
}
|
||||
|
||||
/**
 * Collapse the API's 8-day rolling schedule into a recurring weekly schedule.
 *
 * The same weekday can appear twice in the window (e.g. this Thursday and next
 * Thursday), so entries are de-duplicated on weekday + time; the first occurrence
 * wins, including its note. Days with an unrecognized weekday label and malformed
 * entries (missing `mass` array, missing or blank `time`) are skipped rather than
 * throwing, since the payload is unvalidated external JSON.
 *
 * @param days - Day entries from the church detail payload.
 * @returns Unique recurring masses in first-seen order.
 */
function convertScheduleToRecurring(days: OrariMesseDay[]): RecurringMass[] {
  const seen = new Set<string>();
  const result: RecurringMass[] = [];

  for (const day of days) {
    if (!day || typeof day.day !== 'string' || !Array.isArray(day.mass)) continue;

    const dayOfWeek = parseItalianDay(day.day);
    if (dayOfWeek === null) continue;

    for (const mass of day.mass) {
      if (!mass || typeof mass.time !== 'string' || mass.time.trim() === '') continue;

      const time = convertTime(mass.time);
      const key = `${dayOfWeek}:${time}`;
      if (seen.has(key)) continue;
      seen.add(key);

      result.push({
        dayOfWeek,
        time,
        notes: mass.noteOrarioMessa || null,
      });
    }
  }

  return result;
}
|
||||
|
||||
// ─── Database Operations ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Load every church with country 'IT' in the shape the church matcher expects.
 *
 * The selection covers the coordinates, every source-specific id column and the
 * contact fields, so imported candidates can be compared against records that
 * came from any importer.
 *
 * @returns The existing Italian churches.
 */
async function loadExistingItalianChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing Italian churches for deduplication...');

  const rows = await prisma.church.findMany({
    where: { country: 'IT' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });

  console.log(`Loaded ${rows.length} existing Italian churches`);
  return rows;
}
|
||||
|
||||
// ─── Pass 1: Church Upsert ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Tell whether `error` is a Prisma unique-constraint violation.
 * Checks the Prisma error code (P2002) first and falls back to the message text.
 */
function isUniqueConstraintViolation(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) return false;
  if ((error as { code?: unknown }).code === 'P2002') return true;
  return error instanceof Error && error.message.includes('Unique constraint');
}

/**
 * Pass 1: match each API church of one diocese against the known churches and
 * update the match or create a new record.
 *
 * Churches without usable coordinates are skipped. For every church that ends up
 * linked to a DB record, `idchurchToDbId` receives the API id → DB id mapping that
 * Pass 2 relies on. Newly created churches are appended to `existingChurches` so
 * later candidates in the same run can match them.
 *
 * In dry-run mode nothing is written: matches and would-be creations are only
 * counted, and only matched churches are added to the id map.
 *
 * @param dioceseSlug - Slug stored as the diocese of new or diocese-less churches.
 * @param churches - Churches listed by the API for this diocese.
 * @param existingChurches - In-memory dedup list; mutated when churches are created.
 * @param idchurchToDbId - Output map from API church id to DB id.
 * @param dryRun - When true, no database writes are performed.
 * @param stats - Counters updated in place.
 * @throws Any database error other than a unique-constraint violation.
 */
async function processChurchesForDiocese(
  dioceseSlug: string,
  churches: OrariMesseChurch[],
  existingChurches: ExistingChurch[],
  idchurchToDbId: Map<number, string>,
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  for (const church of churches) {
    stats.churchesFound++;

    // Parse coordinates; missing, non-numeric, infinite or zero values cannot be matched by proximity.
    const lat = parseFloat(church.lat);
    const lon = parseFloat(church.lon);
    if (!Number.isFinite(lat) || !Number.isFinite(lon) || lat === 0 || lon === 0) {
      stats.churchesSkipped++;
      continue;
    }

    // Build candidate for dedup
    const candidate = {
      name: church.name,
      lat,
      lng: lon,
      orarimesseId: church.icsc || undefined,
    };

    const duplicate = findDuplicateChurch(candidate, existingChurches);

    if (dryRun) {
      if (duplicate) {
        stats.churchesMatched++;
        // Track idchurch for Pass 2 even in dry run
        idchurchToDbId.set(church.idchurch, duplicate.id);
      } else {
        stats.churchesCreated++;
      }
      continue;
    }

    if (duplicate) {
      // Update existing church: set orarimesseId, fill missing fields
      const updateData: Record<string, unknown> = {
        orarimesseId: church.icsc || undefined,
        orarimesseLastSyncedAt: new Date(),
      };

      if (!duplicate.address && church.address) updateData.address = church.address;
      if (!duplicate.website && church.sito) {
        updateData.website = church.sito;
        updateData.hasWebsite = true;
      }

      // Diocese, city and email are not part of ExistingChurch, so read them from the DB record.
      const dbRecord = await prisma.church.findUnique({
        where: { id: duplicate.id },
        select: { diocese: true, city: true, email: true },
      });
      if (dbRecord && !dbRecord.diocese && dioceseSlug) {
        updateData.diocese = dioceseSlug;
      }
      if (dbRecord && !dbRecord.city && church.comune) {
        updateData.city = church.comune;
      }
      if (dbRecord && !dbRecord.email && church.emailLdc) {
        updateData.email = church.emailLdc;
      }

      try {
        await prisma.church.update({
          where: { id: duplicate.id },
          data: updateData,
        });
      } catch (error) {
        // Unique constraint violation on orarimesseId — another church already has this ICSC
        if (isUniqueConstraintViolation(error)) {
          stats.churchesSkipped++;
          continue;
        }
        throw error;
      }

      // Count the match only once the update went through, so a conflicting
      // church is reported as skipped and not as matched and skipped.
      stats.churchesMatched++;
      idchurchToDbId.set(church.idchurch, duplicate.id);
    } else {
      // Create new church
      try {
        const newChurch = await prisma.church.create({
          data: {
            name: church.name,
            latitude: lat,
            longitude: lon,
            address: church.address || null,
            city: church.comune || null,
            country: 'IT',
            diocese: dioceseSlug,
            website: church.sito || null,
            email: church.emailLdc || null,
            hasWebsite: !!church.sito,
            orarimesseId: church.icsc || null,
            orarimesseLastSyncedAt: new Date(),
            source: 'orarimesse',
            wheelchairAccess: church.accessibile || false,
          },
        });
        stats.churchesCreated++;

        // Add to in-memory array for within-run dedup
        existingChurches.push({
          id: newChurch.id,
          name: church.name,
          latitude: lat,
          longitude: lon,
          osmId: null,
          baiduId: null,
          masstimesId: null,
          orarimesseId: church.icsc || null,
          massSchedulesPhId: null,
          philmassId: null,
          horariosMisasId: null,
          mszeInfoId: null,
          weekdayMassesId: null,
          messesInfoId: null,
          bohosluzbyId: null,
          miserendId: null,
          kerknetId: null,
          gottesdienstzeitenId: null,
          discovermassId: null,
          source: 'orarimesse',
          website: church.sito || null,
          phone: null,
          address: church.address || null,
        });

        idchurchToDbId.set(church.idchurch, newChurch.id);
      } catch (error) {
        if (isUniqueConstraintViolation(error)) {
          stats.churchesSkipped++;
          continue;
        }
        throw error;
      }
    }
  }
}
|
||||
|
||||
// ─── Pass 2: Mass Schedules ─────────────────────────────────────────────────
|
||||
|
||||
/**
 * Pass 2: fetch the detail schedule of every church that advertises a next mass
 * and replace its stored mass schedules with the derived recurring schedule.
 *
 * Only churches present in `idchurchToDbId` are processed. For each church the
 * existing schedules are deleted, the new ones inserted and `lastScrapedAt`
 * updated inside a single transaction. A failure while converting or saving one
 * church's schedule is logged and counted in `stats.errors`; the remaining
 * churches are still processed.
 *
 * In dry-run mode no writes happen; the rows that would be created are only counted.
 *
 * @param churches - Churches listed by the API for this diocese.
 * @param idchurchToDbId - Map from API church id to DB id.
 * @param dryRun - When true, no database writes are performed.
 * @param stats - Counters updated in place.
 */
async function processSchedulesForDiocese(
  churches: OrariMesseChurch[],
  idchurchToDbId: Map<number, string>,
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  // Filter to churches with active schedules
  const churchesWithMass = churches.filter((c) => c.nextmass);
  if (churchesWithMass.length === 0) return;

  console.log(` Pass 2: Fetching schedules for ${churchesWithMass.length} churches with active masses...`);

  for (const church of churchesWithMass) {
    const dbId = idchurchToDbId.get(church.idchurch);
    if (!dbId) continue; // Church not in our DB (skipped in Pass 1)

    const detail = await fetchChurchDetail(church.idchurch);
    // The payload is unvalidated JSON: require an actual, non-empty array of days.
    if (!detail || !Array.isArray(detail.days) || detail.days.length === 0) {
      continue;
    }

    stats.schedulesProcessed++;

    try {
      // Converted inside the try so one malformed payload only fails this church
      // instead of aborting the remaining churches of the diocese.
      const recurring = convertScheduleToRecurring(detail.days);
      if (recurring.length === 0) continue;

      if (dryRun) {
        stats.massSchedulesCreated += recurring.length;
        continue;
      }

      await prisma.$transaction(async (tx) => {
        // Delete existing mass schedules for this church
        await tx.massSchedule.deleteMany({ where: { churchId: dbId } });

        // Create new recurring schedules
        await tx.massSchedule.createMany({
          data: recurring.map((m) => ({
            churchId: dbId,
            dayOfWeek: m.dayOfWeek,
            time: m.time,
            language: 'Italian',
            notes: m.notes,
          })),
        });

        // Mark church as scraped
        await tx.church.update({
          where: { id: dbId },
          data: { lastScrapedAt: new Date() },
        });
      });

      stats.massSchedulesCreated += recurring.length;
    } catch (error) {
      stats.errors++;
      console.error(` Error saving schedules for idchurch=${church.idchurch}: ${error instanceof Error ? error.message : error}`);
    }
  }
}
|
||||
|
||||
// ─── CLI ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Parse the command-line options from `process.argv`.
 *
 * Flags that take a value (`--diocese`, `--resume-from`, `--job-id`) must be
 * followed by one. A missing value, or another flag in its place, ends the
 * process with exit code 1, so `--job-id --dry-run` can no longer swallow
 * `--dry-run` and trigger a real import. Unknown arguments are reported with a
 * warning and otherwise ignored. `--help` / `-h` prints usage and exits with
 * code 0. Exits with code 1 when neither `--all` nor `--diocese` is given.
 *
 * @returns The parsed options.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = {
    all: false,
    dryRun: false,
    schedulesOnly: false,
  };

  // Validate the token that follows a value-taking flag.
  const requireValue = (flag: string, value: string | undefined): string => {
    if (value === undefined || value.startsWith('-')) {
      console.error(`Error: ${flag} requires a value`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    switch (arg) {
      case '--all':
        result.all = true;
        break;
      case '--diocese':
        result.diocese = requireValue(arg, args[++i]);
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--schedules-only':
        result.schedulesOnly = true;
        break;
      case '--resume-from':
        result.resumeFrom = requireValue(arg, args[++i]);
        break;
      case '--job-id':
        result.jobId = requireValue(arg, args[++i]);
        break;
      case '--help':
      case '-h':
        console.log(`
Usage: npx tsx scripts/import-orarimesse.ts [options]

Options:
  --all                  Import from all 77 dioceses
  --diocese <slug>       Import from a single diocese (e.g. "roma")
  --dry-run              No database writes, just report what would happen
  --schedules-only       Skip Pass 1 (church upsert), only fetch schedules
  --resume-from <slug>   Skip dioceses until reaching this slug
  --job-id <uuid>        Background job tracking ID
  --help, -h             Show this help message

Examples:
  npx tsx scripts/import-orarimesse.ts --diocese roma --dry-run
  npx tsx scripts/import-orarimesse.ts --all
  npx tsx scripts/import-orarimesse.ts --all --schedules-only
  npx tsx scripts/import-orarimesse.ts --all --resume-from napoli
`);
        process.exit(0);
      default:
        // A typo such as "--dryrun" would otherwise go unnoticed.
        console.warn(`Warning: ignoring unknown argument "${arg}"`);
    }
  }

  if (!result.all && !result.diocese) {
    console.error('Error: specify --all or --diocese <slug>');
    process.exit(1);
  }

  return result;
}
|
||||
|
||||
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Render a millisecond duration as a compact "Xh Ym Zs" / "Ym Zs" / "Zs" string.
 *
 * Larger units are only shown when they are non-zero; fractions of a second are
 * truncated.
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);

  if (totalHours > 0) {
    const minutePart = totalMinutes % 60;
    const secondPart = totalSeconds % 60;
    return `${totalHours}h ${minutePart}m ${secondPart}s`;
  }

  if (totalMinutes > 0) {
    const secondPart = totalSeconds % 60;
    return `${totalMinutes}m ${secondPart}s`;
  }

  return `${totalSeconds}s`;
}
|
||||
|
||||
// ─── Main ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Run the importer: parse the options, load the existing Italian churches, then
 * for each selected diocese run Pass 1 (church upsert, unless --schedules-only)
 * and Pass 2 (schedule import), and finally print a summary.
 *
 * When --job-id is given, the background job is marked running at the start and
 * completed (with or without errors) at the end; failures to update the job are
 * logged but never abort the import. An error while processing one diocese is
 * counted and logged, and the run continues with the next diocese. An empty
 * diocese list under --all is reported as an error rather than as a clean run.
 */
async function main() {
  const args = parseArgs();
  const startTime = Date.now();

  console.log('\n' + '='.repeat(70));
  console.log('ORARIMESSE.IT IMPORTER');
  console.log('='.repeat(70));
  // --diocese wins over --all when both are given, matching the selection logic below.
  console.log(`Mode: ${args.diocese ? `Single diocese: ${args.diocese}` : 'All dioceses'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  console.log(`Schedules only: ${args.schedulesOnly ? 'YES' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from: ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70) + '\n');

  // Update background job status if provided
  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (error) {
      // Job might not exist yet, that's fine — but leave a trace for debugging.
      console.warn(`Could not mark background job ${args.jobId} as running: ${error instanceof Error ? error.message : error}`);
    }
  }

  // Load existing Italian churches for dedup
  const existingChurches = await loadExistingItalianChurches();

  // Fetch diocese list
  console.log('Fetching diocese list from OrariMesse.it...');
  const allDioceses = await fetchDioceses();
  console.log(`Found ${allDioceses.length} dioceses\n`);

  // Filter to requested dioceses
  let diocesesToProcess: OrariMesseDiocese[];
  if (args.diocese) {
    const found = allDioceses.find((d) => d.slug === args.diocese);
    if (!found) {
      console.error(`Diocese "${args.diocese}" not found. Available: ${allDioceses.map((d) => d.slug).join(', ')}`);
      process.exit(1);
    }
    diocesesToProcess = [found];
  } else {
    diocesesToProcess = allDioceses;
  }

  // Handle --resume-from
  if (args.resumeFrom) {
    const idx = diocesesToProcess.findIndex((d) => d.slug === args.resumeFrom);
    if (idx === -1) {
      console.error(`Resume diocese "${args.resumeFrom}" not found.`);
      process.exit(1);
    }
    console.log(`Resuming from diocese "${args.resumeFrom}" (skipping ${idx} dioceses)\n`);
    diocesesToProcess = diocesesToProcess.slice(idx);
  }

  const stats: ImportStats = {
    diocesesProcessed: 0,
    churchesFound: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesProcessed: 0,
    massSchedulesCreated: 0,
    errors: 0,
  };

  // fetchDioceses returns [] when the request fails; do not let that pass as a clean run.
  if (allDioceses.length === 0) {
    console.error('No dioceses returned by the OrariMesse.it API; nothing to import.');
    stats.errors++;
  }

  // Map OrariMesse idchurch → our DB id (for Pass 2 schedule lookups)
  const idchurchToDbId = new Map<number, string>();

  // Schedules-only mode skips Pass 1, so API churches are resolved through their ICSC code.
  // Built once here instead of scanning the whole church list for every API church.
  const dbIdByIcsc = new Map<string, string>();
  if (args.schedulesOnly) {
    console.log('Schedules-only mode: loading existing orarimesseId mappings...');
    const mapped = await prisma.church.findMany({
      where: { orarimesseId: { not: null } },
      select: { id: true, orarimesseId: true },
    });
    console.log(`Found ${mapped.length} churches with orarimesseId in DB\n`);

    for (const existing of existingChurches) {
      // First record wins, as with a linear search over the list.
      if (existing.orarimesseId && !dbIdByIcsc.has(existing.orarimesseId)) {
        dbIdByIcsc.set(existing.orarimesseId, existing.id);
      }
    }
  }

  // Process each diocese
  for (let i = 0; i < diocesesToProcess.length; i++) {
    const diocese = diocesesToProcess[i];
    const elapsed = formatDuration(Date.now() - startTime);
    console.log(`[${i + 1}/${diocesesToProcess.length}] Diocese: ${diocese.title} (${diocese.slug}) [${elapsed} elapsed]`);

    try {
      // Fetch churches in this diocese
      const dioceseData = await fetchChurchesInDiocese(diocese.slug);
      if (!dioceseData || !dioceseData.listaChiese || dioceseData.listaChiese.length === 0) {
        console.log(` No churches found, skipping`);
        stats.diocesesProcessed++;
        continue;
      }

      const churches = dioceseData.listaChiese;
      console.log(` Found ${churches.length} churches (${churches.filter((c) => c.nextmass).length} with active masses)`);

      // Pass 1: Upsert churches
      if (!args.schedulesOnly) {
        const prevMatched = stats.churchesMatched;
        const prevCreated = stats.churchesCreated;
        const prevSkipped = stats.churchesSkipped;

        await processChurchesForDiocese(
          diocese.slug, churches, existingChurches, idchurchToDbId,
          args.dryRun, stats
        );

        const matched = stats.churchesMatched - prevMatched;
        const created = stats.churchesCreated - prevCreated;
        const skipped = stats.churchesSkipped - prevSkipped;
        console.log(` Pass 1: ${matched} matched, ${created} created, ${skipped} skipped`);
      } else {
        // In schedules-only mode, still need to build idchurch → dbId map
        for (const church of churches) {
          if (!church.icsc) continue;
          const existingId = dbIdByIcsc.get(church.icsc);
          if (existingId !== undefined) {
            idchurchToDbId.set(church.idchurch, existingId);
          }
        }
      }

      // Pass 2: Import schedules
      const prevSchedules = stats.massSchedulesCreated;
      const prevProcessed = stats.schedulesProcessed;
      await processSchedulesForDiocese(churches, idchurchToDbId, args.dryRun, stats);
      const newSchedules = stats.massSchedulesCreated - prevSchedules;
      // Report this diocese's share, not the running total across dioceses.
      const processedHere = stats.schedulesProcessed - prevProcessed;
      if (newSchedules > 0) {
        console.log(` Pass 2: ${processedHere} churches processed, ${newSchedules} mass times created`);
      }

      stats.diocesesProcessed++;
    } catch (error) {
      stats.errors++;
      console.error(` ERROR processing diocese ${diocese.slug}: ${error instanceof Error ? error.message : error}`);
    }
  }

  // Print summary
  const totalTime = Date.now() - startTime;
  console.log('\n' + '='.repeat(70));
  console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
  console.log('='.repeat(70));
  console.log(`Dioceses processed: ${stats.diocesesProcessed}`);
  console.log(`Churches found: ${stats.churchesFound}`);
  console.log(` Matched (existing): ${stats.churchesMatched}`);
  console.log(` Created (new): ${stats.churchesCreated}`);
  console.log(` Skipped: ${stats.churchesSkipped}`);
  console.log(`Schedules processed: ${stats.schedulesProcessed}`);
  console.log(`Mass schedules created: ${stats.massSchedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Total time: ${formatDuration(totalTime)}`);
  console.log(`API requests: ${requestCount}`);
  console.log('='.repeat(70) + '\n');

  // Update background job
  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: {
          status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
          completedAt: new Date(),
          result: JSON.stringify(stats),
        },
      });
    } catch (error) {
      // Best effort: the import itself is done, so only report the failure.
      console.warn(`Could not store results on background job ${args.jobId}: ${error instanceof Error ? error.message : error}`);
    }
  }
}
|
||||
|
||||
// Script entry point. Database connections are closed before exiting, also after
// a fatal error: exiting straight from the error handler would skip the cleanup.
void (async () => {
  let exitCode = 0;

  try {
    await main();
  } catch (error) {
    console.error('Fatal error:', error);
    exitCode = 1;
  }

  try {
    await prisma.$disconnect();
    await pool.end();
  } catch (cleanupError) {
    console.error('Error while closing database connections:', cleanupError);
  }

  // Force the non-zero exit only after cleanup had its chance to run.
  if (exitCode !== 0) {
    process.exit(exitCode);
  }
})();
|
||||
Reference in New Issue
Block a user