- Remove discovermassId/buscarmisasNetworkId from findDuplicateChurch match passes (importers now do their own pre-check dedup); restore as optional fields on ExistingChurch to keep type/runtime in sync - Add HK bounding box to COUNTRY_BOUNDING_BOXES; fix silent 0-result fallback when country query returns empty from mirror server - discovermass importer: add --limit flag and skip-already-imported pre-check using importedSlugs set - Import scripts: remove discovermassId from ExistingChurch select/stubs (field not needed in shared matcher context) - Schema: reorder discovermassId/kerknetId/gottesdienstzeitenId fields Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
685 lines
23 KiB
TypeScript
685 lines
23 KiB
TypeScript
#!/usr/bin/env tsx
|
|
/**
 * Import Catholic churches and mass schedules from gottesdienstzeiten.de (Germany)
 *
 * gottesdienstzeiten.de is a German worship service directory with ~6,878 Catholic
 * churches. It runs on WordPress with a fully open REST API at /wp-json/wp/v2/posts.
 *
 * Data includes: church name, address, coordinates (Google Maps embed), diocese,
 * mass schedules (day/type/time table), website, email, phone.
 *
 * Import strategy:
 * 1. Fetch all Catholic diocese category IDs from WP API
 * 2. Paginate through posts per category (100 per page)
 * 3. Parse HTML content for coordinates, address, schedule table, info table
 * 4. Match against existing German churches via church-matcher
 * 5. Upsert churches and mass schedules
 *
 * Usage:
 *   npx tsx scripts/import-gottesdienstzeiten.ts --all --dry-run
 *   npx tsx scripts/import-gottesdienstzeiten.ts --all
 *   npx tsx scripts/import-gottesdienstzeiten.ts --diocese 129 --dry-run # Köln only
 *   npx tsx scripts/import-gottesdienstzeiten.ts --all --resume-from 5
 */
|
|
|
|
import dotenv from 'dotenv';
|
|
import path from 'path';
|
|
|
|
// Load the environment files in this order: .env.local first, then .env.
// NOTE(review): presumably .env.local is meant to take precedence (dotenv is
// documented not to override variables that are already set) — confirm.
for (const envFile of ['.env.local', '.env']) {
  dotenv.config({ path: path.resolve(process.cwd(), envFile) });
}
|
|
|
|
import { Pool } from 'pg';
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
|
import { PrismaClient } from '@prisma/client';
|
|
|
|
// Connection string: DATABASE_URL from the environment, else a local default.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';

// Mask the password segment (":secret@") before echoing the URL.
const maskedDbUrl = dbUrl.replace(/:[^:@]+@/, ':***@');
console.log(`Connecting to database: ${maskedDbUrl}`);

// URLs containing "neon" get TLS with certificate verification turned off;
// every other URL leaves the `ssl` option undefined.
const sslOptions = dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined;

const pool = new Pool({ connectionString: dbUrl, ssl: sslOptions });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
|
|
|
import { findDuplicateChurch } from '../src/lib/church-matcher';
|
|
import type { ExistingChurch } from '../src/lib/church-matcher';
|
|
|
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
|
|
/** Root of the WordPress REST API that all requests are built from. */
const API_BASE = 'https://gottesdienstzeiten.de/wp-json/wp/v2';

/** User-Agent header sent with every request. */
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';

/** Pause (ms) inserted before every request except the first. */
const REQUEST_DELAY_MS = 1000;

/** Pause (ms) before retrying a throttled or failed request. */
const RETRY_DELAY_MS = 5000;

/** Maximum number of attempts per URL. */
const MAX_RETRIES = 3;

/** Page size requested from the posts endpoint. */
const POSTS_PER_PAGE = 100;

/** WordPress category whose children are fetched as the Catholic diocese categories. */
const CATHOLIC_PARENT_CATEGORY = 4;
|
|
|
|
// Lookup from lower-case German weekday names to dayOfWeek
// (0=Sun, 1=Mon, ..., 6=Sat). Each weekday is registered twice: with the
// adverbial "-s" suffix ("sonntags") and without it ("sonntag"), because
// some entries use the bare form.
const GERMAN_DAYS: Record<string, number> = {};
['sonntag', 'montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag', 'samstag'].forEach(
  (weekday, dayOfWeek) => {
    GERMAN_DAYS[`${weekday}s`] = dayOfWeek;
    GERMAN_DAYS[weekday] = dayOfWeek;
  },
);
|
|
|
|
// Lower-case service-type labels that count as a mass; schedule rows with
// other labels are filtered out as non-mass services.
const MASS_TYPES = new Set([
  'messfeier',
  'vorabendmesse',
  'heilige messe',
  'hl. messe',
  'hochamt',
  'festmesse',
  'familienmesse',
  'kindergottesdienst',
  'jugendmesse',
  'abendmesse',
  'frühmesse',
  'werktagsmesse',
  'sonntagsmesse',
  'messe',
  'eucharistiefeier',
]);
|
|
|
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
|
|
/** A diocese category as read from the WordPress categories endpoint. */
interface DioceseCat {
  /** WordPress category ID, used to filter posts. */
  id: number;
  /** Category name; passed to the parser as the diocese name. */
  name: string;
  /** Number of posts the API reports for this category. */
  count: number;
}
|
|
|
|
/** One church extracted from a WordPress post. */
interface ParsedChurch {
  /** WordPress post ID; its string form is stored as gottesdienstzeitenId. */
  wpId: number;
  /** WordPress post slug (used in log messages). */
  slug: string;
  /** Church name with any leading "(City)" prefix removed. */
  name: string;
  latitude: number;
  longitude: number;
  /** Street part of the address, or the full address text when no "ZIP City" suffix was found. */
  address: string | null;
  /** Five-digit postal code, when present. */
  zip: string | null;
  city: string | null;
  /** Name of the diocese category the post was fetched under. */
  diocese: string | null;
  website: string | null;
  email: string | null;
  phone: string | null;
  /** De-duplicated weekly mass times. */
  schedules: ParsedSchedule[];
}

/** A single recurring weekly mass time. */
interface ParsedSchedule {
  /** 0=Sun, 1=Mon, ..., 6=Sat. */
  dayOfWeek: number;
  /** Zero-padded "HH:MM". */
  time: string;
}
|
|
|
|
/** Running counters for one import run; printed in the final summary. */
interface ImportStats {
  /** Diocese categories that were walked to completion. */
  diocesesProcessed: number;
  /** WordPress posts returned by the API. */
  postsFound: number;
  /** Posts that parsed into a church. */
  churchesParsed: number;
  /** Parsed churches that matched an existing record. */
  churchesMatched: number;
  /** Parsed churches inserted as new records (or that would be, in a dry run). */
  churchesCreated: number;
  /** Posts that did not parse, plus writes rejected by a unique constraint. */
  churchesSkipped: number;
  /** Mass schedule rows written (or that would be, in a dry run). */
  schedulesCreated: number;
  /** Failures that were logged and counted. */
  errors: number;
}
|
|
|
|
/** Command-line options accepted by the script. */
interface CLIArgs {
  /** --all: import every Catholic diocese category. */
  all: boolean;
  /** --dry-run: report what would happen without writing to the database. */
  dryRun: boolean;
  /** --resume-from <n>: skip the first n diocese categories. */
  resumeFrom?: number;
  /** --diocese <catId>: import only this diocese category. */
  diocese?: number;
  /** --job-id <uuid>: background job record to update with progress. */
  jobId?: string;
}
|
|
|
|
// ─── HTTP Helpers ────────────────────────────────────────────────────────────
|
|
|
|
// Number of HTTP requests started so far. fetchJson uses it to skip the
// politeness delay before the very first request; main prints it in the summary.
let requestCount = 0;
|
|
|
|
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise that settles once the timer fires.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * GETs `url` and returns the parsed JSON body, or null when it cannot be obtained.
 *
 * Every call except the first waits REQUEST_DELAY_MS before sending. Up to
 * MAX_RETRIES attempts are made: HTTP 429/503 responses and thrown errors
 * (network failures, invalid JSON) are retried after RETRY_DELAY_MS, while
 * any other non-OK status gives up immediately.
 *
 * @param url - Absolute URL to request.
 * @returns The decoded JSON value, or null on failure.
 */
async function fetchJson(url: string): Promise<any | null> {
  // Rate-limit: pause before every request but the very first one.
  if (requestCount > 0) {
    await delay(REQUEST_DELAY_MS);
  }
  requestCount += 1;

  let attempt = 0;
  while (attempt < MAX_RETRIES) {
    attempt += 1;
    const canRetry = attempt < MAX_RETRIES;

    try {
      const res = await fetch(url, { headers: { 'User-Agent': USER_AGENT } });
      const throttled = res.status === 429 || res.status === 503;

      if (throttled && canRetry) {
        console.log(` HTTP ${res.status} — retrying in ${RETRY_DELAY_MS / 1000}s`);
        await delay(RETRY_DELAY_MS);
        continue;
      }

      // Out of retries while throttled, or any other non-2xx status.
      if (throttled || !res.ok) {
        return null;
      }

      // Awaited inside the try so a JSON parse failure follows the retry path.
      return await res.json();
    } catch (err) {
      if (!canRetry) {
        console.error(` Fetch error: ${err instanceof Error ? err.message : err}`);
        return null;
      }
      await delay(RETRY_DELAY_MS);
    }
  }

  return null;
}
|
|
|
|
// ─── Parsing ─────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Removes HTML tags from `html`, decodes character references, and trims
 * the result.
 *
 * Rendered text can carry entities such as `&amp;`, `&#038;` or `&#8211;`;
 * leaving them in place would put the raw markup into names, e-mail
 * addresses and phone numbers. Decoding runs in a single pass over the
 * tag-stripped text, so `&amp;lt;` becomes `&lt;` and is not decoded twice.
 * Named references outside the small table below, and numeric references
 * that are not valid code points, are left unchanged.
 *
 * @param html - HTML fragment.
 * @returns Plain text with tags removed and entities decoded.
 */
function stripHtml(html: string): string {
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
  ]);

  return html
    .replace(/<[^>]+>/g, '')
    .replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g, (entity: string, body: string) => {
      if (body.charAt(0) !== '#') {
        return namedEntities.get(body) ?? entity;
      }
      const isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // Reject NUL, surrogates and values beyond the Unicode range.
      const isValid =
        codePoint > 0 && codePoint <= 0x10ffff && !(codePoint >= 0xd800 && codePoint <= 0xdfff);
      return isValid ? String.fromCodePoint(codePoint) : entity;
    })
    .trim();
}
|
|
|
|
/**
 * Converts a time label such as "09.00 Uhr" or "18:30 Uhr" to zero-padded
 * "HH:MM". Returns null when the text is not a clock time between 00:00
 * and 23:59.
 */
function normalizeScheduleTime(raw: string): string | null {
  const cleaned = raw
    .replace(/\s*Uhr\s*/i, '')
    .replace('.', ':')
    .trim();
  const parts = cleaned.match(/^(\d{1,2}):(\d{2})$/);
  if (!parts) return null;
  if (Number(parts[1]) > 23 || Number(parts[2]) > 59) return null;
  return `${parts[1].padStart(2, '0')}:${parts[2]}`;
}

/**
 * Extracts the de-duplicated mass times from the schedule table markup.
 * Rows carry the weekday in a `<th><em>` header and the service type and
 * time in the first two `<td><em>` cells.
 */
function parseScheduleTable(schedTable: string): ParsedSchedule[] {
  const schedules: ParsedSchedule[] = [];
  const rows = schedTable.match(/<tr[^>]*>([\s\S]*?)<\/tr>/g) || [];

  let currentDay = -1;
  const seen = new Set<string>();

  for (const row of rows) {
    // Day header (in <th> with <em>). An empty header keeps the previous
    // day (continuation row). A non-empty header that is not a known
    // weekday resets the day, so its rows are skipped instead of being
    // filed under the preceding weekday.
    const dayMatch = row.match(/<th[^>]*>[\s\S]*?<em>([^<]*)<\/em>/);
    const dayName = dayMatch ? dayMatch[1].trim().toLowerCase() : '';
    if (dayName) {
      // Own-property check so names like "constructor" cannot resolve to
      // something inherited from Object.prototype.
      currentDay = Object.prototype.hasOwnProperty.call(GERMAN_DAYS, dayName)
        ? GERMAN_DAYS[dayName]
        : -1;
    }

    // Type and time come from the first two <td><em>...</em></td> cells.
    const cells = row.match(/<td[^>]*>[\s\S]*?<em>([^<]*)<\/em>[\s\S]*?<\/td>/g);
    if (!cells || cells.length < 2 || currentDay < 0) continue;

    const typeMatch = cells[0].match(/<em>([^<]*)<\/em>/);
    const timeMatch = cells[1].match(/<em>([^<]*)<\/em>/);
    if (!typeMatch || !timeMatch) continue;

    // Only include mass-related service types.
    const massType = typeMatch[1].trim().toLowerCase();
    const isMass =
      MASS_TYPES.has(massType) ||
      massType.includes('messe') ||
      massType.includes('messfeier') ||
      massType.includes('eucharistie');
    if (!isMass) continue;

    const normalizedTime = normalizeScheduleTime(timeMatch[1].trim());
    if (normalizedTime === null) continue;

    const key = `${currentDay}:${normalizedTime}`;
    if (!seen.has(key)) {
      seen.add(key);
      schedules.push({ dayOfWeek: currentDay, time: normalizedTime });
    }
  }

  return schedules;
}

/**
 * Parses one WordPress post into a church record.
 *
 * Reads the title (dropping a leading "(City)" prefix), the coordinates
 * from the Google Maps embed, the address from the first `<strong>` tag,
 * contact details from the second table and mass times from the first.
 *
 * @param post - Raw post object from the WordPress REST API.
 * @param dioceseName - Diocese name to attach to the result.
 * @returns The parsed church, or null when the post has no usable
 *   coordinates (missing, non-numeric, 0/0, or outside the valid
 *   latitude/longitude range).
 */
function parsePost(post: any, dioceseName: string | null): ParsedChurch | null {
  const content: string = post.content?.rendered || '';
  const wpId: number = post.id;
  const slug: string = post.slug;

  // Title format: "(City) Church Name"; strip the "(City)" prefix.
  let name = stripHtml(post.title?.rendered || '');
  const nameMatch = name.match(/^\([^)]+\)\s*(.+)$/);
  if (nameMatch) name = nameMatch[1];

  // Coordinates come from the Google Maps embed URL.
  const coordMatch = content.match(/maps\?q=([-\d.]+),([-\d.]+)/);
  if (!coordMatch) return null;

  const latitude = parseFloat(coordMatch[1]);
  const longitude = parseFloat(coordMatch[2]);
  if (isNaN(latitude) || isNaN(longitude) || (latitude === 0 && longitude === 0)) return null;
  // Values outside the valid coordinate range cannot be a real location.
  if (Math.abs(latitude) > 90 || Math.abs(longitude) > 180) return null;

  // Address is the first <strong> tag, formatted "Street, ZIP City".
  const addrMatch = content.match(/<strong>([^<]+)<\/strong>/);
  let address: string | null = null;
  let zip: string | null = null;
  let city: string | null = null;

  if (addrMatch) {
    const fullAddr = addrMatch[1].trim();
    address = fullAddr;

    const zipCityMatch = fullAddr.match(/,\s*(\d{5})\s+(.+)$/);
    if (zipCityMatch) {
      zip = zipCityMatch[1];
      city = zipCityMatch[2];
      address = fullAddr.replace(/,\s*\d{5}\s+.+$/, '').trim();
    }
  }

  const tables = content.match(/<table[^>]*>([\s\S]*?)<\/table>/g) || [];

  // Info table (second table): website, e-mail, phone.
  let website: string | null = null;
  let email: string | null = null;
  let phone: string | null = null;

  if (tables.length >= 2) {
    const infoTable = tables[1];

    const websiteMatch = infoTable.match(/Website[\s\S]*?<a[^>]*href="([^"]+)"/);
    if (websiteMatch) website = websiteMatch[1];

    const emailMatch = infoTable.match(/E-Mail[\s\S]*?<td[^>]*>([\s\S]*?)<\/td>/);
    if (emailMatch) {
      const emailText = stripHtml(emailMatch[1]);
      if (emailText.includes('@')) email = emailText;
    }

    const phoneMatch = infoTable.match(/Telefon[\s\S]*?<td[^>]*>([\s\S]*?)<\/td>/);
    if (phoneMatch) {
      const phoneText = stripHtml(phoneMatch[1]);
      if (phoneText.length > 3) phone = phoneText;
    }
  }

  // Schedule table (first table).
  const schedules: ParsedSchedule[] = tables.length >= 1 ? parseScheduleTable(tables[0]) : [];

  return {
    wpId, slug, name, latitude, longitude,
    address, zip, city, diocese: dioceseName,
    website, email, phone, schedules,
  };
}
|
|
|
|
// ─── Database Operations ─────────────────────────────────────────────────────
|
|
|
|
/**
 * Reads every church stored with country 'DE', selecting only the columns
 * the duplicate matcher and the merge step use (identity, coordinates,
 * per-source external IDs, source, website, phone, address).
 *
 * @returns The existing German churches.
 */
async function loadExistingGermanChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing German churches for deduplication...');

  const germanChurches = await prisma.church.findMany({
    where: { country: 'DE' },
    select: {
      // Identity and position
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      // External IDs, one per import source
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      // Provenance and contact details
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });

  console.log(`Loaded ${germanChurches.length} existing German churches`);
  return germanChurches;
}
|
|
|
|
// ─── Import Logic ────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Fetches the child categories of CATHOLIC_PARENT_CATEGORY (one page of up
 * to 100) and returns them ordered by post count, largest first.
 *
 * @returns The diocese categories, or an empty array when the request failed.
 */
async function fetchDioceseCategories(): Promise<DioceseCat[]> {
  console.log('Fetching Catholic diocese categories...');

  const endpoint = `${API_BASE}/categories?per_page=100&parent=${CATHOLIC_PARENT_CATEGORY}`;
  const payload = await fetchJson(endpoint);
  if (!payload) {
    console.error('Failed to fetch categories');
    return [];
  }

  // Keep only the three fields the importer needs.
  const dioceses: DioceseCat[] = payload.map((entry: any) => {
    const { id, name, count } = entry;
    return { id, name, count };
  });

  let totalPosts = 0;
  for (const diocese of dioceses) {
    totalPosts += diocese.count;
  }
  console.log(`Found ${dioceses.length} diocese categories with ${totalPosts} total posts\n`);

  dioceses.sort((left, right) => right.count - left.count);
  return dioceses;
}
|
|
|
|
/**
 * True when `error` looks like a Prisma unique-constraint violation: either
 * the error carries code 'P2002' or its message mentions "Unique constraint".
 */
function isUniqueConstraintError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  const code = (error as Error & { code?: unknown }).code;
  return code === 'P2002' || error.message.includes('Unique constraint');
}

/**
 * Imports every post of one diocese category.
 *
 * Pages are requested until the API returns nothing or a short page, so the
 * walk does not depend on `cat.count` being accurate (with `--diocese` it is
 * only a placeholder). Each post is parsed, matched against
 * `existingChurches`, and then either merged into the matching record or
 * inserted as a new church. In a dry run only the counters are updated.
 *
 * A failure on a single church is counted in `stats.errors` and logged; it
 * does not abort the remaining posts of the diocese.
 *
 * @param cat - Diocese category to import.
 * @param existingChurches - In-memory list used for duplicate detection;
 *   newly created churches are appended to it.
 * @param dryRun - When true, no database writes are made.
 * @param stats - Counters updated in place.
 */
async function processDiocese(
  cat: DioceseCat,
  existingChurches: ExistingChurch[],
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  // Safety cap so a misbehaving API cannot keep the loop running forever;
  // it never cuts below what the reported count asks for.
  const pageLimit = Math.max(Math.ceil(cat.count / POSTS_PER_PAGE), 500);

  for (let page = 1; page <= pageLimit; page++) {
    const url = `${API_BASE}/posts?categories=${cat.id}&per_page=${POSTS_PER_PAGE}&page=${page}`;
    const posts = await fetchJson(url);
    if (!posts || !Array.isArray(posts) || posts.length === 0) break;

    stats.postsFound += posts.length;

    for (const post of posts) {
      const church = parsePost(post, cat.name);
      if (!church) {
        stats.churchesSkipped++;
        continue;
      }

      stats.churchesParsed++;
      const gdzId = String(church.wpId);

      const candidate = {
        name: church.name,
        lat: church.latitude,
        lng: church.longitude,
        gottesdienstzeitenId: gdzId,
      };

      const duplicate = findDuplicateChurch(candidate, existingChurches);

      if (dryRun) {
        if (duplicate) {
          stats.churchesMatched++;
        } else {
          stats.churchesCreated++;
        }
        stats.schedulesCreated += church.schedules.length;
        continue;
      }

      if (duplicate) {
        stats.churchesMatched++;

        // Link the record to this source and fill in only the fields the
        // existing record is missing.
        const updateData: Record<string, unknown> = { gottesdienstzeitenId: gdzId };
        if (!duplicate.address && church.address) updateData.address = church.address;
        if (!duplicate.website && church.website) {
          updateData.website = church.website;
          updateData.hasWebsite = true;
        }
        if (!duplicate.phone && church.phone) updateData.phone = church.phone;

        try {
          await prisma.church.update({
            where: { id: duplicate.id },
            data: updateData,
          });
        } catch (error) {
          if (isUniqueConstraintError(error)) {
            stats.churchesSkipped++;
            continue;
          }
          // Count and move on, like the create path, instead of throwing
          // away the rest of the diocese.
          stats.errors++;
          console.error(` Error updating ${church.slug}: ${error instanceof Error ? error.message : error}`);
          continue;
        }

        if (church.schedules.length > 0) {
          try {
            // Replace the stored schedule atomically.
            await prisma.$transaction(async (tx) => {
              await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
              await tx.massSchedule.createMany({
                data: church.schedules.map((s) => ({
                  churchId: duplicate.id,
                  dayOfWeek: s.dayOfWeek,
                  time: s.time,
                  language: 'German',
                })),
              });
              await tx.church.update({
                where: { id: duplicate.id },
                data: { lastScrapedAt: new Date() },
              });
            });
            stats.schedulesCreated += church.schedules.length;
          } catch (error) {
            stats.errors++;
            console.error(` Error saving schedules for ${church.slug}: ${error instanceof Error ? error.message : error}`);
          }
        }
      } else {
        try {
          const newChurch = await prisma.church.create({
            data: {
              name: church.name,
              latitude: church.latitude,
              longitude: church.longitude,
              address: church.address,
              zip: church.zip,
              city: church.city,
              country: 'DE',
              diocese: church.diocese || undefined,
              website: church.website,
              hasWebsite: !!church.website,
              email: church.email,
              phone: church.phone,
              gottesdienstzeitenId: gdzId,
              source: 'gottesdienstzeiten',
              websiteLanguage: 'de',
            },
          });
          stats.churchesCreated++;

          // Make the new record visible to duplicate detection for the
          // remaining posts of this run.
          existingChurches.push({
            id: newChurch.id,
            name: church.name,
            latitude: church.latitude,
            longitude: church.longitude,
            osmId: null,
            baiduId: null,
            masstimesId: null,
            orarimesseId: null,
            massSchedulesPhId: null,
            philmassId: null,
            horariosMisasId: null,
            mszeInfoId: null,
            weekdayMassesId: null,
            messesInfoId: null,
            bohosluzbyId: null,
            miserendId: null,
            kerknetId: null,
            gottesdienstzeitenId: gdzId,
            source: 'gottesdienstzeiten',
            website: church.website,
            phone: church.phone,
            address: church.address,
          });

          if (church.schedules.length > 0) {
            await prisma.massSchedule.createMany({
              data: church.schedules.map((s) => ({
                churchId: newChurch.id,
                dayOfWeek: s.dayOfWeek,
                time: s.time,
                language: 'German',
              })),
            });
            await prisma.church.update({
              where: { id: newChurch.id },
              data: { lastScrapedAt: new Date() },
            });
            stats.schedulesCreated += church.schedules.length;
          }
        } catch (error) {
          if (isUniqueConstraintError(error)) {
            stats.churchesSkipped++;
            continue;
          }
          stats.errors++;
          console.error(` Error creating ${church.slug}: ${error instanceof Error ? error.message : error}`);
        }
      }
    }

    // A short page is the last page; skip the request that would only
    // come back empty.
    if (posts.length < POSTS_PER_PAGE) break;
  }

  stats.diocesesProcessed++;
}
|
|
|
|
// ─── CLI ─────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Parses the command line (`process.argv`) into a CLIArgs object.
 *
 * `--resume-from` must be followed by a non-negative integer and
 * `--diocese` by a positive integer; `--job-id` must be followed by a
 * value. A missing or malformed value prints an error and exits with
 * status 1 instead of silently turning into NaN/undefined. Unrecognised
 * arguments are ignored with a warning. `--help`/`-h` prints usage and
 * exits with status 0.
 *
 * @returns The parsed options; exits the process when neither `--all` nor
 *   `--diocese` is given.
 */
function parseArgs(): CLIArgs {
  const argv = process.argv.slice(2);
  const result: CLIArgs = { all: false, dryRun: false };

  function fail(message: string): never {
    console.error(`Error: ${message}`);
    process.exit(1);
  }

  // Returns the value following a flag, rejecting a missing value or
  // another flag in its place.
  function readValue(flag: string, index: number): string {
    const value = argv[index];
    if (value === undefined || value.startsWith('--')) {
      fail(`${flag} requires a value`);
    }
    return value;
  }

  // Accepts plain decimal digits only, so "12abc" or "1.5" are rejected.
  function readInteger(flag: string, index: number, min: number): number {
    const raw = readValue(flag, index);
    const parsed = /^\d+$/.test(raw) ? parseInt(raw, 10) : NaN;
    if (!Number.isSafeInteger(parsed) || parsed < min) {
      fail(`${flag} expects an integer >= ${min}, got "${raw}"`);
    }
    return parsed;
  }

  for (let i = 0; i < argv.length; i++) {
    switch (argv[i]) {
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--resume-from':
        result.resumeFrom = readInteger('--resume-from', ++i, 0);
        break;
      case '--diocese':
        result.diocese = readInteger('--diocese', ++i, 1);
        break;
      case '--job-id':
        result.jobId = readValue('--job-id', ++i);
        break;
      case '--help':
      case '-h':
        console.log(`
Usage: npx tsx scripts/import-gottesdienstzeiten.ts [options]

Options:
--all Import all Catholic diocese categories
--diocese <catId> Import a single diocese category (e.g., 129 for Köln)
--dry-run No database writes, just report what would happen
--resume-from <n> Skip first N diocese categories
--job-id <uuid> Background job tracking ID
--help, -h Show this help message

Examples:
npx tsx scripts/import-gottesdienstzeiten.ts --diocese 129 --dry-run
npx tsx scripts/import-gottesdienstzeiten.ts --all --dry-run
npx tsx scripts/import-gottesdienstzeiten.ts --all
`);
        process.exit(0);
      default:
        console.warn(`Warning: ignoring unrecognized argument "${argv[i]}"`);
    }
  }

  if (!result.all && !result.diocese) {
    console.error('Error: specify --all or --diocese <categoryId>');
    process.exit(1);
  }

  return result;
}
|
|
|
|
/**
 * Renders a millisecond duration as "Xh Ym Zs", "Ym Zs" or "Zs", leaving
 * out leading units that are zero. Fractions of a second are dropped.
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);

  if (totalHours > 0) {
    return `${totalHours}h ${totalMinutes % 60}m ${totalSeconds % 60}s`;
  }
  return totalMinutes > 0
    ? `${totalMinutes}m ${totalSeconds % 60}s`
    : `${totalSeconds}s`;
}
|
|
|
|
// ─── Main ────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Script entry point: parses the CLI options, prints a banner, loads the
 * existing German churches, imports the selected diocese categories one by
 * one, prints a summary, and records the outcome on the background job
 * when `--job-id` was given.
 *
 * An empty category list (the category request failed, or `--resume-from`
 * points past the end) is counted as an error, so the run is not reported
 * as a clean completion with nothing imported. Failures to update the
 * background job are logged as warnings and do not stop the import.
 */
async function main() {
  const args = parseArgs();
  const startTime = Date.now();

  console.log('\n' + '='.repeat(70));
  console.log('GOTTESDIENSTZEITEN.DE (GERMANY) IMPORTER');
  console.log('='.repeat(70));
  console.log(`Mode: ${args.diocese ? `Diocese category ${args.diocese}` : 'All dioceses'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from: diocese index ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (error) {
      // Best effort: the job record might not exist.
      console.warn(`Could not mark job ${args.jobId} as running: ${error instanceof Error ? error.message : error}`);
    }
  }

  const stats: ImportStats = {
    diocesesProcessed: 0,
    postsFound: 0,
    churchesParsed: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesCreated: 0,
    errors: 0,
  };

  const existingChurches = await loadExistingGermanChurches();

  let categories: DioceseCat[];
  if (args.diocese) {
    // Name and count are placeholders; only the ID comes from the user.
    categories = [{ id: args.diocese, name: `Category ${args.diocese}`, count: 1000 }];
  } else {
    categories = await fetchDioceseCategories();
  }

  if (args.resumeFrom && !args.diocese) {
    categories = categories.slice(args.resumeFrom);
    console.log(`Resuming from diocese index ${args.resumeFrom} (${categories[0]?.name})\n`);
  }

  if (categories.length === 0) {
    // Nothing to import is a failure, not a successful empty run.
    stats.errors++;
    console.error('No diocese categories to process (category fetch failed or --resume-from is out of range)');
  }

  console.log(`Processing ${categories.length} diocese categories\n`);

  for (let i = 0; i < categories.length; i++) {
    const cat = categories[i];
    const elapsed = formatDuration(Date.now() - startTime);
    console.log(`[${i + 1}/${categories.length}] ${cat.name} (${cat.count} posts) [${elapsed} elapsed]`);

    try {
      await processDiocese(cat, existingChurches, args.dryRun, stats);
    } catch (error) {
      stats.errors++;
      console.error(` ERROR processing ${cat.name}: ${error instanceof Error ? error.message : error}`);
    }
  }

  const totalTime = Date.now() - startTime;
  console.log('\n' + '='.repeat(70));
  console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
  console.log('='.repeat(70));
  console.log(`Dioceses processed: ${stats.diocesesProcessed}`);
  console.log(`WP posts found: ${stats.postsFound}`);
  console.log(`Churches parsed: ${stats.churchesParsed}`);
  console.log(` Matched (existing): ${stats.churchesMatched}`);
  console.log(` Created (new): ${stats.churchesCreated}`);
  console.log(` Skipped (no coords): ${stats.churchesSkipped}`);
  console.log(`Schedules created: ${stats.schedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Total time: ${formatDuration(totalTime)}`);
  console.log(`HTTP requests: ${requestCount}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: {
          status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
          completedAt: new Date(),
          processed: stats.churchesParsed,
          succeeded: stats.churchesCreated + stats.churchesMatched,
          failed: stats.errors,
          itemsFound: stats.schedulesCreated,
        },
      });
    } catch (error) {
      // Best effort: the import itself has already finished.
      console.warn(`Could not record the result on job ${args.jobId}: ${error instanceof Error ? error.message : error}`);
    }
  }
}
|
|
|
|
// Run the importer. A fatal error sets a failing exit code rather than
// calling process.exit(), which would end the process before the cleanup
// below could release the Prisma client and the pg pool.
main()
  .catch((error) => {
    console.error('Fatal error:', error);
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });
|