feat: add discovermassId to church-matcher ExistingChurch and ChurchCandidate
Add the discovermassId field to the ExistingChurch interface and the ChurchCandidate type, insert a dedicated matching pass in findDuplicateChurch, and update all 15 importer push blocks plus 16 loadExistingChurches select queries to include the new field.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
686
scripts/import-gottesdienstzeiten.ts
Normal file
686
scripts/import-gottesdienstzeiten.ts
Normal file
@@ -0,0 +1,686 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
 * Import Catholic churches and mass schedules from gottesdienstzeiten.de (Germany)
 *
 * gottesdienstzeiten.de is a German worship service directory with ~6,878 Catholic
 * churches. It runs on WordPress with a fully open REST API at /wp-json/wp/v2/posts.
 *
 * Data includes: church name, address, coordinates (Google Maps embed), diocese,
 * mass schedules (day/type/time table), website, email, phone.
 *
 * Import strategy:
 * 1. Fetch all Catholic diocese category IDs from WP API
 * 2. Paginate through posts per category (100 per page)
 * 3. Parse HTML content for coordinates, address, schedule table, info table
 * 4. Match against existing German churches via church-matcher
 * 5. Upsert churches and mass schedules
 *
 * Usage:
 *   npx tsx scripts/import-gottesdienstzeiten.ts --all --dry-run
 *   npx tsx scripts/import-gottesdienstzeiten.ts --all
 *   npx tsx scripts/import-gottesdienstzeiten.ts --diocese 129 --dry-run  # Köln only
 *   npx tsx scripts/import-gottesdienstzeiten.ts --all --resume-from 5
 */
|
||||
|
||||
import dotenv from 'dotenv';
|
||||
import path from 'path';
|
||||
|
||||
// Load environment files relative to the current working directory.
// `.env.local` is read first, then `.env`.
// NOTE(review): dotenv by default does not overwrite variables that are already
// set, so `.env.local` values should win — confirm against the installed version.
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
// Connection string: DATABASE_URL if set, otherwise a local development database.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Mask the password segment (":secret@") before echoing the URL to the console.
console.log(`Connecting to database: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
const pool = new Pool({
  connectionString: dbUrl,
  // URLs containing "neon" get TLS with certificate verification disabled;
  // every other URL uses the driver default.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
// Prisma talks to Postgres through the pg pool via the driver adapter.
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
import { findDuplicateChurch } from '../src/lib/church-matcher';
|
||||
import type { ExistingChurch } from '../src/lib/church-matcher';
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Base URL of the gottesdienstzeiten.de WordPress REST API. */
const API_BASE = 'https://gottesdienstzeiten.de/wp-json/wp/v2';
/** User-Agent header sent with every request. */
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
/** Pause (ms) inserted before every request except the very first one. */
const REQUEST_DELAY_MS = 1000;
/** Pause (ms) before retrying after HTTP 429/503 or a thrown fetch error. */
const RETRY_DELAY_MS = 5000;
/** Maximum number of attempts per URL, including the first. */
const MAX_RETRIES = 3;
/** Page size requested from the posts endpoint. */
const POSTS_PER_PAGE = 100;
/** WordPress category ID whose child categories are treated as the Catholic dioceses. */
const CATHOLIC_PARENT_CATEGORY = 4;
|
||||
|
||||
/**
 * Lower-cased German day names → dayOfWeek (0=Sun, 1=Mon, ..., 6=Sat).
 * Both the adverbial "-s" forms ("sonntags") and the plain forms ("sonntag")
 * are listed because some entries use the latter.
 */
const GERMAN_DAYS: Record<string, number> = {
  'sonntags': 0, 'montags': 1, 'dienstags': 2, 'mittwochs': 3,
  'donnerstags': 4, 'freitags': 5, 'samstags': 6,
  // Without -s suffix (some entries use these)
  'sonntag': 0, 'montag': 1, 'dienstag': 2, 'mittwoch': 3,
  'donnerstag': 4, 'freitag': 5, 'samstag': 6,
};
|
||||
|
||||
/**
 * Lower-cased service-type labels that count as a mass. Rows of the schedule
 * table whose type is not accepted are filtered out as non-mass services.
 */
const MASS_TYPES = new Set([
  'messfeier', 'vorabendmesse', 'heilige messe', 'hl. messe',
  'hochamt', 'festmesse', 'familienmesse', 'kindergottesdienst',
  'jugendmesse', 'abendmesse', 'frühmesse', 'werktagsmesse',
  'sonntagsmesse', 'messe', 'eucharistiefeier',
]);
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/** A diocese category as returned by the WordPress categories endpoint. */
interface DioceseCat {
  /** WordPress category ID; used as the `categories=` filter when listing posts. */
  id: number;
  /** Category name; stored as the diocese of every church parsed from it. */
  name: string;
  /** Post count reported for the category; drives the number of pages requested. */
  count: number;
}
|
||||
|
||||
/** A church extracted from one WordPress post by `parsePost`. */
interface ParsedChurch {
  /** WordPress post ID; its string form is used as `gottesdienstzeitenId`. */
  wpId: number;
  /** WordPress post slug; only used in error messages. */
  slug: string;
  /** Post title with HTML removed and a leading "(City)" prefix stripped. */
  name: string;
  /** Latitude taken from the Google Maps embed URL. */
  latitude: number;
  /** Longitude taken from the Google Maps embed URL. */
  longitude: number;
  /** Street part of the address, or the full address text when no ZIP/city could be split off. */
  address: string | null;
  /** Five-digit postal code, when the address matched "Street, ZIP City". */
  zip: string | null;
  /** City name, when the address matched "Street, ZIP City". */
  city: string | null;
  /** Name of the diocese category the post was listed under. */
  diocese: string | null;
  /** Website URL from the info table, if present. */
  website: string | null;
  /** E-mail address from the info table, if present. */
  email: string | null;
  /** Phone number from the info table, if present. */
  phone: string | null;
  /** De-duplicated weekly mass times parsed from the schedule table. */
  schedules: ParsedSchedule[];
}
|
||||
|
||||
/** One weekly mass time parsed from a church's schedule table. */
interface ParsedSchedule {
  /** Day of week, 0=Sunday … 6=Saturday. */
  dayOfWeek: number;
  /** Zero-padded 24-hour time in "HH:MM" form. */
  time: string;
}
|
||||
|
||||
/** Running counters for one import run; printed in the final summary. */
interface ImportStats {
  /** Diocese categories processed to the end without an exception escaping. */
  diocesesProcessed: number;
  /** Total WordPress posts returned by the API. */
  postsFound: number;
  /** Posts that yielded a usable church record. */
  churchesParsed: number;
  /** Parsed churches that matched an existing database row. */
  churchesMatched: number;
  /** Parsed churches inserted as new rows (or that would be, in a dry run). */
  churchesCreated: number;
  /** Posts that could not be parsed, plus writes rejected by a unique constraint. */
  churchesSkipped: number;
  /** Mass schedule rows written (or that would be written, in a dry run). */
  schedulesCreated: number;
  /** Errors that were caught and logged. */
  errors: number;
}
|
||||
|
||||
/** Parsed command-line options. */
interface CLIArgs {
  /** `--all`: import every Catholic diocese category. */
  all: boolean;
  /** `--dry-run`: report what would happen without writing to the database. */
  dryRun: boolean;
  /** `--resume-from <n>`: skip the first N diocese categories. */
  resumeFrom?: number;
  /** `--diocese <catId>`: import a single diocese category. */
  diocese?: number;
  /** `--job-id <uuid>`: background job row to update with progress. */
  jobId?: string;
}
|
||||
|
||||
// ─── HTTP Helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
// Number of fetchJson calls so far: lets the first request skip the politeness
// delay and is reported as "HTTP requests" in the final summary.
let requestCount = 0;
|
||||
|
||||
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param ms - Time to wait, in milliseconds.
 */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
|
||||
|
||||
/**
 * Fetches `url` and returns its parsed JSON body, or `null` on failure.
 *
 * Every call after the first waits REQUEST_DELAY_MS before sending. HTTP
 * 429/503 responses and thrown errors (network failure, invalid JSON) are
 * retried after RETRY_DELAY_MS, up to MAX_RETRIES attempts in total. Any other
 * non-2xx status returns `null` immediately without a retry.
 *
 * @param url - Absolute URL to request.
 * @returns The decoded JSON value, or `null` if no attempt succeeded.
 */
async function fetchJson(url: string): Promise<any | null> {
  // Throttle: be polite to the remote server on every request but the first.
  if (requestCount > 0) {
    await delay(REQUEST_DELAY_MS);
  }
  requestCount++;

  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
      const response = await fetch(url, {
        headers: { 'User-Agent': USER_AGENT },
      });

      // Rate-limited or temporarily unavailable: back off and try again.
      if (response.status === 429 || response.status === 503) {
        if (attempt < MAX_RETRIES) {
          console.log(` HTTP ${response.status} — retrying in ${RETRY_DELAY_MS / 1000}s`);
          await delay(RETRY_DELAY_MS);
          continue;
        }
        // Out of attempts: say so instead of returning null silently.
        console.error(` HTTP ${response.status} — giving up after ${MAX_RETRIES} attempts: ${url}`);
        return null;
      }

      // Other failures (e.g. a page number past the end) are not retried.
      if (!response.ok) return null;
      return await response.json();
    } catch (error) {
      if (attempt < MAX_RETRIES) {
        await delay(RETRY_DELAY_MS);
        continue;
      }
      console.error(` Fetch error: ${error instanceof Error ? error.message : error}`);
      return null;
    }
  }
  return null;
}
|
||||
|
||||
// ─── Parsing ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Named HTML entities decoded by stripHtml; `&nbsp;` becomes a plain space. */
const NAMED_HTML_ENTITIES: Record<string, string> = {
  amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ',
};

/**
 * Removes HTML tags from `html`, decodes HTML entities and trims whitespace.
 *
 * WordPress-rendered fields contain entities (e.g. `&amp;`, `&#8211;`), which
 * would otherwise end up verbatim in names, e-mail addresses and phone numbers.
 * Decoding happens in a single pass, so `&amp;lt;` becomes `&lt;`, not `<`.
 * Unknown named entities and out-of-range numeric entities are left untouched.
 *
 * @param html - HTML fragment.
 * @returns Plain text.
 */
function stripHtml(html: string): string {
  return html
    .replace(/<[^>]+>/g, '')
    .replace(/&(#\d+|#x[0-9a-f]+|[a-z]+);/gi, (entity: string, body: string) => {
      if (body.startsWith('#')) {
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // String.fromCodePoint throws on values outside the Unicode range.
        return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      }
      return NAMED_HTML_ENTITIES[body.toLowerCase()] ?? entity;
    })
    .trim();
}
|
||||
|
||||
/**
 * Converts one WordPress post into a ParsedChurch.
 *
 * Reads `post.id`, `post.slug`, `post.title.rendered` and
 * `post.content.rendered`. From the rendered HTML it extracts:
 *  - coordinates from a Google Maps `maps?q=lat,lng` URL,
 *  - the address from the first `<strong>` element ("Street, ZIP City"),
 *  - website / e-mail / phone from the second `<table>`,
 *  - mass times from the first `<table>`.
 *
 * @param post - Raw post object from the WP REST API.
 * @param dioceseName - Diocese name to attach to the result.
 * @returns The parsed church, or `null` when the post has no usable
 *          coordinates (missing, NaN, 0/0, or outside the valid lat/lng range).
 */
function parsePost(post: any, dioceseName: string | null): ParsedChurch | null {
  const content: string = post.content?.rendered || '';
  const wpId: number = post.id;
  const slug: string = post.slug;

  // Extract name from title — format: "(City) Church Name"
  let name = stripHtml(post.title?.rendered || '');
  // Remove leading "(City)" prefix for cleaner name
  const nameMatch = name.match(/^\([^)]+\)\s*(.+)$/);
  if (nameMatch) name = nameMatch[1];

  // Extract coordinates from Google Maps embed
  const coordMatch = content.match(/maps\?q=([-\d.]+),([-\d.]+)/);
  if (!coordMatch) return null;

  const latitude = parseFloat(coordMatch[1]);
  const longitude = parseFloat(coordMatch[2]);
  if (isNaN(latitude) || isNaN(longitude) || (latitude === 0 && longitude === 0)) return null;
  // Reject coordinates that cannot exist on the globe.
  if (Math.abs(latitude) > 90 || Math.abs(longitude) > 180) return null;

  // Extract address from first <strong> tag (format: "Street, ZIP City")
  const addrMatch = content.match(/<strong>([^<]+)<\/strong>/);
  let address: string | null = null;
  let zip: string | null = null;
  let city: string | null = null;

  if (addrMatch) {
    const fullAddr = addrMatch[1].trim();
    // Fallback: keep the whole text as the address if it cannot be split.
    address = fullAddr;

    // Parse "Street, ZIP City" format
    const zipCityMatch = fullAddr.match(/,\s*(\d{5})\s+(.+)$/);
    if (zipCityMatch) {
      zip = zipCityMatch[1];
      city = zipCityMatch[2];
      address = fullAddr.replace(/,\s*\d{5}\s+.+$/, '').trim();
    }
  }

  // Parse info table (second table) for website, email, phone
  let website: string | null = null;
  let email: string | null = null;
  let phone: string | null = null;

  const tables = content.match(/<table[^>]*>([\s\S]*?)<\/table>/g) || [];
  if (tables.length >= 2) {
    const infoTable = tables[1];

    // Website: first link after the "Website" label
    const websiteMatch = infoTable.match(/Website[\s\S]*?<a[^>]*href="([^"]+)"/);
    if (websiteMatch) website = websiteMatch[1];

    // Email: first cell after the "E-Mail" label, accepted only if it contains "@"
    const emailMatch = infoTable.match(/E-Mail[\s\S]*?<td[^>]*>([\s\S]*?)<\/td>/);
    if (emailMatch) {
      const emailText = stripHtml(emailMatch[1]);
      if (emailText.includes('@')) email = emailText;
    }

    // Phone: first cell after the "Telefon" label, accepted only if longer than 3 chars
    const phoneMatch = infoTable.match(/Telefon[\s\S]*?<td[^>]*>([\s\S]*?)<\/td>/);
    if (phoneMatch) {
      const phoneText = stripHtml(phoneMatch[1]);
      if (phoneText.length > 3) phone = phoneText;
    }
  }

  // Parse schedule table (first table)
  const schedules: ParsedSchedule[] = [];
  if (tables.length >= 1) {
    const schedTable = tables[0];
    const rows = schedTable.match(/<tr[^>]*>([\s\S]*?)<\/tr>/g) || [];

    // The day header applies to its own row and to all following rows until
    // the next recognised header.
    let currentDay = -1;
    const seen = new Set<string>();

    for (const row of rows) {
      // Check for day header (in <th> with <em>)
      const dayMatch = row.match(/<th[^>]*>[\s\S]*?<em>([^<]*)<\/em>/);
      if (dayMatch && dayMatch[1].trim()) {
        const dayName = dayMatch[1].trim().toLowerCase();
        if (GERMAN_DAYS[dayName] !== undefined) {
          currentDay = GERMAN_DAYS[dayName];
        }
      }

      // Get type and time from <td><em>...</em></td>
      const cells = row.match(/<td[^>]*>[\s\S]*?<em>([^<]*)<\/em>[\s\S]*?<\/td>/g);
      if (!cells || cells.length < 2 || currentDay < 0) continue;

      const typeMatch = cells[0].match(/<em>([^<]*)<\/em>/);
      const timeMatch = cells[1].match(/<em>([^<]*)<\/em>/);
      if (!typeMatch || !timeMatch) continue;

      const massType = typeMatch[1].trim().toLowerCase();
      const timeStr = timeMatch[1].trim();

      // Only include mass-related types
      const isMass = MASS_TYPES.has(massType) ||
        massType.includes('messe') || massType.includes('messfeier') ||
        massType.includes('eucharistie');
      if (!isMass) continue;

      // Parse time: "09.00 Uhr" or "18:30 Uhr" → "09:00" or "18:30"
      const parsedTime = timeStr
        .replace(/\s*Uhr\s*/i, '')
        .replace('.', ':')
        .trim();
      const timeValidation = parsedTime.match(/^(\d{1,2}):(\d{2})$/);
      if (!timeValidation) continue;
      // The pattern alone would accept values like "25:99"; enforce a real clock time.
      if (Number(timeValidation[1]) > 23 || Number(timeValidation[2]) > 59) continue;
      const normalizedTime = `${timeValidation[1].padStart(2, '0')}:${timeValidation[2]}`;

      // Several service types can share a slot; keep each day/time only once.
      const key = `${currentDay}:${normalizedTime}`;
      if (!seen.has(key)) {
        seen.add(key);
        schedules.push({ dayOfWeek: currentDay, time: normalizedTime });
      }
    }
  }

  return {
    wpId, slug, name, latitude, longitude,
    address, zip, city, diocese: dioceseName,
    website, email, phone, schedules,
  };
}
|
||||
|
||||
// ─── Database Operations ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Loads every church with country 'DE' from the database, selecting the
 * fields passed on to the duplicate matcher (coordinates, name, the per-source
 * external IDs) and the contact fields used to fill gaps on matched rows.
 *
 * @returns The existing German churches as ExistingChurch records.
 */
async function loadExistingGermanChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing German churches for deduplication...');
  const churches = await prisma.church.findMany({
    where: { country: 'DE' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      // External IDs, one per upstream data source.
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      // Contact fields: only filled in on a match when currently empty.
      website: true,
      phone: true,
      address: true,
    },
  });
  console.log(`Loaded ${churches.length} existing German churches`);
  return churches;
}
|
||||
|
||||
// ─── Import Logic ────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Fetches the child categories of CATHOLIC_PARENT_CATEGORY from the WP API.
 *
 * A single request with `per_page=100` is made, so at most 100 categories are
 * returned.
 *
 * @returns The categories sorted by post count, largest first; an empty array
 *          when the request fails or does not return a JSON array.
 */
async function fetchDioceseCategories(): Promise<DioceseCat[]> {
  console.log('Fetching Catholic diocese categories...');
  const data = await fetchJson(
    `${API_BASE}/categories?per_page=100&parent=${CATHOLIC_PARENT_CATEGORY}`
  );
  // Covers both a failed request (null) and an unexpected non-array payload,
  // which would otherwise crash on `.map`.
  if (!Array.isArray(data)) {
    console.error('Failed to fetch categories');
    return [];
  }
  const cats: DioceseCat[] = data.map((c: any) => ({
    id: c.id, name: c.name, count: c.count,
  }));
  const total = cats.reduce((s, c) => s + c.count, 0);
  console.log(`Found ${cats.length} diocese categories with ${total} total posts\n`);
  return cats.sort((a, b) => b.count - a.count);
}
|
||||
|
||||
/**
 * Imports all posts of one diocese category.
 *
 * For every page of posts: parse each post, look for an existing church via
 * findDuplicateChurch, then either
 *  - (dry run) only update the counters,
 *  - (match) tag the existing row with the gottesdienstzeiten ID, fill empty
 *    address/website/phone fields and replace its mass schedules, or
 *  - (no match) create a new church with its schedules and append it to
 *    `existingChurches` so later posts can match against it.
 *
 * @param cat - Diocese category to import.
 * @param existingChurches - In-memory list used for matching; mutated when a church is created.
 * @param dryRun - When true, nothing is written to the database.
 * @param stats - Counters updated in place.
 * @throws Re-throws errors from updating a matched church unless they are
 *         unique-constraint violations.
 */
async function processDiocese(
  cat: DioceseCat,
  existingChurches: ExistingChurch[],
  dryRun: boolean,
  stats: ImportStats,
): Promise<void> {
  const totalPages = Math.ceil(cat.count / POSTS_PER_PAGE);

  for (let page = 1; page <= totalPages; page++) {
    const url = `${API_BASE}/posts?categories=${cat.id}&per_page=${POSTS_PER_PAGE}&page=${page}`;
    const posts = await fetchJson(url);
    // A failed request, a non-array payload or an empty page ends this diocese.
    if (!posts || !Array.isArray(posts) || posts.length === 0) break;

    stats.postsFound += posts.length;

    for (const post of posts) {
      const church = parsePost(post, cat.name);
      if (!church) {
        stats.churchesSkipped++;
        continue;
      }

      stats.churchesParsed++;
      const gdzId = String(church.wpId);

      const candidate = {
        name: church.name,
        lat: church.latitude,
        lng: church.longitude,
        gottesdienstzeitenId: gdzId,
      };

      const duplicate = findDuplicateChurch(candidate, existingChurches);

      if (dryRun) {
        if (duplicate) {
          stats.churchesMatched++;
        } else {
          stats.churchesCreated++;
        }
        stats.schedulesCreated += church.schedules.length;
        continue;
      }

      if (duplicate) {
        stats.churchesMatched++;
        const updateData: Record<string, unknown> = { gottesdienstzeitenId: gdzId };

        // Only fill in contact fields that are currently empty; never overwrite.
        if (!duplicate.address && church.address) updateData.address = church.address;
        if (!duplicate.website && church.website) {
          updateData.website = church.website;
          updateData.hasWebsite = true;
        }
        if (!duplicate.phone && church.phone) updateData.phone = church.phone;

        try {
          await prisma.church.update({
            where: { id: duplicate.id },
            data: updateData,
          });
        } catch (error) {
          // Presumably another row already carries this gottesdienstzeitenId.
          if (error instanceof Error && error.message.includes('Unique constraint')) {
            stats.churchesSkipped++;
            continue;
          }
          // NOTE(review): any other error aborts the rest of this diocese,
          // unlike the create path below, which logs and continues.
          throw error;
        }

        if (church.schedules.length > 0) {
          try {
            // Replace ALL existing schedules of the church atomically.
            await prisma.$transaction(async (tx) => {
              await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
              await tx.massSchedule.createMany({
                data: church.schedules.map((s) => ({
                  churchId: duplicate.id,
                  dayOfWeek: s.dayOfWeek,
                  time: s.time,
                  language: 'German',
                })),
              });
              await tx.church.update({
                where: { id: duplicate.id },
                data: { lastScrapedAt: new Date() },
              });
            });
            stats.schedulesCreated += church.schedules.length;
          } catch (error) {
            stats.errors++;
            console.error(` Error saving schedules for ${church.slug}: ${error instanceof Error ? error.message : error}`);
          }
        }
      } else {
        try {
          const newChurch = await prisma.church.create({
            data: {
              name: church.name,
              latitude: church.latitude,
              longitude: church.longitude,
              address: church.address,
              zip: church.zip,
              city: church.city,
              country: 'DE',
              diocese: church.diocese || undefined,
              website: church.website,
              hasWebsite: !!church.website,
              email: church.email,
              phone: church.phone,
              gottesdienstzeitenId: gdzId,
              source: 'gottesdienstzeiten',
              websiteLanguage: 'de',
            },
          });
          stats.churchesCreated++;

          // Make the new church visible to the matcher for the remaining posts.
          existingChurches.push({
            id: newChurch.id,
            name: church.name,
            latitude: church.latitude,
            longitude: church.longitude,
            osmId: null,
            baiduId: null,
            masstimesId: null,
            orarimesseId: null,
            massSchedulesPhId: null,
            philmassId: null,
            horariosMisasId: null,
            mszeInfoId: null,
            weekdayMassesId: null,
            messesInfoId: null,
            bohosluzbyId: null,
            miserendId: null,
            kerknetId: null,
            gottesdienstzeitenId: gdzId,
            discovermassId: null,
            source: 'gottesdienstzeiten',
            website: church.website,
            phone: church.phone,
            address: church.address,
          });

          if (church.schedules.length > 0) {
            await prisma.massSchedule.createMany({
              data: church.schedules.map((s) => ({
                churchId: newChurch.id,
                dayOfWeek: s.dayOfWeek,
                time: s.time,
                language: 'German',
              })),
            });
            await prisma.church.update({
              where: { id: newChurch.id },
              data: { lastScrapedAt: new Date() },
            });
            stats.schedulesCreated += church.schedules.length;
          }
        } catch (error) {
          if (error instanceof Error && error.message.includes('Unique constraint')) {
            stats.churchesSkipped++;
            continue;
          }
          stats.errors++;
          console.error(` Error creating ${church.slug}: ${error instanceof Error ? error.message : error}`);
        }
      }
    }

    // A short page is the last page. Stopping here avoids one wasted request
    // when `cat.count` overestimates the real size (e.g. the placeholder count
    // used for --diocese).
    if (posts.length < POSTS_PER_PAGE) break;
  }

  stats.diocesesProcessed++;
}
|
||||
|
||||
// ─── CLI ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Parses `process.argv` into CLIArgs.
 *
 * `--help`/`-h` prints usage and exits with code 0. The process exits with
 * code 1 when neither `--all` nor `--diocese` is given, or when `--diocese` /
 * `--resume-from` is not followed by a non-negative integer. Unknown
 * arguments are ignored.
 *
 * @returns The parsed options.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = { all: false, dryRun: false };

  // Reads a flag's value as a non-negative base-10 integer; exits on bad input
  // so that a typo is not silently treated as "flag not given".
  const readInt = (flag: string, raw: string | undefined): number => {
    const value = raw === undefined ? NaN : Number.parseInt(raw, 10);
    if (!Number.isInteger(value) || value < 0) {
      console.error(`Error: ${flag} requires a non-negative integer, got "${raw ?? ''}"`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--resume-from':
        result.resumeFrom = readInt('--resume-from', args[++i]);
        break;
      case '--diocese':
        result.diocese = readInt('--diocese', args[++i]);
        break;
      case '--job-id':
        result.jobId = args[++i];
        break;
      case '--help':
      case '-h':
        console.log(`
Usage: npx tsx scripts/import-gottesdienstzeiten.ts [options]

Options:
--all Import all Catholic diocese categories
--diocese <catId> Import a single diocese category (e.g., 129 for Köln)
--dry-run No database writes, just report what would happen
--resume-from <n> Skip first N diocese categories
--job-id <uuid> Background job tracking ID
--help, -h Show this help message

Examples:
npx tsx scripts/import-gottesdienstzeiten.ts --diocese 129 --dry-run
npx tsx scripts/import-gottesdienstzeiten.ts --all --dry-run
npx tsx scripts/import-gottesdienstzeiten.ts --all
`);
        process.exit(0);
    }
  }

  if (!result.all && !result.diocese) {
    console.error('Error: specify --all or --diocese <categoryId>');
    process.exit(1);
  }

  return result;
}
|
||||
|
||||
/**
 * Formats a duration as "Xh Ym Zs", "Ym Zs" or "Zs", omitting leading zero
 * units. Fractions of a second are truncated.
 *
 * @param ms - Duration in milliseconds.
 * @returns Human-readable duration.
 */
function formatDuration(ms: number): string {
  const seconds = Math.floor(ms / 1000);
  const minutes = Math.floor(seconds / 60);
  const hours = Math.floor(minutes / 60);
  if (hours > 0) return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
  if (minutes > 0) return `${minutes}m ${seconds % 60}s`;
  return `${seconds}s`;
}
|
||||
|
||||
// ─── Main ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Script entry point: parses CLI arguments, loads the existing German
 * churches, processes the selected diocese categories one after another and
 * prints a summary. When `--job-id` is given, the matching backgroundJob row
 * is marked as running at the start and completed at the end; failures to
 * update that row are ignored.
 */
async function main() {
  const args = parseArgs();
  const startTime = Date.now();

  console.log('\n' + '='.repeat(70));
  console.log('GOTTESDIENSTZEITEN.DE (GERMANY) IMPORTER');
  console.log('='.repeat(70));
  console.log(`Mode: ${args.diocese ? `Diocese category ${args.diocese}` : 'All dioceses'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from: diocese index ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch { /* Job might not exist */ }
  }

  const stats: ImportStats = {
    diocesesProcessed: 0,
    postsFound: 0,
    churchesParsed: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesCreated: 0,
    errors: 0,
  };

  const existingChurches = await loadExistingGermanChurches();

  let categories: DioceseCat[];
  if (args.diocese) {
    // The real post count is unknown here; 1000 is a placeholder upper bound
    // and paging stops as soon as the API runs out of posts.
    categories = [{ id: args.diocese, name: `Category ${args.diocese}`, count: 1000 }];
  } else {
    categories = await fetchDioceseCategories();
    // An empty list means the category request failed or returned nothing.
    // Count it so the run is not reported as a clean success.
    if (categories.length === 0) stats.errors++;
  }

  if (args.resumeFrom && !args.diocese) {
    categories = categories.slice(args.resumeFrom);
    console.log(`Resuming from diocese index ${args.resumeFrom} (${categories[0]?.name})\n`);
  }

  console.log(`Processing ${categories.length} diocese categories\n`);

  for (let i = 0; i < categories.length; i++) {
    const cat = categories[i];
    const elapsed = formatDuration(Date.now() - startTime);
    console.log(`[${i + 1}/${categories.length}] ${cat.name} (${cat.count} posts) [${elapsed} elapsed]`);

    // One failing diocese must not stop the remaining ones.
    try {
      await processDiocese(cat, existingChurches, args.dryRun, stats);
    } catch (error) {
      stats.errors++;
      console.error(` ERROR processing ${cat.name}: ${error instanceof Error ? error.message : error}`);
    }
  }

  const totalTime = Date.now() - startTime;
  console.log('\n' + '='.repeat(70));
  console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
  console.log('='.repeat(70));
  console.log(`Dioceses processed: ${stats.diocesesProcessed}`);
  console.log(`WP posts found: ${stats.postsFound}`);
  console.log(`Churches parsed: ${stats.churchesParsed}`);
  console.log(` Matched (existing): ${stats.churchesMatched}`);
  console.log(` Created (new): ${stats.churchesCreated}`);
  console.log(` Skipped (no coords): ${stats.churchesSkipped}`);
  console.log(`Schedules created: ${stats.schedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Total time: ${formatDuration(totalTime)}`);
  console.log(`HTTP requests: ${requestCount}`);
  console.log('='.repeat(70) + '\n');

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: {
          status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
          completedAt: new Date(),
          processed: stats.churchesParsed,
          succeeded: stats.churchesCreated + stats.churchesMatched,
          failed: stats.errors,
          itemsFound: stats.schedulesCreated,
        },
      });
    } catch { /* Ignore */ }
  }
}
|
||||
|
||||
// Run the importer. On a fatal error only the exit code is set: calling
// process.exit() here would end the process before the .finally() handler
// could close the Prisma client and the pg pool.
main()
  .catch((error) => {
    console.error('Fatal error:', error);
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });
|
||||
Reference in New Issue
Block a user