feat: add discovermassId to church-matcher ExistingChurch and ChurchCandidate
Add discovermassId field to ExistingChurch interface and ChurchCandidate type, insert a dedicated matching pass in findDuplicateChurch, and update all 15 importer push blocks plus 16 loadExistingChurches select queries to include the new field. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
695
scripts/import-mass-schedules-ph.ts
Normal file
695
scripts/import-mass-schedules-ph.ts
Normal file
@@ -0,0 +1,695 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* Import Catholic churches and mass schedules from mass-schedules.com (Philippines)
|
||||
*
|
||||
* mass-schedules.com has been operating since 2008 and covers ~1,500 Philippine
|
||||
* churches with weekly mass schedule tables and coordinates on separate map pages.
|
||||
*
|
||||
* Import strategy:
|
||||
* 1. Fetch sitemap XML → extract all /catholic-church/{id}/ URLs
|
||||
* 2. For each church: fetch page HTML, parse name/address/schedule, fetch map
|
||||
* page for coordinates, match against existing PH churches, upsert
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx scripts/import-mass-schedules-ph.ts --all
|
||||
* npx tsx scripts/import-mass-schedules-ph.ts --all --dry-run
|
||||
* npx tsx scripts/import-mass-schedules-ph.ts --church-id 34
|
||||
* npx tsx scripts/import-mass-schedules-ph.ts --all --resume-from 500
|
||||
* npx tsx scripts/import-mass-schedules-ph.ts --all --skip-schedules
|
||||
* npx tsx scripts/import-mass-schedules-ph.ts --all --job-id {uuid}
|
||||
*/
|
||||
|
||||
import dotenv from 'dotenv';
|
||||
import path from 'path';
|
||||
|
||||
// Load `.env.local` first, then `.env`, both resolved against the current
// working directory. The order is kept exactly as before.
for (const envFile of ['.env.local', '.env']) {
  dotenv.config({ path: path.resolve(process.cwd(), envFile) });
}
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
/** Connection string: `DATABASE_URL` when set, otherwise a local development default. */
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';

/**
 * Returns `url` with the password part of its `user:password@` section
 * replaced by `***`, so the connection string can be logged.
 *
 * Everything between the first `:` after the user name and the `@` is masked.
 * The previous pattern stopped at a `:` inside the password and left the
 * leading part of such a password in the log output.
 *
 * @param url - Database connection URL.
 * @returns The URL with the password masked; unchanged when it has no `user:password@` part.
 */
function redactDbPassword(url: string): string {
  return url.replace(/(\/\/[^:/@]*):[^@/]*@/, '$1:***@');
}

console.log(`Connecting to database: ${redactDbPassword(dbUrl)}`);
|
||||
// Connection strings containing "neon" get TLS with certificate verification
// disabled; every other connection string leaves `ssl` undefined.
const sslOptions = dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined;

// pg pool -> Prisma pg adapter -> Prisma client, all sharing the same connection string.
const pool = new Pool({ connectionString: dbUrl, ssl: sslOptions });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
import { findDuplicateChurch } from '../src/lib/church-matcher';
|
||||
import type { ExistingChurch } from '../src/lib/church-matcher';
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Origin of the scraped site; church and map page URLs are built on top of it. */
const SITE_BASE = 'https://www.mass-schedules.com';

/** Sitemap document from which the church page URLs are extracted. */
const SITEMAP_URL = `${SITE_BASE}/sitemaps/sitemap02272021.xml`;

/** Value of the User-Agent header sent with every request. */
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';

/** Pause, in milliseconds, inserted before each HTTP request after the first one. */
const REQUEST_DELAY_MS = 1500;
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/** One church page discovered in the sitemap (or synthesised for single-church mode). */
interface SitemapChurch {
  /** Numeric site ID, kept as the string captured from the URL. */
  id: string;
  /** URL slug that follows the ID in the page path. */
  slug: string;
  /** Absolute URL of the church page. */
  url: string;
}
|
||||
|
||||
/** Fields extracted from a church page's HTML. */
interface ParsedChurch {
  /** Church name; an empty string when the page title could not be found. */
  name: string;
  /** Cleaned address text, or null when absent. */
  address: string | null;
  /** Region label taken from the breadcrumb links, or null when absent. */
  region: string | null;
  /** City label taken from the breadcrumb links, or null when absent. */
  city: string | null;
  /** Telephone text, or null when absent or empty. */
  phone: string | null;
  /** Absolute URL of the location-map page, or null when the page links to none. */
  mapUrl: string | null;
}
|
||||
|
||||
/** A single weekly mass slot read from the schedule table. */
interface ParsedSchedule {
  /** Day of the week: 0=Sun, 1=Mon, ..., 6=Sat. */
  dayOfWeek: number;
  /** Start time in 24-hour "HH:MM" form, e.g. "05:00" or "18:30". */
  time: string;
}
|
||||
|
||||
/** Running counters for one import run; printed in the final summary. */
interface ImportStats {
  /** Sitemap entries handed to the per-church processing step. */
  churchesFound: number;
  /** Entries matched to an already known church. */
  churchesMatched: number;
  /** Entries for which a new church was created (or would be, in a dry run). */
  churchesCreated: number;
  /** Entries skipped (no name, no coordinates, or a unique-constraint conflict). */
  churchesSkipped: number;
  /** Churches for which a non-empty schedule was handled. */
  schedulesProcessed: number;
  /** Total number of individual mass-schedule rows counted. */
  massSchedulesCreated: number;
  /** Fetch failures and processing errors. */
  errors: number;
}
|
||||
|
||||
/** Command-line options understood by the importer. */
interface CLIArgs {
  /** `--all`: import every church listed in the sitemap. */
  all: boolean;
  /** `--church-id <id>`: import only this church. */
  churchId?: string;
  /** `--dry-run`: report what would happen without writing to the database. */
  dryRun: boolean;
  /** `--skip-schedules`: import churches only, no mass schedules. */
  skipSchedules: boolean;
  /** `--resume-from <id>`: skip churches whose ID is below this value. */
  resumeFrom?: number;
  /** `--job-id <uuid>`: background job record to update with status and result. */
  jobId?: string;
}
|
||||
|
||||
// ─── HTTP Client ─────────────────────────────────────────────────────────────
|
||||
|
||||
/** Number of HTTP requests issued so far; also reported in the final summary. */
let requestCount = 0;

/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms - Time to wait, in milliseconds.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Downloads `url` and returns the response body as text.
 *
 * Every call after the first waits `REQUEST_DELAY_MS` before issuing the
 * request, and each call increments `requestCount`. Failures are logged and
 * reported as `null` rather than thrown.
 *
 * @param url - Absolute URL to fetch.
 * @returns The response body, or `null` on a non-2xx status, a network error or a timeout.
 */
async function fetchPage(url: string): Promise<string | null> {
  // Upper bound for a single request. Pages are fetched strictly one after
  // another, so a connection that never answers would otherwise stall the
  // whole import indefinitely.
  const requestTimeoutMs = 30000;

  if (requestCount > 0) {
    await delay(REQUEST_DELAY_MS);
  }
  requestCount++;

  try {
    const response = await fetch(url, {
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      },
      // A timeout rejects the fetch (or the body read) and lands in the catch below.
      signal: AbortSignal.timeout(requestTimeoutMs),
    });

    if (!response.ok) {
      console.error(` HTTP ${response.status} for ${url}`);
      return null;
    }

    return await response.text();
  } catch (error) {
    console.error(` Fetch error for ${url}: ${error instanceof Error ? error.message : error}`);
    return null;
  }
}
|
||||
|
||||
// ─── Sitemap Parser ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Fetches the sitemap and returns one entry per distinct church ID, ordered by
 * numeric ID.
 *
 * @returns Church page descriptors built from `/catholic-church/{id}/{slug}.html` paths.
 * @throws Error when the sitemap cannot be fetched.
 */
async function fetchChurchUrlsFromSitemap(): Promise<SitemapChurch[]> {
  console.log(`Fetching sitemap: ${SITEMAP_URL}`);
  const sitemapXml = await fetchPage(SITEMAP_URL);
  if (!sitemapXml) {
    throw new Error('Failed to fetch sitemap');
  }

  // The sitemap repeats entries, so keep only the first occurrence of each ID.
  const byId = new Map<string, SitemapChurch>();
  for (const [, id, slug] of sitemapXml.matchAll(/\/catholic-church\/(\d+)\/([\w-]+)\.html/g)) {
    if (byId.has(id)) continue;
    byId.set(id, { id, slug, url: `${SITE_BASE}/catholic-church/${id}/${slug}.html` });
  }

  // Numeric ordering keeps runs predictable.
  return [...byId.values()].sort((a, b) => parseInt(a.id, 10) - parseInt(b.id, 10));
}
|
||||
|
||||
// ─── HTML Parsers ────────────────────────────────────────────────────────────
|
||||
|
||||
/** Named character references decoded by `htmlToText`. */
const HTML_NAMED_ENTITIES = new Map<string, string>([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
]);

/**
 * Converts an HTML fragment to plain text: removes tags, decodes numeric and
 * the common named character references in a single pass (so nothing is
 * decoded twice), collapses whitespace runs and trims.
 */
function htmlToText(fragment: string): string {
  return fragment
    .replace(/<[^>]+>/g, '')
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity: string, body: string) => {
      if (body.startsWith('#')) {
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // Leave out-of-range references untouched instead of throwing.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      }
      return HTML_NAMED_ENTITIES.get(body.toLowerCase()) ?? entity;
    })
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Extracts name, address, phone, region, city and the location-map link from
 * a church page.
 *
 * All text fields are passed through `htmlToText`, so values such as
 * "Sts. Peter &amp; Paul" are returned as "Sts. Peter & Paul" and inline
 * markup inside the title or phone element does not leak into the result.
 *
 * @param html - Full HTML of the church page.
 * @returns The parsed fields; `name` is an empty string when no
 *   `<h1 class="page_title">` is present, other missing fields are `null`.
 */
function parseChurchPage(html: string): ParsedChurch {
  // Name from <h1 class="page_title">...</h1>, without the " Mass Schedule" suffix.
  const h1Match = html.match(/<h1[^>]*class="page_title"[^>]*>([\s\S]*?)<\/h1>/i);
  const name = h1Match ? htmlToText(h1Match[1]).replace(/\s*Mass\s*Schedule\s*$/i, '').trim() : '';

  // Address from <label>address:</label> ... <p class="data">...</p>
  const addressMatch = html.match(/<label>address:<\/label>\s*<p class="data">([\s\S]*?)<\/p>/i);
  let address: string | null = null;
  let mapUrl: string | null = null;
  if (addressMatch) {
    // The map link lives inside the address markup, so read it before the tags are stripped.
    const mapLinkMatch = addressMatch[1].match(/href="(\/location-map\/[^"]+)"/);
    if (mapLinkMatch) {
      mapUrl = `${SITE_BASE}${mapLinkMatch[1]}`;
    }
    address =
      htmlToText(addressMatch[1])
        .replace(/\(show location map\)/i, '')
        .replace(/\s+/g, ' ')
        .trim() || null;
  }

  // Phone from the element with id="TELEPHONE".
  const phoneMatch = html.match(/id="TELEPHONE"[^>]*>([\s\S]*?)<\/p>/i);
  const phone = phoneMatch ? htmlToText(phoneMatch[1]) || null : null;

  // Region and city come from the breadcrumb links; the first link of each kind wins.
  const regionLinks = [...html.matchAll(/class="normal"\s+href="[^"]*\/locations\/\d+\/[^"]*"[^>]*>([^<]+)<\/a>/gi)];
  const region = regionLinks.length > 0 ? htmlToText(regionLinks[0][1]) : null;

  const cityLinks = [...html.matchAll(/class="normal"\s+href="[^"]*\/catholic-churches\/\d+\/[^"]*"[^>]*>([^<]+)<\/a>/gi)];
  const city = cityLinks.length > 0 ? htmlToText(cityLinks[0][1]) : null;

  return { name, address, region, city, phone, mapUrl };
}
|
||||
|
||||
/**
 * Reads the weekly schedule table of a church page.
 *
 * The first `<tbody>` is scanned row by row. Within a row the cell position
 * gives the weekday (column 0 = Sunday ... column 6 = Saturday; further cells
 * are ignored). From every `<p class="schedule">` entry in a cell only the
 * start time is taken, e.g. "5:00 AM" from "5:00 AM - 6:00 AM".
 *
 * Tags are matched with or without attributes. Matching only bare `<tr>` /
 * `<td>` would drop attributed cells from the cell list and shift every later
 * cell in that row to the wrong weekday.
 *
 * @param html - Full HTML of the church page.
 * @returns Distinct (dayOfWeek, time) pairs in table order; empty when no table body is found.
 */
function parseScheduleTable(html: string): ParsedSchedule[] {
  const schedules: ParsedSchedule[] = [];
  const seen = new Set<string>();

  const tbodyMatch = html.match(/<tbody\b[^>]*>([\s\S]*?)<\/tbody>/i);
  if (!tbodyMatch) return schedules;

  for (const [, rowHtml] of tbodyMatch[1].matchAll(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi)) {
    const cells = [...rowHtml.matchAll(/<td\b[^>]*>([\s\S]*?)<\/td>/gi)].slice(0, 7);

    cells.forEach(([, cellHtml], dayOfWeek) => {
      // A cell may list more than one mass; take the start time of each entry.
      const entries = cellHtml.matchAll(/<p\b[^>]*class="schedule"[^>]*>\s*(\d{1,2}:\d{2}\s*[AP]M)/gi);
      for (const [, startTime] of entries) {
        const time = convertTo24Hour(startTime.trim());
        if (!time) continue;

        // The same slot can appear in several rows; keep it once.
        const key = `${dayOfWeek}:${time}`;
        if (seen.has(key)) continue;
        seen.add(key);

        schedules.push({ dayOfWeek, time });
      }
    });
  }

  return schedules;
}
|
||||
|
||||
/**
 * Converts a 12-hour clock time to zero-padded 24-hour "HH:MM".
 *
 * Examples: "5:00 AM" -> "05:00", "6:30 PM" -> "18:30", "12:00 AM" -> "00:00".
 * Surrounding whitespace is ignored and the AM/PM marker is case-insensitive.
 *
 * @param timeStr - Text such as "5:00 AM".
 * @returns The 24-hour time, or `null` when the text is not in that form or
 *   names an impossible time (hour above 12 or minutes above 59).
 */
function convertTo24Hour(timeStr: string): string | null {
  const match = timeStr.trim().match(/^(\d{1,2}):(\d{2})\s*(AM|PM)$/i);
  if (!match) return null;

  let hours = parseInt(match[1], 10);
  const minutes = match[2];
  const period = match[3].toUpperCase();

  // Without this check "13:00 PM" would become "25:00" and "7:75 AM" would pass through.
  if (hours > 12 || parseInt(minutes, 10) > 59) return null;

  if (period === 'AM' && hours === 12) hours = 0;
  if (period === 'PM' && hours !== 12) hours += 12;

  return `${String(hours).padStart(2, '0')}:${minutes}`;
}
|
||||
|
||||
/**
 * Reads the church position from the map page's inline script, which assigns
 * `ms.ui.church.params.lat = '14.598815'` and the matching `.lng`.
 *
 * @param html - Full HTML of the location-map page.
 * @returns The coordinates, or `null` when either assignment is missing, is
 *   not a finite number, is exactly 0, or lies outside the valid
 *   latitude (-90..90) / longitude (-180..180) range.
 */
function parseCoordinates(html: string): { lat: number; lng: number } | null {
  const latMatch = html.match(/ms\.ui\.church\.params\.lat\s*=\s*'([^']+)'/);
  const lngMatch = html.match(/ms\.ui\.church\.params\.lng\s*=\s*'([^']+)'/);
  if (!latMatch || !lngMatch) return null;

  const lat = parseFloat(latMatch[1]);
  const lng = parseFloat(lngMatch[1]);

  // isFinite rejects NaN as well as "Infinity", which parseFloat accepts.
  if (!Number.isFinite(lat) || !Number.isFinite(lng)) return null;
  // A zero component is treated as "no position recorded".
  if (lat === 0 || lng === 0) return null;
  // Values outside the geographic range cannot be real positions.
  if (Math.abs(lat) > 90 || Math.abs(lng) > 180) return null;

  return { lat, lng };
}
|
||||
|
||||
// ─── Database Operations ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Loads every church stored with country 'PH', restricted to the columns the
 * duplicate matcher works with (name, position, per-source IDs and contact
 * fields).
 *
 * @returns The existing Philippine churches used as the deduplication baseline.
 */
async function loadExistingPhilippineChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing Philippine churches for deduplication...');

  const philippineChurches = await prisma.church.findMany({
    where: { country: 'PH' },
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      // One external-ID column per upstream source.
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
    },
  });

  console.log(`Loaded ${philippineChurches.length} existing Philippine churches`);
  return philippineChurches;
}
|
||||
|
||||
// ─── Import Logic ────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Imports a single church from the site.
 *
 * Steps: fetch and parse the church page, fetch the map page for coordinates,
 * parse the schedule table (unless `skipSchedules`), look for an existing
 * church via `findDuplicateChurch`, then either update the match or create a
 * new church. With `dryRun` the outcome is only logged and counted.
 *
 * Schedule-saving failures are handled the same way in both paths: they are
 * logged and counted in `stats.errors`, and never change how the church itself
 * was counted. Previously a failure while inserting schedules for a newly
 * created church shared the church-creation error handler, so a
 * unique-constraint error there was reported as a skipped church even though
 * the church had been created and counted.
 *
 * @param sitemapEntry - Church page to import.
 * @param existingChurches - Known churches used for deduplication; newly created churches are appended.
 * @param dryRun - When true, nothing is written to the database.
 * @param skipSchedules - When true, mass schedules are neither parsed nor saved.
 * @param stats - Counters updated in place.
 * @throws Rethrows database errors from the church update/create that are not unique-constraint conflicts.
 */
async function processChurch(
  sitemapEntry: SitemapChurch,
  existingChurches: ExistingChurch[],
  dryRun: boolean,
  skipSchedules: boolean,
  stats: ImportStats,
): Promise<void> {
  stats.churchesFound++;

  // Fetch church page
  const churchHtml = await fetchPage(sitemapEntry.url);
  if (!churchHtml) {
    stats.errors++;
    return;
  }

  const parsed = parseChurchPage(churchHtml);
  if (!parsed.name) {
    console.log(` Skipping ${sitemapEntry.id}: no name found`);
    stats.churchesSkipped++;
    return;
  }

  // Fetch coordinates from map page
  let coords: { lat: number; lng: number } | null = null;
  if (parsed.mapUrl) {
    const mapHtml = await fetchPage(parsed.mapUrl);
    if (mapHtml) {
      coords = parseCoordinates(mapHtml);
    }
  }

  if (!coords) {
    console.log(` Skipping ${sitemapEntry.id} (${parsed.name}): no coordinates`);
    stats.churchesSkipped++;
    return;
  }

  // Empty when schedules are skipped, so the length checks below cover both cases.
  const schedules = skipSchedules ? [] : parseScheduleTable(churchHtml);

  // Build candidate for dedup
  const candidate = {
    name: parsed.name,
    lat: coords.lat,
    lng: coords.lng,
    massSchedulesPhId: sitemapEntry.id,
  };

  const duplicate = findDuplicateChurch(candidate, existingChurches);

  if (dryRun) {
    if (duplicate) {
      stats.churchesMatched++;
      console.log(` [MATCH] ${sitemapEntry.id}: "${parsed.name}" → existing "${duplicate.name}" (${duplicate.id})`);
    } else {
      stats.churchesCreated++;
      console.log(` [NEW] ${sitemapEntry.id}: "${parsed.name}" at ${coords.lat},${coords.lng}`);
    }
    if (schedules.length > 0) {
      stats.schedulesProcessed++;
      stats.massSchedulesCreated += schedules.length;
    }
    return;
  }

  if (duplicate) {
    // Update existing church: always record the source ID, fill other fields only when empty.
    stats.churchesMatched++;
    const updateData: Record<string, unknown> = {
      massSchedulesPhId: sitemapEntry.id,
    };

    if (!duplicate.address && parsed.address) updateData.address = parsed.address;
    if (!duplicate.phone && parsed.phone) updateData.phone = parsed.phone;

    // City/state are not part of the in-memory record, so read them before filling from breadcrumbs.
    const dbRecord = await prisma.church.findUnique({
      where: { id: duplicate.id },
      select: { city: true, state: true },
    });
    if (dbRecord && !dbRecord.city && parsed.city) updateData.city = parsed.city;
    if (dbRecord && !dbRecord.state && parsed.region) updateData.state = parsed.region;

    try {
      await prisma.church.update({
        where: { id: duplicate.id },
        data: updateData,
      });
    } catch (error) {
      if (error instanceof Error && error.message.includes('Unique constraint')) {
        console.log(` Skipping ${sitemapEntry.id}: unique constraint conflict updating ${duplicate.id}`);
        stats.churchesSkipped++;
        return;
      }
      throw error;
    }

    // Replace mass schedules atomically: delete the old rows, insert the new ones, stamp the church.
    if (schedules.length > 0) {
      try {
        await prisma.$transaction(async (tx) => {
          await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
          await tx.massSchedule.createMany({
            data: schedules.map((s) => ({
              churchId: duplicate.id,
              dayOfWeek: s.dayOfWeek,
              time: s.time,
              language: 'English',
            })),
          });
          await tx.church.update({
            where: { id: duplicate.id },
            data: { lastScrapedAt: new Date() },
          });
        });
        stats.schedulesProcessed++;
        stats.massSchedulesCreated += schedules.length;
      } catch (error) {
        stats.errors++;
        console.error(` Error saving schedules for ${sitemapEntry.id}: ${error instanceof Error ? error.message : error}`);
      }
    }
  } else {
    // Create new church
    try {
      const newChurch = await prisma.church.create({
        data: {
          name: parsed.name,
          latitude: coords.lat,
          longitude: coords.lng,
          address: parsed.address,
          city: parsed.city || null,
          state: parsed.region || null,
          country: 'PH',
          phone: parsed.phone,
          hasWebsite: false,
          massSchedulesPhId: sitemapEntry.id,
          source: 'mass-schedules-ph',
        },
      });
      stats.churchesCreated++;

      // Add to in-memory array for within-run dedup
      existingChurches.push({
        id: newChurch.id,
        name: parsed.name,
        latitude: coords.lat,
        longitude: coords.lng,
        osmId: null,
        baiduId: null,
        masstimesId: null,
        orarimesseId: null,
        massSchedulesPhId: sitemapEntry.id,
        philmassId: null,
        horariosMisasId: null,
        mszeInfoId: null,
        weekdayMassesId: null,
        messesInfoId: null,
        bohosluzbyId: null,
        miserendId: null,
        kerknetId: null,
        gottesdienstzeitenId: null,
        discovermassId: null,
        source: 'mass-schedules-ph',
        website: null,
        phone: parsed.phone,
        address: parsed.address,
      });

      // Create mass schedules. Failures here are handled locally so they are
      // not mistaken for a church-level unique-constraint conflict below.
      if (schedules.length > 0) {
        try {
          await prisma.massSchedule.createMany({
            data: schedules.map((s) => ({
              churchId: newChurch.id,
              dayOfWeek: s.dayOfWeek,
              time: s.time,
              language: 'English',
            })),
          });
          await prisma.church.update({
            where: { id: newChurch.id },
            data: { lastScrapedAt: new Date() },
          });
          stats.schedulesProcessed++;
          stats.massSchedulesCreated += schedules.length;
        } catch (error) {
          stats.errors++;
          console.error(` Error saving schedules for ${sitemapEntry.id}: ${error instanceof Error ? error.message : error}`);
        }
      }
    } catch (error) {
      if (error instanceof Error && error.message.includes('Unique constraint')) {
        console.log(` Skipping ${sitemapEntry.id}: unique constraint conflict creating church`);
        stats.churchesSkipped++;
        return;
      }
      throw error;
    }
  }
}
|
||||
|
||||
// ─── CLI ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Parses the command line (`process.argv`) into `CLIArgs`.
 *
 * Exits the process with status 0 after printing help, and with status 1 when
 * neither `--all` nor `--church-id` is given, when a flag that needs a value
 * has none, or when `--resume-from` is not a non-negative integer.
 *
 * Value checks matter here: without them `--church-id --dry-run` would take
 * "--dry-run" as the church ID (and leave dry-run off), and an unparsable
 * `--resume-from` would silently restart the import from the beginning.
 * Unrecognised arguments are still ignored.
 *
 * @returns The parsed options.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = {
    all: false,
    dryRun: false,
    skipSchedules: false,
  };

  // Returns the value following `flag`, or exits when it is missing or is itself a flag.
  const requireValue = (flag: string, value: string | undefined): string => {
    if (value === undefined || value.startsWith('--')) {
      console.error(`Error: ${flag} requires a value`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    switch (flag) {
      case '--all':
        result.all = true;
        break;
      case '--church-id':
        result.churchId = requireValue(flag, args[++i]);
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--skip-schedules':
        result.skipSchedules = true;
        break;
      case '--resume-from': {
        const raw = requireValue(flag, args[++i]);
        if (!/^\d+$/.test(raw)) {
          console.error(`Error: --resume-from expects a non-negative integer, got "${raw}"`);
          process.exit(1);
        }
        result.resumeFrom = parseInt(raw, 10);
        break;
      }
      case '--job-id':
        result.jobId = requireValue(flag, args[++i]);
        break;
      case '--help':
      case '-h':
        console.log(`
Usage: npx tsx scripts/import-mass-schedules-ph.ts [options]

Options:
  --all                Import all churches from sitemap
  --church-id <id>     Import a single church by ID (e.g. "34")
  --dry-run            No database writes, just report what would happen
  --skip-schedules     Skip mass schedule import (churches only)
  --resume-from <id>   Skip churches with ID less than this value
  --job-id <uuid>      Background job tracking ID
  --help, -h           Show this help message

Examples:
  npx tsx scripts/import-mass-schedules-ph.ts --church-id 34 --dry-run
  npx tsx scripts/import-mass-schedules-ph.ts --all
  npx tsx scripts/import-mass-schedules-ph.ts --all --skip-schedules
  npx tsx scripts/import-mass-schedules-ph.ts --all --resume-from 500
`);
        process.exit(0);
    }
  }

  if (!result.all && !result.churchId) {
    console.error('Error: specify --all or --church-id <id>');
    process.exit(1);
  }

  return result;
}
|
||||
|
||||
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Renders a millisecond duration as "Xh Ym Zs", "Ym Zs" or "Zs", using the
 * largest unit that is non-zero. Fractions of a second are dropped.
 *
 * @param ms - Duration in milliseconds.
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const hours = Math.floor(totalMinutes / 60);
  const secondsPart = `${totalSeconds % 60}s`;

  if (hours > 0) {
    return `${hours}h ${totalMinutes % 60}m ${secondsPart}`;
  }
  if (totalMinutes > 0) {
    return `${totalMinutes}m ${secondsPart}`;
  }
  return `${totalSeconds}s`;
}
|
||||
|
||||
// ─── Main ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Entry point: parses the CLI options, loads the existing Philippine churches,
 * builds the list of church pages (from the sitemap, or a single synthesised
 * entry for `--church-id`), processes them one by one and prints a summary.
 *
 * When `--job-id` is given, the background job record is marked running at
 * the start and completed at the end. Those updates stay best-effort, but a
 * failure is now logged as a warning instead of being swallowed silently.
 *
 * Errors from an individual church are counted and logged without stopping
 * the run; failures while loading existing churches or the sitemap propagate
 * to the caller.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  const startTime = Date.now();
  const banner = '='.repeat(70);

  console.log('\n' + banner);
  console.log('MASS-SCHEDULES.COM (PHILIPPINES) IMPORTER');
  console.log(banner);
  console.log(`Mode: ${args.all ? 'All churches from sitemap' : `Single church: ${args.churchId}`}`);
  console.log(`Dry run: ${args.dryRun ? 'YES (no DB writes)' : 'NO'}`);
  console.log(`Skip schedules: ${args.skipSchedules ? 'YES' : 'NO'}`);
  if (args.resumeFrom) console.log(`Resume from ID: ${args.resumeFrom}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log(banner + '\n');

  // Update background job status if provided
  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (error) {
      // Job might not exist yet; keep going but leave a trace.
      console.warn(`Warning: could not mark job ${args.jobId} as running: ${error instanceof Error ? error.message : error}`);
    }
  }

  // Load existing Philippine churches for dedup
  const existingChurches = await loadExistingPhilippineChurches();

  // Build church list: skip sitemap for single-church mode
  let churchesToProcess: SitemapChurch[];
  if (args.churchId) {
    // Single church: construct URL directly with a placeholder slug, no sitemap needed
    churchesToProcess = [{
      id: args.churchId,
      slug: 'church',
      url: `${SITE_BASE}/catholic-church/${args.churchId}/church.html`,
    }];
    console.log(`Single church mode: ID ${args.churchId}\n`);
  } else {
    // Full mode: fetch sitemap
    const allChurches = await fetchChurchUrlsFromSitemap();
    console.log(`Found ${allChurches.length} unique church URLs in sitemap\n`);
    churchesToProcess = allChurches;
  }

  // Handle --resume-from (a value of 0 means "no filtering")
  const resumeFrom = args.resumeFrom;
  if (resumeFrom) {
    const before = churchesToProcess.length;
    churchesToProcess = churchesToProcess.filter((c) => parseInt(c.id, 10) >= resumeFrom);
    console.log(`Resuming from ID ${resumeFrom} (skipping ${before - churchesToProcess.length} churches)\n`);
  }

  const stats: ImportStats = {
    churchesFound: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    schedulesProcessed: 0,
    massSchedulesCreated: 0,
    errors: 0,
  };

  // Process each church; one failing church must not abort the run
  for (let i = 0; i < churchesToProcess.length; i++) {
    const church = churchesToProcess[i];
    const elapsed = formatDuration(Date.now() - startTime);
    console.log(`[${i + 1}/${churchesToProcess.length}] Church ID ${church.id} [${elapsed} elapsed]`);

    try {
      await processChurch(church, existingChurches, args.dryRun, args.skipSchedules, stats);
    } catch (error) {
      stats.errors++;
      console.error(` ERROR processing church ${church.id}: ${error instanceof Error ? error.message : error}`);
    }
  }

  // Print summary
  const totalTime = Date.now() - startTime;
  console.log('\n' + banner);
  console.log(`IMPORT SUMMARY ${args.dryRun ? '(DRY RUN)' : ''}`);
  console.log(banner);
  console.log(`Churches found: ${stats.churchesFound}`);
  console.log(` Matched (existing): ${stats.churchesMatched}`);
  console.log(` Created (new): ${stats.churchesCreated}`);
  console.log(` Skipped: ${stats.churchesSkipped}`);
  console.log(`Schedules processed: ${stats.schedulesProcessed}`);
  console.log(`Mass schedules created: ${stats.massSchedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Total time: ${formatDuration(totalTime)}`);
  console.log(`HTTP requests: ${requestCount}`);
  console.log(banner + '\n');

  // Update background job
  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: {
          status: stats.errors > 0 ? 'completed_with_errors' : 'completed',
          completedAt: new Date(),
          result: JSON.stringify(stats),
        },
      });
    } catch (error) {
      // Best-effort: the import itself has finished, so only warn.
      console.warn(`Warning: could not record result for job ${args.jobId}: ${error instanceof Error ? error.message : error}`);
    }
  }
}
|
||||
|
||||
// Run the importer. On a fatal error only the exit code is set, instead of
// calling process.exit() right away: process.exit() would end the process
// before the cleanup in finally() gets to disconnect Prisma and close the pool.
main()
  .catch((error) => {
    console.error('Fatal error:', error);
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });
|
||||
Reference in New Issue
Block a user