// Excerpt of scripts/import-buscarmisas-network.ts, reconstructed from a git diff
// (index 8b4f5d9..5cd889d). Only the changed regions and their immediate context
// are visible; code between the diff hunks is not shown here.

import { findDuplicateChurch } from '../src/lib/church-matcher';
import type { ExistingChurch } from '../src/lib/church-matcher';
import { getDayNamesForCountry, buildDayPatterns } from '../src/scrapers/i18n/day-names';

// `||` (not `??`) so that an empty DATABASE_URL also falls back to the local default.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
// Log the target with the `:password@` portion masked.
console.log(`Connecting to: ${dbUrl.replace(/:[^:@]+@/, ':***@')}`);
const pool = new Pool({
  connectionString: dbUrl,
  // NOTE(review): for any URL containing "neon", TLS is enabled but certificate
  // verification is turned off (`rejectUnauthorized: false`) — confirm this is intended.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

// ─── Site Config ─────────────────────────────────────────────────────────────

// interface SiteConfig { …   (this declaration and the code after it, up to `sleep`,
//                             lie outside the visible diff hunks)

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

// ─── DB Helpers ───────────────────────────────────────────────────────────────

/**
 * Loads every church stored for `country` so new records can be matched against them.
 *
 * Selects the id, name, coordinates, all per-source external ID columns, and the
 * source/contact fields, and logs how many rows were loaded.
 *
 * @param country Value matched exactly against the `country` column.
 * @returns The selected rows, asserted to the `ExistingChurch` shape.
 */
async function loadExistingChurches(country: string): Promise<ExistingChurch[]> {
  console.log(`Loading existing ${country} churches from DB...`);
  const churches = await prisma.church.findMany({
    where: { country },
    select: {
      id: true, name: true, latitude: true, longitude: true,
      osmId: true, baiduId: true, masstimesId: true,
      orarimesseId: true, massSchedulesPhId: true, philmassId: true,
      horariosMisasId: true, mszeInfoId: true, weekdayMassesId: true,
      messesInfoId: true, bohosluzbyId: true, miserendId: true,
      kerknetId: true, gottesdienstzeitenId: true, discovermassId: true,
      buscarmisasNetworkId: true,
      source: true, website: true, phone: true, address: true, country: true,
    },
  });
  console.log(` Loaded ${churches.length} existing ${country} churches`);
  return churches as ExistingChurch[];
}

// ─── Church Processing 
// ────────────────────────────────────────────────────────

/**
 * Fetches one church page, parses it, and creates or updates the corresponding
 * church row together with its mass schedule.
 *
 * Any error thrown while fetching, parsing or writing is caught, logged and
 * counted in `stats.errors`; it is not rethrown.
 *
 * @param url Page to fetch.
 * @param domain Passed through to `parseChurchPage`.
 * @param config Site configuration; `country` drives schedule parsing and
 *   `language` selects the language label stored on each mass row.
 * @param existingChurches In-memory list used for duplicate detection. Mutated:
 *   a matched entry is updated in place, a newly created church is appended.
 * @param args CLI options; only `dryRun` is read here.
 * @param stats Running counters, mutated in place.
 */
async function processChurch(
  url: string,
  domain: string,
  config: SiteConfig,
  existingChurches: ExistingChurch[],
  args: CLIArgs,
  stats: ImportStats,
): Promise<void> {
  stats.total++;
  try {
    const html = await fetchWithRetry(url);
    const parsed = parseChurchPage(html, domain, url, config);
    if (!parsed) {
      console.log(` [skip] No name/coords: ${url}`);
      stats.skipped++;
      return;
    }

    const masses = parseMassSchedule(html, config.country);

    if (args.dryRun) {
      console.log(` [dry-run] ${parsed.name} — ${masses.length} masses`);
      return;
    }

    const massLanguage = config.language === 'pt' ? 'Portuguese' : 'Spanish';

    const candidate = {
      name: parsed.name,
      lat: parsed.lat,
      lng: parsed.lng,
      buscarmisasNetworkId: parsed.externalId,
    };
    const duplicate = findDuplicateChurch(candidate, existingChurches);

    if (duplicate) {
      // Only fill in coordinates when the stored ones are unset (0) and the page has real ones.
      const fillCoords = parsed.lat !== 0 && duplicate.latitude === 0;
      // One patch for the whole row: the external ID, the scrape timestamp, and
      // whichever missing fields this page can supply.
      const churchPatch = {
        buscarmisasNetworkId: parsed.externalId,
        lastScrapedAt: new Date(),
        ...(!duplicate.phone && parsed.phone ? { phone: parsed.phone } : {}),
        ...(fillCoords ? { latitude: parsed.lat, longitude: parsed.lng } : {}),
      };

      await prisma.$transaction(async (tx) => {
        await tx.church.update({ where: { id: duplicate.id }, data: churchPatch });
        // An empty parse leaves the existing schedule untouched rather than wiping it.
        if (masses.length > 0) {
          await tx.massSchedule.deleteMany({ where: { churchId: duplicate.id } });
          await tx.massSchedule.createMany({
            data: masses.map(m => ({
              churchId: duplicate.id,
              dayOfWeek: m.dayOfWeek,
              time: m.time,
              language: massLanguage,
              notes: null,
            })),
          });
        }
      });

      // Mirror the committed changes into the in-memory entry so later duplicate
      // checks in this run see the same data as the database.
      duplicate.buscarmisasNetworkId = parsed.externalId;
      if (!duplicate.phone && parsed.phone) duplicate.phone = parsed.phone;
      if (fillCoords) {
        duplicate.latitude = parsed.lat;
        duplicate.longitude = parsed.lng;
      }
      stats.updated++;
    } else {
      // Create the church and its schedule atomically: if the schedule insert
      // fails, no half-imported church row is left behind.
      const church = await prisma.$transaction(async (tx) => {
        const created = await tx.church.create({
          data: {
            name: parsed.name,
            address: parsed.address,
            city: parsed.city,
            state: parsed.state,
            country: parsed.country,
            phone: parsed.phone,
            latitude: parsed.lat,
            longitude: parsed.lng,
            buscarmisasNetworkId: parsed.externalId,
            source: 'buscarmisas-network',
            hasWebsite: false,
            // The scrape timestamp is only recorded when a schedule was found.
            ...(masses.length > 0 ? { lastScrapedAt: new Date() } : {}),
          },
        });
        if (masses.length > 0) {
          await tx.massSchedule.createMany({
            data: masses.map(m => ({
              churchId: created.id,
              dayOfWeek: m.dayOfWeek,
              time: m.time,
              language: massLanguage,
              notes: null,
            })),
          });
        }
        return created;
      });

      // Register the new church for duplicate detection only after the commit succeeded.
      existingChurches.push({
        id: church.id, name: parsed.name, latitude: parsed.lat, longitude: parsed.lng,
        osmId: null, baiduId: null, masstimesId: null, orarimesseId: null,
        massSchedulesPhId: null, philmassId: null, horariosMisasId: null,
        mszeInfoId: null, weekdayMassesId: null, messesInfoId: null,
        bohosluzbyId: null, miserendId: null, kerknetId: null,
        gottesdienstzeitenId: null, discovermassId: null,
        buscarmisasNetworkId: parsed.externalId,
        source: 'buscarmisas-network', website: null, phone: parsed.phone,
        address: parsed.address, country: parsed.country,
      });
      stats.created++;
    }

    stats.massSchedulesCreated += masses.length;
    console.log(
      ` [${duplicate ? 'update' : 'create'}] ${parsed.name} — ${masses.length} masses — ` +
      `${stats.total} total (${stats.created}↑ ${stats.updated}↻ ${stats.errors}✗)`
    );
  } catch (err) {
    stats.errors++;
    console.error(` [error] ${url}: ${err instanceof Error ? err.message : err}`);
  }
}

// ─── Sitemap Discovery ────────────────────────────────────────────────────────

/**