chore: sync with Gitea master and restore local-only files

Reset local main to gitea/master (new source of truth) and restored
local-only files: web scrapers, admin dashboard, ChromaDB integration,
debug scripts, and utility libraries that aren't tracked in Gitea.

Gitea master adds: discovermass, buscarmisas-network, hk-parishes,
bohosluzby, kerknet, gottesdienstzeiten, miserend importers,
ClaimRequest model, forward geocoding, heartbeat healthcheck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Albert
2026-04-12 19:11:22 -04:00
parent 76cca3ba75
commit 2c51513851
133 changed files with 30381 additions and 0 deletions

View File

@@ -0,0 +1,164 @@
import { config } from 'dotenv';
import { PrismaClient } from '@prisma/client';
import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
// Environment files are read in this order: .env.local first, then .env.
for (const path of ['.env.local', '.env']) {
  config({ path });
}

// Fail fast at module load when no database connection string is configured.
const { DATABASE_URL: connectionString } = process.env;
if (!connectionString) {
  throw new Error('DATABASE_URL environment variable is not set');
}

// Prisma client backed by a node-postgres pool via the pg driver adapter.
const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
/**
 * Formats `part / total` as a percentage string with one decimal place.
 *
 * @param {number} part - Numerator.
 * @param {number} total - Denominator.
 * @returns {string} The percentage without a `%` sign; `'0.0'` when `total`
 *   is not positive, so an empty table does not print `NaN`.
 */
function formatPercent(part, total) {
  return total > 0 ? ((part / total) * 100).toFixed(1) : '0.0';
}

/**
 * Prints a section title framed by two horizontal rules.
 *
 * @param {string} title - Text printed between the rules.
 * @returns {void}
 */
function printSectionHeader(title) {
  const rule = '═══════════════════════════════════════════════════════════════';
  console.log(rule);
  console.log(title);
  console.log(rule);
}

/**
 * Prints a console report on the state of mass-schedule scraping.
 *
 * Sections, in order: church counts grouped by `source`, overall scraping
 * totals with percentages of all churches, the ten churches with the most
 * recent `lastScrapedAt`, and the number of churches that have a `website`
 * or `massScheduleUrl` but a null `lastScrapedAt`.
 *
 * Uses the module-level `prisma` client and `pool`; both are closed before
 * the returned promise settles, whether or not a query failed.
 *
 * On failure the error is logged and `process.exitCode` is set to 1. Calling
 * `process.exit(1)` here would terminate immediately and skip the `finally`
 * cleanup.
 *
 * @returns {Promise<void>} Settles once the report is printed (or the error
 *   is logged) and the connections have been closed.
 */
async function checkScraperStatus() {
  try {
    console.log('Checking mass schedule scraper status...\n');

    // A church counts as "having a website" if either URL field is set.
    const hasAnyUrl = [
      { website: { not: null } },
      { massScheduleUrl: { not: null } },
    ];

    // Overall church stats
    const totalChurches = await prisma.church.count();
    const churchesWithWebsites = await prisma.church.count({
      where: { OR: hasAnyUrl },
    });
    const churchesScraped = await prisma.church.count({
      where: { lastScrapedAt: { not: null } },
    });

    // Mass schedule stats
    const totalMassSchedules = await prisma.massSchedule.count();
    const churchesWithSchedules = await prisma.church.count({
      where: { massSchedules: { some: {} } },
    });

    // Recently scraped (last 7 days)
    const weekAgo = new Date();
    weekAgo.setDate(weekAgo.getDate() - 7);
    const recentlyScraped = await prisma.church.count({
      where: { lastScrapedAt: { gte: weekAgo } },
    });

    // Church counts per data source
    const bySource = await prisma.church.groupBy({
      by: ['source'],
      _count: { id: true },
    });

    printSectionHeader('CHURCH DATA SOURCES');
    for (const group of bySource) {
      // A null `source` would make `.padEnd` throw; label it instead.
      // NOTE(review): whether `source` is nullable is not visible here.
      const label = String(group.source ?? 'unknown');
      const count = group._count.id;
      const percent = formatPercent(count, totalChurches);
      console.log(`${label.padEnd(12)} | ${String(count).padStart(7)} churches (${percent}%)`);
    }
    console.log('');

    printSectionHeader('MASS SCHEDULE SCRAPING STATUS');
    console.log(`Total churches: ${totalChurches.toLocaleString()}`);
    console.log(`Churches with websites: ${churchesWithWebsites.toLocaleString()} (${formatPercent(churchesWithWebsites, totalChurches)}%)`);
    console.log(`Churches ever scraped: ${churchesScraped.toLocaleString()} (${formatPercent(churchesScraped, totalChurches)}%)`);
    console.log(`Churches with mass schedules: ${churchesWithSchedules.toLocaleString()} (${formatPercent(churchesWithSchedules, totalChurches)}%)`);
    console.log(`Total mass schedules: ${totalMassSchedules.toLocaleString()}`);
    console.log('');
    console.log(`Scraped in last 7 days: ${recentlyScraped.toLocaleString()}`);
    console.log('');

    // Average schedules per church (skipped when no church has schedules)
    if (churchesWithSchedules > 0) {
      const avgSchedules = totalMassSchedules / churchesWithSchedules;
      console.log(`Average schedules per church: ${avgSchedules.toFixed(1)} masses/week`);
      console.log('');
    }

    // Ten most recently scraped churches
    const recentSample = await prisma.church.findMany({
      where: { lastScrapedAt: { not: null } },
      select: {
        name: true,
        city: true,
        state: true,
        country: true,
        lastScrapedAt: true,
        website: true,
        source: true,
        _count: { select: { massSchedules: true } },
      },
      orderBy: { lastScrapedAt: 'desc' },
      take: 10,
    });

    printSectionHeader('RECENTLY SCRAPED CHURCHES (Last 10)');
    if (recentSample.length === 0) {
      console.log('No churches have been scraped yet.');
    } else {
      recentSample.forEach((church, index) => {
        // Missing location parts are dropped rather than printed as "null".
        const location = [church.city, church.state, church.country].filter(Boolean).join(', ');
        console.log(`${index + 1}. ${church.name} (${location})`);
        console.log(` Source: ${church.source}`);
        console.log(` Website: ${church.website || 'None'}`);
        console.log(` Last scraped: ${church.lastScrapedAt?.toLocaleString() || 'Never'}`);
        console.log(` Mass schedules: ${church._count.massSchedules}`);
        console.log('');
      });
    }

    // Churches ready to scrape (have a URL, never scraped)
    const readyToScrape = await prisma.church.count({
      where: {
        OR: hasAnyUrl,
        lastScrapedAt: null,
      },
    });

    printSectionHeader('SCRAPING POTENTIAL');
    console.log(`Churches ready to scrape: ${readyToScrape.toLocaleString()}`);
    console.log(` (have website, never scraped)`);
    console.log('');
  } catch (error) {
    console.error('Error:', error);
    // Not process.exit(1): that would bypass the cleanup in `finally`.
    process.exitCode = 1;
  } finally {
    // Close the pool even if disconnecting the Prisma client throws.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}
// Script entry point: run the status report as soon as this module is evaluated.
checkScraperStatus();