chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
164
scripts/debug/check-scraper-status.ts
Normal file
164
scripts/debug/check-scraper-status.ts
Normal file
@@ -0,0 +1,164 @@
|
||||
import { config } from 'dotenv';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
|
||||
// Read environment files in order: .env.local first, then .env.
for (const path of ['.env.local', '.env']) {
  config({ path });
}

const { DATABASE_URL: connectionString } = process.env;

// Fail fast: every query below needs a database connection string.
if (!connectionString) {
  throw new Error('DATABASE_URL environment variable is not set');
}

// Prisma is wired to Postgres through a pg Pool wrapped in the PrismaPg adapter.
const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
/**
 * Prints a console report on mass-schedule scraping coverage.
 *
 * Sections, in order:
 *   1. church counts grouped by `source`,
 *   2. overall scraping totals with percentages of all churches,
 *   3. the ten most recently scraped churches,
 *   4. how many churches have a website or schedule URL but were never scraped.
 *
 * Every query is a read-only `count` / `groupBy` / `findMany` and none depends
 * on another's result, so they are issued together and awaited as a group.
 *
 * On failure the error is logged and the process exit code is set to 1.
 * The Prisma client and the pg pool are released on both success and failure.
 *
 * @returns {Promise<void>} resolves once the report is printed and the
 *   connections are closed.
 */
async function checkScraperStatus() {
  const rule = '═══════════════════════════════════════════════════════════════';

  try {
    console.log('Checking mass schedule scraper status...\n');

    // Cut-off for "recently scraped": seven days before now.
    const weekAgo = new Date();
    weekAgo.setDate(weekAgo.getDate() - 7);

    const [
      totalChurches,
      churchesWithWebsites,
      churchesScraped,
      totalMassSchedules,
      churchesWithSchedules,
      recentlyScraped,
      bySource,
      recentSample,
      readyToScrape,
    ] = await Promise.all([
      // Overall church stats
      prisma.church.count(),
      prisma.church.count({
        where: {
          OR: [
            { website: { not: null } },
            { massScheduleUrl: { not: null } },
          ],
        },
      }),
      prisma.church.count({
        where: { lastScrapedAt: { not: null } },
      }),
      // Mass schedule stats
      prisma.massSchedule.count(),
      prisma.church.count({
        where: {
          massSchedules: {
            some: {},
          },
        },
      }),
      // Recently scraped (last 7 days)
      prisma.church.count({
        where: {
          lastScrapedAt: { gte: weekAgo },
        },
      }),
      // Church counts per data source
      prisma.church.groupBy({
        by: ['source'],
        _count: {
          id: true,
        },
      }),
      // Sample of the most recently scraped churches
      prisma.church.findMany({
        where: {
          lastScrapedAt: { not: null },
        },
        select: {
          name: true,
          city: true,
          state: true,
          country: true,
          lastScrapedAt: true,
          website: true,
          source: true,
          _count: {
            select: {
              massSchedules: true,
            },
          },
        },
        orderBy: { lastScrapedAt: 'desc' },
        take: 10,
      }),
      // Churches ready to scrape (have website, not scraped)
      prisma.church.count({
        where: {
          OR: [
            { website: { not: null } },
            { massScheduleUrl: { not: null } },
          ],
          lastScrapedAt: null,
        },
      }),
    ]);

    // Share of all churches as a one-decimal percentage. An empty table would
    // otherwise divide by zero and print "NaN%".
    const percentOfTotal = (count = 0) =>
      totalChurches > 0 ? ((count / totalChurches) * 100).toFixed(1) : '0.0';

    console.log(rule);
    console.log('CHURCH DATA SOURCES');
    console.log(rule);
    bySource.forEach((source) => {
      const percent = percentOfTotal(source._count.id);
      console.log(`${source.source.padEnd(12)} | ${String(source._count.id).padStart(7)} churches (${percent}%)`);
    });
    console.log('');

    console.log(rule);
    console.log('MASS SCHEDULE SCRAPING STATUS');
    console.log(rule);
    console.log(`Total churches: ${totalChurches.toLocaleString()}`);
    console.log(`Churches with websites: ${churchesWithWebsites.toLocaleString()} (${percentOfTotal(churchesWithWebsites)}%)`);
    console.log(`Churches ever scraped: ${churchesScraped.toLocaleString()} (${percentOfTotal(churchesScraped)}%)`);
    console.log(`Churches with mass schedules: ${churchesWithSchedules.toLocaleString()} (${percentOfTotal(churchesWithSchedules)}%)`);
    console.log(`Total mass schedules: ${totalMassSchedules.toLocaleString()}`);
    console.log('');
    console.log(`Scraped in last 7 days: ${recentlyScraped.toLocaleString()}`);
    console.log('');

    // Average schedules per church (skipped when no church has a schedule).
    if (churchesWithSchedules > 0) {
      const avgSchedules = totalMassSchedules / churchesWithSchedules;
      console.log(`Average schedules per church: ${avgSchedules.toFixed(1)} masses/week`);
      console.log('');
    }

    console.log(rule);
    console.log('RECENTLY SCRAPED CHURCHES (Last 10)');
    console.log(rule);
    if (recentSample.length === 0) {
      console.log('No churches have been scraped yet.');
    } else {
      recentSample.forEach((church, index) => {
        // Drop missing location parts so the label has no dangling commas.
        const location = [church.city, church.state, church.country].filter(Boolean).join(', ');
        console.log(`${index + 1}. ${church.name} (${location})`);
        console.log(` Source: ${church.source}`);
        console.log(` Website: ${church.website || 'None'}`);
        console.log(` Last scraped: ${church.lastScrapedAt?.toLocaleString() || 'Never'}`);
        console.log(` Mass schedules: ${church._count.massSchedules}`);
        console.log('');
      });
    }

    console.log(rule);
    console.log('SCRAPING POTENTIAL');
    console.log(rule);
    console.log(`Churches ready to scrape: ${readyToScrape.toLocaleString()}`);
    console.log(` (have website, never scraped)`);
    console.log('');
  } catch (error) {
    console.error('Error:', error);
    // Set the exit code instead of calling process.exit(): exiting here would
    // terminate the process before the cleanup in `finally` could run.
    process.exitCode = 1;
  } finally {
    // Close the pool even if disconnecting the Prisma client throws.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}
|
||||
|
||||
// Script entry point: run the report as soon as the module is loaded.
// The returned promise is not awaited here.
checkScraperStatus();
|
||||
Reference in New Issue
Block a user