chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
146
scripts/debug/check-enrichment-status.ts
Normal file
146
scripts/debug/check-enrichment-status.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
import { config } from 'dotenv';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
|
||||
// Read the dotenv files in order: .env.local first, then .env.
for (const path of ['.env.local', '.env']) {
  config({ path });
}

// The connection string is mandatory; abort at load time when it is absent.
const { DATABASE_URL: connectionString } = process.env;
if (!connectionString) {
  throw new Error('DATABASE_URL environment variable is not set');
}

// Prisma reaches Postgres through a node-postgres pool wrapped by the pg adapter.
const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
/** Enrichment throughput (churches per day) assumed by the timeline estimate. */
const ENRICHMENT_RATE_PER_DAY = 390;

/** Number of milliseconds in one day. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/** Horizontal rule printed above and below every section title. */
const SECTION_RULE = '═══════════════════════════════════════════════════════════════';

/**
 * Formats `part / whole` as a percentage (without the `%` sign).
 *
 * @param {number} part - Numerator.
 * @param {number} whole - Denominator; when it is 0 the result is zero instead of "NaN".
 * @param {number} digits - Number of decimal places.
 * @returns {string} The percentage with `digits` decimals.
 */
function formatPercent(part, whole, digits) {
  const value = whole > 0 ? (part / whole) * 100 : 0;
  return value.toFixed(digits);
}

/**
 * Prints a section title framed by two horizontal rules.
 *
 * @param {string} title - Text shown between the rules.
 */
function printSectionHeader(title) {
  console.log(SECTION_RULE);
  console.log(title);
  console.log(SECTION_RULE);
}

/**
 * Prints an enrichment report for churches whose `source` is 'osm':
 * overall counts, a per-country breakdown for the priority countries and a
 * completion estimate based on ENRICHMENT_RATE_PER_DAY.
 *
 * Errors are logged and reported through `process.exitCode = 1` rather than
 * `process.exit(1)`, so the `finally` block always gets to close the Prisma
 * client and the pg pool before the process ends.
 *
 * @returns {Promise<void>} Settles once the report is printed and the
 *   database connections are closed.
 */
async function checkEnrichmentStatus() {
  try {
    console.log('Checking enrichment status...\n');

    // Exactly 24 hours ago (calendar-day arithmetic would drift across DST changes).
    const since = new Date(Date.now() - MS_PER_DAY);

    // The overall counts are independent of each other, so run them concurrently.
    const [totalOSM, enriched, withWebsite, needEnrichment, recentlyEnriched] = await Promise.all([
      prisma.church.count({
        where: { source: 'osm' },
      }),
      prisma.church.count({
        where: { source: 'osm', googlePlaceId: { not: null } },
      }),
      prisma.church.count({
        where: { source: 'osm', hasWebsite: true },
      }),
      prisma.church.count({
        where: { source: 'osm', hasWebsite: false, website: null },
      }),
      prisma.church.count({
        where: {
          source: 'osm',
          googlePlaceId: { not: null },
          updatedAt: { gte: since },
        },
      }),
    ]);

    // Get top 10 priority countries status
    const PRIORITY_COUNTRIES = ['FR', 'DE', 'ES', 'PL', 'BR', 'PT', 'PH', 'CZ', 'MX', 'HU'];

    printSectionHeader('OVERALL ENRICHMENT STATUS');
    console.log(`Total OSM churches: ${totalOSM.toLocaleString()}`);
    console.log(`Churches with Google Place ID: ${enriched.toLocaleString()} (${formatPercent(enriched, totalOSM, 2)}%)`);
    console.log(`Churches with websites: ${withWebsite.toLocaleString()} (${formatPercent(withWebsite, totalOSM, 2)}%)`);
    console.log(`Need enrichment: ${needEnrichment.toLocaleString()} (${formatPercent(needEnrichment, totalOSM, 2)}%)`);
    console.log('');
    console.log(`Recently enriched (24h): ${recentlyEnriched.toLocaleString()}`);
    console.log('');

    // Priority countries breakdown
    printSectionHeader('TOP 10 PRIORITY COUNTRIES STATUS');
    console.log('');

    // Countries are processed one after another so rows print in list order;
    // the four counts of a single country run concurrently.
    for (const country of PRIORITY_COUNTRIES) {
      const [total, countryEnriched, countryWithWebsite, countryNeedEnrichment] = await Promise.all([
        prisma.church.count({
          where: { source: 'osm', country },
        }),
        prisma.church.count({
          where: { source: 'osm', country, googlePlaceId: { not: null } },
        }),
        // NOTE(review): unlike the overall "with websites" figure, this filter
        // also counts churches that only have a Google Place ID. Kept as-is;
        // confirm whether that is intended.
        prisma.church.count({
          where: {
            source: 'osm',
            country,
            OR: [
              { hasWebsite: true },
              { googlePlaceId: { not: null } },
            ],
          },
        }),
        prisma.church.count({
          where: { source: 'osm', country, hasWebsite: false, website: null },
        }),
      ]);

      const websitePercent = formatPercent(countryWithWebsite, total, 1);
      const enrichedPercent = formatPercent(countryEnriched, total, 1);

      console.log(`${country.padEnd(4)} | Total: ${String(total).padStart(6)} | Enriched: ${String(countryEnriched).padStart(5)} (${enrichedPercent}%) | With Website: ${String(countryWithWebsite).padStart(5)} (${websitePercent}%) | Need: ${String(countryNeedEnrichment).padStart(6)}`);
    }

    console.log('');

    // Estimate timeline
    const daysRemaining = Math.ceil(needEnrichment / ENRICHMENT_RATE_PER_DAY);
    const monthsRemaining = (daysRemaining / 30).toFixed(1);
    const completionDate = new Date(Date.now() + daysRemaining * MS_PER_DAY);

    printSectionHeader('TIMELINE ESTIMATE');
    console.log(`At ${ENRICHMENT_RATE_PER_DAY} churches/day:`);
    console.log(` Days remaining: ${daysRemaining} days`);
    console.log(` Months remaining: ~${monthsRemaining} months`);
    console.log(` Estimated completion: ${completionDate.toLocaleDateString()}`);
    console.log('');
  } catch (error) {
    console.error('Error:', error);
    // Calling process.exit() here would terminate before `finally` runs and
    // skip the cleanup below; setting the exit code lets the process wind down.
    process.exitCode = 1;
  } finally {
    // Close the pool even when disconnecting the Prisma client fails.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}
|
||||
|
||||
// Run the report immediately. The rejection handler covers failures the
// function does not handle itself (errors raised while closing the database
// connections in its `finally` block), so the promise is never left floating.
checkEnrichmentStatus().catch((error) => {
  console.error('Error:', error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user