Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
166 lines
7.1 KiB
TypeScript
166 lines
7.1 KiB
TypeScript
import { config } from 'dotenv';
|
|
import { PrismaClient } from '@prisma/client';
|
|
import { Pool } from 'pg';
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
|
|
|
// Environment files, read in this order: `.env.local` first, then `.env`.
// NOTE(review): dotenv does not override variables that are already set by
// default, so values from `.env.local` presumably win — confirm against the
// installed dotenv version.
for (const path of ['.env.local', '.env']) {
  config({ path });
}

// The script cannot do anything without a database, so fail fast at startup.
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
  throw new Error('DATABASE_URL environment variable is not set');
}

// Prisma talks to Postgres through the pg driver adapter, backed by a shared
// connection pool that the script closes explicitly when it finishes.
const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
|
|
|
/**
 * Per-country tallies and derived scores used to rank countries for website
 * enrichment.
 */
interface CountryStats {
  /** Country value taken from the church record, or `'Unknown'` when it is empty. */
  country: string;
  /** Number of churches counted for this country. */
  totalChurches: number;
  /** Churches that have a truthy `hasWebsite` flag or a truthy `website` value. */
  withWebsite: number;
  /** Churches with neither a `hasWebsite` flag nor a `website` value. */
  withoutWebsite: number;
  /** `withWebsite / totalChurches`, expressed as a percentage (0–100). */
  websitePercent: number;
  /** Churches still to be enriched; counted in lockstep with `withoutWebsite`. */
  needEnrichment: number;
  /** Ranking score; countries are processed from highest to lowest. */
  priority: number;
}
|
|
|
|
/** Daily enrichment throughput used for the timeline estimate (reported as the free tier rate). */
const ENRICHMENT_CHURCHES_PER_DAY = 390;

/** Horizontal rule printed above and below each report section. */
const SECTION_RULE = '═══════════════════════════════════════════════════════════════════════════';

/**
 * Returns a copy of `stat` with `websitePercent` and `priority` filled in.
 *
 * priority = (needEnrichment / 1000) * (0.8 + 0.2 * coverageGap), where
 * coverageGap is the fraction of churches without a website. The absolute
 * number of churches needing enrichment dominates; low coverage adds up to
 * 20% on top, so large countries with poor coverage rank first.
 */
function scoreCountry(stat: CountryStats): CountryStats {
  const websitePercent = (stat.withWebsite / stat.totalChurches) * 100;
  const needWeight = stat.needEnrichment / 1000; // Normalize to thousands
  const coverageGap = 100 - websitePercent; // Percentage points of coverage missing
  const priority = needWeight * 0.8 + (coverageGap / 100) * needWeight * 0.2;
  return { ...stat, websitePercent, priority };
}

/** Renders `value` as a single-quoted literal, escaping backslashes and quotes. */
function toSingleQuotedLiteral(value: string): string {
  return `'${value.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;
}

/** Prints the full ranking table, one row per country, in the given order. */
function printRankingTable(stats: readonly CountryStats[]): void {
  console.log(SECTION_RULE);
  console.log('ENRICHMENT PRIORITY RANKING');
  console.log(SECTION_RULE);
  console.log('');
  console.log('Priority formula: (churches_needing_enrichment / 1000) * (0.8 + 0.2 * coverage_gap_fraction)');
  console.log('This favors countries with many churches and low website coverage.');
  console.log('');
  console.log('Rank | Country | Total | Need Enrichment | Coverage | Priority Score');
  console.log('─────┼─────────┼───────┼────────────────┼──────────┼────────────────');

  stats.forEach((stat, index) => {
    const rank = String(index + 1).padStart(4);
    const country = stat.country.padEnd(7);
    const total = String(stat.totalChurches).padStart(5);
    const need = String(stat.needEnrichment).padStart(15);
    const coverage = `${stat.websitePercent.toFixed(1)}%`.padStart(8);
    const priority = stat.priority.toFixed(2).padStart(14);

    console.log(`${rank} | ${country} | ${total} | ${need} | ${coverage} | ${priority}`);
  });

  console.log('');
  console.log(SECTION_RULE);
  console.log('');
}

/** Prints a detailed breakdown for the first ten countries in `stats`. */
function printTopCountries(stats: readonly CountryStats[]): void {
  console.log('TOP 10 COUNTRIES TO PRIORITIZE:');
  console.log('');

  stats.slice(0, 10).forEach((stat, index) => {
    console.log(`${index + 1}. ${stat.country}`);
    console.log(` Total churches: ${stat.totalChurches.toLocaleString()}`);
    console.log(` Need enrichment: ${stat.needEnrichment.toLocaleString()} (${(100 - stat.websitePercent).toFixed(1)}% missing)`);
    console.log(` Current coverage: ${stat.websitePercent.toFixed(1)}%`);
    console.log(` Priority score: ${stat.priority.toFixed(2)}`);
    console.log('');
  });
}

/** Prints how long enriching every remaining church would take at the daily rate. */
function printTimeline(stats: readonly CountryStats[]): void {
  const totalNeedEnrichment = stats.reduce((sum, s) => sum + s.needEnrichment, 0);
  const daysAtFullSpeed = Math.ceil(totalNeedEnrichment / ENRICHMENT_CHURCHES_PER_DAY);
  const monthsAtFullSpeed = (daysAtFullSpeed / 30).toFixed(1);

  console.log(SECTION_RULE);
  console.log('ENRICHMENT TIMELINE');
  console.log(SECTION_RULE);
  console.log(`Total churches needing enrichment: ${totalNeedEnrichment.toLocaleString()}`);
  console.log(`At ${ENRICHMENT_CHURCHES_PER_DAY} churches/day (free tier): ${daysAtFullSpeed} days (~${monthsAtFullSpeed} months)`);
  console.log('');
}

/** Prints a copy-pasteable `COUNTRY_PRIORITY` array of countries that still need enrichment. */
function printPriorityArray(stats: readonly CountryStats[]): void {
  // Computed once; the last-element check below needs the filtered length.
  const pending = stats.filter((s) => s.needEnrichment > 0);

  console.log(SECTION_RULE);
  console.log('COUNTRY PRIORITY ORDER (for enrichment script)');
  console.log(SECTION_RULE);
  console.log('');
  console.log('const COUNTRY_PRIORITY = [');
  pending.forEach((stat, index) => {
    const comma = index < pending.length - 1 ? ',' : '';
    console.log(` ${toSingleQuotedLiteral(stat.country)}${comma} // ${stat.needEnrichment.toLocaleString()} churches`);
  });
  console.log('];');
  console.log('');
}

/**
 * Ranks countries by how much website enrichment their OSM-sourced churches
 * still need and prints the report to stdout: a ranking table, details for
 * the top ten, a timeline estimate, and a ready-to-paste priority array.
 *
 * Never rejects because of a query or reporting failure: such errors are
 * logged and `process.exitCode` is set to 1. The Prisma client and the pg
 * pool are closed on both the success and the failure path.
 */
async function analyzeEnrichmentPriority(): Promise<void> {
  try {
    console.log('Analyzing enrichment priority by country...\n');

    // Get all OSM churches with just the fields needed for the tallies
    const churches = await prisma.church.findMany({
      where: {
        source: 'osm',
      },
      select: {
        country: true,
        hasWebsite: true,
        website: true,
      },
    });

    // Group by country. A Map keeps insertion order and is safe for
    // arbitrary key strings, unlike a plain object used as a dictionary.
    const byCountry = new Map<string, CountryStats>();
    for (const church of churches) {
      // `||` on purpose: an empty country string is treated like a missing one.
      const country = church.country || 'Unknown';

      let entry = byCountry.get(country);
      if (!entry) {
        entry = {
          country,
          totalChurches: 0,
          withWebsite: 0,
          withoutWebsite: 0,
          websitePercent: 0,
          needEnrichment: 0,
          priority: 0,
        };
        byCountry.set(country, entry);
      }

      entry.totalChurches++;
      if (church.hasWebsite || church.website) {
        entry.withWebsite++;
      } else {
        entry.withoutWebsite++;
        entry.needEnrichment++;
      }
    }

    // Score every country, then sort by priority (highest first)
    const stats = [...byCountry.values()]
      .map(scoreCountry)
      .sort((a, b) => b.priority - a.priority);

    printRankingTable(stats);
    printTopCountries(stats);
    printTimeline(stats);
    printPriorityArray(stats);
  } catch (error) {
    console.error('Error:', error);
    // Set the exit code instead of calling process.exit(1): exiting here
    // would terminate the process before the `finally` block below runs,
    // leaving the database connections unclosed.
    process.exitCode = 1;
  } finally {
    try {
      await prisma.$disconnect();
    } finally {
      // Close the pool even if disconnecting the Prisma client throws.
      await pool.end();
    }
  }
}
|
|
|
|
// Script entry point. The promise is handled explicitly so that a rejection
// (for example from connection cleanup) is logged and yields a failing exit
// code instead of being left as a floating promise.
analyzeEnrichmentPriority().catch((error: unknown) => {
  console.error('Error:', error);
  process.exitCode = 1;
});
|