Files
ScraperControl/scripts/normalize-country-codes.ts
Albert 2c51513851 chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored
local-only files: web scrapers, admin dashboard, ChromaDB integration,
debug scripts, and utility libraries that aren't tracked in Gitea.

Gitea master adds: discovermass, buscarmisas-network, hk-parishes,
bohosluzby, kerknet, gottesdienstzeiten, miserend importers,
ClaimRequest model, forward geocoding, heartbeat healthcheck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-12 19:11:22 -04:00

111 lines
3.3 KiB
TypeScript

/**
 * Normalize country codes in the database.
 * Converts full country names to ISO 3166-1 alpha-2 codes.
 *
 * Runs as a dry run (report only, no writes) unless --execute is passed.
 *
 * Usage:
 *   npx tsx scripts/normalize-country-codes.ts --dry-run
 *   npx tsx scripts/normalize-country-codes.ts --execute
 */
import path from 'path';
import dotenv from 'dotenv';
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
import { Pool } from 'pg';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import { normalizeCountryCode } from '../src/lib/country-normalize';
// Database wiring. DATABASE_URL is read from the environment (populated from
// `.env` in the current working directory by the dotenv call above).
const connectionString = process.env.DATABASE_URL;

// A single pg pool is created here and handed to Prisma through the pg driver
// adapter; `main` closes both the client and the pool when it is done.
const pool = new Pool({ connectionString });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
/** A country value that will be rewritten, with the number of rows it affects. */
interface CountryChange {
  original: string;
  normalized: string;
  count: number;
}

/** The distinct country values sorted into the three report buckets. */
interface CountryClassification {
  /** Values for which the normalizer returned something different. */
  pending: { original: string; normalized: string }[];
  /** Values the normalizer left untouched that look like alpha-2 codes. */
  alreadyNormalized: string[];
  /** Values the normalizer left untouched that do not look like alpha-2 codes. */
  unknown: string[];
}

/**
 * Sorts raw country values into buckets based on what `normalizeCountryCode`
 * returns for each of them.
 *
 * NOTE(review): the "already normalized" test is purely on shape (two
 * characters, equal to its own upper-case form). A two-letter value that the
 * normalizer leaves untouched is reported as normalized even if it is not a
 * real ISO code.
 *
 * @param values Distinct, non-null country strings read from the database.
 * @returns The values split into pending changes, already-normalized and unknown.
 */
function classifyCountryValues(values: readonly string[]): CountryClassification {
  const result: CountryClassification = { pending: [], alreadyNormalized: [], unknown: [] };

  for (const country of values) {
    const normalized = normalizeCountryCode(country);

    if (normalized !== country) {
      result.pending.push({ original: country, normalized });
    } else if (country.length === 2 && country === country.toUpperCase()) {
      result.alreadyNormalized.push(country);
    } else {
      result.unknown.push(country);
    }
  }

  return result;
}

/**
 * Script entry point: reports which `church.country` values would be rewritten
 * and, when run with `--execute`, applies the rewrites with one `updateMany`
 * per distinct original value.
 *
 * The Prisma client and the pg pool are always released, including when a
 * query throws; the error is still propagated to the caller.
 */
async function main(): Promise<void> {
  // Writing requires an explicit --execute, and an explicit --dry-run always
  // wins, so passing both flags by mistake can never modify the database.
  const dryRun = process.argv.includes('--dry-run') || !process.argv.includes('--execute');

  try {
    if (dryRun) {
      console.log('DRY RUN — no changes will be made. Use --execute to apply.\n');
    }

    // Get all distinct country values
    const countries = await prisma.church.findMany({
      select: { country: true },
      distinct: ['country'],
      where: { country: { not: null } },
    });
    const countryValues = countries
      .map(c => c.country)
      .filter((c): c is string => c !== null);
    console.log(`Found ${countryValues.length} distinct country values.\n`);

    // Group by normalization result
    const { pending, alreadyNormalized, unknown } = classifyCountryValues(countryValues);

    // Get counts for changes (one count query per value to be rewritten)
    const changes: CountryChange[] = [];
    for (const { original, normalized } of pending) {
      const count = await prisma.church.count({
        where: { country: original },
      });
      changes.push({ original, normalized, count });
    }

    // Report
    const sortedNormalized = [...alreadyNormalized].sort();
    console.log(`Already normalized (${sortedNormalized.length}): ${sortedNormalized.join(', ')}\n`);

    if (changes.length > 0) {
      console.log(`Changes to apply (${changes.length}):`);
      for (const { original, normalized, count } of changes) {
        console.log(` "${original}" → "${normalized}" (${count} churches)`);
      }
      console.log();
    } else {
      console.log('No changes needed — all country values are already normalized.\n');
    }

    if (unknown.length > 0) {
      console.log(`Unknown values (${unknown.length}): ${unknown.join(', ')}`);
      console.log(' These could not be mapped to ISO codes. Review manually.\n');
    }

    // Apply changes
    if (!dryRun && changes.length > 0) {
      let totalUpdated = 0;
      for (const { original, normalized } of changes) {
        const result = await prisma.church.updateMany({
          where: { country: original },
          data: { country: normalized },
        });
        totalUpdated += result.count;
        console.log(`Updated "${original}" → "${normalized}": ${result.count} churches`);
      }
      console.log(`\nTotal updated: ${totalUpdated} churches`);
    }
  } finally {
    // Release connections on both the success and the failure path.
    await prisma.$disconnect();
    await pool.end();
  }
}
// Run the script. Any rejection from main() is printed and turned into a
// non-zero exit status so shells and CI see the failure.
main().catch((failure: unknown) => {
  console.error('Error:', failure);
  process.exit(1);
});