feat: add buscarmisas-network importer — CLI + main loop

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-03-19 23:47:41 -04:00
parent dde083c32e
commit 2b37c2d5f2

View File

@@ -394,3 +394,141 @@ export async function getChurchUrls(domain: string, config: SiteConfig): Promise
console.log(` Total church URLs: ${unique.length}`); console.log(` Total church URLs: ${unique.length}`);
return unique; return unique;
} }
// ─── CLI ──────────────────────────────────────────────────────────────────────
/**
 * Parses the importer's command-line flags from `process.argv`.
 *
 * Recognised flags:
 *   --domain <domain>     import a single site
 *   --all                 import every configured site
 *   --dry-run             sets `dryRun`
 *   --resume-from <n>     non-negative integer, defaults to 0
 *   --limit <n>           non-negative integer, defaults to null (no limit)
 *   --job-id <id>         id of the background job row to update
 *
 * Unrecognised arguments are ignored.
 *
 * @returns The parsed arguments, with defaults for every flag that was not given.
 * @throws Error when a value-taking flag has no value (end of argv, or the next
 *   token is another `--flag`) or when a numeric flag is not a non-negative integer.
 */
function parseCLIArgs(): CLIArgs {
  const argv = process.argv.slice(2);
  const result: CLIArgs = { domain: null, all: false, dryRun: false, resumeFrom: 0, limit: null, jobId: null };

  // Returns the token following the flag at `flagIndex`; a missing token or
  // another flag in that position means the user forgot the value.
  const valueAfter = (flagIndex: number): string => {
    const value = argv[flagIndex + 1];
    if (value === undefined || value.startsWith('--')) {
      throw new Error(`Missing value for ${argv[flagIndex]}`);
    }
    return value;
  };

  // Like valueAfter, but requires plain decimal digits so that inputs such as
  // "abc" (NaN), "12abc" (silently truncated) or "-3" (negative slice index) are rejected.
  const countAfter = (flagIndex: number): number => {
    const raw = valueAfter(flagIndex);
    const parsed = /^\d+$/.test(raw) ? parseInt(raw, 10) : NaN;
    if (!Number.isSafeInteger(parsed)) {
      throw new Error(`${argv[flagIndex]} expects a non-negative integer, got "${raw}"`);
    }
    return parsed;
  };

  for (let i = 0; i < argv.length; i++) {
    switch (argv[i]) {
      case '--domain':
        result.domain = valueAfter(i);
        i++; // skip the consumed value
        break;
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--resume-from':
        result.resumeFrom = countAfter(i);
        i++;
        break;
      case '--limit':
        result.limit = countAfter(i);
        i++;
        break;
      case '--job-id':
        result.jobId = valueAfter(i);
        i++;
        break;
    }
  }
  return result;
}
/**
 * Validates parsed CLI arguments, printing a message to stderr and terminating
 * the process with exit code 1 on the first problem found.
 *
 * Checks, in order:
 *  - either `--domain` or `--all` was given;
 *  - the domain, when given, is one of NETWORK_SITES' own keys;
 *  - `resumeFrom` and `limit` are non-negative integers;
 *  - `--resume-from` is not combined with `--all`.
 *
 * @param args Arguments as produced by the CLI parser.
 */
function validateArgs(args: CLIArgs): void {
  if (!args.domain && !args.all) {
    console.error('Usage:');
    console.error(' npx tsx scripts/import-buscarmisas-network.ts --domain <domain>');
    console.error(' npx tsx scripts/import-buscarmisas-network.ts --all');
    console.error('\nValid domains:', Object.keys(NETWORK_SITES).join(', '));
    process.exit(1);
  }

  // A plain `NETWORK_SITES[domain]` lookup also finds inherited members such as
  // "constructor" or "toString", so require an own property as well.
  const isKnownDomain =
    !!args.domain &&
    Object.prototype.hasOwnProperty.call(NETWORK_SITES, args.domain) &&
    !!NETWORK_SITES[args.domain];
  if (args.domain && !isKnownDomain) {
    console.error(`Unknown domain: ${args.domain}`);
    console.error('Valid domains:', Object.keys(NETWORK_SITES).join(', '));
    process.exit(1);
  }

  // NaN (from an unparsable number) or a negative value would silently change
  // what the later `slice` calls select, so reject them up front.
  if (!Number.isInteger(args.resumeFrom) || args.resumeFrom < 0) {
    console.error(`--resume-from must be a non-negative integer (got ${args.resumeFrom}).`);
    process.exit(1);
  }
  if (args.limit !== null && (!Number.isInteger(args.limit) || args.limit < 0)) {
    console.error(`--limit must be a non-negative integer (got ${args.limit}).`);
    process.exit(1);
  }

  if (args.all && args.resumeFrom > 0) {
    console.error('--resume-from cannot be used with --all. Use --domain to resume a specific site.');
    process.exit(1);
  }
}
/**
 * Runs the import for one network site.
 *
 * Fetches the site's church URLs, drops the first `args.resumeFrom` of them,
 * removes URLs whose external id is already recorded on an existing church,
 * applies `args.limit` if set, and then processes the remaining URLs one at a
 * time with a pause of REQUEST_DELAY_MS between consecutive requests.
 *
 * @param domain Site domain being imported.
 * @param config Site configuration; `config.country` selects the existing churches to load.
 * @param args   Parsed CLI arguments (`resumeFrom` and `limit` are read here).
 * @returns The stats object handed to `processChurch` for every URL
 *   (presumably mutated there; this function never updates it itself).
 */
async function runDomain(domain: string, config: SiteConfig, args: CLIArgs): Promise<ImportStats> {
  const stats: ImportStats = {
    total: 0,
    created: 0,
    updated: 0,
    skipped: 0,
    errors: 0,
    massSchedulesCreated: 0,
  };

  const allUrls = await getChurchUrls(domain, config);
  const existingChurches = await loadExistingChurches(config.country);

  // External ids that already exist, collected into a Set for constant-time lookups.
  const importedIds = new Set(
    existingChurches.flatMap(church => (church.buscarmisasNetworkId ? [church.buscarmisasNetworkId] : [])),
  );

  const notYetImported = allUrls
    .slice(args.resumeFrom)
    .filter(url => !importedIds.has(buildExternalId(domain, url)));
  const candidateUrls = args.limit === null ? notYetImported : notYetImported.slice(0, args.limit);

  console.log(`\n${domain}: ${allUrls.length} total | ${importedIds.size} already imported | ${candidateUrls.length} to process\n`);

  const lastIndex = candidateUrls.length - 1;
  for (const [index, url] of candidateUrls.entries()) {
    console.log(`[${index + 1}/${candidateUrls.length}] ${url}`);
    await processChurch(url, domain, config, existingChurches, args, stats);
    // Pause between requests, but not after the final one.
    if (index !== lastIndex) await sleep(REQUEST_DELAY_MS);
  }

  return stats;
}
// ─── Main ─────────────────────────────────────────────────────────────────────
/**
 * Script entry point: parses and validates CLI arguments, runs the import for
 * the selected domain(s), prints a summary and, when `--job-id` was given,
 * records progress on the matching background job row.
 *
 * The job is marked 'failed' when the run aborted with an exception or when
 * more than 10% of processed items errored; otherwise it is marked 'completed'.
 * Database connections are closed in all cases. An exception raised during the
 * import is re-thrown to the caller after cleanup.
 */
async function main(): Promise<void> {
  const args = parseCLIArgs();
  validateArgs(args);

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (err) {
      // Best-effort: the job row may not exist yet, so warn and carry on.
      console.warn(`Could not mark background job ${args.jobId} as running:`, err);
    }
  }

  const domainsToRun: [string, SiteConfig][] = args.all
    ? Object.entries(NETWORK_SITES)
    : [[args.domain!, NETWORK_SITES[args.domain!]]];

  const totalStats: ImportStats = { total: 0, created: 0, updated: 0, skipped: 0, errors: 0, massSchedulesCreated: 0 };

  // Set when the import loop throws, so the job is not reported as 'completed'
  // merely because the error ratio of the partial totals is low.
  let aborted = false;

  try {
    for (let d = 0; d < domainsToRun.length; d++) {
      const [domain, config] = domainsToRun[d];
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`Domain ${d + 1}/${domainsToRun.length}: ${domain} (${config.country})`);
      console.log('─'.repeat(60));

      const stats = await runDomain(domain, config, args);
      totalStats.total += stats.total;
      totalStats.created += stats.created;
      totalStats.updated += stats.updated;
      totalStats.skipped += stats.skipped;
      totalStats.errors += stats.errors;
      totalStats.massSchedulesCreated += stats.massSchedulesCreated;

      // Pause between sites, but not after the last one.
      if (d < domainsToRun.length - 1) await sleep(DOMAIN_DELAY_MS);
    }
  } catch (err) {
    aborted = true;
    throw err;
  } finally {
    console.log('\n─── Import Complete ───────────────────────────────────────');
    if (aborted) {
      console.error('Run aborted by an error; the totals below cover completed domains only.');
    }
    console.log(`Total processed: ${totalStats.total}`);
    console.log(`Created: ${totalStats.created}`);
    console.log(`Updated: ${totalStats.updated}`);
    console.log(`Skipped: ${totalStats.skipped}`);
    console.log(`Errors: ${totalStats.errors}`);
    console.log(`Mass schedules: ${totalStats.massSchedulesCreated}`);

    if (args.jobId) {
      const tooManyErrors = totalStats.errors > totalStats.total * 0.1;
      const status = aborted || tooManyErrors ? 'failed' : 'completed';
      try {
        await prisma.backgroundJob.update({
          where: { id: args.jobId },
          data: {
            status,
            completedAt: new Date(),
            processed: totalStats.total,
            succeeded: totalStats.created + totalStats.updated,
            failed: totalStats.errors,
            itemsFound: totalStats.massSchedulesCreated,
          },
        });
      } catch (err) {
        // Best-effort: a failed status write must not hide the import result.
        console.warn(`Could not record final status for background job ${args.jobId}:`, err);
      }
    }

    // Close each resource independently so that one failing close neither
    // skips the other nor replaces an error thrown by the import itself.
    try {
      await prisma.$disconnect();
    } catch (err) {
      console.warn('Failed to disconnect Prisma client:', err);
    }
    try {
      await pool.end();
    } catch (err) {
      console.warn('Failed to close connection pool:', err);
    }
  }
}
// Start the importer; any rejection from main() is reported and turned into a
// non-zero exit code.
main().catch((fatal: unknown) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});