feat: add buscarmisas-network importer — CLI + main loop
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -394,3 +394,141 @@ export async function getChurchUrls(domain: string, config: SiteConfig): Promise
|
||||
console.log(` Total church URLs: ${unique.length}`);
|
||||
return unique;
|
||||
}
|
||||
|
||||
// ─── CLI ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Parses the importer's command-line flags from `process.argv`.
 *
 * Recognised flags:
 *   --domain <domain>    sets `domain`
 *   --all                sets `all`
 *   --dry-run            sets `dryRun`
 *   --resume-from <n>    sets `resumeFrom` (non-negative integer)
 *   --limit <n>          sets `limit` (non-negative integer)
 *   --job-id <id>        sets `jobId`
 *
 * Unrecognised arguments are reported on stderr and otherwise ignored.
 *
 * @returns The parsed arguments; flags that were not supplied keep their defaults.
 * @throws Error when a value-taking flag has no value, or when a numeric flag
 *         is not a non-negative integer.
 */
function parseCLIArgs(): CLIArgs {
  const argv = process.argv.slice(2);
  const result: CLIArgs = { domain: null, all: false, dryRun: false, resumeFrom: 0, limit: null, jobId: null };

  // Returns the argument following the flag at `index`. A missing value, or one
  // that is itself a flag, is rejected so that e.g. `--domain --all` does not
  // silently consume `--all` as the domain name.
  const valueAfter = (index: number): string => {
    const value = argv[index + 1];
    if (value === undefined || value.startsWith('--')) {
      throw new Error(`Missing value for ${argv[index]}`);
    }
    return value;
  };

  // Same as `valueAfter`, but the value must consist of decimal digits only.
  // parseInt alone would yield NaN for "abc" and accept "-3" or "10abc", all of
  // which change what Array.prototype.slice does with the number downstream.
  const countAfter = (index: number): number => {
    const raw = valueAfter(index);
    const parsed = /^\d+$/.test(raw) ? parseInt(raw, 10) : NaN;
    if (!Number.isSafeInteger(parsed)) {
      throw new Error(`Invalid value for ${argv[index]}: "${raw}" (expected a non-negative integer)`);
    }
    return parsed;
  };

  for (let i = 0; i < argv.length; i++) {
    switch (argv[i]) {
      case '--domain':
        result.domain = valueAfter(i);
        i++;
        break;
      case '--all':
        result.all = true;
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--resume-from':
        result.resumeFrom = countAfter(i);
        i++;
        break;
      case '--limit':
        result.limit = countAfter(i);
        i++;
        break;
      case '--job-id':
        result.jobId = valueAfter(i);
        i++;
        break;
      default:
        // Surface typos such as `--dryrun` instead of dropping them silently.
        console.warn(`Ignoring unrecognised argument: ${argv[i]}`);
        break;
    }
  }

  return result;
}
|
||||
|
||||
/**
 * Validates parsed CLI arguments. On the first problem found it prints a
 * message to stderr and terminates the process with exit status 1; it returns
 * normally only when every check passes.
 *
 * Checks, in order:
 *   1. either `domain` or `all` was supplied;
 *   2. `domain`, when supplied, is one of NETWORK_SITES' own keys;
 *   3. `resumeFrom` and `limit` are non-negative integers;
 *   4. `resumeFrom` is not combined with `all`.
 *
 * @param args Arguments as produced by the CLI parser.
 */
function validateArgs(args: CLIArgs): void {
  const validDomains = Object.keys(NETWORK_SITES).join(', ');

  if (!args.domain && !args.all) {
    console.error('Usage:');
    console.error(' npx tsx scripts/import-buscarmisas-network.ts --domain <domain>');
    console.error(' npx tsx scripts/import-buscarmisas-network.ts --all');
    console.error('\nValid domains:', validDomains);
    process.exit(1);
  }

  // Own-property check: a plain `NETWORK_SITES[domain]` lookup also finds
  // inherited members, so "constructor" or "toString" would pass as domains.
  if (args.domain && !Object.prototype.hasOwnProperty.call(NETWORK_SITES, args.domain)) {
    console.error(`Unknown domain: ${args.domain}`);
    console.error('Valid domains:', validDomains);
    process.exit(1);
  }

  // NaN (from an unparsable number) and negative values would silently change
  // how the URL list is sliced later, so reject them here.
  if (!Number.isInteger(args.resumeFrom) || args.resumeFrom < 0) {
    console.error('--resume-from must be a non-negative integer.');
    process.exit(1);
  }

  if (args.limit != null && (!Number.isInteger(args.limit) || args.limit < 0)) {
    console.error('--limit must be a non-negative integer.');
    process.exit(1);
  }

  if (args.all && args.resumeFrom > 0) {
    console.error('--resume-from cannot be used with --all. Use --domain to resume a specific site.');
    process.exit(1);
  }
}
|
||||
|
||||
/**
 * Runs the import for a single site.
 *
 * Fetches the site's church URLs, loads the churches already stored for the
 * site's country, drops URLs whose external ID is already present, applies
 * `--resume-from` / `--limit`, then processes the remaining URLs one at a time
 * with a pause of REQUEST_DELAY_MS between consecutive requests.
 *
 * @param domain Site domain being imported.
 * @param config Configuration for that site.
 * @param args   Parsed CLI arguments (`resumeFrom` and `limit` are read here;
 *               the whole object is forwarded to `processChurch`).
 * @returns The stats object handed to `processChurch` for every URL
 *          (presumably updated there — this function itself never modifies it).
 */
async function runDomain(domain: string, config: SiteConfig, args: CLIArgs): Promise<ImportStats> {
  const stats: ImportStats = {
    total: 0,
    created: 0,
    updated: 0,
    skipped: 0,
    errors: 0,
    massSchedulesCreated: 0,
  };

  const allUrls = await getChurchUrls(domain, config);
  const existingChurches = await loadExistingChurches(config.country);

  // External IDs that are already stored, kept in a Set for cheap membership tests.
  const importedIds = new Set(
    existingChurches.flatMap(church => (church.buscarmisasNetworkId ? [church.buscarmisasNetworkId] : [])),
  );

  // The resume offset is applied to the full URL list before already-imported
  // URLs are removed; the limit is applied to what is left afterwards.
  const pendingUrls = allUrls
    .slice(args.resumeFrom)
    .filter(url => !importedIds.has(buildExternalId(domain, url)));
  const candidateUrls = args.limit !== null ? pendingUrls.slice(0, args.limit) : pendingUrls;

  console.log(`\n${domain}: ${allUrls.length} total | ${importedIds.size} already imported | ${candidateUrls.length} to process\n`);

  const lastIndex = candidateUrls.length - 1;
  for (const [index, url] of candidateUrls.entries()) {
    console.log(`[${index + 1}/${candidateUrls.length}] ${url}`);
    await processChurch(url, domain, config, existingChurches, args, stats);

    // No pause after the final URL.
    if (index < lastIndex) await sleep(REQUEST_DELAY_MS);
  }

  return stats;
}
|
||||
|
||||
// ─── Main ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Entry point: parses and validates CLI arguments, runs the import for the
 * selected domain(s), prints a summary, and releases database resources.
 *
 * When `--job-id` is given, the matching background job row is marked
 * `running` at start and `completed`/`failed` at the end. The job is recorded
 * as `failed` when the run aborted with an exception or when more than 10% of
 * processed items errored. Job-row updates are best-effort: a failure to
 * update is logged and does not abort the import.
 *
 * The summary and cleanup run in a `finally` block, so they also execute when
 * a domain run throws; the original error still propagates to the caller.
 */
async function main() {
  const args = parseCLIArgs();
  validateArgs(args);

  if (args.jobId) {
    try {
      await prisma.backgroundJob.update({
        where: { id: args.jobId },
        data: { status: 'running', startedAt: new Date() },
      });
    } catch (err) {
      // Best-effort: the job may not exist yet. Log instead of failing the run.
      console.warn(`Could not mark job ${args.jobId} as running:`, err);
    }
  }

  const domainsToRun: [string, SiteConfig][] = args.all
    ? Object.entries(NETWORK_SITES)
    : [[args.domain!, NETWORK_SITES[args.domain!]]];

  const totalStats: ImportStats = { total: 0, created: 0, updated: 0, skipped: 0, errors: 0, massSchedulesCreated: 0 };

  // Stays false if the loop below throws, so the job is not reported as
  // `completed` after an aborted run (0 errors out of 0 processed would
  // otherwise pass the 10% threshold).
  let finishedNormally = false;

  try {
    for (let d = 0; d < domainsToRun.length; d++) {
      const [domain, config] = domainsToRun[d];
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`Domain ${d + 1}/${domainsToRun.length}: ${domain} (${config.country})`);
      console.log('─'.repeat(60));

      const stats = await runDomain(domain, config, args);
      totalStats.total += stats.total;
      totalStats.created += stats.created;
      totalStats.updated += stats.updated;
      totalStats.skipped += stats.skipped;
      totalStats.errors += stats.errors;
      totalStats.massSchedulesCreated += stats.massSchedulesCreated;

      // Pause between domains, but not after the last one.
      if (d < domainsToRun.length - 1) await sleep(DOMAIN_DELAY_MS);
    }
    finishedNormally = true;
  } finally {
    console.log('\n─── Import Complete ───────────────────────────────────────');
    console.log(`Total processed: ${totalStats.total}`);
    console.log(`Created: ${totalStats.created}`);
    console.log(`Updated: ${totalStats.updated}`);
    console.log(`Skipped: ${totalStats.skipped}`);
    console.log(`Errors: ${totalStats.errors}`);
    console.log(`Mass schedules: ${totalStats.massSchedulesCreated}`);

    if (args.jobId) {
      const tooManyErrors = totalStats.errors > totalStats.total * 0.1;
      const status = !finishedNormally || tooManyErrors ? 'failed' : 'completed';
      try {
        await prisma.backgroundJob.update({
          where: { id: args.jobId },
          data: {
            status,
            completedAt: new Date(),
            processed: totalStats.total,
            succeeded: totalStats.created + totalStats.updated,
            failed: totalStats.errors,
            itemsFound: totalStats.massSchedulesCreated,
          },
        });
      } catch (err) {
        // Best-effort: never let job bookkeeping mask the import's own outcome.
        console.warn(`Could not record final status for job ${args.jobId}:`, err);
      }
    }

    // Always end the pool, even if disconnecting the Prisma client throws.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  }
}
|
||||
|
||||
// Script entry: run the importer; any rejection from main() is reported on
// stderr and turned into a non-zero exit status.
main().catch((fatal: unknown) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|
||||
|
||||
Reference in New Issue
Block a user