Files
ScraperControl/scripts/transfer-enriched-to-neon.ts
Albert 9d0af3289a feat: throttle Neon transfer with smaller batches + 1s delay
Reduce BATCH_SIZE from 200 to 100 and add a 1-second pause between
batches to avoid overwhelming the Neon production database during
large incremental syncs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-26 11:09:02 -04:00

324 lines
13 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env tsx
/**
* Transfer enriched church data from Synology NAS to Neon production
*
* This script transfers ONLY churches that have been enriched or scraped
* (have a website, phone number, Google place ID, or mass schedules) to reduce data transfer.
*
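* Usage:
* npx tsx scripts/transfer-enriched-to-neon.ts # Dry run (nothing is written)
* npx tsx scripts/transfer-enriched-to-neon.ts --execute # Incremental transfer since the last recorded transfer
* npx tsx scripts/transfer-enriched-to-neon.ts --execute --force-all # Transfer all enriched churches
* npx tsx scripts/transfer-enriched-to-neon.ts --execute --since 2026-02-01T00:00:00Z # Transfer churches modified since a date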
*/
import { PrismaClient } from '@prisma/client';
import { PrismaPg } from '@prisma/adapter-pg';
import { Pool } from 'pg';
import dotenv from 'dotenv';
import path from 'path';
/** Running counters accumulated by main() and printed in the transfer summary. */
interface TransferStats {
/** Churches read from the NAS and attempted; incremented before any write, in dry runs too. */
churchesProcessed: number;
/** Churches created in Neon because no record with the same latitude/longitude existed. */
churchesInserted: number;
/** Churches that already existed in Neon and were updated in place. */
churchesUpdated: number;
/** Mass schedules created in Neon, or (dry run) the number found on the NAS churches. */
massSchedules: number;
/** Confession schedules created in Neon, or (dry run) the number found on the NAS churches. */
confessionSchedules: number;
/** Adoration schedules created in Neon, or (dry run) the number found on the NAS churches. */
adorationSchedules: number;
/** Churches whose transfer threw; the error is logged and the run continues. */
errors: number;
}
/**
 * Copies enriched churches and their mass/confession/adoration schedules from
 * the NAS database (DATABASE_URL from `.env`) to the Neon database
 * (DATABASE_URL from `.env.production`).
 *
 * CLI flags, read from `process.argv`:
 * - `--execute`        perform writes; without it the run is a dry run that only counts.
 * - `--since <date>`   only transfer churches whose `updatedAt` is after `<date>`.
 * - `--force-all`      ignore any incremental filter and transfer every enriched church.
 *
 * With neither `--since` nor `--force-all`, the newest `lastTransferredAt` found
 * on the NAS is used as the incremental cut-off (or everything, if none exists).
 *
 * Failures for an individual church are logged and counted, and the run continues.
 * Any other failure is logged and terminates the process with exit code 1.
 */
async function main(): Promise<void> {
  const RULE = '════════════════════════════════════════════════════════════';
  const BATCH_SIZE = 100;
  const BATCH_PAUSE_MS = 1000;

  // Parse CLI arguments
  const args = process.argv.slice(2);
  const dryRun = !args.includes('--execute');
  const forceAll = args.includes('--force-all');
  const sinceIndex = args.indexOf('--since');
  const sinceValue = sinceIndex !== -1 ? args[sinceIndex + 1] : undefined;
  const sinceTimestamp = sinceValue ? new Date(sinceValue) : null;

  // An unparseable date yields an Invalid Date whose toISOString() throws a
  // RangeError later on; reject it here with a clear message instead.
  if (sinceTimestamp && Number.isNaN(sinceTimestamp.getTime())) {
    console.error(`❌ Invalid --since value "${sinceValue}" - expected a date such as 2026-02-01T00:00:00Z`);
    process.exit(1);
  }

  console.log(RULE);
  console.log(' Transfer Enriched Data: Synology NAS → Neon Production');
  console.log(`${RULE}\n`);

  if (dryRun) {
    console.log('🔍 DRY RUN MODE - No data will be written to Neon\n');
  } else {
    console.log('⚠️ PRODUCTION MODE - Data will be written to Neon');
    console.log('Press Ctrl+C within 5 seconds to cancel...\n');
    await new Promise(resolve => setTimeout(resolve, 5000));
  }

  if (forceAll) {
    console.log('🔄 FORCE ALL MODE - Transferring all enriched churches\n');
  } else if (sinceTimestamp) {
    console.log(`📅 INCREMENTAL MODE - Only churches modified since ${sinceTimestamp.toISOString()}\n`);
  } else {
    console.log('📅 AUTO INCREMENTAL MODE - Detecting last transfer timestamp...\n');
  }

  // Step 1: Connect to NAS database
  console.log('[1/3] Connecting to Synology NAS database...');
  dotenv.config({ path: path.resolve(process.cwd(), '.env') });
  // Remember the NAS URL so the Neon URL loaded later can be checked against it.
  const nasDatabaseUrl = process.env.DATABASE_URL;
  const nasPool = new Pool({ connectionString: nasDatabaseUrl });
  const nasAdapter = new PrismaPg(nasPool);
  const nasPrisma = new PrismaClient({ adapter: nasAdapter });

  try {
    await nasPrisma.$connect();
    // Log only the host part of the URL (text between '@' and the next '/').
    const nasUrl = nasDatabaseUrl?.split('@')[1]?.split('/')[0] || 'unknown';
    console.log(`✅ Connected to NAS: ${nasUrl}\n`);

    // Detect last transfer timestamp if not specified
    let transferSince: Date | null = sinceTimestamp;
    if (!forceAll && !sinceTimestamp) {
      // Auto-detect: find the most recent lastTransferredAt across all churches
      const lastTransfer = await nasPrisma.church.findFirst({
        where: { lastTransferredAt: { not: null } },
        orderBy: { lastTransferredAt: 'desc' },
        select: { lastTransferredAt: true }
      });
      if (lastTransfer?.lastTransferredAt) {
        transferSince = lastTransfer.lastTransferredAt;
        console.log(`✅ Last transfer detected: ${transferSince.toISOString()}`);
        console.log(` Will transfer churches modified after this time\n`);
      } else {
        console.log(' No previous transfer detected - will transfer all enriched churches\n');
      }
    }

    // Step 2: Export enriched churches from NAS
    console.log('[2/3] Exporting enriched churches from NAS...');
    console.log('Criteria: Has website OR phone OR google_place_id OR mass schedules\n');

    // Build WHERE clause: any enrichment signal, excluding churches at (0, 0).
    const whereClause: any = {
      OR: [
        { website: { not: null } },
        { phone: { not: null } },
        { googlePlaceId: { not: null } },
        { massSchedules: { some: {} } },
      ],
      NOT: { latitude: 0, longitude: 0 },
    };

    // Add incremental filter if applicable
    if (!forceAll && transferSince) {
      whereClause.AND = { updatedAt: { gt: transferSince } };
      console.log(`🔄 Incremental filter: updatedAt > ${transferSince.toISOString()}\n`);
    }

    const totalCount = await nasPrisma.church.count({ where: whereClause });
    console.log(`Found ${totalCount} enriched churches (will process in batches of ${BATCH_SIZE})\n`);

    if (totalCount === 0) {
      console.log('⚠️ No enriched churches to transfer');
      await nasPrisma.$disconnect();
      return;
    }

    // Step 3: Import to Neon
    console.log('[3/3] Importing to Neon production database...');

    // Load Neon credentials; `override: true` replaces the NAS DATABASE_URL.
    dotenv.config({ path: path.resolve(process.cwd(), '.env.production'), override: true });
    const neonDatabaseUrl = process.env.DATABASE_URL;

    // If .env.production is missing or does not set DATABASE_URL, the value is
    // still the NAS one and the "Neon" client would write to the source database.
    if (neonDatabaseUrl === nasDatabaseUrl) {
      throw new Error(
        '.env.production did not provide a DATABASE_URL different from the NAS one; ' +
        'refusing to use the NAS database as the Neon target'
      );
    }

    const neonPool = new Pool({ connectionString: neonDatabaseUrl });
    const neonAdapter = new PrismaPg(neonPool);
    const neonPrisma = new PrismaClient({ adapter: neonAdapter });

    try {
      await neonPrisma.$connect();
      const neonUrl = neonDatabaseUrl?.split('@')[1]?.split('/')[0] || 'unknown';
      console.log(`✅ Connected to Neon: ${neonUrl}\n`);

      const stats: TransferStats = {
        churchesProcessed: 0,
        churchesInserted: 0,
        churchesUpdated: 0,
        massSchedules: 0,
        confessionSchedules: 0,
        adorationSchedules: 0,
        errors: 0,
      };

      for (let skip = 0; skip < totalCount; skip += BATCH_SIZE) {
        const churches = await nasPrisma.church.findMany({
          where: whereClause,
          include: {
            massSchedules: true,
            confessionSchedules: true,
            adorationSchedules: true,
          },
          skip,
          take: BATCH_SIZE,
          orderBy: { id: 'asc' },
        });

        console.log(`\nBatch ${Math.floor(skip / BATCH_SIZE) + 1}: processing ${churches.length} churches (${skip + 1}-${skip + churches.length} of ${totalCount})`);

        for (const church of churches) {
          try {
            stats.churchesProcessed++;
            const massSchedules = church.massSchedules || [];
            const confessionSchedules = church.confessionSchedules || [];
            const adorationSchedules = church.adorationSchedules || [];

            // Strip relations and NAS-local columns (id, timestamps, lastTransferredAt)
            // so that only the remaining church fields are written to Neon.
            const { massSchedules: _, confessionSchedules: __, adorationSchedules: ___, id, createdAt, updatedAt, lastTransferredAt, ...churchData } = church;

            if (!dryRun) {
              // Check if church exists in Neon (matched on exact coordinates)
              const existing = await neonPrisma.church.findFirst({
                where: {
                  latitude: church.latitude,
                  longitude: church.longitude,
                }
              });

              let resultId: string;
              if (existing) {
                // Update existing church: NAS values win when present, otherwise
                // the existing Neon value is kept. Name is always overwritten.
                await neonPrisma.church.update({
                  where: { id: existing.id },
                  data: {
                    website: churchData.website || existing.website,
                    phone: churchData.phone || existing.phone,
                    googlePlaceId: churchData.googlePlaceId || existing.googlePlaceId,
                    name: churchData.name,
                    address: churchData.address || existing.address,
                    city: churchData.city || existing.city,
                    state: churchData.state || existing.state,
                    zip: churchData.zip || existing.zip,
                    massScheduleUrl: churchData.massScheduleUrl || existing.massScheduleUrl,
                    lastTransferredAt: new Date(), // Mark as transferred
                  }
                });
                resultId = existing.id;
                stats.churchesUpdated++;

                // Delete old schedules; they are re-created from the NAS copy below.
                // NOTE(review): delete + re-create is not wrapped in a transaction, so a
                // failure part-way leaves this church with incomplete schedules in Neon.
                await neonPrisma.massSchedule.deleteMany({ where: { churchId: existing.id } });
                await neonPrisma.confessionSchedule.deleteMany({ where: { churchId: existing.id } });
                await neonPrisma.adorationSchedule.deleteMany({ where: { churchId: existing.id } });
              } else {
                // Create new church
                const newChurch = await neonPrisma.church.create({
                  data: {
                    ...churchData,
                    lastTransferredAt: new Date(), // Mark as transferred
                  }
                });
                resultId = newChurch.id;
                stats.churchesInserted++;
              }

              // Insert schedules, re-pointing each one at the Neon church id
              for (const schedule of massSchedules) {
                const { id, createdAt, updatedAt, ...scheduleData } = schedule;
                await neonPrisma.massSchedule.create({
                  data: { ...scheduleData, churchId: resultId }
                });
                stats.massSchedules++;
              }
              for (const schedule of confessionSchedules) {
                const { id, createdAt, updatedAt, ...scheduleData } = schedule;
                await neonPrisma.confessionSchedule.create({
                  data: { ...scheduleData, churchId: resultId }
                });
                stats.confessionSchedules++;
              }
              for (const schedule of adorationSchedules) {
                const { id, createdAt, updatedAt, ...scheduleData } = schedule;
                await neonPrisma.adorationSchedule.create({
                  data: { ...scheduleData, churchId: resultId }
                });
                stats.adorationSchedules++;
              }

              // Update NAS record with transfer timestamp (after successful transfer to Neon)
              await nasPrisma.church.update({
                where: { id: church.id },
                data: { lastTransferredAt: new Date() }
              });
            } else {
              // Dry run - just count
              stats.massSchedules += massSchedules.length;
              stats.confessionSchedules += confessionSchedules.length;
              stats.adorationSchedules += adorationSchedules.length;
            }

            if (stats.churchesProcessed % 100 === 0) {
              console.log(`Progress: ${stats.churchesProcessed}/${totalCount} churches...`);
            }
          } catch (error) {
            // Best effort: record the failure and continue with the next church.
            stats.errors++;
            console.error(`Error transferring ${church.name}:`, error instanceof Error ? error.message : error);
          }
        }

        // Brief pause between batches to avoid overwhelming Neon. Skipped in dry
        // runs (nothing is written) and after the final batch (nothing follows).
        if (!dryRun && skip + BATCH_SIZE < totalCount) {
          await new Promise(resolve => setTimeout(resolve, BATCH_PAUSE_MS));
        }
      } // end batch loop

      console.log(`\n${RULE}`);
      console.log('Transfer Summary');
      console.log(RULE);
      if (!forceAll && transferSince) {
        console.log(`Transfer mode: Incremental (since ${transferSince.toISOString()})`);
      } else {
        console.log(`Transfer mode: Full (all enriched churches)`);
      }
      console.log(`Churches processed: ${stats.churchesProcessed}`);
      console.log(`Churches inserted: ${stats.churchesInserted}`);
      console.log(`Churches updated: ${stats.churchesUpdated}`);
      console.log(`Mass schedules: ${stats.massSchedules}`);
      console.log(`Confession schedules: ${stats.confessionSchedules}`);
      console.log(`Adoration schedules: ${stats.adorationSchedules}`);
      console.log(`Errors: ${stats.errors}`);
      console.log(`${RULE}\n`);

      await neonPrisma.$disconnect();
      await nasPrisma.$disconnect();

      if (dryRun) {
        console.log('💡 This was a DRY RUN. To actually transfer to Neon, run:');
        console.log(' Incremental sync (default):');
        console.log(' npx tsx scripts/transfer-enriched-to-neon.ts --execute\n');
        console.log(' Transfer all enriched churches:');
        console.log(' npx tsx scripts/transfer-enriched-to-neon.ts --execute --force-all\n');
        console.log(' Transfer since specific date:');
        console.log(' npx tsx scripts/transfer-enriched-to-neon.ts --execute --since 2026-02-01T00:00:00Z\n');
      } else if (stats.errors > 0) {
        // Do not claim success when some churches failed to transfer.
        console.log(`⚠️ Transfer to Neon production finished with ${stats.errors} error(s) - see the log above\n`);
      } else {
        console.log('🎉 Data successfully transferred to Neon production!\n');
      }
    } catch (error) {
      console.error('❌ Neon import failed:', error);
      await neonPrisma.$disconnect();
      throw error; // handled by the outer catch, which also closes the NAS client
    }
  } catch (error) {
    console.error('❌ Transfer failed:', error);
    await nasPrisma.$disconnect();
    process.exit(1);
  }
}
// Run the script. If main() rejects, log the error and make the process exit
// with a non-zero status so shells and schedulers can detect the failure.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});