Files
ScraperControl/scripts/dedup-mass-schedules.ts
Albert 6d1c7eb3c5 feat: add forward-geocode to scheduler pipeline
- Wire enrich-with-forward-geocode.ts as scheduler job type
- Add geocode-enrichment pipeline group (500/cycle, post-imports)
- Harden transfer script: skip churches at (0,0) coordinates
- Rewrite dedup-mass-schedules.ts with raw SQL to avoid Prisma 7 stack overflow

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 10:37:08 -04:00

73 lines
2.0 KiB
TypeScript

#!/usr/bin/env tsx
import dotenv from 'dotenv';
import path from 'path';
// Load .env.local from the directory the script is launched from. This must
// run before the pool below is constructed, because that is where
// DATABASE_URL is read.
const envFilePath = path.resolve(process.cwd(), '.env.local');
dotenv.config({ path: envFilePath });
import { Pool } from 'pg';
// Single connection pool for the whole script, configured from DATABASE_URL.
const connectionString = process.env.DATABASE_URL;
const pool = new Pool({ connectionString });
/**
 * Shape of the single summary row returned by the duplicate-count query.
 *
 * Both fields are typed as strings and are converted with `Number()` where a
 * numeric comparison is needed (presumably because node-postgres hands back
 * `COUNT(...)` bigint values as strings - confirm against the driver config).
 */
type CountResult = {
  /** Number of surplus rows, i.e. rows ranked after the first in their group. */
  duplicate_rows: string;
  /** Number of distinct churches that own at least one surplus row. */
  churches_with_dups: string;
};
/**
 * Reports duplicate active rows in `mass_schedules` and, when the script is
 * run with `--execute`, deletes them.
 *
 * Rows are duplicates of each other when they have `is_active = true` and
 * share the same `church_id`, `day_of_week`, `time` and `language`. Within
 * each such group the row with the earliest `created_at` is kept (ties are
 * broken by the lowest `id` so the choice is reproducible between the dry run
 * and the real run); every other row in the group is counted and, in execute
 * mode, deleted.
 *
 * Without `--execute` on the command line nothing is modified; only the
 * counts are printed. The client is released and the pool is closed whether
 * the work succeeds or fails.
 *
 * @returns A promise that resolves once the pool has been closed.
 * @throws Rejects with any error raised while connecting or querying.
 */
async function main(): Promise<void> {
  const dryRun = !process.argv.includes('--execute');
  if (dryRun) {
    console.log('DRY RUN - pass --execute to actually delete duplicates\n');
  }

  // One definition of "duplicate" shared by the count and the delete, so the
  // number reported can never be computed with a different rule than the one
  // used to remove rows.
  const rankedCte = `
    WITH ranked AS (
      SELECT id,
             church_id,
             ROW_NUMBER() OVER (
               PARTITION BY church_id, day_of_week, time, language
               ORDER BY created_at ASC, id ASC
             ) AS rn
      FROM mass_schedules
      WHERE is_active = true
    )`;

  const client = await pool.connect();
  try {
    const countResult = await client.query<CountResult>(`${rankedCte}
      SELECT COUNT(DISTINCT church_id) AS churches_with_dups,
             COUNT(*) AS duplicate_rows
      FROM ranked
      WHERE rn > 1;
    `);

    // An aggregate without GROUP BY yields exactly one row; the guard exists
    // so a missing row fails loudly instead of as a destructuring TypeError.
    const [summary] = countResult.rows;
    if (!summary) {
      throw new Error('Duplicate count query returned no rows');
    }
    const { churches_with_dups, duplicate_rows } = summary;

    console.log(`Churches with duplicate schedules: ${churches_with_dups}`);
    console.log(`Duplicate rows to delete: ${duplicate_rows}\n`);

    if (!dryRun && Number(duplicate_rows) > 0) {
      console.log('Deleting duplicates (keeping oldest by created_at)...');
      const deleteResult = await client.query(`${rankedCte}
        DELETE FROM mass_schedules
        WHERE id IN (SELECT id FROM ranked WHERE rn > 1);
      `);
      // rowCount is nullable in the driver's typings; report 0 rather than "null".
      console.log(`Deleted ${deleteResult.rowCount ?? 0} duplicate mass schedule rows.`);
    }
  } finally {
    client.release();
    await pool.end();
  }
}
/** Logs a rejection from the script's entry point and exits with status 1. */
function handleFatal(err: unknown): void {
  console.error('Fatal error:', err);
  process.exit(1);
}

// Script entry point.
main().catch(handleFatal);