Compare commits
3 Commits
master
...
6d1c7eb3c5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d1c7eb3c5 | ||
| 206b64b9b8 | |||
|
|
4609fd97db |
@@ -46,6 +46,7 @@ model Church {
|
||||
gottesdienstzeitenId String? @unique @map("gottesdienstzeiten_id")
|
||||
kerknetId String? @unique @map("kerknet_id")
|
||||
buscarmisasNetworkId String? @unique @map("buscarmisas_network_id")
|
||||
gcatholicId String? @unique @map("gcatholic_id")
|
||||
claimed Boolean @default(false)
|
||||
claimedAt DateTime? @map("claimed_at")
|
||||
lastScrapedAt DateTime? @map("last_scraped_at")
|
||||
@@ -59,6 +60,7 @@ model Church {
|
||||
googleSearchedAt DateTime? @map("google_searched_at") // When Google Places enrichment was attempted
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
parochiaSlug String? @map("parochia_slug")
|
||||
|
||||
dioceseId String? @map("diocese_id")
|
||||
|
||||
@@ -99,6 +101,7 @@ model Church {
|
||||
@@index([gottesdienstzeitenId])
|
||||
@@index([kerknetId])
|
||||
@@index([buscarmisasNetworkId])
|
||||
@@index([gcatholicId])
|
||||
@@index([dioceseId])
|
||||
@@index([claimedByUserId])
|
||||
@@map("churches")
|
||||
|
||||
@@ -3,12 +3,13 @@ import dotenv from 'dotenv';
|
||||
import path from 'path';
|
||||
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
|
||||
const adapter = new PrismaPg(pool);
|
||||
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
/**
 * Row shape of the duplicate-count query issued via `client.query<CountResult>`.
 *
 * Both columns are SQL `COUNT(...)` aggregates and are typed as strings here
 * (node-postgres hands bigint values back as text by default — confirm if a
 * custom type parser is installed); convert with `Number()` before comparing.
 */
interface CountResult {
|
||||
churches_with_dups: string;
|
||||
duplicate_rows: string;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const dryRun = !process.argv.includes('--execute');
|
||||
@@ -17,44 +18,52 @@ async function main() {
|
||||
console.log('DRY RUN - pass --execute to actually delete duplicates\n');
|
||||
}
|
||||
|
||||
const churches = await prisma.church.findMany({
|
||||
where: { massSchedules: { some: { isActive: true } } },
|
||||
include: { massSchedules: { where: { isActive: true }, orderBy: { createdAt: 'asc' } } },
|
||||
});
|
||||
const client = await pool.connect();
|
||||
|
||||
let churchesFixed = 0;
|
||||
let rowsDeleted = 0;
|
||||
try {
|
||||
const countResult = await client.query<CountResult>(`
|
||||
WITH ranked AS (
|
||||
SELECT church_id,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY church_id, day_of_week, time, language
|
||||
ORDER BY created_at ASC
|
||||
) AS rn
|
||||
FROM mass_schedules
|
||||
WHERE is_active = true
|
||||
)
|
||||
SELECT COUNT(DISTINCT church_id) AS churches_with_dups,
|
||||
COUNT(*) AS duplicate_rows
|
||||
FROM ranked
|
||||
WHERE rn > 1;
|
||||
`);
|
||||
|
||||
for (const church of churches) {
|
||||
const seen = new Map<string, string>();
|
||||
const toDelete: string[] = [];
|
||||
const { churches_with_dups, duplicate_rows } = countResult.rows[0];
|
||||
console.log(`Churches with duplicate schedules: ${churches_with_dups}`);
|
||||
// The label reads the same in dry-run and execute mode, so no conditional is needed
// (the previous ternary returned 'delete' from both branches).
console.log(`Duplicate rows to delete: ${duplicate_rows}\n`);
|
||||
|
||||
for (const m of church.massSchedules) {
|
||||
const key = `${m.dayOfWeek}:${m.time}:${m.language}`;
|
||||
if (seen.has(key)) {
|
||||
toDelete.push(m.id);
|
||||
} else {
|
||||
seen.set(key, m.id);
|
||||
if (!dryRun && Number(duplicate_rows) > 0) {
|
||||
console.log('Deleting duplicates (keeping oldest by created_at)...');
|
||||
|
||||
const deleteResult = await client.query(`
|
||||
WITH ranked AS (
|
||||
SELECT id,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY church_id, day_of_week, time, language
|
||||
ORDER BY created_at ASC
|
||||
) AS rn
|
||||
FROM mass_schedules
|
||||
WHERE is_active = true
|
||||
)
|
||||
DELETE FROM mass_schedules
|
||||
WHERE id IN (SELECT id FROM ranked WHERE rn > 1);
|
||||
`);
|
||||
|
||||
console.log(`Deleted ${deleteResult.rowCount} duplicate mass schedule rows.`);
|
||||
}
|
||||
}
|
||||
|
||||
if (toDelete.length > 0) {
|
||||
churchesFixed++;
|
||||
rowsDeleted += toDelete.length;
|
||||
|
||||
if (!dryRun) {
|
||||
await prisma.massSchedule.deleteMany({
|
||||
where: { id: { in: toDelete } },
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Churches with duplicates: ${churchesFixed}`);
|
||||
console.log(`Duplicate rows ${dryRun ? 'found' : 'deleted'}: ${rowsDeleted}`);
|
||||
|
||||
await prisma.$disconnect();
|
||||
} finally {
|
||||
client.release();
|
||||
await pool.end();
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
|
||||
@@ -59,6 +59,13 @@ const PIPELINE_GROUPS: PipelineGroup[] = [
|
||||
{ name: 'masstimes-api-import', type: 'masstimes-api-import', config: {} },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'geocode-enrichment',
|
||||
mode: 'sequential',
|
||||
phases: [
|
||||
{ name: 'forward-geocode', type: 'forward-geocode-enrichment', config: { limit: 500 } },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'scrapers-batch-1',
|
||||
mode: 'parallel',
|
||||
@@ -138,6 +145,12 @@ function getJobCommand(type: string, language?: string | null, config?: Record<s
|
||||
if (config?.country) args.push('--country', String(config.country));
|
||||
return { command: 'npx', args };
|
||||
}
|
||||
case 'forward-geocode-enrichment': {
|
||||
const args = ['tsx', 'scripts/enrich-with-forward-geocode.ts'];
|
||||
if (limit) args.push('--limit', String(limit));
|
||||
if (config?.country) args.push('--country', String(config.country));
|
||||
return { command: 'npx', args };
|
||||
}
|
||||
case 'match-search-results': {
|
||||
const args = ['tsx', 'scripts/match-search-results.ts'];
|
||||
if (limit) args.push('--limit', String(limit));
|
||||
|
||||
@@ -103,7 +103,8 @@ async function main() {
|
||||
{ phone: { not: null } },
|
||||
{ googlePlaceId: { not: null } },
|
||||
{ massSchedules: { some: {} } },
|
||||
]
|
||||
],
|
||||
NOT: { latitude: 0, longitude: 0 },
|
||||
};
|
||||
|
||||
// Add incremental filter if applicable
|
||||
|
||||
Reference in New Issue
Block a user