Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
624 lines
20 KiB
TypeScript
624 lines
20 KiB
TypeScript
#!/usr/bin/env tsx
|
|
/**
 * Second-pass matching: analyze stored ChromaDB search results to find websites
 * that the FreeSearch first pass missed.
 *
 * Usage:
 *   npx tsx scripts/match-search-results.ts --dry-run
 *   npx tsx scripts/match-search-results.ts --country IT --limit 100
 *   npx tsx scripts/match-search-results.ts --threshold 0.3
 *
 * Algorithm:
 *   1. Get churches without websites that have been FreeSearch'd
 *   2. Query ChromaDB search_results collection for semantically similar results
 *   3. Cross-church matching: URLs from nearby churches may match
 *   4. URL frequency analysis: URLs appearing for multiple churches in same area
 *   5. Verify best candidates against page content
 *   6. Update church.website if verified
 */
|
|
|
|
import dotenv from 'dotenv';
|
|
import path from 'path';
|
|
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
|
|
|
|
import { Pool } from 'pg';
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
|
import { PrismaClient } from '@prisma/client';
|
|
import { Collection } from 'chromadb';
|
|
import axios from 'axios';
|
|
import { getCollection, COLLECTION_NAMES } from '../src/chromadb/collections';
|
|
import { embedSingle } from '../src/chromadb/embeddings';
|
|
|
|
// Dedicated database connection for this script: a pg pool built from
// DATABASE_URL, wrapped in the Prisma pg adapter, handed to the Prisma client.
const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({
  adapter,
});
|
|
|
|
// --- Job Tracking ---
|
|
/**
 * Resume an existing background job when `--job-id <id>` is present in `args`.
 *
 * The referenced job row is switched to `running` and its `startedAt` is reset
 * to the current time.
 *
 * @param args Command-line arguments (without the node/script prefix).
 * @returns The resumed job id, or `null` when `--job-id` was not supplied.
 * @throws Error when `--job-id` is given without a value.
 */
async function createOrResumeJob(args: string[]): Promise<string | null> {
  const jobIdIndex = args.indexOf('--job-id');
  if (jobIdIndex === -1) {
    return null;
  }

  const jobId = args[jobIdIndex + 1];
  // Fail fast with a readable message instead of handing `undefined` (or the
  // next flag) to Prisma as the row id.
  if (!jobId || jobId.startsWith('--')) {
    throw new Error('--job-id requires a job id value');
  }

  await prisma.backgroundJob.update({
    where: { id: jobId },
    data: { status: 'running', startedAt: new Date() },
  });
  return jobId;
}
|
|
|
|
/**
 * Insert a new `match-search-results` background job that is already marked
 * as running, and return its id.
 *
 * @param config Run configuration stored on the job row.
 * @returns The id of the created job.
 */
async function createNewJob(config: Record<string, unknown>): Promise<string> {
  const { id } = await prisma.backgroundJob.create({
    data: {
      type: 'match-search-results',
      status: 'running',
      startedAt: new Date(),
      config,
    },
  });

  return id;
}
|
|
|
|
/**
 * Persist progress counters on a background job row.
 *
 * @param jobId Id of the job to update.
 * @param processed Number of churches handled so far (stored as `processed`).
 * @param found Number of websites matched so far (stored as `succeeded`).
 * @param total Total number of churches in this run (stored as `totalItems`).
 */
async function updateJobProgress(jobId: string, processed: number, found: number, total: number): Promise<void> {
  const progress = {
    processed,
    succeeded: found,
    totalItems: total,
  };

  await prisma.backgroundJob.update({ where: { id: jobId }, data: progress });
}
|
|
|
|
/**
 * Report whether the job row currently has status `stopping`.
 *
 * @param jobId Id of the job to look up.
 * @returns `true` only when the job exists and its status is `stopping`.
 */
async function checkJobStopping(jobId: string): Promise<boolean> {
  const record = await prisma.backgroundJob.findUnique({ where: { id: jobId } });
  if (!record) {
    return false;
  }
  return record.status === 'stopping';
}
|
|
|
|
/**
 * Close out a background job: `failed` when an error message is supplied,
 * `completed` otherwise. The completion timestamp is set to now.
 *
 * @param jobId Id of the job to finish.
 * @param error Optional failure description stored on the job row.
 */
async function completeJob(jobId: string, error?: string): Promise<void> {
  const finalStatus = error ? 'failed' : 'completed';

  await prisma.backgroundJob.update({
    where: { id: jobId },
    data: { status: finalStatus, error, completedAt: new Date() },
  });
}
|
|
|
|
// --- Types ---
|
|
|
|
/** Subset of church columns this script selects and works with. */
interface ChurchRecord {
  /** Row id; used as the key when writing the matched website back. */
  id: string;
  /** Church name; the main input for name-word matching. */
  name: string;
  /** Street address, when known; used as a secondary match signal. */
  address: string | null;
  /** City, when known; used for city matching and same-city cross matching. */
  city: string | null;
  /** State/region; selected but not read by the matching logic in this file. */
  state: string | null;
  /** Country value; also used as the ChromaDB `churchCountry` filter. */
  country: string;
  /** Coordinates; selected but not read by the matching logic in this file. */
  latitude: number;
  longitude: number;
}
|
|
|
|
/** Running counters for one execution of the script. */
interface MatchStats {
  /** Churches taken from the work list so far. */
  processed: number;
  /** Churches for which a candidate URL passed verification. */
  matched: number;
  /** Churches with no candidates, or whose candidates all failed verification. */
  noResults: number;
  /** Individual candidate URLs rejected by verification. */
  verifyFailed: number;
  /** Churches whose processing threw. */
  errors: number;
  /** `Date.now()` value captured when the run started (milliseconds). */
  startTime: number;
}
|
|
|
|
// --- Helpers ---
|
|
|
|
/**
 * Cooperative stop flag: set by the SIGTERM/SIGINT handlers and by a stop
 * request on the job row; the main loop checks it before each church.
 */
let shuttingDown = false;
|
|
|
|
/** Write `msg` to stdout, prefixed with the current ISO-8601 timestamp in brackets. */
function log(msg: string) {
  const stamp = new Date().toISOString();
  console.log('[' + stamp + '] ' + msg);
}
|
|
|
|
/** Write `msg` to stderr, prefixed with the current ISO-8601 timestamp in brackets. */
function logError(msg: string) {
  const stamp = new Date().toISOString();
  console.error('[' + stamp + '] ' + msg);
}
|
|
|
|
/**
 * Reduce a string to a loose comparison key: lower-cased, with every character
 * other than `a-z`, `0-9` and whitespace removed, whitespace runs collapsed to
 * single spaces, and the ends trimmed.
 *
 * Note that non-ASCII letters are removed outright (not transliterated), and
 * punctuation such as hyphens is deleted without inserting a space.
 */
function normalizeForMatch(str: string): string {
  const lowered = str.toLowerCase();
  const asciiOnly = lowered.replace(/[^a-z0-9\s]/g, '');
  const singleSpaced = asciiOnly.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
|
|
|
/**
 * Lower-case terms whose presence in a page's text marks it as church- or
 * parish-related. Checked with plain substring matching against stripped,
 * lower-cased page text.
 */
const CATHOLIC_KEYWORDS = [
  // English
  'parish',
  'church',
  'catholic',
  // Parish-type terms in other languages
  'parroquia',
  'paroisse',
  'pfarrei',
  'parafia',
  'paroquia',
  'parrocchia',
  'farnost',
  'plebania',
  'parochie',
  'župnija',
  'farnosť',
  // Church-building terms in other languages
  'iglesia',
  'église',
  'kirche',
  'kościół',
  'chiesa',
  'kostel',
  'templom',
  'kerk',
];
|
|
|
|
/**
 * Lower-case phrases indicating that a page lists Mass or service times.
 * Checked with plain substring matching against stripped, lower-cased page
 * text.
 */
const MASS_SCHEDULE_KEYWORDS = [
  // English
  'mass schedule',
  'mass times',
  'worship schedule',
  'worship times',
  'service times',
  'sunday mass',
  'weekday mass',
  // Spanish / French
  'horario de misas',
  'horarios de misa',
  'horaires des messes',
  // German
  'gottesdienst',
  'gottesdienstzeiten',
  'messzeiten',
  // Polish
  'msze święte',
  'godziny mszy',
  'msze św',
  // Italian
  'orari delle messe',
  'orario messe',
  // Portuguese
  'horário das missas',
];
|
|
|
|
/**
 * Lower-case terms typical of tourism or sightseeing pages. A page containing
 * any of them is rejected by verification unless it also shows a Mass schedule.
 */
const TOURISM_KEYWORDS = [
  // "Tourism" in several languages
  'tourism',
  'turismo',
  'tourisme',
  'turisme',
  'touristik',
  'turistico',
  // Sightseeing vocabulary
  'attractions',
  'things to do',
  'sightseeing',
  'sehenswürdigkeiten',
  'what to see',
  'places to visit',
  'travel guide',
  'reiseführer',
  // Heritage vocabulary
  'patrimoine',
  'heritage trail',
  'cultural heritage',
  // Points of interest
  'punto de interés',
  'point of interest',
  'points of interest',
];
|
|
|
|
/**
 * Words that carry no identifying information in a church name: articles and
 * prepositions, generic religious vocabulary, common saint names and devotional
 * titles, and building/institution terms in several languages.
 *
 * Every entry is passed through `normalizeForMatch`, the same normalization
 * applied to the church name before filtering. That function deletes non-ASCII
 * letters, so an entry such as 'église' must be stored as 'glise' to ever match
 * the normalized name; storing the raw spelling would make the entry dead and
 * let the leftover fragment count as a significant word.
 */
const SIGNIFICANT_WORD_STOP_WORDS: ReadonlySet<string> = new Set(
  [
    'the', 'of', 'and', 'in', 'at', 'for', 'our', 'lady',
    'st', 'saint', 'saints', 'san', 'sant', 'santa', 'santo', 'sacred',
    'christ', 'jesus', 'mary', 'maria', 'king', 'lord', 'heart',
    'cross', 'lady', 'queen', 'angel', 'angels', 'good', 'star',
    'nome', 'pere', 'madre', 'notre', 'dame', 'bien',
    'onze', 'lieve', 'vrouw', 'heer',
    'rosa', 'paul', 'anne', 'jean', 'joan', 'luke', 'marc',
    'rita', 'jose', 'leon', 'pius', 'roch', 'yves', 'ines',
    'vita', 'fara', 'bona',
    'cristo', 'fatima', 'lourdes', 'perpetuo', 'socorro', 'calvario',
    'rosario', 'pilar', 'carmen', 'dolores', 'remedios', 'nieves',
    'grotte', 'mission', 'sagrada', 'sagrado', 'familia',
    'guadalupe', 'assumption', 'immaculate', 'perpetual', 'divine',
    'knights', 'columbus',
    'house', 'home', 'hall', 'center', 'centre', 'centro',
    'deacon', 'priest', 'bishop', 'father', 'sister', 'brother',
    'school', 'academy', 'college', 'seminary', 'rectory', 'retreat',
    'church', 'parish', 'catholic', 'roman', 'holy', 'chapel',
    'cathedral', 'basilica', 'shrine', 'convent', 'monastery',
    'chapelle', 'eglise', 'église', 'paroisse', 'couvent', 'grotte',
    'iglesia', 'parroquia', 'capilla', 'ermita', 'convento', 'basílica',
    'kirche', 'kapelle', 'pfarrei', 'kloster',
    'chiesa', 'parrocchia', 'cappella', 'oratorio',
    'igreja', 'capela', 'paroquia',
    'kościół', 'kaplica', 'parafia', 'droga',
    'kostel', 'kaple', 'farnost', 'templom', 'kápolna',
    'de', 'la', 'le', 'les', 'du', 'des', 'el', 'los', 'las',
    'di', 'del', 'della', 'delle', 'degli',
    'do', 'da', 'dos', 'das',
    'und', 'der', 'die', 'das', 'von',
    'nad', 'pod', 'przy',
  ].map((word) => normalizeForMatch(word))
);

/**
 * Extract the distinctive words of a church name for page matching.
 *
 * The name is normalized with `normalizeForMatch`, split on spaces, and
 * reduced to words of at least three characters that are not stop words.
 *
 * @param name Raw church name.
 * @returns Normalized significant words, in their original order (duplicates kept).
 */
function getSignificantWords(name: string): string[] {
  return normalizeForMatch(name)
    .split(' ')
    .filter((word) => word.length >= 3 && !SIGNIFICANT_WORD_STOP_WORDS.has(word));
}
|
|
|
|
/**
 * Convert an HTML document into lower-case plain text for substring matching.
 *
 * Steps, in order: drop `<script>` and `<style>` elements with their content,
 * replace every remaining tag with a space, replace alphabetic named entities
 * (`&name;`) with a space, collapse whitespace runs, and lower-case the
 * result. Numeric entities are left untouched and the result is not trimmed.
 */
function stripHtml(html: string): string {
  const substitutions: Array<[RegExp, string]> = [
    [/<script[^>]*>[\s\S]*?<\/script>/gi, ''],
    [/<style[^>]*>[\s\S]*?<\/style>/gi, ''],
    [/<[^>]+>/g, ' '],
    [/&[a-z]+;/gi, ' '],
    [/\s+/g, ' '],
  ];

  let text = html;
  for (const [pattern, replacement] of substitutions) {
    text = text.replace(pattern, replacement);
  }
  return text.toLowerCase();
}
|
|
|
|
// --- URL Verification (same logic as enrich-with-freesearch.ts) ---
|
|
|
|
/**
 * Fetch `url` and decide whether the page plausibly belongs to `church`.
 *
 * Signals gathered from the stripped page text: how many significant name
 * words appear, whether the normalized city appears, whether at least two
 * non-numeric address words (4+ characters) appear, and whether any Catholic,
 * Mass-schedule or tourism keyword appears. From the URL itself: whether the
 * hostname contains a name word of 4+ characters, and whether the path has
 * more than two segments.
 *
 * Rejections: tourism pages without a Mass schedule; deep URLs whose domain
 * does not match the name and that show no Mass schedule.
 *
 * @returns `true` when the acceptance rules pass; `false` otherwise, including
 *          on any fetch or processing error.
 */
async function verifyUrl(url: string, church: ChurchRecord): Promise<boolean> {
  try {
    const { data } = await axios.get(url, {
      timeout: 10000,
      maxRedirects: 3,
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; NearestMass/1.0; +https://nearestmass.com)',
        'Accept': 'text/html',
      },
      maxContentLength: 200000,
      responseType: 'text',
    });

    if (typeof data !== 'string') return false;

    const pageText = stripHtml(data);
    const pageHasAny = (keywords: string[]): boolean =>
      keywords.some((keyword) => pageText.includes(keyword));

    // Name signal: number of significant name words found in the page.
    const nameWords = getSignificantWords(church.name);
    const nameMatches = nameWords.filter((word) => pageText.includes(word)).length;

    // City signal: normalized city (longer than two characters) found in the page.
    const cityNorm = church.city ? normalizeForMatch(church.city) : '';
    const cityMatch = cityNorm.length > 2 && pageText.includes(cityNorm);

    // Address signal: at least two non-numeric words of 4+ characters found.
    const addressHits = church.address
      ? normalizeForMatch(church.address)
          .split(' ')
          .filter((part) => part.length >= 4 && !/^\d+$/.test(part))
          .filter((part) => pageText.includes(part)).length
      : 0;
    const addressMatch = addressHits >= 2;

    const hasCatholicKeyword = pageHasAny(CATHOLIC_KEYWORDS);
    const hasMassSchedule = pageHasAny(MASS_SCHEDULE_KEYWORDS);
    const isTourismPage = pageHasAny(TOURISM_KEYWORDS);

    // URL-derived signals; an unparseable URL leaves both false.
    let parsedUrl: URL | null = null;
    try {
      parsedUrl = new URL(url);
    } catch { /* ignore */ }

    const hostname = parsedUrl ? parsedUrl.hostname.toLowerCase() : '';
    const domainMatchesName =
      parsedUrl !== null && nameWords.some((word) => word.length >= 4 && hostname.includes(word));
    const isDeepUrl =
      parsedUrl !== null && parsedUrl.pathname.split('/').filter(Boolean).length > 2;

    // Hard rejections.
    if (isTourismPage && !hasMassSchedule) return false;
    if (isDeepUrl && !domainMatchesName && !hasMassSchedule) return false;

    // Acceptance rules that do not depend on whether a city is known.
    if (nameMatches >= 1 && hasMassSchedule) return true;
    if (nameMatches >= 1 && domainMatchesName && hasCatholicKeyword) return true;
    if (nameMatches >= 1 && addressMatch) return true;

    // With a known city two name words (or one plus the city) suffice;
    // without one, three name words are required.
    const hasCity = Boolean(church.city && church.city.trim());
    return hasCity
      ? nameMatches >= 2 || (nameMatches >= 1 && cityMatch)
      : nameMatches >= 3;
  } catch {
    return false;
  }
}
|
|
|
|
// --- ChromaDB Querying ---
|
|
|
|
/** One stored search result returned by a ChromaDB query, flattened from its metadata. */
interface ChromaResult {
  /** ChromaDB document id. */
  id: string;
  /** Result URL (metadata `resultUrl`). */
  url: string;
  /** Result title (metadata `resultTitle`). */
  title: string;
  /** Stored score (metadata `score`); carried through unchanged. */
  score: number;
  /** Query distance reported by ChromaDB; lower means more similar. */
  distance: number;
  /** Id of the church the result was originally stored for. */
  churchId: string;
  /** Name of that church. */
  churchName: string;
  /** City of that church. */
  churchCity: string;
  /** Whether the result was previously marked as verified. */
  verified?: boolean;
}
|
|
|
|
/**
 * Query the search-results collection for stored results semantically close to
 * a church's identity text (name, address, city, country), restricted to
 * results stored for the same country.
 *
 * @param church Church to find candidates for.
 * @param collection ChromaDB collection holding stored search results.
 * @param threshold Maximum distance (inclusive) for a result to be kept.
 * @param nResults Maximum number of results requested from ChromaDB.
 * @returns Results within the threshold that carry a non-empty URL, in the
 *          order ChromaDB returned them.
 */
async function findCandidatesForChurch(
  church: ChurchRecord,
  collection: Collection,
  threshold: number,
  nResults: number
): Promise<ChromaResult[]> {
  // Build identity text for semantic search
  const identityText = `${church.name} ${church.address || ''} ${church.city || ''} ${church.country}`.trim();
  const queryEmbedding = await embedSingle(identityText);

  const results = await collection.query({
    queryEmbeddings: [queryEmbedding],
    nResults,
    where: { churchCountry: church.country },
  });

  const ids = results.ids[0];
  if (!ids) return [];

  // Metadata rows (and the arrays holding them) may be absent or null; treat a
  // missing row as empty metadata so it is filtered out by the URL check below
  // instead of throwing on property access.
  const metadataRows = results.metadatas?.[0] ?? [];
  const distanceRow = results.distances?.[0] ?? [];

  return ids
    .map((id, i) => {
      const metadata = (metadataRows[i] ?? {}) as Record<string, unknown>;
      return {
        id,
        url: (metadata.resultUrl as string) || '',
        title: (metadata.resultTitle as string) || '',
        score: (metadata.score as number) || 0,
        // A missing distance is treated as 1.
        distance: distanceRow[i] ?? 1,
        churchId: (metadata.churchId as string) || '',
        churchName: (metadata.churchName as string) || '',
        churchCity: (metadata.churchCity as string) || '',
        verified: (metadata.verified as boolean) || false,
      };
    })
    .filter((r) => r.distance <= threshold && r.url !== '');
}
|
|
|
|
/**
 * Collapse results that share a URL, keeping for each URL the entry with the
 * smallest distance (the first one wins on ties), and return the survivors
 * sorted by ascending distance.
 */
function deduplicateByUrl(results: ChromaResult[]): ChromaResult[] {
  const closestByUrl = results.reduce((closest, candidate) => {
    const current = closest.get(candidate.url);
    if (current === undefined || candidate.distance < current.distance) {
      closest.set(candidate.url, candidate);
    }
    return closest;
  }, new Map<string, ChromaResult>());

  return Array.from(closestByUrl.values()).sort((left, right) => left.distance - right.distance);
}
|
|
|
|
// --- Main Processing ---
|
|
|
|
/**
 * Try to find and verify a website for one church from stored search results.
 *
 * Candidate URLs are gathered from ChromaDB, ordered by priority, and the
 * first five are verified against their page content. On success the church
 * row is updated (unless `dryRun`) and the matching ChromaDB entry is marked
 * as verified on a best-effort basis. All outcomes are tallied in `stats`;
 * exceptions are counted and logged, never rethrown.
 *
 * @param church Church to process.
 * @param collection ChromaDB collection holding stored search results.
 * @param stats Run counters, mutated in place.
 * @param threshold Maximum ChromaDB distance for a candidate.
 * @param dryRun When true, nothing is written to the database or ChromaDB.
 */
async function processChurch(
  church: ChurchRecord,
  collection: Collection,
  stats: MatchStats,
  threshold: number,
  dryRun: boolean
): Promise<void> {
  const label = `${church.name} (${church.city || 'unknown'}, ${church.country})`;

  try {
    // 1. Semantic search for similar results in ChromaDB
    const candidates = await findCandidatesForChurch(church, collection, threshold, 20);

    if (candidates.length === 0) {
      log(` - ${label} => no ChromaDB results within threshold`);
      stats.noResults++;
      return;
    }

    // 2. Separate results: own church vs cross-church
    const ownResults = candidates.filter((r) => r.churchId === church.id);
    const crossResults = candidates.filter((r) => r.churchId !== church.id);

    // 3. URL frequency: a URL stored for several *different* churches is
    //    likely a real parish/diocese site. Count distinct churches per URL so
    //    that one church's duplicate rows do not qualify on their own.
    const churchesByUrl = new Map<string, Set<string>>();
    for (const r of candidates) {
      const owners = churchesByUrl.get(r.url) ?? new Set<string>();
      owners.add(r.churchId);
      churchesByUrl.set(r.url, owners);
    }
    const highFreqUrls = [...churchesByUrl.entries()]
      .filter(([, owners]) => owners.size >= 2)
      .map(([url]) => url);

    // 4. Build the candidate list in priority order. A Set keeps first
    //    insertion order and drops repeats.
    const urlsToTry = new Set<string>();

    // Already-verified URLs from other churches (highest priority)
    for (const r of crossResults) {
      if (r.verified) urlsToTry.add(r.url);
    }

    // High-frequency URLs (stored for multiple churches)
    for (const url of highFreqUrls) urlsToTry.add(url);

    // Own church results by distance (closest semantic match first)
    for (const r of deduplicateByUrl(ownResults)) urlsToTry.add(r.url);

    // Cross-church results from same city
    const sameCityCross = crossResults.filter((r) =>
      church.city && r.churchCity &&
      normalizeForMatch(r.churchCity) === normalizeForMatch(church.city)
    );
    for (const r of deduplicateByUrl(sameCityCross)) urlsToTry.add(r.url);

    // Limit to top 5 candidates
    const topUrls = [...urlsToTry].slice(0, 5);

    log(` ? ${label} => ${candidates.length} results, trying ${topUrls.length} candidates`);

    // 5. Verify each candidate, stopping at the first that passes
    let verifiedUrl: string | null = null;
    for (const url of topUrls) {
      if (await verifyUrl(url, church)) {
        verifiedUrl = url;
        break;
      }
      stats.verifyFailed++;
    }

    if (verifiedUrl === null) {
      log(` ~ ${label} => ${topUrls.length} candidates failed verification`);
      stats.noResults++;
      return;
    }

    log(` + ${label} => ${verifiedUrl}`);
    stats.matched++;
    if (dryRun) return;

    await prisma.church.update({
      where: { id: church.id },
      data: {
        website: verifiedUrl,
        hasWebsite: true,
      },
    });

    // Mark in ChromaDB (update replaces metadata, so include all fields).
    // Best effort: the church row is already updated, so a failure here is
    // reported but does not count as an error for this church.
    try {
      const matchingResult = candidates.find((r) => r.url === verifiedUrl);
      if (matchingResult) {
        await collection.update({
          ids: [matchingResult.id],
          metadatas: [{
            churchId: matchingResult.churchId,
            churchName: matchingResult.churchName,
            churchCity: matchingResult.churchCity,
            churchCountry: church.country,
            searchQuery: '',
            resultUrl: verifiedUrl,
            resultTitle: matchingResult.title || '',
            score: matchingResult.score || 0,
            verified: true,
          }],
        });
      }
    } catch (updateError: unknown) {
      const reason = updateError instanceof Error ? updateError.message : String(updateError);
      logError(` ! ${label} => could not mark result as verified in ChromaDB: ${reason}`);
    }
  } catch (error: unknown) {
    stats.errors++;
    const message = error instanceof Error ? error.message : String(error);
    logError(` ! ${label} => error: ${message}`);
  }
}
|
|
|
|
// --- Main ---
|
|
|
|
/**
 * Script entry point.
 *
 * Parses `--country`, `--limit` (default 500), `--threshold` (default 0.4),
 * `--dry-run` and `--job-id`; connects to the ChromaDB search-results
 * collection; creates or resumes a background job; loads churches without a
 * website that have already been FreeSearch'd; processes them one by one while
 * reporting progress; then prints a summary and closes the database
 * connections.
 *
 * @throws Error on invalid option values, or on any failure after the job was
 *         created (the job is marked `failed` first, best effort).
 */
async function main() {
  const args = process.argv.slice(2);

  // Read the value following `flag`; a flag given without a value is an error
  // rather than silently falling back to a default.
  const optionValue = (flag: string): string | undefined => {
    const index = args.indexOf(flag);
    if (index === -1) return undefined;
    const value = args[index + 1];
    if (value === undefined || value.startsWith('--')) {
      throw new Error(`${flag} requires a value`);
    }
    return value;
  };

  const dryRun = args.includes('--dry-run');
  const countryCode = optionValue('--country');

  const limitArg = optionValue('--limit');
  const limit = limitArg !== undefined ? Number.parseInt(limitArg, 10) : 500;
  if (Number.isNaN(limit)) {
    throw new Error(`Invalid --limit value: ${limitArg}`);
  }

  const thresholdArg = optionValue('--threshold');
  const threshold = thresholdArg !== undefined ? Number.parseFloat(thresholdArg) : 0.4;
  if (Number.isNaN(threshold)) {
    // A NaN threshold would make every distance comparison false and the run
    // would report "no results" for every church.
    throw new Error(`Invalid --threshold value: ${thresholdArg}`);
  }

  // Graceful shutdown
  process.on('SIGTERM', () => { log('Received SIGTERM'); shuttingDown = true; });
  process.on('SIGINT', () => { log('Received SIGINT'); shuttingDown = true; });

  log('============================================================');
  log('Second-Pass Search Result Matching');
  log('============================================================');
  log(`Country: ${countryCode || 'All'}`);
  log(`Limit: ${limit}`);
  log(`Threshold: ${threshold}`);
  log(`Dry run: ${dryRun ? 'Yes' : 'No'}`);
  log('============================================================');

  // Connect to ChromaDB
  let collection: Collection;
  try {
    collection = await getCollection(COLLECTION_NAMES.SEARCH_RESULTS);
    log('ChromaDB search_results collection connected');
  } catch (e: unknown) {
    logError(`ChromaDB unavailable: ${e instanceof Error ? e.message : String(e)}`);
    logError('This script requires ChromaDB. Make sure it is running.');
    process.exit(1);
  }

  // Check collection has data
  const count = await collection.count();
  log(`ChromaDB search_results: ${count} entries`);
  if (count === 0) {
    log('No search results stored yet. Run enrich-with-freesearch.ts first.');
    process.exit(0);
  }

  // Job tracking: resume the job named on the command line, else create one
  const jobId =
    (await createOrResumeJob(args)) ||
    (await createNewJob({ countryCode, limit, threshold, dryRun }));
  log(`Job ID: ${jobId}`);

  const stats: MatchStats = {
    processed: 0,
    matched: 0,
    noResults: 0,
    verifyFailed: 0,
    errors: 0,
    startTime: Date.now(),
  };

  try {
    // Get churches without websites that have been FreeSearch'd
    const whereClause: Record<string, unknown> = {
      source: 'osm',
      website: null,
      freeSearchedAt: { not: null },
      ...(countryCode ? { country: countryCode } : {}),
    };

    const churches = await prisma.church.findMany({
      // Cast kept from the original: the filter is assembled dynamically.
      where: whereClause as any,
      select: {
        id: true, name: true, address: true, city: true, state: true,
        country: true, latitude: true, longitude: true,
      },
      take: limit,
      orderBy: { updatedAt: 'asc' },
    });

    log(`Found ${churches.length} churches without websites (already FreeSearch'd)`);

    for (const church of churches) {
      if (shuttingDown) break;
      stats.processed++;

      await processChurch(church, collection, stats, threshold, dryRun);

      // Job tracking every 10 items
      if (stats.processed % 10 === 0) {
        await updateJobProgress(jobId, stats.processed, stats.matched, churches.length);
        if (await checkJobStopping(jobId)) {
          log('Job stop requested via admin dashboard.');
          shuttingDown = true;
          break;
        }
      }

      // Progress logging every 50 items
      if (stats.processed % 50 === 0) {
        const elapsedSeconds = (Date.now() - stats.startTime) / 1000;
        const rate = Math.round((stats.processed / elapsedSeconds) * 3600);
        log(`Progress: ${stats.processed}/${churches.length} processed, ${stats.matched} matched, ${stats.noResults} no match, ${stats.errors} errors (~${rate}/hour)`);
      }
    }

    // Complete job
    await updateJobProgress(jobId, stats.processed, stats.matched, churches.length);
    await completeJob(jobId);
  } catch (error: unknown) {
    // Do not leave the job stuck in 'running'. Marking it failed is best
    // effort; the original error is what gets rethrown.
    const message = error instanceof Error ? error.message : String(error);
    try {
      await completeJob(jobId, message);
    } catch (markError: unknown) {
      const reason = markError instanceof Error ? markError.message : String(markError);
      logError(`Could not mark job ${jobId} as failed: ${reason}`);
    }
    throw error;
  }

  // Print summary
  const elapsed = ((Date.now() - stats.startTime) / 1000).toFixed(1);
  const matchRate = stats.processed > 0
    ? ((stats.matched / stats.processed) * 100).toFixed(1)
    : '0.0';

  log('');
  log('============================================================');
  log('Second-Pass Matching Summary');
  log('============================================================');
  log(`Churches processed: ${stats.processed}`);
  log(`Websites matched: ${stats.matched}`);
  log(`No match found: ${stats.noResults}`);
  log(`Verify rejected: ${stats.verifyFailed}`);
  log(`Errors: ${stats.errors}`);
  log(`Match rate: ${matchRate}%`);
  log(`Elapsed: ${elapsed}s`);
  log('============================================================');

  await prisma.$disconnect();
  await pool.end();
}
|
|
|
|
// Run the script; any rejection from main() is logged and turned into a
// non-zero exit code.
main().catch((fatal) => {
  const description = `Fatal error: ${fatal.message}`;
  logError(description);
  process.exit(1);
});
|