fix: clean up church-matcher types and add HK OSM bounding box

- Remove discovermassId/buscarmisasNetworkId from findDuplicateChurch match
  passes (importers now do their own pre-check dedup); restore them as
  optional fields on ExistingChurch to keep type/runtime in sync
- Add HK bounding box to COUNTRY_BOUNDING_BOXES; fix silent 0-result
  fallback when country query returns empty from mirror server
- discovermass importer: add --limit flag and skip-already-imported
  pre-check using importedSlugs set
- Import scripts: remove discovermassId from ExistingChurch select/stubs
  (field not needed in shared matcher context)
- Schema: reorder discovermassId/kerknetId/gottesdienstzeitenId fields

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
albertfj114
2026-04-01 22:20:45 -04:00
parent 3bd4d2e2f9
commit 033f805965
21 changed files with 499 additions and 64 deletions

View File

@@ -94,6 +94,7 @@ interface CLIArgs {
all: boolean;
dryRun: boolean;
resumeFrom?: number;
limit?: number;
jobId?: string;
}
@@ -507,6 +508,7 @@ function parseCLIArgs(): CLIArgs {
case '--all': result.all = true; break;
case '--dry-run': result.dryRun = true; break;
case '--resume-from': result.resumeFrom = parseInt(args[++i], 10); break;
case '--limit': result.limit = parseInt(args[++i], 10); break;
case '--job-id': result.jobId = args[++i]; break;
}
}
@@ -540,14 +542,25 @@ async function main() {
try {
const urls = await getAllChurchUrls();
const existingChurches = await loadExistingChurches();
// Skip already-imported churches — check discovermassId set in DB
const importedSlugs = new Set(
existingChurches.filter(c => c.discovermassId).map(c => c.discovermassId!)
);
// Apply --resume-from first, then filter to unimported, then apply --limit
const startIdx = args.resumeFrom ?? 0;
const churchUrls = urls.slice(startIdx);
console.log(`\nProcessing ${churchUrls.length} churches (starting from index ${startIdx})...\n`);
const candidateUrls = urls.slice(startIdx).filter(url => {
const slug = url.replace('https://discovermass.com/church/', '').replace(/\/$/, '');
return !importedSlugs.has(slug);
});
const churchUrls = args.limit ? candidateUrls.slice(0, args.limit) : candidateUrls;
console.log(`\nSitemap total: ${urls.length} | Already imported: ${importedSlugs.size} | This run: ${churchUrls.length}${args.limit ? ` (limit ${args.limit})` : ''}\n`);
for (let i = 0; i < churchUrls.length; i++) {
const url = churchUrls[i];
const overallIdx = startIdx + i;
console.log(`[${overallIdx + 1}/${urls.length}] ${url}`);
console.log(`[${i + 1}/${churchUrls.length}] ${url}`);
await processChurch(url, existingChurches, args, stats);
if (i < churchUrls.length - 1) {
await sleep(REQUEST_DELAY_MS);