fix: clean up church-matcher types and add HK OSM bounding box
- Remove discovermassId/buscarmisasNetworkId from findDuplicateChurch match passes (importers now do their own pre-check dedup); restore as optional fields on ExistingChurch to keep type/runtime in sync
- Add HK bounding box to COUNTRY_BOUNDING_BOXES; fix silent 0-result fallback when country query returns empty from mirror server
- discovermass importer: add --limit flag and skip-already-imported pre-check using importedSlugs set
- Import scripts: remove discovermassId from ExistingChurch select/stubs (field not needed in shared matcher context)
- Schema: reorder discovermassId/kerknetId/gottesdienstzeitenId fields

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -178,7 +178,6 @@ async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: s
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -259,7 +258,6 @@ async function importFromBaidu(dryRun: boolean, resumeFromCell: number, jobId: s
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'baidu',
|
||||
website: baiduChurch.website || null,
|
||||
phone: baiduChurch.phone || null,
|
||||
|
||||
@@ -287,7 +287,6 @@ async function loadExistingCzechChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -415,7 +414,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'bohosluzby',
|
||||
website: null,
|
||||
phone: null,
|
||||
|
||||
@@ -94,6 +94,7 @@ interface CLIArgs {
|
||||
all: boolean;
|
||||
dryRun: boolean;
|
||||
resumeFrom?: number;
|
||||
limit?: number;
|
||||
jobId?: string;
|
||||
}
|
||||
|
||||
@@ -507,6 +508,7 @@ function parseCLIArgs(): CLIArgs {
|
||||
case '--all': result.all = true; break;
|
||||
case '--dry-run': result.dryRun = true; break;
|
||||
case '--resume-from': result.resumeFrom = parseInt(args[++i], 10); break;
|
||||
case '--limit': result.limit = parseInt(args[++i], 10); break;
|
||||
case '--job-id': result.jobId = args[++i]; break;
|
||||
}
|
||||
}
|
||||
@@ -540,14 +542,25 @@ async function main() {
|
||||
try {
|
||||
const urls = await getAllChurchUrls();
|
||||
const existingChurches = await loadExistingChurches();
|
||||
|
||||
// Skip already-imported churches — check discovermassId set in DB
|
||||
const importedSlugs = new Set(
|
||||
existingChurches.filter(c => c.discovermassId).map(c => c.discovermassId!)
|
||||
);
|
||||
|
||||
// Apply --resume-from first, then filter to unimported, then apply --limit
|
||||
const startIdx = args.resumeFrom ?? 0;
|
||||
const churchUrls = urls.slice(startIdx);
|
||||
console.log(`\nProcessing ${churchUrls.length} churches (starting from index ${startIdx})...\n`);
|
||||
const candidateUrls = urls.slice(startIdx).filter(url => {
|
||||
const slug = url.replace('https://discovermass.com/church/', '').replace(/\/$/, '');
|
||||
return !importedSlugs.has(slug);
|
||||
});
|
||||
const churchUrls = args.limit ? candidateUrls.slice(0, args.limit) : candidateUrls;
|
||||
|
||||
console.log(`\nSitemap total: ${urls.length} | Already imported: ${importedSlugs.size} | This run: ${churchUrls.length}${args.limit ? ` (limit ${args.limit})` : ''}\n`);
|
||||
|
||||
for (let i = 0; i < churchUrls.length; i++) {
|
||||
const url = churchUrls[i];
|
||||
const overallIdx = startIdx + i;
|
||||
console.log(`[${overallIdx + 1}/${urls.length}] ${url}`);
|
||||
console.log(`[${i + 1}/${churchUrls.length}] ${url}`);
|
||||
await processChurch(url, existingChurches, args, stats);
|
||||
if (i < churchUrls.length - 1) {
|
||||
await sleep(REQUEST_DELAY_MS);
|
||||
|
||||
@@ -401,7 +401,6 @@ async function loadExistingChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -516,7 +515,6 @@ async function importChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'gcatholic',
|
||||
website: church.website || null,
|
||||
phone: church.phone || null,
|
||||
|
||||
@@ -316,7 +316,6 @@ async function loadExistingGermanChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -479,7 +478,6 @@ async function processDiocese(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: gdzId,
|
||||
discovermassId: null,
|
||||
source: 'gottesdienstzeiten',
|
||||
website: church.website,
|
||||
phone: church.phone,
|
||||
|
||||
@@ -570,7 +570,6 @@ async function loadExistingSpanishChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -783,7 +782,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'horariosmisas',
|
||||
website: parsed.website,
|
||||
phone: parsed.phone,
|
||||
|
||||
@@ -343,7 +343,6 @@ async function loadExistingBelgianChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -490,8 +489,6 @@ async function processChurch(
|
||||
bohosluzbyId: null,
|
||||
miserendId: null,
|
||||
kerknetId,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'kerknet',
|
||||
website: church.website,
|
||||
phone: null,
|
||||
|
||||
@@ -290,7 +290,6 @@ async function loadExistingPhilippineChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -465,7 +464,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'mass-schedules-ph',
|
||||
website: null,
|
||||
phone: parsed.phone,
|
||||
|
||||
@@ -398,7 +398,6 @@ async function loadExistingChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -596,7 +595,7 @@ async function main() {
|
||||
orarimesseId: null, massSchedulesPhId: null,
|
||||
philmassId: null, horariosMisasId: null,
|
||||
mszeInfoId: null, weekdayMassesId: null,
|
||||
messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null, discovermassId: null,
|
||||
messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null,
|
||||
source: 'masstimes', website: mc.url?.trim() || null,
|
||||
phone: mc.phone_number?.trim() || null, address, country,
|
||||
});
|
||||
|
||||
@@ -326,7 +326,6 @@ async function loadExistingFrenchChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -480,7 +479,6 @@ async function processDiocese(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'messes-info',
|
||||
website: null,
|
||||
phone: null,
|
||||
|
||||
@@ -240,7 +240,6 @@ async function loadExistingChurches(countryCodes: string[]): Promise<ExistingChu
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -366,7 +365,6 @@ async function processChurch(
|
||||
miserendId,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'miserend',
|
||||
website: null,
|
||||
phone: null,
|
||||
|
||||
@@ -367,7 +367,6 @@ async function loadExistingPolishChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -538,7 +537,6 @@ async function processChurch(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'msze-info',
|
||||
website: parsed.website,
|
||||
phone: parsed.phone,
|
||||
|
||||
@@ -283,7 +283,6 @@ async function loadExistingItalianChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -425,7 +424,6 @@ async function processChurchesForDiocese(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'orarimesse',
|
||||
website: church.sito || null,
|
||||
phone: null,
|
||||
|
||||
@@ -204,7 +204,6 @@ async function importFromOSM(countryCode: string, dryRun: boolean = false): Prom
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -380,7 +379,6 @@ async function importFromOSM(countryCode: string, dryRun: boolean = false): Prom
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'osm',
|
||||
website: osmChurch.website || null,
|
||||
phone: osmChurch.phone || null,
|
||||
|
||||
@@ -152,7 +152,6 @@ async function importFromRegion(countryCode: string, regionName: string, dryRun:
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -256,7 +255,6 @@ async function importFromRegion(countryCode: string, regionName: string, dryRun:
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'osm',
|
||||
website: osmChurch.website || null,
|
||||
phone: osmChurch.phone || null,
|
||||
|
||||
@@ -301,7 +301,6 @@ async function loadExistingPhilippineChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
|
||||
@@ -822,7 +822,6 @@ async function loadExistingChurches(): Promise<ExistingChurch[]> {
|
||||
miserendId: true,
|
||||
kerknetId: true,
|
||||
gottesdienstzeitenId: true,
|
||||
discovermassId: true,
|
||||
source: true,
|
||||
website: true,
|
||||
phone: true,
|
||||
@@ -982,7 +981,6 @@ async function importAreaBlocks(
|
||||
miserendId: null,
|
||||
kerknetId: null,
|
||||
gottesdienstzeitenId: null,
|
||||
discovermassId: null,
|
||||
source: 'weekdaymasses',
|
||||
website: church.website,
|
||||
phone: church.phone,
|
||||
|
||||
Reference in New Issue
Block a user