feat: add discovermassId to church-matcher ExistingChurch and ChurchCandidate
Add discovermassId field to ExistingChurch interface and ChurchCandidate type, insert a dedicated matching pass in findDuplicateChurch, and update all 15 importer push blocks plus 16 loadExistingChurches select queries to include the new field. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
672
scripts/import-masstimes-api.ts
Normal file
672
scripts/import-masstimes-api.ts
Normal file
@@ -0,0 +1,672 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* Import Catholic churches and mass schedules globally from masstimes.org API
|
||||
*
|
||||
* masstimes.org has ~121,000 churches worldwide. This script queries their
|
||||
* geo-search API with a grid of coordinates covering world landmass, then
|
||||
* deduplicates and imports the results.
|
||||
*
|
||||
* API: GET https://masstimes.org/Churchs/?lat={lat}&long={lng}&pg={page}
|
||||
* - Requires Referer header
|
||||
* - Returns 30 results per page within 100-mile (~160km) radius
|
||||
* - Paginate until empty array
|
||||
*
|
||||
* Grid strategy:
|
||||
* - 2.5° latitude spacing (~278km), longitude adjusted for latitude
|
||||
* - Continental bounding boxes to skip oceans
|
||||
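* 100-mile radius means ~322km diameter — 2.5° spacing makes neighbouring circles overlap along the grid axes, but cell centres (~187km or more from the nearest grid point) fall outside the ~161km radius, so coverage is not gap-free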
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx scripts/import-masstimes-api.ts --all
|
||||
* npx tsx scripts/import-masstimes-api.ts --all --dry-run
|
||||
* npx tsx scripts/import-masstimes-api.ts --region europe
|
||||
* npx tsx scripts/import-masstimes-api.ts --all --skip-us
|
||||
* npx tsx scripts/import-masstimes-api.ts --all --job-id {uuid}
|
||||
*/
|
||||
|
||||
import dotenv from 'dotenv';
|
||||
import path from 'path';
|
||||
|
||||
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
|
||||
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
// Database connection shared by the whole script. Falls back to a local
// development database when DATABASE_URL is not set.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';

// Mask everything between "user:" and "@" so that passwords containing ':'
// are not partially echoed to the log.
console.log(`Connecting to database: ${dbUrl.replace(/(\/\/[^:/@]*):[^@]*@/, '$1:***@')}`);

const pool = new Pool({
  connectionString: dbUrl,
  // NOTE(review): certificate verification is disabled for URLs containing
  // "neon" — confirm this is still required by the hosting setup.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
import { findDuplicateChurch } from '../src/lib/church-matcher';
|
||||
import type { ExistingChurch } from '../src/lib/church-matcher';
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Geo-search endpoint queried with `lat`, `long` and `pg` parameters. */
const API_BASE = 'https://masstimes.org/Churchs/';
/** Referer header value sent with every request. */
const REFERER = 'https://masstimes.org/map';
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';
const RATE_LIMIT_MS = 2000; // 2 seconds between requests — respectful rate
/** Number of results in a full page; a shorter page is treated as the last one. */
const PAGE_SIZE = 30;
const LAT_SPACING = 2.5; // degrees (~278km)
const TARGET_LNG_SPACING_KM = 250; // target spacing in km
|
||||
|
||||
// Country name → ISO 3166-1 alpha-2 code mapping for masstimes country names.
// Keys are lower-case; names that are not listed are resolved to 'XX' by the caller.
const COUNTRY_CODE_MAP: Record<string, string> = {
  'united states': 'US', 'canada': 'CA', 'mexico': 'MX',
  'united kingdom': 'GB', 'ireland': 'IE', 'france': 'FR', 'germany': 'DE',
  'spain': 'ES', 'italy': 'IT', 'portugal': 'PT', 'netherlands': 'NL',
  'belgium': 'BE', 'luxembourg': 'LU', 'switzerland': 'CH', 'austria': 'AT',
  'poland': 'PL', 'czech republic': 'CZ', 'czechia': 'CZ', 'slovakia': 'SK',
  'hungary': 'HU', 'croatia': 'HR', 'slovenia': 'SI', 'romania': 'RO',
  'bulgaria': 'BG', 'serbia': 'RS', 'bosnia and herzegovina': 'BA',
  'montenegro': 'ME', 'north macedonia': 'MK', 'albania': 'AL', 'kosovo': 'XK',
  'greece': 'GR', 'cyprus': 'CY', 'malta': 'MT', 'denmark': 'DK',
  'sweden': 'SE', 'norway': 'NO', 'finland': 'FI', 'iceland': 'IS',
  'estonia': 'EE', 'latvia': 'LV', 'lithuania': 'LT',
  'ukraine': 'UA', 'russia': 'RU', 'belarus': 'BY', 'moldova': 'MD',
  'georgia': 'GE', 'armenia': 'AM', 'azerbaijan': 'AZ',
  'turkey': 'TR', 'israel': 'IL', 'jordan': 'JO', 'lebanon': 'LB',
  'egypt': 'EG', 'morocco': 'MA', 'tunisia': 'TN', 'algeria': 'DZ',
  'india': 'IN', 'sri lanka': 'LK', 'pakistan': 'PK', 'bangladesh': 'BD',
  'nepal': 'NP', 'myanmar': 'MM', 'thailand': 'TH', 'vietnam': 'VN',
  'cambodia': 'KH', 'laos': 'LA', 'malaysia': 'MY', 'singapore': 'SG',
  'indonesia': 'ID', 'philippines': 'PH', 'china': 'CN', 'japan': 'JP',
  'south korea': 'KR', 'korea, south': 'KR', 'taiwan': 'TW',
  'hong kong': 'HK', 'macau': 'MO', 'mongolia': 'MN',
  'australia': 'AU', 'new zealand': 'NZ', 'fiji': 'FJ',
  'papua new guinea': 'PG', 'samoa': 'WS', 'tonga': 'TO', 'guam': 'GU',
  'nigeria': 'NG', 'ghana': 'GH', 'kenya': 'KE', 'tanzania': 'TZ',
  'uganda': 'UG', 'south africa': 'ZA', 'cameroon': 'CM', 'senegal': 'SN',
  'ethiopia': 'ET', 'madagascar': 'MG', 'mozambique': 'MZ',
  'zambia': 'ZM', 'zimbabwe': 'ZW', 'malawi': 'MW', 'rwanda': 'RW',
  'burundi': 'BI', 'congo, democratic republic of the': 'CD',
  'congo, republic of the': 'CG', "côte d'ivoire": 'CI', 'ivory coast': 'CI',
  'burkina faso': 'BF', 'mali': 'ML', 'niger': 'NE', 'chad': 'TD',
  'central african republic': 'CF', 'gabon': 'GA', 'equatorial guinea': 'GQ',
  'angola': 'AO', 'namibia': 'NA', 'botswana': 'BW', 'lesotho': 'LS',
  'eswatini': 'SZ', 'swaziland': 'SZ', 'mauritius': 'MU',
  'brazil': 'BR', 'argentina': 'AR', 'colombia': 'CO', 'peru': 'PE',
  'chile': 'CL', 'venezuela': 'VE', 'ecuador': 'EC', 'bolivia': 'BO',
  'paraguay': 'PY', 'uruguay': 'UY', 'guyana': 'GY', 'suriname': 'SR',
  'trinidad and tobago': 'TT', 'jamaica': 'JM', 'barbados': 'BB',
  'bahamas': 'BS', 'bahamas, the': 'BS', 'haiti': 'HT',
  'dominican republic': 'DO', 'cuba': 'CU', 'puerto rico': 'PR',
  'guatemala': 'GT', 'honduras': 'HN', 'el salvador': 'SV',
  'nicaragua': 'NI', 'costa rica': 'CR', 'panama': 'PA', 'belize': 'BZ',
  'grenada': 'GD', 'saint lucia': 'LC', 'dominica': 'DM',
  'saint vincent and the grenadines': 'VC', 'antigua and barbuda': 'AG',
  'saint kitts and nevis': 'KN', 'bermuda': 'BM', 'cayman islands': 'KY',
  'aruba': 'AW', 'curaçao': 'CW', 'curacao': 'CW',
  'united arab emirates': 'AE', 'saudi arabia': 'SA', 'qatar': 'QA',
  'bahrain': 'BH', 'kuwait': 'KW', 'oman': 'OM', 'iraq': 'IQ',
  'iran': 'IR', 'afghanistan': 'AF',
  'kazakhstan': 'KZ', 'uzbekistan': 'UZ', 'kyrgyzstan': 'KG',
  'tajikistan': 'TJ', 'turkmenistan': 'TM',
  'liechtenstein': 'LI', 'monaco': 'MC', 'andorra': 'AD', 'san marino': 'SM',
  'vatican city': 'VA', 'holy see (vatican city)': 'VA',
  'east timor': 'TL', 'timor-leste': 'TL',

  // Additional names that previously fell through to 'XX'.
  'togo': 'TG', 'benin': 'BJ', 'liberia': 'LR', 'sierra leone': 'SL',
  'guinea': 'GN', 'guinea-bissau': 'GW', 'gambia': 'GM', 'gambia, the': 'GM',
  'cape verde': 'CV', 'cabo verde': 'CV', 'sao tome and principe': 'ST',
  'sudan': 'SD', 'south sudan': 'SS', 'eritrea': 'ER', 'djibouti': 'DJ',
  'somalia': 'SO', 'mauritania': 'MR', 'libya': 'LY', 'seychelles': 'SC',
  'comoros': 'KM', 'syria': 'SY', 'yemen': 'YE',
  'brunei': 'BN', 'bhutan': 'BT', 'maldives': 'MV',
  'north korea': 'KP', 'korea, north': 'KP',
  'solomon islands': 'SB', 'vanuatu': 'VU', 'kiribati': 'KI',
  'french polynesia': 'PF', 'new caledonia': 'NC',
  'french guiana': 'GF', 'martinique': 'MQ', 'guadeloupe': 'GP',
  'greenland': 'GL', 'gibraltar': 'GI',
};
|
||||
|
||||
// Continental bounding boxes as [lat_min, lat_max, lng_min, lng_max] in degrees.
// A region may list several boxes; boxes of different regions overlap, and the
// grid generator de-duplicates the resulting points.
const REGIONS: Record<string, Array<[number, number, number, number]>> = {
  'north-america': [[7, 72, -170, -50]],
  'central-america': [[7, 24, -120, -60]],
  'south-america': [[-56, 13, -82, -34]],
  'europe': [[35, 72, -12, 45]],
  'eastern-europe': [[40, 70, 20, 60]],
  'africa': [[-36, 38, -20, 55]],
  'middle-east': [[12, 42, 25, 65]],
  'south-asia': [[5, 38, 60, 98]],
  'east-asia': [[18, 55, 95, 150]],
  'southeast-asia': [[-12, 22, 92, 142]],
  'oceania': [[-48, -8, 110, 180], [-22, 0, 160, 180]],
  'central-asia': [[35, 55, 45, 90]],
};
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * One church record as returned by the masstimes geo-search API.
 * Coordinates arrive as strings and are parsed by the importer.
 */
interface MasstimesChurch {
  id: string;
  name: string;
  latitude: string;
  longitude: string;
  church_address_street_address: string;
  church_address_city_name: string;
  // "providence" (sic) is the API's own field name; the importer stores it as the state.
  church_address_providence_name: string;
  church_address_postal_code: string;
  church_address_country_territory_name: string;
  church_address_county: string | null;
  diocese_name: string;
  phone_number: string;
  email: string;
  url: string;
  pastors_name: string;
  church_worship_times: MasstimesWorshipTime[];
  distance: string;
  wheel_chair_access: boolean;
}

/** One service entry attached to a church. */
interface MasstimesWorshipTime {
  day_of_week: string;
  /** Start time, parsed by the importer as "HH:MM:SS". */
  time_start: string;
  time_end: string;
  language: string | null;
  /** Service category; the importer keeps only 'Weekend' and 'Week Days'. */
  service_typename: string;
  comment: string;
  is_perpetual: boolean;
}

/** Counters accumulated over one import run and printed in the summary. */
interface ImportStats {
  gridPoints: number;
  apiRequests: number;
  churchesDiscovered: number;
  churchesMatched: number;
  churchesCreated: number;
  churchesSkipped: number;
  massSchedulesCreated: number;
  errors: number;
}

/** Parsed command-line options. */
interface CLIArgs {
  all: boolean;
  region?: string;
  dryRun: boolean;
  skipUs: boolean;
  /** Number of leading grid points to skip. */
  resumeFrom: number;
  jobId?: string;
}
|
||||
|
||||
// ─── CLI ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Parses the command line (`process.argv`) into a {@link CLIArgs} object.
 *
 * Exits the process with status 1 when neither `--all` nor `--region` is
 * given, when a flag that needs a value has none, when `--resume-from` is not
 * a non-negative integer, or when `--region` names an unknown region.
 * `--help` prints usage and exits with status 0. Unrecognised arguments are
 * reported with a warning and otherwise ignored.
 *
 * @returns the parsed options
 */
function parseArgs(): CLIArgs {
  const argv = process.argv.slice(2);
  const parsed: CLIArgs = { all: false, dryRun: false, skipUs: false, resumeFrom: 0 };

  // Aborts with a usage error; typed `never` so callers narrow correctly.
  const fail = (message: string): never => {
    console.error(`Error: ${message}`);
    return process.exit(1);
  };

  // Returns the value following a flag, rejecting a missing value or another flag.
  const valueFor = (flag: string, index: number): string => {
    const value = argv[index];
    if (value === undefined || value.startsWith('--')) {
      return fail(`${flag} requires a value`);
    }
    return value;
  };

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    switch (flag) {
      case '--all':
        parsed.all = true;
        break;
      case '--region':
        parsed.region = valueFor(flag, ++i);
        break;
      case '--dry-run':
        parsed.dryRun = true;
        break;
      case '--skip-us':
        parsed.skipUs = true;
        break;
      case '--resume-from': {
        const count = parseInt(valueFor(flag, ++i), 10);
        // A NaN here would silently disable resuming (`i < NaN` is always false).
        if (Number.isNaN(count) || count < 0) {
          fail('--resume-from expects a non-negative integer');
        }
        parsed.resumeFrom = count;
        break;
      }
      case '--job-id':
        parsed.jobId = valueFor(flag, ++i);
        break;
      case '--help':
        console.log(`Usage: npx tsx scripts/import-masstimes-api.ts [options]
  --all               Query all regions globally
  --region <name>     Query specific region: ${Object.keys(REGIONS).join(', ')}
  --skip-us           Skip US grid points (already well-covered)
  --dry-run           No database writes
  --resume-from <n>   Skip first N grid points
  --job-id <uuid>     Background job tracking`);
        process.exit(0);
        break;
      default:
        console.warn(`Warning: ignoring unrecognised argument "${flag}"`);
    }
  }

  if (!parsed.all && !parsed.region) {
    fail('specify --all or --region <name>');
  }

  // Without --all, an unknown region would produce an empty grid and a
  // "successful" run that imported nothing.
  if (!parsed.all && parsed.region !== undefined
      && !Object.prototype.hasOwnProperty.call(REGIONS, parsed.region)) {
    fail(`unknown region "${parsed.region}" (expected one of: ${Object.keys(REGIONS).join(', ')})`);
  }

  return parsed;
}
|
||||
|
||||
// ─── Grid Generation ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Builds the list of query coordinates covering the given regions.
 *
 * Latitude advances in LAT_SPACING steps. Longitude spacing is widened with
 * latitude so that columns stay at least TARGET_LNG_SPACING_KM apart, and is
 * never narrower than LAT_SPACING degrees. Coordinates are rounded to one
 * decimal place and de-duplicated across overlapping boxes and regions.
 *
 * @param regions keys of REGIONS; unknown names are reported and skipped
 * @param skipUs  when true, drops points inside the box lat 24..50, lng -125..-66
 * @returns unique grid points in generation order
 */
function generateGridPoints(regions: string[], skipUs: boolean): Array<{ lat: number; lng: number }> {
  const points: Array<{ lat: number; lng: number }> = [];
  const emittedKeys = new Set<string>();

  // Continental-US bounding box used by the --skip-us option.
  const insideUsBox = (lat: number, lng: number): boolean =>
    lat >= 24 && lat <= 50 && lng >= -125 && lng <= -66;

  for (const regionName of regions) {
    const boxes = REGIONS[regionName];
    if (!boxes) {
      console.error(`Unknown region: ${regionName}`);
      continue;
    }

    for (const [latMin, latMax, lngMin, lngMax] of boxes) {
      for (let lat = latMin; lat <= latMax; lat += LAT_SPACING) {
        // Adjust longitude spacing based on latitude (degrees get narrower)
        const kmPerDegreeLng = 111.32 * Math.cos((lat * Math.PI) / 180);
        const lngSpacing = kmPerDegreeLng > 0
          ? Math.max(LAT_SPACING, TARGET_LNG_SPACING_KM / kmPerDegreeLng)
          : LAT_SPACING;

        for (let lng = lngMin; lng <= lngMax; lng += lngSpacing) {
          const roundedLat = Math.round(lat * 10) / 10;
          const roundedLng = Math.round(lng * 10) / 10;
          const key = `${roundedLat},${roundedLng}`;

          if (emittedKeys.has(key)) continue;
          if (skipUs && insideUsBox(roundedLat, roundedLng)) continue;

          emittedKeys.add(key);
          points.push({ lat: roundedLat, lng: roundedLng });
        }
      }
    }
  }

  return points;
}
|
||||
|
||||
// ─── API ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Resolves after roughly `ms` milliseconds; used to pace API requests.
 *
 * @param ms delay in milliseconds
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * Fetches one page of churches around a coordinate.
 *
 * A 429 response is retried after a 30 second pause, up to a fixed number of
 * attempts, so that persistent throttling surfaces as an error rather than
 * retrying forever.
 *
 * @param lat  query latitude in degrees
 * @param lng  query longitude in degrees
 * @param page 1-based page number
 * @returns the churches on that page (empty when past the last page)
 * @throws Error on a non-OK HTTP status (including 429 once retries are
 *         exhausted) or when the response body is not a JSON array
 */
async function fetchPage(lat: number, lng: number, page: number): Promise<MasstimesChurch[]> {
  const MAX_RATE_LIMIT_RETRIES = 5;
  const RATE_LIMIT_BACKOFF_MS = 30000;

  const url = `${API_BASE}?lat=${lat}&long=${lng}&pg=${page}`;

  for (let attempt = 0; ; attempt++) {
    const response = await fetch(url, {
      headers: {
        'Referer': REFERER,
        'User-Agent': USER_AGENT,
        'Accept': 'application/json',
      },
    });

    if (response.status === 429 && attempt < MAX_RATE_LIMIT_RETRIES) {
      console.error(` Rate limited (429) — backing off 30s`);
      await sleep(RATE_LIMIT_BACKOFF_MS);
      continue;
    }

    if (!response.ok) {
      throw new Error(`HTTP ${response.status} for ${url}`);
    }

    // Validate the shape instead of trusting a cast: callers read `.length`
    // and spread the result.
    const payload: unknown = await response.json();
    if (!Array.isArray(payload)) {
      throw new Error(`Unexpected response (not an array) for ${url}`);
    }
    return payload as MasstimesChurch[];
  }
}
|
||||
|
||||
/**
 * Collects every page of results for one grid point.
 *
 * Paging stops at the first empty page, at the first page shorter than
 * PAGE_SIZE, or when a page contains no church id that has not already been
 * seen for this point. The last rule stops the loop if the API ever repeats a
 * page instead of returning an empty one.
 *
 * @param lat   query latitude in degrees
 * @param lng   query longitude in degrees
 * @param stats run counters; `apiRequests` is incremented once per page request
 * @returns all churches returned for the point (may contain repeated ids)
 */
async function fetchAllForPoint(
  lat: number,
  lng: number,
  stats: ImportStats,
): Promise<MasstimesChurch[]> {
  const collected: MasstimesChurch[] = [];
  const seenIds = new Set<string>();

  for (let page = 1; ; page++) {
    stats.apiRequests++;
    const batch = await fetchPage(lat, lng, page);
    if (batch.length === 0) break;

    const knownBefore = seenIds.size;
    for (const church of batch) seenIds.add(church.id);
    if (seenIds.size === knownBefore) break; // page repeated earlier results only

    collected.push(...batch);

    if (batch.length < PAGE_SIZE) break; // last page

    await sleep(RATE_LIMIT_MS);
  }

  return collected;
}
|
||||
|
||||
// ─── Data Conversion ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Maps a masstimes country name to its two-letter code.
 *
 * The lookup is case-insensitive and ignores surrounding whitespace. Only the
 * map's own entries are consulted, so names that collide with inherited
 * object members (for example "constructor") are treated as unknown.
 *
 * @param countryName country name as reported by the API
 * @returns the mapped code, or 'XX' when the name is empty or unknown
 */
function resolveCountryCode(countryName: string): string {
  if (!countryName) return 'XX';

  const key = countryName.trim().toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(COUNTRY_CODE_MAP, key)) return 'XX';

  return COUNTRY_CODE_MAP[key] || 'XX';
}
|
||||
|
||||
/**
 * Lower-case day label → day-of-week numbers (0 = Sunday … 6 = Saturday).
 * 'weekdays' expands to Monday through Friday.
 */
const DAY_MAP: Record<string, number[]> = {
  'sunday': [0],
  'monday': [1],
  'tuesday': [2],
  'wednesday': [3],
  'thursday': [4],
  'friday': [5],
  'saturday': [6],
  'weekdays': [1, 2, 3, 4, 5],
};
|
||||
|
||||
/**
 * Converts masstimes worship-time entries into mass schedule rows.
 *
 * Only 'Weekend' and 'Week Days' services are kept. An entry is dropped when
 * its start time is empty, is the '00:00:00' placeholder, or does not begin
 * with a valid hour (0-23) and optional minute (0-59). Entries whose day label
 * is not a key of DAY_MAP are dropped as well. A multi-day label such as
 * 'weekdays' yields one row per day, and rows identical in day, time,
 * language and notes are emitted once.
 *
 * @param times worship-time entries of one church
 * @returns schedule rows with `time` formatted as zero-padded "HH:MM";
 *          `language` defaults to 'Unknown' and `massType` is always null
 */
function parseWorshipTimes(times: MasstimesWorshipTime[]): Array<{
  dayOfWeek: number;
  time: string;
  language: string;
  notes: string | null;
  massType: string | null;
}> {
  const schedules: ReturnType<typeof parseWorshipTimes> = [];
  const emitted = new Set<string>();

  for (const wt of times) {
    // Only import mass services (Weekend = Sun/Sat, Week Days = weekday masses)
    if (wt.service_typename !== 'Weekend' && wt.service_typename !== 'Week Days') {
      continue;
    }

    const timeStr = wt.time_start?.trim();
    if (!timeStr || timeStr === '00:00:00') continue;

    // Parse "HH:MM:SS" → "HH:MM"; a bare hour is read as "HH:00".
    const match = /^(\d{1,2})(?::(\d{1,2}))?/.exec(timeStr);
    if (!match) continue;
    const hours = Number(match[1]);
    const minutes = Number(match[2] ?? '0');
    if (hours > 23 || minutes > 59) continue;
    const time24 = `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}`;

    const language = wt.language?.trim() || 'Unknown';
    const notes = wt.comment?.trim() || null;

    const dayKey = wt.day_of_week?.trim().toLowerCase();
    // Own-property check so labels like "constructor" cannot hit inherited members.
    if (!dayKey || !Object.prototype.hasOwnProperty.call(DAY_MAP, dayKey)) continue;

    for (const day of DAY_MAP[dayKey] ?? []) {
      const signature = JSON.stringify([day, time24, language, notes]);
      if (emitted.has(signature)) continue;
      emitted.add(signature);
      schedules.push({ dayOfWeek: day, time: time24, language, notes, massType: null });
    }
  }

  return schedules;
}
|
||||
|
||||
// ─── Database ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Loads every church row with the columns needed for duplicate matching:
 * name, coordinates, the per-source external ids, and the contact fields
 * that the importer may later fill in.
 *
 * @returns all existing churches, held in memory for the whole run
 */
async function loadExistingChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing churches for deduplication...');

  const churches = await prisma.church.findMany({
    select: {
      id: true,
      name: true,
      latitude: true,
      longitude: true,
      // External ids, one per upstream source.
      osmId: true,
      baiduId: true,
      masstimesId: true,
      orarimesseId: true,
      massSchedulesPhId: true,
      philmassId: true,
      horariosMisasId: true,
      mszeInfoId: true,
      weekdayMassesId: true,
      messesInfoId: true,
      bohosluzbyId: true,
      miserendId: true,
      kerknetId: true,
      gottesdienstzeitenId: true,
      discovermassId: true,
      source: true,
      website: true,
      phone: true,
      address: true,
      country: true,
    },
  });

  console.log(`Loaded ${churches.length} existing churches`);
  return churches;
}
|
||||
|
||||
/**
 * Writes the current counters to the background-job row.
 *
 * Best effort: a failed update is logged and swallowed so that progress
 * reporting can never abort the import itself.
 *
 * @param jobId id of the backgroundJob row to update
 * @param stats current run counters
 */
async function updateJobProgress(jobId: string, stats: ImportStats): Promise<void> {
  const progress = {
    processed: stats.gridPoints,
    succeeded: stats.churchesMatched + stats.churchesCreated,
    failed: stats.errors,
    itemsFound: stats.churchesDiscovered,
  };

  try {
    await prisma.backgroundJob.update({ where: { id: jobId }, data: progress });
  } catch (err) {
    console.error(`Failed to update job progress:`, err);
  }
}
|
||||
|
||||
// ─── Main Import ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Matches one discovered masstimes church against the database, or creates it.
 *
 * Order of checks: known masstimesId → unusable coordinates → duplicate match
 * via findDuplicateChurch → create. A matched church receives the masstimesId
 * and any contact fields it was missing. A failed database write is counted
 * and logged here, so one bad record does not abort the rest of its grid point.
 *
 * @param mc               church record from the API
 * @param existingChurches in-memory dedup list; newly created churches are appended
 * @param masstimesIdSet   masstimes ids already linked in the DB; extended on success
 * @param stats            run counters, updated in place
 * @param dryRun           when true, nothing is written and creations are only counted
 * @returns true when the church was created (or would be, in a dry run)
 */
async function importDiscoveredChurch(
  mc: MasstimesChurch,
  existingChurches: ExistingChurch[],
  masstimesIdSet: Set<string>,
  stats: ImportStats,
  dryRun: boolean,
): Promise<boolean> {
  // Already in DB by masstimesId
  if (masstimesIdSet.has(mc.id)) {
    stats.churchesMatched++;
    return false;
  }

  const churchLat = parseFloat(mc.latitude);
  const churchLng = parseFloat(mc.longitude);
  // Unparseable or (0, 0) coordinates cannot be matched or placed on a map.
  if (isNaN(churchLat) || isNaN(churchLng) || (churchLat === 0 && churchLng === 0)) return false;

  const country = resolveCountryCode(mc.church_address_country_territory_name);
  const address = [
    mc.church_address_street_address,
    mc.church_address_city_name,
    mc.church_address_providence_name,
    mc.church_address_postal_code,
  ].filter(s => s?.trim()).join(', ').trim() || null;

  // Proximity + name match
  const candidate = { name: mc.name, lat: churchLat, lng: churchLng };
  const duplicate = findDuplicateChurch(candidate, existingChurches);

  if (duplicate) {
    stats.churchesMatched++;
    if (dryRun) return false;

    // Link the record and fill only the fields the existing row is missing.
    const updateData: Record<string, unknown> = { masstimesId: mc.id };
    if (!duplicate.phone && mc.phone_number?.trim()) updateData.phone = mc.phone_number.trim();
    if (!duplicate.website && mc.url?.trim()) {
      updateData.website = mc.url.trim();
      updateData.hasWebsite = true;
    }
    if (!duplicate.address && address) updateData.address = address;
    if (duplicate.country === 'XX' && country !== 'XX') updateData.country = country;

    try {
      await prisma.church.update({ where: { id: duplicate.id }, data: updateData });
      masstimesIdSet.add(mc.id);
    } catch (error) {
      if (error instanceof Error && error.message.includes('Unique constraint')) {
        stats.churchesSkipped++;
      } else {
        // Same handling as the create path below: count, log, keep going.
        stats.errors++;
        console.error(` Error updating ${mc.name}: ${error instanceof Error ? error.message : error}`);
      }
    }
    return false;
  }

  // Create new church
  const schedules = parseWorshipTimes(mc.church_worship_times || []);

  if (dryRun) {
    stats.churchesCreated++;
    stats.massSchedulesCreated += schedules.length;
    return true;
  }

  try {
    const newChurch = await prisma.church.create({
      data: {
        name: mc.name,
        latitude: churchLat,
        longitude: churchLng,
        address,
        city: mc.church_address_city_name?.trim() || null,
        state: mc.church_address_providence_name?.trim() || null,
        zip: mc.church_address_postal_code?.trim() || null,
        country,
        phone: mc.phone_number?.trim() || null,
        website: mc.url?.trim() || null,
        email: mc.email?.trim() || null,
        hasWebsite: !!mc.url?.trim(),
        masstimesId: mc.id,
        source: 'masstimes',
        diocese: mc.diocese_name?.trim() || null,
        pastorName: mc.pastors_name?.trim() || null,
        wheelchairAccess: mc.wheel_chair_access || false,
        massSchedules: schedules.length > 0 ? {
          create: schedules.map(s => ({
            dayOfWeek: s.dayOfWeek,
            time: s.time,
            language: s.language,
            notes: s.notes,
            massType: s.massType,
            isActive: true,
          })),
        } : undefined,
      },
    });

    stats.churchesCreated++;
    stats.massSchedulesCreated += schedules.length;
    masstimesIdSet.add(mc.id);

    // Make the new church visible to duplicate matching for the rest of the run.
    existingChurches.push({
      id: newChurch.id, name: mc.name,
      latitude: churchLat, longitude: churchLng,
      osmId: null, baiduId: null, masstimesId: mc.id,
      orarimesseId: null, massSchedulesPhId: null,
      philmassId: null, horariosMisasId: null,
      mszeInfoId: null, weekdayMassesId: null,
      messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null,
      gottesdienstzeitenId: null, discovermassId: null,
      source: 'masstimes', website: mc.url?.trim() || null,
      phone: mc.phone_number?.trim() || null, address, country,
    });
    return true;
  } catch (error) {
    if (error instanceof Error && error.message.includes('Unique constraint')) {
      stats.churchesSkipped++;
    } else {
      stats.errors++;
      console.error(` Error creating ${mc.name}: ${error instanceof Error ? error.message : error}`);
    }
    return false;
  }
}

/**
 * Entry point: builds the query grid, walks it point by point, imports every
 * church not seen before in this run, reports progress (optionally to a
 * background-job row), prints a summary and closes the database connections.
 *
 * A failure while fetching a grid point is counted and logged, and the run
 * continues with the next point after a longer pause.
 */
async function main() {
  const args = parseArgs();

  const regionNames: string[] = args.all
    ? Object.keys(REGIONS)
    : (args.region ? [args.region] : []);

  const gridPoints = generateGridPoints(regionNames, args.skipUs);

  console.log(`\n${'='.repeat(70)}`);
  console.log('MASSTIMES.ORG API GLOBAL IMPORTER');
  console.log('='.repeat(70));
  console.log(`Regions: ${regionNames.join(', ')}`);
  console.log(`Grid points: ${gridPoints.length}`);
  console.log(`Skip US: ${args.skipUs ? 'YES' : 'NO'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES' : 'NO'}`);
  console.log(`Rate limit: ${RATE_LIMIT_MS}ms between requests`);
  console.log(`Resume from: ${args.resumeFrom || 'start'}`);
  const estHours = Math.round(gridPoints.length * 2 * RATE_LIMIT_MS / 1000 / 3600 * 10) / 10;
  console.log(`Est. time: ~${estHours} hours (est. 2 pages/point avg)`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70));

  const existingChurches = await loadExistingChurches();

  // Build masstimesId lookup for fast dedup
  const masstimesIdSet = new Set<string>();
  for (const c of existingChurches) {
    if (c.masstimesId) masstimesIdSet.add(c.masstimesId);
  }

  // Track discovered IDs to deduplicate across grid points
  const discoveredIds = new Set<string>();

  const stats: ImportStats = {
    gridPoints: 0,
    apiRequests: 0,
    churchesDiscovered: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    massSchedulesCreated: 0,
    errors: 0,
  };

  const jobId = args.jobId;
  if (jobId) {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: { status: 'running', startedAt: new Date(), totalItems: gridPoints.length },
    });
  }

  const startTime = Date.now();

  for (let i = 0; i < gridPoints.length; i++) {
    const { lat, lng } = gridPoints[i];
    // Counted before the resume check, so the counter tracks the position in
    // the grid (skipped points included).
    stats.gridPoints++;

    if (i < args.resumeFrom) continue;

    try {
      const churches = await fetchAllForPoint(lat, lng, stats);

      if (churches.length > 0) {
        let newInPoint = 0;
        for (const mc of churches) {
          if (discoveredIds.has(mc.id)) continue;
          discoveredIds.add(mc.id);
          stats.churchesDiscovered++;

          const isNew = await importDiscoveredChurch(mc, existingChurches, masstimesIdSet, stats, args.dryRun);
          if (isNew) newInPoint++;
        }

        if (newInPoint > 0) {
          console.log(` Grid ${i + 1}/${gridPoints.length} (${lat},${lng}): ${churches.length} found, ${newInPoint} new`);
        }
      }

      await sleep(RATE_LIMIT_MS);
    } catch (error) {
      stats.errors++;
      console.error(` Error at grid ${i + 1} (${lat},${lng}): ${error instanceof Error ? error.message : error}`);
      await sleep(RATE_LIMIT_MS * 2);
    }

    // Progress every 50 points
    if ((i + 1) % 50 === 0 || i === gridPoints.length - 1) {
      const elapsed = (Date.now() - startTime) / 1000;
      const rate = elapsed > 0 ? Math.round(stats.apiRequests / elapsed * 3600) : 0;
      console.log(` Progress: ${i + 1}/${gridPoints.length} grid points, ${stats.churchesDiscovered} discovered, ${stats.churchesCreated} new, ${stats.apiRequests} API calls [${Math.round(elapsed)}s, ~${rate}/hr]`);
    }

    if (jobId && (i + 1) % 20 === 0) {
      await updateJobProgress(jobId, stats);
    }
  }

  if (jobId) {
    await updateJobProgress(jobId, stats);
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: { status: 'completed', completedAt: new Date() },
    });
  }

  const elapsed = (Date.now() - startTime) / 1000;
  console.log(`\n${'='.repeat(70)}`);
  console.log('MASSTIMES API IMPORT SUMMARY');
  console.log('='.repeat(70));
  console.log(`Grid points queried: ${stats.gridPoints}`);
  console.log(`API requests: ${stats.apiRequests}`);
  console.log(`Churches discovered: ${stats.churchesDiscovered}`);
  console.log(`Churches matched: ${stats.churchesMatched} (already in DB)`);
  console.log(`Churches created: ${stats.churchesCreated}`);
  console.log(`Churches skipped: ${stats.churchesSkipped} (duplicates)`);
  console.log(`Mass schedules created: ${stats.massSchedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Elapsed: ${Math.round(elapsed)}s (${(elapsed / 3600).toFixed(1)}h)`);
  console.log('='.repeat(70));

  await prisma.$disconnect();
  await pool.end();
}
|
||||
|
||||
// Top-level failure handler: report the reason and exit non-zero. The rejection
// value is narrowed first because it is not guaranteed to be an Error.
main().catch((error: unknown) => {
  const reason = error instanceof Error ? error.message : String(error);
  console.error(`Fatal error: ${reason}`);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user