2026-03-11 06:52:05 -04:00
|
|
|
#!/usr/bin/env tsx
|
|
|
|
|
/**
|
|
|
|
|
* Import Catholic churches and mass schedules globally from masstimes.org API
|
|
|
|
|
*
|
|
|
|
|
* masstimes.org has ~121,000 churches worldwide. This script queries their
|
|
|
|
|
* geo-search API with a grid of coordinates covering world landmass, then
|
|
|
|
|
* deduplicates and imports the results.
|
|
|
|
|
*
|
|
|
|
|
* API: GET https://masstimes.org/Churchs/?lat={lat}&long={lng}&pg={page}
|
|
|
|
|
* - Requires Referer header
|
|
|
|
|
* - Returns 30 results per page within 100-mile (~160km) radius
|
|
|
|
|
* - Paginate until empty array
|
|
|
|
|
*
|
|
|
|
|
* Grid strategy:
|
|
|
|
|
* - 2.5° latitude spacing (~278km), longitude adjusted for latitude
|
|
|
|
|
* - Continental bounding boxes to skip oceans
|
|
|
|
|
* - 100-mile radius means ~322km diameter — 2.5° spacing ensures overlap
|
|
|
|
|
*
|
|
|
|
|
* Usage:
|
|
|
|
|
* npx tsx scripts/import-masstimes-api.ts --all
|
|
|
|
|
* npx tsx scripts/import-masstimes-api.ts --all --dry-run
|
|
|
|
|
* npx tsx scripts/import-masstimes-api.ts --region europe
|
|
|
|
|
* npx tsx scripts/import-masstimes-api.ts --all --skip-us
|
|
|
|
|
* npx tsx scripts/import-masstimes-api.ts --all --job-id {uuid}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
import dotenv from 'dotenv';
|
|
|
|
|
import path from 'path';
|
|
|
|
|
|
|
|
|
|
dotenv.config({ path: path.resolve(process.cwd(), '.env.local') });
|
|
|
|
|
dotenv.config({ path: path.resolve(process.cwd(), '.env') });
|
|
|
|
|
|
|
|
|
|
import { Pool } from 'pg';
|
|
|
|
|
import { PrismaPg } from '@prisma/adapter-pg';
|
|
|
|
|
import { PrismaClient } from '@prisma/client';
|
|
|
|
|
|
|
|
|
|
/**
 * Return `connectionString` with its password replaced by `***` so it can be
 * written to the log.
 *
 * The string is parsed as a URL instead of pattern-matched, so a password that
 * contains `:` or `@` is masked in full rather than partially echoed.
 */
function maskConnectionString(connectionString: string): string {
  try {
    const parsed = new URL(connectionString);
    if (parsed.password) parsed.password = '***';
    return parsed.toString();
  } catch {
    // Not parseable as a URL: do not echo it, it may still embed credentials.
    return '<unparseable connection string>';
  }
}

// Use DATABASE_URL when set (and non-empty); otherwise fall back to the local default.
const dbUrl = process.env.DATABASE_URL || 'postgresql://postgres:postgres@localhost:5432/nearestmass';
console.log(`Connecting to database: ${maskConnectionString(dbUrl)}`);

const pool = new Pool({
  connectionString: dbUrl,
  // URLs containing "neon" get TLS with certificate verification turned off;
  // every other URL leaves `ssl` unset.
  ssl: dbUrl.includes('neon') ? { rejectUnauthorized: false } : undefined,
});
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
|
|
|
|
|
|
|
|
|
|
import { findDuplicateChurch } from '../src/lib/church-matcher';
|
|
|
|
|
import type { ExistingChurch } from '../src/lib/church-matcher';
|
|
|
|
|
|
|
|
|
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/** Geo-search endpoint; requests append `lat`, `long` and `pg` query parameters. */
const API_BASE = 'https://masstimes.org/Churchs/';

/** Value sent in the `Referer` header of every API request. */
const REFERER = 'https://masstimes.org/map';

/** Value sent in the `User-Agent` header of every API request. */
const USER_AGENT = 'NearestMass-Importer/1.0 (parish data aggregator; contact: privacy@nearestmass.com)';

/** Pause between consecutive API requests, in milliseconds (2 seconds). */
const RATE_LIMIT_MS = 2000;

/** Results per full page; a shorter page is treated as the last one. */
const PAGE_SIZE = 30;

/** Latitude step between grid rows, in degrees (~278km). */
const LAT_SPACING = 2.5;

/** Target distance between grid columns, in kilometres. */
const TARGET_LNG_SPACING_KM = 250;
|
|
|
|
|
|
|
|
|
|
/**
 * Lower-cased country name → two-letter country code.
 *
 * Several countries appear under more than one spelling (for example
 * `'czech republic'` / `'czechia'`, `'bahamas'` / `'bahamas, the'`) so that
 * alternative spellings of the same country resolve to the same code.
 */
const COUNTRY_CODE_MAP: Record<string, string> = {
  // North America
  'united states': 'US', 'canada': 'CA', 'mexico': 'MX',

  // Western, central and southern Europe
  'united kingdom': 'GB', 'ireland': 'IE', 'france': 'FR', 'germany': 'DE',
  'spain': 'ES', 'italy': 'IT', 'portugal': 'PT', 'netherlands': 'NL',
  'belgium': 'BE', 'luxembourg': 'LU', 'switzerland': 'CH', 'austria': 'AT',
  'poland': 'PL', 'czech republic': 'CZ', 'czechia': 'CZ', 'slovakia': 'SK',
  'hungary': 'HU', 'croatia': 'HR', 'slovenia': 'SI', 'romania': 'RO',
  'bulgaria': 'BG', 'serbia': 'RS', 'bosnia and herzegovina': 'BA',
  'montenegro': 'ME', 'north macedonia': 'MK', 'albania': 'AL', 'kosovo': 'XK',
  'greece': 'GR', 'cyprus': 'CY', 'malta': 'MT', 'denmark': 'DK',

  // Nordics and Baltics
  'sweden': 'SE', 'norway': 'NO', 'finland': 'FI', 'iceland': 'IS',
  'estonia': 'EE', 'latvia': 'LV', 'lithuania': 'LT',

  // Eastern Europe and the Caucasus
  'ukraine': 'UA', 'russia': 'RU', 'belarus': 'BY', 'moldova': 'MD',
  'georgia': 'GE', 'armenia': 'AM', 'azerbaijan': 'AZ',

  // Eastern Mediterranean and North Africa
  'turkey': 'TR', 'israel': 'IL', 'jordan': 'JO', 'lebanon': 'LB',
  'egypt': 'EG', 'morocco': 'MA', 'tunisia': 'TN', 'algeria': 'DZ',

  // South, Southeast and East Asia
  'india': 'IN', 'sri lanka': 'LK', 'pakistan': 'PK', 'bangladesh': 'BD',
  'nepal': 'NP', 'myanmar': 'MM', 'thailand': 'TH', 'vietnam': 'VN',
  'cambodia': 'KH', 'laos': 'LA', 'malaysia': 'MY', 'singapore': 'SG',
  'indonesia': 'ID', 'philippines': 'PH', 'china': 'CN', 'japan': 'JP',
  'south korea': 'KR', 'korea, south': 'KR', 'taiwan': 'TW',
  'hong kong': 'HK', 'macau': 'MO', 'mongolia': 'MN',

  // Oceania
  'australia': 'AU', 'new zealand': 'NZ', 'fiji': 'FJ',
  'papua new guinea': 'PG', 'samoa': 'WS', 'tonga': 'TO', 'guam': 'GU',

  // Sub-Saharan Africa
  'nigeria': 'NG', 'ghana': 'GH', 'kenya': 'KE', 'tanzania': 'TZ',
  'uganda': 'UG', 'south africa': 'ZA', 'cameroon': 'CM', 'senegal': 'SN',
  'ethiopia': 'ET', 'madagascar': 'MG', 'mozambique': 'MZ',
  'zambia': 'ZM', 'zimbabwe': 'ZW', 'malawi': 'MW', 'rwanda': 'RW',
  'burundi': 'BI', 'congo, democratic republic of the': 'CD',
  'congo, republic of the': 'CG', "côte d'ivoire": 'CI', 'ivory coast': 'CI',
  'burkina faso': 'BF', 'mali': 'ML', 'niger': 'NE', 'chad': 'TD',
  'central african republic': 'CF', 'gabon': 'GA', 'equatorial guinea': 'GQ',
  'angola': 'AO', 'namibia': 'NA', 'botswana': 'BW', 'lesotho': 'LS',
  'eswatini': 'SZ', 'swaziland': 'SZ', 'mauritius': 'MU',

  // South America
  'brazil': 'BR', 'argentina': 'AR', 'colombia': 'CO', 'peru': 'PE',
  'chile': 'CL', 'venezuela': 'VE', 'ecuador': 'EC', 'bolivia': 'BO',
  'paraguay': 'PY', 'uruguay': 'UY', 'guyana': 'GY', 'suriname': 'SR',

  // Caribbean and Central America
  'trinidad and tobago': 'TT', 'jamaica': 'JM', 'barbados': 'BB',
  'bahamas': 'BS', 'bahamas, the': 'BS', 'haiti': 'HT',
  'dominican republic': 'DO', 'cuba': 'CU', 'puerto rico': 'PR',
  'guatemala': 'GT', 'honduras': 'HN', 'el salvador': 'SV',
  'nicaragua': 'NI', 'costa rica': 'CR', 'panama': 'PA', 'belize': 'BZ',
  'grenada': 'GD', 'saint lucia': 'LC', 'dominica': 'DM',
  'saint vincent and the grenadines': 'VC', 'antigua and barbuda': 'AG',
  'saint kitts and nevis': 'KN', 'bermuda': 'BM', 'cayman islands': 'KY',
  'aruba': 'AW', 'curaçao': 'CW', 'curacao': 'CW',

  // Gulf states, Iran and Afghanistan
  'united arab emirates': 'AE', 'saudi arabia': 'SA', 'qatar': 'QA',
  'bahrain': 'BH', 'kuwait': 'KW', 'oman': 'OM', 'iraq': 'IQ',
  'iran': 'IR', 'afghanistan': 'AF',

  // Central Asia
  'kazakhstan': 'KZ', 'uzbekistan': 'UZ', 'kyrgyzstan': 'KG',
  'tajikistan': 'TJ', 'turkmenistan': 'TM',

  // European microstates
  'liechtenstein': 'LI', 'monaco': 'MC', 'andorra': 'AD', 'san marino': 'SM',
  'vatican city': 'VA', 'holy see (vatican city)': 'VA',

  // Timor-Leste
  'east timor': 'TL', 'timor-leste': 'TL',
};
|
|
|
|
|
|
|
|
|
|
/** One bounding box as `[latMin, latMax, lngMin, lngMax]`, in degrees. */
type BoundingBox = [number, number, number, number];

/**
 * Named regions selectable on the command line, each described by one or more
 * bounding boxes. Boxes of different regions overlap in places.
 */
const REGIONS: Record<string, BoundingBox[]> = {
  'north-america': [
    [7, 72, -170, -50],
  ],
  'central-america': [
    [7, 24, -120, -60],
  ],
  'south-america': [
    [-56, 13, -82, -34],
  ],
  'europe': [
    [35, 72, -12, 45],
  ],
  'eastern-europe': [
    [40, 70, 20, 60],
  ],
  'africa': [
    [-36, 38, -20, 55],
  ],
  'middle-east': [
    [12, 42, 25, 65],
  ],
  'south-asia': [
    [5, 38, 60, 98],
  ],
  'east-asia': [
    [18, 55, 95, 150],
  ],
  'southeast-asia': [
    [-12, 22, 92, 142],
  ],
  // Two boxes: the second adds a strip between latitude -22 and 0, east of longitude 160.
  'oceania': [
    [-48, -8, 110, 180],
    [-22, 0, 160, 180],
  ],
  'central-asia': [
    [35, 55, 45, 90],
  ],
};
|
|
|
|
|
|
|
|
|
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/**
 * One church record as this script reads it from the masstimes.org API.
 * Coordinates arrive as strings and are parsed by the importer.
 */
interface MasstimesChurch {
  // Identity
  /** Source identifier; stored on imported churches as `masstimesId`. */
  id: string;
  name: string;

  // Position (numeric text)
  latitude: string;
  longitude: string;

  // Postal address parts
  church_address_street_address: string;
  church_address_city_name: string;
  /** Province/state; the field name is spelled this way in this interface. */
  church_address_providence_name: string;
  church_address_postal_code: string;
  /** Country name (not a code). */
  church_address_country_territory_name: string;
  church_address_county: string | null;

  // Parish details
  diocese_name: string;
  phone_number: string;
  email: string;
  url: string;
  pastors_name: string;

  // Services and search metadata
  church_worship_times: MasstimesWorshipTime[];
  distance: string;
  wheel_chair_access: boolean;
}
|
|
|
|
|
|
|
|
|
|
/** One worship-time entry attached to a church record from the API. */
interface MasstimesWorshipTime {
  // When
  /** Day label, for example a weekday name. */
  day_of_week: string;
  /** Start time as text, expected in `HH:MM:SS` form. */
  time_start: string;
  time_end: string;

  // What
  language: string | null;
  /** Service category; the importer keeps only `'Weekend'` and `'Week Days'`. */
  service_typename: string;
  comment: string;
  is_perpetual: boolean;
}
|
|
|
|
|
|
|
|
|
|
/** Running counters for one import run; all start at zero. */
interface ImportStats {
  // Crawl progress
  /** Grid points visited so far (including ones skipped by a resume offset). */
  gridPoints: number;
  /** API page requests issued. */
  apiRequests: number;

  // Church outcomes
  /** Distinct church ids seen in API results during this run. */
  churchesDiscovered: number;
  /** Churches that corresponded to a row already in the database. */
  churchesMatched: number;
  churchesCreated: number;
  /** Writes abandoned because of a unique-constraint conflict. */
  churchesSkipped: number;

  // Schedules and failures
  massSchedulesCreated: number;
  errors: number;
}
|
|
|
|
|
|
|
|
|
|
/** Options accepted on the command line. */
interface CLIArgs {
  // Scope: one of `all` / `region` must be supplied
  /** `--all`: query every configured region. */
  all: boolean;
  /** `--region <name>`: query a single region. */
  region?: string;
  /** `--skip-us`: leave out grid points inside the US bounding box. */
  skipUs: boolean;

  // Run behaviour
  /** `--dry-run`: make no database writes. */
  dryRun: boolean;
  /** `--resume-from <n>`: number of leading grid points to skip. */
  resumeFrom: number;
  /** `--job-id <uuid>`: background job record to report progress to. */
  jobId?: string;
}
|
|
|
|
|
|
|
|
|
|
// ─── CLI ─────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/**
 * Parse the script's command-line options from `process.argv`.
 *
 * Exits the process with status 1 when neither `--all` nor `--region` is
 * given, when an option that takes a value has none, or when `--resume-from`
 * is not a non-negative integer. `--help` prints usage and exits with status 0.
 * Unrecognised arguments are ignored.
 *
 * @returns The parsed options.
 */
function parseArgs(): CLIArgs {
  const args = process.argv.slice(2);
  const result: CLIArgs = { all: false, dryRun: false, skipUs: false, resumeFrom: 0 };

  // Read the value that follows `flag`; a missing value (end of input or
  // another `--option`) is a usage error rather than a silent `undefined`.
  const takeValue = (flag: string, index: number): string => {
    const value = args[index];
    if (value === undefined || value.startsWith('--')) {
      console.error(`Error: ${flag} requires a value`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--all':
        result.all = true;
        break;
      case '--region':
        result.region = takeValue('--region', ++i);
        break;
      case '--dry-run':
        result.dryRun = true;
        break;
      case '--skip-us':
        result.skipUs = true;
        break;
      case '--resume-from': {
        const raw = takeValue('--resume-from', ++i);
        // A NaN offset would make every `i < resumeFrom` check false and
        // silently restart from the first grid point, so reject it up front.
        if (!/^\d+$/.test(raw)) {
          console.error(`Error: --resume-from expects a non-negative integer, got "${raw}"`);
          process.exit(1);
        }
        result.resumeFrom = parseInt(raw, 10);
        break;
      }
      case '--job-id':
        result.jobId = takeValue('--job-id', ++i);
        break;
      case '--help':
        console.log(`Usage: npx tsx scripts/import-masstimes-api.ts [options]
--all Query all regions globally
--region <name> Query specific region: ${Object.keys(REGIONS).join(', ')}
--skip-us Skip US grid points (already well-covered)
--dry-run No database writes
--resume-from <n> Skip first N grid points
--job-id <uuid> Background job tracking`);
        process.exit(0);
    }
  }

  if (!result.all && !result.region) {
    console.error('Error: specify --all or --region <name>');
    process.exit(1);
  }

  return result;
}
|
|
|
|
|
|
|
|
|
|
// ─── Grid Generation ─────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/**
 * Build the list of query coordinates for the given regions.
 *
 * Each bounding box is walked row by row in `LAT_SPACING` steps. Within a
 * row the longitude step is widened with latitude so columns sit roughly
 * `TARGET_LNG_SPACING_KM` apart, but never closer than `LAT_SPACING` degrees.
 * Coordinates are rounded to one decimal place, and a coordinate already
 * emitted (for example by an overlapping region) is not emitted again.
 *
 * @param regions Region names to cover; an unknown name is reported and skipped.
 * @param skipUs  When true, omit points with latitude in [24, 50] and longitude in [-125, -66].
 * @returns Grid points in generation order.
 */
function generateGridPoints(regions: string[], skipUs: boolean): Array<{ lat: number; lng: number }> {
  const grid: Array<{ lat: number; lng: number }> = [];
  const emitted = new Set<string>();

  const roundToTenth = (degrees: number): number => Math.round(degrees * 10) / 10;
  const insideUsBox = (lat: number, lng: number): boolean =>
    lat >= 24 && lat <= 50 && lng >= -125 && lng <= -66;

  for (const regionName of regions) {
    const boxes = REGIONS[regionName];
    if (!boxes) {
      console.error(`Unknown region: ${regionName}`);
      continue;
    }

    for (const [south, north, west, east] of boxes) {
      for (let rowLat = south; rowLat <= north; rowLat += LAT_SPACING) {
        // A degree of longitude shrinks with the cosine of the latitude.
        const kmPerLngDegree = 111.32 * Math.cos((rowLat * Math.PI) / 180);
        let lngStep = LAT_SPACING;
        if (kmPerLngDegree > 0) {
          lngStep = Math.max(LAT_SPACING, TARGET_LNG_SPACING_KM / kmPerLngDegree);
        }

        for (let colLng = west; colLng <= east; colLng += lngStep) {
          const lat = roundToTenth(rowLat);
          const lng = roundToTenth(colLng);
          const key = `${lat},${lng}`;

          if (emitted.has(key)) continue;
          // Points dropped by the US filter are not recorded as emitted.
          if (skipUs && insideUsBox(lat, lng)) continue;

          emitted.add(key);
          grid.push({ lat, lng });
        }
      }
    }
  }

  return grid;
}
|
|
|
|
|
|
|
|
|
|
// ─── API ─────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/**
 * Pause for the given duration.
 *
 * @param ms Delay in milliseconds, passed to `setTimeout`.
 * @returns A promise that resolves once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch one page of churches around a coordinate.
 *
 * An HTTP 429 response is retried after a 30-second pause, up to a fixed
 * number of times, so a persistently rate-limited endpoint ends in an error
 * instead of retrying forever.
 *
 * @param lat  Latitude of the search centre.
 * @param lng  Longitude of the search centre.
 * @param page Page number, sent as the `pg` query parameter.
 * @returns The churches on that page (an empty array when there are none).
 * @throws Error on a non-OK HTTP status (including 429 once retries are
 *         exhausted) or when the response body is not a JSON array.
 */
async function fetchPage(lat: number, lng: number, page: number): Promise<MasstimesChurch[]> {
  const MAX_RATE_LIMIT_RETRIES = 5;
  const RATE_LIMIT_BACKOFF_MS = 30000;
  const url = `${API_BASE}?lat=${lat}&long=${lng}&pg=${page}`;

  for (let attempt = 0; ; attempt++) {
    const response = await fetch(url, {
      headers: {
        'Referer': REFERER,
        'User-Agent': USER_AGENT,
        'Accept': 'application/json',
      },
    });

    if (response.status === 429 && attempt < MAX_RATE_LIMIT_RETRIES) {
      console.error(` Rate limited (429) — backing off 30s`);
      await sleep(RATE_LIMIT_BACKOFF_MS);
      continue;
    }

    if (!response.ok) {
      throw new Error(`HTTP ${response.status} for ${url}`);
    }

    // Callers iterate and spread the result, so reject anything that is not
    // an array here, where the URL is still available for the error message.
    const payload: unknown = await response.json();
    if (!Array.isArray(payload)) {
      throw new Error(`Unexpected non-array response for ${url}`);
    }
    return payload as MasstimesChurch[];
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collect every page of results for one grid coordinate.
 *
 * Pages are requested from 1 upward until a page comes back empty or shorter
 * than `PAGE_SIZE`. `stats.apiRequests` is incremented once per request, and
 * the function waits `RATE_LIMIT_MS` between consecutive pages.
 *
 * @param lat   Latitude of the grid point.
 * @param lng   Longitude of the grid point.
 * @param stats Run counters; only `apiRequests` is modified here.
 * @returns All churches returned for the point, in page order.
 */
async function fetchAllForPoint(
  lat: number,
  lng: number,
  stats: ImportStats,
): Promise<MasstimesChurch[]> {
  const collected: MasstimesChurch[] = [];

  for (let page = 1; ; page++) {
    stats.apiRequests += 1;
    const batch = await fetchPage(lat, lng, page);

    if (batch.length === 0) {
      break;
    }
    collected.push(...batch);

    // A short page means there is nothing further to request.
    if (batch.length < PAGE_SIZE) {
      break;
    }

    await sleep(RATE_LIMIT_MS);
  }

  return collected;
}
|
|
|
|
|
|
|
|
|
|
// ─── Data Conversion ─────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/**
 * Translate a country name into its two-letter code.
 *
 * The name is trimmed and lower-cased before the lookup in
 * `COUNTRY_CODE_MAP`. Only the map's own entries are consulted, so names that
 * happen to match inherited object members (such as `"constructor"`) are
 * treated as unknown rather than returning a non-string value.
 *
 * @param countryName Country name as received from the API.
 * @returns The mapped code, or `'XX'` when the name is empty or not in the map.
 */
function resolveCountryCode(countryName: string): string {
  if (!countryName) return 'XX';

  const key = countryName.trim().toLowerCase();
  if (!Object.prototype.hasOwnProperty.call(COUNTRY_CODE_MAP, key)) return 'XX';

  return COUNTRY_CODE_MAP[key] || 'XX';
}
|
|
|
|
|
|
|
|
|
|
/**
 * Lower-cased day label → day-of-week numbers, with Sunday as 0 and Saturday
 * as 6. The `weekdays` label expands to Monday through Friday.
 */
const DAY_MAP: Record<string, number[]> = {
  sunday: [0],
  monday: [1],
  tuesday: [2],
  wednesday: [3],
  thursday: [4],
  friday: [5],
  saturday: [6],

  // Collective label: one schedule entry per working day.
  weekdays: [1, 2, 3, 4, 5],
};
|
|
|
|
|
|
|
|
|
|
/**
 * Convert API worship-time entries into mass-schedule rows.
 *
 * Only entries whose `service_typename` is `'Weekend'` or `'Week Days'` are
 * kept. Entries with no start time, a start time of `00:00:00`, or a day
 * label that is not one of `DAY_MAP`'s own keys are dropped. A label that
 * maps to several days yields one row per day.
 *
 * @param times Worship-time entries of a single church.
 * @returns Schedule rows with `time` as `HH:MM`; `massType` is always `null`.
 */
function parseWorshipTimes(times: MasstimesWorshipTime[]): Array<{
  dayOfWeek: number;
  time: string;
  language: string;
  notes: string | null;
  massType: string | null;
}> {
  const MASS_SERVICE_TYPES = new Set(['Weekend', 'Week Days']);
  const schedules: ReturnType<typeof parseWorshipTimes> = [];

  for (const wt of times) {
    // Only import mass services (Weekend = Sun/Sat, Week Days = weekday masses)
    if (!MASS_SERVICE_TYPES.has(wt.service_typename)) continue;

    const timeStr = wt.time_start?.trim();
    if (!timeStr || timeStr === '00:00:00') continue;

    // "HH:MM:SS" → "HH:MM" (hour left-padded to two digits)
    const [hours, minutes] = timeStr.split(':');
    const time24 = `${hours.padStart(2, '0')}:${minutes || '00'}`;

    const language = wt.language?.trim() || 'Unknown';
    const notes = wt.comment?.trim() || null;

    // Look up own keys only: a label such as "constructor" would otherwise
    // resolve to an inherited, non-iterable member and throw in the loop below.
    const dayKey = wt.day_of_week?.trim().toLowerCase();
    if (!dayKey || !Object.prototype.hasOwnProperty.call(DAY_MAP, dayKey)) continue;

    for (const day of DAY_MAP[dayKey]) {
      schedules.push({ dayOfWeek: day, time: time24, language, notes, massType: null });
    }
  }

  return schedules;
}
|
|
|
|
|
|
|
|
|
|
// ─── Database ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/**
 * Read every church row from the database, selecting only the columns listed
 * below, for use in duplicate detection.
 *
 * Logs a message before the query and the number of rows afterwards.
 *
 * @returns All existing churches.
 */
async function loadExistingChurches(): Promise<ExistingChurch[]> {
  console.log('Loading existing churches for deduplication...');

  const rows = await prisma.church.findMany({
    select: {
      // Identity and position
      id: true, name: true, latitude: true, longitude: true,

      // Per-source external identifiers
      osmId: true, baiduId: true, masstimesId: true, orarimesseId: true,
      massSchedulesPhId: true, philmassId: true, horariosMisasId: true,
      mszeInfoId: true, weekdayMassesId: true, messesInfoId: true,
      bohosluzbyId: true, miserendId: true, kerknetId: true,
      gottesdienstzeitenId: true,

      // Provenance and contact details
      source: true, website: true, phone: true, address: true, country: true,
    },
  });

  console.log(`Loaded ${rows.length} existing churches`);
  return rows;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Write the current counters to a background-job record.
 *
 * A failed write is logged and otherwise ignored, so it never interrupts the
 * caller.
 *
 * @param jobId Id of the background-job row to update.
 * @param stats Counters to copy into the job row.
 */
async function updateJobProgress(jobId: string, stats: ImportStats): Promise<void> {
  const progress = {
    processed: stats.gridPoints,
    // Matched and newly created churches both count as successes.
    succeeded: stats.churchesMatched + stats.churchesCreated,
    failed: stats.errors,
    itemsFound: stats.churchesDiscovered,
  };

  try {
    await prisma.backgroundJob.update({ where: { id: jobId }, data: progress });
  } catch (err) {
    console.error(`Failed to update job progress:`, err);
  }
}
|
|
|
|
|
|
|
|
|
|
// ─── Main Import ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
/** True when `error` carries a unique-constraint message. */
function isUniqueConstraintError(error: unknown): boolean {
  return error instanceof Error && error.message.includes('Unique constraint');
}

/** Render a thrown value for log output. */
function errorText(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Print the start-of-run banner with the effective settings and a time estimate. */
function logRunBanner(args: CLIArgs, regionNames: string[], gridPointCount: number): void {
  console.log(`\n${'='.repeat(70)}`);
  console.log('MASSTIMES.ORG API GLOBAL IMPORTER');
  console.log('='.repeat(70));
  console.log(`Regions: ${regionNames.join(', ')}`);
  console.log(`Grid points: ${gridPointCount}`);
  console.log(`Skip US: ${args.skipUs ? 'YES' : 'NO'}`);
  console.log(`Dry run: ${args.dryRun ? 'YES' : 'NO'}`);
  console.log(`Rate limit: ${RATE_LIMIT_MS}ms between requests`);
  console.log(`Resume from: ${args.resumeFrom || 'start'}`);
  // Estimate assumes two requests per grid point, each followed by the rate-limit pause.
  const estHours = Math.round(gridPointCount * 2 * RATE_LIMIT_MS / 1000 / 3600 * 10) / 10;
  console.log(`Est. time: ~${estHours} hours (est. 2 pages/point avg)`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('='.repeat(70));
}

/** Print the end-of-run summary of all counters. */
function logSummary(stats: ImportStats, elapsedSeconds: number): void {
  console.log(`\n${'='.repeat(70)}`);
  console.log('MASSTIMES API IMPORT SUMMARY');
  console.log('='.repeat(70));
  console.log(`Grid points queried: ${stats.gridPoints}`);
  console.log(`API requests: ${stats.apiRequests}`);
  console.log(`Churches discovered: ${stats.churchesDiscovered}`);
  console.log(`Churches matched: ${stats.churchesMatched} (already in DB)`);
  console.log(`Churches created: ${stats.churchesCreated}`);
  console.log(`Churches skipped: ${stats.churchesSkipped} (duplicates)`);
  console.log(`Mass schedules created: ${stats.massSchedulesCreated}`);
  console.log(`Errors: ${stats.errors}`);
  console.log(`Elapsed: ${Math.round(elapsedSeconds)}s (${(elapsedSeconds / 3600).toFixed(1)}h)`);
  console.log('='.repeat(70));
}

/**
 * Run the import: build the grid, query the API point by point, and for each
 * newly seen church either link it to an existing database row or create it.
 *
 * Per church, in order:
 *  1. skip ids already seen in this run;
 *  2. count as matched when the id is already linked in the database;
 *  3. skip records with unparseable or (0, 0) coordinates;
 *  4. when `findDuplicateChurch` finds an existing row, link it and fill in
 *     missing phone / website / address / country;
 *  5. otherwise create the church together with its mass schedules.
 *
 * With `--dry-run` steps 4 and 5 only update the counters. A failure on one
 * church is logged and counted without abandoning the rest of its grid point;
 * a failure fetching a grid point is logged, counted, and followed by a longer
 * pause. When a job id was given, the job row is marked running at the start,
 * updated every 20 grid points, and marked completed at the end.
 */
async function main() {
  const args = parseArgs();

  // parseArgs guarantees a region whenever --all is absent.
  const regionNames = args.all || !args.region ? Object.keys(REGIONS) : [args.region];
  const gridPoints = generateGridPoints(regionNames, args.skipUs);

  logRunBanner(args, regionNames, gridPoints.length);

  const existingChurches = await loadExistingChurches();

  // Ids already linked in the database: lets a church be recognised without
  // running the name/proximity match.
  const masstimesIdSet = new Set<string>();
  for (const c of existingChurches) {
    if (c.masstimesId) masstimesIdSet.add(c.masstimesId);
  }

  // Ids seen during this run; the same church can be returned for several grid points.
  const discoveredIds = new Set<string>();

  const stats: ImportStats = {
    gridPoints: 0,
    apiRequests: 0,
    churchesDiscovered: 0,
    churchesMatched: 0,
    churchesCreated: 0,
    churchesSkipped: 0,
    massSchedulesCreated: 0,
    errors: 0,
  };

  const jobId = args.jobId;
  if (jobId) {
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: { status: 'running', startedAt: new Date(), totalItems: gridPoints.length },
    });
  }

  const startTime = Date.now();

  for (let i = 0; i < gridPoints.length; i++) {
    const { lat, lng } = gridPoints[i];
    stats.gridPoints++;

    // Resumed runs still count the skipped points so progress totals line up.
    if (i < args.resumeFrom) continue;

    try {
      const churches = await fetchAllForPoint(lat, lng, stats);

      if (churches.length > 0) {
        let newInPoint = 0;

        for (const mc of churches) {
          if (discoveredIds.has(mc.id)) continue;
          discoveredIds.add(mc.id);
          stats.churchesDiscovered++;

          // Already in DB by masstimesId
          if (masstimesIdSet.has(mc.id)) {
            stats.churchesMatched++;
            continue;
          }

          const churchLat = parseFloat(mc.latitude);
          const churchLng = parseFloat(mc.longitude);
          if (isNaN(churchLat) || isNaN(churchLng) || (churchLat === 0 && churchLng === 0)) continue;

          const country = resolveCountryCode(mc.church_address_country_territory_name);
          const address = [
            mc.church_address_street_address,
            mc.church_address_city_name,
            mc.church_address_providence_name,
            mc.church_address_postal_code,
          ].filter(s => s?.trim()).join(', ').trim() || null;

          // Proximity + name match
          const candidate = { name: mc.name, lat: churchLat, lng: churchLng };
          const duplicate = findDuplicateChurch(candidate, existingChurches);

          if (duplicate) {
            stats.churchesMatched++;
            if (!args.dryRun) {
              // Always link the id; other fields are only filled when the existing row lacks them.
              const updateData: Record<string, unknown> = { masstimesId: mc.id };
              if (!duplicate.phone && mc.phone_number?.trim()) updateData.phone = mc.phone_number.trim();
              if (!duplicate.website && mc.url?.trim()) {
                updateData.website = mc.url.trim();
                updateData.hasWebsite = true;
              }
              if (!duplicate.address && address) updateData.address = address;
              if (duplicate.country === 'XX' && country !== 'XX') updateData.country = country;

              try {
                await prisma.church.update({ where: { id: duplicate.id }, data: updateData });
                masstimesIdSet.add(mc.id);
              } catch (error) {
                if (isUniqueConstraintError(error)) {
                  stats.churchesSkipped++;
                } else {
                  // Handle the failure here instead of rethrowing: the other
                  // churches of this grid point are already marked as
                  // discovered and would never be processed if the point
                  // were abandoned now.
                  stats.errors++;
                  console.error(` Error updating ${mc.name}: ${errorText(error)}`);
                }
              }
            }
            continue;
          }

          // Create new church
          if (!args.dryRun) {
            const schedules = parseWorshipTimes(mc.church_worship_times || []);
            try {
              const newChurch = await prisma.church.create({
                data: {
                  name: mc.name,
                  latitude: churchLat,
                  longitude: churchLng,
                  address,
                  city: mc.church_address_city_name?.trim() || null,
                  state: mc.church_address_providence_name?.trim() || null,
                  zip: mc.church_address_postal_code?.trim() || null,
                  country,
                  phone: mc.phone_number?.trim() || null,
                  website: mc.url?.trim() || null,
                  email: mc.email?.trim() || null,
                  hasWebsite: !!mc.url?.trim(),
                  masstimesId: mc.id,
                  source: 'masstimes',
                  diocese: mc.diocese_name?.trim() || null,
                  pastorName: mc.pastors_name?.trim() || null,
                  wheelchairAccess: mc.wheel_chair_access || false,
                  massSchedules: schedules.length > 0 ? {
                    create: schedules.map(s => ({
                      dayOfWeek: s.dayOfWeek,
                      time: s.time,
                      language: s.language,
                      notes: s.notes,
                      massType: s.massType,
                      isActive: true,
                    })),
                  } : undefined,
                },
              });

              stats.churchesCreated++;
              stats.massSchedulesCreated += schedules.length;
              newInPoint++;
              masstimesIdSet.add(mc.id);

              // Make the new row visible to duplicate matching for the rest of the run.
              existingChurches.push({
                id: newChurch.id, name: mc.name,
                latitude: churchLat, longitude: churchLng,
                osmId: null, baiduId: null, masstimesId: mc.id,
                orarimesseId: null, massSchedulesPhId: null,
                philmassId: null, horariosMisasId: null,
                mszeInfoId: null, weekdayMassesId: null,
                messesInfoId: null, bohosluzbyId: null, miserendId: null, kerknetId: null, gottesdienstzeitenId: null,
                source: 'masstimes', website: mc.url?.trim() || null,
                phone: mc.phone_number?.trim() || null, address, country,
              });
            } catch (error) {
              if (isUniqueConstraintError(error)) {
                stats.churchesSkipped++;
              } else {
                stats.errors++;
                console.error(` Error creating ${mc.name}: ${errorText(error)}`);
              }
            }
          } else {
            // Dry run: count what would have been written.
            stats.churchesCreated++;
            stats.massSchedulesCreated += parseWorshipTimes(mc.church_worship_times || []).length;
            newInPoint++;
          }
        }

        if (newInPoint > 0) {
          console.log(` Grid ${i + 1}/${gridPoints.length} (${lat},${lng}): ${churches.length} found, ${newInPoint} new`);
        }
      }

      await sleep(RATE_LIMIT_MS);
    } catch (error) {
      stats.errors++;
      console.error(` Error at grid ${i + 1} (${lat},${lng}): ${errorText(error)}`);
      // Wait twice as long after a failed grid point.
      await sleep(RATE_LIMIT_MS * 2);
    }

    // Progress every 50 points
    if ((i + 1) % 50 === 0 || i === gridPoints.length - 1) {
      const elapsed = (Date.now() - startTime) / 1000;
      const rate = elapsed > 0 ? Math.round(stats.apiRequests / elapsed * 3600) : 0;
      console.log(` Progress: ${i + 1}/${gridPoints.length} grid points, ${stats.churchesDiscovered} discovered, ${stats.churchesCreated} new, ${stats.apiRequests} API calls [${Math.round(elapsed)}s, ~${rate}/hr]`);
    }

    if (jobId && (i + 1) % 20 === 0) {
      await updateJobProgress(jobId, stats);
    }
  }

  if (jobId) {
    await updateJobProgress(jobId, stats);
    await prisma.backgroundJob.update({
      where: { id: jobId },
      data: { status: 'completed', completedAt: new Date() },
    });
  }

  logSummary(stats, (Date.now() - startTime) / 1000);

  await prisma.$disconnect();
  await pool.end();
}
|
|
|
|
|
|
|
|
|
|
// Script entry point: report any unhandled failure and exit with a non-zero status.
main().catch((error: unknown) => {
  // A rejection is not guaranteed to be an Error; fall back to the value itself
  // so the log never reads "Fatal error: undefined".
  console.error(`Fatal error: ${error instanceof Error ? error.message : error}`);
  process.exit(1);
});
|