Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
136 lines
4.2 KiB
TypeScript
136 lines
4.2 KiB
TypeScript
import { discoverMassScheduleUrl } from '../src/scrapers/url-discovery';
|
|
|
|
/**
 * Site URLs exercised by `testMultipleSites` when the script is started
 * without a URL argument.
 */
const TEST_SITES = [
  'https://www.saintpatrickscathedral.org',
  'https://www.holynamecathedral.org',
  'https://www.olacathedral.org',
];
|
|
|
|
/**
 * Maps a confidence label reported by URL discovery to the coloured marker
 * printed next to it in the console output.
 */
const CONFIDENCE_ICONS: Record<string, string> = {
  high: '🟢',
  medium: '🟡',
  low: '🔴',
};
|
|
|
|
/**
 * Human-readable explanation for each discovery method name, printed in
 * parentheses after the raw method value.
 */
const METHOD_DESCRIPTIONS: Record<string, string> = {
  pattern: 'Found via URL pattern matching',
  link: 'Found via link crawling',
  homepage: 'Fell back to homepage',
};
|
|
|
|
/**
 * Runs URL discovery against one site and prints a detailed console report.
 *
 * Prints a banner, times the call to `discoverMassScheduleUrl`, then logs the
 * discovered URL, method and confidence, followed by a summary section.
 *
 * @param url - Site URL handed unchanged to `discoverMassScheduleUrl`.
 * @returns A promise that settles once the report has been printed. Errors
 *   from `discoverMassScheduleUrl` are not caught here and reject the promise.
 */
async function testSingleUrl(url: string): Promise<void> {
  const heavyRule = '='.repeat(70);
  const lightRule = '-'.repeat(70);

  console.log('\n' + heavyRule);
  console.log('NEARESTMASS URL DISCOVERY TEST');
  console.log(heavyRule);
  console.log(`\nURL: ${url}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('\n' + lightRule);

  console.log('\n[1/2] Discovering mass schedule URL...');
  const startTime = Date.now();
  const result = await discoverMassScheduleUrl(url);
  const elapsed = Date.now() - startTime;
  console.log(` ✓ Discovery completed in ${elapsed}ms`);

  // Fall back to explicit placeholders so a value missing from the lookup
  // tables is reported as such instead of printing the text "undefined".
  const methodNote = METHOD_DESCRIPTIONS[result.method] ?? 'unknown method';
  const confidenceIcon = CONFIDENCE_ICONS[result.confidence] ?? '?';

  console.log('\n[2/2] Results:');
  console.log(` Discovered URL: ${result.url}`);
  console.log(` Method: ${result.method} (${methodNote})`);
  console.log(` Confidence: ${confidenceIcon} ${result.confidence}`);

  console.log('\n' + heavyRule);
  console.log('SUMMARY');
  console.log(heavyRule);
  console.log(`Input: ${url}`);
  console.log(`Output: ${result.url}`);
  console.log(`Method: ${result.method}`);
  console.log(`Confidence: ${result.confidence}`);
  console.log(`Time: ${elapsed}ms`);
  console.log(heavyRule + '\n');
}
|
|
|
|
/**
 * Runs URL discovery against every entry of `TEST_SITES`, one after another,
 * and prints per-site details followed by an aggregate summary.
 *
 * Sites are processed sequentially with a 2-second pause between them. A
 * rejection from `discoverMassScheduleUrl` is not caught: it aborts the batch
 * and rejects the returned promise before any summary is printed.
 *
 * @returns A promise that settles once the summary has been printed.
 */
async function testMultipleSites(): Promise<void> {
  /** One row of the summary table. */
  type SiteResult = {
    site: string;
    url: string;
    method: string;
    confidence: string;
    elapsed: number;
  };

  // Pause between sites; keep in sync with the "Waiting 2s" message below.
  const INTER_SITE_DELAY_MS = 2000;
  const heavyRule = '='.repeat(70);
  const lightRule = '-'.repeat(70);

  // Placeholder avoids printing the text "undefined" for an unmapped label.
  const iconFor = (confidence: string): string => CONFIDENCE_ICONS[confidence] ?? '?';

  console.log('\n' + heavyRule);
  console.log('NEARESTMASS URL DISCOVERY TEST (BATCH)');
  console.log(heavyRule);
  console.log(`\nTesting ${TEST_SITES.length} sites...`);
  console.log(`Time: ${new Date().toISOString()}`);

  const results: SiteResult[] = [];

  for (let i = 0; i < TEST_SITES.length; i++) {
    const site = TEST_SITES[i];
    console.log('\n' + lightRule);
    console.log(`[${i + 1}/${TEST_SITES.length}] Testing: ${site}`);
    console.log(lightRule);

    const startTime = Date.now();
    const result = await discoverMassScheduleUrl(site);
    const elapsed = Date.now() - startTime;

    const methodNote = METHOD_DESCRIPTIONS[result.method] ?? 'unknown method';

    console.log(`\n Discovered URL: ${result.url}`);
    console.log(` Method: ${result.method} (${methodNote})`);
    console.log(` Confidence: ${iconFor(result.confidence)} ${result.confidence}`);
    console.log(` Time: ${elapsed}ms`);

    results.push({
      site,
      url: result.url,
      method: result.method,
      confidence: result.confidence,
      elapsed,
    });

    // Rate limiting between sites (skipped after the last one).
    if (i < TEST_SITES.length - 1) {
      console.log('\n Waiting 2s before next site...');
      await new Promise((resolve) => setTimeout(resolve, INTER_SITE_DELAY_MS));
    }
  }

  // Summary table
  console.log('\n' + heavyRule);
  console.log('SUMMARY');
  console.log(heavyRule);

  const countWithConfidence = (level: string): number =>
    results.filter((r) => r.confidence === level).length;

  const highCount = countWithConfidence('high');
  const mediumCount = countWithConfidence('medium');
  const lowCount = countWithConfidence('low');
  // Sum of per-site discovery times only; the inter-site pauses are excluded.
  const totalTime = results.reduce((sum, r) => sum + r.elapsed, 0);

  console.log(`\nSites tested: ${results.length}`);
  console.log(`High conf: ${highCount} 🟢`);
  console.log(`Medium conf: ${mediumCount} 🟡`);
  console.log(`Low conf: ${lowCount} 🔴`);
  console.log(`Total time: ${totalTime}ms`);

  console.log('\n' + lightRule);
  console.log('RESULTS BY SITE');
  console.log(lightRule);

  for (const r of results) {
    console.log(`\n${r.site}`);
    console.log(` → ${r.url}`);
    console.log(` ${iconFor(r.confidence)} ${r.confidence} via ${r.method}`);
  }

  console.log('\n' + heavyRule + '\n');
}
|
|
|
|
/**
 * Script entry point.
 *
 * When a first command-line argument is present it is treated as the URL to
 * test with `testSingleUrl`; otherwise the built-in batch in
 * `testMultipleSites` is run.
 *
 * @returns A promise that settles when the selected test run has finished.
 */
async function main(): Promise<void> {
  const testUrl = process.argv[2];

  if (testUrl) {
    await testSingleUrl(testUrl);
    return;
  }

  await testMultipleSites();
}
|
|
|
|
// Log any failure and mark the run as failed, so shells and CI see a non-zero
// exit status instead of a successful exit after an error.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|