chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
135
scripts/test-url-discovery.ts
Normal file
135
scripts/test-url-discovery.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import { discoverMassScheduleUrl } from '../src/scrapers/url-discovery';
|
||||
|
||||
/**
 * Sites exercised by the batch test (`testMultipleSites`), in the order they
 * are tested.
 */
const TEST_SITES = [
  'https://www.saintpatrickscathedral.org',
  'https://www.holynamecathedral.org',
  'https://www.olacathedral.org',
];
|
||||
|
||||
/**
 * Emoji printed next to a discovery result's confidence level, keyed by the
 * confidence string (`high`, `medium`, `low`).
 */
const CONFIDENCE_ICONS: Record<string, string> = {
  high: '🟢',
  medium: '🟡',
  low: '🔴',
};
|
||||
|
||||
/**
 * Human-readable explanation printed in parentheses after a discovery
 * result's method, keyed by the method string (`pattern`, `link`, `homepage`).
 */
const METHOD_DESCRIPTIONS: Record<string, string> = {
  pattern: 'Found via URL pattern matching',
  link: 'Found via link crawling',
  homepage: 'Fell back to homepage',
};
|
||||
|
||||
/**
 * Runs URL discovery against a single site and prints a report to stdout.
 *
 * Prints a banner with the input URL and the current time, awaits
 * `discoverMassScheduleUrl(url)` while timing it, then prints the discovered
 * URL, method and confidence, followed by a summary section.
 *
 * @param url - Site URL passed to `discoverMassScheduleUrl`.
 * @returns A promise that resolves once the report has been printed. It
 *   rejects if `discoverMassScheduleUrl` rejects (no error handling here).
 */
async function testSingleUrl(url: string): Promise<void> {
  const heavyRule = '='.repeat(70);
  const lightRule = '-'.repeat(70);

  console.log('\n' + heavyRule);
  console.log('NEARESTMASS URL DISCOVERY TEST');
  console.log(heavyRule);
  console.log(`\nURL: ${url}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('\n' + lightRule);

  console.log('\n[1/2] Discovering mass schedule URL...');
  const startTime = Date.now();
  const result = await discoverMassScheduleUrl(url);
  const elapsed = Date.now() - startTime;
  console.log(` ✓ Discovery completed in ${elapsed}ms`);

  // The lookup tables are plain string-keyed records, so an unrecognised
  // method/confidence would otherwise be printed as "undefined".
  const methodDescription =
    METHOD_DESCRIPTIONS[result.method] ?? 'Unknown discovery method';
  const confidenceIcon = CONFIDENCE_ICONS[result.confidence] ?? '⚪';

  console.log('\n[2/2] Results:');
  console.log(` Discovered URL: ${result.url}`);
  console.log(` Method: ${result.method} (${methodDescription})`);
  console.log(` Confidence: ${confidenceIcon} ${result.confidence}`);

  console.log('\n' + heavyRule);
  console.log('SUMMARY');
  console.log(heavyRule);
  console.log(`Input: ${url}`);
  console.log(`Output: ${result.url}`);
  console.log(`Method: ${result.method}`);
  console.log(`Confidence: ${result.confidence}`);
  console.log(`Time: ${elapsed}ms`);
  console.log(heavyRule + '\n');
}
|
||||
|
||||
/**
 * Runs URL discovery against every entry of `TEST_SITES` and prints a
 * per-site report followed by an aggregate summary.
 *
 * Sites are processed one at a time with a 2 second pause between them.
 * For each site the discovered URL, method, confidence and elapsed time are
 * printed and recorded. The summary lists how many results had `high`,
 * `medium` and `low` confidence, the summed discovery time, and one entry
 * per site.
 *
 * @returns A promise that resolves once the summary has been printed. It
 *   rejects if any `discoverMassScheduleUrl` call rejects, in which case the
 *   remaining sites are not tested and no summary is printed.
 */
async function testMultipleSites(): Promise<void> {
  const heavyRule = '='.repeat(70);
  const lightRule = '-'.repeat(70);

  console.log('\n' + heavyRule);
  console.log('NEARESTMASS URL DISCOVERY TEST (BATCH)');
  console.log(heavyRule);
  console.log(`\nTesting ${TEST_SITES.length} sites...`);
  console.log(`Time: ${new Date().toISOString()}`);

  const results: Array<{
    site: string;
    url: string;
    method: string;
    confidence: string;
    elapsed: number;
  }> = [];

  // Deliberately sequential: each site is awaited before the next starts so
  // the delay below actually spaces the requests out.
  for (const [index, site] of TEST_SITES.entries()) {
    console.log('\n' + lightRule);
    console.log(`[${index + 1}/${TEST_SITES.length}] Testing: ${site}`);
    console.log(lightRule);

    const startTime = Date.now();
    const result = await discoverMassScheduleUrl(site);
    const elapsed = Date.now() - startTime;

    // Fall back instead of printing "undefined" for keys missing from the
    // string-keyed lookup tables.
    const methodDescription =
      METHOD_DESCRIPTIONS[result.method] ?? 'Unknown discovery method';
    const confidenceIcon = CONFIDENCE_ICONS[result.confidence] ?? '⚪';

    console.log(`\n Discovered URL: ${result.url}`);
    console.log(` Method: ${result.method} (${methodDescription})`);
    console.log(` Confidence: ${confidenceIcon} ${result.confidence}`);
    console.log(` Time: ${elapsed}ms`);

    results.push({
      site,
      url: result.url,
      method: result.method,
      confidence: result.confidence,
      elapsed,
    });

    // Rate limiting between sites (skipped after the last one).
    if (index < TEST_SITES.length - 1) {
      console.log('\n Waiting 2s before next site...');
      await new Promise<void>((resolve) => setTimeout(resolve, 2000));
    }
  }

  // Summary table
  console.log('\n' + heavyRule);
  console.log('SUMMARY');
  console.log(heavyRule);

  const countWithConfidence = (level: string): number =>
    results.filter((entry) => entry.confidence === level).length;
  const highCount = countWithConfidence('high');
  const mediumCount = countWithConfidence('medium');
  const lowCount = countWithConfidence('low');
  // Sum of discovery times only; the pauses between sites are not included.
  const totalTime = results.reduce((sum, entry) => sum + entry.elapsed, 0);

  console.log(`\nSites tested: ${results.length}`);
  console.log(`High conf: ${highCount} 🟢`);
  console.log(`Medium conf: ${mediumCount} 🟡`);
  console.log(`Low conf: ${lowCount} 🔴`);
  console.log(`Total time: ${totalTime}ms`);

  console.log('\n' + lightRule);
  console.log('RESULTS BY SITE');
  console.log(lightRule);

  for (const entry of results) {
    const icon = CONFIDENCE_ICONS[entry.confidence] ?? '⚪';
    console.log(`\n${entry.site}`);
    console.log(` → ${entry.url}`);
    console.log(` ${icon} ${entry.confidence} via ${entry.method}`);
  }

  console.log('\n' + heavyRule + '\n');
}
|
||||
|
||||
/**
 * Script entry point: chooses between single-URL and batch mode.
 *
 * If a first command-line argument (`process.argv[2]`) is present and
 * non-empty it is passed to `testSingleUrl`; otherwise `testMultipleSites`
 * is run.
 *
 * @returns A promise that settles when the selected test run settles.
 */
async function main(): Promise<void> {
  const testUrl = process.argv[2];

  if (testUrl) {
    await testSingleUrl(testUrl);
  } else {
    await testMultipleSites();
  }
}
|
||||
|
||||
// Log any failure and mark the run as failed. Logging alone would leave the
// process exiting with status 0, hiding the error from shells and CI.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user