// File: ScraperControl/scripts/test-url-discovery.ts
// 136 lines, 4.2 KiB, TypeScript

import { discoverMassScheduleUrl } from '../src/scrapers/url-discovery';
/**
 * Site URLs exercised by the batch run, one discovery call per entry,
 * processed in the order listed here.
 */
const TEST_SITES = [
  'https://www.saintpatrickscathedral.org',
  'https://www.holynamecathedral.org',
  'https://www.olacathedral.org',
];
/**
 * Emoji marker printed next to each confidence level in the reports.
 * Keyed by the confidence string carried on a discovery result; a key that
 * is not listed here yields `undefined` at runtime.
 */
const CONFIDENCE_ICONS: Record<string, string> = {
  high: '🟢',
  medium: '🟡',
  low: '🔴',
};
/**
 * Human-readable explanation shown in parentheses after a result's
 * discovery method. Keyed by the method string carried on a discovery
 * result; a key that is not listed here yields `undefined` at runtime.
 */
const METHOD_DESCRIPTIONS: Record<string, string> = {
  pattern: 'Found via URL pattern matching',
  link: 'Found via link crawling',
  homepage: 'Fell back to homepage',
};
/**
 * Runs URL discovery for one site and prints a two-step report to stdout.
 *
 * The `discoverMassScheduleUrl` call is timed with `Date.now()`; the report
 * then lists the discovered URL, the method (with its description from
 * `METHOD_DESCRIPTIONS`) and the confidence (with its icon from
 * `CONFIDENCE_ICONS`), followed by a compact summary section.
 *
 * @param url - Site URL passed unchanged to `discoverMassScheduleUrl`.
 */
async function testSingleUrl(url: string) {
  // Divider lines reused throughout the report.
  const heavyRule = '='.repeat(70);
  const lightRule = '-'.repeat(70);

  console.log(`\n${heavyRule}`);
  console.log('NEARESTMASS URL DISCOVERY TEST');
  console.log(heavyRule);
  console.log(`\nURL: ${url}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log(`\n${lightRule}`);

  // Step 1: run (and time) the discovery itself.
  console.log('\n[1/2] Discovering mass schedule URL...');
  const began = Date.now();
  const result = await discoverMassScheduleUrl(url);
  const elapsed = Date.now() - began;
  console.log(` ✓ Discovery completed in ${elapsed}ms`);

  // Step 2: detailed results.
  const { url: discoveredUrl, method, confidence } = result;
  console.log('\n[2/2] Results:');
  console.log(` Discovered URL: ${discoveredUrl}`);
  console.log(` Method: ${method} (${METHOD_DESCRIPTIONS[method]})`);
  console.log(` Confidence: ${CONFIDENCE_ICONS[confidence]} ${confidence}`);

  // Compact recap of input, output and timing.
  console.log(`\n${heavyRule}`);
  console.log('SUMMARY');
  console.log(heavyRule);
  console.log(`Input: ${url}`);
  console.log(`Output: ${discoveredUrl}`);
  console.log(`Method: ${method}`);
  console.log(`Confidence: ${confidence}`);
  console.log(`Time: ${elapsed}ms`);
  console.log(`${heavyRule}\n`);
}
/**
 * Runs URL discovery against every entry of `TEST_SITES`, one after another,
 * and prints a per-site report followed by an aggregate summary.
 *
 * Each `discoverMassScheduleUrl` call is timed with `Date.now()`. A fixed
 * 2-second pause is inserted between consecutive sites (not after the last
 * one). The reported total time is the sum of the per-site discovery times
 * and therefore excludes those pauses.
 */
async function testMultipleSites() {
  type SiteResult = {
    site: string;
    url: string;
    method: string;
    confidence: string;
    elapsed: number;
  };

  // Divider lines reused throughout the report.
  const heavyRule = '='.repeat(70);
  const lightRule = '-'.repeat(70);
  const siteCount = TEST_SITES.length;
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

  console.log(`\n${heavyRule}`);
  console.log('NEARESTMASS URL DISCOVERY TEST (BATCH)');
  console.log(heavyRule);
  console.log(`\nTesting ${siteCount} sites...`);
  console.log(`Time: ${new Date().toISOString()}`);

  const results: SiteResult[] = [];
  let position = 0; // 1-based index of the site currently being tested
  for (const site of TEST_SITES) {
    position += 1;

    console.log(`\n${lightRule}`);
    console.log(`[${position}/${siteCount}] Testing: ${site}`);
    console.log(lightRule);

    const began = Date.now();
    const found = await discoverMassScheduleUrl(site);
    const elapsed = Date.now() - began;

    console.log(`\n Discovered URL: ${found.url}`);
    console.log(` Method: ${found.method} (${METHOD_DESCRIPTIONS[found.method]})`);
    console.log(` Confidence: ${CONFIDENCE_ICONS[found.confidence]} ${found.confidence}`);
    console.log(` Time: ${elapsed}ms`);

    results.push({
      site,
      url: found.url,
      method: found.method,
      confidence: found.confidence,
      elapsed,
    });

    // Rate limiting between sites; skipped once the last site is done.
    if (position < siteCount) {
      console.log('\n Waiting 2s before next site...');
      await pause(2000);
    }
  }

  // Aggregate figures for the summary table.
  const countWithConfidence = (level: string): number =>
    results.filter((entry) => entry.confidence === level).length;
  const highCount = countWithConfidence('high');
  const mediumCount = countWithConfidence('medium');
  const lowCount = countWithConfidence('low');
  let totalTime = 0;
  for (const entry of results) {
    totalTime += entry.elapsed;
  }

  console.log(`\n${heavyRule}`);
  console.log('SUMMARY');
  console.log(heavyRule);
  console.log(`\nSites tested: ${results.length}`);
  console.log(`High conf: ${highCount} 🟢`);
  console.log(`Medium conf: ${mediumCount} 🟡`);
  console.log(`Low conf: ${lowCount} 🔴`);
  console.log(`Total time: ${totalTime}ms`);

  console.log(`\n${lightRule}`);
  console.log('RESULTS BY SITE');
  console.log(lightRule);
  for (const entry of results) {
    console.log(`\n${entry.site}`);
    console.log(`${entry.url}`);
    console.log(` ${CONFIDENCE_ICONS[entry.confidence]} ${entry.confidence} via ${entry.method}`);
  }
  console.log(`\n${heavyRule}\n`);
}
/**
 * Script entry point.
 *
 * When a first command-line argument is present (and non-empty) it is
 * treated as the URL to test via `testSingleUrl`; otherwise the batch run
 * `testMultipleSites` is executed.
 */
async function main() {
  const testUrl = process.argv[2];
  if (testUrl) {
    await testSingleUrl(testUrl);
  } else {
    await testMultipleSites();
  }
}

// Log the failure AND mark the run as failed. Because the rejection is
// handled here, Node would otherwise exit with status 0, hiding the error
// from shells and CI. Setting `exitCode` (rather than calling
// `process.exit`) lets pending output flush before the process ends.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});