/**
 * Manual scraper smoke test.
 *
 * Usage: <script> [url] [--country CODE] [--lang NAME]
 *
 * Scrapes a single URL with either the generic scraper or the scraper that
 * `getScraper` returns for `--lang`, then prints the parsed schedules, a
 * plain-text preview of the fetched HTML, and a summary.
 */
import { GenericScraper } from '../src/scrapers/strategies/generic';
import { getScraper } from '../src/scrapers/registry';
import type { BaseScraper, ScrapeResult } from '../src/scrapers/base-scraper';

/** URL scraped when no positional URL argument is supplied. */
const DEFAULT_URL = 'https://www.saintpatrickscathedral.org/masses';

/** Flags that consume the following argument as their value. */
const VALUE_FLAGS: ReadonlySet<string> = new Set(['--country', '--lang']);

/** Maximum number of stripped-text characters shown in the raw preview. */
const PREVIEW_LENGTH = 1000;

/** Options recognised on the command line; `null` means "not supplied". */
interface CliOptions {
  url: string | null;
  country: string | null;
  lang: string | null;
}

/**
 * Parses the user-supplied arguments (everything after the script path).
 *
 * The URL is the first argument that is neither a flag nor a flag's value, so
 * flags may appear before or after it. A value flag followed by another flag
 * (or by nothing) is treated as having no value instead of swallowing the next
 * flag. When a flag is repeated, the first occurrence with a value wins.
 *
 * @param args Arguments after `process.argv[1]`.
 * @returns The URL, country code and language name, each `null` when absent.
 */
function parseCliArgs(args: readonly string[]): CliOptions {
  const flagValues = new Map<string, string>();
  let url: string | null = null;

  let index = 0;
  while (index < args.length) {
    const arg = args[index];
    index += 1;
    if (arg === undefined) {
      continue;
    }

    if (VALUE_FLAGS.has(arg)) {
      const value = args[index];
      if (value !== undefined && !value.startsWith('--')) {
        if (!flagValues.has(arg)) {
          flagValues.set(arg, value);
        }
        index += 1; // consume the flag's value so it is not mistaken for the URL
      }
      continue;
    }

    if (url === null && !arg.startsWith('--')) {
      url = arg;
    }
  }

  return {
    url,
    country: flagValues.get('--country') ?? null,
    lang: flagValues.get('--lang') ?? null,
  };
}

/**
 * Reduces an HTML document to a single line of plain text.
 *
 * Removes `<script>` and `<style>` elements together with their contents,
 * replaces every remaining tag with a space, converts en/em dashes to `-`,
 * and collapses runs of whitespace.
 *
 * @param html Raw HTML markup.
 * @returns The trimmed, whitespace-normalised text.
 */
function htmlToText(html: string): string {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/[\u2013\u2014]/g, '-')
    .replace(/\s+/g, ' ')
    .trim();
}

const cliOptions = parseCliArgs(process.argv.slice(2));

// `||` (not `??`) on purpose: an empty-string argument also falls back to the default.
const TEST_URL = cliOptions.url || DEFAULT_URL;

// Value of the --country flag from CLI args, or null when absent
const COUNTRY_CODE = cliOptions.country;

// Value of the --lang flag from CLI args (e.g., --lang english), or null when absent
const LANG = cliOptions.lang;

/** Display names indexed by `dayOfWeek` (0 = Sunday … 6 = Saturday). */
const DAY_NAMES = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];

/**
 * Runs one scrape of `TEST_URL` and prints a human-readable report.
 *
 * Errors raised while initialising or scraping are logged and reflected in the
 * process exit code; the scraper is always closed afterwards.
 */
async function main(): Promise<void> {
  console.log('\n' + '='.repeat(70));
  console.log('NEARESTMASS SCRAPER TEST');
  console.log('='.repeat(70));
  console.log(`\nURL: ${TEST_URL}`);
  // NOTE(review): this label text looks truncated ("auto-detect from …") — confirm the intended wording.
  console.log(`Country: ${COUNTRY_CODE || '(auto-detect from )'}`);
  console.log(`Scraper: ${LANG || 'generic'}`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('\n' + '-'.repeat(70));

  let scraper: BaseScraper;
  if (LANG) {
    scraper = getScraper(LANG);
    console.log(`\n Using ${LANG} scraper`);
  } else {
    scraper = new GenericScraper();
  }

  try {
    console.log('\n[1/4] Initializing browser...');
    await scraper.init();
    console.log(' ✓ Browser ready');

    // Only the generic scraper is given the country override here.
    if (COUNTRY_CODE && scraper instanceof GenericScraper) {
      scraper.setCountry(COUNTRY_CODE);
      console.log(` Country set to: ${COUNTRY_CODE}`);
    }

    console.log('\n[2/4] Fetching page...');
    const startTime = Date.now();
    const result: ScrapeResult = await scraper.scrape(TEST_URL);
    const elapsed = Date.now() - startTime;
    console.log(` ✓ Page loaded in ${elapsed}ms`);

    console.log('\n[3/4] Parsing results...');
    console.log(` Status: ${result.success ? '✓ SUCCESS' : '✗ FAILED'}`);
    console.log(` Schedules found: ${result.schedules.length}`);

    if (result.detectedLanguage) {
      console.log(` Detected language: ${result.detectedLanguage}`);
    }

    if (result.churchData) {
      console.log('\n Church Data:');
      if (result.churchData.phone) console.log(` Phone: ${result.churchData.phone}`);
      if (result.churchData.email) console.log(` Email: ${result.churchData.email}`);
      if (result.churchData.pastorName) console.log(` Pastor: ${result.churchData.pastorName}`);
      if (result.churchData.diocese) console.log(` Diocese: ${result.churchData.diocese}`);
    }

    if (result.error) {
      console.log(` Error: ${result.error}`);
    }

    if (result.schedules.length > 0) {
      console.log('\n' + '-'.repeat(70));
      console.log('PARSED MASS SCHEDULES');
      console.log('-'.repeat(70));

      // Group schedules by day so they print in Sunday → Saturday order.
      const byDay: Record<number, typeof result.schedules> = {};
      for (const schedule of result.schedules) {
        if (!byDay[schedule.dayOfWeek]) {
          byDay[schedule.dayOfWeek] = [];
        }
        byDay[schedule.dayOfWeek].push(schedule);
      }

      for (let day = 0; day < 7; day++) {
        const schedules = byDay[day];
        if (schedules && schedules.length > 0) {
          console.log(`\n${DAY_NAMES[day]}:`);
          for (const s of schedules) {
            // English is treated as the default language and is not printed.
            const parts = [
              ` ${s.time}`,
              s.language && s.language !== 'English' ? `(${s.language})` : '',
              s.massType ? `[${s.massType}]` : '',
              s.notes ? `- ${s.notes}` : '',
            ].filter(Boolean);
            console.log(parts.join(' '));
          }
        }
      }
    }

    if (result.rawHtml) {
      console.log('\n' + '-'.repeat(70));
      console.log('RAW TEXT PREVIEW (first 1000 chars, stripped of HTML)');
      console.log('-'.repeat(70));

      const strippedText = htmlToText(result.rawHtml);
      console.log('\n' + strippedText.substring(0, PREVIEW_LENGTH));

      // Compare the stripped text, not the raw HTML, so the notice only
      // appears when the preview really was cut short.
      if (strippedText.length > PREVIEW_LENGTH) {
        console.log('\n... (truncated)');
      }
    }

    console.log('\n' + '='.repeat(70));
    console.log('SUMMARY');
    console.log('='.repeat(70));
    console.log(`URL: ${TEST_URL}`);
    console.log(`Scraper: ${LANG || 'generic'}`);
    console.log(`Country: ${COUNTRY_CODE || '(auto-detected)'}`);
    console.log(`Language: ${result.detectedLanguage || '(unknown)'}`);
    console.log(`Success: ${result.success ? 'Yes' : 'No'}`);
    console.log(`Schedules: ${result.schedules.length}`);
    console.log(`HTML Size: ${result.rawHtml ? Math.round(result.rawHtml.length / 1024) + ' KB' : 'N/A'}`);

    if (result.schedules.length > 0) {
      const days = [...new Set(result.schedules.map(s => s.dayOfWeek))];
      const languages = [...new Set(result.schedules.map(s => s.language || 'English'))];
      console.log(`Days: ${days.map(d => DAY_NAMES[d]).join(', ')}`);
      console.log(`Languages: ${languages.join(', ')}`);
    }

    console.log('='.repeat(70) + '\n');
  } catch (error) {
    console.error('\n[ERROR]', error);
    // Report the failure to the caller instead of exiting with status 0.
    process.exitCode = 1;
  } finally {
    console.log('[4/4] Closing browser...');
    await scraper.close();
    console.log(' ✓ Done\n');
  }
}

main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});