#!/usr/bin/env tsx
/**
 * Debug a specific French page to see why scraping failed
 */

import { GenericScraper } from '../../src/scrapers/strategies/generic';

/**
 * Scrape one hard-coded French church site and print diagnostics that help
 * explain why schedule extraction failed.
 *
 * Output, in order: the scrape outcome (success flag, schedule count, error
 * message if any) and, when the result carries raw HTML, a lower-cased
 * plain-text sample of the page, the French weekday names present in that
 * text, and up to 20 time-like substrings.
 *
 * @returns {Promise<void>} Resolves once the diagnostics are printed and the
 *   scraper has been closed; rejects with whatever `init()`, `scrape()` or
 *   `close()` throws.
 */
async function debugPage() {
  const url = 'https://www.chemin-neuf.fr/'; // Last failed church
  console.log(`Debugging: ${url}\n`);

  const scraper = new GenericScraper();
  await scraper.init();

  // Everything after init() runs under try/finally so the scraper is closed
  // even when scraping or reporting throws.
  try {
    scraper.setCountry('FR');

    const result = await scraper.scrape(url);

    console.log(`Success: ${result.success}`);
    // NOTE(review): the result shape is defined outside this file; guard in
    // case a failed scrape omits `schedules` so the debug run still reports.
    console.log(`Schedules found: ${result.schedules?.length ?? 0}`);
    if (result.error) console.log(`Error: ${result.error}`);

    if (result.rawHtml) {
      // Crude HTML-to-text: drop script/style bodies first so their contents
      // do not pollute the text, then strip tags and collapse whitespace.
      const text = result.rawHtml
        .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
        .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
        .replace(/<[^>]+>/g, ' ')
        .replace(/\s+/g, ' ')
        .toLowerCase();

      console.log('\n=== Page Text Sample (first 2000 chars) ===');
      console.log(text.substring(0, 2000));
      console.log('\n');

      // Check for French day names (text is already lower-cased)
      const frenchDays = ['dimanche', 'lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi'];
      console.log('=== French day names found ===');
      for (const day of frenchDays) {
        if (text.includes(day)) {
          console.log(`✓ Found: ${day}`);
        }
      }

      // Check for time patterns. Because `text` is lower-cased, the
      // upper-case alternatives (AM, PM, Uhr) can never match here; only the
      // lower-case ones are effective.
      console.log('\n=== Time patterns (sample) ===');
      const timeRegex = /\d{1,2}[h:\.]\s*\d{0,2}\s*(?:AM|PM|am|pm|Uhr|uur|h)?/g;
      const times = text.match(timeRegex);
      if (times) {
        console.log(`Found ${times.length} time-like patterns:`);
        console.log(times.slice(0, 20).join(', '));
      } else {
        console.log('No time patterns found');
      }
    }
  } finally {
    await scraper.close();
  }
}

// Entry point: run the debug session. On failure, log the error and mark the
// process as failed so shells and CI see a non-zero exit status instead of 0.
debugPage().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});