chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
167
scripts/debug/pipeline-report.js
Normal file
167
scripts/debug/pipeline-report.js
Normal file
@@ -0,0 +1,167 @@
|
||||
const { Client } = require("pg");
|
||||
// Database client shared by every report query in this script.
// DATABASE_URL, when set to a non-empty value, takes precedence so the report
// can be pointed at another instance without editing the file. `||` (not `??`)
// is deliberate: an empty DATABASE_URL falls back to the default as well.
// NOTE(review): the fallback embeds credentials in source; consider removing it
// once every caller supplies DATABASE_URL.
const client = new Client({
  connectionString:
    process.env.DATABASE_URL ||
    "postgresql://postgres:postgres@192.168.0.145:5434/nearestmass"
});
|
||||
|
||||
// Report sections, in print order. Each entry has a `name` (the heading that is
// printed) and a `sql` statement (read-only SELECT) whose rows are rendered as a table.
const queries = [
  {
    name: "1. Overall church counts by country (top 20)",
    sql: `SELECT country, COUNT(*) as total,
        COUNT(*) FILTER (WHERE website IS NOT NULL) as has_website,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as scraped,
        COUNT(*) FILTER (WHERE has_website = true) as has_website_flag,
        COUNT(*) FILTER (WHERE website_language IS NOT NULL) as has_language
      FROM churches
      GROUP BY country
      ORDER BY total DESC
      LIMIT 20`
  },
  {
    name: "2. Total mass schedule counts",
    sql: `SELECT COUNT(*) as total_schedules,
        COUNT(DISTINCT church_id) as churches_with_schedules
      FROM mass_schedules`
  },
  {
    // `total_scraped` counts every church that has a detected language (see the
    // WHERE clause), not only scraped ones; `scraped` is the scraped subset.
    name: "3. Scrape results by language",
    sql: `SELECT website_language as language,
        COUNT(*) as total_scraped,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as scraped
      FROM churches
      WHERE website_language IS NOT NULL
      GROUP BY website_language
      ORDER BY total_scraped DESC`
  },
  {
    name: "4. Churches with websites but never scraped",
    sql: `SELECT COUNT(*) as has_website_not_scraped
      FROM churches
      WHERE website IS NOT NULL AND last_scraped_at IS NULL`
  },
  {
    name: "5. Overall pipeline funnel",
    sql: `SELECT
        COUNT(*) as total_churches,
        COUNT(*) FILTER (WHERE website IS NOT NULL) as has_website,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as attempted_scrape,
        COUNT(*) FILTER (WHERE website_language IS NOT NULL) as has_detected_language,
        (SELECT COUNT(DISTINCT church_id) FROM mass_schedules) as has_schedules_saved,
        (SELECT COUNT(*) FROM mass_schedules) as total_schedule_rows
      FROM churches`
  },
  {
    name: "6. Recent scrape activity (last 7 days) by language",
    sql: `SELECT website_language as language,
        COUNT(*) as scraped_last_7d
      FROM churches
      WHERE last_scraped_at > NOW() - INTERVAL '7 days'
      GROUP BY website_language
      ORDER BY scraped_last_7d DESC`
  },
  {
    // PostgreSQL sorts NULLs first under DESC, so rows without a completed_at
    // would fill the top 15 regardless of recency; NULLS LAST keeps the most
    // recently completed jobs on top.
    // NOTE(review): whether failed jobs can have a NULL completed_at is not
    // visible from this file - confirm against the background_jobs schema.
    name: "7. Background job history (last 15 completed/failed jobs)",
    sql: `SELECT type, language, status,
        created_at::date as created,
        completed_at::date as completed,
        ROUND(CAST(EXTRACT(EPOCH FROM (completed_at - created_at))/3600 AS numeric), 2) as hours,
        total_items, processed, succeeded, failed
      FROM background_jobs
      WHERE status IN ('completed', 'failed')
      ORDER BY completed_at DESC NULLS LAST
      LIMIT 15`
  },
  {
    name: "8. Mass schedule breakdown by day of week",
    sql: `SELECT day_of_week,
        CASE day_of_week
          WHEN 0 THEN 'Sunday' WHEN 1 THEN 'Monday' WHEN 2 THEN 'Tuesday'
          WHEN 3 THEN 'Wednesday' WHEN 4 THEN 'Thursday' WHEN 5 THEN 'Friday'
          WHEN 6 THEN 'Saturday' ELSE 'Other'
        END as day_name,
        COUNT(*) as count
      FROM mass_schedules
      GROUP BY day_of_week
      ORDER BY day_of_week`
  },
  {
    // NULLIF guards the percentage against division by zero.
    name: "9. Churches with schedules by country (top 15)",
    sql: `SELECT c.country,
        COUNT(DISTINCT c.id) as total_churches,
        COUNT(DISTINCT ms.church_id) as churches_with_schedules,
        ROUND(100.0 * COUNT(DISTINCT ms.church_id) / NULLIF(COUNT(DISTINCT c.id), 0), 1) as coverage_pct,
        COUNT(ms.id) as total_schedule_rows
      FROM churches c
      LEFT JOIN mass_schedules ms ON ms.church_id = c.id
      GROUP BY c.country
      ORDER BY total_churches DESC
      LIMIT 15`
  },
  {
    name: "10. Enrichment sources - how churches were found",
    sql: `SELECT source, COUNT(*) as count
      FROM churches
      GROUP BY source
      ORDER BY count DESC`
  },
  {
    name: "11. Google Places enrichment impact",
    sql: `SELECT
        COUNT(*) FILTER (WHERE google_place_id IS NOT NULL) as has_google_place,
        COUNT(*) FILTER (WHERE google_place_id IS NOT NULL AND website IS NOT NULL) as google_with_website,
        COUNT(*) FILTER (WHERE google_place_id IS NULL) as no_google_place,
        COUNT(*) FILTER (WHERE google_searched_at IS NOT NULL) as google_searched,
        COUNT(*) FILTER (WHERE free_searched_at IS NOT NULL) as free_searched
      FROM churches`
  },
  {
    name: "12. Website presence by source",
    sql: `SELECT source,
        COUNT(*) as total,
        COUNT(*) FILTER (WHERE website IS NOT NULL) as has_website,
        ROUND(100.0 * COUNT(*) FILTER (WHERE website IS NOT NULL) / NULLIF(COUNT(*), 0), 1) as website_pct,
        COUNT(*) FILTER (WHERE google_place_id IS NOT NULL) as has_google_place,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as scraped
      FROM churches
      GROUP BY source
      ORDER BY total DESC`
  }
];
|
||||
|
||||
/**
 * Renders result rows as the lines of a plain-text table.
 *
 * Column order comes from the keys of the first row. Null/undefined cells are
 * shown as "NULL". Each column is padded to its widest cell or header.
 *
 * @param {Array<Object>} rows - Non-empty array of row objects.
 * @returns {string[]} Header line, separator line, then one line per row.
 */
function formatTable(rows) {
  const cols = Object.keys(rows[0]);
  const cells = rows.map(row => cols.map(col => String(row[col] ?? "NULL")));

  // Fold over the rows instead of Math.max(...values): spreading one argument
  // per row can exceed the engine's argument limit on large result sets.
  const widths = cols.map((col, i) =>
    cells.reduce((widest, line) => Math.max(widest, line[i].length), col.length)
  );

  const header = cols.map((col, i) => col.padEnd(widths[i])).join(" | ");
  const separator = widths.map(w => "-".repeat(w)).join("-+-");
  const body = cells.map(line =>
    line.map((cell, i) => cell.padEnd(widths[i])).join(" | ")
  );

  return [header, separator, ...body];
}

/**
 * Connects the shared client, runs every entry of `queries` in order and
 * prints each result to stdout as a titled table.
 *
 * A failing query is reported inline ("ERROR: <message>") and the report
 * continues with the next one. The connection is closed even if the loop
 * exits abnormally.
 *
 * @returns {Promise<void>}
 * @throws Rejects if connecting or closing the connection fails.
 */
async function run() {
  await client.connect();

  try {
    const banner = "=".repeat(90);

    for (const q of queries) {
      console.log(banner);
      console.log(q.name);
      console.log(banner);

      try {
        const res = await client.query(q.sql);

        if (res.rows.length === 0) {
          console.log("(no rows returned)");
        } else {
          for (const line of formatTable(res.rows)) {
            console.log(line);
          }
        }

        console.log(`\n(${res.rows.length} rows)\n`);
      } catch (err) {
        // Best effort: one broken query must not abort the rest of the report.
        console.log("ERROR:", err.message, "\n");
      }
    }
  } finally {
    await client.end();
  }
}
|
||||
|
||||
// Script entry point: any rejection escaping run() is printed and the process
// exits with status 1.
run().catch((error) => {
  console.error(error);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user