Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
168 lines
5.7 KiB
JavaScript
168 lines
5.7 KiB
JavaScript
const { Client } = require("pg");
|
|
// PostgreSQL client used for every query issued by this script.
// The connection string can be overridden through the DATABASE_URL environment
// variable so credentials do not have to live in source; when the variable is
// unset, the original hard-coded development database is used unchanged.
const client = new Client({
  connectionString:
    process.env.DATABASE_URL ??
    "postgresql://postgres:postgres@192.168.0.145:5434/nearestmass",
});
|
|
|
|
/**
 * Read-only report queries, executed in order.
 *
 * Each entry has a `name` (printed as the section heading) and a `sql` string
 * that is sent to the database verbatim. All statements are plain SELECTs
 * against the `churches`, `mass_schedules` and `background_jobs` tables.
 */
const queries = [
  {
    name: "1. Overall church counts by country (top 20)",
    // NOTE(review): `has_website` inside the FILTER clause is resolved against
    // the table, not the `has_website` output alias defined one line earlier —
    // presumably a boolean column on `churches`; confirm against the schema.
    sql: `SELECT country, COUNT(*) as total,
        COUNT(*) FILTER (WHERE website IS NOT NULL) as has_website,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as scraped,
        COUNT(*) FILTER (WHERE has_website = true) as has_website_flag,
        COUNT(*) FILTER (WHERE website_language IS NOT NULL) as has_language
      FROM churches
      GROUP BY country
      ORDER BY total DESC
      LIMIT 20`,
  },
  {
    name: "2. Total mass schedule counts",
    sql: `SELECT COUNT(*) as total_schedules,
        COUNT(DISTINCT church_id) as churches_with_schedules
      FROM mass_schedules`,
  },
  {
    name: "3. Scrape results by language",
    // `total_scraped` counts every church with a detected language; the
    // `scraped` column narrows that to rows with a non-null last_scraped_at.
    sql: `SELECT website_language as language,
        COUNT(*) as total_scraped,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as scraped
      FROM churches
      WHERE website_language IS NOT NULL
      GROUP BY website_language
      ORDER BY total_scraped DESC`,
  },
  {
    name: "4. Churches with websites but never scraped",
    sql: `SELECT COUNT(*) as has_website_not_scraped
      FROM churches
      WHERE website IS NOT NULL AND last_scraped_at IS NULL`,
  },
  {
    name: "5. Overall pipeline funnel",
    // The two scalar subqueries pull schedule totals from mass_schedules so the
    // whole funnel comes back as a single row.
    sql: `SELECT
        COUNT(*) as total_churches,
        COUNT(*) FILTER (WHERE website IS NOT NULL) as has_website,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as attempted_scrape,
        COUNT(*) FILTER (WHERE website_language IS NOT NULL) as has_detected_language,
        (SELECT COUNT(DISTINCT church_id) FROM mass_schedules) as has_schedules_saved,
        (SELECT COUNT(*) FROM mass_schedules) as total_schedule_rows
      FROM churches`,
  },
  {
    name: "6. Recent scrape activity (last 7 days) by language",
    sql: `SELECT website_language as language,
        COUNT(*) as scraped_last_7d
      FROM churches
      WHERE last_scraped_at > NOW() - INTERVAL '7 days'
      GROUP BY website_language
      ORDER BY scraped_last_7d DESC`,
  },
  {
    name: "7. Background job history (last 15 completed/failed jobs)",
    // `hours` is the elapsed time between created_at and completed_at.
    // NOTE(review): PostgreSQL sorts NULLs first under DESC, so any row with a
    // NULL completed_at would be listed ahead of real completions — confirm
    // that completed/failed jobs always have completed_at set.
    sql: `SELECT type, language, status,
        created_at::date as created,
        completed_at::date as completed,
        ROUND(CAST(EXTRACT(EPOCH FROM (completed_at - created_at))/3600 AS numeric), 2) as hours,
        total_items, processed, succeeded, failed
      FROM background_jobs
      WHERE status IN ('completed', 'failed')
      ORDER BY completed_at DESC
      LIMIT 15`,
  },
  {
    name: "8. Mass schedule breakdown by day of week",
    sql: `SELECT day_of_week,
        CASE day_of_week
          WHEN 0 THEN 'Sunday' WHEN 1 THEN 'Monday' WHEN 2 THEN 'Tuesday'
          WHEN 3 THEN 'Wednesday' WHEN 4 THEN 'Thursday' WHEN 5 THEN 'Friday'
          WHEN 6 THEN 'Saturday' ELSE 'Other'
        END as day_name,
        COUNT(*) as count
      FROM mass_schedules
      GROUP BY day_of_week
      ORDER BY day_of_week`,
  },
  {
    name: "9. Churches with schedules by country (top 15)",
    // LEFT JOIN keeps churches without schedules in the totals; NULLIF guards
    // the percentage against division by zero.
    sql: `SELECT c.country,
        COUNT(DISTINCT c.id) as total_churches,
        COUNT(DISTINCT ms.church_id) as churches_with_schedules,
        ROUND(100.0 * COUNT(DISTINCT ms.church_id) / NULLIF(COUNT(DISTINCT c.id), 0), 1) as coverage_pct,
        COUNT(ms.id) as total_schedule_rows
      FROM churches c
      LEFT JOIN mass_schedules ms ON ms.church_id = c.id
      GROUP BY c.country
      ORDER BY total_churches DESC
      LIMIT 15`,
  },
  {
    name: "10. Enrichment sources - how churches were found",
    sql: `SELECT source, COUNT(*) as count
      FROM churches
      GROUP BY source
      ORDER BY count DESC`,
  },
  {
    name: "11. Google Places enrichment impact",
    sql: `SELECT
        COUNT(*) FILTER (WHERE google_place_id IS NOT NULL) as has_google_place,
        COUNT(*) FILTER (WHERE google_place_id IS NOT NULL AND website IS NOT NULL) as google_with_website,
        COUNT(*) FILTER (WHERE google_place_id IS NULL) as no_google_place,
        COUNT(*) FILTER (WHERE google_searched_at IS NOT NULL) as google_searched,
        COUNT(*) FILTER (WHERE free_searched_at IS NOT NULL) as free_searched
      FROM churches`,
  },
  {
    name: "12. Website presence by source",
    sql: `SELECT source,
        COUNT(*) as total,
        COUNT(*) FILTER (WHERE website IS NOT NULL) as has_website,
        ROUND(100.0 * COUNT(*) FILTER (WHERE website IS NOT NULL) / NULLIF(COUNT(*), 0), 1) as website_pct,
        COUNT(*) FILTER (WHERE google_place_id IS NOT NULL) as has_google_place,
        COUNT(*) FILTER (WHERE last_scraped_at IS NOT NULL) as scraped
      FROM churches
      GROUP BY source
      ORDER BY total DESC`,
  },
];
|
|
|
|
/**
 * Render result rows as the lines of a fixed-width text table.
 *
 * Column order and header labels come from the keys of the first row. `null`
 * and `undefined` cells are shown as "NULL"; every other value is converted
 * with `String()`. Each column is padded to the width of its longest cell or
 * its header, whichever is larger.
 *
 * @param {Array<Object>} rows - Row objects that share the same keys.
 * @returns {string[]} Header line, separator line, then one line per row;
 *   an empty array when `rows` is empty.
 */
function formatTable(rows) {
  if (rows.length === 0) {
    return [];
  }

  const cols = Object.keys(rows[0]);
  // Stringify every cell exactly once; both the width pass and the print pass reuse it.
  const cells = rows.map((row) => cols.map((c) => String(row[c] ?? "NULL")));
  // Fold instead of Math.max(...array): spreading one argument per row can
  // exceed the engine's argument limit on large result sets.
  const widths = cols.map((c, i) =>
    cells.reduce((widest, line) => Math.max(widest, line[i].length), c.length)
  );
  const renderLine = (values) =>
    values.map((v, i) => v.padEnd(widths[i])).join(" | ");

  return [
    renderLine(cols),
    widths.map((w) => "-".repeat(w)).join("-+-"),
  ].concat(cells.map((line) => renderLine(line)));
}

/**
 * Connect to the database, run every entry of `queries` in order and print
 * each result to stdout as a text table under a banner with the query name.
 *
 * A failing query is reported as an "ERROR:" line and does not stop the
 * remaining queries. The connection is closed when the loop ends, including
 * when it ends because of an unexpected exception.
 *
 * @returns {Promise<void>} Resolves after the connection has been closed;
 *   rejects if connecting or disconnecting fails.
 */
async function run() {
  await client.connect();

  try {
    const rule = "=".repeat(90);

    for (const q of queries) {
      console.log(rule);
      console.log(q.name);
      console.log(rule);

      try {
        const res = await client.query(q.sql);

        if (res.rows.length === 0) {
          console.log("(no rows returned)");
        } else {
          for (const line of formatTable(res.rows)) {
            console.log(line);
          }
        }

        console.log(`\n(${res.rows.length} rows)\n`);
      } catch (err) {
        // Best-effort report: note the failure and carry on with the next query.
        console.log("ERROR:", err.message, "\n");
      }
    }
  } finally {
    await client.end();
  }
}
|
|
|
|
// Script entry point: start the report and, if it rejects, print the error and
// terminate with a non-zero exit status.
run().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|