chore: sync with Gitea master and restore local-only files
Reset local main to gitea/master (new source of truth) and restored local-only files: web scrapers, admin dashboard, ChromaDB integration, debug scripts, and utility libraries that aren't tracked in Gitea. Gitea master adds: discovermass, buscarmisas-network, hk-parishes, bohosluzby, kerknet, gottesdienstzeiten, miserend importers, ClaimRequest model, forward geocoding, heartbeat healthcheck. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
113
src/app/api/admin/scraper-health/route.ts
Normal file
113
src/app/api/admin/scraper-health/route.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { prisma } from '@/lib/db';
|
||||
import { validateAdminApiKey, unauthorizedResponse } from '@/lib/admin-auth';
|
||||
import { buildLanguageFilter } from '@/lib/scraper-service';
|
||||
|
||||
// Language keys for which a per-language scrape-queue count is reported.
// Each entry is passed to buildLanguageFilter() by the GET handler.
const LANGUAGES: string[] = [
  'english',
  'french',
  'spanish',
  'italian',
  'german',
  'polish',
  'portuguese',
  'dutch',
  'czech',
  'hungarian',
  'generic',
];
|
||||
|
||||
/**
 * Render a millisecond duration as a compact human-readable string.
 *
 * Output is `"<hours>h <minutes>m"` when at least one full hour has elapsed,
 * otherwise `"<minutes>m"`. Seconds and milliseconds are truncated.
 *
 * Negative or NaN input is clamped to zero. A start timestamp that lies
 * slightly ahead of the local clock would otherwise produce misleading
 * output such as `"-30m"` for -1.5 hours.
 *
 * @param ms - Elapsed time in milliseconds.
 * @returns The formatted duration, e.g. `"1h 30m"` or `"12m"`.
 */
function formatDuration(ms: number): string {
  const MS_PER_HOUR = 3_600_000;
  const MS_PER_MINUTE = 60_000;

  // `ms > 0` is false for negatives, zero and NaN, so all of them become 0.
  const elapsed = ms > 0 ? ms : 0;

  const hours = Math.floor(elapsed / MS_PER_HOUR);
  const minutes = Math.floor((elapsed % MS_PER_HOUR) / MS_PER_MINUTE);

  return hours > 0 ? `${hours}h ${minutes}m` : `${minutes}m`;
}
|
||||
|
||||
/**
 * GET /api/admin/scraper-health — quick health check for the scraper pipeline.
 *
 * Responds with the unauthorized response when `validateAdminApiKey(request)`
 * is falsy. Otherwise returns JSON containing:
 *  - `throughput`: number of churches whose `lastScrapedAt` falls within the
 *    last 1, 6 and 24 hours;
 *  - `runningJobs`: background jobs with status `running` and type `scraper`,
 *    each with a formatted `runningFor` duration (null when no `startedAt`);
 *  - `queue`: per-language count of churches matching the queue criteria
 *    below (languages with a zero count are omitted);
 *  - `healthy` / `warning`: unhealthy when some running job started more than
 *    6 hours ago AND no church was scraped in the last 6 hours.
 *
 * Any thrown error is logged and reported as a 500 JSON response.
 */
export async function GET(request: NextRequest) {
  const authorized = validateAdminApiKey(request);
  if (!authorized) {
    return unauthorizedResponse();
  }

  try {
    const HOUR_MS = 3_600_000;
    const nowMs = Date.now();
    const staleCutoff = new Date(nowMs - 30 * 24 * 60 * 60 * 1000);

    // Throughput: churches scraped within the trailing N hours.
    const countScrapedWithin = (hours: number) =>
      prisma.church.count({
        where: { lastScrapedAt: { gte: new Date(nowMs - hours * HOUR_MS) } },
      });
    const throughputQuery = Promise.all([
      countScrapedWithin(1),
      countScrapedWithin(6),
      countScrapedWithin(24),
    ]);

    // Scraper jobs currently marked as running.
    const activeJobsQuery = prisma.backgroundJob.findMany({
      where: { status: 'running', type: 'scraper' },
      select: { id: true, type: true, language: true, startedAt: true, processed: true },
    });

    // Queue criteria shared by every language: unclaimed, has a website, and
    // either never scraped or last scraped before the 30-day cutoff.
    const queueCriteria = {
      claimed: false,
      website: { not: null },
      OR: [
        { lastScrapedAt: null },
        { lastScrapedAt: { lt: staleCutoff } },
      ],
    };
    // Additionally: no scraper config at all, or fewer than 5 recorded failures.
    const failureCondition = {
      OR: [
        { scraperConfig: null },
        { scraperConfig: { failureCount: { lt: 5 } } },
      ],
    };

    const queueQueries = LANGUAGES.map(async (language) => {
      const languageFilter = buildLanguageFilter(language);
      const conditions: object[] = [failureCondition];
      if (languageFilter) {
        conditions.push(languageFilter);
      }
      const pending = await prisma.church.count({
        where: { ...queueCriteria, AND: conditions },
      });
      return [language, pending] as const;
    });

    // Await every query together so they run concurrently.
    const [throughputCounts, activeJobs, queueEntries] = await Promise.all([
      throughputQuery,
      activeJobsQuery,
      Promise.all(queueQueries),
    ]);
    const [last1h, last6h, last24h] = throughputCounts;

    // Report only languages that actually have something queued.
    const queue: Record<string, number> = Object.fromEntries(
      queueEntries.filter(([, pending]) => pending > 0),
    );

    // Shape the running jobs for the response, adding a readable duration.
    const jobs = activeJobs.map(({ id, type, language, startedAt, processed }) => ({
      id,
      type,
      language,
      startedAt,
      runningFor: startedAt ? formatDuration(nowMs - startedAt.getTime()) : null,
      processed,
    }));

    // Unhealthy only when a job has been running for more than 6 hours while
    // nothing at all was scraped during the last 6 hours.
    const stuckThresholdMs = 6 * HOUR_MS;
    const hasStuckJob = activeJobs.some(
      ({ startedAt }) => startedAt && nowMs - startedAt.getTime() > stuckThresholdMs,
    );
    const healthy = !hasStuckJob || last6h !== 0;
    const warning = healthy
      ? null
      : 'Scraper job running >6h with zero throughput in last 6 hours';

    return NextResponse.json({
      throughput: { last1h, last6h, last24h },
      runningJobs: jobs,
      queue,
      healthy,
      warning,
    });
  } catch (err) {
    console.error('Error in scraper health:', err);
    return NextResponse.json(
      { error: 'Failed to get scraper health' },
      { status: 500 },
    );
  }
}
|
||||
Reference in New Issue
Block a user