Files
ScraperControl/src/app/api/admin/scraper-health/route.ts

114 lines
3.6 KiB
TypeScript
Raw Normal View History

import { NextRequest, NextResponse } from 'next/server';
import { prisma } from '@/lib/db';
import { validateAdminApiKey, unauthorizedResponse } from '@/lib/admin-auth';
import { buildLanguageFilter } from '@/lib/scraper-service';
/**
 * Language keys for which a per-language queue count is computed.
 * Each entry is passed to `buildLanguageFilter` by the health endpoint.
 */
const LANGUAGES = [
  'english',
  'french',
  'spanish',
  'italian',
  'german',
  'polish',
  'portuguese',
  'dutch',
  'czech',
  'hungarian',
  'generic',
];
/**
 * Renders a millisecond duration as a compact label: `"<h>h <m>m"` when at
 * least one full hour has elapsed, otherwise `"<m>m"`. Seconds are truncated.
 *
 * Negative or non-finite input is treated as zero, so a start time that lies
 * slightly in the future (e.g. clock skew between app and database) yields
 * `"0m"` instead of output such as `"-1m"` or `"NaNm"`.
 *
 * @param ms - Elapsed time in milliseconds.
 * @returns The formatted duration label.
 */
function formatDuration(ms: number): string {
  // Clamp unusable input to zero; Math.floor on negatives rounds away from zero.
  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
  const hours = Math.floor(safeMs / 3_600_000);
  const minutes = Math.floor((safeMs % 3_600_000) / 60_000);
  if (hours > 0) return `${hours}h ${minutes}m`;
  return `${minutes}m`;
}
/**
 * GET /api/admin/scraper-health — quick health check for the scraper pipeline.
 *
 * Requests that fail `validateAdminApiKey` receive `unauthorizedResponse()`.
 * Otherwise the JSON response contains:
 * - `throughput`: counts of churches whose `lastScrapedAt` falls within the
 *   last 1, 6 and 24 hours.
 * - `runningJobs`: background jobs with status `running` and type `scraper`,
 *   each with a formatted `runningFor` duration (null when `startedAt` is unset).
 * - `queue`: per-language count of churches matching the queue criteria;
 *   languages with a zero count are omitted.
 * - `healthy` / `warning`: `healthy` is false only when some running job
 *   started more than 6 hours ago AND nothing was scraped in the last 6 hours.
 *
 * Errors raised while querying or building the response are logged and
 * reported as a 500 response.
 */
export async function GET(request: NextRequest) {
  if (!validateAdminApiKey(request)) {
    return unauthorizedResponse();
  }

  try {
    const HOUR_MS = 3_600_000;
    const now = Date.now();
    const staleCutoff = new Date(now - 30 * 24 * 60 * 60 * 1000);

    // --- Throughput: churches scraped within the last 1h, 6h and 24h ---
    const countScrapedWithin = (hours: number) =>
      prisma.church.count({
        where: { lastScrapedAt: { gte: new Date(now - hours * HOUR_MS) } },
      });
    const throughputQuery = Promise.all([
      countScrapedWithin(1),
      countScrapedWithin(6),
      countScrapedWithin(24),
    ]);

    // --- Scraper jobs currently marked as running ---
    const runningJobsQuery = prisma.backgroundJob.findMany({
      where: { status: 'running', type: 'scraper' },
      select: { id: true, type: true, language: true, startedAt: true, processed: true },
    });

    // --- Per-language queue counts ---
    // Queue criteria: unclaimed, has a website, never scraped or last scraped
    // more than 30 days ago, and either no scraper config or fewer than 5 failures.
    const baseWhere = {
      claimed: false,
      website: { not: null },
      OR: [
        { lastScrapedAt: null },
        { lastScrapedAt: { lt: staleCutoff } },
      ],
      AND: [
        {
          OR: [
            { scraperConfig: null },
            { scraperConfig: { failureCount: { lt: 5 } } },
          ],
        },
      ],
    };
    const queueQueries = LANGUAGES.map(async (language) => {
      const languageFilter = buildLanguageFilter(language);
      // A falsy filter adds no extra condition for this language.
      const extraConditions = languageFilter ? [languageFilter] : [];
      const pending = await prisma.church.count({
        where: {
          ...baseWhere,
          AND: [...(baseWhere.AND as object[]), ...extraConditions],
        },
      });
      return [language, pending] as const;
    });

    // Await every query together so they run concurrently.
    const [throughputCounts, runningJobs, queueEntries] = await Promise.all([
      throughputQuery,
      runningJobsQuery,
      Promise.all(queueQueries),
    ]);
    const [last1h, last6h, last24h] = throughputCounts;

    // Only languages with at least one queued church appear in the response.
    const queue: Record<string, number> = Object.fromEntries(
      queueEntries.filter(([, pending]) => pending > 0)
    );

    // Shape running jobs for the response, adding a human-readable duration.
    const jobs = runningJobs.map(({ id, type, language, startedAt, processed }) => {
      const runningFor = startedAt ? formatDuration(now - startedAt.getTime()) : null;
      return { id, type, language, startedAt, runningFor, processed };
    });

    // Health check: unhealthy if any scraper has been running >6h with zero
    // throughput in the last 6 hours.
    const stuckAfterMs = 6 * HOUR_MS;
    const hasStuckJob = runningJobs.some(
      (job) => job.startedAt != null && now - job.startedAt.getTime() > stuckAfterMs
    );
    const healthy = !hasStuckJob || last6h !== 0;
    const warning = healthy
      ? null
      : 'Scraper job running >6h with zero throughput in last 6 hours';

    return NextResponse.json({
      throughput: { last1h, last6h, last24h },
      runningJobs: jobs,
      queue,
      healthy,
      warning,
    });
  } catch (error) {
    console.error('Error in scraper health:', error);
    return NextResponse.json(
      { error: 'Failed to get scraper health' },
      { status: 500 }
    );
  }
}