From 3cb780a6926c32feafc8ded1e99b4d9006798d3b Mon Sep 17 00:00:00 2001
From: albertfj114
Date: Sat, 28 Mar 2026 08:51:58 -0400
Subject: [PATCH] fix: replace pgrep healthcheck with heartbeat file check

---
 docker-compose.yml | 335 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 335 insertions(+)
 create mode 100644 docker-compose.yml

diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..4e1f413
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,335 @@
+# Compose stack: PostgreSQL, the web app, on-demand scrapers, the scheduler
+# and the FreeSearch enrichment worker.
+#
+# Every DATABASE_URL interpolates ${POSTGRES_PASSWORD:-postgres}, the same
+# expression the db service uses, so overriding the password cannot leave
+# the clients with stale credentials. The value is pasted into a URL as-is:
+# choose a password without URL-reserved characters (@ : / ? # %).
+# NOTE(review): the postgres image applies POSTGRES_PASSWORD only when the
+# data volume is first initialised; an existing volume keeps its old one.
+
+# Log rotation shared by the scraper services.
+x-scraper-logging: &scraper-logging
+  driver: json-file
+  options:
+    max-size: "50m"
+    max-file: "3"
+
+# Memory cap shared by the scraper services (merged in with `<<`).
+x-scraper-limits: &scraper-limits
+  deploy:
+    resources:
+      limits:
+        memory: 4G
+
+services:
+  # PostgreSQL 15; published to the host on port 5434.
+  db:
+    image: postgres:15-alpine
+    ports:
+      - "5434:5432"
+    environment:
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
+      - POSTGRES_DB=nearestmass
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+    shm_size: 256m
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "3"
+
+  # Web application on port 3001; starts once db reports healthy.
+  app:
+    build: .
+    ports:
+      - "3001:3001"
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - ADMIN_API_KEY=${ADMIN_API_KEY}
+    depends_on:
+      db:
+        condition: service_healthy
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          memory: 1G
+    logging:
+      driver: json-file
+      options:
+        max-size: "20m"
+        max-file: "3"
+
+  # Scraper image with no command override; enabled by the "tools" profile.
+  scraper:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - tools
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # English scraper (on-demand via scheduler or API)
+  scraper-english:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "english", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-english
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Generic scraper (for languages without dedicated scrapers)
+  scraper-generic:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "generic", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-generic
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # French scraper (on-demand via scheduler or API)
+  scraper-french:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "french", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-french
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # German scraper (on-demand via scheduler or API)
+  scraper-german:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "german", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-german
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Italian scraper (on-demand via scheduler or API)
+  scraper-italian:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "italian", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-italian
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Spanish scraper (on-demand via scheduler or API)
+  scraper-spanish:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "spanish", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-spanish
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Polish scraper (on-demand via scheduler or API)
+  scraper-polish:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "polish", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-polish
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Portuguese scraper (on-demand via scheduler or API)
+  scraper-portuguese:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "portuguese", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-portuguese
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Dutch scraper (on-demand via scheduler or API)
+  scraper-dutch:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "dutch", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-dutch
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Czech scraper (on-demand via scheduler or API)
+  scraper-czech:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "czech", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-czech
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Hungarian scraper (on-demand via scheduler or API)
+  scraper-hungarian:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "hungarian", "--max-failures", "10"]
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+    profiles:
+      - scraper-hungarian
+    <<: *scraper-limits
+    logging: *scraper-logging
+
+  # Scheduler process (scripts/scheduler.ts); starts once db reports healthy.
+  scheduler:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    init: true  # tini as PID 1 — reaps zombie Chromium processes
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - CHROMADB_URL=${CHROMADB_URL}
+      - BAIDU_MAPS_API_KEY=${BAIDU_MAPS_API_KEY}
+    command: ["npx", "tsx", "scripts/scheduler.ts"]
+    volumes:
+      - ./logs:/app/logs
+    depends_on:
+      db:
+        condition: service_healthy
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          memory: 8G
+    stop_grace_period: 30s
+    # Healthy while /app/logs/scheduler.heartbeat was modified in the last
+    # 120 minutes; presumably scripts/scheduler.ts touches it — TODO confirm.
+    healthcheck:
+      test: ["CMD-SHELL", "find /app/logs/scheduler.heartbeat -mmin -120 2>/dev/null | grep -q . || exit 1"]
+      interval: 90s
+      timeout: 10s
+      retries: 3
+      start_period: 90s
+    logging:
+      driver: json-file
+      options:
+        max-size: "100m"
+        max-file: "5"
+
+  # Continuous FreeSearch enrichment worker; starts once db reports healthy.
+  freesearch-enrichment:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
+      - FREESEARCH_URL=${FREESEARCH_URL}
+      - CHROMADB_URL=${CHROMADB_URL}
+    command: ["npx", "tsx", "scripts/enrich-with-freesearch.ts", "--continuous"]
+    volumes:
+      - ./logs:/app/logs
+    depends_on:
+      db:
+        condition: service_healthy
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "3"
+
+# Named volume mounted at the db service's PostgreSQL data directory.
+volumes:
+  postgres_data: