Files
ScraperControl/docker-compose.yml

316 lines
8.5 KiB
YAML
Raw Permalink Normal View History

# Reusable logging fragment (Compose extension field).
# Services opt in with `logging: *scraper-logging`.
x-scraper-logging: &scraper-logging
  driver: json-file
  options:
    # Values are quoted so they stay strings, as the logging driver expects.
    max-size: '50m'
    max-file: '3'
# Reusable resource-limit fragment (Compose extension field).
# Services pull it in with the merge key `<<: *scraper-limits`, which adds
# the `deploy` mapping below to the service definition.
x-scraper-limits: &scraper-limits
  deploy:
    resources:
      limits:
        memory: 4G
services:
# PostgreSQL instance holding the `nearestmass` database.
db:
  image: postgres:15-alpine
  ports:
    # Host port 5434 -> container port 5432.
    # NOTE(review): this publishes on every host interface; confirm that
    # is intended.
    - "5434:5432"
  environment:
    POSTGRES_USER: postgres
    # Falls back to `postgres` when POSTGRES_PASSWORD is unset or empty.
    # NOTE(review): services in this file that connect through a
    # DATABASE_URL must present this same password.
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-postgres}"
    POSTGRES_DB: nearestmass
  volumes:
    # Named volume so the data directory survives container re-creation.
    - postgres_data:/var/lib/postgresql/data
  healthcheck:
    # Other services gate on this via `condition: service_healthy`.
    test:
      - CMD-SHELL
      - pg_isready -U postgres
    interval: 5s
    timeout: 5s
    retries: 5
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: 4G
  shm_size: 256m
  logging:
    driver: json-file
    options:
      max-size: "50m"
      max-file: "3"
# Application service, built from the Dockerfile in the repository root
# and published on host port 3001.
app:
  build: .
  ports:
    - "3001:3001"
  environment:
    # The password is interpolated from the same variable (and the same
    # default) that the db service uses for POSTGRES_PASSWORD, so setting
    # POSTGRES_PASSWORD no longer leaves this URL with a stale password.
    # NOTE: the value is embedded in a URL; keep the password free of
    # URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    # Passed through from the host environment / Compose .env file.
    - ADMIN_API_KEY=${ADMIN_API_KEY}
  depends_on:
    db:
      # Wait for the db healthcheck to pass before starting.
      condition: service_healthy
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: 1G
  logging:
    driver: json-file
    options:
      max-size: "20m"
      max-file: "3"
# General-purpose scraper container. It belongs to the `tools` profile, so
# it is only created when that profile is enabled or the service is
# targeted explicitly. No `command` is set here, so the image default
# applies.
scraper:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - tools
  logging: *scraper-logging
# English scraper (on-demand via scheduler or API).
# Gated behind the `scraper-english` profile.
scraper-english:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "english", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-english
  logging: *scraper-logging
# Generic scraper (for languages without dedicated scrapers).
# Gated behind the `scraper-generic` profile.
scraper-generic:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "generic", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-generic
  logging: *scraper-logging
# French scraper (on-demand via scheduler or API).
# Gated behind the `scraper-french` profile.
scraper-french:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "french", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-french
  logging: *scraper-logging
# German scraper (on-demand via scheduler or API).
# Gated behind the `scraper-german` profile.
scraper-german:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "german", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-german
  logging: *scraper-logging
# Italian scraper (on-demand via scheduler or API).
# Gated behind the `scraper-italian` profile.
scraper-italian:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "italian", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-italian
  logging: *scraper-logging
# Spanish scraper (on-demand via scheduler or API).
# Gated behind the `scraper-spanish` profile.
scraper-spanish:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "spanish", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-spanish
  logging: *scraper-logging
# Polish scraper (on-demand via scheduler or API).
# Gated behind the `scraper-polish` profile.
scraper-polish:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "polish", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-polish
  logging: *scraper-logging
# Portuguese scraper (on-demand via scheduler or API).
# Gated behind the `scraper-portuguese` profile.
scraper-portuguese:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "portuguese", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-portuguese
  logging: *scraper-logging
# Dutch scraper (on-demand via scheduler or API).
# Gated behind the `scraper-dutch` profile.
scraper-dutch:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "dutch", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-dutch
  logging: *scraper-logging
# Czech scraper (on-demand via scheduler or API).
# Gated behind the `scraper-czech` profile.
scraper-czech:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "czech", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-czech
  logging: *scraper-logging
# Hungarian scraper (on-demand via scheduler or API).
# Gated behind the `scraper-hungarian` profile.
scraper-hungarian:
  # Merge the shared `deploy` limits defined under x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "hungarian", "--max-failures", "10"]
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  profiles:
    - scraper-hungarian
  logging: *scraper-logging
# Long-running scheduler process (scripts/scheduler.ts), built from the
# scraper image. Restarts automatically and waits for a healthy db.
scheduler:
  build:
    context: .
    dockerfile: Dockerfile.scraper
  init: true  # tini as PID 1 — reaps zombie Chromium processes
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
    - BAIDU_MAPS_API_KEY=${BAIDU_MAPS_API_KEY}
  command: ["npx", "tsx", "scripts/scheduler.ts"]
  volumes:
    # Bind mount: /app/logs (including the heartbeat file checked below)
    # is visible on the host under ./logs.
    - ./logs:/app/logs
  depends_on:
    db:
      condition: service_healthy
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: 8G
  # Time allowed for a clean shutdown before the container is killed.
  stop_grace_period: 30s
  healthcheck:
    # Healthy only while /app/logs/scheduler.heartbeat exists and was
    # modified within the last 120 minutes: `find` prints the path only
    # then, and `grep -q .` fails on empty output.
    test: ["CMD-SHELL", "find /app/logs/scheduler.heartbeat -mmin -120 2>/dev/null | grep -q . || exit 1"]
    interval: 90s
    timeout: 10s
    retries: 3
    start_period: 90s
  logging:
    driver: json-file
    options:
      max-size: "100m"
      max-file: "5"
# Continuous enrichment worker: runs scripts/enrich-with-freesearch.ts
# with --continuous, built from the scraper image.
freesearch-enrichment:
  build:
    context: .
    dockerfile: Dockerfile.scraper
  env_file:
    - .env
  environment:
    # Password follows the db service's POSTGRES_PASSWORD (same default),
    # so overriding it does not break authentication. Keep the password
    # free of URL-reserved characters (@ : / ? #).
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - FREESEARCH_URL=${FREESEARCH_URL}
    - CHROMADB_URL=${CHROMADB_URL}
  command: ["npx", "tsx", "scripts/enrich-with-freesearch.ts", "--continuous"]
  volumes:
    # Bind mount: /app/logs in the container is ./logs on the host.
    - ./logs:/app/logs
  depends_on:
    db:
      # Wait for the db healthcheck to pass before starting.
      condition: service_healthy
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: 4G
  logging:
    driver: json-file
    options:
      max-size: "50m"
      max-file: "3"
# Named volumes. `postgres_data` is mounted by the db service at
# /var/lib/postgresql/data and uses the default volume settings.
volumes:
  postgres_data: {}