---
# Shared log-rotation settings; services reference them through the
# *scraper-logging alias.
x-scraper-logging: &scraper-logging
  driver: json-file
  options:
    # Keep at most 3 files of up to 50m each per container.
    max-file: "3"
    max-size: "50m"

# Shared resource limits; services pull them in with `<<: *scraper-limits`,
# which contributes the whole `deploy` key.
x-scraper-limits: &scraper-limits
  deploy:
    resources:
      limits:
        memory: 4G

services:
|
|
# PostgreSQL 15 database; its data directory lives in the named volume
# postgres_data.
db:
  image: postgres:15-alpine
  restart: unless-stopped
  shm_size: 256m
  # Host port 5434 maps to PostgreSQL's port 5432 inside the container.
  ports:
    - "5434:5432"
  environment:
    - POSTGRES_USER=postgres
    # Falls back to "postgres" when POSTGRES_PASSWORD is unset or empty.
    - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
    - POSTGRES_DB=nearestmass
  volumes:
    - postgres_data:/var/lib/postgresql/data
  # Services that declare `condition: service_healthy` wait for this check.
  healthcheck:
    test:
      - "CMD-SHELL"
      - "pg_isready -U postgres"
    interval: 5s
    timeout: 5s
    retries: 5
  deploy:
    resources:
      limits:
        memory: 4G
  logging:
    driver: json-file
    options:
      max-size: "50m"
      max-file: "3"

# Application service, built from the Dockerfile in the project root and
# published on host port 3001. Starts only after db reports healthy.
app:
  build: .
  restart: unless-stopped
  ports:
    - "3001:3001"
  environment:
    # The password uses the same expression as the db service's
    # POSTGRES_PASSWORD, so overriding POSTGRES_PASSWORD keeps both sides in
    # sync (the default is still "postgres").
    # NOTE: the value is embedded in a URL; keep POSTGRES_PASSWORD free of
    # URL-reserved characters such as @ : / ? #.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    # Interpolated by Compose; empty when ADMIN_API_KEY is not set.
    - ADMIN_API_KEY=${ADMIN_API_KEY}
  depends_on:
    db:
      condition: service_healthy
  deploy:
    resources:
      limits:
        memory: 1G
  logging:
    driver: json-file
    options:
      max-size: "20m"
      max-file: "3"

# Scraper image without a command override; only enabled with the `tools`
# profile.
scraper:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  profiles:
    - tools
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# English scraper (on-demand via scheduler or API); only enabled with the
# `scraper-english` profile.
scraper-english:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "english", "--max-failures", "10"]
  profiles:
    - scraper-english
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Generic scraper (for languages without dedicated scrapers); only enabled
# with the `scraper-generic` profile.
scraper-generic:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "generic", "--max-failures", "10"]
  profiles:
    - scraper-generic
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# French scraper (on-demand via scheduler or API); only enabled with the
# `scraper-french` profile.
scraper-french:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "french", "--max-failures", "10"]
  profiles:
    - scraper-french
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# German scraper (on-demand via scheduler or API); only enabled with the
# `scraper-german` profile.
scraper-german:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "german", "--max-failures", "10"]
  profiles:
    - scraper-german
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Italian scraper (on-demand via scheduler or API); only enabled with the
# `scraper-italian` profile.
scraper-italian:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "italian", "--max-failures", "10"]
  profiles:
    - scraper-italian
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Spanish scraper (on-demand via scheduler or API); only enabled with the
# `scraper-spanish` profile.
scraper-spanish:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "spanish", "--max-failures", "10"]
  profiles:
    - scraper-spanish
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Polish scraper (on-demand via scheduler or API); only enabled with the
# `scraper-polish` profile.
scraper-polish:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "polish", "--max-failures", "10"]
  profiles:
    - scraper-polish
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Portuguese scraper (on-demand via scheduler or API); only enabled with the
# `scraper-portuguese` profile.
scraper-portuguese:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "portuguese", "--max-failures", "10"]
  profiles:
    - scraper-portuguese
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Dutch scraper (on-demand via scheduler or API); only enabled with the
# `scraper-dutch` profile.
scraper-dutch:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "dutch", "--max-failures", "10"]
  profiles:
    - scraper-dutch
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Czech scraper (on-demand via scheduler or API); only enabled with the
# `scraper-czech` profile.
scraper-czech:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "czech", "--max-failures", "10"]
  profiles:
    - scraper-czech
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Hungarian scraper (on-demand via scheduler or API); only enabled with the
# `scraper-hungarian` profile.
scraper-hungarian:
  # Merges the shared `deploy` memory limit (4G) from x-scraper-limits.
  <<: *scraper-limits
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/scrape-churches.ts", "--all", "--language", "hungarian", "--max-failures", "10"]
  profiles:
    - scraper-hungarian
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
  logging: *scraper-logging

# Long-running scheduler: runs scripts/scheduler.ts from the scraper image
# and starts only after db reports healthy.
scheduler:
  build:
    context: .
    dockerfile: Dockerfile.scraper
  init: true  # tini as PID 1 — reaps zombie Chromium processes
  command: ["npx", "tsx", "scripts/scheduler.ts"]
  restart: unless-stopped
  stop_grace_period: 30s
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - CHROMADB_URL=${CHROMADB_URL}
    - BAIDU_MAPS_API_KEY=${BAIDU_MAPS_API_KEY}
  volumes:
    - ./logs:/app/logs
  depends_on:
    db:
      condition: service_healthy
  deploy:
    resources:
      limits:
        memory: 8G
  # Healthy while /app/logs/scheduler.heartbeat exists and was modified
  # within the last 120 minutes; otherwise the check exits 1.
  healthcheck:
    test: ["CMD-SHELL", "find /app/logs/scheduler.heartbeat -mmin -120 2>/dev/null | grep -q . || exit 1"]
    interval: 90s
    timeout: 10s
    retries: 3
    start_period: 90s
  logging:
    driver: json-file
    options:
      max-size: "100m"
      max-file: "5"

# Continuous enrichment worker: runs scripts/enrich-with-freesearch.ts with
# --continuous from the scraper image, starting only after db is healthy.
freesearch-enrichment:
  build:
    context: .
    dockerfile: Dockerfile.scraper
  command: ["npx", "tsx", "scripts/enrich-with-freesearch.ts", "--continuous"]
  restart: unless-stopped
  env_file:
    - .env
  # Entries here take precedence over the same keys loaded from .env.
  environment:
    # Same password expression as the db service's POSTGRES_PASSWORD, so an
    # override of POSTGRES_PASSWORD does not break authentication.
    # NOTE: keep POSTGRES_PASSWORD free of URL-reserved characters.
    - DATABASE_URL=postgresql://postgres:${POSTGRES_PASSWORD:-postgres}@db:5432/nearestmass
    - FREESEARCH_URL=${FREESEARCH_URL}
    - CHROMADB_URL=${CHROMADB_URL}
  volumes:
    - ./logs:/app/logs
  depends_on:
    db:
      condition: service_healthy
  deploy:
    resources:
      limits:
        memory: 4G
  logging:
    driver: json-file
    options:
      max-size: "50m"
      max-file: "3"

# Named volumes managed by Compose.
volumes:
  # Default settings; mounted by the db service at /var/lib/postgresql/data.
  postgres_data: {}