Files
ja4-platform/tests/integration/docker-compose.yml
toto 12d60975da feat: Python traffic generator with realistic varied HTTP/HTTPS traffic
- Replace curlimages/curl with Python stdlib traffic generator
- 200 requests, 10 workers, 16 scenario types:
  browsers (Chrome/Firefox/Safari/Edge/mobile), bots (Googlebot/Bing/curl/wget),
  GET/POST/HEAD/PUT/PATCH/DELETE/OPTIONS, HTTP + HTTPS
- Multiple SSL contexts (default, TLS1.2-only, TLS1.3-only, few_ciphers)
  → 4 distinct JA4/JA3 fingerprints per test run
- Realistic headers: Accept, Accept-Language, Sec-Fetch-*, Referer,
  X-Forwarded-For, Cookie, Cache-Control
- JSON payloads, form data, CORS preflights
- DB always reset (down -v) at start of each test run
- Enhanced Phase 5 checks: distinct UAs, method variety, JA4/JA3 counts + uniqueness

Results: 199/200 OK, 24 distinct UAs, 7 HTTP methods, TLS 1.2+1.3, 4 JA4 fingerprints

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-07 21:14:55 +02:00

163 lines
5.7 KiB
YAML

# =============================================================================
# ja4-platform — Full-stack integration test
#
# Compose:
#   clickhouse   — ClickHouse server (schema init via entrypoint)
#   platform     — Rocky Linux 9: Apache + mod-reqin-log + sentinel + correlator
#   bot-detector — ML anomaly detection (Python)
#   dashboard    — SOC dashboard API (FastAPI)
#   traffic-gen  — Python (stdlib only) HTTP/HTTPS traffic generator
#
# Usage:
# cd tests/integration && ./run-tests.sh
# =============================================================================
services:
  # ---------------------------------------------------------------------------
  # ClickHouse — schema auto-init from shared/clickhouse/*.sql
  # ---------------------------------------------------------------------------
  clickhouse:
    image: clickhouse/clickhouse-server:24.8
    hostname: clickhouse
    environment:
      # Values are quoted so they are always passed to the container as strings.
      CLICKHOUSE_DB: ja4_processing
      CLICKHOUSE_USER: default
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    volumes:
      # Init script: copies, patches credentials, and executes SQL files
      - ./platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh
      # SQL sources (read-only, patched by init script before execution)
      - ../../shared/clickhouse/00_database.sql:/initdb-src/00_database.sql:ro
      - ../../shared/clickhouse/01_raw_tables.sql:/initdb-src/01_raw_tables.sql:ro
      - ../../shared/clickhouse/02_dictionaries.sql:/initdb-src/02_dictionaries.sql:ro
      - ../../shared/clickhouse/03_anubis_tables.sql:/initdb-src/03_anubis_tables.sql:ro
      - ../../shared/clickhouse/04_mv_http_logs.sql:/initdb-src/04_mv_http_logs.sql:ro
      - ../../shared/clickhouse/05_aggregation_tables.sql:/initdb-src/05_aggregation_tables.sql:ro
      - ../../shared/clickhouse/06_ml_tables.sql:/initdb-src/06_ml_tables.sql:ro
      - ../../shared/clickhouse/07_ai_features_view.sql:/initdb-src/07_ai_features_view.sql:ro
      - ../../shared/clickhouse/08_users.sql:/initdb-src/08_users.sql:ro
      - ../../shared/clickhouse/09_audit_table.sql:/initdb-src/09_audit_table.sql:ro
      # Empty CSV stubs (dictionaries expect these files)
      - ./platform/csv-stubs:/var/lib/clickhouse/user_files
    ports:
      # 9000 is the port used by the platform DSN below; 8123 is the port the
      # bot-detector and dashboard services are configured with.
      - "9000:9000"
      - "8123:8123"
    healthcheck:
      # Healthy once the server answers a trivial query.
      test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
      interval: 5s
      timeout: 3s
      retries: 30
    networks:
      - ja4net

  # ---------------------------------------------------------------------------
  # Platform — Rocky Linux 9: Apache (HTTPS) + mod-reqin-log + sentinel + correlator
  # ---------------------------------------------------------------------------
  platform:
    build:
      # Build context is the repository root (two levels above this file).
      context: ../..
      dockerfile: tests/integration/platform/Dockerfile
    hostname: platform
    cap_add:
      # NOTE(review): presumably required by sentinel's packet capture on eth0
      # (see the traffic generator notes below) — confirm against the image.
      - NET_RAW
      - NET_ADMIN
    environment:
      LOGCORRELATOR_CLICKHOUSE_DSN: "clickhouse://default:@clickhouse:9000/ja4_logs"
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      - "443:443"
      - "80:80"
    healthcheck:
      # -s silent, -f fail on HTTP errors, -k skip TLS certificate verification.
      test: ["CMD", "curl", "-sfk", "https://localhost/health"]
      interval: 5s
      timeout: 3s
      retries: 30
    networks:
      - ja4net

  # ---------------------------------------------------------------------------
  # Bot-detector — ML anomaly detection
  # ---------------------------------------------------------------------------
  bot-detector:
    build:
      context: ../..
      dockerfile: services/bot-detector/bot_detector/Dockerfile
    hostname: bot-detector
    environment:
      # Numeric settings are quoted: environment variables are strings.
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_PORT: "8123"
      CLICKHOUSE_DB_PROCESSING: ja4_processing
      CLICKHOUSE_DB_LOGS: ja4_logs
      CLICKHOUSE_USER: default
      CLICKHOUSE_PASSWORD: ""
      CYCLE_INTERVAL_SEC: "30"
      RETRAIN_INTERVAL_HOURS: "1"
      ANOMALY_THRESHOLD: "-0.05"
      HEALTH_PORT: "8080"
    depends_on:
      # Starts only after both the database and the platform report healthy.
      clickhouse:
        condition: service_healthy
      platform:
        condition: service_healthy
    healthcheck:
      # Probes the port configured as HEALTH_PORT above.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/')"]
      interval: 10s
      timeout: 5s
      retries: 10
    networks:
      - ja4net

  # ---------------------------------------------------------------------------
  # Dashboard — FastAPI SOC UI
  # ---------------------------------------------------------------------------
  dashboard:
    build:
      context: ../..
      dockerfile: services/dashboard/Dockerfile
    hostname: dashboard
    environment:
      # Numeric settings are quoted: environment variables are strings.
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_PORT: "8123"
      CLICKHOUSE_DB: ja4_processing
      CLICKHOUSE_DB_PROCESSING: ja4_processing
      CLICKHOUSE_DB_LOGS: ja4_logs
      CLICKHOUSE_USER: default
      CLICKHOUSE_PASSWORD: ""
      API_HOST: 0.0.0.0
      API_PORT: "8000"
      # Single-quoted so the JSON array reaches the service as a literal string.
      CORS_ORIGINS: '["*"]'
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      - "8000:8000"
    healthcheck:
      # Probes the port configured as API_PORT above.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 5s
      timeout: 3s
      retries: 30
    networks:
      - ja4net

  # ---------------------------------------------------------------------------
  # Traffic generator — Python (stdlib only) sending varied HTTP/HTTPS requests
  # to platform across the Docker network so sentinel (pcap on eth0) captures
  # TLS ClientHello packets with real JA4/JA3 fingerprints.
  # Multiple SSL contexts produce different TLS fingerprints per request.
  # ---------------------------------------------------------------------------
  traffic-gen:
    build:
      # Context is the traffic-gen directory next to this file.
      context: traffic-gen
    hostname: traffic-gen
    depends_on:
      # Only starts sending traffic once the platform health check passes.
      platform:
        condition: service_healthy
    networks:
      - ja4net

# Single bridge network shared by every service above.
networks:
  ja4net:
    driver: bridge