Files
ja4-platform/tests/vm/analysis/docker-compose.yml
Jacquin Antoine 36b5065a0a feat(e2e): add multi-IP endpoint architecture with dedicated traffic VM
Replace single-service-per-endpoint with all-ips mode running nginx, apache,
and hitch+varnish simultaneously on 3 dedicated IPs per VM (eth1 alias IPs).
Add a dedicated traffic VM with curl-impersonate for realistic TLS fingerprints,
parallelized traffic generation, and paired SNI_HOSTS/TARGET_IPS lists for
per-VM per-service hostname identification (e.g. rocky9-nginx-platform.test).

Key changes:
- run-tests-vm.sh: add setup_all_ips(), IP-specific Listen/bind directives
  with reset-before-apply pattern, graceful service availability checks
- run-e2e-test.sh: traffic VM architecture, all-ips mode, eth1 network,
  paired IP/SNI lists, updated cleanup for alias IPs
- generate-traffic.sh: parallel background jobs, curl-impersonate detection,
  auto source interface detection via ip route get, Host header in HTTP traffic
- Vagrantfile: add traffic VM with provision-traffic.sh
- provision-traffic.sh: install curl-impersonate and httpx for traffic gen
- test-rpm.sh: multi-interface TC check, updated ja4ebpf config
- clickhouse-init.sh: load CSV stubs for Anubis/bot-networks dictionaries
- Remove obsolete correlator/sentinel/mod-reqin-log docs
- Add h2_settings_ack column to http_logs schema
- Upgrade Go toolchain to 1.25.0

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-16 14:25:24 +02:00

123 lines
5.6 KiB
YAML

# =============================================================================
# Stack analysis — ClickHouse + bot-detector + dashboard
#
# Déployée sur la VM analysis (192.168.42.10) pour le test E2E distribué.
# Les endpoints EL8/9/10 envoient leurs logs ja4ebpf vers ce ClickHouse.
# =============================================================================
services:
  # ---------------------------------------------------------------------------
  # ClickHouse server: storage backend that the other two services connect to.
  # ---------------------------------------------------------------------------
  clickhouse:
    image: clickhouse/clickhouse-server:24.8
    hostname: clickhouse
    ports:
      # Published on all host interfaces (0.0.0.0).
      - "0.0.0.0:9000:9000"  # Native protocol (ja4ebpf from the endpoints)
      - "0.0.0.0:8123:8123"  # HTTP API (bot-detector, dashboard, checks)
    environment:
      # All values are quoted so they reach the container verbatim as strings.
      CLICKHOUSE_DB: "ja4_processing"
      CLICKHOUSE_USER: "default"
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    volumes:
      # Initialization script (patches credentials for tests)
      - ../../integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh
      # SQL schema (reuses the shared files), mounted read-only
      - ../../../shared/clickhouse/00_database.sql:/initdb-src/00_database.sql:ro
      - ../../../shared/clickhouse/01_raw_tables.sql:/initdb-src/01_raw_tables.sql:ro
      - ../../../shared/clickhouse/02_dictionaries.sql:/initdb-src/02_dictionaries.sql:ro
      - ../../../shared/clickhouse/03_anubis_tables.sql:/initdb-src/03_anubis_tables.sql:ro
      - ../../../shared/clickhouse/04_mv_http_logs.sql:/initdb-src/04_mv_http_logs.sql:ro
      - ../../../shared/clickhouse/05_aggregation_tables.sql:/initdb-src/05_aggregation_tables.sql:ro
      - ../../../shared/clickhouse/06_ml_tables.sql:/initdb-src/06_ml_tables.sql:ro
      - ../../../shared/clickhouse/07_ai_features_view.sql:/initdb-src/07_ai_features_view.sql:ro
      - ../../../shared/clickhouse/08_users.sql:/initdb-src/08_users.sql:ro
      - ../../../shared/clickhouse/09_audit_table.sql:/initdb-src/09_audit_table.sql:ro
      - ../../../shared/clickhouse/10_perf_indexes.sql:/initdb-src/10_perf_indexes.sql:ro
      - ../../../shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro
      - ../../../shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro
      - ../../../shared/data/browser_h2.csv:/initdb-src/browser_h2.csv:ro
      # CSV stubs for the ClickHouse dictionaries + tables (Anubis, ref_bot_networks, etc.)
      - ../../integration/platform/csv-stubs:/var/lib/clickhouse/user_files
    healthcheck:
      # Probed every 5 s, up to 30 attempts; bot-detector and dashboard wait
      # on this check through `depends_on: condition: service_healthy`.
      test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
      interval: 5s
      timeout: 3s
      retries: 30
    networks:
      - analysis-net

  # ---------------------------------------------------------------------------
  # bot-detector: built from the repository, started once ClickHouse is healthy.
  # ---------------------------------------------------------------------------
  bot-detector:
    build:
      # Absolute build context.
      # NOTE(review): presumably the repository is mounted at /ja4-platform
      # inside the analysis VM; confirm against the Vagrantfile.
      context: /ja4-platform
      dockerfile: services/bot-detector/bot_detector/Dockerfile
    container_name: bot_detector_ai
    restart: unless-stopped
    environment:
      # Numeric values are quoted so the exact text is passed through
      # (e.g. "0.10" is not re-serialized as 0.1 by the YAML parser).
      # ── ClickHouse ──────────────────────────────────────────────────────────
      CLICKHOUSE_HOST: "clickhouse"
      CLICKHOUSE_DB: "ja4_processing"
      CLICKHOUSE_DB_LOGS: "ja4_logs"
      CLICKHOUSE_DB_PROCESSING: "ja4_processing"
      CLICKHOUSE_USER: "default"
      CLICKHOUSE_PASSWORD: ""
      # ── Accelerated cycle for tests ─────────────────────────────────────────
      CYCLE_INTERVAL_SEC: "30"
      MAX_CONSECUTIVE_FAILURES: "5"
      # ── ML ──────────────────────────────────────────────────────────────────
      ISOLATION_CONTAMINATION: "0.02"
      ANOMALY_THRESHOLD: "-0.03"
      MIN_VALID_FEATURE_RATIO: "0.10"
      MIN_HUMAN_BASELINE: "5"
      BASELINE_ACCEPT_UNKNOWN: "true"
      # ── Features disabled to speed up the tests ─────────────────────────────
      ENABLE_SHAP: "false"
      ENABLE_CLUSTERING: "false"
      ENABLE_MULTIWINDOW: "false"
      # ── Logs ────────────────────────────────────────────────────────────────
      BOT_DETECTOR_LOG: "/var/log/bot_detector/decisions.jsonl"
      LOG_BACKUP_COUNT: "3"
      # ── Health check ────────────────────────────────────────────────────────
      HEALTH_PORT: "8080"
    volumes:
      # Named volumes (declared in the top-level `volumes:` section)
      - bot-detector-logs:/var/log/bot_detector
      - bot-detector-models:/var/lib/bot_detector
      # Reputation CSVs (test stubs), mounted read-only
      - /ja4-platform/tests/integration/platform/csv-stubs/bot_ip.csv:/data/bot_ip.csv:ro
      - /ja4-platform/tests/integration/platform/csv-stubs/bot_ja4.csv:/data/bot_ja4.csv:ro
      - /ja4-platform/tests/integration/platform/csv-stubs/asn_reputation.csv:/data/asn_reputation.csv:ro
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      # Same port number as HEALTH_PORT above.
      - "0.0.0.0:8080:8080"
    networks:
      - analysis-net

  # ---------------------------------------------------------------------------
  # dashboard: built from the repository, started once ClickHouse is healthy.
  # ---------------------------------------------------------------------------
  dashboard:
    build:
      context: /ja4-platform
      dockerfile: services/dashboard/Dockerfile
    container_name: ja4-dashboard
    environment:
      # Port 8123 is the one the clickhouse service labels as its HTTP API.
      CLICKHOUSE_HOST: "clickhouse"
      CLICKHOUSE_PORT: "8123"
      CLICKHOUSE_USER: "default"
      CLICKHOUSE_PASSWORD: ""
      CLICKHOUSE_DB_PROCESSING: "ja4_processing"
      CLICKHOUSE_DB_LOGS: "ja4_logs"
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      - "0.0.0.0:8000:8000"
    networks:
      - analysis-net

# Bridge network that all three services of this stack attach to.
networks:
  analysis-net:
    driver: bridge

# Named volumes mounted by the bot-detector service; no options are set,
# so each one is declared as an explicit empty mapping.
volumes:
  bot-detector-logs: {}
  bot-detector-models: {}