feat(e2e): add distributed E2E test framework with parametric traffic generation

Add run-e2e-test.sh with CLI parameters (--hits, --http-ratio, --dns, --tls,
--src-ips, --keep-analysis, --up) for configurable traffic generation. Traffic
runs from VM endpoints with multiple source IPs (alias IPs on eth0) to produce
distinct sessions for the ML pipeline. Fix curl TLS flags (--tlsv1.2 instead
of --tls-v1-2), skip redundant local verification in distributed mode, and
fix dashboard is_available() cache that never retried after ClickHouse recovery.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-15 00:09:32 +02:00
parent 7894d39f1c
commit f88b739992
40 changed files with 2154 additions and 337 deletions

View File

@ -0,0 +1,122 @@
# =============================================================================
# Stack analysis — ClickHouse + bot-detector + dashboard
#
# Déployée sur la VM analysis (192.168.42.10) pour le test E2E distribué.
# Les endpoints EL8/9/10 envoient leurs logs ja4ebpf vers ce ClickHouse.
# =============================================================================
# Services of the analysis stack: ClickHouse, bot-detector and dashboard.
# All three join the analysis-net network. Environment values are quoted so
# that each container receives the exact text written here (an unquoted 0.10
# would be re-serialised as 0.1 by the YAML loader).
services:
  clickhouse:
    image: clickhouse/clickhouse-server:24.8
    hostname: clickhouse
    ports:
      - "0.0.0.0:9000:9000"  # Native protocol (ja4ebpf on the endpoints)
      - "0.0.0.0:8123:8123"  # HTTP API (bot-detector, dashboard, checks)
    environment:
      CLICKHOUSE_DB: ja4_processing
      CLICKHOUSE_USER: default
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    volumes:
      # Initialization script (patches credentials for testing).
      # NOTE(review): mounted read-write, unlike the schema files below;
      # confirm whether ":ro" can be added here as well.
      - ../../integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh
      # SQL schema (reuses the shared files), mounted under /initdb-src
      - ../../../shared/clickhouse/00_database.sql:/initdb-src/00_database.sql:ro
      - ../../../shared/clickhouse/01_raw_tables.sql:/initdb-src/01_raw_tables.sql:ro
      - ../../../shared/clickhouse/02_dictionaries.sql:/initdb-src/02_dictionaries.sql:ro
      - ../../../shared/clickhouse/03_anubis_tables.sql:/initdb-src/03_anubis_tables.sql:ro
      - ../../../shared/clickhouse/04_mv_http_logs.sql:/initdb-src/04_mv_http_logs.sql:ro
      - ../../../shared/clickhouse/05_aggregation_tables.sql:/initdb-src/05_aggregation_tables.sql:ro
      - ../../../shared/clickhouse/06_ml_tables.sql:/initdb-src/06_ml_tables.sql:ro
      - ../../../shared/clickhouse/07_ai_features_view.sql:/initdb-src/07_ai_features_view.sql:ro
      - ../../../shared/clickhouse/08_users.sql:/initdb-src/08_users.sql:ro
      - ../../../shared/clickhouse/09_audit_table.sql:/initdb-src/09_audit_table.sql:ro
      - ../../../shared/clickhouse/10_perf_indexes.sql:/initdb-src/10_perf_indexes.sql:ro
      - ../../../shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro
      - ../../../shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro
      - ../../../shared/data/browser_h2.csv:/initdb-src/browser_h2.csv:ro
      # CSV stubs for the ClickHouse dictionaries
      - ../../integration/platform/csv-stubs:/var/lib/clickhouse/user_files
    healthcheck:
      # Healthy once a trivial query succeeds; the other two services wait
      # on this through depends_on / condition: service_healthy.
      test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
      interval: 5s
      timeout: 3s
      retries: 30
    networks: [analysis-net]

  bot-detector:
    build:
      context: /ja4-platform
      dockerfile: services/bot-detector/bot_detector/Dockerfile
    container_name: bot_detector_ai
    restart: unless-stopped
    environment:
      # -- ClickHouse ---------------------------------------------------------
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_DB: ja4_processing
      CLICKHOUSE_DB_LOGS: ja4_logs
      CLICKHOUSE_DB_PROCESSING: ja4_processing
      CLICKHOUSE_USER: default
      # NOTE(review): empty password while the ClickHouse ports are published
      # on 0.0.0.0; presumably acceptable for the isolated test VM only.
      CLICKHOUSE_PASSWORD: ""
      # -- Accelerated cycle for tests ----------------------------------------
      CYCLE_INTERVAL_SEC: "30"
      MAX_CONSECUTIVE_FAILURES: "5"
      # -- ML -----------------------------------------------------------------
      ISOLATION_CONTAMINATION: "0.02"
      ANOMALY_THRESHOLD: "-0.03"
      MIN_VALID_FEATURE_RATIO: "0.10"
      MIN_HUMAN_BASELINE: "5"
      BASELINE_ACCEPT_UNKNOWN: "true"
      # -- Features disabled to speed up tests --------------------------------
      ENABLE_SHAP: "false"
      ENABLE_CLUSTERING: "false"
      ENABLE_MULTIWINDOW: "false"
      # -- Logs ---------------------------------------------------------------
      BOT_DETECTOR_LOG: /var/log/bot_detector/decisions.jsonl
      LOG_BACKUP_COUNT: "3"
      # -- Health check -------------------------------------------------------
      HEALTH_PORT: "8080"
    volumes:
      - bot-detector-logs:/var/log/bot_detector
      - bot-detector-models:/var/lib/bot_detector
      # Reputation CSVs (test stubs)
      - /ja4-platform/tests/integration/platform/csv-stubs/bot_ip.csv:/data/bot_ip.csv:ro
      - /ja4-platform/tests/integration/platform/csv-stubs/bot_ja4.csv:/data/bot_ja4.csv:ro
      - /ja4-platform/tests/integration/platform/csv-stubs/asn_reputation.csv:/data/asn_reputation.csv:ro
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      - "0.0.0.0:8080:8080"
    networks: [analysis-net]

  dashboard:
    build:
      context: /ja4-platform
      dockerfile: services/dashboard/Dockerfile
    container_name: ja4-dashboard
    environment:
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_PORT: "8123"
      CLICKHOUSE_USER: default
      CLICKHOUSE_PASSWORD: ""
      CLICKHOUSE_DB_PROCESSING: ja4_processing
      CLICKHOUSE_DB_LOGS: ja4_logs
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      - "0.0.0.0:8000:8000"
    networks: [analysis-net]
# Single bridge network that the clickhouse, bot-detector and dashboard
# services attach to.
networks:
  analysis-net:
    driver: bridge
# Named volumes mounted by bot-detector at /var/log/bot_detector and
# /var/lib/bot_detector. Empty mappings: no per-volume options are set.
volumes:
  bot-detector-logs: {}
  bot-detector-models: {}