feat(e2e): add distributed E2E test framework with parametric traffic generation

Add run-e2e-test.sh with CLI parameters (--hits, --http-ratio, --dns, --tls,
--src-ips, --keep-analysis, --up) for configurable traffic generation. Traffic
runs from VM endpoints with multiple source IPs (alias IPs on eth0) to produce
distinct sessions for the ML pipeline. Fix curl TLS flags (--tlsv1.2 instead
of --tls-v1-2), skip redundant local verification in distributed mode, and
fix dashboard is_available() cache that never retried after ClickHouse recovery.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-15 00:09:32 +02:00
parent 7894d39f1c
commit f88b739992
40 changed files with 2154 additions and 337 deletions

34
tests/vm/Vagrantfile vendored
View File

@ -54,6 +54,9 @@ Vagrant.configure("2") do |config|
# ═══════════════════════════════════════════════════════════════════════════
# "centos8" endpoint VM. Declared with autostart: false, so it is only
# brought up when named explicitly (e.g. `vagrant up centos8`).
config.vm.define "centos8", autostart: false do |node|
node.vm.box = "centos/8"
# DHCP-addressed interface on the "ja4-e2e" libvirt network.
node.vm.network "private_network",
libvirt__network_name: "ja4-e2e",
type: "dhcp"
node.vm.provision "shell", path: "provision-el8.sh"
node.vm.post_up_message = "VM centos8 prête ! Tests : make test-vm-centos8"
end
@ -63,6 +66,9 @@ Vagrant.configure("2") do |config|
# ═══════════════════════════════════════════════════════════════════════════
config.vm.define "rocky9", primary: true do |node|
node.vm.box = "generic/rocky9"
node.vm.network "private_network",
libvirt__network_name: "ja4-e2e",
type: "dhcp"
node.vm.provision "shell", path: "provision.sh"
node.vm.post_up_message = <<~MSG
VM rocky9 prête !
@ -80,8 +86,36 @@ Vagrant.configure("2") do |config|
# ═══════════════════════════════════════════════════════════════════════════
# "rocky10" endpoint VM. Note that, despite its name, the box configured
# below is "almalinux/10". Declared with autostart: false.
config.vm.define "rocky10", autostart: false do |node|
node.vm.box = "almalinux/10"
# DHCP-addressed interface on the "ja4-e2e" libvirt network.
node.vm.network "private_network",
libvirt__network_name: "ja4-e2e",
type: "dhcp"
node.vm.provision "shell", path: "provision.sh"
node.vm.post_up_message = "VM rocky10 prête ! Tests : make test-vm-rocky10"
end
# ═══════════════════════════════════════════════════════════════════════════
# VM 4: Analysis Server (ClickHouse + bot-detector + dashboard)
#
# Central VM for the distributed E2E test. The EL8/9/10 endpoints send their
# ja4ebpf logs to the ClickHouse instance on this VM (192.168.42.10).
# ═══════════════════════════════════════════════════════════════════════════
config.vm.define "analysis", autostart: false do |node|
node.vm.box = "generic/rocky9"
# Static address (192.168.42.10/24) on the "ja4-e2e" libvirt network.
node.vm.network "private_network", ip: "192.168.42.10",
libvirt__network_name: "ja4-e2e",
libvirt__netmask: "255.255.255.0"
node.vm.provider :libvirt do |v|
v.cpus = 4
v.memory = 12288 # 12 GB — building torch + isotree is RAM-hungry
end
node.vm.provision "shell", path: "provision-analysis.sh"
# Message printed by Vagrant once the VM is up (kept verbatim: runtime text).
node.vm.post_up_message = <<~MSG
VM analysis prête !
Depuis la racine du projet :
make test-e2e # test E2E complet (capture + ML + dashboard)
make test-e2e-quick # test rapide avec trafic réduit
MSG
end
end

View File

@ -0,0 +1,122 @@
# =============================================================================
# Analysis stack — ClickHouse + bot-detector + dashboard
#
# Deployed on the analysis VM (192.168.42.10) for the distributed E2E test.
# The EL8/9/10 endpoints ship their ja4ebpf logs to this ClickHouse instance.
#
# Environment values that look like numbers are quoted on purpose: an unquoted
# `0.10` is parsed by YAML as the float 0.1 before it is turned back into a
# string for the container, so quoting keeps the exact text.
# =============================================================================
services:
  clickhouse:
    image: clickhouse/clickhouse-server:24.8
    hostname: clickhouse
    ports:
      - "0.0.0.0:9000:9000"  # Native protocol (ja4ebpf on the endpoints)
      - "0.0.0.0:8123:8123"  # HTTP API (bot-detector, dashboard, checks)
    environment:
      CLICKHOUSE_DB: ja4_processing
      CLICKHOUSE_USER: default
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: "1"
    volumes:
      # Initialisation script (patches credentials for the test setup)
      - ../../integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh
      # SQL schema (reuses the shared files)
      - ../../../shared/clickhouse/00_database.sql:/initdb-src/00_database.sql:ro
      - ../../../shared/clickhouse/01_raw_tables.sql:/initdb-src/01_raw_tables.sql:ro
      - ../../../shared/clickhouse/02_dictionaries.sql:/initdb-src/02_dictionaries.sql:ro
      - ../../../shared/clickhouse/03_anubis_tables.sql:/initdb-src/03_anubis_tables.sql:ro
      - ../../../shared/clickhouse/04_mv_http_logs.sql:/initdb-src/04_mv_http_logs.sql:ro
      - ../../../shared/clickhouse/05_aggregation_tables.sql:/initdb-src/05_aggregation_tables.sql:ro
      - ../../../shared/clickhouse/06_ml_tables.sql:/initdb-src/06_ml_tables.sql:ro
      - ../../../shared/clickhouse/07_ai_features_view.sql:/initdb-src/07_ai_features_view.sql:ro
      - ../../../shared/clickhouse/08_users.sql:/initdb-src/08_users.sql:ro
      - ../../../shared/clickhouse/09_audit_table.sql:/initdb-src/09_audit_table.sql:ro
      - ../../../shared/clickhouse/10_perf_indexes.sql:/initdb-src/10_perf_indexes.sql:ro
      - ../../../shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro
      - ../../../shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro
      - ../../../shared/data/browser_h2.csv:/initdb-src/browser_h2.csv:ro
      # CSV stubs backing the ClickHouse dictionaries
      - ../../integration/platform/csv-stubs:/var/lib/clickhouse/user_files
    healthcheck:
      test: ["CMD", "clickhouse-client", "--query", "SELECT 1"]
      interval: 5s
      timeout: 3s
      retries: 30
    networks: [analysis-net]

  bot-detector:
    build:
      context: /ja4-platform
      dockerfile: services/bot-detector/bot_detector/Dockerfile
    container_name: bot_detector_ai
    restart: unless-stopped
    environment:
      # ── ClickHouse ──────────────────────────────────────────────────────────
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_DB: ja4_processing
      CLICKHOUSE_DB_LOGS: ja4_logs
      CLICKHOUSE_DB_PROCESSING: ja4_processing
      CLICKHOUSE_USER: default
      CLICKHOUSE_PASSWORD: ""
      # ── Shortened cycle for tests ───────────────────────────────────────────
      CYCLE_INTERVAL_SEC: "30"
      MAX_CONSECUTIVE_FAILURES: "5"
      # ── ML ──────────────────────────────────────────────────────────────────
      ISOLATION_CONTAMINATION: "0.02"
      ANOMALY_THRESHOLD: "-0.03"
      MIN_VALID_FEATURE_RATIO: "0.10"
      MIN_HUMAN_BASELINE: "5"
      BASELINE_ACCEPT_UNKNOWN: "true"
      # ── Features disabled to speed the tests up ─────────────────────────────
      ENABLE_SHAP: "false"
      ENABLE_CLUSTERING: "false"
      ENABLE_MULTIWINDOW: "false"
      # ── Logs ────────────────────────────────────────────────────────────────
      BOT_DETECTOR_LOG: /var/log/bot_detector/decisions.jsonl
      LOG_BACKUP_COUNT: "3"
      # ── Health check ────────────────────────────────────────────────────────
      HEALTH_PORT: "8080"
    volumes:
      - bot-detector-logs:/var/log/bot_detector
      - bot-detector-models:/var/lib/bot_detector
      # Reputation CSVs (test stubs)
      - /ja4-platform/tests/integration/platform/csv-stubs/bot_ip.csv:/data/bot_ip.csv:ro
      - /ja4-platform/tests/integration/platform/csv-stubs/bot_ja4.csv:/data/bot_ja4.csv:ro
      - /ja4-platform/tests/integration/platform/csv-stubs/asn_reputation.csv:/data/asn_reputation.csv:ro
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      - "0.0.0.0:8080:8080"
    networks: [analysis-net]

  dashboard:
    build:
      context: /ja4-platform
      dockerfile: services/dashboard/Dockerfile
    container_name: ja4-dashboard
    environment:
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_PORT: "8123"
      CLICKHOUSE_USER: default
      CLICKHOUSE_PASSWORD: ""
      CLICKHOUSE_DB_PROCESSING: ja4_processing
      CLICKHOUSE_DB_LOGS: ja4_logs
    depends_on:
      clickhouse:
        condition: service_healthy
    ports:
      - "0.0.0.0:8000:8000"
    networks: [analysis-net]

networks:
  analysis-net:
    driver: bridge

volumes:
  bot-detector-logs:
  bot-detector-models:

59
tests/vm/provision-analysis.sh Executable file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env bash
# =============================================================================
# provision-analysis.sh — Provisions the analysis VM
#                         (ClickHouse + bot-detector + dashboard)
#
# Installs only Docker and the prerequisites of the analysis stack.
# No eBPF toolchain, no Go, no web servers (those belong to the endpoints).
# =============================================================================
set -euo pipefail

# log MESSAGE... — timestamped progress line on stdout.
log() {
  echo "[provision] $(date +%H:%M:%S) $*"
}

# ── 1. System update + repositories ──────────────────────────────────────────
log "Mise à jour des dépôts..."
dnf install -y epel-release dnf-plugins-core
dnf config-manager --enable crb
dnf update -y --quiet

# ── 2. Docker + docker-compose ───────────────────────────────────────────────
log "Installation de Docker..."
dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
dnf install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin

# Original author's note: on kernel 6.12+ (el10+), nf_tables is incompatible
# with iptables-nft. If the first start fails, write a daemon.json that turns
# Docker's iptables handling off before the service is enabled below.
systemctl start docker 2>/dev/null || {
  log "Docker: fallback iptables=false pour kernel $(uname -r)"
  mkdir -p /etc/docker
  echo '{"iptables": false}' > /etc/docker/daemon.json
}
systemctl enable --now docker
usermod -aG docker vagrant
chmod 666 /var/run/docker.sock || true

# ── 3. Firewall — open the analysis ports ────────────────────────────────────
# 9000 = ClickHouse native, 8123 = ClickHouse HTTP,
# 8080 = bot-detector health, 8000 = dashboard.
# Every firewall-cmd call is best-effort: errors are silenced and ignored.
log "Configuration firewall..."
for port in 9000 8123 8080 8000; do
  firewall-cmd --add-port="${port}/tcp" --permanent 2>/dev/null || true
done
firewall-cmd --reload 2>/dev/null || true

# ── 4. Swap (safety margin for the Docker build of the ML images) ────────────
log "Configuration swap 4G..."
[ -f /swapfile ] || {
  dd if=/dev/zero of=/swapfile bs=1M count=4096 status=progress
  chmod 600 /swapfile
  mkswap /swapfile
  swapon /swapfile
  echo '/swapfile none swap sw 0 0' >> /etc/fstab
}

# ── 5. Utilities ─────────────────────────────────────────────────────────────
log "Installation outils..."
dnf install -y curl python3 python3-pip

# ── 6. Working directory ─────────────────────────────────────────────────────
# Per the original note, the project tree is rsynced by Vagrant
# (config.vm.synced_folder in the Vagrantfile), so nothing is created here.
log "Provisionnement analysis terminé !"
log "Lancer 'make test-e2e' depuis le host pour démarrer le test distribué."

795
tests/vm/run-e2e-test.sh Executable file
View File

@ -0,0 +1,795 @@
#!/usr/bin/env bash
# =============================================================================
# run-e2e-test.sh — Test E2E distribué ja4-platform
#
# Architecture :
# 3 VMs endpoint (centos8/rocky9/rocky10) : nginx + ja4ebpf
# 1 VM analysis (192.168.42.10) : ClickHouse + bot-detector + dashboard
# Host : orchestrateur + génération de trafic
#
# Pipeline testé :
# trafic host → endpoints → ja4ebpf → ClickHouse central →
# MV agrégation → bot-detector ML → dashboard API
#
# Usage :
# make e2e-up && make test-e2e
# TRAFFIC_COUNT=100 make test-e2e-quick
# ./run-e2e-test.sh --hits 1000 --http-ratio 0.3 --dns 2 --tls 1.2,1.3
#
# Paramètres :
# --hits N Nombre total de requêtes par VM (déf. 500)
# --http-ratio R Ratio HTTP/HTTPS : R = part HTTP (0.0–1.0, déf. 0.2)
# --dns N Nombre de hostnames SNI utilisés (1–4, déf. 4)
# --tls VERS Versions TLS à utiliser, séparées par virgules (déf. 1.2,1.3)
# --src-ips N Nombre d'IPs sources par VM (alias sur eth0 dans la VM, déf. 1)
# --keep-analysis Conserver la stack analysis (CH + dashboard) après le test
# --up Démarrer la stack analysis et les endpoints, puis s'arrêter
# (pas de trafic ni vérification — utile pour inspection manuelle)
# =============================================================================
set -euo pipefail

# Directory holding this script, and the repository root two levels above it.
VM_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "${VM_DIR}/../.." && pwd)"

# Fixed test-bed topology.
ANALYSIS_IP="192.168.42.10"
ENDPOINT_VMS="rocky9 rocky10"
ALL_VMS="rocky9 rocky10 analysis"
STACK="nginx"

# ── Default parameters (overridable through the environment or the CLI) ──
# `: "${VAR:=value}"` keeps a non-empty inherited value and assigns the
# default otherwise.
: "${TRAFFIC_COUNT:=500}"
: "${HTTP_RATIO:=0.2}"
: "${DNS_COUNT:=4}"
: "${TLS_VERSIONS:=1.2,1.3}"
: "${SRC_IP_COUNT:=1}"
: "${KEEP_ANALYSIS:=false}"
: "${UP_ONLY:=false}"
# ── CLI argument parsing ──

# usage [EXIT_CODE]
#   Prints the option summary on stdout and terminates the script.
#   EXIT_CODE defaults to 0 (explicit -h/--help); error paths pass 1 so that
#   a mistyped option is not reported as success to make/CI.
usage() {
  local exit_code="${1:-0}"
  echo "Usage: $0 [OPTIONS]"
  echo ""
  echo "Options :"
  echo " --hits N Nombre de requêtes par VM (déf. 500)"
  echo " --http-ratio R Ratio HTTP (0=100% HTTPS, 1=100% HTTP, déf. 0.2)"
  echo " --dns N Nombre de hostnames SNI utilisés (1-4, déf. 4)"
  echo " --tls VERS Versions TLS, séparées par virgules (déf. 1.2,1.3)"
  echo " --src-ips N Nombre d'IPs sources par VM (déf. 1)"
  echo " --keep-analysis Conserver la stack analysis (CH + dashboard) après le test"
  echo " --up Démarrer stack analysis + endpoints, puis s'arrêter"
  echo " -h, --help Afficher cette aide"
  exit "$exit_code"
}

# _require_value OPTION REMAINING_ARGC
#   Aborts with a readable message when a value-taking OPTION is the last
#   word on the command line. Without this, reading "$2" under `set -u`
#   kills the script with a bare "unbound variable" error.
_require_value() {
  if [ "$2" -lt 2 ]; then
    echo "ERREUR: l'option $1 requiert une valeur" >&2
    usage 1 >&2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --hits)          _require_value "$1" "$#"; TRAFFIC_COUNT="$2"; shift 2 ;;
    --http-ratio)    _require_value "$1" "$#"; HTTP_RATIO="$2"; shift 2 ;;
    --dns)           _require_value "$1" "$#"; DNS_COUNT="$2"; shift 2 ;;
    --tls)           _require_value "$1" "$#"; TLS_VERSIONS="$2"; shift 2 ;;
    --src-ips)       _require_value "$1" "$#"; SRC_IP_COUNT="$2"; shift 2 ;;
    --keep-analysis) KEEP_ANALYSIS="true"; shift ;;
    --up)            UP_ONLY="true"; shift ;;
    -h|--help)       usage ;;
    *)
      # Unknown option: report on stderr and exit non-zero.
      echo "Option inconnue : $1" >&2
      usage 1 >&2
      ;;
  esac
done
# ── Validation ──

# _is_uint VALUE — true when VALUE is a non-empty string of decimal digits.
# Integer parameters are checked with this first: `[ abc -lt 1 ]` is an error
# (status 2) that an `if` reads as "false", so a non-numeric value would
# otherwise slip through a pair of range tests joined by `||`.
_is_uint() { [[ "$1" =~ ^[0-9]+$ ]]; }

if ! _is_uint "$TRAFFIC_COUNT"; then
  echo "ERREUR: --hits doit être un entier >= 0" >&2; exit 1
fi
if ! _is_uint "$SRC_IP_COUNT"; then
  echo "ERREUR: --src-ips doit être un entier >= 0" >&2; exit 1
fi
if ! _is_uint "$DNS_COUNT"; then
  echo "ERREUR: --dns doit être entre 1 et 4" >&2; exit 1
fi
# Force base 10 so that a value such as "08" is not rejected later as an
# invalid octal literal inside $(( )).
TRAFFIC_COUNT=$((10#$TRAFFIC_COUNT))
SRC_IP_COUNT=$((10#$SRC_IP_COUNT))
DNS_COUNT=$((10#$DNS_COUNT))
if [ "$DNS_COUNT" -lt 1 ] || [ "$DNS_COUNT" -gt 4 ]; then
  echo "ERREUR: --dns doit être entre 1 et 4" >&2; exit 1
fi

# The ratio is handed to Python as an argument, never spliced into the
# program text, so the option value cannot be executed as code.
if ! python3 -c 'import sys
ratio = float(sys.argv[1])
sys.exit(0 if 0.0 <= ratio <= 1.0 else 1)' "$HTTP_RATIO" 2>/dev/null; then
  echo "ERREUR: --http-ratio doit être entre 0.0 et 1.0" >&2; exit 1
fi

# ── Map the requested TLS versions to curl flags ──
# NOTE(review): the curl man page describes each --tlsv1.x option as a
# *minimum* version and says these options override one another, so listing
# several of them does not by itself yield a mix of versions — confirm.
CURL_TLS_FLAGS=""
for v in ${TLS_VERSIONS//,/ }; do
  case "$v" in
    1.0|1.1|1.2|1.3) CURL_TLS_FLAGS="$CURL_TLS_FLAGS --tlsv${v}" ;;
    *) echo "[e2e] WARN: Version TLS '$v' non reconnue, ignorée" ;;
  esac
done
if [ -z "$CURL_TLS_FLAGS" ]; then
  # Nothing usable was given: fall back to the documented default.
  CURL_TLS_FLAGS="--tlsv1.2 --tlsv1.3"
  TLS_VERSIONS="1.2,1.3"
fi

# HTTP vs HTTPS request counts derived from the ratio.
# HTTPS is computed as the remainder so that both always add up to
# TRAFFIC_COUNT; truncating two independent float products could lose a
# request (e.g. 100 hits at ratio 0.29 gave 28 + 71).
HTTP_COUNT=$(python3 -c 'import sys
print(round(int(sys.argv[1]) * float(sys.argv[2])))' "$TRAFFIC_COUNT" "$HTTP_RATIO")
HTTPS_COUNT=$((TRAFFIC_COUNT - HTTP_COUNT))
# ANSI escape sequences, interpreted by `echo -e` in the helpers below.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
RESET='\033[0m'
BOLD='\033[1m'

# Running tallies, incremented by pass / fail / warn.
PASS_COUNT=0
FAIL_COUNT=0
WARN_COUNT=0

# log MESSAGE... — timestamped progress line.
log() {
  echo -e "${BOLD}[e2e]${RESET} $(date +%H:%M:%S) $*"
}

# pass MESSAGE... — print a green PASS line and count it. Always returns 0.
pass() {
  echo -e " ${GREEN}PASS${RESET} $*"
  PASS_COUNT=$((PASS_COUNT + 1))
}

# fail MESSAGE... — print a red FAIL line and count it. Always returns 0.
fail() {
  echo -e " ${RED}FAIL${RESET} $*"
  FAIL_COUNT=$((FAIL_COUNT + 1))
}

# warn MESSAGE... — print a yellow WARN line and count it. Always returns 0.
warn() {
  echo -e " ${YELLOW}WARN${RESET} $*"
  WARN_COUNT=$((WARN_COUNT + 1))
}
# ch_query SQL
#   POSTs SQL to the ClickHouse HTTP API of the analysis VM (port 8123) and
#   prints the response body. When curl fails, an empty line is printed
#   instead and the function still returns 0.
ch_query() {
  if ! curl -sf "http://${ANALYSIS_IP}:8123/" -d "$1" 2>/dev/null; then
    echo ""
  fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 0: Environment setup
# ═════════════════════════════════════════════════════════════════════════════
# phase0_setup
#   1. Boots every VM of $ALL_VMS whose `vagrant status` does not say "running".
#   2. Runs `vagrant rsync` for each of them.
#   3. Keeps only the endpoints on which `which ja4ebpf` succeeds and whose
#      eth0 IPv4 address could be read.
#   Globals written: ENDPOINT_VMS (narrowed to the active endpoints) and
#   ENDPOINT_IPS_ARRAY (their eth0 addresses, in the same order).
#   Records a FAIL — instead of an unconditional PASS — when no endpoint is left.
phase0_setup() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 0 : Setup environnement ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  cd "$VM_DIR"

  # Make sure every VM is up.
  for vm in $ALL_VMS; do
    if ! vagrant status "$vm" 2>/dev/null | grep -q "running"; then
      log "Démarrage de $vm..."
      vagrant up "$vm" 2>&1 | tail -5
    else
      log "$vm déjà en cours d'exécution"
    fi
  done

  # Push the project files to all VMs.
  log "Synchronisation des fichiers..."
  for vm in $ALL_VMS; do
    vagrant rsync "$vm" 2>&1 | tail -1
  done

  # Discover the endpoints' eth0 addresses, dropping those without ja4ebpf.
  declare -A VM_IPS
  ACTIVE_ENDPOINTS=""
  for vm in $ENDPOINT_VMS; do
    if ! vagrant ssh "$vm" -- 'which ja4ebpf' 2>/dev/null | grep -q ja4ebpf; then
      warn "$vm ignoré — ja4ebpf non installé"
      continue
    fi
    # `|| true`: with `set -e` + pipefail, a failing `vagrant ssh` would abort
    # the whole script right at this assignment, so the empty-address branch
    # below could never report the problem.
    VM_IPS[$vm]=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \
      | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') || true
    if [ -z "${VM_IPS[$vm]}" ]; then
      fail "Impossible d'obtenir l'IP eth0 de $vm"
    else
      log " $vm : ${VM_IPS[$vm]}"
      ACTIVE_ENDPOINTS="$ACTIVE_ENDPOINTS $vm"
    fi
  done
  # Unquoted on purpose: word splitting trims the leading blank.
  ENDPOINT_VMS=$(echo $ACTIVE_ENDPOINTS)

  # Publish the addresses for the following phases.
  ENDPOINT_IPS_ARRAY=()
  for vm in $ENDPOINT_VMS; do
    ENDPOINT_IPS_ARRAY+=("${VM_IPS[$vm]}")
  done

  if [ -n "$ENDPOINT_VMS" ]; then
    pass "Endpoints actifs : ${ENDPOINT_VMS}"
  else
    fail "Aucun endpoint actif — ja4ebpf absent ou IP eth0 introuvable"
  fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 1: Start the analysis stack
# ═════════════════════════════════════════════════════════════════════════════
# _wait_http LABEL URL ATTEMPTS
#   Polls URL with curl, sleeping 2 s between tries, at most ATTEMPTS times.
#   Records a PASS on the first success and a FAIL when every try failed
#   (previously a timeout fell through without any verdict).
#   Always returns 0 so a timeout is tallied without tripping `set -e`.
_wait_http() {
  local label="$1" url="$2" attempts="$3" attempt
  for attempt in $(seq 1 "$attempts"); do
    # --connect-timeout bounds a single try when the VM is unreachable;
    # otherwise one blocked connect could outlast the advertised maximum.
    if curl -sf --connect-timeout 2 "$url" >/dev/null 2>&1; then
      pass "${label} prêt (${attempt}*2s)"
      return 0
    fi
    sleep 2
  done
  fail "${label} injoignable après ${attempts} tentatives (${url})"
  return 0
}

# phase1_analysis
#   Recreates the docker compose stack on the analysis VM from empty volumes,
#   waits for ClickHouse, bot-detector and the dashboard to answer over HTTP,
#   and checks that both databases exist and that http_logs_raw starts empty.
phase1_analysis() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 1 : Stack analysis (ClickHouse + ML + Dashboard) ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  cd "$VM_DIR"

  # Drop the volumes so that the run starts from an empty database.
  log "Purge des volumes Docker pour DB vierge..."
  vagrant ssh analysis -- "cd /ja4-platform/tests/vm/analysis && docker compose down -v --remove-orphans" 2>&1 | tail -5

  # Bring the stack up through docker compose.
  log "Démarrage de la stack analysis sur la VM..."
  vagrant ssh analysis -- "cd /ja4-platform/tests/vm/analysis && docker compose up -d --build" 2>&1 | tail -20

  log "Attente ClickHouse (max 120s)..."
  _wait_http "ClickHouse" "http://${ANALYSIS_IP}:8123/ping" 60

  # Both databases must exist.
  local db_count
  db_count=$(ch_query "SELECT count() FROM system.databases WHERE name IN ('ja4_logs','ja4_processing')" | tr -d ' \n')
  [ "$db_count" = "2" ] \
    && pass "Bases ja4_logs + ja4_processing créées" \
    || fail "Bases manquantes (obtenu: $db_count)"

  # The raw table must be empty on a fresh instance.
  local raw_init
  raw_init=$(ch_query "SELECT count() FROM ja4_logs.http_logs_raw" | tr -d ' \n')
  [ "${raw_init:-0}" -eq 0 ] 2>/dev/null \
    && pass "ClickHouse vierge avant test (http_logs_raw=0)" \
    || warn "ClickHouse non vide (http_logs_raw=${raw_init}) — données résiduelles"

  log "Attente bot-detector (max 120s)..."
  _wait_http "bot-detector" "http://${ANALYSIS_IP}:8080/" 60

  log "Attente dashboard (max 60s)..."
  _wait_http "dashboard" "http://${ANALYSIS_IP}:8000/health" 30
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 2: Start the endpoints
# ═════════════════════════════════════════════════════════════════════════════
# phase2_endpoints
#   Stops leftover services on every VM of $ENDPOINT_VMS, launches
#   `run-tests-vm.sh $STACK start` on each of them in the background with
#   CH_HOST pointing at the analysis VM, sleeps 30 s, then probes /health on
#   ports 80 and 443 of each endpoint.
#   Globals written: PIDS — PIDs of the background `vagrant ssh` sessions
#   (nothing in this function waits on them).
phase2_endpoints() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 2 : Endpoints (nginx + ja4ebpf → analysis CH) ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  cd "$VM_DIR"

  # Stop whatever a previous session left running (best-effort).
  log "Arrêt des services existants sur les endpoints..."
  for vm in $ENDPOINT_VMS; do
    vagrant ssh "$vm" -- "sudo nginx -s stop 2>/dev/null; sudo pkill ja4ebpf 2>/dev/null; sudo pkill httpd 2>/dev/null; sudo pkill hitch 2>/dev/null; sudo pkill varnishd 2>/dev/null" 2>/dev/null || true
  done
  sleep 2

  # Start each endpoint in the background; ja4ebpf is pointed at the
  # ClickHouse of the analysis VM through CH_HOST.
  PIDS=()
  for vm in $ENDPOINT_VMS; do
    log "Démarrage $vm (nginx + ja4ebpf → ${ANALYSIS_IP})..."
    vagrant ssh "$vm" -- "sudo rm -f /tmp/ja4ebpf-traffic-done" 2>/dev/null || true
    vagrant ssh "$vm" -- "sudo CH_HOST=${ANALYSIS_IP} bash /ja4-platform/tests/vm/run-tests-vm.sh ${STACK} start" &
    PIDS+=($!)
  done

  # Fixed grace period for the services to come up.
  log "Attente démarrage des endpoints (30s)..."
  sleep 30

  # Probe HTTP and HTTPS on every endpoint.
  local vm_ip
  for vm in $ENDPOINT_VMS; do
    # `|| true`: with `set -e` + pipefail a failed lookup would otherwise
    # abort the script instead of producing a warning.
    vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \
      | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') || true
    if [ -z "$vm_ip" ]; then
      # Without an address the probes would hit "http:///health".
      warn "$vm : IP eth0 introuvable — vérifications HTTP/HTTPS ignorées"
      continue
    fi
    if curl -sf "http://${vm_ip}/health" >/dev/null 2>&1; then
      pass "$vm HTTP OK (${vm_ip}:80)"
    else
      warn "$vm HTTP injoignable (${vm_ip}:80)"
    fi
    if curl -sf -k "https://${vm_ip}/health" >/dev/null 2>&1; then
      pass "$vm HTTPS OK (${vm_ip}:443)"
    else
      warn "$vm HTTPS injoignable (${vm_ip}:443)"
    fi
  done
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 3: Traffic generation
# ═════════════════════════════════════════════════════════════════════════════
# phase3_traffic
#   Generates traffic *from* each endpoint VM towards all endpoints:
#     1. optional alias IPs on eth0 (SRC_IP_COUNT > 1),
#     2. HTTPS_COUNT curl requests per VM, run in parallel across VMs,
#     3. HTTPS_COUNT HTTP/2 requests per VM when python httpx is importable,
#     4. HTTP_COUNT plain-HTTP requests per VM against localhost.
#   Reads: ENDPOINT_VMS, DNS_COUNT, TLS_VERSIONS, SRC_IP_COUNT, HTTPS_COUNT,
#   HTTP_COUNT. Reports through log/pass/warn/fail.
#
#   Fixes over the previous version:
#     - background results are written to per-VM files and collected with
#       `wait PID`; `$(wait ...)` ran in a subshell and could neither wait for
#       the jobs nor capture their output;
#     - each curl request pins one TLS version (--tlsv1.X --tls-max 1.X),
#       because several stacked --tlsv1.X flags only set a minimum version;
#     - the Python SNI list is built with split(); adjacent quoted words were
#       concatenated by Python into one string, so random.choice() returned
#       single characters;
#     - address lookups no longer abort the script under `set -e` + pipefail.
phase3_traffic() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 3 : Génération de trafic host → endpoints ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  local total_ok=0

  # SNI hostnames. Original author's note: certificate CN=platform.test,
  # nginx accepts any name through `server_name _`.
  local SNI_HOSTS_ALL=("platform.test" "api.platform.test" "www.platform.test" "app.platform.test")
  local SNI_HOSTS=("${SNI_HOSTS_ALL[@]:0:${DNS_COUNT}}")

  # TLS versions usable with curl, taken from the comma-separated list.
  # Unknown entries are dropped; an empty result falls back to 1.2 and 1.3.
  local tls_list="" tls_v
  for tls_v in ${TLS_VERSIONS//,/ }; do
    case "$tls_v" in
      1.0|1.1|1.2|1.3) tls_list="$tls_list $tls_v" ;;
    esac
  done
  [ -n "$tls_list" ] || tls_list="1.2 1.3"

  # ── Collect the eth0 address of every endpoint ──
  local -A VM_IPS_MAP
  for vm in $ENDPOINT_VMS; do
    VM_IPS_MAP[$vm]=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \
      | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') || true
  done

  # Target list: every endpoint, identical for all source VMs.
  local TARGET_IPS=""
  for target_vm in $ENDPOINT_VMS; do
    TARGET_IPS="$TARGET_IPS ${VM_IPS_MAP[$target_vm]}"
  done
  if [ -z "${TARGET_IPS// /}" ]; then
    # The remote loop indexes targets modulo their count; none means no run.
    fail "Aucune IP cible disponible — génération de trafic impossible"
    return 0
  fi

  # ── Add alias IPs on each VM to diversify the source addresses ──
  if [ "$SRC_IP_COUNT" -gt 1 ]; then
    log "Ajout de ${SRC_IP_COUNT} IPs sources sur chaque VM..."
    for vm in $ENDPOINT_VMS; do
      local base_ip="${VM_IPS_MAP[$vm]}"
      [ -n "$base_ip" ] || continue
      local net_prefix="${base_ip%.*}"
      local base_last="${base_ip##*.}"
      for i in $(seq 1 $((SRC_IP_COUNT - 1))); do
        local alias_last=$((base_last + 100 + i))
        # Wrap around to the low end of the /24 when past .254.
        [ "$alias_last" -gt 254 ] && alias_last=$((10 + i))
        local alias_ip="${net_prefix}.${alias_last}"
        vagrant ssh "$vm" -- "sudo ip addr add ${alias_ip}/24 dev eth0 2>/dev/null || true" 2>/dev/null || true
      done
    done
  fi

  # ── HTTPS traffic generated from the VMs (distinct source IPs) ──
  local results_dir
  results_dir=$(mktemp -d)
  local -A HTTPS_PIDS
  for src_vm in $ENDPOINT_VMS; do
    local src_ip="${VM_IPS_MAP[$src_vm]}"
    log "Génération depuis $src_vm ($src_ip) : ${HTTPS_COUNT} requêtes HTTPS (${SRC_IP_COUNT} IPs src)..."
    # Generator executed on the source VM. The here-document is unquoted:
    # ${...} is expanded locally, \$... is left for the remote shell.
    vagrant ssh "$src_vm" -- "bash -s" >"${results_dir}/${src_vm}.out" <<REMOTE_SCRIPT &
#!/bin/bash
set -uo pipefail
HITS=${HTTPS_COUNT}
TARGET_IPS=(${TARGET_IPS})
SNI_HOSTS=(${SNI_HOSTS[@]})
TLS_VERSIONS=(${tls_list})
# Source addresses currently configured on eth0
SRC_IPS=(\$(ip -4 addr show eth0 2>/dev/null | awk '/inet / {sub(/\/.*/, "", \$2); print \$2}'))
UA_BROWSER=(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15"
"Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0"
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0"
)
UA_BOT=(
"python-requests/2.32.3"
"curl/8.9.1"
"Go-http-client/2.0"
"python-httpx/0.28.1"
"Googlebot/2.1"
)
PATHS=("/" "/health" "/data" "/api/users" "/api/v1/status" "/api/v1/metrics" \
"/login" "/logout" "/api/search" "/static/main.js" "/static/style.css" \
"/favicon.ico" "/robots.txt" "/sitemap.xml" "/api/v2/data" "/admin")
ok=0
for i in \$(seq 1 \$HITS); do
idx=\$((i - 1))
target_ip="\${TARGET_IPS[\$((idx % \${#TARGET_IPS[@]}))]}"
sni_host="\${SNI_HOSTS[\$((idx % \${#SNI_HOSTS[@]}))]}"
path="\${PATHS[\$((idx % \${#PATHS[@]}))]}"
# Advance the TLS version once per full pass over the targets so that every
# target sees every requested version.
tls_v="\${TLS_VERSIONS[\$(( (idx / \${#TARGET_IPS[@]}) % \${#TLS_VERSIONS[@]} ))]}"
case \$((i % 10)) in
0|1|2|3|4) method="GET" ;;
5|6) method="POST" ;;
7) method="PUT" ;;
8) method="DELETE" ;;
9) method="HEAD" ;;
esac
if [ \$((i % 10)) -lt 7 ]; then
ua="\${UA_BROWSER[\$((idx % \${#UA_BROWSER[@]}))]}"
else
ua="\${UA_BOT[\$((idx % \${#UA_BOT[@]}))]}"
fi
# Pin exactly one TLS version: minimum and maximum are both set.
extra_flags="--resolve \${sni_host}:443:\${target_ip} --tlsv\${tls_v} --tls-max \${tls_v}"
# Rotate over the available source addresses
if [ \${#SRC_IPS[@]} -gt 1 ]; then
src_ip="\${SRC_IPS[\$((idx % \${#SRC_IPS[@]}))]}"
extra_flags="\$extra_flags --interface \$src_ip"
fi
case \$method in
POST)
curl -sf -k \$extra_flags -X POST "https://\${sni_host}\${path}" \
-H "User-Agent: \${ua}" -H "Content-Type: application/json" \
-d '{"test":1,"seq":'\$i'}' >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
PUT)
curl -sf -k \$extra_flags -X PUT "https://\${sni_host}\${path}" \
-H "User-Agent: \${ua}" -d '{}' >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
DELETE)
curl -sf -k \$extra_flags -X DELETE "https://\${sni_host}\${path}" \
-H "User-Agent: \${ua}" >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
HEAD)
curl -sf -k \$extra_flags -I "https://\${sni_host}\${path}" \
-H "User-Agent: \${ua}" >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
*)
curl -sf -k \$extra_flags -X "\$method" "https://\${sni_host}\${path}" \
-H "User-Agent: \${ua}" >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
esac
done
echo "\$ok/\$HITS"
REMOTE_SCRIPT
    HTTPS_PIDS[$src_vm]=$!
  done

  # Collect the background results: wait for each job by PID, then read the
  # "ok/total" line it wrote to its own file.
  for src_vm in $ENDPOINT_VMS; do
    wait "${HTTPS_PIDS[$src_vm]}" 2>/dev/null || true
    local result
    result=$(tr -d '\r' <"${results_dir}/${src_vm}.out" 2>/dev/null \
      | grep -E '^[0-9]+/[0-9]+$' | tail -1) || true
    if [ -z "$result" ]; then
      warn "$src_vm HTTPS : résultat indisponible"
      continue
    fi
    log " $src_vm HTTPS : $result requêtes réussies"
    total_ok=$((total_ok + ${result%%/*}))
  done
  rm -rf "$results_dir"

  # Bulk HTTP/2 from the VMs when httpx is importable there.
  local h2_sources=0
  for src_vm in $ENDPOINT_VMS; do
    if vagrant ssh "$src_vm" -- 'python3 -c "import httpx"' 2>/dev/null; then
      log "Génération HTTP/2 depuis $src_vm (${HTTPS_COUNT} requêtes, TLS=${TLS_VERSIONS}, DNS=${DNS_COUNT})..."
      # The Python program lives inside a double-quoted shell string, so it
      # must start at column 0; ${...} is expanded locally before sending.
      vagrant ssh "$src_vm" -- "python3 -c \"
import httpx, ssl as _ssl, warnings, random
warnings.filterwarnings('ignore')
paths = ['/', '/health', '/data', '/api/users', '/api/v1/status', '/login', '/api/search']
sni_hosts = '${SNI_HOSTS[*]}'.split()
target_ips = '${TARGET_IPS}'.split()
uas_browser = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15',
    'Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0',
]
uas_bot = ['python-httpx/0.28.1', 'Googlebot/2.1', 'Go-http-client/2.0']
tls_versions = [v.strip() for v in '${TLS_VERSIONS}'.split(',')]
supported = {'1.2': _ssl.TLSVersion.TLSv1_2, '1.3': _ssl.TLSVersion.TLSv1_3}
tls_map = [supported[v] for v in tls_versions if v in supported]
ctx = _ssl.SSLContext(_ssl.PROTOCOL_TLS_CLIENT)
ctx.check_hostname = False
ctx.verify_mode = _ssl.CERT_NONE
if tls_map:
    ctx.minimum_version = min(tls_map)
    ctx.maximum_version = max(tls_map)
with httpx.Client(http2=True, verify=ctx) as c:
    for i in range(${HTTPS_COUNT}):
        p = random.choice(paths)
        target = random.choice(target_ips)
        h = random.choice(sni_hosts)
        ua = random.choice(uas_browser if random.random() < 0.7 else uas_bot)
        try:
            c.get(f'https://{target}' + p, headers={'User-Agent': ua, 'Host': h})
        except:
            pass
\"" 2>/dev/null || true
      h2_sources=$((h2_sources + 1))
    fi
  done
  # Only claim HTTP/2 traffic when at least one VM actually ran the generator.
  if [ "$h2_sources" -gt 0 ]; then
    pass "HTTP/2 généré depuis ${h2_sources} endpoint(s)"
  else
    warn "HTTP/2 non généré — httpx indisponible sur les endpoints"
  fi

  # Additional plain-HTTP (port 80) traffic for diversity.
  if [ "${HTTP_COUNT}" -gt 0 ]; then
    log "Génération HTTP (port 80) depuis les VMs : ${HTTP_COUNT} requêtes/VM..."
    for src_vm in $ENDPOINT_VMS; do
      local ok80
      ok80=$(vagrant ssh "$src_vm" -- "
ok=0
for i in \$(seq 1 ${HTTP_COUNT}); do
curl -sf http://localhost/health >/dev/null 2>&1 && ok=\$((ok + 1)) || true
done
echo \$ok
" 2>/dev/null || echo "0")
      # Keep the last line only and fall back to 0 if it is not a number, so
      # stray remote output cannot break the arithmetic below.
      ok80="${ok80##*$'\n'}"
      ok80="${ok80//$'\r'/}"
      [[ "$ok80" =~ ^[0-9]+$ ]] || ok80=0
      log " $src_vm HTTP : ${ok80}/${HTTP_COUNT} requêtes"
      total_ok=$((total_ok + ok80))
    done
  else
    log "HTTP (port 80) désactivé (http-ratio=0)"
  fi
  pass "Trafic total : ${total_ok} requêtes réussies"
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 4 : Attente du pipeline
# ═════════════════════════════════════════════════════════════════════════════
# Phase 4: give the pipeline time to ingest and score the generated traffic.
#
# Steps, in order:
#   1. sleep 15s so ja4ebpf can flush,
#   2. touch /tmp/ja4ebpf-traffic-done on every endpoint VM,
#   3. poll ClickHouse (every 2s, 30 tries) until http_logs_raw is non-empty,
#   4. poll ClickHouse (every 2s, 60 tries) until ml_all_scores is non-empty,
#   5. wait for the background endpoint processes listed in PIDS.
#
# Globals read: VM_DIR, ENDPOINT_VMS, PIDS.
# Helpers used: log, pass, warn, ch_query (defined elsewhere in this script).
# A timeout is reported with warn and is not fatal here.
phase4_wait() {
    # Keep every loop variable local so nothing leaks into the caller's scope.
    local vm pid attempt raw_count scores
    local raw_seen=false scores_seen=false

    echo ""
    echo "╔══════════════════════════════════════════════════════════╗"
    echo "║ Phase 4 : Attente du pipeline ║"
    echo "╚══════════════════════════════════════════════════════════╝"
    echo ""

    # Flush ja4ebpf
    log "Attente flush ja4ebpf (15s)..."
    sleep 15

    # Tell the endpoints that traffic generation is over.
    cd "$VM_DIR"
    for vm in $ENDPOINT_VMS; do
        vagrant ssh "$vm" -- 'sudo touch /tmp/ja4ebpf-traffic-done' 2>/dev/null || true
    done

    # Wait for raw rows to land in ClickHouse.
    log "Attente données dans ClickHouse (max 60s)..."
    for attempt in $(seq 1 30); do
        raw_count=$(ch_query "SELECT count() FROM ja4_logs.http_logs_raw" 2>/dev/null | tr -d ' \n')
        # 2>/dev/null hides the "integer expression expected" noise when the
        # query returned something that is not a number.
        if [ "${raw_count:-0}" -gt 0 ] 2>/dev/null; then
            pass "Données brutes reçues : ${raw_count} lignes (${attempt}*2s)"
            raw_seen=true
            break
        fi
        sleep 2
    done
    if [ "$raw_seen" != "true" ]; then
        warn "Aucune donnée brute dans ClickHouse après 60s"
    fi

    # Wait for at least one bot-detector cycle.
    log "Attente cycle bot-detector (max 120s)..."
    for attempt in $(seq 1 60); do
        scores=$(ch_query "SELECT count() FROM ja4_processing.ml_all_scores" 2>/dev/null | tr -d ' \n')
        if [ "${scores:-0}" -gt 0 ] 2>/dev/null; then
            pass "bot-detector a complété au moins 1 cycle ($scores scores)"
            scores_seen=true
            break
        fi
        sleep 2
    done
    if [ "$scores_seen" != "true" ]; then
        warn "Aucun score bot-detector après 120s"
    fi

    # Wait for the background endpoint processes.
    log "Attente fin des processus endpoint..."
    for pid in "${PIDS[@]:-}"; do
        # "${PIDS[@]:-}" expands to one empty word when PIDS is empty.
        [ -n "$pid" ] || continue
        wait "$pid" 2>/dev/null || true
    done
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 5 : Vérification complète
# ═════════════════════════════════════════════════════════════════════════════
# Run a ClickHouse query through ch_query and strip spaces/newlines from the
# result so it can be compared as an integer.
#   $1: SQL query
_e2e_scalar() {
    ch_query "$1" | tr -d ' \n'
}

# Report one "count must be > 0" check.
#   $1: reporter to call when the check does not hold (fail or warn)
#   $2: value to test (empty or non-numeric counts as not positive)
#   $3: message passed to pass
#   $4: message passed to the reporter in $1
# Written as if/else rather than `test && pass || fail`, so the failure
# reporter never runs merely because pass returned non-zero.
_e2e_expect_positive() {
    local on_miss="$1" value="$2" ok_msg="$3" miss_msg="$4"
    if [ "${value:-0}" -gt 0 ] 2>/dev/null; then
        pass "$ok_msg"
    else
        "$on_miss" "$miss_msg"
    fi
}

# Phase 5: verify every layer of the pipeline from the orchestrator side.
#
# Layer 1: raw rows and number of distinct hosts in ja4_logs.
# Layer 2: materialized-view output (JA4, HTTP method, hourly aggregates,
#          AI features).
# Layer 3: bot-detector output tables and its HTTP endpoint on port 8080.
# Layer 4: dashboard endpoints on port 8000.
#
# Globals read: ANALYSIS_IP.
# Helpers used: log, pass, fail, warn, ch_query (defined elsewhere).
# Results are reported through pass/fail/warn only; nothing is returned.
phase5_verify() {
    local raw_count host_count ja4_count method_count agg_count feat_count
    local scores_count anomaly_count overview detections
    # Bounded probes: without timeouts an unreachable analysis VM would
    # stall the whole verification.
    local -a probe=(curl -sf --connect-timeout 5 --max-time 30)

    echo ""
    echo "╔══════════════════════════════════════════════════════════╗"
    echo "║ Phase 5 : Vérification du pipeline complet ║"
    echo "╚══════════════════════════════════════════════════════════╝"
    echo ""

    # ── Layer 1: raw data ────────────────────────────────────────────────────
    log "── Layer 1 : Données brutes ──"
    raw_count=$(_e2e_scalar "SELECT count() FROM ja4_logs.http_logs_raw")
    _e2e_expect_positive fail "$raw_count" \
        "http_logs_raw : ${raw_count} lignes" \
        "http_logs_raw vide"

    # Multi-source: rows should come from at least two distinct hosts.
    host_count=$(_e2e_scalar "SELECT uniqExact(host) FROM ja4_logs.http_logs")
    if [ "${host_count:-0}" -ge 2 ] 2>/dev/null; then
        pass "Multi-source : ${host_count} hôtes distincts"
    else
        warn "Multi-source : ${host_count:-0} hôte(s) distinct(s)"
    fi

    # ── Layer 2: ClickHouse pipeline ─────────────────────────────────────────
    log "── Layer 2 : Pipeline ClickHouse (MVs) ──"
    ja4_count=$(_e2e_scalar "SELECT count() FROM ja4_logs.http_logs WHERE ja4 != ''")
    _e2e_expect_positive fail "$ja4_count" \
        "JA4 fingerprints : ${ja4_count}" \
        "JA4 fingerprints absents"

    method_count=$(_e2e_scalar "SELECT count() FROM ja4_logs.http_logs WHERE method != ''")
    _e2e_expect_positive fail "$method_count" \
        "L7 HTTP : ${method_count} requêtes capturées" \
        "L7 HTTP absent"

    # Hourly aggregates may legitimately be empty on a short run: warn only.
    agg_count=$(_e2e_scalar "SELECT count() FROM ja4_processing.agg_host_ip_ja4_1h")
    _e2e_expect_positive warn "$agg_count" \
        "Agrégation agg_host_ip_ja4_1h : ${agg_count} entrées" \
        "Agrégation agg_host_ip_ja4_1h vide (volume insuffisant pour la fenêtre horaire)"

    feat_count=$(_e2e_scalar "SELECT count() FROM ja4_processing.view_ai_features_1h")
    _e2e_expect_positive warn "$feat_count" \
        "AI features : ${feat_count} lignes" \
        "AI features vides (agrégation horaire pas encore calculée)"

    # ── Layer 3: ML bot-detector ─────────────────────────────────────────────
    log "── Layer 3 : ML bot-detector ──"
    scores_count=$(_e2e_scalar "SELECT count() FROM ja4_processing.ml_all_scores")
    _e2e_expect_positive fail "$scores_count" \
        "ml_all_scores : ${scores_count} classifications" \
        "ml_all_scores vide — bot-detector n'a pas produit de résultats"

    anomaly_count=$(_e2e_scalar "SELECT count() FROM ja4_processing.ml_detected_anomalies")
    _e2e_expect_positive warn "$anomaly_count" \
        "ml_detected_anomalies : ${anomaly_count} anomalies détectées" \
        "ml_detected_anomalies vide (pas d'anomalies dans le trafic de test)"

    # bot-detector health
    if "${probe[@]}" "http://${ANALYSIS_IP}:8080/" >/dev/null 2>&1; then
        pass "bot-detector health OK"
    else
        fail "bot-detector health KO"
    fi

    # ── Layer 4: dashboard ───────────────────────────────────────────────────
    log "── Layer 4 : Dashboard ──"
    if "${probe[@]}" "http://${ANALYSIS_IP}:8000/health" >/dev/null 2>&1; then
        pass "dashboard /health OK"
    else
        fail "dashboard /health KO"
    fi

    # An empty body, "{}" or "null" all count as "no data".
    overview=$("${probe[@]}" "http://${ANALYSIS_IP}:8000/api/overview" 2>/dev/null || echo "")
    if [ -n "$overview" ] && [ "$overview" != "{}" ] && [ "$overview" != "null" ]; then
        pass "dashboard /api/overview retourne des données"
    else
        warn "dashboard /api/overview vide"
    fi

    detections=$("${probe[@]}" "http://${ANALYSIS_IP}:8000/api/detections" 2>/dev/null || echo "")
    if [ -n "$detections" ]; then
        pass "dashboard /api/detections accessible"
    else
        warn "dashboard /api/detections pas de réponse"
    fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Résumé
# ═════════════════════════════════════════════════════════════════════════════
# Print the final E2E report: passed / warned / failed counters over their
# sum, followed by a one-line verdict.
#
# Globals read: PASS_COUNT, WARN_COUNT, FAIL_COUNT and the colour variables
# GREEN, YELLOW, RED, BOLD, RESET.
# The warning and failure lines are only printed when their counter is > 0.
phase_summary() {
    local grand_total verdict
    grand_total=$((PASS_COUNT + FAIL_COUNT + WARN_COUNT))

    printf '%s\n' \
        "" \
        "╔══════════════════════════════════════════════════════════╗" \
        "║ RÉSULTATS E2E ║" \
        "╚══════════════════════════════════════════════════════════╝"

    echo -e " ${GREEN}Réussis : $PASS_COUNT${RESET} / $grand_total"
    if [ "$WARN_COUNT" -gt 0 ]; then
        echo -e " ${YELLOW}Avertissements : $WARN_COUNT${RESET} / $grand_total"
    fi
    if [ "$FAIL_COUNT" -gt 0 ]; then
        echo -e " ${RED}Échoués : $FAIL_COUNT${RESET} / $grand_total"
    fi

    # The verdict depends only on the failure counter; warnings do not
    # turn the run red.
    if [ "$FAIL_COUNT" -eq 0 ]; then
        verdict="${GREEN}${BOLD}Test E2E distribué : SUCCÈS${RESET}"
    else
        verdict="${RED}${BOLD}Test E2E distribué : $FAIL_COUNT ÉCHECS${RESET}"
    fi

    echo ""
    echo -e " ${verdict}"
    echo ""
}
# ═════════════════════════════════════════════════════════════════════════════
# Nettoyage
# ═════════════════════════════════════════════════════════════════════════════
# Print the first IPv4 address configured on eth0 of a VM, without its
# prefix length. Prints nothing when the VM cannot be reached.
#   $1: Vagrant machine name
# The parsing runs locally, so awk's $2 is not quoted through a second shell.
_e2e_vm_eth0_ip() {
    vagrant ssh "$1" -- 'ip -4 addr show eth0' 2>/dev/null \
        | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}'
}

# EXIT trap: undo what the run set up, honouring the "keep" switches.
#
#   - SRC_IP_COUNT > 1             : delete the alias IPs from eth0 on each
#                                    endpoint VM
#   - KEEP_RUNNING != true         : stop ja4ebpf / nginx / httpd on endpoints
#   - KEEP_RUNNING or KEEP_ANALYSIS: keep the analysis stack and print its
#                                    URLs, otherwise `docker compose down -v`
#   - KEEP_RUNNING = true          : print the endpoint IPs that stay up
#
# Globals read: VM_DIR, ENDPOINT_VMS, SRC_IP_COUNT, KEEP_RUNNING,
# KEEP_ANALYSIS, ANALYSIS_IP. Every remote command is best-effort: a
# failure never aborts the cleanup.
cleanup() {
    local vm vm_ip base_ip net_prefix base_last alias_last idx del_cmds

    # Change directory before the first vagrant call, as the other phases do;
    # a failure here must not stop the rest of the cleanup.
    cd "$VM_DIR" 2>/dev/null || true

    # Remove the alias IPs from the VMs.
    if [ "${SRC_IP_COUNT:-1}" -gt 1 ]; then
        log "Suppression des IPs alias sur les VMs..."
        for vm in $ENDPOINT_VMS; do
            base_ip=$(_e2e_vm_eth0_ip "$vm" || true)
            [ -n "$base_ip" ] || continue
            net_prefix="${base_ip%.*}"
            base_last="${base_ip##*.}"

            # Build the whole deletion list locally and send it in one ssh
            # round-trip. Alias numbering: last octet + 100 + idx, falling
            # back to 10 + idx when that exceeds 254.
            del_cmds=""
            for ((idx = 1; idx < SRC_IP_COUNT; idx++)); do
                alias_last=$((base_last + 100 + idx))
                if [ "$alias_last" -gt 254 ]; then
                    alias_last=$((10 + idx))
                fi
                del_cmds+="sudo ip addr del ${net_prefix}.${alias_last}/24 dev eth0 2>/dev/null; "
            done
            vagrant ssh "$vm" -- "${del_cmds}true" 2>/dev/null || true
        done
    fi

    # Always stop the endpoints, unless the global keep switch is set.
    if [ "${KEEP_RUNNING:-false}" != "true" ]; then
        log "Nettoyage des endpoints..."
        for vm in $ENDPOINT_VMS; do
            vagrant ssh "$vm" -- "sudo pkill ja4ebpf 2>/dev/null; sudo nginx -s stop 2>/dev/null; sudo pkill httpd 2>/dev/null" 2>/dev/null || true
        done
    fi

    # Analysis stack: keep it when either keep switch is set, else stop it.
    if [ "${KEEP_RUNNING:-false}" = "true" ] || [ "${KEEP_ANALYSIS:-false}" = "true" ]; then
        log "Stack analysis conservée :"
        log " ClickHouse : http://${ANALYSIS_IP}:8123/play"
        log " Dashboard : http://${ANALYSIS_IP}:8000"
        log " Bot-detector : http://${ANALYSIS_IP}:8080"
    else
        log "Arrêt de la stack analysis..."
        vagrant ssh analysis -- "cd /ja4-platform/tests/vm/analysis && docker compose down -v" 2>/dev/null || true
    fi

    if [ "${KEEP_RUNNING:-false}" = "true" ]; then
        log "VMs endpoints conservées :"
        for vm in $ENDPOINT_VMS; do
            vm_ip=$(_e2e_vm_eth0_ip "$vm" || true)
            log " $vm : ${vm_ip}"
        done
    fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Main
# ═════════════════════════════════════════════════════════════════════════════
# Entry point: register the cleanup trap, bring the stack up, then either
# stop there (--up) or run traffic generation, wait, verification and the
# summary. The exit status is 0 only when no check failed.
KEEP_RUNNING="${KEEP_RUNNING:-false}"
trap cleanup EXIT

phase0_setup
phase1_analysis
phase2_endpoints

if [ "$UP_ONLY" = "true" ]; then
    # Set this first: the EXIT trap reads it, and it must already be true if
    # one of the commands below aborts the script.
    KEEP_ANALYSIS=true
    # NOTE(review): cleanup still stops ja4ebpf/nginx/httpd on the endpoints
    # unless KEEP_RUNNING=true — confirm that is intended for --up, since the
    # hints printed below suggest curling the endpoints afterwards.

    echo ""
    echo "╔══════════════════════════════════════════════════════════╗"
    echo "║ Stack prête — mode --up (pas de trafic ni vérification) ║"
    echo "╚══════════════════════════════════════════════════════════╝"
    echo ""
    log "ClickHouse : http://${ANALYSIS_IP}:8123/play"
    log "Dashboard : http://${ANALYSIS_IP}:8000"
    log "Bot-detector : http://${ANALYSIS_IP}:8080"
    for vm in $ENDPOINT_VMS; do
        # Plain assignment: `local` is only valid inside a function and is an
        # error at top level.
        vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \
            | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}')
        log "$vm HTTP : http://${vm_ip}:80"
        log "$vm HTTPS : https://${vm_ip}:443"
    done
    log ""
    log "Pour générer du trafic :"
    log " ./run-e2e-test.sh --hits 200 --dns 3 --tls 1.2,1.3 --keep-analysis"
    log " curl -sk https://<endpoint_ip>/health"
    exit 0
fi

phase3_traffic
phase4_wait
phase5_verify
phase_summary

if [ "$FAIL_COUNT" -eq 0 ]; then
    exit 0
fi
exit 1

View File

@ -68,6 +68,17 @@ get_eth0_ip() {
# ── ClickHouse ────────────────────────────────────────────────────────────────
start_clickhouse() {
# Si un ClickHouse externe est configuré, ne pas démarrer le conteneur local
if [ -n "${CH_HOST:-}" ] && [ "$CH_HOST" != "127.0.0.1" ] && [ "$CH_HOST" != "localhost" ]; then
log "ClickHouse externe ($CH_HOST) — démarrage local ignoré"
# Vérifier que le ClickHouse distant est accessible
for i in $(seq 1 30); do
curl -sf "http://${CH_HOST}:8123/ping" >/dev/null 2>&1 && { pass "ClickHouse distant prêt"; return 0; }
sleep 2
done
fail "ClickHouse distant ($CH_HOST) inaccessible"; return 1
fi
log "Démarrage ClickHouse..."
docker rm -f ja4-clickhouse 2>/dev/null || true
@ -104,11 +115,12 @@ start_ja4ebpf() {
done
[ -z "$ssl_lib" ] && ssl_lib="/usr/lib64/libssl.so.3"
local ch_addr="${CH_HOST:-127.0.0.1}"
cat > /tmp/ja4ebpf.yml << EOF
interface: eth0
ssl_lib_path: "${ssl_lib}"
clickhouse:
dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs"
dsn: "clickhouse://default:@${ch_addr}:9000/ja4_logs"
batch_size: 100
flush_secs: 1
correlation:
@ -287,21 +299,47 @@ verify_db() {
log "Vérification des données dans ClickHouse..."
ch_val() {
curl -sf "http://localhost:8123/?database=ja4_logs" \
local ch_http_host="${CH_HOST:-localhost}"
curl -sf "http://${ch_http_host}:8123/?database=ja4_logs" \
--data-urlencode "query=$1" 2>/dev/null | tr -d ' \n' || echo "0"
}
# http_logs_raw (données brutes avant MV)
local raw_count
raw_count=$(ch_val "SELECT count() FROM http_logs_raw")
if [ "${raw_count:-0}" -gt 0 ] 2>/dev/null; then
pass "http_logs_raw : $raw_count lignes"
else
# Attendre que http_logs_raw contienne des données (max 30s)
local raw_ok=false
log " Attente données brutes dans ClickHouse..."
for i in $(seq 1 15); do
local raw_count
raw_count=$(ch_val "SELECT count() FROM http_logs_raw")
if [ "${raw_count:-0}" -gt 0 ] 2>/dev/null; then
pass "http_logs_raw : $raw_count lignes (${i}*2s)"
raw_ok=true
break
fi
sleep 2
done
if [ "$raw_ok" = "false" ]; then
fail "http_logs_raw vide — ja4ebpf n'a rien capturé"
log " Logs ja4ebpf :"
tail -10 /tmp/ja4ebpf.log 2>/dev/null | sed 's/^/ /'
fi
# Attendre que la MV http_logs se remplisse (max 30s)
local logs_ok=false
log " Attente MV http_logs..."
for i in $(seq 1 15); do
local logs_count
logs_count=$(ch_val "SELECT count() FROM http_logs")
if [ "${logs_count:-0}" -gt 0 ] 2>/dev/null; then
logs_ok=true
break
fi
sleep 2
done
if [ "$logs_ok" = "false" ]; then
warn "MV http_logs vide après 30s — vérification partielle uniquement"
fi
# L3/L4
ttl=$(ch_val "SELECT count() FROM http_logs WHERE ip_meta_ttl > 0")
[ "${ttl:-0}" -gt 0 ] 2>/dev/null && pass "L3/L4 TTL ($ttl)" || fail "L3/L4 TTL absent"
@ -343,7 +381,10 @@ stop_stack() {
apache) stop_apache ;;
hitch-varnish) stop_hitch_varnish ;;
esac
docker rm -f ja4-clickhouse 2>/dev/null || true
# Ne pas supprimer le ClickHouse s'il est externe (VM analysis)
if [ -z "${CH_HOST:-}" ] || [ "$CH_HOST" = "127.0.0.1" ] || [ "$CH_HOST" = "localhost" ]; then
docker rm -f ja4-clickhouse 2>/dev/null || true
fi
}
cleanup() {
@ -371,7 +412,10 @@ do_start() {
GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3
GOWORK=off CGO_ENABLED=0 go build -o /tmp/ja4ebpf_new ./cmd/ja4ebpf/ && mv /tmp/ja4ebpf_new /usr/local/bin/ja4ebpf
}
command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; }
# Docker n'est nécessaire que pour un ClickHouse local
if [ -z "${CH_HOST:-}" ] || [ "$CH_HOST" = "127.0.0.1" ] || [ "$CH_HOST" = "localhost" ]; then
command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; }
fi
start_clickhouse
@ -434,7 +478,21 @@ case "$MODE" in
[ -f /tmp/ja4ebpf-traffic-done ] && break
sleep 1
done
do_verify
# En mode ClickHouse externe (E2E distribué), la vérification est faite
# par le script orchestrateur (run-e2e-test.sh Phase 5). On saute la
# vérification locale car les MV peuvent ne pas encore être peuplées.
if [ -n "${CH_HOST:-}" ] && [ "$CH_HOST" != "127.0.0.1" ] && [ "$CH_HOST" != "localhost" ]; then
log "ClickHouse externe — vérification locale ignorée (gérée par l'orchestrateur)"
log "Logs ja4ebpf :"
tail -5 /tmp/ja4ebpf.log 2>/dev/null | sed 's/^/ /'
pass "ja4ebpf actif (ClickHouse externe)"
else
# Laisser le temps au pipeline ClickHouse de traiter les données brutes
# (http_logs_raw → MV http_logs) avant de vérifier
log "Attente pipeline ClickHouse (20s)..."
sleep 20
do_verify
fi
;;
verify)
do_verify