Files
ja4-platform/tests/vm/run-tests-vm.sh
Jacquin Antoine f88b739992 feat(e2e): add distributed E2E test framework with parametric traffic generation
Add run-e2e-test.sh with CLI parameters (--hits, --http-ratio, --dns, --tls,
--src-ips, --keep-analysis, --up) for configurable traffic generation. Traffic
runs from VM endpoints with multiple source IPs (alias IPs on eth0) to produce
distinct sessions for the ML pipeline. Fix curl TLS flags (--tlsv1.2 instead
of --tls-v1-2), skip redundant local verification in distributed mode, and
fix dashboard is_available() cache that never retried after ClickHouse recovery.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-15 00:09:32 +02:00

516 lines
21 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# run-tests-vm.sh — Tests ja4ebpf multi-stack dans une VM Vagrant
#
# Architecture :
# Phase 1 (dans la VM) : démarrer ClickHouse, serveur web, ja4ebpf
# Phase 2 (depuis le host) : générer du trafic vers l'IP eth0 de la VM
# Phase 3 (dans la VM) : vérifier les données dans ClickHouse
#
# Stacks supportées :
# nginx — nginx avec TLS (HTTP/1.1 + HTTP/2)
# apache — Apache httpd avec TLS (HTTP/1.1 + HTTP/2)
# hitch-varnish — hitch (TLS) → Varnish (cache/H2) → backend Python
# all — exécute les 3 stacks séquentiellement
#
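# Modes :
# start — démarrer les services (Phase 1), attendre le marqueur de fin de
#         trafic créé par le host, puis vérifier (sauf ClickHouse externe)
# verify — vérifier les données uniquement (Phase 3)
# (défaut) — mode legacy : start + trafic local (localhost) + verify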
#
# Usage (depuis le host via Makefile) :
# make test-vm-nginx
# make test-vm-apache
# make test-vm-hitch-varnish
# make test-vm-matrix
# =============================================================================
# Strict mode: stop on command errors, on unset variables and on a failure in
# any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Look in /usr/local/bin and the Go toolchain directory before the rest of PATH.
PATH="/usr/local/bin:/usr/local/go/bin:$PATH"
export PATH

# Positional arguments: stack under test, then run mode (start | verify | full).
STACK="${1:-nginx}"
MODE="${2:-full}"

# When KEEP_RUNNING=true is set in the environment, teardown on exit is skipped.
KEEP_RUNNING="${KEEP_RUNNING:-false}"

# Root of the project tree; repository paths used below are built from it.
PROJECT="/ja4-platform"

# ANSI escape sequences; they are interpreted when printed with `echo -e`.
RESET='\033[0m'
BOLD='\033[1m'
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
# Print a timestamped message prefixed with the current stack name.
# Arguments: the message words (joined with spaces).
log() {
  local prefix="${BOLD}[$STACK]${RESET}"
  echo -e "${prefix} $(date +%H:%M:%S) $*"
}
# Report a successful check and bump the PASS_COUNT tally.
# Arguments: the message words (joined with spaces).
pass() {
  echo -e " ${GREEN}PASS${RESET} $*"
  PASS_COUNT=$((PASS_COUNT + 1))
}
# Report a failed check and bump the FAIL_COUNT tally.
# Arguments: the message words (joined with spaces).
fail() {
  echo -e " ${RED}FAIL${RESET} $*"
  FAIL_COUNT=$((FAIL_COUNT + 1))
}
# Report a non-fatal anomaly and bump the WARN_COUNT tally.
# Arguments: the message words (joined with spaces).
warn() {
  echo -e " ${YELLOW}WARN${RESET} $*"
  WARN_COUNT=$((WARN_COUNT + 1))
}
# Result tallies, incremented by pass / fail / warn.
PASS_COUNT=0
FAIL_COUNT=0
WARN_COUNT=0
# ── Helpers communs ──────────────────────────────────────────────────────────
# Generate a self-signed RSA-2048 certificate (CN=platform.test, 365 days).
# Arguments: $1 - base name; the key is written to
#                 /etc/pki/tls/private/<name>.key and the certificate to
#                 /etc/pki/tls/certs/<name>.crt
# Outputs:   nothing on success; openssl's diagnostics on stderr on failure
# Returns:   openssl's exit status
gen_tls_cert() {
  local name="$1"
  local key_path="/etc/pki/tls/private/${name}.key"
  local crt_path="/etc/pki/tls/certs/${name}.crt"
  local openssl_err rc=0

  # openssl's stderr is captured rather than thrown away, so that a failure
  # (which aborts the script under `set -e`) is not completely silent.
  openssl_err=$(openssl req -x509 -nodes -days 365 -subj "/CN=platform.test" \
    -newkey rsa:2048 \
    -keyout "$key_path" \
    -out "$crt_path" 2>&1 >/dev/null) || rc=$?

  if [ "$rc" -ne 0 ]; then
    printf '%s\n' "$openssl_err" >&2
    return "$rc"
  fi
}
# Populate the web document root with the static JSON files served in tests.
# Globals:   STACK (read) - embedded in the health payload
# Arguments: $1 - document root (optional, defaults to /var/www/html)
# Outputs:   writes <docroot>/health plus data, api/users and api/data/test
setup_docroot() {
  local docroot="${1:-/var/www/html}"
  local rel_path

  mkdir -p "$docroot"
  printf '{"status":"ok","stack":"%s"}\n' "$STACK" > "$docroot/health"

  for rel_path in data api/users api/data/test; do
    # Each entry is a file; only its parent directory has to exist.
    mkdir -p "$docroot/$(dirname -- "$rel_path")"
    printf '%s\n' '{"ok":true}' > "$docroot/$rel_path"
  done
}
# Print the first IPv4 address configured on eth0, without the prefix length.
# Outputs: the address on stdout, or an empty line when the lookup fails
get_eth0_ip() {
  # The stderr redirect belongs to `ip` (the command that can complain about
  # a missing interface), not to awk.
  ip -4 addr show eth0 2>/dev/null \
    | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}' \
    || echo ""
}
# ── ClickHouse ────────────────────────────────────────────────────────────────
# Make a ClickHouse server available for the test run.
# When CH_HOST names a non-local host, no container is started: the function
# only polls the remote HTTP /ping endpoint (30 attempts, 2 s apart).
# Otherwise the ja4-clickhouse container is (re)created locally and polled on
# localhost:8123 (60 attempts, 2 s apart).
# Globals:   CH_HOST (read, optional), PROJECT (read), CSV_DIR (written)
# Returns:   0 when ClickHouse answers; 1 when the remote server never
#            answers; exits the script with status 1 on local timeout
start_clickhouse() {
  local attempt sql_file
  local -a sql_mounts=()

  # An external ClickHouse is configured: do not start the local container.
  if [ -n "${CH_HOST:-}" ] && [ "$CH_HOST" != "127.0.0.1" ] && [ "$CH_HOST" != "localhost" ]; then
    log "ClickHouse externe ($CH_HOST) — démarrage local ignoré"
    # Check that the remote ClickHouse is reachable.
    for attempt in $(seq 1 30); do
      if curl -sf "http://${CH_HOST}:8123/ping" >/dev/null 2>&1; then
        pass "ClickHouse distant prêt"
        return 0
      fi
      sleep 2
    done
    fail "ClickHouse distant ($CH_HOST) inaccessible"
    return 1
  fi

  log "Démarrage ClickHouse..."
  docker rm -f ja4-clickhouse 2>/dev/null || true
  CSV_DIR="$PROJECT/tests/integration/platform/csv-stubs"

  # One read-only bind mount per schema file. An array keeps each "-v" value
  # a single argument even when a path contains whitespace, and the existence
  # check skips the literal pattern left behind by a glob with no match.
  for sql_file in "$PROJECT/shared/clickhouse/"*.sql; do
    [ -e "$sql_file" ] || continue
    sql_mounts+=(-v "$sql_file:/initdb-src/$(basename -- "$sql_file"):ro")
  done

  # The ${arr[@]+...} form expands to nothing for an empty array, which plain
  # "${arr[@]}" rejects under `set -u` on bash releases before 4.4.
  docker run -d --name ja4-clickhouse \
    -p 8123:8123 -p 9000:9000 \
    -e CLICKHOUSE_DB=ja4_processing \
    -e CLICKHOUSE_USER=default \
    -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \
    -v "$PROJECT/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh" \
    -v "$CSV_DIR:/var/lib/clickhouse/user_files" \
    ${sql_mounts[@]+"${sql_mounts[@]}"} \
    clickhouse/clickhouse-server:24.8 2>&1 | tail -1

  log "Attente ClickHouse (max 120s)..."
  for attempt in $(seq 1 60); do
    if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then
      pass "ClickHouse prêt"
      return 0
    fi
    sleep 2
  done
  fail "ClickHouse timeout"
  exit 1
}
# ── ja4ebpf ────────────────────────────────────────────────────────────────────
# Write the ja4ebpf configuration, launch the agent in the background and
# report whether an XDP program is visible on eth0.
# Globals:   CH_HOST (read, optional), JA4EBPF_PID (written)
# Outputs:   /tmp/ja4ebpf.yml (config) and /tmp/ja4ebpf.log (agent output)
# Returns:   0 when the agent is still alive after 3 s, 1 otherwise
start_ja4ebpf() {
  local ssl_lib="" lib link_info xdp_info
  local ch_addr="${CH_HOST:-127.0.0.1}"

  log "Démarrage ja4ebpf..."
  pkill ja4ebpf 2>/dev/null || true
  sleep 1

  # Use the first libssl found; fall back to the lib64 OpenSSL 3 path.
  for lib in /usr/lib64/libssl.so.3 /usr/lib64/libssl.so.1.1 /usr/lib/libssl.so.3 /usr/lib/libssl.so.1.1; do
    if [ -f "$lib" ]; then
      ssl_lib="$lib"
      break
    fi
  done
  if [ -z "$ssl_lib" ]; then
    ssl_lib="/usr/lib64/libssl.so.3"
  fi

  # Unquoted delimiter: ${ssl_lib} and ${ch_addr} are expanded in the file.
  cat > /tmp/ja4ebpf.yml << EOF
interface: eth0
ssl_lib_path: "${ssl_lib}"
clickhouse:
  dsn: "clickhouse://default:@${ch_addr}:9000/ja4_logs"
  batch_size: 100
  flush_secs: 1
correlation:
  timeout_ms: 500
  slowloris_ms: 10000
log:
  level: "info"
  format: "json"
EOF

  JA4EBPF_CONFIG=/tmp/ja4ebpf.yml ja4ebpf > /tmp/ja4ebpf.log 2>&1 &
  JA4EBPF_PID=$!
  sleep 3
  if ! kill -0 "$JA4EBPF_PID" 2>/dev/null; then
    fail "ja4ebpf s'est arrêté immédiatement"
    tail -10 /tmp/ja4ebpf.log
    return 1
  fi
  log "ja4ebpf démarré (PID $JA4EBPF_PID)"

  # Check XDP. The link description is captured once and then searched, so a
  # missing "prog/xdp" detail line (the flag can show up as "xdp" or
  # "xdpgeneric" without it) yields an empty detail instead of a failed
  # pipeline that would abort the script under `set -e -o pipefail`.
  link_info=$(ip link show dev eth0 2>/dev/null || true)
  if grep -q "xdp" <<<"$link_info"; then
    xdp_info=$(grep "prog/xdp" <<<"$link_info" | sed 's/^[[:space:]]*//' || true)
    pass "XDP attaché : $xdp_info"
  else
    warn "Aucun XDP sur eth0"
    bpftool prog show name capture_xdp 2>/dev/null || true
  fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Stack : nginx
# ═════════════════════════════════════════════════════════════════════════════
# Configure and start nginx with TLS, then wait until it serves /health.
# Globals:   PROJECT (read)
# Returns:   0 once http://localhost/health answers; 1 if it never answers
#            within 20 attempts (0.5 s apart)
setup_nginx() {
  local attempt ready=false

  log "Configuration nginx avec TLS..."
  gen_tls_cert nginx
  setup_docroot
  cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf
  mkdir -p /run/nginx
  # A rejected configuration does not stop the script here; the readiness
  # check below is what reports that nginx is not serving.
  nginx -t && nginx

  for attempt in $(seq 1 20); do
    if curl -sf http://localhost/health >/dev/null 2>&1; then
      ready=true
      break
    fi
    sleep 0.5
  done

  # Never report success when the health endpoint stayed silent.
  if [ "$ready" != "true" ]; then
    fail "nginx ne répond pas sur http://localhost/health"
    return 1
  fi
  pass "nginx démarré"
}
# Ask nginx to stop; errors are ignored and the status is always 0.
stop_nginx() {
  if ! nginx -s stop 2>/dev/null; then
    return 0
  fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Stack : apache
# ═════════════════════════════════════════════════════════════════════════════
# Configure and start Apache httpd with TLS, then wait until it serves /health.
# Globals:   PROJECT (read)
# Returns:   0 once http://localhost/health answers; 1 if it never answers
#            within 20 attempts (0.5 s apart)
setup_apache() {
  local attempt ready=false

  log "Configuration Apache httpd avec TLS..."
  gen_tls_cert apache
  setup_docroot

  # Enable mod_http2 when httpd does not already list it (best effort).
  if command -v httpd >/dev/null 2>&1; then
    if ! httpd -M 2>/dev/null | grep -q http2_module; then
      echo "LoadModule http2_module modules/mod_http2.so" \
        >> /etc/httpd/conf.modules.d/00-base.conf 2>/dev/null || true
    fi
  fi

  mkdir -p /run/httpd /var/log/httpd
  cp "$PROJECT/tests/integration/apache/platform/httpd-ssl.conf" \
    /etc/httpd/conf.d/ssl.conf 2>/dev/null || true

  # The whole "check config, then run in foreground" list is backgrounded.
  httpd -t 2>&1 && httpd -DFOREGROUND &
  sleep 2

  for attempt in $(seq 1 20); do
    if curl -sf http://localhost/health >/dev/null 2>&1; then
      ready=true
      break
    fi
    sleep 0.5
  done

  # Never report success when the health endpoint stayed silent.
  if [ "$ready" != "true" ]; then
    fail "Apache httpd ne répond pas sur http://localhost/health"
    return 1
  fi
  pass "Apache httpd démarré"
}
# Signal every httpd process; errors are ignored and the status is always 0.
stop_apache() {
  if ! pkill httpd 2>/dev/null; then
    return 0
  fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Stack : hitch + varnish
# ═════════════════════════════════════════════════════════════════════════════
# Start the hitch -> Varnish -> Python backend chain:
#   hitch listens on :443 and forwards to 127.0.0.1:6081 (PROXY v1),
#   Varnish listens on 127.0.0.1:6081 and uses the VCL in /etc/varnish,
#   a small Python HTTP server answers on 127.0.0.1:8080.
# Globals:   PROJECT (read)
# Returns:   0 once https://localhost/health answers; 1 if it never answers
#            within 20 attempts (0.5 s apart)
setup_hitch_varnish() {
  local attempt ready=false

  log "Configuration hitch + Varnish..."
  gen_tls_cert hitch
  mkdir -p /etc/hitch
  # hitch reads key and certificate from a single PEM file.
  cat /etc/pki/tls/private/hitch.key /etc/pki/tls/certs/hitch.crt \
    > /etc/hitch/hitch.pem
  cat > /etc/hitch/hitch.conf << 'HCONF'
frontend = "[*]:443"
backend = "[127.0.0.1]:6081"
pem-file = "/etc/hitch/hitch.pem"
write-proxy-v1 = on
tls-protos = TLSv1.2 TLSv1.3
ciphers = "ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256:TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256"
alpn-protos = "h2,http/1.1"
workers = 2
user = "nobody"
daemon = off
log-level = 1
syslog = off
HCONF

  mkdir -p /etc/varnish
  # Prefer the VCL shipped with the project; otherwise write a minimal one.
  cp "$PROJECT/tests/integration/hitch-varnish/platform/varnish.vcl" \
    /etc/varnish/default.vcl 2>/dev/null || {
    cat > /etc/varnish/default.vcl << 'VCL'
vcl 4.1;
backend default { .host = "127.0.0.1"; .port = "8080"; }
sub vcl_deliver {
    set resp.http.Via = "1.1 varnish";
    set resp.http.X-Client-IP = client.ip;
}
VCL
  }
  setup_docroot

  # HTTP backend (port 8080): answers GET and POST with a small JSON body.
  python3 -c "
import http.server, socketserver, json
class H(http.server.BaseHTTPRequestHandler):
    def log_message(self, *a): pass
    def do_GET(self):
        body = json.dumps({'status':'ok','stack':'hitch-varnish','path':self.path}).encode()
        self.send_response(200)
        self.send_header('Content-Type','application/json')
        self.send_header('Content-Length',len(body))
        self.end_headers()
        self.wfile.write(body)
    def do_POST(self):
        n = int(self.headers.get('Content-Length',0))
        self.rfile.read(n)
        body = b'{\"result\":\"accepted\"}'
        self.send_response(200)
        self.send_header('Content-Type','application/json')
        self.send_header('Content-Length',len(body))
        self.end_headers()
        self.wfile.write(body)
with socketserver.TCPServer(('127.0.0.1', 8080), H) as s:
    s.serve_forever()
" &
  sleep 1

  varnishd -F -f /etc/varnish/default.vcl \
    -a "127.0.0.1:6081,PROXY" \
    -p feature=+http2 \
    -s malloc,64m \
    -T 127.0.0.1:6082 &
  sleep 2

  hitch --config=/etc/hitch/hitch.conf &
  sleep 2

  for attempt in $(seq 1 20); do
    if curl -skf https://localhost/health >/dev/null 2>&1; then
      ready=true
      break
    fi
    sleep 0.5
  done

  # Never report success when the health endpoint stayed silent.
  if [ "$ready" != "true" ]; then
    fail "hitch + Varnish ne répondent pas sur https://localhost/health"
    return 1
  fi
  pass "hitch + Varnish démarrés"
}
# Signal hitch, varnishd and the Python backend; every error is ignored.
stop_hitch_varnish() {
  local proc_name
  for proc_name in hitch varnishd; do
    pkill "$proc_name" 2>/dev/null || true
  done
  # The backend has no process name of its own, so match its command line.
  pkill -f "TCPServer.*8080" 2>/dev/null || true
}
# ═════════════════════════════════════════════════════════════════════════════
# Vérification ClickHouse
# ═════════════════════════════════════════════════════════════════════════════
# Run one "count must be positive" check against ClickHouse.
# Arguments: $1 - message prefix on success (the count is appended)
#            $2 - message reported when the count is not positive
#            $3 - reporter used for that case: fail or warn
#            $4 - SQL query returning a single number
_verify_count() {
  local ok_label="$1" missing_msg="$2" reporter="$3" query="$4"
  local count
  count=$(ch_val "$query")
  # The test's own stderr is silenced so a non-numeric answer simply counts
  # as "not positive".
  if [ "${count:-0}" -gt 0 ] 2>/dev/null; then
    pass "$ok_label ($count)"
  else
    "$reporter" "$missing_msg"
  fi
}

# Check that captured traffic reached ClickHouse: wait for rows in
# http_logs_raw, then in http_logs, then check individual columns.
# Globals:   CH_HOST (read, optional; defaults to localhost)
# Outputs:   one PASS / FAIL / WARN line per check, through pass/fail/warn
verify_db() {
  # All intermediate values stay local so that generic names such as "path"
  # or "status" are not overwritten in the caller's scope.
  local attempt raw_count logs_count methods total
  local raw_ok=false logs_ok=false

  log "Vérification des données dans ClickHouse..."

  # Run a query through the ClickHouse HTTP interface and print the answer
  # with spaces and newlines removed; "0" is appended when the pipeline fails.
  ch_val() {
    local ch_http_host="${CH_HOST:-localhost}"
    curl -sf "http://${ch_http_host}:8123/?database=ja4_logs" \
      --data-urlencode "query=$1" 2>/dev/null | tr -d ' \n' || echo "0"
  }

  # Wait for http_logs_raw to contain data (max 30s).
  log " Attente données brutes dans ClickHouse..."
  for attempt in $(seq 1 15); do
    raw_count=$(ch_val "SELECT count() FROM http_logs_raw")
    if [ "${raw_count:-0}" -gt 0 ] 2>/dev/null; then
      pass "http_logs_raw : $raw_count lignes (${attempt}*2s)"
      raw_ok=true
      break
    fi
    sleep 2
  done
  if [ "$raw_ok" = "false" ]; then
    fail "http_logs_raw vide — ja4ebpf n'a rien capturé"
    log " Logs ja4ebpf :"
    tail -10 /tmp/ja4ebpf.log 2>/dev/null | sed 's/^/ /'
  fi

  # Wait for the http_logs materialized view to fill up (max 30s).
  log " Attente MV http_logs..."
  for attempt in $(seq 1 15); do
    logs_count=$(ch_val "SELECT count() FROM http_logs")
    if [ "${logs_count:-0}" -gt 0 ] 2>/dev/null; then
      logs_ok=true
      break
    fi
    sleep 2
  done
  if [ "$logs_ok" = "false" ]; then
    warn "MV http_logs vide après 30s — vérification partielle uniquement"
  fi

  # L3/L4
  _verify_count "L3/L4 TTL" "L3/L4 TTL absent" fail \
    "SELECT count() FROM http_logs WHERE ip_meta_ttl > 0"
  _verify_count "TCP MSS" "TCP MSS absent" fail \
    "SELECT count() FROM http_logs WHERE tcp_meta_mss > 0"
  # TLS
  _verify_count "JA4 fingerprint" "JA4 absent" fail \
    "SELECT count() FROM http_logs WHERE ja4 != ''"
  _verify_count "TLS SNI" "TLS SNI absent" warn \
    "SELECT count() FROM http_logs WHERE tls_sni != ''"
  # L7 HTTP
  _verify_count "L7 HTTP" "L7 HTTP ABSENT" fail \
    "SELECT count() FROM http_logs WHERE method != ''"
  _verify_count "L7 path" "L7 path absent" fail \
    "SELECT count() FROM http_logs WHERE path != ''"
  _verify_count "status_code" "status_code absent" warn \
    "SELECT count() FROM http_logs WHERE status_code > 0"

  methods=$(ch_val "SELECT groupArray(method) FROM (SELECT DISTINCT method FROM http_logs WHERE method != '')")
  log "Méthodes HTTP : $methods"
  total=$(ch_val "SELECT count() FROM http_logs")
  pass "Total http_logs : $total"
}
# ═════════════════════════════════════════════════════════════════════════════
# Nettoyage
# ═════════════════════════════════════════════════════════════════════════════
# Tear down ja4ebpf, the web stack selected by STACK and, when ClickHouse is
# local, its container.
# Globals: STACK (read), CH_HOST (read, optional)
stop_stack() {
  pkill ja4ebpf 2>/dev/null || true

  if [ "$STACK" = "nginx" ]; then
    stop_nginx
  elif [ "$STACK" = "apache" ]; then
    stop_apache
  elif [ "$STACK" = "hitch-varnish" ]; then
    stop_hitch_varnish
  fi

  # Leave an external ClickHouse alone; only the local container is removed.
  case "${CH_HOST:-}" in
    "" | 127.0.0.1 | localhost)
      docker rm -f ja4-clickhouse 2>/dev/null || true
      ;;
  esac
}
# EXIT handler: tear everything down unless KEEP_RUNNING is "true".
# Globals: KEEP_RUNNING (read)
cleanup() {
  if [ "$KEEP_RUNNING" = "true" ]; then
    return 0
  fi
  log "Nettoyage..."
  stop_stack
}
# Run the handler on every exit path of the script.
trap cleanup EXIT
# ═════════════════════════════════════════════════════════════════════════════
# Phase 1 : démarrage des services
# ═════════════════════════════════════════════════════════════════════════════
# Phase 1: start ClickHouse, the selected web stack and ja4ebpf, then print
# the addresses the host should send traffic to.
# Globals:   STACK (read), PROJECT (read), CH_HOST (read, optional)
# Returns:   exits the script with status 1 on an unknown stack or when
#            Docker is needed but missing
do_start() {
  local eth0_ip

  echo ""
  echo "╔══════════════════════════════════════════╗"
  echo "║ Phase 1 : Démarrage — $STACK"
  echo "╚══════════════════════════════════════════╝"
  echo ""

  # Reject an unknown stack before anything is built or started, so a typo
  # does not leave a ClickHouse container running for nothing.
  case "$STACK" in
    nginx | apache | hitch-varnish) ;;
    *)
      fail "Stack inconnue: $STACK"
      exit 1
      ;;
  esac

  # Check prerequisites: build ja4ebpf when it is not on PATH. The build runs
  # in a subshell so the directory change does not outlive it.
  if ! command -v ja4ebpf >/dev/null 2>&1; then
    log "Rebuild ja4ebpf..."
    (
      cd "$PROJECT/services/ja4ebpf"
      GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3
      GOWORK=off CGO_ENABLED=0 go build -o /tmp/ja4ebpf_new ./cmd/ja4ebpf/ && mv /tmp/ja4ebpf_new /usr/local/bin/ja4ebpf
    )
  fi

  # Docker is only required for a local ClickHouse.
  if [ -z "${CH_HOST:-}" ] || [ "$CH_HOST" = "127.0.0.1" ] || [ "$CH_HOST" = "localhost" ]; then
    command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; }
  fi

  start_clickhouse
  case "$STACK" in
    nginx) setup_nginx ;;
    apache) setup_apache ;;
    hitch-varnish) setup_hitch_varnish ;;
  esac
  start_ja4ebpf

  # Show the IP for the host.
  eth0_ip=$(get_eth0_ip)
  echo ""
  echo " ┌─────────────────────────────────────────┐"
  echo " │ Services prêts ! │"
  echo " │ IP eth0 : $eth0_ip"
  echo " │ HTTP : http://$eth0_ip:80"
  echo " │ HTTPS : https://$eth0_ip:443"
  echo " └─────────────────────────────────────────┘"
  echo ""
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 3 : vérification
# ═════════════════════════════════════════════════════════════════════════════
# Phase 3: run the ClickHouse checks and print the PASS / WARN / FAIL summary.
# Globals: STACK, PASS_COUNT, WARN_COUNT, FAIL_COUNT and colour codes (read)
do_verify() {
  local rule="════════════════════════════════════════════"
  local summary

  echo ""
  echo "╔══════════════════════════════════════════╗"
  echo "║ Phase 3 : Vérification — $STACK"
  echo "╚══════════════════════════════════════════╝"
  echo ""

  verify_db

  echo ""
  echo "$rule"
  summary=" ${GREEN}OK${RESET}: $PASS_COUNT ${YELLOW}WARN${RESET}: $WARN_COUNT ${RED}FAIL${RESET}: $FAIL_COUNT"
  echo -e "$summary"

  # Failure branch first: show the verdict and the tail of the agent log.
  if ! [ "$FAIL_COUNT" -eq 0 ]; then
    echo -e " ${RED}${BOLD}$STACK : $FAIL_COUNT tests échoués${RESET}"
    tail -20 /tmp/ja4ebpf.log 2>/dev/null || true
    return 0
  fi
  echo -e " ${GREEN}${BOLD}$STACK : Tous les tests réussis !${RESET}"
}
# ═════════════════════════════════════════════════════════════════════════════
# Main
# ═════════════════════════════════════════════════════════════════════════════
# Entry point: dispatch on MODE, then exit 0 only when no check failed.
case "$MODE" in
  start)
    # Remove a marker left behind by a previous run; otherwise the wait loop
    # below would end at once, before the host has sent any traffic.
    rm -f /tmp/ja4ebpf-traffic-done
    do_start
    echo " En attente de trafic depuis le host..."
    # Wait for the host to generate the traffic: it creates
    # /tmp/ja4ebpf-traffic-done once the traffic has been sent.
    traffic_done=false
    for i in $(seq 1 120); do
      if [ -f /tmp/ja4ebpf-traffic-done ]; then
        traffic_done=true
        break
      fi
      sleep 1
    done
    if [ "$traffic_done" != "true" ]; then
      # Keep going as before, but make the timeout visible in the report.
      warn "Aucun marqueur de fin de trafic après 120s — poursuite sans signal du host"
    fi
    # With an external ClickHouse (distributed E2E), verification is done by
    # the orchestrator script (run-e2e-test.sh Phase 5). Local verification
    # is skipped because the materialized views may not be populated yet.
    if [ -n "${CH_HOST:-}" ] && [ "$CH_HOST" != "127.0.0.1" ] && [ "$CH_HOST" != "localhost" ]; then
      log "ClickHouse externe — vérification locale ignorée (gérée par l'orchestrateur)"
      log "Logs ja4ebpf :"
      tail -5 /tmp/ja4ebpf.log 2>/dev/null | sed 's/^/ /'
      pass "ja4ebpf actif (ClickHouse externe)"
    else
      # Give the ClickHouse pipeline time to process the raw data
      # (http_logs_raw -> MV http_logs) before verifying.
      log "Attente pipeline ClickHouse (20s)..."
      sleep 20
      do_verify
    fi
    ;;
  verify)
    do_verify
    ;;
  *)
    # Legacy mode: everything inside the VM (local traffic only).
    # Note: XDP on eth0 will NOT capture localhost traffic.
    do_start
    log "ATTENTION : le trafic localhost n'est pas capturé par XDP/eth0"
    log "Utilisez 'make test-vm-matrix' pour le test complet avec trafic host"
    # Generate some traffic anyway, for the uprobes.
    for path in / /health; do
      curl -sf -k "https://localhost$path" >/dev/null 2>&1 || true
    done
    sleep 10
    do_verify
    ;;
esac

# The script's exit status reflects whether any check failed.
if [ "$FAIL_COUNT" -eq 0 ]; then
  exit 0
fi
exit 1