#!/usr/bin/env bash
# =============================================================================
# run-e2e-test.sh — distributed end-to-end test for ja4-platform
#
# Architecture:
#   3 endpoint VMs (centos8 / rocky9 / rocky10): nginx + ja4ebpf
#   1 analysis VM (192.168.42.10): ClickHouse + bot-detector + dashboard
#   1 traffic VM: curl-impersonate + httpx (external traffic generation)
#
# Pipeline under test:
#   host traffic → endpoints → ja4ebpf → central ClickHouse →
#   aggregation MVs → bot-detector ML → dashboard API
#
# Usage:
#   make e2e-up && make test-e2e
#   TRAFFIC_COUNT=100 make test-e2e-quick
#   ./run-e2e-test.sh --hits 1000 --http-ratio 0.3 --dns 2 --tls 1.2,1.3
#
# Parameters (each also settable through the environment variable in brackets):
#   --hits N          Total number of requests per VM (default 500)  [TRAFFIC_COUNT]
#   --http-ratio R    HTTP share of the traffic, 0.0–1.0 (default 0.2) [HTTP_RATIO]
#   --dns N           Number of SNI hostnames used, 1–4 (default 4)   [DNS_COUNT]
#   --tls VERS        Comma-separated TLS versions (default 1.2,1.3)   [TLS_VERSIONS]
#   --src-ips N       Number of source IPs (aliases added inside the traffic VM,
#                     default 1)                                       [SRC_IP_COUNT]
#   --keep-analysis   Keep the analysis stack (CH + dashboard) after the test
#   --up              Start the analysis stack and the endpoints, then stop
#                     (no traffic, no verification — for manual inspection)
# =============================================================================
set -euo pipefail

VM_DIR="$(cd "$(dirname "$0")" && pwd)"
# shellcheck disable=SC2034  # kept for parity with the original preamble
PROJECT_ROOT="$(cd "$VM_DIR/../.." && pwd)"
ANALYSIS_IP="192.168.42.10"
ENDPOINT_VMS="centos8 rocky9 rocky10"
TRAFFIC_VM="traffic"
ALL_VMS="centos8 rocky9 rocky10 analysis traffic"
STACK="all-ips"

# ── Defaults (overridable from the CLI or the environment) ──
TRAFFIC_COUNT="${TRAFFIC_COUNT:-500}"
HTTP_RATIO="${HTTP_RATIO:-0.2}"
DNS_COUNT="${DNS_COUNT:-4}"
TLS_VERSIONS="${TLS_VERSIONS:-1.2,1.3}"
SRC_IP_COUNT="${SRC_IP_COUNT:-1}"
KEEP_ANALYSIS="${KEEP_ANALYSIS:-false}"
UP_ONLY="${UP_ONLY:-false}"

# ── CLI parsing ──

# usage [EXIT_CODE] — print the help text and exit (status 0 unless given).
usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Options :
 --hits N Nombre de requêtes par VM (déf. 500)
 --http-ratio R Ratio HTTP (0=100% HTTPS, 1=100% HTTP, déf. 0.2)
 --dns N Nombre de hostnames SNI utilisés (1-4, déf. 4)
 --tls VERS Versions TLS, séparées par virgules (déf. 1.2,1.3)
 --src-ips N Nombre d'IPs sources par VM (déf. 1)
 --keep-analysis Conserver la stack analysis (CH + dashboard) après le test
 --up Démarrer stack analysis + endpoints, puis s'arrêter
 -h, --help Afficher cette aide
EOF
  exit "${1:-0}"
}

# require_value "$@" — abort with a clear message when the option in $1 has no
# value after it (otherwise `set -u` produces a cryptic "unbound variable").
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "ERREUR: l'option $1 requiert une valeur" >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --hits)          require_value "$@"; TRAFFIC_COUNT="$2"; shift 2 ;;
    --http-ratio)    require_value "$@"; HTTP_RATIO="$2"; shift 2 ;;
    --dns)           require_value "$@"; DNS_COUNT="$2"; shift 2 ;;
    --tls)           require_value "$@"; TLS_VERSIONS="$2"; shift 2 ;;
    --src-ips)       require_value "$@"; SRC_IP_COUNT="$2"; shift 2 ;;
    --keep-analysis) KEEP_ANALYSIS="true"; shift ;;
    --up)            UP_ONLY="true"; shift ;;
    -h | --help)     usage ;;
    *)
      # An unknown option is an error: report on stderr and exit non-zero.
      echo "Option inconnue : $1" >&2
      usage 2
      ;;
  esac
done

# ── Validation helpers ──

# is_uint VALUE — succeed when VALUE is a non-negative decimal integer without
# leading zeros (leading zeros would be read as octal by $(( ))).
is_uint() {
  local re='^(0|[1-9][0-9]*)$'
  [[ "${1-}" =~ $re ]]
}

# valid_ratio VALUE — succeed when VALUE is a plain decimal literal in [0, 1].
# The value is checked against a regex first and handed to awk with -v, so it
# is never interpreted as code.
valid_ratio() {
  local re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  [[ "${1-}" =~ $re ]] || return 1
  LC_ALL=C awk -v r="$1" 'BEGIN { exit !(r + 0 >= 0 && r + 0 <= 1) }'
}

# split_hits TOTAL RATIO — print "<https_count> <http_count>".
# The HTTP share is rounded to the nearest integer and HTTPS takes the
# remainder, so both counts always add up to TOTAL.
split_hits() {
  LC_ALL=C awk -v n="$1" -v r="$2" \
    'BEGIN { http = int(n * r + 0.5); print n - http, http }'
}

# ── Validation ──
if ! valid_ratio "$HTTP_RATIO"; then
  echo "ERREUR: --http-ratio doit être entre 0.0 et 1.0" >&2
  exit 1
fi
if ! is_uint "$TRAFFIC_COUNT"; then
  echo "ERREUR: --hits doit être un entier positif ou nul" >&2
  exit 1
fi
if ! is_uint "$DNS_COUNT" || [ "$DNS_COUNT" -lt 1 ] || [ "$DNS_COUNT" -gt 4 ]; then
  echo "ERREUR: --dns doit être entre 1 et 4" >&2
  exit 1
fi
if ! is_uint "$SRC_IP_COUNT"; then
  echo "ERREUR: --src-ips doit être un entier positif ou nul" >&2
  exit 1
fi

# ── Map the requested TLS versions to curl flags ──
CURL_TLS_FLAGS=""
# Split on commas/whitespace through `read -a` so that no glob expansion can
# happen on user input.
read -r -a TLS_VERSION_LIST <<<"${TLS_VERSIONS//,/ }"
# The ${arr[@]+…} form keeps `set -u` quiet on an empty array with older bash.
for v in ${TLS_VERSION_LIST[@]+"${TLS_VERSION_LIST[@]}"}; do
  case "$v" in
    1.0) CURL_TLS_FLAGS="$CURL_TLS_FLAGS --tlsv1.0" ;;
    1.1) CURL_TLS_FLAGS="$CURL_TLS_FLAGS --tlsv1.1" ;;
    1.2) CURL_TLS_FLAGS="$CURL_TLS_FLAGS --tlsv1.2" ;;
    1.3) CURL_TLS_FLAGS="$CURL_TLS_FLAGS --tlsv1.3" ;;
    *) echo "[e2e] WARN: Version TLS '$v' non reconnue, ignorée" >&2 ;;
  esac
done
if [ -z "$CURL_TLS_FLAGS" ]; then
  # Nothing usable was requested: fall back to the documented default.
  CURL_TLS_FLAGS="--tlsv1.2 --tlsv1.3"
  TLS_VERSIONS="1.2,1.3"
fi
CURL_TLS_FLAGS="${CURL_TLS_FLAGS# }" # trim leading space

# HTTP vs HTTPS request counts derived from the ratio
read -r HTTPS_COUNT HTTP_COUNT <<<"$(split_hits "$TRAFFIC_COUNT" "$HTTP_RATIO")"

GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'
# shellcheck disable=SC2034  # kept for parity with the original palette
CYAN='\033[0;36m'
RESET='\033[0m'; BOLD='\033[1m'
PASS_COUNT=0; FAIL_COUNT=0; WARN_COUNT=0

# Reporting helpers: log prints a timestamped line; pass/fail/warn print a
# coloured verdict and bump the matching global counter.
log()  { echo -e "${BOLD}[e2e]${RESET} $(date +%H:%M:%S) $*"; }
pass() { echo -e " ${GREEN}PASS${RESET} $*"; PASS_COUNT=$((PASS_COUNT + 1)); }
fail() { echo -e " ${RED}FAIL${RESET} $*"; FAIL_COUNT=$((FAIL_COUNT + 1)); }
warn() { echo -e " ${YELLOW}WARN${RESET} $*"; WARN_COUNT=$((WARN_COUNT + 1)); }

# ch_query SQL — run SQL through the ClickHouse HTTP API on the analysis VM.
# Prints the response body, or an empty line when the request fails; never
# returns non-zero.
ch_query() {
  curl -sf "http://${ANALYSIS_IP}:8123/" -d "$1" 2>/dev/null || echo ""
}

# vm_ipv4 VM IFACE — print the first IPv4 address of IFACE inside VM, or
# nothing. Never fails: a missing interface makes the remote `ip` exit
# non-zero, which must not abort the script under `set -e` + `pipefail`
# (callers rely on an empty result to try another interface).
vm_ipv4() {
  local raw
  raw=$(vagrant ssh "$1" -- "ip -4 addr show $2" 2>/dev/null) || true
  awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}' <<<"$raw"
}

# vm_base_ip VM — address of VM on eth1, falling back to eth0.
vm_base_ip() {
  local ip
  ip=$(vm_ipv4 "$1" eth1)
  if [ -z "$ip" ]; then
    ip=$(vm_ipv4 "$1" eth0)
  fi
  printf '%s\n' "$ip"
}

# wait_for_url LABEL URL ATTEMPTS — poll URL every 2 s, at most ATTEMPTS times.
# Records a PASS as soon as the URL answers, a FAIL when the budget is spent.
# Always returns 0 so the remaining phases still run and report.
wait_for_url() {
  local label=$1 url=$2 attempts=$3 i
  for ((i = 1; i <= attempts; i++)); do
    if curl -sf "$url" >/dev/null 2>&1; then
      pass "${label} prêt (${i}*2s)"
      return 0
    fi
    sleep 2
  done
  fail "${label} injoignable après $((attempts * 2))s"
  return 0
}

# ═════════════════════════════════════════════════════════════════════════════
# Phase 0: environment setup
# ═════════════════════════════════════════════════════════════════════════════

# Boot every VM that is not running, rsync the project into all of them, then
# narrow ENDPOINT_VMS down to the endpoints that have ja4ebpf installed and an
# eth0 address.
# Globals written: ENDPOINT_VMS, ACTIVE_ENDPOINTS, ENDPOINT_IPS_ARRAY
phase0_setup() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 0 : Setup environnement ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  cd "$VM_DIR"

  # Make sure the VMs exist and are running. The status output is captured
  # before matching: `vagrant … | grep -q` can fail spuriously under pipefail
  # when grep exits before vagrant has finished writing.
  local vm status_out
  for vm in $ALL_VMS; do
    status_out=$(vagrant status "$vm" 2>/dev/null) || true
    if [[ "$status_out" == *running* ]]; then
      log "$vm déjà en cours d'exécution"
    else
      log "Démarrage de $vm..."
      vagrant up "$vm" 2>&1 | tail -5
    fi
  done

  # Rsync the files to all VMs
  log "Synchronisation des fichiers..."
  for vm in $ALL_VMS; do
    vagrant rsync "$vm" 2>&1 | tail -1
  done

  # Discover the endpoints' eth0 addresses; skip endpoints without ja4ebpf
  declare -A VM_IPS=()
  ACTIVE_ENDPOINTS=""
  local probe ip
  for vm in $ENDPOINT_VMS; do
    probe=$(vagrant ssh "$vm" -- 'command -v ja4ebpf' 2>/dev/null) || true
    if [[ "$probe" != *ja4ebpf* ]]; then
      warn "$vm ignoré — ja4ebpf non installé"
      continue
    fi
    ip=$(vm_ipv4 "$vm" eth0)
    if [ -z "$ip" ]; then
      fail "Impossible d'obtenir l'IP eth0 de $vm"
    else
      VM_IPS[$vm]=$ip
      log " $vm : ${ip}"
      ACTIVE_ENDPOINTS="$ACTIVE_ENDPOINTS $vm"
    fi
  done
  ENDPOINT_VMS="${ACTIVE_ENDPOINTS# }" # overwrite with active VMs only

  # Export the addresses for later phases
  ENDPOINT_IPS_ARRAY=()
  for vm in $ENDPOINT_VMS; do
    ENDPOINT_IPS_ARRAY+=("${VM_IPS[$vm]}")
  done

  if [ -n "$ENDPOINT_VMS" ]; then
    pass "Endpoints actifs : ${ENDPOINT_VMS}"
  else
    # With no endpoint nothing downstream can succeed; do not report a PASS.
    fail "Aucun endpoint actif"
  fi
}

# ═════════════════════════════════════════════════════════════════════════════
# Phase 1: start the analysis stack
# ═════════════════════════════════════════════════════════════════════════════

# Recreate the docker compose stack on the analysis VM from empty volumes and
# wait for ClickHouse, bot-detector and the dashboard to answer.
phase1_analysis() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 1 : Stack analysis (ClickHouse + ML + Dashboard) ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  cd "$VM_DIR"

  # Drop the volumes so the test starts from an empty database
  log "Purge des volumes Docker pour DB vierge..."
  vagrant ssh analysis -- "cd /ja4-platform/tests/vm/analysis && docker compose down -v --remove-orphans" 2>&1 | tail -5

  # Start the analysis stack through docker compose
  log "Démarrage de la stack analysis sur la VM..."
  vagrant ssh analysis -- "cd /ja4-platform/tests/vm/analysis && docker compose up -d --build" 2>&1 | tail -20

  log "Attente ClickHouse (max 120s)..."
  wait_for_url "ClickHouse" "http://${ANALYSIS_IP}:8123/ping" 60

  # Both databases must exist
  local db_count
  db_count=$(ch_query "SELECT count() FROM system.databases WHERE name IN ('ja4_logs','ja4_processing')" | tr -d ' \n')
  if [ "$db_count" = "2" ]; then
    pass "Bases ja4_logs + ja4_processing créées"
  else
    fail "Bases manquantes (obtenu: $db_count)"
  fi

  # The raw table must be empty on a fresh instance. An empty/failed answer
  # counts as 0, a non-numeric one falls through to the warning.
  local raw_init
  raw_init=$(ch_query "SELECT count() FROM ja4_logs.http_logs_raw" | tr -d ' \n')
  if [ "${raw_init:-0}" -eq 0 ] 2>/dev/null; then
    pass "ClickHouse vierge avant test (http_logs_raw=0)"
  else
    warn "ClickHouse non vide (http_logs_raw=${raw_init}) — données résiduelles"
  fi

  log "Attente bot-detector (max 120s)..."
  wait_for_url "bot-detector" "http://${ANALYSIS_IP}:8080/" 60

  log "Attente dashboard (max 60s)..."
  wait_for_url "dashboard" "http://${ANALYSIS_IP}:8000/health" 30
}

# ═════════════════════════════════════════════════════════════════════════════
# Phase 2: start the endpoints
# ═════════════════════════════════════════════════════════════════════════════

# Stop leftovers from a previous session, launch run-tests-vm.sh on every
# endpoint in the background and probe the three service addresses of each VM
# on ports 80 and 443.
# Globals written: ENDPOINT_PIDS (PIDs of the background `vagrant ssh` jobs)
phase2_endpoints() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 2 : Endpoints (nginx+apache+hitch + ja4ebpf) ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  cd "$VM_DIR"

  local vm

  # Stop services left over on the endpoints (previous session)
  log "Arrêt des services existants sur les endpoints..."
  for vm in $ENDPOINT_VMS; do
    vagrant ssh "$vm" -- "sudo nginx -s stop 2>/dev/null; sudo pkill ja4ebpf 2>/dev/null; sudo pkill httpd 2>/dev/null; sudo pkill hitch 2>/dev/null; sudo pkill varnishd 2>/dev/null; sudo pkill -f 'TCPServer.*8080' 2>/dev/null; sudo pkill -f 'TCPServer.*:80' 2>/dev/null" 2>/dev/null || true
  done
  sleep 2

  # Start each endpoint in the background.
  # ja4ebpf is pointed at ClickHouse on the analysis VM through CH_HOST.
  ENDPOINT_PIDS=()
  for vm in $ENDPOINT_VMS; do
    log "Démarrage $vm (nginx + ja4ebpf → ${ANALYSIS_IP})..."
    vagrant ssh "$vm" -- "sudo rm -f /tmp/ja4ebpf-traffic-done" 2>/dev/null || true
    vagrant ssh "$vm" -- "sudo CH_HOST=${ANALYSIS_IP} bash /ja4-platform/tests/vm/run-tests-vm.sh ${STACK} start" &
    ENDPOINT_PIDS+=($!)
  done

  # Give the services time to come up
  log "Attente démarrage des endpoints (45s)..."
  sleep 45

  # Probe every endpoint: 3 addresses × 2 ports. The base address is taken
  # from eth1 (eth0 as fallback); the two extra addresses are base+50 and
  # base+51 (the original notes this mirrors setup_all_ips, defined elsewhere).
  local base prefix last pair svc ip probe scheme port
  local ipv4_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
  for vm in $ENDPOINT_VMS; do
    base=$(vm_base_ip "$vm")
    if [[ ! "$base" =~ $ipv4_re ]]; then
      warn "$vm : adresse IP introuvable, vérification de connectivité ignorée"
      continue
    fi
    prefix=${base%.*}
    last=${base##*.}
    for pair in "nginx:${base}" \
      "apache:${prefix}.$((10#$last + 50))" \
      "hitch:${prefix}.$((10#$last + 51))"; do
      svc=${pair%%:*}
      ip=${pair#*:}
      for probe in http:80 https:443; do
        scheme=${probe%%:*}
        port=${probe#*:}
        # -k only matters for https (self-signed certificate)
        if curl -sf -k --connect-timeout 3 "${scheme}://${ip}/health" >/dev/null 2>&1; then
          pass "$vm ${svc}:${port} OK (${ip})"
        else
          warn "$vm ${svc}:${port} injoignable (${ip})"
        fi
      done
    done
  done
}

# ═════════════════════════════════════════════════════════════════════════════
# Phase 3: traffic generation
# ═════════════════════════════════════════════════════════════════════════════
# ── Address helpers used by the phases below ──

# vm_ipv4 VM IFACE — print the first IPv4 address of IFACE inside VM, or
# nothing. Never fails: a missing interface makes the remote `ip` exit
# non-zero, which must not abort the script under `set -e` + `pipefail`
# (callers rely on an empty result to try another interface).
vm_ipv4() {
  local raw
  raw=$(vagrant ssh "$1" -- "ip -4 addr show $2" 2>/dev/null) || true
  awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}' <<<"$raw"
}

# vm_base_ip VM — address of VM on eth1, falling back to eth0.
vm_base_ip() {
  local ip
  ip=$(vm_ipv4 "$1" eth1)
  if [ -z "$ip" ]; then
    ip=$(vm_ipv4 "$1" eth0)
  fi
  printf '%s\n' "$ip"
}

# service_ips BASE_IP — print "<nginx_ip> <apache_ip> <hitch_ip>", i.e. BASE_IP,
# BASE_IP+50 and BASE_IP+51 on the last octet. Prints nothing when BASE_IP is
# not a dotted quad.
service_ips() {
  local base=${1-} prefix last
  local ipv4_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
  [[ "$base" =~ $ipv4_re ]] || return 0
  prefix=${base%.*}
  last=${base##*.}
  printf '%s %s.%d %s.%d\n' "$base" \
    "$prefix" "$((10#$last + 50))" "$prefix" "$((10#$last + 51))"
}

# traffic_vm_addr — print "<iface> <ip>" for the traffic VM: eth1 when it has
# an address, eth0 otherwise (the ip part is empty when neither has one).
traffic_vm_addr() {
  local iface=eth1 ip
  ip=$(vm_ipv4 "$TRAFFIC_VM" eth1)
  if [ -z "$ip" ]; then
    iface=eth0
    ip=$(vm_ipv4 "$TRAFFIC_VM" eth0)
  fi
  printf '%s %s\n' "$iface" "$ip"
}

# traffic_alias_ips BASE_IP COUNT — print the COUNT-1 alias addresses derived
# from BASE_IP, one per line: last octet + 100 + i, wrapping to 10 + i when
# that would exceed 254. Used both to add and to remove the aliases so the two
# sides cannot drift apart.
traffic_alias_ips() {
  local base=${1-} count=${2-} prefix last i octet
  local ipv4_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' uint_re='^[0-9]+$'
  [[ "$base" =~ $ipv4_re && "$count" =~ $uint_re ]] || return 0
  prefix=${base%.*}
  last=$((10#${base##*.}))
  for ((i = 1; i < 10#$count; i++)); do
    octet=$((last + 100 + i))
    if ((octet > 254)); then
      octet=$((10 + i))
    fi
    printf '%s.%s\n' "$prefix" "$octet"
  done
}

# ok_count "N/M" — print N when it is a decimal integer, 0 otherwise. Keeps
# arbitrary generator output out of arithmetic expansion.
ok_count() {
  local n=${1-}
  local uint_re='^[0-9]+$'
  n=${n%%/*}
  if [[ "$n" =~ $uint_re ]]; then
    printf '%d\n' "$((10#$n))"
  else
    printf '0\n'
  fi
}

# ch_count SQL — run a count query through ch_query and print the result as a
# plain integer (0 when the answer is empty or not numeric).
ch_count() {
  local n
  local uint_re='^[0-9]+$'
  n=$(ch_query "$1" | tr -d ' \n')
  if [[ "$n" =~ $uint_re ]]; then
    printf '%d\n' "$((10#$n))"
  else
    printf '0\n'
  fi
}

# Generate HTTPS/HTTP traffic (and HTTP/2 when httpx is available) from the
# traffic VM towards every service address of every active endpoint.
phase3_traffic() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 3 : Génération de trafic → endpoints ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""

  local total_ok=0

  # SNI domains (the original notes: cert CN=platform.test, nginx accepts any
  # name through server_name _)
  local DOMAIN_SUFFIXES_ALL=("platform.test" "api.platform.test" "www.platform.test" "app.platform.test")
  local DOMAIN_SUFFIXES=("${DOMAIN_SUFFIXES_ALL[@]:0:${DNS_COUNT}}")

  # ── Add alias addresses on the traffic VM to diversify the sources ──
  local TRAFFIC_IFACE="" TRAFFIC_IP="" alias_ip
  read -r TRAFFIC_IFACE TRAFFIC_IP <<<"$(traffic_vm_addr)"
  if [ "$SRC_IP_COUNT" -gt 1 ] 2>/dev/null && [ -n "$TRAFFIC_IP" ]; then
    log "Ajout de ${SRC_IP_COUNT} IPs sources sur ${TRAFFIC_VM} (${TRAFFIC_IFACE})..."
    # A `for` over the word list (not `while read`) so that ssh cannot swallow
    # the remaining addresses from stdin.
    for alias_ip in $(traffic_alias_ips "$TRAFFIC_IP" "$SRC_IP_COUNT"); do
      vagrant ssh "$TRAFFIC_VM" -- "sudo ip addr add ${alias_ip}/24 dev ${TRAFFIC_IFACE} 2>/dev/null || true" 2>/dev/null || true
    done
  fi

  # ── Build the paired target-IP and SNI lists ──
  # Every (VM, service, domain) triple gets its own SNI "vm-service-domain".
  # TARGET_IPS and SNI_HOSTS_STR are parallel lists of equal length.
  local TARGET_IPS="" SNI_HOSTS_STR=""
  local target_vm base ip1 ip2 ip3 entry svc_name target_ip domain
  for target_vm in $ENDPOINT_VMS; do
    base=$(vm_base_ip "$target_vm")
    ip1="" ip2="" ip3=""
    read -r ip1 ip2 ip3 <<<"$(service_ips "$base")"
    if [ -z "$ip1" ]; then
      warn "$target_vm : adresse IP introuvable — exclu des cibles"
      continue
    fi
    for entry in "nginx:${ip1}" "apache:${ip2}" "hitch:${ip3}"; do
      svc_name=${entry%%:*}
      target_ip=${entry#*:}
      for domain in "${DOMAIN_SUFFIXES[@]}"; do
        TARGET_IPS="$TARGET_IPS $target_ip"
        SNI_HOSTS_STR="$SNI_HOSTS_STR ${target_vm}-${svc_name}-${domain}"
      done
    done
  done
  TARGET_IPS="${TARGET_IPS# }"
  SNI_HOSTS_STR="${SNI_HOSTS_STR# }"

  if [ -z "$TARGET_IPS" ]; then
    # Nothing to send traffic to; the later phases will report the empty
    # pipeline.
    fail "Aucune cible disponible pour la génération de trafic"
    return 0
  fi

  log "Cibles : $(echo "$TARGET_IPS" | wc -w) paires IP/SNI ($(echo "$SNI_HOSTS_STR" | tr ' ' '\n' | sort -u | wc -l) uniques)"

  # ── Sync generate-traffic.sh to the traffic VM ──
  log "Synchronisation du script de trafic vers ${TRAFFIC_VM}..."
  vagrant rsync "$TRAFFIC_VM" 2>&1 | tail -1

  # ── Write the config, then launch the traffic from the traffic VM ──
  # The outer double quotes expand the values locally; the quoted 'ENVEOF'
  # keeps the remote shell from expanding anything again.
  log "Génération depuis ${TRAFFIC_VM} : ${HTTPS_COUNT} HTTPS + ${HTTP_COUNT} HTTP..."
  vagrant ssh "$TRAFFIC_VM" -- "cat > /tmp/e2e-traffic.env << 'ENVEOF'
export HITS=${HTTPS_COUNT}
export HITS_HTTP=${HTTP_COUNT}
export TARGET_IPS='${TARGET_IPS}'
export SNI_HOSTS='${SNI_HOSTS_STR}'
export TLS_FLAGS='${CURL_TLS_FLAGS}'
export SRC_IP_COUNT=${SRC_IP_COUNT}
ENVEOF"

  # Launch the traffic generator (curl-impersonate + curl) in the background.
  # The output path is fixed so it can be inspected after a run.
  local out_file="/tmp/e2e-traffic.out" gen_pid waited
  vagrant ssh "$TRAFFIC_VM" -- \
    "source /tmp/e2e-traffic.env && bash /ja4-platform/tests/vm/generate-traffic.sh" \
    >"$out_file" 2>&1 &
  gen_pid=$!

  # Wait for the generator (max 300s). The PID is polled instead of matching
  # the command line with pgrep: the PID stays valid whatever the process
  # command line becomes (NOTE(review): vagrant appears to exec ssh, which
  # would make a "vagrant ssh …" pattern stop matching — confirm).
  for ((waited = 0; waited < 300; waited++)); do
    kill -0 "$gen_pid" 2>/dev/null || break
    sleep 1
  done
  if kill -0 "$gen_pid" 2>/dev/null; then
    warn "Générateur de trafic toujours actif après 300s — arrêt forcé"
    kill "$gen_pid" 2>/dev/null || true
  fi
  wait "$gen_pid" 2>/dev/null || true

  # Last output line, expected format: "ok_https/hits_https ok_http/hits_http"
  local result https_result http_result ok_https ok_http
  result=$(tail -1 "$out_file" 2>/dev/null || echo "0/${HTTPS_COUNT} 0/${HTTP_COUNT}")
  https_result=$(awk '{print $1}' <<<"$result")
  http_result=$(awk '{print $2}' <<<"$result")
  ok_https=$(ok_count "$https_result")
  ok_http=$(ok_count "${http_result:-0/0}")
  log " ${TRAFFIC_VM} HTTPS : ${https_result} HTTP : ${http_result:-0/0}"
  total_ok=$((total_ok + ok_https + ok_http))

  # ── Bulk HTTP/2 from the traffic VM when httpx is available ──
  if vagrant ssh "$TRAFFIC_VM" -- 'python3 -c "import httpx"' 2>/dev/null; then
    log "Génération HTTP/2 depuis ${TRAFFIC_VM} (${HTTPS_COUNT} requêtes, TLS=${TLS_VERSIONS}, DNS=${DNS_COUNT})..."
    # Write the httpx script on the traffic VM. The Python source contains no
    # character that is special inside double quotes, so it reaches the remote
    # heredoc unchanged.
    vagrant ssh "$TRAFFIC_VM" -- "cat > /tmp/e2e-h2-traffic.py << 'PYEOF'
import httpx, ssl as _ssl, warnings, random, os
warnings.filterwarnings('ignore')
paths = ['/', '/health', '/data', '/api/users', '/api/v1/status', '/login', '/api/search']
sni_hosts = os.environ.get('SNI_HOSTS', 'platform.test').split()
target_ips = os.environ.get('TARGET_IPS', '127.0.0.1').split()
# Les listes sont appariées : même index = même cible (vm-service-domaine)
assert len(sni_hosts) == len(target_ips), f'SNI_HOSTS ({len(sni_hosts)}) != TARGET_IPS ({len(target_ips)})'
targets = list(zip(target_ips, sni_hosts))
tls_versions = [v.strip() for v in os.environ.get('TLS_VERSIONS', '1.2,1.3').split(',')]
uas_browser = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15',
    'Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0',
]
uas_bot = ['python-httpx/0.28.1', 'Googlebot/2.1', 'Go-http-client/2.0']
supported = {'1.2': _ssl.TLSVersion.TLSv1_2, '1.3': _ssl.TLSVersion.TLSv1_3}
tls_map = [supported[v] for v in tls_versions if v in supported]
ctx = _ssl.SSLContext(_ssl.PROTOCOL_TLS_CLIENT)
ctx.check_hostname = False
ctx.verify_mode = _ssl.CERT_NONE
if tls_map:
    ctx.minimum_version = min(tls_map)
    ctx.maximum_version = max(tls_map)
hits = int(os.environ.get('HITS', '100'))
with httpx.Client(http2=True, verify=ctx) as c:
    for i in range(hits):
        p = random.choice(paths)
        target_ip, sni = random.choice(targets)
        ua = random.choice(uas_browser if random.random() < 0.7 else uas_bot)
        try:
            c.get(f'https://{target_ip}' + p, headers={'User-Agent': ua, 'Host': sni})
        except:
            pass
PYEOF"
    # Report the real outcome instead of an unconditional PASS.
    if vagrant ssh "$TRAFFIC_VM" -- \
      "source /tmp/e2e-traffic.env && TLS_VERSIONS='${TLS_VERSIONS}' python3 /tmp/e2e-h2-traffic.py" \
      2>/dev/null; then
      pass "HTTP/2 généré depuis ${TRAFFIC_VM}"
    else
      warn "HTTP/2 : le générateur httpx a échoué sur ${TRAFFIC_VM}"
    fi
  else
    warn "httpx non disponible sur ${TRAFFIC_VM} — HTTP/2 ignoré"
  fi

  if ((total_ok > 0)); then
    pass "Trafic total : ${total_ok} requêtes réussies"
  else
    # Only a warning: the exact output format of generate-traffic.sh is not
    # visible from this file, so a zero may be a parsing miss.
    warn "Trafic total : aucune requête réussie signalée par ${TRAFFIC_VM} (voir ${out_file})"
  fi
}

# ═════════════════════════════════════════════════════════════════════════════
# Phase 4: wait for the pipeline
# ═════════════════════════════════════════════════════════════════════════════

# Let ja4ebpf flush, tell the endpoints the traffic is over, then wait for raw
# rows and for at least one bot-detector cycle before joining the background
# endpoint jobs.
phase4_wait() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 4 : Attente du pipeline ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""

  # ja4ebpf flush
  log "Attente flush ja4ebpf (15s)..."
  sleep 15

  # Tell the endpoints that the traffic is finished
  cd "$VM_DIR"
  local vm
  for vm in $ENDPOINT_VMS; do
    vagrant ssh "$vm" -- 'sudo touch /tmp/ja4ebpf-traffic-done' 2>/dev/null || true
  done

  local i rows seen

  # Wait for raw rows to land in ClickHouse
  log "Attente données dans ClickHouse (max 60s)..."
  seen=false
  for ((i = 1; i <= 30; i++)); do
    rows=$(ch_count "SELECT count() FROM ja4_logs.http_logs_raw")
    if ((rows > 0)); then
      pass "Données brutes reçues : ${rows} lignes (${i}*2s)"
      seen=true
      break
    fi
    sleep 2
  done
  if [ "$seen" != "true" ]; then
    warn "Aucune donnée brute dans ClickHouse après 60s"
  fi

  # Wait for at least one bot-detector cycle
  log "Attente cycle bot-detector (max 120s)..."
  seen=false
  for ((i = 1; i <= 60; i++)); do
    rows=$(ch_count "SELECT count() FROM ja4_processing.ml_all_scores")
    if ((rows > 0)); then
      pass "bot-detector a complété au moins 1 cycle ($rows scores)"
      seen=true
      break
    fi
    sleep 2
  done
  if [ "$seen" != "true" ]; then
    warn "Aucun score bot-detector après 120s"
  fi

  # Join the background endpoint jobs
  log "Attente fin des processus endpoint..."
  local pid
  for pid in "${ENDPOINT_PIDS[@]:-}"; do
    [ -n "$pid" ] || continue
    wait "$pid" 2>/dev/null || true
  done
}

# ═════════════════════════════════════════════════════════════════════════════
# Phase 5: full verification
# ═════════════════════════════════════════════════════════════════════════════

# check_rows ON_EMPTY SQL OK_FORMAT EMPTY_MESSAGE
# Run a count query; PASS with OK_FORMAT (one %s, filled with the count) when
# the count is positive, otherwise call ON_EMPTY (fail or warn) with
# EMPTY_MESSAGE.
check_rows() {
  local on_empty=$1 query=$2 ok_format=$3 empty_message=$4 rows message
  rows=$(ch_count "$query")
  if ((rows > 0)); then
    # shellcheck disable=SC2059  # the format is a literal supplied by callers
    printf -v message "$ok_format" "$rows"
    pass "$message"
  else
    "$on_empty" "$empty_message"
  fi
}

# check_http ON_ERROR URL OK_MESSAGE KO_MESSAGE — PASS when URL answers with a
# success status, otherwise call ON_ERROR (fail or warn).
check_http() {
  local on_error=$1 url=$2 ok_message=$3 ko_message=$4
  if curl -sf "$url" >/dev/null 2>&1; then
    pass "$ok_message"
  else
    "$on_error" "$ko_message"
  fi
}

# Verify each layer of the pipeline: raw rows, ClickHouse MVs, ML output and
# dashboard API. Missing mandatory data is a FAIL; data that may legitimately
# be absent is a WARN.
phase5_verify() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Phase 5 : Vérification du pipeline complet ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""

  # ── Layer 1: raw data ──
  log "── Layer 1 : Données brutes ──"
  check_rows fail "SELECT count() FROM ja4_logs.http_logs_raw" \
    "http_logs_raw : %s lignes" "http_logs_raw vide"

  # Multi-source: rows from several endpoints
  local host_count
  host_count=$(ch_count "SELECT uniqExact(host) FROM ja4_logs.http_logs")
  if ((host_count >= 2)); then
    pass "Multi-source : ${host_count} hôtes distincts"
  else
    warn "Multi-source : ${host_count} hôte(s) distinct(s)"
  fi

  # ── Layer 2: ClickHouse pipeline ──
  log "── Layer 2 : Pipeline ClickHouse (MVs) ──"
  check_rows fail "SELECT count() FROM ja4_logs.http_logs WHERE ja4 != ''" \
    "JA4 fingerprints : %s" "JA4 fingerprints absents"
  check_rows fail "SELECT count() FROM ja4_logs.http_logs WHERE method != ''" \
    "L7 HTTP : %s requêtes capturées" "L7 HTTP absent"
  check_rows warn "SELECT count() FROM ja4_processing.agg_host_ip_ja4_1h" \
    "Agrégation agg_host_ip_ja4_1h : %s entrées" \
    "Agrégation agg_host_ip_ja4_1h vide (volume insuffisant pour la fenêtre horaire)"
  check_rows warn "SELECT count() FROM ja4_processing.view_ai_features_1h" \
    "AI features : %s lignes" \
    "AI features vides (agrégation horaire pas encore calculée)"

  # ── Layer 3: ML bot-detector ──
  log "── Layer 3 : ML bot-detector ──"
  check_rows fail "SELECT count() FROM ja4_processing.ml_all_scores" \
    "ml_all_scores : %s classifications" \
    "ml_all_scores vide — bot-detector n'a pas produit de résultats"
  check_rows warn "SELECT count() FROM ja4_processing.ml_detected_anomalies" \
    "ml_detected_anomalies : %s anomalies détectées" \
    "ml_detected_anomalies vide (pas d'anomalies dans le trafic de test)"
  check_http fail "http://${ANALYSIS_IP}:8080/" \
    "bot-detector health OK" "bot-detector health KO"

  # ── Layer 4: dashboard ──
  log "── Layer 4 : Dashboard ──"
  check_http fail "http://${ANALYSIS_IP}:8000/health" \
    "dashboard /health OK" "dashboard /health KO"

  local overview
  overview=$(curl -sf "http://${ANALYSIS_IP}:8000/api/overview" 2>/dev/null || echo "")
  if [ -n "$overview" ] && [ "$overview" != "{}" ] && [ "$overview" != "null" ]; then
    pass "dashboard /api/overview retourne des données"
  else
    warn "dashboard /api/overview vide"
  fi

  local detections
  detections=$(curl -sf "http://${ANALYSIS_IP}:8000/api/detections" 2>/dev/null || echo "")
  if [ -n "$detections" ]; then
    pass "dashboard /api/detections accessible"
  else
    warn "dashboard /api/detections pas de réponse"
  fi
}

# ═════════════════════════════════════════════════════════════════════════════
# Summary
# ═════════════════════════════════════════════════════════════════════════════

# Print the PASS/WARN/FAIL totals and the overall verdict.
phase_summary() {
  local total=$((PASS_COUNT + FAIL_COUNT + WARN_COUNT))
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ RÉSULTATS E2E ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo -e " ${GREEN}Réussis : $PASS_COUNT${RESET} / $total"
  if [ "$WARN_COUNT" -gt 0 ]; then
    echo -e " ${YELLOW}Avertissements : $WARN_COUNT${RESET} / $total"
  fi
  if [ "$FAIL_COUNT" -gt 0 ]; then
    echo -e " ${RED}Échoués : $FAIL_COUNT${RESET} / $total"
  fi
  echo ""
  if [ "$FAIL_COUNT" -eq 0 ]; then
    echo -e " ${GREEN}${BOLD}Test E2E distribué : SUCCÈS${RESET}"
  else
    echo -e " ${RED}${BOLD}Test E2E distribué : $FAIL_COUNT ÉCHECS${RESET}"
  fi
  echo ""
}

# ═════════════════════════════════════════════════════════════════════════════
# Cleanup
# ═════════════════════════════════════════════════════════════════════════════

# EXIT trap. Removes the alias addresses, stops the endpoint services unless
# KEEP_RUNNING=true, and stops the analysis stack unless KEEP_RUNNING or
# KEEP_ANALYSIS is true. Every step is best-effort: nothing here may fail,
# otherwise `set -e` would cut the cleanup short.
cleanup() {
  cd "$VM_DIR" 2>/dev/null || true
  local vm

  # Remove the alias addresses on the traffic VM. The list is computed locally
  # with the same helper that created them, and sent as plain `ip addr del`
  # commands (the previous remote awk script arrived with escaped `\$2`
  # fields, which awk rejects, so nothing was ever removed).
  if [ "${SRC_IP_COUNT:-1}" -gt 1 ] 2>/dev/null; then
    log "Suppression des IPs alias sur ${TRAFFIC_VM}..."
    local t_iface="" t_ip="" alias_ip del_cmd=""
    read -r t_iface t_ip <<<"$(traffic_vm_addr)"
    for alias_ip in $(traffic_alias_ips "$t_ip" "$SRC_IP_COUNT"); do
      del_cmd+="sudo ip addr del ${alias_ip}/24 dev ${t_iface} 2>/dev/null; "
    done
    if [ -n "$del_cmd" ]; then
      vagrant ssh "$TRAFFIC_VM" -- "${del_cmd}true" 2>/dev/null || true
    fi
  fi

  # Always stop the endpoints (unless KEEP_RUNNING=true)
  if [ "${KEEP_RUNNING:-false}" != "true" ]; then
    log "Nettoyage des endpoints..."
    local eth1_ip ip2 ip3
    for vm in $ENDPOINT_VMS; do
      vagrant ssh "$vm" -- "sudo pkill ja4ebpf 2>/dev/null; sudo nginx -s stop 2>/dev/null; sudo pkill httpd 2>/dev/null; sudo pkill hitch 2>/dev/null; sudo pkill varnishd 2>/dev/null; sudo pkill -f 'TCPServer.*8080' 2>/dev/null; sudo pkill -f 'TCPServer.*:80' 2>/dev/null" 2>/dev/null || true
      # Remove the alias addresses (IP2, IP3) on eth1
      eth1_ip=$(vm_ipv4 "$vm" eth1)
      ip2="" ip3=""
      read -r _ ip2 ip3 <<<"$(service_ips "$eth1_ip")"
      if [ -n "$ip2" ]; then
        vagrant ssh "$vm" -- "sudo ip addr del ${ip2}/24 dev eth1 2>/dev/null || true; sudo ip addr del ${ip3}/24 dev eth1 2>/dev/null || true" 2>/dev/null || true
      fi
    done
  fi

  # Analysis stack: keep it with KEEP_ANALYSIS or KEEP_RUNNING, else stop it
  if [ "${KEEP_RUNNING:-false}" = "true" ] || [ "$KEEP_ANALYSIS" = "true" ]; then
    log "Stack analysis conservée :"
    log " ClickHouse : http://${ANALYSIS_IP}:8123/play"
    log " Dashboard : http://${ANALYSIS_IP}:8000"
    log " Bot-detector : http://${ANALYSIS_IP}:8080"
  else
    log "Arrêt de la stack analysis..."
    vagrant ssh analysis -- "cd /ja4-platform/tests/vm/analysis && docker compose down -v" 2>/dev/null || true
  fi

  if [ "${KEEP_RUNNING:-false}" = "true" ]; then
    log "VMs endpoints conservées :"
    local vm_ip kept_ip2 kept_ip3
    for vm in $ENDPOINT_VMS; do
      vm_ip=$(vm_base_ip "$vm")
      kept_ip2="" kept_ip3=""
      read -r _ kept_ip2 kept_ip3 <<<"$(service_ips "$vm_ip")"
      log " $vm : ${vm_ip} (nginx) ${kept_ip2} (apache) ${kept_ip3} (hitch+varnish)"
    done
  fi
}

# Print the URLs of the analysis stack and of every endpoint service (--up
# mode). Lives in a function so its variables can be `local`: `local` at top
# level is an error, which under `set -e` aborted --up mode before it finished.
print_up_summary() {
  echo ""
  echo "╔══════════════════════════════════════════════════════════╗"
  echo "║ Stack prête — mode --up (pas de trafic ni vérification) ║"
  echo "╚══════════════════════════════════════════════════════════╝"
  echo ""
  log "ClickHouse : http://${ANALYSIS_IP}:8123/play"
  log "Dashboard : http://${ANALYSIS_IP}:8000"
  log "Bot-detector : http://${ANALYSIS_IP}:8080"

  local vm vm_ip vm_ip2 vm_ip3
  for vm in $ENDPOINT_VMS; do
    vm_ip=$(vm_base_ip "$vm")
    vm_ip2="" vm_ip3=""
    read -r _ vm_ip2 vm_ip3 <<<"$(service_ips "$vm_ip")"
    log "$vm nginx : http://${vm_ip}:80 https://${vm_ip}:443"
    log "$vm apache : http://${vm_ip2}:80 https://${vm_ip2}:443"
    log "$vm hitch+varnish : http://${vm_ip3}:80 https://${vm_ip3}:443"
  done

  log ""
  log "Pour générer du trafic :"
  log " ./run-e2e-test.sh --hits 200 --dns 3 --tls 1.2,1.3 --keep-analysis"
  # The host placeholder was missing from this hint ("https:///health").
  log " curl -sk https://<IP_ENDPOINT>/health"
}

# ═════════════════════════════════════════════════════════════════════════════
# Main
# ═════════════════════════════════════════════════════════════════════════════
KEEP_RUNNING="${KEEP_RUNNING:-false}"
trap cleanup EXIT

phase0_setup
phase1_analysis
phase2_endpoints

if [ "$UP_ONLY" = "true" ]; then
  print_up_summary
  # NOTE(review): only the analysis stack is kept here; cleanup still stops the
  # endpoint services unless KEEP_RUNNING=true is exported. Confirm whether
  # --up is meant to leave the endpoints running as well.
  KEEP_ANALYSIS=true
  exit 0
fi

phase3_traffic
phase4_wait
phase5_verify
phase_summary

if [ "$FAIL_COUNT" -eq 0 ]; then
  exit 0
else
  exit 1
fi