feat: JA3 fingerprinting, SSL correlation fix, ML pipeline overhaul, E2E test infra
ja4ebpf:
- Add JA3 raw + MD5 hash fingerprinting (ComputeJA3 in TLS parser)
- Fix accept4 port double-swap bug (__builtin_bswap16 on already-host-order value)
- Fix scheme override bug in ClickHouse writer (HTTP block clearing HTTPS)
- Add HTTP/2 passive fingerprinting (Akamai H2 FP, SETTINGS, pseudo-header order)
- Enrich ClickHouse schema with IP/TCP metadata, H2 settings, Sec-* headers
- Ensure maximum data completeness: all available L3/L4, TLS, HTTP fields emitted

bot-detector:
- Replace logistic regression with MLP fusion classifier
- Replace KS drift detection with ADWIN online learning
- Replace NetworkX/Louvain with PyTorch Geometric GraphSAGE for fleet detection
- Replace autoencoder with RealNVP normalizing flow + SessionTransformer embeddings

infra:
- Add distributed E2E test infrastructure (4 VMs: endpoints + analysis)
- Add Vagrant provisioning for analysis VM, e2e Makefile targets, run scripts

docs:
- Restructure thesis into chapter files with corrected references
- Add E2E testing documentation
- Update architecture, schema, deployment, service docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -3,7 +3,7 @@
|
||||
# run-e2e-test.sh — Test E2E distribué ja4-platform
|
||||
#
|
||||
# Architecture :
|
||||
# 3 VMs endpoint (centos8/rocky9/rocky10) : nginx + ja4ebpf
|
||||
# 3 VMs endpoint (centos8 / rocky9 / rocky10) : nginx + ja4ebpf
|
||||
# 1 VM analysis (192.168.42.10) : ClickHouse + bot-detector + dashboard
|
||||
# Host : orchestrateur + génération de trafic
|
||||
#
|
||||
@ -31,8 +31,8 @@ set -euo pipefail
|
||||
VM_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$VM_DIR/../.." && pwd)"
|
||||
ANALYSIS_IP="192.168.42.10"
|
||||
ENDPOINT_VMS="rocky9 rocky10"
|
||||
ALL_VMS="rocky9 rocky10 analysis"
|
||||
ENDPOINT_VMS="centos8 rocky9 rocky10"
|
||||
ALL_VMS="centos8 rocky9 rocky10 analysis"
|
||||
STACK="nginx"
|
||||
|
||||
# ── Paramètres par défaut (surchargeables par CLI ou env) ──
|
||||
@ -94,6 +94,7 @@ for v in $(echo "$TLS_VERSIONS" | tr ',' ' '); do
|
||||
esac
|
||||
done
|
||||
[ -z "$CURL_TLS_FLAGS" ] && CURL_TLS_FLAGS="--tlsv1.2 --tlsv1.3" && TLS_VERSIONS="1.2,1.3"
|
||||
CURL_TLS_FLAGS="${CURL_TLS_FLAGS# }" # trim leading space
|
||||
|
||||
# Nombre de requêtes HTTP vs HTTPS dérivés du ratio
|
||||
HTTPS_COUNT=$(python3 -c "print(int(${TRAFFIC_COUNT} * (1 - ${HTTP_RATIO})))")
|
||||
@ -292,31 +293,11 @@ phase2_endpoints() {
|
||||
phase3_traffic() {
|
||||
echo ""
|
||||
echo "╔══════════════════════════════════════════════════════════╗"
|
||||
echo "║ Phase 3 : Génération de trafic host → endpoints ║"
|
||||
echo "║ Phase 3 : Génération de trafic → endpoints ║"
|
||||
echo "╚══════════════════════════════════════════════════════════╝"
|
||||
echo ""
|
||||
|
||||
local total_ok=0 total_err=0
|
||||
|
||||
# User-Agents variés pour diversifier les empreintes TLS/HTTP
|
||||
local UA_BROWSER=( \
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36" \
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15" \
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0" \
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0" \
|
||||
)
|
||||
local UA_BOT=( \
|
||||
"python-requests/2.32.3" \
|
||||
"curl/8.9.1" \
|
||||
"Go-http-client/2.0" \
|
||||
"python-httpx/0.28.1" \
|
||||
"Googlebot/2.1" \
|
||||
)
|
||||
|
||||
# Paths diversifiés pour des fingerprints différents
|
||||
local PATHS=("/" "/health" "/data" "/api/users" "/api/v1/status" "/api/v1/metrics" \
|
||||
"/login" "/logout" "/api/search" "/static/main.js" "/static/style.css" \
|
||||
"/favicon.ico" "/robots.txt" "/sitemap.xml" "/api/v2/data" "/admin")
|
||||
local total_ok=0
|
||||
|
||||
# Hostnames pour SNI (cert CN=platform.test, nginx accepte tout via server_name _)
|
||||
local SNI_HOSTS_ALL=("platform.test" "api.platform.test" "www.platform.test" "app.platform.test")
|
||||
@ -347,133 +328,90 @@ phase3_traffic() {
|
||||
done
|
||||
fi
|
||||
|
||||
# ── Trafic HTTPS généré depuis les VMs (IPs sources distinctes) ──
|
||||
for src_vm in $ENDPOINT_VMS; do
|
||||
local src_ip="${VM_IPS_MAP[$src_vm]}"
|
||||
log "Génération depuis $src_vm ($src_ip) : ${HTTPS_COUNT} requêtes HTTPS (${SRC_IP_COUNT} IPs src)..."
|
||||
# ── Construire les listes d'IPs cibles et SNI ──
|
||||
local TARGET_IPS=""
|
||||
for target_vm in $ENDPOINT_VMS; do
|
||||
TARGET_IPS="$TARGET_IPS ${VM_IPS_MAP[$target_vm]}"
|
||||
done
|
||||
TARGET_IPS=$(echo $TARGET_IPS)
|
||||
|
||||
# Construire la liste des IPs cibles (toutes les endpoints)
|
||||
local TARGET_IPS=""
|
||||
for target_vm in $ENDPOINT_VMS; do
|
||||
TARGET_IPS="$TARGET_IPS ${VM_IPS_MAP[$target_vm]}"
|
||||
local SNI_HOSTS_STR=""
|
||||
for h in "${SNI_HOSTS[@]}"; do
|
||||
SNI_HOSTS_STR="$SNI_HOSTS_STR $h"
|
||||
done
|
||||
SNI_HOSTS_STR=$(echo $SNI_HOSTS_STR)
|
||||
|
||||
# ── Synchroniser generate-traffic.sh vers les VMs ──
|
||||
log "Synchronisation du script de trafic..."
|
||||
for vm in $ENDPOINT_VMS; do
|
||||
vagrant rsync "$vm" 2>&1 | tail -1
|
||||
done
|
||||
|
||||
# ── Écrire le config et lancer le trafic depuis chaque VM ──
|
||||
for src_vm in $ENDPOINT_VMS; do
|
||||
log "Génération depuis $src_vm : ${HTTPS_COUNT} HTTPS + ${HTTP_COUNT} HTTP (${SRC_IP_COUNT} IPs src)..."
|
||||
|
||||
# Écrire le fichier de config sur la VM (les ${...} sont développés localement avant l'envoi ; le délimiteur quoté 'ENVEOF' empêche toute ré-expansion côté VM)
|
||||
vagrant ssh "$src_vm" -- "cat > /tmp/e2e-traffic.env << 'ENVEOF'
|
||||
export HITS=${HTTPS_COUNT}
|
||||
export HITS_HTTP=${HTTP_COUNT}
|
||||
export TARGET_IPS='${TARGET_IPS}'
|
||||
export SNI_HOSTS='${SNI_HOSTS_STR}'
|
||||
export TLS_FLAGS='${CURL_TLS_FLAGS}'
|
||||
export SRC_IP_COUNT=${SRC_IP_COUNT}
|
||||
ENVEOF"
|
||||
|
||||
# Lancer le générateur de trafic en arrière-plan
|
||||
vagrant ssh "$src_vm" -- \
|
||||
"source /tmp/e2e-traffic.env && bash /ja4-platform/tests/vm/generate-traffic.sh" \
|
||||
> /tmp/e2e-traffic-${src_vm}.out 2>&1 &
|
||||
done
|
||||
|
||||
# ── Collecter les résultats HTTPS + HTTP ──
|
||||
for src_vm in $ENDPOINT_VMS; do
|
||||
# Attendre que le processus se termine (max 300s)
|
||||
for i in $(seq 1 300); do
|
||||
if [ -f /tmp/e2e-traffic-${src_vm}.out ] && ! pgrep -f "vagrant ssh $src_vm.*generate-traffic" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
|
||||
# Script de génération exécuté sur la VM source
|
||||
vagrant ssh "$src_vm" -- "bash -s" <<REMOTE_SCRIPT &
|
||||
#!/bin/bash
|
||||
set -uo pipefail
|
||||
|
||||
HITS=${HTTPS_COUNT}
|
||||
TARGET_IPS=(${TARGET_IPS})
|
||||
SNI_HOSTS=(${SNI_HOSTS[@]})
|
||||
TLS_FLAGS="${CURL_TLS_FLAGS}"
|
||||
DNS_COUNT=${DNS_COUNT}
|
||||
SRC_IP_COUNT=${SRC_IP_COUNT}
|
||||
|
||||
# Collecter les IPs sources disponibles sur eth0
|
||||
SRC_IPS=(\$(ip -4 addr show eth0 2>/dev/null | awk '/inet / {sub(/\/.*/, "", \$2); print \$2}'))
|
||||
|
||||
UA_BROWSER=(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36"
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15"
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0"
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0"
|
||||
)
|
||||
UA_BOT=(
|
||||
"python-requests/2.32.3"
|
||||
"curl/8.9.1"
|
||||
"Go-http-client/2.0"
|
||||
"python-httpx/0.28.1"
|
||||
"Googlebot/2.1"
|
||||
)
|
||||
PATHS=("/" "/health" "/data" "/api/users" "/api/v1/status" "/api/v1/metrics" \
|
||||
"/login" "/logout" "/api/search" "/static/main.js" "/static/style.css" \
|
||||
"/favicon.ico" "/robots.txt" "/sitemap.xml" "/api/v2/data" "/admin")
|
||||
|
||||
ok=0
|
||||
for i in \$(seq 1 \$HITS); do
|
||||
idx=\$((i - 1))
|
||||
target_ip="\${TARGET_IPS[\$((idx % \${#TARGET_IPS[@]}))]}"
|
||||
sni_host="\${SNI_HOSTS[\$((idx % \${#SNI_HOSTS[@]}))]}"
|
||||
path="\${PATHS[\$((idx % \${#PATHS[@]}))]}"
|
||||
|
||||
case \$((i % 10)) in
|
||||
0|1|2|3|4) method="GET" ;;
|
||||
5|6) method="POST" ;;
|
||||
7) method="PUT" ;;
|
||||
8) method="DELETE" ;;
|
||||
9) method="HEAD" ;;
|
||||
esac
|
||||
|
||||
if [ \$((i % 10)) -lt 7 ]; then
|
||||
ua="\${UA_BROWSER[\$((idx % \${#UA_BROWSER[@]}))]}"
|
||||
else
|
||||
ua="\${UA_BOT[\$((idx % \${#UA_BOT[@]}))]}"
|
||||
fi
|
||||
|
||||
extra_flags="--resolve \${sni_host}:443:\${target_ip} \$TLS_FLAGS"
|
||||
|
||||
# Alterner entre les IPs sources disponibles
|
||||
if [ \${#SRC_IPS[@]} -gt 1 ]; then
|
||||
src_ip="\${SRC_IPS[\$((idx % \${#SRC_IPS[@]}))]}"
|
||||
extra_flags="\$extra_flags --interface \$src_ip"
|
||||
fi
|
||||
|
||||
case \$method in
|
||||
POST)
|
||||
curl -sf -k \$extra_flags -X POST "https://\${sni_host}\${path}" \
|
||||
-H "User-Agent: \${ua}" -H "Content-Type: application/json" \
|
||||
-d '{"test":1,"seq":'\$i'}' >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
|
||||
PUT)
|
||||
curl -sf -k \$extra_flags -X PUT "https://\${sni_host}\${path}" \
|
||||
-H "User-Agent: \${ua}" -d '{}' >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
|
||||
DELETE)
|
||||
curl -sf -k \$extra_flags -X DELETE "https://\${sni_host}\${path}" \
|
||||
-H "User-Agent: \${ua}" >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
|
||||
HEAD)
|
||||
curl -sf -k \$extra_flags -I "https://\${sni_host}\${path}" \
|
||||
-H "User-Agent: \${ua}" >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
|
||||
*)
|
||||
curl -sf -k \$extra_flags -X "\$method" "https://\${sni_host}\${path}" \
|
||||
-H "User-Agent: \${ua}" >/dev/null 2>&1 && ok=\$((ok + 1)) || true ;;
|
||||
esac
|
||||
done
|
||||
echo "\$ok/\$HITS"
|
||||
REMOTE_SCRIPT
|
||||
done
|
||||
|
||||
# Collecter les résultats des processus en arrière-plan
|
||||
for src_vm in $ENDPOINT_VMS; do
|
||||
local result
|
||||
result=$(wait 2>/dev/null || echo "?/?")
|
||||
log " $src_vm HTTPS : $result requêtes réussies"
|
||||
local ok_count
|
||||
ok_count=$(echo "$result" | cut -d/ -f1)
|
||||
total_ok=$((total_ok + ok_count))
|
||||
result=$(tail -1 /tmp/e2e-traffic-${src_vm}.out 2>/dev/null || echo "0/${HTTPS_COUNT} 0/${HTTP_COUNT}")
|
||||
rm -f /tmp/e2e-traffic-${src_vm}.out 2>/dev/null
|
||||
|
||||
# Format: "ok_https/hits_https ok_http/hits_http"
|
||||
local https_result http_result
|
||||
https_result=$(echo "$result" | awk '{print $1}')
|
||||
http_result=$(echo "$result" | awk '{print $2}')
|
||||
local ok_https ok_http
|
||||
ok_https=$(echo "$https_result" | cut -d/ -f1)
|
||||
ok_http=$(echo "${http_result:-0/0}" | cut -d/ -f1)
|
||||
|
||||
log " $src_vm HTTPS : ${https_result} HTTP : ${http_result:-0/0}"
|
||||
total_ok=$((total_ok + ok_https + ok_http))
|
||||
done
|
||||
|
||||
# HTTP/2 massif depuis les VMs si httpx est disponible
|
||||
# ── HTTP/2 massif depuis les VMs si httpx est disponible ──
|
||||
for src_vm in $ENDPOINT_VMS; do
|
||||
if vagrant ssh "$src_vm" -- 'python3 -c "import httpx"' 2>/dev/null; then
|
||||
local src_ip="${VM_IPS_MAP[$src_vm]}"
|
||||
log "Génération HTTP/2 depuis $src_vm (${HTTPS_COUNT} requêtes, TLS=${TLS_VERSIONS}, DNS=${DNS_COUNT})..."
|
||||
local TARGET_IPS_H2=""
|
||||
for target_vm in $ENDPOINT_VMS; do
|
||||
TARGET_IPS_H2="$TARGET_IPS_H2 ${VM_IPS_MAP[$target_vm]}"
|
||||
done
|
||||
vagrant ssh "$src_vm" -- "python3 -c \"
|
||||
import httpx, ssl as _ssl, warnings, random
|
||||
|
||||
# Écrire le script httpx sur la VM (évite les problèmes d'échappement)
|
||||
vagrant ssh "$src_vm" -- "cat > /tmp/e2e-h2-traffic.py << 'PYEOF'
|
||||
import httpx, ssl as _ssl, warnings, random, os
|
||||
warnings.filterwarnings('ignore')
|
||||
paths = ['/', '/health', '/data', '/api/users', '/api/v1/status', '/login', '/api/search']
|
||||
sni_hosts = ${SNI_HOSTS[@]@Q}
|
||||
target_ips = '${TARGET_IPS_H2}'.split()
|
||||
sni_hosts = os.environ.get('SNI_HOSTS', 'platform.test').split()
|
||||
target_ips = os.environ.get('TARGET_IPS', '127.0.0.1').split()
|
||||
tls_versions = [v.strip() for v in os.environ.get('TLS_VERSIONS', '1.2,1.3').split(',')]
|
||||
uas_browser = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0',
|
||||
]
|
||||
uas_bot = ['python-httpx/0.28.1', 'Googlebot/2.1', 'Go-http-client/2.0']
|
||||
tls_versions = [v.strip() for v in '${TLS_VERSIONS}'.split(',')]
|
||||
supported = {'1.2': _ssl.TLSVersion.TLSv1_2, '1.3': _ssl.TLSVersion.TLSv1_3}
|
||||
tls_map = [supported[v] for v in tls_versions if v in supported]
|
||||
ctx = _ssl.SSLContext(_ssl.PROTOCOL_TLS_CLIENT)
|
||||
@ -482,8 +420,9 @@ ctx.verify_mode = _ssl.CERT_NONE
|
||||
if tls_map:
|
||||
ctx.minimum_version = min(tls_map)
|
||||
ctx.maximum_version = max(tls_map)
|
||||
hits = int(os.environ.get('HITS', '100'))
|
||||
with httpx.Client(http2=True, verify=ctx) as c:
|
||||
for i in range(${HTTPS_COUNT}):
|
||||
for i in range(hits):
|
||||
p = random.choice(paths)
|
||||
target = random.choice(target_ips)
|
||||
h = random.choice(sni_hosts)
|
||||
@ -492,30 +431,15 @@ with httpx.Client(http2=True, verify=ctx) as c:
|
||||
c.get(f'https://{target}' + p, headers={'User-Agent': ua, 'Host': h})
|
||||
except:
|
||||
pass
|
||||
\"" 2>/dev/null || true
|
||||
PYEOF"
|
||||
|
||||
vagrant ssh "$src_vm" -- \
|
||||
"source /tmp/e2e-traffic.env && TLS_VERSIONS='${TLS_VERSIONS}' python3 /tmp/e2e-h2-traffic.py" \
|
||||
2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
pass "HTTP/2 généré depuis tous les endpoints"
|
||||
|
||||
# Trafic HTTP (port 80) en plus pour diversifier
|
||||
if [ "${HTTP_COUNT}" -gt 0 ]; then
|
||||
log "Génération HTTP (port 80) depuis les VMs : ${HTTP_COUNT} requêtes/VM..."
|
||||
for src_vm in $ENDPOINT_VMS; do
|
||||
local ok80
|
||||
ok80=$(vagrant ssh "$src_vm" -- "
|
||||
ok=0
|
||||
for i in \$(seq 1 ${HTTP_COUNT}); do
|
||||
curl -sf http://localhost/health >/dev/null 2>&1 && ok=\$((ok + 1)) || true
|
||||
done
|
||||
echo \$ok
|
||||
" 2>/dev/null || echo "0")
|
||||
log " $src_vm HTTP : ${ok80}/${HTTP_COUNT} requêtes"
|
||||
total_ok=$((total_ok + ok80))
|
||||
done
|
||||
else
|
||||
log "HTTP (port 80) désactivé (http-ratio=0)"
|
||||
fi
|
||||
|
||||
pass "Trafic total : ${total_ok} requêtes réussies"
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user