Files
ja4-platform/tests/vm/generate-traffic.sh
Jacquin Antoine 36b5065a0a feat(e2e): add multi-IP endpoint architecture with dedicated traffic VM
Replace single-service-per-endpoint with all-ips mode running nginx, apache,
and hitch+varnish simultaneously on 3 dedicated IPs per VM (eth1 alias IPs).
Add a dedicated traffic VM with curl-impersonate for realistic TLS fingerprints,
parallelized traffic generation, and paired SNI_HOSTS/TARGET_IPS lists for
per-VM per-service hostname identification (e.g. rocky9-nginx-platform.test).

Key changes:
- run-tests-vm.sh: add setup_all_ips(), IP-specific Listen/bind directives
  with reset-before-apply pattern, graceful service availability checks
- run-e2e-test.sh: traffic VM architecture, all-ips mode, eth1 network,
  paired IP/SNI lists, updated cleanup for alias IPs
- generate-traffic.sh: parallel background jobs, curl-impersonate detection,
  auto source interface detection via ip route get, Host header in HTTP traffic
- Vagrantfile: add traffic VM with provision-traffic.sh
- provision-traffic.sh: install curl-impersonate and httpx for traffic gen
- test-rpm.sh: multi-interface TC check, updated ja4ebpf config
- clickhouse-init.sh: load CSV stubs for Anubis/bot-networks dictionaries
- Remove obsolete correlator/sentinel/mod-reqin-log docs
- Add h2_settings_ack column to http_logs schema
- Upgrade Go toolchain to 1.25.0

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-16 14:25:24 +02:00

167 lines
6.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# generate-traffic.sh — Generate HTTPS/HTTP traffic from a VM endpoint
#
# Uses curl-impersonate for realistic browser TLS fingerprints.
# Traffic is parallelized using background jobs for speed.
#
# Environment variables (from /tmp/e2e-traffic.env):
# HITS — Number of HTTPS requests (required)
# HITS_HTTP — Number of HTTP requests (default: 0)
# TARGET_IPS — Space-separated list of endpoint IPs (required)
# SNI_HOSTS — Space-separated list of SNI hostnames (required)
# TLS_FLAGS — curl TLS flags (currently unused: no request command in this script passes them)
# SRC_IP_COUNT — Number of source IPs to rotate (default: 1)
# =============================================================================
# Unset variables are errors and a pipeline fails if any stage fails.
# Note: `-e` is not set, so a failing command does not abort the script.
set -uo pipefail

# ── Configuration (environment variables with defaults) ──
HITS="${HITS:-0}"
HITS_HTTP="${HITS_HTTP:-0}"
# The list variables arrive as space-separated strings; word splitting is the
# intended way to turn each of them into an array.
# shellcheck disable=SC2206
TARGET_IPS=(${TARGET_IPS:-})
# shellcheck disable=SC2206
SNI_HOSTS=(${SNI_HOSTS:-platform.test})
TLS_FLAGS="${TLS_FLAGS:---tlsv1.2 --tlsv1.3}"
SRC_IP_COUNT="${SRC_IP_COUNT:-1}"

# Nothing requested: report zero hits and stop successfully.
if [ "$HITS" -eq 0 ] && [ "$HITS_HTTP" -eq 0 ]; then
  echo "0/0"
  exit 0
fi

# Traffic was requested, so both paired lists need at least one entry.
# The request loops pick entries with `idx % <list length>`; an empty list
# would make every background job die on a division by zero while the script
# still reported success.
if [ "${#TARGET_IPS[@]}" -eq 0 ] || [ "${#SNI_HOSTS[@]}" -eq 0 ]; then
  echo "${0##*/}: TARGET_IPS and SNI_HOSTS must each contain at least one entry" >&2
  exit 1
fi
# ── Collect source IPs from the interface that can reach the targets ──
# When targets are on the ja4-e2e network (192.168.42.x), use eth1 IPs as sources.
# Otherwise fall back to eth0.

# Print the IPv4 addresses (prefix length stripped) configured on interface $1,
# one per line. Prints nothing when the interface is missing or `ip` fails.
_ipv4_addrs() {
  ip -4 addr show "$1" 2>/dev/null | awk '/inet / {sub(/\/.*/, "", $2); print $2}'
}

if [ "${#TARGET_IPS[@]}" -gt 0 ]; then
  # Ask the kernel which interface routes to the first target and keep the
  # first word that follows a "dev" field.
  FIRST_TARGET="${TARGET_IPS[0]}"
  ROUTE_IFACE=$(ip -4 route get "$FIRST_TARGET" 2>/dev/null \
    | awk '/dev/ {for(i=1;i<=NF;i++) if($i=="dev") {print $(i+1); exit}}')
fi

# Prefer the routed interface (never loopback), then eth1 if it carries an
# IPv4 address, then eth0.
# The eth1 probe captures the helper's output instead of piping into
# `grep -q`: under `pipefail`, grep exiting early can make the pipeline fail
# with SIGPIPE even though a match was found.
if [ -n "${ROUTE_IFACE:-}" ] && [ "${ROUTE_IFACE}" != "lo" ]; then
  SRC_IFACE="$ROUTE_IFACE"
elif [ -n "$(_ipv4_addrs eth1)" ]; then
  SRC_IFACE="eth1"
else
  SRC_IFACE="eth0"
fi

mapfile -t SRC_IPS < <(_ipv4_addrs "$SRC_IFACE")
if [ "${#SRC_IPS[@]}" -eq 0 ]; then
  # Fallback to eth0 if the detected interface has no IPs
  mapfile -t SRC_IPS < <(_ipv4_addrs eth0)
fi

if [ "${#SRC_IPS[@]}" -eq 0 ]; then
  # No usable source address: report zero hits on stderr and fail.
  # Use `>&2` (duplicate fd 2) rather than `> /dev/stderr`, which reopens the
  # path and can truncate a redirected log file or fail with EACCES.
  echo "0/${HITS}" >&2
  exit 1
fi
# ── Detect curl-impersonate ──
# BROWSER_POOL holds the client commands found on this host; UA_BROWSER holds
# the User-Agent string for the entry at the same index.
BROWSER_POOL=()
UA_BROWSER=()

# Register the first installed command among the candidates.
# Arguments: $1 - User-Agent string to pair with the command
#            $2.. - candidate command names, in order of preference
# Returns:   0 if a candidate was registered, 1 if none is installed
_register_first_available() {
  local agent=$1
  local candidate
  shift
  for candidate in "$@"; do
    if command -v "$candidate" &>/dev/null; then
      BROWSER_POOL+=("$candidate")
      UA_BROWSER+=("$agent")
      return 0
    fi
  done
  return 1
}

_register_first_available \
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36" \
  curl-impersonate-chrome
_register_first_available \
  "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0" \
  curl-impersonate-firefox
_register_first_available \
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15" \
  curl_safari17_2 curl-impersonate-safari
_register_first_available \
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0" \
  curl_edge101 curl-impersonate-edge

# No impersonating client installed: fall back to plain curl, advertising the
# version printed on the first line of `curl --version`.
if [ "${#BROWSER_POOL[@]}" -eq 0 ]; then
  BROWSER_POOL+=("curl")
  UA_BROWSER+=("curl/$(curl --version 2>/dev/null | awk 'NR == 1 {print $2}')")
fi
# User-Agent strings of non-browser clients.
UA_BOT=(
  "python-requests/2.32.3" "curl/8.9.1" "Go-http-client/2.0"
  "python-httpx/0.28.1" "Googlebot/2.1"
)

# Request paths, cycled through by the traffic loops.
PATHS=(
  "/"
  "/health"
  "/data"
  "/api/users"
  "/api/v1/status"
  "/api/v1/metrics"
  "/login"
  "/logout"
  "/api/search"
  "/static/main.js"
  "/static/style.css"
  "/favicon.ico"
  "/robots.txt"
  "/sitemap.xml"
  "/api/v2/data"
  "/admin"
)

# Success / error counters, both starting at zero.
err=0
ok=0
# ── HTTPS traffic (parallel background jobs) ──
# TARGET_IPS and SNI_HOSTS are paired lists: the same index is the same target.
# SNI format: <vm>-<service>-<domain> (e.g. rocky9-nginx-platform.test)
# When both lists have the same length, idx % n yields the same index for both.

#######################################
# Send a single HTTPS request.
# Globals:   TARGET_IPS, SNI_HOSTS, PATHS, BROWSER_POOL, UA_BROWSER, UA_BOT,
#            SRC_IPS, SRC_IP_COUNT (all read)
# Arguments: $1 - 1-based hit number; selects target, path, client,
#                 User-Agent, source IP and HTTP method
# Outputs:   nothing (the client's output is discarded)
# Returns:   exit status of the client command
#######################################
_https_request() {
  local hit=$1
  local idx=$((hit - 1))
  local target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}"
  local sni_host="${SNI_HOSTS[$((idx % ${#SNI_HOSTS[@]}))]}"
  local path="${PATHS[$((idx % ${#PATHS[@]}))]}"
  local browser_idx=$((idx % ${#BROWSER_POOL[@]}))
  local ua rotation
  local verb=$((hit % 10))

  # 7 out of every 10 requests carry the User-Agent matching the client;
  # the rest carry a bot User-Agent.
  if [ $((idx % 10)) -lt 7 ]; then
    ua="${UA_BROWSER[$browser_idx]}"
  else
    ua="${UA_BOT[$((idx % ${#UA_BOT[@]}))]}"
  fi

  # The command line is built as an array (never empty), in the same argument
  # order as before, so no value is subject to word splitting.
  local -a req=("${BROWSER_POOL[$browser_idx]}" -sf -k
    --resolve "${sni_host}:443:${target_ip}")

  # Rotate over source IPs only when more than one is both available and
  # requested. The rotation size is clamped to the number of discovered IPs:
  # indexing past the end of SRC_IPS is an unbound-variable error under
  # `set -u`, which would abort this job before any request is sent.
  if [ "${#SRC_IPS[@]}" -gt 1 ] && [ "$SRC_IP_COUNT" -gt 1 ]; then
    rotation=$SRC_IP_COUNT
    if [ "$rotation" -gt "${#SRC_IPS[@]}" ]; then
      rotation=${#SRC_IPS[@]}
    fi
    req+=(--interface "${SRC_IPS[$((idx % rotation))]}")
  fi

  # Method mix per 10 hits: 5 GET, 2 POST, 1 PUT, 1 DELETE, 1 HEAD.
  case "$verb" in
    5|6) req+=(-X POST) ;;
    7) req+=(-X PUT) ;;
    8) req+=(-X DELETE) ;;
    9) req+=(-I) ;;
  esac
  req+=("https://${sni_host}${path}" -H "User-Agent: ${ua}")
  case "$verb" in
    5|6) req+=(-H "Content-Type: application/json" -d '{"test":1}') ;;
  esac
  req+=(--connect-timeout 3 --max-time 5)

  "${req[@]}" >/dev/null 2>&1
}

#######################################
# Send HITS HTTPS requests as background jobs, in batches of 32.
# Globals:   HITS (read), plus everything read by _https_request
# Outputs:   an error message on stderr when the target lists are empty
# Returns:   0 when the requests were dispatched (or none were requested),
#            1 when TARGET_IPS or SNI_HOSTS is empty
#######################################
run_https_traffic() {
  local i
  if ! [ "${HITS:-0}" -gt 0 ]; then
    return 0
  fi
  # Both lists are indexed modulo their length; an empty list would turn
  # every job into a division-by-zero failure.
  if [ "${#TARGET_IPS[@]}" -eq 0 ] || [ "${#SNI_HOSTS[@]}" -eq 0 ]; then
    echo "HTTPS traffic skipped: TARGET_IPS and SNI_HOSTS must not be empty" >&2
    return 1
  fi
  for i in $(seq 1 "$HITS"); do
    _https_request "$i" &
    # Limit parallelism to 32 concurrent jobs
    if [ $((i % 32)) -eq 0 ]; then
      wait
    fi
  done
  wait
  # Per-request results are discarded here, so successes are not counted.
  # NOTE(review): presumably delivery is verified on the endpoint side — confirm.
}

run_https_traffic
# ── HTTP traffic (parallel background jobs) ──
ok_http=0

#######################################
# Send a single plain-HTTP GET to a target IP, naming the paired hostname in
# the Host header.
# Globals:   TARGET_IPS, SNI_HOSTS, PATHS (read)
# Arguments: $1 - 1-based hit number; selects target, hostname and path
# Outputs:   nothing (curl's output is discarded)
# Returns:   exit status of curl
#######################################
_http_request() {
  local idx=$(($1 - 1))
  local target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}"
  local sni_host="${SNI_HOSTS[$((idx % ${#SNI_HOSTS[@]}))]}"
  local path="${PATHS[$((idx % ${#PATHS[@]}))]}"
  curl -sf "http://${target_ip}${path}" -H "Host: ${sni_host}" \
    --connect-timeout 3 --max-time 5 >/dev/null 2>&1
}

#######################################
# Send HITS_HTTP plain-HTTP requests as background jobs, in batches of 32.
# Globals:   HITS_HTTP (read), plus everything read by _http_request
# Outputs:   an error message on stderr when the target lists are empty
# Returns:   0 when the requests were dispatched (or none were requested),
#            1 when TARGET_IPS or SNI_HOSTS is empty
#######################################
run_http_traffic() {
  local i
  if ! [ "${HITS_HTTP:-0}" -gt 0 ]; then
    return 0
  fi
  # Both lists are indexed modulo their length; an empty list would turn
  # every job into a division-by-zero failure.
  if [ "${#TARGET_IPS[@]}" -eq 0 ] || [ "${#SNI_HOSTS[@]}" -eq 0 ]; then
    echo "HTTP traffic skipped: TARGET_IPS and SNI_HOSTS must not be empty" >&2
    return 1
  fi
  for i in $(seq 1 "$HITS_HTTP"); do
    _http_request "$i" &
    # Barrier every 32 jobs to cap the number of concurrent requests.
    if [ $((i % 32)) -eq 0 ]; then
      wait
    fi
  done
  wait
}

run_http_traffic
# Final report on stdout: "<https>/<https requested> <http>/<http requested>".
# Approximate: results of the parallel jobs are not collected, so the
# requested count is printed on both sides of each pair.
printf '%s/%s %s/%s\n' "$HITS" "$HITS" "$HITS_HTTP" "$HITS_HTTP"