feat: Python traffic generator with realistic varied HTTP/HTTPS traffic

- Replace curlimages/curl with Python stdlib traffic generator
- 200 requests, 10 workers, 16 scenario types:
  browsers (Chrome/Firefox/Safari/Edge/mobile), bots (Googlebot/Bing/curl/wget),
  GET/POST/HEAD/PUT/PATCH/DELETE/OPTIONS, HTTP + HTTPS
- Multiple SSL contexts (default, TLS1.2-only, TLS1.3-only, few_ciphers)
  → 4 distinct JA4/JA3 fingerprints per test run
- Realistic headers: Accept, Accept-Language, Sec-Fetch-*, Referer,
  X-Forwarded-For, Cookie, Cache-Control
- JSON payloads, form data, CORS preflights
- DB always reset (down -v) at start of each test run
- Enhanced Phase 5 checks: distinct UAs, method variety, JA4/JA3 counts + uniqueness

Results: 199/200 OK, 24 distinct UAs, 7 HTTP methods, TLS 1.2+1.3, 4 JA4 fingerprints

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 21:14:55 +02:00
parent da8357f43d
commit 12d60975da
4 changed files with 564 additions and 50 deletions

View File

@ -98,14 +98,20 @@ if [ "$BUILD_ONLY" = true ]; then
fi
# =============================================================================
# Phase 2: Start stack
# Phase 2: Start stack (always fresh — destroy volumes to reset DB)
# =============================================================================
# NOTE(review): this section is a rendered diff hunk with the +/- markers
# stripped. Where two near-identical lines appear back to back (banner text,
# log message, clickhouse wait), the first looks like the pre-commit line and
# the second its replacement — confirm against the actual diff.
log "============================================"
log "Phase 2: Starting stack"
log "Phase 2: Starting stack (fresh DB)"
log "============================================"
# Always destroy volumes so ClickHouse reinitializes schema from scratch.
# This guarantees test isolation across runs.
log "Resetting state (docker compose down -v)..."
# Best-effort teardown: stderr is discarded and a non-zero exit status is
# ignored (|| true), so a failed or no-op teardown never stops the script here.
# -v also removes the volumes; --remove-orphans also removes containers for
# services that are not defined in the compose file.
docker compose down -v --remove-orphans 2>/dev/null || true
# Bring the whole stack back up in detached mode.
docker compose up -d
# wait_for_service is defined outside this view; the second argument is
# presumably a timeout in seconds — TODO confirm. The two clickhouse lines
# differ only in that value (60 vs 120).
wait_for_service clickhouse 60
wait_for_service clickhouse 120
wait_for_service platform 120
wait_for_service dashboard 60
@ -157,40 +163,22 @@ log "============================================"
log "Phase 4: Generating test traffic"
log "============================================"
# Phase 4 body: drive HTTP/HTTPS traffic at the platform container from the
# traffic-gen container, then pause so the data can reach ClickHouse.
# NOTE(review): this is a rendered diff hunk with +/- markers stripped; the
# Python-generator block below appears to be the new code and the curl-based
# block after it the code it replaces — confirm against the actual diff.
#
# Ask the platform container for its address(es); -T disables pseudo-TTY
# allocation and tr strips spaces, newlines and carriage returns.
# NOTE(review): `hostname -I` prints every address separated by spaces, so on
# a multi-homed container the stripped result would be the addresses glued
# together — confirm the container has a single address.
PLATFORM_IP=$(docker compose exec -T platform hostname -I | tr -d ' \n\r')
log "Platform IP: $PLATFORM_IP"
# Traffic comes from traffic-gen container (crosses Docker network eth0)
# so sentinel's pcap capture sees TLS ClientHello packets.
# Python generator uses multiple SSL contexts → varied JA4/JA3 fingerprints.
# Both HTTP (port 80) and HTTPS (port 443) requests are sent.
log "Starting Python traffic generator (200 requests, 10 workers)..."
# The generator's exit status selects pass vs. warn; a non-zero status only
# produces a warning in this block, it does not stop the script here.
# NOTE(review): the pass message lists six methods and omits PATCH, which the
# commit description includes — confirm which is accurate.
# NOTE(review): the ">80% success" wording presumably mirrors a threshold
# inside generate_traffic.py, which is not visible here — TODO confirm.
if docker compose exec -T traffic-gen python /app/generate_traffic.py \
--host platform --http-port 80 --https-port 443 \
--requests 200 --workers 10; then
pass "Traffic generation complete (200 requests: browsers, bots, GET/POST/HEAD/PUT/DELETE/OPTIONS)"
else
warn "Traffic generator reported some errors (>80% success still passes)"
fi
# --- The curl-based block below appears to be the pre-commit implementation ---
# Traffic MUST come from OUTSIDE the platform container so sentinel sees it
# on eth0. curl from localhost goes through loopback → invisible to pcap.
# We use the traffic-gen container (curlimages/curl) as the traffic source.
log "Sending 50 HTTPS requests (from traffic-gen → platform via Docker network)..."
# Fan-out: each iteration starts one curl in the background (trailing &) with
# all output discarded and failures ignored; -s silences progress output and
# -k skips TLS certificate verification.
for i in $(seq 1 50); do
docker compose exec -T traffic-gen curl -sk \
-H "User-Agent: IntegrationTest/1.0 (test-run-$i)" \
-H "Accept: text/html,application/json" \
-H "Accept-Language: fr-FR,en-US" \
-H "Accept-Encoding: gzip, deflate, br" \
-H "Sec-Fetch-Dest: document" \
-H "Sec-Fetch-Mode: navigate" \
-H "Sec-Fetch-Site: none" \
"https://platform/health?test=$i" > /dev/null 2>&1 || true &
done
# Barrier: block until every background curl has exited. The pass below is
# unconditional because individual request failures were already swallowed.
wait || true
pass "50 HTTPS requests sent"
# Send varied HTTP methods
log "Sending varied HTTP methods..."
docker compose exec -T traffic-gen curl -sk -X POST -d '{"test":true}' \
-H "Content-Type: application/json" \
-H "User-Agent: BotTest/2.0" \
"https://platform/health" > /dev/null 2>&1 || true
# NOTE(review): per the curl manual, -X only changes the method word and does
# not make curl behave like a HEAD request; -I/--head is the documented way.
docker compose exec -T traffic-gen curl -sk -X HEAD "https://platform/health" > /dev/null 2>&1 || true
docker compose exec -T traffic-gen curl -sk "https://platform/" > /dev/null 2>&1 || true
pass "Varied HTTP methods sent (POST, HEAD, GET)"
# Fixed pause before Phase 5 reads ClickHouse. Two variants are shown (10s and
# 15s); the 15s one appears to be the post-commit version.
# Wait for correlator to flush batches to ClickHouse
log "Waiting 10s for correlator to flush..."
sleep 10
# Wait for correlator to flush all batches to ClickHouse
log "Waiting 15s for correlator to flush..."
sleep 15
# =============================================================================
# Phase 5: Verify data pipeline
@ -220,27 +208,39 @@ fi
# NOTE(review): rendered diff hunk with +/- markers stripped; old and new
# lines are interleaved, so this text is not runnable as-is. ch_query, pass,
# warn and log are defined outside this view.
# 5c. Check a sample parsed log has expected fields
# The `2>/dev/null` after the test hides the shell's error message when
# PARSED_COUNT is empty or non-numeric; the test then fails and the whole
# branch is skipped.
if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
# NOTE(review): SAMPLE is only read by the grep check and the "not as
# expected" warning below, which look like pre-commit lines; in the new code
# it is overwritten in 5d before being read, so this query may be dead.
SAMPLE=$(ch_query "SELECT src_ip, method, host, path, header_user_agent FROM ja4_logs.http_logs LIMIT 1 FORMAT TabSeparated")
if echo "$SAMPLE" | grep -q "IntegrationTest\|BotTest\|curl"; then
pass "Parsed log contains expected User-Agent"
# Verify variety of User-Agents (browsers + bots)
UA_TYPES=$(ch_query "SELECT count(DISTINCT header_user_agent) FROM ja4_logs.http_logs")
# Passes only with more than 5 distinct User-Agent values; a non-numeric
# UA_TYPES falls through to the warning with the shell error suppressed.
if [ "$UA_TYPES" -gt 5 ] 2>/dev/null; then
pass "Varied User-Agents: $UA_TYPES distinct UAs in logs"
else
warn "Parsed log User-Agent not as expected: $SAMPLE"
warn "Low User-Agent variety: only $UA_TYPES distinct UAs"
fi
# Verify HTTP method variety
# The distinct methods are collected into one array value and reported; the
# pass is unconditional, so no minimum number of methods is enforced.
METHODS=$(ch_query "SELECT groupArray(method) FROM (SELECT DISTINCT method FROM ja4_logs.http_logs ORDER BY method)")
pass "HTTP methods captured: $METHODS"
fi
# 5d. TLS fingerprints captured (sentinel → correlator → ClickHouse)
if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
# Row counts and distinct counts for non-empty JA4 / JA3 fingerprints.
JA4_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja4 != ''")
JA4_UNIQ=$(ch_query "SELECT count(DISTINCT ja4) FROM ja4_logs.http_logs WHERE ja4 != ''")
# NOTE(review): JA3_COUNT filters on column `ja3` while JA3_UNIQ below uses
# `ja3_hash` — confirm both columns exist and that the mix is intentional.
JA3_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja3 != ''")
TLS_SAMPLE=$(ch_query "SELECT ja4, ja3_hash, tls_version FROM ja4_logs.http_logs WHERE ja4 != '' LIMIT 1 FORMAT TabSeparated")
JA3_UNIQ=$(ch_query "SELECT count(DISTINCT ja3_hash) FROM ja4_logs.http_logs WHERE ja3_hash != ''")
TLS_VERSIONS=$(ch_query "SELECT groupArray(tls_version) FROM (SELECT DISTINCT tls_version FROM ja4_logs.http_logs WHERE tls_version != '' ORDER BY tls_version)")
# Zero JA4 rows produce a warning only.
if [ "$JA4_COUNT" -gt 0 ] 2>/dev/null; then
pass "TLS capture: $JA4_COUNT rows with JA4 fingerprints"
log " Sample: $TLS_SAMPLE"
pass "TLS capture: $JA4_COUNT rows with JA4 ($JA4_UNIQ unique fingerprints)"
# Reuses the SAMPLE variable from 5c for a one-row JA4 / TLS-version sample.
SAMPLE=$(ch_query "SELECT ja4, tls_version FROM ja4_logs.http_logs WHERE ja4 != '' LIMIT 1 FORMAT TabSeparated")
log " JA4 sample: $SAMPLE"
else
warn "No JA4 fingerprints in parsed logs (sentinel may not capture loopback traffic)"
warn "No JA4 fingerprints (sentinel may not see traffic on eth0)"
fi
# No else branch: zero JA3 rows is silent (neither pass nor warn).
if [ "$JA3_COUNT" -gt 0 ] 2>/dev/null; then
pass "TLS capture: $JA3_COUNT rows with JA3 fingerprints"
pass "TLS capture: $JA3_COUNT rows with JA3 ($JA3_UNIQ unique fingerprints)"
fi
# NOTE(review): groupArray over zero rows presumably comes back as the text
# "[]", which is non-empty, so this check may pass even when no TLS version
# was recorded — depends on how ch_query formats output; TODO confirm.
if [ -n "$TLS_VERSIONS" ]; then
pass "TLS versions seen: $TLS_VERSIONS"
fi
fi