feat: Python traffic generator with realistic varied HTTP/HTTPS traffic
- Replace curlimages/curl with Python stdlib traffic generator
- 200 requests, 10 workers, 16 scenario types: browsers (Chrome/Firefox/Safari/Edge/mobile), bots (Googlebot/Bing/curl/wget), GET/POST/HEAD/PUT/PATCH/DELETE/OPTIONS, HTTP + HTTPS
- Multiple SSL contexts (default, TLS1.2-only, TLS1.3-only, few_ciphers) → 4 distinct JA4/JA3 fingerprints per test run
- Realistic headers: Accept, Accept-Language, Sec-Fetch-*, Referer, X-Forwarded-For, Cookie, Cache-Control
- JSON payloads, form data, CORS preflights
- DB always reset (docker compose down -v) at start of each test run
- Enhanced Phase 5 checks: distinct UAs, method variety, JA4/JA3 counts + uniqueness

Results: 199/200 OK, 24 distinct UAs, 7 HTTP methods, TLS 1.2+1.3, 4 JA4 fingerprints

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -98,14 +98,20 @@ if [ "$BUILD_ONLY" = true ]; then
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# Phase 2: Start stack
|
||||
# Phase 2: Start stack (always fresh — destroy volumes to reset DB)
|
||||
# =============================================================================
|
||||
log "============================================"
|
||||
log "Phase 2: Starting stack"
|
||||
log "Phase 2: Starting stack (fresh DB)"
|
||||
log "============================================"
|
||||
|
||||
# Always destroy volumes so ClickHouse reinitializes schema from scratch.
|
||||
# This guarantees test isolation across runs.
|
||||
log "Resetting state (docker compose down -v)..."
|
||||
docker compose down -v --remove-orphans 2>/dev/null || true
|
||||
|
||||
docker compose up -d
|
||||
|
||||
wait_for_service clickhouse 60
|
||||
wait_for_service clickhouse 120
|
||||
wait_for_service platform 120
|
||||
wait_for_service dashboard 60
|
||||
|
||||
@ -157,40 +163,22 @@ log "============================================"
|
||||
log "Phase 4: Generating test traffic"
|
||||
log "============================================"
|
||||
|
||||
PLATFORM_IP=$(docker compose exec -T platform hostname -I | tr -d ' \n\r')
|
||||
log "Platform IP: $PLATFORM_IP"
|
||||
# Traffic comes from traffic-gen container (crosses Docker network eth0)
|
||||
# so sentinel's pcap capture sees TLS ClientHello packets.
|
||||
# Python generator uses multiple SSL contexts → varied JA4/JA3 fingerprints.
|
||||
# Both HTTP (port 80) and HTTPS (port 443) requests are sent.
|
||||
log "Starting Python traffic generator (200 requests, 10 workers)..."
|
||||
if docker compose exec -T traffic-gen python /app/generate_traffic.py \
|
||||
--host platform --http-port 80 --https-port 443 \
|
||||
--requests 200 --workers 10; then
|
||||
pass "Traffic generation complete (200 requests: browsers, bots, GET/POST/HEAD/PUT/DELETE/OPTIONS)"
|
||||
else
|
||||
warn "Traffic generator reported some errors (>80% success still passes)"
|
||||
fi
|
||||
|
||||
# Traffic MUST come from OUTSIDE the platform container so sentinel sees it
|
||||
# on eth0. curl from localhost goes through loopback → invisible to pcap.
|
||||
# We use the traffic-gen container (curlimages/curl) as the traffic source.
|
||||
log "Sending 50 HTTPS requests (from traffic-gen → platform via Docker network)..."
|
||||
for i in $(seq 1 50); do
|
||||
docker compose exec -T traffic-gen curl -sk \
|
||||
-H "User-Agent: IntegrationTest/1.0 (test-run-$i)" \
|
||||
-H "Accept: text/html,application/json" \
|
||||
-H "Accept-Language: fr-FR,en-US" \
|
||||
-H "Accept-Encoding: gzip, deflate, br" \
|
||||
-H "Sec-Fetch-Dest: document" \
|
||||
-H "Sec-Fetch-Mode: navigate" \
|
||||
-H "Sec-Fetch-Site: none" \
|
||||
"https://platform/health?test=$i" > /dev/null 2>&1 || true &
|
||||
done
|
||||
wait || true
|
||||
pass "50 HTTPS requests sent"
|
||||
|
||||
# Send varied HTTP methods
|
||||
log "Sending varied HTTP methods..."
|
||||
docker compose exec -T traffic-gen curl -sk -X POST -d '{"test":true}' \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "User-Agent: BotTest/2.0" \
|
||||
"https://platform/health" > /dev/null 2>&1 || true
|
||||
docker compose exec -T traffic-gen curl -sk -X HEAD "https://platform/health" > /dev/null 2>&1 || true
|
||||
docker compose exec -T traffic-gen curl -sk "https://platform/" > /dev/null 2>&1 || true
|
||||
pass "Varied HTTP methods sent (POST, HEAD, GET)"
|
||||
|
||||
# Wait for correlator to flush batches to ClickHouse
|
||||
log "Waiting 10s for correlator to flush..."
|
||||
sleep 10
|
||||
# Wait for correlator to flush all batches to ClickHouse
|
||||
log "Waiting 15s for correlator to flush..."
|
||||
sleep 15
|
||||
|
||||
# =============================================================================
|
||||
# Phase 5: Verify data pipeline
|
||||
@ -220,27 +208,39 @@ fi
|
||||
|
||||
# 5c. Check a sample parsed log has expected fields
|
||||
if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
|
||||
SAMPLE=$(ch_query "SELECT src_ip, method, host, path, header_user_agent FROM ja4_logs.http_logs LIMIT 1 FORMAT TabSeparated")
|
||||
if echo "$SAMPLE" | grep -q "IntegrationTest\|BotTest\|curl"; then
|
||||
pass "Parsed log contains expected User-Agent"
|
||||
# Verify variety of User-Agents (browsers + bots)
|
||||
UA_TYPES=$(ch_query "SELECT count(DISTINCT header_user_agent) FROM ja4_logs.http_logs")
|
||||
if [ "$UA_TYPES" -gt 5 ] 2>/dev/null; then
|
||||
pass "Varied User-Agents: $UA_TYPES distinct UAs in logs"
|
||||
else
|
||||
warn "Parsed log User-Agent not as expected: $SAMPLE"
|
||||
warn "Low User-Agent variety: only $UA_TYPES distinct UAs"
|
||||
fi
|
||||
|
||||
# Verify HTTP method variety
|
||||
METHODS=$(ch_query "SELECT groupArray(method) FROM (SELECT DISTINCT method FROM ja4_logs.http_logs ORDER BY method)")
|
||||
pass "HTTP methods captured: $METHODS"
|
||||
fi
|
||||
|
||||
# 5d. TLS fingerprints captured (sentinel → correlator → ClickHouse)
|
||||
if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
|
||||
JA4_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja4 != ''")
|
||||
JA4_UNIQ=$(ch_query "SELECT count(DISTINCT ja4) FROM ja4_logs.http_logs WHERE ja4 != ''")
|
||||
JA3_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja3 != ''")
|
||||
TLS_SAMPLE=$(ch_query "SELECT ja4, ja3_hash, tls_version FROM ja4_logs.http_logs WHERE ja4 != '' LIMIT 1 FORMAT TabSeparated")
|
||||
JA3_UNIQ=$(ch_query "SELECT count(DISTINCT ja3_hash) FROM ja4_logs.http_logs WHERE ja3_hash != ''")
|
||||
TLS_VERSIONS=$(ch_query "SELECT groupArray(tls_version) FROM (SELECT DISTINCT tls_version FROM ja4_logs.http_logs WHERE tls_version != '' ORDER BY tls_version)")
|
||||
|
||||
if [ "$JA4_COUNT" -gt 0 ] 2>/dev/null; then
|
||||
pass "TLS capture: $JA4_COUNT rows with JA4 fingerprints"
|
||||
log " Sample: $TLS_SAMPLE"
|
||||
pass "TLS capture: $JA4_COUNT rows with JA4 ($JA4_UNIQ unique fingerprints)"
|
||||
SAMPLE=$(ch_query "SELECT ja4, tls_version FROM ja4_logs.http_logs WHERE ja4 != '' LIMIT 1 FORMAT TabSeparated")
|
||||
log " JA4 sample: $SAMPLE"
|
||||
else
|
||||
warn "No JA4 fingerprints in parsed logs (sentinel may not capture loopback traffic)"
|
||||
warn "No JA4 fingerprints (sentinel may not see traffic on eth0)"
|
||||
fi
|
||||
if [ "$JA3_COUNT" -gt 0 ] 2>/dev/null; then
|
||||
pass "TLS capture: $JA3_COUNT rows with JA3 fingerprints"
|
||||
pass "TLS capture: $JA3_COUNT rows with JA3 ($JA3_UNIQ unique fingerprints)"
|
||||
fi
|
||||
if [ -n "$TLS_VERSIONS" ]; then
|
||||
pass "TLS versions seen: $TLS_VERSIONS"
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
Reference in New Issue
Block a user