- Add traffic-gen container (curlimages/curl) to send HTTPS traffic across the Docker network so sentinel (pcap on eth0) captures ClientHello
- Seed anubis_ua_rules with a catch-all rule (REGEXP_TREE needs ≥1 entry) so MV mv_http_logs processes raw logs without errors
- Add JA4/JA3 fingerprint verification in Phase 5 tests
- Dashboard healthcheck via python urllib (no curl in image)

Results: 59 raw logs, 59 parsed, 53 with JA4+JA3 fingerprints (TLS 1.3)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
359 lines
13 KiB
Bash
Executable File
359 lines
13 KiB
Bash
Executable File
#!/usr/bin/env bash
# =============================================================================
# run-tests.sh — Full-stack integration test for ja4-platform
#
# Starts the entire pipeline in Docker Compose, generates traffic, and verifies
# that data flows end-to-end:
#
#   Apache → mod-reqin-log → correlator → ClickHouse
#                  sentinel ↗                 ↓
#                                    bot-detector → ML scores
#                                    dashboard API ← query
#
# Usage:
#   ./run-tests.sh               # run tests (build + up + test + down)
#   ./run-tests.sh --no-down     # keep stack running after tests (for debugging)
#   ./run-tests.sh --build-only  # build images only, don't run tests
# =============================================================================
set -euo pipefail

# Run from the directory that contains this script, so every relative
# `docker compose` invocation below is resolved from there.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Colors (ANSI escape sequences, interpreted when printed by the helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Run-mode flags (set by parse_args) and test counters (updated by pass/fail).
KEEP_UP=false
BUILD_ONLY=false
TESTS_PASSED=0
TESTS_FAILED=0

# Parse command-line flags.
# Globals:   KEEP_UP, BUILD_ONLY (written)
# Arguments: the script's arguments
# Exits:     2 on an unrecognised option, so that a typo such as `--nodown`
#            cannot silently fall back to the default "tear everything down".
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --no-down)
        KEEP_UP=true
        ;;
      --build-only)
        BUILD_ONLY=true
        ;;
      *)
        printf 'run-tests.sh: unknown option: %s\n' "$arg" >&2
        printf 'Usage: ./run-tests.sh [--no-down] [--build-only]\n' >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Output helpers. The colour variables are expanded with %b (so their \033
# escapes are interpreted) while the message itself goes through %s, so
# backslashes inside a message (e.g. "\N" or "\t" in a ClickHouse
# TabSeparated sample) are printed literally instead of being interpreted.

# log MESSAGE... — timestamped progress line.
log() { printf '%b[test]%b %s %s\n' "${CYAN}" "${NC}" "$(date +%H:%M:%S)" "$*"; }

# pass MESSAGE... — report a successful check and increment TESTS_PASSED.
pass() { printf '%b ✓ %s%b\n' "${GREEN}" "$*" "${NC}"; TESTS_PASSED=$((TESTS_PASSED + 1)); }

# fail MESSAGE... — report a failed check and increment TESTS_FAILED.
fail() { printf '%b ✗ %s%b\n' "${RED}" "$*" "${NC}"; TESTS_FAILED=$((TESTS_FAILED + 1)); }

# warn MESSAGE... — report a non-fatal observation; counters are untouched.
warn() { printf '%b ⚠ %s%b\n' "${YELLOW}" "$*" "${NC}"; }

# EXIT handler: tear the compose stack down unless the caller asked otherwise.
# Globals: KEEP_UP, BUILD_ONLY (read)
# With --build-only the script never starts the stack, so it must not run
# `down -v` either: that would destroy containers and volumes left running by
# an earlier `--no-down` session.
cleanup() {
  if [ "$BUILD_ONLY" = true ]; then
    return 0
  fi
  if [ "$KEEP_UP" = false ]; then
    log "Tearing down stack..."
    # Best effort: a teardown failure must not mask the test result.
    docker compose down -v --remove-orphans 2>/dev/null || true
  else
    log "Stack left running (--no-down). Stop with: docker compose down -v"
  fi
}
trap cleanup EXIT

# Run one SQL statement with clickhouse-client inside the `clickhouse`
# service container.
# Arguments: $1 - SQL text
# Outputs:   the client's stdout (its stderr is discarded)
# Returns:   exit status of the `docker compose exec` invocation
ch_query() {
  local sql="$1"
  docker compose exec -T clickhouse \
    clickhouse-client --query "$sql" 2>/dev/null
}

# Poll `docker compose ps` every 2 seconds until the service reports
# Health == "healthy".
# Arguments: $1 - compose service name
#            $2 - maximum number of seconds to wait (default: 120)
# Returns:   0 as soon as the service is healthy;
#            1 on timeout, after printing the service's last 30 log lines.
wait_for_service() {
  local service="$1"
  local max_wait="${2:-120}"
  local elapsed=0
  local status

  log "Waiting for $service to be healthy (max ${max_wait}s)..."
  while [ "$elapsed" -lt "$max_wait" ]; do
    # `docker compose ps --format json` prints either one JSON array (older
    # Compose v2 releases) or one JSON object per line (newer releases);
    # accept both shapes. Any parsing problem degrades to "unknown".
    status=$(docker compose ps --format json "$service" 2>/dev/null | python3 -c '
import sys, json

raw = sys.stdin.read().strip()
try:
    parsed = json.loads(raw)
    entries = parsed if isinstance(parsed, list) else [parsed]
except ValueError:
    entries = [json.loads(line) for line in raw.splitlines() if line.strip()]
for entry in entries:
    print(entry.get("Health", "unknown"))
' 2>/dev/null || echo "unknown")
    if [ "$status" = "healthy" ]; then
      log "$service is healthy (${elapsed}s)"
      return 0
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done

  log "ERROR: $service not healthy after ${max_wait}s"
  docker compose logs --tail=30 "$service"
  return 1
}

# =============================================================================
# Phase 1: Build
# =============================================================================
for banner in \
  "============================================" \
  "Phase 1: Building images" \
  "============================================"; do
  log "$banner"
done

# Only the last 20 lines of the (merged stdout+stderr) build output are shown.
docker compose build --parallel 2>&1 | tail -20

# --build-only stops here, before any container is started.
if [[ "$BUILD_ONLY" == true ]]; then
  log "Build complete (--build-only). Exiting."
  exit 0
fi

# =============================================================================
# Phase 2: Start stack
# =============================================================================
for banner in \
  "============================================" \
  "Phase 2: Starting stack" \
  "============================================"; do
  log "$banner"
done

docker compose up -d

# service:timeout pairs, waited on in order; the first service that does not
# become healthy in time aborts the run (set -e).
for spec in clickhouse:60 platform:120 dashboard:60; do
  wait_for_service "${spec%%:*}" "${spec##*:}"
done

# Give bot-detector time to start (it's expected to fail initially — no data yet)
log "Waiting 10s for bot-detector to initialize..."
sleep 10

# =============================================================================
# Phase 3: Verify ClickHouse schema
# =============================================================================
log "============================================"
log "Phase 3: Verifying ClickHouse schema"
log "============================================"

# Every ch_query call below falls back to an empty result on failure: under
# `set -e` a failing `VAR=$(ch_query ...)` would otherwise abort the whole
# script silently instead of being recorded as a failed check.

# Check databases exist
DB_COUNT=$(ch_query "SELECT count() FROM system.databases WHERE name IN ('ja4_logs','ja4_processing')") || DB_COUNT=""
if [ "$DB_COUNT" = "2" ]; then
  pass "Both databases created (ja4_logs, ja4_processing)"
else
  fail "Expected 2 databases, got ${DB_COUNT:-no result (query failed)}"
fi

# Check key tables
for table in "ja4_logs.http_logs_raw" "ja4_logs.http_logs" "ja4_processing.ml_detected_anomalies" "ja4_processing.agg_host_ip_ja4_1h"; do
  # Split "database.table" at the first dot.
  db="${table%%.*}"
  tbl="${table#*.}"
  EXISTS=$(ch_query "SELECT count() FROM system.tables WHERE database='$db' AND name='$tbl'") || EXISTS=""
  if [ "$EXISTS" = "1" ]; then
    pass "Table $table exists"
  else
    fail "Table $table missing"
  fi
done

# Check users
for user in data_writer analyst; do
  EXISTS=$(ch_query "SELECT count() FROM system.users WHERE name='$user'") || EXISTS=""
  if [ "$EXISTS" = "1" ]; then
    pass "User '$user' created"
  else
    fail "User '$user' missing"
  fi
done

# =============================================================================
# Phase 4: Generate test traffic
# =============================================================================
log "============================================"
log "Phase 4: Generating test traffic"
log "============================================"

# The IP is only logged for information, so a lookup failure must not abort
# the run.
PLATFORM_IP=$(docker compose exec -T platform hostname -I | tr -d ' \n\r') || PLATFORM_IP="unknown"
log "Platform IP: $PLATFORM_IP"

# Traffic MUST come from OUTSIDE the platform container so sentinel sees it
# on eth0. curl from localhost goes through loopback → invisible to pcap.
# We use the traffic-gen container (curlimages/curl) as the traffic source.
REQUEST_TOTAL=50
REQUEST_PIDS=()
log "Sending ${REQUEST_TOTAL} HTTPS requests (from traffic-gen → platform via Docker network)..."
for ((i = 1; i <= REQUEST_TOTAL; i++)); do
  docker compose exec -T traffic-gen curl -sk \
    -H "User-Agent: IntegrationTest/1.0 (test-run-$i)" \
    -H "Accept: text/html,application/json" \
    -H "Accept-Language: fr-FR,en-US" \
    -H "Accept-Encoding: gzip, deflate, br" \
    -H "Sec-Fetch-Dest: document" \
    -H "Sec-Fetch-Mode: navigate" \
    -H "Sec-Fetch-Site: none" \
    "https://platform/health?test=$i" > /dev/null 2>&1 &
  REQUEST_PIDS+=("$!")
done

# Reap every background request and count the ones that completed. curl (no
# -f) exits non-zero only on transport-level failures, so this measures
# "request reached the server", not the HTTP status.
REQUESTS_OK=0
for pid in "${REQUEST_PIDS[@]}"; do
  if wait "$pid"; then
    REQUESTS_OK=$((REQUESTS_OK + 1))
  fi
done
if [ "$REQUESTS_OK" -gt 0 ]; then
  pass "$REQUESTS_OK/$REQUEST_TOTAL HTTPS requests sent"
else
  fail "None of the $REQUEST_TOTAL HTTPS requests could be sent"
fi

# Send varied HTTP methods
log "Sending varied HTTP methods..."
METHOD_ERRORS=0
docker compose exec -T traffic-gen curl -sk -X POST -d '{"test":true}' \
  -H "Content-Type: application/json" \
  -H "User-Agent: BotTest/2.0" \
  "https://platform/health" > /dev/null 2>&1 || METHOD_ERRORS=$((METHOD_ERRORS + 1))
# -I issues a real HEAD request. `-X HEAD` only swaps the method word and
# leaves curl waiting for a response body that never arrives.
docker compose exec -T traffic-gen curl -sk -I "https://platform/health" > /dev/null 2>&1 || METHOD_ERRORS=$((METHOD_ERRORS + 1))
docker compose exec -T traffic-gen curl -sk "https://platform/" > /dev/null 2>&1 || METHOD_ERRORS=$((METHOD_ERRORS + 1))
if [ "$METHOD_ERRORS" -eq 0 ]; then
  pass "Varied HTTP methods sent (POST, HEAD, GET)"
else
  warn "$METHOD_ERRORS of 3 varied-method requests (POST, HEAD, GET) could not be sent"
fi

# Wait for correlator to flush batches to ClickHouse
log "Waiting 10s for correlator to flush..."
sleep 10

# =============================================================================
# Phase 5: Verify data pipeline
# =============================================================================
log "============================================"
log "Phase 5: Verifying data pipeline"
log "============================================"

# Query results fall back to an empty string when ch_query fails; the numeric
# tests below then evaluate to false (their error output is discarded), so a
# broken query is reported through the else branch instead of aborting the
# script under `set -e`.

# 5a. Raw logs ingested
RAW_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs_raw") || RAW_COUNT=""
if [ "$RAW_COUNT" -gt 0 ] 2>/dev/null; then
  pass "Raw logs ingested: $RAW_COUNT rows in http_logs_raw"
else
  fail "No raw logs in http_logs_raw (correlator → ClickHouse failed)"
  # Debug
  log "Correlator logs:"
  # grep exits 1 when nothing matches; with pipefail + set -e that would kill
  # the script right here, so the debug dump is explicitly best-effort.
  docker compose logs --tail=30 platform 2>&1 | grep -i "correlator\|error\|clickhouse" | head -20 || true
fi

# 5b. Parsed logs via materialized view
PARSED_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs") || PARSED_COUNT=""
if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
  pass "Parsed logs: $PARSED_COUNT rows in http_logs (MV working)"
else
  warn "No parsed logs in http_logs (MV may need INSERT trigger, or dict loading failed)"
fi

# 5c. Check a sample parsed log has expected fields
if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
  SAMPLE=$(ch_query "SELECT src_ip, method, host, path, header_user_agent FROM ja4_logs.http_logs LIMIT 1 FORMAT TabSeparated") || SAMPLE=""
  if echo "$SAMPLE" | grep -q "IntegrationTest\|BotTest\|curl"; then
    pass "Parsed log contains expected User-Agent"
  else
    warn "Parsed log User-Agent not as expected: $SAMPLE"
  fi
fi

# 5d. TLS fingerprints captured (sentinel → correlator → ClickHouse)
# NOTE(review): the count below filters on column `ja3` while the sample
# selects `ja3_hash` — confirm both columns exist in ja4_logs.http_logs.
if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
  JA4_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja4 != ''") || JA4_COUNT=""
  JA3_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja3 != ''") || JA3_COUNT=""
  TLS_SAMPLE=$(ch_query "SELECT ja4, ja3_hash, tls_version FROM ja4_logs.http_logs WHERE ja4 != '' LIMIT 1 FORMAT TabSeparated") || TLS_SAMPLE=""
  if [ "$JA4_COUNT" -gt 0 ] 2>/dev/null; then
    pass "TLS capture: $JA4_COUNT rows with JA4 fingerprints"
    log " Sample: $TLS_SAMPLE"
  else
    warn "No JA4 fingerprints in parsed logs (sentinel may not capture loopback traffic)"
  fi
  if [ "$JA3_COUNT" -gt 0 ] 2>/dev/null; then
    pass "TLS capture: $JA3_COUNT rows with JA3 fingerprints"
  fi
fi

# 5e. Check correlator log file
# The input redirection has to happen INSIDE the container, hence `sh -c`:
# written directly on the `docker compose exec` command line, `< file` is
# opened by the host shell and refers to a path on the host.
# (assumes the platform image provides `sh` and `wc`)
CORR_LINES=$(docker compose exec -T platform sh -c 'wc -l < /var/log/logcorrelator/correlated.log' 2>/dev/null | tr -d ' \r\n' || echo 0)
if [ "$CORR_LINES" -gt 0 ] 2>/dev/null; then
  pass "Correlator file output: $CORR_LINES lines in correlated.log"
else
  warn "Correlator file output empty"
fi

# =============================================================================
# Phase 6: Verify dashboard API
# =============================================================================
log "============================================"
log "Phase 6: Verifying dashboard API"
log "============================================"

# Both probes pass an explicit timeout to urlopen: without one a dashboard
# that accepts the connection but never answers would block the test run
# indefinitely.

# Health check (dashboard has no curl, use python urllib)
HEALTH=$(docker compose exec -T dashboard python -c "
import urllib.request, json
r = urllib.request.urlopen('http://localhost:8000/health', timeout=10)
print(json.loads(r.read()).get('status',''))
" 2>/dev/null || echo "FAIL")
if [ "$HEALTH" = "healthy" ] || [ "$HEALTH" = "ok" ]; then
  pass "Dashboard /health returns $HEALTH"
else
  fail "Dashboard /health failed: $HEALTH"
fi

# Metrics endpoint
# Prints the HTTP status code; 0 stands for "no HTTP response at all".
METRICS_STATUS=$(docker compose exec -T dashboard python -c "
import urllib.request
try:
    r = urllib.request.urlopen('http://localhost:8000/api/metrics', timeout=10)
    print(r.status)
except urllib.error.HTTPError as e:
    print(e.code)
except Exception:
    print(0)
" 2>/dev/null || echo "000")
# A 404 is accepted as well: it still shows that the API process answers.
if [ "$METRICS_STATUS" = "200" ] || [ "$METRICS_STATUS" = "404" ]; then
  pass "Dashboard /api/metrics responds (HTTP $METRICS_STATUS)"
else
  fail "Dashboard /api/metrics failed (HTTP $METRICS_STATUS)"
fi

# =============================================================================
# Phase 7: Verify bot-detector
# =============================================================================
log "============================================"
log "Phase 7: Verifying bot-detector"
log "============================================"

# `docker compose ps --format json` prints either one JSON array (older
# Compose v2 releases) or one JSON object per line (newer releases); accept
# both shapes. Any parsing problem degrades to "unknown".
BOT_STATUS=$(docker compose ps --format json bot-detector 2>/dev/null | python3 -c '
import sys, json

raw = sys.stdin.read().strip()
try:
    parsed = json.loads(raw)
    entries = parsed if isinstance(parsed, list) else [parsed]
except ValueError:
    entries = [json.loads(line) for line in raw.splitlines() if line.strip()]
for entry in entries:
    print(entry.get("State", "unknown"))
' 2>/dev/null || echo "unknown")

if [ "$BOT_STATUS" = "running" ]; then
  pass "Bot-detector is running"
else
  # An empty result (no container listed) is reported as "unknown".
  warn "Bot-detector state: ${BOT_STATUS:-unknown} (may need more data to start properly)"
fi

# =============================================================================
# Phase 8: Network capture verification (sentinel)
# =============================================================================
log "============================================"
log "Phase 8: Verifying sentinel capture"
log "============================================"

if docker compose exec -T platform pgrep -x sentinel > /dev/null 2>&1; then
  SENTINEL_RUNNING="yes"
else
  SENTINEL_RUNNING="no"
fi

if [ "$SENTINEL_RUNNING" = "yes" ]; then
  pass "Sentinel process is running"
else
  fail "Sentinel process not found"
  # grep exits 1 when nothing matches; with pipefail + set -e that would end
  # the script before the summary, so the debug dump is best-effort.
  docker compose logs --tail=10 platform 2>&1 | grep -i sentinel | head -5 || true
fi

# Check sentinel log output
# Only the first lines are needed to prove the file has content; `head` runs
# inside the container so no host-side pipe is cut short.
# (assumes the platform image provides `head`)
SENTINEL_LOG=$(docker compose exec -T platform head -n 5 /var/log/ja4sentinel/sentinel.log 2>/dev/null || true)
if [ -n "$SENTINEL_LOG" ]; then
  pass "Sentinel producing log output"
else
  warn "No sentinel log file found (may be logging to stdout only)"
fi

# =============================================================================
# Summary
# =============================================================================
echo ""
for banner in \
  "============================================" \
  "RESULTS" \
  "============================================"; do
  log "$banner"
done

# Warnings are not counted: the total is passed + failed checks only.
TOTAL=$((TESTS_PASSED + TESTS_FAILED))
echo -e " ${GREEN}Passed: $TESTS_PASSED${NC} / $TOTAL"
if ((TESTS_FAILED > 0)); then
  echo -e " ${RED}Failed: $TESTS_FAILED${NC} / $TOTAL"
fi
echo ""

# Exit status mirrors the result: 0 when nothing failed, 1 otherwise.
if ((TESTS_FAILED == 0)); then
  log "All tests passed!"
  exit 0
fi

log "Some tests failed. Use --no-down to keep the stack running for debugging."
log "Debug commands:"
log " docker compose logs platform"
log " docker compose exec platform cat /var/log/logcorrelator/correlated.log"
log " docker compose exec clickhouse clickhouse-client -q 'SELECT * FROM ja4_logs.http_logs_raw LIMIT 5'"
exit 1