diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml
index 9cbcc2a..75bb620 100644
--- a/tests/integration/docker-compose.yml
+++ b/tests/integration/docker-compose.yml
@@ -141,6 +141,21 @@ services:
     networks:
       - ja4net
 
+  # ---------------------------------------------------------------------------
+  # Traffic generator — lightweight container with curl for sending external
+  # HTTPS requests to platform. Traffic must cross the Docker network so
+  # sentinel (pcap on eth0) can capture TLS ClientHello packets.
+  # ---------------------------------------------------------------------------
+  traffic-gen:
+    image: curlimages/curl:latest
+    hostname: traffic-gen
+    entrypoint: ["sleep", "infinity"]
+    depends_on:
+      platform:
+        condition: service_healthy
+    networks:
+      - ja4net
+
 networks:
   ja4net:
     driver: bridge
diff --git a/tests/integration/platform/clickhouse-init.sh b/tests/integration/platform/clickhouse-init.sh
index 2c45da7..f58e07f 100755
--- a/tests/integration/platform/clickhouse-init.sh
+++ b/tests/integration/platform/clickhouse-init.sh
@@ -27,4 +27,16 @@ for f in "$TMP_DIR"/*.sql; do
     clickhouse-client --multiquery < "$f"
 done
 
-echo "[init] All SQL files executed successfully"
+# ---------------------------------------------------------------------------
+# Seed data required for dictionaries to function
+# REGEXP_TREE dictionaries require at least one rule; without it, any INSERT
+# into http_logs_raw fails because the MV mv_http_logs calls dictGet() on
+# the empty dict. Insert a catch-all "unknown" rule so the pipeline works.
+# ---------------------------------------------------------------------------
+echo "[init] Seeding anubis_ua_rules (REGEXP_TREE needs ≥1 rule)..."
+clickhouse-client --multiquery <<'SEED'
+INSERT INTO ja4_processing.anubis_ua_rules (id, parent_id, regexp, keys, values) VALUES
+    (1, 0, '.*', ['bot_name','action','has_ip','rule_id','category'], ['','','0','0','']);
+SEED
+
+echo "[init] All SQL files executed and seed data inserted"
diff --git a/tests/integration/run-tests.sh b/tests/integration/run-tests.sh
index 482b66e..edadbbd 100755
--- a/tests/integration/run-tests.sh
+++ b/tests/integration/run-tests.sh
@@ -157,13 +157,15 @@ log "============================================"
 log "Phase 4: Generating test traffic"
 log "============================================"
 
-PLATFORM_IP=$(docker compose exec -T platform hostname -I | tr -d ' ')
+PLATFORM_IP=$(docker compose exec -T platform hostname -I | tr -d ' \n\r')
 log "Platform IP: $PLATFORM_IP"
 
-# Send HTTPS requests to Apache (triggers mod-reqin-log + sentinel)
-log "Sending 50 HTTPS requests..."
+# Traffic MUST come from OUTSIDE the platform container so sentinel sees it
+# on eth0. curl from localhost goes through loopback → invisible to pcap.
+# We use the traffic-gen container (curlimages/curl) as the traffic source.
+log "Sending 50 HTTPS requests (from traffic-gen → platform via Docker network)..."
 for i in $(seq 1 50); do
-    docker compose exec -T platform curl -sk \
+    docker compose exec -T traffic-gen curl -sk \
         -H "User-Agent: IntegrationTest/1.0 (test-run-$i)" \
         -H "Accept: text/html,application/json" \
         -H "Accept-Language: fr-FR,en-US" \
@@ -171,19 +173,19 @@ for i in $(seq 1 50); do
         -H "Sec-Fetch-Dest: document" \
         -H "Sec-Fetch-Mode: navigate" \
         -H "Sec-Fetch-Site: none" \
-        "https://localhost/health?test=$i" > /dev/null 2>&1 || true &
+        "https://platform/health?test=$i" > /dev/null 2>&1 || true &
 done
 wait || true
 pass "50 HTTPS requests sent"
 
 # Send varied HTTP methods
 log "Sending varied HTTP methods..."
-docker compose exec -T platform curl -sk -X POST -d '{"test":true}' \
+docker compose exec -T traffic-gen curl -sk -X POST -d '{"test":true}' \
     -H "Content-Type: application/json" \
     -H "User-Agent: BotTest/2.0" \
-    "https://localhost/health" > /dev/null 2>&1 || true
-docker compose exec -T platform curl -sk -X HEAD "https://localhost/health" > /dev/null 2>&1 || true
-docker compose exec -T platform curl -sk "https://localhost/" > /dev/null 2>&1 || true
+    "https://platform/health" > /dev/null 2>&1 || true
+docker compose exec -T traffic-gen curl -sk -I "https://platform/health" > /dev/null 2>&1 || true
+docker compose exec -T traffic-gen curl -sk "https://platform/" > /dev/null 2>&1 || true
 pass "Varied HTTP methods sent (POST, HEAD, GET)"
 
 # Wait for correlator to flush batches to ClickHouse
@@ -226,7 +228,23 @@ if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
     fi
 fi
 
-# 5d. Check correlator log file
+# 5d. TLS fingerprints captured (sentinel → correlator → ClickHouse)
+if [ "$PARSED_COUNT" -gt 0 ] 2>/dev/null; then
+    JA4_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja4 != ''")
+    JA3_COUNT=$(ch_query "SELECT count() FROM ja4_logs.http_logs WHERE ja3 != ''")
+    TLS_SAMPLE=$(ch_query "SELECT ja4, ja3_hash, tls_version FROM ja4_logs.http_logs WHERE ja4 != '' LIMIT 1 FORMAT TabSeparated")
+    if [ "$JA4_COUNT" -gt 0 ] 2>/dev/null; then
+        pass "TLS capture: $JA4_COUNT rows with JA4 fingerprints"
+        log " Sample: $TLS_SAMPLE"
+    else
+        warn "No JA4 fingerprints in parsed logs (check that sentinel captures on eth0 and that traffic-gen can reach platform)"
+    fi
+    if [ "$JA3_COUNT" -gt 0 ] 2>/dev/null; then
+        pass "TLS capture: $JA3_COUNT rows with JA3 fingerprints"
+    fi
+fi
+
+# 5e. Check correlator log file (the "<" redirect must run inside the container, hence sh -c)
-CORR_LINES=$(docker compose exec -T platform wc -l < /var/log/logcorrelator/correlated.log 2>/dev/null || echo 0)
+CORR_LINES=$(docker compose exec -T platform sh -c 'wc -l < /var/log/logcorrelator/correlated.log' 2>/dev/null || echo 0)
 if [ "$CORR_LINES" -gt 0 ] 2>/dev/null; then
     pass "Correlator file output: $CORR_LINES lines in correlated.log"