feat: pipeline L7 HTTP complet + infrastructure tests VM

Correctifs pipeline L7 (uprobe SSL_read) :
- uprobe_ssl.c : ssl_set_fd ne retourne plus prématurément quand fd_conn_map
  est vide (accept4 non disponible en Docker). Sauvegarde ssl_ptr→{fd,0,0}
  pour permettre le fallback /proc côté Go.
- main.go : consumeSSLEvents réécrit avec routeur magic-bytes complet :
  * HTTP/2 preface → extraction SETTINGS + conversion correlation.HTTP2Settings
  * HTTP/1.x requête → method, path, query, headers, header_order_sig
  * HTTP/1.x réponse → status_code
  * Fallback /proc/<tgid>/fd/<fd> quand src_ip=0 (accept4 absent)
- writer/clickhouse.go : export header_order_signature ajouté

Nouveaux packages :
- internal/parser/http1.go : parseur HTTP/1.x (IsHTTP1Request,
  ParseHTTP1Request, IsHTTP1Response, ParseHTTP1Response)
- internal/parser/http1_test.go : 11 tests unitaires (28 total passent)
- internal/procutil/proc_lookup.go : résolution fd→IP via /proc avec cache
  TTL 5s (FDCache). Supporte /proc/PID/net/tcp et tcp6, IPv4-mappé IPv6.

Infrastructure tests VM (tests/vm/) :
- Vagrantfile : VM Rocky Linux 9 KVM, 4 CPU / 4 GB RAM
- provision.sh : installation toolchain eBPF + Go + Docker + nginx
- run-tests-vm.sh : suite de test complète dans la VM (L3/L4+TLS+L7)
- README.md : guide d'installation et d'utilisation
- Makefile : cibles vm-up, vm-down, vm-ssh, test-vm-nginx, test-vm-all,
  vm-rebuild-ja4ebpf

Corrections stack Docker :
- Dockerfiles nginx/apache/nginx-varnish/hitch-varnish : suppression des
  références à shared/go/ja4common/ (répertoire supprimé)
- clickhouse-init.sh : restauré depuis git, seed anubis_ua_rules obsolète
  supprimé (table REGEXP_TREE supprimée du schéma)
- traffic-gen : ajout HTTP/1.0 (http.client) et HTTP/2 (httpx)
- verify_db.py : script de vérification 35 checks (L3/L4/TLS/L7/corrélation)
- run-stack-tests.sh : phase 6 verify_db ajoutée

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-12 02:37:00 +02:00
parent 9734e21fe3
commit f85a10b012
21 changed files with 1868 additions and 74 deletions

View File

@ -32,11 +32,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \
WORKDIR /build
COPY go.work go.work.sum* ./
COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/
COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/
RUN cd services/ja4ebpf && go mod download 2>/dev/null; true
COPY shared/go/ja4common/ ./shared/go/ja4common/
COPY services/ja4ebpf/ ./services/ja4ebpf/
WORKDIR /build/services/ja4ebpf

View File

@ -24,11 +24,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \
WORKDIR /build
COPY go.work go.work.sum* ./
COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/
COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/
RUN cd services/ja4ebpf && go mod download 2>/dev/null; true
COPY shared/go/ja4common/ ./shared/go/ja4common/
COPY services/ja4ebpf/ ./services/ja4ebpf/
WORKDIR /build/services/ja4ebpf

View File

@ -229,6 +229,27 @@ phase_verify() {
fi
}
# ---------------------------------------------------------------------------
# Phase 6 — Vérification exhaustive via verify_db.py
# ---------------------------------------------------------------------------
phase_verify_db() {
  # Phase 6: run verify_db.py inside the traffic-gen container and report
  # the outcome.
  #
  # Globals:  VERIFY_WAIT (read) - seconds forwarded as --wait, default 10
  # Outputs:  the verifier's stdout and stderr, followed by a pass or warn line
  # Returns:  the status of the final pass/warn call
  log "========== Phase 6 : Vérification exhaustive DB =========="

  local -a verify_args=(
    --host clickhouse
    --port 8123
    --db-logs ja4_logs
    --db-processing ja4_processing
    --min-rows 5
    --wait "${VERIFY_WAIT:-10}"
  )

  # Capture the exit status without tripping `set -e`.
  local rc=0
  _dc exec -T traffic-gen python /app/verify_db.py "${verify_args[@]}" 2>&1 || rc=$?

  if (( rc == 0 )); then
    pass "Vérification DB exhaustive : tous les champs attendus présents"
  else
    # A non-zero exit from the verifier is only reported as a warning here;
    # it does not abort the run.
    warn "Vérification DB : certains champs optionnels absents (voir détail ci-dessus)"
  fi
}
# ---------------------------------------------------------------------------
# Résumé final
# ---------------------------------------------------------------------------
@ -270,5 +291,6 @@ run_all_phases() {
phase_schema
phase_traffic
phase_verify
phase_verify_db
phase_summary
}

View File

@ -24,11 +24,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \
WORKDIR /build
COPY go.work go.work.sum* ./
COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/
COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/
RUN cd services/ja4ebpf && go mod download 2>/dev/null; true
COPY shared/go/ja4common/ ./shared/go/ja4common/
COPY services/ja4ebpf/ ./services/ja4ebpf/
WORKDIR /build/services/ja4ebpf

View File

@ -30,11 +30,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \
WORKDIR /build
COPY go.work go.work.sum* ./
COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/
COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/
RUN cd services/ja4ebpf && go mod download 2>/dev/null; true
COPY shared/go/ja4common/ ./shared/go/ja4common/
COPY services/ja4ebpf/ ./services/ja4ebpf/
WORKDIR /build/services/ja4ebpf

View File

@ -0,0 +1,30 @@
#!/bin/bash
# =============================================================================
# clickhouse-init.sh — prepare the shared SQL files for integration testing
#
# Every *.sql file found in /initdb-src/ is copied to a scratch directory with
# its credentials rewritten, then each patched copy is fed to clickhouse-client.
# =============================================================================
set -e

SRC_DIR="/initdb-src"
TMP_DIR="/tmp/initdb-patched"

# sed rules: switch to the 'default' user and blank out placeholder passwords.
patch_rules=(
  -e "s/USER 'admin'/USER 'default'/g"
  -e "s/PASSWORD 'CHANGE_ME'/PASSWORD ''/g"
  -e "s/PASSWORD 'ChangeMe'/PASSWORD ''/g"
)

mkdir -p "$TMP_DIR"

# Pass 1 — write a patched copy of every source file.
for sql_src in "$SRC_DIR"/*.sql; do
  if [ ! -f "$sql_src" ]; then
    # Unmatched glob (or not a regular file): nothing to patch.
    continue
  fi
  sql_name=$(basename "$sql_src")
  echo "[init] Patching $sql_name"
  sed "${patch_rules[@]}" "$sql_src" > "$TMP_DIR/$sql_name"
done

# Pass 2 — execute the patched copies in glob order.
for sql_patched in "$TMP_DIR"/*.sql; do
  if [ ! -f "$sql_patched" ]; then
    continue
  fi
  echo "[init] Executing $(basename "$sql_patched")"
  clickhouse-client --multiquery < "$sql_patched"
done

echo "[init] All SQL files executed — initialisation terminée"

View File

@ -1,6 +1,8 @@
FROM python:3.12-alpine
# No extra deps needed — stdlib only (urllib, ssl, concurrent.futures)
# httpx[http2] pour les scénarios HTTP/2 explicites
RUN pip install --no-cache-dir "httpx[http2]"
WORKDIR /app
COPY *.py .

View File

@ -8,18 +8,19 @@ Simulates varied web traffic including:
- Multiple HTTP methods (GET, POST, PUT, DELETE, HEAD, OPTIONS, PATCH)
- Varied paths, query strings, form data, JSON payloads
- Both HTTP (port 80) and HTTPS (port 443)
- HTTP/1.0, HTTP/1.1, HTTP/2.0 (via httpx[http2])
- Different Accept/Language/Encoding headers
- Cookie / Referer / X-Forwarded-For always set — ensures src_ip diversity
in ClickHouse via mod_remoteip (r->useragent_ip updated from XFF)
- Multiple SSL contexts to vary TLS ClientHello parameters
Usage:
python generate_traffic.py [--host platform] [--http-port 80] [--https-port 443]
[--requests 500] [--workers 10] [--scenario all]
[--requests 500] [--workers 10]
"""
import argparse
import concurrent.futures
import http.client
import json
import random
import ssl
@ -435,6 +436,45 @@ def build_scenarios(host: str, http_port: int, https_port: int, count: int) -> l
label="options-cors",
))
# --- HTTP/1.0 explicite sur HTTP (port 80) ---
# http.client permet de forcer le protocole HTTP/1.0 via _http_vsn
h10_count = max(10, int(count * 0.05))
for _ in range(h10_count):
ua = random.choice(BROWSERS + BOTS)
path = random.choice(["/", "/health", "/index.html", "/robots.txt"])
scenarios.append(RequestScenario(
method="GET",
url=f"{base_http}{path}",
headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS + BOT_IPS)),
label="http10-plain",
))
# --- HTTP/1.0 explicite sur HTTPS ---
for _ in range(max(5, int(count * 0.03))):
ua = random.choice(BROWSERS + BOTS)
_, ssl_ctx = random.choice(SSL_CONTEXTS)
scenarios.append(RequestScenario(
method="GET",
url=f"{base_https}/health",
headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
ssl_ctx=ssl_ctx,
label="http10-tls",
))
# --- HTTP/2 explicite (httpx[http2]) ---
h2_count = max(20, int(count * 0.10))
for _ in range(h2_count):
ua = random.choice(BROWSERS)
path = random.choice(PATHS)
qs = random.choice(QUERY_PARAMS)
scenarios.append(RequestScenario(
method=random.choice(["GET", "GET", "GET", "POST"]),
url=f"{base_https}{path}{qs}",
headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
body=json.dumps({"h2": True}).encode() if random.random() < 0.2 else None,
label="http2-explicit",
))
# Fill remaining with browser HTTPS GETs
while len(scenarios) < count:
ua = random.choice(BROWSERS)
@ -457,8 +497,78 @@ def build_scenarios(host: str, http_port: int, https_port: int, count: int) -> l
stats = {"ok": 0, "err": 0, "by_label": {}}
def _send_http10(scenario: RequestScenario) -> dict:
"""Envoie une requête en HTTP/1.0 pur via http.client."""
t0 = time.monotonic()
try:
from urllib.parse import urlparse
parsed = urlparse(scenario.url)
host = parsed.hostname
port = parsed.port or (443 if parsed.scheme == "https" else 80)
path = parsed.path or "/"
if parsed.query:
path += "?" + parsed.query
if parsed.scheme == "https":
ctx = scenario.ssl_ctx or ssl.create_default_context()
if hasattr(ctx, "check_hostname"):
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
conn = http.client.HTTPSConnection(host, port, timeout=5, context=ctx)
else:
conn = http.client.HTTPConnection(host, port, timeout=5)
# Forcer HTTP/1.0
conn._http_vsn = 10
conn._http_vsn_str = "HTTP/1.0"
hdrs = {k: v for k, v in scenario.headers.items()
if k.lower() not in ("connection",)}
conn.request(scenario.method, path, body=scenario.body, headers=hdrs)
resp = conn.getresponse()
resp.read(4096)
return {"ok": True, "status": resp.status, "label": scenario.label,
"ms": int((time.monotonic() - t0) * 1000)}
except Exception as e:
return {"ok": False, "error": str(e)[:80], "label": scenario.label,
"ms": int((time.monotonic() - t0) * 1000)}
finally:
try:
conn.close()
except Exception:
pass
def _send_http2(scenario: RequestScenario) -> dict:
"""Envoie une requête HTTP/2 via httpx (négociation ALPN h2)."""
t0 = time.monotonic()
try:
import httpx
with httpx.Client(http2=True, verify=False, timeout=5.0) as client:
hdrs = {k: v for k, v in scenario.headers.items()
if k.lower() not in ("connection", "content-length")}
resp = client.request(
method=scenario.method,
url=scenario.url,
headers=hdrs,
content=scenario.body,
)
return {"ok": True, "status": resp.status_code, "label": scenario.label,
"ms": int((time.monotonic() - t0) * 1000),
"http_version": resp.http_version}
except Exception as e:
return {"ok": False, "error": str(e)[:80], "label": scenario.label,
"ms": int((time.monotonic() - t0) * 1000)}
def send_request(scenario: RequestScenario) -> dict:
"""Send a single request, return result dict."""
"""Dispatcher : HTTP/1.0, HTTP/2, ou HTTP/1.1 selon le label."""
if scenario.label.startswith("http10"):
return _send_http10(scenario)
if scenario.label == "http2-explicit":
return _send_http2(scenario)
# HTTP/1.1 via urllib (chemin existant)
t0 = time.monotonic()
try:
req = urllib.request.Request(
@ -469,11 +579,10 @@ def send_request(scenario: RequestScenario) -> dict:
)
ctx = scenario.ssl_ctx
with urllib.request.urlopen(req, context=ctx, timeout=5) as resp:
_ = resp.read(4096) # consume partial body
_ = resp.read(4096)
return {"ok": True, "status": resp.status, "label": scenario.label,
"ms": int((time.monotonic() - t0) * 1000)}
except urllib.error.HTTPError as e:
# HTTP errors (4xx/5xx) are still valid responses — Apache served them
return {"ok": True, "status": e.code, "label": scenario.label,
"ms": int((time.monotonic() - t0) * 1000)}
except Exception as e:

View File

@ -0,0 +1,268 @@
#!/usr/bin/env python3
"""
verify_db.py — Vérification exhaustive des données dans ClickHouse après génération de trafic
Vérifie que toutes les couches de données attendues sont présentes :
- L3/L4 : TTL, MSS, window_size, df_bit
- TLS/L5 : ja4, sni, alpn, version
- L7 HTTP : method, path, status_code, duration_ms, header_order_signature
- Corrélation : correlated=1 (L3+L7), correlated=0 (L7 seul)
- Keep-alives : requêtes multiplexées sur une même connexion TCP
- HTTP/2 : tls_alpn='h2' ou h2_settings_count > 0
- HTTP plain : lignes sans TLS (port 80)
Usage:
python verify_db.py [--host clickhouse] [--port 9000]
[--db-logs ja4_logs] [--db-processing ja4_processing]
[--min-rows 10]
"""
import argparse
import sys
import time
# ---------------------------------------------------------------------------
# Client ClickHouse léger (HTTP interface port 8123)
# ---------------------------------------------------------------------------
import urllib.parse
import urllib.request
import json
def ch_query(host: str, port: int, query: str) -> list:
    """Run ``query`` against the ClickHouse HTTP interface and return its rows.

    ``FORMAT JSON`` is appended to the query, which is URL-encoded and sent as
    the ``query`` parameter of a GET request with a 10 second timeout. The
    ``data`` array of the JSON reply is returned (empty list when absent).

    Failures are not raised: they are returned as a single sentinel row
    ``[{"__error__": "<message>"}]``.
    """
    encoded = urllib.parse.quote(query + ' FORMAT JSON')
    url = f"http://{host}:{port}/?query={encoded}"
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            payload = json.loads(resp.read())
        rows = payload.get("data", [])
    except Exception as e:
        rows = [{"__error__": str(e)}]
    return rows
def ch_scalar(host: str, port: int, query: str) -> str:
    """Return the first column of the first row of ``query`` as a string.

    Returns the error message when ``ch_query`` reports a failure, and ``"?"``
    when the query yields no rows (or a row without columns) instead of
    raising ``IndexError``.
    """
    rows = ch_query(host, port, query)
    if not rows:
        # Empty result set: there is no first row to read.
        return "?"
    first = rows[0]
    if "__error__" in first:
        return str(first["__error__"])
    values = list(first.values())
    return str(values[0]) if values else "?"
# ---------------------------------------------------------------------------
# Checks
# ---------------------------------------------------------------------------
# Status markers stored with each result. They must be pairwise distinct:
# the summary classifies results by comparing the stored icon against
# CHECK_FAIL and CHECK_WARN, so identical markers would count every
# successful check as a failure.
CHECK_OK = "✅"
CHECK_FAIL = "❌"
CHECK_WARN = "⚠️ "

# Accumulates one (icon, name, raw value) tuple per executed check.
results: list = []


def check(name: str, query: str, host: str, port: int, expect_nonzero: bool = True,
          min_val: int = 1, warn_only: bool = False) -> bool:
    """Run a scalar query, compare it to a threshold and record the outcome.

    Args:
        name: label stored with the result.
        query: SQL statement whose first scalar is evaluated.
        host, port: ClickHouse HTTP endpoint.
        expect_nonzero: when True the check passes if the value is at least
            ``min_val``; when False it passes only if the value is exactly 0.
        min_val: threshold used when ``expect_nonzero`` is True.
        warn_only: record a failed check as a warning instead of a failure.

    Returns:
        True when the check passed, False otherwise. The outcome is also
        appended to the module-level ``results`` list.
    """
    val = ch_scalar(host, port, query)
    try:
        n = int(float(val))
    except (ValueError, OverflowError):
        # Non-numeric (error message, "?", nan) or infinite value: -1 can
        # satisfy neither the threshold nor the "== 0" expectation.
        n = -1
    ok = n >= min_val if expect_nonzero else n == 0
    icon = CHECK_OK if ok else (CHECK_WARN if warn_only else CHECK_FAIL)
    results.append((icon, name, val))
    return ok
def run_checks(host: str, http_port: int, db_logs: str, db_processing: str, min_rows: int):
"""Run every verification query and print a summary.

Each section prints a heading and then calls check(), which appends its
outcome to the module-level ``results`` list. Two informational entries
(correlation rate, max keepalives) are appended directly with CHECK_OK.

Args:
    host: ClickHouse host name.
    http_port: port passed to every check()/ch_scalar() call.
    db_logs: database holding http_logs_raw and http_logs.
    db_processing: database holding the agg_* tables.
    min_rows: minimum row count required in the two log tables.

Returns:
    True when no entry in ``results`` carries CHECK_FAIL, False otherwise.
"""
print(f"\n{'='*65}")
print(f" Vérification ClickHouse — {db_logs} / {db_processing}")
print(f"{'='*65}\n")
# ------------------------------------------------------------------
# 1. Raw tables
# ------------------------------------------------------------------
print("── 1. Tables brutes ─────────────────────────────────────────")
check("http_logs_raw : lignes totales",
f"SELECT count() FROM {db_logs}.http_logs_raw", host, http_port, min_val=min_rows)
check("http_logs : lignes après MV",
f"SELECT count() FROM {db_logs}.http_logs", host, http_port, min_val=min_rows)
# ------------------------------------------------------------------
# 2. L3/L4 metadata
# ------------------------------------------------------------------
print("\n── 2. Métadonnées L3/L4 ─────────────────────────────────────")
check("ip_meta_ttl > 0 (TTL capturé)",
f"SELECT count() FROM {db_logs}.http_logs WHERE ip_meta_ttl > 0",
host, http_port, min_val=1)
check("tcp_meta_mss > 0 (MSS capturé)",
f"SELECT count() FROM {db_logs}.http_logs WHERE tcp_meta_mss > 0",
host, http_port, min_val=1)
check("tcp_meta_window_size > 0 (window_size capturé)",
f"SELECT count() FROM {db_logs}.http_logs WHERE tcp_meta_window_size > 0",
host, http_port, min_val=1)
check("src_port renseigné (> 1000)",
f"SELECT count() FROM {db_logs}.http_logs WHERE src_port > 1000",
host, http_port, min_val=1)
# ------------------------------------------------------------------
# 3. TLS / JA4
# ------------------------------------------------------------------
print("\n── 3. TLS / JA4 ─────────────────────────────────────────────")
check("ja4 renseigné (fingerprint TLS)",
f"SELECT count() FROM {db_logs}.http_logs WHERE ja4 != ''",
host, http_port, min_val=1)
check("tls_sni renseigné (SNI extrait)",
f"SELECT count() FROM {db_logs}.http_logs WHERE tls_sni != ''",
host, http_port, min_val=1)
check("tls_version TLSv1.2 présente",
f"SELECT count() FROM {db_logs}.http_logs WHERE tls_version = 'TLSv1.2'",
host, http_port, min_val=1, warn_only=True)
check("tls_version TLSv1.3 présente",
f"SELECT count() FROM {db_logs}.http_logs WHERE tls_version = 'TLSv1.3'",
host, http_port, min_val=1)
check("tls_alpn h2 (HTTP/2 négocié)",
f"SELECT count() FROM {db_logs}.http_logs WHERE tls_alpn = 'h2'",
host, http_port, min_val=1, warn_only=True)
# ja4 format: t12d... or t13d... depending on the TLS version
check("ja4 TLS1.2 (commence par t12)",
f"SELECT count() FROM {db_logs}.http_logs WHERE startsWith(ja4, 't12')",
host, http_port, min_val=1, warn_only=True)
check("ja4 TLS1.3 (commence par t13)",
f"SELECT count() FROM {db_logs}.http_logs WHERE startsWith(ja4, 't13')",
host, http_port, min_val=1)
# ------------------------------------------------------------------
# 4. L7 HTTP layer
# ------------------------------------------------------------------
print("\n── 4. Couche L7 HTTP ────────────────────────────────────────")
for method in ("GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH"):
check(f"méthode {method} présente",
f"SELECT count() FROM {db_logs}.http_logs WHERE method = '{method}'",
host, http_port, min_val=1)
check("path renseigné",
f"SELECT count() FROM {db_logs}.http_logs WHERE path != ''",
host, http_port, min_val=1)
check("status_code 200 présent",
f"SELECT count() FROM {db_logs}.http_logs WHERE status_code = 200",
host, http_port, min_val=1)
# NOTE(review): the label mentions 404 only, but the query accepts any of
# 400, 404, 405 or 500.
check("status_code 404 présent (requêtes de test)",
f"SELECT count() FROM {db_logs}.http_logs WHERE status_code IN (400,404,405,500)",
host, http_port, min_val=1, warn_only=True)
check("duration_ms > 0 (latence mesurée)",
f"SELECT count() FROM {db_logs}.http_logs WHERE duration_ms > 0",
host, http_port, min_val=1)
check("header_order_signature renseigné",
f"SELECT count() FROM {db_logs}.http_logs WHERE header_order_signature != ''",
host, http_port, min_val=1)
# ------------------------------------------------------------------
# 5. L3+L7 correlation
# ------------------------------------------------------------------
print("\n── 5. Corrélation L3 ↔ L7 ───────────────────────────────────")
check("correlated=1 (lignes L3+L7 fusionnées)",
f"SELECT count() FROM {db_logs}.http_logs WHERE correlated = 1",
host, http_port, min_val=1)
check("correlated=0 (HTTP sans corr. réseau, port 80)",
f"SELECT count() FROM {db_logs}.http_logs WHERE correlated = 0",
host, http_port, min_val=1, warn_only=True)
# Informational only: always recorded with CHECK_OK, whatever the value.
pct_corr = ch_scalar(host, http_port,
f"SELECT round(100*countIf(correlated=1)/count(), 1) FROM {db_logs}.http_logs")
results.append((CHECK_OK, f"taux corrélation ({pct_corr}%)", pct_corr))
# ------------------------------------------------------------------
# 6. Keep-alives (TCP multiplexing)
# ------------------------------------------------------------------
print("\n── 6. Keep-alives ────────────────────────────────────────────")
check("keepalives > 1 (connexions multiplexées)",
f"SELECT count() FROM {db_logs}.http_logs WHERE keepalives > 1",
host, http_port, min_val=1, warn_only=True)
# Informational only: always recorded with CHECK_OK, whatever the value.
max_ka = ch_scalar(host, http_port,
f"SELECT max(keepalives) FROM {db_logs}.http_logs")
results.append((CHECK_OK, f"max keepalives ({max_ka})", max_ka))
# ------------------------------------------------------------------
# 7. Source IP diversity
# ------------------------------------------------------------------
print("\n── 7. Diversité sources ─────────────────────────────────────")
check("IPs sources distinctes >= 5",
f"SELECT uniqExact(src_ip) FROM {db_logs}.http_logs",
host, http_port, min_val=5, warn_only=True)
# ------------------------------------------------------------------
# 8. Processing tables (ja4_processing)
# ------------------------------------------------------------------
print("\n── 8. Tables processing ─────────────────────────────────────")
for tbl in ("agg_host_ip_ja4_1h", "agg_header_fingerprint_1h",
"agg_ip_behavior_1h", "agg_request_timing_1h"):
check(f"{tbl} peuplée",
f"SELECT count() FROM {db_processing}.{tbl}",
host, http_port, min_val=1, warn_only=True)
# ------------------------------------------------------------------
# Summary
# ------------------------------------------------------------------
print(f"\n{'='*65}")
fails = 0
warns = 0
for icon, name, val in results:
print(f" {icon} {name:<50s} {val}")
if icon == CHECK_FAIL:
fails += 1
elif icon == CHECK_WARN:
warns += 1
total = len(results)
passed = total - fails - warns
print(f"\n{'='*65}")
print(f" Résultat : {passed} OK | {warns} WARN | {fails} FAIL (total {total})")
print(f"{'='*65}\n")
return fails == 0
def main():
    """Command-line entry point.

    Parses the options, optionally sleeps ``--wait`` seconds, runs all checks
    and exits with status 0 when ``run_checks`` returns a truthy value, 1
    otherwise.
    """
    options = (
        ("--host", {"default": "clickhouse", "help": "Hôte ClickHouse"}),
        ("--port", {"type": int, "default": 8123,
                    "help": "Port HTTP ClickHouse (8123)"}),
        ("--db-logs", {"default": "ja4_logs", "help": "Base de données logs"}),
        ("--db-processing", {"default": "ja4_processing",
                             "help": "Base processing"}),
        ("--min-rows", {"type": int, "default": 10,
                        "help": "Minimum de lignes attendues"}),
        ("--wait", {"type": int, "default": 5,
                    "help": "Attendre N secondes avant de vérifier (flush MV)"}),
    )
    parser = argparse.ArgumentParser(description="Vérification ClickHouse post-trafic")
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    delay = args.wait
    if delay > 0:
        print(f"[verify_db] Attente {delay}s pour le flush des Materialized Views...")
        time.sleep(delay)

    succeeded = run_checks(args.host, args.port, args.db_logs,
                           args.db_processing, args.min_rows)
    exit_code = 0 if succeeded else 1
    sys.exit(exit_code)
if __name__ == "__main__":
main()

115
tests/vm/README.md Normal file
View File

@ -0,0 +1,115 @@
# Tests VM — eBPF sur kernel réel (Rocky Linux 9)
## Pourquoi une VM ?
Les tests Docker ne peuvent capturer que L3/L4 et TLS (via le hook TC). Les données
L7 HTTP (method, path, status_code, header_order_signature) nécessitent en plus la
corrélation fd→IP fournie par accept4, qui n'est pas disponible dans Docker :
| Fonctionnalité eBPF | Docker | VM |
|---|---|---|
| Hook TC (XDP) — L3/L4 + TLS | ✅ | ✅ |
| Uprobe SSL_read — L7 HTTP | ✅ attache | ✅ attache |
| Tracepoint accept4 — corrélation IP | ❌ pas de tracefs | ✅ |
| Kprobe accept4 — corrélation IP | ❌ pas de perf PMU | ✅ |
Dans une VM, le kernel complet est disponible → **accept4 fonctionne** →
la corrélation IP est parfaite → les données L7 arrivent dans ClickHouse.
## Prérequis (installation unique)
```bash
# 1. Installer Vagrant + libvirt + KVM
sudo apt-get install -y vagrant libvirt-daemon-system libvirt-clients \
qemu-kvm ruby-libvirt bridge-utils
# 2. Plugin vagrant-libvirt
vagrant plugin install vagrant-libvirt
# 3. Ajouter ton user aux groupes (nécessite une reconnexion)
sudo usermod -aG libvirt,kvm $USER
# → Se déconnecter et se reconnecter
# 4. Vérifier que KVM fonctionne
virsh list --all
```
## Utilisation
```bash
# Depuis la racine du projet :
# Créer la VM (première fois, ~5-10 min — télécharge Rocky Linux 9)
make vm-up
# Lancer le test nginx complet (L3/L4 + TLS + L7 HTTP)
make test-vm-nginx
# Après modification des sources Go/C
make vm-rebuild-ja4ebpf # synchronise + recompile dans la VM
make test-vm-nginx # relancer les tests
# Connexion SSH interactive
make vm-ssh
# Détruire la VM (libère l'espace disque)
make vm-down
```
## Ce que teste `test-vm-nginx`
1. **Build** — recompile ja4ebpf (BPF CO-RE + Go) depuis les sources
2. **ClickHouse** — démarre dans Docker (dans la VM)
3. **nginx** — démarre avec TLS + HTTP/2
4. **ja4ebpf** — démarre avec uprobes + accept4 tracepoints
5. **Trafic** — HTTP/1.0, HTTP/1.1, HTTPS/1.1, HTTPS/2.0
6. **Vérification DB** :
- `ip_meta_ttl`, `tcp_meta_mss`, `tcp_meta_window_size`
- `ja4`, `tls_sni`
- **`method`, `path`, `status_code`** ✅ (uniquement en VM)
- **`header_order_signature`** ✅ (uniquement en VM)
## Différence avec les tests Docker
| Check | Docker | VM |
|---|---|---|
| L3/L4 (TTL, MSS, window) | ✅ | ✅ |
| TLS fingerprint (JA4, SNI) | ✅ | ✅ |
| L7 méthode HTTP | ❌ | ✅ |
| L7 path HTTP | ❌ | ✅ |
| status_code | ❌ | ✅ |
| header_order_signature | ❌ | ✅ |
## Architecture de la VM
```
VM Rocky Linux 9 (KVM)
├── nginx + libssl.so.3 ← serveur web cible
├── ja4ebpf ← agent eBPF (natif, pas Docker)
│ ├── TC hook (eth0) ← capture L3/L4 + TLS ClientHello
│ ├── Uprobe SSL_read ← capture HTTP déchiffré
│ └── Tracepoint accept4 ← corrélation fd→IP (disponible !)
└── ClickHouse (Docker) ← base de données
```
## Dépannage
**vagrant up échoue : "Call to virConnectOpen failed"**
```bash
sudo systemctl start libvirtd
sudo usermod -aG libvirt $USER # puis se reconnecter
```
**Erreur "default pool not found"**
```bash
sudo virsh pool-define-as default dir --target /var/lib/libvirt/images
sudo virsh pool-build default
sudo virsh pool-start default
sudo virsh pool-autostart default
```
**ja4ebpf : "uprobe SSL_read" ne s'attache pas**
```bash
# Vérifier le chemin libssl dans la VM
vagrant ssh -- 'ls -la /usr/lib64/libssl*'
# Si différent de /usr/lib64/libssl.so.3, modifier /tmp/ja4ebpf.yml
```

66
tests/vm/Vagrantfile vendored Normal file
View File

@ -0,0 +1,66 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
# =============================================================================
# Vagrantfile — VM de test ja4ebpf sur Rocky Linux 9
#
# Fournit un environnement kernel complet pour les tests eBPF :
# - tracefs / debugfs montés
# - perf_kprobe PMU disponible
# - uprobes fonctionnels avec accept4 kprobe/tracepoint
#
# Prérequis (host Ubuntu) :
# sudo apt-get install -y vagrant libvirt-daemon-system libvirt-clients \
# qemu-kvm ruby-libvirt
# vagrant plugin install vagrant-libvirt
# sudo usermod -aG libvirt,kvm $USER # puis se reconnecter
#
# Utilisation :
# vagrant up # créer + provisionner la VM (première fois ~5 min)
# vagrant ssh # connexion SSH
# make test-vm-nginx # lancer les tests depuis le host
# vagrant destroy -f # détruire la VM
# =============================================================================
Vagrant.configure("2") do |config|
  # ── Rocky Linux 9 box ──────────────────────────────────────────────────────
  config.vm.box = "generic/rocky9"

  # ── Network: private IP so the host can reach the VM ───────────────────────
  config.vm.network "private_network", ip: "192.168.56.10"

  # ── VM resources (libvirt / KVM) ───────────────────────────────────────────
  config.vm.provider :libvirt do |v|
    v.cpus = 4
    v.memory = 4096
    v.nested = false # nested virtualization is not needed
  end

  # ── VM resources (VirtualBox fallback) ─────────────────────────────────────
  config.vm.provider :virtualbox do |v|
    v.cpus = 4
    v.memory = 4096
    v.customize ["modifyvm", :id, "--nicpromisc2", "allow-all"]
  end

  # ── Project mount ──────────────────────────────────────────────────────────
  # The project root is rsync'ed into /ja4-platform inside the VM.
  config.vm.synced_folder "../..", "/ja4-platform",
    type: "rsync",
    rsync__exclude: [".git/", "old/", "*.rpm", "services/*/target/"]

  # ── Provisioning ───────────────────────────────────────────────────────────
  config.vm.provision "shell", path: "provision.sh"

  # ── Post-up message ────────────────────────────────────────────────────────
  # The advertised targets must match the Makefile (test-vm-nginx, test-vm-all).
  config.vm.post_up_message = <<~MSG
    VM ja4ebpf prête !
    Depuis le répertoire tests/vm/ :
    vagrant ssh # connexion interactive
    make -C ../.. test-vm-nginx # lancer le test nginx
    make -C ../.. test-vm-all # lancer tous les tests
    IP de la VM : 192.168.56.10
  MSG
end

118
tests/vm/provision.sh Executable file
View File

@ -0,0 +1,118 @@
#!/usr/bin/env bash
# =============================================================================
# provision.sh — Provisionnement de la VM Rocky Linux 9 pour ja4ebpf
#
# Installe :
# - Toolchain eBPF : clang, llvm, bpftool, libbpf-devel, kernel-devel
# - Go 1.24
# - Docker (pour ClickHouse)
# - nginx + openssl (serveur web cible)
# - Outils de test : python3, httpx
# =============================================================================
set -euo pipefail

# Timestamped progress line on stdout.
log() { echo "[provision] $(date +%H:%M:%S) $*"; }

# ── 1. System update + repositories ──────────────────────────────────────────
log "Mise à jour des dépôts..."
dnf install -y epel-release dnf-plugins-core
dnf config-manager --enable crb
dnf update -y --quiet

# ── 2. eBPF toolchain ────────────────────────────────────────────────────────
log "Installation toolchain eBPF (clang, bpftool, libbpf)..."
# kernel-devel is pinned to the running kernel release.
dnf install -y \
  clang \
  llvm \
  bpftool \
  libbpf-devel \
  "kernel-devel-$(uname -r)" \
  make \
  git

# ── 3. Go (pinned version) ───────────────────────────────────────────────────
log "Installation de Go..."
GO_VERSION="1.24.3"
# Reinstall only when Go is missing or reports a different version.
if ! command -v go &>/dev/null || [[ "$(go version 2>/dev/null | awk '{print $3}')" != "go${GO_VERSION}" ]]; then
  curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" -o /tmp/go.tar.gz
  rm -rf /usr/local/go
  tar -C /usr/local -xzf /tmp/go.tar.gz
  rm /tmp/go.tar.gz
fi
export PATH="/usr/local/go/bin:$PATH"
# Persist PATH/GOPATH for login shells.
cat > /etc/profile.d/go.sh << 'EOF'
export PATH="/usr/local/go/bin:$PATH"
export GOPATH="/home/vagrant/go"
EOF

# ── 4. Docker (for ClickHouse) ───────────────────────────────────────────────
log "Installation de Docker..."
dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
dnf install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
systemctl enable --now docker
usermod -aG docker vagrant
# Lets the vagrant user reach Docker before its new group membership takes
# effect. World-writable socket: acceptable only on a throwaway test VM.
chmod 666 /var/run/docker.sock || true

# ── 5. nginx + openssl ───────────────────────────────────────────────────────
log "Installation de nginx..."
dnf install -y nginx openssl curl

# ── 6. Python3 + test tooling ────────────────────────────────────────────────
log "Installation Python3 et outils de test..."
dnf install -y python3 python3-pip
pip3 install --quiet "httpx[http2]" requests

# ── 7. eBPF debugging tools ──────────────────────────────────────────────────
log "Installation outils de debug eBPF..."
dnf install -y perf strace

# ── 8. Mount tracefs + debugfs at boot ───────────────────────────────────────
log "Configuration des pseudo-systèmes de fichiers eBPF..."
# systemd requires a mount unit to be named after its mount point
# (/sys/kernel/tracing -> sys-kernel-tracing.mount); a unit whose name does
# not match Where= is refused at load time.
cat > /etc/systemd/system/sys-kernel-tracing.mount << 'EOF'
[Unit]
Description=Mount tracefs
DefaultDependencies=no
After=local-fs.target
[Mount]
What=tracefs
Where=/sys/kernel/tracing
Type=tracefs
Options=defaults
[Install]
WantedBy=multi-user.target
EOF
cat > /etc/systemd/system/sys-kernel-debug.mount << 'EOF'
[Unit]
Description=Mount debugfs
DefaultDependencies=no
After=local-fs.target
[Mount]
What=debugfs
Where=/sys/kernel/debug
Type=debugfs
Options=defaults
[Install]
WantedBy=multi-user.target
EOF
# Make systemd pick up the unit files written above before enabling them.
systemctl daemon-reload
systemctl enable sys-kernel-tracing.mount sys-kernel-debug.mount
# Mount right away for the current boot; failures (e.g. already mounted) are
# deliberately ignored.
mount -t tracefs tracefs /sys/kernel/tracing 2>/dev/null || true
mount -t debugfs debugfs /sys/kernel/debug 2>/dev/null || true

# ── 9. Initial ja4ebpf build from source ─────────────────────────────────────
log "Build initial de ja4ebpf..."
export PATH="/usr/local/go/bin:$PATH"
cd /ja4-platform/services/ja4ebpf
# A `go generate` failure is logged but tolerated; a `go build` failure aborts
# the script through pipefail.
GOWORK=off go generate ./internal/loader/ 2>&1 | tail -5 || log "go generate: erreur (normal si vmlinux.h absent)"
GOWORK=off CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
  go build -ldflags="-s -w" -o /usr/local/bin/ja4ebpf ./cmd/ja4ebpf/ 2>&1 | tail -5

log "Provisionnement terminé !"
log "Lancer 'make test-vm-nginx' depuis le host pour démarrer les tests."

309
tests/vm/run-tests-vm.sh Executable file
View File

@ -0,0 +1,309 @@
#!/usr/bin/env bash
# =============================================================================
# run-tests-vm.sh — Lance la stack de test complète dans la VM Rocky Linux 9
#
# Ce script s'exécute DANS la VM (via vagrant ssh ou vagrant provision).
# Il ne peut pas tourner dans Docker — il requiert un vrai kernel pour eBPF.
#
# Usage (depuis le host) :
# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh nginx'
# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh all'
#
# Variables d'environnement :
# STACK : stack à tester (nginx|apache|nginx-varnish|hitch-varnish|all)
# KEEP_RUNNING : si "true", ne pas arrêter la stack après le test (défaut: false)
# =============================================================================
set -euo pipefail

# ── Settings ─────────────────────────────────────────────────────────────────
# The first positional argument selects the stack (default: nginx).
STACK="${1:-nginx}"
KEEP_RUNNING="${KEEP_RUNNING:-false}"
PROJECT="/ja4-platform"
RESULTS_DIR="/tmp/ja4-test-results"

# ── ANSI colour sequences (interpreted by `echo -e`) ─────────────────────────
RESET='\033[0m'
BOLD='\033[1m'
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'

# ── Result counters, incremented by pass / fail / warn ───────────────────────
PASS_COUNT=0
FAIL_COUNT=0
WARN_COUNT=0

# Progress line prefixed with the stack name and the current time.
log() {
  echo -e "${BOLD}[$STACK]${RESET} $(date +%H:%M:%S) $*"
}

# Report a successful step and count it.
pass() {
  echo -e " ${GREEN}${RESET} $*"
  PASS_COUNT=$((PASS_COUNT + 1))
}

# Report a failed step and count it (does not exit by itself).
fail() {
  echo -e " ${RED}${RESET} $*"
  FAIL_COUNT=$((FAIL_COUNT + 1))
}

# Report a non-fatal anomaly and count it.
warn() {
  echo -e " ${YELLOW}⚠️${RESET} $*"
  WARN_COUNT=$((WARN_COUNT + 1))
}
# ── Vérification prérequis ────────────────────────────────────────────────────
check_prerequisites() {
  # Verify that the VM can run the suite: tracefs and debugfs mounted,
  # ja4ebpf available (rebuilt from source when missing), docker and nginx
  # installed. Exits the script with status 1 on the first missing item.
  log "Vérification des prérequis..."

  # The mount-point directories exist in sysfs even when nothing is mounted
  # on them, so test for an actual mount rather than for the directory.
  if ! mountpoint -q /sys/kernel/tracing; then
    fail "tracefs non monté — exécuter: sudo mount -t tracefs tracefs /sys/kernel/tracing"
    exit 1
  fi
  if ! mountpoint -q /sys/kernel/debug; then
    fail "debugfs non monté"
    exit 1
  fi

  if ! command -v ja4ebpf >/dev/null 2>&1; then
    log "Rebuild ja4ebpf..."
    # NOTE: the directory change and PATH export persist after this function.
    cd "$PROJECT/services/ja4ebpf"
    export PATH="/usr/local/go/bin:$PATH"
    GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3
    GOWORK=off CGO_ENABLED=0 go build -o /usr/local/bin/ja4ebpf ./cmd/ja4ebpf/
  fi

  if ! command -v docker >/dev/null 2>&1; then
    fail "Docker non installé"
    exit 1
  fi
  if ! command -v nginx >/dev/null 2>&1; then
    fail "nginx non installé"
    exit 1
  fi

  pass "Prérequis OK"
}
# ── ClickHouse startup ───────────────────────────────────────────────────────
# start_clickhouse — (Re)creates the "ja4-clickhouse" container and waits
# until its HTTP /ping endpoint answers.
#
# Every "$PROJECT/shared/clickhouse/"*.sql file is bind-mounted read-only under
# /initdb-src/ and clickhouse-init.sh is mounted as the container init script.
# Exits the script with status 1 if the server is not ready after 60 polls
# spaced 2 seconds apart.
start_clickhouse() {
  log "Démarrage ClickHouse..."
  docker rm -f ja4-clickhouse 2>/dev/null || true

  # Collect the -v options in an array so that paths containing whitespace
  # survive intact, and so that an unmatched glob adds no bogus mount.
  local -a volume_args=()
  local sql_file
  for sql_file in "$PROJECT/shared/clickhouse/"*.sql; do
    [[ -e "$sql_file" ]] || continue
    volume_args+=(-v "$sql_file:/initdb-src/${sql_file##*/}:ro")
  done

  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older
  # bash versions.
  docker run -d --name ja4-clickhouse \
    -p 8123:8123 -p 9000:9000 \
    -e CLICKHOUSE_DB=ja4_processing \
    -e CLICKHOUSE_USER=default \
    -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \
    -v "$PROJECT/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh" \
    ${volume_args[@]+"${volume_args[@]}"} \
    clickhouse/clickhouse-server:24.8 2>&1 | tail -1

  # Wait until ClickHouse is ready
  log "Attente ClickHouse (max 120s)..."
  local attempt
  for ((attempt = 1; attempt <= 60; attempt++)); do
    if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then
      pass "ClickHouse prêt"
      return 0
    fi
    sleep 2
  done
  fail "ClickHouse timeout"; exit 1
}
# ── nginx configuration ──────────────────────────────────────────────────────
# setup_nginx — Installs a self-signed certificate and the test nginx.conf,
# creates the static test files under /var/www/html, starts nginx and waits
# for http://localhost/health to answer.
#
# Exits the script with status 1 when the configuration test fails, when nginx
# cannot be started, or when the health endpoint never answers — a success is
# no longer reported unconditionally.
setup_nginx() {
  log "Configuration nginx avec TLS..."

  # Self-signed certificate
  openssl req -x509 -nodes -days 365 \
    -subj "/CN=platform.test" \
    -newkey rsa:2048 \
    -keyout /etc/pki/tls/private/nginx.key \
    -out /etc/pki/tls/certs/nginx.crt 2>/dev/null

  # Copy the test configuration
  cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf

  # Create the test files
  mkdir -p /var/www/html
  echo '{"status":"ok","stack":"nginx-vm"}' > /var/www/html/health
  local rel_path
  for rel_path in data api/users api/data/test; do
    mkdir -p "/var/www/html/$(dirname "$rel_path")"
    echo '{"ok":true}' > "/var/www/html/$rel_path"
  done

  # Explicit checks: in `nginx -t && nginx` a failing `nginx -t` is exempt
  # from `set -e`, so the script would carry on without a running server.
  if ! nginx -t; then
    fail "Configuration nginx invalide"
    exit 1
  fi
  if ! nginx; then
    fail "nginx n'a pas pu démarrer"
    exit 1
  fi

  # Wait for nginx (20 polls, 0.5 s apart)
  local attempt
  for ((attempt = 1; attempt <= 20; attempt++)); do
    if curl -sf http://localhost/health >/dev/null 2>&1; then
      pass "nginx démarré"
      return 0
    fi
    sleep 0.5
  done
  fail "nginx ne répond pas sur http://localhost/health"
  exit 1
}
# ── ja4ebpf startup ──────────────────────────────────────────────────────────
# start_ja4ebpf — Writes /tmp/ja4ebpf.yml, launches ja4ebpf in the background
# (output in /tmp/ja4ebpf.log) and reports on the uprobe/tracepoint state.
#
# Globals:  JA4EBPF_PID (written) — PID of the background ja4ebpf process.
# Returns:  1 if the process is no longer alive 3 seconds after launch.
start_ja4ebpf() {
  log "Démarrage ja4ebpf..."
  pkill ja4ebpf 2>/dev/null || true
  sleep 1

  # Create the config. The delimiter is quoted, so the body is written
  # verbatim; nested keys must keep their indentation to stay under their
  # parent section.
  cat > /tmp/ja4ebpf.yml << 'EOF'
interface: eth0
ssl_lib_path: "/usr/lib64/libssl.so.3"
clickhouse:
  dsn: "clickhouse://default:@localhost:9000/ja4_logs"
  batch_size: 100
  flush_secs: 1
correlation:
  timeout_ms: 500
  slowloris_ms: 10000
log:
  level: "info"
  format: "json"
EOF

  # Launch with the required capabilities.
  # Inside the VM (root) it can be started directly.
  ja4ebpf -config /tmp/ja4ebpf.yml > /tmp/ja4ebpf.log 2>&1 &
  JA4EBPF_PID=$!
  sleep 3
  if ! kill -0 "$JA4EBPF_PID" 2>/dev/null; then
    fail "ja4ebpf s'est arrêté immédiatement"
    tail -10 /tmp/ja4ebpf.log
    return 1
  fi
  log "ja4ebpf démarré (PID $JA4EBPF_PID)"

  # Check the uprobes in tracefs
  sleep 1
  if grep -q "ssl" /sys/kernel/tracing/uprobe_events 2>/dev/null; then
    pass "Uprobes SSL attachés dans tracefs"
  else
    warn "Uprobes non visibles dans tracefs (peuvent être actifs quand même)"
  fi

  # Check the accept4 tracepoint. events/syscalls is a directory holding one
  # sub-directory per tracepoint, so test for the entry itself; grepping the
  # directory always failed and therefore always warned.
  if [ -d /sys/kernel/tracing/events/syscalls/sys_enter_accept4 ]; then
    pass "Tracepoints accept4 disponibles"
  else
    warn "Tracepoints accept4 non trouvés"
  fi
}
# ── Traffic generation ───────────────────────────────────────────────────────
# generate_traffic — Sends HTTP/1.0, HTTP/1.1 and (when python3 + httpx are
# available) HTTP/2 requests to the local nginx, then sleeps 15 seconds so
# ja4ebpf can flush to ClickHouse.
#
# Individual request failures are deliberately ignored: the goal is only to
# produce traffic, the results are checked afterwards in the database.
generate_traffic() {
  log "Génération du trafic (HTTP/1.0 + HTTP/1.1 + HTTP/2)..."

  # --max-time bounds every request so a stuck connection cannot stall the run.
  local -a curl_opts=(-sf --max-time 10)
  local path

  # HTTP/1.0 traffic (cleartext and TLS), as announced by the log line above
  for path in / /health /data /api/users; do
    curl "${curl_opts[@]}" --http1.0 "http://localhost$path" >/dev/null 2>&1 || true
    curl "${curl_opts[@]}" -k --http1.0 "https://localhost$path" >/dev/null 2>&1 || true
  done

  # HTTP/1.1 traffic (cleartext)
  for path in / /health /data /api/users; do
    curl "${curl_opts[@]}" "http://localhost$path" >/dev/null 2>&1 || true
    curl "${curl_opts[@]}" -X POST "http://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true
  done

  # HTTPS/1.1 traffic
  for path in / /health /data /api/users; do
    curl "${curl_opts[@]}" -k "https://localhost$path" >/dev/null 2>&1 || true
    curl "${curl_opts[@]}" -k -X POST "https://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true
    curl "${curl_opts[@]}" -k -X PUT "https://localhost/data" >/dev/null 2>&1 || true
    curl "${curl_opts[@]}" -k -X DELETE "https://localhost/data/1" >/dev/null 2>&1 || true
    # -I issues a real HEAD request; `-X HEAD` only renames the method and
    # leaves curl waiting for a response body that never arrives.
    curl "${curl_opts[@]}" -k -I "https://localhost$path" >/dev/null 2>&1 || true
  done

  # HTTP/2 traffic
  if command -v python3 >/dev/null 2>&1 && python3 -c "import httpx" 2>/dev/null; then
    # Run the generator as an `if` condition so a Python failure is reported
    # as a warning instead of aborting the whole script under `set -e`.
    if python3 << 'PYEOF'
import httpx, ssl, warnings
warnings.filterwarnings("ignore")
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
with httpx.Client(http2=True, verify=False) as client:
    for path in ["/", "/health", "/data"]:
        try:
            client.get(f"https://localhost{path}")
        except Exception:
            pass
    try:
        client.post("https://localhost/api/data", json={"test": "h2"})
    except Exception:
        pass
PYEOF
    then
      pass "Trafic HTTP/2 généré"
    else
      warn "Échec du générateur de trafic HTTP/2"
    fi
  else
    warn "python3/httpx indisponible — trafic HTTP/2 non généré"
  fi

  # Wait for the ja4ebpf → ClickHouse flush
  log "Attente flush ja4ebpf (15s)..."
  sleep 15
  pass "Trafic généré"
}
# ── ClickHouse verification ──────────────────────────────────────────────────
# verify_db — Counts rows in the ja4_logs.http_logs table for each captured
# layer (L3/L4, TLS, L7 HTTP) and records one pass/fail/warn per check.
#
# A check succeeds when its count query returns a positive integer. Missing
# L3/L4, JA4, method and path data are failures; missing SNI, status_code and
# header_order_signature are only warnings.
verify_db() {
  log "Vérification des données dans ClickHouse..."

  # ch_val QUERY — prints the query result with spaces/newlines removed.
  # The `|| echo "0"` fallback covers a failed curl only when `pipefail` is
  # enabled by the caller; otherwise a failed request yields an empty string,
  # which the checks below treat as "no rows".
  ch_val() {
    curl -sf "http://localhost:8123/?database=ja4_logs" \
      --data-urlencode "query=$1" 2>/dev/null | tr -d ' \n' || echo "0"
  }

  # _verify_db_check ON_MISSING QUERY OK_LABEL MISSING_LABEL
  # Runs a count query; calls `pass` when the result is a positive integer,
  # otherwise calls ON_MISSING (fail or warn) with MISSING_LABEL. Using a
  # real if/else avoids the `A && B || C` pitfall, and the numeric guard
  # keeps non-numeric replies from producing test-expression errors.
  _verify_db_check() {
    local on_missing=$1 query=$2 ok_label=$3 missing_label=$4
    local rows
    rows=$(ch_val "$query")
    if [[ "$rows" =~ ^[0-9]+$ ]] && (( 10#$rows > 0 )); then
      pass "$ok_label ($rows lignes)"
    else
      "$on_missing" "$missing_label"
    fi
  }

  # L3/L4
  _verify_db_check fail "SELECT count() FROM http_logs WHERE ip_meta_ttl > 0" \
    "L3/L4 TTL capturé" "L3/L4 TTL absent"
  _verify_db_check fail "SELECT count() FROM http_logs WHERE tcp_meta_mss > 0" \
    "TCP MSS capturé" "TCP MSS absent"

  # TLS
  _verify_db_check fail "SELECT count() FROM http_logs WHERE ja4 != ''" \
    "JA4 fingerprint capturé" "JA4 absent"
  _verify_db_check warn "SELECT count() FROM http_logs WHERE tls_sni != ''" \
    "TLS SNI capturé" "TLS SNI absent"

  # L7 HTTP — this is where it should work in the VM
  _verify_db_check fail "SELECT count() FROM http_logs WHERE method != ''" \
    "L7 méthodes HTTP capturées" \
    "L7 méthodes HTTP ABSENT — uprobe SSL_read ne fonctionne pas"
  _verify_db_check fail "SELECT count() FROM http_logs WHERE path != ''" \
    "L7 path HTTP capturé" "L7 path absent"
  _verify_db_check warn "SELECT count() FROM http_logs WHERE status_code > 0" \
    "status_code capturé" "status_code absent"
  _verify_db_check warn "SELECT count() FROM http_logs WHERE header_order_signature != ''" \
    "header_order_signature capturé" "header_order_sig absent"

  # Distinct HTTP methods
  local methods
  methods=$(ch_val "SELECT groupArray(method) FROM (SELECT DISTINCT method FROM http_logs WHERE method != '')")
  log "Méthodes HTTP vues : $methods"

  # Total rows
  local total
  total=$(ch_val "SELECT count() FROM http_logs")
  pass "Total lignes http_logs : $total"
}
# ── Cleanup ──────────────────────────────────────────────────────────────────
# cleanup — EXIT handler. Unless KEEP_RUNNING is "true", stops ja4ebpf and
# nginx and removes the ja4-clickhouse container. Every step is best-effort:
# errors are silenced so the handler never changes the script's outcome.
cleanup() {
  # Nothing to tear down when the caller asked to keep the stack alive.
  [ "$KEEP_RUNNING" = "true" ] && return 0

  log "Nettoyage..."
  pkill ja4ebpf 2>/dev/null || :
  nginx -s stop 2>/dev/null || :
  docker rm -f ja4-clickhouse 2>/dev/null || :
}
trap cleanup EXIT
# ── Main ─────────────────────────────────────────────────────────────────────
# main — Prints the banner, runs every test phase in order, then prints the
# OK/WARN/FAIL summary. Exits 0 when no check failed; otherwise prints the
# last 20 lines of the ja4ebpf log and exits 1.
main() {
  mkdir -p "$RESULTS_DIR"

  printf '\n'
  printf '%s\n' "╔══════════════════════════════════════════╗"
  printf '%s\n' "║ ja4ebpf VM Test Suite — Rocky Linux 9 ║"
  printf '%s\n' "╚══════════════════════════════════════════╝"
  printf '\n'

  # Test phases, in dependency order.
  check_prerequisites
  start_clickhouse
  setup_nginx
  start_ja4ebpf
  generate_traffic
  verify_db

  printf '\n'
  printf '%s\n' "════════════════════════════════════════════"
  echo -e " ${GREEN}OK${RESET}: $PASS_COUNT ${YELLOW}WARN${RESET}: $WARN_COUNT ${RED}FAIL${RESET}: $FAIL_COUNT"

  # Failure path first: report, show the ja4ebpf log tail, exit non-zero.
  if (( FAIL_COUNT != 0 )); then
    echo -e " ${RED}${BOLD}$FAIL_COUNT tests échoués.${RESET}"
    printf '%s\n' "Logs ja4ebpf :"
    tail -20 /tmp/ja4ebpf.log 2>/dev/null || true
    exit 1
  fi

  echo -e " ${GREEN}${BOLD}Tous les tests réussis !${RESET}"
  exit 0
}
main "$@"