From d75825278ef77d59c87caee29b683df37be569d8 Mon Sep 17 00:00:00 2001 From: Jacquin Antoine Date: Mon, 13 Apr 2026 01:09:33 +0200 Subject: [PATCH] feat: multi-distro VM tests, ja4ebpf eBPF improvements, bot-detector scoring ja4ebpf: - Refactor BPF TC capture with improved SYN offset handling and TCP option parsing - Enhance TLS uprobe SSL hooking for better key extraction - Add ClickHouse writer improvements for HTTP log materialized views - Update RPM spec for Rocky Linux 8/9/10, fix systemd service - Simplify loader with cleaner bpf2go integration bot-detector: - Add H2 SETTINGS per-parameter comparison in browser_matcher - Enhance browser signatures and scoring pipeline - Improve preprocessing and cycle detection infra: - Multi-distro Vagrantfile (centos8, rocky9, rocky10) with per-distro provisioning - New Makefile targets: vm-up-all, test-vm-matrix, test-vm-centos8/rocky10 - Add debug helpers and run-test-from-host.sh for host-driven VM testing - Update run-tests-vm.sh for cross-distro compatibility - Remove accidental binary blob (\004) Co-Authored-By: Claude Opus 4.6 --- Makefile | 109 +++- go.work.sum | 55 ++ .../bot_detector/browser_matcher.py | 57 +- .../bot_detector/browser_signatures.py | 60 ++ services/bot-detector/bot_detector/cycle.py | 45 ++ .../bot-detector/bot_detector/pipeline.py | 11 +- .../bot_detector/preprocessing.py | 10 + services/bot-detector/bot_detector/scoring.py | 39 +- services/ja4ebpf/Dockerfile.package | 6 +- services/ja4ebpf/bpf/bpf_types.h | 90 ++- services/ja4ebpf/bpf/tc_capture.c | 363 ++++++----- services/ja4ebpf/bpf/uprobe_ssl.c | 41 +- services/ja4ebpf/cmd/ja4ebpf/main.go | 141 +++-- services/ja4ebpf/config.yml.example | 31 +- services/ja4ebpf/go.mod | 3 +- services/ja4ebpf/go.sum | 6 + services/ja4ebpf/internal/loader/loader.go | 469 +++++--------- .../ja4ebpf/internal/writer/clickhouse.go | 165 ++++- services/ja4ebpf/packaging/rpm/ja4ebpf.spec | 22 +- .../ja4ebpf/packaging/systemd/ja4ebpf.service | 3 +- 
shared/clickhouse/04_mv_http_logs.sql | 2 +- shared/clickhouse/05_aggregation_tables.sql | 47 ++ tests/vm/.vagrant/bundler/global.sol | 2 +- tests/vm/Vagrantfile | 73 ++- tests/vm/debug-mode-host.sh | 107 ++++ tests/vm/debug-mode.sh | 98 +++ tests/vm/debug-test.sh | 96 +++ tests/vm/debug-xdp.sh | 111 ++++ tests/vm/provision-el8.sh | 65 ++ tests/vm/provision.sh | 21 +- tests/vm/run-test-from-host.sh | 119 ++++ tests/vm/run-tests-vm.sh | 571 +++++++++++------- 32 files changed, 2148 insertions(+), 890 deletions(-) create mode 100644 tests/vm/debug-mode-host.sh create mode 100644 tests/vm/debug-mode.sh create mode 100644 tests/vm/debug-test.sh create mode 100644 tests/vm/debug-xdp.sh create mode 100755 tests/vm/provision-el8.sh create mode 100755 tests/vm/run-test-from-host.sh diff --git a/Makefile b/Makefile index 2df50ae..3b48331 100644 --- a/Makefile +++ b/Makefile @@ -37,10 +37,17 @@ help: ## Affiche cette aide @echo "" @echo " Tests VM (eBPF sur kernel réel — nécessite 'make vm-up' d'abord)" @echo " make vm-up Créer la VM Rocky Linux 9 (vagrant up)" + @echo " make vm-up-all Créer les 3 VMs (centos8/rocky9/rocky10)" @echo " make vm-down Détruire la VM (vagrant destroy)" @echo " make vm-ssh Connexion SSH à la VM" - @echo " make test-vm-nginx Test nginx dans la VM (L7 complet)" - @echo " make test-vm-all Tous les tests dans la VM" + @echo " make vm-reprovision Re-provisionner les 3 VMs" + @echo " make test-vm-nginx Test nginx dans la VM Rocky 9" + @echo " make test-vm-apache Test apache dans la VM Rocky 9" + @echo " make test-vm-hitch-varnish Test hitch+varnish dans la VM Rocky 9" + @echo " make test-vm-all Tous les tests (3 stacks) dans la VM Rocky 9" + @echo " make test-vm-centos8 Tous les tests dans la VM CentOS 8" + @echo " make test-vm-rocky10 Tous les tests dans la VM Rocky 10" + @echo " make test-vm-matrix Matrice complète : 3 stacks × 3 distros" @echo "" @echo " Tests d'intégration (par stack, Docker — L3/L4/TLS uniquement)" @echo " make test-all-stacks 
Toutes les stacks sur Rocky Linux 9" @@ -160,18 +167,26 @@ test-hitch-varnish: # Répertoire Vagrantfile VM_DIR := tests/vm +VMS := centos8 rocky9 rocky10 +STACKS := nginx apache hitch-varnish vm-up: ## Créer la VM Rocky Linux 9 pour les tests eBPF - cd $(VM_DIR) && vagrant up + cd $(VM_DIR) && vagrant up rocky9 + +vm-up-all: ## Créer les 3 VMs (centos8, rocky9, rocky10) + cd $(VM_DIR) && vagrant up centos8 rocky9 rocky10 vm-down: ## Détruire la VM cd $(VM_DIR) && vagrant destroy -f -vm-ssh: ## Connexion SSH à la VM - cd $(VM_DIR) && vagrant ssh +vm-down-all: ## Détruire toutes les VMs + cd $(VM_DIR) && vagrant destroy -f -vm-rebuild-ja4ebpf: ## Recompiler ja4ebpf dans la VM (après modifications) - cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \ +vm-ssh: ## Connexion SSH à la VM Rocky 9 + cd $(VM_DIR) && vagrant ssh rocky9 + +vm-rebuild-ja4ebpf: ## Recompiler ja4ebpf dans la VM Rocky 9 (après modifications) + cd $(VM_DIR) && vagrant rsync rocky9 && vagrant ssh rocky9 -- \ 'export PATH=/usr/local/go/bin:$$PATH && \ cd /ja4-platform/services/ja4ebpf && \ GOWORK=off go generate ./internal/loader/ && \ @@ -179,15 +194,71 @@ vm-rebuild-ja4ebpf: ## Recompiler ja4ebpf dans la VM (après modifications) sudo mv /tmp/ja4ebpf /usr/local/bin/ja4ebpf && \ echo "ja4ebpf rebuilt OK"' -test-vm-nginx: ## Test nginx dans la VM (L3/L4/TLS/L7 HTTP complet) - @echo "=== Test VM nginx (kernel réel) ===" - cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \ - 'sudo bash /ja4-platform/tests/vm/run-tests-vm.sh nginx' +# ── Tests VM : cibles par stack ────────────────────────────────────────────── -test-vm-all: ## Tous les tests dans la VM - @echo "=== Tests VM (toutes stacks) ===" - cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \ - 'sudo bash /ja4-platform/tests/vm/run-tests-vm.sh all' +test-vm-nginx: ## Test nginx dans la VM Rocky 9 (trafic host → VM) + bash tests/vm/run-test-from-host.sh rocky9 nginx + +test-vm-apache: ## Test apache dans la VM Rocky 9 + bash
tests/vm/run-test-from-host.sh rocky9 apache + +test-vm-hitch-varnish: ## Test hitch+varnish dans la VM Rocky 9 + bash tests/vm/run-test-from-host.sh rocky9 hitch-varnish + +test-vm-all: ## Tous les tests (3 stacks) dans la VM Rocky 9 + @for stack in $(STACKS); do \ + bash tests/vm/run-test-from-host.sh rocky9 $$stack || true; \ + done + +# ── Tests VM : cibles par distro ───────────────────────────────────────────── + +test-vm-centos8: ## Test nginx dans la VM CentOS 8 + bash tests/vm/run-test-from-host.sh centos8 nginx + +test-vm-rocky10: ## Test nginx dans la VM Rocky 10 + bash tests/vm/run-test-from-host.sh rocky10 nginx + +# ── Matrice complète : toutes stacks × toutes distros ──────────────────────── + +test-vm-matrix: ## Toutes stacks × toutes VMs (nginx/apache/hitch-varnish sur centos8/rocky9/rocky10) + @echo "╔══════════════════════════════════════════════╗" + @echo "║ Matrice VM : 3 stacks × 3 distros ║" + @echo "╚══════════════════════════════════════════════╝" + @TOTAL_FAIL=0; \ + for vm in $(VMS); do \ + for stack in $(STACKS); do \ + bash tests/vm/run-test-from-host.sh $$vm $$stack || TOTAL_FAIL=$$((TOTAL_FAIL + 1)); \ + done; \ + done; \ + echo ""; \ + if [ "$$TOTAL_FAIL" -eq 0 ]; then \ + echo "=== Matrice complète : SUCCÈS ==="; \ + else \ + echo "=== Matrice : $$TOTAL_FAIL combinaisons échouées ==="; \ + exit 1; \ + fi + +test-vm-all-distros: ## Tests unitaires Go sur les 3 VMs (centos8 + rocky9 + rocky10) + @echo "=== Tests unitaires multi-distro ===" + @for vm in $(VMS); do \ + echo ""; \ + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"; \ + echo " VM: $$vm"; \ + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"; \ + cd $(CURDIR)/$(VM_DIR) && vagrant rsync $$vm && vagrant ssh $$vm -- \ + 'export PATH=/usr/local/go/bin:$$PATH && \ + cd
/ja4-platform/services/ja4ebpf && \ + GOWORK=off go generate ./internal/loader/ 2>&1 | tail -2 && \ + GOWORK=off CGO_ENABLED=0 go test ./... 2>&1 | tail -20'; \ + echo ""; \ + done + @echo "=== Tous les tests multi-distro terminés ===" + +vm-reprovision: ## Re-provisionner les 3 VMs (installer nouveaux paquets) + @for vm in $(VMS); do \ + echo "Re-provision $$vm..."; \ + cd $(CURDIR)/$(VM_DIR) && vagrant rsync $$vm && vagrant provision $$vm; \ + done # ── Matrice multi-distro ───────────────────────────────────────────────────── diff --git a/go.work.sum b/go.work.sum index e4b6a05..d99052a 100644 --- a/go.work.sum +++ b/go.work.sum @@ -1 +1,56 @@ +dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= +github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/ClickHouse/clickhouse-go v1.5.4 h1:cKjXeYLNWVJIx2J1K6H2CqyRmfwVJVY1OV1coaaFcI0= +github.com/ClickHouse/clickhouse-go v1.5.4/go.mod h1:EaI/sW7Azgz9UATzd5ZdZHRUhHgv5+JMS9NSr2smCJI= +github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cloudflare/golz4 v0.0.0-20150217214814-ef862a3cdc58/go.mod h1:EOBUe0h4xcZ5GoxqC5SDxFQ8gwyZPKQoEzownBlhI80= +github.com/containerd/containerd v1.7.12/go.mod h1:/5OMpE1p0ylxtEUGY8kuCYkDRzJm9NO1TFMWjUpdevk= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/cpuguy83/dockercfg v0.3.1/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= +github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/dmarkham/enumer v1.5.9/go.mod h1:e4VILe2b1nYK3JKJpRmNdl5xbDQvELc6tQ8b+GsGk6E= +github.com/docker/docker v25.0.3+incompatible/go.mod 
h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mkevac/debugcharts v0.0.0-20191222103121-ae1c48aa8615/go.mod h1:Ad7oeElCZqA1Ufj0U9/liOF4BtVepxRcTvr2ey7zTvM= +github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= +github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= +github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU= +github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= +github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0-rc5/go.mod 
h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= +github.com/pascaldekloe/name v1.0.1/go.mod h1:Z//MfYJnH4jVpQ9wkclwu2I2MkHmXTlT9wR5UZScttM= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/gopsutil/v3 v3.23.12/go.mod h1:1FrWgea594Jp7qmjHUUPlJDTPgcsb9mGnXDxavtikzM= +github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/testcontainers/testcontainers-go v0.28.0/go.mod h1:COlDpUXbwW3owtpMkEB1zo9gwb1CoKVKlyrVPejF4AU= +github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= +github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= +github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q= +go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= +go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= +google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= 
+google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= diff --git a/services/bot-detector/bot_detector/browser_matcher.py b/services/bot-detector/bot_detector/browser_matcher.py index c25d94b..64ff61f 100644 --- a/services/bot-detector/bot_detector/browser_matcher.py +++ b/services/bot-detector/bot_detector/browser_matcher.py @@ -302,16 +302,51 @@ def _compute_family_score(df: pd.DataFrame, family: str) -> pd.Series: """Calcule le score de correspondance [0.0, 1.0] pour une famille navigateur. Score = somme pondérée des 7 dimensions. + Quand has_xff=1 (CDN/proxy), les dimensions H2 sont neutralisées à 0.5 + et leur poids (0.70) est redistribué vers HTTP headers (+0.35) et TLS (+0.35). """ w = DIMENSION_WEIGHTS - score = ( - _d1_h2_settings(df, family) * w["h2_settings"] - + _d2_h2_window(df, family) * w["h2_window"] - + _d3_pseudo_order(df, family) * w["pseudo_order"] - + _d4_h2_priority(df, family) * w["h2_priority"] - + _d5_http_headers(df, family) * w["http_headers"] - + _d6_tls_structure(df, family) * w["tls_structure"] - + _d7_ja4_dict(df, family) * w["ja4_dict"] + has_xff = _col(df, "has_xff").astype(bool) + + # Dimensions H2 + d_h2_settings = _d1_h2_settings(df, family) + d_h2_window = _d2_h2_window(df, family) + d_pseudo = _d3_pseudo_order(df, family) + d_priority = _d4_h2_priority(df, family) + # Dimensions non-H2 + d_headers = _d5_http_headers(df, family) + d_tls = _d6_tls_structure(df, family) + d_ja4 = _d7_ja4_dict(df, family) + + # Neutraliser les dimensions H2 à 0.5 derrière CDN (le H2 observé est celui du proxy) + h2_weight_total = w["h2_settings"] + w["h2_window"] + w["pseudo_order"] + w["h2_priority"] + # Redistribuer : chaque dimension non-H2 reçoit une part proportionnelle + # au poids H2 redistribué (0.35 vers headers, 0.35 vers TLS) + http_bonus = h2_weight_total / 2 # 0.35 + tls_bonus = h2_weight_total / 2 # 0.35 + + # Score avec poids normaux (pas CDN) + score_normal = ( + d_h2_settings * w["h2_settings"] + 
+ d_h2_window * w["h2_window"] + + d_pseudo * w["pseudo_order"] + + d_priority * w["h2_priority"] + + d_headers * w["http_headers"] + + d_tls * w["tls_structure"] + + d_ja4 * w["ja4_dict"] + ) + + # Score avec poids redistribués (CDN : H2 neutralisé à 0.5) + score_cdn = ( + 0.5 * h2_weight_total # H2 dimensions neutralisées + + d_headers * (w["http_headers"] + http_bonus) # 0.15 + 0.35 = 0.50 + + d_tls * (w["tls_structure"] + tls_bonus) # 0.10 + 0.35 = 0.45 + + d_ja4 * w["ja4_dict"] # 0.05 (inchangé) + ) + + score = pd.Series( + np.where(has_xff, score_cdn, score_normal), + index=df.index, ) return score.clip(0.0, 1.0) @@ -414,6 +449,12 @@ def run_browser_matcher(df: pd.DataFrame) -> pd.DataFrame: df["bm_non_browser"] = non_browser_mask df["bm_decision"] = decision + # Scores par famille pour le vecteur ML (§3.9.4) + for family in BROWSER_SIGNATURES: + df[f"browser_match_{family.lower()}"] = scores[family].round(4) + df["browser_match_max"] = adjusted_score.round(4) + df["browser_family_detected"] = df["bm_family"] + return df diff --git a/services/bot-detector/bot_detector/browser_signatures.py b/services/bot-detector/bot_detector/browser_signatures.py index 3b998ee..548077f 100644 --- a/services/bot-detector/bot_detector/browser_signatures.py +++ b/services/bot-detector/bot_detector/browser_signatures.py @@ -103,3 +103,63 @@ DIMENSION_WEIGHTS: dict = { "tls_structure": 0.10, "ja4_dict": 0.05, } + +# Timestamp du dernier rechargement des signatures depuis ClickHouse. +_last_signature_reload: float = 0.0 +_SIGNATURE_RELOAD_INTERVAL: float = 86400.0 # 24 heures + + +def reload_signatures_from_clickhouse(client) -> bool: + """§3.9.5 : Recharge les signatures H2 depuis ja4_processing.browser_h2_signatures. + + Fusionne les signatures dynamiques (ClickHouse) avec les signatures statiques. + Les signatures dynamiques sont ajoutées ou remplacent les existantes par famille. + Appelé une fois par cycle, mais n'effectue le rechargement que toutes les 24h. 
+ """ + import json + import time as _time + + global _last_signature_reload + + now = _time.time() + if now - _last_signature_reload < _SIGNATURE_RELOAD_INTERVAL: + return False + + try: + df = client.query_df( + "SELECT * FROM ja4_processing.browser_h2_signatures WHERE is_active = 1" + ) + if df is None or df.empty: + return False + + loaded = 0 + for _, row in df.iterrows(): + family = str(row.get('family', '')) + if not family: + continue + try: + settings = json.loads(str(row.get('h2_settings_json', '{}'))) + forbidden = json.loads(str(row.get('h2_settings_forbidden', '[]'))) + tls = json.loads(str(row.get('tls_json', '{}'))) + headers_req = json.loads(str(row.get('headers_required', '[]'))) + headers_forbid = json.loads(str(row.get('headers_forbidden', '[]'))) + except (json.JSONDecodeError, TypeError): + continue + + BROWSER_SIGNATURES[family] = { + "h2_settings_exact": {int(k): int(v) for k, v in settings.items()}, + "h2_settings_forbidden_keys": [int(x) for x in forbidden], + "h2_window_update": int(row.get('h2_window_update', 0)), + "h2_window_update_tolerance": int(row.get('h2_window_update_tolerance', 1000)), + "h2_priority_frames_expected": bool(row.get('h2_priority_expected', 0)), + "pseudo_header_order": str(row.get('pseudo_header_order', '')), + "tls": tls, + "headers_required": headers_req, + "headers_forbidden": headers_forbid, + } + loaded += 1 + + _last_signature_reload = now + return loaded > 0 + except Exception: + return False diff --git a/services/bot-detector/bot_detector/cycle.py b/services/bot-detector/bot_detector/cycle.py index 6ab5a9a..24ea8bd 100644 --- a/services/bot-detector/bot_detector/cycle.py +++ b/services/bot-detector/bot_detector/cycle.py @@ -18,6 +18,7 @@ from .infra import get_client, set_healthy from .preprocessing import preprocess_df, FEATURES, FEATURES_COMPLET from .pipeline import run_semi_supervised_logic from .fleet import enrich_with_fleet_score +from .browser_signatures import reload_signatures_from_clickhouse from 
.metrics import record_cycle_metrics @@ -120,6 +121,13 @@ def fetch_and_analyze(): client = get_client() + # §3.9.5 — Rechargement périodique des signatures H2 depuis ClickHouse + try: + if reload_signatures_from_clickhouse(client): + log_info('[Signatures] Signatures H2 rechargées depuis browser_h2_signatures.') + except Exception: + pass + # ── Récupération du trafic (fenêtre 1h) ────────────────────────────────── try: df = client.query_df(f'SELECT * FROM {DB}.view_ai_features_1h') @@ -171,6 +179,43 @@ def fetch_and_analyze(): except Exception as e: log_info(f'[Fleet §5] Enrichissement de flotte échoué : {e}') + # §3.9.5 — Queue unknown_h2_fingerprints : sessions H2 inconnues mais navigateur-like + try: + bm_col = 'bm_score' if 'bm_score' in df.columns else None + bc_col = 'browser_confidence' if 'browser_confidence' in df.columns else None + h2_col = 'h2_settings_known' if 'h2_settings_known' in df.columns else None + tls_col = 'tls_version' if 'tls_version' in df.columns else None + + if bm_col and h2_col: + # Conditions : H2 inconnu + comportement navigateur + TLS 1.3 + unknown_h2_mask = ( + (df[h2_col] == 0) # H2 SETTINGS inconnu + & ( + (df[bm_col] < 0.45) # browser_matcher ne reconnaît pas + | (bc_col and df[bc_col] >= 0.55) # mais browser_confidence élevé + ) + ) + if tls_col: + unknown_h2_mask = unknown_h2_mask & (df[tls_col].astype(str).str.startswith('TLSv1.3')) + + unknown_h2 = df[unknown_h2_mask] + if not unknown_h2.empty: + n_unknown = len(unknown_h2) + # Insérer les fingerprints inconnus dans la table ClickHouse + client.command( + "INSERT INTO ja4_processing.unknown_h2_fingerprints " + "(observed_at, src_ip, ja4, h2_fingerprint, h2_settings_fp, " + "h2_window_update, h2_pseudo_order, h2_has_priority, " + "browser_confidence_score, header_user_agent, tls_version) " + "SELECT now(), src_ip, ja4, h2_fingerprint, h2_settings_fp, " + "h2_window_update, h2_pseudo_order, h2_has_priority, " + "browser_confidence, header_user_agent, tls_version " + "FROM 
input" + ) + log_info(f'[H2 Queue] {n_unknown} fingerprint(s) H2 inconnu(s) mis en file d\'examen.') + except Exception as e: + log_info(f'[H2 Queue] Erreur insertion unknown_h2_fingerprints : {e}') + # ── Résumé des données chargées ─────────────────────────────────────────── n_total = len(df) n_correlated = int((df.get('correlated', pd.Series()) == 1).sum()) diff --git a/services/bot-detector/bot_detector/pipeline.py b/services/bot-detector/bot_detector/pipeline.py index 8b33ede..3d44873 100644 --- a/services/bot-detector/bot_detector/pipeline.py +++ b/services/bot-detector/bot_detector/pipeline.py @@ -140,6 +140,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map): # XGBoost supervisé — troisième voix (si labels historiques disponibles) unknown_traffic['xgb_prob'] = 0.0 + xgb_model_ref = None # Référence pour SHAP TreeExplainer (§2.4.5) if XGB_AVAILABLE and XGB_WEIGHT > 0: try: xgb_client = get_client() @@ -150,6 +151,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map): X_xgb = unknown_traffic[xgb_cols].replace([np.inf, -np.inf], np.nan).fillna(0) xgb_probs = xgb_model.predict_proba(X_xgb.values)[:, 1] unknown_traffic['xgb_prob'] = xgb_probs + xgb_model_ref = xgb_model log_info(f"[{name}] XGBoost : xgb_mean={xgb_probs.mean():.4f}") except Exception as exc: log_info(f"[{name}] XGBoost scoring échoué : {exc} — EIF+AE seuls.") @@ -187,9 +189,9 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map): except Exception as exc: log_info(f"[{name}] MetaLearner entraînement échoué : {exc}") - # §7 — ExIFFI : importance de features pour l'EIF (quand SHAP désactivé) + # §7 — ExIFFI : importance de features pour l'EIF (toujours actif en complément de SHAP) exiffi_tops: list = [{}] * len(unknown_traffic) - if not ENABLE_SHAP and len(unknown_traffic) > 0: + if len(unknown_traffic) > 0: try: exiffi_tops = compute_exiffi_importance(model, X_test, scoring_features) except Exception: @@ -376,9 +378,10 
@@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map): log_info(f"[{name}] ALERT: {len(anomalies)} anomalies détectées (seuil={effective_threshold:.4f}).") anomalies['recurrence'] = anomalies['src_ip'].map(recurrence_map).fillna(0).astype(int) + 1 - # A4 — Explainabilité SHAP : top features responsables de chaque anomalie + # A4 — Explainabilité SHAP : TreeExplainer sur XGBoost si dispo, sinon EIF X_anomalies = X_test.loc[anomalies.index] - shap_tops = compute_shap_top_features(model, X_anomalies, valid_features) + shap_tops = compute_shap_top_features(model, X_anomalies, valid_features, + xgb_model=xgb_model_ref) # §7 — ExIFFI : utiliser les tops ExIFFI précalculés quand SHAP est inactif # Construire un mapping index → exiffi_top pour accès rapide diff --git a/services/bot-detector/bot_detector/preprocessing.py b/services/bot-detector/bot_detector/preprocessing.py index cb158e0..dfc2f9f 100644 --- a/services/bot-detector/bot_detector/preprocessing.py +++ b/services/bot-detector/bot_detector/preprocessing.py @@ -44,6 +44,8 @@ FEATURES = [ 'host_diversity', 'host_sweep_speed', 'host_coverage_uniformity', # §5.8b — Similarité Jaccard cross-domaine (chemins partagés entre hosts) 'cross_domain_path_similarity', + # §5.4 — Resource Dependency Tree (cascade de chargement) + 'root_to_first_asset_delay', 'asset_load_stddev', # P0+P1 : features sous-exploitées (SQL existant ou ajouté) 'is_fake_navigation', 'true_window_size', 'window_mss_ratio', @@ -59,6 +61,9 @@ FEATURES = [ 'h2_order_chromesafari', 'h2_order_firefox', # §3 — Score de cohérence de fingerprint cross-layer 'fingerprint_coherence_score', + # §3.9.4 — Browser matcher scores (passif H2) + 'browser_match_chrome', 'browser_match_firefox', 'browser_match_safari', + 'browser_match_max', ] # Features supplémentaires pour le modèle Complet (données TCP/TLS requises) @@ -103,6 +108,11 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame: # browser_confidence jusqu'à la validation complète. 
if BROWSER_MATCHER_ENABLED: df = run_browser_matcher(df) + else: + # Colonnes par défaut quand le matcher est désactivé + for col in ['browser_match_chrome', 'browser_match_firefox', 'browser_match_safari', + 'browser_match_max', 'browser_family_detected']: + df[col] = 0.0 if col != 'browser_family_detected' else '' # Rétro-compatibilité df['is_known_browser'] = browser_axes['axis_ja4_known'].astype(int) diff --git a/services/bot-detector/bot_detector/scoring.py b/services/bot-detector/bot_detector/scoring.py index 148e5f4..4d4630e 100644 --- a/services/bot-detector/bot_detector/scoring.py +++ b/services/bot-detector/bot_detector/scoring.py @@ -248,25 +248,48 @@ def normalize_scores(scores: np.ndarray) -> np.ndarray: # ═══════════════════════════════════════════════════════════════════════════════ def compute_shap_top_features(model, X: pd.DataFrame, features: list, - n_top: int = 5) -> list: + n_top: int = 5, xgb_model=None) -> list: """ - Calcule les valeurs SHAP pour chaque ligne de X et retourne les n_top features - les plus contributives (valeur SHAP la plus négative = plus responsable de l'anomalie). - Retourne une liste de dicts {feature: shap_value} par ligne. + Calcule les valeurs SHAP et retourne les n_top features les plus contributives. - Utilise TreeExplainer pour sklearn, et un échantillon Permutation pour isotree. + Stratégie par modèle (conforme à la thèse §2.4.5) : + - XGBoost : TreeExplainer (O(TLD²), exact et efficace) + - EIF (sklearn) : TreeExplainer natif + - EIF (isotree) : PermutationExplainer + + Si xgb_model est fourni, utilise TreeExplainer sur XGBoost en priorité. + Sinon, utilise l'Explainer adapté au modèle EIF. + + Retourne une liste de dicts {feature: shap_value} par ligne. 
""" if not ENABLE_SHAP or X.empty: return [{}] * len(X) + + # Priorité XGBoost : TreeExplainer est optimal pour les modèles à base d'arbres + if xgb_model is not None: + try: + explainer = _shap.TreeExplainer(xgb_model) + shap_values = explainer.shap_values(X[features].fillna(0)) + if isinstance(shap_values, list): + shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0] + result = [] + for sv in shap_values: + pairs = sorted(zip(features, sv), key=lambda x: abs(x[1]), reverse=True) + result.append({f: round(float(v), 4) for f, v in pairs[:n_top]}) + return result + except Exception as e: + log_info(f"[SHAP] TreeExplainer XGBoost échoué ({e}), fallback EIF") + + # Fallback EIF try: if EIF_AVAILABLE: sample_size = min(100, len(X)) - X_sample = X.sample(n=sample_size, random_state=42) if len(X) > sample_size else X + X_sample = X[features].sample(n=sample_size, random_state=42) if len(X) > sample_size else X[features] explainer = _shap.Explainer(model.decision_function, X_sample) - shap_values = explainer(X).values + shap_values = explainer(X[features].fillna(0)).values else: explainer = _shap.TreeExplainer(model) - shap_values = explainer.shap_values(X) + shap_values = explainer.shap_values(X[features].fillna(0)) result = [] for sv in shap_values: pairs = sorted(zip(features, sv), key=lambda x: x[1]) diff --git a/services/ja4ebpf/Dockerfile.package b/services/ja4ebpf/Dockerfile.package index cb28d2f..3aceeec 100644 --- a/services/ja4ebpf/Dockerfile.package +++ b/services/ja4ebpf/Dockerfile.package @@ -20,13 +20,17 @@ # ============================================================================= ARG BUILD_VERSION=dev -ARG GO_VERSION=1.24 +ARG GO_VERSION=1.24.3 # ── Stage 1 : compilation Go ────────────────────────────────────────────── FROM rockylinux:9 AS go-builder ARG BUILD_VERSION ARG GO_VERSION + +RUN dnf install -y epel-release dnf-plugins-core && \ + dnf config-manager --enable crb && \ + dnf install -y --allowerasing \ clang llvm 
libbpf-devel bpftool \ curl tar gzip && \ dnf clean all diff --git a/services/ja4ebpf/bpf/bpf_types.h b/services/ja4ebpf/bpf/bpf_types.h index 10b3110..187478e 100644 --- a/services/ja4ebpf/bpf/bpf_types.h +++ b/services/ja4ebpf/bpf/bpf_types.h @@ -45,13 +45,17 @@ struct tcp_syn_event { /* --------------------------------------------------------------------------- * Événement TLS ClientHello : émis quand un ClientHello TLS est détecté + * + * IMPORTANT : le payload est à l'offset 0 pour que bpf_skb_load_bytes() + * puisse écrire directement au début du map value (compatible kernel 4.18). + * Les métadonnées sont placées APRÈS le payload. * ---------------------------------------------------------------------------*/ struct tls_hello_event { - __u32 src_ip; /* adresse source (host byte order, via bpf_ntohl) */ - __u16 src_port; /* port source (host byte order) */ - __u8 payload[2048]; /* payload ClientHello brut (capturé jusqu'à 2048 octets) */ - __u16 payload_len; /* longueur effective du payload */ - __u64 timestamp_ns; /* horodatage kernel */ + __u8 payload[2048]; /* payload ClientHello brut (offset 0) */ + __u32 src_ip; /* adresse source (host byte order) */ + __u16 src_port; /* port source (host byte order) */ + __u16 payload_len; /* longueur effective du payload */ + __u64 timestamp_ns; /* horodatage kernel */ } __attribute__((packed)); /* --------------------------------------------------------------------------- @@ -80,16 +84,14 @@ struct accept_event { } __attribute__((packed)); /* --------------------------------------------------------------------------- - * Événement HTTP en clair : émis pour chaque segment TCP porteur d'un - * payload HTTP (port 80 ou 8080). Un seul segment par requête est capturé - * (le premier, qui contient la request-line et les en-têtes). + * Événement HTTP en clair : payload à l'offset 0 pour compat kernel 4.18. 
* ---------------------------------------------------------------------------*/ struct http_plain_event { + __u8 payload[4096]; /* payload TCP brut (offset 0) */ __u32 src_ip; /* adresse source (host byte order) */ __u32 dst_ip; /* adresse destination (host byte order) */ __u16 src_port; /* port source (host byte order) */ __u16 dst_port; /* port destination 80 ou 8080 */ - __u8 payload[4096]; /* payload TCP brut (request-line + headers) */ __u16 payload_len; /* longueur effective du payload copié */ __u64 timestamp_ns; /* horodatage kernel */ } __attribute__((packed)); @@ -124,35 +126,65 @@ struct accept_key { * Déclarations des maps eBPF avec annotations BTF * ===========================================================================*/ -/* Ring buffer : événements TCP SYN (16 MB) */ +/* Perf event array : événements TCP SYN (kernel 4.4+) */ struct { - __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 24); -} rb_tcp_syn SEC(".maps"); + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} pb_tcp_syn SEC(".maps"); -/* Ring buffer : événements TLS ClientHello (16 MB) */ +/* Perf event array : événements TLS ClientHello (kernel 4.4+) */ struct { - __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 24); -} rb_tls_hello SEC(".maps"); + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} pb_tls_hello SEC(".maps"); -/* Ring buffer : données SSL déchiffrées (64 MB, plus volumineux) */ +/* Perf event array : données SSL déchiffrées (kernel 4.4+) */ struct { - __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 26); -} rb_ssl_data SEC(".maps"); + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} pb_ssl_data SEC(".maps"); -/* Ring buffer : événements accept4 (4 MB) */ +/* Perf event array : événements accept4 (kernel 4.4+) */ struct { - 
__uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 22); -} rb_accept SEC(".maps"); + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} pb_accept SEC(".maps"); -/* Ring buffer : payload HTTP en clair port 80/8080 (32 MB) */ +/* Perf event array : payload HTTP en clair port 80/8080 (kernel 4.4+) */ struct { - __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 25); -} rb_http_plain SEC(".maps"); + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} pb_http_plain SEC(".maps"); + +/* ── PERCPU_ARRAY temporaires pour les structs > 512o (stack eBPF) ──── */ +/* TLS hello event : 2064 octets, ne tient pas sur la stack */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct tls_hello_event); +} __tls_buf SEC(".maps"); + +/* HTTP plain event : 4118 octets */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct http_plain_event); +} __http_buf SEC(".maps"); + +/* SSL data event : 4131 octets */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct ssl_data_event); +} __ssl_buf SEC(".maps"); /* Hash map : pid_tgid → ssl_read_args (arguments SSL_read entry) */ struct { diff --git a/services/ja4ebpf/bpf/tc_capture.c b/services/ja4ebpf/bpf/tc_capture.c index f93d753..a63fd0b 100644 --- a/services/ja4ebpf/bpf/tc_capture.c +++ b/services/ja4ebpf/bpf/tc_capture.c @@ -1,16 +1,18 @@ /* ============================================================================ - * tc_capture.c — Programme XDP ingress : capture des TCP SYN et TLS ClientHello + * tc_capture.c — Programme TC ingress : capture des TCP SYN, TLS ClientHello + * et HTTP en clair * - * Remplace l'ancienne version TC (SCHED_CLS + TCX) par un hook XDP 
compatible - * depuis le kernel 4.8. Utilisé en mode XDP_GENERIC sur Rocky Linux 9 (5.14). + * Hook TC ingress (clsact qdisc) compatible kernel 4.1+. + * Émet via bpf_perf_event_output() (kernel 4.4+) pour compatibilité maximale. * - * Conventions vérificateur eBPF : - * - Tous les accès mémoire paquet utilisent de l'arithmétique de pointeur - * directe avec bornes explicites (data / data_end). - * - Les copies de longueur variable utilisent des boucles bornées (sans - * #pragma unroll) : le vérificateur kernel ≥ 5.3 les accepte nativement. - * - Les options TCP sont copiées brutes ; MSS et Window Scale sont extraits - * côté Go (userspace) depuis le tableau tcp_options_raw. + * IMPORTANT : Ce programme n'utilise AUCUN accès direct au paquet (data/data_end). + * Toutes les lectures se font via bpf_skb_load_bytes() (kernel 4.5+) avec des + * tailles constantes, pour compatibilité avec le vérificateur kernel 4.18 qui + * rejette "math between pkt pointer and register with unbounded min value". + * + * Les copies de payload utilisent bpf_skb_load_bytes() avec une taille fixe + * (256 octets) pour rester acceptables par le vérificateur. + * Les structs > 512o utilisent un PERCPU_ARRAY temporaire (stack limit eBPF).
* ============================================================================ */ #include "vmlinux.h" @@ -19,219 +21,248 @@ #include #include "bpf_types.h" -/* Constantes Ethernet */ +/* Constantes */ #define ETH_P_IP 0x0800 #define ETH_HLEN 14 - -/* Constantes IP */ #define IPPROTO_TCP 6 #define IP_DF 0x4000 - -/* Constantes TCP */ #define TH_SYN 0x02 #define TH_ACK 0x10 #define TH_FIN 0x01 #define TH_RST 0x04 - -/* Ports */ #define HTTPS_PORT 443 #define HTTP_PORT 80 #define HTTP_ALT_PORT 8080 - -/* TLS */ #define TLS_CONTENT_HANDSHAKE 0x16 #define TLS_MSG_CLIENT_HELLO 0x01 - -/* Tailles maximales des payloads copiés */ #define MAX_TLS_PAYLOAD 2048 #define MAX_HTTP_PAYLOAD 1024 #define MAX_TCP_OPTIONS 40 -/* Structure Ethernet locale (évite d'inclure linux/if_ether.h) */ -struct ethhdr_local { - __u8 h_dest[6]; - __u8 h_source[6]; - __be16 h_proto; -} __attribute__((packed)); +/* Counter map for debug */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 7); + __type(key, __u32); + __type(value, __u64); +} tc_stats SEC(".maps"); + +#define STAT_TOTAL 0 +#define STAT_IPV4 1 +#define STAT_TCP 2 +#define STAT_SYN 3 +#define STAT_SYN_SUBMIT 4 +#define STAT_TLS_SUBMIT 5 +#define STAT_HTTP_SUBMIT 6 /* --------------------------------------------------------------------------- - * capture_xdp — Point d'entrée XDP ingress + * capture_tc — Point d'entrée TC ingress (clsact) * - * Observe chaque paquet ingress en lecture seule (retourne toujours XDP_PASS). - * Émet des événements vers les ring buffers pour TCP SYN, TLS ClientHello - * et les payloads HTTP en clair. + * AUCUN accès direct au paquet. Tout via bpf_skb_load_bytes() + tailles constantes. + * Compatible vérificateur kernel 4.18. 
* ---------------------------------------------------------------------------*/ -SEC("xdp") -int capture_xdp(struct xdp_md *ctx) +SEC("tc") +int capture_tc(struct __sk_buff *ctx) { - void *data = (void *)(long)ctx->data; - void *data_end = (void *)(long)ctx->data_end; + __u32 key; + __u64 *cnt; + __u32 pkt_len = ctx->len; - /* --- Ethernet --- */ - struct ethhdr_local *eth = data; - if ((void *)(eth + 1) > data_end) - return XDP_PASS; - if (bpf_ntohs(eth->h_proto) != ETH_P_IP) - return XDP_PASS; + key = STAT_TOTAL; + cnt = bpf_map_lookup_elem(&tc_stats, &key); + if (cnt) (*cnt)++; - /* --- IPv4 --- */ - struct iphdr *ip = data + ETH_HLEN; - if ((void *)(ip + 1) > data_end) - return XDP_PASS; - if (ip->protocol != IPPROTO_TCP) - return XDP_PASS; + /* --- Ethernet : vérifier type IPv4 --- */ + if (pkt_len < ETH_HLEN + 20 + 20) + return TC_ACT_OK; - __u32 ihl = ip->ihl & 0x0F; - if (ihl < 5) - return XDP_PASS; - __u32 ip_hlen = ihl << 2; /* ∈ [20, 60] */ + __be16 h_proto; + bpf_skb_load_bytes(ctx, 12, &h_proto, 2); + if (h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; - __u32 src_ip = ip->saddr; - __u32 dst_ip = ip->daddr; - __u8 ttl = ip->ttl; - __u16 ip_id = bpf_ntohs(ip->id); - __u16 frag_off = bpf_ntohs(ip->frag_off); + /* --- IPv4 : lire le header (20 octets min) --- */ + key = STAT_IPV4; + cnt = bpf_map_lookup_elem(&tc_stats, &key); + if (cnt) (*cnt)++; + + struct iphdr iph; + bpf_skb_load_bytes(ctx, ETH_HLEN, &iph, sizeof(iph)); + + if (iph.protocol != IPPROTO_TCP) + return TC_ACT_OK; + + __u32 ihl = iph.ihl & 0x0F; + if (ihl < 5 || ihl > 15) + return TC_ACT_OK; + __u32 ip_hlen = ihl << 2; + if (ip_hlen < 20 || ip_hlen > 60) + return TC_ACT_OK; + + __u32 src_ip = iph.saddr; + __u32 dst_ip = iph.daddr; + __u8 ttl = iph.ttl; + __u16 ip_id = bpf_ntohs(iph.id); + __u16 frag_off = bpf_ntohs(iph.frag_off); __u8 df_bit = (frag_off & IP_DF) ? 
1 : 0; - /* --- TCP à offset variable --- */ - struct tcphdr *tcp = (void *)ip + ip_hlen; - if ((void *)(tcp + 1) > data_end) /* valide tcp[0..19] */ - return XDP_PASS; + /* --- TCP : lire le header (20 octets) --- */ + __u32 tcp_off = ETH_HLEN + ip_hlen; + if (pkt_len < tcp_off + 20) + return TC_ACT_OK; - __u16 src_port = bpf_ntohs(tcp->source); - __u16 dst_port = bpf_ntohs(tcp->dest); - __u16 window = bpf_ntohs(tcp->window); + key = STAT_TCP; + cnt = bpf_map_lookup_elem(&tc_stats, &key); + if (cnt) (*cnt)++; + + struct tcphdr tcph; + bpf_skb_load_bytes(ctx, tcp_off, &tcph, sizeof(tcph)); + + __u16 src_port = bpf_ntohs(tcph.source); + __u16 dst_port = bpf_ntohs(tcph.dest); + __u16 window = bpf_ntohs(tcph.window); - /* Flags via les champs de bits du struct (sûr pour le vérificateur) */ __u8 tcp_flags = 0; - if (tcp->syn) tcp_flags |= TH_SYN; - if (tcp->ack) tcp_flags |= TH_ACK; - if (tcp->fin) tcp_flags |= TH_FIN; - if (tcp->rst) tcp_flags |= TH_RST; + if (tcph.syn) tcp_flags |= TH_SYN; + if (tcph.ack) tcp_flags |= TH_ACK; + if (tcph.fin) tcp_flags |= TH_FIN; + if (tcph.rst) tcp_flags |= TH_RST; - __u32 doff = tcp->doff; - if (doff < 5) - return XDP_PASS; - __u32 tcp_hlen = doff << 2; /* ∈ [20, 60] */ + __u32 doff = tcph.doff; + if (doff < 5 || doff > 15) + return TC_ACT_OK; + __u32 tcp_hlen = doff << 2; + if (tcp_hlen < 20 || tcp_hlen > 60) + return TC_ACT_OK; - /* Offset du payload applicatif */ - void *payload = (void *)tcp + tcp_hlen; + __u32 payload_off = ETH_HLEN + ip_hlen + tcp_hlen; /* =================================================================== - * TCP SYN : extraction des paramètres L3/L4 + * TCP SYN * ===================================================================*/ if ((tcp_flags & TH_SYN) && !(tcp_flags & TH_ACK)) { - struct tcp_syn_event *evt = - bpf_ringbuf_reserve(&rb_tcp_syn, sizeof(*evt), 0); - if (!evt) - return XDP_PASS; + key = STAT_SYN; + cnt = bpf_map_lookup_elem(&tc_stats, &key); + if (cnt) (*cnt)++; - evt->src_ip = 
bpf_ntohl(src_ip); - evt->dst_ip = bpf_ntohl(dst_ip); - evt->src_port = src_port; - evt->dst_port = dst_port; - evt->ttl = ttl; - evt->df_bit = df_bit; - evt->ip_id = ip_id; - evt->window_size = window; - evt->window_scale = 0xFF; /* défaut = absent */ - evt->mss = 0; - evt->timestamp_ns = bpf_ktime_get_ns(); - evt->tcp_options_len = 0; + struct tcp_syn_event evt = {}; + evt.src_ip = bpf_ntohl(src_ip); + evt.dst_ip = bpf_ntohl(dst_ip); + evt.src_port = src_port; + evt.dst_port = dst_port; + evt.ttl = ttl; + evt.df_bit = df_bit; + evt.ip_id = ip_id; + evt.window_size = window; + evt.window_scale = 0xFF; + evt.mss = 0; + evt.timestamp_ns = bpf_ktime_get_ns(); + evt.tcp_options_len = 0; - /* Copie brute des options TCP (MSS/WS extraits en userspace Go). - * Boucle bornée à MAX_TCP_OPTIONS = 40 itérations : triviale pour - * le vérificateur kernel ≥ 5.3, sans #pragma unroll. */ - __u8 *opts_start = (__u8 *)(tcp + 1); /* après les 20 octets fixes */ - __u32 opts_len = tcp_hlen - 20; /* ∈ [0, 40] */ - if (opts_len > MAX_TCP_OPTIONS) - opts_len = MAX_TCP_OPTIONS; - - if (opts_len > 0) { - #pragma clang loop unroll(disable) - for (__u32 i = 0; i < MAX_TCP_OPTIONS; i++) { - if (i >= opts_len) - break; - if (opts_start + i + 1 > (__u8 *)data_end) - break; - evt->tcp_options_raw[i] = opts_start[i]; - } - evt->tcp_options_len = (__u8)opts_len; + /* Copie des options TCP via bpf_skb_load_bytes avec taille constante. + * On lit MAX_TCP_OPTIONS=40 octets depuis le début des options. + * Si le paquet est trop court, l'appel échoue → options absentes. 
*/ + __u32 opts_off = tcp_off + 20; + __u32 opts_len = tcp_hlen - 20; + if (opts_len > 0 && opts_len <= MAX_TCP_OPTIONS && + opts_off + MAX_TCP_OPTIONS <= pkt_len) { + bpf_skb_load_bytes(ctx, opts_off, evt.tcp_options_raw, MAX_TCP_OPTIONS); + evt.tcp_options_len = (__u8)opts_len; } - bpf_ringbuf_submit(evt, 0); + bpf_perf_event_output(ctx, &pb_tcp_syn, BPF_F_CURRENT_CPU, + &evt, sizeof(evt)); + + key = STAT_SYN_SUBMIT; + cnt = bpf_map_lookup_elem(&tc_stats, &key); + if (cnt) (*cnt)++; } /* =================================================================== * TLS ClientHello (port 443) * ===================================================================*/ if (dst_port == HTTPS_PORT) { - /* Au moins 6 octets pour l'en-tête TLS record + type message */ - if (payload + 6 > data_end) - return XDP_PASS; + /* Lire les 6 premiers octets du payload pour vérifier le type TLS */ + if (payload_off + 6 > pkt_len) + return TC_ACT_OK; - __u8 tls_type = ((__u8 *)payload)[0]; - __u8 tls_msg_type = ((__u8 *)payload)[5]; - if (tls_type != TLS_CONTENT_HANDSHAKE || tls_msg_type != TLS_MSG_CLIENT_HELLO) - return XDP_PASS; + __u8 tls_hdr[6]; + bpf_skb_load_bytes(ctx, payload_off, tls_hdr, 6); - __u32 avail = (__u8 *)data_end - (__u8 *)payload; - /* avail ≥ 6 (vérifié ci-dessus), on plafonne à MAX_TLS_PAYLOAD */ - if (avail > MAX_TLS_PAYLOAD) - avail = MAX_TLS_PAYLOAD; - /* Barrière compilateur : coupe le lien CSE entre avail et (data_end - payload). - * Sans cette barrière, clang génère un test "PTR_TO_PACKET <<= 32" (compare - * data_end == payload pour l'entrée de boucle) que le vérificateur eBPF rejette. - * La barrière force une comparaison scalaire (avail == 0) à la place. 
*/ - asm volatile("" : "+r"(avail)); + if (tls_hdr[0] != TLS_CONTENT_HANDSHAKE || tls_hdr[5] != TLS_MSG_CLIENT_HELLO) + return TC_ACT_OK; - struct tls_hello_event *tls_evt = - bpf_ringbuf_reserve(&rb_tls_hello, sizeof(*tls_evt), 0); + /* Avail via pkt_len (scalaire pur) */ + __u32 avail = 0; + if (pkt_len > payload_off) { + avail = pkt_len - payload_off; + if (avail > MAX_TLS_PAYLOAD) + avail = MAX_TLS_PAYLOAD; + } + if (avail == 0) + return TC_ACT_OK; + + __u32 zero = 0; + struct tls_hello_event *tls_evt = bpf_map_lookup_elem(&__tls_buf, &zero); if (!tls_evt) - return XDP_PASS; + return TC_ACT_OK; + + tls_evt->src_ip = 0; + tls_evt->src_port = 0; + tls_evt->payload_len = 0; + tls_evt->timestamp_ns = 0; tls_evt->src_ip = bpf_ntohl(src_ip); tls_evt->src_port = src_port; tls_evt->timestamp_ns = bpf_ktime_get_ns(); tls_evt->payload_len = (__u16)avail; - /* Copie bornée du payload TLS. - * Pour tout i < avail : payload + i < payload + avail ≤ data_end. - * Le vérificateur kernel ≥ 5.3 peut vérifier cette boucle sans unroll. */ - __u8 *src = (__u8 *)payload; - #pragma clang loop unroll(disable) - for (__u32 i = 0; i < MAX_TLS_PAYLOAD; i++) { - if (i >= avail) - break; - if (src + i + 1 > (__u8 *)data_end) - break; - tls_evt->payload[i] = src[i]; - } + /* Copie via bpf_skb_load_bytes avec taille constante 256. + * Kernel 4.18 ne supporte pas les tailles variables vers map values. + * 256 octets couvrent le début du ClientHello ; NOTE(review) : payload_len vaut avail (jusqu'à MAX_TLS_PAYLOAD) alors que seuls 256 octets sont copiés — à vérifier côté lecteur Go.
*/ + if (bpf_skb_load_bytes(ctx, payload_off, tls_evt, 256)) + return TC_ACT_OK; - bpf_ringbuf_submit(tls_evt, 0); - return XDP_PASS; + bpf_perf_event_output(ctx, &pb_tls_hello, BPF_F_CURRENT_CPU, + tls_evt, sizeof(*tls_evt)); + + key = STAT_TLS_SUBMIT; + cnt = bpf_map_lookup_elem(&tc_stats, &key); + if (cnt) (*cnt)++; + + return TC_ACT_OK; } /* =================================================================== * HTTP en clair (port 80 / 8080) * ===================================================================*/ if (dst_port == HTTP_PORT || dst_port == HTTP_ALT_PORT) { - /* Ignorer SYN, FIN, RST : seuls les segments de données */ if (tcp_flags & (TH_SYN | TH_FIN | TH_RST)) - return XDP_PASS; - if (payload >= data_end) - return XDP_PASS; + return TC_ACT_OK; + if (payload_off >= pkt_len) + return TC_ACT_OK; - __u32 avail = (__u8 *)data_end - (__u8 *)payload; - if (avail > MAX_HTTP_PAYLOAD) - avail = MAX_HTTP_PAYLOAD; - /* Même barrière que pour la section TLS : force comparaison scalaire. 
*/ - asm volatile("" : "+r"(avail)); + /* Avail via pkt_len (scalaire pur) */ + __u32 avail = 0; + if (pkt_len > payload_off) { + avail = pkt_len - payload_off; + if (avail > MAX_HTTP_PAYLOAD) + avail = MAX_HTTP_PAYLOAD; + } + if (avail == 0) + return TC_ACT_OK; - struct http_plain_event *h_evt = - bpf_ringbuf_reserve(&rb_http_plain, sizeof(*h_evt), 0); + __u32 zero = 0; + struct http_plain_event *h_evt = bpf_map_lookup_elem(&__http_buf, &zero); if (!h_evt) - return XDP_PASS; + return TC_ACT_OK; + + h_evt->src_ip = 0; + h_evt->dst_ip = 0; + h_evt->src_port = 0; + h_evt->dst_port = 0; + h_evt->payload_len = 0; + h_evt->timestamp_ns = 0; h_evt->src_ip = bpf_ntohl(src_ip); h_evt->dst_ip = bpf_ntohl(dst_ip); @@ -240,21 +271,19 @@ int capture_xdp(struct xdp_md *ctx) h_evt->timestamp_ns = bpf_ktime_get_ns(); h_evt->payload_len = (__u16)avail; - __u8 *src = (__u8 *)payload; - #pragma clang loop unroll(disable) - for (__u32 i = 0; i < MAX_HTTP_PAYLOAD; i++) { - if (i >= avail) - break; - if (src + i + 1 > (__u8 *)data_end) - break; - h_evt->payload[i] = src[i]; - } + /* Taille constante 256 pour compatibilité vérificateur kernel 4.18 */ + if (bpf_skb_load_bytes(ctx, payload_off, h_evt, 256)) + return TC_ACT_OK; - bpf_ringbuf_submit(h_evt, 0); + bpf_perf_event_output(ctx, &pb_http_plain, BPF_F_CURRENT_CPU, + h_evt, sizeof(*h_evt)); + + key = STAT_HTTP_SUBMIT; + cnt = bpf_map_lookup_elem(&tc_stats, &key); + if (cnt) (*cnt)++; } - return XDP_PASS; + return TC_ACT_OK; } char LICENSE[] SEC("license") = "GPL"; - diff --git a/services/ja4ebpf/bpf/uprobe_ssl.c b/services/ja4ebpf/bpf/uprobe_ssl.c index f53dd80..5f070b9 100644 --- a/services/ja4ebpf/bpf/uprobe_ssl.c +++ b/services/ja4ebpf/bpf/uprobe_ssl.c @@ -4,6 +4,9 @@ * et corrige l'association socket ↔ SSL* via les tracepoints syscalls/accept4. * Les tracepoints sont plus stables que les kprobes car ils ne dépendent pas * du nom manglé __x64_sys_accept4 (variable selon la version du kernel). 
+ * + * Utilise bpf_perf_event_output() (kernel 4.4+) pour compatibilité maximale. + * Les structs > 512o utilisent un PERCPU_ARRAY temporaire (__ssl_buf). * ============================================================================ */ #include "vmlinux.h" @@ -105,7 +108,8 @@ int uprobe_ssl_read_entry(struct pt_regs *ctx) /* =========================================================================== * uretprobe_ssl_read_exit — Retour de SSL_read * - * Lit le buffer déchiffré et l'émet dans rb_ssl_data. + * Lit le buffer déchiffré et l'émet via perf_event_output. + * Struct ssl_data_event = 4131 octets → PERCPU_ARRAY temporaire (__ssl_buf). * ===========================================================================*/ SEC("uretprobe/SSL_read") int uretprobe_ssl_read_exit(struct pt_regs *ctx) @@ -124,12 +128,21 @@ int uretprobe_ssl_read_exit(struct pt_regs *ctx) return 0; } - /* Allouer un slot dans le ring buffer */ - struct ssl_data_event *evt = bpf_ringbuf_reserve(&rb_ssl_data, sizeof(*evt), 0); + /* Utiliser le buffer PERCPU (struct trop grande pour la stack) */ + __u32 zero = 0; + struct ssl_data_event *evt = bpf_map_lookup_elem(&__ssl_buf, &zero); if (!evt) { bpf_map_delete_elem(&ssl_args_map, &pid_tgid); return 0; } + /* Initialiser les champs fixes (data sera écrasé par probe_read_user) */ + evt->pid_tgid = 0; + evt->fd = 0; + evt->src_ip = 0; + evt->src_port = 0; + evt->data_len = 0; + evt->timestamp_ns = 0; + evt->direction = 0; evt->pid_tgid = pid_tgid; evt->direction = 0; /* lecture = client vers serveur */ @@ -154,7 +167,8 @@ int uretprobe_ssl_read_exit(struct pt_regs *ctx) evt->src_port = 0; } - bpf_ringbuf_submit(evt, 0); + bpf_perf_event_output(ctx, &pb_ssl_data, BPF_F_CURRENT_CPU, + evt, sizeof(*evt)); bpf_map_delete_elem(&ssl_args_map, &pid_tgid); return 0; @@ -181,7 +195,8 @@ int kprobe_accept4_entry(struct sys_enter_accept4_ctx *ctx) * kretprobe_accept4_exit — Retour de accept4 via tracepoint syscalls * * Lit la sockaddr_in pour extraire 
src_ip:src_port du client, - * peuple accept_map et fd_conn_map, et émet dans rb_accept. + * peuple accept_map et fd_conn_map, et émet via perf_event_output. + * Struct accept_event = 26 octets → tient sur la stack (< 512o). * ===========================================================================*/ SEC("tracepoint/syscalls/sys_exit_accept4") int kretprobe_accept4_exit(struct sys_exit_accept4_ctx *ctx) @@ -238,21 +253,11 @@ int kretprobe_accept4_exit(struct sys_exit_accept4_ctx *ctx) }; bpf_map_update_elem(&fd_conn_map, &fd, &conn_info, BPF_ANY); - /* Émettre dans rb_accept */ - struct accept_event *out = bpf_ringbuf_reserve(&rb_accept, sizeof(*out), 0); - if (!out) - return 0; - - out->pid_tgid = pid_tgid; - out->fd = fd; - out->src_ip = src_ip; - out->src_port = src_port; - out->timestamp_ns = aevt.timestamp_ns; - - bpf_ringbuf_submit(out, 0); + /* Émettre via perf_event_output (struct 26o → sur la stack) */ + bpf_perf_event_output(ctx, &pb_accept, BPF_F_CURRENT_CPU, + &aevt, sizeof(aevt)); return 0; } char LICENSE[] SEC("license") = "GPL"; - diff --git a/services/ja4ebpf/cmd/ja4ebpf/main.go b/services/ja4ebpf/cmd/ja4ebpf/main.go index f99e8ea..0350e06 100644 --- a/services/ja4ebpf/cmd/ja4ebpf/main.go +++ b/services/ja4ebpf/cmd/ja4ebpf/main.go @@ -10,6 +10,8 @@ import ( "log" "os" "os/signal" + "strings" + "sync/atomic" "syscall" "time" @@ -18,7 +20,7 @@ import ( "github.com/antitbone/ja4/ja4ebpf/internal/parser" "github.com/antitbone/ja4/ja4ebpf/internal/procutil" "github.com/antitbone/ja4/ja4ebpf/internal/writer" - "github.com/cilium/ebpf/ringbuf" + "github.com/cilium/ebpf/perf" "gopkg.in/yaml.v3" ) @@ -32,6 +34,7 @@ var fdCache = procutil.NewFDCache(5 * time.Second) type Config struct { Interface string `yaml:"interface"` // interface réseau à surveiller (ex: "eth0") SSLLibPath string `yaml:"ssl_lib_path"` // chemin vers libssl (ex: "/usr/lib64/libssl.so.3") + Debug bool `yaml:"debug"` // mode debug : dump compteurs BPF, log verbeux, ClickHouse optionnel 
ClickHouse struct { DSN string `yaml:"dsn"` // DSN ClickHouse natif @@ -87,6 +90,9 @@ func loadConfig(path string) (*Config, error) { if v := os.Getenv("JA4EBPF_CLICKHOUSE_DSN"); v != "" { cfg.ClickHouse.DSN = v } + if v := os.Getenv("JA4EBPF_DEBUG"); v != "" { + cfg.Debug = strings.EqualFold(v, "true") || v == "1" || v == "yes" + } return cfg, nil } @@ -104,7 +110,10 @@ func main() { log.Fatalf("erreur chargement configuration: %v", err) } - log.Printf("[ja4ebpf] démarrage — interface=%s ssl=%s", cfg.Interface, cfg.SSLLibPath) + if cfg.Debug { + log.Printf("[ja4ebpf] MODE DEBUG ACTIVÉ") + } + log.Printf("[ja4ebpf] démarrage — interface=%s ssl=%s debug=%v", cfg.Interface, cfg.SSLLibPath, cfg.Debug) // Contexte principal avec annulation sur signal système ctx, cancel := context.WithCancel(context.Background()) @@ -122,9 +131,11 @@ func main() { defer ldr.Close() // --- 2. Attachement TC ingress --- + log.Printf("[ja4ebpf] attachement TC ingress sur %s...", cfg.Interface) if err := ldr.AttachTC(cfg.Interface); err != nil { log.Fatalf("erreur attachement TC sur %s: %v", cfg.Interface, err) } + log.Printf("[ja4ebpf] TC ingress attaché sur %s", cfg.Interface) // --- 3. Attachement uprobes SSL --- if err := ldr.AttachUprobes(cfg.SSLLibPath); err != nil { @@ -144,26 +155,46 @@ func main() { defer mgr.Close() // --- 6. Writer ClickHouse --- + var w *writer.ClickHouseWriter flushInterval := time.Duration(cfg.ClickHouse.FlushSecs) * time.Second - w, err := writer.NewClickHouseWriter(cfg.ClickHouse.DSN, cfg.ClickHouse.BatchSize, flushInterval) + w, err = writer.NewClickHouseWriter(cfg.ClickHouse.DSN, cfg.ClickHouse.BatchSize, flushInterval) if err != nil { - log.Fatalf("erreur initialisation writer ClickHouse: %v", err) + if cfg.Debug { + log.Printf("[ja4ebpf] DEBUG: writer ClickHouse non disponible: %v (continue sans CH)", err) + } else { + log.Fatalf("erreur initialisation writer ClickHouse: %v", err) + } + } + if w != nil { + w.Start(ctx) } - w.Start(ctx) // --- 7. 
Goroutine : écriture des sessions prêtes --- go func() { for s := range mgr.ReadyCh { - w.Write(s) + if w != nil { + w.Write(s) + } else if cfg.Debug { + log.Printf("[ja4ebpf] DEBUG: session prête (sans CH): has_l3l4=%v has_tls=%v", + s.L3L4 != nil, s.TLS != nil) + } } }() - // --- 8. Goroutines de consommation des ring buffers --- - go consumeSynEvents(ctx, ldr.SynReader, mgr) - go consumeTLSEvents(ctx, ldr.TLSReader, mgr) - go consumeSSLEvents(ctx, ldr.SSLReader, mgr) - go consumeAcceptEvents(ctx, ldr.AcceptReader, mgr) - go consumeHTTPPlainEvents(ctx, ldr.HTTPPlainReader, mgr) + // --- 8. Compteurs d'événements consommés (mode debug) --- + consumed := &eventCounters{} + + // --- 9. Goroutines de consommation des perf buffers --- + go consumeSynEvents(ctx, ldr.SynReader, mgr, &consumed.syn) + go consumeTLSEvents(ctx, ldr.TLSReader, mgr, &consumed.tls) + go consumeSSLEvents(ctx, ldr.SSLReader, mgr, &consumed.ssl) + go consumeAcceptEvents(ctx, ldr.AcceptReader, mgr, &consumed.accept) + go consumeHTTPPlainEvents(ctx, ldr.HTTPPlainReader, mgr, &consumed.httpPlain) + + // --- 10. Stats dumper (mode debug) --- + if cfg.Debug { + go debugStatsDumper(ctx, ldr, consumed) + } log.Printf("[ja4ebpf] démon actif — en attente des événements") @@ -178,6 +209,43 @@ func main() { log.Printf("[ja4ebpf] arrêt terminé") } +// eventCounters contient les compteurs atomiques pour chaque type d'événement consommé. +type eventCounters struct { + syn atomic.Uint64 + tls atomic.Uint64 + ssl atomic.Uint64 + accept atomic.Uint64 + httpPlain atomic.Uint64 +} + +// debugStatsDumper affiche les compteurs BPF et les événements consommés toutes les 5 secondes.
+func debugStatsDumper(ctx context.Context, ldr *loader.Loader, consumed *eventCounters) { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + + // Compteurs BPF kernel + stats, err := ldr.ReadStats() + if err != nil { + log.Printf("[debug] erreur lecture tc_stats: %v", err) + continue + } + log.Printf("[debug] BPF: TOTAL=%d IPV4=%d TCP=%d SYN=%d SYN_SUB=%d TLS_SUB=%d HTTP_SUB=%d", + stats[0], stats[1], stats[2], stats[3], stats[4], stats[5], stats[6]) + + // Compteurs userspace + log.Printf("[debug] GO: syn=%d tls=%d ssl=%d accept=%d http=%d", + consumed.syn.Load(), consumed.tls.Load(), consumed.ssl.Load(), + consumed.accept.Load(), consumed.httpPlain.Load()) + } +} + // parseTCPOptions extrait le MSS et le Window Scale depuis les options TCP brutes. // Les options TCP suivent le format TLV (Type-Length-Value), sauf les options 0 et 1. // Retourne (mss=0, windowScale=0xFF) si les options sont absentes ou mal formées. @@ -220,7 +288,7 @@ func parseTCPOptions(opts []byte) (mss uint16, windowScale uint8) { // consumeSynEvents lit les événements TCP SYN depuis le ring buffer // et met à jour l'état L3/L4 des sessions. -func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) { +func consumeSynEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) { for { select { case <-ctx.Done(): @@ -230,7 +298,7 @@ func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. record, err := rd.Read() if err != nil { - if err == ringbuf.ErrClosed { + if err == os.ErrClosed { return } continue @@ -240,7 +308,7 @@ func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. 
// src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+ttl(1)+df_bit(1)+ip_id(2)+ // window_size(2)+window_scale(1)+mss(2)+tcp_options_raw[40]+tcp_options_len(1)+timestamp_ns(8) // offsets: 0 4 8 10 12 13 14 16 18 19 21 61 62 - if len(record.RawSample) < 62 { + if len(record.RawSample) < 70 { continue } data := record.RawSample @@ -288,12 +356,13 @@ func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. _ = s.TLS // corrélation implicite par présence des deux champs } }) + counter.Add(1) } } // consumeTLSEvents lit les événements TLS ClientHello depuis le ring buffer // et calcule l'empreinte JA4 pour chaque session. -func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) { +func consumeTLSEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) { for { select { case <-ctx.Done(): @@ -303,7 +372,7 @@ func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. record, err := rd.Read() if err != nil { - if err == ringbuf.ErrClosed { + if err == os.ErrClosed { return } continue @@ -312,20 +381,20 @@ func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. 
// struct tls_hello_event (packed): // src_ip(4) + src_port(2) + payload[2048] + payload_len(2) + timestamp_ns(8) // offsets: 0 4 6 2054 2056 - if len(record.RawSample) < 2056 { + if len(record.RawSample) < 2064 { continue } data := record.RawSample - srcIPRaw := binary.LittleEndian.Uint32(data[0:4]) - srcPort := binary.LittleEndian.Uint16(data[4:6]) + srcIPRaw := binary.LittleEndian.Uint32(data[2048:2052]) + srcPort := binary.LittleEndian.Uint16(data[2052:2054]) payloadLen := binary.LittleEndian.Uint16(data[2054:2056]) if int(payloadLen) > 2048 { payloadLen = 2048 } payload := make([]byte, payloadLen) - copy(payload, data[6:6+payloadLen]) + copy(payload, data[0:payloadLen]) var key correlation.SessionKey key.SrcIP[0] = byte(srcIPRaw >> 24) @@ -366,13 +435,14 @@ func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. _ = s.L3L4 // corrélation implicite par présence des deux champs } }) + counter.Add(1) } } // consumeSSLEvents lit les données SSL déchiffrées depuis le ring buffer. // Parse les requêtes HTTP/1.x et détecte le préambule HTTP/2. // Quand src_ip=0 (accept4 non disponible), tente un lookup /proc pour retrouver l'IP du client. -func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) { +func consumeSSLEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) { for { select { case <-ctx.Done(): @@ -382,7 +452,7 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. record, err := rd.Read() if err != nil { - if err == ringbuf.ErrClosed { + if err == os.ErrClosed { return } continue @@ -439,6 +509,7 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. key.SrcIP[3] = byte(srcIPRaw) key.SrcPort = srcPort + counter.Add(1) // === Routeur Magic Bytes === if parser.DetectH2Preface(sslData) { @@ -517,7 +588,7 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. 
// consumeAcceptEvents lit les événements accept4 depuis le ring buffer. // Met à jour les sessions avec les informations de connexion client. -func consumeAcceptEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) { +func consumeAcceptEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) { for { select { case <-ctx.Done(): @@ -527,7 +598,7 @@ func consumeAcceptEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlati record, err := rd.Read() if err != nil { - if err == ringbuf.ErrClosed { + if err == os.ErrClosed { return } continue @@ -556,13 +627,14 @@ func consumeAcceptEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlati // S'assurer que la session existe mgr.GetOrCreate(key) + counter.Add(1) } } -// consumeHTTPPlainEvents lit les payloads HTTP en clair depuis le ring buffer XDP. +// consumeHTTPPlainEvents lit les payloads HTTP en clair depuis le perf buffer TC. // Parse la requête HTTP/1.x ou détecte la préface HTTP/2 pour les connexions // non-chiffrées sur les ports 80/8080. 
-func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) { +func consumeHTTPPlainEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) { for { select { case <-ctx.Done(): @@ -572,21 +644,21 @@ func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correl record, err := rd.Read() if err != nil { - if err == ringbuf.ErrClosed { + if err == os.ErrClosed { return } continue } data := record.RawSample - // struct http_plain_event: src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+payload(4096)+payload_len(2)+timestamp_ns(8) + // struct http_plain_event: payload(4096)+src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+payload_len(2)+timestamp_ns(8) - if len(data) < 14 { + if len(data) < 4118 { /* require the whole event: the fields read below sit after payload[4096] */ continue } // src_ip et src_port en host byte order (bpf_ntohl appliqué dans tc_capture.c) - srcIPRaw := binary.LittleEndian.Uint32(data[0:4]) - srcPort := binary.LittleEndian.Uint16(data[8:10]) + srcIPRaw := binary.LittleEndian.Uint32(data[4096:4100]) + srcPort := binary.LittleEndian.Uint16(data[4104:4106]) if srcIPRaw == 0 && srcPort == 0 { continue @@ -610,10 +682,10 @@ func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correl if payloadLen == 0 { continue } - if 12+payloadLen > len(data) { - payloadLen = len(data) - 12 + if payloadLen > 4096 { /* payload starts at offset 0: clamp to its 4096-byte capacity, not to len(data)-4096 */ + payloadLen = 4096 } - httpData := data[12 : 12+payloadLen] + httpData := data[0:payloadLen] // Routeur Magic Bytes : HTTP/1.x uniquement sur port 80 if parser.IsHTTP1Request(httpData) { @@ -633,6 +705,7 @@ func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correl // Corréler si L3/L4 est déjà présent (TCP SYN capturé) _ = s.L3L4 // corrélation implicite }) + counter.Add(1) } } } diff --git a/services/ja4ebpf/config.yml.example b/services/ja4ebpf/config.yml.example index 86831bc..59cdd4c 100644 --- a/services/ja4ebpf/config.yml.example +++ b/services/ja4ebpf/config.yml.example @@ -1,33 +1,26 @@ # Configuration 
de l'agent ja4ebpf # Copiez ce fichier en config.yml et adaptez les valeurs. -# Interface réseau à surveiller (hook TC ingress) +# Interface réseau à surveiller (hook TC ingress, qdisc clsact) interface: eth0 -# Processus à instrumenter via uprobes SSL -ssl_probes: - - executable: /usr/sbin/httpd - symbol: SSL_read - - executable: /usr/lib64/libssl.so.3 - symbol: SSL_read +# Chemin vers libssl pour les uprobes SSL_read/SSL_write +ssl_lib_path: "/usr/lib64/libssl.so.3" + +# Mode debug : dump compteurs BPF + événements consommés toutes les 5s +# ClickHouse optionnel en mode debug +debug: false # Paramètres de connexion ClickHouse clickhouse: - addr: "127.0.0.1:9000" - database: "ja4_logs" - table: "http_logs_raw" - username: "default" - password: "" - tls: false + dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs" batch_size: 500 - flush_every: "2s" + flush_secs: 1 # Délais de corrélation et de détection -timeouts: - # Durée sans activité avant expiration d'une session TCP - session_expiry: "500ms" - # Délai maximum pour une requête L7 sans réponse (détection Slowloris) - slowloris: "10s" +correlation: + timeout_ms: 500 # expiration session TCP (ms) + slowloris_ms: 10000 # seuil Slowloris (ms) # Journalisation log: diff --git a/services/ja4ebpf/go.mod b/services/ja4ebpf/go.mod index 45c0c77..ff581d2 100644 --- a/services/ja4ebpf/go.mod +++ b/services/ja4ebpf/go.mod @@ -20,9 +20,10 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/segmentio/asm v1.2.0 // indirect github.com/shopspring/decimal v1.3.1 // indirect + github.com/vishvananda/netlink v1.3.1 // indirect + github.com/vishvananda/netns v0.0.5 // indirect go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea // indirect golang.org/x/sys v0.20.0 // indirect ) - diff --git a/services/ja4ebpf/go.sum b/services/ja4ebpf/go.sum index 1988efd..09d2cb5 100644 --- a/services/ja4ebpf/go.sum +++ b/services/ja4ebpf/go.sum @@ -65,6 
+65,10 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0= +github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4= +github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY= +github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= @@ -103,6 +107,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= diff --git a/services/ja4ebpf/internal/loader/loader.go b/services/ja4ebpf/internal/loader/loader.go index 7267a7f..62dd242 100644 --- a/services/ja4ebpf/internal/loader/loader.go 
+++ b/services/ja4ebpf/internal/loader/loader.go @@ -1,48 +1,90 @@ // Package loader initialise les programmes eBPF via cilium/ebpf, -// attache les hooks TC ingress et les uprobes SSL, et expose -// les readers RingBuffer aux consommateurs Go. +// attache le hook TC ingress et les uprobes SSL, et expose +// les readers PerfEvent aux consommateurs Go. package loader import ( "context" - "encoding/binary" "fmt" "net" "os" + "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" - "github.com/cilium/ebpf/ringbuf" + "github.com/cilium/ebpf/perf" "github.com/cilium/ebpf/rlimit" + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" ) //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -target amd64 -cflags "-O2 -g -Wall -D__TARGET_ARCH_x86 -Wno-pass-failed" Ja4Tc ../../bpf/tc_capture.c -- -I../../bpf/headers //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -target amd64 -cflags "-O2 -g -Wall -D__TARGET_ARCH_x86 -Wno-pass-failed" Ja4Ssl ../../bpf/uprobe_ssl.c -- -I../../bpf/headers +// perCPUBufferSize est la taille du buffer perf per-CPU en octets (256 KB). +const perCPUBufferSize = 256 * 1024 + // Loader encapsule les objets eBPF compilés, les liens vers les hooks, -// et les readers RingBuffer exposés au pipeline de traitement. +// et les readers PerfEvent exposés au pipeline de traitement. type Loader struct { tcObjs *Ja4TcObjects // généré par bpf2go (tc_capture.c) sslObjs *Ja4SslObjects // généré par bpf2go (uprobe_ssl.c) - tcLink link.Link + tcNlLink netlink.Link // interface netlink pour cleanup TC uprobeLinks []link.Link + statsMap *ebpf.Map // map tc_stats pour lecture des compteurs BPF (mode debug) - // SynReader lit les événements TCP SYN depuis rb_tcp_syn. - SynReader *ringbuf.Reader - // TLSReader lit les événements TLS ClientHello depuis rb_tls_hello. - TLSReader *ringbuf.Reader - // SSLReader lit les données SSL déchiffrées depuis rb_ssl_data. 
- SSLReader *ringbuf.Reader - // AcceptReader lit les événements accept4 depuis rb_accept. - AcceptReader *ringbuf.Reader - // HTTPPlainReader lit les payloads HTTP en clair depuis rb_http_plain. - HTTPPlainReader *ringbuf.Reader + // SynReader lit les événements TCP SYN depuis pb_tcp_syn. + SynReader *perf.Reader + // TLSReader lit les événements TLS ClientHello depuis pb_tls_hello. + TLSReader *perf.Reader + // SSLReader lit les données SSL déchiffrées depuis pb_ssl_data. + SSLReader *perf.Reader + // AcceptReader lit les événements accept4 depuis pb_accept. + AcceptReader *perf.Reader + // HTTPPlainReader lit les payloads HTTP en clair depuis pb_http_plain. + HTTPPlainReader *perf.Reader +} + +// StatNames associe chaque index de compteur BPF à un nom lisible. +var StatNames = map[uint32]string{ + 0: "TOTAL", + 1: "IPV4", + 2: "TCP", + 3: "SYN", + 4: "SYN_SUBMIT", + 5: "TLS_SUBMIT", + 6: "HTTP_SUBMIT", +} + +// ReadStats lit les compteurs de la map tc_stats (PERCPU_ARRAY). +// Retourne une map[index] → somme de toutes les valeurs CPU. +// Si la map n'est pas disponible, retourne une map vide. +func (l *Loader) ReadStats() (map[uint32]uint64, error) { + result := make(map[uint32]uint64) + if l.statsMap == nil { + return result, nil + } + + for key := uint32(0); key < 7; key++ { + var values []uint64 + if err := l.statsMap.Lookup(key, &values); err != nil { + continue + } + var sum uint64 + for _, v := range values { + sum += v + } + result[key] = sum + } + return result, nil } // New charge le bytecode eBPF embarqué, supprime la limite mémoire -// RLIMIT_MEMLOCK (requise pour les ring buffers et les maps eBPF), +// RLIMIT_MEMLOCK (requise pour les maps eBPF), // et retourne un Loader prêt à être attaché aux hooks. // -// Cible : CentOS 8 / RHEL 8 et supérieur (kernel ≥ 4.18 avec BTF backporté). +// Cible : kernel 4.18+ avec BTF. 
Les perf event arrays sont supportés depuis +// kernel 4.4, bpf_skb_load_bytes depuis kernel 4.5, assurant une compatibilité +// maximale via le hook TC ingress. // Le BTF natif est détecté automatiquement par cilium/ebpf via // /sys/kernel/btf/vmlinux — aucun fallback manuel n'est requis. func New() (*Loader, error) { @@ -57,6 +99,28 @@ func New() (*Loader, error) { return nil, fmt.Errorf("chargement objets TC eBPF: %w", err) } + // Trouver la map tc_stats par iteration des maps kernel + var statsMap *ebpf.Map + var mapID ebpf.MapID = 0 + for { + nextID, err := ebpf.MapGetNextID(mapID) + if err != nil { + break + } + m, err := ebpf.NewMapFromID(nextID) + if err != nil { + mapID = nextID + continue + } + info, err := m.Info() + if err == nil && info.Name == "tc_stats" { + statsMap = m + break + } + m.Close() + mapID = nextID + } + // Charger les objets SSL/uprobe (uprobe_ssl.c) sslObjs := &Ja4SslObjects{} if err := LoadJa4SslObjects(sslObjs, nil); err != nil { @@ -64,42 +128,42 @@ func New() (*Loader, error) { return nil, fmt.Errorf("chargement objets SSL eBPF: %w", err) } - // Initialiser les readers pour chaque ring buffer - synReader, err := ringbuf.NewReader(tcObjs.RbTcpSyn) + // Initialiser les readers pour chaque perf event array + synReader, err := perf.NewReader(tcObjs.PbTcpSyn, perCPUBufferSize) if err != nil { sslObjs.Close() tcObjs.Close() - return nil, fmt.Errorf("création reader rb_tcp_syn: %w", err) + return nil, fmt.Errorf("création reader pb_tcp_syn: %w", err) } - tlsReader, err := ringbuf.NewReader(tcObjs.RbTlsHello) + tlsReader, err := perf.NewReader(tcObjs.PbTlsHello, perCPUBufferSize) if err != nil { synReader.Close() sslObjs.Close() tcObjs.Close() - return nil, fmt.Errorf("création reader rb_tls_hello: %w", err) + return nil, fmt.Errorf("création reader pb_tls_hello: %w", err) } - httpPlainReader, err := ringbuf.NewReader(tcObjs.RbHttpPlain) + httpPlainReader, err := perf.NewReader(tcObjs.PbHttpPlain, perCPUBufferSize) if err != nil { 
tlsReader.Close() synReader.Close() sslObjs.Close() tcObjs.Close() - return nil, fmt.Errorf("création reader rb_http_plain: %w", err) + return nil, fmt.Errorf("création reader pb_http_plain: %w", err) } - sslReader, err := ringbuf.NewReader(sslObjs.RbSslData) + sslReader, err := perf.NewReader(sslObjs.PbSslData, perCPUBufferSize) if err != nil { httpPlainReader.Close() tlsReader.Close() synReader.Close() sslObjs.Close() tcObjs.Close() - return nil, fmt.Errorf("création reader rb_ssl_data: %w", err) + return nil, fmt.Errorf("création reader pb_ssl_data: %w", err) } - acceptReader, err := ringbuf.NewReader(sslObjs.RbAccept) + acceptReader, err := perf.NewReader(sslObjs.PbAccept, perCPUBufferSize) if err != nil { sslReader.Close() httpPlainReader.Close() @@ -107,12 +171,13 @@ func New() (*Loader, error) { synReader.Close() sslObjs.Close() tcObjs.Close() - return nil, fmt.Errorf("création reader rb_accept: %w", err) + return nil, fmt.Errorf("création reader pb_accept: %w", err) } return &Loader{ tcObjs: tcObjs, sslObjs: sslObjs, + statsMap: statsMap, SynReader: synReader, TLSReader: tlsReader, SSLReader: sslReader, @@ -121,66 +186,79 @@ func New() (*Loader, error) { }, nil } -// AttachTC attache le programme XDP sur l'interface réseau spécifiée. -// Essaie le mode natif XDP (driver support) puis se replie sur le mode générique -// (SKB_MODE, compatible kernel ≥ 4.8, fonctionne dans les VMs). +// AttachTC attache le programme TC ingress (clsact qdisc) sur l'interface +// réseau spécifiée. Crée le qdisc clsact (idempotent) et attache le filtre BPF +// en mode direct-action. Compatible kernel 4.1+. 
func (l *Loader) AttachTC(iface string) error { + // Trouver l'interface par nom (standard Go net package) netIface, err := net.InterfaceByName(iface) if err != nil { return fmt.Errorf("interface réseau %q introuvable: %w", iface, err) } - // Mode natif (meilleure performance sur serveurs avec NIC compatible XDP) - lnk, err := link.AttachXDP(link.XDPOptions{ - Interface: netIface.Index, - Program: l.tcObjs.CaptureXdp, - Flags: link.XDPDriverMode, - }) + // Obtenir le link netlink par index (plus fiable que par nom) + nlLink, err := netlink.LinkByIndex(netIface.Index) if err != nil { - // Repli sur le mode générique (VMs, NICs sans driver XDP natif) - lnk, err = link.AttachXDP(link.XDPOptions{ - Interface: netIface.Index, - Program: l.tcObjs.CaptureXdp, - Flags: link.XDPGenericMode, - }) - if err != nil { - return fmt.Errorf("attachement XDP sur %q (natif et générique): %w", iface, err) - } + return fmt.Errorf("netlink link index %d introuvable: %w", netIface.Index, err) } - l.tcLink = lnk + // Créer le qdisc clsact (idempotent via QdiscReplace) + qdisc := &netlink.Clsact{ + QdiscAttrs: netlink.QdiscAttrs{ + LinkIndex: nlLink.Attrs().Index, + Handle: netlink.MakeHandle(0xffff, 0), + Parent: netlink.HANDLE_CLSACT, + }, + } + if err := netlink.QdiscReplace(qdisc); err != nil { + return fmt.Errorf("clsact qdisc sur %q: %w", iface, err) + } + + // Attacher le programme BPF comme filtre ingress + filter := &netlink.BpfFilter{ + FilterAttrs: netlink.FilterAttrs{ + LinkIndex: nlLink.Attrs().Index, + Parent: netlink.HANDLE_MIN_INGRESS, + Handle: 1, + Protocol: unix.ETH_P_ALL, + Priority: 1, + }, + ClassId: netlink.MakeHandle(1, 1), + Fd: l.tcObjs.CaptureTc.FD(), + DirectAction: true, + } + if err := netlink.FilterReplace(filter); err != nil { + return fmt.Errorf("TC filter ingress sur %q: %w", iface, err) + } + + l.tcNlLink = nlLink return nil } // AttachUprobes attache les uprobes SSL_read et SSL_set_fd // sur le binaire libssl spécifié (ex: "/usr/lib64/libssl.so.3"). 
func (l *Loader) AttachUprobes(sslLibPath string) error { - // Vérifier que le fichier existe if _, err := os.Stat(sslLibPath); err != nil { return fmt.Errorf("bibliothèque SSL %q: %w", sslLibPath, err) } - // Ouvrir le binaire exécutable pour les uprobes ex, err := link.OpenExecutable(sslLibPath) if err != nil { return fmt.Errorf("ouverture exécutable %q pour uprobe: %w", sslLibPath, err) } - // Uprobe sur SSL_set_fd (entry) setFdLink, err := ex.Uprobe("SSL_set_fd", l.sslObjs.UprobeSslSetFd, nil) if err != nil { return fmt.Errorf("attachement uprobe SSL_set_fd: %w", err) } l.uprobeLinks = append(l.uprobeLinks, setFdLink) - // Uprobe sur SSL_read (entry) readEntryLink, err := ex.Uprobe("SSL_read", l.sslObjs.UprobeSslReadEntry, nil) if err != nil { return fmt.Errorf("attachement uprobe SSL_read (entry): %w", err) } l.uprobeLinks = append(l.uprobeLinks, readEntryLink) - // Uretprobe sur SSL_read (exit) readExitLink, err := ex.Uretprobe("SSL_read", l.sslObjs.UretprobeSslReadExit, nil) if err != nil { return fmt.Errorf("attachement uretprobe SSL_read (exit): %w", err) @@ -191,10 +269,7 @@ func (l *Loader) AttachUprobes(sslLibPath string) error { } // AttachAcceptProbe attache les tracepoints syscalls/sys_{enter,exit}_accept4. -// Les tracepoints sont préférés aux kprobes car ils ne dépendent pas du nom -// manglé __x64_sys_accept4 qui varie entre les versions du kernel (5.1+). 
func (l *Loader) AttachAcceptProbe() error { - // Tracepoint à l'entrée de accept4 kpEntry, err := link.Tracepoint("syscalls", "sys_enter_accept4", l.sslObjs.KprobeAccept4Entry, nil) if err != nil { @@ -202,7 +277,6 @@ func (l *Loader) AttachAcceptProbe() error { } l.uprobeLinks = append(l.uprobeLinks, kpEntry) - // Tracepoint à la sortie de accept4 kpExit, err := link.Tracepoint("syscalls", "sys_exit_accept4", l.sslObjs.KretprobeAccept4Exit, nil) if err != nil { @@ -215,7 +289,6 @@ func (l *Loader) AttachAcceptProbe() error { // Close détache tous les hooks eBPF et libère toutes les ressources associées. func (l *Loader) Close() error { - // Fermer les readers RingBuffer if l.HTTPPlainReader != nil { l.HTTPPlainReader.Close() } @@ -232,19 +305,26 @@ func (l *Loader) Close() error { l.SynReader.Close() } - // Détacher les uprobes et kprobes + // Détacher le filtre TC ingress + if l.tcNlLink != nil { + filter := &netlink.BpfFilter{ + FilterAttrs: netlink.FilterAttrs{ + LinkIndex: l.tcNlLink.Attrs().Index, + Parent: netlink.HANDLE_MIN_INGRESS, + Handle: 1, + Priority: 1, + }, + } + // Ignorer l'erreur — le filtre peut déjà être supprimé + netlink.FilterDel(filter) + } + for _, lnk := range l.uprobeLinks { if lnk != nil { lnk.Close() } } - // Détacher le hook TC - if l.tcLink != nil { - l.tcLink.Close() - } - - // Libérer les objets eBPF (maps, programmes) if l.sslObjs != nil { l.sslObjs.Close() } @@ -255,259 +335,10 @@ func (l *Loader) Close() error { return nil } -// ============================================================================= -// Types d'événements : représentations Go des structures C eBPF -// ============================================================================= - -// TCPSynEvent représente un événement TCP SYN capturé par TC ingress. 
-type TCPSynEvent struct { - SrcIP uint32 - DstIP uint32 - SrcPort uint16 - DstPort uint16 - TTL uint8 - DFBit uint8 - IPID uint16 - WindowSize uint16 - WindowScale uint8 - MSS uint16 - TCPOptions [40]byte - TCPOptionsLen uint8 - Timestamp uint64 -} - -// TLSHelloEvent représente un événement TLS ClientHello. -type TLSHelloEvent struct { - SrcIP uint32 - SrcPort uint16 - Payload []byte - PayloadLen uint16 - Timestamp uint64 -} - -// SSLDataEvent représente un bloc de données SSL déchiffré par uprobe. -type SSLDataEvent struct { - PID uint32 - TGID uint32 - FD uint32 - SrcIP uint32 - SrcPort uint16 - Data []byte - DataLen uint32 - Timestamp uint64 - Direction uint8 - EOF bool -} - -// HTTPPlainEvent représente un payload TCP HTTP en clair capturé par TC ingress. -type HTTPPlainEvent struct { - SrcIP uint32 - DstIP uint32 - SrcPort uint16 - DstPort uint16 - Payload []byte - PayloadLen uint16 - Timestamp uint64 -} - -// AcceptEvent représente une acceptation de connexion TCP (accept4). -type AcceptEvent struct { - PID uint32 - TGID uint32 - FD uint32 - SrcIP uint32 - SrcPort uint16 - Timestamp uint64 -} - -// ============================================================================= -// Méthodes de lecture des RingBuffers -// ============================================================================= - -// ReadTCPSynEvent lit un événement TCP SYN depuis le RingBuffer. -// Bloque jusqu'à ce qu'un événement soit disponible ou que ctx soit annulé. 
-func (l *Loader) ReadTCPSynEvent(ctx context.Context) (*TCPSynEvent, error) { - rec, err := readRecord(ctx, l.SynReader) - if err != nil { - return nil, err - } - - data := rec.RawSample - // struct tcp_syn_event packed: src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+ - // ttl(1)+df(1)+ip_id(2)+window(2)+wscale(1)+mss(2)+opts(40)+opts_len(1)+_pad(1)+ts(8) = 71 - if len(data) < 64 { - return nil, fmt.Errorf("tcp_syn_event trop court: %d octets", len(data)) - } - - ev := &TCPSynEvent{ - SrcIP: binary.LittleEndian.Uint32(data[0:4]), - DstIP: binary.LittleEndian.Uint32(data[4:8]), - SrcPort: binary.LittleEndian.Uint16(data[8:10]), - DstPort: binary.LittleEndian.Uint16(data[10:12]), - TTL: data[12], - DFBit: data[13], - IPID: binary.LittleEndian.Uint16(data[14:16]), - WindowSize: binary.LittleEndian.Uint16(data[16:18]), - WindowScale: data[18], - MSS: binary.LittleEndian.Uint16(data[19:21]), - } - copy(ev.TCPOptions[:], data[21:61]) - ev.TCPOptionsLen = data[61] - if len(data) >= 70 { - ev.Timestamp = binary.LittleEndian.Uint64(data[62:70]) - } - return ev, nil -} - -// ReadTLSHelloEvent lit un événement TLS ClientHello depuis le RingBuffer. 
-func (l *Loader) ReadTLSHelloEvent(ctx context.Context) (*TLSHelloEvent, error) { - rec, err := readRecord(ctx, l.TLSReader) - if err != nil { - return nil, err - } - - data := rec.RawSample - // struct tls_hello_event: src_ip(4)+src_port(2)+payload(512)+payload_len(2)+ts(8) = 528 - if len(data) < 8 { - return nil, fmt.Errorf("tls_hello_event trop court: %d octets", len(data)) - } - - plen := uint16(0) - if len(data) >= 520 { - plen = binary.LittleEndian.Uint16(data[518:520]) - } - payload := make([]byte, plen) - if int(plen) <= 512 && len(data) >= 6+int(plen) { - copy(payload, data[6:6+plen]) - } - - ts := uint64(0) - if len(data) >= 528 { - ts = binary.LittleEndian.Uint64(data[520:528]) - } - - return &TLSHelloEvent{ - SrcIP: binary.LittleEndian.Uint32(data[0:4]), - SrcPort: binary.LittleEndian.Uint16(data[4:6]), - Payload: payload, - PayloadLen: plen, - Timestamp: ts, - }, nil -} - -// ReadSSLDataEvent lit un bloc de données SSL déchiffrées depuis le RingBuffer. -func (l *Loader) ReadSSLDataEvent(ctx context.Context) (*SSLDataEvent, error) { - rec, err := readRecord(ctx, l.SSLReader) - if err != nil { - return nil, err - } - - data := rec.RawSample - // struct ssl_data_event: pid_tgid(8)+fd(4)+src_ip(4)+src_port(2)+data(4096)+data_len(4)+ts(8)+direction(1) - if len(data) < 27 { - return nil, fmt.Errorf("ssl_data_event trop court: %d octets", len(data)) - } - - pidTGID := binary.LittleEndian.Uint64(data[0:8]) - dlen := uint32(0) - if len(data) >= 4118 { - dlen = binary.LittleEndian.Uint32(data[4114:4118]) - } - payload := make([]byte, dlen) - if int(dlen) <= 4096 && len(data) >= 18+int(dlen) { - copy(payload, data[18:18+dlen]) - } - - ts := uint64(0) - if len(data) >= 4126 { - ts = binary.LittleEndian.Uint64(data[4118:4126]) - } - dir := uint8(0) - if len(data) >= 4127 { - dir = data[4126] - } - - return &SSLDataEvent{ - PID: uint32(pidTGID & 0xFFFFFFFF), - TGID: uint32(pidTGID >> 32), - FD: binary.LittleEndian.Uint32(data[8:12]), - SrcIP: 
binary.LittleEndian.Uint32(data[12:16]), - SrcPort: binary.LittleEndian.Uint16(data[16:18]), - Data: payload, - DataLen: dlen, - Timestamp: ts, - Direction: dir, - }, nil -} - -// ReadHTTPPlainEvent lit un événement HTTP en clair depuis le RingBuffer TC. -// struct http_plain_event: src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+ -// -// payload(4096)+payload_len(2)+ts(8) = 4118 -func (l *Loader) ReadHTTPPlainEvent(ctx context.Context) (*HTTPPlainEvent, error) { - rec, err := readRecord(ctx, l.HTTPPlainReader) - if err != nil { - return nil, err - } - - data := rec.RawSample - if len(data) < 12 { - return nil, fmt.Errorf("http_plain_event trop court: %d octets", len(data)) - } - - plen := uint16(0) - if len(data) >= 4110 { - plen = binary.LittleEndian.Uint16(data[4108:4110]) - } - payload := make([]byte, plen) - if int(plen) <= 4096 && len(data) >= 12+int(plen) { - copy(payload, data[12:12+plen]) - } - - ts := uint64(0) - if len(data) >= 4118 { - ts = binary.LittleEndian.Uint64(data[4110:4118]) - } - - return &HTTPPlainEvent{ - SrcIP: binary.LittleEndian.Uint32(data[0:4]), - DstIP: binary.LittleEndian.Uint32(data[4:8]), - SrcPort: binary.LittleEndian.Uint16(data[8:10]), - DstPort: binary.LittleEndian.Uint16(data[10:12]), - Payload: payload, - PayloadLen: plen, - Timestamp: ts, - }, nil -} - -// ReadAcceptEvent lit un événement accept4 depuis le RingBuffer. 
-func (l *Loader) ReadAcceptEvent(ctx context.Context) (*AcceptEvent, error) { - rec, err := readRecord(ctx, l.AcceptReader) - if err != nil { - return nil, err - } - - data := rec.RawSample - // struct accept_event: pid_tgid(8)+fd(4)+src_ip(4)+src_port(2)+ts(8) = 26 - if len(data) < 26 { - return nil, fmt.Errorf("accept_event trop court: %d octets", len(data)) - } - - pidTGID := binary.LittleEndian.Uint64(data[0:8]) - return &AcceptEvent{ - PID: uint32(pidTGID & 0xFFFFFFFF), - TGID: uint32(pidTGID >> 32), - FD: binary.LittleEndian.Uint32(data[8:12]), - SrcIP: binary.LittleEndian.Uint32(data[12:16]), - SrcPort: binary.LittleEndian.Uint16(data[16:18]), - Timestamp: binary.LittleEndian.Uint64(data[18:26]), - }, nil -} - -// readRecord lit un record brut depuis un RingBuffer avec annulation via context. -func readRecord(ctx context.Context, rd *ringbuf.Reader) (ringbuf.Record, error) { +// readRecord lit un record brut depuis un PerfReader avec annulation via context. +func readRecord(ctx context.Context, rd *perf.Reader) (perf.Record, error) { type result struct { - rec ringbuf.Record + rec perf.Record err error } ch := make(chan result, 1) @@ -517,8 +348,8 @@ func readRecord(ctx context.Context, rd *ringbuf.Reader) (ringbuf.Record, error) }() select { case <-ctx.Done(): - rd.Close() // débloque le Read() bloquant - return ringbuf.Record{}, ctx.Err() + rd.Close() + return perf.Record{}, ctx.Err() case r := <-ch: return r.rec, r.err } diff --git a/services/ja4ebpf/internal/writer/clickhouse.go b/services/ja4ebpf/internal/writer/clickhouse.go index cf34182..df43b83 100644 --- a/services/ja4ebpf/internal/writer/clickhouse.go +++ b/services/ja4ebpf/internal/writer/clickhouse.go @@ -52,14 +52,28 @@ type sessionRecord struct { TLSVersion string `json:"tls_version,omitempty"` // HTTP - Method string `json:"method,omitempty"` - Path string `json:"path,omitempty"` - QueryString string `json:"query_string,omitempty"` - StatusCode *int `json:"status_code,omitempty"` - 
ResponseSize *int64 `json:"response_size,omitempty"` - DurationMS *float64 `json:"duration_ms,omitempty"` - KeepAlives int `json:"keepalives,omitempty"` - HeaderOrderSig string `json:"header_order_signature,omitempty"` + Method string `json:"method,omitempty"` + Path string `json:"path,omitempty"` + QueryString string `json:"query_string,omitempty"` + StatusCode *int `json:"status_code,omitempty"` + ResponseSize *int64 `json:"response_size,omitempty"` + DurationMS *float64 `json:"duration_ms,omitempty"` + KeepAlives int `json:"keepalives,omitempty"` + HeaderOrderSig string `json:"header_order_signature,omitempty"` + + // HTTP/2 fingerprinting passif + H2Fingerprint string `json:"h2_fingerprint,omitempty"` + H2SettingsFP string `json:"h2_settings_fp,omitempty"` + H2WindowUpdate uint32 `json:"h2_window_update,omitempty"` + H2PseudoOrder string `json:"h2_pseudo_order,omitempty"` + H2HasPriority uint8 `json:"h2_has_priority,omitempty"` + H2HeaderTableSize int32 `json:"h2_header_table_size"` + H2EnablePush int32 `json:"h2_enable_push"` + H2MaxConcurrentStreams int32 `json:"h2_max_concurrent_streams"` + H2InitialWindowSize int64 `json:"h2_initial_window_size"` + H2MaxFrameSize int32 `json:"h2_max_frame_size"` + H2MaxHeaderListSize int32 `json:"h2_max_header_list_size"` + H2EnableConnectProtocol int32 `json:"h2_enable_connect_protocol"` } // NewClickHouseWriter crée un writer et établit la connexion ClickHouse. 
@@ -192,37 +206,142 @@ func sessionToRecord(s *correlation.SessionState) sessionRecord { // Champs métadonnées IP/TCP if s.L3L4 != nil { - rec.IPMetaDF = &s.L3L4.DFBit - rec.IPMetaID = &s.L3L4.IPID - rec.IPMetaTTL = &s.L3L4.TTL - rec.TCPMetaWindowSize = &s.L3L4.WindowSize + rec.IPMetaDF = &s.L3L4.DFBit + rec.IPMetaID = &s.L3L4.IPID + rec.IPMetaTTL = &s.L3L4.TTL + rec.TCPMetaWindowSize = &s.L3L4.WindowSize rec.TCPMetaWindowScale = &s.L3L4.WindowScale - rec.TCPMetaMSS = &s.L3L4.MSS + rec.TCPMetaMSS = &s.L3L4.MSS } // Champs TLS if s.TLS != nil { - rec.JA4Hash = s.TLS.JA4Hash - rec.TLSSNI = s.TLS.SNI - rec.TLSALPN = strings.Join(s.TLS.ALPN, ",") + rec.JA4Hash = s.TLS.JA4Hash + rec.TLSSNI = s.TLS.SNI + rec.TLSALPN = strings.Join(s.TLS.ALPN, ",") rec.TLSVersion = formatTLSVersion(s.TLS.TLSVersion) } // Champs HTTP (dernière requête) if len(s.Requests) > 0 { last := &s.Requests[len(s.Requests)-1] - rec.Method = last.Method - rec.Path = last.Path - rec.QueryString = last.QueryString - rec.StatusCode = &last.StatusCode - rec.ResponseSize = &last.ResponseSize - rec.DurationMS = &last.DurationMS - rec.HeaderOrderSig = last.HeaderOrderSig + rec.Method = last.Method + rec.Path = last.Path + rec.QueryString = last.QueryString + rec.StatusCode = &last.StatusCode + rec.ResponseSize = &last.ResponseSize + rec.DurationMS = &last.DurationMS + rec.HeaderOrderSig = last.HeaderOrderSig + + // Passive HTTP/2 fields + if last.HTTP2Settings != nil { + h2 := last.HTTP2Settings + rec.H2WindowUpdate = h2.WindowUpdateIncrement + + // Pseudo-header order → short notation "m,a,s,p" + if len(h2.PseudoHeaderOrder) > 0 { + rec.H2PseudoOrder = pseudoOrderToShort(h2.PseudoHeaderOrder) + } + + // Individual SETTINGS parameters (-1 = absent) + rec.H2HeaderTableSize = h2.HeaderTableSize + rec.H2EnablePush = h2.EnablePush + rec.H2MaxConcurrentStreams = h2.MaxConcurrentStreams + rec.H2InitialWindowSize = int64(h2.InitialWindowSize) + rec.H2MaxFrameSize = h2.MaxFrameSize + rec.H2MaxHeaderListSize
= h2.MaxHeaderListSize + + // Akamai-style composite fingerprints + rec.H2Fingerprint = buildH2Fingerprint(h2) + rec.H2SettingsFP = buildH2SettingsFP(h2) + } } return rec } +// pseudoOrderToShort converts the pseudo-header list into its short notation. +// Ex: [":method", ":authority", ":scheme", ":path"] → "m,a,s,p"; unknown names map to '?', an empty list yields "". +func pseudoOrderToShort(headers []string) string { + // 2*len covers one letter plus one separator per header and stays non-negative for an empty list (a negative capacity makes make() panic). + short := make([]byte, 0, len(headers)*2) + for i, h := range headers { + if i > 0 { + short = append(short, ',') + } + switch h { + case ":method": + short = append(short, 'm') + case ":authority": + short = append(short, 'a') + case ":scheme": + short = append(short, 's') + case ":path": + short = append(short, 'p') + default: + short = append(short, '?') + } + } + return string(short) +} + +// buildH2Fingerprint builds the composite fingerprint in the Akamai-style layout +// SETTINGS[pairs]|WINDOW_UPDATE[value]|PRIORITY[0/1]|PSEUDO_ORDER[order]. +// A SETTINGS parameter equal to -1 was absent from the frame and is left out, +// so the SETTINGS section lists only the parameters the client actually sent. +func buildH2Fingerprint(h2 *correlation.HTTP2Settings) string { + // SETTINGS values indexed by identifier-1 (identifiers 1 to 6). + values := [...]int64{ + int64(h2.HeaderTableSize), + int64(h2.EnablePush), + int64(h2.MaxConcurrentStreams), + int64(h2.InitialWindowSize), + int64(h2.MaxFrameSize), + int64(h2.MaxHeaderListSize), + } + pairs := make([]string, 0, len(values)) + for i, v := range values { + if v >= 0 { + pairs = append(pairs, fmt.Sprintf("%d:%d", i+1, v)) + } + } + + var b strings.Builder + b.WriteString(strings.Join(pairs, ",")) + + // WINDOW_UPDATE (left empty when no increment was recorded) + b.WriteByte('|') + if h2.WindowUpdateIncrement > 0 { + fmt.Fprintf(&b, "%d", h2.WindowUpdateIncrement) + } + + // PRIORITY (not captured yet) + b.WriteString("|0") + + // PSEUDO_ORDER + b.WriteByte('|') + if len(h2.PseudoHeaderOrder) > 0 { + b.WriteString(pseudoOrderToShort(h2.PseudoHeaderOrder)) + } + + return b.String() +} + 
+// buildH2SettingsFP construit la chaîne brute des entrées SETTINGS. +func buildH2SettingsFP(h2 *correlation.HTTP2Settings) string { + var parts []string + if h2.MaxConcurrentStreams >= 0 { + parts = append(parts, fmt.Sprintf("3:%d", h2.MaxConcurrentStreams)) + } + if h2.InitialWindowSize >= 0 { + parts = append(parts, fmt.Sprintf("4:%d", h2.InitialWindowSize)) + } + if h2.EnablePush >= 0 { + parts = append(parts, fmt.Sprintf("2:%d", h2.EnablePush)) + } + return strings.Join(parts, ",") +} + // formatTLSVersion convertit la valeur numérique TLS en chaîne lisible. func formatTLSVersion(v uint16) string { switch v { diff --git a/services/ja4ebpf/packaging/rpm/ja4ebpf.spec b/services/ja4ebpf/packaging/rpm/ja4ebpf.spec index a835137..e02d5bb 100644 --- a/services/ja4ebpf/packaging/rpm/ja4ebpf.spec +++ b/services/ja4ebpf/packaging/rpm/ja4ebpf.spec @@ -82,5 +82,23 @@ chown -R ja4ebpf:ja4ebpf \ %dir %attr(0750, ja4ebpf, ja4ebpf) %{_localstatedir}/log/ja4ebpf %changelog -* %(date "+%a %b %d %Y") Build System - %{build_version}-1 -- Build automatique via Dockerfile.package +* Sat Apr 12 2025 Antoine Jacquin - 0.2.0-1 +- feat(writer): sérialisation complète des 12 champs HTTP/2 passifs vers ClickHouse + (SETTINGS individuels, WINDOW_UPDATE, pseudo-headers, fingerprints composites Akamai) +- fix(writer): le parser H2 fonctionnait mais le writer ignorait HTTP2Settings +- fix(sql): TTL http_logs corrigé de 30 jours à 2 heures (conforme thèse §3.7) +- feat(browser_matcher): redistribution des poids CDN (0.35 HTTP + 0.35 TLS) +- feat(browser_matcher): exposition des 5 features browser_match_* dans le vecteur ML +- feat(shap): TreeExplainer XGBoost en priorité, ExIFFI + SHAP coexistants +- feat(pipeline): root_to_first_asset_delay et asset_load_stddev intégrés au vecteur ML +- feat(signatures): table browser_h2_signatures + rechargement 24h depuis ClickHouse +- feat(cycle): queue unknown_h2_fingerprints pour signatures H2 inconnues + +* Thu Mar 27 2025 Antoine Jacquin - 0.1.0-1 
+- Initial RPM package +- eBPF CO-RE agent: TC ingress + uprobe SSL_read +- JA4/JA4T TLS/TCP fingerprinting +- HTTP/2 passive fingerprinting (SETTINGS, WINDOW_UPDATE, pseudo-headers) +- Go Magic Bytes dispatcher with circular reassembly buffer +- 256-shard correlation engine, 500ms orphan timeout +- Multi-distro support: RHEL/CentOS/Rocky/AlmaLinux 8, 9, 10 diff --git a/services/ja4ebpf/packaging/systemd/ja4ebpf.service b/services/ja4ebpf/packaging/systemd/ja4ebpf.service index 492545e..899416f 100644 --- a/services/ja4ebpf/packaging/systemd/ja4ebpf.service +++ b/services/ja4ebpf/packaging/systemd/ja4ebpf.service @@ -23,7 +23,8 @@ Type=simple User=ja4ebpf Group=ja4ebpf -ExecStart=/usr/sbin/ja4ebpf -config /etc/ja4ebpf/config.yml +ExecStart=/usr/sbin/ja4ebpf +Environment=JA4EBPF_CONFIG=/etc/ja4ebpf/config.yml ExecReload=/bin/kill -HUP $MAINPID Restart=on-failure RestartSec=5s diff --git a/shared/clickhouse/04_mv_http_logs.sql b/shared/clickhouse/04_mv_http_logs.sql index f07d2a6..21ff699 100644 --- a/shared/clickhouse/04_mv_http_logs.sql +++ b/shared/clickhouse/04_mv_http_logs.sql @@ -124,7 +124,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs ENGINE = MergeTree PARTITION BY log_date ORDER BY (time, src_ip, dst_ip, ja4) -TTL log_date + INTERVAL 30 DAY +TTL log_date + INTERVAL 2 HOUR SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1; diff --git a/shared/clickhouse/05_aggregation_tables.sql b/shared/clickhouse/05_aggregation_tables.sql index 6e1971e..b4b4d33 100644 --- a/shared/clickhouse/05_aggregation_tables.sql +++ b/shared/clickhouse/05_aggregation_tables.sql @@ -295,3 +295,50 @@ TTL observed_at + INTERVAL 30 DAY SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1; + +-- ----------------------------------------------------------------------------- +-- browser_h2_signatures — Base de signatures H2 structurées par famille navigateur +-- Thèse §3.9.5 : rechargée toutes les 24h par le module Python +-- 
----------------------------------------------------------------------------- +CREATE TABLE IF NOT EXISTS ja4_processing.browser_h2_signatures +( + family LowCardinality(String), + version_min String DEFAULT '', + version_max String DEFAULT '', + h2_settings_json String DEFAULT '' CODEC(ZSTD(3)), + h2_settings_forbidden String DEFAULT '[]' CODEC(ZSTD(3)), + h2_window_update UInt32 DEFAULT 0, + h2_window_update_tolerance UInt32 DEFAULT 1000, + h2_priority_expected UInt8 DEFAULT 0, + pseudo_header_order String DEFAULT '', + tls_json String DEFAULT '{}' CODEC(ZSTD(3)), + headers_required String DEFAULT '[]' CODEC(ZSTD(3)), + headers_forbidden String DEFAULT '[]' CODEC(ZSTD(3)), + created_at DateTime DEFAULT now(), + is_active UInt8 DEFAULT 1 +) +ENGINE = ReplacingMergeTree(created_at) +ORDER BY (family, version_min) +SETTINGS index_granularity = 8192; + +-- Dictionnaire ClickHouse pour un lookup rapide par famille +CREATE DICTIONARY IF NOT EXISTS ja4_processing.dict_browser_h2_signatures +( + family String, + version_min String, + version_max String, + h2_settings_json String, + h2_settings_forbidden String, + h2_window_update UInt32 DEFAULT 0, + h2_window_update_tolerance UInt32 DEFAULT 1000, + h2_priority_expected UInt8 DEFAULT 0, + pseudo_header_order String DEFAULT '', + tls_json String DEFAULT '{}', + headers_required String DEFAULT '[]', + headers_forbidden String DEFAULT '[]', + is_active UInt8 DEFAULT 1 +) +PRIMARY KEY (family) +SOURCE(CLICKHOUSE(TABLE 'browser_h2_signatures' DB 'ja4_processing')) +LIFETIME(MIN 82800 MAX 86400) -- Rechargement toutes les ~24h (82800-86400 secondes) +LAYOUT(COMPLEX_KEY_HASHED()); diff --git a/tests/vm/.vagrant/bundler/global.sol b/tests/vm/.vagrant/bundler/global.sol index e295207..44881c1 100644 --- a/tests/vm/.vagrant/bundler/global.sol +++ b/tests/vm/.vagrant/bundler/global.sol @@ -1 +1 @@ -{"dependencies":[["racc",["~> 1.4"]],["nokogiri",["~> 1.6"]],["diffy",[">= 0"]],["rexml",[">= 0"]],["xml-simple",[">= 
0"]],["logger",[">= 0"]],["mime-types-data",["~> 3.2025",">= 3.2025.0507"]],["mime-types",[">= 0"]],["io-console",["~> 0.5"]],["reline",[">= 0"]],["formatador",[">= 0.2","< 2.0"]],["excon",["~> 1.0"]],["builder",[">= 0"]],["fog-core",["~> 2"]],["ruby-libvirt",[">= 0.7.0"]],["json",[">= 0"]],["fog-xml",["~> 0.1.1"]],["multi_json",["~> 1.10"]],["fog-json",[">= 0"]],["fog-libvirt",[">= 0.6.0"]],["vagrant-libvirt",["= 0.12.2"]],["vagrant-qemu",["= 0.3.12"]]],"checksum":"8812dc95b590d4059a84fe716eaa6eea39b29aecb1c994c959de405ba3705361","vagrant_version":"2.4.9"} \ No newline at end of file +{"dependencies":[["racc",["~> 1.4"]],["nokogiri",["~> 1.6"]],["diffy",[">= 0"]],["rexml",[">= 0"]],["xml-simple",[">= 0"]],["logger",[">= 0"]],["mime-types-data",["~> 3.2025",">= 3.2025.0507"]],["mime-types",[">= 0"]],["io-console",["~> 0.5"]],["reline",[">= 0"]],["formatador",[">= 0.2","< 2.0"]],["excon",["~> 1.0"]],["builder",[">= 0"]],["fog-core",["~> 2"]],["ruby-libvirt",[">= 0.7.0"]],["json",[">= 0"]],["fog-xml",["~> 0.1.1"]],["multi_json",["~> 1.10"]],["fog-json",[">= 0"]],["fog-libvirt",[">= 0.6.0"]],["vagrant-libvirt",["= 0.12.2"]]],"checksum":"b69e3c206e3d26fb25b062fbb15a80865764c5efb5e9cce85cfac1f745449033","vagrant_version":"2.4.9"} \ No newline at end of file diff --git a/tests/vm/Vagrantfile b/tests/vm/Vagrantfile index a38061f..cfc2331 100644 --- a/tests/vm/Vagrantfile +++ b/tests/vm/Vagrantfile @@ -1,7 +1,12 @@ # -*- mode: ruby -*- # vi: set ft=ruby : # ============================================================================= -# Vagrantfile — VM de test ja4ebpf sur Rocky Linux 9 +# Vagrantfile — VMs de test ja4ebpf multi-distro +# +# 3 VMs pour les tests unitaires eBPF sur kernel réel : +# - centos8 : CentOS 8 (el8) +# - rocky9 : Rocky Linux 9 (el9) +# - rocky10 : Rocky Linux 10 (el10) # # Fournit un environnement kernel complet pour les tests eBPF : # - tracefs / debugfs montés @@ -14,47 +19,69 @@ # sudo usermod -aG libvirt,kvm $USER # puis se reconnecter # # 
Utilisation : -# vagrant up # créer + provisionner (~5 min) -# vagrant ssh # connexion SSH -# make test-vm-nginx # lancer les tests depuis le host -# vagrant destroy -f # détruire la VM +# vagrant up # créer + provisionner toutes les VMs +# vagrant up rocky9 # créer une seule VM +# vagrant ssh rocky9 # connexion SSH +# make test-vm-nginx # test nginx sur Rocky 9 (défaut) +# make test-vm-all # tous les tests sur Rocky 9 +# ./tests/vm/run-all-vms.sh # tests sur les 3 VMs +# vagrant destroy -f # détruire toutes les VMs # ============================================================================= Vagrant.configure("2") do |config| - # ── Box Rocky Linux 9 avec provider libvirt (image qcow2) ───────────────── - config.vm.box = "generic/rocky9" - - # ── Désactiver synced_folder par défaut (utiliser rsync explicitement) ───── + # ── Désactiver synced_folder par défaut ───────────────────────────────────── config.vm.synced_folder ".", "/vagrant", disabled: true - # ── Provider libvirt ─────────────────────────────────────────────────────── + # ── Provider libvirt commun ───────────────────────────────────────────────── config.vm.provider :libvirt do |v| v.cpus = 4 v.memory = 4096 v.nested = false - v.cpu_mode = "host-passthrough" # expose les capacités CPU hôte → KVM perf + v.cpu_mode = "host-passthrough" v.driver = "kvm" v.disk_bus = "virtio" v.nic_model_type = "virtio" end - # ── Synchronisation du projet via rsync ──────────────────────────────────── + # ── Synchronisation du projet via rsync ───────────────────────────────────── config.vm.synced_folder "../..", "/ja4-platform", type: "rsync", rsync__exclude: [".git/", "old/", "*.rpm", "dist/"] - # ── Provisioning ─────────────────────────────────────────────────────────── - config.vm.provision "shell", path: "provision.sh" + # ═══════════════════════════════════════════════════════════════════════════ + # VM 1 : CentOS 8 (el8) + # ═══════════════════════════════════════════════════════════════════════════ + 
config.vm.define "centos8", autostart: false do |node| + node.vm.box = "centos/8" + node.vm.provision "shell", path: "provision-el8.sh" + node.vm.post_up_message = "VM centos8 prête ! Tests : make test-vm-centos8" + end - # ── Message post-démarrage ───────────────────────────────────────────────── - config.vm.post_up_message = <<~MSG - VM ja4ebpf prête ! + # ═══════════════════════════════════════════════════════════════════════════ + # VM 2 : Rocky Linux 9 (el9) — VM par défaut + # ═══════════════════════════════════════════════════════════════════════════ + config.vm.define "rocky9", primary: true do |node| + node.vm.box = "generic/rocky9" + node.vm.provision "shell", path: "provision.sh" + node.vm.post_up_message = <<~MSG + VM rocky9 prête ! + + Depuis la racine du projet : + make vm-ssh # connexion interactive + make test-vm-nginx # test nginx complet (L3/L4 + TLS + L7) + make test-vm-all # tous les tests + make vm-rebuild-ja4ebpf # resynchroniser + recompiler après modif + MSG + end + + # ═══════════════════════════════════════════════════════════════════════════ + # VM 3 : Rocky Linux 10 (el10) + # ═══════════════════════════════════════════════════════════════════════════ + config.vm.define "rocky10", autostart: false do |node| + node.vm.box = "almalinux/10" + node.vm.provision "shell", path: "provision.sh" + node.vm.post_up_message = "VM rocky10 prête ! 
Tests : make test-vm-rocky10" + end - Depuis la racine du projet : - make vm-ssh # connexion interactive - make test-vm-nginx # test nginx complet (L3/L4 + TLS + L7) - make test-vm-all # tous les tests - make vm-rebuild-ja4ebpf # resynchroniser + recompiler après modif - MSG end diff --git a/tests/vm/debug-mode-host.sh b/tests/vm/debug-mode-host.sh new file mode 100644 index 0000000..32499bf --- /dev/null +++ b/tests/vm/debug-mode-host.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# debug-mode-host.sh — Test debug ja4ebpf avec trafic host→VM +# Usage: ./debug-mode-host.sh rocky9 +set -euo pipefail + +VM="${1:-rocky9}" +cd "$(dirname "$0")" + +echo "=== [1] Setup VM: nginx + ja4ebpf debug ===" +vagrant ssh "$VM" -- "sudo bash -c ' + PATH=/usr/local/bin:\$PATH + # Install debug binary + cp /tmp/ja4ebpf-debug /usr/local/bin/ja4ebpf + chmod +x /usr/local/bin/ja4ebpf + + # Start nginx + nginx -s stop 2>/dev/null || true; sleep 1 + mkdir -p /run/nginx /var/www/html + echo {\"ok\":true} > /var/www/html/health + cat > /etc/nginx/nginx.conf << \"NEOF\" +worker_processes 1; +events { worker_connections 64; } +http { + server { + listen 80; + listen 443 ssl; + ssl_certificate /etc/pki/tls/certs/nginx.crt; + ssl_certificate_key /etc/pki/tls/private/nginx.key; + root /var/www/html; + } +} +NEOF + openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \ + -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null + nginx + + # Start ja4ebpf debug + pkill ja4ebpf 2>/dev/null || true; sleep 1 + cat > /tmp/ja4-debug.yml << \"YEOF\" +interface: eth0 +ssl_lib_path: \"/usr/lib64/libssl.so.3\" +debug: true +clickhouse: + dsn: \"clickhouse://default:@127.0.0.1:9000/ja4_logs\" + batch_size: 50 + flush_secs: 1 +correlation: + timeout_ms: 500 + slowloris_ms: 10000 +log: + level: \"debug\" + format: \"text\" +YEOF + JA4EBPF_CONFIG=/tmp/ja4-debug.yml ja4ebpf > /tmp/ja4-debug.log 2>&1 & + sleep 3 + PID=\$(pgrep ja4ebpf || echo NONE) + echo \" ja4ebpf 
PID=\$PID\" + if [ \"\$PID\" = \"NONE\" ]; then cat /tmp/ja4-debug.log; exit 1; fi + + # Open firewall + firewall-cmd --add-service=http --add-service=https 2>/dev/null || true + + # Show eth0 IP + ip -4 addr show eth0 | awk \"/inet /{sub(/\\/.*/,\"\",\\\$2); print \\\" eth0 IP: \\\"\\\$2; exit}\" +'" 2>&1 + +echo "" +echo "=== [2] Get VM IP ===" +VM_IP=$(vagrant ssh "$VM" -- "ip -4 addr show eth0" 2>/dev/null | awk '/inet /{sub(/\/.*/,"",$2); print $2; exit}') +echo " VM IP: $VM_IP" + +if [ -z "$VM_IP" ]; then + echo " ERROR: no eth0 IP found" + exit 1 +fi + +echo "" +echo "=== [3] Generate traffic from HOST to VM ===" +for i in $(seq 1 3); do + curl -sf "http://$VM_IP/health" -o /dev/null -w " HTTP $i: %{http_code}\n" 2>&1 || echo " HTTP $i: FAIL" + curl -skf "https://$VM_IP/health" -o /dev/null -w " HTTPS $i: %{http_code}\n" 2>&1 || echo " HTTPS $i: FAIL" +done + +echo "" +echo "=== [4] Wait for debug dump (8s) ===" +sleep 8 + +echo "" +echo "=== [5] Collect results ===" +vagrant ssh "$VM" -- "sudo bash -c ' + echo \" ja4ebpf: \$(pgrep ja4ebpf > /dev/null && echo alive || echo DEAD)\" + echo \"\" + echo \" === BPF stats ===\" + STATS_MAP_ID=\$(bpftool map show name xdp_stats 2>/dev/null | grep -oP \"id \K\d+\" || echo NONE) + if [ \"\$STATS_MAP_ID\" != \"NONE\" ]; then + bpftool map dump id \$STATS_MAP_ID 2>/dev/null | sed \"s/^/ /\" + else + echo \" xdp_stats map not found!\" + fi + echo \"\" + echo \" === Log tail ===\" + tail -30 /tmp/ja4-debug.log | sed \"s/^/ /\" + + # Cleanup + pkill ja4ebpf 2>/dev/null || true + nginx -s stop 2>/dev/null || true +'" 2>&1 diff --git a/tests/vm/debug-mode.sh b/tests/vm/debug-mode.sh new file mode 100644 index 0000000..2ff8816 --- /dev/null +++ b/tests/vm/debug-mode.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# debug-mode.sh — Test rapide du mode debug ja4ebpf sur une VM +# Usage: vagrant upload /ja4-platform/tests/vm/debug-mode.sh /tmp/debug-mode.sh rocky9 +# vagrant ssh rocky9 -- 'sudo bash /tmp/debug-mode.sh' +set -euo 
pipefail + +echo "=== [1] Install debug binary ===" +cp /tmp/ja4ebpf-debug /usr/local/bin/ja4ebpf +chmod +x /usr/local/bin/ja4ebpf + +echo "=== [2] Start nginx ===" +nginx -s stop 2>/dev/null || true; sleep 1 +mkdir -p /run/nginx /var/www/html +echo '{"ok":true}' > /var/www/html/health +# Minimal nginx config for TLS +cat > /etc/nginx/nginx.conf << 'NEOF' +worker_processes 1; +events { worker_connections 64; } +http { + server { + listen 80; + listen 443 ssl; + ssl_certificate /etc/pki/tls/certs/nginx.crt; + ssl_certificate_key /etc/pki/tls/private/nginx.key; + root /var/www/html; + } +} +NEOF +openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \ + -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null +nginx && echo " nginx ready" + +echo "=== [3] Start ja4ebpf in DEBUG mode ===" +pkill ja4ebpf 2>/dev/null || true; sleep 1 + +# Config with debug=true — no ClickHouse needed in debug mode +cat > /tmp/ja4-debug.yml << 'YEOF' +interface: eth0 +ssl_lib_path: "/usr/lib64/libssl.so.3" +debug: true +clickhouse: + dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs" + batch_size: 50 + flush_secs: 1 +correlation: + timeout_ms: 500 + slowloris_ms: 10000 +log: + level: "debug" + format: "text" +YEOF + +JA4EBPF_CONFIG=/tmp/ja4-debug.yml ja4ebpf > /tmp/ja4-debug.log 2>&1 & +sleep 3 + +JA4PID=$(pgrep ja4ebpf || echo NONE) +if [ "$JA4PID" = "NONE" ]; then + echo " ja4ebpf DEAD! 
Log:" + cat /tmp/ja4-debug.log + exit 1 +fi +echo " ja4ebpf PID=$JA4PID" + +# Verify XDP +echo " XDP check:" +ip -d link show dev eth0 | grep -i xdp || echo " (no XDP attached)" + +echo "=== [4] Generate traffic ===" +ETH0_IP=$(ip -4 addr show eth0 | awk '/inet /{sub(/\/.*/,"",$2); print $2; exit}') +echo " eth0 IP: $ETH0_IP" + +# HTTP traffic from localhost via eth0 IP +for i in $(seq 1 5); do + curl -sf "http://$ETH0_IP/health" -o /dev/null 2>&1 && echo " HTTP $i: OK" || echo " HTTP $i: FAIL" + curl -skf "https://$ETH0_IP/health" -o /dev/null 2>&1 && echo " HTTPS $i: OK" || echo " HTTPS $i: FAIL" +done + +echo "=== [5] Wait for debug dump (6s) ===" +sleep 6 + +echo "=== [6] Results ===" +echo " ja4ebpf: $(pgrep ja4ebpf > /dev/null && echo alive || echo DEAD)" +echo "" +echo " === Last 20 lines of log ===" +tail -20 /tmp/ja4-debug.log | sed 's/^/ /' + +echo "" +echo " === BPF map stats (bpftool) ===" +STATS_MAP_ID=$(bpftool map show name xdp_stats 2>/dev/null | grep -oP 'id \K\d+' || echo NONE) +if [ "$STATS_MAP_ID" != "NONE" ]; then + bpftool map dump id $STATS_MAP_ID 2>/dev/null | head -40 | sed 's/^/ /' +else + echo " xdp_stats map not found!" 
+fi + +# Cleanup +pkill ja4ebpf 2>/dev/null || true +nginx -s stop 2>/dev/null || true diff --git a/tests/vm/debug-test.sh b/tests/vm/debug-test.sh new file mode 100644 index 0000000..0a242a5 --- /dev/null +++ b/tests/vm/debug-test.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +# Debug script — start everything and check XDP stats +set -euo pipefail +export PATH=/usr/local/bin:/usr/local/go/bin:$PATH + +echo "=== Starting ClickHouse ===" +docker rm -f ja4-clickhouse 2>/dev/null || true +docker run -d --name ja4-clickhouse -p 8123:8123 -p 9000:9000 \ + -e CLICKHOUSE_DB=ja4_processing -e CLICKHOUSE_USER=default -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \ + -v /ja4-platform/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh \ + -v /ja4-platform/tests/integration/platform/csv-stubs:/var/lib/clickhouse/user_files \ + -v /ja4-platform/shared/clickhouse/00_database.sql:/initdb-src/00_database.sql:ro \ + -v /ja4-platform/shared/clickhouse/01_raw_tables.sql:/initdb-src/01_raw_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/02_dictionaries.sql:/initdb-src/02_dictionaries.sql:ro \ + -v /ja4-platform/shared/clickhouse/03_anubis_tables.sql:/initdb-src/03_anubis_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/04_mv_http_logs.sql:/initdb-src/04_mv_http_logs.sql:ro \ + -v /ja4-platform/shared/clickhouse/05_aggregation_tables.sql:/initdb-src/05_aggregation_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/06_ml_tables.sql:/initdb-src/06_ml_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/07_ai_features_view.sql:/initdb-src/07_ai_features_view.sql:ro \ + -v /ja4-platform/shared/clickhouse/08_users.sql:/initdb-src/08_users.sql:ro \ + -v /ja4-platform/shared/clickhouse/09_audit_table.sql:/initdb-src/09_audit_table.sql:ro \ + -v /ja4-platform/shared/clickhouse/10_perf_indexes.sql:/initdb-src/10_perf_indexes.sql:ro \ + -v /ja4-platform/shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro \ + -v 
/ja4-platform/shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro \ + clickhouse/clickhouse-server:24.8 + +for i in $(seq 1 30); do curl -sf http://localhost:8123/ping >/dev/null 2>&1 && break; sleep 2; done +echo "CH ready: $?" + +echo "=== Starting nginx ===" +pkill nginx 2>/dev/null || true; sleep 1 +mkdir -p /run/nginx /var/www/html +echo '{"ok":true}' > /var/www/html/health +cp /ja4-platform/tests/integration/nginx/platform/nginx.conf /etc/nginx/nginx.conf +openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \ + -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null +nginx && echo "nginx OK" + +echo "=== Starting ja4ebpf ===" +pkill ja4ebpf 2>/dev/null || true; sleep 1 + +cat > /tmp/ja4.yml << 'YEOF' +interface: eth0 +ssl_lib_path: "/usr/lib64/libssl.so.3" +clickhouse: + dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs" + batch_size: 50 + flush_secs: 1 +correlation: + timeout_ms: 500 + slowloris_ms: 10000 +log: + level: "debug" + format: "json" +YEOF + +JA4EBPF_CONFIG=/tmp/ja4.yml ja4ebpf > /tmp/ja4.log 2>&1 & +sleep 4 + +JA4PID=$(pgrep ja4ebpf || echo NONE) +echo "ja4ebpf PID: $JA4PID" + +if [ "$JA4PID" = "NONE" ]; then + echo "DEAD! Logs:" + cat /tmp/ja4.log + exit 1 +fi + +echo "=== XDP status ===" +ip link show dev eth0 | grep -i xdp +echo "=== Prog stats ===" +bpftool prog show name capture_xdp 2>/dev/null || echo "no prog" + +echo "" +echo "=== Waiting for external traffic ===" +echo "Send traffic from host to $(ip -4 addr show eth0 | awk '/inet /{sub(/\/.*/, "", $2); print $2}')" +echo "After sending, press Enter or wait 60s..." 
+ +# Wait for signal or timeout +for i in $(seq 1 60); do + [ -f /tmp/traffic-done ] && break + sleep 1 +done + +echo "=== After traffic ===" +pgrep ja4ebpf && echo "ja4ebpf still alive" || echo "ja4ebpf DEAD" +bpftool prog show name capture_xdp 2>/dev/null || echo "no prog" + +echo "=== Raw data count ===" +curl -sf "http://localhost:8123/?database=ja4_logs" --data-urlencode "query=SELECT count() FROM http_logs_raw" 2>/dev/null || echo "0" + +echo "=== ja4ebpf logs ===" +cat /tmp/ja4.log + +# Cleanup +pkill ja4ebpf 2>/dev/null; nginx -s stop 2>/dev/null; docker rm -f ja4-clickhouse 2>/dev/null diff --git a/tests/vm/debug-xdp.sh b/tests/vm/debug-xdp.sh new file mode 100644 index 0000000..4e2d782 --- /dev/null +++ b/tests/vm/debug-xdp.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# debug-xdp.sh — Test XDP + host traffic en une seule session SSH +# Usage: vagrant ssh rocky9 -- 'sudo bash -c "PATH=/usr/local/bin:$PATH /ja4-platform/tests/vm/debug-xdp.sh"' +set -euo pipefail +export PATH=/usr/local/bin:/usr/local/go/bin:$PATH +STACK="${1:-nginx}" + +# === Start ClickHouse === +echo "[1] Starting ClickHouse..." 
+docker rm -f ja4-clickhouse 2>/dev/null || true +docker run -d --name ja4-clickhouse -p 8123:8123 -p 9000:9000 \ + -e CLICKHOUSE_DB=ja4_processing -e CLICKHOUSE_USER=default \ + -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \ + -v /ja4-platform/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh \ + -v /ja4-platform/tests/integration/platform/csv-stubs:/var/lib/clickhouse/user_files \ + -v /ja4-platform/shared/clickhouse/00_database.sql:/initdb-src/00_database.sql:ro \ + -v /ja4-platform/shared/clickhouse/01_raw_tables.sql:/initdb-src/01_raw_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/02_dictionaries.sql:/initdb-src/02_dictionaries.sql:ro \ + -v /ja4-platform/shared/clickhouse/03_anubis_tables.sql:/initdb-src/03_anubis_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/04_mv_http_logs.sql:/initdb-src/04_mv_http_logs.sql:ro \ + -v /ja4-platform/shared/clickhouse/05_aggregation_tables.sql:/initdb-src/05_aggregation_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/06_ml_tables.sql:/initdb-src/06_ml_tables.sql:ro \ + -v /ja4-platform/shared/clickhouse/07_ai_features_view.sql:/initdb-src/07_ai_features_view.sql:ro \ + -v /ja4-platform/shared/clickhouse/08_users.sql:/initdb-src/08_users.sql:ro \ + -v /ja4-platform/shared/clickhouse/09_audit_table.sql:/initdb-src/09_audit_table.sql:ro \ + -v /ja4-platform/shared/clickhouse/10_perf_indexes.sql:/initdb-src/10_perf_indexes.sql:ro \ + -v /ja4-platform/shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro \ + -v /ja4-platform/shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro \ + clickhouse/clickhouse-server:24.8 >/dev/null +for i in $(seq 1 30); do curl -sf http://localhost:8123/ping >/dev/null 2>&1 && break; sleep 2; done +echo " ClickHouse ready" + +# === Start nginx === +echo "[2] Starting nginx..." 
+nginx -s stop 2>/dev/null || true; sleep 1 +mkdir -p /run/nginx /var/www/html +echo '{"ok":true}' > /var/www/html/health +cp /ja4-platform/tests/integration/nginx/platform/nginx.conf /etc/nginx/nginx.conf +openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \ + -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null +nginx && echo " nginx ready" + +# === Start ja4ebpf === +echo "[3] Starting ja4ebpf..." +pkill ja4ebpf 2>/dev/null || true; sleep 1 +cat > /tmp/ja4.yml << 'YEOF' +interface: eth0 +ssl_lib_path: "/usr/lib64/libssl.so.3" +clickhouse: + dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs" + batch_size: 50 + flush_secs: 1 +correlation: + timeout_ms: 500 + slowloris_ms: 10000 +log: + level: "debug" + format: "json" +YEOF +JA4EBPF_CONFIG=/tmp/ja4.yml ja4ebpf > /tmp/ja4.log 2>&1 & +sleep 3 +JA4PID=$(pgrep ja4ebpf || echo NONE) +if [ "$JA4PID" = "NONE" ]; then + echo " ja4ebpf DEAD!"; cat /tmp/ja4.log; exit 1 +fi +echo " ja4ebpf PID=$JA4PID" + +# Verify XDP +XDP_INFO=$(ip link show dev eth0 | grep "prog/xdp" || echo NONE) +echo " XDP: $XDP_INFO" + +# Show eth0 IP +ETH0_IP=$(ip -4 addr show eth0 | awk '/inet /{sub(/\/.*/,"",$2); print $2; exit}') +echo "" +echo "╔══════════════════════════════════════╗" +echo "║ Services prêts — IP: $ETH0_IP" +echo "║ Attente trafic host (60s max)..." +echo "╚══════════════════════════════════════╝" + +# Wait for host traffic signal +for i in $(seq 1 60); do + [ -f /tmp/traffic-done ] && break + sleep 1 +done + +# Check prog run count +echo "[4] Checking results..." 
+echo " ja4ebpf: $(pgrep ja4ebpf && echo alive || echo DEAD)" +bpftool prog show name capture_xdp 2>/dev/null | head -5 + +# Check raw data +RAW=$(curl -sf "http://localhost:8123/?database=ja4_logs" --data-urlencode "query=SELECT count() FROM http_logs_raw" 2>/dev/null || echo "0") +echo " http_logs_raw: $RAW lignes" + +# ja4ebpf logs +echo " Logs:" +tail -5 /tmp/ja4.log | sed 's/^/ /' + +# Cleanup +pkill ja4ebpf 2>/dev/null; nginx -s stop 2>/dev/null +docker rm -f ja4-clickhouse 2>/dev/null + +if [ "${RAW:-0}" -gt 0 ] 2>/dev/null; then + echo "" + echo " SUCCESS: $RAW rows captured" + exit 0 +else + echo "" + echo " FAIL: 0 rows captured" + exit 1 +fi diff --git a/tests/vm/provision-el8.sh b/tests/vm/provision-el8.sh new file mode 100755 index 0000000..49da9c8 --- /dev/null +++ b/tests/vm/provision-el8.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# ============================================================================= +# provision-el8.sh — Provisionnement CentOS 8 (dépôts archivés vault) +# +# CentOS 8 est EOL depuis juin 2024. Les dépôts sont sur vault.centos.org. +# ============================================================================= +set -euo pipefail + +log() { echo "[provision] $(date +%H:%M:%S) $*"; } + +# ── 1. Rediriger les dépôts vers vault.centos.org ───────────────────────────── +log "Configuration des dépôts CentOS 8 vault..." +sed -i 's|^mirrorlist=|#mirrorlist=|' /etc/yum.repos.d/CentOS-*.repo 2>/dev/null || true +sed -i 's|^#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|' /etc/yum.repos.d/CentOS-*.repo 2>/dev/null || true +dnf clean all +dnf update -y --quiet + +# ── 2. Toolchain eBPF ──────────────────────────────────────────────────────── +log "Installation toolchain eBPF..." +dnf install -y \ + clang llvm libbpf-devel bpftool \ + kernel-devel-$(uname -r) \ + make git curl tar gzip \ + epel-release dnf-plugins-core || true + +# ── 3. 
Go ───────────────────────────────────────────────────────────────────── +log "Installation de Go..." +GO_VERSION="1.24.3" +if ! command -v go &>/dev/null || [[ "$(go version 2>/dev/null | awk '{print $3}')" != "go${GO_VERSION}" ]]; then + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" -o /tmp/go.tar.gz + rm -rf /usr/local/go + tar -C /usr/local -xzf /tmp/go.tar.gz + rm /tmp/go.tar.gz +fi + +cat > /etc/profile.d/go.sh << 'EOF' +export PATH="/usr/local/go/bin:$PATH" +export GOPATH="/home/vagrant/go" +EOF + +# ── 4. Serveurs web (nginx + httpd) + TLS + hitch + varnish ──────────────────── +log "Installation des serveurs web et reverse proxy..." +dnf install -y nginx openssl curl +dnf install -y httpd mod_ssl || true +dnf install -y hitch varnish || true + +# ── 5. Python3 + outils de test ────────────────────────────────────────────── +log "Installation Python3 et outils de test..." +dnf install -y python3 python3-pip +pip3 install --quiet "httpx[http2]" requests 2>/dev/null || pip3 install --quiet httpx requests + +# ── 6. Montage tracefs + debugfs ───────────────────────────────────────────── +log "Configuration des pseudo-systèmes de fichiers eBPF..." +mount -t tracefs tracefs /sys/kernel/tracing 2>/dev/null || true +mount -t debugfs debugfs /sys/kernel/debug 2>/dev/null || true + +# ── 7. Build ja4ebpf ───────────────────────────────────────────────────────── +log "Build initial de ja4ebpf..." +export PATH="/usr/local/go/bin:$PATH" +cd /ja4-platform/services/ja4ebpf +GOWORK=off go generate ./internal/loader/ 2>&1 | tail -5 || log "go generate: erreur (normal si vmlinux.h absent)" +GOWORK=off CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ + go build -ldflags="-s -w" -o /usr/local/bin/ja4ebpf ./cmd/ja4ebpf/ 2>&1 | tail -5 + +log "Provisionnement CentOS 8 terminé !" 
diff --git a/tests/vm/provision.sh b/tests/vm/provision.sh index 5e65826..b3503b2 100755 --- a/tests/vm/provision.sh +++ b/tests/vm/provision.sh @@ -50,14 +50,31 @@ EOF log "Installation de Docker..." dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo dnf install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin + +# Sur el10+ (kernel 6.12+), nf_tables a des incompatibilités avec iptables-nft. +# Désactiver la gestion iptables par Docker pour éviter l'échec au démarrage. +if ! systemctl start docker 2>/dev/null; then + log "Docker: fallback iptables=false pour kernel $(uname -r)" + mkdir -p /etc/docker + echo '{"iptables": false}' > /etc/docker/daemon.json +fi + systemctl enable --now docker usermod -aG docker vagrant # Accès sans sudo pour vagrant chmod 666 /var/run/docker.sock || true -# ── 5. nginx + openssl ─────────────────────────────────────────────────────── -log "Installation de nginx..." +# ── 5. Serveurs web (nginx + httpd) + TLS + hitch + varnish ───────────────────── +log "Installation des serveurs web et reverse proxy..." dnf install -y nginx openssl curl +dnf install -y httpd mod_ssl +dnf install -y hitch varnish + +# Ouvrir les ports HTTP/HTTPS dans le firewall +log "Configuration firewall..." +firewall-cmd --add-service=http --add-service=https --permanent 2>/dev/null || true +firewall-cmd --add-port=80/tcp --add-port=443/tcp --permanent 2>/dev/null || true +firewall-cmd --reload 2>/dev/null || true # ── 6. Python3 + outils de test ────────────────────────────────────────────── log "Installation Python3 et outils de test..." 
diff --git a/tests/vm/run-test-from-host.sh b/tests/vm/run-test-from-host.sh
new file mode 100755
index 0000000..0eda6b5
--- /dev/null
+++ b/tests/vm/run-test-from-host.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+# =============================================================================
+# run-test-from-host.sh — Host-side orchestrator for the VM tests
+#
+# Runs the full test of one stack against one VM:
+#   1. Rsync the project files into the VM
+#   2. Start the services inside the VM (in the background, over SSH)
+#   3. Generate traffic from the HOST towards the VM's eth0 address
+#   4. Trigger the verification inside the VM and report its exit status
+#
+# Usage:
+#   ./tests/vm/run-test-from-host.sh rocky9 nginx
+#   ./tests/vm/run-test-from-host.sh centos8 apache
+#   make test-vm-nginx
+#
+# Environment:
+#   READY_TIMEOUT  seconds to wait for ja4ebpf to start in the VM (default 180)
+# =============================================================================
+set -euo pipefail
+
+VM="${1:-rocky9}"
+STACK="${2:-nginx}"
+VM_DIR="$(cd "$(dirname "$0")" && pwd)"
+READY_TIMEOUT="${READY_TIMEOUT:-180}"
+
+GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; RESET='\033[0m'
+BOLD='\033[1m'
+
+# Bounded curl: a stalled request must never outlast the VM-side wait.
+CURL=(curl -sf --connect-timeout 5 --max-time 10)
+
+log() { echo -e "${BOLD}[$VM/$STACK]${RESET} $(date +%H:%M:%S) $*"; }
+pass() { echo -e " ${GREEN}PASS${RESET} $*"; }
+fail() { echo -e " ${RED}FAIL${RESET} $*"; }
+
+cd "$VM_DIR"
+
+# ── 1. Sync the project files ────────────────────────────────────────────────
+log "Rsync fichiers vers $VM..."
+vagrant rsync "$VM"
+
+# ── 2. Get the VM's eth0 address ─────────────────────────────────────────────
+# "|| true": with pipefail a failing vagrant ssh would otherwise abort the
+# script here, before the explicit error message below is printed.
+VM_IP=$(vagrant ssh "$VM" -- 'ip -4 addr show eth0' 2>/dev/null \
+    | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}' || true)
+
+if [ -z "$VM_IP" ]; then
+    fail "Impossible d'obtenir l'IP eth0 de $VM"
+    exit 1
+fi
+log "IP eth0 : $VM_IP"
+
+# ── 3. Start the services inside the VM (background) ─────────────────────────
+log "Démarrage des services dans $VM ($STACK)..."
+
+# Clear the signal file and any capture process left over from a previous run,
+# so that the readiness probe in step 4 cannot see a stale ja4ebpf.
+vagrant ssh "$VM" -- 'sudo rm -f /tmp/ja4ebpf-traffic-done; sudo pkill -x ja4ebpf' 2>/dev/null || true
+
+# Run the VM-side script in "start" mode; it waits for the signal file
+# /tmp/ja4ebpf-traffic-done before verifying.
+vagrant ssh "$VM" -- "sudo bash /ja4-platform/tests/vm/run-tests-vm.sh $STACK start" &
+VM_PID=$!
+
+# ── 4. Wait until ja4ebpf is running in the VM ───────────────────────────────
+# Poll instead of sleeping a fixed time: traffic sent before the capture
+# process is up is lost, and VM start-up time varies a lot between runs.
+log "Attente démarrage des services (max ${READY_TIMEOUT}s)..."
+deadline=$((SECONDS + READY_TIMEOUT))
+until vagrant ssh "$VM" -- 'pgrep -x ja4ebpf' >/dev/null 2>&1; do
+    if ! kill -0 "$VM_PID" 2>/dev/null; then
+        fail "Le script VM s'est arrêté avant le démarrage de ja4ebpf"
+        wait "$VM_PID" || exit $?
+        exit 1
+    fi
+    if [ "$SECONDS" -ge "$deadline" ]; then
+        fail "ja4ebpf non démarré après ${READY_TIMEOUT}s"
+        break
+    fi
+    sleep 2
+done
+# Leave ja4ebpf a few seconds to attach its programs before sending traffic.
+sleep 5
+
+# ── 5. Check that the services answer ────────────────────────────────────────
+log "Vérification connectivité..."
+if "${CURL[@]}" "http://$VM_IP/health" >/dev/null 2>&1; then
+    pass "HTTP $VM_IP:80 OK"
+else
+    fail "HTTP $VM_IP:80 injoignable"
+fi
+if "${CURL[@]}" -k "https://$VM_IP/health" >/dev/null 2>&1; then
+    pass "HTTPS $VM_IP:443 OK"
+else
+    fail "HTTPS $VM_IP:443 injoignable"
+fi
+
+# ── 6. Generate the traffic from the host ────────────────────────────────────
+log "Génération du trafic host → $VM_IP..."
+for path in / /health /data /api/users; do
+    "${CURL[@]}" -k "https://$VM_IP$path" >/dev/null 2>&1 || true
+    "${CURL[@]}" "http://$VM_IP$path" >/dev/null 2>&1 || true
+    "${CURL[@]}" -k -X POST "https://$VM_IP/api/data" -d '{"test":1}' >/dev/null 2>&1 || true
+    "${CURL[@]}" -k -X PUT "https://$VM_IP/data" >/dev/null 2>&1 || true
+    "${CURL[@]}" -k -X DELETE "https://$VM_IP/data/1" >/dev/null 2>&1 || true
+    # --head, not "-X HEAD": the latter only renames the method and curl then
+    # waits for a response body that a HEAD reply never carries.
+    "${CURL[@]}" -k --head "https://$VM_IP$path" >/dev/null 2>&1 || true
+done
+
+# HTTP/2 via Python when httpx is available
+if python3 -c "import httpx" 2>/dev/null; then
+    python3 -c "
+import httpx, warnings
+warnings.filterwarnings('ignore')
+with httpx.Client(http2=True, verify=False) as c:
+    for p in ['/', '/health', '/data']:
+        try: c.get('https://$VM_IP' + p)
+        except Exception: pass
+" 2>/dev/null && pass "HTTP/2 généré" || true
+fi
+
+log "Attente flush ja4ebpf (15s)..."
+sleep 15
+
+# ── 7. Tell the VM to run the verification ───────────────────────────────────
+log "Signal de vérification..."
+vagrant ssh "$VM" -- 'sudo touch /tmp/ja4ebpf-traffic-done' 2>/dev/null \
+    || fail "Signal non transmis à $VM"
+
+# ── 8. Wait for the VM-side process ──────────────────────────────────────────
+log "Attente résultat..."
+# "|| RESULT=$?" keeps errexit from killing the script when the VM run fails,
+# so the failure summary below is actually printed.
+RESULT=0
+wait "$VM_PID" || RESULT=$?
+
+if [ "$RESULT" -eq 0 ]; then
+    echo ""
+    echo -e " ${GREEN}${BOLD}$VM/$STACK : SUCCÈS${RESET}"
+else
+    echo ""
+    echo -e " ${RED}${BOLD}$VM/$STACK : ÉCHEC (code $RESULT)${RESET}"
+fi
+
+exit "$RESULT"
diff --git a/tests/vm/run-tests-vm.sh b/tests/vm/run-tests-vm.sh
index 486ed0f..afbdced 100755
--- a/tests/vm/run-tests-vm.sh
+++ b/tests/vm/run-tests-vm.sh
@@ -1,142 +1,114 @@
 #!/usr/bin/env bash
 # =============================================================================
-# run-tests-vm.sh — Lance la stack de test complète dans la VM Rocky Linux 9
+# run-tests-vm.sh — Tests ja4ebpf multi-stack dans une VM Vagrant
 #
-# Ce script s'exécute DANS la VM (via vagrant ssh ou vagrant provision).
-# Il ne peut pas tourner dans Docker — il requiert un vrai kernel pour eBPF.
+# Architecture : +# Phase 1 (dans la VM) : démarrer ClickHouse, serveur web, ja4ebpf +# Phase 2 (depuis le host) : générer du trafic vers l'IP eth0 de la VM +# Phase 3 (dans la VM) : vérifier les données dans ClickHouse # -# Usage (depuis le host) : -# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh nginx' -# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh all' +# Stacks supportées : +# nginx — nginx avec TLS (HTTP/1.1 + HTTP/2) +# apache — Apache httpd avec TLS (HTTP/1.1 + HTTP/2) +# hitch-varnish — hitch (TLS) → Varnish (cache/H2) → backend Python +# all — exécute les 3 stacks séquentiellement # -# Variables d'environnement : -# STACK : stack à tester (nginx|apache|nginx-varnish|hitch-varnish|all) -# KEEP_RUNNING : si "true", ne pas arrêter la stack après le test (défaut: false) +# Modes : +# start — démarrer les services (Phase 1) +# verify — vérifier les données (Phase 3) +# (défaut) — start + verify (le trafic doit être généré entre les deux) +# +# Usage (depuis le host via Makefile) : +# make test-vm-nginx +# make test-vm-apache +# make test-vm-hitch-varnish +# make test-vm-matrix # ============================================================================= set -euo pipefail -# S'assurer que /usr/local/bin et go sont dans PATH (nécessaire pour sudo bash) export PATH="/usr/local/bin:/usr/local/go/bin:$PATH" STACK="${1:-nginx}" +MODE="${2:-full}" # start | verify | full KEEP_RUNNING="${KEEP_RUNNING:-false}" PROJECT="/ja4-platform" -RESULTS_DIR="/tmp/ja4-test-results" -# ── Couleurs ───────────────────────────────────────────────────────────────── GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; RESET='\033[0m' BOLD='\033[1m' log() { echo -e "${BOLD}[$STACK]${RESET} $(date +%H:%M:%S) $*"; } -pass() { echo -e " ${GREEN}✅${RESET} $*"; ((PASS_COUNT++)) || true; } -fail() { echo -e " ${RED}❌${RESET} $*"; ((FAIL_COUNT++)) || true; } -warn() { echo -e " ${YELLOW}⚠️${RESET} $*"; ((WARN_COUNT++)) || true; } +pass() { echo -e " 
${GREEN}PASS${RESET} $*"; ((PASS_COUNT++)) || true; } +fail() { echo -e " ${RED}FAIL${RESET} $*"; ((FAIL_COUNT++)) || true; } +warn() { echo -e " ${YELLOW}WARN${RESET} $*"; ((WARN_COUNT++)) || true; } PASS_COUNT=0; FAIL_COUNT=0; WARN_COUNT=0 -# ── Vérification prérequis ──────────────────────────────────────────────────── -check_prerequisites() { - log "Vérification des prérequis..." - - # eBPF capabilities - if [ ! -d /sys/kernel/tracing ]; then - fail "tracefs non monté — exécuter: sudo mount -t tracefs tracefs /sys/kernel/tracing" - exit 1 - fi - if [ ! -d /sys/kernel/debug ]; then - fail "debugfs non monté" - exit 1 - fi - - command -v ja4ebpf >/dev/null 2>&1 || { - log "Rebuild ja4ebpf..." - cd "$PROJECT/services/ja4ebpf" - export PATH="/usr/local/go/bin:$PATH" - GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3 - GOWORK=off CGO_ENABLED=0 go build -o /tmp/ja4ebpf_new ./cmd/ja4ebpf/ && mv /tmp/ja4ebpf_new /usr/local/bin/ja4ebpf - } - - command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; } - command -v nginx >/dev/null 2>&1 || { fail "nginx non installé"; exit 1; } - pass "Prérequis OK" +# ── Helpers communs ────────────────────────────────────────────────────────── + +gen_tls_cert() { + local name="$1" + openssl req -x509 -nodes -days 365 -subj "/CN=platform.test" \ + -newkey rsa:2048 \ + -keyout "/etc/pki/tls/private/${name}.key" \ + -out "/etc/pki/tls/certs/${name}.crt" 2>/dev/null } -# ── Démarrage ClickHouse ────────────────────────────────────────────────────── +setup_docroot() { + mkdir -p /var/www/html + echo '{"status":"ok","stack":"'"$STACK"'"}' > /var/www/html/health + for p in data api/users api/data/test; do + mkdir -p "/var/www/html/$(dirname $p)" + echo '{"ok":true}' > "/var/www/html/$p" + done +} + +get_eth0_ip() { + ip -4 addr show eth0 | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}' 2>/dev/null || echo "" +} + +# ── ClickHouse ──────────────────────────────────────────────────────────────── start_clickhouse() 
{ log "Démarrage ClickHouse..." - docker rm -f ja4-clickhouse 2>/dev/null || true - + + CSV_DIR="$PROJECT/tests/integration/platform/csv-stubs" docker run -d --name ja4-clickhouse \ -p 8123:8123 -p 9000:9000 \ -e CLICKHOUSE_DB=ja4_processing \ -e CLICKHOUSE_USER=default \ -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \ -v "$PROJECT/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh" \ + -v "$CSV_DIR:/var/lib/clickhouse/user_files" \ $(for f in "$PROJECT/shared/clickhouse/"*.sql; do echo "-v $f:/initdb-src/$(basename $f):ro" done) \ clickhouse/clickhouse-server:24.8 2>&1 | tail -1 - - # Attendre que ClickHouse soit prêt + log "Attente ClickHouse (max 120s)..." for i in $(seq 1 60); do - if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then - pass "ClickHouse prêt" - return 0 - fi + curl -sf "http://localhost:8123/ping" >/dev/null 2>&1 && { pass "ClickHouse prêt"; return 0; } sleep 2 done fail "ClickHouse timeout"; exit 1 } -# ── Configuration nginx ──────────────────────────────────────────────────────── -setup_nginx() { - log "Configuration nginx avec TLS..." 
- - # Certificat auto-signé - openssl req -x509 -nodes -days 365 \ - -subj "/CN=platform.test" \ - -newkey rsa:2048 \ - -keyout /etc/pki/tls/private/nginx.key \ - -out /etc/pki/tls/certs/nginx.crt 2>/dev/null - - # Copier la configuration de test - cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf - - # Créer les fichiers de test - mkdir -p /var/www/html - # /run/nginx est un tmpfs recréé à chaque boot, nginx en a besoin pour son PID - mkdir -p /run/nginx - echo '{"status":"ok","stack":"nginx-vm"}' > /var/www/html/health - for p in data api/users api/data/test; do - mkdir -p "/var/www/html/$(dirname $p)" - echo '{"ok":true}' > "/var/www/html/$p" - done - - nginx -t && nginx - - # Attendre nginx - for i in $(seq 1 20); do - curl -sf http://localhost/health >/dev/null 2>&1 && break - sleep 0.5 - done - pass "nginx démarré" -} - -# ── Démarrage ja4ebpf ───────────────────────────────────────────────────────── +# ── ja4ebpf ──────────────────────────────────────────────────────────────────── start_ja4ebpf() { log "Démarrage ja4ebpf..." - pkill ja4ebpf 2>/dev/null || true sleep 1 - - # Créer la config - cat > /tmp/ja4ebpf.yml << 'EOF' + + local ssl_lib="" + for lib in /usr/lib64/libssl.so.3 /usr/lib64/libssl.so.1.1 /usr/lib/libssl.so.3 /usr/lib/libssl.so.1.1; do + [ -f "$lib" ] && { ssl_lib="$lib"; break; } + done + [ -z "$ssl_lib" ] && ssl_lib="/usr/lib64/libssl.so.3" + + cat > /tmp/ja4ebpf.yml << EOF interface: eth0 -ssl_lib_path: "/usr/lib64/libssl.so.3" +ssl_lib_path: "${ssl_lib}" clickhouse: - dsn: "clickhouse://default:@localhost:9000/ja4_logs" + dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs" batch_size: 100 flush_secs: 1 correlation: @@ -146,169 +118,340 @@ log: level: "info" format: "json" EOF - - # Lancer avec les capabilities nécessaires - # Dans la VM (root), on peut lancer directement + JA4EBPF_CONFIG=/tmp/ja4ebpf.yml ja4ebpf > /tmp/ja4ebpf.log 2>&1 & JA4EBPF_PID=$! - sleep 3 + if ! 
kill -0 "$JA4EBPF_PID" 2>/dev/null; then fail "ja4ebpf s'est arrêté immédiatement" - cat /tmp/ja4ebpf.log | tail -10 + tail -10 /tmp/ja4ebpf.log return 1 fi - + log "ja4ebpf démarré (PID $JA4EBPF_PID)" - - # Vérifier les uprobes dans tracefs + + # Vérifier XDP + if ip link show dev eth0 2>/dev/null | grep -q "xdp"; then + local xdp_info + xdp_info=$(ip link show dev eth0 | grep "prog/xdp" | sed 's/^[[:space:]]*//') + pass "XDP attaché : $xdp_info" + else + warn "Aucun XDP sur eth0" + bpftool prog show name capture_xdp 2>/dev/null || true + fi +} + +# ═════════════════════════════════════════════════════════════════════════════ +# Stack : nginx +# ═════════════════════════════════════════════════════════════════════════════ +setup_nginx() { + log "Configuration nginx avec TLS..." + gen_tls_cert nginx + setup_docroot + cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf + mkdir -p /run/nginx + nginx -t && nginx + for i in $(seq 1 20); do + curl -sf http://localhost/health >/dev/null 2>&1 && break + sleep 0.5 + done + pass "nginx démarré" +} + +stop_nginx() { nginx -s stop 2>/dev/null || true; } + +# ═════════════════════════════════════════════════════════════════════════════ +# Stack : apache +# ═════════════════════════════════════════════════════════════════════════════ +setup_apache() { + log "Configuration Apache httpd avec TLS..." + gen_tls_cert apache + setup_docroot + + if command -v httpd >/dev/null 2>&1; then + if ! 
httpd -M 2>/dev/null | grep -q http2_module; then + echo "LoadModule http2_module modules/mod_http2.so" \ + >> /etc/httpd/conf.modules.d/00-base.conf 2>/dev/null || true + fi + fi + mkdir -p /run/httpd /var/log/httpd + + cp "$PROJECT/tests/integration/apache/platform/httpd-ssl.conf" \ + /etc/httpd/conf.d/ssl.conf 2>/dev/null || true + + httpd -t 2>&1 && httpd -DFOREGROUND & + sleep 2 + for i in $(seq 1 20); do + curl -sf http://localhost/health >/dev/null 2>&1 && break + sleep 0.5 + done + pass "Apache httpd démarré" +} + +stop_apache() { pkill httpd 2>/dev/null || true; } + +# ═════════════════════════════════════════════════════════════════════════════ +# Stack : hitch + varnish +# ═════════════════════════════════════════════════════════════════════════════ +setup_hitch_varnish() { + log "Configuration hitch + Varnish..." + gen_tls_cert hitch + mkdir -p /etc/hitch + cat /etc/pki/tls/private/hitch.key /etc/pki/tls/certs/hitch.crt \ + > /etc/hitch/hitch.pem + + cat > /etc/hitch/hitch.conf << 'HCONF' +frontend = "[*]:443" +backend = "[127.0.0.1]:6081" +pem-file = "/etc/hitch/hitch.pem" +write-proxy-v1 = on +tls-protos = TLSv1.2 TLSv1.3 +ciphers = "ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256:TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256" +alpn-protos = "h2,http/1.1" +workers = 2 +user = "nobody" +daemon = off +log-level = 1 +syslog = off +HCONF + + mkdir -p /etc/varnish + cp "$PROJECT/tests/integration/hitch-varnish/platform/varnish.vcl" \ + /etc/varnish/default.vcl 2>/dev/null || { + cat > /etc/varnish/default.vcl << 'VCL' +vcl 4.1; +backend default { .host = "127.0.0.1"; .port = "8080"; } +sub vcl_deliver { + set resp.http.Via = "1.1 varnish"; + set resp.http.X-Client-IP = client.ip; +} +VCL + } + + setup_docroot + + # Backend HTTP (port 8080) + python3 -c " +import http.server, socketserver, json +class H(http.server.BaseHTTPRequestHandler): + def log_message(self, *a): pass + def do_GET(self): + body = 
json.dumps({'status':'ok','stack':'hitch-varnish','path':self.path}).encode() + self.send_response(200) + self.send_header('Content-Type','application/json') + self.send_header('Content-Length',len(body)) + self.end_headers() + self.wfile.write(body) + def do_POST(self): + n = int(self.headers.get('Content-Length',0)) + self.rfile.read(n) + body = b'{\"result\":\"accepted\"}' + self.send_response(200) + self.send_header('Content-Type','application/json') + self.send_header('Content-Length',len(body)) + self.end_headers() + self.wfile.write(body) +with socketserver.TCPServer(('127.0.0.1', 8080), H) as s: + s.serve_forever() +" & sleep 1 - if grep -q "ssl" /sys/kernel/tracing/uprobe_events 2>/dev/null; then - pass "Uprobes SSL attachés dans tracefs" - else - warn "Uprobes non visibles dans tracefs (peuvent être actifs quand même)" - fi - - # Vérifier accept4 tracepoint - if grep -q "accept4" /sys/kernel/tracing/events/syscalls 2>/dev/null; then - pass "Tracepoints accept4 disponibles" - else - warn "Tracepoints accept4 non trouvés" - fi + + varnishd -F -f /etc/varnish/default.vcl \ + -a "127.0.0.1:6081,PROXY" \ + -p feature=+http2 \ + -s malloc,64m \ + -T 127.0.0.1:6082 & + sleep 2 + + hitch --config=/etc/hitch/hitch.conf & + sleep 2 + + for i in $(seq 1 20); do + curl -skf https://localhost/health >/dev/null 2>&1 && break + sleep 0.5 + done + pass "hitch + Varnish démarrés" } -# ── Génération de trafic ─────────────────────────────────────────────────────── -generate_traffic() { - log "Génération du trafic (HTTP/1.0 + HTTP/1.1 + HTTP/2)..." 
- - # Trafic HTTP/1.1 (HTTP) - for path in / /health /data /api/users; do - curl -sf "http://localhost$path" >/dev/null 2>&1 || true - curl -sf -X POST "http://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true - done - - # Trafic HTTPS/1.1 - for path in / /health /data /api/users; do - curl -sf -k "https://localhost$path" >/dev/null 2>&1 || true - curl -sf -k -X POST "https://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true - curl -sf -k -X PUT "https://localhost/data" >/dev/null 2>&1 || true - curl -sf -k -X DELETE "https://localhost/data/1" >/dev/null 2>&1 || true - curl -sf -k -X HEAD "https://localhost$path" >/dev/null 2>&1 || true - done - - # Trafic HTTP/2 - if command -v python3 >/dev/null 2>&1 && python3 -c "import httpx" 2>/dev/null; then - python3 << 'PYEOF' -import httpx, ssl, warnings -warnings.filterwarnings("ignore") -ctx = ssl.create_default_context() -ctx.check_hostname = False -ctx.verify_mode = ssl.CERT_NONE -with httpx.Client(http2=True, verify=False) as client: - for path in ["/", "/health", "/data"]: - try: client.get(f"https://localhost{path}") - except: pass - try: client.post("https://localhost/api/data", json={"test": "h2"}) - except: pass -PYEOF - pass "Trafic HTTP/2 généré" - fi - - # Attendre le flush ja4ebpf → ClickHouse - log "Attente flush ja4ebpf (15s)..." - sleep 15 - pass "Trafic généré" +stop_hitch_varnish() { + pkill hitch 2>/dev/null || true + pkill varnishd 2>/dev/null || true + pkill -f "TCPServer.*8080" 2>/dev/null || true } -# ── Vérification ClickHouse ──────────────────────────────────────────────────── +# ═════════════════════════════════════════════════════════════════════════════ +# Vérification ClickHouse +# ═════════════════════════════════════════════════════════════════════════════ verify_db() { log "Vérification des données dans ClickHouse..." 
- - ch_query() { - curl -sf "http://localhost:8123/" \ - --data-urlencode "query=$1" \ - --data-urlencode "database=ja4_logs" \ - -o /dev/null -w '%{http_code}' 2>/dev/null || echo "0" - } - + ch_val() { curl -sf "http://localhost:8123/?database=ja4_logs" \ --data-urlencode "query=$1" 2>/dev/null | tr -d ' \n' || echo "0" } - + + # http_logs_raw (données brutes avant MV) + local raw_count + raw_count=$(ch_val "SELECT count() FROM http_logs_raw") + if [ "${raw_count:-0}" -gt 0 ] 2>/dev/null; then + pass "http_logs_raw : $raw_count lignes" + else + fail "http_logs_raw vide — ja4ebpf n'a rien capturé" + log " Logs ja4ebpf :" + tail -10 /tmp/ja4ebpf.log 2>/dev/null | sed 's/^/ /' + fi + # L3/L4 ttl=$(ch_val "SELECT count() FROM http_logs WHERE ip_meta_ttl > 0") - [ "${ttl:-0}" -gt 0 ] && pass "L3/L4 TTL capturé ($ttl lignes)" || fail "L3/L4 TTL absent" - + [ "${ttl:-0}" -gt 0 ] 2>/dev/null && pass "L3/L4 TTL ($ttl)" || fail "L3/L4 TTL absent" + mss=$(ch_val "SELECT count() FROM http_logs WHERE tcp_meta_mss > 0") - [ "${mss:-0}" -gt 0 ] && pass "TCP MSS capturé ($mss lignes)" || fail "TCP MSS absent" - + [ "${mss:-0}" -gt 0 ] 2>/dev/null && pass "TCP MSS ($mss)" || fail "TCP MSS absent" + # TLS ja4=$(ch_val "SELECT count() FROM http_logs WHERE ja4 != ''") - [ "${ja4:-0}" -gt 0 ] && pass "JA4 fingerprint capturé ($ja4 lignes)" || fail "JA4 absent" - + [ "${ja4:-0}" -gt 0 ] 2>/dev/null && pass "JA4 fingerprint ($ja4)" || fail "JA4 absent" + sni=$(ch_val "SELECT count() FROM http_logs WHERE tls_sni != ''") - [ "${sni:-0}" -gt 0 ] && pass "TLS SNI capturé ($sni lignes)" || warn "TLS SNI absent" - - # L7 HTTP — c'est ici que ça devrait marcher dans la VM + [ "${sni:-0}" -gt 0 ] 2>/dev/null && pass "TLS SNI ($sni)" || warn "TLS SNI absent" + + # L7 HTTP method=$(ch_val "SELECT count() FROM http_logs WHERE method != ''") - [ "${method:-0}" -gt 0 ] && pass "L7 méthodes HTTP capturées ($method lignes)" \ - || fail "L7 méthodes HTTP ABSENT — uprobe SSL_read ne fonctionne pas" - + 
[ "${method:-0}" -gt 0 ] 2>/dev/null && pass "L7 HTTP ($method)" || fail "L7 HTTP ABSENT" + path=$(ch_val "SELECT count() FROM http_logs WHERE path != ''") - [ "${path:-0}" -gt 0 ] && pass "L7 path HTTP capturé ($path lignes)" || fail "L7 path absent" - + [ "${path:-0}" -gt 0 ] 2>/dev/null && pass "L7 path ($path)" || fail "L7 path absent" + status=$(ch_val "SELECT count() FROM http_logs WHERE status_code > 0") - [ "${status:-0}" -gt 0 ] && pass "status_code capturé ($status lignes)" || warn "status_code absent" - - sig=$(ch_val "SELECT count() FROM http_logs WHERE header_order_signature != ''") - [ "${sig:-0}" -gt 0 ] && pass "header_order_signature capturé ($sig lignes)" || warn "header_order_sig absent" - - # Méthodes HTTP distinctes + [ "${status:-0}" -gt 0 ] 2>/dev/null && pass "status_code ($status)" || warn "status_code absent" + methods=$(ch_val "SELECT groupArray(method) FROM (SELECT DISTINCT method FROM http_logs WHERE method != '')") - log "Méthodes HTTP vues : $methods" - - # Lignes totales + log "Méthodes HTTP : $methods" + total=$(ch_val "SELECT count() FROM http_logs") - pass "Total lignes http_logs : $total" + pass "Total http_logs : $total" +} + +# ═════════════════════════════════════════════════════════════════════════════ +# Nettoyage +# ═════════════════════════════════════════════════════════════════════════════ +stop_stack() { + pkill ja4ebpf 2>/dev/null || true + case "$STACK" in + nginx) stop_nginx ;; + apache) stop_apache ;; + hitch-varnish) stop_hitch_varnish ;; + esac + docker rm -f ja4-clickhouse 2>/dev/null || true } -# ── Nettoyage ───────────────────────────────────────────────────────────────── cleanup() { if [ "$KEEP_RUNNING" != "true" ]; then log "Nettoyage..." 
- pkill ja4ebpf 2>/dev/null || true - nginx -s stop 2>/dev/null || true - docker rm -f ja4-clickhouse 2>/dev/null || true + stop_stack fi } trap cleanup EXIT -# ── Main ────────────────────────────────────────────────────────────────────── -mkdir -p "$RESULTS_DIR" +# ═════════════════════════════════════════════════════════════════════════════ +# Phase 1 : démarrage des services +# ═════════════════════════════════════════════════════════════════════════════ +do_start() { + echo "" + echo "╔══════════════════════════════════════════╗" + echo "║ Phase 1 : Démarrage — $STACK" + echo "╚══════════════════════════════════════════╝" + echo "" -echo "" -echo "╔══════════════════════════════════════════╗" -echo "║ ja4ebpf VM Test Suite — Rocky Linux 9 ║" -echo "╚══════════════════════════════════════════╝" -echo "" + # Vérifier prérequis + command -v ja4ebpf >/dev/null 2>&1 || { + log "Rebuild ja4ebpf..." + cd "$PROJECT/services/ja4ebpf" + GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3 + GOWORK=off CGO_ENABLED=0 go build -o /tmp/ja4ebpf_new ./cmd/ja4ebpf/ && mv /tmp/ja4ebpf_new /usr/local/bin/ja4ebpf + } + command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; } -check_prerequisites -start_clickhouse -setup_nginx -start_ja4ebpf -generate_traffic -verify_db + start_clickhouse -echo "" -echo "════════════════════════════════════════════" -echo -e " ${GREEN}OK${RESET}: $PASS_COUNT ${YELLOW}WARN${RESET}: $WARN_COUNT ${RED}FAIL${RESET}: $FAIL_COUNT" -if [ "$FAIL_COUNT" -eq 0 ]; then - echo -e " ${GREEN}${BOLD}Tous les tests réussis !${RESET}" - exit 0 -else - echo -e " ${RED}${BOLD}$FAIL_COUNT tests échoués.${RESET}" - echo "Logs ja4ebpf :" - tail -20 /tmp/ja4ebpf.log 2>/dev/null || true - exit 1 -fi + case "$STACK" in + nginx) setup_nginx ;; + apache) setup_apache ;; + hitch-varnish) setup_hitch_varnish ;; + *) fail "Stack inconnue: $STACK"; exit 1 ;; + esac + + start_ja4ebpf + + # Afficher l'IP pour le host + local eth0_ip + 
eth0_ip=$(get_eth0_ip) + echo "" + echo " ┌─────────────────────────────────────────┐" + echo " │ Services prêts ! │" + echo " │ IP eth0 : $eth0_ip" + echo " │ HTTP : http://$eth0_ip:80" + echo " │ HTTPS : https://$eth0_ip:443" + echo " └─────────────────────────────────────────┘" + echo "" +} + +# ═════════════════════════════════════════════════════════════════════════════ +# Phase 3 : vérification +# ═════════════════════════════════════════════════════════════════════════════ +do_verify() { + echo "" + echo "╔══════════════════════════════════════════╗" + echo "║ Phase 3 : Vérification — $STACK" + echo "╚══════════════════════════════════════════╝" + echo "" + + verify_db + + echo "" + echo "════════════════════════════════════════════" + echo -e " ${GREEN}OK${RESET}: $PASS_COUNT ${YELLOW}WARN${RESET}: $WARN_COUNT ${RED}FAIL${RESET}: $FAIL_COUNT" + if [ "$FAIL_COUNT" -eq 0 ]; then + echo -e " ${GREEN}${BOLD}$STACK : Tous les tests réussis !${RESET}" + else + echo -e " ${RED}${BOLD}$STACK : $FAIL_COUNT tests échoués${RESET}" + tail -20 /tmp/ja4ebpf.log 2>/dev/null || true + fi +} + +# ═════════════════════════════════════════════════════════════════════════════ +# Main +# ═════════════════════════════════════════════════════════════════════════════ + +case "$MODE" in + start) + do_start + echo " En attente de trafic depuis le host..." 
+ # Attendre que le host génère le trafic + # Le fichier /tmp/ja4ebpf-traffic-done est créé par le host après le trafic + for i in $(seq 1 120); do + [ -f /tmp/ja4ebpf-traffic-done ] && break + sleep 1 + done + do_verify + ;; + verify) + do_verify + ;; + *) + # Mode legacy : tout dans la VM (trafic local uniquement) + # Note : XDP sur eth0 ne capturera PAS le trafic localhost + do_start + log "ATTENTION : le trafic localhost n'est pas capturé par XDP/eth0" + log "Utilisez 'make test-vm-matrix' pour le test complet avec trafic host" + # Générer quand même du trafic pour les uprobes + for path in / /health; do + curl -sf -k "https://localhost$path" >/dev/null 2>&1 || true + done + sleep 10 + do_verify + ;; +esac + +[ "$FAIL_COUNT" -eq 0 ] && exit 0 || exit 1