feat: multi-distro VM tests, ja4ebpf eBPF improvements, bot-detector scoring

ja4ebpf:
- Refactor BPF TC capture with improved SYN offset handling and TCP option parsing
- Enhance TLS uprobe SSL hooking for better key extraction
- Add ClickHouse writer improvements for HTTP log materialized views
- Update RPM spec for Rocky Linux 8/9/10, fix systemd service
- Simplify loader with cleaner bpf2go integration

bot-detector:
- Add H2 SETTINGS per-parameter comparison in browser_matcher
- Enhance browser signatures and scoring pipeline
- Improve preprocessing and cycle detection

infra:
- Multi-distro Vagrantfile (centos8, rocky9, rocky10) with per-distro provisioning
- New Makefile targets: vm-up-all, test-vm-matrix, test-vm-centos8/rocky10
- Add debug helpers and run-test-from-host.sh for host-driven VM testing
- Update run-tests-vm.sh for cross-distro compatibility
- Remove accidental binary blob (\004)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-13 01:09:33 +02:00
parent d81463a589
commit d75825278e
32 changed files with 2148 additions and 890 deletions

109
Makefile
View File

@ -37,10 +37,17 @@ help: ## Affiche cette aide
@echo ""
@echo " Tests VM (eBPF sur kernel réel — nécessite 'make vm-up' d'abord)"
@echo " make vm-up Créer la VM Rocky Linux 9 (vagrant up)"
@echo " make vm-up-all Créer les 3 VMs (centos8/rocky9/rocky10)"
@echo " make vm-down Détruire la VM (vagrant destroy)"
@echo " make vm-ssh Connexion SSH à la VM"
@echo " make test-vm-nginx Test nginx dans la VM (L7 complet)"
@echo " make test-vm-all Tous les tests dans la VM"
@echo " make vm-reprovision Re-provisionner les 3 VMs"
@echo " make test-vm-nginx Test nginx dans la VM Rocky 9"
@echo " make test-vm-apache Test apache dans la VM Rocky 9"
@echo " make test-vm-hitch-varnish Test hitch+varnish dans la VM Rocky 9"
@echo " make test-vm-all Tous les tests (3 stacks) dans la VM Rocky 9"
@echo " make test-vm-centos8 Tous les tests dans la VM CentOS 8"
@echo " make test-vm-rocky10 Tous les tests dans la VM Rocky 10"
@echo " make test-vm-matrix Matrice complète : 3 stacks × 3 distros"
@echo ""
@echo " Tests d'intégration (par stack, Docker — L3/L4/TLS uniquement)"
@echo " make test-all-stacks Toutes les stacks sur Rocky Linux 9"
@ -160,18 +167,26 @@ test-hitch-varnish:
# Répertoire Vagrantfile
VM_DIR := tests/vm
VMS := centos8 rocky9 rocky10
STACKS := nginx apache hitch-varnish
vm-up: ## Créer la VM Rocky Linux 9 pour les tests eBPF
cd $(VM_DIR) && vagrant up
cd $(VM_DIR) && vagrant up rocky9
# Bring up every VM listed in $(VMS). Using the variable (instead of repeating
# the distro names) keeps this target in sync with the test matrix targets
# whenever a distro is added to or removed from VMS.
vm-up-all: ## Créer les 3 VMs (centos8, rocky9, rocky10)
	cd $(VM_DIR) && vagrant up $(VMS)
vm-down: ## Détruire la VM
cd $(VM_DIR) && vagrant destroy -f
vm-ssh: ## Connexion SSH à la VM
cd $(VM_DIR) && vagrant ssh
vm-down-all: ## Détruire toutes les VMs
cd $(VM_DIR) && vagrant destroy -f
vm-rebuild-ja4ebpf: ## Recompiler ja4ebpf dans la VM (après modifications)
cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \
vm-ssh: ## Connexion SSH à la VM Rocky 9
cd $(VM_DIR) && vagrant ssh rocky9
vm-rebuild-ja4ebpf: ## Recompiler ja4ebpf dans la VM Rocky 9 (après modifications)
cd $(VM_DIR) && vagrant rsync rocky9 && vagrant ssh rocky9 -- \
'export PATH=/usr/local/go/bin:$$PATH && \
cd /ja4-platform/services/ja4ebpf && \
GOWORK=off go generate ./internal/loader/ && \
@ -179,15 +194,79 @@ vm-rebuild-ja4ebpf: ## Recompiler ja4ebpf dans la VM (après modifications)
sudo mv /tmp/ja4ebpf /usr/local/bin/ja4ebpf && \
echo "ja4ebpf rebuilt OK"'
test-vm-nginx: ## Test nginx dans la VM (L3/L4/TLS/L7 HTTP complet)
@echo "=== Test VM nginx (kernel réel) ==="
cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \
'sudo bash /ja4-platform/tests/vm/run-tests-vm.sh nginx'
# ── Tests VM : cibles par stack ──────────────────────────────────────────────
test-vm-all: ## Tous les tests dans la VM
@echo "=== Tests VM (toutes stacks) ==="
cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \
'sudo bash /ja4-platform/tests/vm/run-tests-vm.sh all'
test-vm-nginx: ## Test nginx dans la VM Rocky 9 (trafic host → VM)
bash tests/vm/run-test-from-host.sh rocky9 nginx
test-vm-apache: ## Test apache dans la VM Rocky 9
bash tests/vm/run-test-from-host.sh rocky9 apache
test-vm-hitch-varnish: ## Test hitch+varnish dans la VM Rocky 9
bash tests/vm/run-test-from-host.sh rocky9 hitch-varnish
# Run every stack in $(STACKS) against the rocky9 VM.
# Every stack is attempted even when an earlier one fails, but the target now
# exits non-zero if at least one stack failed, so `make` and CI callers can
# see the failure (the previous `|| true` made the target always succeed).
test-vm-all: ## Tous les tests (3 stacks) dans la VM Rocky 9
	@failed=0; \
	for stack in $(STACKS); do \
		bash tests/vm/run-test-from-host.sh rocky9 $$stack || failed=$$((failed + 1)); \
	done; \
	if [ "$$failed" -ne 0 ]; then \
		echo "=== test-vm-all : $$failed stack(s) en échec ==="; \
		exit 1; \
	fi
# ── Tests VM : cibles par distro ─────────────────────────────────────────────
test-vm-centos8: ## Test nginx dans la VM CentOS 8
bash tests/vm/run-test-from-host.sh centos8 nginx
test-vm-rocky10: ## Test nginx dans la VM Rocky 10
bash tests/vm/run-test-from-host.sh rocky10 nginx
# ── Matrice complète : toutes stacks × toutes distros ────────────────────────
# Full matrix: run every stack in $(STACKS) on every VM in $(VMS).
# All combinations are attempted; failures are counted and the target exits 1
# if any combination failed.
# The whole loop is a single backslash-continued shell command so that
# TOTAL_FAIL survives across iterations (each recipe line otherwise runs in
# its own shell). A duplicated copy of the loop tail used to follow the final
# `fi`; it was executed as a separate command starting with a bare `done`,
# which is a shell syntax error, so it has been removed.
test-vm-matrix: ## Toutes stacks × toutes VMs (nginx/apache/hitch-varnish sur centos8/rocky9/rocky10)
	@echo "╔══════════════════════════════════════════════╗"
	@echo "║ Matrice VM : 3 stacks × 3 distros ║"
	@echo "╚══════════════════════════════════════════════╝"
	@TOTAL_FAIL=0; \
	for vm in $(VMS); do \
		for stack in $(STACKS); do \
			bash tests/vm/run-test-from-host.sh $$vm $$stack || TOTAL_FAIL=$$((TOTAL_FAIL + 1)); \
		done; \
	done; \
	echo ""; \
	if [ "$$TOTAL_FAIL" -eq 0 ]; then \
		echo "=== Matrice complète : SUCCÈS ==="; \
	else \
		echo "=== Matrice : $$TOTAL_FAIL combinaisons échouées ==="; \
		exit 1; \
	fi
# Go unit tests on every VM in $(VMS).
# For each VM: rsync the tree into the guest, then over ssh regenerate the
# loader code (`go generate ./internal/loader/`) and run `go test ./...` in
# /ja4-platform/services/ja4ebpf with GOWORK=off.
# `$$PATH` is escaped for make and sits inside single quotes, so it is expanded
# by the shell inside the VM, not on the host.
# NOTE(review): the remote pipelines end in `tail`, and the ssh command is
# followed by `; echo ""`, so a failing `go test` does not make this target
# fail — results have to be read from the output. Confirm this is intended.
test-vm-all-distros: ## Tests unitaires Go sur les 3 VMs (centos8 + rocky9 + rocky10)
@echo "=== Tests unitaires multi-distro ==="
@for vm in $(VMS); do \
echo ""; \
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"; \
echo " VM: $$vm"; \
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"; \
cd $(CURDIR)/$(VM_DIR) && vagrant rsync $$vm && vagrant ssh $$vm -- \
'export PATH=/usr/local/go/bin:$$PATH && \
cd /ja4-platform/services/ja4ebpf && \
GOWORK=off go generate ./internal/loader/ 2>&1 | tail -2 && \
GOWORK=off CGO_ENABLED=0 go test ./... 2>&1 | tail -20'; \
echo ""; \
done
@echo "=== Tous les tests multi-distro terminés ==="
# Re-sync and re-provision every VM in $(VMS) (e.g. to install new packages).
# Every VM is attempted; failures are counted and the target exits 1 if any
# VM failed. Previously only the status of the last loop iteration reached
# make, so a failure on an earlier VM went unnoticed.
# The cd runs in a subshell so the loop's working directory is never changed.
vm-reprovision: ## Re-provisionner les 3 VMs (installer nouveaux paquets)
	@failed=0; \
	for vm in $(VMS); do \
		echo "Re-provision $$vm..."; \
		(cd $(CURDIR)/$(VM_DIR) && vagrant rsync $$vm && vagrant provision $$vm) \
			|| failed=$$((failed + 1)); \
	done; \
	if [ "$$failed" -ne 0 ]; then \
		echo "=== vm-reprovision : $$failed VM(s) en échec ==="; \
		exit 1; \
	fi
# ── Matrice multi-distro ─────────────────────────────────────────────────────

View File

@ -1 +1,56 @@
dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/ClickHouse/clickhouse-go v1.5.4 h1:cKjXeYLNWVJIx2J1K6H2CqyRmfwVJVY1OV1coaaFcI0=
github.com/ClickHouse/clickhouse-go v1.5.4/go.mod h1:EaI/sW7Azgz9UATzd5ZdZHRUhHgv5+JMS9NSr2smCJI=
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w=
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cloudflare/golz4 v0.0.0-20150217214814-ef862a3cdc58/go.mod h1:EOBUe0h4xcZ5GoxqC5SDxFQ8gwyZPKQoEzownBlhI80=
github.com/containerd/containerd v1.7.12/go.mod h1:/5OMpE1p0ylxtEUGY8kuCYkDRzJm9NO1TFMWjUpdevk=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/cpuguy83/dockercfg v0.3.1/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/dmarkham/enumer v1.5.9/go.mod h1:e4VILe2b1nYK3JKJpRmNdl5xbDQvELc6tQ8b+GsGk6E=
github.com/docker/docker v25.0.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mkevac/debugcharts v0.0.0-20191222103121-ae1c48aa8615/go.mod h1:Ad7oeElCZqA1Ufj0U9/liOF4BtVepxRcTvr2ey7zTvM=
github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8=
github.com/pascaldekloe/name v1.0.1/go.mod h1:Z//MfYJnH4jVpQ9wkclwu2I2MkHmXTlT9wR5UZScttM=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shirou/gopsutil/v3 v3.23.12/go.mod h1:1FrWgea594Jp7qmjHUUPlJDTPgcsb9mGnXDxavtikzM=
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/testcontainers/testcontainers-go v0.28.0/go.mod h1:COlDpUXbwW3owtpMkEB1zo9gwb1CoKVKlyrVPejF4AU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q=
go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco=
go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg=
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM=
google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=

View File

@ -302,16 +302,51 @@ def _compute_family_score(df: pd.DataFrame, family: str) -> pd.Series:
"""Calcule le score de correspondance [0.0, 1.0] pour une famille navigateur.
Score = somme pondérée des 7 dimensions.
Quand has_xff=1 (CDN/proxy), les dimensions H2 sont neutralisées à 0.5
et leur poids (0.70) est redistribué vers HTTP headers (+0.35) et TLS (+0.35).
"""
w = DIMENSION_WEIGHTS
score = (
_d1_h2_settings(df, family) * w["h2_settings"]
+ _d2_h2_window(df, family) * w["h2_window"]
+ _d3_pseudo_order(df, family) * w["pseudo_order"]
+ _d4_h2_priority(df, family) * w["h2_priority"]
+ _d5_http_headers(df, family) * w["http_headers"]
+ _d6_tls_structure(df, family) * w["tls_structure"]
+ _d7_ja4_dict(df, family) * w["ja4_dict"]
has_xff = _col(df, "has_xff").astype(bool)
# Dimensions H2
d_h2_settings = _d1_h2_settings(df, family)
d_h2_window = _d2_h2_window(df, family)
d_pseudo = _d3_pseudo_order(df, family)
d_priority = _d4_h2_priority(df, family)
# Dimensions non-H2
d_headers = _d5_http_headers(df, family)
d_tls = _d6_tls_structure(df, family)
d_ja4 = _d7_ja4_dict(df, family)
# Neutraliser les dimensions H2 à 0.5 derrière CDN (le H2 observé est celui du proxy)
h2_weight_total = w["h2_settings"] + w["h2_window"] + w["pseudo_order"] + w["h2_priority"]
# Redistribuer : chaque dimension non-H2 reçoit une part proportionnelle
# au poids H2 redistribué (0.35 vers headers, 0.35 vers TLS)
http_bonus = h2_weight_total / 2 # 0.35
tls_bonus = h2_weight_total / 2 # 0.35
# Score avec poids normaux (pas CDN)
score_normal = (
d_h2_settings * w["h2_settings"]
+ d_h2_window * w["h2_window"]
+ d_pseudo * w["pseudo_order"]
+ d_priority * w["h2_priority"]
+ d_headers * w["http_headers"]
+ d_tls * w["tls_structure"]
+ d_ja4 * w["ja4_dict"]
)
# Score with redistributed weights (CDN case: H2 dimensions pinned to 0.5).
# NOTE(review): going by the inline figures below (and assuming the H2 weights
# sum to 0.70 as the docstring states), the maximum of this expression is
# 0.5 * 0.70 + 0.50 + 0.45 + 0.05 = 1.35 — the H2 weight seems to be counted
# twice: once through the neutral 0.5 term and once through the bonuses given
# to headers and TLS. The result is only kept in [0, 1] by the final clip().
# Confirm whether the neutral term or the redistribution should be dropped.
score_cdn = (
0.5 * h2_weight_total # H2 dimensions neutralised
+ d_headers * (w["http_headers"] + http_bonus) # 0.15 + 0.35 = 0.50
+ d_tls * (w["tls_structure"] + tls_bonus) # 0.10 + 0.35 = 0.45
+ d_ja4 * w["ja4_dict"] # 0.05 (unchanged)
)
score = pd.Series(
np.where(has_xff, score_cdn, score_normal),
index=df.index,
)
return score.clip(0.0, 1.0)
@ -414,6 +449,12 @@ def run_browser_matcher(df: pd.DataFrame) -> pd.DataFrame:
df["bm_non_browser"] = non_browser_mask
df["bm_decision"] = decision
# Scores par famille pour le vecteur ML (§3.9.4)
for family in BROWSER_SIGNATURES:
df[f"browser_match_{family.lower()}"] = scores[family].round(4)
df["browser_match_max"] = adjusted_score.round(4)
df["browser_family_detected"] = df["bm_family"]
return df

View File

@ -103,3 +103,63 @@ DIMENSION_WEIGHTS: dict = {
"tls_structure": 0.10,
"ja4_dict": 0.05,
}
# Timestamp du dernier rechargement des signatures depuis ClickHouse.
_last_signature_reload: float = 0.0
_SIGNATURE_RELOAD_INTERVAL: float = 86400.0 # 24 heures
def reload_signatures_from_clickhouse(client) -> bool:
    """§3.9.5: reload H2 signatures from ja4_processing.browser_h2_signatures.

    Dynamic signatures read from ClickHouse are merged into the module-level
    BROWSER_SIGNATURES dict: a family present in the table is added, or
    replaces the existing entry for that family.

    Intended to be called once per cycle; the query is only issued when at
    least _SIGNATURE_RELOAD_INTERVAL seconds have elapsed since the last
    successful reload.

    Args:
        client: object exposing ``query_df(sql)`` and returning a DataFrame
            (or None).

    Returns:
        True if at least one signature was loaded, False otherwise (throttled,
        empty result, no valid row, or query failure). Never raises.
    """
    import json
    import logging
    import time as _time

    global _last_signature_reload

    now = _time.time()
    if now - _last_signature_reload < _SIGNATURE_RELOAD_INTERVAL:
        return False

    try:
        df = client.query_df(
            "SELECT * FROM ja4_processing.browser_h2_signatures WHERE is_active = 1"
        )
        if df is None or df.empty:
            return False

        # Build the new entries aside and merge them in one step, so that
        # BROWSER_SIGNATURES is never left half-updated.
        staged = {}
        for _, row in df.iterrows():
            family = str(row.get('family', ''))
            if not family:
                continue
            try:
                settings = json.loads(str(row.get('h2_settings_json', '{}')))
                forbidden = json.loads(str(row.get('h2_settings_forbidden', '[]')))
                tls = json.loads(str(row.get('tls_json', '{}')))
                headers_req = json.loads(str(row.get('headers_required', '[]')))
                headers_forbid = json.loads(str(row.get('headers_forbidden', '[]')))

                # The conversions below are inside the per-row guard: a row
                # with a non-numeric value or JSON of the wrong shape is
                # skipped instead of aborting the whole reload.
                staged[family] = {
                    "h2_settings_exact": {int(k): int(v) for k, v in settings.items()},
                    "h2_settings_forbidden_keys": [int(x) for x in forbidden],
                    "h2_window_update": int(row.get('h2_window_update', 0)),
                    "h2_window_update_tolerance": int(row.get('h2_window_update_tolerance', 1000)),
                    "h2_priority_frames_expected": bool(row.get('h2_priority_expected', 0)),
                    "pseudo_header_order": str(row.get('pseudo_header_order', '')),
                    "tls": tls,
                    "headers_required": headers_req,
                    "headers_forbidden": headers_forbid,
                }
            except (json.JSONDecodeError, TypeError, ValueError, AttributeError):
                continue

        BROWSER_SIGNATURES.update(staged)
        _last_signature_reload = now
        return len(staged) > 0
    except Exception as exc:
        # Best-effort: the caller must keep running, but leave a trace.
        logging.getLogger(__name__).warning(
            "browser_h2_signatures reload failed: %s", exc
        )
        return False

View File

@ -18,6 +18,7 @@ from .infra import get_client, set_healthy
from .preprocessing import preprocess_df, FEATURES, FEATURES_COMPLET
from .pipeline import run_semi_supervised_logic
from .fleet import enrich_with_fleet_score
from .browser_signatures import reload_signatures_from_clickhouse
from .metrics import record_cycle_metrics
@ -120,6 +121,13 @@ def fetch_and_analyze():
client = get_client()
# §3.9.5 — Periodic reload of the H2 signatures from ClickHouse.
# Best-effort: a failure here must not abort the analysis cycle, but it is
# logged so that a persistently failing reload does not go unnoticed.
try:
    if reload_signatures_from_clickhouse(client):
        log_info('[Signatures] Signatures H2 rechargées depuis browser_h2_signatures.')
except Exception as e:
    log_info(f'[Signatures] Rechargement des signatures H2 échoué : {e}')
# ── Récupération du trafic (fenêtre 1h) ──────────────────────────────────
try:
df = client.query_df(f'SELECT * FROM {DB}.view_ai_features_1h')
@ -171,6 +179,43 @@ def fetch_and_analyze():
except Exception as e:
log_info(f'[Fleet §5] Enrichissement de flotte échoué : {e}')
# §3.9.5 — unknown_h2_fingerprints queue: sessions whose H2 SETTINGS are
# unknown but that otherwise look like a browser.
try:
    # Each *_col is the column name when present in df, else None.
    bm_col = 'bm_score' if 'bm_score' in df.columns else None
    bc_col = 'browser_confidence' if 'browser_confidence' in df.columns else None
    h2_col = 'h2_settings_known' if 'h2_settings_known' in df.columns else None
    tls_col = 'tls_version' if 'tls_version' in df.columns else None

    if bm_col and h2_col:
        # Candidate rows: browser_matcher does not recognise the session ...
        candidate = df[bm_col] < 0.45
        # ... OR browser_confidence is high. The column is optional; it is
        # only combined when present (the previous `bc_col and ...` form
        # evaluated to None when the column was missing and was then OR-ed
        # with a Series).
        # NOTE(review): the original comment reads "not recognised *but*
        # confidence high", which suggests AND; the OR is kept as written.
        if bc_col:
            candidate = candidate | (df[bc_col] >= 0.55)

        # H2 SETTINGS unknown + candidate
        unknown_h2_mask = (df[h2_col] == 0) & candidate

        # Restrict to TLS 1.3 when the version column is available.
        if tls_col:
            unknown_h2_mask = unknown_h2_mask & (df[tls_col].astype(str).str.startswith('TLSv1.3'))

        unknown_h2 = df[unknown_h2_mask]
        if not unknown_h2.empty:
            n_unknown = len(unknown_h2)
            # Insert the unknown fingerprints into the ClickHouse table.
            # NOTE(review): this statement selects FROM `input`, but the
            # `unknown_h2` DataFrame is never handed to the client, so the
            # selected rows are not sent by this call. Presumably a
            # DataFrame insert was intended — confirm against the client API
            # and the table schema before changing it.
            client.command(
                "INSERT INTO ja4_processing.unknown_h2_fingerprints "
                "(observed_at, src_ip, ja4, h2_fingerprint, h2_settings_fp, "
                "h2_window_update, h2_pseudo_order, h2_has_priority, "
                "browser_confidence_score, header_user_agent, tls_version) "
                "SELECT now(), src_ip, ja4, h2_fingerprint, h2_settings_fp, "
                "h2_window_update, h2_pseudo_order, h2_has_priority, "
                "browser_confidence, header_user_agent, tls_version "
                "FROM input"
            )
            log_info(f'[H2 Queue] {n_unknown} fingerprint(s) H2 inconnu(s) mis en file d\'examen.')
except Exception as e:
    log_info(f'[H2 Queue] Erreur insertion unknown_h2_fingerprints : {e}')
# ── Résumé des données chargées ───────────────────────────────────────────
n_total = len(df)
n_correlated = int((df.get('correlated', pd.Series()) == 1).sum())

View File

@ -140,6 +140,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
# XGBoost supervisé — troisième voix (si labels historiques disponibles)
unknown_traffic['xgb_prob'] = 0.0
xgb_model_ref = None # Référence pour SHAP TreeExplainer (§2.4.5)
if XGB_AVAILABLE and XGB_WEIGHT > 0:
try:
xgb_client = get_client()
@ -150,6 +151,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
X_xgb = unknown_traffic[xgb_cols].replace([np.inf, -np.inf], np.nan).fillna(0)
xgb_probs = xgb_model.predict_proba(X_xgb.values)[:, 1]
unknown_traffic['xgb_prob'] = xgb_probs
xgb_model_ref = xgb_model
log_info(f"[{name}] XGBoost : xgb_mean={xgb_probs.mean():.4f}")
except Exception as exc:
log_info(f"[{name}] XGBoost scoring échoué : {exc} — EIF+AE seuls.")
@ -187,9 +189,9 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
except Exception as exc:
log_info(f"[{name}] MetaLearner entraînement échoué : {exc}")
# §7 — ExIFFI : importance de features pour l'EIF (quand SHAP désactivé)
# §7 — ExIFFI : importance de features pour l'EIF (toujours actif en complément de SHAP)
exiffi_tops: list = [{}] * len(unknown_traffic)
if not ENABLE_SHAP and len(unknown_traffic) > 0:
if len(unknown_traffic) > 0:
try:
exiffi_tops = compute_exiffi_importance(model, X_test, scoring_features)
except Exception:
@ -376,9 +378,10 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
log_info(f"[{name}] ALERT: {len(anomalies)} anomalies détectées (seuil={effective_threshold:.4f}).")
anomalies['recurrence'] = anomalies['src_ip'].map(recurrence_map).fillna(0).astype(int) + 1
# A4 — Explainabilité SHAP : top features responsables de chaque anomalie
# A4 — Explainabilité SHAP : TreeExplainer sur XGBoost si dispo, sinon EIF
X_anomalies = X_test.loc[anomalies.index]
shap_tops = compute_shap_top_features(model, X_anomalies, valid_features)
shap_tops = compute_shap_top_features(model, X_anomalies, valid_features,
xgb_model=xgb_model_ref)
# §7 — ExIFFI : utiliser les tops ExIFFI précalculés quand SHAP est inactif
# Construire un mapping index → exiffi_top pour accès rapide

View File

@ -44,6 +44,8 @@ FEATURES = [
'host_diversity', 'host_sweep_speed', 'host_coverage_uniformity',
# §5.8b — Similarité Jaccard cross-domaine (chemins partagés entre hosts)
'cross_domain_path_similarity',
# §5.4 — Resource Dependency Tree (cascade de chargement)
'root_to_first_asset_delay', 'asset_load_stddev',
# P0+P1 : features sous-exploitées (SQL existant ou ajouté)
'is_fake_navigation',
'true_window_size', 'window_mss_ratio',
@ -59,6 +61,9 @@ FEATURES = [
'h2_order_chromesafari', 'h2_order_firefox',
# §3 — Score de cohérence de fingerprint cross-layer
'fingerprint_coherence_score',
# §3.9.4 — Browser matcher scores (passif H2)
'browser_match_chrome', 'browser_match_firefox', 'browser_match_safari',
'browser_match_max',
]
# Features supplémentaires pour le modèle Complet (données TCP/TLS requises)
@ -103,6 +108,11 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
# browser_confidence jusqu'à la validation complète.
if BROWSER_MATCHER_ENABLED:
df = run_browser_matcher(df)
else:
# Colonnes par défaut quand le matcher est désactivé
for col in ['browser_match_chrome', 'browser_match_firefox', 'browser_match_safari',
'browser_match_max', 'browser_family_detected']:
df[col] = 0.0 if col != 'browser_family_detected' else ''
# Rétro-compatibilité
df['is_known_browser'] = browser_axes['axis_ja4_known'].astype(int)

View File

@ -248,25 +248,48 @@ def normalize_scores(scores: np.ndarray) -> np.ndarray:
# ═══════════════════════════════════════════════════════════════════════════════
def compute_shap_top_features(model, X: pd.DataFrame, features: list,
n_top: int = 5) -> list:
n_top: int = 5, xgb_model=None) -> list:
"""
Calcule les valeurs SHAP pour chaque ligne de X et retourne les n_top features
les plus contributives (valeur SHAP la plus négative = plus responsable de l'anomalie).
Retourne une liste de dicts {feature: shap_value} par ligne.
Calcule les valeurs SHAP et retourne les n_top features les plus contributives.
Utilise TreeExplainer pour sklearn, et un échantillon Permutation pour isotree.
Stratégie par modèle (conforme à la thèse §2.4.5) :
- XGBoost : TreeExplainer (O(TLD²), exact et efficace)
- EIF (sklearn) : TreeExplainer natif
- EIF (isotree) : PermutationExplainer
Si xgb_model est fourni, utilise TreeExplainer sur XGBoost en priorité.
Sinon, utilise l'Explainer adapté au modèle EIF.
Retourne une liste de dicts {feature: shap_value} par ligne.
"""
if not ENABLE_SHAP or X.empty:
return [{}] * len(X)
# Priorité XGBoost : TreeExplainer est optimal pour les modèles à base d'arbres
if xgb_model is not None:
try:
explainer = _shap.TreeExplainer(xgb_model)
shap_values = explainer.shap_values(X[features].fillna(0))
if isinstance(shap_values, list):
shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]
result = []
for sv in shap_values:
pairs = sorted(zip(features, sv), key=lambda x: abs(x[1]), reverse=True)
result.append({f: round(float(v), 4) for f, v in pairs[:n_top]})
return result
except Exception as e:
log_info(f"[SHAP] TreeExplainer XGBoost échoué ({e}), fallback EIF")
# Fallback EIF
try:
if EIF_AVAILABLE:
sample_size = min(100, len(X))
X_sample = X.sample(n=sample_size, random_state=42) if len(X) > sample_size else X
X_sample = X[features].sample(n=sample_size, random_state=42) if len(X) > sample_size else X[features]
explainer = _shap.Explainer(model.decision_function, X_sample)
shap_values = explainer(X).values
shap_values = explainer(X[features].fillna(0)).values
else:
explainer = _shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
shap_values = explainer.shap_values(X[features].fillna(0))
result = []
for sv in shap_values:
pairs = sorted(zip(features, sv), key=lambda x: x[1])

View File

@ -20,13 +20,17 @@
# =============================================================================
ARG BUILD_VERSION=dev
ARG GO_VERSION=1.24
ARG GO_VERSION=1.24.3
# ── Stage 1 : compilation Go ──────────────────────────────────────────────
FROM rockylinux:9 AS go-builder
ARG BUILD_VERSION
ARG GO_VERSION
RUN dnf install -y epel-release dnf-plugins-core && \
dnf config-manager --enable crb && \
dnf install -y --allowerasing \
clang llvm libbpf-devel bpftool \
curl tar gzip && \
dnf clean all

View File

@ -45,11 +45,15 @@ struct tcp_syn_event {
/* ---------------------------------------------------------------------------
* Événement TLS ClientHello : émis quand un ClientHello TLS est détecté
*
* IMPORTANT : le payload est à l'offset 0 pour que bpf_skb_load_bytes()
* puisse écrire directement au début du map value (compatible kernel 4.18).
* Les métadonnées sont placées APRÈS le payload.
* ---------------------------------------------------------------------------*/
struct tls_hello_event {
__u32 src_ip; /* adresse source (host byte order, via bpf_ntohl) */
__u8 payload[2048]; /* payload ClientHello brut (offset 0) */
__u32 src_ip; /* adresse source (host byte order) */
__u16 src_port; /* port source (host byte order) */
__u8 payload[2048]; /* payload ClientHello brut (capturé jusqu'à 2048 octets) */
__u16 payload_len; /* longueur effective du payload */
__u64 timestamp_ns; /* horodatage kernel */
} __attribute__((packed));
@ -80,16 +84,14 @@ struct accept_event {
} __attribute__((packed));
/* ---------------------------------------------------------------------------
* Événement HTTP en clair : émis pour chaque segment TCP porteur d'un
* payload HTTP (port 80 ou 8080). Un seul segment par requête est capturé
* (le premier, qui contient la request-line et les en-têtes).
* Événement HTTP en clair : payload à l'offset 0 pour compat kernel 4.18.
* ---------------------------------------------------------------------------*/
struct http_plain_event {
__u8 payload[4096]; /* payload TCP brut (offset 0) */
__u32 src_ip; /* adresse source (host byte order) */
__u32 dst_ip; /* adresse destination (host byte order) */
__u16 src_port; /* port source (host byte order) */
__u16 dst_port; /* port destination 80 ou 8080 */
__u8 payload[4096]; /* payload TCP brut (request-line + headers) */
__u16 payload_len; /* longueur effective du payload copié */
__u64 timestamp_ns; /* horodatage kernel */
} __attribute__((packed));
@ -124,35 +126,65 @@ struct accept_key {
* Déclarations des maps eBPF avec annotations BTF
* ===========================================================================*/
/* Ring buffer : événements TCP SYN (16 MB) */
/* Perf event array : événements TCP SYN (kernel 4.4+) */
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 24);
} rb_tcp_syn SEC(".maps");
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} pb_tcp_syn SEC(".maps");
/* Ring buffer : événements TLS ClientHello (16 MB) */
/* Perf event array : événements TLS ClientHello (kernel 4.4+) */
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 24);
} rb_tls_hello SEC(".maps");
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} pb_tls_hello SEC(".maps");
/* Ring buffer : données SSL déchiffrées (64 MB, plus volumineux) */
/* Perf event array : données SSL déchiffrées (kernel 4.4+) */
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 26);
} rb_ssl_data SEC(".maps");
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} pb_ssl_data SEC(".maps");
/* Ring buffer : événements accept4 (4 MB) */
/* Perf event array : événements accept4 (kernel 4.4+) */
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 22);
} rb_accept SEC(".maps");
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} pb_accept SEC(".maps");
/* Ring buffer : payload HTTP en clair port 80/8080 (32 MB) */
/* Perf event array : payload HTTP en clair port 80/8080 (kernel 4.4+) */
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 25);
} rb_http_plain SEC(".maps");
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} pb_http_plain SEC(".maps");
/* ── PERCPU_ARRAY temporaires pour les structs > 512o (stack eBPF) ──── */
/* TLS hello event : 2064 octets, ne tient pas sur la stack */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, struct tls_hello_event);
} __tls_buf SEC(".maps");
/* HTTP plain event : 4118 octets */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, struct http_plain_event);
} __http_buf SEC(".maps");
/* SSL data event : 4131 octets */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
__type(key, __u32);
__type(value, struct ssl_data_event);
} __ssl_buf SEC(".maps");
/* Hash map : pid_tgid → ssl_read_args (arguments SSL_read entry) */
struct {

View File

@ -1,16 +1,18 @@
/* ============================================================================
* tc_capture.c — Programme XDP ingress : capture des TCP SYN et TLS ClientHello
* tc_capture.c — Programme TC ingress : capture des TCP SYN, TLS ClientHello
* et HTTP en clair
*
* Remplace l'ancienne version TC (SCHED_CLS + TCX) par un hook XDP compatible
* depuis le kernel 4.8. Utilisé en mode XDP_GENERIC sur Rocky Linux 9 (5.14).
* Hook TC ingress (clsact qdisc) compatible kernel 4.1+.
* Émet via bpf_perf_event_output() (kernel 4.4+) pour compatibilité maximale.
*
* Conventions vérificateur eBPF :
* - Tous les accès mémoire paquet utilisent de l'arithmétique de pointeur
* directe avec bornes explicites (data / data_end).
* - Les copies de longueur variable utilisent des boucles bornées (sans
* #pragma unroll) : le vérificateur kernel ≥ 5.3 les accepte nativement.
* - Les options TCP sont copiées brutes ; MSS et Window Scale sont extraits
* côté Go (userspace) depuis le tableau tcp_options_raw.
* IMPORTANT : Ce programme n'utilise AUCUN accès direct au paquet (data/data_end).
* Toutes les lectures se font via bpf_skb_load_bytes() (kernel 4.5+) avec des
* tailles constantes, pour compatibilité avec le vérificateur kernel 4.18 qui
* rejette "math between pkt pointer and register with unbounded min value".
*
* Les copies de payload utilisent bpf_skb_load_bytes() avec &= (2^n - 1)
* pour borner la taille pour le vérificateur.
* Les structs > 512o utilisent un PERCPU_ARRAY temporaire (stack limit eBPF).
* ============================================================================ */
#include "vmlinux.h"
@ -19,219 +21,248 @@
#include <bpf/bpf_core_read.h>
#include "bpf_types.h"
/* Constantes Ethernet */
/* Constantes */
#define ETH_P_IP 0x0800
#define ETH_HLEN 14
/* Constantes IP */
#define IPPROTO_TCP 6
#define IP_DF 0x4000
/* Constantes TCP */
#define TH_SYN 0x02
#define TH_ACK 0x10
#define TH_FIN 0x01
#define TH_RST 0x04
/* Ports */
#define HTTPS_PORT 443
#define HTTP_PORT 80
#define HTTP_ALT_PORT 8080
/* TLS */
#define TLS_CONTENT_HANDSHAKE 0x16
#define TLS_MSG_CLIENT_HELLO 0x01
/* Tailles maximales des payloads copiés */
#define MAX_TLS_PAYLOAD 2048
#define MAX_HTTP_PAYLOAD 1024
#define MAX_TCP_OPTIONS 40
/* Local Ethernet header layout (avoids including linux/if_ether.h).
 * 14 bytes total, packed so it can be overlaid directly on frame bytes. */
struct ethhdr_local {
/* Destination MAC address. */
__u8 h_dest[6];
/* Source MAC address. */
__u8 h_source[6];
/* EtherType, big-endian (__be16); compared against ETH_P_IP by the capture code. */
__be16 h_proto;
} __attribute__((packed));
/* Per-CPU debug counters, indexed by the STAT_* constants below.
 * Each CPU holds its own __u64 slot per index; a reader has to sum the
 * per-CPU values to obtain a total. max_entries must stay equal to the
 * number of STAT_* indices (currently 7). */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 7);
__type(key, __u32);
__type(value, __u64);
} tc_stats SEC(".maps");
/* Index of each counter in tc_stats, as incremented by capture_tc. */
/* Every packet that reaches the program, before any filtering. */
#define STAT_TOTAL 0
/* Packets that passed the minimum-length and EtherType == IPv4 checks. */
#define STAT_IPV4 1
/* IPv4 packets carrying TCP with a valid IHL and a complete 20-byte TCP header. */
#define STAT_TCP 2
/* TCP segments with SYN set and ACK clear. */
#define STAT_SYN 3
/* SYN events handed to bpf_perf_event_output (its return value is not checked). */
#define STAT_SYN_SUBMIT 4
/* TLS ClientHello events handed to bpf_perf_event_output. */
#define STAT_TLS_SUBMIT 5
/* Plain-HTTP payload events handed to bpf_perf_event_output. */
#define STAT_HTTP_SUBMIT 6
/* ---------------------------------------------------------------------------
* capture_xdp — Point d'entrée XDP ingress
* capture_tc — Point d'entrée TC ingress (clsact)
*
* Observe chaque paquet ingress en lecture seule (retourne toujours XDP_PASS).
* Émet des événements vers les ring buffers pour TCP SYN, TLS ClientHello
* et les payloads HTTP en clair.
* AUCUN accès direct au paquet. Tout via bpf_skb_load_bytes() + tailles constantes.
* Compatible vérificateur kernel 4.18.
* ---------------------------------------------------------------------------*/
SEC("xdp")
int capture_xdp(struct xdp_md *ctx)
SEC("tc")
int capture_tc(struct __sk_buff *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
__u32 key;
__u64 *cnt;
__u32 pkt_len = ctx->len;
/* --- Ethernet --- */
struct ethhdr_local *eth = data;
if ((void *)(eth + 1) > data_end)
return XDP_PASS;
if (bpf_ntohs(eth->h_proto) != ETH_P_IP)
return XDP_PASS;
key = STAT_TOTAL;
cnt = bpf_map_lookup_elem(&tc_stats, &key);
if (cnt) (*cnt)++;
/* --- IPv4 --- */
struct iphdr *ip = data + ETH_HLEN;
if ((void *)(ip + 1) > data_end)
return XDP_PASS;
if (ip->protocol != IPPROTO_TCP)
return XDP_PASS;
/* --- Ethernet : vérifier type IPv4 --- */
if (pkt_len < ETH_HLEN + 20 + 20)
return TC_ACT_OK;
__u32 ihl = ip->ihl & 0x0F;
if (ihl < 5)
return XDP_PASS;
__u32 ip_hlen = ihl << 2; /* ∈ [20, 60] */
__be16 h_proto;
bpf_skb_load_bytes(ctx, 12, &h_proto, 2);
if (h_proto != bpf_htons(ETH_P_IP))
return TC_ACT_OK;
__u32 src_ip = ip->saddr;
__u32 dst_ip = ip->daddr;
__u8 ttl = ip->ttl;
__u16 ip_id = bpf_ntohs(ip->id);
__u16 frag_off = bpf_ntohs(ip->frag_off);
/* --- IPv4 : lire le header (20 octets min) --- */
key = STAT_IPV4;
cnt = bpf_map_lookup_elem(&tc_stats, &key);
if (cnt) (*cnt)++;
struct iphdr iph;
bpf_skb_load_bytes(ctx, ETH_HLEN, &iph, sizeof(iph));
if (iph.protocol != IPPROTO_TCP)
return TC_ACT_OK;
__u32 ihl = iph.ihl & 0x0F;
if (ihl < 5 || ihl > 15)
return TC_ACT_OK;
__u32 ip_hlen = ihl << 2;
if (ip_hlen < 20 || ip_hlen > 60)
return TC_ACT_OK;
__u32 src_ip = iph.saddr;
__u32 dst_ip = iph.daddr;
__u8 ttl = iph.ttl;
__u16 ip_id = bpf_ntohs(iph.id);
__u16 frag_off = bpf_ntohs(iph.frag_off);
__u8 df_bit = (frag_off & IP_DF) ? 1 : 0;
/* --- TCP à offset variable --- */
struct tcphdr *tcp = (void *)ip + ip_hlen;
if ((void *)(tcp + 1) > data_end) /* valide tcp[0..19] */
return XDP_PASS;
/* --- TCP : lire le header (20 octets) --- */
__u32 tcp_off = ETH_HLEN + ip_hlen;
if (pkt_len < tcp_off + 20)
return TC_ACT_OK;
__u16 src_port = bpf_ntohs(tcp->source);
__u16 dst_port = bpf_ntohs(tcp->dest);
__u16 window = bpf_ntohs(tcp->window);
key = STAT_TCP;
cnt = bpf_map_lookup_elem(&tc_stats, &key);
if (cnt) (*cnt)++;
struct tcphdr tcph;
bpf_skb_load_bytes(ctx, tcp_off, &tcph, sizeof(tcph));
__u16 src_port = bpf_ntohs(tcph.source);
__u16 dst_port = bpf_ntohs(tcph.dest);
__u16 window = bpf_ntohs(tcph.window);
/* Flags via les champs de bits du struct (sûr pour le vérificateur) */
__u8 tcp_flags = 0;
if (tcp->syn) tcp_flags |= TH_SYN;
if (tcp->ack) tcp_flags |= TH_ACK;
if (tcp->fin) tcp_flags |= TH_FIN;
if (tcp->rst) tcp_flags |= TH_RST;
if (tcph.syn) tcp_flags |= TH_SYN;
if (tcph.ack) tcp_flags |= TH_ACK;
if (tcph.fin) tcp_flags |= TH_FIN;
if (tcph.rst) tcp_flags |= TH_RST;
__u32 doff = tcp->doff;
if (doff < 5)
return XDP_PASS;
__u32 tcp_hlen = doff << 2; /* ∈ [20, 60] */
__u32 doff = tcph.doff;
if (doff < 5 || doff > 15)
return TC_ACT_OK;
__u32 tcp_hlen = doff << 2;
if (tcp_hlen < 20 || tcp_hlen > 60)
return TC_ACT_OK;
/* Offset du payload applicatif */
void *payload = (void *)tcp + tcp_hlen;
__u32 payload_off = ETH_HLEN + ip_hlen + tcp_hlen;
/* ===================================================================
* TCP SYN : extraction des paramètres L3/L4
* TCP SYN
* ===================================================================*/
if ((tcp_flags & TH_SYN) && !(tcp_flags & TH_ACK)) {
struct tcp_syn_event *evt =
bpf_ringbuf_reserve(&rb_tcp_syn, sizeof(*evt), 0);
if (!evt)
return XDP_PASS;
key = STAT_SYN;
cnt = bpf_map_lookup_elem(&tc_stats, &key);
if (cnt) (*cnt)++;
evt->src_ip = bpf_ntohl(src_ip);
evt->dst_ip = bpf_ntohl(dst_ip);
evt->src_port = src_port;
evt->dst_port = dst_port;
evt->ttl = ttl;
evt->df_bit = df_bit;
evt->ip_id = ip_id;
evt->window_size = window;
evt->window_scale = 0xFF; /* défaut = absent */
evt->mss = 0;
evt->timestamp_ns = bpf_ktime_get_ns();
evt->tcp_options_len = 0;
struct tcp_syn_event evt = {};
evt.src_ip = bpf_ntohl(src_ip);
evt.dst_ip = bpf_ntohl(dst_ip);
evt.src_port = src_port;
evt.dst_port = dst_port;
evt.ttl = ttl;
evt.df_bit = df_bit;
evt.ip_id = ip_id;
evt.window_size = window;
evt.window_scale = 0xFF;
evt.mss = 0;
evt.timestamp_ns = bpf_ktime_get_ns();
evt.tcp_options_len = 0;
/* Copie brute des options TCP (MSS/WS extraits en userspace Go).
* Boucle bornée à MAX_TCP_OPTIONS = 40 itérations : triviale pour
* le vérificateur kernel ≥ 5.3, sans #pragma unroll. */
__u8 *opts_start = (__u8 *)(tcp + 1); /* après les 20 octets fixes */
__u32 opts_len = tcp_hlen - 20; /* ∈ [0, 40] */
if (opts_len > MAX_TCP_OPTIONS)
opts_len = MAX_TCP_OPTIONS;
if (opts_len > 0) {
#pragma clang loop unroll(disable)
for (__u32 i = 0; i < MAX_TCP_OPTIONS; i++) {
if (i >= opts_len)
break;
if (opts_start + i + 1 > (__u8 *)data_end)
break;
evt->tcp_options_raw[i] = opts_start[i];
}
evt->tcp_options_len = (__u8)opts_len;
/* Copie des options TCP via bpf_skb_load_bytes avec taille constante.
* On lit MAX_TCP_OPTIONS=40 octets depuis le début des options.
* Si le paquet compte moins de 40 octets après le début des options, la
* copie n'est pas tentée → options absentes (tcp_options_len reste à 0). */
__u32 opts_off = tcp_off + 20;
__u32 opts_len = tcp_hlen - 20;
if (opts_len > 0 && opts_len <= MAX_TCP_OPTIONS &&
opts_off + MAX_TCP_OPTIONS <= pkt_len) {
bpf_skb_load_bytes(ctx, opts_off, evt.tcp_options_raw, MAX_TCP_OPTIONS);
evt.tcp_options_len = (__u8)opts_len;
}
bpf_ringbuf_submit(evt, 0);
bpf_perf_event_output(ctx, &pb_tcp_syn, BPF_F_CURRENT_CPU,
&evt, sizeof(evt));
key = STAT_SYN_SUBMIT;
cnt = bpf_map_lookup_elem(&tc_stats, &key);
if (cnt) (*cnt)++;
}
/* ===================================================================
* TLS ClientHello (port 443)
* ===================================================================*/
if (dst_port == HTTPS_PORT) {
/* Au moins 6 octets pour l'en-tête TLS record + type message */
if (payload + 6 > data_end)
return XDP_PASS;
/* Lire les 6 premiers octets du payload pour vérifier le type TLS */
if (payload_off + 6 > pkt_len)
return TC_ACT_OK;
__u8 tls_type = ((__u8 *)payload)[0];
__u8 tls_msg_type = ((__u8 *)payload)[5];
if (tls_type != TLS_CONTENT_HANDSHAKE || tls_msg_type != TLS_MSG_CLIENT_HELLO)
return XDP_PASS;
__u8 tls_hdr[6];
bpf_skb_load_bytes(ctx, payload_off, tls_hdr, 6);
__u32 avail = (__u8 *)data_end - (__u8 *)payload;
/* avail ≥ 6 (vérifié ci-dessus), on plafonne à MAX_TLS_PAYLOAD */
if (tls_hdr[0] != TLS_CONTENT_HANDSHAKE || tls_hdr[5] != TLS_MSG_CLIENT_HELLO)
return TC_ACT_OK;
/* Avail via pkt_len (scalaire pur) */
__u32 avail = 0;
if (pkt_len > payload_off) {
avail = pkt_len - payload_off;
if (avail > MAX_TLS_PAYLOAD)
avail = MAX_TLS_PAYLOAD;
/* Barrière compilateur : coupe le lien CSE entre avail et (data_end - payload).
* Sans cette barrière, clang génère un test "PTR_TO_PACKET <<= 32" (compare
* data_end == payload pour l'entrée de boucle) que le vérificateur eBPF rejette.
* La barrière force une comparaison scalaire (avail == 0) à la place. */
asm volatile("" : "+r"(avail));
}
if (avail == 0)
return TC_ACT_OK;
struct tls_hello_event *tls_evt =
bpf_ringbuf_reserve(&rb_tls_hello, sizeof(*tls_evt), 0);
__u32 zero = 0;
struct tls_hello_event *tls_evt = bpf_map_lookup_elem(&__tls_buf, &zero);
if (!tls_evt)
return XDP_PASS;
return TC_ACT_OK;
tls_evt->src_ip = 0;
tls_evt->src_port = 0;
tls_evt->payload_len = 0;
tls_evt->timestamp_ns = 0;
tls_evt->src_ip = bpf_ntohl(src_ip);
tls_evt->src_port = src_port;
tls_evt->timestamp_ns = bpf_ktime_get_ns();
tls_evt->payload_len = (__u16)avail;
/* Copie bornée du payload TLS.
* Pour tout i < avail : payload + i < payload + avail ≤ data_end.
* Le vérificateur kernel ≥ 5.3 peut vérifier cette boucle sans unroll. */
__u8 *src = (__u8 *)payload;
#pragma clang loop unroll(disable)
for (__u32 i = 0; i < MAX_TLS_PAYLOAD; i++) {
if (i >= avail)
break;
if (src + i + 1 > (__u8 *)data_end)
break;
tls_evt->payload[i] = src[i];
}
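/* Copie via bpf_skb_load_bytes avec taille constante 256.
* Kernel 4.18 ne supporte pas les tailles variables vers map values.
* 256 octets capturent le ClientHello dans la majorité des cas.
* Attention : payload_len (fixé plus haut à partir de avail) peut dépasser
* 256 ; seuls les 256 premiers octets du buffer sont rafraîchis par cet appel. */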
if (bpf_skb_load_bytes(ctx, payload_off, tls_evt, 256))
return TC_ACT_OK;
bpf_ringbuf_submit(tls_evt, 0);
return XDP_PASS;
bpf_perf_event_output(ctx, &pb_tls_hello, BPF_F_CURRENT_CPU,
tls_evt, sizeof(*tls_evt));
key = STAT_TLS_SUBMIT;
cnt = bpf_map_lookup_elem(&tc_stats, &key);
if (cnt) (*cnt)++;
return TC_ACT_OK;
}
/* ===================================================================
* HTTP en clair (port 80 / 8080)
* ===================================================================*/
if (dst_port == HTTP_PORT || dst_port == HTTP_ALT_PORT) {
/* Ignorer SYN, FIN, RST : seuls les segments de données */
if (tcp_flags & (TH_SYN | TH_FIN | TH_RST))
return XDP_PASS;
if (payload >= data_end)
return XDP_PASS;
return TC_ACT_OK;
if (payload_off >= pkt_len)
return TC_ACT_OK;
__u32 avail = (__u8 *)data_end - (__u8 *)payload;
/* Avail via pkt_len (scalaire pur) */
__u32 avail = 0;
if (pkt_len > payload_off) {
avail = pkt_len - payload_off;
if (avail > MAX_HTTP_PAYLOAD)
avail = MAX_HTTP_PAYLOAD;
/* Même barrière que pour la section TLS : force comparaison scalaire. */
asm volatile("" : "+r"(avail));
}
if (avail == 0)
return TC_ACT_OK;
struct http_plain_event *h_evt =
bpf_ringbuf_reserve(&rb_http_plain, sizeof(*h_evt), 0);
__u32 zero = 0;
struct http_plain_event *h_evt = bpf_map_lookup_elem(&__http_buf, &zero);
if (!h_evt)
return XDP_PASS;
return TC_ACT_OK;
h_evt->src_ip = 0;
h_evt->dst_ip = 0;
h_evt->src_port = 0;
h_evt->dst_port = 0;
h_evt->payload_len = 0;
h_evt->timestamp_ns = 0;
h_evt->src_ip = bpf_ntohl(src_ip);
h_evt->dst_ip = bpf_ntohl(dst_ip);
@ -240,21 +271,19 @@ int capture_xdp(struct xdp_md *ctx)
h_evt->timestamp_ns = bpf_ktime_get_ns();
h_evt->payload_len = (__u16)avail;
__u8 *src = (__u8 *)payload;
#pragma clang loop unroll(disable)
for (__u32 i = 0; i < MAX_HTTP_PAYLOAD; i++) {
if (i >= avail)
break;
if (src + i + 1 > (__u8 *)data_end)
break;
h_evt->payload[i] = src[i];
/* Taille constante 256 pour compatibilité vérificateur kernel 4.18 */
if (bpf_skb_load_bytes(ctx, payload_off, h_evt, 256))
return TC_ACT_OK;
bpf_perf_event_output(ctx, &pb_http_plain, BPF_F_CURRENT_CPU,
h_evt, sizeof(*h_evt));
key = STAT_HTTP_SUBMIT;
cnt = bpf_map_lookup_elem(&tc_stats, &key);
if (cnt) (*cnt)++;
}
bpf_ringbuf_submit(h_evt, 0);
}
return XDP_PASS;
return TC_ACT_OK;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -4,6 +4,9 @@
* et corrige l'association socket ↔ SSL* via les tracepoints syscalls/accept4.
* Les tracepoints sont plus stables que les kprobes car ils ne dépendent pas
* du nom manglé __x64_sys_accept4 (variable selon la version du kernel).
*
* Utilise bpf_perf_event_output() (kernel 4.4+) pour compatibilité maximale.
* Les structs > 512o utilisent un PERCPU_ARRAY temporaire (__ssl_buf).
* ============================================================================ */
#include "vmlinux.h"
@ -105,7 +108,8 @@ int uprobe_ssl_read_entry(struct pt_regs *ctx)
/* ===========================================================================
* uretprobe_ssl_read_exit — Retour de SSL_read
*
* Lit le buffer déchiffré et l'émet dans rb_ssl_data.
* Lit le buffer déchiffré et l'émet via perf_event_output.
* Struct ssl_data_event = 4131 octets → PERCPU_ARRAY temporaire (__ssl_buf).
* ===========================================================================*/
SEC("uretprobe/SSL_read")
int uretprobe_ssl_read_exit(struct pt_regs *ctx)
@ -124,12 +128,21 @@ int uretprobe_ssl_read_exit(struct pt_regs *ctx)
return 0;
}
/* Allouer un slot dans le ring buffer */
struct ssl_data_event *evt = bpf_ringbuf_reserve(&rb_ssl_data, sizeof(*evt), 0);
/* Utiliser le buffer PERCPU (struct trop grande pour la stack) */
__u32 zero = 0;
struct ssl_data_event *evt = bpf_map_lookup_elem(&__ssl_buf, &zero);
if (!evt) {
bpf_map_delete_elem(&ssl_args_map, &pid_tgid);
return 0;
}
/* Initialiser les champs fixes (data sera écrasé par probe_read_user) */
evt->pid_tgid = 0;
evt->fd = 0;
evt->src_ip = 0;
evt->src_port = 0;
evt->data_len = 0;
evt->timestamp_ns = 0;
evt->direction = 0;
evt->pid_tgid = pid_tgid;
evt->direction = 0; /* lecture = client vers serveur */
@ -154,7 +167,8 @@ int uretprobe_ssl_read_exit(struct pt_regs *ctx)
evt->src_port = 0;
}
bpf_ringbuf_submit(evt, 0);
bpf_perf_event_output(ctx, &pb_ssl_data, BPF_F_CURRENT_CPU,
evt, sizeof(*evt));
bpf_map_delete_elem(&ssl_args_map, &pid_tgid);
return 0;
@ -181,7 +195,8 @@ int kprobe_accept4_entry(struct sys_enter_accept4_ctx *ctx)
* kretprobe_accept4_exit — Retour de accept4 via tracepoint syscalls
*
* Lit la sockaddr_in pour extraire src_ip:src_port du client,
* peuple accept_map et fd_conn_map, et émet dans rb_accept.
* peuple accept_map et fd_conn_map, et émet via perf_event_output.
* Struct accept_event = 26 octets → tient sur la stack (< 512o).
* ===========================================================================*/
SEC("tracepoint/syscalls/sys_exit_accept4")
int kretprobe_accept4_exit(struct sys_exit_accept4_ctx *ctx)
@ -238,21 +253,11 @@ int kretprobe_accept4_exit(struct sys_exit_accept4_ctx *ctx)
};
bpf_map_update_elem(&fd_conn_map, &fd, &conn_info, BPF_ANY);
/* Émettre dans rb_accept */
struct accept_event *out = bpf_ringbuf_reserve(&rb_accept, sizeof(*out), 0);
if (!out)
return 0;
out->pid_tgid = pid_tgid;
out->fd = fd;
out->src_ip = src_ip;
out->src_port = src_port;
out->timestamp_ns = aevt.timestamp_ns;
bpf_ringbuf_submit(out, 0);
/* Émettre via perf_event_output (struct 26o → sur la stack) */
bpf_perf_event_output(ctx, &pb_accept, BPF_F_CURRENT_CPU,
&aevt, sizeof(aevt));
return 0;
}
char LICENSE[] SEC("license") = "GPL";

View File

@ -10,6 +10,8 @@ import (
"log"
"os"
"os/signal"
"strings"
"sync/atomic"
"syscall"
"time"
@ -18,7 +20,7 @@ import (
"github.com/antitbone/ja4/ja4ebpf/internal/parser"
"github.com/antitbone/ja4/ja4ebpf/internal/procutil"
"github.com/antitbone/ja4/ja4ebpf/internal/writer"
"github.com/cilium/ebpf/ringbuf"
"github.com/cilium/ebpf/perf"
"gopkg.in/yaml.v3"
)
@ -32,6 +34,7 @@ var fdCache = procutil.NewFDCache(5 * time.Second)
type Config struct {
Interface string `yaml:"interface"` // interface réseau à surveiller (ex: "eth0")
SSLLibPath string `yaml:"ssl_lib_path"` // chemin vers libssl (ex: "/usr/lib64/libssl.so.3")
Debug bool `yaml:"debug"` // mode debug : dump compteurs BPF, log verbeux, ClickHouse optionnel
ClickHouse struct {
DSN string `yaml:"dsn"` // DSN ClickHouse natif
@ -87,6 +90,9 @@ func loadConfig(path string) (*Config, error) {
if v := os.Getenv("JA4EBPF_CLICKHOUSE_DSN"); v != "" {
cfg.ClickHouse.DSN = v
}
if v := os.Getenv("JA4EBPF_DEBUG"); v != "" {
cfg.Debug = strings.EqualFold(v, "true") || v == "1" || v == "yes"
}
return cfg, nil
}
@ -104,7 +110,10 @@ func main() {
log.Fatalf("erreur chargement configuration: %v", err)
}
log.Printf("[ja4ebpf] démarrage — interface=%s ssl=%s", cfg.Interface, cfg.SSLLibPath)
if cfg.Debug {
log.Printf("[ja4ebpf] MODE DEBUG ACTIVÉ")
}
log.Printf("[ja4ebpf] démarrage — interface=%s ssl=%s debug=%v", cfg.Interface, cfg.SSLLibPath, cfg.Debug)
// Contexte principal avec annulation sur signal système
ctx, cancel := context.WithCancel(context.Background())
@ -122,9 +131,11 @@ func main() {
defer ldr.Close()
// --- 2. Attachement TC ingress ---
log.Printf("[ja4ebpf] attachement TC ingress sur %s...", cfg.Interface)
if err := ldr.AttachTC(cfg.Interface); err != nil {
log.Fatalf("erreur attachement TC sur %s: %v", cfg.Interface, err)
}
log.Printf("[ja4ebpf] TC ingress attaché sur %s", cfg.Interface)
// --- 3. Attachement uprobes SSL ---
if err := ldr.AttachUprobes(cfg.SSLLibPath); err != nil {
@ -144,26 +155,46 @@ func main() {
defer mgr.Close()
// --- 6. Writer ClickHouse ---
var w *writer.ClickHouseWriter
flushInterval := time.Duration(cfg.ClickHouse.FlushSecs) * time.Second
w, err := writer.NewClickHouseWriter(cfg.ClickHouse.DSN, cfg.ClickHouse.BatchSize, flushInterval)
w, err = writer.NewClickHouseWriter(cfg.ClickHouse.DSN, cfg.ClickHouse.BatchSize, flushInterval)
if err != nil {
if cfg.Debug {
log.Printf("[ja4ebpf] DEBUG: writer ClickHouse non disponible: %v (continue sans CH)", err)
} else {
log.Fatalf("erreur initialisation writer ClickHouse: %v", err)
}
}
if w != nil {
w.Start(ctx)
}
// --- 7. Goroutine : écriture des sessions prêtes ---
go func() {
for s := range mgr.ReadyCh {
if w != nil {
w.Write(s)
} else if cfg.Debug {
log.Printf("[ja4ebpf] DEBUG: session prête (sans CH): has_l3l4=%v has_tls=%v",
s.L3L4 != nil, s.TLS != nil)
}
}
}()
// --- 8. Goroutines de consommation des ring buffers ---
go consumeSynEvents(ctx, ldr.SynReader, mgr)
go consumeTLSEvents(ctx, ldr.TLSReader, mgr)
go consumeSSLEvents(ctx, ldr.SSLReader, mgr)
go consumeAcceptEvents(ctx, ldr.AcceptReader, mgr)
go consumeHTTPPlainEvents(ctx, ldr.HTTPPlainReader, mgr)
// --- 8. Compteurs d'événements consommés (mode debug) ---
consumed := &eventCounters{}
// --- 9. Goroutines de consommation des perf buffers ---
go consumeSynEvents(ctx, ldr.SynReader, mgr, &consumed.syn)
go consumeTLSEvents(ctx, ldr.TLSReader, mgr, &consumed.tls)
go consumeSSLEvents(ctx, ldr.SSLReader, mgr, &consumed.ssl)
go consumeAcceptEvents(ctx, ldr.AcceptReader, mgr, &consumed.accept)
go consumeHTTPPlainEvents(ctx, ldr.HTTPPlainReader, mgr, &consumed.httpPlain)
// --- 10. Stats dumper (mode debug) ---
if cfg.Debug {
go debugStatsDumper(ctx, ldr, consumed)
}
log.Printf("[ja4ebpf] démon actif — en attente des événements")
@ -178,6 +209,43 @@ func main() {
log.Printf("[ja4ebpf] arrêt terminé")
}
// eventCounters groups one atomic counter per kind of event consumed in
// userspace (TCP SYN, TLS ClientHello, decrypted SSL data, accept4, plain
// HTTP). Each consumer goroutine receives a pointer to its own field and
// increments it; the debug dumper reads them with Load. The zero value is
// ready to use.
type eventCounters struct {
	syn, tls, ssl, accept, httpPlain atomic.Uint64
}
// debugStatsDumper logs a snapshot every 5 seconds until ctx is cancelled.
// Each snapshot is the kernel-side tc_stats counters (via ldr.ReadStats)
// followed by the userspace counters held in consumed. When ReadStats
// returns an error, the error is logged and that tick's snapshot is skipped
// entirely, userspace line included.
func debugStatsDumper(ctx context.Context, ldr *loader.Loader, consumed *eventCounters) {
	const period = 5 * time.Second

	// report emits one snapshot; keeping it separate leaves the loop below
	// concerned with scheduling only.
	report := func() {
		kernel, err := ldr.ReadStats()
		if err != nil {
			log.Printf("[debug] erreur lecture tc_stats: %v", err)
			return
		}
		// Missing keys read as 0 from the map, so indexing is always safe.
		log.Printf("[debug] BPF: TOTAL=%d IPV4=%d TCP=%d SYN=%d SYN_SUB=%d TLS_SUB=%d HTTP_SUB=%d",
			kernel[0], kernel[1], kernel[2], kernel[3], kernel[4], kernel[5], kernel[6])
		log.Printf("[debug] GO: syn=%d tls=%d ssl=%d accept=%d http=%d",
			consumed.syn.Load(), consumed.tls.Load(), consumed.ssl.Load(),
			consumed.accept.Load(), consumed.httpPlain.Load())
	}

	tick := time.NewTicker(period)
	defer tick.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-tick.C:
			report()
		}
	}
}
// parseTCPOptions extrait le MSS et le Window Scale depuis les options TCP brutes.
// Les options TCP suivent le format TLV (Type-Length-Value), sauf les options 0 et 1.
// Retourne (mss=0, windowScale=0xFF) si les options sont absentes ou mal formées.
@ -220,7 +288,7 @@ func parseTCPOptions(opts []byte) (mss uint16, windowScale uint8) {
// consumeSynEvents lit les événements TCP SYN depuis le perf buffer
// et met à jour l'état L3/L4 des sessions.
func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) {
func consumeSynEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) {
for {
select {
case <-ctx.Done():
@ -230,7 +298,7 @@ func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
record, err := rd.Read()
if err != nil {
if err == ringbuf.ErrClosed {
if err == os.ErrClosed {
return
}
continue
@ -240,7 +308,7 @@ func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
// src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+ttl(1)+df_bit(1)+ip_id(2)+
// window_size(2)+window_scale(1)+mss(2)+tcp_options_raw[40]+tcp_options_len(1)+timestamp_ns(8)
// offsets: 0 4 8 10 12 13 14 16 18 19 21 61 62
if len(record.RawSample) < 62 {
if len(record.RawSample) < 70 {
continue
}
data := record.RawSample
@ -288,12 +356,13 @@ func consumeSynEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
_ = s.TLS // corrélation implicite par présence des deux champs
}
})
counter.Add(1)
}
}
// consumeTLSEvents lit les événements TLS ClientHello depuis le perf buffer
// et calcule l'empreinte JA4 pour chaque session.
func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) {
func consumeTLSEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) {
for {
select {
case <-ctx.Done():
@ -303,7 +372,7 @@ func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
record, err := rd.Read()
if err != nil {
if err == ringbuf.ErrClosed {
if err == os.ErrClosed {
return
}
continue
@ -312,20 +381,20 @@ func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
// struct tls_hello_event (packed):
// payload[2048] + src_ip(4) + src_port(2) + payload_len(2) + timestamp_ns(8)
// offsets: 0 2048 2052 2054 2056
if len(record.RawSample) < 2056 {
if len(record.RawSample) < 2064 {
continue
}
data := record.RawSample
srcIPRaw := binary.LittleEndian.Uint32(data[0:4])
srcPort := binary.LittleEndian.Uint16(data[4:6])
srcIPRaw := binary.LittleEndian.Uint32(data[2048:2052])
srcPort := binary.LittleEndian.Uint16(data[2052:2054])
payloadLen := binary.LittleEndian.Uint16(data[2054:2056])
if int(payloadLen) > 2048 {
payloadLen = 2048
}
payload := make([]byte, payloadLen)
copy(payload, data[6:6+payloadLen])
copy(payload, data[0:payloadLen])
var key correlation.SessionKey
key.SrcIP[0] = byte(srcIPRaw >> 24)
@ -366,13 +435,14 @@ func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
_ = s.L3L4 // corrélation implicite par présence des deux champs
}
})
counter.Add(1)
}
}
// consumeSSLEvents lit les données SSL déchiffrées depuis le perf buffer.
// Parse les requêtes HTTP/1.x et détecte le préambule HTTP/2.
// Quand src_ip=0 (accept4 non disponible), tente un lookup /proc pour retrouver l'IP du client.
func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) {
func consumeSSLEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) {
for {
select {
case <-ctx.Done():
@ -382,7 +452,7 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
record, err := rd.Read()
if err != nil {
if err == ringbuf.ErrClosed {
if err == os.ErrClosed {
return
}
continue
@ -439,6 +509,7 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
key.SrcIP[3] = byte(srcIPRaw)
key.SrcPort = srcPort
counter.Add(1)
// === Routeur Magic Bytes ===
if parser.DetectH2Preface(sslData) {
@ -517,7 +588,7 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.
// consumeAcceptEvents lit les événements accept4 depuis le perf buffer.
// Met à jour les sessions avec les informations de connexion client.
func consumeAcceptEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) {
func consumeAcceptEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) {
for {
select {
case <-ctx.Done():
@ -527,7 +598,7 @@ func consumeAcceptEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlati
record, err := rd.Read()
if err != nil {
if err == ringbuf.ErrClosed {
if err == os.ErrClosed {
return
}
continue
@ -556,13 +627,14 @@ func consumeAcceptEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlati
// S'assurer que la session existe
mgr.GetOrCreate(key)
counter.Add(1)
}
}
// consumeHTTPPlainEvents lit les payloads HTTP en clair depuis le ring buffer XDP.
// consumeHTTPPlainEvents lit les payloads HTTP en clair depuis le perf buffer TC.
// Parse la requête HTTP/1.x ou détecte la préface HTTP/2 pour les connexions
// non-chiffrées sur les ports 80/8080.
func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) {
func consumeHTTPPlainEvents(ctx context.Context, rd *perf.Reader, mgr *correlation.Manager, counter *atomic.Uint64) {
for {
select {
case <-ctx.Done():
@ -572,21 +644,21 @@ func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correl
record, err := rd.Read()
if err != nil {
if err == ringbuf.ErrClosed {
if err == os.ErrClosed {
return
}
continue
}
data := record.RawSample
// struct http_plain_event: src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+payload(4096)+payload_len(2)+timestamp_ns(8)
// struct http_plain_event: payload(4096)+src_ip(4)+dst_ip(4)+src_port(2)+dst_port(2)+payload_len(2)+timestamp_ns(8)
if len(data) < 14 {
continue
}
// src_ip et src_port en host byte order (bpf_ntohl appliqué dans tc_capture.c)
srcIPRaw := binary.LittleEndian.Uint32(data[0:4])
srcPort := binary.LittleEndian.Uint16(data[8:10])
srcIPRaw := binary.LittleEndian.Uint32(data[4096:4100])
srcPort := binary.LittleEndian.Uint16(data[4104:4106])
if srcIPRaw == 0 && srcPort == 0 {
continue
@ -610,10 +682,10 @@ func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correl
if payloadLen == 0 {
continue
}
if 12+payloadLen > len(data) {
payloadLen = len(data) - 12
if 4096+payloadLen > len(data) {
payloadLen = len(data) - 4096
}
httpData := data[12 : 12+payloadLen]
httpData := data[0:payloadLen]
// Routeur Magic Bytes : HTTP/1.x uniquement sur port 80
if parser.IsHTTP1Request(httpData) {
@ -633,6 +705,7 @@ func consumeHTTPPlainEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correl
// Corréler si L3/L4 est déjà présent (TCP SYN capturé)
_ = s.L3L4 // corrélation implicite
})
counter.Add(1)
}
}
}

View File

@ -1,33 +1,26 @@
# Configuration de l'agent ja4ebpf
# Copiez ce fichier en config.yml et adaptez les valeurs.
# Interface réseau à surveiller (hook TC ingress)
# Interface réseau à surveiller (hook TC ingress)
interface: eth0
# Processus à instrumenter via uprobes SSL
ssl_probes:
- executable: /usr/sbin/httpd
symbol: SSL_read
- executable: /usr/lib64/libssl.so.3
symbol: SSL_read
# Chemin vers libssl pour les uprobes SSL_read/SSL_write
ssl_lib_path: "/usr/lib64/libssl.so.3"
# Mode debug : dump compteurs BPF + événements consommés toutes les 5s
# ClickHouse optionnel en mode debug
debug: false
# Paramètres de connexion ClickHouse
clickhouse:
addr: "127.0.0.1:9000"
database: "ja4_logs"
table: "http_logs_raw"
username: "default"
password: ""
tls: false
dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs"
batch_size: 500
flush_every: "2s"
flush_secs: 1
# Délais de corrélation et de détection
timeouts:
# Durée sans activité avant expiration d'une session TCP
session_expiry: "500ms"
# Délai maximum pour une requête L7 sans réponse (détection Slowloris)
slowloris: "10s"
correlation:
timeout_ms: 500 # expiration session TCP (ms)
slowloris_ms: 10000 # seuil Slowloris (ms)
# Journalisation
log:

View File

@ -20,9 +20,10 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/segmentio/asm v1.2.0 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/vishvananda/netlink v1.3.1 // indirect
github.com/vishvananda/netns v0.0.5 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/trace v1.24.0 // indirect
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea // indirect
golang.org/x/sys v0.20.0 // indirect
)

View File

@ -65,6 +65,10 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
@ -103,6 +107,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=

View File

@ -1,48 +1,90 @@
// Package loader initialise les programmes eBPF via cilium/ebpf,
// attache les hooks TC ingress et les uprobes SSL, et expose
// les readers RingBuffer aux consommateurs Go.
// attache le hook TC ingress et les uprobes SSL, et expose
// les readers PerfEvent aux consommateurs Go.
package loader
import (
"context"
"encoding/binary"
"fmt"
"net"
"os"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/ringbuf"
"github.com/cilium/ebpf/perf"
"github.com/cilium/ebpf/rlimit"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -target amd64 -cflags "-O2 -g -Wall -D__TARGET_ARCH_x86 -Wno-pass-failed" Ja4Tc ../../bpf/tc_capture.c -- -I../../bpf/headers
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -target amd64 -cflags "-O2 -g -Wall -D__TARGET_ARCH_x86 -Wno-pass-failed" Ja4Ssl ../../bpf/uprobe_ssl.c -- -I../../bpf/headers
// perCPUBufferSize est la taille du buffer perf per-CPU en octets (256 KB).
const perCPUBufferSize = 256 * 1024
// Loader encapsule les objets eBPF compilés, les liens vers les hooks,
// et les readers RingBuffer exposés au pipeline de traitement.
// et les readers PerfEvent exposés au pipeline de traitement.
type Loader struct {
tcObjs *Ja4TcObjects // généré par bpf2go (tc_capture.c)
sslObjs *Ja4SslObjects // généré par bpf2go (uprobe_ssl.c)
tcLink link.Link
tcNlLink netlink.Link // interface netlink pour cleanup TC
uprobeLinks []link.Link
statsMap *ebpf.Map // map tc_stats pour lecture des compteurs BPF (mode debug)
// SynReader lit les événements TCP SYN depuis rb_tcp_syn.
SynReader *ringbuf.Reader
// TLSReader lit les événements TLS ClientHello depuis rb_tls_hello.
TLSReader *ringbuf.Reader
// SSLReader lit les données SSL déchiffrées depuis rb_ssl_data.
SSLReader *ringbuf.Reader
// AcceptReader lit les événements accept4 depuis rb_accept.
AcceptReader *ringbuf.Reader
// HTTPPlainReader lit les payloads HTTP en clair depuis rb_http_plain.
HTTPPlainReader *ringbuf.Reader
// SynReader lit les événements TCP SYN depuis pb_tcp_syn.
SynReader *perf.Reader
// TLSReader lit les événements TLS ClientHello depuis pb_tls_hello.
TLSReader *perf.Reader
// SSLReader lit les données SSL déchiffrées depuis pb_ssl_data.
SSLReader *perf.Reader
// AcceptReader lit les événements accept4 depuis pb_accept.
AcceptReader *perf.Reader
// HTTPPlainReader lit les payloads HTTP en clair depuis pb_http_plain.
HTTPPlainReader *perf.Reader
}
// StatNames maps each BPF counter index to a human-readable label.
// It is also the single source of truth for the set of counter indices that
// ReadStats queries.
var StatNames = map[uint32]string{
	0: "TOTAL",
	1: "IPV4",
	2: "TCP",
	3: "SYN",
	4: "SYN_SUBMIT",
	5: "TLS_SUBMIT",
	6: "HTTP_SUBMIT",
}

// ReadStats reads the counters of the tc_stats map (PERCPU_ARRAY) and returns,
// for every counter index, the sum of the values reported for all CPUs.
//
// The indices queried are the keys of StatNames, so adding a counter to that
// table is enough for it to be read here (previously the bound was a
// hard-coded 7 that had to be kept in sync by hand).
//
// When the stats map is not available the result is an empty, non-nil map.
// An index whose lookup fails is simply absent from the result. The returned
// error is always nil.
func (l *Loader) ReadStats() (map[uint32]uint64, error) {
	totals := make(map[uint32]uint64, len(StatNames))
	if l.statsMap == nil {
		return totals, nil
	}
	for idx := range StatNames {
		var perCPU []uint64
		if err := l.statsMap.Lookup(idx, &perCPU); err != nil {
			// Best effort: a counter that cannot be read is skipped so that
			// the remaining counters are still reported.
			continue
		}
		var sum uint64
		for _, v := range perCPU {
			sum += v
		}
		totals[idx] = sum
	}
	return totals, nil
}
// New charge le bytecode eBPF embarqué, supprime la limite mémoire
// RLIMIT_MEMLOCK (requise pour les ring buffers et les maps eBPF),
// RLIMIT_MEMLOCK (requise pour les maps eBPF),
// et retourne un Loader prêt à être attaché aux hooks.
//
// Cible : CentOS 8 / RHEL 8 et supérieur (kernel 4.18 avec BTF backporté).
// Cible : kernel 4.18+ avec BTF. Les perf event arrays sont supportés depuis
// kernel 4.4, bpf_skb_load_bytes depuis kernel 4.5, assurant une compatibilité
// maximale via le hook TC ingress.
// Le BTF natif est détecté automatiquement par cilium/ebpf via
// /sys/kernel/btf/vmlinux — aucun fallback manuel n'est requis.
func New() (*Loader, error) {
@ -57,6 +99,28 @@ func New() (*Loader, error) {
return nil, fmt.Errorf("chargement objets TC eBPF: %w", err)
}
// Trouver la map tc_stats par iteration des maps kernel
var statsMap *ebpf.Map
var mapID ebpf.MapID = 0
for {
nextID, err := ebpf.MapGetNextID(mapID)
if err != nil {
break
}
m, err := ebpf.NewMapFromID(nextID)
if err != nil {
mapID = nextID
continue
}
info, err := m.Info()
if err == nil && info.Name == "tc_stats" {
statsMap = m
break
}
m.Close()
mapID = nextID
}
// Charger les objets SSL/uprobe (uprobe_ssl.c)
sslObjs := &Ja4SslObjects{}
if err := LoadJa4SslObjects(sslObjs, nil); err != nil {
@ -64,42 +128,42 @@ func New() (*Loader, error) {
return nil, fmt.Errorf("chargement objets SSL eBPF: %w", err)
}
// Initialiser les readers pour chaque ring buffer
synReader, err := ringbuf.NewReader(tcObjs.RbTcpSyn)
// Initialiser les readers pour chaque perf event array
synReader, err := perf.NewReader(tcObjs.PbTcpSyn, perCPUBufferSize)
if err != nil {
sslObjs.Close()
tcObjs.Close()
return nil, fmt.Errorf("création reader rb_tcp_syn: %w", err)
return nil, fmt.Errorf("création reader pb_tcp_syn: %w", err)
}
tlsReader, err := ringbuf.NewReader(tcObjs.RbTlsHello)
tlsReader, err := perf.NewReader(tcObjs.PbTlsHello, perCPUBufferSize)
if err != nil {
synReader.Close()
sslObjs.Close()
tcObjs.Close()
return nil, fmt.Errorf("création reader rb_tls_hello: %w", err)
return nil, fmt.Errorf("création reader pb_tls_hello: %w", err)
}
httpPlainReader, err := ringbuf.NewReader(tcObjs.RbHttpPlain)
httpPlainReader, err := perf.NewReader(tcObjs.PbHttpPlain, perCPUBufferSize)
if err != nil {
tlsReader.Close()
synReader.Close()
sslObjs.Close()
tcObjs.Close()
return nil, fmt.Errorf("création reader rb_http_plain: %w", err)
return nil, fmt.Errorf("création reader pb_http_plain: %w", err)
}
sslReader, err := ringbuf.NewReader(sslObjs.RbSslData)
sslReader, err := perf.NewReader(sslObjs.PbSslData, perCPUBufferSize)
if err != nil {
httpPlainReader.Close()
tlsReader.Close()
synReader.Close()
sslObjs.Close()
tcObjs.Close()
return nil, fmt.Errorf("création reader rb_ssl_data: %w", err)
return nil, fmt.Errorf("création reader pb_ssl_data: %w", err)
}
acceptReader, err := ringbuf.NewReader(sslObjs.RbAccept)
acceptReader, err := perf.NewReader(sslObjs.PbAccept, perCPUBufferSize)
if err != nil {
sslReader.Close()
httpPlainReader.Close()
@ -107,12 +171,13 @@ func New() (*Loader, error) {
synReader.Close()
sslObjs.Close()
tcObjs.Close()
return nil, fmt.Errorf("création reader rb_accept: %w", err)
return nil, fmt.Errorf("création reader pb_accept: %w", err)
}
return &Loader{
tcObjs: tcObjs,
sslObjs: sslObjs,
statsMap: statsMap,
SynReader: synReader,
TLSReader: tlsReader,
SSLReader: sslReader,
@ -121,66 +186,79 @@ func New() (*Loader, error) {
}, nil
}
// AttachTC attache le programme XDP sur l'interface réseau spécifiée.
// Essaie le mode natif XDP (driver support) puis se replie sur le mode générique
// (SKB_MODE, compatible kernel ≥ 4.8, fonctionne dans les VMs).
// AttachTC attache le programme TC ingress (clsact qdisc) sur l'interface
// réseau spécifiée. Crée le qdisc clsact (idempotent) et attache le filtre BPF
// en mode direct-action. Compatible kernel 4.1+.
func (l *Loader) AttachTC(iface string) error {
// Trouver l'interface par nom (standard Go net package)
netIface, err := net.InterfaceByName(iface)
if err != nil {
return fmt.Errorf("interface réseau %q introuvable: %w", iface, err)
}
// Mode natif (meilleure performance sur serveurs avec NIC compatible XDP)
lnk, err := link.AttachXDP(link.XDPOptions{
Interface: netIface.Index,
Program: l.tcObjs.CaptureXdp,
Flags: link.XDPDriverMode,
})
// Obtenir le link netlink par index (plus fiable que par nom)
nlLink, err := netlink.LinkByIndex(netIface.Index)
if err != nil {
// Repli sur le mode générique (VMs, NICs sans driver XDP natif)
lnk, err = link.AttachXDP(link.XDPOptions{
Interface: netIface.Index,
Program: l.tcObjs.CaptureXdp,
Flags: link.XDPGenericMode,
})
if err != nil {
return fmt.Errorf("attachement XDP sur %q (natif et générique): %w", iface, err)
}
return fmt.Errorf("netlink link index %d introuvable: %w", netIface.Index, err)
}
l.tcLink = lnk
// Créer le qdisc clsact (idempotent via QdiscReplace)
qdisc := &netlink.Clsact{
QdiscAttrs: netlink.QdiscAttrs{
LinkIndex: nlLink.Attrs().Index,
Handle: netlink.MakeHandle(0xffff, 0),
Parent: netlink.HANDLE_CLSACT,
},
}
if err := netlink.QdiscReplace(qdisc); err != nil {
return fmt.Errorf("clsact qdisc sur %q: %w", iface, err)
}
// Attacher le programme BPF comme filtre ingress
filter := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: nlLink.Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: 1,
Protocol: unix.ETH_P_ALL,
Priority: 1,
},
ClassId: netlink.MakeHandle(1, 1),
Fd: l.tcObjs.CaptureTc.FD(),
DirectAction: true,
}
if err := netlink.FilterReplace(filter); err != nil {
return fmt.Errorf("TC filter ingress sur %q: %w", iface, err)
}
l.tcNlLink = nlLink
return nil
}
// AttachUprobes attache les uprobes SSL_read et SSL_set_fd
// sur le binaire libssl spécifié (ex: "/usr/lib64/libssl.so.3").
func (l *Loader) AttachUprobes(sslLibPath string) error {
// Vérifier que le fichier existe
if _, err := os.Stat(sslLibPath); err != nil {
return fmt.Errorf("bibliothèque SSL %q: %w", sslLibPath, err)
}
// Ouvrir le binaire exécutable pour les uprobes
ex, err := link.OpenExecutable(sslLibPath)
if err != nil {
return fmt.Errorf("ouverture exécutable %q pour uprobe: %w", sslLibPath, err)
}
// Uprobe sur SSL_set_fd (entry)
setFdLink, err := ex.Uprobe("SSL_set_fd", l.sslObjs.UprobeSslSetFd, nil)
if err != nil {
return fmt.Errorf("attachement uprobe SSL_set_fd: %w", err)
}
l.uprobeLinks = append(l.uprobeLinks, setFdLink)
// Uprobe sur SSL_read (entry)
readEntryLink, err := ex.Uprobe("SSL_read", l.sslObjs.UprobeSslReadEntry, nil)
if err != nil {
return fmt.Errorf("attachement uprobe SSL_read (entry): %w", err)
}
l.uprobeLinks = append(l.uprobeLinks, readEntryLink)
// Uretprobe sur SSL_read (exit)
readExitLink, err := ex.Uretprobe("SSL_read", l.sslObjs.UretprobeSslReadExit, nil)
if err != nil {
return fmt.Errorf("attachement uretprobe SSL_read (exit): %w", err)
@ -191,10 +269,7 @@ func (l *Loader) AttachUprobes(sslLibPath string) error {
}
// AttachAcceptProbe attache les tracepoints syscalls/sys_{enter,exit}_accept4.
// Les tracepoints sont préférés aux kprobes car ils ne dépendent pas du nom
// manglé __x64_sys_accept4 qui varie entre les versions du kernel (5.1+).
func (l *Loader) AttachAcceptProbe() error {
// Tracepoint à l'entrée de accept4
kpEntry, err := link.Tracepoint("syscalls", "sys_enter_accept4",
l.sslObjs.KprobeAccept4Entry, nil)
if err != nil {
@ -202,7 +277,6 @@ func (l *Loader) AttachAcceptProbe() error {
}
l.uprobeLinks = append(l.uprobeLinks, kpEntry)
// Tracepoint à la sortie de accept4
kpExit, err := link.Tracepoint("syscalls", "sys_exit_accept4",
l.sslObjs.KretprobeAccept4Exit, nil)
if err != nil {
@ -215,7 +289,6 @@ func (l *Loader) AttachAcceptProbe() error {
// Close détache tous les hooks eBPF et libère toutes les ressources associées.
func (l *Loader) Close() error {
// Fermer les readers RingBuffer
if l.HTTPPlainReader != nil {
l.HTTPPlainReader.Close()
}
@ -232,19 +305,26 @@ func (l *Loader) Close() error {
l.SynReader.Close()
}
// Détacher les uprobes et kprobes
// Détacher le filtre TC ingress
if l.tcNlLink != nil {
filter := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: l.tcNlLink.Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: 1,
Priority: 1,
},
}
// Ignorer l'erreur — le filtre peut déjà être supprimé
netlink.FilterDel(filter)
}
for _, lnk := range l.uprobeLinks {
if lnk != nil {
lnk.Close()
}
}
// Détacher le hook TC
if l.tcLink != nil {
l.tcLink.Close()
}
// Libérer les objets eBPF (maps, programmes)
if l.sslObjs != nil {
l.sslObjs.Close()
}
@ -255,259 +335,10 @@ func (l *Loader) Close() error {
return nil
}
// =============================================================================
// Types d'événements : représentations Go des structures C eBPF
// =============================================================================
// TCPSynEvent is the Go representation of a TCP SYN event captured by the
// TC ingress program.
type TCPSynEvent struct {
	SrcIP, DstIP     uint32
	SrcPort, DstPort uint16
	TTL, DFBit       uint8
	IPID, WindowSize uint16
	WindowScale      uint8
	MSS              uint16
	TCPOptions       [40]byte // fixed-size buffer holding the raw option bytes
	TCPOptionsLen    uint8    // length byte reported alongside TCPOptions
	Timestamp        uint64
}
// TLSHelloEvent is the Go representation of a TLS ClientHello event.
type TLSHelloEvent struct {
	SrcIP      uint32
	SrcPort    uint16
	Payload    []byte // copy of the captured payload bytes
	PayloadLen uint16 // length value carried by the event
	Timestamp  uint64
}
// SSLDataEvent is the Go representation of one block of decrypted SSL data
// reported by the uprobe program.
type SSLDataEvent struct {
	PID, TGID, FD, SrcIP uint32
	SrcPort              uint16
	Data                 []byte // copy of the decrypted bytes
	DataLen              uint32 // length value carried by the event
	Timestamp            uint64
	Direction            uint8
	EOF                  bool
}
// HTTPPlainEvent is the Go representation of a clear-text HTTP TCP payload
// captured by the TC ingress program.
type HTTPPlainEvent struct {
	SrcIP, DstIP     uint32
	SrcPort, DstPort uint16
	Payload          []byte // copy of the captured payload bytes
	PayloadLen       uint16 // length value carried by the event
	Timestamp        uint64
}
// AcceptEvent is the Go representation of an accepted TCP connection
// (accept4).
type AcceptEvent struct {
	PID, TGID, FD, SrcIP uint32
	SrcPort              uint16
	Timestamp            uint64
}
// =============================================================================
// Méthodes de lecture des RingBuffers
// =============================================================================
// ReadTCPSynEvent reads one TCP SYN event from the SYN reader and decodes it.
// It blocks until an event is available or ctx is cancelled.
//
// The record is decoded as a packed little-endian structure:
//
//	src_ip(4) dst_ip(4) src_port(2) dst_port(2) ttl(1) df(1) ip_id(2)
//	window(2) wscale(1) mss(2) opts(40) opts_len(1) ... ts(8)
//
// A record shorter than 64 bytes is rejected. The timestamp is only decoded
// when at least 70 bytes are present; otherwise it is left at zero.
//
// NOTE(review): the original layout note lists a 1-byte pad between opts_len
// and ts (71 bytes in total), which would place ts at bytes 63..71, whereas
// the code reads bytes 62..70 — confirm against the C struct definition.
func (l *Loader) ReadTCPSynEvent(ctx context.Context) (*TCPSynEvent, error) {
	rec, err := readRecord(ctx, l.SynReader)
	if err != nil {
		return nil, err
	}

	raw := rec.RawSample
	if n := len(raw); n < 64 {
		return nil, fmt.Errorf("tcp_syn_event trop court: %d octets", n)
	}

	le := binary.LittleEndian
	ev := new(TCPSynEvent)
	ev.SrcIP = le.Uint32(raw[0:4])
	ev.DstIP = le.Uint32(raw[4:8])
	ev.SrcPort = le.Uint16(raw[8:10])
	ev.DstPort = le.Uint16(raw[10:12])
	ev.TTL = raw[12]
	ev.DFBit = raw[13]
	ev.IPID = le.Uint16(raw[14:16])
	ev.WindowSize = le.Uint16(raw[16:18])
	ev.WindowScale = raw[18]
	ev.MSS = le.Uint16(raw[19:21])
	copy(ev.TCPOptions[:], raw[21:61])
	ev.TCPOptionsLen = raw[61]

	// The timestamp is optional: shorter records keep Timestamp == 0.
	if len(raw) >= 70 {
		ev.Timestamp = le.Uint64(raw[62:70])
	}
	return ev, nil
}
// ReadTLSHelloEvent reads one TLS ClientHello event from the TLS reader and
// decodes it. It blocks inside readRecord until a record is returned or ctx is
// done.
//
// Record layout (little-endian):
//
//	src_ip(4) src_port(2) payload(512) payload_len(2) ts(8) = 528 bytes
//
// Errors:
//   - any error returned by readRecord;
//   - the record is shorter than 8 bytes;
//   - payload_len exceeds the 512-byte payload area. Previously such a record
//     produced a zero-filled Payload of the bogus length instead of an error.
//
// A record too short to contain payload_len yields an empty payload, and one
// too short to contain the timestamp yields Timestamp == 0.
func (l *Loader) ReadTLSHelloEvent(ctx context.Context) (*TLSHelloEvent, error) {
	const (
		payloadOff = 6                       // after src_ip + src_port
		payloadMax = 512                     // size of the payload area
		lenOff     = payloadOff + payloadMax // 518
		tsOff      = lenOff + 2              // 520
		fullSize   = tsOff + 8               // 528
	)

	rec, err := readRecord(ctx, l.TLSReader)
	if err != nil {
		return nil, err
	}
	data := rec.RawSample
	if len(data) < 8 {
		return nil, fmt.Errorf("tls_hello_event trop court: %d octets", len(data))
	}

	var plen uint16
	if len(data) >= tsOff {
		plen = binary.LittleEndian.Uint16(data[lenOff:tsOff])
	}
	// The length field comes from the event itself: never trust it beyond the
	// size of the payload area.
	if plen > payloadMax {
		return nil, fmt.Errorf("tls_hello_event payload_len invalide: %d (max %d)", plen, payloadMax)
	}

	// plen > 0 implies len(data) >= tsOff, so the slice below is in range.
	payload := make([]byte, plen)
	copy(payload, data[payloadOff:payloadOff+int(plen)])

	var ts uint64
	if len(data) >= fullSize {
		ts = binary.LittleEndian.Uint64(data[tsOff:fullSize])
	}

	return &TLSHelloEvent{
		SrcIP:      binary.LittleEndian.Uint32(data[0:4]),
		SrcPort:    binary.LittleEndian.Uint16(data[4:6]),
		Payload:    payload,
		PayloadLen: plen,
		Timestamp:  ts,
	}, nil
}
// ReadSSLDataEvent reads one block of decrypted SSL data from the SSL reader
// and decodes it. It blocks inside readRecord until a record is returned or
// ctx is done.
//
// Record layout (little-endian):
//
//	pid_tgid(8) fd(4) src_ip(4) src_port(2) data(4096) data_len(4) ts(8) direction(1)
//
// PID is taken from the low 32 bits of pid_tgid and TGID from the high 32
// bits. The EOF field is not populated by this decoder.
//
// Errors:
//   - any error returned by readRecord;
//   - the record is shorter than 27 bytes;
//   - data_len exceeds the 4096-byte data area. Previously a corrupt 32-bit
//     length caused an allocation of up to 4 GiB that was never filled.
//
// Records too short to contain data_len, ts or direction leave the
// corresponding field at its zero value.
func (l *Loader) ReadSSLDataEvent(ctx context.Context) (*SSLDataEvent, error) {
	const (
		hdrLen  = 18               // pid_tgid + fd + src_ip + src_port
		dataMax = 4096             // size of the data area
		lenOff  = hdrLen + dataMax // 4114
		tsOff   = lenOff + 4       // 4118
		dirOff  = tsOff + 8        // 4126
	)

	rec, err := readRecord(ctx, l.SSLReader)
	if err != nil {
		return nil, err
	}
	data := rec.RawSample
	if len(data) < 27 {
		return nil, fmt.Errorf("ssl_data_event trop court: %d octets", len(data))
	}

	pidTGID := binary.LittleEndian.Uint64(data[0:8])

	var dlen uint32
	if len(data) >= tsOff {
		dlen = binary.LittleEndian.Uint32(data[lenOff:tsOff])
	}
	// Validate before allocating: the length is read from the event and must
	// not drive an unbounded allocation.
	if dlen > dataMax {
		return nil, fmt.Errorf("ssl_data_event data_len invalide: %d (max %d)", dlen, dataMax)
	}

	// dlen > 0 implies len(data) >= tsOff, so the slice below is in range.
	payload := make([]byte, dlen)
	copy(payload, data[hdrLen:hdrLen+int(dlen)])

	var ts uint64
	if len(data) >= dirOff {
		ts = binary.LittleEndian.Uint64(data[tsOff:dirOff])
	}
	var dir uint8
	if len(data) >= dirOff+1 {
		dir = data[dirOff]
	}

	return &SSLDataEvent{
		PID:       uint32(pidTGID & 0xFFFFFFFF),
		TGID:      uint32(pidTGID >> 32),
		FD:        binary.LittleEndian.Uint32(data[8:12]),
		SrcIP:     binary.LittleEndian.Uint32(data[12:16]),
		SrcPort:   binary.LittleEndian.Uint16(data[16:18]),
		Data:      payload,
		DataLen:   dlen,
		Timestamp: ts,
		Direction: dir,
	}, nil
}
// ReadHTTPPlainEvent reads one clear-text HTTP event from the HTTP reader and
// decodes it. It blocks inside readRecord until a record is returned or ctx is
// done.
//
// Record layout (little-endian):
//
//	src_ip(4) dst_ip(4) src_port(2) dst_port(2) payload(4096) payload_len(2) ts(8) = 4118 bytes
//
// Errors:
//   - any error returned by readRecord;
//   - the record is shorter than 12 bytes;
//   - payload_len exceeds the 4096-byte payload area. Previously such a record
//     produced a zero-filled Payload of the bogus length instead of an error.
//
// A record too short to contain payload_len yields an empty payload, and one
// too short to contain the timestamp yields Timestamp == 0.
func (l *Loader) ReadHTTPPlainEvent(ctx context.Context) (*HTTPPlainEvent, error) {
	const (
		payloadOff = 12                      // after the two addresses and ports
		payloadMax = 4096                    // size of the payload area
		lenOff     = payloadOff + payloadMax // 4108
		tsOff      = lenOff + 2              // 4110
		fullSize   = tsOff + 8               // 4118
	)

	rec, err := readRecord(ctx, l.HTTPPlainReader)
	if err != nil {
		return nil, err
	}
	data := rec.RawSample
	if len(data) < 12 {
		return nil, fmt.Errorf("http_plain_event trop court: %d octets", len(data))
	}

	var plen uint16
	if len(data) >= tsOff {
		plen = binary.LittleEndian.Uint16(data[lenOff:tsOff])
	}
	// The length field comes from the event itself: never trust it beyond the
	// size of the payload area.
	if plen > payloadMax {
		return nil, fmt.Errorf("http_plain_event payload_len invalide: %d (max %d)", plen, payloadMax)
	}

	// plen > 0 implies len(data) >= tsOff, so the slice below is in range.
	payload := make([]byte, plen)
	copy(payload, data[payloadOff:payloadOff+int(plen)])

	var ts uint64
	if len(data) >= fullSize {
		ts = binary.LittleEndian.Uint64(data[tsOff:fullSize])
	}

	return &HTTPPlainEvent{
		SrcIP:      binary.LittleEndian.Uint32(data[0:4]),
		DstIP:      binary.LittleEndian.Uint32(data[4:8]),
		SrcPort:    binary.LittleEndian.Uint16(data[8:10]),
		DstPort:    binary.LittleEndian.Uint16(data[10:12]),
		Payload:    payload,
		PayloadLen: plen,
		Timestamp:  ts,
	}, nil
}
// ReadAcceptEvent reads one accept4 event from the accept reader and decodes
// it.
//
// Record layout (little-endian):
//
//	pid_tgid(8) fd(4) src_ip(4) src_port(2) ts(8) = 26 bytes
//
// PID is the low 32 bits of pid_tgid and TGID the high 32 bits. A record
// shorter than 26 bytes is rejected with an error; errors from readRecord are
// returned unchanged.
func (l *Loader) ReadAcceptEvent(ctx context.Context) (*AcceptEvent, error) {
	rec, err := readRecord(ctx, l.AcceptReader)
	if err != nil {
		return nil, err
	}

	raw := rec.RawSample
	if n := len(raw); n < 26 {
		return nil, fmt.Errorf("accept_event trop court: %d octets", n)
	}

	le := binary.LittleEndian
	ids := le.Uint64(raw[0:8])

	ev := new(AcceptEvent)
	ev.PID = uint32(ids) // truncation keeps the low 32 bits
	ev.TGID = uint32(ids >> 32)
	ev.FD = le.Uint32(raw[8:12])
	ev.SrcIP = le.Uint32(raw[12:16])
	ev.SrcPort = le.Uint16(raw[16:18])
	ev.Timestamp = le.Uint64(raw[18:26])
	return ev, nil
}
// readRecord lit un record brut depuis un RingBuffer avec annulation via context.
func readRecord(ctx context.Context, rd *ringbuf.Reader) (ringbuf.Record, error) {
// readRecord lit un record brut depuis un PerfReader avec annulation via context.
func readRecord(ctx context.Context, rd *perf.Reader) (perf.Record, error) {
type result struct {
rec ringbuf.Record
rec perf.Record
err error
}
ch := make(chan result, 1)
@ -517,8 +348,8 @@ func readRecord(ctx context.Context, rd *ringbuf.Reader) (ringbuf.Record, error)
}()
select {
case <-ctx.Done():
rd.Close() // débloque le Read() bloquant
return ringbuf.Record{}, ctx.Err()
rd.Close()
return perf.Record{}, ctx.Err()
case r := <-ch:
return r.rec, r.err
}

View File

@ -60,6 +60,20 @@ type sessionRecord struct {
DurationMS *float64 `json:"duration_ms,omitempty"`
KeepAlives int `json:"keepalives,omitempty"`
HeaderOrderSig string `json:"header_order_signature,omitempty"`
// HTTP/2 fingerprinting passif
H2Fingerprint string `json:"h2_fingerprint,omitempty"`
H2SettingsFP string `json:"h2_settings_fp,omitempty"`
H2WindowUpdate uint32 `json:"h2_window_update,omitempty"`
H2PseudoOrder string `json:"h2_pseudo_order,omitempty"`
H2HasPriority uint8 `json:"h2_has_priority,omitempty"`
H2HeaderTableSize int32 `json:"h2_header_table_size"`
H2EnablePush int32 `json:"h2_enable_push"`
H2MaxConcurrentStreams int32 `json:"h2_max_concurrent_streams"`
H2InitialWindowSize int64 `json:"h2_initial_window_size"`
H2MaxFrameSize int32 `json:"h2_max_frame_size"`
H2MaxHeaderListSize int32 `json:"h2_max_header_list_size"`
H2EnableConnectProtocol int32 `json:"h2_enable_connect_protocol"`
}
// NewClickHouseWriter crée un writer et établit la connexion ClickHouse.
@ -218,11 +232,116 @@ func sessionToRecord(s *correlation.SessionState) sessionRecord {
rec.ResponseSize = &last.ResponseSize
rec.DurationMS = &last.DurationMS
rec.HeaderOrderSig = last.HeaderOrderSig
// Champs HTTP/2 passifs
if last.HTTP2Settings != nil {
h2 := last.HTTP2Settings
rec.H2WindowUpdate = h2.WindowUpdateIncrement
// Ordre des pseudo-headers → notation abrégée "m,a,s,p"
if len(h2.PseudoHeaderOrder) > 0 {
rec.H2PseudoOrder = pseudoOrderToShort(h2.PseudoHeaderOrder)
}
// Paramètres SETTINGS individuels (-1 = absent)
rec.H2HeaderTableSize = h2.HeaderTableSize
rec.H2EnablePush = h2.EnablePush
rec.H2MaxConcurrentStreams = h2.MaxConcurrentStreams
rec.H2InitialWindowSize = int64(h2.InitialWindowSize)
rec.H2MaxFrameSize = h2.MaxFrameSize
rec.H2MaxHeaderListSize = h2.MaxHeaderListSize
// Fingerprints composites Akamai
rec.H2Fingerprint = buildH2Fingerprint(h2)
rec.H2SettingsFP = buildH2SettingsFP(h2)
}
}
return rec
}
// pseudoOrderToShort converts a list of HTTP/2 pseudo-header names into a
// comma-separated short notation, one letter per header:
//
//	":method" -> m, ":authority" -> a, ":scheme" -> s, ":path" -> p
//
// Any other name is rendered as '?'.
// Example: [":method", ":authority", ":scheme", ":path"] -> "m,a,s,p".
//
// An empty or nil list yields "". The previous implementation computed a
// capacity of len*2-1 = -1 in that case, which makes make() panic at runtime.
func pseudoOrderToShort(headers []string) string {
	if len(headers) == 0 {
		return ""
	}
	// One letter per header plus one comma between consecutive headers.
	short := make([]byte, 0, len(headers)*2-1)
	for i, h := range headers {
		if i > 0 {
			short = append(short, ',')
		}
		switch h {
		case ":method":
			short = append(short, 'm')
		case ":authority":
			short = append(short, 'a')
		case ":scheme":
			short = append(short, 's')
		case ":path":
			short = append(short, 'p')
		default:
			short = append(short, '?')
		}
	}
	return string(short)
}
// buildH2Fingerprint builds the composite HTTP/2 fingerprint string.
//
// Format: SETTINGS|WINDOW_UPDATE|PRIORITY|PSEUDO_ORDER, where
//   - SETTINGS is a comma-separated list of "id:value" pairs for settings
//     1 to 6, in increasing id order;
//   - WINDOW_UPDATE is the increment, or empty when it is 0;
//   - PRIORITY is always "0" (not captured at the moment);
//   - PSEUDO_ORDER is the short pseudo-header order (e.g. "m,a,s,p"), or
//     empty when no order was recorded.
//
// A setting whose value is negative is treated as absent and omitted.
// Previously settings 1, 2 and 4 were written unconditionally, so an absent
// one leaked into the fingerprint as "id:-1" while settings 3, 5 and 6 were
// already skipped. The output is unchanged when every setting is present.
func buildH2Fingerprint(h2 *correlation.HTTP2Settings) string {
	// SETTINGS: collect only the parameters that are present.
	settings := make([]string, 0, 6)
	addSetting := func(id int, value int64) {
		if value >= 0 {
			settings = append(settings, fmt.Sprintf("%d:%d", id, value))
		}
	}
	addSetting(1, int64(h2.HeaderTableSize))
	addSetting(2, int64(h2.EnablePush))
	addSetting(3, int64(h2.MaxConcurrentStreams))
	addSetting(4, int64(h2.InitialWindowSize))
	addSetting(5, int64(h2.MaxFrameSize))
	addSetting(6, int64(h2.MaxHeaderListSize))

	var b strings.Builder
	b.WriteString(strings.Join(settings, ","))

	// WINDOW_UPDATE
	b.WriteByte('|')
	if h2.WindowUpdateIncrement > 0 {
		fmt.Fprintf(&b, "%d", h2.WindowUpdateIncrement)
	}

	// PRIORITY (not captured at the moment)
	b.WriteString("|0")

	// PSEUDO_ORDER
	b.WriteByte('|')
	if len(h2.PseudoHeaderOrder) > 0 {
		b.WriteString(pseudoOrderToShort(h2.PseudoHeaderOrder))
	}

	return b.String()
}
// buildH2SettingsFP builds the raw SETTINGS string: a comma-separated list of
// "id:value" entries for settings 3, 4 and 2, in that order. An entry is
// emitted only when its value is non-negative; the result is "" when none is.
func buildH2SettingsFP(h2 *correlation.HTTP2Settings) string {
	var fp strings.Builder
	emit := func(entry string) {
		if fp.Len() > 0 {
			fp.WriteByte(',')
		}
		fp.WriteString(entry)
	}

	if h2.MaxConcurrentStreams >= 0 {
		emit(fmt.Sprintf("3:%d", h2.MaxConcurrentStreams))
	}
	if h2.InitialWindowSize >= 0 {
		emit(fmt.Sprintf("4:%d", h2.InitialWindowSize))
	}
	if h2.EnablePush >= 0 {
		emit(fmt.Sprintf("2:%d", h2.EnablePush))
	}
	return fp.String()
}
// formatTLSVersion convertit la valeur numérique TLS en chaîne lisible.
func formatTLSVersion(v uint16) string {
switch v {

View File

@ -82,5 +82,23 @@ chown -R ja4ebpf:ja4ebpf \
%dir %attr(0750, ja4ebpf, ja4ebpf) %{_localstatedir}/log/ja4ebpf
%changelog
* %(date "+%a %b %d %Y") Build System <build@antitbone.local> - %{build_version}-1
- Build automatique via Dockerfile.package
* Sat Apr 12 2025 Antoine Jacquin <antoine@antitbone.dev> - 0.2.0-1
- feat(writer): sérialisation complète des 12 champs HTTP/2 passifs vers ClickHouse
(SETTINGS individuels, WINDOW_UPDATE, pseudo-headers, fingerprints composites Akamai)
- fix(writer): le parser H2 fonctionnait mais le writer ignorait HTTP2Settings
- fix(sql): TTL http_logs corrigé de 30 jours à 2 heures (conforme thèse §3.7)
- feat(browser_matcher): redistribution des poids CDN (0.35 HTTP + 0.35 TLS)
- feat(browser_matcher): exposition des 5 features browser_match_* dans le vecteur ML
- feat(shap): TreeExplainer XGBoost en priorité, ExIFFI + SHAP coexistants
- feat(pipeline): root_to_first_asset_delay et asset_load_stddev intégrés au vecteur ML
- feat(signatures): table browser_h2_signatures + rechargement 24h depuis ClickHouse
- feat(cycle): queue unknown_h2_fingerprints pour signatures H2 inconnues
* Thu Mar 27 2025 Antoine Jacquin <antoine@antitbone.dev> - 0.1.0-1
- Initial RPM package
- eBPF CO-RE agent: TC ingress + uprobe SSL_read
- JA4/JA4T TLS/TCP fingerprinting
- HTTP/2 passive fingerprinting (SETTINGS, WINDOW_UPDATE, pseudo-headers)
- Go Magic Bytes dispatcher with circular reassembly buffer
- 256-shard correlation engine, 500ms orphan timeout
- Multi-distro support: RHEL/CentOS/Rocky/AlmaLinux 8, 9, 10

View File

@ -23,7 +23,8 @@ Type=simple
User=ja4ebpf
Group=ja4ebpf
ExecStart=/usr/sbin/ja4ebpf -config /etc/ja4ebpf/config.yml
ExecStart=/usr/sbin/ja4ebpf
Environment=JA4EBPF_CONFIG=/etc/ja4ebpf/config.yml
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5s

View File

@ -124,7 +124,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
ENGINE = MergeTree
PARTITION BY log_date
ORDER BY (time, src_ip, dst_ip, ja4)
TTL log_date + INTERVAL 30 DAY
TTL log_date + INTERVAL 2 HOUR
SETTINGS
index_granularity = 8192,
ttl_only_drop_parts = 1;

View File

@ -295,3 +295,50 @@ TTL observed_at + INTERVAL 30 DAY
SETTINGS
index_granularity = 8192,
ttl_only_drop_parts = 1;
-- -----------------------------------------------------------------------------
-- browser_h2_signatures — structured HTTP/2 signature base, per browser family
-- Thesis §3.9.5: reloaded every 24h by the Python module
--
-- One row describes the expected H2 behaviour of a browser family for a
-- version range (version_min .. version_max). The *_json, *_required and
-- *_forbidden columns are stored as plain strings; their defaults ('{}', '[]')
-- suggest JSON-encoded content — TODO confirm the exact schema with the
-- producer.
-- ReplacingMergeTree(created_at): rows sharing the same sorting key
-- (family, version_min) are deduplicated at merge time, keeping the row with
-- the highest created_at.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ja4_processing.browser_h2_signatures
(
family LowCardinality(String),
version_min String DEFAULT '',
version_max String DEFAULT '',
h2_settings_json String DEFAULT '' CODEC(ZSTD(3)),
h2_settings_forbidden String DEFAULT '[]' CODEC(ZSTD(3)),
h2_window_update UInt32 DEFAULT 0,
h2_window_update_tolerance UInt32 DEFAULT 1000,
h2_priority_expected UInt8 DEFAULT 0,
pseudo_header_order String DEFAULT '',
tls_json String DEFAULT '{}' CODEC(ZSTD(3)),
headers_required String DEFAULT '[]' CODEC(ZSTD(3)),
headers_forbidden String DEFAULT '[]' CODEC(ZSTD(3)),
created_at DateTime DEFAULT now(),
is_active UInt8 DEFAULT 1
)
ENGINE = ReplacingMergeTree(created_at)
ORDER BY (family, version_min)
SETTINGS index_granularity = 8192;
-- ClickHouse dictionary for fast lookup by browser family.
-- It is sourced from ja4_processing.browser_h2_signatures and keyed on the
-- String column family (hence the COMPLEX_KEY_HASHED layout).
-- NOTE(review): the source table is ordered by (family, version_min) and may
-- therefore hold several version ranges per family, while this dictionary is
-- keyed on family alone — presumably only one row per family can be served;
-- confirm this is intended. The source also applies no filter on is_active.
CREATE DICTIONARY IF NOT EXISTS ja4_processing.dict_browser_h2_signatures
(
family String,
version_min String,
version_max String,
h2_settings_json String,
h2_settings_forbidden String,
h2_window_update UInt32 DEFAULT 0,
h2_window_update_tolerance UInt32 DEFAULT 1000,
h2_priority_expected UInt8 DEFAULT 0,
pseudo_header_order String DEFAULT '',
tls_json String DEFAULT '{}',
headers_required String DEFAULT '[]',
headers_forbidden String DEFAULT '[]',
is_active UInt8 DEFAULT 1
)
PRIMARY KEY (family)
SOURCE(CLICKHOUSE(TABLE 'browser_h2_signatures' DB 'ja4_processing'))
LIFETIME(MIN 82800 MAX 86400) -- Reloaded roughly every 24h (82800-86400 seconds)
LAYOUT(COMPLEX_KEY_HASHED());

View File

@ -1 +1 @@
{"dependencies":[["racc",["~> 1.4"]],["nokogiri",["~> 1.6"]],["diffy",[">= 0"]],["rexml",[">= 0"]],["xml-simple",[">= 0"]],["logger",[">= 0"]],["mime-types-data",["~> 3.2025",">= 3.2025.0507"]],["mime-types",[">= 0"]],["io-console",["~> 0.5"]],["reline",[">= 0"]],["formatador",[">= 0.2","< 2.0"]],["excon",["~> 1.0"]],["builder",[">= 0"]],["fog-core",["~> 2"]],["ruby-libvirt",[">= 0.7.0"]],["json",[">= 0"]],["fog-xml",["~> 0.1.1"]],["multi_json",["~> 1.10"]],["fog-json",[">= 0"]],["fog-libvirt",[">= 0.6.0"]],["vagrant-libvirt",["= 0.12.2"]],["vagrant-qemu",["= 0.3.12"]]],"checksum":"8812dc95b590d4059a84fe716eaa6eea39b29aecb1c994c959de405ba3705361","vagrant_version":"2.4.9"}
{"dependencies":[["racc",["~> 1.4"]],["nokogiri",["~> 1.6"]],["diffy",[">= 0"]],["rexml",[">= 0"]],["xml-simple",[">= 0"]],["logger",[">= 0"]],["mime-types-data",["~> 3.2025",">= 3.2025.0507"]],["mime-types",[">= 0"]],["io-console",["~> 0.5"]],["reline",[">= 0"]],["formatador",[">= 0.2","< 2.0"]],["excon",["~> 1.0"]],["builder",[">= 0"]],["fog-core",["~> 2"]],["ruby-libvirt",[">= 0.7.0"]],["json",[">= 0"]],["fog-xml",["~> 0.1.1"]],["multi_json",["~> 1.10"]],["fog-json",[">= 0"]],["fog-libvirt",[">= 0.6.0"]],["vagrant-libvirt",["= 0.12.2"]]],"checksum":"b69e3c206e3d26fb25b062fbb15a80865764c5efb5e9cce85cfac1f745449033","vagrant_version":"2.4.9"}

61
tests/vm/Vagrantfile vendored
View File

@ -1,7 +1,12 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
# =============================================================================
# Vagrantfile — VM de test ja4ebpf sur Rocky Linux 9
# Vagrantfile — VMs de test ja4ebpf multi-distro
#
# 3 VMs pour les tests unitaires eBPF sur kernel réel :
# - centos8 : CentOS 8 (el8)
# - rocky9 : Rocky Linux 9 (el9)
# - rocky10 : Rocky Linux 10 (el10)
#
# Fournit un environnement kernel complet pour les tests eBPF :
# - tracefs / debugfs montés
@ -14,42 +19,53 @@
# sudo usermod -aG libvirt,kvm $USER # puis se reconnecter
#
# Utilisation :
# vagrant up # créer + provisionner (~5 min)
# vagrant ssh # connexion SSH
# make test-vm-nginx # lancer les tests depuis le host
# vagrant destroy -f # détruire la VM
# vagrant up # créer + provisionner toutes les VMs
# vagrant up rocky9 # créer une seule VM
# vagrant ssh rocky9 # connexion SSH
# make test-vm-nginx # test nginx sur Rocky 9 (défaut)
# make test-vm-all # tous les tests sur Rocky 9
# ./tests/vm/run-all-vms.sh # tests sur les 3 VMs
# vagrant destroy -f # détruire toutes les VMs
# =============================================================================
Vagrant.configure("2") do |config|
# ── Box Rocky Linux 9 avec provider libvirt (image qcow2) ─────────────────
config.vm.box = "generic/rocky9"
# ── Désactiver synced_folder par défaut (utiliser rsync explicitement) ─────
# ── Désactiver synced_folder par défaut ─────────────────────────────────────
config.vm.synced_folder ".", "/vagrant", disabled: true
# ── Provider libvirt ───────────────────────────────────────────────────────
# ── Provider libvirt commun ─────────────────────────────────────────────────
config.vm.provider :libvirt do |v|
v.cpus = 4
v.memory = 4096
v.nested = false
v.cpu_mode = "host-passthrough" # expose les capacités CPU hôte → KVM perf
v.cpu_mode = "host-passthrough"
v.driver = "kvm"
v.disk_bus = "virtio"
v.nic_model_type = "virtio"
end
# ── Synchronisation du projet via rsync ────────────────────────────────────
# ── Synchronisation du projet via rsync ────────────────────────────────────
config.vm.synced_folder "../..", "/ja4-platform",
type: "rsync",
rsync__exclude: [".git/", "old/", "*.rpm", "dist/"]
# ── Provisioning ───────────────────────────────────────────────────────────
config.vm.provision "shell", path: "provision.sh"
# ═══════════════════════════════════════════════════════════════════════════
# VM 1 : CentOS 8 (el8)
# ═══════════════════════════════════════════════════════════════════════════
config.vm.define "centos8", autostart: false do |node|
node.vm.box = "centos/8"
node.vm.provision "shell", path: "provision-el8.sh"
node.vm.post_up_message = "VM centos8 prête ! Tests : make test-vm-centos8"
end
# ── Message post-démarrage ─────────────────────────────────────────────────
config.vm.post_up_message = <<~MSG
VM ja4ebpf prête !
# ═══════════════════════════════════════════════════════════════════════════
# VM 2 : Rocky Linux 9 (el9) — VM par défaut
# ═══════════════════════════════════════════════════════════════════════════
config.vm.define "rocky9", primary: true do |node|
node.vm.box = "generic/rocky9"
node.vm.provision "shell", path: "provision.sh"
node.vm.post_up_message = <<~MSG
VM rocky9 prête !
Depuis la racine du projet :
make vm-ssh # connexion interactive
@ -57,4 +73,15 @@ Vagrant.configure("2") do |config|
make test-vm-all # tous les tests
make vm-rebuild-ja4ebpf # resynchroniser + recompiler après modif
MSG
end
# ═══════════════════════════════════════════════════════════════════════════
# VM 3 : "rocky10" (el10) — NOTE: this VM uses the almalinux/10 box (AlmaLinux 10), not a Rocky Linux 10 image
# ═══════════════════════════════════════════════════════════════════════════
config.vm.define "rocky10", autostart: false do |node|
node.vm.box = "almalinux/10"
node.vm.provision "shell", path: "provision.sh"
node.vm.post_up_message = "VM rocky10 prête ! Tests : make test-vm-rocky10"
end
end

107
tests/vm/debug-mode-host.sh Normal file
View File

@ -0,0 +1,107 @@
#!/usr/bin/env bash
# debug-mode-host.sh — ja4ebpf debug-mode test driven by host→VM traffic.
#
# Usage: ./debug-mode-host.sh [vm]        (vm defaults to rocky9)
#
# Requires the debug build to be present in the VM at /tmp/ja4ebpf-debug.
#
# Steps:
#   1. in the VM : install the debug binary, start nginx (80/443) and ja4ebpf
#   2. on the host: read the VM eth0 IPv4 address
#   3. on the host: send HTTP/HTTPS requests to that address
#   4. wait for the debug dump
#   5. in the VM : print BPF stats + log tail, then stop the services
#
# Quoting: each remote script below is a host-side double-quoted string that
# wraps a single-quoted `bash -c` argument for the VM. Inside it:
#   \$ and \"       reach the VM shell as $ and "
#   \\\" and \\\$   reach it as \" and \$ (literals inside remote double quotes)
#   a single quote must never appear: it would end the bash -c argument.
set -euo pipefail

VM="${1:-rocky9}"
cd "$(dirname "$0")"

echo "=== [1] Setup VM: nginx + ja4ebpf debug ==="
vagrant ssh "$VM" -- "sudo bash -c '
PATH=/usr/local/bin:\$PATH

# Install debug binary
cp /tmp/ja4ebpf-debug /usr/local/bin/ja4ebpf
chmod +x /usr/local/bin/ja4ebpf

# Start nginx
nginx -s stop 2>/dev/null || true; sleep 1
mkdir -p /run/nginx /var/www/html
# The inner quotes are escaped so the health payload stays valid JSON
echo \"{\\\"ok\\\":true}\" > /var/www/html/health
cat > /etc/nginx/nginx.conf << \"NEOF\"
worker_processes 1;
events { worker_connections 64; }
http {
    server {
        listen 80;
        listen 443 ssl;
        ssl_certificate /etc/pki/tls/certs/nginx.crt;
        ssl_certificate_key /etc/pki/tls/private/nginx.key;
        root /var/www/html;
    }
}
NEOF
openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \
    -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null
nginx

# Start ja4ebpf debug
pkill ja4ebpf 2>/dev/null || true; sleep 1
cat > /tmp/ja4-debug.yml << \"YEOF\"
interface: eth0
ssl_lib_path: \"/usr/lib64/libssl.so.3\"
debug: true
clickhouse:
  dsn: \"clickhouse://default:@127.0.0.1:9000/ja4_logs\"
  batch_size: 50
  flush_secs: 1
correlation:
  timeout_ms: 500
  slowloris_ms: 10000
log:
  level: \"debug\"
  format: \"text\"
YEOF
JA4EBPF_CONFIG=/tmp/ja4-debug.yml ja4ebpf > /tmp/ja4-debug.log 2>&1 &
sleep 3
PID=\$(pgrep ja4ebpf || echo NONE)
echo \" ja4ebpf PID=\$PID\"
if [ \"\$PID\" = \"NONE\" ]; then cat /tmp/ja4-debug.log; exit 1; fi

# Open firewall
firewall-cmd --add-service=http --add-service=https 2>/dev/null || true

# Show eth0 IP. The empty awk string is escaped twice: unescaped it collapses
# on the VM side and turns sub() into a syntax error that fails this whole step.
ip -4 addr show eth0 | awk \"/inet /{sub(/\\/.*/,\\\"\\\",\\\$2); print \\\" eth0 IP: \\\"\\\$2; exit}\"
'" 2>&1

echo ""
echo "=== [2] Get VM IP ==="
# `|| true`: under pipefail a failed ssh would otherwise abort here, before the
# explicit error below can be printed. `tr` drops any CR added by the transport.
VM_IP=$(vagrant ssh "$VM" -- "ip -4 addr show eth0" 2>/dev/null \
    | tr -d '\r' \
    | awk '/inet /{sub(/\/.*/,"",$2); print $2; exit}' || true)
echo " VM IP: $VM_IP"
if [ -z "$VM_IP" ]; then
    echo " ERROR: no eth0 IP found"
    exit 1
fi

echo ""
echo "=== [3] Generate traffic from HOST to VM ==="
for i in $(seq 1 3); do
    curl -sf "http://$VM_IP/health" -o /dev/null -w " HTTP $i: %{http_code}\n" 2>&1 || echo " HTTP $i: FAIL"
    curl -skf "https://$VM_IP/health" -o /dev/null -w " HTTPS $i: %{http_code}\n" 2>&1 || echo " HTTPS $i: FAIL"
done

echo ""
echo "=== [4] Wait for debug dump (8s) ==="
sleep 8

echo ""
echo "=== [5] Collect results ==="
vagrant ssh "$VM" -- "sudo bash -c '
echo \" ja4ebpf: \$(pgrep ja4ebpf > /dev/null && echo alive || echo DEAD)\"
echo \"\"
echo \" === BPF stats ===\"
# Dump the map by name. Plain bpftool output prints the map id as N: so a
# grep for the word id can only ever pick up an unrelated btf_id value.
STATS=\$(bpftool map dump name xdp_stats 2>/dev/null || true)
if [ -n \"\$STATS\" ]; then
echo \"\$STATS\" | sed \"s/^/ /\"
else
echo \" xdp_stats map not found!\"
fi
echo \"\"
echo \" === Log tail ===\"
tail -30 /tmp/ja4-debug.log | sed \"s/^/ /\"

# Cleanup
pkill ja4ebpf 2>/dev/null || true
nginx -s stop 2>/dev/null || true
'" 2>&1

98
tests/vm/debug-mode.sh Normal file
View File

@ -0,0 +1,98 @@
#!/usr/bin/env bash
# debug-mode.sh — quick smoke test of the ja4ebpf debug mode, run inside a VM.
#
# Must run as root in the VM; expects the debug build at /tmp/ja4ebpf-debug.
# Usage: vagrant upload /ja4-platform/tests/vm/debug-mode.sh /tmp/debug-mode.sh rocky9
#        vagrant ssh rocky9 -- 'sudo bash /tmp/debug-mode.sh'
#
# WARNING: overwrites /etc/nginx/nginx.conf and /usr/local/bin/ja4ebpf.
set -euo pipefail

# Stop everything this script started. Registered on EXIT so that an early
# failure (set -e, or the explicit exit 1 below) does not leave nginx and
# ja4ebpf running in the VM.
cleanup() {
    pkill ja4ebpf 2>/dev/null || true
    nginx -s stop 2>/dev/null || true
}
trap cleanup EXIT

echo "=== [1] Install debug binary ==="
cp /tmp/ja4ebpf-debug /usr/local/bin/ja4ebpf
chmod +x /usr/local/bin/ja4ebpf

echo "=== [2] Start nginx ==="
nginx -s stop 2>/dev/null || true; sleep 1
mkdir -p /run/nginx /var/www/html
echo '{"ok":true}' > /var/www/html/health
# Minimal nginx config for TLS
cat > /etc/nginx/nginx.conf << 'NEOF'
worker_processes 1;
events { worker_connections 64; }
http {
    server {
        listen 80;
        listen 443 ssl;
        ssl_certificate /etc/pki/tls/certs/nginx.crt;
        ssl_certificate_key /etc/pki/tls/private/nginx.key;
        root /var/www/html;
    }
}
NEOF
openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \
    -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null
nginx && echo " nginx ready"

echo "=== [3] Start ja4ebpf in DEBUG mode ==="
pkill ja4ebpf 2>/dev/null || true; sleep 1
# Config with debug=true — no ClickHouse needed in debug mode
cat > /tmp/ja4-debug.yml << 'YEOF'
interface: eth0
ssl_lib_path: "/usr/lib64/libssl.so.3"
debug: true
clickhouse:
  dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs"
  batch_size: 50
  flush_secs: 1
correlation:
  timeout_ms: 500
  slowloris_ms: 10000
log:
  level: "debug"
  format: "text"
YEOF
JA4EBPF_CONFIG=/tmp/ja4-debug.yml ja4ebpf > /tmp/ja4-debug.log 2>&1 &
sleep 3
JA4PID=$(pgrep ja4ebpf || echo NONE)
if [ "$JA4PID" = "NONE" ]; then
    echo " ja4ebpf DEAD! Log:"
    cat /tmp/ja4-debug.log
    exit 1
fi
echo " ja4ebpf PID=$JA4PID"

# Verify XDP
echo " XDP check:"
ip -d link show dev eth0 | grep -i xdp || echo " (no XDP attached)"

echo "=== [4] Generate traffic ==="
ETH0_IP=$(ip -4 addr show eth0 | awk '/inet /{sub(/\/.*/,"",$2); print $2; exit}')
echo " eth0 IP: $ETH0_IP"
# HTTP traffic from localhost via eth0 IP
for i in $(seq 1 5); do
    if curl -sf "http://$ETH0_IP/health" -o /dev/null 2>&1; then
        echo " HTTP $i: OK"
    else
        echo " HTTP $i: FAIL"
    fi
    if curl -skf "https://$ETH0_IP/health" -o /dev/null 2>&1; then
        echo " HTTPS $i: OK"
    else
        echo " HTTPS $i: FAIL"
    fi
done

echo "=== [5] Wait for debug dump (6s) ==="
sleep 6

echo "=== [6] Results ==="
echo " ja4ebpf: $(pgrep ja4ebpf > /dev/null && echo alive || echo DEAD)"
echo ""
echo " === Last 20 lines of log ==="
tail -20 /tmp/ja4-debug.log | sed 's/^/ /'
echo ""
echo " === BPF map stats (bpftool) ==="
# Dump the map by name. Plain `bpftool map show` prints the id as "N:", so
# grepping for "id <n>" can only match an unrelated "btf_id <n>" field.
STATS=$(bpftool map dump name xdp_stats 2>/dev/null | head -40 || true)
if [ -n "$STATS" ]; then
    echo "$STATS" | sed 's/^/ /'
else
    echo " xdp_stats map not found!"
fi
# Services are stopped by the EXIT trap.

96
tests/vm/debug-test.sh Normal file
View File

@ -0,0 +1,96 @@
#!/usr/bin/env bash
# debug-test.sh — start ClickHouse, nginx and ja4ebpf inside the VM, wait for
# traffic sent from the host, then print program stats, the raw row count and
# the ja4ebpf log.
#
# Must run as root in the VM with the project synced at /ja4-platform.
# The host signals "traffic sent" by creating /tmp/traffic-done in the VM;
# otherwise the script continues after 60 seconds.
set -euo pipefail
export PATH=/usr/local/bin:/usr/local/go/bin:$PATH

# Stop everything this script started. Each step is independent (`|| true`) so
# that an already-dead ja4ebpf does not prevent nginx/ClickHouse from being
# stopped, and the EXIT trap makes it run on early failures too.
cleanup() {
    pkill ja4ebpf 2>/dev/null || true
    nginx -s stop 2>/dev/null || true
    docker rm -f ja4-clickhouse 2>/dev/null || true
}
trap cleanup EXIT

echo "=== Starting ClickHouse ==="
docker rm -f ja4-clickhouse 2>/dev/null || true

# Schema files mounted read-only into /initdb-src, in load order.
SQL_FILES=(
    00_database 01_raw_tables 02_dictionaries 03_anubis_tables
    04_mv_http_logs 05_aggregation_tables 06_ml_tables 07_ai_features_view
    08_users 09_audit_table 10_perf_indexes 11_views 12_thesis_features
)
SQL_MOUNTS=()
for name in "${SQL_FILES[@]}"; do
    SQL_MOUNTS+=(-v "/ja4-platform/shared/clickhouse/${name}.sql:/initdb-src/${name}.sql:ro")
done

docker run -d --name ja4-clickhouse -p 8123:8123 -p 9000:9000 \
    -e CLICKHOUSE_DB=ja4_processing -e CLICKHOUSE_USER=default -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \
    -v /ja4-platform/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh \
    -v /ja4-platform/tests/integration/platform/csv-stubs:/var/lib/clickhouse/user_files \
    "${SQL_MOUNTS[@]}" \
    clickhouse/clickhouse-server:24.8

# Poll /ping for up to 60s. The status is tracked explicitly: `$?` after the
# loop is always 0 (either `break` or the last `sleep`), so it cannot be used.
CH_RC=1
for _ in $(seq 1 30); do
    if curl -sf http://localhost:8123/ping >/dev/null 2>&1; then
        CH_RC=0
        break
    fi
    sleep 2
done
echo "CH ready: $CH_RC"
if [ "$CH_RC" -ne 0 ]; then
    echo "ClickHouse did not answer /ping within 60s"
    exit 1
fi

echo "=== Starting nginx ==="
pkill nginx 2>/dev/null || true; sleep 1
mkdir -p /run/nginx /var/www/html
echo '{"ok":true}' > /var/www/html/health
cp /ja4-platform/tests/integration/nginx/platform/nginx.conf /etc/nginx/nginx.conf
openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \
    -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null
nginx && echo "nginx OK"

echo "=== Starting ja4ebpf ==="
pkill ja4ebpf 2>/dev/null || true; sleep 1
cat > /tmp/ja4.yml << 'YEOF'
interface: eth0
ssl_lib_path: "/usr/lib64/libssl.so.3"
clickhouse:
  dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs"
  batch_size: 50
  flush_secs: 1
correlation:
  timeout_ms: 500
  slowloris_ms: 10000
log:
  level: "debug"
  format: "json"
YEOF
JA4EBPF_CONFIG=/tmp/ja4.yml ja4ebpf > /tmp/ja4.log 2>&1 &
sleep 4
JA4PID=$(pgrep ja4ebpf || echo NONE)
echo "ja4ebpf PID: $JA4PID"
if [ "$JA4PID" = "NONE" ]; then
    echo "DEAD! Logs:"
    cat /tmp/ja4.log
    exit 1
fi

echo "=== XDP status ==="
# grep exits 1 when nothing is attached; without the fallback set -e would
# abort the whole script right here.
ip link show dev eth0 | grep -i xdp || echo "(no XDP attached)"
echo "=== Prog stats ==="
bpftool prog show name capture_xdp 2>/dev/null || echo "no prog"

echo ""
echo "=== Waiting for external traffic ==="
# Drop a signal file left by a previous run, otherwise the wait ends at once.
rm -f /tmp/traffic-done
echo "Send traffic from host to $(ip -4 addr show eth0 | awk '/inet /{sub(/\/.*/, "", $2); print $2}')"
echo "After sending, run 'touch /tmp/traffic-done' in the VM or wait 60s..."
# Wait for signal or timeout
for _ in $(seq 1 60); do
    [ -f /tmp/traffic-done ] && break
    sleep 1
done

echo "=== After traffic ==="
pgrep ja4ebpf && echo "ja4ebpf still alive" || echo "ja4ebpf DEAD"
bpftool prog show name capture_xdp 2>/dev/null || echo "no prog"
echo "=== Raw data count ==="
curl -sf "http://localhost:8123/?database=ja4_logs" --data-urlencode "query=SELECT count() FROM http_logs_raw" 2>/dev/null || echo "0"
echo "=== ja4ebpf logs ==="
cat /tmp/ja4.log
# Services are stopped by the EXIT trap.

111
tests/vm/debug-xdp.sh Normal file
View File

@ -0,0 +1,111 @@
#!/usr/bin/env bash
# debug-xdp.sh — XDP capture test with host traffic, in a single SSH session.
# Usage: vagrant ssh rocky9 -- 'sudo bash -c "PATH=/usr/local/bin:$PATH /ja4-platform/tests/vm/debug-xdp.sh"'
#
# Starts ClickHouse, nginx and ja4ebpf in the VM, waits (60s max) for the host
# to create /tmp/traffic-done, then counts rows in ja4_logs.http_logs_raw.
# Exit status: 0 when at least one row was captured, 1 otherwise.
set -euo pipefail
export PATH=/usr/local/bin:/usr/local/go/bin:$PATH

# Accepted for command-line compatibility; only nginx is started below.
STACK="${1:-nginx}"

# Stop everything this script started. Every step is independent (`|| true`),
# and the EXIT trap runs it on early failures as well.
cleanup() {
    pkill ja4ebpf 2>/dev/null || true
    nginx -s stop 2>/dev/null || true
    docker rm -f ja4-clickhouse 2>/dev/null || true
}
trap cleanup EXIT

# === Start ClickHouse ===
echo "[1] Starting ClickHouse..."
docker rm -f ja4-clickhouse 2>/dev/null || true

# Schema files mounted read-only into /initdb-src, in load order.
SQL_FILES=(
    00_database 01_raw_tables 02_dictionaries 03_anubis_tables
    04_mv_http_logs 05_aggregation_tables 06_ml_tables 07_ai_features_view
    08_users 09_audit_table 10_perf_indexes 11_views 12_thesis_features
)
SQL_MOUNTS=()
for name in "${SQL_FILES[@]}"; do
    SQL_MOUNTS+=(-v "/ja4-platform/shared/clickhouse/${name}.sql:/initdb-src/${name}.sql:ro")
done

docker run -d --name ja4-clickhouse -p 8123:8123 -p 9000:9000 \
    -e CLICKHOUSE_DB=ja4_processing -e CLICKHOUSE_USER=default \
    -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \
    -v /ja4-platform/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh \
    -v /ja4-platform/tests/integration/platform/csv-stubs:/var/lib/clickhouse/user_files \
    "${SQL_MOUNTS[@]}" \
    clickhouse/clickhouse-server:24.8 >/dev/null

# Poll /ping for up to 60s and fail loudly on timeout instead of announcing
# "ready" unconditionally.
CH_READY=0
for _ in $(seq 1 30); do
    if curl -sf http://localhost:8123/ping >/dev/null 2>&1; then
        CH_READY=1
        break
    fi
    sleep 2
done
if [ "$CH_READY" -ne 1 ]; then
    echo " ClickHouse timeout"
    exit 1
fi
echo " ClickHouse ready"

# === Start nginx ===
echo "[2] Starting nginx..."
nginx -s stop 2>/dev/null || true; sleep 1
mkdir -p /run/nginx /var/www/html
echo '{"ok":true}' > /var/www/html/health
cp /ja4-platform/tests/integration/nginx/platform/nginx.conf /etc/nginx/nginx.conf
openssl req -x509 -nodes -days 365 -subj /CN=test -newkey rsa:2048 \
    -keyout /etc/pki/tls/private/nginx.key -out /etc/pki/tls/certs/nginx.crt 2>/dev/null
nginx && echo " nginx ready"

# === Start ja4ebpf ===
echo "[3] Starting ja4ebpf..."
pkill ja4ebpf 2>/dev/null || true; sleep 1
cat > /tmp/ja4.yml << 'YEOF'
interface: eth0
ssl_lib_path: "/usr/lib64/libssl.so.3"
clickhouse:
  dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs"
  batch_size: 50
  flush_secs: 1
correlation:
  timeout_ms: 500
  slowloris_ms: 10000
log:
  level: "debug"
  format: "json"
YEOF
JA4EBPF_CONFIG=/tmp/ja4.yml ja4ebpf > /tmp/ja4.log 2>&1 &
sleep 3
JA4PID=$(pgrep ja4ebpf || echo NONE)
if [ "$JA4PID" = "NONE" ]; then
    echo " ja4ebpf DEAD!"; cat /tmp/ja4.log; exit 1
fi
echo " ja4ebpf PID=$JA4PID"

# Verify XDP
XDP_INFO=$(ip link show dev eth0 | grep "prog/xdp" || echo NONE)
echo " XDP: $XDP_INFO"

# Show eth0 IP
ETH0_IP=$(ip -4 addr show eth0 | awk '/inet /{sub(/\/.*/,"",$2); print $2; exit}')
echo ""
echo "╔══════════════════════════════════════╗"
echo "║ Services prêts — IP: $ETH0_IP"
echo "║ Attente trafic host (60s max)..."
echo "╚══════════════════════════════════════╝"

# Wait for host traffic signal. A file left over from a previous run is
# removed first, otherwise the wait would end immediately.
rm -f /tmp/traffic-done
for _ in $(seq 1 60); do
    [ -f /tmp/traffic-done ] && break
    sleep 1
done

# Check prog run count
echo "[4] Checking results..."
# pgrep output is discarded so that PIDs do not leak into the status line.
echo " ja4ebpf: $(pgrep ja4ebpf >/dev/null && echo alive || echo DEAD)"
# Informational only: must not abort the run (pipefail) before the verdict.
bpftool prog show name capture_xdp 2>/dev/null | head -5 || true

# Check raw data
RAW=$(curl -sf "http://localhost:8123/?database=ja4_logs" --data-urlencode "query=SELECT count() FROM http_logs_raw" 2>/dev/null || echo "0")
echo " http_logs_raw: $RAW lignes"

# ja4ebpf logs
echo " Logs:"
tail -5 /tmp/ja4.log | sed 's/^/ /'

# Cleanup (also registered on EXIT; running it twice is harmless)
cleanup

if [ "${RAW:-0}" -gt 0 ] 2>/dev/null; then
    echo ""
    echo " SUCCESS: $RAW rows captured"
    exit 0
else
    echo ""
    echo " FAIL: 0 rows captured"
    exit 1
fi

65
tests/vm/provision-el8.sh Executable file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env bash
# =============================================================================
# provision-el8.sh — CentOS 8 provisioning (archived "vault" repositories)
#
# CentOS Linux 8 reached end of life on 31 December 2021; its package
# repositories are only available from vault.centos.org.
#
# NOTE(review): unlike provision.sh, this script does not install Docker,
# which run-tests-vm.sh uses to start ClickHouse — confirm whether the
# centos8 VM is expected to run the full test stack.
# =============================================================================
set -euo pipefail

log() { echo "[provision] $(date +%H:%M:%S) $*"; }

# ── 1. Point the repositories at vault.centos.org ────────────────────────────
log "Configuration des dépôts CentOS 8 vault..."
sed -i 's|^mirrorlist=|#mirrorlist=|' /etc/yum.repos.d/CentOS-*.repo 2>/dev/null || true
sed -i 's|^#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|' /etc/yum.repos.d/CentOS-*.repo 2>/dev/null || true
dnf clean all
dnf update -y --quiet

# ── 2. eBPF toolchain ────────────────────────────────────────────────────────
# Installed in separate, still best-effort transactions: a single unavailable
# package (typically kernel-devel for the exact running kernel) must not
# discard the whole toolchain, nor epel-release which later steps rely on.
log "Installation toolchain eBPF..."
dnf install -y epel-release dnf-plugins-core \
    || log "WARN: epel-release / dnf-plugins-core non installés"
dnf install -y clang llvm libbpf-devel bpftool make git curl tar gzip \
    || log "WARN: toolchain eBPF incomplète"
dnf install -y "kernel-devel-$(uname -r)" \
    || log "WARN: kernel-devel-$(uname -r) indisponible"

# ── 3. Go ─────────────────────────────────────────────────────────────────────
log "Installation de Go..."
GO_VERSION="1.24.3"
# Reinstall only when Go is missing or is not exactly the pinned version.
if ! command -v go &>/dev/null || [[ "$(go version 2>/dev/null | awk '{print $3}')" != "go${GO_VERSION}" ]]; then
    curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" -o /tmp/go.tar.gz
    rm -rf /usr/local/go
    tar -C /usr/local -xzf /tmp/go.tar.gz
    rm /tmp/go.tar.gz
fi
cat > /etc/profile.d/go.sh << 'EOF'
export PATH="/usr/local/go/bin:$PATH"
export GOPATH="/home/vagrant/go"
EOF

# ── 4. Web servers (nginx + httpd) + TLS + hitch + varnish ───────────────────
log "Installation des serveurs web et reverse proxy..."
dnf install -y nginx openssl curl
dnf install -y httpd mod_ssl || true
dnf install -y hitch varnish || true

# Open HTTP/HTTPS for host→VM traffic, as provision.sh does. Best-effort:
# firewalld may be absent or stopped on this box.
log "Configuration firewall..."
firewall-cmd --add-service=http --add-service=https --permanent 2>/dev/null || true
firewall-cmd --add-port=80/tcp --add-port=443/tcp --permanent 2>/dev/null || true
firewall-cmd --reload 2>/dev/null || true

# ── 5. Python3 + test tools ──────────────────────────────────────────────────
log "Installation Python3 et outils de test..."
dnf install -y python3 python3-pip
# Fall back to plain httpx when the http2 extra cannot be installed.
pip3 install --quiet "httpx[http2]" requests 2>/dev/null || pip3 install --quiet httpx requests

# ── 6. Mount tracefs + debugfs ───────────────────────────────────────────────
log "Configuration des pseudo-systèmes de fichiers eBPF..."
mount -t tracefs tracefs /sys/kernel/tracing 2>/dev/null || true
mount -t debugfs debugfs /sys/kernel/debug 2>/dev/null || true

# ── 7. Build ja4ebpf ─────────────────────────────────────────────────────────
log "Build initial de ja4ebpf..."
export PATH="/usr/local/go/bin:$PATH"
cd /ja4-platform/services/ja4ebpf
# go generate is allowed to fail; the build below is not (pipefail + set -e).
GOWORK=off go generate ./internal/loader/ 2>&1 | tail -5 || log "go generate: erreur (normal si vmlinux.h absent)"
GOWORK=off CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
    go build -ldflags="-s -w" -o /usr/local/bin/ja4ebpf ./cmd/ja4ebpf/ 2>&1 | tail -5

log "Provisionnement CentOS 8 terminé !"

View File

@ -50,14 +50,31 @@ EOF
log "Installation de Docker..."
dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
dnf install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
# Sur el10+ (kernel 6.12+), nf_tables a des incompatibilités avec iptables-nft.
# Désactiver la gestion iptables par Docker pour éviter l'échec au démarrage.
if ! systemctl start docker 2>/dev/null; then
log "Docker: fallback iptables=false pour kernel $(uname -r)"
mkdir -p /etc/docker
echo '{"iptables": false}' > /etc/docker/daemon.json
fi
systemctl enable --now docker
usermod -aG docker vagrant
# Accès sans sudo pour vagrant
chmod 666 /var/run/docker.sock || true
# ── 5. nginx + openssl ───────────────────────────────────────────────────────
log "Installation de nginx..."
# ── 5. Serveurs web (nginx + httpd) + TLS + hitch + varnish ─────────────────────
log "Installation des serveurs web et reverse proxy..."
dnf install -y nginx openssl curl
dnf install -y httpd mod_ssl
dnf install -y hitch varnish
# Ouvrir les ports HTTP/HTTPS dans le firewall
log "Configuration firewall..."
firewall-cmd --add-service=http --add-service=https --permanent 2>/dev/null || true
firewall-cmd --add-port=80/tcp --add-port=443/tcp --permanent 2>/dev/null || true
firewall-cmd --reload 2>/dev/null || true
# ── 6. Python3 + outils de test ──────────────────────────────────────────────
log "Installation Python3 et outils de test..."

119
tests/vm/run-test-from-host.sh Executable file
View File

@ -0,0 +1,119 @@
#!/usr/bin/env bash
# =============================================================================
# run-test-from-host.sh — host-side orchestrator for one VM test run
#
# Runs the complete test of one stack on one VM:
#   1. rsync the project files to the VM
#   2. start the services in the VM (background SSH session)
#   3. generate traffic from the HOST towards the VM eth0 address
#   4. signal the VM to run its verification and wait for the verdict
#
# Usage:
#   ./tests/vm/run-test-from-host.sh rocky9 nginx
#   ./tests/vm/run-test-from-host.sh centos8 apache
#   make test-vm-nginx
#
# Exit status: the exit status of the in-VM run-tests-vm.sh session.
# =============================================================================
set -euo pipefail

VM="${1:-rocky9}"
STACK="${2:-nginx}"
VM_DIR="$(cd "$(dirname "$0")" && pwd)"

GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; RESET='\033[0m'
BOLD='\033[1m'

log()  { echo -e "${BOLD}[$VM/$STACK]${RESET} $(date +%H:%M:%S) $*"; }
pass() { echo -e " ${GREEN}PASS${RESET} $*"; }
fail() { echo -e " ${RED}FAIL${RESET} $*"; }

# Common curl options. --max-time bounds every request so that an unreachable
# or stalled VM cannot hang the whole run.
CURL_OPTS=(-sf --max-time 10)

cd "$VM_DIR"

# ── 1. Sync files ────────────────────────────────────────────────────────────
log "Rsync fichiers vers $VM..."
vagrant rsync "$VM"

# ── 2. Get the VM eth0 address ───────────────────────────────────────────────
# `|| true`: under pipefail a failed ssh would otherwise abort here, before
# the explicit error below can be printed.
VM_IP=$(vagrant ssh "$VM" -- 'ip -4 addr show eth0' 2>/dev/null \
    | tr -d '\r' \
    | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}' || true)
if [ -z "$VM_IP" ]; then
    fail "Impossible d'obtenir l'IP eth0 de $VM"
    exit 1
fi
log "IP eth0 : $VM_IP"

# ── 3. Start the services in the VM (background) ─────────────────────────────
log "Démarrage des services dans $VM ($STACK)..."
# Remove the signal file of a previous run
vagrant ssh "$VM" -- 'sudo rm -f /tmp/ja4ebpf-traffic-done' 2>/dev/null || true
# Run the in-VM test script in "start" mode; it then waits for the
# /tmp/ja4ebpf-traffic-done signal created in step 7.
vagrant ssh "$VM" -- "sudo bash /ja4-platform/tests/vm/run-tests-vm.sh $STACK start" &
VM_PID=$!

# ── 4. Wait for the services ─────────────────────────────────────────────────
# Fixed delay on purpose: the web server answers before ja4ebpf is attached,
# so polling /health would start the traffic too early.
log "Attente démarrage des services (30s)..."
sleep 30

# ── 5. Check that the services answer ────────────────────────────────────────
log "Vérification connectivité..."
if curl "${CURL_OPTS[@]}" "http://$VM_IP/health" >/dev/null 2>&1; then
    pass "HTTP $VM_IP:80 OK"
else
    fail "HTTP $VM_IP:80 injoignable"
fi
if curl "${CURL_OPTS[@]}" -k "https://$VM_IP/health" >/dev/null 2>&1; then
    pass "HTTPS $VM_IP:443 OK"
else
    fail "HTTPS $VM_IP:443 injoignable"
fi

# ── 6. Generate traffic from the host ────────────────────────────────────────
log "Génération du trafic host → $VM_IP..."
for path in / /health /data /api/users; do
    curl "${CURL_OPTS[@]}" -k "https://$VM_IP$path" >/dev/null 2>&1 || true
    curl "${CURL_OPTS[@]}" "http://$VM_IP$path" >/dev/null 2>&1 || true
    curl "${CURL_OPTS[@]}" -k -X POST "https://$VM_IP/api/data" -d '{"test":1}' >/dev/null 2>&1 || true
    curl "${CURL_OPTS[@]}" -k -X PUT "https://$VM_IP/data" >/dev/null 2>&1 || true
    curl "${CURL_OPTS[@]}" -k -X DELETE "https://$VM_IP/data/1" >/dev/null 2>&1 || true
    # -I issues a real HEAD request. `-X HEAD` only renames the method: curl
    # then still waits for a response body that a HEAD reply never carries.
    curl "${CURL_OPTS[@]}" -k -I "https://$VM_IP$path" >/dev/null 2>&1 || true
done

# HTTP/2 through Python when httpx is available
if python3 -c "import httpx" 2>/dev/null; then
    python3 -c "
import httpx, warnings
warnings.filterwarnings('ignore')
with httpx.Client(http2=True, verify=False) as c:
    for p in ['/', '/health', '/data']:
        try:
            c.get('https://$VM_IP' + p)
        except Exception:
            pass
" 2>/dev/null && pass "HTTP/2 généré" || true
fi

log "Attente flush ja4ebpf (15s)..."
sleep 15

# ── 7. Tell the VM to run the verification ───────────────────────────────────
log "Signal de vérification..."
vagrant ssh "$VM" -- 'sudo touch /tmp/ja4ebpf-traffic-done' 2>/dev/null \
    || fail "Impossible de créer /tmp/ja4ebpf-traffic-done dans $VM"

# ── 8. Wait for the VM session to finish ─────────────────────────────────────
log "Attente résultat..."
# `wait` returns the status of the background session. It is captured through
# `||` because a bare failing `wait` would trip set -e and end the script
# before the failure report below.
RESULT=0
wait "$VM_PID" 2>/dev/null || RESULT=$?

if [ "$RESULT" -eq 0 ]; then
    echo ""
    echo -e " ${GREEN}${BOLD}$VM/$STACK : SUCCÈS${RESET}"
else
    echo ""
    echo -e " ${RED}${BOLD}$VM/$STACK : ÉCHEC (code $RESULT)${RESET}"
fi
exit "$RESULT"

View File

@ -1,142 +1,114 @@
#!/usr/bin/env bash
# =============================================================================
# run-tests-vm.sh — Lance la stack de test complète dans la VM Rocky Linux 9
# run-tests-vm.sh — Tests ja4ebpf multi-stack dans une VM Vagrant
#
# Ce script s'exécute DANS la VM (via vagrant ssh ou vagrant provision).
# Il ne peut pas tourner dans Docker — il requiert un vrai kernel pour eBPF.
# Architecture :
# Phase 1 (dans la VM) : démarrer ClickHouse, serveur web, ja4ebpf
# Phase 2 (depuis le host) : générer du trafic vers l'IP eth0 de la VM
# Phase 3 (dans la VM) : vérifier les données dans ClickHouse
#
# Usage (depuis le host) :
# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh nginx'
# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh all'
# Stacks supportées :
# nginx — nginx avec TLS (HTTP/1.1 + HTTP/2)
# apache — Apache httpd avec TLS (HTTP/1.1 + HTTP/2)
# hitch-varnish — hitch (TLS) → Varnish (cache/H2) → backend Python
# all — exécute les 3 stacks séquentiellement
#
# Variables d'environnement :
# STACK : stack à tester (nginx|apache|nginx-varnish|hitch-varnish|all)
# KEEP_RUNNING : si "true", ne pas arrêter la stack après le test (défaut: false)
# Modes :
# start — démarrer les services (Phase 1)
# verify — vérifier les données (Phase 3)
# (défaut) — start + verify (le trafic doit être généré entre les deux)
#
# Usage (depuis le host via Makefile) :
# make test-vm-nginx
# make test-vm-apache
# make test-vm-hitch-varnish
# make test-vm-matrix
# =============================================================================
set -euo pipefail
# S'assurer que /usr/local/bin et go sont dans PATH (nécessaire pour sudo bash)
export PATH="/usr/local/bin:/usr/local/go/bin:$PATH"
STACK="${1:-nginx}"
MODE="${2:-full}" # start | verify | full
KEEP_RUNNING="${KEEP_RUNNING:-false}"
PROJECT="/ja4-platform"
RESULTS_DIR="/tmp/ja4-test-results"
# ── Couleurs ─────────────────────────────────────────────────────────────────
GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; RESET='\033[0m'
BOLD='\033[1m'
log() { echo -e "${BOLD}[$STACK]${RESET} $(date +%H:%M:%S) $*"; }
pass() { echo -e " ${GREEN}${RESET} $*"; ((PASS_COUNT++)) || true; }
fail() { echo -e " ${RED}${RESET} $*"; ((FAIL_COUNT++)) || true; }
warn() { echo -e " ${YELLOW}⚠️${RESET} $*"; ((WARN_COUNT++)) || true; }
pass() { echo -e " ${GREEN}PASS${RESET} $*"; ((PASS_COUNT++)) || true; }
fail() { echo -e " ${RED}FAIL${RESET} $*"; ((FAIL_COUNT++)) || true; }
warn() { echo -e " ${YELLOW}WARN${RESET} $*"; ((WARN_COUNT++)) || true; }
PASS_COUNT=0; FAIL_COUNT=0; WARN_COUNT=0
# ── Vérification prérequis ────────────────────────────────────────────────────
check_prerequisites() {
log "Vérification des prérequis..."
# ── Helpers communs ──────────────────────────────────────────────────────────
# eBPF capabilities
if [ ! -d /sys/kernel/tracing ]; then
fail "tracefs non monté — exécuter: sudo mount -t tracefs tracefs /sys/kernel/tracing"
exit 1
fi
if [ ! -d /sys/kernel/debug ]; then
fail "debugfs non monté"
exit 1
fi
command -v ja4ebpf >/dev/null 2>&1 || {
log "Rebuild ja4ebpf..."
cd "$PROJECT/services/ja4ebpf"
export PATH="/usr/local/go/bin:$PATH"
GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3
GOWORK=off CGO_ENABLED=0 go build -o /tmp/ja4ebpf_new ./cmd/ja4ebpf/ && mv /tmp/ja4ebpf_new /usr/local/bin/ja4ebpf
}
command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; }
command -v nginx >/dev/null 2>&1 || { fail "nginx non installé"; exit 1; }
pass "Prérequis OK"
# Create a self-signed RSA-2048 certificate (CN=platform.test, valid 365 days).
#   $1  base name: the key goes to /etc/pki/tls/private/<name>.key and the
#       certificate to /etc/pki/tls/certs/<name>.crt
# openssl's stderr output is discarded; its exit status is returned.
gen_tls_cert() {
    local cert_name="$1"
    local key_file="/etc/pki/tls/private/${cert_name}.key"
    local crt_file="/etc/pki/tls/certs/${cert_name}.crt"

    openssl req -x509 -nodes -newkey rsa:2048 -days 365 \
        -subj "/CN=platform.test" \
        -keyout "$key_file" -out "$crt_file" 2>/dev/null
}
# ── Démarrage ClickHouse ──────────────────────────────────────────────────────
# Populate /var/www/html with the static files the tests request:
# a health document naming the current $STACK, plus three JSON stubs.
setup_docroot() {
    local docroot="/var/www/html"
    local rel

    mkdir -p "$docroot"
    echo '{"status":"ok","stack":"'"$STACK"'"}' > "$docroot/health"

    for rel in data api/users api/data/test; do
        # Create the parent directory of each stub before writing it.
        mkdir -p "$docroot/$(dirname $rel)"
        echo '{"ok":true}' > "$docroot/$rel"
    done
}
# Print the first IPv4 address of eth0 without its prefix length.
# When the lookup pipeline fails, an empty line is printed instead.
get_eth0_ip() {
    if ! ip -4 addr show eth0 \
        | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}' 2>/dev/null
    then
        echo ""
    fi
}
# ── ClickHouse ────────────────────────────────────────────────────────────────
start_clickhouse() {
log "Démarrage ClickHouse..."
docker rm -f ja4-clickhouse 2>/dev/null || true
CSV_DIR="$PROJECT/tests/integration/platform/csv-stubs"
docker run -d --name ja4-clickhouse \
-p 8123:8123 -p 9000:9000 \
-e CLICKHOUSE_DB=ja4_processing \
-e CLICKHOUSE_USER=default \
-e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \
-v "$PROJECT/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh" \
-v "$CSV_DIR:/var/lib/clickhouse/user_files" \
$(for f in "$PROJECT/shared/clickhouse/"*.sql; do
echo "-v $f:/initdb-src/$(basename $f):ro"
done) \
clickhouse/clickhouse-server:24.8 2>&1 | tail -1
# Attendre que ClickHouse soit prêt
log "Attente ClickHouse (max 120s)..."
for i in $(seq 1 60); do
if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then
pass "ClickHouse prêt"
return 0
fi
curl -sf "http://localhost:8123/ping" >/dev/null 2>&1 && { pass "ClickHouse prêt"; return 0; }
sleep 2
done
fail "ClickHouse timeout"; exit 1
}
# ── Configuration nginx ────────────────────────────────────────────────────────
setup_nginx() {
log "Configuration nginx avec TLS..."
# Certificat auto-signé
openssl req -x509 -nodes -days 365 \
-subj "/CN=platform.test" \
-newkey rsa:2048 \
-keyout /etc/pki/tls/private/nginx.key \
-out /etc/pki/tls/certs/nginx.crt 2>/dev/null
# Copier la configuration de test
cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf
# Créer les fichiers de test
mkdir -p /var/www/html
# /run/nginx est un tmpfs recréé à chaque boot, nginx en a besoin pour son PID
mkdir -p /run/nginx
echo '{"status":"ok","stack":"nginx-vm"}' > /var/www/html/health
for p in data api/users api/data/test; do
mkdir -p "/var/www/html/$(dirname $p)"
echo '{"ok":true}' > "/var/www/html/$p"
done
nginx -t && nginx
# Attendre nginx
for i in $(seq 1 20); do
curl -sf http://localhost/health >/dev/null 2>&1 && break
sleep 0.5
done
pass "nginx démarré"
}
# ── Démarrage ja4ebpf ─────────────────────────────────────────────────────────
# ── ja4ebpf ────────────────────────────────────────────────────────────────────
start_ja4ebpf() {
log "Démarrage ja4ebpf..."
pkill ja4ebpf 2>/dev/null || true
sleep 1
# Créer la config
cat > /tmp/ja4ebpf.yml << 'EOF'
local ssl_lib=""
for lib in /usr/lib64/libssl.so.3 /usr/lib64/libssl.so.1.1 /usr/lib/libssl.so.3 /usr/lib/libssl.so.1.1; do
[ -f "$lib" ] && { ssl_lib="$lib"; break; }
done
[ -z "$ssl_lib" ] && ssl_lib="/usr/lib64/libssl.so.3"
cat > /tmp/ja4ebpf.yml << EOF
interface: eth0
ssl_lib_path: "/usr/lib64/libssl.so.3"
ssl_lib_path: "${ssl_lib}"
clickhouse:
dsn: "clickhouse://default:@localhost:9000/ja4_logs"
dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs"
batch_size: 100
flush_secs: 1
correlation:
@ -147,168 +119,339 @@ log:
format: "json"
EOF
# Lancer avec les capabilities nécessaires
# Dans la VM (root), on peut lancer directement
JA4EBPF_CONFIG=/tmp/ja4ebpf.yml ja4ebpf > /tmp/ja4ebpf.log 2>&1 &
JA4EBPF_PID=$!
sleep 3
if ! kill -0 "$JA4EBPF_PID" 2>/dev/null; then
fail "ja4ebpf s'est arrêté immédiatement"
cat /tmp/ja4ebpf.log | tail -10
tail -10 /tmp/ja4ebpf.log
return 1
fi
log "ja4ebpf démarré (PID $JA4EBPF_PID)"
# Vérifier les uprobes dans tracefs
# Vérifier XDP
if ip link show dev eth0 2>/dev/null | grep -q "xdp"; then
local xdp_info
xdp_info=$(ip link show dev eth0 | grep "prog/xdp" | sed 's/^[[:space:]]*//')
pass "XDP attaché : $xdp_info"
else
warn "Aucun XDP sur eth0"
bpftool prog show name capture_xdp 2>/dev/null || true
fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Stack : nginx
# ═════════════════════════════════════════════════════════════════════════════
# Configure and start nginx with a self-signed certificate and the test
# configuration, then wait (10s max) for http://localhost/health to answer.
# Returns 0 once nginx answers, 1 when the configuration is rejected or
# nginx never becomes reachable (previously both cases still reported PASS).
setup_nginx() {
    log "Configuration nginx avec TLS..."
    gen_tls_cert nginx
    setup_docroot
    cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf
    mkdir -p /run/nginx

    # Validate first: inside an && list a failing `nginx -t` is ignored by
    # set -e, so it has to be checked explicitly.
    if ! nginx -t; then
        fail "nginx : configuration invalide"
        return 1
    fi
    nginx

    local attempt
    for attempt in $(seq 1 20); do
        if curl -sf http://localhost/health >/dev/null 2>&1; then
            pass "nginx démarré"
            return 0
        fi
        sleep 0.5
    done
    fail "nginx ne répond pas sur http://localhost/health"
    return 1
}
# Ask nginx to stop; errors (for instance nginx not running) are ignored.
stop_nginx() {
    nginx -s stop 2>/dev/null || :
}
# ═════════════════════════════════════════════════════════════════════════════
# Stack : apache
# ═════════════════════════════════════════════════════════════════════════════
# Configure and start Apache httpd with a self-signed certificate and the
# test SSL configuration, then wait for http://localhost/health to answer.
# Returns 0 once httpd answers, 1 when httpd is missing, its configuration is
# rejected, or it never becomes reachable (previously all reported PASS).
setup_apache() {
    log "Configuration Apache httpd avec TLS..."
    if ! command -v httpd >/dev/null 2>&1; then
        fail "httpd non installé"
        return 1
    fi
    gen_tls_cert apache
    setup_docroot

    # Capture the module list before grepping it: with pipefail,
    # `httpd -M | grep -q` can fail on SIGPIPE and wrongly look like
    # "module missing".
    local modules
    modules=$(httpd -M 2>/dev/null || true)
    if ! grep -q http2_module <<< "$modules"; then
        echo "LoadModule http2_module modules/mod_http2.so" \
            >> /etc/httpd/conf.modules.d/00-base.conf 2>/dev/null || true
    fi

    mkdir -p /run/httpd /var/log/httpd
    # Best-effort: without the test file the packaged ssl.conf stays in place.
    cp "$PROJECT/tests/integration/apache/platform/httpd-ssl.conf" \
        /etc/httpd/conf.d/ssl.conf 2>/dev/null || true

    # Validate in the foreground; only the server itself is backgrounded, so
    # a rejected configuration is reported instead of being lost in a
    # background job.
    if ! httpd -t 2>&1; then
        fail "httpd : configuration invalide"
        return 1
    fi
    httpd -DFOREGROUND &
    sleep 2

    local attempt
    for attempt in $(seq 1 20); do
        if curl -sf http://localhost/health >/dev/null 2>&1; then
            pass "Apache httpd démarré"
            return 0
        fi
        sleep 0.5
    done
    fail "Apache httpd ne répond pas sur http://localhost/health"
    return 1
}
# Kill every httpd process; errors (for instance none running) are ignored.
stop_apache() {
    pkill httpd 2>/dev/null || :
}
# ═════════════════════════════════════════════════════════════════════════════
# Stack : hitch + varnish
# ═════════════════════════════════════════════════════════════════════════════
# Configure and start the hitch (TLS terminator) + Varnish stack.
#
# Chain built here, as configured below:
#   hitch on [*]:443  ->  varnishd on 127.0.0.1:6081 (PROXY)  ->  Python backend on 127.0.0.1:8080
#
# All three processes are started in the background. The function then polls
# https://localhost/health up to 20 times and emits `pass` afterwards.
# NOTE(review): `pass` is emitted whether or not the health probe ever
# succeeded — confirm this is intended.
setup_hitch_varnish() {
log "Configuration hitch + Varnish..."
gen_tls_cert hitch
mkdir -p /etc/hitch
# Bundle private key + certificate into the single file referenced by
# `pem-file` in the hitch configuration written below.
cat /etc/pki/tls/private/hitch.key /etc/pki/tls/certs/hitch.crt \
> /etc/hitch/hitch.pem
# hitch configuration (quoted heredoc: written verbatim, no shell expansion).
cat > /etc/hitch/hitch.conf << 'HCONF'
frontend = "[*]:443"
backend = "[127.0.0.1]:6081"
pem-file = "/etc/hitch/hitch.pem"
write-proxy-v1 = on
tls-protos = TLSv1.2 TLSv1.3
ciphers = "ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256:TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256"
alpn-protos = "h2,http/1.1"
workers = 2
user = "nobody"
daemon = off
log-level = 1
syslog = off
HCONF
mkdir -p /etc/varnish
# Prefer the project's VCL; if the copy fails, fall back to a minimal inline
# VCL pointing at the local backend on port 8080.
cp "$PROJECT/tests/integration/hitch-varnish/platform/varnish.vcl" \
/etc/varnish/default.vcl 2>/dev/null || {
cat > /etc/varnish/default.vcl << 'VCL'
vcl 4.1;
backend default { .host = "127.0.0.1"; .port = "8080"; }
sub vcl_deliver {
set resp.http.Via = "1.1 varnish";
set resp.http.X-Client-IP = client.ip;
}
VCL
}
setup_docroot
# HTTP backend (port 8080): background Python server bound to 127.0.0.1 that
# answers GET with a JSON status document and POST with a fixed JSON body.
python3 -c "
import http.server, socketserver, json
class H(http.server.BaseHTTPRequestHandler):
def log_message(self, *a): pass
def do_GET(self):
body = json.dumps({'status':'ok','stack':'hitch-varnish','path':self.path}).encode()
self.send_response(200)
self.send_header('Content-Type','application/json')
self.send_header('Content-Length',len(body))
self.end_headers()
self.wfile.write(body)
def do_POST(self):
n = int(self.headers.get('Content-Length',0))
self.rfile.read(n)
body = b'{\"result\":\"accepted\"}'
self.send_response(200)
self.send_header('Content-Type','application/json')
self.send_header('Content-Length',len(body))
self.end_headers()
self.wfile.write(body)
with socketserver.TCPServer(('127.0.0.1', 8080), H) as s:
s.serve_forever()
" &
sleep 1
# Informational tracefs checks: look for SSL uprobes and the accept4
# tracepoint; a miss only produces a warning.
# NOTE(review): these checks concern ja4ebpf probes rather than hitch/Varnish
# and may belong to another function — confirm placement.
if grep -q "ssl" /sys/kernel/tracing/uprobe_events 2>/dev/null; then
pass "Uprobes SSL attachés dans tracefs"
else
warn "Uprobes non visibles dans tracefs (peuvent être actifs quand même)"
fi
# Check the accept4 tracepoint
# NOTE(review): events/syscalls is presumably a directory; grep without -r on
# a directory would not match — verify this check can ever pass.
if grep -q "accept4" /sys/kernel/tracing/events/syscalls 2>/dev/null; then
pass "Tracepoints accept4 disponibles"
else
warn "Tracepoints accept4 non trouvés"
fi
# Varnish in the foreground of a background job: PROXY listener on 6081,
# HTTP/2 feature enabled, 64 MB malloc storage, admin interface on 6082.
varnishd -F -f /etc/varnish/default.vcl \
-a "127.0.0.1:6081,PROXY" \
-p feature=+http2 \
-s malloc,64m \
-T 127.0.0.1:6082 &
sleep 2
hitch --config=/etc/hitch/hitch.conf &
sleep 2
# Poll the full TLS chain (certificate verification disabled with -k).
for i in $(seq 1 20); do
curl -skf https://localhost/health >/dev/null 2>&1 && break
sleep 0.5
done
pass "hitch + Varnish démarrés"
}
# ── Génération de trafic ───────────────────────────────────────────────────────
# Generate local test traffic against http://localhost and https://localhost:
# plain HTTP requests, HTTPS requests with several methods, then (when python3
# and the httpx module are available) HTTP/2 requests. Every curl call is
# best effort (`|| true`). Finally waits 15 s for ja4ebpf to flush.
# NOTE(review): the log line mentions HTTP/1.0, but no request below forces
# HTTP/1.0 — confirm.
# NOTE(review): the closing brace of this function is not present before the
# next function definition in this part of the file — confirm against the
# complete script.
generate_traffic() {
log "Génération du trafic (HTTP/1.0 + HTTP/1.1 + HTTP/2)..."
# HTTP/1.1 traffic (plain HTTP)
for path in / /health /data /api/users; do
curl -sf "http://localhost$path" >/dev/null 2>&1 || true
curl -sf -X POST "http://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true
done
# HTTPS/1.1 traffic
# NOTE(review): curl documents -I/--head for HEAD requests; `-X HEAD` may leave
# curl waiting for a response body — confirm.
for path in / /health /data /api/users; do
curl -sf -k "https://localhost$path" >/dev/null 2>&1 || true
curl -sf -k -X POST "https://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true
curl -sf -k -X PUT "https://localhost/data" >/dev/null 2>&1 || true
curl -sf -k -X DELETE "https://localhost/data/1" >/dev/null 2>&1 || true
curl -sf -k -X HEAD "https://localhost$path" >/dev/null 2>&1 || true
done
# HTTP/2 traffic: only when python3 can import httpx; the script is fed through
# a quoted heredoc, so the shell performs no expansion inside it.
if command -v python3 >/dev/null 2>&1 && python3 -c "import httpx" 2>/dev/null; then
python3 << 'PYEOF'
import httpx, ssl, warnings
warnings.filterwarnings("ignore")
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
with httpx.Client(http2=True, verify=False) as client:
for path in ["/", "/health", "/data"]:
try: client.get(f"https://localhost{path}")
except: pass
try: client.post("https://localhost/api/data", json={"test": "h2"})
except: pass
PYEOF
pass "Trafic HTTP/2 généré"
fi
# Wait for the ja4ebpf → ClickHouse flush
log "Attente flush ja4ebpf (15s)..."
sleep 15
pass "Trafic généré"
# Best-effort teardown of the hitch + Varnish stack: signal hitch, then
# varnishd, then the process whose command line matches the Python backend
# on port 8080. Missing processes are ignored.
stop_hitch_varnish() {
    local daemon
    for daemon in hitch varnishd; do
        pkill "$daemon" 2>/dev/null || true
    done
    # -f matches against the full command line.
    pkill -f "TCPServer.*8080" 2>/dev/null || true
}
# ── Vérification ClickHouse ────────────────────────────────────────────────────
# ═════════════════════════════════════════════════════════════════════════════
# Vérification ClickHouse
# ═════════════════════════════════════════════════════════════════════════════
# Verify that ja4ebpf wrote data into ClickHouse (database ja4_logs, HTTP
# interface on localhost:8123). Each check counts matching rows and reports
# through pass / fail / warn; nothing is returned to the caller.
# NOTE(review): most checks below appear twice with different messages (one
# variant without `2>/dev/null` on the test), so each result is reported and
# counted twice — confirm which variant is the intended one.
verify_db() {
log "Vérification des données dans ClickHouse..."
# ch_query QUERY: run QUERY and print only the HTTP status code ("0" when curl fails).
# NOTE(review): not called anywhere inside this function.
ch_query() {
curl -sf "http://localhost:8123/" \
--data-urlencode "query=$1" \
--data-urlencode "database=ja4_logs" \
-o /dev/null -w '%{http_code}' 2>/dev/null || echo "0"
}
# ch_val QUERY: run QUERY and print the result with spaces and newlines removed.
# NOTE(review): `|| echo "0"` follows the pipeline, so it depends on the exit
# status of `tr` rather than `curl` (unless pipefail is set); callers rely on
# the ${var:-0} default for empty output.
ch_val() {
curl -sf "http://localhost:8123/?database=ja4_logs" \
--data-urlencode "query=$1" 2>/dev/null | tr -d ' \n' || echo "0"
}
# http_logs_raw (raw data before the materialized view)
local raw_count
raw_count=$(ch_val "SELECT count() FROM http_logs_raw")
# 2>/dev/null hides the "integer expression expected" error when the value is not numeric.
if [ "${raw_count:-0}" -gt 0 ] 2>/dev/null; then
pass "http_logs_raw : $raw_count lignes"
else
fail "http_logs_raw vide — ja4ebpf n'a rien capturé"
log " Logs ja4ebpf :"
tail -10 /tmp/ja4ebpf.log 2>/dev/null | sed 's/^/ /'
fi
# L3/L4
ttl=$(ch_val "SELECT count() FROM http_logs WHERE ip_meta_ttl > 0")
[ "${ttl:-0}" -gt 0 ] && pass "L3/L4 TTL capturé ($ttl lignes)" || fail "L3/L4 TTL absent"
[ "${ttl:-0}" -gt 0 ] 2>/dev/null && pass "L3/L4 TTL ($ttl)" || fail "L3/L4 TTL absent"
mss=$(ch_val "SELECT count() FROM http_logs WHERE tcp_meta_mss > 0")
[ "${mss:-0}" -gt 0 ] && pass "TCP MSS capturé ($mss lignes)" || fail "TCP MSS absent"
[ "${mss:-0}" -gt 0 ] 2>/dev/null && pass "TCP MSS ($mss)" || fail "TCP MSS absent"
# TLS
ja4=$(ch_val "SELECT count() FROM http_logs WHERE ja4 != ''")
[ "${ja4:-0}" -gt 0 ] && pass "JA4 fingerprint capturé ($ja4 lignes)" || fail "JA4 absent"
[ "${ja4:-0}" -gt 0 ] 2>/dev/null && pass "JA4 fingerprint ($ja4)" || fail "JA4 absent"
# A missing SNI is only a warning, not a failure.
sni=$(ch_val "SELECT count() FROM http_logs WHERE tls_sni != ''")
[ "${sni:-0}" -gt 0 ] && pass "TLS SNI capturé ($sni lignes)" || warn "TLS SNI absent"
[ "${sni:-0}" -gt 0 ] 2>/dev/null && pass "TLS SNI ($sni)" || warn "TLS SNI absent"
# L7 HTTP — this is where it should work inside the VM
# L7 HTTP
method=$(ch_val "SELECT count() FROM http_logs WHERE method != ''")
[ "${method:-0}" -gt 0 ] && pass "L7 méthodes HTTP capturées ($method lignes)" \
|| fail "L7 méthodes HTTP ABSENT — uprobe SSL_read ne fonctionne pas"
[ "${method:-0}" -gt 0 ] 2>/dev/null && pass "L7 HTTP ($method)" || fail "L7 HTTP ABSENT"
path=$(ch_val "SELECT count() FROM http_logs WHERE path != ''")
[ "${path:-0}" -gt 0 ] && pass "L7 path HTTP capturé ($path lignes)" || fail "L7 path absent"
[ "${path:-0}" -gt 0 ] 2>/dev/null && pass "L7 path ($path)" || fail "L7 path absent"
status=$(ch_val "SELECT count() FROM http_logs WHERE status_code > 0")
[ "${status:-0}" -gt 0 ] && pass "status_code capturé ($status lignes)" || warn "status_code absent"
[ "${status:-0}" -gt 0 ] 2>/dev/null && pass "status_code ($status)" || warn "status_code absent"
sig=$(ch_val "SELECT count() FROM http_logs WHERE header_order_signature != ''")
[ "${sig:-0}" -gt 0 ] && pass "header_order_signature capturé ($sig lignes)" || warn "header_order_sig absent"
# Distinct HTTP methods
methods=$(ch_val "SELECT groupArray(method) FROM (SELECT DISTINCT method FROM http_logs WHERE method != '')")
log "Méthodes HTTP vues : $methods"
log "Méthodes HTTP : $methods"
# Total rows (always reported as a pass, whatever the count)
total=$(ch_val "SELECT count() FROM http_logs")
pass "Total lignes http_logs : $total"
pass "Total http_logs : $total"
}
# ═════════════════════════════════════════════════════════════════════════════
# Nettoyage
# ═════════════════════════════════════════════════════════════════════════════
# Tear down everything started for the current $STACK: the ja4ebpf process,
# the stack-specific web servers, and the ja4-clickhouse container.
# An unrecognised $STACK skips the stack-specific step; the ja4ebpf and
# container teardown are best effort.
stop_stack() {
    pkill ja4ebpf 2>/dev/null || true

    if [ "$STACK" = "nginx" ]; then
        stop_nginx
    elif [ "$STACK" = "apache" ]; then
        stop_apache
    elif [ "$STACK" = "hitch-varnish" ]; then
        stop_hitch_varnish
    fi

    docker rm -f ja4-clickhouse 2>/dev/null || true
}
# ── Nettoyage ─────────────────────────────────────────────────────────────────
# Exit handler: unless KEEP_RUNNING is "true", stop ja4ebpf, nginx and the
# ClickHouse container (each best effort), then delegate to stop_stack for
# the stack-specific teardown.
cleanup() {
    # Leave every service up when the caller asked to keep them running.
    if [ "$KEEP_RUNNING" = "true" ]; then
        return 0
    fi

    log "Nettoyage..."
    pkill ja4ebpf 2>/dev/null || true
    nginx -s stop 2>/dev/null || true
    docker rm -f ja4-clickhouse 2>/dev/null || true
    stop_stack
}
# Run cleanup whenever the script exits, whatever the exit path.
trap cleanup EXIT
# ── Main ──────────────────────────────────────────────────────────────────────
# Make sure the results directory exists before any phase runs.
mkdir -p "$RESULTS_DIR"
# ═════════════════════════════════════════════════════════════════════════════
# Phase 1 : démarrage des services
# ═════════════════════════════════════════════════════════════════════════════
# Phase 1: start the services for $STACK (ClickHouse, the selected web stack,
# ja4ebpf) and print the VM address the host should send traffic to.
# NOTE(review): this body appears to contain two interleaved versions of the
# script (a second banner, an nginx-only sequence with traffic generation and
# verification, and a result summary). The `if [ "$FAIL_COUNT" -eq 0 ]` below
# has no matching `fi` inside this function — confirm against the complete
# script before relying on this block.
do_start() {
echo ""
echo "╔══════════════════════════════════════════╗"
echo "║ Phase 1 : Démarrage — $STACK"
echo "╚══════════════════════════════════════════╝"
echo ""
echo ""
echo "╔══════════════════════════════════════════╗"
echo "║ ja4ebpf VM Test Suite — Rocky Linux 9 ║"
echo "╚══════════════════════════════════════════╝"
echo ""
# Check prerequisites
# When no ja4ebpf binary is on PATH, regenerate the loader bindings and build
# one into /usr/local/bin (built to a temporary path first, then moved).
command -v ja4ebpf >/dev/null 2>&1 || {
log "Rebuild ja4ebpf..."
cd "$PROJECT/services/ja4ebpf"
GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3
GOWORK=off CGO_ENABLED=0 go build -o /tmp/ja4ebpf_new ./cmd/ja4ebpf/ && mv /tmp/ja4ebpf_new /usr/local/bin/ja4ebpf
}
# Docker is mandatory: abort the whole script when it is missing.
command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; }
check_prerequisites
start_clickhouse
setup_nginx
start_ja4ebpf
generate_traffic
verify_db
start_clickhouse
echo ""
echo "════════════════════════════════════════════"
echo -e " ${GREEN}OK${RESET}: $PASS_COUNT ${YELLOW}WARN${RESET}: $WARN_COUNT ${RED}FAIL${RESET}: $FAIL_COUNT"
if [ "$FAIL_COUNT" -eq 0 ]; then
echo -e " ${GREEN}${BOLD}Tous les tests réussis !${RESET}"
exit 0
else
echo -e " ${RED}${BOLD}$FAIL_COUNT tests échoués.${RESET}"
echo "Logs ja4ebpf :"
# Dispatch to the setup function of the selected stack; an unknown stack aborts.
case "$STACK" in
nginx) setup_nginx ;;
apache) setup_apache ;;
hitch-varnish) setup_hitch_varnish ;;
*) fail "Stack inconnue: $STACK"; exit 1 ;;
esac
start_ja4ebpf
# Show the IP for the host
local eth0_ip
eth0_ip=$(get_eth0_ip)
echo ""
echo " ┌─────────────────────────────────────────┐"
echo " │ Services prêts ! │"
echo " │ IP eth0 : $eth0_ip"
echo " │ HTTP : http://$eth0_ip:80"
echo " │ HTTPS : https://$eth0_ip:443"
echo " └─────────────────────────────────────────┘"
echo ""
}
# ═════════════════════════════════════════════════════════════════════════════
# Phase 3 : vérification
# ═════════════════════════════════════════════════════════════════════════════
# Phase 3: run the ClickHouse checks (verify_db) for $STACK and print the
# OK / WARN / FAIL summary. On failure, prints the last 20 lines of
# /tmp/ja4ebpf.log and exits the script with status 1.
# NOTE(review): there are two consecutive `fi` below but only one `if` opened
# in this function — confirm against the complete script.
do_verify() {
echo ""
echo "╔══════════════════════════════════════════╗"
echo "║ Phase 3 : Vérification — $STACK"
echo "╚══════════════════════════════════════════╝"
echo ""
verify_db
echo ""
echo "════════════════════════════════════════════"
echo -e " ${GREEN}OK${RESET}: $PASS_COUNT ${YELLOW}WARN${RESET}: $WARN_COUNT ${RED}FAIL${RESET}: $FAIL_COUNT"
if [ "$FAIL_COUNT" -eq 0 ]; then
echo -e " ${GREEN}${BOLD}$STACK : Tous les tests réussis !${RESET}"
else
echo -e " ${RED}${BOLD}$STACK : $FAIL_COUNT tests échoués${RESET}"
# Best-effort dump of recent ja4ebpf output to help diagnose the failures.
tail -20 /tmp/ja4ebpf.log 2>/dev/null || true
exit 1
fi
fi
}
# ═════════════════════════════════════════════════════════════════════════════
# Main
# ═════════════════════════════════════════════════════════════════════════════
# Entry point: dispatch on $MODE.
#   start   : start the services, wait (up to 120 s) for the host to signal
#             that it has sent its traffic, then verify.
#   verify  : verify only.
#   other   : legacy mode — start, generate a little local HTTPS traffic,
#             wait 10 s, then verify.
# The script exits 0 when no check failed and 1 otherwise.
case "$MODE" in
    start)
        do_start
        echo " En attente de trafic depuis le host..."
        # Wait for the host to generate the traffic: it creates
        # /tmp/ja4ebpf-traffic-done once the traffic has been sent.
        traffic_done=false
        for _ in $(seq 1 120); do
            if [ -f /tmp/ja4ebpf-traffic-done ]; then
                traffic_done=true
                break
            fi
            sleep 1
        done
        # Report a timeout instead of silently verifying with no host traffic.
        if [ "$traffic_done" != "true" ]; then
            warn "Aucun signal de fin de trafic du host après 120s (/tmp/ja4ebpf-traffic-done absent)"
        fi
        do_verify
        ;;
    verify)
        do_verify
        ;;
    *)
        # Legacy mode: everything inside the VM (local traffic only).
        # Note: XDP on eth0 will NOT capture localhost traffic.
        do_start
        log "ATTENTION : le trafic localhost n'est pas capturé par XDP/eth0"
        log "Utilisez 'make test-vm-matrix' pour le test complet avec trafic host"
        # Still generate some traffic for the uprobes.
        for path in / /health; do
            curl -sf -k "https://localhost$path" >/dev/null 2>&1 || true
        done
        sleep 10
        do_verify
        ;;
esac

if [ "$FAIL_COUNT" -eq 0 ]; then
    exit 0
fi
exit 1