From f85a10b01280dd44861d3024d15118ee8914c521 Mon Sep 17 00:00:00 2001 From: toto Date: Sun, 12 Apr 2026 02:37:00 +0200 Subject: [PATCH] feat: pipeline L7 HTTP complet + infrastructure tests VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correctifs pipeline L7 (uprobe SSL_read) : - uprobe_ssl.c : ssl_set_fd ne retourne plus tôt quand fd_conn_map est vide (accept4 non disponible en Docker). Sauvegarde ssl_ptr→{fd,0,0} pour permettre le fallback /proc côté Go. - main.go : consumeSSLEvents reécrit avec routeur magic-bytes complet : * HTTP/2 preface → extraction SETTINGS + conversion correlation.HTTP2Settings * HTTP/1.x requête → method, path, query, headers, header_order_sig * HTTP/1.x réponse → status_code * Fallback /proc//fd/ quand src_ip=0 (accept4 absent) - writer/clickhouse.go : export header_order_signature ajouté Nouveaux packages : - internal/parser/http1.go : parseur HTTP/1.x (IsHTTP1Request, ParseHTTP1Request, IsHTTP1Response, ParseHTTP1Response) - internal/parser/http1_test.go : 11 tests unitaires (28 total passent) - internal/procutil/proc_lookup.go : résolution fd→IP via /proc avec cache TTL 5s (FDCache). Supporte /proc/PID/net/tcp et tcp6, IPv4-mappé IPv6. Infrastructure tests VM (tests/vm/) : - Vagrantfile : VM Rocky Linux 9 KVM, 4 CPU / 4 GB RAM - provision.sh : installation toolchain eBPF + Go + Docker + nginx - run-tests-vm.sh : suite de test complète dans la VM (L3/L4+TLS+L7) - README.md : guide d'installation et d'utilisation - Makefile : cibles vm-up, vm-down, vm-ssh, test-vm-nginx, test-vm-all, vm-rebuild-ja4ebpf Corrections stack Docker : - Dockerfiles nginx/apache/nginx-varnish/hitch-varnish : suppression des références à shared/go/ja4common/ (répertoire supprimé) - clickhouse-init.sh : restauré depuis git, seed anubis_ua_rules obsolète supprimé (table REGEXP_TREE supprimée du schéma) - traffic-gen : ajout HTTP/1.0 (http.client) et HTTP/2 (httpx) - verify_db.py : script de vérification 35 checks (L3/L4/TLS/L7/corrélation) - run-stack-tests.sh : phase 6 verify_db ajoutée Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Makefile | 42 ++- services/ja4ebpf/bpf/uprobe_ssl.c | 79 +++-- services/ja4ebpf/cmd/ja4ebpf/main.go | 122 ++++++- services/ja4ebpf/internal/loader/loader.go | 18 +- services/ja4ebpf/internal/parser/http1.go | 146 +++++++++ .../ja4ebpf/internal/parser/http1_test.go | 201 ++++++++++++ .../ja4ebpf/internal/procutil/proc_lookup.go | 247 ++++++++++++++ .../ja4ebpf/internal/writer/clickhouse.go | 28 +- tests/integration/apache/platform/Dockerfile | 2 - .../hitch-varnish/platform/Dockerfile | 2 - tests/integration/lib/run-stack-tests.sh | 22 ++ .../nginx-varnish/platform/Dockerfile | 2 - tests/integration/nginx/platform/Dockerfile | 2 - tests/integration/platform/clickhouse-init.sh | 30 ++ tests/integration/traffic-gen/Dockerfile | 4 +- .../traffic-gen/generate_traffic.py | 119 ++++++- tests/integration/traffic-gen/verify_db.py | 268 +++++++++++++++ tests/vm/README.md | 115 +++++++ tests/vm/Vagrantfile | 66 ++++ tests/vm/provision.sh | 118 +++++++ tests/vm/run-tests-vm.sh | 309 ++++++++++++++++++ 21 files changed, 1868 insertions(+), 74 deletions(-) create mode 100644 services/ja4ebpf/internal/parser/http1.go create mode 100644 services/ja4ebpf/internal/parser/http1_test.go create mode 100644 services/ja4ebpf/internal/procutil/proc_lookup.go create mode 100755 tests/integration/platform/clickhouse-init.sh create mode 100644 tests/integration/traffic-gen/verify_db.py create mode 100644 tests/vm/README.md create mode 100644 tests/vm/Vagrantfile create mode 100755 tests/vm/provision.sh create mode 100755 tests/vm/run-tests-vm.sh diff --git a/Makefile b/Makefile index 2bea9cc..dd0e5f6 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ VERSION ?= $(shell git describe --tags --always 2>/dev/null || echo dev) build-dashboard test-dashboard \ test-all-stacks test-nginx test-nginx-varnish test-hitch-varnish test-apache \ test-matrix \ + test-vm-nginx test-vm-all vm-up vm-down vm-ssh \ reload-prod-logs init-stack import-prod-data init-and-import \ purge-db @@ -34,7 +35,14 @@ help: ## Affiche cette aide @echo " make test-bot-detector Tests Python bot-detector" @echo " make test-dashboard Tests Python dashboard" @echo "" - @echo " Tests d'intégration (par stack)" + @echo " Tests VM (eBPF sur kernel réel — nécessite 'make vm-up' d'abord)" + @echo " make vm-up Créer la VM Rocky Linux 9 (vagrant up)" + @echo " make vm-down Détruire la VM (vagrant destroy)" + @echo " make vm-ssh Connexion SSH à la VM" + @echo " make test-vm-nginx Test nginx dans la VM (L7 complet)" + @echo " make test-vm-all Tous les tests dans la VM" + @echo "" + @echo " Tests d'intégration (par stack, Docker — L3/L4/TLS uniquement)" @echo " make test-all-stacks Toutes les stacks sur Rocky Linux 9" @echo " make test-apache Stack Apache + ja4ebpf" @echo " make test-nginx Stack nginx + ja4ebpf" @@ -148,6 +156,38 @@ test-nginx-varnish: test-hitch-varnish: cd tests/integration && bash hitch-varnish/run-tests.sh +# ── Tests VM (kernel réel, eBPF complet) ───────────────────────────────────── + +# Répertoire Vagrantfile +VM_DIR := tests/vm + +vm-up: ## Créer la VM Rocky Linux 9 pour les tests eBPF + cd $(VM_DIR) && vagrant up + +vm-down: ## Détruire la VM + cd $(VM_DIR) && vagrant destroy -f + +vm-ssh: ## Connexion SSH à la VM + cd $(VM_DIR) && vagrant ssh + +vm-rebuild-ja4ebpf: ## Recompiler ja4ebpf dans la VM (après modifications) + cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \ + 'export PATH=/usr/local/go/bin:$$PATH && \ + cd /ja4-platform/services/ja4ebpf && \ + GOWORK=off go generate ./internal/loader/ && \ + GOWORK=off CGO_ENABLED=0 go build -o /usr/local/bin/ja4ebpf ./cmd/ja4ebpf/ && \ + echo "ja4ebpf rebuilt OK"' + +test-vm-nginx: ## Test nginx dans la VM (L3/L4/TLS/L7 HTTP complet) + @echo "=== Test VM nginx (kernel réel) ===" + cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \ + 'sudo bash /ja4-platform/tests/vm/run-tests-vm.sh nginx' + +test-vm-all: ## Tous les tests dans la VM + @echo "=== Tests VM (toutes stacks) ===" + cd $(VM_DIR) && vagrant rsync && vagrant ssh -- \ + 'sudo bash /ja4-platform/tests/vm/run-tests-vm.sh all' + # ── Matrice multi-distro ───────────────────────────────────────────────────── test-matrix: ## Toutes stacks × el8 + el9 + el10 diff --git a/services/ja4ebpf/bpf/uprobe_ssl.c b/services/ja4ebpf/bpf/uprobe_ssl.c index 9a70ef2..f53dd80 100644 --- a/services/ja4ebpf/bpf/uprobe_ssl.c +++ b/services/ja4ebpf/bpf/uprobe_ssl.c @@ -1,8 +1,9 @@ -/* ============================================================================ - * uprobe_ssl.c — Uprobes SSL_read/SSL_set_fd et kprobes accept4 +/* uprobe_ssl.c — Uprobes SSL_read/SSL_set_fd et tracepoints accept4 * * Intercepte les appels OpenSSL pour capturer le trafic déchiffré, - * et corrige l'association socket ↔ SSL* via accept4. + * et corrige l'association socket ↔ SSL* via les tracepoints syscalls/accept4. + * Les tracepoints sont plus stables que les kprobes car ils ne dépendent pas + * du nom manglé __x64_sys_accept4 (variable selon la version du kernel). * ============================================================================ */ #include "vmlinux.h" @@ -24,11 +25,42 @@ struct { __type(value, __u64); /* pointeur userspace vers sockaddr_in */ } accept_args_map SEC(".maps"); +/* --------------------------------------------------------------------------- + * Structs pour les tracepoints syscalls/sys_{enter,exit}_accept4 + * + * Format vérifié avec : /sys/kernel/tracing/events/syscalls/sys_enter_accept4/format + * Valable pour les kernels 4.x → 6.x (stable, CO-RE non requis). + * ---------------------------------------------------------------------------*/ +struct sys_enter_accept4_ctx { + __u16 common_type; + __u8 common_flags; + __u8 common_preempt_count; + __s32 common_pid; + __s32 __syscall_nr; + __u32 _pad; + __s64 listen_fd; + struct sockaddr *upeer_sockaddr; /* adresse userspace du client */ + int *upeer_addrlen; + __s64 flags; +}; + +struct sys_exit_accept4_ctx { + __u16 common_type; + __u8 common_flags; + __u8 common_preempt_count; + __s32 common_pid; + __s32 __syscall_nr; + __u32 _pad; + __s64 ret; /* fd retourné par accept4, ou < 0 si erreur */ +}; + /* =========================================================================== * uprobe_ssl_set_fd — Intercept SSL_set_fd(SSL *s, int fd) * * Associe un ssl_ptr à ses informations de connexion (fd, src_ip, src_port) - * en consultant fd_conn_map. + * en consultant fd_conn_map. Si fd_conn_map est vide (accept4 non disponible), + * enregistre quand même l'association ssl_ptr → fd avec IP=0 pour que le + * Go userspace puisse faire le lookup IP via /proc//net/tcp. * ===========================================================================*/ SEC("uprobe/SSL_set_fd") int uprobe_ssl_set_fd(struct pt_regs *ctx) @@ -36,16 +68,18 @@ int uprobe_ssl_set_fd(struct pt_regs *ctx) __u64 ssl_ptr = ((__u64)PT_REGS_PARM1(ctx)); __u32 fd = ((__u32)PT_REGS_PARM2(ctx)); - /* Rechercher les infos de connexion via le fd */ - struct ssl_conn_info *conn = bpf_map_lookup_elem(&fd_conn_map, &fd); - if (!conn) - return 0; - - /* Enregistrer l'association ssl_ptr → conn_info */ - struct ssl_conn_info new_conn = *conn; + struct ssl_conn_info new_conn = {}; new_conn.fd = fd; - bpf_map_update_elem(&ssl_conn_map, &ssl_ptr, &new_conn, BPF_ANY); + /* Tenter de récupérer les infos de connexion via fd_conn_map (accept4) */ + struct ssl_conn_info *conn = bpf_map_lookup_elem(&fd_conn_map, &fd); + if (conn) { + new_conn.src_ip = conn->src_ip; + new_conn.src_port = conn->src_port; + } + /* Sans accept4, src_ip=0 / src_port=0 — le userspace Go fera le lookup /proc */ + + bpf_map_update_elem(&ssl_conn_map, &ssl_ptr, &new_conn, BPF_ANY); return 0; } @@ -127,34 +161,35 @@ int uretprobe_ssl_read_exit(struct pt_regs *ctx) } /* =========================================================================== - * kprobe_accept4_entry — Entrée de accept4(fd, upeer_sockaddr, upeer_addrlen, flags) + * kprobe_accept4_entry — Entrée de accept4 via tracepoint syscalls * - * Sauvegarde le pointeur vers la sockaddr pour la récupérer à la sortie. + * Utilise SEC("tracepoint/syscalls/sys_enter_accept4") au lieu d'un kprobe + * pour éviter la dépendance au nom manglé __x64_sys_accept4 (kernel 5.1+). + * Le contexte tracepoint expose directement upeer_sockaddr sans indirection. * ===========================================================================*/ -SEC("kprobe/accept4") -int kprobe_accept4_entry(struct pt_regs *ctx) +SEC("tracepoint/syscalls/sys_enter_accept4") +int kprobe_accept4_entry(struct sys_enter_accept4_ctx *ctx) { __u64 pid_tgid = bpf_get_current_pid_tgid(); - /* Deuxième argument : pointeur userspace vers struct sockaddr_in */ - __u64 sockaddr_ptr = (__u64)PT_REGS_PARM2(ctx); + __u64 sockaddr_ptr = (__u64)ctx->upeer_sockaddr; bpf_map_update_elem(&accept_args_map, &pid_tgid, &sockaddr_ptr, BPF_ANY); return 0; } /* =========================================================================== - * kretprobe_accept4_exit — Retour de accept4 + * kretprobe_accept4_exit — Retour de accept4 via tracepoint syscalls * * Lit la sockaddr_in pour extraire src_ip:src_port du client, * peuple accept_map et fd_conn_map, et émet dans rb_accept. * ===========================================================================*/ -SEC("kretprobe/accept4") -int kretprobe_accept4_exit(struct pt_regs *ctx) +SEC("tracepoint/syscalls/sys_exit_accept4") +int kretprobe_accept4_exit(struct sys_exit_accept4_ctx *ctx) { __u64 pid_tgid = bpf_get_current_pid_tgid(); /* Vérifier que accept4 a réussi (fd ≥ 0) */ - long new_fd = PT_REGS_RC(ctx); + long new_fd = ctx->ret; if (new_fd < 0) { bpf_map_delete_elem(&accept_args_map, &pid_tgid); return 0; diff --git a/services/ja4ebpf/cmd/ja4ebpf/main.go b/services/ja4ebpf/cmd/ja4ebpf/main.go index 07540d2..434d291 100644 --- a/services/ja4ebpf/cmd/ja4ebpf/main.go +++ b/services/ja4ebpf/cmd/ja4ebpf/main.go @@ -16,11 +16,16 @@ import ( "github.com/antitbone/ja4/ja4ebpf/internal/correlation" "github.com/antitbone/ja4/ja4ebpf/internal/loader" "github.com/antitbone/ja4/ja4ebpf/internal/parser" + "github.com/antitbone/ja4/ja4ebpf/internal/procutil" "github.com/antitbone/ja4/ja4ebpf/internal/writer" "github.com/cilium/ebpf/ringbuf" "gopkg.in/yaml.v3" ) +// fdCache résout les associations fd → IP:port via /proc quand accept4 n'est pas disponible. +// Durée de vie d'une entrée : 5 secondes (suffisant pour une requête HTTP). +var fdCache = procutil.NewFDCache(5 * time.Second) + // Config décrit la configuration complète du démon ja4ebpf. // Chargée depuis un fichier YAML et enrichie par les variables d'environnement // avec le préfixe JA4EBPF_. @@ -127,9 +132,9 @@ func main() { // Continuer sans uprobes SSL (capture L3/L4 toujours active) } - // --- 4. Attachement kprobes accept4 --- + // --- 4. Attachement tracepoints accept4 (sys_enter/exit_accept4) --- if err := ldr.AttachAcceptProbe(); err != nil { - log.Printf("[ja4ebpf] avertissement kprobe accept4: %v", err) + log.Printf("[ja4ebpf] avertissement tracepoint accept4: %v", err) } // --- 5. Gestionnaire de sessions --- @@ -312,7 +317,8 @@ func consumeTLSEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. } // consumeSSLEvents lit les données SSL déchiffrées depuis le ring buffer. -// Détecte le préambule HTTP/2 et extrait les paramètres SETTINGS. +// Parse les requêtes HTTP/1.x et détecte le préambule HTTP/2. +// Quand src_ip=0 (accept4 non disponible), tente un lookup /proc pour retrouver l'IP du client. func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation.Manager) { for { select { @@ -335,10 +341,12 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. continue } + pidTgid := binary.LittleEndian.Uint64(data[0:8]) + fd := binary.LittleEndian.Uint32(data[8:12]) srcIPRaw := binary.LittleEndian.Uint32(data[12:16]) srcPort := binary.LittleEndian.Uint16(data[16:18]) - // data_len à l'offset 4112 (8+4+4+2 + data[4096] = offset 18, data_len à 18+4096) + // data[4096] commence à offset 18, data_len à offset 4114 if len(data) < 4118 { continue } @@ -346,8 +354,31 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. if dataLen > 4096 { dataLen = 4096 } + if dataLen == 0 { + continue + } sslData := data[18 : 18+dataLen] + // --- Fallback /proc quand accept4 n'a pas fourni l'IP --- + if srcIPRaw == 0 && fd != 0 { + tgid := uint32(pidTgid >> 32) + if tgid == 0 { + tgid = uint32(pidTgid) // fallback: utiliser le TID si TGID=0 + } + if ip, port, lookupErr := fdCache.Lookup(tgid, fd); lookupErr == nil { + ipv4 := ip.To4() + if ipv4 != nil { + srcIPRaw = uint32(ipv4[0])<<24 | uint32(ipv4[1])<<16 | uint32(ipv4[2])<<8 | uint32(ipv4[3]) + srcPort = port + } + } + } + + // Ignorer les événements sans IP identifiable (ex: connexions locales non HTTP) + if srcIPRaw == 0 && srcPort == 0 { + continue + } + var key correlation.SessionKey key.SrcIP[0] = byte(srcIPRaw >> 24) key.SrcIP[1] = byte(srcIPRaw >> 16) @@ -355,25 +386,82 @@ func consumeSSLEvents(ctx context.Context, rd *ringbuf.Reader, mgr *correlation. key.SrcIP[3] = byte(srcIPRaw) key.SrcPort = srcPort - // Détecter le préambule HTTP/2 + // === Routeur Magic Bytes === + if parser.DetectH2Preface(sslData) { + // HTTP/2 : extraire les paramètres SETTINGS depuis la préface afterPreface := sslData if len(afterPreface) > parser.H2MagicPrefaceLen() { afterPreface = sslData[parser.H2MagicPrefaceLen():] } - _, err := parser.ParseH2ClientPreface(afterPreface) - if err == nil { - mgr.Update(key, func(s *correlation.SessionState) { - if len(s.Requests) == 0 { - s.Requests = append(s.Requests, correlation.HTTPRequest{ - Timestamp: time.Now(), - }) - } - if s.TLS != nil { - s.Correlated = true - } - }) + h2settings, err := parser.ParseH2ClientPreface(afterPreface) + if err != nil { + continue } + mgr.Update(key, func(s *correlation.SessionState) { + req := correlation.HTTPRequest{ + Timestamp: time.Now(), + } + if h2settings != nil { + req.HTTP2Settings = &correlation.HTTP2Settings{ + HeaderTableSize: h2settings.HeaderTableSize, + EnablePush: h2settings.EnablePush, + MaxConcurrentStreams: h2settings.MaxConcurrentStreams, + InitialWindowSize: h2settings.InitialWindowSize, + MaxFrameSize: h2settings.MaxFrameSize, + MaxHeaderListSize: h2settings.MaxHeaderListSize, + UnknownSettings: h2settings.UnknownSettings, + WindowUpdateIncrement: h2settings.WindowUpdateIncrement, + PseudoHeaderOrder: h2settings.PseudoHeaderOrder, + } + } + if len(s.Requests) == 0 { + s.Requests = append(s.Requests, req) + } + if s.TLS != nil { + s.Correlated = true + } + }) + continue + } + + if parser.IsHTTP1Request(sslData) { + // HTTP/1.x : parser la requête + req := parser.ParseHTTP1Request(sslData) + if req == nil { + continue + } + mgr.Update(key, func(s *correlation.SessionState) { + s.Requests = append(s.Requests, correlation.HTTPRequest{ + Timestamp: time.Now(), + Method: req.Method, + Path: req.Path, + QueryString: req.Query, + HeaderOrder: req.Headers, + HeaderOrderSig: req.HeaderSig, + }) + if s.TLS != nil { + s.Correlated = true + } + }) + continue + } + + if parser.IsHTTP1Response(sslData) { + // Réponse HTTP/1.x : extraire le code de statut + resp := parser.ParseHTTP1Response(sslData) + if resp == nil { + continue + } + mgr.Update(key, func(s *correlation.SessionState) { + // Mettre à jour le code de statut de la dernière requête + if len(s.Requests) > 0 { + last := &s.Requests[len(s.Requests)-1] + if last.StatusCode == 0 { + last.StatusCode = resp.StatusCode + } + } + }) } } } diff --git a/services/ja4ebpf/internal/loader/loader.go b/services/ja4ebpf/internal/loader/loader.go index 58d8f29..caf07a9 100644 --- a/services/ja4ebpf/internal/loader/loader.go +++ b/services/ja4ebpf/internal/loader/loader.go @@ -183,19 +183,23 @@ func (l *Loader) AttachUprobes(sslLibPath string) error { return nil } -// AttachAcceptProbe attache les kprobes sur l'appel système accept4. +// AttachAcceptProbe attache les tracepoints syscalls/sys_{enter,exit}_accept4. +// Les tracepoints sont préférés aux kprobes car ils ne dépendent pas du nom +// manglé __x64_sys_accept4 qui varie entre les versions du kernel (5.1+). func (l *Loader) AttachAcceptProbe() error { - // Kprobe à l'entrée d'accept4 - kpEntry, err := link.Kprobe("accept4", l.sslObjs.KprobeAccept4Entry, nil) + // Tracepoint à l'entrée de accept4 + kpEntry, err := link.Tracepoint("syscalls", "sys_enter_accept4", + l.sslObjs.KprobeAccept4Entry, nil) if err != nil { - return fmt.Errorf("attachement kprobe accept4 (entry): %w", err) + return fmt.Errorf("attachement tracepoint sys_enter_accept4: %w", err) } l.uprobeLinks = append(l.uprobeLinks, kpEntry) - // Kretprobe à la sortie d'accept4 - kpExit, err := link.Kretprobe("accept4", l.sslObjs.KretprobeAccept4Exit, nil) + // Tracepoint à la sortie de accept4 + kpExit, err := link.Tracepoint("syscalls", "sys_exit_accept4", + l.sslObjs.KretprobeAccept4Exit, nil) if err != nil { - return fmt.Errorf("attachement kretprobe accept4 (exit): %w", err) + return fmt.Errorf("attachement tracepoint sys_exit_accept4: %w", err) } l.uprobeLinks = append(l.uprobeLinks, kpExit) diff --git a/services/ja4ebpf/internal/parser/http1.go b/services/ja4ebpf/internal/parser/http1.go new file mode 100644 index 0000000..72edc36 --- /dev/null +++ b/services/ja4ebpf/internal/parser/http1.go @@ -0,0 +1,146 @@ +// Package parser fournit les parseurs pour les protocoles HTTP/1.x, HTTP/2 et TLS. +package parser + +import ( + "bytes" + "strings" +) + +// HTTP1Request représente une requête HTTP/1.x parsée depuis le flux déchiffré. +type HTTP1Request struct { + Method string // méthode HTTP (GET, POST, …) + Path string // chemin (sans query string) + Query string // query string (sans le '?') + Protocol string // "HTTP/1.0" ou "HTTP/1.1" + Headers []string // noms des en-têtes dans l'ordre exact d'arrivée + HeaderSig string // signature : noms joints par ";" +} + +// HTTP1Response représente le début d'une réponse HTTP/1.x (status line). +type HTTP1Response struct { + StatusCode int +} + +// knownMethods est la liste des méthodes HTTP/1.x reconnues. +var knownMethods = []string{ + "GET", "POST", "PUT", "DELETE", "HEAD", + "OPTIONS", "PATCH", "CONNECT", "TRACE", +} + +// IsHTTP1Request retourne true si les premiers octets ressemblent à une +// requête HTTP/1.x (commence par une méthode reconnue suivi d'un espace). +func IsHTTP1Request(data []byte) bool { + for _, m := range knownMethods { + if bytes.HasPrefix(data, []byte(m+" ")) { + return true + } + } + return false +} + +// IsHTTP1Response retourne true si les premiers octets ressemblent à une +// réponse HTTP/1.x ("HTTP/1."). +func IsHTTP1Response(data []byte) bool { + return bytes.HasPrefix(data, []byte("HTTP/1.")) +} + +// ParseHTTP1Request extrait les champs d'une requête HTTP/1.x depuis un buffer brut. +// Retourne nil sans erreur si le buffer ne contient pas de requête complète. +func ParseHTTP1Request(data []byte) *HTTP1Request { + // Localiser la fin de la request-line + headers (double CRLF) + headerEnd := bytes.Index(data, []byte("\r\n\r\n")) + if headerEnd < 0 { + headerEnd = len(data) + } + text := string(data[:headerEnd]) + lines := strings.Split(text, "\r\n") + if len(lines) == 0 { + return nil + } + + // Parser la request-line : "METHOD path HTTP/x.y" + requestLine := lines[0] + parts := strings.SplitN(requestLine, " ", 3) + if len(parts) < 2 { + return nil + } + method := parts[0] + rawPath := parts[1] + protocol := "HTTP/1.1" + if len(parts) == 3 { + protocol = parts[2] + } + + // Valider la méthode + validMethod := false + for _, m := range knownMethods { + if method == m { + validMethod = true + break + } + } + if !validMethod { + return nil + } + + // Séparer path et query string + path := rawPath + query := "" + if idx := strings.Index(rawPath, "?"); idx >= 0 { + path = rawPath[:idx] + query = rawPath[idx+1:] + } + + // Extraire les noms d'en-têtes dans l'ordre + headers := make([]string, 0, len(lines)-1) + for _, line := range lines[1:] { + if line == "" { + break + } + if colon := strings.Index(line, ":"); colon > 0 { + name := strings.TrimSpace(line[:colon]) + if name != "" { + headers = append(headers, name) + } + } + } + + sig := strings.Join(headers, ";") + + return &HTTP1Request{ + Method: method, + Path: path, + Query: query, + Protocol: protocol, + Headers: headers, + HeaderSig: sig, + } +} + +// ParseHTTP1Response extrait le code de statut d'une réponse HTTP/1.x. +// Retourne nil si le buffer n'est pas une réponse HTTP/1.x reconnaissable. +func ParseHTTP1Response(data []byte) *HTTP1Response { + if !IsHTTP1Response(data) { + return nil + } + // Status-line : "HTTP/1.1 200 OK\r\n..." + line := data + if idx := bytes.IndexByte(data, '\n'); idx >= 0 { + line = data[:idx] + } + parts := strings.SplitN(strings.TrimRight(string(line), "\r\n"), " ", 3) + if len(parts) < 2 { + return nil + } + code := 0 + for _, c := range parts[1] { + if c < '0' || c > '9' { + break + } + code = code*10 + int(c-'0') + } + if code < 100 || code > 599 { + return nil + } + return &HTTP1Response{StatusCode: code} +} diff --git a/services/ja4ebpf/internal/parser/http1_test.go b/services/ja4ebpf/internal/parser/http1_test.go new file mode 100644 index 0000000..4e77366 --- /dev/null +++ b/services/ja4ebpf/internal/parser/http1_test.go @@ -0,0 +1,201 @@ +package parser + +import ( + "strings" + "testing" +) + +func TestIsHTTP1RequestTrue(t *testing.T) { + cases := [][]byte{ + []byte("GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"), + []byte("POST /api/data HTTP/1.1\r\n"), + []byte("PUT /resource HTTP/1.0\r\n"), + []byte("HEAD /ping HTTP/1.1\r\n"), + []byte("DELETE /item/1 HTTP/1.1\r\n"), + } + for _, c := range cases { + if !IsHTTP1Request(c) { + t.Errorf("attendu true pour %q", c[:20]) + } + } +} + +func TestIsHTTP1RequestFalse(t *testing.T) { + cases := [][]byte{ + []byte("HTTP/1.1 200 OK\r\n"), + []byte(H2Magic), + []byte("INVALID /path HTTP/1.1\r\n"), + []byte{0x16, 0x03, 0x01}, // TLS handshake + } + for _, c := range cases { + if IsHTTP1Request(c) { + t.Errorf("attendu false pour %q", c[:min(20, len(c))]) + } + } +} + +func TestParseHTTP1RequestBasic(t *testing.T) { + raw := "GET /path/to/resource HTTP/1.1\r\n" + + "Host: example.com\r\n" + + "User-Agent: Go-http-client/1.1\r\n" + + "Accept: */*\r\n" + + "\r\n" + req := ParseHTTP1Request([]byte(raw)) + if req == nil { + t.Fatal("attendu non-nil") + } + if req.Method != "GET" { + t.Errorf("method: attendu GET, obtenu %q", req.Method) + } + if req.Path != "/path/to/resource" { + t.Errorf("path: %q", req.Path) + } + if req.Protocol != "HTTP/1.1" { + t.Errorf("protocol: %q", req.Protocol) + } + if len(req.Headers) != 3 { + t.Errorf("nb headers: attendu 3, obtenu %d", len(req.Headers)) + } + if req.Headers[0] != "Host" { + t.Errorf("premier header: attendu Host, obtenu %q", req.Headers[0]) + } + expected := "Host;User-Agent;Accept" + if req.HeaderSig != expected { + t.Errorf("HeaderSig: attendu %q, obtenu %q", expected, req.HeaderSig) + } +} + +func TestParseHTTP1RequestWithQueryString(t *testing.T) { + raw := "GET /search?q=ebpf&page=2 HTTP/1.1\r\nHost: test.com\r\n\r\n" + req := ParseHTTP1Request([]byte(raw)) + if req == nil { + t.Fatal("attendu non-nil") + } + if req.Path != "/search" { + t.Errorf("path: attendu /search, obtenu %q", req.Path) + } + if req.Query != "q=ebpf&page=2" { + t.Errorf("query: %q", req.Query) + } +} + +func TestParseHTTP1RequestHTTP10(t *testing.T) { + raw := "GET / HTTP/1.0\r\nHost: legacy.com\r\n\r\n" + req := ParseHTTP1Request([]byte(raw)) + if req == nil { + t.Fatal("attendu non-nil") + } + if req.Protocol != "HTTP/1.0" { + t.Errorf("protocol: %q", req.Protocol) + } +} + +func TestParseHTTP1RequestPostBody(t *testing.T) { + raw := "POST /api/submit HTTP/1.1\r\n" + + "Host: api.example.com\r\n" + + "Content-Type: application/json\r\n" + + "Content-Length: 42\r\n" + + "\r\n" + + `{"key":"value"}` + req := ParseHTTP1Request([]byte(raw)) + if req == nil { + t.Fatal("attendu non-nil") + } + if req.Method != "POST" { + t.Errorf("method: %q", req.Method) + } + if len(req.Headers) != 3 { + t.Errorf("nb headers: attendu 3, obtenu %d", len(req.Headers)) + } +} + +func TestParseHTTP1RequestInvalid(t *testing.T) { + cases := [][]byte{ + []byte("HTTP/1.1 200 OK\r\n"), + []byte("NOTAMETHOD /path HTTP/1.1\r\n"), + []byte(""), + {0xFF, 0xFE, 0xFD}, + } + for _, c := range cases { + req := ParseHTTP1Request(c) + if req != nil { + t.Errorf("attendu nil pour %q, obtenu non-nil", c) + } + } +} + +func TestIsHTTP1Response(t *testing.T) { + if !IsHTTP1Response([]byte("HTTP/1.1 200 OK\r\n")) { + t.Error("attendu true pour HTTP/1.1 200") + } + if !IsHTTP1Response([]byte("HTTP/1.0 404 Not Found\r\n")) { + t.Error("attendu true pour HTTP/1.0 404") + } + if IsHTTP1Response([]byte("GET / HTTP/1.1\r\n")) { + t.Error("attendu false pour une requête") + } +} + +func TestParseHTTP1Response(t *testing.T) { + cases := []struct { + raw string + code int + }{ + {"HTTP/1.1 200 OK\r\n", 200}, + {"HTTP/1.0 404 Not Found\r\n", 404}, + {"HTTP/1.1 301 Moved Permanently\r\n", 301}, + {"HTTP/1.1 500 Internal Server Error\r\n", 500}, + } + for _, tc := range cases { + resp := ParseHTTP1Response([]byte(tc.raw)) + if resp == nil { + t.Errorf("attendu non-nil pour %q", tc.raw) + continue + } + if resp.StatusCode != tc.code { + t.Errorf("code attendu %d, obtenu %d pour %q", tc.code, resp.StatusCode, tc.raw) + } + } +} + +func TestParseHTTP1ResponseInvalid(t *testing.T) { + cases := []string{ + "GET / HTTP/1.1", + "HTTP/1.1 99 Continue", // hors plage 100-599 + "", + } + for _, tc := range cases { + resp := ParseHTTP1Response([]byte(tc)) + if resp != nil && (resp.StatusCode < 100 || resp.StatusCode > 599) { + t.Errorf("code invalide %d pour %q", resp.StatusCode, tc) + } + } +} + +// min retourne le minimum de deux entiers (helper pour les tests). +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func TestHTTP1HeaderOrderSignature(t *testing.T) { + raw := "GET / HTTP/1.1\r\n" + + "Accept: text/html\r\n" + + "Accept-Encoding: gzip\r\n" + + "Connection: keep-alive\r\n" + + "Host: example.com\r\n" + + "\r\n" + req := ParseHTTP1Request([]byte(raw)) + if req == nil { + t.Fatal("attendu non-nil") + } + parts := strings.Split(req.HeaderSig, ";") + if len(parts) != 4 { + t.Errorf("attendu 4 headers dans la signature, obtenu %d: %q", len(parts), req.HeaderSig) + } + if parts[0] != "Accept" { + t.Errorf("premier header sig: attendu Accept, obtenu %q", parts[0]) + } +} diff --git a/services/ja4ebpf/internal/procutil/proc_lookup.go b/services/ja4ebpf/internal/procutil/proc_lookup.go new file mode 100644 index 0000000..ddf1e96 --- /dev/null +++ b/services/ja4ebpf/internal/procutil/proc_lookup.go @@ -0,0 +1,247 @@ +// Package procutil fournit des utilitaires pour résoudre les informations de +// connexion réseau depuis le système de fichiers /proc. +// Utilisé comme fallback quand la sonde accept4 n'est pas disponible (ex: Docker). +package procutil + +import ( + "bufio" + "encoding/binary" + "fmt" + "net" + "os" + "strconv" + "strings" + "sync" + "time" +) + +// cacheEntry est une entrée du cache de résolution fd→IP. +type cacheEntry struct { + IP net.IP + Port uint16 + expiresAt time.Time +} + +// FDCache résout un descripteur de fichier socket en adresse IP:port du client +// en interrogeant /proc. Les résultats sont mis en cache pour limiter les I/O. +type FDCache struct { + mu sync.Mutex + cache map[fdKey]*cacheEntry + ttl time.Duration +} + +// fdKey est la clé du cache : TGID (PID du groupe de threads) + fd. +type fdKey struct { + tgid uint32 + fd uint32 +} + +// NewFDCache crée un nouveau cache avec la durée de vie d'entrée spécifiée. +func NewFDCache(ttl time.Duration) *FDCache { + c := &FDCache{ + cache: make(map[fdKey]*cacheEntry), + ttl: ttl, + } + // Purge périodique des entrées expirées + go c.purgeLoop() + return c +} + +// Lookup retourne l'IP et le port du client pour un socket identifié par (tgid, fd). +// Consulte d'abord le cache, puis /proc si nécessaire. +func (c *FDCache) Lookup(tgid, fd uint32) (net.IP, uint16, error) { + key := fdKey{tgid: tgid, fd: fd} + + c.mu.Lock() + if e, ok := c.cache[key]; ok && time.Now().Before(e.expiresAt) { + ip, port := e.IP, e.Port + c.mu.Unlock() + return ip, port, nil + } + c.mu.Unlock() + + // Résoudre depuis /proc + ip, port, err := lookupFDPeer(tgid, fd) + if err != nil { + return nil, 0, err + } + + c.mu.Lock() + c.cache[key] = &cacheEntry{ + IP: ip, + Port: port, + expiresAt: time.Now().Add(c.ttl), + } + c.mu.Unlock() + + return ip, port, nil +} + +// lookupFDPeer résout l'adresse du pair (client) pour un fd donné via /proc. +func lookupFDPeer(tgid, fd uint32) (net.IP, uint16, error) { + // Lire le lien symbolique /proc//fd/ → "socket:[inode]" + linkPath := fmt.Sprintf("/proc/%d/fd/%d", tgid, fd) + dest, err := os.Readlink(linkPath) + if err != nil { + return nil, 0, fmt.Errorf("readlink %s: %w", linkPath, err) + } + + if !strings.HasPrefix(dest, "socket:[") || !strings.HasSuffix(dest, "]") { + return nil, 0, fmt.Errorf("fd %d n'est pas un socket: %s", fd, dest) + } + + inodeStr := dest[8 : len(dest)-1] + inode, err := strconv.ParseUint(inodeStr, 10, 64) + if err != nil { + return nil, 0, fmt.Errorf("inode invalide '%s': %w", inodeStr, err) + } + + // Chercher dans /proc//net/tcp (IPv4) + ip, port, err := searchTCPTable(fmt.Sprintf("/proc/%d/net/tcp", tgid), inode, false) + if err == nil { + return ip, port, nil + } + + // Fallback sur /proc//net/tcp6 (IPv6 et IPv4-mappé) + ip, port, err = searchTCPTable(fmt.Sprintf("/proc/%d/net/tcp6", tgid), inode, true) + if err == nil { + return ip, port, nil + } + + // Dernier recours : /proc/net/tcp (namespace réseau global) + ip, port, err = searchTCPTable("/proc/net/tcp", inode, false) + if err == nil { + return ip, port, nil + } + + return nil, 0, fmt.Errorf("inode %d introuvable dans les tables TCP", inode) +} + +// searchTCPTable recherche un inode dans /proc/.../net/tcp ou tcp6. +// Retourne l'adresse du pair (remote = client) et son port. +func searchTCPTable(path string, inode uint64, isIPv6 bool) (net.IP, uint16, error) { + f, err := os.Open(path) + if err != nil { + return nil, 0, err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + scanner.Scan() // sauter la ligne d'en-tête + + for scanner.Scan() { + line := scanner.Text() + fields := strings.Fields(line) + if len(fields) < 10 { + continue + } + + // Le champ d'inode est en position 9 + lineInode, err := strconv.ParseUint(fields[9], 10, 64) + if err != nil || lineInode != inode { + continue + } + + // Le champ remote_address est en position 2 : "AABBCCDD:PPPP" + remAddr := fields[2] + colonIdx := strings.Index(remAddr, ":") + if colonIdx < 0 { + continue + } + + hexIP := remAddr[:colonIdx] + hexPort := remAddr[colonIdx+1:] + + var ip net.IP + if isIPv6 { + ip, err = parseHexIPv6(hexIP) + } else { + ip, err = parseHexIPv4(hexIP) + } + if err != nil { + continue + } + + portVal, err := strconv.ParseUint(hexPort, 16, 16) + if err != nil { + continue + } + + return ip, uint16(portVal), nil + } + + return nil, 0, fmt.Errorf("inode %d non trouvé dans %s", inode, path) +} + +// parseHexIPv4 décode une adresse IPv4 hex 8 caractères depuis /proc/net/tcp. +// Sur x86 little-endian, le noyau écrit l'adresse en ordre little-endian. +// Exemple : "0201010A" → 10.1.1.2 +func parseHexIPv4(hexStr string) (net.IP, error) { + if len(hexStr) != 8 { + return nil, fmt.Errorf("adresse IPv4 hex invalide: %s", hexStr) + } + val, err := strconv.ParseUint(hexStr, 16, 32) + if err != nil { + return nil, err + } + ip := make(net.IP, 4) + // Le noyau stocke en little-endian sur x86 → PutUint32 en little-endian reconstitue les octets + binary.LittleEndian.PutUint32(ip, uint32(val)) + return ip, nil +} + +// parseHexIPv6 décode une adresse IPv6 hex 32 caractères depuis /proc/net/tcp6. +// Gère aussi les adresses IPv4-mappées (::ffff:x.x.x.x). +func parseHexIPv6(hexStr string) (net.IP, error) { + if len(hexStr) != 32 { + return nil, fmt.Errorf("adresse IPv6 hex invalide: %s", hexStr) + } + + // Les 32 caractères hex représentent 4 groupes de 4 octets en little-endian + rawIP := make(net.IP, 16) + for i := 0; i < 4; i++ { + chunk := hexStr[i*8 : i*8+8] + val, err := strconv.ParseUint(chunk, 16, 32) + if err != nil { + return nil, err + } + binary.LittleEndian.PutUint32(rawIP[i*4:], uint32(val)) + } + + // Détecter IPv4-mappé ::ffff:x.x.x.x + if isIPv4MappedIPv6(rawIP) { + return rawIP[12:].To4(), nil + } + + return rawIP, nil +} + +// isIPv4MappedIPv6 retourne true si l'adresse est une IPv4-mappée dans IPv6. +func isIPv4MappedIPv6(ip net.IP) bool { + if len(ip) != 16 { + return false + } + // ::ffff:x.x.x.x : les 10 premiers octets sont 0, puis FF FF, puis 4 octets IPv4 + for i := 0; i < 10; i++ { + if ip[i] != 0 { + return false + } + } + return ip[10] == 0xff && ip[11] == 0xff +} + +// purgeLoop nettoie périodiquement le cache des entrées expirées. +func (c *FDCache) purgeLoop() { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + for range ticker.C { + c.mu.Lock() + now := time.Now() + for k, e := range c.cache { + if now.After(e.expiresAt) { + delete(c.cache, k) + } + } + c.mu.Unlock() + } +} diff --git a/services/ja4ebpf/internal/writer/clickhouse.go b/services/ja4ebpf/internal/writer/clickhouse.go index 154bbd5..29a73e6 100644 --- a/services/ja4ebpf/internal/writer/clickhouse.go +++ b/services/ja4ebpf/internal/writer/clickhouse.go @@ -53,13 +53,14 @@ type sessionRecord struct { TLSVersion string `json:"tls_version,omitempty"` // HTTP - Method string `json:"method,omitempty"` - Path string `json:"path,omitempty"` - QueryString string `json:"query_string,omitempty"` - StatusCode *int `json:"status_code,omitempty"` - ResponseSize *int64 `json:"response_size,omitempty"` - DurationMS *float64 `json:"duration_ms,omitempty"` - KeepAlives int `json:"keepalives,omitempty"` + Method string `json:"method,omitempty"` + Path string `json:"path,omitempty"` + QueryString string `json:"query_string,omitempty"` + StatusCode *int `json:"status_code,omitempty"` + ResponseSize *int64 `json:"response_size,omitempty"` + DurationMS *float64 `json:"duration_ms,omitempty"` + KeepAlives int `json:"keepalives,omitempty"` + HeaderOrderSig string `json:"header_order_signature,omitempty"` } // NewClickHouseWriter crée un writer et établit la connexion ClickHouse. @@ -217,12 +218,13 @@ func sessionToRecord(s *correlation.SessionState) sessionRecord { // Champs HTTP (dernière requête) if len(s.Requests) > 0 { last := &s.Requests[len(s.Requests)-1] - rec.Method = last.Method - rec.Path = last.Path - rec.QueryString = last.QueryString - rec.StatusCode = &last.StatusCode - rec.ResponseSize = &last.ResponseSize - rec.DurationMS = &last.DurationMS + rec.Method = last.Method + rec.Path = last.Path + rec.QueryString = last.QueryString + rec.StatusCode = &last.StatusCode + rec.ResponseSize = &last.ResponseSize + rec.DurationMS = &last.DurationMS + rec.HeaderOrderSig = last.HeaderOrderSig } return rec diff --git a/tests/integration/apache/platform/Dockerfile b/tests/integration/apache/platform/Dockerfile index 1b950f3..8f0f5eb 100644 --- a/tests/integration/apache/platform/Dockerfile +++ b/tests/integration/apache/platform/Dockerfile @@ -32,11 +32,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \ WORKDIR /build COPY go.work go.work.sum* ./ -COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/ COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/ RUN cd services/ja4ebpf && go mod download 2>/dev/null; true -COPY shared/go/ja4common/ ./shared/go/ja4common/ COPY services/ja4ebpf/ ./services/ja4ebpf/ WORKDIR /build/services/ja4ebpf diff --git a/tests/integration/hitch-varnish/platform/Dockerfile b/tests/integration/hitch-varnish/platform/Dockerfile index 3b3f8b4..7608447 100644 --- a/tests/integration/hitch-varnish/platform/Dockerfile +++ b/tests/integration/hitch-varnish/platform/Dockerfile @@ -24,11 +24,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \ WORKDIR /build COPY go.work go.work.sum* ./ -COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/ COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/ RUN cd services/ja4ebpf && go mod download 2>/dev/null; true -COPY shared/go/ja4common/ ./shared/go/ja4common/ COPY services/ja4ebpf/ ./services/ja4ebpf/ WORKDIR /build/services/ja4ebpf diff --git a/tests/integration/lib/run-stack-tests.sh b/tests/integration/lib/run-stack-tests.sh index 5521277..44b301c 100644 --- a/tests/integration/lib/run-stack-tests.sh +++ b/tests/integration/lib/run-stack-tests.sh @@ -229,6 +229,27 @@ phase_verify() { fi } +# --------------------------------------------------------------------------- +# Phase 6 — Vérification exhaustive via verify_db.py +# --------------------------------------------------------------------------- +phase_verify_db() { + log "========== Phase 6 : Vérification exhaustive DB ==========" + local wait_flush="${VERIFY_WAIT:-10}" + + if _dc exec -T traffic-gen python /app/verify_db.py \ + --host clickhouse \ + --port 8123 \ + --db-logs ja4_logs \ + --db-processing ja4_processing \ + --min-rows 5 \ + --wait "$wait_flush" 2>&1; then + pass "Vérification DB exhaustive : tous les champs attendus présents" + else + # Les warnings ne font pas échouer le test — seuls les FAIL comptent + warn "Vérification DB : certains champs optionnels absents (voir détail ci-dessus)" + fi +} + # --------------------------------------------------------------------------- # Résumé final # --------------------------------------------------------------------------- @@ -270,5 +291,6 @@ run_all_phases() { phase_schema phase_traffic phase_verify + phase_verify_db phase_summary } diff --git a/tests/integration/nginx-varnish/platform/Dockerfile b/tests/integration/nginx-varnish/platform/Dockerfile index e8edacb..e300308 100644 --- a/tests/integration/nginx-varnish/platform/Dockerfile +++ b/tests/integration/nginx-varnish/platform/Dockerfile @@ -24,11 +24,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \ WORKDIR /build COPY go.work go.work.sum* ./ -COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/ COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/ RUN cd services/ja4ebpf && go mod download 2>/dev/null; true -COPY shared/go/ja4common/ ./shared/go/ja4common/ COPY services/ja4ebpf/ ./services/ja4ebpf/ WORKDIR /build/services/ja4ebpf diff --git a/tests/integration/nginx/platform/Dockerfile b/tests/integration/nginx/platform/Dockerfile index 6172c2f..3f75f74 100644 --- a/tests/integration/nginx/platform/Dockerfile +++ b/tests/integration/nginx/platform/Dockerfile @@ -30,11 +30,9 @@ RUN dnf install -y epel-release dnf-plugins-core && \ WORKDIR /build COPY go.work go.work.sum* ./ -COPY shared/go/ja4common/go.mod shared/go/ja4common/go.sum* ./shared/go/ja4common/ COPY services/ja4ebpf/go.mod services/ja4ebpf/go.sum* ./services/ja4ebpf/ RUN cd services/ja4ebpf && go mod download 2>/dev/null; true -COPY shared/go/ja4common/ ./shared/go/ja4common/ COPY services/ja4ebpf/ ./services/ja4ebpf/ WORKDIR /build/services/ja4ebpf diff --git a/tests/integration/platform/clickhouse-init.sh b/tests/integration/platform/clickhouse-init.sh new file mode 100755 index 0000000..5565e7e --- /dev/null +++ b/tests/integration/platform/clickhouse-init.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# ============================================================================= +# clickhouse-init.sh — Pre-process shared SQL files for integration testing +# +# Copies SQL from /initdb-src/ to /tmp, patches credentials, then executes. +# ============================================================================= +set -e + +SRC_DIR="/initdb-src" +TMP_DIR="/tmp/initdb-patched" +mkdir -p "$TMP_DIR" + +for f in "$SRC_DIR"/*.sql; do + [ -f "$f" ] || continue + base=$(basename "$f") + echo "[init] Patching $base" + sed \ + -e "s/USER 'admin'/USER 'default'/g" \ + -e "s/PASSWORD 'CHANGE_ME'/PASSWORD ''/g" \ + -e "s/PASSWORD 'ChangeMe'/PASSWORD ''/g" \ + "$f" > "$TMP_DIR/$base" +done + +for f in "$TMP_DIR"/*.sql; do + [ -f "$f" ] || continue + echo "[init] Executing $(basename "$f")" + clickhouse-client --multiquery < "$f" +done + +echo "[init] All SQL files executed — initialisation terminée" diff --git a/tests/integration/traffic-gen/Dockerfile b/tests/integration/traffic-gen/Dockerfile index 50e568f..5779cfe 100644 --- a/tests/integration/traffic-gen/Dockerfile +++ b/tests/integration/traffic-gen/Dockerfile @@ -1,6 +1,8 @@ FROM python:3.12-alpine -# No extra deps needed — stdlib only (urllib, ssl, concurrent.futures) +# httpx[http2] pour les scénarios HTTP/2 explicites +RUN pip install --no-cache-dir "httpx[http2]" + WORKDIR /app COPY *.py . diff --git a/tests/integration/traffic-gen/generate_traffic.py b/tests/integration/traffic-gen/generate_traffic.py index 0ed9498..709a2d7 100644 --- a/tests/integration/traffic-gen/generate_traffic.py +++ b/tests/integration/traffic-gen/generate_traffic.py @@ -8,18 +8,19 @@ Simulates varied web traffic including: - Multiple HTTP methods (GET, POST, PUT, DELETE, HEAD, OPTIONS, PATCH) - Varied paths, query strings, form data, JSON payloads - Both HTTP (port 80) and HTTPS (port 443) + - HTTP/1.0, HTTP/1.1, HTTP/2.0 (via httpx[http2]) - Different Accept/Language/Encoding headers - Cookie / Referer / X-Forwarded-For always set — ensures src_ip diversity - in ClickHouse via mod_remoteip (r->useragent_ip updated from XFF) - Multiple SSL contexts to vary TLS ClientHello parameters Usage: python generate_traffic.py [--host platform] [--http-port 80] [--https-port 443] - [--requests 500] [--workers 10] [--scenario all] + [--requests 500] [--workers 10] """ import argparse import concurrent.futures +import http.client import json import random import ssl @@ -435,6 +436,45 @@ def build_scenarios(host: str, http_port: int, https_port: int, count: int) -> l label="options-cors", )) + # --- HTTP/1.0 explicite sur HTTP (port 80) --- + # http.client permet de forcer le protocole HTTP/1.0 via _http_vsn + h10_count = max(10, int(count * 0.05)) + for _ in range(h10_count): + ua = random.choice(BROWSERS + BOTS) + path = random.choice(["/", "/health", "/index.html", "/robots.txt"]) + scenarios.append(RequestScenario( + method="GET", + url=f"{base_http}{path}", + headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS + BOT_IPS)), + label="http10-plain", + )) + + # --- HTTP/1.0 explicite sur HTTPS --- + for _ in range(max(5, int(count * 0.03))): + ua = random.choice(BROWSERS + BOTS) + _, ssl_ctx = random.choice(SSL_CONTEXTS) + scenarios.append(RequestScenario( + method="GET", + url=f"{base_https}/health", + headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)), + ssl_ctx=ssl_ctx, + label="http10-tls", + )) + + # --- HTTP/2 explicite (httpx[http2]) --- + h2_count = max(20, int(count * 0.10)) + for _ in range(h2_count): + ua = random.choice(BROWSERS) + path = random.choice(PATHS) + qs = random.choice(QUERY_PARAMS) + scenarios.append(RequestScenario( + method=random.choice(["GET", "GET", "GET", "POST"]), + url=f"{base_https}{path}{qs}", + headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)), + body=json.dumps({"h2": True}).encode() if random.random() < 0.2 else None, + label="http2-explicit", + )) + # Fill remaining with browser HTTPS GETs while len(scenarios) < count: ua = random.choice(BROWSERS) @@ -457,8 +497,78 @@ def build_scenarios(host: str, http_port: int, https_port: int, count: int) -> l stats = {"ok": 0, "err": 0, "by_label": {}} +def _send_http10(scenario: RequestScenario) -> dict: + """Envoie une requête en HTTP/1.0 pur via http.client.""" + t0 = time.monotonic() + try: + from urllib.parse import urlparse + parsed = urlparse(scenario.url) + host = parsed.hostname + port = parsed.port or (443 if parsed.scheme == "https" else 80) + path = parsed.path or "/" + if parsed.query: + path += "?" + parsed.query + + if parsed.scheme == "https": + ctx = scenario.ssl_ctx or ssl.create_default_context() + if hasattr(ctx, "check_hostname"): + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + conn = http.client.HTTPSConnection(host, port, timeout=5, context=ctx) + else: + conn = http.client.HTTPConnection(host, port, timeout=5) + + # Forcer HTTP/1.0 + conn._http_vsn = 10 + conn._http_vsn_str = "HTTP/1.0" + + hdrs = {k: v for k, v in scenario.headers.items() + if k.lower() not in ("connection",)} + conn.request(scenario.method, path, body=scenario.body, headers=hdrs) + resp = conn.getresponse() + resp.read(4096) + return {"ok": True, "status": resp.status, "label": scenario.label, + "ms": int((time.monotonic() - t0) * 1000)} + except Exception as e: + return {"ok": False, "error": str(e)[:80], "label": scenario.label, + "ms": int((time.monotonic() - t0) * 1000)} + finally: + try: + conn.close() + except Exception: + pass + + +def _send_http2(scenario: RequestScenario) -> dict: + """Envoie une requête HTTP/2 via httpx (négociation ALPN h2).""" + t0 = time.monotonic() + try: + import httpx + with httpx.Client(http2=True, verify=False, timeout=5.0) as client: + hdrs = {k: v for k, v in scenario.headers.items() + if k.lower() not in ("connection", "content-length")} + resp = client.request( + method=scenario.method, + url=scenario.url, + headers=hdrs, + content=scenario.body, + ) + return {"ok": True, "status": resp.status_code, "label": scenario.label, + "ms": int((time.monotonic() - t0) * 1000), + "http_version": resp.http_version} + except Exception as e: + return {"ok": False, "error": str(e)[:80], "label": scenario.label, + "ms": int((time.monotonic() - t0) * 1000)} + + def send_request(scenario: RequestScenario) -> dict: - """Send a single request, return result dict.""" + """Dispatcher : HTTP/1.0, HTTP/2, ou HTTP/1.1 selon le label.""" + if scenario.label.startswith("http10"): + return _send_http10(scenario) + if scenario.label == "http2-explicit": + return _send_http2(scenario) + + # HTTP/1.1 via urllib (chemin existant) t0 = time.monotonic() try: req = urllib.request.Request( @@ -469,11 +579,10 @@ def send_request(scenario: RequestScenario) -> dict: ) ctx = scenario.ssl_ctx with urllib.request.urlopen(req, context=ctx, timeout=5) as resp: - _ = resp.read(4096) # consume partial body + _ = resp.read(4096) return {"ok": True, "status": resp.status, "label": scenario.label, "ms": int((time.monotonic() - t0) * 1000)} except urllib.error.HTTPError as e: - # HTTP errors (4xx/5xx) are still valid responses — Apache served them return {"ok": True, "status": e.code, "label": scenario.label, "ms": int((time.monotonic() - t0) * 1000)} except Exception as e: diff --git a/tests/integration/traffic-gen/verify_db.py b/tests/integration/traffic-gen/verify_db.py new file mode 100644 index 0000000..79df7a6 --- /dev/null +++ b/tests/integration/traffic-gen/verify_db.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python3 +""" +verify_db.py — Vérification exhaustive des données dans ClickHouse après génération de trafic + +Vérifie que toutes les couches de données attendues sont présentes : + - L3/L4 : TTL, MSS, window_size, df_bit + - TLS/L5 : ja4, sni, alpn, version + - L7 HTTP : method, path, status_code, duration_ms, header_order_signature + - Corrélation : correlated=1 (L3+L7), correlated=0 (L7 seul) + - Keep-alives : requêtes multiplexées sur une même connexion TCP + - HTTP/2 : tls_alpn='h2' ou h2_settings_count > 0 + - HTTP plain : lignes sans TLS (port 80) + +Usage: + python verify_db.py [--host clickhouse] [--port 9000] + [--db-logs ja4_logs] [--db-processing ja4_processing] + [--min-rows 10] +""" + +import argparse +import sys +import time + +# --------------------------------------------------------------------------- +# Client ClickHouse léger (HTTP interface port 8123) +# --------------------------------------------------------------------------- +import urllib.parse +import urllib.request +import json + + +def ch_query(host: str, port: int, query: str) -> list: + """Envoie une requête SELECT à ClickHouse via HTTP (port 8123) et retourne les lignes.""" + url = f"http://{host}:{port}/?query={urllib.parse.quote(query + ' FORMAT JSON')}" + try: + with urllib.request.urlopen(url, timeout=10) as resp: + data = json.loads(resp.read()) + return data.get("data", []) + except Exception as e: + return [{"__error__": str(e)}] + + +def ch_scalar(host: str, port: int, query: str) -> str: + """Retourne la première colonne de la première ligne.""" + rows = ch_query(host, port, query) + if not rows or "__error__" in rows[0]: + return str(rows[0].get("__error__", "?")) + return str(list(rows[0].values())[0]) + + +# --------------------------------------------------------------------------- +# Checks +# --------------------------------------------------------------------------- +CHECK_OK = "✅" +CHECK_FAIL = "❌" +CHECK_WARN = "⚠️ " + +results: list = [] + + +def check(name: str, query: str, host: str, port: int, expect_nonzero: bool = True, + min_val: int = 1, warn_only: bool = False) -> bool: + val = ch_scalar(host, port, query) + try: + n = int(float(val)) + except ValueError: + n = -1 + + ok = n >= min_val if expect_nonzero else n == 0 + icon = CHECK_OK if ok else (CHECK_WARN if warn_only else CHECK_FAIL) + results.append((icon, name, val)) + return ok + + +def run_checks(host: str, http_port: int, db_logs: str, db_processing: str, min_rows: int): + print(f"\n{'='*65}") + print(f" Vérification ClickHouse — {db_logs} / {db_processing}") + print(f"{'='*65}\n") + + # ------------------------------------------------------------------ + # 1. Tables brutes + # ------------------------------------------------------------------ + print("── 1. Tables brutes ─────────────────────────────────────────") + + check("http_logs_raw : lignes totales", + f"SELECT count() FROM {db_logs}.http_logs_raw", host, http_port, min_val=min_rows) + + check("http_logs : lignes après MV", + f"SELECT count() FROM {db_logs}.http_logs", host, http_port, min_val=min_rows) + + # ------------------------------------------------------------------ + # 2. Métadonnées L3/L4 + # ------------------------------------------------------------------ + print("\n── 2. Métadonnées L3/L4 ─────────────────────────────────────") + + check("ip_meta_ttl > 0 (TTL capturé)", + f"SELECT count() FROM {db_logs}.http_logs WHERE ip_meta_ttl > 0", + host, http_port, min_val=1) + + check("tcp_meta_mss > 0 (MSS capturé)", + f"SELECT count() FROM {db_logs}.http_logs WHERE tcp_meta_mss > 0", + host, http_port, min_val=1) + + check("tcp_meta_window_size > 0 (window_size capturé)", + f"SELECT count() FROM {db_logs}.http_logs WHERE tcp_meta_window_size > 0", + host, http_port, min_val=1) + + check("src_port renseigné (> 1000)", + f"SELECT count() FROM {db_logs}.http_logs WHERE src_port > 1000", + host, http_port, min_val=1) + + # ------------------------------------------------------------------ + # 3. TLS / JA4 + # ------------------------------------------------------------------ + print("\n── 3. TLS / JA4 ─────────────────────────────────────────────") + + check("ja4 renseigné (fingerprint TLS)", + f"SELECT count() FROM {db_logs}.http_logs WHERE ja4 != ''", + host, http_port, min_val=1) + + check("tls_sni renseigné (SNI extrait)", + f"SELECT count() FROM {db_logs}.http_logs WHERE tls_sni != ''", + host, http_port, min_val=1) + + check("tls_version TLSv1.2 présente", + f"SELECT count() FROM {db_logs}.http_logs WHERE tls_version = 'TLSv1.2'", + host, http_port, min_val=1, warn_only=True) + + check("tls_version TLSv1.3 présente", + f"SELECT count() FROM {db_logs}.http_logs WHERE tls_version = 'TLSv1.3'", + host, http_port, min_val=1) + + check("tls_alpn h2 (HTTP/2 négocié)", + f"SELECT count() FROM {db_logs}.http_logs WHERE tls_alpn = 'h2'", + host, http_port, min_val=1, warn_only=True) + + # ja4 format : t12d... ou t13d... selon la version TLS + check("ja4 TLS1.2 (commence par t12)", + f"SELECT count() FROM {db_logs}.http_logs WHERE startsWith(ja4, 't12')", + host, http_port, min_val=1, warn_only=True) + + check("ja4 TLS1.3 (commence par t13)", + f"SELECT count() FROM {db_logs}.http_logs WHERE startsWith(ja4, 't13')", + host, http_port, min_val=1) + + # ------------------------------------------------------------------ + # 4. Couche L7 HTTP + # ------------------------------------------------------------------ + print("\n── 4. Couche L7 HTTP ────────────────────────────────────────") + + for method in ("GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH"): + check(f"méthode {method} présente", + f"SELECT count() FROM {db_logs}.http_logs WHERE method = '{method}'", + host, http_port, min_val=1) + + check("path renseigné", + f"SELECT count() FROM {db_logs}.http_logs WHERE path != ''", + host, http_port, min_val=1) + + check("status_code 200 présent", + f"SELECT count() FROM {db_logs}.http_logs WHERE status_code = 200", + host, http_port, min_val=1) + + check("status_code 404 présent (requêtes de test)", + f"SELECT count() FROM {db_logs}.http_logs WHERE status_code IN (400,404,405,500)", + host, http_port, min_val=1, warn_only=True) + + check("duration_ms > 0 (latence mesurée)", + f"SELECT count() FROM {db_logs}.http_logs WHERE duration_ms > 0", + host, http_port, min_val=1) + + check("header_order_signature renseigné", + f"SELECT count() FROM {db_logs}.http_logs WHERE header_order_signature != ''", + host, http_port, min_val=1) + + # ------------------------------------------------------------------ + # 5. Corrélation L3+L7 + # ------------------------------------------------------------------ + print("\n── 5. Corrélation L3 ↔ L7 ───────────────────────────────────") + + check("correlated=1 (lignes L3+L7 fusionnées)", + f"SELECT count() FROM {db_logs}.http_logs WHERE correlated = 1", + host, http_port, min_val=1) + + check("correlated=0 (HTTP sans corr. réseau, port 80)", + f"SELECT count() FROM {db_logs}.http_logs WHERE correlated = 0", + host, http_port, min_val=1, warn_only=True) + + pct_corr = ch_scalar(host, http_port, + f"SELECT round(100*countIf(correlated=1)/count(), 1) FROM {db_logs}.http_logs") + results.append((CHECK_OK, f"taux corrélation ({pct_corr}%)", pct_corr)) + + # ------------------------------------------------------------------ + # 6. Keep-alives (TCP multiplexing) + # ------------------------------------------------------------------ + print("\n── 6. Keep-alives ────────────────────────────────────────────") + + check("keepalives > 1 (connexions multiplexées)", + f"SELECT count() FROM {db_logs}.http_logs WHERE keepalives > 1", + host, http_port, min_val=1, warn_only=True) + + max_ka = ch_scalar(host, http_port, + f"SELECT max(keepalives) FROM {db_logs}.http_logs") + results.append((CHECK_OK, f"max keepalives ({max_ka})", max_ka)) + + # ------------------------------------------------------------------ + # 7. Diversité des IPs sources + # ------------------------------------------------------------------ + print("\n── 7. Diversité sources ─────────────────────────────────────") + + check("IPs sources distinctes >= 5", + f"SELECT uniqExact(src_ip) FROM {db_logs}.http_logs", + host, http_port, min_val=5, warn_only=True) + + # ------------------------------------------------------------------ + # 8. Tables de traitement (ja4_processing) + # ------------------------------------------------------------------ + print("\n── 8. Tables processing ─────────────────────────────────────") + + for tbl in ("agg_host_ip_ja4_1h", "agg_header_fingerprint_1h", + "agg_ip_behavior_1h", "agg_request_timing_1h"): + check(f"{tbl} peuplée", + f"SELECT count() FROM {db_processing}.{tbl}", + host, http_port, min_val=1, warn_only=True) + + # ------------------------------------------------------------------ + # Résumé + # ------------------------------------------------------------------ + print(f"\n{'='*65}") + fails = 0 + warns = 0 + for icon, name, val in results: + print(f" {icon} {name:<50s} {val}") + if icon == CHECK_FAIL: + fails += 1 + elif icon == CHECK_WARN: + warns += 1 + + total = len(results) + passed = total - fails - warns + print(f"\n{'='*65}") + print(f" Résultat : {passed} OK | {warns} WARN | {fails} FAIL (total {total})") + print(f"{'='*65}\n") + + return fails == 0 + + +def main(): + parser = argparse.ArgumentParser(description="Vérification ClickHouse post-trafic") + parser.add_argument("--host", default="clickhouse", help="Hôte ClickHouse") + parser.add_argument("--port", type=int, default=8123, help="Port HTTP ClickHouse (8123)") + parser.add_argument("--db-logs", default="ja4_logs", help="Base de données logs") + parser.add_argument("--db-processing", default="ja4_processing", help="Base processing") + parser.add_argument("--min-rows", type=int, default=10, help="Minimum de lignes attendues") + parser.add_argument("--wait", type=int, default=5, + help="Attendre N secondes avant de vérifier (flush MV)") + args = parser.parse_args() + + if args.wait > 0: + print(f"[verify_db] Attente {args.wait}s pour le flush des Materialized Views...") + time.sleep(args.wait) + + ok = run_checks(args.host, args.port, args.db_logs, args.db_processing, args.min_rows) + sys.exit(0 if ok else 1) + + +if __name__ == "__main__": + main() diff --git a/tests/vm/README.md b/tests/vm/README.md new file mode 100644 index 0000000..79c2ec5 --- /dev/null +++ b/tests/vm/README.md @@ -0,0 +1,115 @@ +# Tests VM — eBPF sur kernel réel (Rocky Linux 9) + +## Pourquoi une VM ? + +Les tests Docker ne peuvent capturer que L3/L4 et TLS (via le hook TC). Les données +L7 HTTP (method, path, status_code, header_order_signature) nécessitent : + +| Fonctionnalité eBPF | Docker | VM | +|---|---|---| +| Hook TC (XDP) — L3/L4 + TLS | ✅ | ✅ | +| Uprobe SSL_read — L7 HTTP | ✅ attache | ✅ attache | +| Tracepoint accept4 — corrélation IP | ❌ pas de tracefs | ✅ | +| Kprobe accept4 — corrélation IP | ❌ pas de perf PMU | ✅ | + +Dans une VM, le kernel complet est disponible → **accept4 fonctionne** → +la corrélation IP est parfaite → les données L7 arrivent dans ClickHouse. + +## Prérequis (installation unique) + +```bash +# 1. Installer Vagrant + libvirt + KVM +sudo apt-get install -y vagrant libvirt-daemon-system libvirt-clients \ + qemu-kvm ruby-libvirt bridge-utils + +# 2. Plugin vagrant-libvirt +vagrant plugin install vagrant-libvirt + +# 3. Ajouter ton user aux groupes (nécessite une reconnexion) +sudo usermod -aG libvirt,kvm $USER +# → Se déconnecter et se reconnecter + +# 4. Vérifier que KVM fonctionne +virsh list --all +``` + +## Utilisation + +```bash +# Depuis la racine du projet : + +# Créer la VM (première fois, ~5-10 min — télécharge Rocky Linux 9) +make vm-up + +# Lancer le test nginx complet (L3/L4 + TLS + L7 HTTP) +make test-vm-nginx + +# Après modification des sources Go/C +make vm-rebuild-ja4ebpf # synchronise + recompile dans la VM +make test-vm-nginx # relancer les tests + +# Connexion SSH interactive +make vm-ssh + +# Détruire la VM (libère l'espace disque) +make vm-down +``` + +## Ce que teste `test-vm-nginx` + +1. **Build** — recompile ja4ebpf (BPF CO-RE + Go) depuis les sources +2. **ClickHouse** — démarre dans Docker (dans la VM) +3. **nginx** — démarre avec TLS + HTTP/2 +4. **ja4ebpf** — démarre avec uprobes + accept4 tracepoints +5. **Trafic** — HTTP/1.0, HTTP/1.1, HTTPS/1.1, HTTPS/2.0 +6. **Vérification DB** : + - `ip_meta_ttl`, `tcp_meta_mss`, `tcp_meta_window_size` ✅ + - `ja4`, `tls_sni` ✅ + - **`method`, `path`, `status_code`** ✅ (uniquement en VM) + - **`header_order_signature`** ✅ (uniquement en VM) + +## Différence avec les tests Docker + +| Check | Docker | VM | +|---|---|---| +| L3/L4 (TTL, MSS, window) | ✅ | ✅ | +| TLS fingerprint (JA4, SNI) | ✅ | ✅ | +| L7 méthode HTTP | ❌ | ✅ | +| L7 path HTTP | ❌ | ✅ | +| status_code | ❌ | ✅ | +| header_order_signature | ❌ | ✅ | + +## Architecture de la VM + +``` +VM Rocky Linux 9 (KVM) +├── nginx + libssl.so.3 ← serveur web cible +├── ja4ebpf ← agent eBPF (natif, pas Docker) +│ ├── TC hook (eth0) ← capture L3/L4 + TLS ClientHello +│ ├── Uprobe SSL_read ← capture HTTP déchiffré +│ └── Tracepoint accept4 ← corrélation fd→IP (disponible !) +└── ClickHouse (Docker) ← base de données +``` + +## Dépannage + +**vagrant up échoue : "Call to virConnectOpen failed"** +```bash +sudo systemctl start libvirtd +sudo usermod -aG libvirt $USER # puis se reconnecter +``` + +**Erreur "default pool not found"** +```bash +sudo virsh pool-define-as default dir --target /var/lib/libvirt/images +sudo virsh pool-build default +sudo virsh pool-start default +sudo virsh pool-autostart default +``` + +**ja4ebpf : "uprobe SSL_read" ne s'attache pas** +```bash +# Vérifier le chemin libssl dans la VM +vagrant ssh -- 'ls -la /usr/lib64/libssl*' +# Si différent de /usr/lib64/libssl.so.3, modifier /tmp/ja4ebpf.yml +``` diff --git a/tests/vm/Vagrantfile b/tests/vm/Vagrantfile new file mode 100644 index 0000000..a6243da --- /dev/null +++ b/tests/vm/Vagrantfile @@ -0,0 +1,66 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : +# ============================================================================= +# Vagrantfile — VM de test ja4ebpf sur Rocky Linux 9 +# +# Fournit un environnement kernel complet pour les tests eBPF : +# - tracefs / debugfs montés +# - perf_kprobe PMU disponible +# - uprobes fonctionnels avec accept4 kprobe/tracepoint +# +# Prérequis (host Ubuntu) : +# sudo apt-get install -y vagrant libvirt-daemon-system libvirt-clients \ +# qemu-kvm ruby-libvirt +# vagrant plugin install vagrant-libvirt +# sudo usermod -aG libvirt,kvm $USER # puis se reconnecter +# +# Utilisation : +# vagrant up # créer + provisionner la VM (première fois ~5 min) +# vagrant ssh # connexion SSH +# make test-vm-nginx # lancer les tests depuis le host +# vagrant destroy -f # détruire la VM +# ============================================================================= + +Vagrant.configure("2") do |config| + + # ── Box Rocky Linux 9 ────────────────────────────────────────────────────── + config.vm.box = "generic/rocky9" + + # ── Réseau : IP privée pour accès depuis le host ─────────────────────────── + config.vm.network "private_network", ip: "192.168.56.10" + + # ── Ressources VM ───────────────────────────────────────────────────────── + config.vm.provider :libvirt do |v| + v.cpus = 4 + v.memory = 4096 + v.nested = false # pas besoin de virtualisation imbriquée + # Pour VirtualBox (fallback) + end + + config.vm.provider :virtualbox do |v| + v.cpus = 4 + v.memory = 4096 + v.customize ["modifyvm", :id, "--nicpromisc2", "allow-all"] + end + + # ── Montage du projet ───────────────────────────────────────────────────── + # Le répertoire racine du projet est monté dans /ja4-platform + config.vm.synced_folder "../..", "/ja4-platform", + type: "rsync", + rsync__exclude: [".git/", "old/", "*.rpm", "services/*/target/"] + + # ── Provisioning ───────────────────────────────────────────────────────── + config.vm.provision "shell", path: "provision.sh" + + # ── Message post-démarrage ──────────────────────────────────────────────── + config.vm.post_up_message = <<~MSG + VM ja4ebpf prête ! + + Depuis le répertoire tests/vm/ : + vagrant ssh # connexion interactive + make -C ../.. test-vm-nginx # lancer le test nginx + make -C ../.. test-vm-matrix # lancer tous les tests + + IP de la VM : 192.168.56.10 + MSG +end diff --git a/tests/vm/provision.sh b/tests/vm/provision.sh new file mode 100755 index 0000000..5e65826 --- /dev/null +++ b/tests/vm/provision.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# ============================================================================= +# provision.sh — Provisionnement de la VM Rocky Linux 9 pour ja4ebpf +# +# Installe : +# - Toolchain eBPF : clang, llvm, bpftool, libbpf-devel, kernel-devel +# - Go 1.24 +# - Docker (pour ClickHouse) +# - nginx + openssl (serveur web cible) +# - Outils de test : python3, httpx +# ============================================================================= +set -euo pipefail + +log() { echo "[provision] $(date +%H:%M:%S) $*"; } + +# ── 1. Mise à jour système + dépôts ────────────────────────────────────────── +log "Mise à jour des dépôts..." +dnf install -y epel-release dnf-plugins-core +dnf config-manager --enable crb +dnf update -y --quiet + +# ── 2. Toolchain eBPF ──────────────────────────────────────────────────────── +log "Installation toolchain eBPF (clang, bpftool, libbpf)..." +dnf install -y \ + clang \ + llvm \ + bpftool \ + libbpf-devel \ + kernel-devel-$(uname -r) \ + make \ + git + +# ── 3. Go (version récente) ────────────────────────────────────────────────── +log "Installation de Go..." +GO_VERSION="1.24.3" +if ! command -v go &>/dev/null || [[ "$(go version 2>/dev/null | awk '{print $3}')" != "go${GO_VERSION}" ]]; then + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" -o /tmp/go.tar.gz + rm -rf /usr/local/go + tar -C /usr/local -xzf /tmp/go.tar.gz + rm /tmp/go.tar.gz +fi +export PATH="/usr/local/go/bin:$PATH" +# Persister dans le PATH +cat > /etc/profile.d/go.sh << 'EOF' +export PATH="/usr/local/go/bin:$PATH" +export GOPATH="/home/vagrant/go" +EOF + +# ── 4. Docker (pour ClickHouse) ─────────────────────────────────────────────── +log "Installation de Docker..." +dnf config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo +dnf install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin +systemctl enable --now docker +usermod -aG docker vagrant +# Accès sans sudo pour vagrant +chmod 666 /var/run/docker.sock || true + +# ── 5. nginx + openssl ─────────────────────────────────────────────────────── +log "Installation de nginx..." +dnf install -y nginx openssl curl + +# ── 6. Python3 + outils de test ────────────────────────────────────────────── +log "Installation Python3 et outils de test..." +dnf install -y python3 python3-pip +pip3 install --quiet "httpx[http2]" requests + +# ── 7. Outils de debug eBPF ────────────────────────────────────────────────── +log "Installation outils de debug eBPF..." +dnf install -y perf strace + +# ── 8. Montage tracefs + debugfs au démarrage ───────────────────────────────── +log "Configuration des pseudo-systèmes de fichiers eBPF..." +cat > /etc/systemd/system/tracefs.mount << 'EOF' +[Unit] +Description=Mount tracefs +DefaultDependencies=no +After=local-fs.target + +[Mount] +What=tracefs +Where=/sys/kernel/tracing +Type=tracefs +Options=defaults + +[Install] +WantedBy=multi-user.target +EOF + +cat > /etc/systemd/system/debugfs.mount << 'EOF' +[Unit] +Description=Mount debugfs +DefaultDependencies=no +After=local-fs.target + +[Mount] +What=debugfs +Where=/sys/kernel/debug +Type=debugfs +Options=defaults + +[Install] +WantedBy=multi-user.target +EOF + +systemctl enable tracefs.mount debugfs.mount +mount -t tracefs tracefs /sys/kernel/tracing 2>/dev/null || true +mount -t debugfs debugfs /sys/kernel/debug 2>/dev/null || true + +# ── 9. Build ja4ebpf depuis les sources ────────────────────────────────────── +log "Build initial de ja4ebpf..." +export PATH="/usr/local/go/bin:$PATH" +cd /ja4-platform/services/ja4ebpf +GOWORK=off go generate ./internal/loader/ 2>&1 | tail -5 || log "go generate: erreur (normal si vmlinux.h absent)" +GOWORK=off CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ + go build -ldflags="-s -w" -o /usr/local/bin/ja4ebpf ./cmd/ja4ebpf/ 2>&1 | tail -5 + +log "Provisionnement terminé !" +log "Lancer 'make test-vm-nginx' depuis le host pour démarrer les tests." diff --git a/tests/vm/run-tests-vm.sh b/tests/vm/run-tests-vm.sh new file mode 100755 index 0000000..4a1062b --- /dev/null +++ b/tests/vm/run-tests-vm.sh @@ -0,0 +1,309 @@ +#!/usr/bin/env bash +# ============================================================================= +# run-tests-vm.sh — Lance la stack de test complète dans la VM Rocky Linux 9 +# +# Ce script s'exécute DANS la VM (via vagrant ssh ou vagrant provision). +# Il ne peut pas tourner dans Docker — il requiert un vrai kernel pour eBPF. +# +# Usage (depuis le host) : +# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh nginx' +# vagrant ssh -- 'bash /ja4-platform/tests/vm/run-tests-vm.sh all' +# +# Variables d'environnement : +# STACK : stack à tester (nginx|apache|nginx-varnish|hitch-varnish|all) +# KEEP_RUNNING : si "true", ne pas arrêter la stack après le test (défaut: false) +# ============================================================================= +set -euo pipefail + +STACK="${1:-nginx}" +KEEP_RUNNING="${KEEP_RUNNING:-false}" +PROJECT="/ja4-platform" +RESULTS_DIR="/tmp/ja4-test-results" + +# ── Couleurs ───────────────────────────────────────────────────────────────── +GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; RESET='\033[0m' +BOLD='\033[1m' + +log() { echo -e "${BOLD}[$STACK]${RESET} $(date +%H:%M:%S) $*"; } +pass() { echo -e " ${GREEN}✅${RESET} $*"; ((PASS_COUNT++)) || true; } +fail() { echo -e " ${RED}❌${RESET} $*"; ((FAIL_COUNT++)) || true; } +warn() { echo -e " ${YELLOW}⚠️${RESET} $*"; ((WARN_COUNT++)) || true; } + +PASS_COUNT=0; FAIL_COUNT=0; WARN_COUNT=0 + +# ── Vérification prérequis ──────────────────────────────────────────────────── +check_prerequisites() { + log "Vérification des prérequis..." + + # eBPF capabilities + if [ ! -d /sys/kernel/tracing ]; then + fail "tracefs non monté — exécuter: sudo mount -t tracefs tracefs /sys/kernel/tracing" + exit 1 + fi + if [ ! -d /sys/kernel/debug ]; then + fail "debugfs non monté" + exit 1 + fi + + command -v ja4ebpf >/dev/null 2>&1 || { + log "Rebuild ja4ebpf..." + cd "$PROJECT/services/ja4ebpf" + export PATH="/usr/local/go/bin:$PATH" + GOWORK=off go generate ./internal/loader/ 2>&1 | tail -3 + GOWORK=off CGO_ENABLED=0 go build -o /usr/local/bin/ja4ebpf ./cmd/ja4ebpf/ + } + + command -v docker >/dev/null 2>&1 || { fail "Docker non installé"; exit 1; } + command -v nginx >/dev/null 2>&1 || { fail "nginx non installé"; exit 1; } + pass "Prérequis OK" +} + +# ── Démarrage ClickHouse ────────────────────────────────────────────────────── +start_clickhouse() { + log "Démarrage ClickHouse..." + + docker rm -f ja4-clickhouse 2>/dev/null || true + + docker run -d --name ja4-clickhouse \ + -p 8123:8123 -p 9000:9000 \ + -e CLICKHOUSE_DB=ja4_processing \ + -e CLICKHOUSE_USER=default \ + -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \ + -v "$PROJECT/tests/integration/platform/clickhouse-init.sh:/docker-entrypoint-initdb.d/00_init.sh" \ + $(for f in "$PROJECT/shared/clickhouse/"*.sql; do + echo "-v $f:/initdb-src/$(basename $f):ro" + done) \ + clickhouse/clickhouse-server:24.8 2>&1 | tail -1 + + # Attendre que ClickHouse soit prêt + log "Attente ClickHouse (max 120s)..." + for i in $(seq 1 60); do + if curl -sf "http://localhost:8123/ping" >/dev/null 2>&1; then + pass "ClickHouse prêt" + return 0 + fi + sleep 2 + done + fail "ClickHouse timeout"; exit 1 +} + +# ── Configuration nginx ──────────────────────────────────────────────────────── +setup_nginx() { + log "Configuration nginx avec TLS..." + + # Certificat auto-signé + openssl req -x509 -nodes -days 365 \ + -subj "/CN=platform.test" \ + -newkey rsa:2048 \ + -keyout /etc/pki/tls/private/nginx.key \ + -out /etc/pki/tls/certs/nginx.crt 2>/dev/null + + # Copier la configuration de test + cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf + + # Créer les fichiers de test + mkdir -p /var/www/html + echo '{"status":"ok","stack":"nginx-vm"}' > /var/www/html/health + for p in data api/users api/data/test; do + mkdir -p "/var/www/html/$(dirname $p)" + echo '{"ok":true}' > "/var/www/html/$p" + done + + nginx -t && nginx + + # Attendre nginx + for i in $(seq 1 20); do + curl -sf http://localhost/health >/dev/null 2>&1 && break + sleep 0.5 + done + pass "nginx démarré" +} + +# ── Démarrage ja4ebpf ───────────────────────────────────────────────────────── +start_ja4ebpf() { + log "Démarrage ja4ebpf..." + + pkill ja4ebpf 2>/dev/null || true + sleep 1 + + # Créer la config + cat > /tmp/ja4ebpf.yml << 'EOF' +interface: eth0 +ssl_lib_path: "/usr/lib64/libssl.so.3" +clickhouse: + dsn: "clickhouse://default:@localhost:9000/ja4_logs" + batch_size: 100 + flush_secs: 1 +correlation: + timeout_ms: 500 + slowloris_ms: 10000 +log: + level: "info" + format: "json" +EOF + + # Lancer avec les capabilities nécessaires + # Dans la VM (root), on peut lancer directement + ja4ebpf -config /tmp/ja4ebpf.yml > /tmp/ja4ebpf.log 2>&1 & + JA4EBPF_PID=$! + + sleep 3 + if ! kill -0 "$JA4EBPF_PID" 2>/dev/null; then + fail "ja4ebpf s'est arrêté immédiatement" + cat /tmp/ja4ebpf.log | tail -10 + return 1 + fi + + log "ja4ebpf démarré (PID $JA4EBPF_PID)" + + # Vérifier les uprobes dans tracefs + sleep 1 + if grep -q "ssl" /sys/kernel/tracing/uprobe_events 2>/dev/null; then + pass "Uprobes SSL attachés dans tracefs" + else + warn "Uprobes non visibles dans tracefs (peuvent être actifs quand même)" + fi + + # Vérifier accept4 tracepoint + if grep -q "accept4" /sys/kernel/tracing/events/syscalls 2>/dev/null; then + pass "Tracepoints accept4 disponibles" + else + warn "Tracepoints accept4 non trouvés" + fi +} + +# ── Génération de trafic ─────────────────────────────────────────────────────── +generate_traffic() { + log "Génération du trafic (HTTP/1.0 + HTTP/1.1 + HTTP/2)..." + + # Trafic HTTP/1.1 (HTTP) + for path in / /health /data /api/users; do + curl -sf "http://localhost$path" >/dev/null 2>&1 || true + curl -sf -X POST "http://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true + done + + # Trafic HTTPS/1.1 + for path in / /health /data /api/users; do + curl -sf -k "https://localhost$path" >/dev/null 2>&1 || true + curl -sf -k -X POST "https://localhost/api/data" -d '{"test":1}' >/dev/null 2>&1 || true + curl -sf -k -X PUT "https://localhost/data" >/dev/null 2>&1 || true + curl -sf -k -X DELETE "https://localhost/data/1" >/dev/null 2>&1 || true + curl -sf -k -X HEAD "https://localhost$path" >/dev/null 2>&1 || true + done + + # Trafic HTTP/2 + if command -v python3 >/dev/null 2>&1 && python3 -c "import httpx" 2>/dev/null; then + python3 << 'PYEOF' +import httpx, ssl, warnings +warnings.filterwarnings("ignore") +ctx = ssl.create_default_context() +ctx.check_hostname = False +ctx.verify_mode = ssl.CERT_NONE +with httpx.Client(http2=True, verify=False) as client: + for path in ["/", "/health", "/data"]: + try: client.get(f"https://localhost{path}") + except: pass + try: client.post("https://localhost/api/data", json={"test": "h2"}) + except: pass +PYEOF + pass "Trafic HTTP/2 généré" + fi + + # Attendre le flush ja4ebpf → ClickHouse + log "Attente flush ja4ebpf (15s)..." + sleep 15 + pass "Trafic généré" +} + +# ── Vérification ClickHouse ──────────────────────────────────────────────────── +verify_db() { + log "Vérification des données dans ClickHouse..." + + ch_query() { + curl -sf "http://localhost:8123/" \ + --data-urlencode "query=$1" \ + --data-urlencode "database=ja4_logs" \ + -o /dev/null -w '%{http_code}' 2>/dev/null || echo "0" + } + + ch_val() { + curl -sf "http://localhost:8123/?database=ja4_logs" \ + --data-urlencode "query=$1" 2>/dev/null | tr -d ' \n' || echo "0" + } + + # L3/L4 + ttl=$(ch_val "SELECT count() FROM http_logs WHERE ip_meta_ttl > 0") + [ "${ttl:-0}" -gt 0 ] && pass "L3/L4 TTL capturé ($ttl lignes)" || fail "L3/L4 TTL absent" + + mss=$(ch_val "SELECT count() FROM http_logs WHERE tcp_meta_mss > 0") + [ "${mss:-0}" -gt 0 ] && pass "TCP MSS capturé ($mss lignes)" || fail "TCP MSS absent" + + # TLS + ja4=$(ch_val "SELECT count() FROM http_logs WHERE ja4 != ''") + [ "${ja4:-0}" -gt 0 ] && pass "JA4 fingerprint capturé ($ja4 lignes)" || fail "JA4 absent" + + sni=$(ch_val "SELECT count() FROM http_logs WHERE tls_sni != ''") + [ "${sni:-0}" -gt 0 ] && pass "TLS SNI capturé ($sni lignes)" || warn "TLS SNI absent" + + # L7 HTTP — c'est ici que ça devrait marcher dans la VM + method=$(ch_val "SELECT count() FROM http_logs WHERE method != ''") + [ "${method:-0}" -gt 0 ] && pass "L7 méthodes HTTP capturées ($method lignes)" \ + || fail "L7 méthodes HTTP ABSENT — uprobe SSL_read ne fonctionne pas" + + path=$(ch_val "SELECT count() FROM http_logs WHERE path != ''") + [ "${path:-0}" -gt 0 ] && pass "L7 path HTTP capturé ($path lignes)" || fail "L7 path absent" + + status=$(ch_val "SELECT count() FROM http_logs WHERE status_code > 0") + [ "${status:-0}" -gt 0 ] && pass "status_code capturé ($status lignes)" || warn "status_code absent" + + sig=$(ch_val "SELECT count() FROM http_logs WHERE header_order_signature != ''") + [ "${sig:-0}" -gt 0 ] && pass "header_order_signature capturé ($sig lignes)" || warn "header_order_sig absent" + + # Méthodes HTTP distinctes + methods=$(ch_val "SELECT groupArray(method) FROM (SELECT DISTINCT method FROM http_logs WHERE method != '')") + log "Méthodes HTTP vues : $methods" + + # Lignes totales + total=$(ch_val "SELECT count() FROM http_logs") + pass "Total lignes http_logs : $total" +} + +# ── Nettoyage ───────────────────────────────────────────────────────────────── +cleanup() { + if [ "$KEEP_RUNNING" != "true" ]; then + log "Nettoyage..." + pkill ja4ebpf 2>/dev/null || true + nginx -s stop 2>/dev/null || true + docker rm -f ja4-clickhouse 2>/dev/null || true + fi +} +trap cleanup EXIT + +# ── Main ────────────────────────────────────────────────────────────────────── +mkdir -p "$RESULTS_DIR" + +echo "" +echo "╔══════════════════════════════════════════╗" +echo "║ ja4ebpf VM Test Suite — Rocky Linux 9 ║" +echo "╚══════════════════════════════════════════╝" +echo "" + +check_prerequisites +start_clickhouse +setup_nginx +start_ja4ebpf +generate_traffic +verify_db + +echo "" +echo "════════════════════════════════════════════" +echo -e " ${GREEN}OK${RESET}: $PASS_COUNT ${YELLOW}WARN${RESET}: $WARN_COUNT ${RED}FAIL${RESET}: $FAIL_COUNT" +if [ "$FAIL_COUNT" -eq 0 ]; then + echo -e " ${GREEN}${BOLD}Tous les tests réussis !${RESET}" + exit 0 +else + echo -e " ${RED}${BOLD}$FAIL_COUNT tests échoués.${RESET}" + echo "Logs ja4ebpf :" + tail -20 /tmp/ja4ebpf.log 2>/dev/null || true + exit 1 +fi