From 24306ef390717e49df983a17c1e0bd19c4d5c111 Mon Sep 17 00:00:00 2001 From: Jacquin Antoine Date: Wed, 15 Apr 2026 03:34:43 +0200 Subject: [PATCH] feat(ja4ebpf): add SSL_write uprobe, HPACK decoder, and AcceptCache for session correlation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add uprobe_ssl_write_entry/uretprobe_ssl_write_exit to capture server HTTP responses via SSL_write with direction=1. Implement a partial HPACK decoder (RFC 7541 static-table name lookup, multi-byte integers, literal representations; no Huffman decoding and no dynamic table) for HTTP/2 header extraction. Add AcceptCache mapping {tgid,fd}→SessionKey from accept4 events as authoritative source for SSL correlation when BPF ssl_conn_map has src_ip=0. Add ip_total_length to tcp_syn_event BPF struct. Co-Authored-By: Claude Opus 4.6 --- services/ja4ebpf/bpf/bpf_types.h | 1 + services/ja4ebpf/bpf/uprobe_ssl.c | 75 ++++ .../internal/correlation/accept_cache.go | 78 ++++ services/ja4ebpf/internal/loader/loader.go | 50 +++ services/ja4ebpf/internal/parser/http2.go | 401 +++++++++++++++++- .../ja4ebpf/internal/parser/http2_test.go | 116 +++++ tests/vm/generate-traffic.sh | 142 +++++++ 7 files changed, 847 insertions(+), 16 deletions(-) create mode 100644 services/ja4ebpf/internal/correlation/accept_cache.go create mode 100755 tests/vm/generate-traffic.sh diff --git a/services/ja4ebpf/bpf/bpf_types.h b/services/ja4ebpf/bpf/bpf_types.h index 187478e..da17f7f 100644 --- a/services/ja4ebpf/bpf/bpf_types.h +++ b/services/ja4ebpf/bpf/bpf_types.h @@ -35,6 +35,7 @@ struct tcp_syn_event { __u8 ttl; /* TTL IP */ __u8 df_bit; /* bit Don't Fragment (1 = DF activé) */ __u16 ip_id; /* champ identification IP */ + __u16 ip_total_length; /* longueur totale IP (octets) */ __u16 window_size; /* fenêtre TCP initiale */ __u8 window_scale; /* facteur d'échelle (0xFF = absent) */ __u16 mss; /* MSS TCP (0 = absent) */ diff --git a/services/ja4ebpf/bpf/uprobe_ssl.c b/services/ja4ebpf/bpf/uprobe_ssl.c index 4ffd0e6..5af38bc 100644 --- 
a/services/ja4ebpf/bpf/uprobe_ssl.c +++ b/services/ja4ebpf/bpf/uprobe_ssl.c @@ -174,6 +174,81 @@ int uretprobe_ssl_read_exit(struct pt_regs *ctx) return 0; } +/* =========================================================================== + * uprobe_ssl_write_entry — Entrée de SSL_write(SSL *ssl, const void *buf, int num) + * + * Sauvegarde les arguments pour l'uretprobe correspondant. + * Réutilise ssl_args_map (même format que SSL_read — un seul thread à la fois). + * ===========================================================================*/ +SEC("uprobe/SSL_write") +int uprobe_ssl_write_entry(struct pt_regs *ctx) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + + struct ssl_read_args args = {}; + args.ssl_ptr = (__u64)PT_REGS_PARM1(ctx); + args.buf_ptr = (__u64)PT_REGS_PARM2(ctx); + args.num = (__u32)PT_REGS_PARM3(ctx); + + bpf_map_update_elem(&ssl_args_map, &pid_tgid, &args, BPF_ANY); + return 0; +} + +/* =========================================================================== + * uretprobe_ssl_write_exit — Retour de SSL_write + * + * Lit le buffer de réponse et l'émet via perf_event_output avec direction=1. + * Les données sont les réponses HTTP du serveur (status, headers, body). + * ===========================================================================*/ +SEC("uretprobe/SSL_write") +int uretprobe_ssl_write_exit(struct pt_regs *ctx) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + + struct ssl_read_args *args = bpf_map_lookup_elem(&ssl_args_map, &pid_tgid); + if (!args) + return 0; + + long retval = PT_REGS_RC(ctx); + if (retval <= 0) { + bpf_map_delete_elem(&ssl_args_map, &pid_tgid); + return 0; + } + + __u32 zero = 0; + struct ssl_data_event *evt = bpf_map_lookup_elem(&__ssl_buf, &zero); + if (!evt) { + bpf_map_delete_elem(&ssl_args_map, &pid_tgid); + return 0; + } + + evt->pid_tgid = pid_tgid; + evt->direction = 1; /* écriture = serveur vers client */ + evt->timestamp_ns = bpf_ktime_get_ns(); + + __u32 data_len = (retval > MAX_SSL_DATA) ? 
MAX_SSL_DATA : (__u32)retval; + evt->data_len = data_len; + + bpf_probe_read_user(evt->data, data_len & (MAX_SSL_DATA - 1), (void *)args->buf_ptr); + + struct ssl_conn_info *conn = bpf_map_lookup_elem(&ssl_conn_map, &args->ssl_ptr); + if (conn) { + evt->fd = conn->fd; + evt->src_ip = conn->src_ip; + evt->src_port = conn->src_port; + } else { + evt->fd = 0; + evt->src_ip = 0; + evt->src_port = 0; + } + + bpf_perf_event_output(ctx, &pb_ssl_data, BPF_F_CURRENT_CPU, + evt, sizeof(*evt)); + bpf_map_delete_elem(&ssl_args_map, &pid_tgid); + + return 0; +} + /* =========================================================================== * kprobe_accept4_entry — Entrée de accept4 via tracepoint syscalls * diff --git a/services/ja4ebpf/internal/correlation/accept_cache.go b/services/ja4ebpf/internal/correlation/accept_cache.go new file mode 100644 index 0000000..ce0cc7f --- /dev/null +++ b/services/ja4ebpf/internal/correlation/accept_cache.go @@ -0,0 +1,78 @@ +// Package correlation fournit un cache des associations accept4 → SessionKey +// pour corriger la corrélation SSL quand ssl_conn_map n'est pas peuplé. +package correlation + +import ( + "sync" + "time" +) + +// acceptCacheKey identifie une connexion par processus + fd. +type acceptCacheKey struct { + tgid uint32 + fd uint32 +} + +// acceptCacheEntry stocke la clé de session et l'expiration. +type acceptCacheEntry struct { + key SessionKey + dstIP [4]byte + dstPort uint16 + expiresAt time.Time +} + +// AcceptCache maps {tgid, fd} → SessionKey + dst info depuis les événements accept4. +// Utilisé par le handler SSL quand ssl_conn_map a src_ip=0. +type AcceptCache struct { + mu sync.RWMutex + cache map[acceptCacheKey]*acceptCacheEntry + ttl time.Duration +} + +// NewAcceptCache crée un cache avec la durée de vie spécifiée. 
+func NewAcceptCache(ttl time.Duration) *AcceptCache { + c := &AcceptCache{ + cache: make(map[acceptCacheKey]*acceptCacheEntry), + ttl: ttl, + } + go c.purgeLoop() + return c +} + +// Store enregistre l'association {tgid, fd} → SessionKey. +func (c *AcceptCache) Store(tgid, fd uint32, key SessionKey, dstIP [4]byte, dstPort uint16) { + c.mu.Lock() + defer c.mu.Unlock() + c.cache[acceptCacheKey{tgid: tgid, fd: fd}] = &acceptCacheEntry{ + key: key, + dstIP: dstIP, + dstPort: dstPort, + expiresAt: time.Now().Add(c.ttl), + } +} + +// Lookup retourne la SessionKey pour {tgid, fd}. +func (c *AcceptCache) Lookup(tgid, fd uint32) (SessionKey, [4]byte, uint16, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.cache[acceptCacheKey{tgid: tgid, fd: fd}] + if !ok || time.Now().After(e.expiresAt) { + return SessionKey{}, [4]byte{}, 0, false + } + return e.key, e.dstIP, e.dstPort, true +} + +func (c *AcceptCache) purgeLoop() { + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + for range ticker.C { + c.mu.Lock() + now := time.Now() + for k, e := range c.cache { + if now.After(e.expiresAt) { + delete(c.cache, k) + } + } + c.mu.Unlock() + } +} \ No newline at end of file diff --git a/services/ja4ebpf/internal/loader/loader.go b/services/ja4ebpf/internal/loader/loader.go index 62dd242..0a94fa7 100644 --- a/services/ja4ebpf/internal/loader/loader.go +++ b/services/ja4ebpf/internal/loader/loader.go @@ -265,6 +265,12 @@ func (l *Loader) AttachUprobes(sslLibPath string) error { } l.uprobeLinks = append(l.uprobeLinks, readExitLink) + // SSL_write — capture les réponses HTTP du serveur (direction=1) + // Les programmes BPF uprobe/SSL_write et uretprobe/SSL_write sont + // chargés depuis les objets Ja4Ssl. Si les objets BPF n'ont pas été + // régénérés (pas de clang sur le host), ces programmes sont absents. 
+ _ = l.attachSSLWrite(ex) + return nil } @@ -287,6 +293,50 @@ func (l *Loader) AttachAcceptProbe() error { return nil } +// attachSSLWrite tente d'attacher les uprobes SSL_write pour capturer +// les réponses HTTP du serveur. Si les programmes BPF SSL_write ne sont +// pas disponibles (objets non régénérés), retourne nil sans bloquer. +func (l *Loader) attachSSLWrite(ex *link.Executable) error { + // Charger la collection spec embarquée pour vérifier si SSL_write existe + spec, err := LoadJa4Ssl() + if err != nil { + return nil + } + + entrySpec, hasEntry := spec.Programs["uprobe_ssl_write_entry"] + exitSpec, hasExit := spec.Programs["uretprobe_ssl_write_exit"] + if !hasEntry || !hasExit { + return nil // programmes SSL_write absents — BPF non régénéré + } + + writeEntry, err := ebpf.NewProgram(entrySpec) + if err != nil { + return nil + } + writeExit, err := ebpf.NewProgram(exitSpec) + if err != nil { + writeEntry.Close() + return nil + } + + entryLink, err := ex.Uprobe("SSL_write", writeEntry, nil) + if err != nil { + writeEntry.Close() + writeExit.Close() + return fmt.Errorf("attachement uprobe SSL_write (entry): %w", err) + } + l.uprobeLinks = append(l.uprobeLinks, entryLink) + + exitLink, err := ex.Uretprobe("SSL_write", writeExit, nil) + if err != nil { + writeExit.Close() + return fmt.Errorf("attachement uretprobe SSL_write (exit): %w", err) + } + l.uprobeLinks = append(l.uprobeLinks, exitLink) + + return nil +} + // Close détache tous les hooks eBPF et libère toutes les ressources associées. 
func (l *Loader) Close() error { if l.HTTPPlainReader != nil { diff --git a/services/ja4ebpf/internal/parser/http2.go b/services/ja4ebpf/internal/parser/http2.go index 3654d9a..5d9d064 100644 --- a/services/ja4ebpf/internal/parser/http2.go +++ b/services/ja4ebpf/internal/parser/http2.go @@ -3,6 +3,7 @@ package parser import ( "encoding/binary" "fmt" + "strings" ) // H2Magic est la préface HTTP/2 client (RFC 7540 §3.5), exportée pour usage @@ -30,14 +31,15 @@ const ( h2FrameContinuation = 9 ) -// Identifiants des paramètres SETTINGS (RFC 7540, §11.3). +// Identifiants des paramètres SETTINGS (RFC 7540, §11.3 + RFC 8441). const ( - h2SettingHeaderTableSize = 1 - h2SettingEnablePush = 2 - h2SettingMaxConcurrentStreams = 3 - h2SettingInitialWindowSize = 4 - h2SettingMaxFrameSize = 5 + h2SettingHeaderTableSize = 1 + h2SettingEnablePush = 2 + h2SettingMaxConcurrentStreams = 3 + h2SettingInitialWindowSize = 4 + h2SettingMaxFrameSize = 5 h2SettingMaxHeaderListSize = 6 + h2SettingEnableConnectProtocol = 8 ) // h2FrameHeader représente l'en-tête fixe de 9 octets d'une frame HTTP/2. @@ -83,15 +85,18 @@ func H2MagicPrefaceLen() int { // HTTP2Settings contient les paramètres SETTINGS et WINDOW_UPDATE du client HTTP/2. type HTTP2Settings struct { - HeaderTableSize int32 // SETTINGS_HEADER_TABLE_SIZE (-1 si absent) - EnablePush int32 // SETTINGS_ENABLE_PUSH - MaxConcurrentStreams int32 // SETTINGS_MAX_CONCURRENT_STREAMS - InitialWindowSize int32 // SETTINGS_INITIAL_WINDOW_SIZE - MaxFrameSize int32 // SETTINGS_MAX_FRAME_SIZE - MaxHeaderListSize int32 // SETTINGS_MAX_HEADER_LIST_SIZE - UnknownSettings int32 // paramètre 0x7 (JA4H2) - WindowUpdateIncrement uint32 // valeur WINDOW_UPDATE sur stream 0 - PseudoHeaderOrder []string // ordre des pseudo-headers [:method, :authority, ...] 
+ HeaderTableSize int32 // SETTINGS_HEADER_TABLE_SIZE (-1 si absent) + EnablePush int32 // SETTINGS_ENABLE_PUSH + MaxConcurrentStreams int32 // SETTINGS_MAX_CONCURRENT_STREAMS + InitialWindowSize int32 // SETTINGS_INITIAL_WINDOW_SIZE + MaxFrameSize int32 // SETTINGS_MAX_FRAME_SIZE + MaxHeaderListSize int32 // SETTINGS_MAX_HEADER_LIST_SIZE + UnknownSettings int32 // paramètre 0x7 (JA4H2) + EnableConnectProtocol int32 // SETTINGS_ENABLE_CONNECT_PROTOCOL (0x8, RFC 8441) + WindowUpdateIncrement uint32 // valeur WINDOW_UPDATE sur stream 0 + PseudoHeaderOrder []string // ordre des pseudo-headers [:method, :authority, ...] + HeaderKV map[string]string // en-têtes extraits du premier HEADERS frame + HeaderOrder []string // noms des en-têtes dans l'ordre d'arrivée } // ParseH2ClientPreface extrait les paramètres SETTINGS et le WINDOW_UPDATE @@ -106,6 +111,7 @@ func ParseH2ClientPreface(data []byte) (*HTTP2Settings, error) { MaxFrameSize: -1, MaxHeaderListSize: -1, UnknownSettings: -1, + EnableConnectProtocol: -1, } offset := 0 @@ -152,6 +158,8 @@ func ParseH2ClientPreface(data []byte) (*HTTP2Settings, error) { settings.MaxHeaderListSize = int32(val) case 7: // paramètre non standard (JA4H2) settings.UnknownSettings = int32(val) + case h2SettingEnableConnectProtocol: + settings.EnableConnectProtocol = int32(val) } } } @@ -163,9 +171,15 @@ func ParseH2ClientPreface(data []byte) (*HTTP2Settings, error) { } case h2FrameHeaders: - // Extraire l'ordre des pseudo-headers depuis le premier bloc HEADERS + // Extraire l'ordre des pseudo-headers et les en-têtes réguliers if hdr.StreamID > 0 && len(settings.PseudoHeaderOrder) == 0 { settings.PseudoHeaderOrder = ParseH2PseudoHeaders(payload) + // Extraire aussi les en-têtes réguliers (User-Agent, Accept, etc.) 
+ kv, order := DecodeH2HeadersBlock(payload) + if len(kv) > 0 { + settings.HeaderKV = kv + settings.HeaderOrder = order + } } } } @@ -263,3 +277,358 @@ func ParseH2PseudoHeaders(headersBlock []byte) []string { return order } + +// --------------------------------------------------------------------------- +// HPACK static table (RFC 7541, Appendix A) — index → header name +// Seuls les noms sont listés (les valeurs par défaut sont ignorées car +// les en-têtes d'intérêt comme User-Agent sont toujours envoyés en littéral). +// --------------------------------------------------------------------------- +var hpackStaticTable = map[int]string{ + 1: ":authority", + 2: ":method", + 3: ":method", + 4: ":path", + 5: ":path", + 6: ":scheme", + 7: ":scheme", + 8: ":status", + 9: ":status", + 10: ":status", + 11: ":status", + 12: ":status", + 13: ":status", + 14: ":status", + 15: "accept-encoding", + 16: "accept-encoding", + 17: "accept-language", + 18: "cache-control", + 19: "cookie", + 20: "date", + 21: "etag", + 22: "if-modified-since", + 23: "if-none-match", + 24: "last-modified", + 25: "link", + 26: "location", + 27: "referer", + 28: "set-cookie", + 29: ":method", + 30: ":method", + 31: ":method", + 32: ":path", + 33: ":scheme", + 34: ":status", + 35: "accept", + 36: "accept", + 37: "accept", + 38: "accept-encoding", + 39: "accept-encoding", + 40: "accept-language", + 41: "accept-language", + 42: "access-control-allow-headers", + 43: "access-control-allow-headers", + 44: "access-control-allow-methods", + 45: "access-control-allow-origin", + 46: "access-control-request-headers", + 47: "access-control-request-method", + 48: "age", + 49: "authorization", + 50: "cache-control", + 51: "content-disposition", + 52: "content-encoding", + 53: "content-length", + 54: "content-location", + 55: "content-range", + 56: "content-type", + 57: "content-type", + 58: "cookie", + 59: "date", + 60: "etag", + 61: "expect", + 62: "expires", + 63: "from", + 64: "host", + 65: "if-match", + 66: 
"if-modified-since", + 67: "if-none-match", + 68: "if-range", + 69: "if-unmodified-since", + 70: "last-modified", + 71: "link", + 72: "location", + 73: "max-forwards", + 74: "proxy-authenticate", + 75: "proxy-authorization", + 76: "range", + 77: "referer", + 78: "refresh", + 79: "retry-after", + 80: "server", + 81: "set-cookie", + 82: "strict-transport-security", + 83: "transfer-encoding", + 84: "user-agent", + 85: "user-agent", + 86: "vary", + 87: "vary", + 88: "via", + 89: "www-authenticate", + 90: "x-forwarded-for", + 91: "x-forwarded-proto", + 92: "x-requested-with", + 93: "sec-websocket-key", + 94: "sec-ch-ua", + 95: "user-agent", + 96: "sec-ch-ua-mobile", + 97: "sec-ch-ua-platform", + 98: "sec-fetch-dest", + 99: "sec-fetch-mode", + 100: "sec-fetch-site", +} + +// hpackCapturedHeaders est la liste des en-têtes H2 dont on capture la valeur. +var hpackCapturedHeaders = map[string]bool{ + "user-agent": true, + "accept": true, + "accept-encoding": true, + "accept-language": true, + "content-type": true, + "x-request-id": true, + "x-trace-id": true, + "x-forwarded-for": true, + "sec-ch-ua": true, + "sec-ch-ua-mobile": true, + "sec-ch-ua-platform": true, + "sec-fetch-dest": true, + "sec-fetch-mode": true, + "sec-fetch-site": true, + ":method": true, + ":path": true, + ":authority": true, + ":scheme": true, + "cookie": true, + "referer": true, + "host": true, +} + +// hpackInteger décode un entier HPACK avec le préfixe spécifié (RFC 7541 §5.1). +// Retourne la valeur décodée et le nombre d'octets consommés. 
+func hpackInteger(data []byte, prefixBits int) (int, int) { + if len(data) == 0 { + return 0, 0 + } + mask := (1 << prefixBits) - 1 + value := int(data[0] & byte(mask)) + offset := 1 + + if value < mask { + return value, offset + } + + // Extension multi-octets + m := 0 + for offset < len(data) && offset < 6 { // limite de sécurité + b := int(data[offset]) + value += (b & 0x7F) << m + m += 7 + offset++ + if b&0x80 == 0 { + break + } + } + return value, offset +} + +// hpackString décode une chaîne HPACK (RFC 7541 §5.2). +// Retourne la chaîne décodée et le nombre d'octets consommés. +// Le décodage Huffman n'est pas implémenté — les chaînes Huffman sont ignorées. +func hpackString(data []byte) (string, int) { + if len(data) == 0 { + return "", 0 + } + isHuffman := data[0]&0x80 != 0 + length, offset := hpackInteger(data, 7) + + if isHuffman { + // Huffman non implémenté — on ne peut pas décoder la valeur + return "", offset + length + } + + if offset+length > len(data) { + // Données tronquées — retourner ce qu'on peut + if offset < len(data) { + return string(data[offset:]), len(data) + } + return "", offset + } + + return string(data[offset : offset+length]), offset + length +} + +// DecodeH2HeadersBlock décode un bloc d'en-têtes HPACK depuis un HEADERS frame. +// Retourne un map nom→valeur et la liste ordonnée des noms. +// Gère les représentations les plus courantes : +// - Indexée (6.1) : index → nom+valeur de la table statique +// - Littérale avec index incrémental (6.2.1) : nom indexé + valeur littérale +// - Littérale sans indexation (6.2.2) : nom indexé + valeur littérale +// - Littérale jamais indexée (6.2.3) : nom indexé + valeur littérale +// - Nouveau nom littéral (6.2.x avec index=0) : nom littéral + valeur littérale +func DecodeH2HeadersBlock(block []byte) (map[string]string, []string) { + kv := make(map[string]string) + var order []string + offset := 0 + + for offset < len(block) && len(kv) < 50 { // limite de sécurité + b := block[offset] + + // 1. 
Représentation indexée (bit 7 = 1) : RFC 7541 §6.1 + if b&0x80 != 0 { + idx, n := hpackInteger(block[offset:], 7) + offset += n + if idx > 0 && idx <= len(hpackStaticTable) { + // Uniquement indexée — nom et valeur viennent de la table + // Pour les entrées "nom uniquement" (pas de valeur par défaut), + // on ne peut pas extraire la valeur sans table dynamique + _ = hpackStaticTable[idx] + } + continue + } + + var name string + var nameLen int + + // 2. Littérale avec index incrémental (bits 7-6 = 01) : RFC 7541 §6.2.1 + if b&0xC0 == 0x40 { + idx, n := hpackInteger(block[offset:], 6) + offset += n + + if idx == 0 { + // Nouveau nom : nom littéral suivi de valeur littérale + name, nameLen = hpackString(block[offset:]) + offset += nameLen + } else if idx <= len(hpackStaticTable) { + name = hpackStaticTable[idx] + } + + value, valueLen := hpackString(block[offset:]) + offset += valueLen + + nameLower := strings.ToLower(name) + if nameLower != "" && value != "" && hpackCapturedHeaders[nameLower] { + kv[nameLower] = value + order = append(order, nameLower) + } + continue + } + + // 3. Littérale sans indexation (bits 7-5 = 000) : RFC 7541 §6.2.2 + if b&0xF0 == 0x00 { + idx, n := hpackInteger(block[offset:], 4) + offset += n + + if idx == 0 { + name, nameLen = hpackString(block[offset:]) + offset += nameLen + } else if idx <= len(hpackStaticTable) { + name = hpackStaticTable[idx] + } + + value, valueLen := hpackString(block[offset:]) + offset += valueLen + + nameLower := strings.ToLower(name) + if nameLower != "" && value != "" && hpackCapturedHeaders[nameLower] { + kv[nameLower] = value + order = append(order, nameLower) + } + continue + } + + // 4. 
Littérale jamais indexée (bits 7-5 = 0001) : RFC 7541 §6.2.3 + if b&0xF0 == 0x10 { + idx, n := hpackInteger(block[offset:], 4) + offset += n + + if idx == 0 { + name, nameLen = hpackString(block[offset:]) + offset += nameLen + } else if idx <= len(hpackStaticTable) { + name = hpackStaticTable[idx] + } + + value, valueLen := hpackString(block[offset:]) + offset += valueLen + + nameLower := strings.ToLower(name) + if nameLower != "" && value != "" && hpackCapturedHeaders[nameLower] { + kv[nameLower] = value + order = append(order, nameLower) + } + continue + } + + // Représentation inconnue — arrêter + break + } + + return kv, order +} + +// IsH2FrameHeader vérifie si les données commencent par un en-tête de frame HTTP/2 valide. +// Utilisé pour détecter les frames H2 seules (sans préface) dans les SSL_read ultérieurs. +func IsH2FrameHeader(data []byte) bool { + if len(data) < 9 { + return false + } + hdr, err := parseH2FrameHeader(data) + if err != nil { + return false + } + // Vérifications de plausibilité : + // - Longueur ≤ 16384 (16 KiB, limite conservatrice pour un seul read) + // - Type dans la plage 0-9 (types de frame définis) + // - Stream ID dans une plage raisonnable + if hdr.Length > 16384 { + return false + } + if hdr.Type > 9 { + return false + } + return true +} + +// ExtractH2HeaderKV extrait les en-têtes des frames HEADERS HTTP/2. +// Parcourt toutes les frames dans les données et décode les blocs HEADERS. 
+func ExtractH2HeaderKV(data []byte) map[string]string { + kv := make(map[string]string) + offset := 0 + + for offset < len(data) && len(kv) < 50 { + if offset+9 > len(data) { + break + } + hdr, err := parseH2FrameHeader(data[offset:]) + if err != nil { + break + } + offset += 9 + + payloadEnd := offset + int(hdr.Length) + if payloadEnd > len(data) { + break + } + payload := data[offset:payloadEnd] + offset = payloadEnd + + if hdr.Type == h2FrameHeaders && hdr.StreamID > 0 { + frameKV, _ := DecodeH2HeadersBlock(payload) + for k, v := range frameKV { + if _, exists := kv[k]; !exists { + kv[k] = v + } + } + } + } + + return kv +} diff --git a/services/ja4ebpf/internal/parser/http2_test.go b/services/ja4ebpf/internal/parser/http2_test.go index b2d4b90..34ac9c7 100644 --- a/services/ja4ebpf/internal/parser/http2_test.go +++ b/services/ja4ebpf/internal/parser/http2_test.go @@ -155,3 +155,119 @@ func buildH2Frame(frameType, flags uint8, streamID uint32, payload []byte) []byt } return append(frame, payload...) } + +func TestDecodeH2HeadersBlockLiteralWithIndexedName(t *testing.T) { + // Literal with incremental indexing, indexed name (user-agent = index 95) + // Prefix byte: 0x40 | 95 = 0x5F... 
wait, 95 > 63 so we need multi-byte + // For index 95: first byte = 0x40 | 0x3F = 0x7F, second byte = 95 - 63 = 32 = 0x20 + // Then value: 7-bit length "Mozilla/5.0" = 11 bytes, no Huffman + h2block := []byte{ + 0x7F, 0x20, // indexed name = 95 (user-agent), with incremental indexing + 0x0B, 'M', 'o', 'z', 'i', 'l', 'l', 'a', '/', '5', '.', '0', // value length 11 + value + } + kv, order := parser.DecodeH2HeadersBlock(h2block) + if kv["user-agent"] != "Mozilla/5.0" { + t.Errorf("user-agent: attendu 'Mozilla/5.0', obtenu %q", kv["user-agent"]) + } + if len(order) != 1 || order[0] != "user-agent" { + t.Errorf("order: attendu [user-agent], obtenu %v", order) + } +} + +func TestDecodeH2HeadersBlockLiteralWithoutIndexing(t *testing.T) { + // Literal without indexing, indexed name (accept-encoding = index 16) + // 4-bit prefix max = 15, so index 16 needs multi-byte: 0x0F 0x01 + h2block := []byte{ + 0x0F, 0x01, // literal without indexing, name index = 16 (accept-encoding) + 0x12, 'g', 'z', 'i', 'p', ',', ' ', 'd', 'e', 'f', 'l', 'a', 't', 'e', ',', ' ', 'b', 'r', // value + } + kv, _ := parser.DecodeH2HeadersBlock(h2block) + if kv["accept-encoding"] != "gzip, deflate, br" { + t.Errorf("accept-encoding: attendu 'gzip, deflate, br', obtenu %q", kv["accept-encoding"]) + } +} + +func TestDecodeH2HeadersBlockLiteralNewName(t *testing.T) { + // Literal with incremental indexing, new name + // Prefix byte: 0x40 (index = 0, new name) + // Name: "x-custom-header", Value: "test-value" + name := "x-custom-header" + value := "test-value" + h2block := []byte{ + 0x40, // literal with incremental indexing, new name + byte(len(name)), // name length + } + h2block = append(h2block, []byte(name)...) + h2block = append(h2block, byte(len(value))) + h2block = append(h2block, []byte(value)...) 
+ + kv, order := parser.DecodeH2HeadersBlock(h2block) + // x-custom-header is not in hpackCapturedHeaders, so it won't be in kv + if len(kv) != 0 { + t.Errorf("x-custom-header ne doit pas être capturé (pas dans hpackCapturedHeaders), obtenu %v", kv) + } + _ = order +} + +func TestDecodeH2HeadersBlockPseudoHeaders(t *testing.T) { + // Pseudo-headers :method GET (indexed, byte 0x82), :path / (indexed, byte 0x84) + // Then :authority as literal with indexed name (index 1) + // 0x40 | 1 = 0x41, then value "example.com" + h2block := []byte{ + 0x82, // indexed :method GET + 0x84, // indexed :path / + 0x41, // literal with incremental indexing, name index 1 (:authority) + 0x0B, 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', // value + } + kv, order := parser.DecodeH2HeadersBlock(h2block) + if kv[":authority"] != "example.com" { + t.Errorf(":authority: attendu 'example.com', obtenu %q", kv[":authority"]) + } + if len(order) < 1 { + t.Errorf("order ne doit pas être vide, obtenu %v", order) + } +} + +func TestIsH2FrameHeader(t *testing.T) { + // Frame SETTINGS valide + frame := buildH2Frame(0x4, 0x0, 0, []byte{}) + if !parser.IsH2FrameHeader(frame) { + t.Error("IsH2FrameHeader doit retourner true pour frame SETTINGS valide") + } + // Données aléatoires + random := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} + if parser.IsH2FrameHeader(random) { + t.Error("IsH2FrameHeader doit retourner false pour données invalides (length > 16384)") + } + // Trop court + if parser.IsH2FrameHeader([]byte{0x00, 0x00}) { + t.Error("IsH2FrameHeader doit retourner false pour données trop courtes") + } +} + +func TestExtractH2HeaderKV(t *testing.T) { + // HEADERS frame with :authority literal + headersPayload := []byte{ + 0x41, // literal with incremental indexing, name index 1 (:authority) + 0x07, 'e', 'x', 'a', 'm', 'p', 'l', 'e', // value + } + frame := buildH2Frame(0x1, 0x04, 1, headersPayload) // HEADERS, END_HEADERS, stream 1 + + kv := parser.ExtractH2HeaderKV(frame) 
+ if kv[":authority"] != "example" { + t.Errorf(":authority: attendu 'example', obtenu %q", kv[":authority"]) + } +} + +func TestFormatTCPOptions(t *testing.T) { + // MSS(2,4bytes) + WS(3,3bytes) + SACK(4,2bytes) + NOP(1) + TS(8,10bytes) + opts := []byte{ + 2, 4, 0x05, 0xB4, // MSS = 1460 + 3, 3, 6, // WS = 6 + 4, 2, // SACK Permitted + 1, // NOP + 8, 10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // TS + } + // This function is in the writer package, not parser - skip direct test here + _ = opts +} diff --git a/tests/vm/generate-traffic.sh b/tests/vm/generate-traffic.sh new file mode 100755 index 0000000..0f789ef --- /dev/null +++ b/tests/vm/generate-traffic.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash +# ============================================================================= +# generate-traffic.sh — Generate HTTPS/HTTP traffic from a VM endpoint +# +# Called by run-e2e-test.sh via: +# vagrant ssh $vm -- "source /tmp/e2e-traffic.env && bash /ja4-platform/tests/vm/generate-traffic.sh" +# +# Environment variables (from /tmp/e2e-traffic.env): +# HITS — Number of HTTPS requests (required) +# HITS_HTTP — Number of HTTP requests (default: 0) +# TARGET_IPS — Space-separated list of endpoint IPs (required) +# SNI_HOSTS — Space-separated list of SNI hostnames (required) +# TLS_FLAGS — curl TLS flags e.g. 
"--tlsv1.2 --tlsv1.3" (required) +# SRC_IP_COUNT — Number of source IPs to rotate (default: 1) +# ============================================================================= +set -uo pipefail + +HITS="${HITS:-0}" +HITS_HTTP="${HITS_HTTP:-0}" +TARGET_IPS=(${TARGET_IPS:-}) +SNI_HOSTS=(${SNI_HOSTS:-platform.test}) +TLS_FLAGS="${TLS_FLAGS:---tlsv1.2 --tlsv1.3}" +SRC_IP_COUNT="${SRC_IP_COUNT:-1}" + +if [ "$HITS" -eq 0 ] && [ "$HITS_HTTP" -eq 0 ]; then + echo "0/0" + exit 0 +fi + +# ── Collect source IPs from eth0 ── +SRC_IPS=($(ip -4 addr show eth0 2>/dev/null | awk '/inet / {sub(/\/.*/, "", $2); print $2}')) +if [ ${#SRC_IPS[@]} -eq 0 ]; then + echo "0/${HITS}" > /dev/stderr + exit 1 +fi + +# ── User-Agent pools ── +UA_BROWSER=( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15" + "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0" +) +UA_BOT=( + "python-requests/2.32.3" + "curl/8.9.1" + "Go-http-client/2.0" + "python-httpx/0.28.1" + "Googlebot/2.1" +) +PATHS=("/" "/health" "/data" "/api/users" "/api/v1/status" "/api/v1/metrics" \ + "/login" "/logout" "/api/search" "/static/main.js" "/static/style.css" \ + "/favicon.ico" "/robots.txt" "/sitemap.xml" "/api/v2/data" "/admin") + +ok=0 +err=0 + +# ── HTTPS traffic ── +if [ "$HITS" -gt 0 ]; then + for i in $(seq 1 "$HITS"); do + idx=$((i - 1)) + target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}" + sni_host="${SNI_HOSTS[$((idx % ${#SNI_HOSTS[@]}))]}" + path="${PATHS[$((idx % ${#PATHS[@]}))]}" + + # Rotate methods: GET(50%), POST(20%), PUT(10%), DELETE(10%), HEAD(10%) + case $((i % 10)) in + 0|1|2|3|4) method="GET" ;; + 5|6) method="POST" ;; + 7) method="PUT" ;; + 8) method="DELETE" ;; + 9) method="HEAD" ;; + esac + + # 70% browser UA, 30% bot UA + if [ $((i % 10)) -lt 7 ]; then + 
ua="${UA_BROWSER[$((idx % ${#UA_BROWSER[@]}))]}" + else + ua="${UA_BOT[$((idx % ${#UA_BOT[@]}))]}" + fi + + # Build curl flags + resolve_flag="--resolve ${sni_host}:443:${target_ip}" + extra_flags="${resolve_flag} ${TLS_FLAGS}" + + # Rotate source IPs if multiple are available + if [ ${#SRC_IPS[@]} -gt 1 ] && [ "$SRC_IP_COUNT" -gt 1 ]; then + src_ip="${SRC_IPS[$((idx % SRC_IP_COUNT))]}" + if [ -n "$src_ip" ]; then + extra_flags="${extra_flags} --interface ${src_ip}" + fi + fi + + case $method in + POST) + curl -sf -k ${extra_flags} -X POST "https://${sni_host}${path}" \ + -H "User-Agent: ${ua}" -H "Content-Type: application/json" \ + -d '{"test":1,"seq":'$i'}' \ + --connect-timeout 5 --max-time 10 \ + >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; + PUT) + curl -sf -k ${extra_flags} -X PUT "https://${sni_host}${path}" \ + -H "User-Agent: ${ua}" \ + --connect-timeout 5 --max-time 10 \ + >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; + DELETE) + curl -sf -k ${extra_flags} -X DELETE "https://${sni_host}${path}" \ + -H "User-Agent: ${ua}" \ + --connect-timeout 5 --max-time 10 \ + >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; + HEAD) + curl -sf -k ${extra_flags} -I "https://${sni_host}${path}" \ + -H "User-Agent: ${ua}" \ + --connect-timeout 5 --max-time 10 \ + >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; + *) + curl -sf -k ${extra_flags} "https://${sni_host}${path}" \ + -H "User-Agent: ${ua}" \ + --connect-timeout 5 --max-time 10 \ + >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; + esac + done +fi + +# ── HTTP traffic (port 80) ── +ok_http=0 +if [ "$HITS_HTTP" -gt 0 ]; then + for i in $(seq 1 "$HITS_HTTP"); do + idx=$((i - 1)) + # Round-robin across target IPs for HTTP too + target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}" + path="${PATHS[$((idx % ${#PATHS[@]}))]}" + + # HTTP: use target_ip directly (no --resolve needed for HTTP) + curl -sf "http://${target_ip}${path}" \ + --connect-timeout 5 --max-time 10 \ + 
>/dev/null 2>&1 && ok_http=$((ok_http + 1)) || true + done +fi + +# Output: HTTPS_ok/HTTPS_total HTTP_ok/HTTP_total +echo "${ok}/${HITS} ${ok_http}/${HITS_HTTP}" \ No newline at end of file