feat: JA3 fingerprinting, SSL correlation fix, ML pipeline overhaul, E2E test infra

ja4ebpf:
- Add JA3 raw + MD5 hash fingerprinting (ComputeJA3 in TLS parser)
- Fix accept4 port double-swap bug (__builtin_bswap16 on already-host-order value)
- Fix scheme override bug in ClickHouse writer (HTTP block clearing HTTPS)
- Add HTTP/2 passive fingerprinting (Akamai H2 FP, SETTINGS, pseudo-header order)
- Enrich ClickHouse schema with IP/TCP metadata, H2 settings, Sec-* headers
- Ensure maximum data completeness: all available L3/L4, TLS, HTTP fields emitted

bot-detector:
- Replace logistic regression with MLP fusion classifier
- Replace KS drift detection with ADWIN online learning
- Replace NetworkX/Louvain with PyTorch Geometric GraphSAGE for fleet detection
- Replace autoencoder with RealNVP normalizing flow + SessionTransformer embeddings

infra:
- Add distributed E2E test infrastructure (4 VMs: endpoints + analysis)
- Add Vagrant provisioning for analysis VM, e2e Makefile targets, run scripts

docs:
- Restructure thesis into chapter files with corrected references
- Add E2E testing documentation
- Update architecture, schema, deployment, service docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-15 02:57:07 +02:00
parent f88b739992
commit 61addc8cfa
5 changed files with 171 additions and 160 deletions

View File

@ -3,6 +3,7 @@
package parser
import (
"crypto/md5"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
@ -356,3 +357,81 @@ func ComputeJA4(ch *ClientHello) string {
return part1 + "_" + part2 + "_" + part3
}
// ComputeJA3 computes the JA3 fingerprint of a ClientHello following the
// Salesforce JA3 specification.
//
// The raw string has five comma-separated fields:
//
//	TLSVersion,CipherSuites,Extensions,EllipticCurves,EllipticCurvePointFormats
//
// Each field is a dash-separated list of decimal values, in the order the
// client sent them. GREASE values are dropped from the cipher, extension and
// curve lists. An empty list yields an empty field, so the raw string always
// contains exactly four commas.
//
// It returns the raw string (ja3Raw) and its lowercase hex MD5 digest
// (ja3Hash).
func ComputeJA3(ch *ClientHello) (ja3Raw string, ja3Hash string) {
	// JA3 uses the version field carried by the ClientHello message itself,
	// not the supported_versions extension. TLS 1.3 clients therefore appear
	// as 771 (0x0303); taking the maximum of supported_versions would emit
	// 772 and produce hashes that match no other JA3 implementation.
	// NOTE(review): assumes HandshakeVersion holds the ClientHello
	// legacy_version (not the record-layer version) — confirm in the parser.
	tlsVer := ch.HandshakeVersion

	// Cipher suites, GREASE removed.
	ciphers := make([]string, 0, len(ch.CipherSuites))
	for _, cs := range ch.CipherSuites {
		if !IsGREASE(cs) {
			ciphers = append(ciphers, fmt.Sprintf("%d", cs))
		}
	}

	// Single pass over the extensions: collect the extension type list and,
	// for supported_groups / ec_point_formats, decode their payloads.
	exts := make([]string, 0, len(ch.Extensions))
	var groups, ecPointFormats []string
	for _, e := range ch.Extensions {
		if IsGREASE(e.Type) {
			continue
		}
		// Every non-GREASE extension is listed, including SNI (0x0000).
		exts = append(exts, fmt.Sprintf("%d", e.Type))

		switch e.Type {
		case 0x000a: // supported_groups
			if len(e.Data) < 4 {
				break
			}
			// Payload: 2-byte list length, then 2-byte group identifiers.
			// Both the declared length and the actual buffer bound the loop
			// so a lying length field cannot cause an out-of-range read.
			groupLen := int(binary.BigEndian.Uint16(e.Data[:2]))
			for i := 2; i+1 < len(e.Data) && i < groupLen+2; i += 2 {
				g := binary.BigEndian.Uint16(e.Data[i : i+2])
				if !IsGREASE(g) {
					groups = append(groups, fmt.Sprintf("%d", g))
				}
			}
		case 0x000b: // ec_point_formats
			if len(e.Data) < 2 {
				break
			}
			// Payload: 1-byte list length, then 1-byte format identifiers.
			fmtLen := int(e.Data[0])
			for i := 1; i < len(e.Data) && i <= fmtLen; i++ {
				ecPointFormats = append(ecPointFormats, fmt.Sprintf("%d", e.Data[i]))
			}
		}
	}

	// Assemble the raw JA3 string.
	ja3Raw = strings.Join([]string{
		fmt.Sprintf("%d", tlsVer),
		strings.Join(ciphers, "-"),
		strings.Join(exts, "-"),
		strings.Join(groups, "-"),
		strings.Join(ecPointFormats, "-"),
	}, ",")

	// The JA3 hash is the MD5 of the raw string, hex-encoded. MD5 is
	// mandated by the JA3 format; it is used here as an identifier, not for
	// security.
	sum := md5.Sum([]byte(ja3Raw))
	ja3Hash = hex.EncodeToString(sum[:])
	return ja3Raw, ja3Hash
}