- TC ingress hook captures TCP SYN (L3/L4) and TLS ClientHello - Uprobes on SSL_read/SSL_set_fd capture decrypted TLS data - Kprobes on accept4 correlate socket FDs to client IP:port - JA4 fingerprint computed from parsed TLS ClientHello - HTTP/2 SETTINGS and WINDOW_UPDATE extracted from decrypted streams - Session manager with sharded map (256 shards) and GC goroutine - Slowloris detection: sessions with no requests after 10s threshold - ClickHouse batch writer to ja4_logs.http_logs_raw (raw_json) - All tests pass: 17 parser + 10 correlation tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
354 lines
10 KiB
Go
354 lines
10 KiB
Go
// Package parser provides the TLS ClientHello and HTTP/2 parsers used to
// extract network fingerprints.
|
|
package parser
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/binary"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// ClientHello représente les champs extraits d'un message TLS ClientHello.
|
|
type ClientHello struct {
|
|
RecordVersion uint16 // version du record TLS (ex: 0x0303)
|
|
HandshakeVersion uint16 // version dans le handshake
|
|
CipherSuites []uint16 // suites de chiffrement proposées
|
|
CompressionMethods []uint8 // méthodes de compression
|
|
Extensions []Extension // liste des extensions TLS
|
|
SNI string // Server Name Indication (si présent)
|
|
ALPN []string // protocoles ALPN annoncés
|
|
SupportedGroups []uint16 // groupes Diffie-Hellman supportés
|
|
ECPointFormats []uint8 // formats de points elliptiques
|
|
SupportedVersions []uint16 // versions TLS annoncées (extension 0x002b)
|
|
}
|
|
|
|
// Extension is a single TLS extension: its type identifier and raw body.
type Extension struct {
	// Type is the extension identifier.
	Type uint16
	// Data is the raw, undecoded extension body.
	Data []byte
}
|
|
|
|
// ParseClientHello extrait les champs du ClientHello TLS depuis le payload brut.
|
|
// Le payload doit commencer au record layer TLS (premier octet = 0x16).
|
|
// Retourne une erreur si le payload est tronqué ou structurellement invalide.
|
|
func ParseClientHello(payload []byte) (*ClientHello, error) {
|
|
if len(payload) < 5 {
|
|
return nil, fmt.Errorf("payload trop court pour le record TLS: %d octets", len(payload))
|
|
}
|
|
|
|
// Vérifier le type de contenu : 0x16 = Handshake
|
|
if payload[0] != 0x16 {
|
|
return nil, fmt.Errorf("type de contenu TLS inattendu: 0x%02x (attendu 0x16)", payload[0])
|
|
}
|
|
|
|
recordVersion := binary.BigEndian.Uint16(payload[1:3])
|
|
recordLength := int(binary.BigEndian.Uint16(payload[3:5]))
|
|
|
|
if len(payload) < 5+recordLength {
|
|
return nil, fmt.Errorf("record TLS tronqué: attendu %d octets, reçu %d", 5+recordLength, len(payload))
|
|
}
|
|
|
|
// Parsing du message Handshake
|
|
hs := payload[5 : 5+recordLength]
|
|
if len(hs) < 4 {
|
|
return nil, fmt.Errorf("message Handshake trop court")
|
|
}
|
|
|
|
// Vérifier le type de message Handshake : 0x01 = ClientHello
|
|
if hs[0] != 0x01 {
|
|
return nil, fmt.Errorf("type de message Handshake inattendu: 0x%02x (attendu 0x01)", hs[0])
|
|
}
|
|
|
|
// Longueur du ClientHello (3 octets big-endian)
|
|
chLen := int(uint32(hs[1])<<16 | uint32(hs[2])<<8 | uint32(hs[3]))
|
|
if len(hs) < 4+chLen {
|
|
return nil, fmt.Errorf("ClientHello tronqué: attendu %d octets", 4+chLen)
|
|
}
|
|
|
|
ch := &ClientHello{RecordVersion: recordVersion}
|
|
data := hs[4 : 4+chLen]
|
|
|
|
// Version du handshake (2 octets)
|
|
if len(data) < 2 {
|
|
return nil, fmt.Errorf("ClientHello: version manquante")
|
|
}
|
|
ch.HandshakeVersion = binary.BigEndian.Uint16(data[0:2])
|
|
offset := 2
|
|
|
|
// Random (32 octets)
|
|
if len(data) < offset+32 {
|
|
return nil, fmt.Errorf("ClientHello: random manquant")
|
|
}
|
|
offset += 32
|
|
|
|
// Session ID (longueur 1 octet + données)
|
|
if len(data) < offset+1 {
|
|
return nil, fmt.Errorf("ClientHello: session ID manquant")
|
|
}
|
|
sessionIDLen := int(data[offset])
|
|
offset += 1 + sessionIDLen
|
|
|
|
// Cipher Suites (longueur 2 octets + données)
|
|
if len(data) < offset+2 {
|
|
return nil, fmt.Errorf("ClientHello: longueur cipher suites manquante")
|
|
}
|
|
csLen := int(binary.BigEndian.Uint16(data[offset : offset+2]))
|
|
offset += 2
|
|
if len(data) < offset+csLen {
|
|
return nil, fmt.Errorf("ClientHello: cipher suites tronquées")
|
|
}
|
|
for i := 0; i < csLen; i += 2 {
|
|
cs := binary.BigEndian.Uint16(data[offset+i : offset+i+2])
|
|
ch.CipherSuites = append(ch.CipherSuites, cs)
|
|
}
|
|
offset += csLen
|
|
|
|
// Compression Methods (longueur 1 octet + données)
|
|
if len(data) < offset+1 {
|
|
return nil, fmt.Errorf("ClientHello: longueur compression manquante")
|
|
}
|
|
compLen := int(data[offset])
|
|
offset++
|
|
if len(data) < offset+compLen {
|
|
return nil, fmt.Errorf("ClientHello: méthodes de compression tronquées")
|
|
}
|
|
ch.CompressionMethods = data[offset : offset+compLen]
|
|
offset += compLen
|
|
|
|
// Extensions (optionnelles)
|
|
if len(data) < offset+2 {
|
|
return ch, nil // pas d'extensions
|
|
}
|
|
extTotalLen := int(binary.BigEndian.Uint16(data[offset : offset+2]))
|
|
offset += 2
|
|
if len(data) < offset+extTotalLen {
|
|
return nil, fmt.Errorf("ClientHello: extensions tronquées")
|
|
}
|
|
|
|
// Parsing des extensions
|
|
extData := data[offset : offset+extTotalLen]
|
|
extOffset := 0
|
|
for extOffset+4 <= len(extData) {
|
|
extType := binary.BigEndian.Uint16(extData[extOffset : extOffset+2])
|
|
extLen := int(binary.BigEndian.Uint16(extData[extOffset+2 : extOffset+4]))
|
|
extOffset += 4
|
|
|
|
if extOffset+extLen > len(extData) {
|
|
break
|
|
}
|
|
extPayload := extData[extOffset : extOffset+extLen]
|
|
|
|
ch.Extensions = append(ch.Extensions, Extension{Type: extType, Data: extPayload})
|
|
|
|
// Décoder les extensions importantes
|
|
switch extType {
|
|
case 0x0000: // SNI
|
|
ch.SNI = parseSNI(extPayload)
|
|
case 0x0010: // ALPN
|
|
ch.ALPN = parseALPN(extPayload)
|
|
case 0x000a: // Supported Groups (elliptic_curves)
|
|
ch.SupportedGroups = parseSupportedGroups(extPayload)
|
|
case 0x000b: // EC Point Formats
|
|
ch.ECPointFormats = parseECPointFormats(extPayload)
|
|
case 0x002b: // Supported Versions
|
|
ch.SupportedVersions = parseSupportedVersions(extPayload)
|
|
}
|
|
|
|
extOffset += extLen
|
|
}
|
|
|
|
return ch, nil
|
|
}
|
|
|
|
// parseSNI returns the host name carried by a server_name extension body
// (extension type 0x0000).
//
// data is laid out as list_len(2) followed by entries of
// name_type(1) + name_len(2) + name. The first entry whose name_type is
// host_name (0x00) is returned. The empty string is returned when data is
// truncated, when an entry overruns the list, or when the list holds no
// host_name entry.
func parseSNI(data []byte) string {
	if len(data) < 2 {
		return ""
	}
	// Stay within both the declared list and the bytes actually present.
	end := 2 + int(binary.BigEndian.Uint16(data[0:2]))
	if end > len(data) {
		end = len(data)
	}
	for pos := 2; pos+3 <= end; {
		nameType := data[pos]
		nameLen := int(binary.BigEndian.Uint16(data[pos+1 : pos+3]))
		pos += 3
		if pos+nameLen > end {
			return ""
		}
		if nameType == 0x00 { // host_name
			return string(data[pos : pos+nameLen])
		}
		pos += nameLen
	}
	return ""
}
|
|
|
|
// parseALPN returns the protocol names listed in an ALPN extension body
// (extension type 0x0010), in the order they appear.
//
// data is laid out as list_len(2) followed by entries of len(1) + name.
// Parsing never reads past the declared list nor past the bytes actually
// present; an entry that would overrun that limit ends parsing and the names
// decoded so far are returned. nil is returned when data is shorter than the
// 2-byte list length or holds no complete entry.
func parseALPN(data []byte) []string {
	if len(data) < 2 {
		return nil
	}
	end := 2 + int(binary.BigEndian.Uint16(data[0:2]))
	if end > len(data) {
		end = len(data)
	}
	var protocols []string
	for pos := 2; pos < end; {
		nameLen := int(data[pos])
		pos++
		if pos+nameLen > end {
			break
		}
		protocols = append(protocols, string(data[pos:pos+nameLen]))
		pos += nameLen
	}
	return protocols
}
|
|
|
|
// parseSupportedGroups decodes the group identifiers of a supported_groups
// extension body (extension type 0x000a).
//
// data starts with a 2-byte list length followed by 2-byte identifiers. At
// most list_len/2 identifiers are read, fewer when data runs out first. nil is
// returned when no identifier could be read.
func parseSupportedGroups(data []byte) []uint16 {
	if len(data) < 2 {
		return nil
	}
	declared := int(binary.BigEndian.Uint16(data[:2])) / 2
	body := data[2:]

	var out []uint16
	for n := 0; n < declared; n++ {
		if len(body) < 2 {
			break
		}
		out = append(out, binary.BigEndian.Uint16(body))
		body = body[2:]
	}
	return out
}
|
|
|
|
// parseECPointFormats returns the point formats of an ec_point_formats
// extension body (extension type 0x000b).
//
// data starts with a 1-byte list length followed by that many format bytes.
// nil is returned when data is empty or shorter than the declared length. The
// result is a sub-slice of data, not a copy.
func parseECPointFormats(data []byte) []uint8 {
	if len(data) == 0 {
		return nil
	}
	count, formats := int(data[0]), data[1:]
	if count > len(formats) {
		return nil
	}
	return formats[:count]
}
|
|
|
|
// parseSupportedVersions decodes the versions of a supported_versions
// extension body (extension type 0x002b).
//
// data starts with a 1-byte list length followed by 2-byte versions. At most
// list_len/2 versions are read, fewer when data runs out first. nil is
// returned when no version could be read.
func parseSupportedVersions(data []byte) []uint16 {
	if len(data) == 0 {
		return nil
	}
	rest := data[1:]

	var out []uint16
	for remaining := int(data[0]) / 2; remaining > 0 && len(rest) >= 2; remaining-- {
		out = append(out, binary.BigEndian.Uint16(rest))
		rest = rest[2:]
	}
	return out
}
|
|
|
|
// isGREASE reports whether v is a GREASE value (RFC 8701).
//
// GREASE values are made of two identical bytes whose low nibble is 0xA:
// 0x0A0A, 0x1A1A, ..., 0xFAFA.
func isGREASE(v uint16) bool {
	hi, lo := uint8(v>>8), uint8(v)
	return hi == lo && lo&0x0F == 0x0A
}
|
|
|
|
// tlsVersionString maps a protocol version code to the two-character version
// token used in a JA4 fingerprint.
//
// It covers TLS 1.0-1.3, SSL 2.0/3.0 and DTLS 1.0/1.2/1.3; any other value
// yields "00".
func tlsVersionString(v uint16) string {
	switch v {
	case 0x0304:
		return "13"
	case 0x0303:
		return "12"
	case 0x0302:
		return "11"
	case 0x0301:
		return "10"
	case 0x0300: // SSL 3.0
		return "s3"
	case 0x0002: // SSL 2.0
		return "s2"
	case 0xfeff: // DTLS 1.0
		return "d1"
	case 0xfefd: // DTLS 1.2
		return "d2"
	case 0xfefc: // DTLS 1.3
		return "d3"
	default:
		return "00"
	}
}
|
|
|
|
// ComputeJA4 calcule l'empreinte JA4 selon la spécification FoxIO.
|
|
//
|
|
// Format: t{tls_ver}{sni}{cipher_count}{ext_count}_{sorted_ciphers_sha256[:12]}_{sorted_exts_alpn_sha256[:12]}
|
|
func ComputeJA4(ch *ClientHello) string {
|
|
// --- Protocole : toujours "t" (TCP) ---
|
|
proto := "t"
|
|
|
|
// --- Version TLS : version la plus haute annoncée ---
|
|
var tlsVer uint16
|
|
for _, v := range ch.SupportedVersions {
|
|
if !isGREASE(v) && v > tlsVer {
|
|
tlsVer = v
|
|
}
|
|
}
|
|
if tlsVer == 0 {
|
|
// Fallback : version du handshake
|
|
tlsVer = ch.HandshakeVersion
|
|
}
|
|
verStr := tlsVersionString(tlsVer)
|
|
|
|
// --- SNI : "d" si présent, "i" si absent ---
|
|
sniFlag := "i"
|
|
if ch.SNI != "" {
|
|
sniFlag = "d"
|
|
}
|
|
|
|
// --- Comptage des cipher suites (sans GREASE) ---
|
|
var ciphers []uint16
|
|
for _, cs := range ch.CipherSuites {
|
|
if !isGREASE(cs) {
|
|
ciphers = append(ciphers, cs)
|
|
}
|
|
}
|
|
cipherCount := fmt.Sprintf("%02d", len(ciphers))
|
|
|
|
// --- Comptage des extensions (sans GREASE, sans SNI 0x0000) ---
|
|
var extensions []uint16
|
|
for _, ext := range ch.Extensions {
|
|
if isGREASE(ext.Type) {
|
|
continue
|
|
}
|
|
if ext.Type == 0x0000 { // SNI exclue du comptage
|
|
continue
|
|
}
|
|
extensions = append(extensions, ext.Type)
|
|
}
|
|
extCount := fmt.Sprintf("%02d", len(extensions))
|
|
|
|
// --- Partie 1 : identifiant de base ---
|
|
part1 := proto + verStr + sniFlag + cipherCount + extCount
|
|
|
|
// --- Partie 2 : SHA-256 des cipher suites triées (12 premiers hex chars) ---
|
|
sortedCiphers := make([]uint16, len(ciphers))
|
|
copy(sortedCiphers, ciphers)
|
|
sort.Slice(sortedCiphers, func(i, j int) bool { return sortedCiphers[i] < sortedCiphers[j] })
|
|
|
|
cipherStrings := make([]string, len(sortedCiphers))
|
|
for i, cs := range sortedCiphers {
|
|
cipherStrings[i] = fmt.Sprintf("%04x", cs)
|
|
}
|
|
cipherRaw := strings.Join(cipherStrings, ",")
|
|
cipherHash := sha256.Sum256([]byte(cipherRaw))
|
|
part2 := hex.EncodeToString(cipherHash[:])[:12]
|
|
|
|
// --- Partie 3 : SHA-256 des extensions triées + ALPN (12 premiers hex chars) ---
|
|
sortedExts := make([]uint16, len(extensions))
|
|
copy(sortedExts, extensions)
|
|
sort.Slice(sortedExts, func(i, j int) bool { return sortedExts[i] < sortedExts[j] })
|
|
|
|
extStrings := make([]string, len(sortedExts))
|
|
for i, e := range sortedExts {
|
|
extStrings[i] = fmt.Sprintf("%04x", e)
|
|
}
|
|
extRaw := strings.Join(extStrings, ",")
|
|
|
|
// Premier protocole ALPN (ou "00" si absent)
|
|
alpnFirst := "00"
|
|
if len(ch.ALPN) > 0 {
|
|
alpnFirst = ch.ALPN[0]
|
|
}
|
|
|
|
extAlpnRaw := extRaw + "_" + alpnFirst
|
|
extHash := sha256.Sum256([]byte(extAlpnRaw))
|
|
part3 := hex.EncodeToString(extHash[:])[:12]
|
|
|
|
return part1 + "_" + part2 + "_" + part3
|
|
}
|