Complete implementation of HTTP/2 passive fingerprinting per thesis §2.5.3: mod-reqin-log (C module): - Replace connection-level filter with ap_hook_process_connection (APR_HOOK_FIRST) to capture H2 preface before mod_http2 takes over the connection - AP_MODE_SPECULATIVE read of 512 bytes from c->input_filters - Parse SETTINGS, WINDOW_UPDATE, PRIORITY flags, pseudo-header order - Output individual SETTINGS params as separate JSON fields (IDs 1-6, 8) - Read H2 notes from c1 (master connection) for mod_http2 secondary conns - Fix header_order_signature JSON length bug (26→strlen) ClickHouse schema: - Add 8 new columns to http_logs: h2_has_priority, h2_header_table_size, h2_enable_push, h2_max_concurrent_streams, h2_initial_window_size, h2_max_frame_size, h2_max_header_list_size, h2_enable_connect_protocol - Use Int32/Int64 with DEFAULT -1 to distinguish absent vs zero - Update mv_http_logs to extract individual fields via JSONHas/JSONExtractInt - Migration 04_http2_fields.sql updated for existing deployments Correlator: - Accept both timestamp_ns and timestamp field names (backward compat) Integration: - Enable HTTP/2 in Apache: Protocols h2 http/1.1 in httpd-integration.conf Validated end-to-end via Playwright: H2 curl traffic → mod-reqin-log → correlator → ClickHouse with all 12 H2 columns populated correctly. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
193 lines
5.7 KiB
Go
193 lines
5.7 KiB
Go
// Package fingerprint generates JA4, JA4T and JA3 fingerprints from captured TLS ClientHello messages.
|
|
package fingerprint
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/antitbone/ja4/sentinel/api"
|
|
|
|
tlsfingerprint "github.com/psanford/tlsfingerprint"
|
|
)
|
|
|
|
// EngineImpl is the fingerprint engine. It carries no state; per its original
// documentation it is the implementation of the api.Engine interface.
type EngineImpl struct{}

// NewEngine returns a ready-to-use, stateless fingerprint engine.
func NewEngine() *EngineImpl {
	return new(EngineImpl)
}
|
|
|
|
// FromClientHello generates JA4 (and optionally JA3) fingerprints from a TLS ClientHello
|
|
// Note: JA4 hash portion is extracted for internal use but NOT serialized to LogRecord
|
|
// as the JA4 format already includes its own hash portions (per architecture.yml)
|
|
func (e *EngineImpl) FromClientHello(ch api.TLSClientHello) (*api.Fingerprints, error) {
|
|
if len(ch.Payload) == 0 {
|
|
return nil, fmt.Errorf("empty ClientHello payload from %s:%d -> %s:%d",
|
|
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort)
|
|
}
|
|
|
|
// Parse the ClientHello using tlsfingerprint
|
|
fp, err := tlsfingerprint.ParseClientHello(ch.Payload)
|
|
if err != nil {
|
|
// Try to sanitize truncated extensions and retry
|
|
sanitized := sanitizeClientHelloExtensions(ch.Payload)
|
|
if sanitized != nil {
|
|
fp, err = tlsfingerprint.ParseClientHello(sanitized)
|
|
}
|
|
if err != nil {
|
|
sanitizeStatus := "unavailable"
|
|
if sanitized != nil {
|
|
sanitizeStatus = "failed"
|
|
}
|
|
return nil, fmt.Errorf("fingerprint generation failed for %s:%d -> %s:%d (conn_id=%s, payload_len=%d, tls_version=%s, sni=%s, sanitization=%s): %w",
|
|
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort, ch.ConnID, len(ch.Payload), ch.TLSVersion, ch.SNI, sanitizeStatus, err)
|
|
}
|
|
}
|
|
|
|
// Generate JA4 fingerprint
|
|
// Note: JA4 string format already includes the hash portion
|
|
// e.g., "t13d1516h2_8daaf6152771_02cb136f2775" where the last part is the SHA256 hash
|
|
ja4 := fp.JA4String()
|
|
|
|
// Generate JA3 fingerprint and its MD5 hash
|
|
ja3 := fp.JA3String()
|
|
ja3Hash := fp.JA3Hash()
|
|
|
|
// Extract JA4 hash portion (last segment after underscore)
|
|
// JA4 format: <tls_ver><ciphers><extensions>_<sni_hash>_<cipher_extension_hash>
|
|
// This is kept for internal use but NOT serialized to LogRecord
|
|
ja4Hash := extractJA4Hash(ja4)
|
|
|
|
// Generate JA4T fingerprint from TCP SYN parameters
|
|
ja4t := computeJA4T(ch.TCPMeta)
|
|
|
|
return &api.Fingerprints{
|
|
JA4: ja4,
|
|
JA4Hash: ja4Hash, // Internal use only - not serialized to LogRecord
|
|
JA4T: ja4t,
|
|
JA3: ja3,
|
|
JA3Hash: ja3Hash,
|
|
}, nil
|
|
}
|
|
|
|
// computeJA4T génère l'empreinte JA4T à partir des métadonnées TCP SYN.
|
|
// Format : {WindowSize}_{OptionKinds}_{WindowScale}_{MSS}
|
|
func computeJA4T(tcp api.TCPMeta) string {
|
|
optStr := ""
|
|
if len(tcp.OptionKinds) > 0 {
|
|
parts := make([]string, len(tcp.OptionKinds))
|
|
for i, k := range tcp.OptionKinds {
|
|
parts[i] = strconv.Itoa(int(k))
|
|
}
|
|
optStr = strings.Join(parts, "-")
|
|
}
|
|
|
|
return fmt.Sprintf("%d_%s_%d_%d", tcp.WindowSize, optStr, tcp.WindowScale, tcp.MSS)
|
|
}
|
|
|
|
// extractJA4Hash returns the hash portion of a JA4 string, i.e. everything
// after the first underscore.
//
// For "t13d1516h2_8daaf6152771_02cb136f2775" the result is
// "8daaf6152771_02cb136f2775". The empty string is returned when the input
// contains no underscore.
func extractJA4Hash(ja4 string) string {
	sep := strings.IndexByte(ja4, '_')
	if sep < 0 {
		return ""
	}
	return ja4[sep+1:]
}
|
|
|
|
// sanitizeClientHelloExtensions repairs a TLS record whose ClientHello
// extensions block ends in a truncated extension (one whose declared body
// length runs past the end of the block).
//
// data must start with a complete TLS handshake record (content type 0x16)
// whose first message is a ClientHello (handshake type 0x01). The extensions
// are walked in order; the first incomplete one and everything after it inside
// the extensions block are removed, and the extensions length, the handshake
// length and the record length are reduced accordingly. Bytes of the record
// that follow the extensions block are preserved. Bytes in data beyond the
// first record are dropped.
//
// It returns a corrected copy (data itself is never modified), or nil when the
// payload is not a well-formed ClientHello record up to the extensions block,
// or when every extension is already complete and there is nothing to fix.
func sanitizeClientHelloExtensions(data []byte) []byte {
	const (
		recordHeaderLen    = 5 // content type (1) + version (2) + length (2)
		handshakeHeaderLen = 4 // message type (1) + length (3)
	)

	if len(data) < recordHeaderLen || data[0] != 0x16 {
		return nil
	}
	recordLen := int(data[3])<<8 | int(data[4])
	recordEnd := recordHeaderLen + recordLen
	if len(data) < recordEnd {
		return nil
	}
	payload := data[recordHeaderLen:recordEnd]
	if len(payload) < handshakeHeaderLen || payload[0] != 0x01 {
		return nil
	}
	helloLen := int(payload[1])<<16 | int(payload[2])<<8 | int(payload[3])
	if len(payload) < handshakeHeaderLen+helloLen {
		return nil
	}
	hello := payload[handshakeHeaderLen : handshakeHeaderLen+helloLen]

	// Skip the fixed and length-prefixed fields that precede the extensions.
	offset := 2 + 32 // version + random
	if len(hello) < offset+1 {
		return nil
	}
	offset += 1 + int(hello[offset]) // session ID
	if len(hello) < offset+2 {
		return nil
	}
	offset += 2 + (int(hello[offset])<<8 | int(hello[offset+1])) // cipher suites
	if len(hello) < offset+1 {
		return nil
	}
	offset += 1 + int(hello[offset]) // compression methods
	if len(hello) < offset+2 {
		return nil
	}

	extLenOffset := offset // position of the extensions length field in hello
	declaredExtLen := int(hello[offset])<<8 | int(hello[offset+1])
	extStart := extLenOffset + 2
	extEnd := extStart + declaredExtLen
	if len(hello) < extEnd {
		return nil
	}
	extData := hello[extStart:extEnd]

	// Walk the extensions; validLen ends up as the length of the prefix that
	// consists of complete extensions only.
	validLen := 0
	for pos := 0; pos+4 <= len(extData); {
		bodyLen := int(extData[pos+2])<<8 | int(extData[pos+3])
		if pos+4+bodyLen > len(extData) {
			break // this extension is truncated
		}
		pos += 4 + bodyLen
		validLen = pos
	}
	if validLen == declaredExtLen {
		return nil // no truncation found, nothing to fix
	}
	diff := declaredExtLen - validLen

	// Splice the truncated fragment out rather than trimming the record tail:
	// the fragment sits at the end of the extensions block, which is not
	// necessarily the end of the record.
	bodyStart := recordHeaderLen + handshakeHeaderLen // offset of hello within data
	fixed := make([]byte, 0, recordEnd-diff)
	fixed = append(fixed, data[:bodyStart+extStart+validLen]...)
	fixed = append(fixed, data[bodyStart+extEnd:recordEnd]...)

	// Extensions length.
	binary.BigEndian.PutUint16(fixed[bodyStart+extLenOffset:], uint16(validLen))
	// ClientHello handshake length (24-bit big-endian).
	newHelloLen := helloLen - diff
	fixed[recordHeaderLen+1] = byte(newHelloLen >> 16)
	fixed[recordHeaderLen+2] = byte(newHelloLen >> 8)
	fixed[recordHeaderLen+3] = byte(newHelloLen)
	// TLS record length.
	binary.BigEndian.PutUint16(fixed[3:5], uint16(recordLen-diff))

	return fixed
}
|