Files
ja4-platform/services/sentinel/internal/fingerprint/engine.go
toto d469e39da7 feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized
Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-07 16:42:59 +02:00

172 lines
5.2 KiB
Go

// Package fingerprint provides JA4/JA3 fingerprint generation for TLS ClientHello
package fingerprint
import (
"encoding/binary"
"fmt"
"github.com/antitbone/ja4/sentinel/api"
tlsfingerprint "github.com/psanford/tlsfingerprint"
)
// EngineImpl implements the api.Engine interface for fingerprint generation
type EngineImpl struct{}
// NewEngine creates a new fingerprint engine
func NewEngine() *EngineImpl {
return &EngineImpl{}
}
// FromClientHello generates JA4 (and optionally JA3) fingerprints from a TLS ClientHello
// Note: JA4 hash portion is extracted for internal use but NOT serialized to LogRecord
// as the JA4 format already includes its own hash portions (per architecture.yml)
func (e *EngineImpl) FromClientHello(ch api.TLSClientHello) (*api.Fingerprints, error) {
if len(ch.Payload) == 0 {
return nil, fmt.Errorf("empty ClientHello payload from %s:%d -> %s:%d",
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort)
}
// Parse the ClientHello using tlsfingerprint
fp, err := tlsfingerprint.ParseClientHello(ch.Payload)
if err != nil {
// Try to sanitize truncated extensions and retry
sanitized := sanitizeClientHelloExtensions(ch.Payload)
if sanitized != nil {
fp, err = tlsfingerprint.ParseClientHello(sanitized)
}
if err != nil {
sanitizeStatus := "unavailable"
if sanitized != nil {
sanitizeStatus = "failed"
}
return nil, fmt.Errorf("fingerprint generation failed for %s:%d -> %s:%d (conn_id=%s, payload_len=%d, tls_version=%s, sni=%s, sanitization=%s): %w",
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort, ch.ConnID, len(ch.Payload), ch.TLSVersion, ch.SNI, sanitizeStatus, err)
}
}
// Generate JA4 fingerprint
// Note: JA4 string format already includes the hash portion
// e.g., "t13d1516h2_8daaf6152771_02cb136f2775" where the last part is the SHA256 hash
ja4 := fp.JA4String()
// Generate JA3 fingerprint and its MD5 hash
ja3 := fp.JA3String()
ja3Hash := fp.JA3Hash()
// Extract JA4 hash portion (last segment after underscore)
// JA4 format: <tls_ver><ciphers><extensions>_<sni_hash>_<cipher_extension_hash>
// This is kept for internal use but NOT serialized to LogRecord
ja4Hash := extractJA4Hash(ja4)
return &api.Fingerprints{
JA4: ja4,
JA4Hash: ja4Hash, // Internal use only - not serialized to LogRecord
JA3: ja3,
JA3Hash: ja3Hash,
}, nil
}
// extractJA4Hash extracts the hash portion from a JA4 string
// JA4 format: <base>_<sni_hash>_<cipher_hash> -> returns "<sni_hash>_<cipher_hash>"
func extractJA4Hash(ja4 string) string {
// JA4 string format: t13d1516h2_8daaf6152771_02cb136f2775
// We extract everything after the first underscore as the "hash" portion
for i, c := range ja4 {
if c == '_' {
return ja4[i+1:]
}
}
return ""
}
// sanitizeClientHelloExtensions fixes ClientHellos with truncated extension data
// by adjusting the extensions length to include only complete extensions.
// Returns a corrected copy, or nil if the payload cannot be fixed.
func sanitizeClientHelloExtensions(data []byte) []byte {
if len(data) < 5 || data[0] != 0x16 {
return nil
}
recordLen := int(data[3])<<8 | int(data[4])
if len(data) < 5+recordLen {
return nil
}
payload := data[5 : 5+recordLen]
if len(payload) < 4 || payload[0] != 0x01 {
return nil
}
helloLen := int(payload[1])<<16 | int(payload[2])<<8 | int(payload[3])
if len(payload) < 4+helloLen {
return nil
}
hello := payload[4 : 4+helloLen]
// Skip through ClientHello fields to reach extensions
offset := 2 + 32 // version + random
if len(hello) < offset+1 {
return nil
}
offset += 1 + int(hello[offset]) // session ID
if len(hello) < offset+2 {
return nil
}
csLen := int(hello[offset])<<8 | int(hello[offset+1])
offset += 2 + csLen // cipher suites
if len(hello) < offset+1 {
return nil
}
offset += 1 + int(hello[offset]) // compression methods
if len(hello) < offset+2 {
return nil
}
extLenOffset := offset // position of extensions length field
declaredExtLen := int(hello[offset])<<8 | int(hello[offset+1])
offset += 2
extStart := offset
if len(hello) < extStart+declaredExtLen {
return nil
}
extData := hello[extStart : extStart+declaredExtLen]
// Walk extensions, find how many complete ones exist
validLen := 0
pos := 0
for pos < len(extData) {
if pos+4 > len(extData) {
break
}
extBodyLen := int(extData[pos+2])<<8 | int(extData[pos+3])
if pos+4+extBodyLen > len(extData) {
break // this extension is truncated
}
pos += 4 + extBodyLen
validLen = pos
}
if validLen == declaredExtLen {
return nil // no truncation found, nothing to fix
}
// Build a corrected copy with adjusted extensions length
fixed := make([]byte, len(data))
copy(fixed, data)
// Absolute offset of extensions length field within data
extLenAbs := 5 + 4 + extLenOffset
diff := declaredExtLen - validLen
// Update extensions length
binary.BigEndian.PutUint16(fixed[extLenAbs:], uint16(validLen))
// Update ClientHello handshake length
newHelloLen := helloLen - diff
fixed[5+1] = byte(newHelloLen >> 16)
fixed[5+2] = byte(newHelloLen >> 8)
fixed[5+3] = byte(newHelloLen)
// Update TLS record length
newRecordLen := recordLen - diff
binary.BigEndian.PutUint16(fixed[3:5], uint16(newRecordLen))
return fixed[:5+newRecordLen]
}