feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized
Services: - ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap) - logcorrelator: JA4 log correlation engine (Go, ClickHouse) - mod_reqin_log: Apache module (C, JSON request logging) - bot_detector: ML bot detection pipeline (Python) - dashboard: FastAPI/Streamlit analytics UI (Python) Shared libraries: - shared/go/ja4common: logger, config, shutdown, ipfilter (Go module) - shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package) - shared/clickhouse/: canonical SQL migrations (10 files) Build & packaging: - Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10) - go.work workspace linking sentinel, correlator, ja4common - Makefile with test-all, build-all, rpm-* targets Fixes applied: - go.work: 1.21 → 1.24.6 (required by sentinel) - correlator Dockerfiles: golang:1.21 → golang:1.24 - replace directives in go.mod for ja4common local path - pyproject.toml: setuptools.backends → setuptools.build_meta - Removed static libpcap linking (unavailable on Rocky 9) - Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32) - Rewrote corrupted test files (logger_test.go × 2) Test coverage: - correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%) - sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse) Documentation: - README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
171
services/sentinel/internal/fingerprint/engine.go
Normal file
171
services/sentinel/internal/fingerprint/engine.go
Normal file
@ -0,0 +1,171 @@
|
||||
// Package fingerprint provides JA4/JA3 fingerprint generation for TLS ClientHello messages.
|
||||
package fingerprint
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
"github.com/antitbone/ja4/sentinel/api"
|
||||
|
||||
tlsfingerprint "github.com/psanford/tlsfingerprint"
|
||||
)
|
||||
|
||||
// EngineImpl implements the api.Engine interface for fingerprint generation.
// It is an empty struct, so an engine value carries no per-instance state.
|
||||
type EngineImpl struct{}
|
||||
|
||||
// NewEngine creates a new fingerprint engine
|
||||
func NewEngine() *EngineImpl {
|
||||
return &EngineImpl{}
|
||||
}
|
||||
|
||||
// FromClientHello generates JA4 (and optionally JA3) fingerprints from a TLS ClientHello
|
||||
// Note: JA4 hash portion is extracted for internal use but NOT serialized to LogRecord
|
||||
// as the JA4 format already includes its own hash portions (per architecture.yml)
|
||||
func (e *EngineImpl) FromClientHello(ch api.TLSClientHello) (*api.Fingerprints, error) {
|
||||
if len(ch.Payload) == 0 {
|
||||
return nil, fmt.Errorf("empty ClientHello payload from %s:%d -> %s:%d",
|
||||
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort)
|
||||
}
|
||||
|
||||
// Parse the ClientHello using tlsfingerprint
|
||||
fp, err := tlsfingerprint.ParseClientHello(ch.Payload)
|
||||
if err != nil {
|
||||
// Try to sanitize truncated extensions and retry
|
||||
sanitized := sanitizeClientHelloExtensions(ch.Payload)
|
||||
if sanitized != nil {
|
||||
fp, err = tlsfingerprint.ParseClientHello(sanitized)
|
||||
}
|
||||
if err != nil {
|
||||
sanitizeStatus := "unavailable"
|
||||
if sanitized != nil {
|
||||
sanitizeStatus = "failed"
|
||||
}
|
||||
return nil, fmt.Errorf("fingerprint generation failed for %s:%d -> %s:%d (conn_id=%s, payload_len=%d, tls_version=%s, sni=%s, sanitization=%s): %w",
|
||||
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort, ch.ConnID, len(ch.Payload), ch.TLSVersion, ch.SNI, sanitizeStatus, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Generate JA4 fingerprint
|
||||
// Note: JA4 string format already includes the hash portion
|
||||
// e.g., "t13d1516h2_8daaf6152771_02cb136f2775" where the last part is the SHA256 hash
|
||||
ja4 := fp.JA4String()
|
||||
|
||||
// Generate JA3 fingerprint and its MD5 hash
|
||||
ja3 := fp.JA3String()
|
||||
ja3Hash := fp.JA3Hash()
|
||||
|
||||
// Extract JA4 hash portion (last segment after underscore)
|
||||
// JA4 format: <tls_ver><ciphers><extensions>_<sni_hash>_<cipher_extension_hash>
|
||||
// This is kept for internal use but NOT serialized to LogRecord
|
||||
ja4Hash := extractJA4Hash(ja4)
|
||||
|
||||
return &api.Fingerprints{
|
||||
JA4: ja4,
|
||||
JA4Hash: ja4Hash, // Internal use only - not serialized to LogRecord
|
||||
JA3: ja3,
|
||||
JA3Hash: ja3Hash,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// extractJA4Hash returns everything that follows the first underscore of a JA4
// string, i.e. "t13d1516h2_8daaf6152771_02cb136f2775" yields
// "8daaf6152771_02cb136f2775". A string without any underscore yields "".
func extractJA4Hash(ja4 string) string {
	// '_' is a single ASCII byte, so scanning bytes finds the same position as
	// scanning runes would.
	for idx := 0; idx < len(ja4); idx++ {
		if ja4[idx] != '_' {
			continue
		}
		return ja4[idx+1:]
	}
	return ""
}
|
||||
|
||||
// sanitizeClientHelloExtensions repairs a ClientHello whose extensions block
// ends with an extension that claims more body bytes than the block contains.
//
// data must start with a TLS handshake record (content type 0x16) that fully
// contains a ClientHello handshake message (type 0x01). The function walks the
// extensions, keeps the leading run of complete ones, and returns a new record
// in which the extensions length, the handshake length and the record length
// all end exactly at the last complete extension. Anything after that point
// (the truncated extension, plus any stray bytes following the extensions block
// inside the message or the record) is dropped, so the result never ends with
// leftover bytes of the broken extension.
//
// The input slice is never modified. nil is returned when the payload is not a
// complete ClientHello record, when any fixed field cannot be read, or when all
// declared extensions are already complete (nothing to fix).
func sanitizeClientHelloExtensions(data []byte) []byte {
	const (
		recordHeaderLen    = 5 // content type (1) + version (2) + length (2)
		handshakeHeaderLen = 4 // message type (1) + length (3)
	)

	if len(data) < recordHeaderLen || data[0] != 0x16 {
		return nil
	}
	recordLen := int(binary.BigEndian.Uint16(data[3:5]))
	if len(data) < recordHeaderLen+recordLen {
		return nil
	}
	payload := data[recordHeaderLen : recordHeaderLen+recordLen]
	if len(payload) < handshakeHeaderLen || payload[0] != 0x01 {
		return nil
	}
	helloLen := int(payload[1])<<16 | int(payload[2])<<8 | int(payload[3])
	if len(payload) < handshakeHeaderLen+helloLen {
		return nil
	}
	hello := payload[handshakeHeaderLen : handshakeHeaderLen+helloLen]

	// Skip the fixed and length-prefixed fields that precede the extensions.
	offset := 2 + 32 // version + random
	if len(hello) < offset+1 {
		return nil
	}
	offset += 1 + int(hello[offset]) // session ID
	if len(hello) < offset+2 {
		return nil
	}
	offset += 2 + int(binary.BigEndian.Uint16(hello[offset:])) // cipher suites
	if len(hello) < offset+1 {
		return nil
	}
	offset += 1 + int(hello[offset]) // compression methods
	if len(hello) < offset+2 {
		return nil
	}

	extLenOffset := offset // position of the extensions length field in hello
	declaredExtLen := int(binary.BigEndian.Uint16(hello[offset:]))
	extStart := offset + 2
	if len(hello) < extStart+declaredExtLen {
		return nil
	}
	extData := hello[extStart : extStart+declaredExtLen]

	// validLen is the number of leading bytes of extData made up of complete
	// extensions (2-byte type, 2-byte body length, body).
	validLen := 0
	for validLen+4 <= len(extData) {
		bodyLen := int(binary.BigEndian.Uint16(extData[validLen+2:]))
		if validLen+4+bodyLen > len(extData) {
			break // this extension is truncated
		}
		validLen += 4 + bodyLen
	}
	if validLen == declaredExtLen {
		return nil // no truncation found, nothing to fix
	}

	// Derive every length from the end of the last complete extension rather
	// than subtracting a delta: subtracting would leave bytes of the truncated
	// extension inside the message whenever data follows the extensions block.
	newHelloLen := extStart + validLen
	newRecordLen := handshakeHeaderLen + newHelloLen

	fixed := make([]byte, recordHeaderLen+newRecordLen)
	copy(fixed, data)

	// TLS record length.
	binary.BigEndian.PutUint16(fixed[3:5], uint16(newRecordLen))
	// ClientHello handshake length (24-bit big-endian).
	fixed[recordHeaderLen+1] = byte(newHelloLen >> 16)
	fixed[recordHeaderLen+2] = byte(newHelloLen >> 8)
	fixed[recordHeaderLen+3] = byte(newHelloLen)
	// Extensions length.
	binary.BigEndian.PutUint16(fixed[recordHeaderLen+handshakeHeaderLen+extLenOffset:], uint16(validLen))

	return fixed
}
|
||||
Reference in New Issue
Block a user