Services: - ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap) - logcorrelator: JA4 log correlation engine (Go, ClickHouse) - mod_reqin_log: Apache module (C, JSON request logging) - bot_detector: ML bot detection pipeline (Python) - dashboard: FastAPI/Streamlit analytics UI (Python) Shared libraries: - shared/go/ja4common: logger, config, shutdown, ipfilter (Go module) - shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package) - shared/clickhouse/: canonical SQL migrations (10 files) Build & packaging: - Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10) - go.work workspace linking sentinel, correlator, ja4common - Makefile with test-all, build-all, rpm-* targets Fixes applied: - go.work: 1.21 → 1.24.6 (required by sentinel) - correlator Dockerfiles: golang:1.21 → golang:1.24 - replace directives in go.mod for ja4common local path - pyproject.toml: setuptools.backends → setuptools.build_meta - Removed static libpcap linking (unavailable on Rocky 9) - Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32) - Rewrote corrupted test files (logger_test.go × 2) Test coverage: - correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%) - sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse) Documentation: - README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
172 lines
5.2 KiB
Go
172 lines
5.2 KiB
Go
// Package fingerprint generates JA4 and JA3 fingerprints from TLS ClientHello messages.
|
|
package fingerprint
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"fmt"
|
|
|
|
"github.com/antitbone/ja4/sentinel/api"
|
|
|
|
tlsfingerprint "github.com/psanford/tlsfingerprint"
|
|
)
|
|
|
|
// EngineImpl is the fingerprint engine of this package. It is intended to
// satisfy the api.Engine interface and carries no fields of its own.
type EngineImpl struct{}
|
|
|
|
// NewEngine creates a new fingerprint engine
|
|
func NewEngine() *EngineImpl {
|
|
return &EngineImpl{}
|
|
}
|
|
|
|
// FromClientHello generates JA4 (and optionally JA3) fingerprints from a TLS ClientHello
|
|
// Note: JA4 hash portion is extracted for internal use but NOT serialized to LogRecord
|
|
// as the JA4 format already includes its own hash portions (per architecture.yml)
|
|
func (e *EngineImpl) FromClientHello(ch api.TLSClientHello) (*api.Fingerprints, error) {
|
|
if len(ch.Payload) == 0 {
|
|
return nil, fmt.Errorf("empty ClientHello payload from %s:%d -> %s:%d",
|
|
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort)
|
|
}
|
|
|
|
// Parse the ClientHello using tlsfingerprint
|
|
fp, err := tlsfingerprint.ParseClientHello(ch.Payload)
|
|
if err != nil {
|
|
// Try to sanitize truncated extensions and retry
|
|
sanitized := sanitizeClientHelloExtensions(ch.Payload)
|
|
if sanitized != nil {
|
|
fp, err = tlsfingerprint.ParseClientHello(sanitized)
|
|
}
|
|
if err != nil {
|
|
sanitizeStatus := "unavailable"
|
|
if sanitized != nil {
|
|
sanitizeStatus = "failed"
|
|
}
|
|
return nil, fmt.Errorf("fingerprint generation failed for %s:%d -> %s:%d (conn_id=%s, payload_len=%d, tls_version=%s, sni=%s, sanitization=%s): %w",
|
|
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort, ch.ConnID, len(ch.Payload), ch.TLSVersion, ch.SNI, sanitizeStatus, err)
|
|
}
|
|
}
|
|
|
|
// Generate JA4 fingerprint
|
|
// Note: JA4 string format already includes the hash portion
|
|
// e.g., "t13d1516h2_8daaf6152771_02cb136f2775" where the last part is the SHA256 hash
|
|
ja4 := fp.JA4String()
|
|
|
|
// Generate JA3 fingerprint and its MD5 hash
|
|
ja3 := fp.JA3String()
|
|
ja3Hash := fp.JA3Hash()
|
|
|
|
// Extract JA4 hash portion (last segment after underscore)
|
|
// JA4 format: <tls_ver><ciphers><extensions>_<sni_hash>_<cipher_extension_hash>
|
|
// This is kept for internal use but NOT serialized to LogRecord
|
|
ja4Hash := extractJA4Hash(ja4)
|
|
|
|
return &api.Fingerprints{
|
|
JA4: ja4,
|
|
JA4Hash: ja4Hash, // Internal use only - not serialized to LogRecord
|
|
JA3: ja3,
|
|
JA3Hash: ja3Hash,
|
|
}, nil
|
|
}
|
|
|
|
// extractJA4Hash returns the part of a JA4 string that follows its first
// underscore, i.e. "<sni_hash>_<cipher_hash>" for an input shaped like
// "<base>_<sni_hash>_<cipher_hash>".
//
// Example: "t13d1516h2_8daaf6152771_02cb136f2775" yields
// "8daaf6152771_02cb136f2775". The empty string is returned when the input
// contains no underscore.
func extractJA4Hash(ja4 string) string {
	idx := 0
	// '_' is ASCII, so scanning bytes finds the same position a rune scan would.
	for idx < len(ja4) && ja4[idx] != '_' {
		idx++
	}
	if idx == len(ja4) {
		return ""
	}
	return ja4[idx+1:]
}
|
|
|
|
// sanitizeClientHelloExtensions repairs a TLS record whose ClientHello
// extensions block ends with an incomplete extension (an extension header or
// body that overruns the declared extensions length).
//
// data must start with a TLS handshake record (content type 0x16) that is
// fully present in data and whose first handshake message is a ClientHello
// (type 0x01) fully contained in that record.
//
// On success it returns a newly allocated record in which the bytes of the
// incomplete extension(s) have been removed and the extensions length, the
// handshake length and the record length have each been reduced by the number
// of removed bytes. Any bytes of the record that follow the extensions block
// are preserved after the last complete extension. Bytes in data beyond the
// first record are not included. data itself is never modified.
//
// It returns nil when data is not such a record, when any length field points
// past the available bytes, or when every extension is already complete
// (nothing to fix).
func sanitizeClientHelloExtensions(data []byte) []byte {
	const (
		recordHeaderLen    = 5 // content type (1) + version (2) + length (2)
		handshakeHeaderLen = 4 // message type (1) + length (3)
		extHeaderLen       = 4 // extension type (2) + length (2)
	)

	// TLS record layer.
	if len(data) < recordHeaderLen || data[0] != 0x16 {
		return nil
	}
	recordLen := int(binary.BigEndian.Uint16(data[3:5]))
	recordEnd := recordHeaderLen + recordLen
	if len(data) < recordEnd {
		return nil
	}
	record := data[recordHeaderLen:recordEnd]

	// Handshake header: must be a ClientHello that fits in this record.
	if len(record) < handshakeHeaderLen || record[0] != 0x01 {
		return nil
	}
	helloLen := int(record[1])<<16 | int(record[2])<<8 | int(record[3])
	if len(record) < handshakeHeaderLen+helloLen {
		return nil
	}
	hello := record[handshakeHeaderLen : handshakeHeaderLen+helloLen]

	// Skip the fixed and length-prefixed ClientHello fields that precede the
	// extensions block, bounds-checking before every read.
	offset := 2 + 32 // version + random
	if len(hello) < offset+1 {
		return nil
	}
	offset += 1 + int(hello[offset]) // session ID
	if len(hello) < offset+2 {
		return nil
	}
	offset += 2 + int(binary.BigEndian.Uint16(hello[offset:])) // cipher suites
	if len(hello) < offset+1 {
		return nil
	}
	offset += 1 + int(hello[offset]) // compression methods
	if len(hello) < offset+2 {
		return nil
	}

	extLenOffset := offset // position of the extensions length field in hello
	declaredExtLen := int(binary.BigEndian.Uint16(hello[offset:]))
	extStart := offset + 2
	if len(hello) < extStart+declaredExtLen {
		return nil
	}
	extData := hello[extStart : extStart+declaredExtLen]

	// Walk the extensions; validLen ends up as the byte length of the leading
	// run of extensions that fit entirely inside the declared block.
	validLen := 0
	for validLen+extHeaderLen <= len(extData) {
		bodyLen := int(binary.BigEndian.Uint16(extData[validLen+2:]))
		if validLen+extHeaderLen+bodyLen > len(extData) {
			break // this extension is truncated
		}
		validLen += extHeaderLen + bodyLen
	}
	if validLen == declaredExtLen {
		return nil // no truncation found, nothing to fix
	}

	// The bytes to drop are the tail of the extensions block, which is not
	// necessarily the tail of the record. Cut them out where they sit so the
	// shortened length fields describe the bytes that are actually kept.
	dropped := declaredExtLen - validLen
	helloAbs := recordHeaderLen + handshakeHeaderLen // offset of hello in data
	gapStart := helloAbs + extStart + validLen
	gapEnd := gapStart + dropped

	fixed := make([]byte, 0, recordEnd-dropped)
	fixed = append(fixed, data[:gapStart]...)
	fixed = append(fixed, data[gapEnd:recordEnd]...)

	// Extensions length.
	binary.BigEndian.PutUint16(fixed[helloAbs+extLenOffset:], uint16(validLen))
	// ClientHello handshake length (24-bit big-endian).
	newHelloLen := helloLen - dropped
	fixed[recordHeaderLen+1] = byte(newHelloLen >> 16)
	fixed[recordHeaderLen+2] = byte(newHelloLen >> 8)
	fixed[recordHeaderLen+3] = byte(newHelloLen)
	// TLS record length.
	binary.BigEndian.PutUint16(fixed[3:5], uint16(recordLen-dropped))

	return fixed
}
|