feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized

Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 16:42:59 +02:00
commit d469e39da7
278 changed files with 1621301 additions and 0 deletions

View File

@ -0,0 +1,171 @@
// Package fingerprint provides JA4/JA3 fingerprint generation for TLS ClientHello
package fingerprint
import (
"encoding/binary"
"fmt"
"github.com/antitbone/ja4/sentinel/api"
tlsfingerprint "github.com/psanford/tlsfingerprint"
)
// EngineImpl implements the api.Engine interface for fingerprint generation
type EngineImpl struct{}
// NewEngine creates a new fingerprint engine
func NewEngine() *EngineImpl {
return &EngineImpl{}
}
// FromClientHello generates JA4 (and optionally JA3) fingerprints from a TLS ClientHello
// Note: JA4 hash portion is extracted for internal use but NOT serialized to LogRecord
// as the JA4 format already includes its own hash portions (per architecture.yml)
func (e *EngineImpl) FromClientHello(ch api.TLSClientHello) (*api.Fingerprints, error) {
if len(ch.Payload) == 0 {
return nil, fmt.Errorf("empty ClientHello payload from %s:%d -> %s:%d",
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort)
}
// Parse the ClientHello using tlsfingerprint
fp, err := tlsfingerprint.ParseClientHello(ch.Payload)
if err != nil {
// Try to sanitize truncated extensions and retry
sanitized := sanitizeClientHelloExtensions(ch.Payload)
if sanitized != nil {
fp, err = tlsfingerprint.ParseClientHello(sanitized)
}
if err != nil {
sanitizeStatus := "unavailable"
if sanitized != nil {
sanitizeStatus = "failed"
}
return nil, fmt.Errorf("fingerprint generation failed for %s:%d -> %s:%d (conn_id=%s, payload_len=%d, tls_version=%s, sni=%s, sanitization=%s): %w",
ch.SrcIP, ch.SrcPort, ch.DstIP, ch.DstPort, ch.ConnID, len(ch.Payload), ch.TLSVersion, ch.SNI, sanitizeStatus, err)
}
}
// Generate JA4 fingerprint
// Note: JA4 string format already includes the hash portion
// e.g., "t13d1516h2_8daaf6152771_02cb136f2775" where the last part is the SHA256 hash
ja4 := fp.JA4String()
// Generate JA3 fingerprint and its MD5 hash
ja3 := fp.JA3String()
ja3Hash := fp.JA3Hash()
// Extract JA4 hash portion (last segment after underscore)
// JA4 format: <tls_ver><ciphers><extensions>_<sni_hash>_<cipher_extension_hash>
// This is kept for internal use but NOT serialized to LogRecord
ja4Hash := extractJA4Hash(ja4)
return &api.Fingerprints{
JA4: ja4,
JA4Hash: ja4Hash, // Internal use only - not serialized to LogRecord
JA3: ja3,
JA3Hash: ja3Hash,
}, nil
}
// extractJA4Hash extracts the hash portion from a JA4 string
// JA4 format: <base>_<sni_hash>_<cipher_hash> -> returns "<sni_hash>_<cipher_hash>"
func extractJA4Hash(ja4 string) string {
// JA4 string format: t13d1516h2_8daaf6152771_02cb136f2775
// We extract everything after the first underscore as the "hash" portion
for i, c := range ja4 {
if c == '_' {
return ja4[i+1:]
}
}
return ""
}
// sanitizeClientHelloExtensions fixes ClientHellos with truncated extension data
// by adjusting the extensions length to include only complete extensions.
// Returns a corrected copy, or nil if the payload cannot be fixed.
func sanitizeClientHelloExtensions(data []byte) []byte {
if len(data) < 5 || data[0] != 0x16 {
return nil
}
recordLen := int(data[3])<<8 | int(data[4])
if len(data) < 5+recordLen {
return nil
}
payload := data[5 : 5+recordLen]
if len(payload) < 4 || payload[0] != 0x01 {
return nil
}
helloLen := int(payload[1])<<16 | int(payload[2])<<8 | int(payload[3])
if len(payload) < 4+helloLen {
return nil
}
hello := payload[4 : 4+helloLen]
// Skip through ClientHello fields to reach extensions
offset := 2 + 32 // version + random
if len(hello) < offset+1 {
return nil
}
offset += 1 + int(hello[offset]) // session ID
if len(hello) < offset+2 {
return nil
}
csLen := int(hello[offset])<<8 | int(hello[offset+1])
offset += 2 + csLen // cipher suites
if len(hello) < offset+1 {
return nil
}
offset += 1 + int(hello[offset]) // compression methods
if len(hello) < offset+2 {
return nil
}
extLenOffset := offset // position of extensions length field
declaredExtLen := int(hello[offset])<<8 | int(hello[offset+1])
offset += 2
extStart := offset
if len(hello) < extStart+declaredExtLen {
return nil
}
extData := hello[extStart : extStart+declaredExtLen]
// Walk extensions, find how many complete ones exist
validLen := 0
pos := 0
for pos < len(extData) {
if pos+4 > len(extData) {
break
}
extBodyLen := int(extData[pos+2])<<8 | int(extData[pos+3])
if pos+4+extBodyLen > len(extData) {
break // this extension is truncated
}
pos += 4 + extBodyLen
validLen = pos
}
if validLen == declaredExtLen {
return nil // no truncation found, nothing to fix
}
// Build a corrected copy with adjusted extensions length
fixed := make([]byte, len(data))
copy(fixed, data)
// Absolute offset of extensions length field within data
extLenAbs := 5 + 4 + extLenOffset
diff := declaredExtLen - validLen
// Update extensions length
binary.BigEndian.PutUint16(fixed[extLenAbs:], uint16(validLen))
// Update ClientHello handshake length
newHelloLen := helloLen - diff
fixed[5+1] = byte(newHelloLen >> 16)
fixed[5+2] = byte(newHelloLen >> 8)
fixed[5+3] = byte(newHelloLen)
// Update TLS record length
newRecordLen := recordLen - diff
binary.BigEndian.PutUint16(fixed[3:5], uint16(newRecordLen))
return fixed[:5+newRecordLen]
}

View File

@ -0,0 +1,489 @@
package fingerprint
import (
"strings"
"testing"
"github.com/antitbone/ja4/sentinel/api"
tlsfingerprint "github.com/psanford/tlsfingerprint"
)
func TestFromClientHello(t *testing.T) {
tests := []struct {
name string
ch api.TLSClientHello
wantErr bool
}{
{
name: "empty payload",
ch: api.TLSClientHello{
Payload: []byte{},
},
wantErr: true,
},
{
name: "invalid payload",
ch: api.TLSClientHello{
Payload: []byte{0x00, 0x01, 0x02},
},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
engine := NewEngine()
_, err := engine.FromClientHello(tt.ch)
if (err != nil) != tt.wantErr {
t.Errorf("FromClientHello() error = %v, wantErr %v", err, tt.wantErr)
}
})
}
}
func TestNewEngine(t *testing.T) {
engine := NewEngine()
if engine == nil {
t.Error("NewEngine() returned nil")
}
}
func TestFromClientHello_ValidPayload(t *testing.T) {
// Use a minimal valid TLS 1.2 ClientHello with extensions
// Build a proper ClientHello using the same structure as parser tests
clientHello := buildMinimalClientHelloForTest()
ch := api.TLSClientHello{
SrcIP: "192.168.1.100",
SrcPort: 54321,
DstIP: "10.0.0.1",
DstPort: 443,
Payload: clientHello,
}
engine := NewEngine()
fp, err := engine.FromClientHello(ch)
if err != nil {
t.Fatalf("FromClientHello() error = %v", err)
}
if fp == nil {
t.Fatal("FromClientHello() returned nil")
}
// Verify JA4 is populated (format: t13d... or t12d...)
if fp.JA4 == "" {
t.Error("JA4 should not be empty")
}
// JA4Hash is populated for internal use (but not serialized to LogRecord)
// It contains the hash portions of the JA4 string
if fp.JA4Hash == "" {
t.Error("JA4Hash should be populated for internal use")
}
}
// buildMinimalClientHelloForTest creates a minimal valid TLS 1.2 ClientHello
func buildMinimalClientHelloForTest() []byte {
// Cipher suites (minimal set)
cipherSuites := []byte{0x00, 0x04, 0x13, 0x01, 0x13, 0x02, 0xc0, 0x2f}
// Compression methods (null only)
compressionMethods := []byte{0x01, 0x00}
// No extensions
extensions := []byte{}
extLen := len(extensions)
// Build ClientHello handshake body
handshakeBody := []byte{
0x03, 0x03, // Version: TLS 1.2
// Random (32 bytes)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, // Session ID length: 0
}
// Add cipher suites (with length prefix)
cipherSuiteLen := len(cipherSuites)
handshakeBody = append(handshakeBody, byte(cipherSuiteLen>>8), byte(cipherSuiteLen))
handshakeBody = append(handshakeBody, cipherSuites...)
// Add compression methods (with length prefix)
handshakeBody = append(handshakeBody, compressionMethods...)
// Add extensions (with length prefix)
handshakeBody = append(handshakeBody, byte(extLen>>8), byte(extLen))
handshakeBody = append(handshakeBody, extensions...)
// Now build full handshake with type and length
handshakeLen := len(handshakeBody)
handshake := append([]byte{
0x01, // Handshake type: ClientHello
byte(handshakeLen >> 16), byte(handshakeLen >> 8), byte(handshakeLen), // Handshake length
}, handshakeBody...)
// Build TLS record
recordLen := len(handshake)
record := make([]byte, 5+recordLen)
record[0] = 0x16 // Handshake
record[1] = 0x03 // Version: TLS 1.2
record[2] = 0x03
record[3] = byte(recordLen >> 8)
record[4] = byte(recordLen)
copy(record[5:], handshake)
return record
}
// TestExtractJA4Hash tests the extractJA4Hash helper function
func TestExtractJA4Hash(t *testing.T) {
tests := []struct {
name string
ja4 string
want string
}{
{
name: "standard_ja4_format",
ja4: "t13d1516h2_8daaf6152771_02cb136f2775",
want: "8daaf6152771_02cb136f2775",
},
{
name: "ja4_with_single_underscore",
ja4: "t12d1234h1_abcdef123456",
want: "abcdef123456",
},
{
name: "ja4_no_underscore_returns_empty",
ja4: "t13d1516h2",
want: "",
},
{
name: "empty_ja4_returns_empty",
ja4: "",
want: "",
},
{
name: "underscore_at_start",
ja4: "_hash1_hash2",
want: "hash1_hash2",
},
{
name: "multiple_underscores_returns_after_first",
ja4: "base_part1_part2_part3",
want: "part1_part2_part3",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := extractJA4Hash(tt.ja4)
if got != tt.want {
t.Errorf("extractJA4Hash(%q) = %q, want %q", tt.ja4, got, tt.want)
}
})
}
}
// TestFromClientHello_NilPayload tests error handling for nil payload
func TestFromClientHello_NilPayload(t *testing.T) {
engine := NewEngine()
ch := api.TLSClientHello{
Payload: nil,
}
_, err := engine.FromClientHello(ch)
if err == nil {
t.Error("FromClientHello() with nil payload should return error")
}
if !strings.HasPrefix(err.Error(), "empty ClientHello payload") {
t.Errorf("FromClientHello() error = %v, should start with 'empty ClientHello payload'", err)
}
}
// TestFromClientHello_JA3Hash tests that JA3Hash is correctly populated
func TestFromClientHello_JA3Hash(t *testing.T) {
clientHello := buildMinimalClientHelloForTest()
ch := api.TLSClientHello{
Payload: clientHello,
}
engine := NewEngine()
fp, err := engine.FromClientHello(ch)
if err != nil {
t.Fatalf("FromClientHello() error = %v", err)
}
// JA3Hash should be populated (MD5 hash of JA3 string)
if fp.JA3Hash == "" {
t.Error("JA3Hash should be populated")
}
// JA3 should also be populated
if fp.JA3 == "" {
t.Error("JA3 should be populated")
}
}
// TestFromClientHello_EmptyJA4Hash tests behavior when JA4 has no underscore
func TestFromClientHello_EmptyJA4Hash(t *testing.T) {
// This test verifies that even if JA4 format changes, the code handles it gracefully
engine := NewEngine()
// Use a valid ClientHello - the library should produce a proper JA4
clientHello := buildMinimalClientHelloForTest()
ch := api.TLSClientHello{
Payload: clientHello,
}
fp, err := engine.FromClientHello(ch)
if err != nil {
t.Fatalf("FromClientHello() error = %v", err)
}
// JA4 should always be populated
if fp.JA4 == "" {
t.Error("JA4 should be populated")
}
// JA4Hash may be empty if the JA4 format doesn't include underscores
// This is acceptable behavior
}
// buildClientHelloWithTruncatedExtension creates a ClientHello where the last
// extension declares more data than actually present.
func buildClientHelloWithTruncatedExtension() []byte {
// Build a valid SNI extension first
sniHostname := []byte("example.com")
sniExt := []byte{
0x00, 0x00, // Extension type: server_name
}
sniData := []byte{0x00}
sniListLen := 1 + 2 + len(sniHostname) // type(1) + len(2) + hostname
sniData = append(sniData, byte(sniListLen>>8), byte(sniListLen))
sniData = append(sniData, 0x00) // hostname type
sniData = append(sniData, byte(len(sniHostname)>>8), byte(len(sniHostname)))
sniData = append(sniData, sniHostname...)
sniExt = append(sniExt, byte(len(sniData)>>8), byte(len(sniData)))
sniExt = append(sniExt, sniData...)
// Build a truncated extension: declares 100 bytes but only has 5
truncatedExt := []byte{
0x00, 0x15, // Extension type: padding
0x00, 0x64, // Extension data length: 100 (but we only provide 5)
0x00, 0x00, 0x00, 0x00, 0x00, // Only 5 bytes of padding
}
// Extensions = valid SNI + truncated padding
extensions := append(sniExt, truncatedExt...)
// But the extensions length field claims the full size (including the bad extension)
extLen := len(extensions)
// Cipher suites
cipherSuites := []byte{0x00, 0x04, 0x13, 0x01, 0x13, 0x02, 0xc0, 0x2f}
compressionMethods := []byte{0x01, 0x00}
handshakeBody := []byte{0x03, 0x03}
for i := 0; i < 32; i++ {
handshakeBody = append(handshakeBody, 0x01)
}
handshakeBody = append(handshakeBody, 0x00) // session ID length: 0
handshakeBody = append(handshakeBody, byte(len(cipherSuites)>>8), byte(len(cipherSuites)))
handshakeBody = append(handshakeBody, cipherSuites...)
handshakeBody = append(handshakeBody, compressionMethods...)
handshakeBody = append(handshakeBody, byte(extLen>>8), byte(extLen))
handshakeBody = append(handshakeBody, extensions...)
handshakeLen := len(handshakeBody)
handshake := append([]byte{
0x01,
byte(handshakeLen >> 16), byte(handshakeLen >> 8), byte(handshakeLen),
}, handshakeBody...)
recordLen := len(handshake)
record := make([]byte, 5+recordLen)
record[0] = 0x16
record[1] = 0x03
record[2] = 0x03
record[3] = byte(recordLen >> 8)
record[4] = byte(recordLen)
copy(record[5:], handshake)
return record
}
func TestFromClientHello_TruncatedExtension_StillGeneratesFingerprint(t *testing.T) {
payload := buildClientHelloWithTruncatedExtension()
ch := api.TLSClientHello{
SrcIP: "4.251.36.192",
SrcPort: 19346,
DstIP: "212.95.72.88",
DstPort: 443,
Payload: payload,
ConnID: "4.251.36.192:19346->212.95.72.88:443",
}
engine := NewEngine()
fp, err := engine.FromClientHello(ch)
if err != nil {
t.Fatalf("FromClientHello() should succeed after sanitization, got error: %v", err)
}
if fp == nil {
t.Fatal("FromClientHello() returned nil fingerprint")
}
if fp.JA4 == "" {
t.Error("JA4 should be populated even with truncated extension")
}
if fp.JA3 == "" {
t.Error("JA3 should be populated even with truncated extension")
}
}
func TestSanitizeClientHelloExtensions(t *testing.T) {
t.Run("valid payload returns nil", func(t *testing.T) {
valid := buildMinimalClientHelloForTest()
result := sanitizeClientHelloExtensions(valid)
if result != nil {
t.Error("should return nil for valid payload (no fix needed)")
}
})
t.Run("truncated extension is fixed", func(t *testing.T) {
truncated := buildClientHelloWithTruncatedExtension()
result := sanitizeClientHelloExtensions(truncated)
if result == nil {
t.Fatal("should return sanitized payload")
}
// The sanitized payload should be parseable by the library
fp, err := tlsfingerprint.ParseClientHello(result)
if err != nil {
t.Fatalf("sanitized payload should parse without error, got: %v", err)
}
if fp == nil {
t.Fatal("sanitized payload should produce a fingerprint")
}
})
t.Run("too short returns nil", func(t *testing.T) {
if sanitizeClientHelloExtensions([]byte{0x16}) != nil {
t.Error("should return nil for short payload")
}
})
t.Run("non-TLS returns nil", func(t *testing.T) {
if sanitizeClientHelloExtensions([]byte{0x15, 0x03, 0x03, 0x00, 0x01, 0x00}) != nil {
t.Error("should return nil for non-TLS payload")
}
})
}
// TestExtractJA4Hash_Standard tests the hash extraction from a standard JA4 string.
func TestExtractJA4Hash_Standard(t *testing.T) {
ja4 := "t13d1516h2_8daaf6152771_02cb136f2775"
got := extractJA4Hash(ja4)
expected := "8daaf6152771_02cb136f2775"
if got != expected {
t.Errorf("extractJA4Hash(%q) = %q, want %q", ja4, got, expected)
}
}
// TestExtractJA4Hash_NoUnderscore tests that no underscore returns empty string.
func TestExtractJA4Hash_NoUnderscore(t *testing.T) {
got := extractJA4Hash("nounderscore")
if got != "" {
t.Errorf("expected empty string for no underscore, got %q", got)
}
}
// TestExtractJA4Hash_Empty tests that empty string returns empty string.
func TestExtractJA4Hash_Empty(t *testing.T) {
got := extractJA4Hash("")
if got != "" {
t.Errorf("expected empty string for empty input, got %q", got)
}
}
// TestFromClientHello_NilPayloadExplicit tests that nil payload (empty) returns error.
func TestFromClientHello_NilPayloadExplicit(t *testing.T) {
engine := NewEngine()
_, err := engine.FromClientHello(api.TLSClientHello{
SrcIP: "1.2.3.4",
SrcPort: 12345,
DstIP: "5.6.7.8",
DstPort: 443,
Payload: nil,
})
if err == nil {
t.Error("expected error for nil payload")
}
}
// TestFromClientHello_SingleByte tests that single byte payload returns error.
func TestFromClientHello_SingleByte(t *testing.T) {
engine := NewEngine()
_, err := engine.FromClientHello(api.TLSClientHello{
Payload: []byte{0x16},
})
if err == nil {
t.Error("expected error for single-byte payload")
}
}
// TestFromClientHello_ErrorContainsAddresses tests that error message includes addresses.
func TestFromClientHello_ErrorContainsAddresses(t *testing.T) {
engine := NewEngine()
_, err := engine.FromClientHello(api.TLSClientHello{
SrcIP: "192.168.1.100",
SrcPort: 54321,
DstIP: "10.0.0.1",
DstPort: 443,
ConnID: "test-conn-id",
Payload: []byte{0x01, 0x02, 0x03}, // invalid
})
if err == nil {
t.Fatal("expected error for invalid payload")
}
if !strings.Contains(err.Error(), "192.168.1.100") {
t.Errorf("expected error to contain src IP, got: %v", err)
}
}
// TestSanitizeClientHelloExtensions_NilInput tests nil input returns nil.
func TestSanitizeClientHelloExtensions_NilInput(t *testing.T) {
if sanitizeClientHelloExtensions(nil) != nil {
t.Error("nil input should return nil")
}
}
// TestSanitizeClientHelloExtensions_EmptyInput tests empty input returns nil.
func TestSanitizeClientHelloExtensions_EmptyInput(t *testing.T) {
if sanitizeClientHelloExtensions([]byte{}) != nil {
t.Error("empty input should return nil")
}
}
// TestJA4HashExtraction_ConsistentWithFullParse verifies JA4Hash is the tail of JA4 string.
func TestJA4HashExtraction_ConsistentWithFullParse(t *testing.T) {
// Any JA4 string with exactly one underscore should work
ja4 := "t12d4562h0_somehash"
hash := extractJA4Hash(ja4)
if !strings.HasPrefix(ja4, "t12") {
t.Skip("precondition failed")
}
if hash != "somehash" {
t.Errorf("expected 'somehash', got %q", hash)
}
}
// Compile-time check: EngineImpl satisfies api.Engine.
var _ interface {
FromClientHello(api.TLSClientHello) (*api.Fingerprints, error)
} = (*EngineImpl)(nil)