feat: HTTP/2 passive fingerprinting with individual SETTINGS fields

Complete implementation of HTTP/2 passive fingerprinting per thesis §2.5.3:

mod-reqin-log (C module):
- Replace connection-level filter with ap_hook_process_connection (APR_HOOK_FIRST)
  to capture H2 preface before mod_http2 takes over the connection
- AP_MODE_SPECULATIVE read of 512 bytes from c->input_filters
- Parse SETTINGS, WINDOW_UPDATE, PRIORITY flags, pseudo-header order
- Output individual SETTINGS params as separate JSON fields (IDs 1-6, 8)
- Read H2 notes from c1 (master connection) for mod_http2 secondary conns
- Fix header_order_signature JSON length bug (26→strlen)

ClickHouse schema:
- Add 8 new columns to http_logs: h2_has_priority, h2_header_table_size,
  h2_enable_push, h2_max_concurrent_streams, h2_initial_window_size,
  h2_max_frame_size, h2_max_header_list_size, h2_enable_connect_protocol
- Use Int32/Int64 with DEFAULT -1 for the individual SETTINGS columns to
  distinguish absent vs zero (h2_has_priority is a UInt8 flag with DEFAULT 0)
- Update mv_http_logs to extract individual fields via JSONHas/JSONExtractInt
- Migration 04_http2_fields.sql updated for existing deployments

Correlator:
- Accept both timestamp_ns and timestamp field names (backward compat)

Integration:
- Enable HTTP/2 in Apache: Protocols h2 http/1.1 in httpd-integration.conf

Validated end-to-end via Playwright: H2 curl traffic → mod-reqin-log →
correlator → ClickHouse with all 12 H2 columns populated correctly.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-11 02:33:45 +02:00
parent bd81331411
commit 85d3b95b7b
25 changed files with 649 additions and 160 deletions

View File

@ -1,6 +1,6 @@
# Dashboard # Dashboard
Application web SOC (Security Operations Center) construite avec **FastAPI + Jinja2 + htmx**, Application web SOC (Security Operations Center) construite avec **FastAPI + Jinja2 + ECharts**,
offrant la visualisation en temps réel, l'investigation et l'analyse des détections de bots offrant la visualisation en temps réel, l'investigation et l'analyse des détections de bots
générées par le [bot-detector](bot-detector.md). Interroge ClickHouse sur deux bases de données générées par le [bot-detector](bot-detector.md). Interroge ClickHouse sur deux bases de données
(`ja4_processing` et `ja4_logs`). (`ja4_processing` et `ja4_logs`).
@ -13,8 +13,8 @@ générées par le [bot-detector](bot-detector.md). Interroge ClickHouse sur deu
|-----------|-------------| |-----------|-------------|
| Backend | Python 3.11 + FastAPI | | Backend | Python 3.11 + FastAPI |
| Templates | Jinja2 (rendu côté serveur) | | Templates | Jinja2 (rendu côté serveur) |
| Interactions dynamiques | htmx (mises à jour partielles via JSON API) | | Interactions dynamiques | Vanilla `fetch()` (appels JSON API avec rechargement partiel côté JS) |
| Graphiques | Chart.js + ECharts | | Graphiques | ECharts 5.5 (CDN) |
| Style | Tailwind CSS (CDN) | | Style | Tailwind CSS (CDN) |
| Base de données | ClickHouse via `clickhouse-connect` (client propre, **PAS** `ja4_common`) | | Base de données | ClickHouse via `clickhouse-connect` (client propre, **PAS** `ja4_common`) |
| Documentation API | Swagger UI (`/docs`) + OpenAPI JSON (`/openapi.json`) | | Documentation API | Swagger UI (`/docs`) + OpenAPI JSON (`/openapi.json`) |

View File

@ -89,8 +89,9 @@ class TrafficAutoEncoder:
self._scaler_range = None self._scaler_range = None
def _build_model(self): def _build_model(self):
dim1 = min(64, max(self.n_features, self.latent_dim + 4)) # Architecture fixe n→64→32→16→32→64→n (§2.4.3 thèse)
dim2 = min(32, max(dim1 // 2, self.latent_dim + 2)) dim1 = 64
dim2 = 32
self.encoder = nn.Sequential( self.encoder = nn.Sequential(
nn.Linear(self.n_features, dim1), nn.BatchNorm1d(dim1), nn.ReLU(), nn.Linear(self.n_features, dim1), nn.BatchNorm1d(dim1), nn.ReLU(),
nn.Linear(dim1, dim2), nn.BatchNorm1d(dim2), nn.ReLU(), nn.Linear(dim1, dim2), nn.BatchNorm1d(dim2), nn.ReLU(),

View File

@ -255,9 +255,13 @@ func parseJSONEvent(data []byte, sourceType string) (*domain.NormalizedEvent, er
// Extract timestamp based on source contract // Extract timestamp based on source contract
switch event.Source { switch event.Source {
case domain.SourceA: case domain.SourceA:
ts, ok := getInt64(raw, "timestamp") ts, ok := getInt64(raw, "timestamp_ns")
if !ok { if !ok {
return nil, fmt.Errorf("missing required numeric field: timestamp for source A") // Fallback to legacy "timestamp" field name
ts, ok = getInt64(raw, "timestamp")
}
if !ok {
return nil, fmt.Errorf("missing required numeric field: timestamp/timestamp_ns for source A")
} }
// Assume nanoseconds // Assume nanoseconds
event.Timestamp = time.Unix(0, ts) event.Timestamp = time.Unix(0, ts)

View File

@ -196,7 +196,7 @@ func TestClickHouseSink_BufferManagement(t *testing.T) {
log := domain.CorrelatedLog{ log := domain.CorrelatedLog{
SrcIP: "192.168.1.1", SrcIP: "192.168.1.1",
SrcPort: 8080, SrcPort: 8080,
Correlated: true, Correlated: 1,
} }
s := &ClickHouseSink{ s := &ClickHouseSink{
@ -527,7 +527,7 @@ func BenchmarkClickHouseSink_Write(b *testing.B) {
Timestamp: time.Now(), Timestamp: time.Now(),
SrcIP: "192.168.1.1", SrcIP: "192.168.1.1",
SrcPort: 8080, SrcPort: 8080,
Correlated: true, Correlated: 1,
} }
ctx := context.Background() ctx := context.Background()

View File

@ -22,7 +22,7 @@ func TestFileSink_Write(t *testing.T) {
log := domain.CorrelatedLog{ log := domain.CorrelatedLog{
SrcIP: "192.168.1.1", SrcIP: "192.168.1.1",
SrcPort: 8080, SrcPort: 8080,
Correlated: true, Correlated: 1,
} }
if err := sink.Write(context.Background(), log); err != nil { if err := sink.Write(context.Background(), log); err != nil {
@ -57,7 +57,7 @@ func TestFileSink_WriteImmediatePersist_NoFlushNeeded(t *testing.T) {
log := domain.CorrelatedLog{ log := domain.CorrelatedLog{
SrcIP: "192.168.1.1", SrcIP: "192.168.1.1",
SrcPort: 8080, SrcPort: 8080,
Correlated: true, Correlated: 1,
} }
if err := sink.Write(context.Background(), log); err != nil { if err := sink.Write(context.Background(), log); err != nil {

View File

@ -10,7 +10,7 @@ import (
"github.com/antitbone/ja4/correlator/internal/domain" "github.com/antitbone/ja4/correlator/internal/domain"
) )
func makeLog(correlated bool) domain.CorrelatedLog { func makeLog(correlated int) domain.CorrelatedLog {
return domain.CorrelatedLog{ return domain.CorrelatedLog{
Timestamp: time.Unix(1700000000, 0), Timestamp: time.Unix(1700000000, 0),
SrcIP: "1.2.3.4", SrcIP: "1.2.3.4",
@ -53,10 +53,10 @@ func TestStdoutSink_WriteDoesNotProduceOutput(t *testing.T) {
s := NewStdoutSink(Config{Enabled: true}) s := NewStdoutSink(Config{Enabled: true})
got := captureStdout(t, func() { got := captureStdout(t, func() {
if err := s.Write(context.Background(), makeLog(true)); err != nil { if err := s.Write(context.Background(), makeLog(1)); err != nil {
t.Fatalf("Write(correlated) returned error: %v", err) t.Fatalf("Write(correlated) returned error: %v", err)
} }
if err := s.Write(context.Background(), makeLog(false)); err != nil { if err := s.Write(context.Background(), makeLog(0)); err != nil {
t.Fatalf("Write(orphan) returned error: %v", err) t.Fatalf("Write(orphan) returned error: %v", err)
} }
}) })

View File

@ -9,6 +9,7 @@ import (
"time" "time"
"github.com/antitbone/ja4/correlator/internal/domain" "github.com/antitbone/ja4/correlator/internal/domain"
ja4config "github.com/antitbone/ja4/ja4common/config"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@ -29,7 +30,7 @@ type MetricsConfig struct {
// LogConfig holds logging configuration. // LogConfig holds logging configuration.
type LogConfig struct { type LogConfig struct {
Level string `yaml:"level"` // DEBUG, INFO, WARN, ERROR Level string `yaml:"level" env:"LOG_LEVEL"` // DEBUG, INFO, WARN, ERROR
} }
// GetLogLevel returns the log level, defaulting to INFO if not set. // GetLogLevel returns the log level, defaulting to INFO if not set.
@ -75,15 +76,15 @@ type FileOutputConfig struct {
// ClickHouseOutputConfig holds ClickHouse sink configuration. // ClickHouseOutputConfig holds ClickHouse sink configuration.
type ClickHouseOutputConfig struct { type ClickHouseOutputConfig struct {
Enabled bool `yaml:"enabled"` Enabled bool `yaml:"enabled" env:"CLICKHOUSE_ENABLED"`
DSN string `yaml:"dsn"` DSN string `yaml:"dsn" env:"CLICKHOUSE_DSN"`
Table string `yaml:"table"` Table string `yaml:"table" env:"CLICKHOUSE_TABLE"`
BatchSize int `yaml:"batch_size"` BatchSize int `yaml:"batch_size" env:"CLICKHOUSE_BATCH_SIZE"`
FlushIntervalMs int `yaml:"flush_interval_ms"` FlushIntervalMs int `yaml:"flush_interval_ms" env:"CLICKHOUSE_FLUSH_INTERVAL_MS"`
MaxBufferSize int `yaml:"max_buffer_size"` MaxBufferSize int `yaml:"max_buffer_size" env:"CLICKHOUSE_MAX_BUFFER_SIZE"`
DropOnOverflow bool `yaml:"drop_on_overflow"` DropOnOverflow bool `yaml:"drop_on_overflow"`
AsyncInsert bool `yaml:"async_insert"` AsyncInsert bool `yaml:"async_insert"`
TimeoutMs int `yaml:"timeout_ms"` TimeoutMs int `yaml:"timeout_ms" env:"CLICKHOUSE_TIMEOUT_MS"`
} }
// StdoutOutputConfig holds stdout sink configuration. // StdoutOutputConfig holds stdout sink configuration.
@ -165,6 +166,11 @@ func Load(path string) (*Config, error) {
return nil, fmt.Errorf("failed to parse config file: %w", err) return nil, fmt.Errorf("failed to parse config file: %w", err)
} }
// Surcharge par variables d'environnement (préfixe LOGCORRELATOR_)
if err := ja4config.OverrideFromEnv(cfg, "LOGCORRELATOR"); err != nil {
return nil, fmt.Errorf("failed to apply env overrides: %w", err)
}
if err := cfg.Validate(); err != nil { if err := cfg.Validate(); err != nil {
return nil, fmt.Errorf("invalid config: %w", err) return nil, fmt.Errorf("invalid config: %w", err)
} }

View File

@ -14,7 +14,7 @@ type CorrelatedLog struct {
SrcPort int `json:"src_port"` SrcPort int `json:"src_port"`
DstIP string `json:"dst_ip,omitempty"` DstIP string `json:"dst_ip,omitempty"`
DstPort int `json:"dst_port,omitempty"` DstPort int `json:"dst_port,omitempty"`
Correlated bool `json:"correlated"` Correlated int `json:"correlated"` // 0 = orphelin, 1 = corrélé
OrphanSide string `json:"orphan_side,omitempty"` OrphanSide string `json:"orphan_side,omitempty"`
Fields map[string]any `json:"-"` // Additional fields, merged at marshal time Fields map[string]any `json:"-"` // Additional fields, merged at marshal time
} }
@ -71,7 +71,7 @@ func NewCorrelatedLogFromEvent(event *NormalizedEvent, orphanSide string) Correl
SrcPort: event.SrcPort, SrcPort: event.SrcPort,
DstIP: event.DstIP, DstIP: event.DstIP,
DstPort: event.DstPort, DstPort: event.DstPort,
Correlated: false, Correlated: 0,
OrphanSide: orphanSide, OrphanSide: orphanSide,
Fields: fields, Fields: fields,
} }
@ -95,7 +95,7 @@ func NewCorrelatedLog(apacheEvent, networkEvent *NormalizedEvent) CorrelatedLog
SrcPort: apacheEvent.SrcPort, SrcPort: apacheEvent.SrcPort,
DstIP: coalesceString(apacheEvent.DstIP, networkEvent.DstIP), DstIP: coalesceString(apacheEvent.DstIP, networkEvent.DstIP),
DstPort: coalesceInt(apacheEvent.DstPort, networkEvent.DstPort), DstPort: coalesceInt(apacheEvent.DstPort, networkEvent.DstPort),
Correlated: true, Correlated: 1,
OrphanSide: "", OrphanSide: "",
Fields: fields, Fields: fields,
} }

View File

@ -64,8 +64,8 @@ func TestNewCorrelatedLogFromEvent(t *testing.T) {
log := NewCorrelatedLogFromEvent(event, "A") log := NewCorrelatedLogFromEvent(event, "A")
if log.Correlated { if log.Correlated != 0 {
t.Error("expected correlated to be false") t.Error("expected correlated to be 0")
} }
if log.OrphanSide != "A" { if log.OrphanSide != "A" {
t.Errorf("expected orphan_side A, got %s", log.OrphanSide) t.Errorf("expected orphan_side A, got %s", log.OrphanSide)
@ -101,8 +101,8 @@ func TestNewCorrelatedLog(t *testing.T) {
log := NewCorrelatedLog(apacheEvent, networkEvent) log := NewCorrelatedLog(apacheEvent, networkEvent)
if !log.Correlated { if log.Correlated != 1 {
t.Error("expected correlated to be true") t.Error("expected correlated to be 1")
} }
if log.OrphanSide != "" { if log.OrphanSide != "" {
t.Errorf("expected orphan_side to be empty, got %s", log.OrphanSide) t.Errorf("expected orphan_side to be empty, got %s", log.OrphanSide)
@ -273,7 +273,7 @@ func TestMarshalJSON_ReservedKeyProtection(t *testing.T) {
Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC),
SrcIP: "1.2.3.4", SrcIP: "1.2.3.4",
SrcPort: 1234, SrcPort: 1234,
Correlated: true, Correlated: 1,
Fields: map[string]any{ Fields: map[string]any{
"src_ip": "EVIL_OVERRIDE", // should be ignored "src_ip": "EVIL_OVERRIDE", // should be ignored
"correlated": false, // should be ignored "correlated": false, // should be ignored
@ -294,7 +294,7 @@ func TestMarshalJSON_ReservedKeyProtection(t *testing.T) {
if flat["src_ip"] != "1.2.3.4" { if flat["src_ip"] != "1.2.3.4" {
t.Errorf("reserved key src_ip should not be overwritten, got %v", flat["src_ip"]) t.Errorf("reserved key src_ip should not be overwritten, got %v", flat["src_ip"])
} }
if flat["correlated"] != true { if flat["correlated"] != float64(1) {
t.Errorf("reserved key correlated should not be overwritten, got %v", flat["correlated"]) t.Errorf("reserved key correlated should not be overwritten, got %v", flat["correlated"])
} }
if flat["extra"] != "value" { if flat["extra"] != "value" {
@ -308,7 +308,7 @@ func TestMarshalJSON_OptionalFieldsOmittedWhenZero(t *testing.T) {
Timestamp: time.Now(), Timestamp: time.Now(),
SrcIP: "1.2.3.4", SrcIP: "1.2.3.4",
SrcPort: 1234, SrcPort: 1234,
Correlated: false, Correlated: 0,
} }
data, err := json.Marshal(log) data, err := json.Marshal(log)

View File

@ -57,7 +57,7 @@ func TestCorrelationService_Match(t *testing.T) {
results = svc.ProcessEvent(networkEvent) results = svc.ProcessEvent(networkEvent)
if len(results) != 1 { if len(results) != 1 {
t.Errorf("expected 1 result (correlated), got %d", len(results)) t.Errorf("expected 1 result (correlated), got %d", len(results))
} else if !results[0].Correlated { } else if results[0].Correlated == 0 {
t.Error("expected correlated result") t.Error("expected correlated result")
} }
} }
@ -376,7 +376,7 @@ func TestCorrelationService_DifferentSourceTypes(t *testing.T) {
results = svc.ProcessEvent(apacheEvent) results = svc.ProcessEvent(apacheEvent)
if len(results) < 1 { if len(results) < 1 {
t.Errorf("expected at least 1 result (correlated), got %d", len(results)) t.Errorf("expected at least 1 result (correlated), got %d", len(results))
} else if !results[0].Correlated { } else if results[0].Correlated == 0 {
t.Error("expected correlated result") t.Error("expected correlated result")
} }
} }
@ -455,7 +455,7 @@ func TestCorrelationService_OneToMany_KeepAlive(t *testing.T) {
results = svc.ProcessEvent(apacheEvent1) results = svc.ProcessEvent(apacheEvent1)
if len(results) != 1 { if len(results) != 1 {
t.Errorf("expected 1 correlated result for first A, got %d", len(results)) t.Errorf("expected 1 correlated result for first A, got %d", len(results))
} else if !results[0].Correlated { } else if results[0].Correlated == 0 {
t.Error("expected correlated result for first A") t.Error("expected correlated result for first A")
} }
@ -470,7 +470,7 @@ func TestCorrelationService_OneToMany_KeepAlive(t *testing.T) {
results = svc.ProcessEvent(apacheEvent2) results = svc.ProcessEvent(apacheEvent2)
if len(results) != 1 { if len(results) != 1 {
t.Errorf("expected 1 correlated result for second A (Keep-Alive), got %d", len(results)) t.Errorf("expected 1 correlated result for second A (Keep-Alive), got %d", len(results))
} else if !results[0].Correlated { } else if results[0].Correlated == 0 {
t.Error("expected correlated result for second A (Keep-Alive)") t.Error("expected correlated result for second A (Keep-Alive)")
} }
@ -654,7 +654,7 @@ func TestCorrelationService_KeepAlive_TTLNotBasedOnEventTimestamp(t *testing.T)
SrcPort: 8080, SrcPort: 8080,
} }
results := svc.ProcessEvent(apacheEvent1) results := svc.ProcessEvent(apacheEvent1)
if len(results) != 1 || !results[0].Correlated { if len(results) != 1 || results[0].Correlated == 0 {
t.Fatalf("expected 1 correlated result, got %d", len(results)) t.Fatalf("expected 1 correlated result, got %d", len(results))
} }
@ -667,7 +667,7 @@ func TestCorrelationService_KeepAlive_TTLNotBasedOnEventTimestamp(t *testing.T)
SrcPort: 8080, SrcPort: 8080,
} }
results = svc.ProcessEvent(apacheEvent2) results = svc.ProcessEvent(apacheEvent2)
if len(results) != 1 || !results[0].Correlated { if len(results) != 1 || results[0].Correlated == 0 {
t.Fatalf("expected 1 correlated result (Keep-Alive), got %d", len(results)) t.Fatalf("expected 1 correlated result (Keep-Alive), got %d", len(results))
} }
@ -740,7 +740,7 @@ func TestCorrelationService_KeepAlive_LongSession(t *testing.T) {
Raw: map[string]any{"method": "GET", "path": fmt.Sprintf("/api/%d", i)}, Raw: map[string]any{"method": "GET", "path": fmt.Sprintf("/api/%d", i)},
} }
results := svc.ProcessEvent(apacheEvent) results := svc.ProcessEvent(apacheEvent)
if len(results) != 1 || !results[0].Correlated { if len(results) != 1 || results[0].Correlated == 0 {
t.Errorf("Request %d at t=%ds (A timestamp t=%v): expected correlation, got %d results", t.Errorf("Request %d at t=%ds (A timestamp t=%v): expected correlation, got %d results",
i, i*5, now.Add(time.Duration(i)*500*time.Millisecond), len(results)) i, i*5, now.Add(time.Duration(i)*500*time.Millisecond), len(results))
} }
@ -808,7 +808,7 @@ func TestCorrelationService_ALateThanB_WithinTimeWindow(t *testing.T) {
results = svc.ProcessEvent(apacheEvent) results = svc.ProcessEvent(apacheEvent)
if len(results) != 1 { if len(results) != 1 {
t.Errorf("expected 1 correlated result, got %d", len(results)) t.Errorf("expected 1 correlated result, got %d", len(results))
} else if !results[0].Correlated { } else if results[0].Correlated == 0 {
t.Error("expected correlated result") t.Error("expected correlated result")
} }
} }
@ -866,7 +866,7 @@ func TestCorrelationService_ALateThanB_AExpiredTooSoon(t *testing.T) {
results = svc.ProcessEvent(networkEvent) results = svc.ProcessEvent(networkEvent)
if len(results) != 1 { if len(results) != 1 {
t.Errorf("expected 1 correlated result, got %d", len(results)) t.Errorf("expected 1 correlated result, got %d", len(results))
} else if !results[0].Correlated { } else if results[0].Correlated == 0 {
t.Error("expected correlated result") t.Error("expected correlated result")
} }
} }
@ -921,7 +921,7 @@ func TestCorrelationService_Flush_CorrelatesRemainingEvents(t *testing.T) {
flushed := svc.Flush() flushed := svc.Flush()
if len(flushed) != 1 { if len(flushed) != 1 {
t.Errorf("expected 1 flushed correlated result, got %d", len(flushed)) t.Errorf("expected 1 flushed correlated result, got %d", len(flushed))
} else if flushed[0].Correlated { } else if flushed[0].Correlated != 0 {
// Good - it's correlated // Good - it's correlated
} else { } else {
t.Errorf("expected correlated result, got orphan side %s", flushed[0].OrphanSide) t.Errorf("expected correlated result, got orphan side %s", flushed[0].OrphanSide)
@ -1032,7 +1032,7 @@ func TestCorrelationService_CleanA_RespectsBTTL(t *testing.T) {
Raw: map[string]any{"method": "GET"}, Raw: map[string]any{"method": "GET"},
} }
results := svc.ProcessEvent(apacheEvent) results := svc.ProcessEvent(apacheEvent)
if len(results) != 1 || !results[0].Correlated { if len(results) != 1 || results[0].Correlated == 0 {
t.Fatalf("expected 1 correlated result, got %d", len(results)) t.Fatalf("expected 1 correlated result, got %d", len(results))
} }
@ -1127,7 +1127,7 @@ func TestCorrelationService_ApacheEmitDelay_BArrivesDuringDelay(t *testing.T) {
results = svc.ProcessEvent(networkEvent) results = svc.ProcessEvent(networkEvent)
if len(results) != 1 { if len(results) != 1 {
t.Errorf("expected 1 correlated result, got %d", len(results)) t.Errorf("expected 1 correlated result, got %d", len(results))
} else if !results[0].Correlated { } else if results[0].Correlated == 0 {
t.Error("expected correlated result") t.Error("expected correlated result")
} }
@ -1347,7 +1347,7 @@ results = svc.ProcessEvent(aEvent)
if len(results) != 1 { if len(results) != 1 {
t.Fatalf("expected 1 correlation, got %d", len(results)) t.Fatalf("expected 1 correlation, got %d", len(results))
} }
if !results[0].Correlated { if results[0].Correlated == 0 {
t.Error("expected correlated=true") t.Error("expected correlated=true")
} }
} }
@ -1371,7 +1371,7 @@ results := svc.ProcessEvent(aEvent)
if len(results) != 1 { if len(results) != 1 {
t.Fatalf("expected 1 result (orphan A, dest port filtered), got %d", len(results)) t.Fatalf("expected 1 result (orphan A, dest port filtered), got %d", len(results))
} }
if results[0].Correlated { if results[0].Correlated != 0 {
t.Errorf("expected Correlated=false for dest-port-filtered A event") t.Errorf("expected Correlated=false for dest-port-filtered A event")
} }
if results[0].OrphanSide != "A" { if results[0].OrphanSide != "A" {
@ -1438,7 +1438,7 @@ Source: SourceA, Timestamp: now,
SrcIP: "1.2.3.4", SrcPort: 1234, DstPort: 9999, SrcIP: "1.2.3.4", SrcPort: 1234, DstPort: 9999,
} }
results := svc.ProcessEvent(aEvent) results := svc.ProcessEvent(aEvent)
if len(results) != 1 || !results[0].Correlated { if len(results) != 1 || results[0].Correlated == 0 {
t.Errorf("expected 1 correlation on any port when list is empty, got %d", len(results)) t.Errorf("expected 1 correlation on any port when list is empty, got %d", len(results))
} }
} }
@ -1614,9 +1614,9 @@ SrcPort: 5555,
} }
timeProvider.now = now.Add(200 * time.Millisecond) timeProvider.now = now.Add(200 * time.Millisecond)
results = svc.ProcessEvent(b) results = svc.ProcessEvent(b)
if len(results) != 1 || !results[0].Correlated { if len(results) != 1 || results[0].Correlated == 0 {
t.Fatalf("B: expected 1 correlated result (A1+B), got %d correlated=%v", t.Fatalf("B: expected 1 correlated result (A1+B), got %d correlated=%v",
len(results), len(results) > 0 && results[0].Correlated) len(results), len(results) > 0 && results[0].Correlated != 0)
} }
// A2 arrives on the same Keep-Alive connection — B must still be in buffer // A2 arrives on the same Keep-Alive connection — B must still be in buffer
@ -1632,7 +1632,7 @@ results = svc.ProcessEvent(a2)
// A2 should correlate with B (still in buffer in one_to_many mode) // A2 should correlate with B (still in buffer in one_to_many mode)
correlated := false correlated := false
for _, r := range results { for _, r := range results {
if r.Correlated { if r.Correlated != 0 {
correlated = true correlated = true
} }
} }
@ -1679,7 +1679,7 @@ Timestamp: tp.now,
SrcIP: "91.224.92.185", SrcIP: "91.224.92.185",
SrcPort: 53471, SrcPort: 53471,
} }
if results := svc.ProcessEvent(a1); len(results) != 1 || !results[0].Correlated { if results := svc.ProcessEvent(a1); len(results) != 1 || results[0].Correlated == 0 {
t.Fatalf("A seq=1: expected 1 correlated result, got %d", len(svc.ProcessEvent(a1))) t.Fatalf("A seq=1: expected 1 correlated result, got %d", len(svc.ProcessEvent(a1)))
} }
@ -1695,7 +1695,7 @@ results := svc.ProcessEvent(a10)
correlated := false correlated := false
for _, r := range results { for _, r := range results {
if r.Correlated { if r.Correlated != 0 {
correlated = true correlated = true
} }
} }
@ -1748,7 +1748,7 @@ results := svc.ProcessEvent(b)
correlated := false correlated := false
for _, r := range results { for _, r := range results {
if r.Correlated { if r.Correlated != 0 {
correlated = true correlated = true
} }
} }
@ -1809,7 +1809,7 @@ func TestBTTLExpiry_PurgesPendingOrphans(t *testing.T) {
// The orphan must have been returned (not silently lost) — data-loss fix // The orphan must have been returned (not silently lost) — data-loss fix
orphanFound := false orphanFound := false
for _, r := range returned { for _, r := range returned {
if !r.Correlated && r.SrcIP == "10.9.9.9" { if r.Correlated == 0 && r.SrcIP == "10.9.9.9" {
orphanFound = true orphanFound = true
} }
} }
@ -1854,7 +1854,7 @@ emitted := svc.EmitPendingOrphans()
if len(emitted) != 1 { if len(emitted) != 1 {
t.Fatalf("after delay: expected 1 emitted orphan, got %d", len(emitted)) t.Fatalf("after delay: expected 1 emitted orphan, got %d", len(emitted))
} }
if emitted[0].Correlated { if emitted[0].Correlated != 0 {
t.Errorf("expected orphan (Correlated=false), got Correlated=true") t.Errorf("expected orphan (Correlated=false), got Correlated=true")
} }

View File

@ -1,8 +1,8 @@
-- === 04_http2_fields.sql — Ajout des colonnes HTTP/2 à http_logs === -- === 04_http2_fields.sql — Ajout des colonnes HTTP/2 à http_logs ===
-- --
-- Migration pour les déploiements existants : ajoute les 4 colonnes de -- Migration pour les déploiements existants : ajoute les colonnes de
-- fingerprint HTTP/2 passif extraites par mod_reqin_log via son filtre -- fingerprint HTTP/2 passif extraites par mod_reqin_log via son hook
-- de connexion (APR_HOOK_LAST, AP_FTYPE_CONNECTION). -- process_connection (APR_HOOK_FIRST, AP_MODE_SPECULATIVE).
-- --
-- Format du fingerprint Akamai (h2_fingerprint) : -- Format du fingerprint Akamai (h2_fingerprint) :
-- Chrome : "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p" -- Chrome : "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p"
@ -12,6 +12,7 @@
-- Appliquer avec : -- Appliquer avec :
-- clickhouse-client --multiquery < 04_http2_fields.sql -- clickhouse-client --multiquery < 04_http2_fields.sql
-- Champs composites (fingerprint global + valeurs agrégées)
ALTER TABLE ja4_logs.http_logs ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_fingerprint` String DEFAULT '' CODEC(ZSTD(3)); ADD COLUMN IF NOT EXISTS `h2_fingerprint` String DEFAULT '' CODEC(ZSTD(3));
@ -23,3 +24,29 @@ ALTER TABLE ja4_logs.http_logs
ALTER TABLE ja4_logs.http_logs ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_pseudo_order` LowCardinality(String) DEFAULT ''; ADD COLUMN IF NOT EXISTS `h2_pseudo_order` LowCardinality(String) DEFAULT '';
-- PRIORITY indicator stored as a UInt8 flag; defaults to 0 for rows written
-- before this migration.
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_has_priority` UInt8 DEFAULT 0;
-- Individual SETTINGS parameters (RFC 9113 §6.5.2)
-- Value -1 = parameter absent from the client preface (not sent)
-- Each column is added with IF NOT EXISTS, so re-running this migration is
-- harmless for columns that already exist.
-- NOTE(review): SETTINGS values are unsigned 32-bit on the wire. Int32 cannot
-- hold values above 2^31-1, which header_table_size, max_concurrent_streams
-- and max_header_list_size may legally reach — confirm the chosen widths
-- against the main schema and the materialized view before changing anything.
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_header_table_size` Int32 DEFAULT -1;
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_enable_push` Int32 DEFAULT -1;
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_max_concurrent_streams` Int32 DEFAULT -1;
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_initial_window_size` Int64 DEFAULT -1;
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_max_frame_size` Int32 DEFAULT -1;
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_max_header_list_size` Int32 DEFAULT -1;
ALTER TABLE ja4_logs.http_logs
ADD COLUMN IF NOT EXISTS `h2_enable_connect_protocol` Int32 DEFAULT -1;

View File

@ -5,6 +5,8 @@ from __future__ import annotations
import json import json
import logging import logging
import os import os
import re
from collections import defaultdict
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -21,6 +23,34 @@ router = APIRouter(prefix="/api")
_DB = safe_identifier(DB_PROCESSING) _DB = safe_identifier(DB_PROCESSING)
_DB_LOGS = safe_identifier(DB_LOGS) _DB_LOGS = safe_identifier(DB_LOGS)
# Regex pour extraire les features SHAP/ExIFFI depuis le champ reason
# Format: "SHAP: feat1(+0.123) | feat2(-0.456)" ou "ExIFFI: ..."
_SHAP_RE = re.compile(r"(?:SHAP|ExIFFI):\s*(.+?)(?:\s*\|\s*Threat|$)")
_FEAT_RE = re.compile(r"(\w+)\(([+-]?\d+\.\d+)\)")
def _aggregate_shap_importance(reasons: list[str]) -> list[dict]:
"""Agrège les valeurs SHAP/ExIFFI extraites des champs reason."""
totals: dict[str, float] = defaultdict(float)
counts: dict[str, int] = defaultdict(int)
for reason in reasons:
m = _SHAP_RE.search(reason or "")
if not m:
continue
for feat_match in _FEAT_RE.finditer(m.group(1)):
name = feat_match.group(1)
val = abs(float(feat_match.group(2)))
totals[name] += val
counts[name] += 1
if not totals:
return []
return sorted(
[{"name": k, "importance": round(totals[k] / counts[k], 4), "occurrences": counts[k]}
for k in totals],
key=lambda x: -x["importance"],
)
# Whitelists for sort/order to prevent SQL injection # Whitelists for sort/order to prevent SQL injection
_DETECTION_SORT_COLS = { _DETECTION_SORT_COLS = {
"detected_at", "src_ip", "ja4", "host", "anomaly_score", "detected_at", "src_ip", "ja4", "host", "anomaly_score",
@ -500,7 +530,7 @@ async def features() -> dict[str, Any]:
except Exception: except Exception:
pass pass
# Feature variance (importance proxy) # Feature variance (importance proxy — fallback si SHAP indisponible)
try: try:
variance_rows = query( variance_rows = query(
f"SELECT " f"SELECT "
@ -523,6 +553,22 @@ async def features() -> dict[str, Any]:
except Exception: except Exception:
pass pass
# SHAP/ExIFFI — importance réelle extraite des anomalies détectées
try:
reason_rows = query(
f"SELECT reason FROM {_DB}.ml_detected_anomalies "
"WHERE reason LIKE '%SHAP:%' OR reason LIKE '%ExIFFI:%' "
"ORDER BY detected_at DESC LIMIT 500"
)
if reason_rows:
shap_importance = _aggregate_shap_importance(
[r["reason"] for r in reason_rows]
)
if shap_importance:
result["shap_importance"] = shap_importance
except Exception:
logger.debug("SHAP importance extraction unavailable")
return result return result
@ -846,11 +892,11 @@ async def classify_suggested() -> dict[str, Any]:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class ClassifyRequest(BaseModel): class ClassifyRequest(BaseModel):
src_ip: str src_ip: str
classification: str # bot | legitimate | suspicious classification: str # true_positive | false_positive | suspicious
comment: str = "" comment: str = ""
_VALID_CLASSIFICATIONS = {"bot", "legitimate", "suspicious"} _VALID_CLASSIFICATIONS = {"true_positive", "false_positive", "suspicious"}
_feedback_table_ensured = False _feedback_table_ensured = False

View File

@ -6,7 +6,7 @@
<h4>Feedback analyste SOC</h4> <h4>Feedback analyste SOC</h4>
<p>Classifiez les IPs pour entraîner le modèle XGBoost supervisé. Les labels sont utilisés au prochain cycle ML.</p> <p>Classifiez les IPs pour entraîner le modèle XGBoost supervisé. Les labels sont utilisés au prochain cycle ML.</p>
<p><strong>Workflow :</strong> 1. Consultez les IPs suggérées (non classifiées). 2. Classifiez-les. 3. Les labels alimentent XGBoost au prochain cycle.</p> <p><strong>Workflow :</strong> 1. Consultez les IPs suggérées (non classifiées). 2. Classifiez-les. 3. Les labels alimentent XGBoost au prochain cycle.</p>
<p><strong>Bot :</strong> Confirme une IP malveillante. <strong>Légitime :</strong> Faux positif. <strong>Suspect :</strong> À surveiller.</p> <p><strong>Vrai positif :</strong> Confirme un bot détecté. <strong>Faux positif :</strong> Trafic légitime mal détecté. <strong>Suspect :</strong> À surveiller.</p>
<p class="doc-source">Source : soc_feedback → XGBoost training</p> <p class="doc-source">Source : soc_feedback → XGBoost training</p>
</div></span> </div></span>
{% endblock %} {% endblock %}
@ -15,8 +15,8 @@
<!-- KPIs --> <!-- KPIs -->
<div class="grid grid-cols-2 md:grid-cols-4 gap-3"> <div class="grid grid-cols-2 md:grid-cols-4 gap-3">
<div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">Total classifiées</div><div class="text-xl font-bold text-brand-500" id="kpi-total">0</div></div> <div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">Total classifiées</div><div class="text-xl font-bold text-brand-500" id="kpi-total">0</div></div>
<div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">🤖 Bots confirmés</div><div class="text-xl font-bold text-red-400" id="kpi-bots">0</div></div> <div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">✅ Vrais positifs</div><div class="text-xl font-bold text-red-400" id="kpi-tp">0</div></div>
<div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">✅ Légitimes</div><div class="text-xl font-bold text-green-400" id="kpi-legit">0</div></div> <div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">❌ Faux positifs</div><div class="text-xl font-bold text-green-400" id="kpi-fp">0</div></div>
<div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">⚠️ Suspects</div><div class="text-xl font-bold text-yellow-400" id="kpi-suspect">0</div></div> <div class="kpi-card"><div class="text-[11px] text-gray-500 mb-1">⚠️ Suspects</div><div class="text-xl font-bold text-yellow-400" id="kpi-suspect">0</div></div>
</div> </div>
@ -38,8 +38,8 @@
<div> <div>
<label class="block text-[11px] text-gray-500 mb-1">Classification</label> <label class="block text-[11px] text-gray-500 mb-1">Classification</label>
<div class="grid grid-cols-3 gap-2"> <div class="grid grid-cols-3 gap-2">
<button class="cls-type-btn px-3 py-2 rounded-lg text-sm font-medium transition-colors bg-red-500/20 text-red-400 border border-red-500/30 hover:bg-red-500/30" data-cls="bot">🤖 Bot</button> <button class="cls-type-btn px-3 py-2 rounded-lg text-sm font-medium transition-colors bg-red-500/20 text-red-400 border border-red-500/30 hover:bg-red-500/30" data-cls="true_positive">✅ Vrai positif</button>
<button class="cls-type-btn px-3 py-2 rounded-lg text-sm font-medium transition-colors bg-green-500/20 text-green-400 border border-green-500/30 hover:bg-green-500/30" data-cls="legitimate">✅ Légitime</button> <button class="cls-type-btn px-3 py-2 rounded-lg text-sm font-medium transition-colors bg-green-500/20 text-green-400 border border-green-500/30 hover:bg-green-500/30" data-cls="false_positive">❌ Faux positif</button>
<button class="cls-type-btn px-3 py-2 rounded-lg text-sm font-medium transition-colors bg-yellow-500/20 text-yellow-400 border border-yellow-500/30 hover:bg-yellow-500/30" data-cls="suspicious">⚠️ Suspect</button> <button class="cls-type-btn px-3 py-2 rounded-lg text-sm font-medium transition-colors bg-yellow-500/20 text-yellow-400 border border-yellow-500/30 hover:bg-yellow-500/30" data-cls="suspicious">⚠️ Suspect</button>
</div> </div>
</div> </div>
@ -114,7 +114,7 @@ document.querySelectorAll('.cls-type-btn').forEach(btn => {
selectedCls = btn.dataset.cls; selectedCls = btn.dataset.cls;
const sub = document.getElementById('cls-submit'); const sub = document.getElementById('cls-submit');
sub.disabled = false; sub.disabled = false;
sub.textContent = {bot:'🤖 Classifier comme Bot',legitimate:' Classifier comme Légitime',suspicious:'⚠️ Classifier comme Suspect'}[selectedCls]; sub.textContent = {true_positive:' Classifier comme Vrai positif',false_positive:' Classifier comme Faux positif',suspicious:'⚠️ Classifier comme Suspect'}[selectedCls];
}; };
}); });
@ -158,13 +158,13 @@ async function loadAll() {
const byType = {}; const byType = {};
(stats.stats||[]).forEach(r => { byType[r.classification] = r.cnt; }); (stats.stats||[]).forEach(r => { byType[r.classification] = r.cnt; });
document.getElementById('kpi-total').textContent = fmtNum(stats.total||0); document.getElementById('kpi-total').textContent = fmtNum(stats.total||0);
document.getElementById('kpi-bots').textContent = fmtNum(byType.bot||0); document.getElementById('kpi-tp').textContent = fmtNum(byType.true_positive||0);
document.getElementById('kpi-legit').textContent = fmtNum(byType.legitimate||0); document.getElementById('kpi-fp').textContent = fmtNum(byType.false_positive||0);
document.getElementById('kpi-suspect').textContent = fmtNum(byType.suspicious||0); document.getElementById('kpi-suspect').textContent = fmtNum(byType.suspicious||0);
// ── Distribution chart ── // ── Distribution chart ──
const CLS_COLORS = {bot:'#ef4444',legitimate:'#22c55e',suspicious:'#eab308'}; const CLS_COLORS = {true_positive:'#ef4444',false_positive:'#22c55e',suspicious:'#eab308'};
const CLS_LABELS = {bot:'🤖 Bot',legitimate:'✅ Légitime',suspicious:'⚠️ Suspect'}; const CLS_LABELS = {true_positive:'✅ Vrai positif',false_positive:'❌ Faux positif',suspicious:'⚠️ Suspect'};
if (stats.total > 0) { if (stats.total > 0) {
const el = document.getElementById('dist-chart'); const el = document.getElementById('dist-chart');
const ch = echarts.init(el); const ch = echarts.init(el);
@ -188,8 +188,8 @@ async function loadAll() {
<td class="text-xs max-w-[100px] truncate">${row.asn_org ? fmtASN(row.asn_org) : ''}</td> <td class="text-xs max-w-[100px] truncate">${row.asn_org ? fmtASN(row.asn_org) : ''}</td>
<td>${fmtCountry(row.country_code)}</td> <td>${fmtCountry(row.country_code)}</td>
<td class="whitespace-nowrap"> <td class="whitespace-nowrap">
<button onclick="quickClassify('${escapeHtml(row.src_ip)}','bot')" class="px-1.5 py-0.5 text-[10px] bg-red-500/20 text-red-400 rounded hover:bg-red-500/30" title="Bot">🤖</button> <button onclick="quickClassify('${escapeHtml(row.src_ip)}','true_positive')" class="px-1.5 py-0.5 text-[10px] bg-red-500/20 text-red-400 rounded hover:bg-red-500/30" title="Vrai positif"></button>
<button onclick="quickClassify('${escapeHtml(row.src_ip)}','legitimate')" class="px-1.5 py-0.5 text-[10px] bg-green-500/20 text-green-400 rounded hover:bg-green-500/30" title="Légitime"></button> <button onclick="quickClassify('${escapeHtml(row.src_ip)}','false_positive')" class="px-1.5 py-0.5 text-[10px] bg-green-500/20 text-green-400 rounded hover:bg-green-500/30" title="Faux positif"></button>
<a href="/ip/${encodeURIComponent(row.src_ip)}" class="px-1.5 py-0.5 text-[10px] bg-gray-700 text-gray-300 rounded hover:bg-gray-600 inline-block" title="Détail">🔍</a> <a href="/ip/${encodeURIComponent(row.src_ip)}" class="px-1.5 py-0.5 text-[10px] bg-gray-700 text-gray-300 rounded hover:bg-gray-600 inline-block" title="Détail">🔍</a>
</td> </td>
</tr>`).join('') || '<tr><td colspan="8" class="text-center text-gray-500 py-4">Toutes les IPs ont été classifiées 🎉</td></tr>'; </tr>`).join('') || '<tr><td colspan="8" class="text-center text-gray-500 py-4">Toutes les IPs ont été classifiées 🎉</td></tr>';
@ -198,7 +198,7 @@ async function loadAll() {
document.getElementById('cls-history').innerHTML = (history.data||[]).map(row => `<tr onclick="window.location='/ip/${encodeURIComponent(row.src_ip)}'"> document.getElementById('cls-history').innerHTML = (history.data||[]).map(row => `<tr onclick="window.location='/ip/${encodeURIComponent(row.src_ip)}'">
<td class="text-xs text-gray-400">${(row.created_at||'').substring(0,16)}</td> <td class="text-xs text-gray-400">${(row.created_at||'').substring(0,16)}</td>
<td class="whitespace-nowrap">${fmtIP(row.src_ip)}</td> <td class="whitespace-nowrap">${fmtIP(row.src_ip)}</td>
<td><span class="badge ${row.classification==='bot'?'badge-critical':row.classification==='legitimate'?'badge-low':'badge-medium'}">${escapeHtml(row.classification)}</span></td> <td><span class="badge ${row.classification==='true_positive'?'badge-critical':row.classification==='false_positive'?'badge-low':'badge-medium'}">${escapeHtml(row.classification)}</span></td>
<td class="text-xs max-w-[300px] truncate text-gray-400">${escapeHtml(row.comment||'')}</td> <td class="text-xs max-w-[300px] truncate text-gray-400">${escapeHtml(row.comment||'')}</td>
</tr>`).join('') || '<tr><td colspan="4" class="text-center text-gray-500 py-4">Aucune classification</td></tr>'; </tr>`).join('') || '<tr><td colspan="4" class="text-center text-gray-500 py-4">Aucune classification</td></tr>';

View File

@ -26,12 +26,12 @@
<div class="section-body"><div id="chart-radar" style="height:360px"></div></div> <div class="section-body"><div id="chart-radar" style="height:360px"></div></div>
</div> </div>
<div class="section-card"> <div class="section-card">
<div class="section-header"><span class="section-title">Importance des features (Variance) <div class="section-header"><span class="section-title" id="importance-title">Importance des features (SHAP/ExIFFI)
<span class="relative inline-block"><button onclick="docToggle(this)" class="doc-btn"></button><div class="doc-panel"> <span class="relative inline-block"><button onclick="docToggle(this)" class="doc-btn"></button><div class="doc-panel">
<h4>Feature importance</h4> <h4>Feature importance</h4>
<p>Variance inter-classe (ISP vs datacenter) de chaque feature. Les features à haute variance discriminent le mieux bots et humains.</p> <p>Importance moyenne des features issue de SHAP (XGBoost) ou ExIFFI (EIF). Chaque barre représente la contribution absolue moyenne d'une feature aux décisions d'anomalie récentes.</p>
<p><strong>Usage :</strong> Les features en tête sont les plus utiles pour le modèle EIF. Celles à variance nulle sont élaguées automatiquement.</p> <p><strong>Fallback :</strong> Si aucune donnée SHAP/ExIFFI n'est disponible, la variance inter-classe (proxy statistique) est affichée à la place.</p>
<p class="doc-source">Source : view_ai_features_1h</p> <p class="doc-source">Source : ml_detected_anomalies.reason (SHAP/ExIFFI) ou view_ai_features_1h (variance)</p>
</div></span> </div></span>
</span></div> </span></div>
<div class="section-body"><div id="chart-importance" style="height:360px"></div></div> <div class="section-body"><div id="chart-importance" style="height:360px"></div></div>
@ -158,8 +158,16 @@ async function loadAll() {
})); }));
} }
// ── Feature Importance (horizontal bar) ── // ── Feature Importance (horizontal bar) — SHAP/ExIFFI si disponible, variance sinon ──
const fi = (feat.feature_importance || []).sort((a,b) => a.variance - b.variance); const shapData = feat.shap_importance || [];
const varianceData = (feat.feature_importance || []).sort((a,b) => a.variance - b.variance);
const useShap = shapData.length > 0;
const fi = useShap
? shapData.slice().sort((a,b) => a.importance - b.importance)
: varianceData;
const impLabel = useShap ? 'SHAP/ExIFFI (|valeur| moyenne)' : 'Variance';
document.getElementById('importance-title').childNodes[0].textContent =
useShap ? 'Importance des features (SHAP/ExIFFI) ' : 'Importance des features (Variance) ';
const impChart = initChart('chart-importance'); const impChart = initChart('chart-importance');
if (impChart && fi.length) { if (impChart && fi.length) {
impChart.setOption(ecBase({ impChart.setOption(ecBase({
@ -175,12 +183,13 @@ async function loadAll() {
type:'value', type:'value',
splitLine:{lineStyle:{color:EC_GRID, type:'dashed'}}, splitLine:{lineStyle:{color:EC_GRID, type:'dashed'}},
axisLabel:{color:EC_TEXT}, axisLabel:{color:EC_TEXT},
name:'Variance', nameTextStyle:{color:EC_TEXT}, name: impLabel, nameTextStyle:{color:EC_TEXT},
}, },
series:[{ series:[{
type:'bar', data: fi.map(f => f.variance), barWidth:'60%', type:'bar', data: fi.map(f => useShap ? f.importance : f.variance), barWidth:'60%',
itemStyle:{color: new echarts.graphic.LinearGradient(0,0,1,0,[ itemStyle:{color: new echarts.graphic.LinearGradient(0,0,1,0,[
{offset:0, color:'#6366f1'}, {offset:1, color:'#8b5cf6'} {offset:0, color: useShap ? '#f59e0b' : '#6366f1'},
{offset:1, color: useShap ? '#ef4444' : '#8b5cf6'}
])}, ])},
label:{show:true, position:'right', color:EC_TEXT, fontSize:10, formatter:p => p.value.toFixed(4)}, label:{show:true, position:'right', color:EC_TEXT, fontSize:10, formatter:p => p.value.toFixed(4)},
}] }]

View File

@ -130,15 +130,13 @@ static const char *cmd_set_log_level(cmd_parms *cmd, void *dummy, const char *ar
/* Forward declarations for hooks */ /* Forward declarations for hooks */
static int reqin_log_post_read_request(request_rec *r); static int reqin_log_post_read_request(request_rec *r);
static int reqin_log_log_transaction(request_rec *r);
static void reqin_log_child_init(apr_pool_t *p, server_rec *s); static void reqin_log_child_init(apr_pool_t *p, server_rec *s);
static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s); static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s);
static void reqin_log_register_hooks(apr_pool_t *p); static void reqin_log_register_hooks(apr_pool_t *p);
/* Forward declarations for le filtre HTTP/2 */ /* Forward declarations for la capture HTTP/2 */
static apr_status_t reqin_h2_filter(ap_filter_t *f, apr_bucket_brigade *bb, static int reqin_h2_process_connection(conn_rec *c, void *csd);
ap_input_mode_t mode, apr_read_type_e block,
apr_off_t readbytes);
static void reqin_h2_add_filter(conn_rec *c, void *csd);
/* Command table */ /* Command table */
static const command_rec reqin_log_cmds[] = { static const command_rec reqin_log_cmds[] = {
@ -934,12 +932,16 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
format_iso8601(&buf, r->request_time); format_iso8601(&buf, r->request_time);
dynbuf_append(&buf, "\",", 2); dynbuf_append(&buf, "\",", 2);
/* timestamp (nanoseconds since epoch, from request reception time) */ /* timestamp_ns (nanoseconds since epoch, via clock_gettime CLOCK_REALTIME) */
{ {
apr_uint64_t ns = ((apr_uint64_t)r->request_time) * APR_UINT64_C(1000); struct timespec ts_now;
apr_uint64_t ns;
char ts_buf[32]; char ts_buf[32];
clock_gettime(CLOCK_REALTIME, &ts_now);
ns = (apr_uint64_t)ts_now.tv_sec * APR_UINT64_C(1000000000)
+ (apr_uint64_t)ts_now.tv_nsec;
snprintf(ts_buf, sizeof(ts_buf), "%" APR_UINT64_T_FMT, ns); snprintf(ts_buf, sizeof(ts_buf), "%" APR_UINT64_T_FMT, ns);
dynbuf_append(&buf, "\"timestamp\":", 12); dynbuf_append(&buf, "\"timestamp_ns\":", 15);
dynbuf_append(&buf, ts_buf, -1); dynbuf_append(&buf, ts_buf, -1);
dynbuf_append(&buf, ",", 1); dynbuf_append(&buf, ",", 1);
} }
@ -989,8 +991,8 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
append_json_string(&buf, path); append_json_string(&buf, path);
dynbuf_append(&buf, "\",", 2); dynbuf_append(&buf, "\",", 2);
/* query */ /* query_string */
dynbuf_append(&buf, "\"query\":\"", 9); dynbuf_append(&buf, "\"query_string\":\"", 16);
append_json_string(&buf, query); append_json_string(&buf, query);
dynbuf_append(&buf, "\",", 2); dynbuf_append(&buf, "\",", 2);
@ -1013,11 +1015,15 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
} }
/* client_headers - ordered list of all header names as received from the client, /* client_headers - ordered list of all header names as received from the client,
* preserving original order and case */ * preserving original order and case.
* headers_raw - all headers concatenated "Name: Value\r\n" preserving order.
* header_order_signature - FNV-1a 64-bit hash of the ordered header names. */
{ {
const apr_array_header_t *arr = apr_table_elts(r->headers_in); const apr_array_header_t *arr = apr_table_elts(r->headers_in);
const apr_table_entry_t *elts = (const apr_table_entry_t *)arr->elts; const apr_table_entry_t *elts = (const apr_table_entry_t *)arr->elts;
int first = 1; int first = 1;
apr_uint64_t fnv_hash = APR_UINT64_C(14695981039346656037);
char hash_buf[24];
dynbuf_append(&buf, ",\"client_headers\":[", 19); dynbuf_append(&buf, ",\"client_headers\":[", 19);
for (int i = 0; i < arr->nelts; i++) { for (int i = 0; i < arr->nelts; i++) {
@ -1029,9 +1035,35 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
append_json_string(&buf, elts[i].key); append_json_string(&buf, elts[i].key);
dynbuf_append(&buf, "\"", 1); dynbuf_append(&buf, "\"", 1);
first = 0; first = 0;
/* FNV-1a sur chaque octet du nom de header */
for (const char *p = elts[i].key; *p; p++) {
fnv_hash ^= (apr_uint64_t)(unsigned char)*p;
fnv_hash *= APR_UINT64_C(1099511628211);
}
/* Séparateur entre noms */
fnv_hash ^= (apr_uint64_t)'\n';
fnv_hash *= APR_UINT64_C(1099511628211);
} }
} }
dynbuf_append(&buf, "]", 1); dynbuf_append(&buf, "]", 1);
/* headers_raw — en-têtes bruts dans leur ordre d'émission */
dynbuf_append(&buf, ",\"headers_raw\":\"", 16);
for (int i = 0; i < arr->nelts; i++) {
if (elts[i].key != NULL) {
append_json_string(&buf, elts[i].key);
dynbuf_append(&buf, ": ", 2);
append_json_string(&buf, elts[i].val ? elts[i].val : "");
dynbuf_append(&buf, "\\r\\n", 4);
}
}
dynbuf_append(&buf, "\"", 1);
/* header_order_signature — FNV-1a 64-bit hash de l'ordre des noms */
snprintf(hash_buf, sizeof(hash_buf), "%" APR_UINT64_T_FMT, fnv_hash);
dynbuf_append(&buf, ",\"header_order_signature\":\"", (apr_size_t)-1);
dynbuf_append(&buf, hash_buf, -1);
dynbuf_append(&buf, "\"", 1);
} }
/* Check buffer size before adding headers to prevent memory exhaustion */ /* Check buffer size before adding headers to prevent memory exhaustion */
@ -1096,14 +1128,20 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
} }
} }
/* Champs HTTP/2 passif depuis les notes de connexion (vides si HTTP/1.x) */ /* Champs HTTP/2 passif depuis les notes de connexion (vides si HTTP/1.x).
* Pour les connexions HTTP/2, mod_http2 crée des connexions secondaires (c2)
* par stream. Le preface H2 est stocké dans les notes de la connexion
* primaire (c1), accessible via r->connection->master. */
{ {
const char *h2_fp = apr_table_get(r->connection->notes, H2_NOTE_FINGERPRINT); conn_rec *c1 = r->connection->master ? r->connection->master : r->connection;
const char *h2_set = apr_table_get(r->connection->notes, H2_NOTE_SETTINGS); const char *h2_fp = apr_table_get(c1->notes, H2_NOTE_FINGERPRINT);
const char *h2_wu = apr_table_get(r->connection->notes, H2_NOTE_WUPDATE); const char *h2_set = apr_table_get(c1->notes, H2_NOTE_SETTINGS);
const char *h2_ps = apr_table_get(r->connection->notes, H2_NOTE_PSEUDO_ORDER); const char *h2_wu = apr_table_get(c1->notes, H2_NOTE_WUPDATE);
const char *h2_ps = apr_table_get(c1->notes, H2_NOTE_PSEUDO_ORDER);
const char *h2_pri = apr_table_get(c1->notes, H2_NOTE_HAS_PRIORITY);
if (h2_set && h2_set[0] != '\0') { if (h2_set && h2_set[0] != '\0') {
/* Champs composites (rétrocompatibilité + fingerprint matching) */
dynbuf_append(&buf, ",\"h2_fingerprint\":\"", (apr_size_t)-1); dynbuf_append(&buf, ",\"h2_fingerprint\":\"", (apr_size_t)-1);
append_json_string(&buf, h2_fp ? h2_fp : ""); append_json_string(&buf, h2_fp ? h2_fp : "");
dynbuf_append(&buf, "\",\"h2_settings_fp\":\"", (apr_size_t)-1); dynbuf_append(&buf, "\",\"h2_settings_fp\":\"", (apr_size_t)-1);
@ -1113,11 +1151,35 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
dynbuf_append(&buf, ",\"h2_pseudo_order\":\"", (apr_size_t)-1); dynbuf_append(&buf, ",\"h2_pseudo_order\":\"", (apr_size_t)-1);
append_json_string(&buf, h2_ps ? h2_ps : ""); append_json_string(&buf, h2_ps ? h2_ps : "");
dynbuf_append(&buf, "\"", 1); dynbuf_append(&buf, "\"", 1);
dynbuf_append(&buf, ",\"h2_has_priority\":", (apr_size_t)-1);
dynbuf_append(&buf, (h2_pri && h2_pri[0] == '1') ? "1" : "0", 1);
/* Champs SETTINGS individuels (RFC 9113 §6.5.2).
* Émis uniquement si le client a envoyé le paramètre
* (-1 / absent = non émis → le champ JSON est absent). */
static const struct { const char *note; const char *json; } sfields[] = {
{H2_NOTE_SET_HEADER_TABLE_SIZE, ",\"h2_header_table_size\":"},
{H2_NOTE_SET_ENABLE_PUSH, ",\"h2_enable_push\":"},
{H2_NOTE_SET_MAX_CONCURRENT_STREAMS, ",\"h2_max_concurrent_streams\":"},
{H2_NOTE_SET_INITIAL_WINDOW_SIZE, ",\"h2_initial_window_size\":"},
{H2_NOTE_SET_MAX_FRAME_SIZE, ",\"h2_max_frame_size\":"},
{H2_NOTE_SET_MAX_HEADER_LIST_SIZE, ",\"h2_max_header_list_size\":"},
{H2_NOTE_SET_ENABLE_CONNECT, ",\"h2_enable_connect_protocol\":"},
};
int si;
for (si = 0; si < (int)(sizeof(sfields) / sizeof(sfields[0])); si++) {
const char *v = apr_table_get(c1->notes, sfields[si].note);
if (v) {
dynbuf_append(&buf, sfields[si].json, (apr_size_t)-1);
dynbuf_append(&buf, v, (apr_size_t)-1);
}
}
} }
} }
dynbuf_append(&buf, "}\n", 2); /* Ne pas fermer le JSON ici — les champs de réponse (status_code,
* response_size, duration_ms) seront ajoutés par le hook log_transaction
* qui s'exécute après le traitement complet de la requête. */
if (buf.len > MAX_JSON_SIZE) { if (buf.len > MAX_JSON_SIZE) {
apr_time_t now = apr_time_now(); apr_time_t now = apr_time_now();
apr_time_t error_interval = apr_time_from_sec(cfg->error_report_interval); apr_time_t error_interval = apr_time_from_sec(cfg->error_report_interval);
@ -1137,7 +1199,11 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
return; return;
} }
write_to_socket(buf.data, buf.len, s, cfg, state); /* Stocker le JSON partiel dans les notes de la requête pour log_transaction */
{
char *partial = apr_pstrmemdup(r->pool, buf.data, buf.len);
apr_table_setn(r->notes, "reqin_partial_json", partial);
}
} }
/* ====== Fingerprinting HTTP/2 passif ====== */ /* ====== Fingerprinting HTTP/2 passif ====== */
@ -1307,6 +1373,13 @@ static void h2_parse_preface(conn_rec *c, const char *buf, apr_size_t len)
int has_priority = 0; int has_priority = 0;
int settings_pos_out = 0; int settings_pos_out = 0;
/* Valeurs individuelles des paramètres SETTINGS (RFC 9113 §6.5.2).
* -1 signifie « absent du preface client » (distinction importante :
* un paramètre absent ≠ un paramètre à 0). */
int64_t setting_vals[9];
int i;
for (i = 0; i < 9; i++) setting_vals[i] = -1;
/* Vérification du magic HTTP/2 */ /* Vérification du magic HTTP/2 */
if (len < MAGIC_LEN || memcmp(buf, H2_MAGIC, MAGIC_LEN) != 0) return; if (len < MAGIC_LEN || memcmp(buf, H2_MAGIC, MAGIC_LEN) != 0) return;
@ -1346,6 +1419,12 @@ static void h2_parse_preface(conn_rec *c, const char *buf, apr_size_t len)
settings_pos_out += snprintf(settings_buf + settings_pos_out, settings_pos_out += snprintf(settings_buf + settings_pos_out,
(int)sizeof(settings_buf) - settings_pos_out, (int)sizeof(settings_buf) - settings_pos_out,
"%u:%u", id, val); "%u:%u", id, val);
/* Stocker la valeur individuelle (IDs 1-6 et 8) */
if (id >= 1 && id <= 6)
setting_vals[id] = (int64_t)val;
else if (id == 8)
setting_vals[8] = (int64_t)val;
} }
} else if (type == 0x08u && stream_id == 0) { } else if (type == 0x08u && stream_id == 0) {
@ -1412,6 +1491,26 @@ static void h2_parse_preface(conn_rec *c, const char *buf, apr_size_t len)
apr_table_setn(c->notes, H2_NOTE_SETTINGS, apr_pstrdup(c->pool, settings_buf)); apr_table_setn(c->notes, H2_NOTE_SETTINGS, apr_pstrdup(c->pool, settings_buf));
apr_table_setn(c->notes, H2_NOTE_WUPDATE, apr_pstrdup(c->pool, wupdate_buf)); apr_table_setn(c->notes, H2_NOTE_WUPDATE, apr_pstrdup(c->pool, wupdate_buf));
apr_table_setn(c->notes, H2_NOTE_PSEUDO_ORDER, apr_pstrdup(c->pool, pseudo_buf)); apr_table_setn(c->notes, H2_NOTE_PSEUDO_ORDER, apr_pstrdup(c->pool, pseudo_buf));
apr_table_setn(c->notes, H2_NOTE_HAS_PRIORITY, has_priority ? "1" : "0");
/* Stocker chaque paramètre SETTINGS individuel (absent = note absente) */
static const struct { int id; const char *note; } smap[] = {
{1, H2_NOTE_SET_HEADER_TABLE_SIZE},
{2, H2_NOTE_SET_ENABLE_PUSH},
{3, H2_NOTE_SET_MAX_CONCURRENT_STREAMS},
{4, H2_NOTE_SET_INITIAL_WINDOW_SIZE},
{5, H2_NOTE_SET_MAX_FRAME_SIZE},
{6, H2_NOTE_SET_MAX_HEADER_LIST_SIZE},
{8, H2_NOTE_SET_ENABLE_CONNECT},
};
for (i = 0; i < (int)(sizeof(smap) / sizeof(smap[0])); i++) {
int64_t v = setting_vals[smap[i].id];
if (v >= 0) {
char tmp[16];
snprintf(tmp, sizeof(tmp), "%u", (uint32_t)v);
apr_table_setn(c->notes, smap[i].note, apr_pstrdup(c->pool, tmp));
}
}
} }
/** /**
@ -1419,9 +1518,13 @@ static void h2_parse_preface(conn_rec *c, const char *buf, apr_size_t len)
* *
* S'injecte entre le filtre SSL (déchiffrement) et mod_http2 grâce à sa * S'injecte entre le filtre SSL (déchiffrement) et mod_http2 grâce à sa
* priorité AP_FTYPE_CONNECTION et à l'inscription via APR_HOOK_LAST. * priorité AP_FTYPE_CONNECTION et à l'inscription via APR_HOOK_LAST.
* À la première invocation, effectue une lecture spéculative non-destructive *
* (AP_MODE_SPECULATIVE) de H2_PEEK_SIZE octets, parse le preface HTTP/2, * Stratégie : au lieu d'une lecture spéculative séparée (qui interfère avec
* stocke les résultats dans c->notes, puis se retire de la chaîne. * le handshake SSL et le traitement mod_http2), ce filtre se greffe sur les
* lectures réelles. Il laisse passer les lectures spéculatives (utilisées par
* mod_http2 pour détecter le magic H2) sans intervenir, puis sur la première
* lecture non-spéculative, il inspecte les données déjà lues (via
* apr_brigade_flatten, qui copie sans consommer) pour parser le preface H2.
* *
* @param f Filtre courant. * @param f Filtre courant.
* @param bb Brigade cible pour la lecture réelle. * @param bb Brigade cible pour la lecture réelle.
@ -1430,51 +1533,58 @@ static void h2_parse_preface(conn_rec *c, const char *buf, apr_size_t len)
* @param readbytes Nombre d'octets demandés. * @param readbytes Nombre d'octets demandés.
* @return Statut APR de la lecture réelle. * @return Statut APR de la lecture réelle.
*/ */
/*
 * Connection input filter that peeks at the first bytes of a connection to
 * capture the HTTP/2 client preface.
 *
 * On invocation, if the H2_NOTE_PARSED connection note is not set yet, it
 * performs a speculative blocking read of up to H2_PEEK_SIZE bytes from the
 * next filter, flattens them into a stack buffer, hands them to
 * h2_parse_preface() and sets H2_NOTE_PARSED. It then removes itself from the
 * filter chain and forwards the caller's read unchanged to the next filter.
 *
 * f         - current filter.
 * bb        - brigade to fill for the caller's read.
 * mode      - read mode requested by the caller (forwarded to f->next).
 * block     - blocking type requested by the caller (forwarded to f->next).
 * readbytes - number of bytes requested by the caller.
 * Returns the status of the forwarded read.
 */
static apr_status_t reqin_h2_filter(ap_filter_t *f, apr_bucket_brigade *bb,
                                    ap_input_mode_t mode, apr_read_type_e block,
                                    apr_off_t readbytes)
{
    conn_rec *c = f->c;
    if (!apr_table_get(c->notes, H2_NOTE_PARSED)) {
        /* Speculative read: requested in AP_MODE_SPECULATIVE so the peeked
         * bytes are meant to stay available for the forwarded read below */
        apr_bucket_brigade *peek = apr_brigade_create(c->pool, c->bucket_alloc);
        apr_status_t rv = ap_get_brigade(f->next, peek,
                                         AP_MODE_SPECULATIVE, APR_BLOCK_READ,
                                         H2_PEEK_SIZE);
        if (rv == APR_SUCCESS) {
            char peek_buf[H2_PEEK_SIZE];
            apr_size_t peek_len = sizeof(peek_buf);
            if (apr_brigade_flatten(peek, peek_buf, &peek_len) == APR_SUCCESS
                && peek_len > 0) {
                h2_parse_preface(c, peek_buf, peek_len);
            }
        }
        apr_brigade_cleanup(peek);
        /* Mark the connection as processed even when the peek failed */
        apr_table_setn(c->notes, H2_NOTE_PARSED, "1");
    }
    /* The filter is only needed once per connection */
    ap_remove_input_filter(f);
    return ap_get_brigade(f->next, bb, mode, block, readbytes);
}
/** /**
* @brief Hook pre_connection — enregistre le filtre HTTP/2 sur chaque connexion. * @brief Filtre d'entrée de connexion pour la capture passive du preface HTTP/2.
* *
* Appelé à l'établissement de chaque connexion. Inscrit reqin_h2_filter dans * S'injecte entre le filtre SSL (déchiffrement) et mod_http2 grâce à sa
* la chaîne d'entrée avec APR_HOOK_LAST, ce qui garantit son positionnement * priorité AP_FTYPE_CONNECTION et à l'inscription via APR_HOOK_LAST.
* après le filtre SSL (qui s'inscrit avec APR_HOOK_MIDDLE) et donc son accès *
* au flux HTTP/2 en clair. * Stratégie : au lieu d'une lecture spéculative séparée (qui interfère avec
* le traitement mod_http2), ce filtre se greffe sur les lectures réelles.
* Il laisse passer les lectures spéculatives (utilisées par mod_http2 pour
* détecter le magic H2) sans intervenir, puis sur la première lecture
* non-spéculative, il inspecte les données déjà lues (via apr_brigade_flatten,
* qui copie sans consommer) pour parser le preface H2.
*
* @param f Filtre courant.
* @param bb Brigade cible pour la lecture réelle.
* @param mode Mode de lecture demandé (transmis à f->next).
* @param block Type de blocage (transmis à f->next).
* @param readbytes Nombre d'octets demandés.
* @return Statut APR de la lecture réelle.
*/
/**
* @brief Hook process_connection — capture passive du preface HTTP/2.
*
* S'exécute AVANT mod_http2 (APR_HOOK_FIRST) et effectue une lecture
* spéculative non-destructive de H2_PEEK_SIZE octets sur la connexion.
* Si le preface HTTP/2 (RFC 9113 §3.4) est détecté, parse les frames
* SETTINGS, WINDOW_UPDATE et le premier HEADERS, puis stocke les
* résultats dans c->notes. Retourne DECLINED pour laisser mod_http2
* (ou le handler HTTP/1.x) prendre le relais.
* *
* @param c Connexion Apache. * @param c Connexion Apache.
* @param csd Socket descriptor (non utilisé). * @param csd Socket descriptor (non utilisé).
* @return DECLINED — ne gère pas la connexion, laisse les hooks suivants.
*/ */
static void reqin_h2_add_filter(conn_rec *c, void *csd) static int reqin_h2_process_connection(conn_rec *c, void *csd)
{ {
(void)csd; (void)csd;
ap_add_input_filter(H2_FILTER_NAME, NULL, NULL, c);
apr_bucket_brigade *bb = apr_brigade_create(c->pool, c->bucket_alloc);
apr_status_t rv = ap_get_brigade(c->input_filters, bb,
AP_MODE_SPECULATIVE, APR_BLOCK_READ,
H2_PEEK_SIZE);
if (rv == APR_SUCCESS) {
char buf[H2_PEEK_SIZE];
apr_size_t len = sizeof(buf);
if (apr_brigade_flatten(bb, buf, &len) == APR_SUCCESS && len >= 24) {
h2_parse_preface(c, buf, len);
}
}
apr_brigade_destroy(bb);
return DECLINED;
} }
/* ====== Hooks Apache ====== */ /* ====== Hooks Apache ====== */
@ -1506,6 +1616,73 @@ static int reqin_log_post_read_request(request_rec *r)
return DECLINED; return DECLINED;
} }
/**
 * @brief log_transaction hook — completes the JSON record with response fields and sends it.
 *
 * Fetches the partial JSON object stored earlier in r->notes under the key
 * "reqin_partial_json", appends status_code, response_size and duration_ms,
 * closes the object with "}\n" and hands it to write_to_socket().
 *
 * The response fields are taken from the last request of the r->next chain:
 * after an internal redirect the response is produced by that final request,
 * so the initial request's status and byte count do not describe what was
 * sent to the client.
 *
 * @param r request_rec — the processed request.
 * @return Always DECLINED so that other modules can log as well.
 */
static int reqin_log_log_transaction(request_rec *r)
{
    reqin_log_server_conf_t *srv_conf;
    reqin_log_config_t *cfg;
    reqin_log_child_state_t *state;
    const request_rec *final_r;
    const char *partial;
    dynbuf_t buf;
    char num_buf[32];
    apr_time_t duration_us;

    /* Only the initial main request carries a record: skip sub-requests
     * and internally redirected requests */
    if (r->main != NULL || r->prev != NULL) {
        return DECLINED;
    }

    srv_conf = get_server_conf(r->server);
    if (srv_conf == NULL || srv_conf->config == NULL ||
        !srv_conf->config->enabled || srv_conf->config->socket_path == NULL) {
        return DECLINED;
    }

    /* No partial record means the request phase did not log this request */
    partial = apr_table_get(r->notes, "reqin_partial_json");
    if (partial == NULL) {
        return DECLINED;
    }

    cfg = srv_conf->config;
    state = &srv_conf->child_state;

    /* Follow internal redirects to the request that produced the response */
    final_r = r;
    while (final_r->next != NULL) {
        final_r = final_r->next;
    }

    dynbuf_init(&buf, r->pool, 4096);
    dynbuf_append(&buf, partial, -1);

    /* status_code */
    snprintf(num_buf, sizeof(num_buf), "%d", final_r->status);
    dynbuf_append(&buf, ",\"status_code\":", 15);
    dynbuf_append(&buf, num_buf, -1);

    /* response_size (bytes sent to client) */
    snprintf(num_buf, sizeof(num_buf), "%" APR_INT64_T_FMT,
             (apr_int64_t)final_r->bytes_sent);
    dynbuf_append(&buf, ",\"response_size\":", 17);
    dynbuf_append(&buf, num_buf, -1);

    /* duration_ms (request processing time in milliseconds); clamp to 0 in
     * case the wall clock stepped backwards since the request was received */
    duration_us = apr_time_now() - r->request_time;
    if (duration_us < 0) {
        duration_us = 0;
    }
    snprintf(num_buf, sizeof(num_buf), "%" APR_INT64_T_FMT,
             (apr_int64_t)(duration_us / 1000));
    dynbuf_append(&buf, ",\"duration_ms\":", 15);
    dynbuf_append(&buf, num_buf, -1);

    /* Close the JSON object */
    dynbuf_append(&buf, "}\n", 2);

    /* Records larger than MAX_JSON_SIZE are dropped rather than sent */
    if (buf.len <= MAX_JSON_SIZE) {
        write_to_socket(buf.data, buf.len, r->server, cfg, state);
    }

    return DECLINED;
}
/** /**
* @brief Hook child_init — initialise l'état du processus enfant et établit la connexion socket. * @brief Hook child_init — initialise l'état du processus enfant et établit la connexion socket.
* *
@ -1627,11 +1804,11 @@ static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t
/*
 * Registers all hooks of the module.
 *
 * p - registration pool (unused).
 */
static void reqin_log_register_hooks(apr_pool_t *p)
{
    /* APR_HOOK_FIRST alone does not order us relative to other modules that
     * also register early; naming mod_http2 as a successor makes "before
     * mod_http2" explicit instead of depending on module load order. The
     * entry is ignored when mod_http2 is not loaded. */
    static const char *const run_before_h2[] = { "mod_http2.c", NULL };

    (void)p;

    /* process_connection hook ahead of mod_http2 to capture the H2 preface */
    ap_hook_process_connection(reqin_h2_process_connection, NULL, run_before_h2,
                               APR_HOOK_FIRST);
    ap_hook_post_config(reqin_log_post_config, NULL, NULL, APR_HOOK_MIDDLE);
    ap_hook_post_read_request(reqin_log_post_read_request, NULL, NULL, APR_HOOK_MIDDLE);
    ap_hook_log_transaction(reqin_log_log_transaction, NULL, NULL, APR_HOOK_MIDDLE);
    ap_hook_child_init(reqin_log_child_init, NULL, NULL, APR_HOOK_MIDDLE);
}

View File

@ -36,14 +36,21 @@ extern module AP_MODULE_DECLARE_DATA reqin_log_module;
/* ====== Fingerprinting HTTP/2 passif ====== */ /* ====== Fingerprinting HTTP/2 passif ====== */
/* Nom du filtre d'entrée de connexion pour la capture du preface HTTP/2 */
#define H2_FILTER_NAME "REQIN_H2_PEEK"
/* Clés des notes de connexion stockant le fingerprint HTTP/2 parsé */ /* Clés des notes de connexion stockant le fingerprint HTTP/2 parsé */
#define H2_NOTE_FINGERPRINT "reqin_h2_fp" /* Fingerprint Akamai complet */ #define H2_NOTE_FINGERPRINT "reqin_h2_fp" /* Fingerprint Akamai complet */
#define H2_NOTE_SETTINGS "reqin_h2_set" /* Entrées SETTINGS brutes */ #define H2_NOTE_SETTINGS "reqin_h2_set" /* Entrées SETTINGS brutes */
#define H2_NOTE_WUPDATE "reqin_h2_wu" /* Incrément WINDOW_UPDATE */ #define H2_NOTE_WUPDATE "reqin_h2_wu" /* Incrément WINDOW_UPDATE */
#define H2_NOTE_PSEUDO_ORDER "reqin_h2_ps" /* Ordre pseudo-headers */ #define H2_NOTE_PSEUDO_ORDER "reqin_h2_ps" /* Ordre pseudo-headers */
#define H2_NOTE_HAS_PRIORITY "reqin_h2_pri" /* Flag PRIORITY présent */
#define H2_NOTE_PARSED "reqin_h2_done" /* Marqueur "déjà parsé" */ #define H2_NOTE_PARSED "reqin_h2_done" /* Marqueur "déjà parsé" */
/* Clés des notes pour chaque paramètre SETTINGS individuel (RFC 9113 §6.5.2) */
#define H2_NOTE_SET_HEADER_TABLE_SIZE "reqin_h2_s1" /* ID 1 */
#define H2_NOTE_SET_ENABLE_PUSH "reqin_h2_s2" /* ID 2 */
#define H2_NOTE_SET_MAX_CONCURRENT_STREAMS "reqin_h2_s3" /* ID 3 */
#define H2_NOTE_SET_INITIAL_WINDOW_SIZE "reqin_h2_s4" /* ID 4 */
#define H2_NOTE_SET_MAX_FRAME_SIZE "reqin_h2_s5" /* ID 5 */
#define H2_NOTE_SET_MAX_HEADER_LIST_SIZE "reqin_h2_s6" /* ID 6 */
#define H2_NOTE_SET_ENABLE_CONNECT "reqin_h2_s8" /* ID 8 */
#endif /* MOD_REQIN_LOG_H */ #endif /* MOD_REQIN_LOG_H */

View File

@ -42,6 +42,7 @@ type TCPMeta struct {
MSS uint16 `json:"mss,omitempty"` MSS uint16 `json:"mss,omitempty"`
WindowScale uint8 `json:"window_scale,omitempty"` WindowScale uint8 `json:"window_scale,omitempty"`
Options []string `json:"options"` Options []string `json:"options"`
OptionKinds []uint8 `json:"-"` // Raw TCP option kind numbers for JA4T
} }
// RawPacket represents a raw packet captured from the network // RawPacket represents a raw packet captured from the network
@ -73,6 +74,7 @@ type TLSClientHello struct {
type Fingerprints struct { type Fingerprints struct {
JA4 string `json:"ja4"` JA4 string `json:"ja4"`
JA4Hash string `json:"ja4_hash,omitempty"` // Internal use, not serialized to LogRecord JA4Hash string `json:"ja4_hash,omitempty"` // Internal use, not serialized to LogRecord
JA4T string `json:"ja4t,omitempty"`
JA3 string `json:"ja3,omitempty"` JA3 string `json:"ja3,omitempty"`
JA3Hash string `json:"ja3_hash,omitempty"` JA3Hash string `json:"ja3_hash,omitempty"`
} }
@ -111,6 +113,7 @@ type LogRecord struct {
// Fingerprints // Fingerprints
// Note: ja4_hash is NOT included - the JA4 format already includes its own hash portions // Note: ja4_hash is NOT included - the JA4 format already includes its own hash portions
JA4 string `json:"ja4"` JA4 string `json:"ja4"`
JA4T string `json:"ja4t,omitempty"`
JA3 string `json:"ja3,omitempty"` JA3 string `json:"ja3,omitempty"`
JA3Hash string `json:"ja3_hash,omitempty"` JA3Hash string `json:"ja3_hash,omitempty"`
@ -265,6 +268,7 @@ func NewLogRecord(ch TLSClientHello, fp *Fingerprints) LogRecord {
if fp != nil { if fp != nil {
rec.JA4 = fp.JA4 rec.JA4 = fp.JA4
rec.JA4T = fp.JA4T
rec.JA3 = fp.JA3 rec.JA3 = fp.JA3
rec.JA3Hash = fp.JA3Hash rec.JA3Hash = fp.JA3Hash
} }

View File

@ -4,6 +4,8 @@ package fingerprint
import ( import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"strconv"
"strings"
"github.com/antitbone/ja4/sentinel/api" "github.com/antitbone/ja4/sentinel/api"
@ -59,14 +61,33 @@ func (e *EngineImpl) FromClientHello(ch api.TLSClientHello) (*api.Fingerprints,
// This is kept for internal use but NOT serialized to LogRecord // This is kept for internal use but NOT serialized to LogRecord
ja4Hash := extractJA4Hash(ja4) ja4Hash := extractJA4Hash(ja4)
// Generate JA4T fingerprint from TCP SYN parameters
ja4t := computeJA4T(ch.TCPMeta)
return &api.Fingerprints{ return &api.Fingerprints{
JA4: ja4, JA4: ja4,
JA4Hash: ja4Hash, // Internal use only - not serialized to LogRecord JA4Hash: ja4Hash, // Internal use only - not serialized to LogRecord
JA4T: ja4t,
JA3: ja3, JA3: ja3,
JA3Hash: ja3Hash, JA3Hash: ja3Hash,
}, nil }, nil
} }
// computeJA4T builds the JA4T fingerprint string from the TCP SYN metadata.
//
// Layout: {WindowSize}_{OptionKinds}_{WindowScale}_{MSS}, where OptionKinds is
// the list of raw TCP option kind numbers in their original order, joined
// with "-". When no option kinds were recorded the segment is left empty,
// producing for example "8192__0_0".
//
// NOTE(review): the published FoxIO JA4T layout appears to place MSS before
// the window scale — confirm that the ordering used here is intentional.
func computeJA4T(tcp api.TCPMeta) string {
	// Assemble the dash-separated option list incrementally; an empty or nil
	// slice simply leaves the builder empty.
	var kinds strings.Builder
	for idx, kind := range tcp.OptionKinds {
		if idx > 0 {
			kinds.WriteByte('-')
		}
		kinds.WriteString(strconv.Itoa(int(kind)))
	}

	return fmt.Sprintf("%d_%s_%d_%d", tcp.WindowSize, kinds.String(), tcp.WindowScale, tcp.MSS)
}
// extractJA4Hash extracts the hash portion from a JA4 string // extractJA4Hash extracts the hash portion from a JA4 string
// JA4 format: <base>_<sni_hash>_<cipher_hash> -> returns "<sni_hash>_<cipher_hash>" // JA4 format: <base>_<sni_hash>_<cipher_hash> -> returns "<sni_hash>_<cipher_hash>"
func extractJA4Hash(ja4 string) string { func extractJA4Hash(ja4 string) string {

View File

@ -487,3 +487,99 @@ t.Errorf("expected 'somehash', got %q", hash)
var _ interface { var _ interface {
FromClientHello(api.TLSClientHello) (*api.Fingerprints, error) FromClientHello(api.TLSClientHello) (*api.Fingerprints, error)
} = (*EngineImpl)(nil) } = (*EngineImpl)(nil)
// TestComputeJA4T runs computeJA4T over a table of TCP SYN profiles and checks
// the exact fingerprint string produced for each, including a SYN that carries
// no TCP options.
func TestComputeJA4T(t *testing.T) {
	type ja4tCase struct {
		name string
		tcp  api.TCPMeta
		want string
	}

	cases := []ja4tCase{
		{
			name: "linux_5x_typical",
			tcp: api.TCPMeta{
				WindowSize:  64240,
				OptionKinds: []uint8{2, 4, 8, 1, 3},
				WindowScale: 7,
				MSS:         1460,
			},
			want: "64240_2-4-8-1-3_7_1460",
		},
		{
			name: "windows_11_typical",
			tcp: api.TCPMeta{
				WindowSize:  64240,
				OptionKinds: []uint8{2, 4, 8, 1, 3},
				WindowScale: 8,
				MSS:         1460,
			},
			want: "64240_2-4-8-1-3_8_1460",
		},
		{
			name: "macos_14_typical",
			tcp: api.TCPMeta{
				WindowSize:  65535,
				OptionKinds: []uint8{2, 4, 8, 1, 3},
				WindowScale: 6,
				MSS:         1460,
			},
			want: "65535_2-4-8-1-3_6_1460",
		},
		{
			// No options recorded: the option segment must be empty.
			name: "no_options",
			tcp: api.TCPMeta{
				WindowSize:  8192,
				OptionKinds: nil,
				WindowScale: 0,
				MSS:         0,
			},
			want: "8192__0_0",
		},
		{
			// Same shape as the typical profiles but without option kind 8.
			name: "windows_no_ts",
			tcp: api.TCPMeta{
				WindowSize:  8192,
				OptionKinds: []uint8{2, 4, 1, 3},
				WindowScale: 2,
				MSS:         1460,
			},
			want: "8192_2-4-1-3_2_1460",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := computeJA4T(tc.tcp); got != tc.want {
				t.Errorf("computeJA4T() = %q, want %q", got, tc.want)
			}
		})
	}
}
// TestFromClientHello_JA4T_Populated verifies that FromClientHello derives the
// JA4T field from the TCP metadata attached to the ClientHello.
func TestFromClientHello_JA4T_Populated(t *testing.T) {
	const wantJA4T = "64240_2-4-8-1-3_7_1460"

	payload := buildMinimalClientHelloForTest()
	hello := api.TLSClientHello{
		Payload: payload,
		TCPMeta: api.TCPMeta{
			WindowSize:  64240,
			MSS:         1460,
			WindowScale: 7,
			OptionKinds: []uint8{2, 4, 8, 1, 3},
			Options:     []string{"MSS", "SACK", "TS", "NOP", "WS"},
		},
	}

	eng := NewEngine()
	fp, err := eng.FromClientHello(hello)
	if err != nil {
		// Without a result there is nothing further to check.
		t.Fatalf("FromClientHello() error = %v", err)
	}

	if got := fp.JA4T; got != wantJA4T {
		t.Errorf("JA4T = %q, want %q", got, wantJA4T)
	}
}

View File

@ -617,6 +617,7 @@ func extractTCPMeta(tcp *layers.TCP) api.TCPMeta {
meta := api.TCPMeta{ meta := api.TCPMeta{
WindowSize: tcp.Window, WindowSize: tcp.Window,
Options: make([]string, 0, len(tcp.Options)), Options: make([]string, 0, len(tcp.Options)),
OptionKinds: make([]uint8, 0, len(tcp.Options)),
} }
// Parse TCP options // Parse TCP options
@ -635,20 +636,26 @@ func extractTCPMeta(tcp *layers.TCP) api.TCPMeta {
} else { } else {
meta.Options = append(meta.Options, "MSS_INVALID") meta.Options = append(meta.Options, "MSS_INVALID")
} }
meta.OptionKinds = append(meta.OptionKinds, uint8(opt.OptionType))
case layers.TCPOptionKindWindowScale: case layers.TCPOptionKindWindowScale:
if len(opt.OptionData) > 0 { if len(opt.OptionData) > 0 {
meta.WindowScale = opt.OptionData[0] meta.WindowScale = opt.OptionData[0]
} }
meta.Options = append(meta.Options, "WS") meta.Options = append(meta.Options, "WS")
meta.OptionKinds = append(meta.OptionKinds, uint8(opt.OptionType))
case layers.TCPOptionKindSACKPermitted: case layers.TCPOptionKindSACKPermitted:
meta.Options = append(meta.Options, "SACK") meta.Options = append(meta.Options, "SACK")
meta.OptionKinds = append(meta.OptionKinds, uint8(opt.OptionType))
case layers.TCPOptionKindSACK: case layers.TCPOptionKindSACK:
// SACK blocks (actual SACK data, not just permitted) // SACK blocks (actual SACK data, not just permitted)
meta.Options = append(meta.Options, "SACK") meta.Options = append(meta.Options, "SACK")
meta.OptionKinds = append(meta.OptionKinds, uint8(opt.OptionType))
case layers.TCPOptionKindTimestamps: case layers.TCPOptionKindTimestamps:
meta.Options = append(meta.Options, "TS") meta.Options = append(meta.Options, "TS")
meta.OptionKinds = append(meta.OptionKinds, uint8(opt.OptionType))
default: default:
meta.Options = append(meta.Options, fmt.Sprintf("OPT%d", opt.OptionType)) meta.Options = append(meta.Options, fmt.Sprintf("OPT%d", opt.OptionType))
meta.OptionKinds = append(meta.OptionKinds, uint8(opt.OptionType))
} }
} }

View File

@ -10,7 +10,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs_raw
ENGINE = MergeTree ENGINE = MergeTree
PARTITION BY toDate(ingest_time) PARTITION BY toDate(ingest_time)
ORDER BY ingest_time ORDER BY ingest_time
TTL ingest_time + INTERVAL 1 DAY TTL ingest_time + INTERVAL 2 HOUR
SETTINGS SETTINGS
index_granularity = 8192, index_granularity = 8192,
ttl_only_drop_parts = 1; ttl_only_drop_parts = 1;

View File

@ -46,6 +46,15 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
`b_timestamp` UInt64, `b_timestamp` UInt64,
`conn_id` String CODEC(ZSTD(3)), `conn_id` String CODEC(ZSTD(3)),
-- Response metadata (captured at log_transaction phase)
`status_code` UInt16 DEFAULT 0,
`response_size` UInt64 DEFAULT 0,
`duration_ms` UInt64 DEFAULT 0,
-- Header fingerprinting
`headers_raw` String DEFAULT '' CODEC(ZSTD(3)),
`header_order_signature` String DEFAULT '' CODEC(ZSTD(3)),
-- IP metadata -- IP metadata
`ip_meta_df` UInt8, `ip_meta_df` UInt8,
`ip_meta_id` UInt16, `ip_meta_id` UInt16,
@ -94,6 +103,17 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
`h2_settings_fp` String DEFAULT '' CODEC(ZSTD(3)), `h2_settings_fp` String DEFAULT '' CODEC(ZSTD(3)),
`h2_window_update` UInt32 DEFAULT 0, `h2_window_update` UInt32 DEFAULT 0,
`h2_pseudo_order` LowCardinality(String) DEFAULT '', `h2_pseudo_order` LowCardinality(String) DEFAULT '',
`h2_has_priority` UInt8 DEFAULT 0,
-- Paramètres SETTINGS HTTP/2 individuels (RFC 9113 §6.5.2)
-- -1 = absent du preface client (le client n'a pas envoyé ce paramètre)
`h2_header_table_size` Int32 DEFAULT -1,
`h2_enable_push` Int32 DEFAULT -1,
`h2_max_concurrent_streams` Int32 DEFAULT -1,
`h2_initial_window_size` Int64 DEFAULT -1,
`h2_max_frame_size` Int32 DEFAULT -1,
`h2_max_header_list_size` Int32 DEFAULT -1,
`h2_enable_connect_protocol` Int32 DEFAULT -1,
-- Index bloom_filter sur src_ip : les requêtes WHERE src_ip = X sautent -- Index bloom_filter sur src_ip : les requêtes WHERE src_ip = X sautent
-- les granules qui ne contiennent pas cette IP (~90% des granules en pratique). -- les granules qui ne contiennent pas cette IP (~90% des granules en pratique).
@ -104,7 +124,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
ENGINE = MergeTree ENGINE = MergeTree
PARTITION BY log_date PARTITION BY log_date
ORDER BY (time, src_ip, dst_ip, ja4) ORDER BY (time, src_ip, dst_ip, ja4)
TTL log_date + INTERVAL 7 DAY TTL log_date + INTERVAL 30 DAY
SETTINGS SETTINGS
index_granularity = 8192, index_granularity = 8192,
ttl_only_drop_parts = 1; ttl_only_drop_parts = 1;
@ -142,14 +162,19 @@ SELECT
coalesce(JSONExtractString(raw_json, 'scheme'), '') AS scheme, coalesce(JSONExtractString(raw_json, 'scheme'), '') AS scheme,
coalesce(JSONExtractString(raw_json, 'host'), '') AS host, coalesce(JSONExtractString(raw_json, 'host'), '') AS host,
coalesce(JSONExtractString(raw_json, 'path'), '') AS path, coalesce(JSONExtractString(raw_json, 'path'), '') AS path,
coalesce(JSONExtractString(raw_json, 'query'), '') AS query, coalesce(JSONExtractString(raw_json, 'query_string'), JSONExtractString(raw_json, 'query'), '') AS query,
coalesce(JSONExtractString(raw_json, 'http_version'), '') AS http_version, coalesce(JSONExtractString(raw_json, 'http_version'), '') AS http_version,
coalesce(JSONExtractString(raw_json, 'orphan_side'), '') AS orphan_side, coalesce(JSONExtractString(raw_json, 'orphan_side'), '') AS orphan_side,
toUInt8(coalesce(JSONExtractBool(raw_json, 'correlated'), 0)) AS correlated, toUInt8(coalesce(JSONExtractUInt(raw_json, 'correlated'), 0)) AS correlated,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'keepalives'), 0)) AS keepalives, toUInt16(coalesce(JSONExtractUInt(raw_json, 'keepalives'), 0)) AS keepalives,
coalesce(JSONExtractUInt(raw_json, 'a_timestamp'), 0) AS a_timestamp, coalesce(JSONExtractUInt(raw_json, 'a_timestamp'), 0) AS a_timestamp,
coalesce(JSONExtractUInt(raw_json, 'b_timestamp'), 0) AS b_timestamp, coalesce(JSONExtractUInt(raw_json, 'b_timestamp'), 0) AS b_timestamp,
coalesce(JSONExtractString(raw_json, 'conn_id'), '') AS conn_id, coalesce(JSONExtractString(raw_json, 'conn_id'), '') AS conn_id,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'status_code'), 0)) AS status_code,
coalesce(JSONExtractUInt(raw_json, 'response_size'), 0) AS response_size,
coalesce(JSONExtractUInt(raw_json, 'duration_ms'), 0) AS duration_ms,
coalesce(JSONExtractString(raw_json, 'headers_raw'), '') AS headers_raw,
coalesce(JSONExtractString(raw_json, 'header_order_signature'), '') AS header_order_signature,
toUInt8(coalesce(JSONExtractBool(raw_json, 'ip_meta_df'), 0)) AS ip_meta_df, toUInt8(coalesce(JSONExtractBool(raw_json, 'ip_meta_df'), 0)) AS ip_meta_df,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_id'), 0)) AS ip_meta_id, toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_id'), 0)) AS ip_meta_id,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_total_length'), 0)) AS ip_meta_total_length, toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_total_length'), 0)) AS ip_meta_total_length,
@ -204,6 +229,16 @@ SELECT
coalesce(JSONExtractString(raw_json, 'h2_fingerprint'), '') AS h2_fingerprint, coalesce(JSONExtractString(raw_json, 'h2_fingerprint'), '') AS h2_fingerprint,
coalesce(JSONExtractString(raw_json, 'h2_settings_fp'), '') AS h2_settings_fp, coalesce(JSONExtractString(raw_json, 'h2_settings_fp'), '') AS h2_settings_fp,
toUInt32(coalesce(JSONExtractUInt(raw_json, 'h2_window_update'), 0)) AS h2_window_update, toUInt32(coalesce(JSONExtractUInt(raw_json, 'h2_window_update'), 0)) AS h2_window_update,
coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order,
toUInt8(coalesce(JSONExtractUInt(raw_json, 'h2_has_priority'), 0)) AS h2_has_priority,
-- Paramètres SETTINGS HTTP/2 individuels (-1 = absent du preface client)
toInt32(if(JSONHas(raw_json, 'h2_header_table_size'), JSONExtractInt(raw_json, 'h2_header_table_size'), -1)) AS h2_header_table_size,
toInt32(if(JSONHas(raw_json, 'h2_enable_push'), JSONExtractInt(raw_json, 'h2_enable_push'), -1)) AS h2_enable_push,
toInt32(if(JSONHas(raw_json, 'h2_max_concurrent_streams'), JSONExtractInt(raw_json, 'h2_max_concurrent_streams'), -1)) AS h2_max_concurrent_streams,
toInt64(if(JSONHas(raw_json, 'h2_initial_window_size'), JSONExtractInt(raw_json, 'h2_initial_window_size'), -1)) AS h2_initial_window_size,
toInt32(if(JSONHas(raw_json, 'h2_max_frame_size'), JSONExtractInt(raw_json, 'h2_max_frame_size'), -1)) AS h2_max_frame_size,
toInt32(if(JSONHas(raw_json, 'h2_max_header_list_size'), JSONExtractInt(raw_json, 'h2_max_header_list_size'), -1)) AS h2_max_header_list_size,
toInt32(if(JSONHas(raw_json, 'h2_enable_connect_protocol'), JSONExtractInt(raw_json, 'h2_enable_connect_protocol'), -1)) AS h2_enable_connect_protocol
FROM ja4_logs.http_logs_raw; FROM ja4_logs.http_logs_raw;

View File

@ -144,7 +144,10 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_host_ip_ja4_1h
) )
ENGINE = AggregatingMergeTree() ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip, ja4, host) ORDER BY (window_start, src_ip, ja4, host)
SETTINGS deduplicate_merge_projection_mode = 'drop'; TTL window_start + INTERVAL 7 DAY
SETTINGS
deduplicate_merge_projection_mode = 'drop',
ttl_only_drop_parts = 1;
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
@ -177,7 +180,15 @@ SELECT
sum(IF(match(src.path, '(?i)\.(png|jpg|jpeg|gif|css|js|ico|woff2|svg|eot)$'), 1, 0)) AS count_assets, sum(IF(match(src.path, '(?i)\.(png|jpg|jpeg|gif|css|js|ico|woff2|svg|eot)$'), 1, 0)) AS count_assets,
sum(IF(position(src.client_headers, 'Referer') = 0, 1, 0)) AS count_no_referer, sum(IF(position(src.client_headers, 'Referer') = 0, 1, 0)) AS count_no_referer,
uniqState(src.header_user_agent) AS uniq_ua, uniqState(src.header_user_agent) AS uniq_ua,
0 AS max_requests_per_sec, -- TODO(P0): calculer via sous-requête par seconde (impossible dans un seul GROUP BY) toUInt32(if(count() > 0,
arrayMax(
arrayMap(
s -> toUInt64(countEqual(groupArray(toStartOfSecond(src.time)), s)),
arrayDistinct(groupArray(toStartOfSecond(src.time)))
)
),
0
)) AS max_requests_per_sec,
varPopState(toFloat64(length(replaceAll(src.path, '/', '//')) - length(src.path))) AS url_depth_variance, varPopState(toFloat64(length(replaceAll(src.path, '/', '//')) - length(src.path))) AS url_depth_variance,
sum(IF(src.ip_meta_total_length < 60 OR src.ip_meta_total_length > 1500, 1, 0)) AS count_anomalous_payload, sum(IF(src.ip_meta_total_length < 60 OR src.ip_meta_total_length > 1500, 1, 0)) AS count_anomalous_payload,
uniqState(src.ja3) AS uniq_ja3, uniqState(src.ja3) AS uniq_ja3,
@ -224,7 +235,9 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_header_fingerprint_1h
sec_fetch_dest SimpleAggregateFunction(any, String) sec_fetch_dest SimpleAggregateFunction(any, String)
) )
ENGINE = AggregatingMergeTree() ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip); ORDER BY (window_start, src_ip)
TTL window_start + INTERVAL 7 DAY
SETTINGS ttl_only_drop_parts = 1;
DROP VIEW IF EXISTS ja4_processing.mv_agg_header_fingerprint_1h; DROP VIEW IF EXISTS ja4_processing.mv_agg_header_fingerprint_1h;
@ -249,3 +262,36 @@ SELECT
any(src.header_sec_fetch_dest) AS sec_fetch_dest any(src.header_sec_fetch_dest) AS sec_fetch_dest
FROM ja4_logs.http_logs AS src FROM ja4_logs.http_logs AS src
GROUP BY window_start, src.src_ip; GROUP BY window_start, src.src_ip;
-- -----------------------------------------------------------------------------
-- unknown_h2_fingerprints — review queue for unknown H2 signatures (§3.9.5)
--
-- Sessions whose H2 fingerprint matches no known family
-- (browser_match_max < 0.45) but which exhibit browser-like behaviour
-- (browser_confidence ≥ 0.55, Sec-Fetch-* present, TLS 1.3).
-- Used to progressively enrich browser_signatures.
--
-- NOTE(review): ReplacingMergeTree keeps a single row per sorting key
-- (h2_fingerprint, ja4, src_ip), versioned by observed_at; it does not sum
-- hit_count — confirm the writer accumulates hit_count itself.
-- NOTE(review): there is no PARTITION BY here; with ttl_only_drop_parts = 1 a
-- part is only dropped once all of its rows have expired — confirm that the
-- 30-day TTL takes effect as intended.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ja4_processing.unknown_h2_fingerprints
(
-- Observation time; also the ReplacingMergeTree version column and the TTL base.
observed_at DateTime DEFAULT now(),
src_ip IPv6,
ja4 String CODEC(ZSTD(3)),
-- HTTP/2 fingerprint fields, named like the h2_* columns of ja4_logs.http_logs.
h2_fingerprint String CODEC(ZSTD(3)),
h2_settings_fp String CODEC(ZSTD(3)),
h2_window_update UInt32,
h2_pseudo_order LowCardinality(String),
h2_has_priority UInt8,
browser_confidence_score Float32,
header_user_agent String CODEC(ZSTD(3)),
tls_version LowCardinality(String),
hit_count UInt64 DEFAULT 1,
-- minmax skip index on observed_at.
INDEX idx_observed_at observed_at TYPE minmax GRANULARITY 4
)
ENGINE = ReplacingMergeTree(observed_at)
ORDER BY (h2_fingerprint, ja4, src_ip)
TTL observed_at + INTERVAL 30 DAY
SETTINGS
index_granularity = 8192,
ttl_only_drop_parts = 1;

View File

@ -73,7 +73,7 @@ SETTINGS
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
-- ml_all_scores — all classifications (no threshold, for observability) -- ml_all_scores — all classifications (no threshold, for observability)
-- --
-- PARTITION BY date : TTL de 3 jours → les partitions expirées sont supprimées -- PARTITION BY date : TTL de 7 jours → les partitions expirées sont supprimées
-- entièrement sans avoir à lire chaque granule (ttl_only_drop_parts). -- entièrement sans avoir à lire chaque granule (ttl_only_drop_parts).
-- INDEX idx_detected_at : idem ml_detected_anomalies. -- INDEX idx_detected_at : idem ml_detected_anomalies.
-- ----------------------------------------------------------------------------- -- -----------------------------------------------------------------------------
@ -115,7 +115,7 @@ CREATE TABLE IF NOT EXISTS ja4_processing.ml_all_scores
ENGINE = ReplacingMergeTree(detected_at) ENGINE = ReplacingMergeTree(detected_at)
PARTITION BY toYYYYMMDD(window_start) PARTITION BY toYYYYMMDD(window_start)
ORDER BY (window_start, src_ip, ja4, host, model_name) ORDER BY (window_start, src_ip, ja4, host, model_name)
TTL window_start + INTERVAL 3 DAY TTL window_start + INTERVAL 7 DAY
SETTINGS SETTINGS
index_granularity = 8192, index_granularity = 8192,
ttl_only_drop_parts = 1; ttl_only_drop_parts = 1;

View File

@ -3,6 +3,9 @@
# Load mod-reqin-log # Load mod-reqin-log
LoadModule reqin_log_module modules/mod_reqin_log.so LoadModule reqin_log_module modules/mod_reqin_log.so
# Enable HTTP/2 negotiation (mod_http2 loaded by default on Rocky 9)
Protocols h2 http/1.1
# mod_remoteip: trust X-Forwarded-For from Docker internal subnets. # mod_remoteip: trust X-Forwarded-For from Docker internal subnets.
# mod_reqin_log reads r->useragent_ip which mod_remoteip updates, # mod_reqin_log reads r->useragent_ip which mod_remoteip updates,
# so the XFF IP appears as src_ip in the correlated logs. # so the XFF IP appears as src_ip in the correlated logs.