diff --git a/docs/services/dashboard.md b/docs/services/dashboard.md index ac5584a..cc7837c 100644 --- a/docs/services/dashboard.md +++ b/docs/services/dashboard.md @@ -1,6 +1,6 @@ # Dashboard -Application web SOC (Security Operations Center) construite avec **FastAPI + Jinja2 + htmx**, +Application web SOC (Security Operations Center) construite avec **FastAPI + Jinja2 + ECharts**, offrant la visualisation en temps réel, l'investigation et l'analyse des détections de bots générées par le [bot-detector](bot-detector.md). Interroge ClickHouse sur deux bases de données (`ja4_processing` et `ja4_logs`). @@ -13,8 +13,8 @@ générées par le [bot-detector](bot-detector.md). Interroge ClickHouse sur deu |-----------|-------------| | Backend | Python 3.11 + FastAPI | | Templates | Jinja2 (rendu côté serveur) | -| Interactions dynamiques | htmx (mises à jour partielles via JSON API) | -| Graphiques | Chart.js + ECharts | +| Interactions dynamiques | Vanilla `fetch()` (appels JSON API avec rechargement partiel côté JS) | +| Graphiques | ECharts 5.5 (CDN) | | Style | Tailwind CSS (CDN) | | Base de données | ClickHouse via `clickhouse-connect` (client propre, **PAS** `ja4_common`) | | Documentation API | Swagger UI (`/docs`) + OpenAPI JSON (`/openapi.json`) | diff --git a/services/bot-detector/bot_detector/models.py b/services/bot-detector/bot_detector/models.py index 681bc38..59f891e 100644 --- a/services/bot-detector/bot_detector/models.py +++ b/services/bot-detector/bot_detector/models.py @@ -89,8 +89,9 @@ class TrafficAutoEncoder: self._scaler_range = None def _build_model(self): - dim1 = min(64, max(self.n_features, self.latent_dim + 4)) - dim2 = min(32, max(dim1 // 2, self.latent_dim + 2)) + # Architecture fixe n→64→32→16→32→64→n (§2.4.3 thèse) + dim1 = 64 + dim2 = 32 self.encoder = nn.Sequential( nn.Linear(self.n_features, dim1), nn.BatchNorm1d(dim1), nn.ReLU(), nn.Linear(dim1, dim2), nn.BatchNorm1d(dim2), nn.ReLU(), diff --git 
a/services/correlator/internal/adapters/inbound/unixsocket/source.go b/services/correlator/internal/adapters/inbound/unixsocket/source.go index 55f663a..7afc667 100644 --- a/services/correlator/internal/adapters/inbound/unixsocket/source.go +++ b/services/correlator/internal/adapters/inbound/unixsocket/source.go @@ -255,9 +255,13 @@ func parseJSONEvent(data []byte, sourceType string) (*domain.NormalizedEvent, er // Extract timestamp based on source contract switch event.Source { case domain.SourceA: - ts, ok := getInt64(raw, "timestamp") + ts, ok := getInt64(raw, "timestamp_ns") if !ok { - return nil, fmt.Errorf("missing required numeric field: timestamp for source A") + // Fallback to legacy "timestamp" field name + ts, ok = getInt64(raw, "timestamp") + } + if !ok { + return nil, fmt.Errorf("missing required numeric field: timestamp/timestamp_ns for source A") } // Assume nanoseconds event.Timestamp = time.Unix(0, ts) diff --git a/services/correlator/internal/adapters/outbound/clickhouse/sink_test.go b/services/correlator/internal/adapters/outbound/clickhouse/sink_test.go index e4872c8..56e8f6f 100644 --- a/services/correlator/internal/adapters/outbound/clickhouse/sink_test.go +++ b/services/correlator/internal/adapters/outbound/clickhouse/sink_test.go @@ -196,7 +196,7 @@ func TestClickHouseSink_BufferManagement(t *testing.T) { log := domain.CorrelatedLog{ SrcIP: "192.168.1.1", SrcPort: 8080, - Correlated: true, + Correlated: 1, } s := &ClickHouseSink{ @@ -527,7 +527,7 @@ func BenchmarkClickHouseSink_Write(b *testing.B) { Timestamp: time.Now(), SrcIP: "192.168.1.1", SrcPort: 8080, - Correlated: true, + Correlated: 1, } ctx := context.Background() diff --git a/services/correlator/internal/adapters/outbound/file/sink_test.go b/services/correlator/internal/adapters/outbound/file/sink_test.go index 9168d45..827f657 100644 --- a/services/correlator/internal/adapters/outbound/file/sink_test.go +++ b/services/correlator/internal/adapters/outbound/file/sink_test.go @@ 
-22,7 +22,7 @@ func TestFileSink_Write(t *testing.T) { log := domain.CorrelatedLog{ SrcIP: "192.168.1.1", SrcPort: 8080, - Correlated: true, + Correlated: 1, } if err := sink.Write(context.Background(), log); err != nil { @@ -57,7 +57,7 @@ func TestFileSink_WriteImmediatePersist_NoFlushNeeded(t *testing.T) { log := domain.CorrelatedLog{ SrcIP: "192.168.1.1", SrcPort: 8080, - Correlated: true, + Correlated: 1, } if err := sink.Write(context.Background(), log); err != nil { diff --git a/services/correlator/internal/adapters/outbound/stdout/sink_test.go b/services/correlator/internal/adapters/outbound/stdout/sink_test.go index 1b5ccd2..ced00e4 100644 --- a/services/correlator/internal/adapters/outbound/stdout/sink_test.go +++ b/services/correlator/internal/adapters/outbound/stdout/sink_test.go @@ -10,7 +10,7 @@ import ( "github.com/antitbone/ja4/correlator/internal/domain" ) -func makeLog(correlated bool) domain.CorrelatedLog { +func makeLog(correlated int) domain.CorrelatedLog { return domain.CorrelatedLog{ Timestamp: time.Unix(1700000000, 0), SrcIP: "1.2.3.4", @@ -53,10 +53,10 @@ func TestStdoutSink_WriteDoesNotProduceOutput(t *testing.T) { s := NewStdoutSink(Config{Enabled: true}) got := captureStdout(t, func() { - if err := s.Write(context.Background(), makeLog(true)); err != nil { + if err := s.Write(context.Background(), makeLog(1)); err != nil { t.Fatalf("Write(correlated) returned error: %v", err) } - if err := s.Write(context.Background(), makeLog(false)); err != nil { + if err := s.Write(context.Background(), makeLog(0)); err != nil { t.Fatalf("Write(orphan) returned error: %v", err) } }) diff --git a/services/correlator/internal/config/config.go b/services/correlator/internal/config/config.go index 2402c99..c3701de 100644 --- a/services/correlator/internal/config/config.go +++ b/services/correlator/internal/config/config.go @@ -9,6 +9,7 @@ import ( "time" "github.com/antitbone/ja4/correlator/internal/domain" + ja4config 
"github.com/antitbone/ja4/ja4common/config" "gopkg.in/yaml.v3" ) @@ -29,7 +30,7 @@ type MetricsConfig struct { // LogConfig holds logging configuration. type LogConfig struct { - Level string `yaml:"level"` // DEBUG, INFO, WARN, ERROR + Level string `yaml:"level" env:"LOG_LEVEL"` // DEBUG, INFO, WARN, ERROR } // GetLogLevel returns the log level, defaulting to INFO if not set. @@ -75,15 +76,15 @@ type FileOutputConfig struct { // ClickHouseOutputConfig holds ClickHouse sink configuration. type ClickHouseOutputConfig struct { - Enabled bool `yaml:"enabled"` - DSN string `yaml:"dsn"` - Table string `yaml:"table"` - BatchSize int `yaml:"batch_size"` - FlushIntervalMs int `yaml:"flush_interval_ms"` - MaxBufferSize int `yaml:"max_buffer_size"` + Enabled bool `yaml:"enabled" env:"CLICKHOUSE_ENABLED"` + DSN string `yaml:"dsn" env:"CLICKHOUSE_DSN"` + Table string `yaml:"table" env:"CLICKHOUSE_TABLE"` + BatchSize int `yaml:"batch_size" env:"CLICKHOUSE_BATCH_SIZE"` + FlushIntervalMs int `yaml:"flush_interval_ms" env:"CLICKHOUSE_FLUSH_INTERVAL_MS"` + MaxBufferSize int `yaml:"max_buffer_size" env:"CLICKHOUSE_MAX_BUFFER_SIZE"` DropOnOverflow bool `yaml:"drop_on_overflow"` AsyncInsert bool `yaml:"async_insert"` - TimeoutMs int `yaml:"timeout_ms"` + TimeoutMs int `yaml:"timeout_ms" env:"CLICKHOUSE_TIMEOUT_MS"` } // StdoutOutputConfig holds stdout sink configuration. 
@@ -165,6 +166,11 @@ func Load(path string) (*Config, error) { return nil, fmt.Errorf("failed to parse config file: %w", err) } + // Surcharge par variables d'environnement (préfixe LOGCORRELATOR_) + if err := ja4config.OverrideFromEnv(cfg, "LOGCORRELATOR"); err != nil { + return nil, fmt.Errorf("failed to apply env overrides: %w", err) + } + if err := cfg.Validate(); err != nil { return nil, fmt.Errorf("invalid config: %w", err) } diff --git a/services/correlator/internal/domain/correlated_log.go b/services/correlator/internal/domain/correlated_log.go index 0a94476..12b06b8 100644 --- a/services/correlator/internal/domain/correlated_log.go +++ b/services/correlator/internal/domain/correlated_log.go @@ -14,7 +14,7 @@ type CorrelatedLog struct { SrcPort int `json:"src_port"` DstIP string `json:"dst_ip,omitempty"` DstPort int `json:"dst_port,omitempty"` - Correlated bool `json:"correlated"` + Correlated int `json:"correlated"` // 0 = orphelin, 1 = corrélé OrphanSide string `json:"orphan_side,omitempty"` Fields map[string]any `json:"-"` // Additional fields, merged at marshal time } @@ -71,7 +71,7 @@ func NewCorrelatedLogFromEvent(event *NormalizedEvent, orphanSide string) Correl SrcPort: event.SrcPort, DstIP: event.DstIP, DstPort: event.DstPort, - Correlated: false, + Correlated: 0, OrphanSide: orphanSide, Fields: fields, } @@ -95,7 +95,7 @@ func NewCorrelatedLog(apacheEvent, networkEvent *NormalizedEvent) CorrelatedLog SrcPort: apacheEvent.SrcPort, DstIP: coalesceString(apacheEvent.DstIP, networkEvent.DstIP), DstPort: coalesceInt(apacheEvent.DstPort, networkEvent.DstPort), - Correlated: true, + Correlated: 1, OrphanSide: "", Fields: fields, } diff --git a/services/correlator/internal/domain/correlated_log_test.go b/services/correlator/internal/domain/correlated_log_test.go index e11879f..ace732a 100644 --- a/services/correlator/internal/domain/correlated_log_test.go +++ b/services/correlator/internal/domain/correlated_log_test.go @@ -64,8 +64,8 @@ func 
TestNewCorrelatedLogFromEvent(t *testing.T) { log := NewCorrelatedLogFromEvent(event, "A") - if log.Correlated { - t.Error("expected correlated to be false") + if log.Correlated != 0 { + t.Error("expected correlated to be 0") } if log.OrphanSide != "A" { t.Errorf("expected orphan_side A, got %s", log.OrphanSide) @@ -101,8 +101,8 @@ func TestNewCorrelatedLog(t *testing.T) { log := NewCorrelatedLog(apacheEvent, networkEvent) - if !log.Correlated { - t.Error("expected correlated to be true") + if log.Correlated != 1 { + t.Error("expected correlated to be 1") } if log.OrphanSide != "" { t.Errorf("expected orphan_side to be empty, got %s", log.OrphanSide) @@ -273,7 +273,7 @@ func TestMarshalJSON_ReservedKeyProtection(t *testing.T) { Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), SrcIP: "1.2.3.4", SrcPort: 1234, - Correlated: true, + Correlated: 1, Fields: map[string]any{ "src_ip": "EVIL_OVERRIDE", // should be ignored "correlated": false, // should be ignored @@ -294,7 +294,7 @@ func TestMarshalJSON_ReservedKeyProtection(t *testing.T) { if flat["src_ip"] != "1.2.3.4" { t.Errorf("reserved key src_ip should not be overwritten, got %v", flat["src_ip"]) } - if flat["correlated"] != true { + if flat["correlated"] != float64(1) { t.Errorf("reserved key correlated should not be overwritten, got %v", flat["correlated"]) } if flat["extra"] != "value" { @@ -308,7 +308,7 @@ func TestMarshalJSON_OptionalFieldsOmittedWhenZero(t *testing.T) { Timestamp: time.Now(), SrcIP: "1.2.3.4", SrcPort: 1234, - Correlated: false, + Correlated: 0, } data, err := json.Marshal(log) diff --git a/services/correlator/internal/domain/correlation_service_test.go b/services/correlator/internal/domain/correlation_service_test.go index a98ed92..05b57a1 100644 --- a/services/correlator/internal/domain/correlation_service_test.go +++ b/services/correlator/internal/domain/correlation_service_test.go @@ -57,7 +57,7 @@ func TestCorrelationService_Match(t *testing.T) { results = 
svc.ProcessEvent(networkEvent) if len(results) != 1 { t.Errorf("expected 1 result (correlated), got %d", len(results)) - } else if !results[0].Correlated { + } else if results[0].Correlated == 0 { t.Error("expected correlated result") } } @@ -376,7 +376,7 @@ func TestCorrelationService_DifferentSourceTypes(t *testing.T) { results = svc.ProcessEvent(apacheEvent) if len(results) < 1 { t.Errorf("expected at least 1 result (correlated), got %d", len(results)) - } else if !results[0].Correlated { + } else if results[0].Correlated == 0 { t.Error("expected correlated result") } } @@ -455,7 +455,7 @@ func TestCorrelationService_OneToMany_KeepAlive(t *testing.T) { results = svc.ProcessEvent(apacheEvent1) if len(results) != 1 { t.Errorf("expected 1 correlated result for first A, got %d", len(results)) - } else if !results[0].Correlated { + } else if results[0].Correlated == 0 { t.Error("expected correlated result for first A") } @@ -470,7 +470,7 @@ func TestCorrelationService_OneToMany_KeepAlive(t *testing.T) { results = svc.ProcessEvent(apacheEvent2) if len(results) != 1 { t.Errorf("expected 1 correlated result for second A (Keep-Alive), got %d", len(results)) - } else if !results[0].Correlated { + } else if results[0].Correlated == 0 { t.Error("expected correlated result for second A (Keep-Alive)") } @@ -654,7 +654,7 @@ func TestCorrelationService_KeepAlive_TTLNotBasedOnEventTimestamp(t *testing.T) SrcPort: 8080, } results := svc.ProcessEvent(apacheEvent1) - if len(results) != 1 || !results[0].Correlated { + if len(results) != 1 || results[0].Correlated == 0 { t.Fatalf("expected 1 correlated result, got %d", len(results)) } @@ -667,7 +667,7 @@ func TestCorrelationService_KeepAlive_TTLNotBasedOnEventTimestamp(t *testing.T) SrcPort: 8080, } results = svc.ProcessEvent(apacheEvent2) - if len(results) != 1 || !results[0].Correlated { + if len(results) != 1 || results[0].Correlated == 0 { t.Fatalf("expected 1 correlated result (Keep-Alive), got %d", len(results)) } @@ -740,7 
+740,7 @@ func TestCorrelationService_KeepAlive_LongSession(t *testing.T) { Raw: map[string]any{"method": "GET", "path": fmt.Sprintf("/api/%d", i)}, } results := svc.ProcessEvent(apacheEvent) - if len(results) != 1 || !results[0].Correlated { + if len(results) != 1 || results[0].Correlated == 0 { t.Errorf("Request %d at t=%ds (A timestamp t=%v): expected correlation, got %d results", i, i*5, now.Add(time.Duration(i)*500*time.Millisecond), len(results)) } @@ -808,7 +808,7 @@ func TestCorrelationService_ALateThanB_WithinTimeWindow(t *testing.T) { results = svc.ProcessEvent(apacheEvent) if len(results) != 1 { t.Errorf("expected 1 correlated result, got %d", len(results)) - } else if !results[0].Correlated { + } else if results[0].Correlated == 0 { t.Error("expected correlated result") } } @@ -866,7 +866,7 @@ func TestCorrelationService_ALateThanB_AExpiredTooSoon(t *testing.T) { results = svc.ProcessEvent(networkEvent) if len(results) != 1 { t.Errorf("expected 1 correlated result, got %d", len(results)) - } else if !results[0].Correlated { + } else if results[0].Correlated == 0 { t.Error("expected correlated result") } } @@ -921,7 +921,7 @@ func TestCorrelationService_Flush_CorrelatesRemainingEvents(t *testing.T) { flushed := svc.Flush() if len(flushed) != 1 { t.Errorf("expected 1 flushed correlated result, got %d", len(flushed)) - } else if flushed[0].Correlated { + } else if flushed[0].Correlated != 0 { // Good - it's correlated } else { t.Errorf("expected correlated result, got orphan side %s", flushed[0].OrphanSide) @@ -1032,7 +1032,7 @@ func TestCorrelationService_CleanA_RespectsBTTL(t *testing.T) { Raw: map[string]any{"method": "GET"}, } results := svc.ProcessEvent(apacheEvent) - if len(results) != 1 || !results[0].Correlated { + if len(results) != 1 || results[0].Correlated == 0 { t.Fatalf("expected 1 correlated result, got %d", len(results)) } @@ -1127,7 +1127,7 @@ func TestCorrelationService_ApacheEmitDelay_BArrivesDuringDelay(t *testing.T) { results = 
svc.ProcessEvent(networkEvent) if len(results) != 1 { t.Errorf("expected 1 correlated result, got %d", len(results)) - } else if !results[0].Correlated { + } else if results[0].Correlated == 0 { t.Error("expected correlated result") } @@ -1347,7 +1347,7 @@ results = svc.ProcessEvent(aEvent) if len(results) != 1 { t.Fatalf("expected 1 correlation, got %d", len(results)) } -if !results[0].Correlated { +if results[0].Correlated == 0 { t.Error("expected correlated=true") } } @@ -1371,7 +1371,7 @@ results := svc.ProcessEvent(aEvent) if len(results) != 1 { t.Fatalf("expected 1 result (orphan A, dest port filtered), got %d", len(results)) } -if results[0].Correlated { +if results[0].Correlated != 0 { t.Errorf("expected Correlated=false for dest-port-filtered A event") } if results[0].OrphanSide != "A" { @@ -1438,7 +1438,7 @@ Source: SourceA, Timestamp: now, SrcIP: "1.2.3.4", SrcPort: 1234, DstPort: 9999, } results := svc.ProcessEvent(aEvent) -if len(results) != 1 || !results[0].Correlated { +if len(results) != 1 || results[0].Correlated == 0 { t.Errorf("expected 1 correlation on any port when list is empty, got %d", len(results)) } } @@ -1614,9 +1614,9 @@ SrcPort: 5555, } timeProvider.now = now.Add(200 * time.Millisecond) results = svc.ProcessEvent(b) -if len(results) != 1 || !results[0].Correlated { +if len(results) != 1 || results[0].Correlated == 0 { t.Fatalf("B: expected 1 correlated result (A1+B), got %d correlated=%v", -len(results), len(results) > 0 && results[0].Correlated) +len(results), len(results) > 0 && results[0].Correlated != 0) } // A2 arrives on the same Keep-Alive connection — B must still be in buffer @@ -1632,7 +1632,7 @@ results = svc.ProcessEvent(a2) // A2 should correlate with B (still in buffer in one_to_many mode) correlated := false for _, r := range results { -if r.Correlated { +if r.Correlated != 0 { correlated = true } } @@ -1679,7 +1679,7 @@ Timestamp: tp.now, SrcIP: "91.224.92.185", SrcPort: 53471, } -if results := svc.ProcessEvent(a1); 
len(results) != 1 || !results[0].Correlated { +if results := svc.ProcessEvent(a1); len(results) != 1 || results[0].Correlated == 0 { t.Fatalf("A seq=1: expected 1 correlated result, got %d", len(svc.ProcessEvent(a1))) } @@ -1695,7 +1695,7 @@ results := svc.ProcessEvent(a10) correlated := false for _, r := range results { -if r.Correlated { +if r.Correlated != 0 { correlated = true } } @@ -1748,7 +1748,7 @@ results := svc.ProcessEvent(b) correlated := false for _, r := range results { -if r.Correlated { +if r.Correlated != 0 { correlated = true } } @@ -1809,7 +1809,7 @@ func TestBTTLExpiry_PurgesPendingOrphans(t *testing.T) { // The orphan must have been returned (not silently lost) — data-loss fix orphanFound := false for _, r := range returned { - if !r.Correlated && r.SrcIP == "10.9.9.9" { + if r.Correlated == 0 && r.SrcIP == "10.9.9.9" { orphanFound = true } } @@ -1854,7 +1854,7 @@ emitted := svc.EmitPendingOrphans() if len(emitted) != 1 { t.Fatalf("after delay: expected 1 emitted orphan, got %d", len(emitted)) } -if emitted[0].Correlated { +if emitted[0].Correlated != 0 { t.Errorf("expected orphan (Correlated=false), got Correlated=true") } diff --git a/services/correlator/sql/migrations/04_http2_fields.sql b/services/correlator/sql/migrations/04_http2_fields.sql index b002c45..4054ce0 100644 --- a/services/correlator/sql/migrations/04_http2_fields.sql +++ b/services/correlator/sql/migrations/04_http2_fields.sql @@ -1,8 +1,8 @@ -- === 04_http2_fields.sql — Ajout des colonnes HTTP/2 à http_logs === -- --- Migration pour les déploiements existants : ajoute les 4 colonnes de --- fingerprint HTTP/2 passif extraites par mod_reqin_log via son filtre --- de connexion (APR_HOOK_LAST, AP_FTYPE_CONNECTION). +-- Migration pour les déploiements existants : ajoute les colonnes de +-- fingerprint HTTP/2 passif extraites par mod_reqin_log via son hook +-- process_connection (APR_HOOK_FIRST, AP_MODE_SPECULATIVE). 
-- -- Format du fingerprint Akamai (h2_fingerprint) : -- Chrome : "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p" @@ -12,6 +12,7 @@ -- Appliquer avec : -- clickhouse-client --multiquery < 04_http2_fields.sql +-- Champs composites (fingerprint global + valeurs agrégées) ALTER TABLE ja4_logs.http_logs ADD COLUMN IF NOT EXISTS `h2_fingerprint` String DEFAULT '' CODEC(ZSTD(3)); @@ -23,3 +24,29 @@ ALTER TABLE ja4_logs.http_logs ALTER TABLE ja4_logs.http_logs ADD COLUMN IF NOT EXISTS `h2_pseudo_order` LowCardinality(String) DEFAULT ''; + +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_has_priority` UInt8 DEFAULT 0; + +-- Paramètres SETTINGS individuels (RFC 9113 §6.5.2) +-- Valeur -1 = paramètre absent du preface client (non envoyé) +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_header_table_size` Int32 DEFAULT -1; + +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_enable_push` Int32 DEFAULT -1; + +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_max_concurrent_streams` Int32 DEFAULT -1; + +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_initial_window_size` Int64 DEFAULT -1; + +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_max_frame_size` Int32 DEFAULT -1; + +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_max_header_list_size` Int32 DEFAULT -1; + +ALTER TABLE ja4_logs.http_logs + ADD COLUMN IF NOT EXISTS `h2_enable_connect_protocol` Int32 DEFAULT -1; diff --git a/services/dashboard/backend/routes/api.py b/services/dashboard/backend/routes/api.py index 9d7049c..c585170 100644 --- a/services/dashboard/backend/routes/api.py +++ b/services/dashboard/backend/routes/api.py @@ -5,6 +5,8 @@ from __future__ import annotations import json import logging import os +import re +from collections import defaultdict from pathlib import Path from typing import Any @@ -21,6 +23,34 @@ router = APIRouter(prefix="/api") _DB = safe_identifier(DB_PROCESSING) _DB_LOGS = 
safe_identifier(DB_LOGS) +# Regex pour extraire les features SHAP/ExIFFI depuis le champ reason +# Format: "SHAP: feat1(+0.123) | feat2(-0.456)" ou "ExIFFI: ..." +_SHAP_RE = re.compile(r"(?:SHAP|ExIFFI):\s*(.+?)(?:\s*\|\s*Threat|$)") +_FEAT_RE = re.compile(r"(\w+)\(([+-]?\d+\.\d+)\)") + + +def _aggregate_shap_importance(reasons: list[str]) -> list[dict]: + """Agrège les valeurs SHAP/ExIFFI extraites des champs reason.""" + totals: dict[str, float] = defaultdict(float) + counts: dict[str, int] = defaultdict(int) + for reason in reasons: + m = _SHAP_RE.search(reason or "") + if not m: + continue + for feat_match in _FEAT_RE.finditer(m.group(1)): + name = feat_match.group(1) + val = abs(float(feat_match.group(2))) + totals[name] += val + counts[name] += 1 + if not totals: + return [] + return sorted( + [{"name": k, "importance": round(totals[k] / counts[k], 4), "occurrences": counts[k]} + for k in totals], + key=lambda x: -x["importance"], + ) + + # Whitelists for sort/order to prevent SQL injection _DETECTION_SORT_COLS = { "detected_at", "src_ip", "ja4", "host", "anomaly_score", @@ -500,7 +530,7 @@ async def features() -> dict[str, Any]: except Exception: pass - # Feature variance (importance proxy) + # Feature variance (importance proxy — fallback si SHAP indisponible) try: variance_rows = query( f"SELECT " @@ -523,6 +553,22 @@ async def features() -> dict[str, Any]: except Exception: pass + # SHAP/ExIFFI — importance réelle extraite des anomalies détectées + try: + reason_rows = query( + f"SELECT reason FROM {_DB}.ml_detected_anomalies " + "WHERE reason LIKE '%SHAP:%' OR reason LIKE '%ExIFFI:%' " + "ORDER BY detected_at DESC LIMIT 500" + ) + if reason_rows: + shap_importance = _aggregate_shap_importance( + [r["reason"] for r in reason_rows] + ) + if shap_importance: + result["shap_importance"] = shap_importance + except Exception: + logger.debug("SHAP importance extraction unavailable") + return result @@ -846,11 +892,11 @@ async def classify_suggested() -> 
dict[str, Any]: # --------------------------------------------------------------------------- class ClassifyRequest(BaseModel): src_ip: str - classification: str # bot | legitimate | suspicious + classification: str # true_positive | false_positive | suspicious comment: str = "" -_VALID_CLASSIFICATIONS = {"bot", "legitimate", "suspicious"} +_VALID_CLASSIFICATIONS = {"true_positive", "false_positive", "suspicious"} _feedback_table_ensured = False diff --git a/services/dashboard/backend/templates/classify.html b/services/dashboard/backend/templates/classify.html index 0606a74..858b427 100644 --- a/services/dashboard/backend/templates/classify.html +++ b/services/dashboard/backend/templates/classify.html @@ -6,7 +6,7 @@
Classifiez les IPs pour entraîner le modèle XGBoost supervisé. Les labels sont utilisés au prochain cycle ML.
Workflow : 1. Consultez les IPs suggérées (non classifiées). 2. Classifiez-les. 3. Les labels alimentent XGBoost au prochain cycle.
-Bot : Confirme une IP malveillante. Légitime : Faux positif. Suspect : À surveiller.
+Vrai positif : Confirme un bot détecté. Faux positif : Trafic légitime détecté à tort. Suspect : À surveiller.
Source : soc_feedback → XGBoost training
{% endblock %} @@ -15,8 +15,8 @@Variance inter-classe (ISP vs datacenter) de chaque feature. Les features à haute variance discriminent le mieux bots et humains.
-Usage : Les features en tête sont les plus utiles pour le modèle EIF. Celles à variance nulle sont élaguées automatiquement.
-Source : view_ai_features_1h
+Importance moyenne des features issue de SHAP (XGBoost) ou ExIFFI (EIF). Chaque barre représente la contribution absolue moyenne d'une feature aux décisions d'anomalie récentes.
+Fallback : Si aucune donnée SHAP/ExIFFI n'est disponible, la variance inter-classe (proxy statistique) est affichée à la place.
+Source : ml_detected_anomalies.reason (SHAP/ExIFFI) ou view_ai_features_1h (variance)