feat(dashboard): rebuild SOC dashboard + fix ClickHouse SQL

Complete rewrite of the SOC dashboard using FastAPI + Jinja2 + htmx + Chart.js + Tailwind CSS.
Replaces the old React/Vite frontend with server-rendered templates.

Dashboard pages:
- Overview: KPIs, timeline chart, threat distribution, top IPs
- Detections: paginated/filterable anomaly table
- Scores: ml_all_scores with AE error & XGB prob columns
- Traffic: HTTP logs with method/host filters
- IP Investigation: full deep-dive (scores, features, HTTP logs, classify)
- Classification: SOC feedback form + history
- Features: AI + thesis feature stats
- Models: scoring stats + model metadata

API: 9 JSON endpoints with parameterized queries, sort whitelists

SQL fixes:
- 05_aggregation_tables: add SETTINGS deduplicate_merge_projection_mode = 'drop' to agg_host_ip_ja4_1h
- 11_views: fix nested aggregates (any/sum inside argMax) by pre-aggregating per JA4 in a subquery
- 12_thesis_features: inline the invalid 'let' bindings in the Benford computation, declare request_times as a groupArrayIf(500) state (UInt64, UInt8)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-08 03:21:05 +02:00
parent 228ad7026a
commit b735bab5a5
120 changed files with 1444 additions and 24933 deletions

View File

@ -110,7 +110,8 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_host_ip_ja4_1h
)
)
ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip, ja4, host);
ORDER BY (window_start, src_ip, ja4, host)
SETTINGS deduplicate_merge_projection_mode = 'drop';
-- -----------------------------------------------------------------------------

View File

@ -28,17 +28,20 @@ CREATE OR REPLACE VIEW ja4_processing.view_form_bruteforce_detected AS
SELECT
src_ip,
host,
-- JA4 le plus fréquent pour ce couple IP+hôte
argMax(any(tcp_fp_raw), sum(hits)) AS ja4,
-- Nombre total de requêtes (toutes méthodes)
sum(hits) AS hits,
-- Proxy pour les soumissions de formulaire : nombre de requêtes POST
-- (count_post = SimpleAggregateFunction(sum) dans agg_host_ip_ja4_1h)
sum(count_post) AS query_params_count
FROM ja4_processing.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
-- JA4 avec le plus de hits pour ce couple IP+hôte (from subquery)
argMax(ja4, ja4_hits) AS ja4,
sum(ja4_hits) AS hits,
sum(ja4_posts) AS query_params_count
FROM (
SELECT
src_ip, host, ja4,
sum(hits) AS ja4_hits,
sum(count_post) AS ja4_posts
FROM ja4_processing.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip, host, ja4
) sub
GROUP BY src_ip, host
-- Seuil : au moins 10 POST pour qualifier d'activité brute-force
HAVING query_params_count >= 10;

View File

@ -75,7 +75,7 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_request_timing_1h
ja4 LowCardinality(String),
host LowCardinality(String),
-- Timestamps nanoseconde (a_timestamp de mod_reqin_log)
request_times AggregateFunction(groupArray(500), UInt64)
request_times AggregateFunction(groupArrayIf(500), UInt64, UInt8)
)
ENGINE = AggregatingMergeTree()
PARTITION BY toDate(window_start)
@ -323,18 +323,17 @@ cadence_features AS (
-- Benford P(d) = log10(1 + 1/d) pour d=1..9
if(
length(deltas_ms) >= 10,
(
let benford_expected = [0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046],
let first_digits = arrayMap(x -> toUInt8(substring(toString(toUInt64(greatest(abs(x), 1))), 1, 1)), deltas_ms),
let n = toFloat64(length(first_digits)),
arraySum(
arrayMap(
d -> pow(
(toFloat64(arrayCount(x -> x = d, first_digits)) / n) - benford_expected[d],
2
) / benford_expected[d],
[1, 2, 3, 4, 5, 6, 7, 8, 9]
)
arraySum(
arrayMap(
d -> pow(
(toFloat64(arrayCount(
x -> x = d,
arrayMap(v -> toUInt8(substring(toString(toUInt64(greatest(abs(v), 1))), 1, 1)), deltas_ms)
)) / toFloat64(length(deltas_ms)))
- [0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046][d],
2
) / [0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046][d],
[1, 2, 3, 4, 5, 6, 7, 8, 9]
)
),
0.0