Files
logcorrelator/sql/mv1.sql
toto 5df2fd965b
Some checks failed
Build and Test / test (push) Has been cancelled
Build and Test / build (push) Has been cancelled
Build and Test / docker (push) Has been cancelled
view
2026-03-09 08:37:59 +01:00

155 lines
6.9 KiB
SQL

-- ============================================================================
-- PROJET : Moteur de Détection de Menaces HTTP (Full Spectrum)
-- DESCRIPTION : Configuration complète des tables d'agrégation et du scoring.
-- COUVRE : Spoofing UA/TLS, TCP Fingerprinting, Anomalies comportementales.
-- DATE : 2026-03-08
-- ============================================================================
-- ----------------------------------------------------------------------------
-- 1. NETTOYAGE (Ordre inverse des dépendances)
-- ----------------------------------------------------------------------------
-- Dependents are dropped before the objects they depend on: first the scoring
-- view, then the materialized views, and finally the aggregate tables they
-- write to. IF EXISTS keeps the script re-runnable on a fresh database.
DROP VIEW IF EXISTS mabase_prod.live_threat_scores;
-- NOTE(review): mv_baseline_update, mv_novelty and mv_traffic_1d are dropped
-- here but no CREATE statement for them is visible in this script; confirm
-- they are (re)created elsewhere.
DROP VIEW IF EXISTS mabase_prod.mv_baseline_update;
DROP VIEW IF EXISTS mabase_prod.mv_novelty;
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1d;
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1h;
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1m;
-- Aggregate storage tables, coarsest granularity first.
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1d;
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1h;
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1m;
-- ----------------------------------------------------------------------------
-- 2. TABLES DE DESTINATION (STORAGE)
-- ----------------------------------------------------------------------------
-- Per-minute aggregate storage, filled by mabase_prod.mv_traffic_1m.
-- Every metric column holds an aggregate-function *state*; read it back with
-- the matching -Merge combinator (countMerge, uniqMerge, avgMerge, ...).
CREATE TABLE mabase_prod.agg_traffic_1m (
    -- Dimensions (one row per distinct combination and minute)
    minute DateTime,
    host LowCardinality(String),
    src_ip IPv4,
    src_asn UInt32,
    src_country_code LowCardinality(String),
    ja4 String,
    ja3_hash String,
    header_user_agent String,
    -- Base metrics
    hits AggregateFunction(count, UInt64),
    uniq_paths AggregateFunction(uniq, String),
    -- Layer 4: TCP & handshake
    avg_syn_to_clienthello_ms AggregateFunction(avg, Int32),
    var_syn_to_clienthello_ms AggregateFunction(varPop, Int32),
    tcp_fingerprint AggregateFunction(uniq, UInt64), -- distinct hashes of MSS + window size + window scale
    -- Layer 7: HTTP fingerprinting
    avg_headers_count AggregateFunction(avg, Float64),
    host_sni_mismatch AggregateFunction(countIf, UInt8),
    -- Spoofing & inconsistency detection
    spoofing_ua_tls AggregateFunction(countIf, UInt8),
    spoofing_ua_alpn AggregateFunction(countIf, UInt8),
    spoofing_os_ttl AggregateFunction(countIf, UInt8),
    missing_human_headers AggregateFunction(countIf, UInt8),
    -- Behaviour & payloads
    sensitive_path_hits AggregateFunction(countIf, UInt8),
    suspicious_methods AggregateFunction(countIf, UInt8),
    suspicious_queries AggregateFunction(countIf, UInt8)
) ENGINE = AggregatingMergeTree()
PARTITION BY toYYYYMM(minute)
-- The sparse primary index stays on the columns queries filter by.
PRIMARY KEY (host, ja4, src_ip, minute)
-- AggregatingMergeTree collapses rows that share the full sorting key, so the
-- key must contain every dimension the feeding view groups by. Otherwise rows
-- that differ only in ASN, country, JA3 or User-Agent are merged together and
-- those columns keep an arbitrary value.
ORDER BY (host, ja4, src_ip, minute, src_asn, src_country_code, ja3_hash, header_user_agent);
-- Hourly and daily tables keep fewer dimensions (simplified for long-term storage).
-- Hourly rollup: request count and distinct source IPs per host / country / JA4.
CREATE TABLE mabase_prod.agg_traffic_1h (
    hour DateTime,
    host LowCardinality(String),
    src_country_code LowCardinality(String),
    ja4 String,
    hits AggregateFunction(count, UInt64),
    uniq_ips AggregateFunction(uniq, IPv4)
) ENGINE = AggregatingMergeTree()
-- src_country_code is a grouping dimension of the feeding view, so it has to be
-- part of the sorting key: AggregatingMergeTree merges rows with equal sorting
-- keys and would otherwise fold different countries into a single row.
ORDER BY (host, ja4, hour, src_country_code);
-- Daily rollup: request-count and distinct-source-IP states per (host, ja4, day).
-- NOTE(review): no statement visible in this script inserts into this table
-- (mv_traffic_1d is dropped during cleanup but not created here); confirm the
-- feeding view is defined elsewhere.
CREATE TABLE mabase_prod.agg_traffic_1d
(
    day      Date,
    host     LowCardinality(String),
    ja4      String,
    hits     AggregateFunction(count, UInt64),
    uniq_ips AggregateFunction(uniq, IPv4)
)
ENGINE = AggregatingMergeTree()
ORDER BY (host, ja4, day);
-- ----------------------------------------------------------------------------
-- 3. VUES MATÉRIALISÉES (MOTEUR DE CALCUL)
-- ----------------------------------------------------------------------------
-- Per-minute aggregation of raw HTTP logs into mabase_prod.agg_traffic_1m.
-- Each metric is written as an aggregate-function state (-State combinator)
-- so that the target table can keep merging partial results.
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1m TO mabase_prod.agg_traffic_1m
AS SELECT
    toStartOfMinute(time) AS minute,
    host,
    src_ip,
    src_asn,
    src_country_code,
    ja4,
    ja3_hash,
    header_user_agent,
    -- Base metrics
    countState() AS hits,
    uniqState(path) AS uniq_paths,
    -- Handshake timing: mean and population variance of SYN -> ClientHello delay
    avgState(syn_to_clienthello_ms) AS avg_syn_to_clienthello_ms,
    varPopState(syn_to_clienthello_ms) AS var_syn_to_clienthello_ms,
    -- TCP fingerprint: distinct hashes of (MSS, window size, window scale)
    uniqState(cityHash64(toString(tcp_meta_mss), toString(tcp_meta_window_size), toString(tcp_meta_window_scale))) AS tcp_fingerprint,
    -- HTTP metrics: header count = number of commas in client_headers + 1
    -- (an empty client_headers string therefore counts as 1)
    avgState(toFloat64(length(client_headers) - length(replaceAll(client_headers, ',', '')) + 1)) AS avg_headers_count,
    countIfState(host != tls_sni AND tls_sni != '') AS host_sni_mismatch,
    -- Spoofing logic. Like the SNI check above, the TLS/TTL checks only fire
    -- when the corresponding network data is present, so that a row without a
    -- captured handshake (empty ja4) or without a TTL (0) is not reported as
    -- spoofing. NOTE(review): assumes empty ja4 / TTL 0 mean "no data" - confirm.
    countIfState((header_user_agent ILIKE '%Chrome%') AND (ja4 != '') AND (ja4 NOT ILIKE 't13d%')) AS spoofing_ua_tls,
    countIfState((header_user_agent ILIKE '%Chrome%') AND (ja4 != '') AND (tls_alpn NOT ILIKE '%h2%')) AS spoofing_ua_alpn,
    countIfState((header_user_agent ILIKE '%Windows%') AND (ip_meta_ttl > 0) AND (ip_meta_ttl <= 64)) AS spoofing_os_ttl,
    countIfState((header_user_agent ILIKE '%Mozilla%') AND (header_sec_ch_ua = '')) AS missing_human_headers,
    -- Behaviour & payloads. (?i) makes the patterns case-insensitive: without
    -- it 'UNION SELECT', '<SCRIPT' or '/Admin' slip through untouched.
    countIfState(match(path, '(?i)login|auth|admin|password|config|wp-admin|api/v[0-9]/auth')) AS sensitive_path_hits,
    countIfState(method IN ('PUT', 'DELETE', 'OPTIONS', 'TRACE')) AS suspicious_methods,
    countIfState((length(query) > 250) OR match(query, '(?i)(<script|union|select|etc/passwd|%00)')) AS suspicious_queries
FROM mabase_prod.http_logs
GROUP BY
    minute,
    host,
    src_ip,
    src_asn,
    src_country_code,
    ja4,
    ja3_hash,
    header_user_agent;
-- Cascade: rows inserted into the per-minute table are rolled up to the hour.
-- Hit-count states are re-merged (countMergeState); distinct-IP states are
-- built from the src_ip dimension of each per-minute row.
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1h
TO mabase_prod.agg_traffic_1h
AS
SELECT
    toStartOfHour(minute) AS hour,
    host,
    src_country_code,
    ja4,
    countMergeState(hits) AS hits,
    uniqState(src_ip) AS uniq_ips
FROM mabase_prod.agg_traffic_1m
GROUP BY
    hour,
    host,
    src_country_code,
    ja4;
-- ----------------------------------------------------------------------------
-- 4. VUE DE SCORING FINAL (VERDICT)
-- ----------------------------------------------------------------------------
-- Live threat score per (src_ip, ja4, src_asn, src_country_code) over the last
-- 5 minutes of per-minute aggregates. The score is a sum of fixed weights, one
-- per triggered indicator; only rows with a non-zero score are returned,
-- highest score first.
-- The spoofing_ua_alpn and suspicious_methods states are stored in the
-- aggregate table but do not contribute to the score.
CREATE VIEW mabase_prod.live_threat_scores AS
SELECT
    T1.src_ip,
    T1.ja4,
    T1.src_asn,
    T1.src_country_code,
    (
        -- 1. Signature inconsistencies (high weight: 30-50)
        if(countMerge(T1.spoofing_ua_tls) > 0, 50, 0) +
        if(countMerge(T1.spoofing_os_ttl) > 0, 40, 0) +
        if(countMerge(T1.host_sni_mismatch) > 0, 45, 0) +
        if(countMerge(T1.missing_human_headers) > 0, 30, 0) +
        -- 2. Network anomalies (medium weight: 25-30)
        -- Near-constant handshake timing over more than 5 requests
        if(varPopMerge(T1.var_syn_to_clienthello_ms) < 0.5 AND countMerge(T1.hits) > 5, 30, 0) +
        if(avgMerge(T1.avg_headers_count) < 6, 25, 0) +
        -- 3. Behaviour (variable weight)
        if(countMerge(T1.sensitive_path_hits) > 5, 40, 0) +
        if(countMerge(T1.suspicious_queries) > 0, 60, 0) +
        if(uniqMerge(T1.uniq_paths) > 50, 40, 0) + -- path sweeping (scanner)
        -- 4. Volume vs baseline
        -- With the default join_use_nulls = 0 a JA4 without a baseline row gets
        -- p99_hits_per_hour = 0, which made "hits > 0 * 3" true for every such
        -- client. Only compare when a positive baseline actually exists.
        -- NOTE(review): the window is 5 minutes while the baseline is named
        -- "per hour" - confirm the units are meant to be compared directly.
        if(B.p99_hits_per_hour > 0 AND countMerge(T1.hits) > (B.p99_hits_per_hour * 3), 50, 0)
    ) AS final_threat_score,
    countMerge(T1.hits) AS request_count,
    B.p99_hits_per_hour AS baseline
FROM mabase_prod.agg_traffic_1m AS T1
-- NOTE(review): assumes at most one baseline row per ja4; duplicates would
-- repeat the aggregate states and inflate the merged counts - confirm.
LEFT JOIN mabase_prod.tbl_baseline_ja4_7d AS B ON T1.ja4 = B.ja4
WHERE T1.minute >= now() - INTERVAL 5 MINUTE
GROUP BY
    T1.src_ip,
    T1.ja4,
    T1.src_asn,
    T1.src_country_code,
    B.p99_hits_per_hour
HAVING final_threat_score > 0
ORDER BY final_threat_score DESC;