-- ============================================================================
-- Detection views deployment script (ClickHouse) - mabase_prod
-- Post-patch content of sql/views.sql, reconstructed from a whitespace-collapsed
-- diff (only the added/context side is kept).
-- ============================================================================

-- 1. FULL CLEANUP
-- NOTE(review): the patch elides six unchanged lines of this DROP list (between
-- its two hunks). The statements below cover every object this script recreates;
-- all of them are idempotent, so duplicates of the elided lines are harmless.
DROP TABLE IF EXISTS mabase_prod.ml_detected_anomalies;
DROP VIEW IF EXISTS mabase_prod.view_ai_features_1h;
DROP VIEW IF EXISTS mabase_prod.view_host_ip_ja4_rotation;
DROP VIEW IF EXISTS mabase_prod.view_host_ja4_anomalies;
DROP VIEW IF EXISTS mabase_prod.view_form_bruteforce_detected;
DROP VIEW IF EXISTS mabase_prod.view_alpn_mismatch_detected;
DROP VIEW IF EXISTS mabase_prod.view_tcp_spoofing_detected;
DROP VIEW IF EXISTS mabase_prod.mv_agg_host_ip_ja4_1h;
DROP TABLE IF EXISTS mabase_prod.agg_host_ip_ja4_1h;
DROP VIEW IF EXISTS mabase_prod.mv_agg_header_fingerprint_1h;
DROP TABLE IF EXISTS mabase_prod.agg_header_fingerprint_1h;

-- 2. EXCLUSION TABLES
-- Known-bot IPs and JA4 fingerprints, read from CSV files.
-- NOTE(review): presumably the CSV paths resolve under the server's
-- user_files_path - confirm where bot_ip.csv / bot_ja4.csv must be deployed.
CREATE TABLE IF NOT EXISTS mabase_prod.bot_ip (ip String) ENGINE = File(CSV, 'bot_ip.csv');
CREATE TABLE IF NOT EXISTS mabase_prod.bot_ja4 (ja4 String) ENGINE = File(CSV, 'bot_ja4.csv');

-- 3. BEHAVIORAL AGGREGATION
-- One row per (hour bucket, src_ip, ja4, host); fed by mv_agg_host_ip_ja4_1h.
CREATE TABLE mabase_prod.agg_host_ip_ja4_1h
(
    window_start        DateTime,
    src_ip              String,
    ja4                 String,
    host                String,
    first_seen          SimpleAggregateFunction(min, DateTime),
    last_seen           SimpleAggregateFunction(max, DateTime),
    hits                SimpleAggregateFunction(sum, UInt64),
    count_post          SimpleAggregateFunction(sum, UInt64),
    uniq_paths          AggregateFunction(uniq, String),
    uniq_query_params   AggregateFunction(uniq, String),
    src_country_code    SimpleAggregateFunction(any, String),
    tcp_fp_raw          SimpleAggregateFunction(any, String),
    tcp_jitter_variance AggregateFunction(varPop, Float64),
    tcp_win_raw         SimpleAggregateFunction(any, UInt32),
    tcp_scale_raw       SimpleAggregateFunction(any, UInt32),
    tcp_mss_raw         SimpleAggregateFunction(any, UInt32),
    tcp_ttl_raw         SimpleAggregateFunction(any, UInt32),
    http_ver_raw        SimpleAggregateFunction(any, String),
    tls_alpn_raw        SimpleAggregateFunction(any, String),
    tls_sni_raw         SimpleAggregateFunction(any, String),
    first_ua            SimpleAggregateFunction(any, String),
    correlated_raw      SimpleAggregateFunction(max, UInt8),
    unique_src_ports    AggregateFunction(uniq, UInt16),
    unique_conn_id      AggregateFunction(uniq, String),
    max_keepalives      SimpleAggregateFunction(max, UInt32),
    orphan_count        SimpleAggregateFunction(sum, UInt64),
    ip_id_zero_count    SimpleAggregateFunction(sum, UInt64),
    total_ip_length_var AggregateFunction(varPop, Float64),
    mss_1460_count      SimpleAggregateFunction(sum, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip, ja4, host)
-- Retention restored from the previous version of this script: every reader
-- below only looks at the last 24 hours, so without a TTL the table grows
-- without bound. NOTE(review): remove if longer retention is intended.
TTL window_start + INTERVAL 7 DAY;

CREATE MATERIALIZED VIEW mabase_prod.mv_agg_host_ip_ja4_1h
TO mabase_prod.agg_host_ip_ja4_1h AS
SELECT
    toStartOfHour(src.time) AS window_start,
    src.src_ip,
    src.ja4,
    src.host,
    min(src.time) AS first_seen,
    max(src.time) AS last_seen,
    count() AS hits,
    sum(IF(src.method = 'POST', 1, 0)) AS count_post,
    uniqState(src.path) AS uniq_paths,
    uniqState(src.query) AS uniq_query_params,
    any(src.src_country_code) AS src_country_code,
    -- TCP stack fingerprint: hash of window size, MSS, window scale and options.
    any(toString(cityHash64(concat(
        toString(src.tcp_meta_window_size),
        toString(src.tcp_meta_mss),
        toString(src.tcp_meta_window_scale),
        src.tcp_meta_options
    )))) AS tcp_fp_raw,
    varPopState(toFloat64(src.syn_to_clienthello_ms)) AS tcp_jitter_variance,
    any(src.tcp_meta_window_size) AS tcp_win_raw,
    any(src.tcp_meta_window_scale) AS tcp_scale_raw,
    any(src.tcp_meta_mss) AS tcp_mss_raw,
    any(src.ip_meta_ttl) AS tcp_ttl_raw,
    any(src.http_version) AS http_ver_raw,
    any(src.tls_alpn) AS tls_alpn_raw,
    any(src.tls_sni) AS tls_sni_raw,
    any(src.header_user_agent) AS first_ua,
    max(toUInt8(src.correlated)) AS correlated_raw,
    uniqState(toUInt16(src.src_port)) AS unique_src_ports,
    uniqState(src.conn_id) AS unique_conn_id,
    max(toUInt32(src.keepalives)) AS max_keepalives,
    sum(IF(src.orphan_side = 'A' OR toUInt8(src.correlated) = 0, 1, 0)) AS orphan_count,
    sum(IF(src.ip_meta_id = 0, 1, 0)) AS ip_id_zero_count,
    varPopState(toFloat64(src.ip_meta_total_length)) AS total_ip_length_var,
    sum(IF(src.tcp_meta_mss = 1460, 1, 0)) AS mss_1460_count
FROM mabase_prod.http_logs AS src
GROUP BY window_start, src_ip, ja4, host;

-- 4. HEADER AGGREGATION (JA4H)
-- One row per (hour bucket, src_ip); fed by mv_agg_header_fingerprint_1h.
CREATE TABLE mabase_prod.agg_header_fingerprint_1h
(
    window_start         DateTime,
    src_ip               String,
    header_order_hash    SimpleAggregateFunction(any, String),
    header_count         SimpleAggregateFunction(max, UInt16),
    has_accept_language  SimpleAggregateFunction(max, UInt8),
    has_cookie           SimpleAggregateFunction(max, UInt8),
    has_referer          SimpleAggregateFunction(max, UInt8),
    modern_browser_score SimpleAggregateFunction(max, UInt8),
    ua_ch_mismatch       SimpleAggregateFunction(max, UInt8),
    sec_fetch_mode       SimpleAggregateFunction(any, String),
    sec_fetch_dest       SimpleAggregateFunction(any, String)
)
ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip)
-- Same retention as agg_host_ip_ja4_1h (restored from the previous version).
TTL window_start + INTERVAL 7 DAY;

CREATE MATERIALIZED VIEW mabase_prod.mv_agg_header_fingerprint_1h
TO mabase_prod.agg_header_fingerprint_1h AS
SELECT
    toStartOfHour(src.time) AS window_start,
    src.src_ip,
    any(toString(cityHash64(src.client_headers))) AS header_order_hash,
    -- Header count = number of commas in client_headers + 1.
    max(toUInt16(length(src.client_headers) - length(replaceAll(src.client_headers, ',', '')) + 1)) AS header_count,
    max(toUInt8(if(position(src.client_headers, 'Accept-Language') > 0, 1, 0))) AS has_accept_language,
    max(toUInt8(if(position(src.client_headers, 'Cookie') > 0, 1, 0))) AS has_cookie,
    max(toUInt8(if(position(src.client_headers, 'Referer') > 0, 1, 0))) AS has_referer,
    -- 100 when Sec-CH-UA is present, 50 when only a User-Agent is present, else 0.
    max(toUInt8(if(length(src.header_sec_ch_ua) > 0, 100, if(length(src.header_user_agent) > 0, 50, 0)))) AS modern_browser_score,
    -- User-Agent vs Sec-CH-UA-Platform contradiction. Only evaluated when the
    -- platform hint was actually sent: browsers that do not emit client hints
    -- (Firefox, Safari / all iOS browsers) would otherwise always be flagged.
    max(toUInt8(if(
        length(src.header_sec_ch_ua_platform) > 0
        AND (
            (position(src.header_user_agent, 'Windows') > 0 AND position(src.header_sec_ch_ua_platform, 'Windows') = 0)
            OR (position(src.header_user_agent, 'iPhone') > 0 AND position(src.header_sec_ch_ua_platform, 'iOS') = 0)
        ),
        1, 0
    ))) AS ua_ch_mismatch,
    any(src.header_sec_fetch_mode) AS sec_fetch_mode,
    any(src.header_sec_fetch_dest) AS sec_fetch_dest
FROM mabase_prod.http_logs AS src
GROUP BY window_start, src.src_ip;

-- 5. DEDUPLICATED RESULTS TABLE
-- ReplacingMergeTree versioned by detected_at: for a given (src_ip, ja4, host)
-- the row with the highest detected_at survives merges.
CREATE TABLE mabase_prod.ml_detected_anomalies
(
    detected_at               DateTime,
    src_ip                    String,
    ja4                       String,
    host                      String,
    anomaly_score             Float32,
    hits                      UInt64,
    hit_velocity              Float32,
    fuzzing_index             Float32,
    post_ratio                Float32,
    port_exhaustion_ratio     Float32,
    max_keepalives            UInt32,
    orphan_ratio              Float32,
    tcp_jitter_variance       Float32,
    tcp_shared_count          UInt32,
    true_window_size          UInt64,
    window_mss_ratio          Float32,
    alpn_http_mismatch        UInt8,
    is_alpn_missing           UInt8,
    sni_host_mismatch         UInt8,
    header_count              UInt16,
    has_accept_language       UInt8,
    has_cookie                UInt8,
    has_referer               UInt8,
    modern_browser_score      UInt8,
    is_headless               UInt8,
    ua_ch_mismatch            UInt8,
    header_order_shared_count UInt32,
    ip_id_zero_ratio          Float32,
    request_size_variance     Float32,
    multiplexing_efficiency   Float32,
    mss_mobile_mismatch       UInt8,
    reason                    String
)
ENGINE = ReplacingMergeTree(detected_at)
ORDER BY (src_ip, ja4, host)
TTL detected_at + INTERVAL 30 DAY;

-- 6. AI FEATURE VIEW (last 24h, exclusions applied, all state merges done in
--    the subqueries)
-- NOTE(review): a.* and h.* both carry window_start and src_ip; the wildcard
-- projection is kept so the exposed column set does not change - confirm how
-- consumers see the duplicated names.
CREATE OR REPLACE VIEW mabase_prod.view_ai_features_1h AS
SELECT
    a.*,
    h.*,
    -- "+ 1" in the denominators avoids division by zero.
    (a.count_post / (a.hits + 1)) AS post_ratio,
    (a.uniq_query_params / (a.uniq_paths + 1)) AS fuzzing_index,
    (a.hits / (dateDiff('second', a.first_seen, a.last_seen) + 1)) AS hit_velocity,
    (a.unique_src_ports / (a.hits + 1)) AS port_exhaustion_ratio,
    (a.orphan_count / (a.hits + 1)) AS orphan_ratio,
    (a.ip_id_zero_count / (a.hits + 1)) AS ip_id_zero_ratio,
    (a.hits / (a.unique_conn_id + 1)) AS multiplexing_efficiency,
    IF(a.mss_1460_count > (a.hits * 0.8) AND h.modern_browser_score > 70, 1, 0) AS mss_mobile_mismatch,
    -- Number of result rows sharing the same TCP fingerprint / header-order hash.
    count() OVER (PARTITION BY a.tcp_fingerprint) AS tcp_shared_count,
    count() OVER (PARTITION BY h.header_order_hash) AS header_order_shared_count
FROM (
    SELECT
        window_start,
        src_ip,
        ja4,
        host,
        sum(hits) AS hits,
        uniqMerge(uniq_paths) AS uniq_paths,
        uniqMerge(uniq_query_params) AS uniq_query_params,
        sum(count_post) AS count_post,
        min(first_seen) AS first_seen,
        max(last_seen) AS last_seen,
        any(tcp_fp_raw) AS tcp_fingerprint,
        varPopMerge(tcp_jitter_variance) AS tcp_jitter_variance,
        varPopMerge(total_ip_length_var) AS request_size_variance,
        any(tcp_win_raw * exp2(tcp_scale_raw)) AS true_window_size,
        IF(any(tcp_mss_raw) > 0, any(tcp_win_raw) / any(tcp_mss_raw), 0) AS window_mss_ratio,
        any(http_ver_raw) AS http_version,
        any(tls_alpn_raw) AS tls_alpn,
        any(tls_sni_raw) AS tls_sni,
        max(correlated_raw) AS correlated,
        uniqMerge(unique_src_ports) AS unique_src_ports,
        uniqMerge(unique_conn_id) AS unique_conn_id,
        max(max_keepalives) AS max_keepalives,
        sum(orphan_count) AS orphan_count,
        sum(ip_id_zero_count) AS ip_id_zero_count,
        sum(mss_1460_count) AS mss_1460_count
    FROM mabase_prod.agg_host_ip_ja4_1h
    -- window_start is hour-truncated: align the lower bound to the hour so the
    -- bucket containing "now - 24h" is kept instead of silently dropped.
    WHERE window_start >= toStartOfHour(now() - INTERVAL 24 HOUR)
        AND src_ip NOT IN (SELECT ip FROM mabase_prod.bot_ip)
        AND ja4 NOT IN (SELECT ja4 FROM mabase_prod.bot_ja4)
    GROUP BY window_start, src_ip, ja4, host
) AS a
LEFT JOIN (
    SELECT
        window_start,
        src_ip,
        any(header_order_hash) AS header_order_hash,
        max(header_count) AS header_count,
        max(has_accept_language) AS has_accept_language,
        max(has_cookie) AS has_cookie,
        max(has_referer) AS has_referer,
        max(modern_browser_score) AS modern_browser_score,
        max(ua_ch_mismatch) AS ua_ch_mismatch,
        any(sec_fetch_mode) AS sec_fetch_mode,
        any(sec_fetch_dest) AS sec_fetch_dest
    FROM mabase_prod.agg_header_fingerprint_1h
    WHERE window_start >= toStartOfHour(now() - INTERVAL 24 HOUR)
    GROUP BY window_start, src_ip
) AS h
    ON a.src_ip = h.src_ip AND a.window_start = h.window_start;

-- 7. HEURISTIC VIEWS
-- All of them read the hourly buckets of the last 24 hours; the lower bound is
-- hour-aligned because window_start is hour-truncated.

-- Source IPs rotating through at least 5 JA4 fingerprints with more than 100 hits.
CREATE OR REPLACE VIEW mabase_prod.view_host_ip_ja4_rotation AS
SELECT
    src_ip,
    uniqExact(ja4) AS distinct_ja4_count,
    sum(hits) AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 24 HOUR)
GROUP BY src_ip
HAVING distinct_ja4_count >= 5 AND total_hits > 100;

-- JA4 fingerprints seen from at least 20 IPs and targeting at least 3 hosts.
CREATE OR REPLACE VIEW mabase_prod.view_host_ja4_anomalies AS
SELECT
    ja4,
    uniqExact(src_ip) AS unique_ips,
    uniqExact(src_country_code) AS unique_countries,
    uniqExact(host) AS targeted_hosts
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 24 HOUR)
GROUP BY ja4
HAVING unique_ips >= 20 AND targeted_hosts >= 3;

-- Clients with at least 20 hits and at least 10 distinct query strings on a host.
CREATE OR REPLACE VIEW mabase_prod.view_form_bruteforce_detected AS
SELECT
    src_ip,
    ja4,
    host,
    sum(hits) AS hits,
    uniqMerge(uniq_query_params) AS query_params_count
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 24 HOUR)
GROUP BY src_ip, ja4, host
HAVING query_params_count >= 10 AND hits >= 20;

-- Clients negotiating h2/h3 via ALPN but speaking HTTP/1.1 (at least 10 hits).
CREATE OR REPLACE VIEW mabase_prod.view_alpn_mismatch_detected AS
SELECT
    src_ip,
    ja4,
    host,
    sum(hits) AS hits,
    any(http_ver_raw) AS http_version,
    any(tls_alpn_raw) AS tls_alpn
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 24 HOUR)
GROUP BY src_ip, ja4, host
HAVING http_version = '1.1' AND tls_alpn IN ('h2', 'h3') AND hits >= 10;

-- Clients whose User-Agent claims Windows or iPhone while the observed IP TTL is <= 64.
-- NOTE(review): iOS devices commonly use an initial TTL of 64, so the iPhone
-- branch probably matches legitimate traffic - confirm the intended rule.
CREATE OR REPLACE VIEW mabase_prod.view_tcp_spoofing_detected AS
SELECT
    src_ip,
    ja4,
    any(tcp_ttl_raw) AS tcp_ttl,
    any(tcp_win_raw) AS tcp_window_size,
    any(first_ua) AS first_ua
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 24 HOUR)
GROUP BY src_ip, ja4
HAVING tcp_ttl <= 64 AND (first_ua ILIKE '%Windows%' OR first_ua ILIKE '%iPhone%');