-- ============================================================================
-- DEPLOYMENT SCRIPT FOR THE BOT & SPAM DETECTION VIEWS (CLICKHOUSE)
-- ============================================================================

-- ----------------------------------------------------------------------------
-- 1. STRICT CLEANUP
-- ----------------------------------------------------------------------------
-- Every object created by this script is dropped first so the script can be
-- re-run. Each materialized view is dropped before the table it writes to.
-- Note that this also discards the rows stored in the dropped tables.
DROP TABLE IF EXISTS mabase_prod.ml_detected_anomalies;

DROP VIEW IF EXISTS mabase_prod.view_ai_features_1h;
DROP VIEW IF EXISTS mabase_prod.view_host_ip_ja4_rotation;
DROP VIEW IF EXISTS mabase_prod.view_host_ja4_anomalies;
DROP VIEW IF EXISTS mabase_prod.view_form_bruteforce_detected;
DROP VIEW IF EXISTS mabase_prod.view_alpn_mismatch_detected;
DROP VIEW IF EXISTS mabase_prod.view_tcp_spoofing_detected;

DROP VIEW IF EXISTS mabase_prod.mv_agg_host_ip_ja4_1h;
DROP TABLE IF EXISTS mabase_prod.agg_host_ip_ja4_1h;

DROP VIEW IF EXISTS mabase_prod.mv_agg_header_fingerprint_1h;
DROP TABLE IF EXISTS mabase_prod.agg_header_fingerprint_1h;

-- ----------------------------------------------------------------------------
-- 2. AGGREGATION TABLES AND MATERIALIZED VIEWS (REAL TIME)
-- ----------------------------------------------------------------------------

-- Hourly roll-up keyed by (window_start, src_ip, ja4, host).
-- Columns typed AggregateFunction(...) hold partial states and must be read
-- with the matching -Merge combinator (uniqMerge, varPopMerge).
-- Rows expire 7 days after their window_start.
CREATE TABLE mabase_prod.agg_host_ip_ja4_1h
(
    window_start        DateTime,
    src_ip              String,
    ja4                 String,
    host                String,
    first_seen          SimpleAggregateFunction(min, DateTime),
    last_seen           SimpleAggregateFunction(max, DateTime),
    hits                SimpleAggregateFunction(sum, UInt64),
    count_post          SimpleAggregateFunction(sum, UInt64),
    uniq_paths          AggregateFunction(uniq, String),
    uniq_query_params   AggregateFunction(uniq, String),
    src_country_code    SimpleAggregateFunction(any, String),
    tcp_fingerprint     SimpleAggregateFunction(any, String),
    tcp_jitter_variance AggregateFunction(varPop, Float64),
    tcp_window_size     SimpleAggregateFunction(any, UInt32),
    tcp_window_scale    SimpleAggregateFunction(any, UInt32),
    tcp_mss             SimpleAggregateFunction(any, UInt32),
    tcp_ttl             SimpleAggregateFunction(any, UInt32),
    http_version        SimpleAggregateFunction(any, String),
    first_ua            SimpleAggregateFunction(any, String)
)
ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip, ja4, host)
TTL window_start + INTERVAL 7 DAY;

-- Feeds agg_host_ip_ja4_1h from mabase_prod.http_logs, one partial aggregate
-- per inserted batch and per (hour, src_ip, ja4, host).
CREATE MATERIALIZED VIEW mabase_prod.mv_agg_host_ip_ja4_1h
TO mabase_prod.agg_host_ip_ja4_1h
AS
SELECT
    toStartOfHour(time)                       AS window_start,
    src_ip,
    ja4,
    host,
    min(time)                                 AS first_seen,
    max(time)                                 AS last_seen,
    count()                                   AS hits,
    countIf(method = 'POST')                  AS count_post,
    uniqState(path)                           AS uniq_paths,
    uniqState(query)                          AS uniq_query_params,
    any(src_country_code)                     AS src_country_code,
    -- TCP fingerprint: hash of window size, MSS, window scale and options,
    -- concatenated in that order without a separator.
    any(
        toString(
            cityHash64(
                concat(
                    toString(tcp_meta_window_size),
                    toString(tcp_meta_mss),
                    toString(tcp_meta_window_scale),
                    tcp_meta_options
                )
            )
        )
    )                                         AS tcp_fingerprint,
    varPopState(toFloat64(syn_to_clienthello_ms)) AS tcp_jitter_variance,
    any(tcp_meta_window_size)                 AS tcp_window_size,
    any(tcp_meta_window_scale)                AS tcp_window_scale,
    any(tcp_meta_mss)                         AS tcp_mss,
    any(ip_meta_ttl)                          AS tcp_ttl,
    any(http_version)                         AS http_version,
    any(header_user_agent)                    AS first_ua
FROM mabase_prod.http_logs
GROUP BY
    window_start,
    src_ip,
    ja4,
    host;

-- Hourly roll-up of HTTP header characteristics keyed by (window_start, src_ip).
-- Rows expire 7 days after their window_start.
CREATE TABLE mabase_prod.agg_header_fingerprint_1h
(
    window_start         DateTime,
    src_ip               String,
    header_order_hash    SimpleAggregateFunction(any, String),
    modern_browser_score SimpleAggregateFunction(max, UInt8),
    sec_fetch_mode       SimpleAggregateFunction(any, String),
    sec_fetch_dest       SimpleAggregateFunction(any, String),
    count_site_none      SimpleAggregateFunction(sum, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip)
TTL window_start + INTERVAL 7 DAY;

-- Feeds agg_header_fingerprint_1h from mabase_prod.http_logs.
CREATE MATERIALIZED VIEW mabase_prod.mv_agg_header_fingerprint_1h
TO mabase_prod.agg_header_fingerprint_1h
AS
SELECT
    toStartOfHour(time)                        AS window_start,
    src_ip,
    any(toString(cityHash64(client_headers)))  AS header_order_hash,
    -- Score: 100 when a Sec-CH-UA header is present, else 50 when a
    -- User-Agent is present, else 0; the hourly maximum is kept.
    max(
        toUInt8(
            multiIf(
                length(header_sec_ch_ua) > 0, 100,
                length(header_user_agent) > 0, 50,
                0
            )
        )
    )                                          AS modern_browser_score,
    any(header_sec_fetch_mode)                 AS sec_fetch_mode,
    any(header_sec_fetch_dest)                 AS sec_fetch_dest,
    countIf(header_sec_fetch_site = 'none')    AS count_site_none
FROM mabase_prod.http_logs
GROUP BY
    window_start,
    src_ip;

-- ----------------------------------------------------------------------------
-- 3. DESTINATION TABLE FOR MACHINE LEARNING
-- ----------------------------------------------------------------------------
-- Nothing in this script writes to this table; it is only created here.
-- Rows expire 30 days after detected_at.
CREATE TABLE mabase_prod.ml_detected_anomalies
(
    detected_at   DateTime,
    src_ip        String,
    ja4           String,
    host          String,
    anomaly_score Float32,
    reason        String
)
ENGINE = MergeTree()
ORDER BY (detected_at, src_ip, ja4)
TTL detected_at + INTERVAL 30 DAY;

-- ----------------------------------------------------------------------------
-- 4. FEATURE ENGINEERING VIEW FOR THE ISOLATION FOREST (RESOLVED)
-- ----------------------------------------------------------------------------
-- Uses aggregated sub-queries (explicit GROUP BY) before the join
-- to avoid aggregate-state errors and a cartesian product.
-- Feature view for anomaly scoring: one row per (window_start, src_ip, ja4,
-- host) taken from agg_host_ip_ja4_1h, enriched with the per-(window_start,
-- src_ip) header aggregates from agg_header_fingerprint_1h.
--
-- The time filter keeps the current hourly bucket and the two preceding ones.
-- Both sides are re-aggregated with GROUP BY before joining, because rows of an
-- AggregatingMergeTree table may not be fully merged at read time.
--
-- NOTE(review): window_start is part of the grouping key but is not exposed,
-- so the same (src_ip, ja4, host) can appear once per hourly bucket with no
-- column to tell the rows apart - confirm this is what the consumer expects.
CREATE VIEW mabase_prod.view_ai_features_1h AS
SELECT
    a.src_ip,
    a.ja4,
    a.host,
    a.hits,
    a.uniq_paths,
    a.uniq_query_params,
    a.count_post,

    -- L4/L7 correlation flag: 1 when both a JA4 and a TCP fingerprint exist.
    IF(length(a.ja4) > 0 AND length(a.tcp_fingerprint) > 0, 1, 0) AS correlated,

    -- BEHAVIOURAL DIMENSIONS
    -- The "+ 1" in each denominator avoids division by zero.
    (a.count_post / (a.hits + 1)) AS post_ratio,
    (a.uniq_query_params / (a.uniq_paths + 1)) AS fuzzing_index,
    (a.hits / (dateDiff('second', a.first_seen, a.last_seen) + 1)) AS hit_velocity,

    -- TCP / L4 DIMENSIONS
    COALESCE(a.tcp_jitter_variance, 0) AS tcp_jitter_variance,
    -- Number of result rows sharing the same TCP fingerprint.
    -- NOTE(review): this counts rows across all hourly buckets in the view,
    -- not distinct source IPs - confirm intent.
    count() OVER (PARTITION BY a.tcp_fingerprint) AS tcp_shared_count,
    a.tcp_window_size * exp2(a.tcp_window_scale) AS true_window_size,
    IF(a.tcp_mss > 0, a.tcp_window_size / a.tcp_mss, 0) AS window_mss_ratio,

    -- TLS / L5 DIMENSIONS (mismatch)
    -- In a JA4 fingerprint (e.g. "t13d1516h2_..."), the ALPN code occupies
    -- characters 9-10 (1-based): protocol(1) + TLS version(2) + SNI(1) +
    -- cipher count(2) + extension count(2) precede it. Offset 10 would read
    -- the second ALPN character plus the "_" separator and never match.
    -- NOTE(review): assumes http_version is stored as '2' for HTTP/2 - confirm.
    IF(substring(a.ja4, 9, 2) = 'h2' AND a.http_version != '2', 1, 0) AS alpn_http_mismatch,
    IF(substring(a.ja4, 9, 2) = '00', 1, 0) AS is_alpn_missing,

    -- HTTP / L7 DIMENSIONS
    COALESCE(h.modern_browser_score, 0) AS modern_browser_score,
    IF(h.sec_fetch_mode = 'navigate' AND h.sec_fetch_dest != 'document', 1, 0) AS is_fake_navigation,
    -- NOTE(review): count_site_none is per (window_start, src_ip) while hits is
    -- per (window_start, src_ip, ja4, host), so this ratio can exceed 1.
    (h.count_site_none / (a.hits + 1)) AS site_none_ratio
FROM
(
    -- Consolidated per-host aggregates: merges partial states and sums counters.
    SELECT
        window_start,
        src_ip,
        ja4,
        host,
        sum(hits)                         AS hits,
        uniqMerge(uniq_paths)             AS uniq_paths,
        uniqMerge(uniq_query_params)      AS uniq_query_params,
        sum(count_post)                   AS count_post,
        min(first_seen)                   AS first_seen,
        max(last_seen)                    AS last_seen,
        any(tcp_fingerprint)              AS tcp_fingerprint,
        varPopMerge(tcp_jitter_variance)  AS tcp_jitter_variance,
        any(tcp_window_size)              AS tcp_window_size,
        any(tcp_window_scale)             AS tcp_window_scale,
        any(tcp_mss)                      AS tcp_mss,
        any(http_version)                 AS http_version
    FROM mabase_prod.agg_host_ip_ja4_1h
    WHERE window_start >= toStartOfHour(now() - INTERVAL 2 HOUR)
    GROUP BY
        window_start,
        src_ip,
        ja4,
        host
) AS a
LEFT JOIN
(
    -- Consolidated header aggregates.
    SELECT
        window_start,
        src_ip,
        max(modern_browser_score) AS modern_browser_score,
        any(sec_fetch_mode)       AS sec_fetch_mode,
        any(sec_fetch_dest)       AS sec_fetch_dest,
        sum(count_site_none)      AS count_site_none
    FROM mabase_prod.agg_header_fingerprint_1h
    WHERE window_start >= toStartOfHour(now() - INTERVAL 2 HOUR)
    GROUP BY
        window_start,
        src_ip
) AS h
    ON a.src_ip = h.src_ip
    AND a.window_start = h.window_start;

-- ----------------------------------------------------------------------------
-- 5. STATIC HEURISTIC DETECTION VIEWS (RESOLVED)
-- ----------------------------------------------------------------------------
-- Each view below looks at the current hourly bucket and the preceding one.

-- Source IPs presenting at least 5 distinct JA4 fingerprints with more than
-- 100 hits in total.
CREATE VIEW mabase_prod.view_host_ip_ja4_rotation AS
SELECT
    src_ip,
    uniqExact(ja4) AS distinct_ja4_count,
    sum(hits)      AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY src_ip
HAVING distinct_ja4_count >= 5
    AND total_hits > 100;

-- JA4 fingerprints seen from at least 20 distinct IPs and targeting at least
-- 3 distinct hosts.
CREATE VIEW mabase_prod.view_host_ja4_anomalies AS
SELECT
    ja4,
    uniqExact(src_ip)           AS unique_ips,
    uniqExact(src_country_code) AS unique_countries,
    uniqExact(host)             AS targeted_hosts
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY ja4
HAVING unique_ips >= 20
    AND targeted_hosts >= 3;

-- (src_ip, ja4, host) triples with at least 20 hits and at least 10 distinct
-- query strings.
CREATE VIEW mabase_prod.view_form_bruteforce_detected AS
SELECT
    src_ip,
    ja4,
    host,
    sum(hits)                    AS hits,
    uniqMerge(uniq_query_params) AS query_params_count
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY
    src_ip,
    ja4,
    host
HAVING query_params_count >= 10
    AND hits >= 20;

-- Clients whose JA4 advertises ALPN h2/h3 but whose recorded HTTP version is
-- '1.1', with at least 10 hits.
-- The ALPN code is at characters 9-10 (1-based) of the JA4 string; reading
-- from offset 10 would never match 'h2' or 'h3'.
CREATE VIEW mabase_prod.view_alpn_mismatch_detected AS
SELECT
    src_ip,
    ja4,
    host,
    sum(hits)         AS hits,
    any(http_version) AS http_version
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
    AND substring(ja4, 9, 2) IN ('h2', 'h3')
GROUP BY
    src_ip,
    ja4,
    host
HAVING http_version = '1.1'
    AND hits >= 10;

-- (src_ip, ja4) pairs whose observed TTL is at most 64 while the User-Agent
-- claims Windows or iPhone.
-- NOTE(review): iOS is commonly documented as using an initial TTL of 64, so
-- the iPhone branch may match ordinary iPhone traffic - confirm intent.
CREATE VIEW mabase_prod.view_tcp_spoofing_detected AS
SELECT
    src_ip,
    ja4,
    any(tcp_ttl)         AS tcp_ttl,
    any(tcp_window_size) AS tcp_window_size,
    any(first_ua)        AS first_ua
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY
    src_ip,
    ja4
HAVING tcp_ttl <= 64
    AND (first_ua ILIKE '%Windows%' OR first_ua ILIKE '%iPhone%');