-- =============================================================================
-- Materialized views for Dashboard Entities - Bot Detector
-- =============================================================================
--
-- One target table (mabase_prod.view_dashboard_entities) is fed by one
-- materialized view per entity type. ClickHouse incremental materialized
-- views do not directly support UNION ALL; the documented pattern is one
-- view per SELECT branch, all writing to the same target table.
--
-- Entities handled:
--   - ip            : source IP addresses
--   - ja4           : JA4 fingerprints
--   - user_agent    : HTTP User-Agents
--   - client_header : client headers
--   - host          : HTTP hosts
--   - path          : URL paths
--   - query_param   : query parameter names (concatenated, e.g. foo,baz)
--
-- Installation:
-- -------------
-- 1. Connect to ClickHouse from the CLI (the password is prompted for;
--    never put it on the command line or in this file):
--      clickhouse-client --host test-sdv-anubis.sdv.fr --port 8123 \
--                        --user admin --ask-password
--    NOTE(review): 8123 is ClickHouse's default HTTP port, while
--    clickhouse-client speaks the native protocol (default port 9000).
--    Confirm which port this server exposes for native connections.
--
-- 2. Copy-paste EACH BLOCK separately (one at a time).
--
-- 3. Check that the objects exist:
--      SHOW TABLES FROM mabase_prod LIKE 'view_dashboard_entities%';
--      SELECT count() FROM mabase_prod.view_dashboard_entities;
--    The views are created without POPULATE, so the table stays empty until
--    new rows are inserted into mabase_prod.http_logs.
--
-- =============================================================================

USE mabase_prod;

-- =============================================================================
-- BLOCK 0/3 : Clean up existing objects (IMPORTANT)
-- =============================================================================
-- Views are dropped before the table they write to. The first statement
-- removes the legacy single-view name used by earlier versions of this script.

DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_ip_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_ja4_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_user_agent_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_client_header_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_host_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_path_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_query_param_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities;

-- =============================================================================
-- BLOCK 1/3 : Create the target table
-- =============================================================================
-- The engine is plain MergeTree: every insert into http_logs appends new
-- partial rows here and they are never combined on merge. Readers must
-- aggregate at query time (e.g. sum(requests)).

CREATE TABLE IF NOT EXISTS mabase_prod.view_dashboard_entities
(
    -- Entity identification
    entity_type LowCardinality(String),
    entity_value String,

    -- Context
    src_ip IPv4,
    ja4 String,
    host String,

    -- Time (daily granularity)
    log_date Date,

    -- Metrics
    requests UInt64,
    -- Always 1 per stored row because src_ip is part of every GROUP BY below;
    -- use uniqExact(src_ip) at query time to count distinct IPs per entity.
    unique_ips UInt64,

    -- Associated attributes (for cross-investigation)
    user_agents Array(String),
    client_headers Array(String),
    paths Array(String),
    query_params Array(String),
    asns Array(String),
    countries Array(String)
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(log_date)
ORDER BY (entity_type, entity_value, log_date)
TTL log_date + INTERVAL 30 DAY
SETTINGS index_granularity = 8192;

-- =============================================================================
-- BLOCK 2/3 : Create the materialized views (one per entity type)
-- =============================================================================
-- Shared expression used in every view to build the query-parameter string:
--   replaceOne(query, '?', '')  removes the first '?'
--   splitByChar('&', ...)       splits into key=value pairs
--   splitByChar('=', x)[1]      keeps the part before the first '='
--   arrayStringConcat(..., ',') joins the names with commas

-- 2a. Entity: IP
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_ip_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    'ip' AS entity_type,
    toString(src_ip) AS entity_value,
    src_ip,
    ja4,
    host,
    toDate(time) AS log_date,
    count() AS requests,
    uniq(src_ip) AS unique_ips,
    groupArrayDistinct(header_user_agent) AS user_agents,
    groupArrayDistinct(client_headers) AS client_headers,
    groupArrayDistinct(path) AS paths,
    groupArrayDistinct(
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        )
    ) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM mabase_prod.http_logs
WHERE src_ip IS NOT NULL
GROUP BY
    src_ip,
    ja4,
    host,
    log_date;

-- 2b. Entity: JA4
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_ja4_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    'ja4' AS entity_type,
    ja4 AS entity_value,
    src_ip,
    ja4,
    host,
    toDate(time) AS log_date,
    count() AS requests,
    uniq(src_ip) AS unique_ips,
    groupArrayDistinct(header_user_agent) AS user_agents,
    groupArrayDistinct(client_headers) AS client_headers,
    groupArrayDistinct(path) AS paths,
    groupArrayDistinct(
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        )
    ) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM mabase_prod.http_logs
WHERE ja4 != '' AND ja4 IS NOT NULL
GROUP BY
    src_ip,
    ja4,
    host,
    log_date;

-- 2c. Entity: User-Agent
-- The inner SELECT renames header_user_agent to ua so the entity column can
-- be filtered and grouped under a short, unambiguous name.
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_user_agent_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    'user_agent' AS entity_type,
    ua AS entity_value,
    src_ip,
    ja4,
    host,
    toDate(time) AS log_date,
    count() AS requests,
    uniq(src_ip) AS unique_ips,
    groupArrayDistinct(ua) AS user_agents,
    groupArrayDistinct(client_headers) AS client_headers,
    groupArrayDistinct(path) AS paths,
    groupArrayDistinct(
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        )
    ) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM
(
    SELECT
        src_ip,
        ja4,
        host,
        time,
        src_asn,
        src_country_code,
        header_user_agent AS ua,
        client_headers,
        path,
        query
    FROM mabase_prod.http_logs
)
WHERE ua != '' AND ua IS NOT NULL
GROUP BY
    src_ip,
    ja4,
    host,
    log_date,
    ua;

-- 2d. Entity: Client Header
-- The inner SELECT renames client_headers to ch: the outer query also emits
-- an aggregate aliased client_headers, so the raw column needs its own name
-- to be usable in WHERE and GROUP BY.
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_client_header_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    'client_header' AS entity_type,
    ch AS entity_value,
    src_ip,
    ja4,
    host,
    toDate(time) AS log_date,
    count() AS requests,
    uniq(src_ip) AS unique_ips,
    groupArrayDistinct(header_user_agent) AS user_agents,
    groupArrayDistinct(ch) AS client_headers,
    groupArrayDistinct(path) AS paths,
    groupArrayDistinct(
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        )
    ) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM
(
    SELECT
        src_ip,
        ja4,
        host,
        time,
        src_asn,
        src_country_code,
        header_user_agent,
        client_headers AS ch,
        path,
        query
    FROM mabase_prod.http_logs
)
WHERE ch != '' AND ch IS NOT NULL
GROUP BY
    src_ip,
    ja4,
    host,
    log_date,
    ch;

-- 2e. Entity: Host
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_host_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    'host' AS entity_type,
    host AS entity_value,
    src_ip,
    ja4,
    host,
    toDate(time) AS log_date,
    count() AS requests,
    uniq(src_ip) AS unique_ips,
    groupArrayDistinct(header_user_agent) AS user_agents,
    groupArrayDistinct(client_headers) AS client_headers,
    groupArrayDistinct(path) AS paths,
    groupArrayDistinct(
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        )
    ) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM mabase_prod.http_logs
WHERE host != '' AND host IS NOT NULL
GROUP BY
    src_ip,
    ja4,
    host,
    log_date;

-- 2f. Entity: Path
-- The inner SELECT renames path to p so the entity column is distinct from
-- the aggregated paths output.
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_path_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    'path' AS entity_type,
    p AS entity_value,
    src_ip,
    ja4,
    host,
    toDate(time) AS log_date,
    count() AS requests,
    uniq(src_ip) AS unique_ips,
    groupArrayDistinct(header_user_agent) AS user_agents,
    groupArrayDistinct(client_headers) AS client_headers,
    groupArrayDistinct(p) AS paths,
    groupArrayDistinct(
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        )
    ) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM
(
    SELECT
        src_ip,
        ja4,
        host,
        time,
        src_asn,
        src_country_code,
        header_user_agent,
        client_headers,
        path AS p,
        query
    FROM mabase_prod.http_logs
)
WHERE p != '' AND p IS NOT NULL
GROUP BY
    src_ip,
    ja4,
    host,
    log_date,
    p;

-- 2g. Entity: Query Param (concatenated parameter names)
-- The parameter-name string is computed once in the inner SELECT so it can
-- serve as entity value, filter, and grouping key.
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_query_param_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    'query_param' AS entity_type,
    query_params_string AS entity_value,
    src_ip,
    ja4,
    host,
    toDate(time) AS log_date,
    count() AS requests,
    uniq(src_ip) AS unique_ips,
    groupArrayDistinct(header_user_agent) AS user_agents,
    groupArrayDistinct(client_headers) AS client_headers,
    groupArrayDistinct(path) AS paths,
    groupArrayDistinct(query_params_string) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM
(
    SELECT
        src_ip,
        ja4,
        host,
        time,
        src_asn,
        src_country_code,
        header_user_agent,
        client_headers,
        path,
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        ) AS query_params_string
    FROM mabase_prod.http_logs
    WHERE query != '' AND query IS NOT NULL
)
WHERE query_params_string != ''
GROUP BY
    src_ip,
    ja4,
    host,
    log_date,
    query_params_string;

-- =============================================================================
-- BLOCK 3/3 : Create the indexes (optional - intended to improve performance)
-- =============================================================================
-- NOTE(review): entity_type and entity_value are already the leading columns
-- of the table's ORDER BY key, so the first two skip indexes are likely
-- redundant with the primary index - confirm with EXPLAIN before keeping them.

ALTER TABLE mabase_prod.view_dashboard_entities
    ADD INDEX IF NOT EXISTS idx_entities_type (entity_type) TYPE minmax GRANULARITY 1;

ALTER TABLE mabase_prod.view_dashboard_entities
    ADD INDEX IF NOT EXISTS idx_entities_value (entity_value) TYPE minmax GRANULARITY 1;

ALTER TABLE mabase_prod.view_dashboard_entities
    ADD INDEX IF NOT EXISTS idx_entities_ip (src_ip) TYPE minmax GRANULARITY 1;

-- =============================================================================
-- END
-- =============================================================================
--
-- To check that the views are feeding the table:
-- ----------------------------------------------
-- SELECT entity_type, count() FROM mabase_prod.view_dashboard_entities GROUP BY entity_type;
--
-- To force a merge of data parts (if needed):
-- -------------------------------------------
-- This does NOT re-read http_logs and, with the MergeTree engine, does not
-- combine rows; it only merges parts on disk.
-- OPTIMIZE TABLE mabase_prod.view_dashboard_entities FINAL;
--
-- Example queries:
-- ----------------
-- -- Stats for an IP
-- SELECT * FROM mabase_prod.view_dashboard_entities
-- WHERE entity_type = 'ip' AND entity_value = '116.179.33.143';
--
-- -- Stats for a JA4
-- SELECT * FROM mabase_prod.view_dashboard_entities
-- WHERE entity_type = 'ja4' AND entity_value = 't13d190900_9dc949149365_97f8aa674fd9';
--
-- -- Top 10 user-agents
-- SELECT entity_value, sum(requests) as total
-- FROM mabase_prod.view_dashboard_entities
-- WHERE entity_type = 'user_agent'
-- GROUP BY entity_value
-- ORDER BY total DESC
-- LIMIT 10;
--
-- =============================================================================