-- =============================================================================
-- 11_views.sql — Business views for the dashboard
--
-- This file creates the views that the dashboard references but that are
-- missing from the shared schema. The views aggregate data from
-- agg_host_ip_ja4_1h and http_logs to give the FastAPI endpoints
-- business-level perspectives.
--
-- Views created:
--   view_form_bruteforce_detected — IPs/hosts with a high volume of POST requests
--   view_host_ip_ja4_rotation     — IPs changing their JA4 fingerprint (evasion)
--   view_dashboard_entities       — IP/JA4/country/ASN/host pivot for investigation
--   view_dashboard_user_agents    — User-Agents aggregated per IP/JA4/hour
--   view_dashboard_summary        — Global 24h metrics (if not already created)
-- =============================================================================

-- -----------------------------------------------------------------------------
-- view_form_bruteforce_detected
--
-- Flags IPs that look like they are brute-forcing forms on a host:
--   - the (src_ip, host) pair accumulated at least 10 POST requests
--     (sum of count_post) over the whole window, not per hour;
--   - the window is the trailing 24 hours of agg_host_ip_ja4_1h.
--
-- Columns:
--   src_ip, host       — grouping key
--   ja4                — fingerprint with the most hits for this pair
--   hits               — total hits over the window
--   post_count         — total count_post over the window
--   query_params_count — same value as post_count (kept for the dashboard)
--   distinct_paths     — number of per-JA4 groups for this pair, i.e. the
--                        number of distinct ja4 values; it is NOT a path
--                        count despite its name
--   first_seen         — earliest window_start in the window
--   last_seen          — latest window_start in the window
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_form_bruteforce_detected AS
SELECT
    src_ip,
    host,
    argMax(ja4, hits_per_ja4)  AS ja4,
    sum(hits_per_ja4)          AS hits,
    sum(posts_per_ja4)         AS post_count,
    sum(posts_per_ja4)         AS query_params_count,
    count()                    AS distinct_paths,
    min(earliest_window)       AS first_seen,
    max(latest_window)         AS last_seen
FROM
(
    -- One row per (src_ip, host, ja4) over the last 24 hours.
    SELECT
        src_ip,
        host,
        ja4,
        sum(hits)         AS hits_per_ja4,
        sum(count_post)   AS posts_per_ja4,
        min(window_start) AS earliest_window,
        max(window_start) AS latest_window
    FROM ja4_processing.agg_host_ip_ja4_1h
    WHERE window_start >= now() - INTERVAL 24 HOUR
    GROUP BY
        src_ip,
        host,
        ja4
) AS per_fingerprint
GROUP BY
    src_ip,
    host
HAVING post_count >= 10;

-- -----------------------------------------------------------------------------
-- view_host_ip_ja4_rotation
--
-- Detects IPs that change their JA4 fingerprint (TLS ClientHello rotation),
-- an indicator of detection evasion by bot tooling.
--
-- Only rows from the trailing 24 hours with a non-empty ja4 are considered,
-- and only IPs with at least 2 distinct fingerprints are returned.
--
-- Columns:
--   src_ip             — grouping key
--   host               — host of the single busiest (host, ja4) combination
--   distinct_ja4       — number of distinct JA4 fingerprints for the IP
--   distinct_ja4_count — same value as distinct_ja4
--   ja4_list           — the distinct fingerprints (order not guaranteed)
--   total_hits         — total hits over the window
--   window_start       — latest window_start seen (same value as last_seen)
--   first_seen         — earliest window_start seen
--   last_seen          — latest window_start seen
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_host_ip_ja4_rotation AS
SELECT
    src_ip,
    argMax(host, ja4_hits) AS host,
    -- The subquery yields one row per (src_ip, host, ja4), so a plain count()
    -- would count (host, ja4) pairs: an IP using a single fingerprint against
    -- two hosts would be reported as rotating. Count the fingerprints
    -- themselves instead.
    uniqExact(ja4)         AS distinct_ja4,
    uniqExact(ja4)         AS distinct_ja4_count,
    -- De-duplicated for the same reason: a fingerprint seen on several hosts
    -- must appear only once in the list.
    groupUniqArray(ja4)    AS ja4_list,
    sum(ja4_hits)          AS total_hits,
    max(w_max)             AS window_start,
    min(w_min)             AS first_seen,
    max(w_max)             AS last_seen
FROM
(
    SELECT
        src_ip,
        host,
        ja4,
        sum(hits)         AS ja4_hits,
        min(window_start) AS w_min,
        max(window_start) AS w_max
    FROM ja4_processing.agg_host_ip_ja4_1h
    WHERE window_start >= now() - INTERVAL 24 HOUR
      AND ja4 != ''
    GROUP BY
        src_ip,
        host,
        ja4
) AS sub
GROUP BY src_ip
HAVING distinct_ja4 >= 2
ORDER BY distinct_ja4 DESC;

-- -----------------------------------------------------------------------------
-- view_dashboard_user_agents
--
-- Aggregates User-Agents per IP, JA4 and hour.
-- Used by variability.py and attributes.py with ARRAY JOIN user_agents.
--
-- Columns:
--   src_ip      — IPv4 (without the ::ffff: prefix); a value that does not
--                 parse as IPv4 after stripping becomes 0.0.0.0 (toIPv4OrZero)
--   ja4         — TLS fingerprint
--   hour        — start of the hour (toStartOfHour)
--   log_date    — date of the request
--                 NOTE(review): the original comment says this is "for the
--                 TTL of the view"; a plain view has no TTL — confirm intent
--   user_agents — Array(String) of the distinct UAs seen in that hour
--   requests    — total number of requests in that hour
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_dashboard_user_agents AS
SELECT
    -- IPv4 normalisation: strip the ::ffff: prefix of IPv4-mapped IPv6 addresses.
    toIPv4OrZero(
        replaceRegexpAll(toString(src_ip), '^::ffff:', '')
    )                                       AS src_ip,
    ja4,
    toStartOfHour(time)                     AS hour,
    log_date,
    -- Collect distinct UAs, capped at 100 to avoid giant arrays.
    groupUniqArray(100)(header_user_agent)  AS user_agents,
    count()                                 AS requests
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND header_user_agent != ''
-- NOTE(review): src_ip below is both a table column and the SELECT alias
-- above; which one GROUP BY binds to depends on ClickHouse alias resolution
-- (see the prefer_column_name_to_alias setting) — confirm.
GROUP BY
    src_ip,
    ja4,
    toStartOfHour(time),
    log_date;

-- -----------------------------------------------------------------------------
-- view_dashboard_entities
--
-- Pivot view for navigating between entities (IP ↔ JA4 ↔ country ↔ ASN ↔ host).
-- For each entity (entity_type + entity_value) it exposes the associated data:
-- ips, ja4s, hosts (via GROUP BY in Python), asns, countries, user_agents,
-- client_headers (array for clustering.py).
--
-- UNION ALL structure: one branch per entity type.
-- Queries filtering on WHERE entity_type = 'ip' are expected to let ClickHouse
-- eliminate the other branches (constant condition on a computed column).
--
-- Columns:
--   entity_type    — 'ip' | 'ja4' | 'country' | 'asn' | 'host'
--   entity_value   — value of the entity (e.g. '1.2.3.4', 't13d...', 'FR', ...)
--   src_ip         — IPv6 (native ClickHouse format)
--   ja4            — JA4 fingerprint
--   host           — HTTP virtual host
--   log_date       — date of the request
--   client_headers — array of header names (splitByChar on the http_logs column)
--   asns           — Array(String) holding the source ASN (for groupUniqArrayArray)
--   countries      — Array(String) holding the source country code
--   user_agents    — Array(String) holding the User-Agent
--
-- Every branch reads the trailing 7 days of ja4_logs.http_logs and emits one
-- row per log row; only entity_type, entity_value and the branch filter differ.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_dashboard_entities AS

-- IP perspective: entity_value = source address with any ::ffff: prefix removed.
SELECT
    'ip'                                                  AS entity_type,
    replaceRegexpAll(toString(src_ip), '^::ffff:', '')    AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                      AS client_headers,
    [toString(src_asn)]                                   AS asns,
    [src_country_code]                                    AS countries,
    [header_user_agent]                                   AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY

UNION ALL

-- JA4 perspective: entity_value = JA4 TLS fingerprint (rows without one are skipped).
SELECT
    'ja4'                                                 AS entity_type,
    ja4                                                   AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                      AS client_headers,
    [toString(src_asn)]                                   AS asns,
    [src_country_code]                                    AS countries,
    [header_user_agent]                                   AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND ja4 != ''

UNION ALL

-- Country perspective: entity_value = ISO-3166 country code (e.g. 'FR', 'US').
SELECT
    'country'                                             AS entity_type,
    src_country_code                                      AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                      AS client_headers,
    [toString(src_asn)]                                   AS asns,
    [src_country_code]                                    AS countries,
    [header_user_agent]                                   AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND src_country_code != ''

UNION ALL

-- ASN perspective: entity_value = ASN number as text (e.g. '15169' for Google).
SELECT
    'asn'                                                 AS entity_type,
    toString(src_asn)                                     AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                      AS client_headers,
    [toString(src_asn)]                                   AS asns,
    [src_country_code]                                    AS countries,
    [header_user_agent]                                   AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND src_asn > 0

UNION ALL

-- Host perspective: entity_value = HTTP virtual host (e.g. 'api.example.com').
SELECT
    'host'                                                AS entity_type,
    host                                                  AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                      AS client_headers,
    [toString(src_asn)]                                   AS asns,
    [src_country_code]                                    AS countries,
    [header_user_agent]                                   AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND host != '';