- view_form_bruteforce_detected: add post_count, distinct_paths, first_seen, last_seen - view_host_ip_ja4_rotation: add host, distinct_ja4, ja4_list, window_start - Replace uniqExact/groupUniqArray with count()/groupArray (no nested-agg error) - api.py campaigns/graph: move a.src_ip < b.src_ip from JOIN ON to WHERE (ClickHouse 24.8 forbids cross-table inequality in JOIN ON condition) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
232 lines
9.8 KiB
SQL
232 lines
9.8 KiB
SQL
-- =============================================================================
-- 11_views.sql — Dashboard business views
--
-- This file creates the views that the dashboard references but that are
-- missing from the shared schema. They read from agg_host_ip_ja4_1h and
-- http_logs to give the FastAPI endpoints business-level perspectives.
--
-- Views created:
--   view_form_bruteforce_detected — IPs/hosts with a high volume of POST requests
--   view_host_ip_ja4_rotation     — IPs changing JA4 fingerprint (evasion)
--   view_dashboard_entities       — IP/JA4/country/ASN/host pivot for investigation
--   view_dashboard_user_agents    — User-Agents aggregated by IP/JA4/hour
--   view_dashboard_summary        — Global 24h metrics (if not already created)
-- =============================================================================
|
|
|
|
|
|
-- -----------------------------------------------------------------------------
-- view_form_bruteforce_detected
--
-- Surfaces (src_ip, host) pairs with a high volume of POST requests, used as a
-- signal of form brute-forcing.
--   - Source: ja4_processing.agg_host_ip_ja4_1h, restricted to rows whose
--     window_start falls within the last 24 hours.
--   - Threshold: at least 10 POSTs in total over that 24-hour span (the HAVING
--     clause tests the summed post count, not a per-hour rate).
--
-- Columns (one row per src_ip + host):
--   src_ip, host        — grouping keys
--   ja4                 — JA4 fingerprint with the most hits for the pair
--   hits                — total hits for the pair
--   post_count          — total POST requests for the pair
--   query_params_count  — same value as post_count
--                         NOTE(review): looks like a placeholder column; confirm
--   distinct_paths      — number of distinct ja4 values seen for the pair; the
--                         view reads no path column, so despite its name this
--                         is not a count of URL paths
--   first_seen          — earliest window_start for the pair
--   last_seen           — latest window_start for the pair
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_form_bruteforce_detected AS
SELECT
    src_ip,
    host,
    argMax(ja4, fp_hits)  AS ja4,
    sum(fp_hits)          AS hits,
    sum(fp_posts)         AS post_count,
    sum(fp_posts)         AS query_params_count,
    count()               AS distinct_paths,
    min(fp_first_window)  AS first_seen,
    max(fp_last_window)   AS last_seen
FROM (
    -- Pre-aggregation: one row per (src_ip, host, ja4) over the last 24 hours,
    -- so the outer query never nests one aggregate inside another.
    SELECT
        src_ip,
        host,
        ja4,
        sum(hits)          AS fp_hits,
        sum(count_post)    AS fp_posts,
        min(window_start)  AS fp_first_window,
        max(window_start)  AS fp_last_window
    FROM ja4_processing.agg_host_ip_ja4_1h
    WHERE window_start >= now() - INTERVAL 24 HOUR
    GROUP BY src_ip, host, ja4
) AS per_fingerprint
GROUP BY src_ip, host
HAVING post_count >= 10;
|
|
|
|
|
|
-- -----------------------------------------------------------------------------
-- view_host_ip_ja4_rotation
--
-- Flags source IPs that presented at least two different non-empty JA4
-- fingerprints within the last 24 hours of ja4_processing.agg_host_ip_ja4_1h
-- (TLS ClientHello rotation — an indicator of detection evasion by bot tools).
--
-- Columns (one row per src_ip):
--   src_ip             — grouping key
--   host               — host of the (host, ja4) pair with the most hits
--   distinct_ja4       — number of distinct JA4 fingerprints for the IP
--   distinct_ja4_count — same value as distinct_ja4
--   ja4_list           — the distinct JA4 fingerprints, one entry each
--   total_hits         — hits summed over every host and fingerprint
--   window_start       — latest window_start for the IP (same as last_seen)
--   first_seen         — earliest window_start for the IP
--   last_seen          — latest window_start for the IP
--
-- Why three levels: the base roll-up is keyed by (src_ip, host, ja4). Counting
-- those rows per src_ip would count (host, ja4) pairs, so an IP using a single
-- fingerprint against two hosts would look like a rotation and ja4_list would
-- contain duplicates. The middle level collapses hosts so that each row stands
-- for exactly one fingerprint; plain count()/groupArray() on top of it are
-- then exact, without nesting aggregates or needing uniqExact/groupUniqArray.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_host_ip_ja4_rotation AS
SELECT
    src_ip,
    argMax(top_host, top_pair_hits)  AS host,
    count()                          AS distinct_ja4,
    count()                          AS distinct_ja4_count,
    groupArray(ja4)                  AS ja4_list,
    sum(fp_hits)                     AS total_hits,
    max(fp_last_window)              AS window_start,
    min(fp_first_window)             AS first_seen,
    max(fp_last_window)              AS last_seen
FROM (
    -- Level 2: one row per (src_ip, ja4). Keeps the busiest host of each
    -- fingerprint together with that pair's hit count, so the outer argMax
    -- still returns the host of the overall busiest (host, ja4) pair.
    SELECT
        src_ip,
        ja4,
        argMax(host, pair_hits)  AS top_host,
        max(pair_hits)           AS top_pair_hits,
        sum(pair_hits)           AS fp_hits,
        min(pair_first_window)   AS fp_first_window,
        max(pair_last_window)    AS fp_last_window
    FROM (
        -- Level 1: one row per (src_ip, host, ja4) over the last 24 hours,
        -- ignoring rows without a JA4 fingerprint.
        SELECT
            src_ip,
            host,
            ja4,
            sum(hits)          AS pair_hits,
            min(window_start)  AS pair_first_window,
            max(window_start)  AS pair_last_window
        FROM ja4_processing.agg_host_ip_ja4_1h
        WHERE window_start >= now() - INTERVAL 24 HOUR
          AND ja4 != ''
        GROUP BY src_ip, host, ja4
    ) AS per_pair
    GROUP BY src_ip, ja4
) AS per_fingerprint
GROUP BY src_ip
HAVING distinct_ja4 >= 2
ORDER BY distinct_ja4 DESC;
|
|
|
|
|
|
-- -----------------------------------------------------------------------------
-- view_dashboard_user_agents
--
-- User-Agents aggregated per IP, JA4 fingerprint and hour, over the last
-- 7 days of ja4_logs.http_logs. Rows with an empty User-Agent are excluded.
-- Per the original note, consumed by variability.py and attributes.py through
-- ARRAY JOIN user_agents.
--
-- Columns:
--   src_ip      — IPv4 obtained by stripping a leading '::ffff:' from the
--                 textual source IP and parsing the rest with toIPv4OrZero
--   ja4         — TLS fingerprint
--   hour        — start of the hour (toStartOfHour(time))
--   log_date    — date column taken from http_logs
--                 NOTE(review): the original comment says it is kept "for the
--                 view's TTL"; confirm what that refers to
--   user_agents — distinct User-Agents for the group, capped at 100 entries
--   requests    — number of rows in the group (non-empty User-Agent only)
--
-- NOTE(review): the output alias src_ip shadows the source column of the same
-- name, and GROUP BY names src_ip as well; confirm whether grouping happens on
-- the normalized IPv4 or on the raw column under the server's alias-resolution
-- settings.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_dashboard_user_agents AS
SELECT
    -- IPv4 normalization: drop the '::ffff:' prefix of IPv4-mapped IPv6 text.
    toIPv4OrZero(
        replaceRegexpAll(toString(src_ip), '^::ffff:', '')
    )                                        AS src_ip,
    ja4,
    toStartOfHour(time)                      AS hour,
    log_date,
    -- Cap at 100 distinct values to keep the arrays from growing unbounded.
    groupUniqArray(100)(header_user_agent)   AS user_agents,
    count()                                  AS requests
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND header_user_agent != ''
GROUP BY
    src_ip,
    ja4,
    toStartOfHour(time),
    log_date;
|
|
|
|
|
|
-- -----------------------------------------------------------------------------
-- view_dashboard_entities
--
-- Pivot view for navigating between entities (IP <-> JA4 <-> country <-> ASN
-- <-> host). Every request of the last 7 days in ja4_logs.http_logs is emitted
-- once per applicable entity type, tagged with (entity_type, entity_value) and
-- carrying the attributes below. Per the original note, hosts are grouped on
-- the Python side and client_headers feeds clustering.py.
--
-- Layout: UNION ALL with one branch per entity type.
-- NOTE(review): the original comment states that ClickHouse drops the other
-- branches for a filter such as WHERE entity_type = 'ip' (constant condition
-- on a computed column); confirm with EXPLAIN on the target version.
--
-- Columns (identical order in every branch):
--   entity_type    — 'ip' | 'ja4' | 'country' | 'asn' | 'host'
--   entity_value   — entity value as text (e.g. '1.2.3.4', 't13d...', 'FR')
--   src_ip         — source IP as stored in http_logs (original note: native
--                    ClickHouse IPv6 format)
--   ja4            — JA4 fingerprint
--   host           — HTTP virtual host
--   log_date       — date of the request
--   client_headers — http_logs.client_headers split on ',' into an array
--   asns           — one-element array holding the source ASN as a string
--   countries      — one-element array holding the source country code
--   user_agents    — one-element array holding the User-Agent
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_dashboard_entities AS

-- IP perspective: entity_value is the source address with any leading
-- '::ffff:' removed. No extra filter: every request has a source IP.
SELECT
    'ip'                                                AS entity_type,
    replaceRegexpAll(toString(src_ip), '^::ffff:', '')  AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                    AS client_headers,
    [toString(src_asn)]                                 AS asns,
    [src_country_code]                                  AS countries,
    [header_user_agent]                                 AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY

UNION ALL

-- JA4 perspective: entity_value is the JA4 TLS fingerprint (non-empty only).
SELECT
    'ja4'                                               AS entity_type,
    ja4                                                 AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                    AS client_headers,
    [toString(src_asn)]                                 AS asns,
    [src_country_code]                                  AS countries,
    [header_user_agent]                                 AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND ja4 != ''

UNION ALL

-- Country perspective: entity_value is the source country code
-- (e.g. 'FR', 'US'); rows without a country code are skipped.
SELECT
    'country'                                           AS entity_type,
    src_country_code                                    AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                    AS client_headers,
    [toString(src_asn)]                                 AS asns,
    [src_country_code]                                  AS countries,
    [header_user_agent]                                 AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND src_country_code != ''

UNION ALL

-- ASN perspective: entity_value is the ASN number as text (e.g. '15169');
-- only rows with src_asn > 0 are kept.
SELECT
    'asn'                                               AS entity_type,
    toString(src_asn)                                   AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                    AS client_headers,
    [toString(src_asn)]                                 AS asns,
    [src_country_code]                                  AS countries,
    [header_user_agent]                                 AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND src_asn > 0

UNION ALL

-- Host perspective: entity_value is the HTTP virtual host
-- (e.g. 'api.example.com'); rows with an empty host are skipped.
SELECT
    'host'                                              AS entity_type,
    host                                                AS entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    splitByChar(',', client_headers)                    AS client_headers,
    [toString(src_asn)]                                 AS asns,
    [src_country_code]                                  AS countries,
    [header_user_agent]                                 AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY
  AND host != '';
|