Files
dashboard/deploy_dashboard_entities_view.sql
SOC Analyst ee2b24b277 fix: Subnet investigation - Récupération des user-agents depuis view_dashboard_entities
- Utilisation de 2 requêtes séparées + fusion en Python
- 1ère requête: ml_detected_anomalies pour les détections récentes
- 2ème requête: view_dashboard_entities avec IN clause pour les user-agents
- La clause IN permet d'utiliser l'index ClickHouse (splitByChar ne l'utilise pas)
- PREWHERE optimise les performances de requête

Problème résolu:
- unique_ua était toujours à 0 car la jointure LEFT JOIN ne fonctionnait pas
- La solution avec IN clause fonctionne car elle utilise l'index sur entity_value

Testé avec 141.98.11.0/24:
- 5 IPs, 8 détections, 65 user-agents uniques
- 141.98.11.209: 68 user-agents différents

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-03-15 19:41:48 +01:00

378 lines
11 KiB
SQL

-- =============================================================================
-- Vue materialisée unique pour Dashboard Entities - Bot Detector
-- =============================================================================
--
-- Entités gérées :
-- - ip : Adresses IP sources
-- - ja4 : Fingerprints JA4
-- - user_agent : User-Agents HTTP
-- - client_header : Client Headers
-- - host : Hosts HTTP
-- - path : Paths URL
-- - query_param : Noms de paramètres de query (concaténés: foo,baz)
--
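-- Installation instructions:
-- --------------------------
-- 1. Connect to ClickHouse with the CLI. clickhouse-client uses the native
--    protocol (default port 9000 unless the server overrides tcp_port);
--    8123 is the HTTP interface port. Never store the password in this file:
--    let the client prompt for it, and rotate the password that was
--    previously committed here.
--    clickhouse-client --host test-sdv-anubis.sdv.fr --port 9000 \
--        --user admin --ask-password
--
-- 2. Copy-paste EACH BLOCK separately (one at a time)
--
-- 3. Check that the view was created:
--    SELECT count() FROM mabase_prod.view_dashboard_entities;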
--
-- =============================================================================
USE mabase_prod;
-- =============================================================================
-- BLOCK 0/3: Clean up existing objects (IMPORTANT)
-- =============================================================================
-- Removes the materialized view and then its target table so the blocks below
-- recreate both from scratch. Every row currently stored in
-- view_dashboard_entities is lost when the second statement runs.
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities_mv;
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities;
-- =============================================================================
-- BLOCK 1/3: Create the target table
-- =============================================================================
-- Storage table that mabase_prod.view_dashboard_entities_mv writes into.
-- Rows are partitioned by month of log_date, sorted by
-- (entity_type, entity_value, log_date) and expire 90 days after log_date.
CREATE TABLE IF NOT EXISTS mabase_prod.view_dashboard_entities
(
    -- Entity identification
    entity_type     LowCardinality(String),
    entity_value    String,

    -- Context
    src_ip          IPv4,
    ja4             String,
    host            String,

    -- Time (daily granularity)
    log_date        Date,

    -- Metrics
    requests        UInt64,
    unique_ips      UInt64,

    -- Associated attributes (for cross-entity investigation)
    user_agents     Array(String),
    client_headers  Array(String),
    paths           Array(String),
    query_params    Array(String),
    asns            Array(String),
    countries       Array(String)
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(log_date)
ORDER BY (entity_type, entity_value, log_date)
TTL log_date + INTERVAL 90 DAY  -- keep 90 days (instead of 30)
SETTINGS index_granularity = 8192;
-- =============================================================================
-- BLOCK 2/3: Create the materialized view
-- =============================================================================
-- Feeds mabase_prod.view_dashboard_entities from mabase_prod.http_logs.
-- Seven SELECT branches are combined with UNION ALL, one per entity type.
-- Every branch emits the same column list; the branches differ only in which
-- source value becomes entity_value, in the row filter, and in the GROUP BY key.
--
-- Notes shared by all branches:
--   * query_params holds, per request, the query-string parameter names joined
--     with ','. The expression removes the first '?' found in `query`, splits
--     the remainder on '&', and keeps the text before the first '=' of each
--     piece.
--   * src_ip is part of every GROUP BY key, so uniq(src_ip) cannot exceed 1 in
--     any emitted row.
--   * NOTE(review): a materialized view aggregates each inserted block on its
--     own, so several rows may exist for the same key; readers are expected to
--     re-aggregate (e.g. sum(requests)) - confirm against the ClickHouse docs.
--   * NOTE(review): support for UNION ALL inside a materialized view depends on
--     the ClickHouse version - confirm that every branch is populated on the
--     deployed server.
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_mv
TO mabase_prod.view_dashboard_entities
AS
    -- 1. Entity: source IP
    SELECT
        'ip' AS entity_type,
        toString(src_ip) AS entity_value,
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        count() AS requests,
        uniq(src_ip) AS unique_ips,
        groupArrayDistinct(header_user_agent) AS user_agents,
        groupArrayDistinct(client_headers) AS client_headers,
        groupArrayDistinct(path) AS paths,
        groupArrayDistinct(
            arrayStringConcat(
                arrayMap(
                    kv -> splitByChar('=', kv)[1],
                    splitByChar('&', replaceOne(query, '?', ''))
                ),
                ','
            )
        ) AS query_params,
        groupArrayDistinct(toString(src_asn)) AS asns,
        groupArrayDistinct(src_country_code) AS countries
    FROM mabase_prod.http_logs
    WHERE src_ip IS NOT NULL
    GROUP BY src_ip, ja4, host, log_date

UNION ALL

    -- 2. Entity: JA4 fingerprint (rows with an empty fingerprint are skipped)
    SELECT
        'ja4' AS entity_type,
        ja4 AS entity_value,
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        count() AS requests,
        uniq(src_ip) AS unique_ips,
        groupArrayDistinct(header_user_agent) AS user_agents,
        groupArrayDistinct(client_headers) AS client_headers,
        groupArrayDistinct(path) AS paths,
        groupArrayDistinct(
            arrayStringConcat(
                arrayMap(
                    kv -> splitByChar('=', kv)[1],
                    splitByChar('&', replaceOne(query, '?', ''))
                ),
                ','
            )
        ) AS query_params,
        groupArrayDistinct(toString(src_asn)) AS asns,
        groupArrayDistinct(src_country_code) AS countries
    FROM mabase_prod.http_logs
    WHERE ja4 != '' AND ja4 IS NOT NULL
    GROUP BY src_ip, ja4, host, log_date

UNION ALL

    -- 3. Entity: User-Agent
    --    The subquery only renames header_user_agent so it can be used as the
    --    entity value, the filter and a grouping key.
    SELECT
        'user_agent' AS entity_type,
        ua_value AS entity_value,
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        count() AS requests,
        uniq(src_ip) AS unique_ips,
        groupArrayDistinct(ua_value) AS user_agents,
        groupArrayDistinct(client_headers) AS client_headers,
        groupArrayDistinct(path) AS paths,
        groupArrayDistinct(
            arrayStringConcat(
                arrayMap(
                    kv -> splitByChar('=', kv)[1],
                    splitByChar('&', replaceOne(query, '?', ''))
                ),
                ','
            )
        ) AS query_params,
        groupArrayDistinct(toString(src_asn)) AS asns,
        groupArrayDistinct(src_country_code) AS countries
    FROM
    (
        SELECT
            src_ip,
            ja4,
            host,
            time,
            src_asn,
            src_country_code,
            header_user_agent AS ua_value,
            client_headers,
            path,
            query
        FROM mabase_prod.http_logs
    )
    WHERE ua_value != '' AND ua_value IS NOT NULL
    GROUP BY src_ip, ja4, host, log_date, ua_value

UNION ALL

    -- 4. Entity: client header
    --    The source column is renamed in the subquery so that it does not share
    --    a name with the client_headers output alias of the outer SELECT.
    SELECT
        'client_header' AS entity_type,
        ch_value AS entity_value,
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        count() AS requests,
        uniq(src_ip) AS unique_ips,
        groupArrayDistinct(header_user_agent) AS user_agents,
        groupArrayDistinct(ch_value) AS client_headers,
        groupArrayDistinct(path) AS paths,
        groupArrayDistinct(
            arrayStringConcat(
                arrayMap(
                    kv -> splitByChar('=', kv)[1],
                    splitByChar('&', replaceOne(query, '?', ''))
                ),
                ','
            )
        ) AS query_params,
        groupArrayDistinct(toString(src_asn)) AS asns,
        groupArrayDistinct(src_country_code) AS countries
    FROM
    (
        SELECT
            src_ip,
            ja4,
            host,
            time,
            src_asn,
            src_country_code,
            header_user_agent,
            client_headers AS ch_value,
            path,
            query
        FROM mabase_prod.http_logs
    )
    WHERE ch_value != '' AND ch_value IS NOT NULL
    GROUP BY src_ip, ja4, host, log_date, ch_value

UNION ALL

    -- 5. Entity: HTTP host (rows with an empty host are skipped)
    SELECT
        'host' AS entity_type,
        host AS entity_value,
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        count() AS requests,
        uniq(src_ip) AS unique_ips,
        groupArrayDistinct(header_user_agent) AS user_agents,
        groupArrayDistinct(client_headers) AS client_headers,
        groupArrayDistinct(path) AS paths,
        groupArrayDistinct(
            arrayStringConcat(
                arrayMap(
                    kv -> splitByChar('=', kv)[1],
                    splitByChar('&', replaceOne(query, '?', ''))
                ),
                ','
            )
        ) AS query_params,
        groupArrayDistinct(toString(src_asn)) AS asns,
        groupArrayDistinct(src_country_code) AS countries
    FROM mabase_prod.http_logs
    WHERE host != '' AND host IS NOT NULL
    GROUP BY src_ip, ja4, host, log_date

UNION ALL

    -- 6. Entity: URL path
    --    The subquery only renames path so it can be used as the entity value,
    --    the filter and a grouping key.
    SELECT
        'path' AS entity_type,
        path_value AS entity_value,
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        count() AS requests,
        uniq(src_ip) AS unique_ips,
        groupArrayDistinct(header_user_agent) AS user_agents,
        groupArrayDistinct(client_headers) AS client_headers,
        groupArrayDistinct(path_value) AS paths,
        groupArrayDistinct(
            arrayStringConcat(
                arrayMap(
                    kv -> splitByChar('=', kv)[1],
                    splitByChar('&', replaceOne(query, '?', ''))
                ),
                ','
            )
        ) AS query_params,
        groupArrayDistinct(toString(src_asn)) AS asns,
        groupArrayDistinct(src_country_code) AS countries
    FROM
    (
        SELECT
            src_ip,
            ja4,
            host,
            time,
            src_asn,
            src_country_code,
            header_user_agent,
            client_headers,
            path AS path_value,
            query
        FROM mabase_prod.http_logs
    )
    WHERE path_value != '' AND path_value IS NOT NULL
    GROUP BY src_ip, ja4, host, log_date, path_value

UNION ALL

    -- 7. Entity: query parameters (parameter names joined with ',')
    --    The name list is computed once in the subquery, which also skips
    --    requests without a query string; the outer filter skips requests
    --    whose name list is empty.
    SELECT
        'query_param' AS entity_type,
        query_params_string AS entity_value,
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        count() AS requests,
        uniq(src_ip) AS unique_ips,
        groupArrayDistinct(header_user_agent) AS user_agents,
        groupArrayDistinct(client_headers) AS client_headers,
        groupArrayDistinct(path) AS paths,
        groupArrayDistinct(query_params_string) AS query_params,
        groupArrayDistinct(toString(src_asn)) AS asns,
        groupArrayDistinct(src_country_code) AS countries
    FROM
    (
        SELECT
            src_ip,
            ja4,
            host,
            time,
            src_asn,
            src_country_code,
            header_user_agent,
            client_headers,
            path,
            arrayStringConcat(
                arrayMap(
                    kv -> splitByChar('=', kv)[1],
                    splitByChar('&', replaceOne(query, '?', ''))
                ),
                ','
            ) AS query_params_string
        FROM mabase_prod.http_logs
        WHERE query != '' AND query IS NOT NULL
    )
    WHERE query_params_string != ''
    GROUP BY src_ip, ja4, host, log_date, query_params_string;
-- =============================================================================
-- BLOCK 3/3: Create the data-skipping indexes (optional - improves performance)
-- =============================================================================
-- ADD INDEX only registers the index in the table metadata: it is written for
-- parts created afterwards, not for parts that already exist. Because this
-- block is optional and may be run after rows have been ingested, each index is
-- followed by MATERIALIZE INDEX, which builds it for the existing parts as a
-- background mutation (a no-op on an empty table).
-- NOTE(review): entity_type and entity_value already lead the table's ORDER BY
-- key, so their minmax indexes are probably redundant - confirm before keeping.
ALTER TABLE mabase_prod.view_dashboard_entities
    ADD INDEX IF NOT EXISTS idx_entities_type (entity_type) TYPE minmax GRANULARITY 1;
ALTER TABLE mabase_prod.view_dashboard_entities
    MATERIALIZE INDEX idx_entities_type;

ALTER TABLE mabase_prod.view_dashboard_entities
    ADD INDEX IF NOT EXISTS idx_entities_value (entity_value) TYPE minmax GRANULARITY 1;
ALTER TABLE mabase_prod.view_dashboard_entities
    MATERIALIZE INDEX idx_entities_value;

ALTER TABLE mabase_prod.view_dashboard_entities
    ADD INDEX IF NOT EXISTS idx_entities_ip (src_ip) TYPE minmax GRANULARITY 1;
ALTER TABLE mabase_prod.view_dashboard_entities
    MATERIALIZE INDEX idx_entities_ip;
-- =============================================================================
-- FIN
-- =============================================================================
--
-- Pour vérifier que la vue fonctionne :
-- -------------------------------------
-- SELECT entity_type, count() FROM mabase_prod.view_dashboard_entities GROUP BY entity_type;
--
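-- To force a merge of the table's data parts (if necessary). This does not
-- re-read mabase_prod.http_logs and does not backfill rows that were inserted
-- before the materialized view existed:
-- ----------------------------------------------
-- OPTIMIZE TABLE mabase_prod.view_dashboard_entities FINAL;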
--
-- Exemples de requêtes :
-- ----------------------
-- -- Stats pour une IP
-- SELECT * FROM mabase_prod.view_dashboard_entities
-- WHERE entity_type = 'ip' AND entity_value = '116.179.33.143';
--
-- -- Stats pour un JA4
-- SELECT * FROM mabase_prod.view_dashboard_entities
-- WHERE entity_type = 'ja4' AND entity_value = 't13d190900_9dc949149365_97f8aa674fd9';
--
-- -- Top 10 des user-agents
-- SELECT entity_value, sum(requests) as total
-- FROM mabase_prod.view_dashboard_entities
-- WHERE entity_type = 'user_agent'
-- GROUP BY entity_value
-- ORDER BY total DESC
-- LIMIT 10;
--
-- =============================================================================