- Utilisation de 2 requêtes séparées + fusion en Python
- 1ère requête: ml_detected_anomalies pour les détections récentes
- 2ème requête: view_dashboard_entities avec IN clause pour les user-agents
- La clause IN permet d'utiliser l'index ClickHouse (splitByChar ne l'utilise pas)
- PREWHERE optimise les performances de requête

Problème résolu:
- unique_ua était toujours à 0 car la jointure LEFT JOIN ne fonctionnait pas
- La solution avec IN clause fonctionne car elle utilise l'index sur entity_value

Testé avec 141.98.11.0/24:
- 5 IPs, 8 détections, 65 user-agents uniques
- 141.98.11.209: 68 user-agents différents

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
378 lines
11 KiB
SQL
378 lines
11 KiB
SQL
-- =============================================================================
|
|
-- Vue matérialisée unique pour Dashboard Entities - Bot Detector
|
|
-- =============================================================================
|
|
--
|
|
-- Entités gérées :
|
|
-- - ip : Adresses IP sources
|
|
-- - ja4 : Fingerprints JA4
|
|
-- - user_agent : User-Agents HTTP
|
|
-- - client_header : Client Headers
|
|
-- - host : Hosts HTTP
|
|
-- - path : Paths URL
|
|
-- - query_param : Noms de paramètres de query (concaténés: foo,baz)
|
|
--
|
|
-- Instructions d'installation :
|
|
-- -----------------------------
|
|
-- 1. Se connecter à ClickHouse en CLI :
|
|
-- clickhouse-client --host test-sdv-anubis.sdv.fr --port 9000 \
|
|
-- --user admin --ask-password
|
|
--
|
|
-- 2. Copier-coller CHAQUE BLOC séparément (un par un)
|
|
--
|
|
-- 3. Vérifier que la vue est créée :
|
|
-- SELECT count() FROM mabase_prod.view_dashboard_entities;
|
|
--
|
|
-- =============================================================================
|
|
|
|
-- Select mabase_prod as the default database for this session. The statements
-- in this script still qualify every object name with the database explicitly.
USE mabase_prod;
|
|
|
|
-- =============================================================================
|
|
-- BLOC 0/3 : Nettoyer l'existant (IMPORTANT)
|
|
-- =============================================================================
|
|
|
|
-- Remove the materialized view before its target table, so the view never
-- points at a table that has already been dropped.
-- DROP VIEW (instead of DROP TABLE) refuses to run when the name does not
-- designate a view, which protects against dropping a data table by mistake.
DROP VIEW IF EXISTS mabase_prod.view_dashboard_entities_mv;

-- Dropping the target table discards every row aggregated so far.
DROP TABLE IF EXISTS mabase_prod.view_dashboard_entities;
|
|
|
|
-- =============================================================================
|
|
-- BLOC 1/3 : Créer la table
|
|
-- =============================================================================
|
|
|
|
-- Target table of the view_dashboard_entities_mv materialized view.
-- Each row holds daily aggregates for one entity seen with a given
-- (src_ip, ja4, host) combination.
CREATE TABLE IF NOT EXISTS mabase_prod.view_dashboard_entities
(
    -- Entity identification
    entity_type     LowCardinality(String),  -- ip, ja4, user_agent, client_header, host, path, query_param
    entity_value    String,                  -- value of the entity for the given type

    -- Context in which the entity was observed
    src_ip          IPv4,
    ja4             String,
    host            String,

    -- Time bucket (daily granularity)
    log_date        Date,

    -- Metrics
    requests        UInt64,
    unique_ips      UInt64,

    -- Associated attributes (for cross-investigation)
    user_agents     Array(String),
    client_headers  Array(String),
    paths           Array(String),
    query_params    Array(String),
    asns            Array(String),
    countries       Array(String)
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(log_date)
ORDER BY (entity_type, entity_value, log_date)
TTL log_date + INTERVAL 90 DAY  -- keep 90 days of data (previously 30)
SETTINGS index_granularity = 8192;
|
|
|
|
-- =============================================================================
|
|
-- BLOC 2/3 : Créer la vue matérialisée
|
|
-- =============================================================================
|
|
|
|
-- Materialized view that feeds mabase_prod.view_dashboard_entities from
-- mabase_prod.http_logs.
--
-- Each source row is fanned out with ARRAY JOIN into one candidate row per
-- entity type (ip, ja4, user_agent, client_header, host, path, query_param).
-- Candidates whose value is empty (or NULL) are discarded; the remaining rows
-- are aggregated per (entity_type, entity_value, src_ip, ja4, host, log_date).
--
-- This single SELECT replaces seven near-identical SELECTs combined with
-- UNION ALL. It produces the same output columns and the same groups, reads
-- each inserted block once, and defines the query-parameter expression once.
-- NOTE(review): ClickHouse is expected to reject a top-level UNION in an
-- insert-triggered materialized view (QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
-- confirm against the deployed server version.
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.view_dashboard_entities_mv
TO mabase_prod.view_dashboard_entities
AS
SELECT
    entity_type,
    entity_value,
    src_ip,
    ja4,
    host,
    log_date,
    count() AS requests,
    -- src_ip is one of the GROUP BY keys, so this evaluates to 1 for every
    -- row; the column is kept because the target table declares it.
    uniq(src_ip) AS unique_ips,
    -- For the user_agent, client_header, path and query_param entity types the
    -- matching array below only contains entity_value itself, because that
    -- value is part of the grouping key.
    groupArrayDistinct(ua) AS user_agents,
    groupArrayDistinct(ch) AS client_headers,
    groupArrayDistinct(p) AS paths,
    groupArrayDistinct(qp) AS query_params,
    groupArrayDistinct(toString(src_asn)) AS asns,
    groupArrayDistinct(src_country_code) AS countries
FROM
(
    -- Per-request projection. Columns are renamed (ua, ch, p, qp) so that the
    -- output aliases of the outer query cannot shadow them.
    SELECT
        src_ip,
        ja4,
        host,
        toDate(time) AS log_date,
        src_asn,
        src_country_code,
        header_user_agent AS ua,
        client_headers AS ch,
        path AS p,
        -- Comma-joined parameter names of the query string: the first '?' is
        -- removed, the rest is split on '&', and the text before the first '='
        -- of each item is kept (e.g. '?foo=1&baz=2' gives 'foo,baz').
        arrayStringConcat(
            arrayMap(
                x -> splitByChar('=', x)[1],
                splitByChar('&', replaceOne(query, '?', ''))
            ),
            ','
        ) AS qp
    FROM mabase_prod.http_logs
)
-- The two arrays are unfolded in parallel: element i of the first array is the
-- entity type and element i of the second array is its value for this request.
ARRAY JOIN
    ['ip', 'ja4', 'user_agent', 'client_header', 'host', 'path', 'query_param'] AS entity_type,
    [toString(src_ip), ja4, ua, ch, host, p, qp] AS entity_value
-- Same filtering as the former per-entity WHERE clauses: an entity with an
-- empty or NULL value yields no row (an empty query string gives an empty qp).
WHERE entity_value != ''
GROUP BY
    entity_type,
    entity_value,
    src_ip,
    ja4,
    host,
    log_date;
|
|
|
|
-- =============================================================================
|
|
-- BLOC 3/3 : Créer les index (optionnel - améliore les performances)
|
|
-- =============================================================================
|
|
|
|
-- Data-skipping indexes on the lookup columns of the dashboard table.
-- All three are declared in one ALTER; an ADD INDEX is skipped when an index
-- with the same name already exists.
-- NOTE(review): entity_type and entity_value already lead the table's ORDER BY
-- key, so the first two minmax indexes may bring little benefit; verify with
-- EXPLAIN indexes = 1 before relying on them.
ALTER TABLE mabase_prod.view_dashboard_entities
    ADD INDEX IF NOT EXISTS idx_entities_type (entity_type) TYPE minmax GRANULARITY 1,
    ADD INDEX IF NOT EXISTS idx_entities_value (entity_value) TYPE minmax GRANULARITY 1,
    ADD INDEX IF NOT EXISTS idx_entities_ip (src_ip) TYPE minmax GRANULARITY 1;
|
|
|
|
-- =============================================================================
|
|
-- FIN
|
|
-- =============================================================================
|
|
--
|
|
-- Pour vérifier que la vue fonctionne :
|
|
-- -------------------------------------
|
|
-- SELECT entity_type, count() FROM mabase_prod.view_dashboard_entities GROUP BY entity_type;
|
|
--
|
|
-- Pour forcer la fusion des parts (ne recalcule pas les données existantes) :
|
|
-- ----------------------------------------------
|
|
-- OPTIMIZE TABLE mabase_prod.view_dashboard_entities FINAL;
|
|
--
|
|
-- Exemples de requêtes :
|
|
-- ----------------------
|
|
-- -- Stats pour une IP
|
|
-- SELECT * FROM mabase_prod.view_dashboard_entities
|
|
-- WHERE entity_type = 'ip' AND entity_value = '116.179.33.143';
|
|
--
|
|
-- -- Stats pour un JA4
|
|
-- SELECT * FROM mabase_prod.view_dashboard_entities
|
|
-- WHERE entity_type = 'ja4' AND entity_value = 't13d190900_9dc949149365_97f8aa674fd9';
|
|
--
|
|
-- -- Top 10 des user-agents
|
|
-- SELECT entity_value, sum(requests) as total
|
|
-- FROM mabase_prod.view_dashboard_entities
|
|
-- WHERE entity_type = 'user_agent'
|
|
-- GROUP BY entity_value
|
|
-- ORDER BY total DESC
|
|
-- LIMIT 10;
|
|
--
|
|
-- =============================================================================
|