perf(clickhouse): P10 — créer les 4 vues métier manquantes + corriger préfixes DB
Bug de production : view_form_bruteforce_detected, view_host_ip_ja4_rotation,
view_dashboard_entities, view_dashboard_user_agents étaient référencées dans
13 endpoints du dashboard mais n'existaient nulle part dans le schéma.
Tous ces endpoints retournaient HTTP 500 en production.
shared/clickhouse/11_views.sql (nouveau) :
view_form_bruteforce_detected
Source : agg_host_ip_ja4_1h (24h)
Logique : GROUP BY (src_ip, host) HAVING count_post >= 10
Usage : bruteforce.py (3 endpoints), investigation_summary.py
view_host_ip_ja4_rotation
Source : agg_host_ip_ja4_1h (24h)
Logique : uniqExact(ja4) par src_ip, HAVING >= 2 (rotation de fingerprint)
Usage : rotation.py (3 endpoints), investigation_summary.py
view_dashboard_entities
Source : http_logs (7 jours), UNION ALL 5 branches (ip/ja4/country/asn/host)
Colonnes : entity_type, entity_value, src_ip, ja4, host, log_date,
client_headers Array(String), asns Array, countries Array,
user_agents Array
Usage : entities.py (5 endpoints), clustering.py
view_dashboard_user_agents
Source : http_logs (7 jours), GROUP BY (src_ip, ja4, hour)
Colonnes : src_ip, ja4, hour, log_date, user_agents Array(String), requests
Usage : variability.py (4 endpoints), fingerprints.py (5 endpoints)
attributes.py (2 endpoints)
deploy_schema.sh : ajout de 10_perf_indexes.sql et 11_views.sql dans la liste
routes/variability.py + fingerprints.py :
Correction de 9 requêtes utilisant view_dashboard_user_agents sans préfixe
de base de données → remplacé par {settings.CLICKHOUSE_DB_PROCESSING}.view_*
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -127,7 +127,7 @@ async def get_ja4_spoofing(
|
||||
SELECT ja4, groupArray(5)(ua) AS top_uas
|
||||
FROM (
|
||||
SELECT ja4, arrayJoin(user_agents) AS ua, sum(requests) AS cnt
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
WHERE ja4 IN ({ja4_sql})
|
||||
AND hour >= now() - INTERVAL {hours} HOUR
|
||||
AND ua != ''
|
||||
@ -287,7 +287,7 @@ async def get_ja4_ua_matrix(
|
||||
ja4,
|
||||
ua,
|
||||
sum(requests) AS cnt
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE ja4 IN ({ja4_sql})
|
||||
AND hour >= now() - INTERVAL {hours} HOUR
|
||||
@ -388,7 +388,7 @@ async def get_ua_analysis(
|
||||
SELECT
|
||||
ua,
|
||||
sum(requests) AS ip_count
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE hour >= now() - INTERVAL %(hours)s HOUR
|
||||
AND ua != ''
|
||||
@ -407,7 +407,7 @@ async def get_ua_analysis(
|
||||
ua,
|
||||
uniq(ja4) AS unique_ja4s,
|
||||
groupUniqArray(3)(ja4) AS sample_ja4s
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE ua IN ({ua_sql})
|
||||
AND hour >= now() - INTERVAL {hours} HOUR
|
||||
@ -557,7 +557,7 @@ async def get_ip_fingerprint_coherence(ip: str):
|
||||
# User-agents réels depuis view_dashboard_user_agents
|
||||
ua_query = """
|
||||
SELECT ua, sum(requests) AS cnt
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE toString(src_ip) = %(ip)s
|
||||
AND hour >= now() - INTERVAL 72 HOUR
|
||||
|
||||
@ -145,7 +145,7 @@ async def get_associated_attributes(
|
||||
ua_q = f"""
|
||||
SELECT ua AS value, sum(requests) AS count,
|
||||
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {ua_where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR AND ua != ''
|
||||
@ -261,7 +261,7 @@ async def get_user_agents(
|
||||
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
|
||||
min(log_date) AS first_seen,
|
||||
max(log_date) AS last_seen
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR
|
||||
@ -274,7 +274,7 @@ async def get_user_agents(
|
||||
|
||||
count_query = f"""
|
||||
SELECT uniqExact(ua) AS total
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR
|
||||
@ -556,7 +556,7 @@ async def get_variability(attr_type: str, value: str):
|
||||
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
|
||||
min(log_date) AS first_seen,
|
||||
max(log_date) AS last_seen
|
||||
FROM view_dashboard_user_agents
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {_ua_where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR
|
||||
|
||||
213
shared/clickhouse/11_views.sql
Normal file
213
shared/clickhouse/11_views.sql
Normal file
@ -0,0 +1,213 @@
|
||||
-- =============================================================================
|
||||
-- 11_views.sql — Vues métier du dashboard
|
||||
--
|
||||
-- Ce fichier crée les vues référencées par le dashboard mais absentes du schéma
|
||||
-- partagé. Ces vues agrègent les données de agg_host_ip_ja4_1h et http_logs
|
||||
-- pour fournir des perspectives métier aux endpoints FastAPI.
|
||||
--
|
||||
-- Vues créées :
|
||||
-- view_form_bruteforce_detected — IPs/hôtes avec fort volume de requêtes POST
|
||||
-- view_host_ip_ja4_rotation — IPs changeant de fingerprint JA4 (évasion)
|
||||
-- view_dashboard_entities — Pivot IP/JA4/pays/ASN/host pour investigation
|
||||
-- view_dashboard_user_agents — User-Agents agrégés par IP/JA4/heure
|
||||
-- (view_dashboard_summary — métriques globales 24h — n'est PAS créée par ce fichier)
|
||||
-- =============================================================================
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
-- view_form_bruteforce_detected
--
-- Flags (src_ip, host) pairs that look like form brute-force activity:
--   - at least 10 POST requests to the host, summed over the whole 24h window
--     (the threshold applies to the 24h total, not to each hour)
--   - rolling 24h window read from agg_host_ip_ja4_1h
--
-- Columns consumed by bruteforce.py and investigation_summary.py:
--   src_ip, host, ja4, hits, query_params_count
--
-- The aggregation is done in two levels: ClickHouse rejects an aggregate
-- function nested inside another aggregate function, so the per-JA4 totals
-- are computed first and the dominant JA4 is picked from them afterwards.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_form_bruteforce_detected AS
SELECT
    src_ip,
    host,
    -- JA4 with the highest request count for this IP + host pair
    argMax(ja4, ja4_hits) AS ja4,
    -- Total number of requests (all HTTP methods)
    sum(ja4_hits) AS hits,
    -- Proxy for form submissions: total number of POST requests
    sum(ja4_posts) AS query_params_count
FROM
(
    -- Level 1: totals per (src_ip, host, ja4) over the last 24 hours.
    -- Inner aliases are deliberately distinct from the source column names
    -- so they cannot be confused with the outer output columns.
    SELECT
        src_ip,
        host,
        ja4,
        sum(hits) AS ja4_hits,
        sum(count_post) AS ja4_posts
    FROM ja4_processing.agg_host_ip_ja4_1h
    WHERE window_start >= now() - INTERVAL 24 HOUR
    GROUP BY src_ip, host, ja4
)
GROUP BY src_ip, host
-- Threshold: at least 10 POSTs in the window to qualify as brute-force activity
HAVING query_params_count >= 10;
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
-- view_host_ip_ja4_rotation
--
-- Lists source IPs that presented more than one JA4 fingerprint during the
-- last 24 hours (fingerprint rotation, treated as a detection-evasion signal).
-- Rows are grouped by src_ip only, so fingerprints are counted across all
-- hosts of agg_host_ip_ja4_1h.
--
-- Columns consumed by rotation.py and investigation_summary.py:
--   src_ip, distinct_ja4_count, total_hits
-- first_seen / last_seen give the time span of the matching hourly windows.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_host_ip_ja4_rotation AS
SELECT
    src_ip
  , uniqExact(ja4)    AS distinct_ja4_count  -- exact number of distinct JA4 values
  , sum(hits)         AS total_hits
  , min(window_start) AS first_seen
  , max(window_start) AS last_seen
FROM ja4_processing.agg_host_ip_ja4_1h
WHERE ja4 != ''                                   -- ignore rows without a fingerprint
  AND window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
HAVING distinct_ja4_count >= 2                    -- rotation = two or more distinct JA4
ORDER BY distinct_ja4_count DESC;
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
-- view_dashboard_user_agents
--
-- User-Agent strings aggregated per source IP, JA4 and hour over the last
-- 7 days of http_logs. The dashboard routes read it with
-- ARRAY JOIN user_agents.
--
-- Output columns:
--   src_ip      — source address converted to IPv4 after stripping a leading
--                 '::ffff:' (IPv4-mapped IPv6 prefix)
--   ja4         — TLS fingerprint
--   hour        — start of the hour (toStartOfHour(time))
--   log_date    — log_date column passed through from http_logs
--   user_agents — distinct non-empty User-Agent values for the group,
--                 capped at 100 entries
--   requests    — number of log rows in the group
--
-- NOTE(review): toIPv4OrZero returns 0.0.0.0 for strings that are not valid
-- IPv4, so native IPv6 sources presumably all end up under 0.0.0.0 — confirm
-- this is intended.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_dashboard_user_agents AS
SELECT
    toIPv4OrZero(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS src_ip
  , ja4
  , toStartOfHour(time)                     AS hour
  , log_date
  , groupUniqArray(100)(header_user_agent)  AS user_agents
  , count()                                 AS requests
FROM ja4_logs.http_logs
WHERE header_user_agent != ''
  AND time >= now() - INTERVAL 7 DAY
GROUP BY src_ip, ja4, toStartOfHour(time), log_date;
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
-- view_dashboard_entities
--
-- Pivot view used to navigate between entities (IP, JA4, country, ASN, host).
-- Every http_logs row of the last 7 days is emitted once per perspective it
-- qualifies for, tagged with entity_type / entity_value, so callers can filter
-- on one entity and aggregate the associated attributes.
--
-- Layout: one UNION ALL branch per entity type. The constant entity_type in
-- each branch is intended to let a filter such as entity_type = 'ip' discard
-- the other branches.
--
-- Output columns (identical in every branch):
--   entity_type    — 'ip' | 'ja4' | 'country' | 'asn' | 'host'
--   entity_value   — the entity itself (e.g. '1.2.3.4', 't13d...', 'FR')
--   src_ip         — source address as stored in http_logs
--   ja4            — JA4 fingerprint
--   host           — HTTP virtual host
--   log_date       — date of the request
--   client_headers — http_logs.client_headers split on ','
--   asns           — single-element array holding toString(src_asn)
--   countries      — single-element array holding src_country_code
--   user_agents    — single-element array holding header_user_agent
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_dashboard_entities AS

-- IP perspective: entity_value is the source address without a '::ffff:' prefix
SELECT
    'ip'                                                 AS entity_type
  , replaceRegexpAll(toString(src_ip), '^::ffff:', '')   AS entity_value
  , src_ip
  , ja4
  , host
  , log_date
  , splitByChar(',', client_headers)                     AS client_headers
  , [toString(src_asn)]                                  AS asns
  , [src_country_code]                                   AS countries
  , [header_user_agent]                                  AS user_agents
FROM ja4_logs.http_logs
WHERE time >= now() - INTERVAL 7 DAY

UNION ALL

-- JA4 perspective: entity_value is the JA4 fingerprint (rows without one are skipped)
SELECT
    'ja4'                                                AS entity_type
  , ja4                                                  AS entity_value
  , src_ip
  , ja4
  , host
  , log_date
  , splitByChar(',', client_headers)                     AS client_headers
  , [toString(src_asn)]                                  AS asns
  , [src_country_code]                                   AS countries
  , [header_user_agent]                                  AS user_agents
FROM ja4_logs.http_logs
WHERE ja4 != ''
  AND time >= now() - INTERVAL 7 DAY

UNION ALL

-- Country perspective: entity_value is the source country code (e.g. 'FR', 'US')
SELECT
    'country'                                            AS entity_type
  , src_country_code                                     AS entity_value
  , src_ip
  , ja4
  , host
  , log_date
  , splitByChar(',', client_headers)                     AS client_headers
  , [toString(src_asn)]                                  AS asns
  , [src_country_code]                                   AS countries
  , [header_user_agent]                                  AS user_agents
FROM ja4_logs.http_logs
WHERE src_country_code != ''
  AND time >= now() - INTERVAL 7 DAY

UNION ALL

-- ASN perspective: entity_value is the ASN number as text (rows with src_asn = 0 are skipped)
SELECT
    'asn'                                                AS entity_type
  , toString(src_asn)                                    AS entity_value
  , src_ip
  , ja4
  , host
  , log_date
  , splitByChar(',', client_headers)                     AS client_headers
  , [toString(src_asn)]                                  AS asns
  , [src_country_code]                                   AS countries
  , [header_user_agent]                                  AS user_agents
FROM ja4_logs.http_logs
WHERE src_asn > 0
  AND time >= now() - INTERVAL 7 DAY

UNION ALL

-- Host perspective: entity_value is the HTTP virtual host (e.g. 'api.example.com')
SELECT
    'host'                                               AS entity_type
  , host                                                 AS entity_value
  , src_ip
  , ja4
  , host
  , log_date
  , splitByChar(',', client_headers)                     AS client_headers
  , [toString(src_asn)]                                  AS asns
  , [src_country_code]                                   AS countries
  , [header_user_agent]                                  AS user_agents
FROM ja4_logs.http_logs
WHERE host != ''
  AND time >= now() - INTERVAL 7 DAY;
|
||||
@ -45,6 +45,8 @@ SQL_FILES=(
|
||||
07_ai_features_view.sql
|
||||
08_users.sql
|
||||
09_audit_table.sql
|
||||
10_perf_indexes.sql
|
||||
11_views.sql
|
||||
)
|
||||
|
||||
for f in "${SQL_FILES[@]}"; do
|
||||
|
||||
Reference in New Issue
Block a user