From 2bfb4b72826177d49fc5ddb61ff21bd80345ace0 Mon Sep 17 00:00:00 2001
From: toto
Date: Tue, 7 Apr 2026 22:31:57 +0200
Subject: [PATCH] =?UTF-8?q?perf(dashboard):=20P2=20=E2=80=94=20remplacer?=
 =?UTF-8?q?=20replaceRegexpAll=20dans=20les=20WHERE=20par=20IPv4MappedToIP?=
 =?UTF-8?q?v6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problème : 8 clauses WHERE appliquaient une fonction sur la colonne src_ip :
  WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
→ ClickHouse ne peut pas utiliser l'index de tri ou les skipping indexes
  quand une fonction est appliquée à la colonne filtrée.

Fix : transformer l'INPUT (le paramètre) plutôt que la colonne :
  WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
→ src_ip reste intact → ClickHouse utilise les indexes (P1) et la
  projection proj_by_ip (P1) pour ces requêtes.

Fichiers modifiés :
  investigation_summary.py — 6 WHERE (ml_detected_anomalies,
    agg_host_ip_ja4_1h ×2, view_form_bruteforce_detected,
    view_host_ip_ja4_rotation, view_ip_recurrence)
  ml_features.py — 1 WHERE (view_ai_features_1h)
  rotation.py — 1 WHERE (agg_host_ip_ja4_1h)

Note : les 27 autres occurrences de replaceRegexpAll dans les SELECT sont
des transformations d'affichage (IPv6→IPv4 pour l'UI) et ne bloquent pas
les indexes.

À vérifier : toIPv4 n'accepte que des adresses IPv4 — une IP IPv6 native
(ou préfixée « ::ffff: ») passée en paramètre provoquera une erreur ou ne
correspondra à aucune ligne selon la version de ClickHouse, alors que
l'ancienne comparaison de chaînes la gérait. Confirmer aussi que
IPv4MappedToIPv6 existe sur la version déployée (la documentation
ClickHouse référence IPv4ToIPv6).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../backend/routes/investigation_summary.py | 12 ++++++------ services/dashboard/backend/routes/ml_features.py | 2 +- services/dashboard/backend/routes/rotation.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/services/dashboard/backend/routes/investigation_summary.py b/services/dashboard/backend/routes/investigation_summary.py index 5ec14bd..a12c01d 100644 --- a/services/dashboard/backend/routes/investigation_summary.py +++ b/services/dashboard/backend/routes/investigation_summary.py @@ -35,7 +35,7 @@ async def get_ip_full_summary(ip: str): uniq(host) AS distinct_hosts, uniq(ja4) AS distinct_ja4 FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies - WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) """ ml_res = db.query(ml_sql, {"ip": clean_ip}) ml_row = ml_res.result_rows[0] if ml_res.result_rows else None @@ -56,7 +56,7 @@ async def get_ip_full_summary(ip: str): sum(query_params_count) AS total_params, groupArray(3)(host) AS top_hosts FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected - WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) """ bf_res = db.query(bf_sql, {"ip": clean_ip}) bf_row = bf_res.result_rows[0] if bf_res.result_rows else None @@ -77,7 +77,7 @@ async def get_ip_full_summary(ip: str): any(tcp_mss_raw) AS mss, any(first_ua) AS ua FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h - WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) AND window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 LIMIT 1 @@ -113,7 +113,7 @@ async def get_ip_full_summary(ip: str): rot_sql = f""" SELECT distinct_ja4_count, total_hits FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation - WHERE 
replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) LIMIT 1 """ rot_res = db.query(rot_sql, {"ip": clean_ip}) @@ -127,7 +127,7 @@ async def get_ip_full_summary(ip: str): pers_sql = f""" SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence - WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) LIMIT 1 """ pers_res = db.query(pers_sql, {"ip": clean_ip}) @@ -150,7 +150,7 @@ async def get_ip_full_summary(ip: str): sum(hits) AS hits, groupUniqArray(3)(ja4) AS ja4s FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h - WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) AND window_start >= now() - INTERVAL 24 HOUR GROUP BY hour ORDER BY hour ASC diff --git a/services/dashboard/backend/routes/ml_features.py b/services/dashboard/backend/routes/ml_features.py index 8504d06..d1d122b 100644 --- a/services/dashboard/backend/routes/ml_features.py +++ b/services/dashboard/backend/routes/ml_features.py @@ -105,7 +105,7 @@ async def get_ip_radar(ip: str): avg(path_diversity_ratio) AS path_diversity_ratio, avg(anomalous_payload_ratio) AS anomalous_payload_ratio FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h - WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) AND window_start >= now() - INTERVAL 24 HOUR """ result = db.query(sql, {"ip": ip}) diff --git a/services/dashboard/backend/routes/rotation.py b/services/dashboard/backend/routes/rotation.py index cc1539d..fd16052 100644 --- a/services/dashboard/backend/routes/rotation.py +++ b/services/dashboard/backend/routes/rotation.py @@ -83,7 +83,7 @@ async def get_ip_ja4_history(ip: str): min(window_start) AS first_seen, max(window_start) AS last_seen FROM 
{settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h - WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s + WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s)) GROUP BY ja4 ORDER BY hits DESC """