perf(dashboard): P2 — remplacer replaceRegexpAll dans les WHERE par IPv4MappedToIPv6

Problème : 8 clauses WHERE appliquaient une fonction sur la colonne src_ip :
  WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
→ ClickHouse ne peut pas exploiter la clé de tri (index primaire) ni les skipping indexes
  quand la colonne filtrée est enveloppée dans une fonction non monotone telle que replaceRegexpAll.

Fix : transformer l'INPUT (le paramètre) plutôt que la colonne :
  WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
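→ src_ip reste intact → ClickHouse peut utiliser les index (P1) et la
  projection proj_by_ip (P1) pour ces requêtes.
  NB : toIPv4() suppose que le paramètre %(ip)s est une adresse IPv4 en notation
  pointée, sans préfixe ::ffff: (à vérifier pour chaque appelant).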

Fichiers modifiés :
  investigation_summary.py — 6 WHERE (ml_detected_anomalies, agg_host_ip_ja4_1h ×2,
                              view_form_bruteforce_detected, view_host_ip_ja4_rotation,
                              view_ip_recurrence)
  ml_features.py           — 1 WHERE (view_ai_features_1h)
  rotation.py              — 1 WHERE (agg_host_ip_ja4_1h)

Note : les 27 autres occurrences de replaceRegexpAll, situées dans les SELECT, sont des
transformations d'affichage (IPv6→IPv4 pour l'UI) et n'empêchent pas l'utilisation des index.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 22:31:57 +02:00
parent 14323f7b05
commit 2bfb4b7282
3 changed files with 8 additions and 8 deletions

View File

@ -35,7 +35,7 @@ async def get_ip_full_summary(ip: str):
uniq(host) AS distinct_hosts,
uniq(ja4) AS distinct_ja4
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
"""
ml_res = db.query(ml_sql, {"ip": clean_ip})
ml_row = ml_res.result_rows[0] if ml_res.result_rows else None
@ -56,7 +56,7 @@ async def get_ip_full_summary(ip: str):
sum(query_params_count) AS total_params,
groupArray(3)(host) AS top_hosts
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
"""
bf_res = db.query(bf_sql, {"ip": clean_ip})
bf_row = bf_res.result_rows[0] if bf_res.result_rows else None
@ -77,7 +77,7 @@ async def get_ip_full_summary(ip: str):
any(tcp_mss_raw) AS mss,
any(first_ua) AS ua
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
AND window_start >= now() - INTERVAL 24 HOUR
AND tcp_ttl_raw > 0
LIMIT 1
@ -113,7 +113,7 @@ async def get_ip_full_summary(ip: str):
rot_sql = f"""
SELECT distinct_ja4_count, total_hits
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
LIMIT 1
"""
rot_res = db.query(rot_sql, {"ip": clean_ip})
@ -127,7 +127,7 @@ async def get_ip_full_summary(ip: str):
pers_sql = f"""
SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
LIMIT 1
"""
pers_res = db.query(pers_sql, {"ip": clean_ip})
@ -150,7 +150,7 @@ async def get_ip_full_summary(ip: str):
sum(hits) AS hits,
groupUniqArray(3)(ja4) AS ja4s
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
AND window_start >= now() - INTERVAL 24 HOUR
GROUP BY hour
ORDER BY hour ASC

View File

@ -105,7 +105,7 @@ async def get_ip_radar(ip: str):
avg(path_diversity_ratio) AS path_diversity_ratio,
avg(anomalous_payload_ratio) AS anomalous_payload_ratio
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
AND window_start >= now() - INTERVAL 24 HOUR
"""
result = db.query(sql, {"ip": ip})

View File

@ -83,7 +83,7 @@ async def get_ip_ja4_history(ip: str):
min(window_start) AS first_seen,
max(window_start) AS last_seen
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
GROUP BY ja4
ORDER BY hits DESC
"""