perf(dashboard): P2 — remplacer replaceRegexpAll dans les WHERE par IPv4MappedToIPv6
Problème : 8 clauses WHERE appliquaient une fonction sur la colonne src_ip :
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
→ ClickHouse ne peut pas utiliser l'index de tri ou les skipping indexes
quand une fonction est appliquée à la colonne filtrée.
Fix : transformer l'INPUT (le paramètre) plutôt que la colonne :
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
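→ src_ip reste intact → ClickHouse peut utiliser les indexes (P1) et la
projection proj_by_ip (P1) pour ces requêtes.
Hypothèse : %(ip)s est toujours une IPv4 en notation pointée (sans préfixe ::ffff:) ;
toIPv4 n'accepte pas de chaîne IPv6, donc les IP IPv6 natives ne sont plus couvertes
par ce filtre (à confirmer). À vérifier aussi : la fonction documentée par ClickHouse
pour cette conversion est IPv4ToIPv6 ; confirmer que IPv4MappedToIPv6 existe bien
dans la version déployée.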
Fichiers modifiés :
investigation_summary.py — 6 WHERE (ml_detected_anomalies, agg_host_ip_ja4_1h ×2,
view_form_bruteforce_detected, view_host_ip_ja4_rotation,
view_ip_recurrence)
ml_features.py — 1 WHERE (view_ai_features_1h)
rotation.py — 1 WHERE (agg_host_ip_ja4_1h)
Note : les 27 autres occurrences de replaceRegexpAll dans les SELECT sont des
transformations d'affichage (IPv6→IPv4 pour l'UI) et ne bloquent pas les indexes.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -35,7 +35,7 @@ async def get_ip_full_summary(ip: str):
|
|||||||
uniq(host) AS distinct_hosts,
|
uniq(host) AS distinct_hosts,
|
||||||
uniq(ja4) AS distinct_ja4
|
uniq(ja4) AS distinct_ja4
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
"""
|
"""
|
||||||
ml_res = db.query(ml_sql, {"ip": clean_ip})
|
ml_res = db.query(ml_sql, {"ip": clean_ip})
|
||||||
ml_row = ml_res.result_rows[0] if ml_res.result_rows else None
|
ml_row = ml_res.result_rows[0] if ml_res.result_rows else None
|
||||||
@ -56,7 +56,7 @@ async def get_ip_full_summary(ip: str):
|
|||||||
sum(query_params_count) AS total_params,
|
sum(query_params_count) AS total_params,
|
||||||
groupArray(3)(host) AS top_hosts
|
groupArray(3)(host) AS top_hosts
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
"""
|
"""
|
||||||
bf_res = db.query(bf_sql, {"ip": clean_ip})
|
bf_res = db.query(bf_sql, {"ip": clean_ip})
|
||||||
bf_row = bf_res.result_rows[0] if bf_res.result_rows else None
|
bf_row = bf_res.result_rows[0] if bf_res.result_rows else None
|
||||||
@ -77,7 +77,7 @@ async def get_ip_full_summary(ip: str):
|
|||||||
any(tcp_mss_raw) AS mss,
|
any(tcp_mss_raw) AS mss,
|
||||||
any(first_ua) AS ua
|
any(first_ua) AS ua
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
AND window_start >= now() - INTERVAL 24 HOUR
|
AND window_start >= now() - INTERVAL 24 HOUR
|
||||||
AND tcp_ttl_raw > 0
|
AND tcp_ttl_raw > 0
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
@ -113,7 +113,7 @@ async def get_ip_full_summary(ip: str):
|
|||||||
rot_sql = f"""
|
rot_sql = f"""
|
||||||
SELECT distinct_ja4_count, total_hits
|
SELECT distinct_ja4_count, total_hits
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
"""
|
"""
|
||||||
rot_res = db.query(rot_sql, {"ip": clean_ip})
|
rot_res = db.query(rot_sql, {"ip": clean_ip})
|
||||||
@ -127,7 +127,7 @@ async def get_ip_full_summary(ip: str):
|
|||||||
pers_sql = f"""
|
pers_sql = f"""
|
||||||
SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen
|
SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
"""
|
"""
|
||||||
pers_res = db.query(pers_sql, {"ip": clean_ip})
|
pers_res = db.query(pers_sql, {"ip": clean_ip})
|
||||||
@ -150,7 +150,7 @@ async def get_ip_full_summary(ip: str):
|
|||||||
sum(hits) AS hits,
|
sum(hits) AS hits,
|
||||||
groupUniqArray(3)(ja4) AS ja4s
|
groupUniqArray(3)(ja4) AS ja4s
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
AND window_start >= now() - INTERVAL 24 HOUR
|
AND window_start >= now() - INTERVAL 24 HOUR
|
||||||
GROUP BY hour
|
GROUP BY hour
|
||||||
ORDER BY hour ASC
|
ORDER BY hour ASC
|
||||||
|
|||||||
@ -105,7 +105,7 @@ async def get_ip_radar(ip: str):
|
|||||||
avg(path_diversity_ratio) AS path_diversity_ratio,
|
avg(path_diversity_ratio) AS path_diversity_ratio,
|
||||||
avg(anomalous_payload_ratio) AS anomalous_payload_ratio
|
avg(anomalous_payload_ratio) AS anomalous_payload_ratio
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
AND window_start >= now() - INTERVAL 24 HOUR
|
AND window_start >= now() - INTERVAL 24 HOUR
|
||||||
"""
|
"""
|
||||||
result = db.query(sql, {"ip": ip})
|
result = db.query(sql, {"ip": ip})
|
||||||
|
|||||||
@ -83,7 +83,7 @@ async def get_ip_ja4_history(ip: str):
|
|||||||
min(window_start) AS first_seen,
|
min(window_start) AS first_seen,
|
||||||
max(window_start) AS last_seen
|
max(window_start) AS last_seen
|
||||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
||||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
WHERE src_ip = IPv4MappedToIPv6(toIPv4(%(ip)s))
|
||||||
GROUP BY ja4
|
GROUP BY ja4
|
||||||
ORDER BY hits DESC
|
ORDER BY hits DESC
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user