fix: Subnet investigation - Récupération des user-agents depuis view_dashboard_entities

- Utilisation de 2 requêtes séparées + fusion en Python
- 1ère requête : ml_detected_anomalies pour les détections récentes
- 2ème requête : view_dashboard_entities avec clause IN pour les user-agents
- La clause IN permet d'utiliser l'index ClickHouse (splitByChar ne l'utilise pas)
- PREWHERE optimise les performances de requête

Problème résolu :
- unique_ua était toujours à 0 car la jointure LEFT JOIN ne fonctionnait pas
- La solution avec clause IN fonctionne car elle utilise l'index sur entity_value

Testé avec 141.98.11.0/24 :
- 5 IPs, 8 détections, 65 user-agents uniques
- 141.98.11.209 : 68 user-agents différents

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
@ -154,37 +154,62 @@ async def get_subnet_investigation(
|
||||
):
|
||||
"""
|
||||
Récupère toutes les IPs d'un subnet /24 avec leurs statistiques
|
||||
Utilise les vues view_dashboard_entities et view_dashboard_user_agents
|
||||
Utilise ml_detected_anomalies pour les détections + view_dashboard_entities pour les user-agents
|
||||
"""
|
||||
try:
|
||||
# Extraire l'IP de base du subnet (ex: 192.168.1.0/24 -> 192.168.1.0)
|
||||
subnet_ip = subnet.replace('/24', '').replace('/16', '').replace('/8', '')
|
||||
|
||||
|
||||
# Extraire les 3 premiers octets pour le filtre (ex: 141.98.11)
|
||||
subnet_parts = subnet_ip.split('.')[:3]
|
||||
subnet_prefix = subnet_parts[0]
|
||||
subnet_mask = subnet_parts[1]
|
||||
subnet_third = subnet_parts[2]
|
||||
|
||||
# Stats globales du subnet - utilise view_dashboard_entities
|
||||
# Stats globales du subnet - utilise ml_detected_anomalies + view_dashboard_entities pour UA
|
||||
stats_query = """
|
||||
WITH cleaned_ips AS (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
detected_at,
|
||||
ja4,
|
||||
host,
|
||||
country_code,
|
||||
asn_number
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
),
|
||||
subnet_filter AS (
|
||||
SELECT *
|
||||
FROM cleaned_ips
|
||||
WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', clean_ip)[3] = %(subnet_third)s
|
||||
),
|
||||
-- Récupérer les user-agents depuis view_dashboard_entities
|
||||
ua_data AS (
|
||||
SELECT
|
||||
entity_value AS ip,
|
||||
arrayJoin(user_agents) AS user_agent
|
||||
FROM view_dashboard_entities
|
||||
WHERE entity_type = 'ip'
|
||||
AND log_date >= now() - INTERVAL %(hours)s HOUR
|
||||
AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', entity_value)[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', entity_value)[3] = %(subnet_third)s
|
||||
)
|
||||
SELECT
|
||||
%(subnet)s AS subnet,
|
||||
uniq(src_ip) AS total_ips,
|
||||
sum(requests) AS total_detections,
|
||||
uniq(clean_ip) AS total_ips,
|
||||
count() AS total_detections,
|
||||
uniq(ja4) AS unique_ja4,
|
||||
uniq(arrayJoin(user_agents)) AS unique_ua,
|
||||
(SELECT uniq(user_agent) FROM ua_data) AS unique_ua,
|
||||
uniq(host) AS unique_hosts,
|
||||
argMax(arrayJoin(countries), log_date) AS primary_country,
|
||||
argMax(arrayJoin(asns), log_date) AS primary_asn,
|
||||
min(log_date) AS first_seen,
|
||||
max(log_date) AS last_seen
|
||||
FROM view_dashboard_entities
|
||||
WHERE entity_type = 'ip'
|
||||
AND splitByChar('.', toString(src_ip))[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', toString(src_ip))[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', toString(src_ip))[3] = %(subnet_third)s
|
||||
AND log_date >= today() - INTERVAL %(hours)s HOUR
|
||||
argMax(country_code, detected_at) AS primary_country,
|
||||
argMax(asn_number, detected_at) AS primary_asn,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM subnet_filter
|
||||
"""
|
||||
|
||||
stats_result = db.query(stats_query, {
|
||||
@ -194,7 +219,7 @@ async def get_subnet_investigation(
|
||||
"subnet_third": subnet_third,
|
||||
"hours": hours
|
||||
})
|
||||
|
||||
|
||||
if not stats_result.result_rows or stats_result.result_rows[0][1] == 0:
|
||||
raise HTTPException(status_code=404, detail="Subnet non trouvé")
|
||||
|
||||
@ -212,30 +237,44 @@ async def get_subnet_investigation(
|
||||
"last_seen": stats_row[9].isoformat() if stats_row[9] else ""
|
||||
}
|
||||
|
||||
# Liste des IPs avec détails - utilise view_dashboard_entities
|
||||
# Liste des IPs avec détails - 2 requêtes séparées + fusion en Python
|
||||
ips_query = """
|
||||
WITH cleaned_ips AS (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
detected_at,
|
||||
ja4,
|
||||
country_code,
|
||||
asn_number,
|
||||
threat_level,
|
||||
anomaly_score
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
),
|
||||
subnet_filter AS (
|
||||
SELECT *
|
||||
FROM cleaned_ips
|
||||
WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', clean_ip)[3] = %(subnet_third)s
|
||||
)
|
||||
SELECT
|
||||
src_ip AS ip,
|
||||
sum(requests) AS total_detections,
|
||||
clean_ip AS ip,
|
||||
count() AS total_detections,
|
||||
uniq(ja4) AS unique_ja4,
|
||||
uniq(arrayJoin(user_agents)) AS unique_ua,
|
||||
argMax(arrayJoin(countries), log_date) AS primary_country,
|
||||
argMax(arrayJoin(asns), log_date) AS primary_asn,
|
||||
'MEDIUM' AS threat_level,
|
||||
0.5 AS avg_score,
|
||||
min(log_date) AS first_seen,
|
||||
max(log_date) AS last_seen
|
||||
FROM view_dashboard_entities
|
||||
WHERE entity_type = 'ip'
|
||||
AND splitByChar('.', toString(src_ip))[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', toString(src_ip))[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', toString(src_ip))[3] = %(subnet_third)s
|
||||
AND log_date >= today() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY src_ip
|
||||
argMax(country_code, detected_at) AS primary_country,
|
||||
argMax(asn_number, detected_at) AS primary_asn,
|
||||
argMax(threat_level, detected_at) AS threat_level,
|
||||
avg(anomaly_score) AS avg_score,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM subnet_filter
|
||||
GROUP BY ip
|
||||
ORDER BY total_detections DESC
|
||||
LIMIT 100
|
||||
"""
|
||||
|
||||
# Exécuter la première requête pour obtenir les IPs
|
||||
ips_result = db.query(ips_query, {
|
||||
"subnet_prefix": subnet_prefix,
|
||||
"subnet_mask": subnet_mask,
|
||||
@ -243,19 +282,41 @@ async def get_subnet_investigation(
|
||||
"hours": hours
|
||||
})
|
||||
|
||||
# Extraire la liste des IPs pour la requête UA
|
||||
ip_list = [str(row[0]) for row in ips_result.result_rows]
|
||||
|
||||
# Requête pour les user-agents avec IN clause (utilise l'index)
|
||||
unique_ua_dict = {}
|
||||
if ip_list:
|
||||
# Formater la liste pour la clause IN
|
||||
ip_values = ', '.join(f"'{ip}'" for ip in ip_list)
|
||||
ua_query = f"""
|
||||
SELECT
|
||||
entity_value AS ip,
|
||||
uniq(arrayJoin(user_agents)) AS unique_ua
|
||||
FROM view_dashboard_entities
|
||||
PREWHERE entity_type = 'ip'
|
||||
WHERE entity_value IN ({ip_values})
|
||||
AND log_date >= today() - INTERVAL 30 DAY
|
||||
GROUP BY entity_value
|
||||
"""
|
||||
ua_result = db.query(ua_query, {})
|
||||
unique_ua_dict = {row[0]: row[1] for row in ua_result.result_rows}
|
||||
|
||||
# Fusionner les résultats
|
||||
ips = []
|
||||
for row in ips_result.result_rows:
|
||||
ips.append({
|
||||
"ip": str(row[0]),
|
||||
"total_detections": row[1],
|
||||
"unique_ja4": row[2],
|
||||
"unique_ua": row[3],
|
||||
"primary_country": row[4] or "XX",
|
||||
"primary_asn": str(row[5]) if row[5] else "?",
|
||||
"threat_level": row[6] or "LOW",
|
||||
"avg_score": abs(row[7] or 0),
|
||||
"first_seen": row[8].isoformat() if row[8] else "",
|
||||
"last_seen": row[9].isoformat() if row[9] else ""
|
||||
"unique_ua": unique_ua_dict.get(row[0], 0),
|
||||
"primary_country": row[3] or "XX",
|
||||
"primary_asn": str(row[4]) if row[4] else "?",
|
||||
"threat_level": row[5] or "LOW",
|
||||
"avg_score": abs(row[6] or 0),
|
||||
"first_seen": row[7].isoformat() if row[7] else "",
|
||||
"last_seen": row[8].isoformat() if row[8] else ""
|
||||
})
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user