fix: correct CampaignsView, analysis.py IPv4 split, entities date filter

- CampaignsView: update ClusterData interface to match real API response
  (severity/unique_ips/score instead of threat_level/total_ips/confidence_range)
  Fix fetch to use data.items, rewrite ClusterCard and BehavioralTab
  Remove unused getClassificationColor and THREAT_ORDER constants
- analysis.py: fix AttributeError "'IPv4Address' object has no attribute 'split'" at line 322
  Convert the address with str() before calling .split('.')
- entities.py: fix Date vs DateTime comparison — log_date is a Date column, so
  comparing it against now() - INTERVAL X HOUR caused yesterday's entries to be excluded.
  Use toDate(now() - INTERVAL X HOUR) for a correct Date-level comparison

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
SOC Analyst
2026-03-15 23:10:35 +01:00
parent 8d35b91642
commit 1455e04303
50 changed files with 5442 additions and 7325 deletions

View File

@ -103,7 +103,7 @@ async def get_associated_attributes(
# Mapping des attributs cibles
target_column_map = {
"user_agents": "''", # Pas de user_agent
"user_agents": None, # handled separately via view_dashboard_user_agents
"ja4": "ja4",
"countries": "country_code",
"asns": "asn_number",
@ -122,9 +122,33 @@ async def get_associated_attributes(
column = type_column_map[attr_type]
target_column = target_column_map[target_attr]
# Pour user_agent, retourne liste vide
if target_column == "''":
return {"type": attr_type, "value": value, "target": target_attr, "items": [], "total": 0}
# Pour user_agents: requête via view_dashboard_user_agents
# Colonnes: src_ip, ja4, hour, log_date, user_agents, requests
if target_column is None:
if attr_type == "ip":
ua_where = "toString(src_ip) = %(value)s"
elif attr_type == "ja4":
ua_where = "ja4 = %(value)s"
else:
# country/asn/host: pivot via ml_detected_anomalies
ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)"""
ua_q = f"""
SELECT ua AS value, sum(requests) AS count,
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage
FROM view_dashboard_user_agents
ARRAY JOIN user_agents AS ua
WHERE {ua_where}
AND hour >= now() - INTERVAL 24 HOUR AND ua != ''
GROUP BY value ORDER BY count DESC LIMIT %(limit)s
"""
ua_result = db.query(ua_q, {"value": value, "limit": limit})
items = [{"value": str(r[0]), "count": r[1], "percentage": round(float(r[2]), 2) if r[2] else 0.0}
for r in ua_result.result_rows]
return {"type": attr_type, "value": value, "target": target_attr, "items": items, "total": len(items), "showing": len(items)}
query = f"""
SELECT
@ -193,8 +217,8 @@ async def get_user_agents(
type_column_map = {
"ip": "src_ip",
"ja4": "ja4",
"country": "src_country_code",
"asn": "src_asn",
"country": "country_code",
"asn": "asn_number",
"host": "host",
}
@ -206,25 +230,51 @@ async def get_user_agents(
column = type_column_map[attr_type]
# Requête sur la vue materialisée
# user_agents est un Array, on utilise arrayJoin pour l'aplatir
# view_dashboard_user_agents colonnes: src_ip, ja4, hour, log_date, user_agents, requests
if attr_type == "ip":
where = "toString(src_ip) = %(value)s"
params: dict = {"value": value, "limit": limit}
elif attr_type == "ja4":
where = "ja4 = %(value)s"
params = {"value": value, "limit": limit}
else:
# country / asn / host: pivot via ml_detected_anomalies → IPs connus → vue par src_ip
ml_col = {"country": "country_code", "asn": "asn_number", "host": "host"}[attr_type]
where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
WHERE {ml_col} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
)"""
params = {"value": value, "limit": limit}
query = f"""
SELECT
ua AS user_agent,
sum(requests) AS count,
round(count * 100.0 / sum(count) OVER (), 2) AS percentage,
min(hour) AS first_seen,
max(hour) AS last_seen
FROM mabase_prod.view_dashboard_user_agents
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
min(log_date) AS first_seen,
max(log_date) AS last_seen
FROM view_dashboard_user_agents
ARRAY JOIN user_agents AS ua
WHERE {column} = %(value)s
WHERE {where}
AND hour >= now() - INTERVAL 24 HOUR
AND ua != ''
GROUP BY user_agent
ORDER BY count DESC
LIMIT %(limit)s
"""
result = db.query(query, params)
result = db.query(query, {"value": value, "limit": limit})
count_query = f"""
SELECT uniqExact(ua) AS total
FROM view_dashboard_user_agents
ARRAY JOIN user_agents AS ua
WHERE {where}
AND hour >= now() - INTERVAL 24 HOUR
AND ua != ''
"""
count_result = db.query(count_query, params)
user_agents = [
UserAgentValue(
@ -237,16 +287,6 @@ async def get_user_agents(
for row in result.result_rows
]
# Compter le total
count_query = f"""
SELECT uniq(ua) AS total
FROM mabase_prod.view_dashboard_user_agents
ARRAY JOIN user_agents AS ua
WHERE {column} = %(value)s
AND hour >= now() - INTERVAL 24 HOUR
"""
count_result = db.query(count_query, {"value": value})
total = count_result.result_rows[0][0] if count_result.result_rows else 0
return {
@ -451,38 +491,41 @@ async def get_variability(attr_type: str, value: str):
first_seen = stats_row[2]
last_seen = stats_row[3]
# User-Agents
ua_query = f"""
SELECT
user_agent,
count() AS count,
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage,
min(detected_at) AS first_seen,
max(detected_at) AS last_seen,
groupArray((threat_level, 1)) AS threats
FROM ({base_query})
WHERE user_agent != '' AND user_agent IS NOT NULL
GROUP BY user_agent
ORDER BY count DESC
LIMIT 10
"""
# Simplified query without complex threat parsing
# User-Agents via view_dashboard_user_agents (source principale pour les UAs)
# Colonnes disponibles: src_ip, ja4, hour, log_date, user_agents, requests
if attr_type == "ip":
_ua_where = "toString(src_ip) = %(value)s"
_ua_params: dict = {"value": value}
elif attr_type == "ja4":
_ua_where = "ja4 = %(value)s"
_ua_params = {"value": value}
else:
# country / asn / host: pivot via ml_detected_anomalies → IPs
_ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)"""
_ua_params = {"value": value}
ua_query_simple = f"""
SELECT
user_agent,
count() AS count,
round(count() * 100.0 / (SELECT count() FROM ({base_query}) WHERE user_agent != '' AND user_agent IS NOT NULL), 2) AS percentage,
min(detected_at) AS first_seen,
max(detected_at) AS last_seen
FROM ({base_query})
WHERE user_agent != '' AND user_agent IS NOT NULL
ua AS user_agent,
sum(requests) AS count,
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
min(log_date) AS first_seen,
max(log_date) AS last_seen
FROM view_dashboard_user_agents
ARRAY JOIN user_agents AS ua
WHERE {_ua_where}
AND hour >= now() - INTERVAL 24 HOUR
AND ua != ''
GROUP BY user_agent
ORDER BY count DESC
LIMIT 10
"""
ua_result = db.query(ua_query_simple, {"value": value})
ua_result = db.query(ua_query_simple, _ua_params)
user_agents = [get_attribute_value(row, 1, 2, 3, 4) for row in ua_result.result_rows]
# JA4 fingerprints