feat(backend): champs Anubis dans les détections, fix alias ClickHouse

- models.py: ajout anubis_bot_name, anubis_bot_action, anubis_bot_category
  dans le modèle Pydantic Detection
- detections.py: corrige l'erreur ILLEGAL_AGGREGATION de ClickHouse (alias
  argMin renommés en *_best), ajout des 3 champs Anubis dans les requêtes
  individuelle et groupée
- ml_features.py: renomme l'alias de sum(hits) en total_hits (évite une
  agrégation imbriquée), ajout de missing_accept_enc_ratio et
  http_scheme_ratio dans b-features

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
SOC Analyst
2026-03-19 18:01:52 +01:00
parent 9ee3d01059
commit 02d10b5363
3 changed files with 57 additions and 33 deletions

View File

@ -81,6 +81,9 @@ class Detection(BaseModel):
last_seen: Optional[datetime] = None last_seen: Optional[datetime] = None
unique_ja4s: Optional[List[str]] = None unique_ja4s: Optional[List[str]] = None
unique_hosts: Optional[List[str]] = None unique_hosts: Optional[List[str]] = None
anubis_bot_name: str = ""
anubis_bot_action: str = ""
anubis_bot_category: str = ""
class DetectionsListResponse(BaseModel): class DetectionsListResponse(BaseModel):

View File

@ -97,15 +97,18 @@ async def get_detections(
ip_data.unique_ja4s, ip_data.unique_ja4s,
ip_data.unique_hosts, ip_data.unique_hosts,
ip_data.min_score AS anomaly_score, ip_data.min_score AS anomaly_score,
ip_data.threat_level, ip_data.threat_level_best,
ip_data.model_name, ip_data.model_name_best,
ip_data.country_code, ip_data.country_code,
ip_data.asn_number, ip_data.asn_number,
ip_data.asn_org, ip_data.asn_org,
ip_data.hit_velocity, ip_data.hit_velocity,
ip_data.hits, ip_data.hits,
ip_data.asn_label, ip_data.asn_label,
ar.label AS asn_rep_label ar.label AS asn_rep_label,
ip_data.anubis_bot_name_best,
ip_data.anubis_bot_action_best,
ip_data.anubis_bot_category_best
FROM ( FROM (
SELECT SELECT
src_ip, src_ip,
@ -115,14 +118,17 @@ async def get_detections(
groupUniqArray(5)(ja4) AS unique_ja4s, groupUniqArray(5)(ja4) AS unique_ja4s,
groupUniqArray(5)(host) AS unique_hosts, groupUniqArray(5)(host) AS unique_hosts,
min(anomaly_score) AS min_score, min(anomaly_score) AS min_score,
argMin(threat_level, anomaly_score) AS threat_level, argMin(threat_level, anomaly_score) AS threat_level_best,
argMin(model_name, anomaly_score) AS model_name, argMin(model_name, anomaly_score) AS model_name_best,
any(country_code) AS country_code, any(country_code) AS country_code,
any(asn_number) AS asn_number, any(asn_number) AS asn_number,
any(asn_org) AS asn_org, any(asn_org) AS asn_org,
max(hit_velocity) AS hit_velocity, max(hit_velocity) AS hit_velocity,
sum(hits) AS hits, sum(hits) AS hits,
any(asn_label) AS asn_label any(asn_label) AS asn_label,
argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
FROM ml_detected_anomalies FROM ml_detected_anomalies
WHERE {where_clause} WHERE {where_clause}
GROUP BY src_ip GROUP BY src_ip
@ -145,8 +151,9 @@ async def get_detections(
detections = [] detections = []
for row in gresult.result_rows: for row in gresult.result_rows:
# row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts, # row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts,
# anomaly_score, threat_level, model_name, country_code, asn_number, asn_org, # anomaly_score, threat_level_best, model_name_best, country_code, asn_number,
# hit_velocity, hits, asn_label, asn_rep_label # asn_org, hit_velocity, hits, asn_label, asn_rep_label,
# anubis_bot_name_best, anubis_bot_action_best, anubis_bot_category_best
ja4s = list(row[4]) if row[4] else [] ja4s = list(row[4]) if row[4] else []
hosts = list(row[5]) if row[5] else [] hosts = list(row[5]) if row[5] else []
detections.append(Detection( detections.append(Detection(
@ -176,6 +183,9 @@ async def get_detections(
last_seen=row[2], last_seen=row[2],
unique_ja4s=ja4s, unique_ja4s=ja4s,
unique_hosts=hosts, unique_hosts=hosts,
anubis_bot_name=row[16] or "",
anubis_bot_action=row[17] or "",
anubis_bot_category=row[18] or "",
)) ))
total_pages = (total + page_size - 1) // page_size total_pages = (total + page_size - 1) // page_size
@ -215,7 +225,10 @@ async def get_detections(
fuzzing_index, fuzzing_index,
post_ratio, post_ratio,
reason, reason,
ar.label AS asn_rep_label ar.label AS asn_rep_label,
anubis_bot_name,
anubis_bot_action,
anubis_bot_category
FROM ml_detected_anomalies FROM ml_detected_anomalies
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number) LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
WHERE {where_clause} WHERE {where_clause}
@ -267,6 +280,9 @@ async def get_detections(
reason=row[19] or "", reason=row[19] or "",
asn_rep_label=row[20] or "", asn_rep_label=row[20] or "",
asn_score=_label_to_score(row[20] or ""), asn_score=_label_to_score(row[20] or ""),
anubis_bot_name=row[21] or "",
anubis_bot_action=row[22] or "",
anubis_bot_category=row[23] or "",
) )
for row in result.result_rows for row in result.result_rows
] ]

View File

@ -238,27 +238,30 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
""" """
try: try:
sql = """ sql = """
SELECT ip, ja4, country, asn_name, hits, SELECT ip, ja4, country, asn_name, total_hits AS hits,
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
missing_accept_enc_ratio, http_scheme_ratio
FROM ( FROM (
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
any(ja4) AS ja4, any(ja4) AS ja4,
any(src_country_code) AS country, any(src_country_code) AS country,
any(src_as_name) AS asn_name, any(src_as_name) AS asn_name,
sum(hits) AS hits, sum(hits) AS total_hits,
round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio, round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence, round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio, round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio, round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
FROM mabase_prod.agg_host_ip_ja4_1h FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip GROUP BY src_ip
) )
WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3 WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3
OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR missing_accept_enc_ratio > 0.3
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio) DESC ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio + missing_accept_enc_ratio) DESC
LIMIT %(limit)s LIMIT %(limit)s
""" """
result = db.query(sql, {"limit": limit}) result = db.query(sql, {"limit": limit})
@ -275,6 +278,8 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
"tls12_ratio": float(row[7] or 0), "tls12_ratio": float(row[7] or 0),
"generic_accept_ratio": float(row[8] or 0), "generic_accept_ratio": float(row[8] or 0),
"http10_ratio": float(row[9] or 0), "http10_ratio": float(row[9] or 0),
"missing_accept_enc_ratio":float(row[10] or 0),
"http_scheme_ratio": float(row[11] or 0),
}) })
return {"items": items, "total": len(items)} return {"items": items, "total": len(items)}
except Exception as e: except Exception as e: