feat(backend): champs Anubis dans les détections, fix alias ClickHouse

- models.py: ajout anubis_bot_name, anubis_bot_action, anubis_bot_category
  dans le modèle Pydantic Detection
- detections.py: fix ILLEGAL_AGGREGATION ClickHouse (argMin alias renommés
  en *_best), ajout des 3 champs Anubis dans les requêtes individuelle et
  groupée
- ml_features.py: fix alias sum(hits) AS total_hits (évite nested aggregate),
  ajout missing_accept_enc_ratio et http_scheme_ratio dans b-features

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
SOC Analyst
2026-03-19 18:01:52 +01:00
parent 9ee3d01059
commit 02d10b5363
3 changed files with 57 additions and 33 deletions

View File

@ -97,15 +97,18 @@ async def get_detections(
ip_data.unique_ja4s,
ip_data.unique_hosts,
ip_data.min_score AS anomaly_score,
ip_data.threat_level,
ip_data.model_name,
ip_data.threat_level_best,
ip_data.model_name_best,
ip_data.country_code,
ip_data.asn_number,
ip_data.asn_org,
ip_data.hit_velocity,
ip_data.hits,
ip_data.asn_label,
ar.label AS asn_rep_label
ar.label AS asn_rep_label,
ip_data.anubis_bot_name_best,
ip_data.anubis_bot_action_best,
ip_data.anubis_bot_category_best
FROM (
SELECT
src_ip,
@ -115,14 +118,17 @@ async def get_detections(
groupUniqArray(5)(ja4) AS unique_ja4s,
groupUniqArray(5)(host) AS unique_hosts,
min(anomaly_score) AS min_score,
argMin(threat_level, anomaly_score) AS threat_level,
argMin(model_name, anomaly_score) AS model_name,
argMin(threat_level, anomaly_score) AS threat_level_best,
argMin(model_name, anomaly_score) AS model_name_best,
any(country_code) AS country_code,
any(asn_number) AS asn_number,
any(asn_org) AS asn_org,
max(hit_velocity) AS hit_velocity,
sum(hits) AS hits,
any(asn_label) AS asn_label
any(asn_label) AS asn_label,
argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
FROM ml_detected_anomalies
WHERE {where_clause}
GROUP BY src_ip
@ -145,8 +151,9 @@ async def get_detections(
detections = []
for row in gresult.result_rows:
# row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts,
# anomaly_score, threat_level, model_name, country_code, asn_number, asn_org,
# hit_velocity, hits, asn_label, asn_rep_label
# anomaly_score, threat_level_best, model_name_best, country_code, asn_number,
# asn_org, hit_velocity, hits, asn_label, asn_rep_label,
# anubis_bot_name_best, anubis_bot_action_best, anubis_bot_category_best
ja4s = list(row[4]) if row[4] else []
hosts = list(row[5]) if row[5] else []
detections.append(Detection(
@ -176,6 +183,9 @@ async def get_detections(
last_seen=row[2],
unique_ja4s=ja4s,
unique_hosts=hosts,
anubis_bot_name=row[16] or "",
anubis_bot_action=row[17] or "",
anubis_bot_category=row[18] or "",
))
total_pages = (total + page_size - 1) // page_size
@ -215,7 +225,10 @@ async def get_detections(
fuzzing_index,
post_ratio,
reason,
ar.label AS asn_rep_label
ar.label AS asn_rep_label,
anubis_bot_name,
anubis_bot_action,
anubis_bot_category
FROM ml_detected_anomalies
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
WHERE {where_clause}
@ -267,6 +280,9 @@ async def get_detections(
reason=row[19] or "",
asn_rep_label=row[20] or "",
asn_score=_label_to_score(row[20] or ""),
anubis_bot_name=row[21] or "",
anubis_bot_action=row[22] or "",
anubis_bot_category=row[23] or "",
)
for row in result.result_rows
]

View File

@ -238,43 +238,48 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
"""
try:
sql = """
SELECT ip, ja4, country, asn_name, hits,
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio
SELECT ip, ja4, country, asn_name, total_hits AS hits,
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
missing_accept_enc_ratio, http_scheme_ratio
FROM (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
any(ja4) AS ja4,
any(src_country_code) AS country,
any(src_as_name) AS asn_name,
sum(hits) AS hits,
round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
any(ja4) AS ja4,
any(src_country_code) AS country,
any(src_as_name) AS asn_name,
sum(hits) AS total_hits,
round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
)
WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3
OR head_ratio > 0.1 OR tls12_ratio > 0.5
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio) DESC
OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR missing_accept_enc_ratio > 0.3
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio + missing_accept_enc_ratio) DESC
LIMIT %(limit)s
"""
result = db.query(sql, {"limit": limit})
items = []
for row in result.result_rows:
items.append({
"ip": str(row[0]),
"ja4": str(row[1] or ""),
"country": str(row[2] or ""),
"asn_name": str(row[3] or ""),
"hits": int(row[4] or 0),
"head_ratio": float(row[5] or 0),
"sec_fetch_absence": float(row[6] or 0),
"tls12_ratio": float(row[7] or 0),
"generic_accept_ratio":float(row[8] or 0),
"http10_ratio": float(row[9] or 0),
"ip": str(row[0]),
"ja4": str(row[1] or ""),
"country": str(row[2] or ""),
"asn_name": str(row[3] or ""),
"hits": int(row[4] or 0),
"head_ratio": float(row[5] or 0),
"sec_fetch_absence": float(row[6] or 0),
"tls12_ratio": float(row[7] or 0),
"generic_accept_ratio": float(row[8] or 0),
"http10_ratio": float(row[9] or 0),
"missing_accept_enc_ratio":float(row[10] or 0),
"http_scheme_ratio": float(row[11] or 0),
})
return {"items": items, "total": len(items)}
except Exception as e: