feat(backend): champs Anubis dans les détections, fix alias ClickHouse
- models.py : ajout de anubis_bot_name, anubis_bot_action, anubis_bot_category dans le modèle Pydantic Detection
- detections.py : fix ILLEGAL_AGGREGATION ClickHouse (alias argMin renommés en *_best), ajout des 3 champs Anubis dans les requêtes individuelle et groupée
- ml_features.py : fix alias sum(hits) AS total_hits (évite un nested aggregate), ajout de missing_accept_enc_ratio et http_scheme_ratio dans b-features

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -81,6 +81,9 @@ class Detection(BaseModel):
|
||||
last_seen: Optional[datetime] = None
|
||||
unique_ja4s: Optional[List[str]] = None
|
||||
unique_hosts: Optional[List[str]] = None
|
||||
anubis_bot_name: str = ""
|
||||
anubis_bot_action: str = ""
|
||||
anubis_bot_category: str = ""
|
||||
|
||||
|
||||
class DetectionsListResponse(BaseModel):
|
||||
|
||||
@ -97,15 +97,18 @@ async def get_detections(
|
||||
ip_data.unique_ja4s,
|
||||
ip_data.unique_hosts,
|
||||
ip_data.min_score AS anomaly_score,
|
||||
ip_data.threat_level,
|
||||
ip_data.model_name,
|
||||
ip_data.threat_level_best,
|
||||
ip_data.model_name_best,
|
||||
ip_data.country_code,
|
||||
ip_data.asn_number,
|
||||
ip_data.asn_org,
|
||||
ip_data.hit_velocity,
|
||||
ip_data.hits,
|
||||
ip_data.asn_label,
|
||||
ar.label AS asn_rep_label
|
||||
ar.label AS asn_rep_label,
|
||||
ip_data.anubis_bot_name_best,
|
||||
ip_data.anubis_bot_action_best,
|
||||
ip_data.anubis_bot_category_best
|
||||
FROM (
|
||||
SELECT
|
||||
src_ip,
|
||||
@ -115,14 +118,17 @@ async def get_detections(
|
||||
groupUniqArray(5)(ja4) AS unique_ja4s,
|
||||
groupUniqArray(5)(host) AS unique_hosts,
|
||||
min(anomaly_score) AS min_score,
|
||||
argMin(threat_level, anomaly_score) AS threat_level,
|
||||
argMin(model_name, anomaly_score) AS model_name,
|
||||
argMin(threat_level, anomaly_score) AS threat_level_best,
|
||||
argMin(model_name, anomaly_score) AS model_name_best,
|
||||
any(country_code) AS country_code,
|
||||
any(asn_number) AS asn_number,
|
||||
any(asn_org) AS asn_org,
|
||||
max(hit_velocity) AS hit_velocity,
|
||||
sum(hits) AS hits,
|
||||
any(asn_label) AS asn_label
|
||||
any(asn_label) AS asn_label,
|
||||
argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
|
||||
argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
|
||||
argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {where_clause}
|
||||
GROUP BY src_ip
|
||||
@ -145,8 +151,9 @@ async def get_detections(
|
||||
detections = []
|
||||
for row in gresult.result_rows:
|
||||
# row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts,
|
||||
# anomaly_score, threat_level, model_name, country_code, asn_number, asn_org,
|
||||
# hit_velocity, hits, asn_label, asn_rep_label
|
||||
# anomaly_score, threat_level_best, model_name_best, country_code, asn_number,
|
||||
# asn_org, hit_velocity, hits, asn_label, asn_rep_label,
|
||||
# anubis_bot_name, anubis_bot_action, anubis_bot_category
|
||||
ja4s = list(row[4]) if row[4] else []
|
||||
hosts = list(row[5]) if row[5] else []
|
||||
detections.append(Detection(
|
||||
@ -176,6 +183,9 @@ async def get_detections(
|
||||
last_seen=row[2],
|
||||
unique_ja4s=ja4s,
|
||||
unique_hosts=hosts,
|
||||
anubis_bot_name=row[16] or "",
|
||||
anubis_bot_action=row[17] or "",
|
||||
anubis_bot_category=row[18] or "",
|
||||
))
|
||||
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
@ -215,7 +225,10 @@ async def get_detections(
|
||||
fuzzing_index,
|
||||
post_ratio,
|
||||
reason,
|
||||
ar.label AS asn_rep_label
|
||||
ar.label AS asn_rep_label,
|
||||
anubis_bot_name,
|
||||
anubis_bot_action,
|
||||
anubis_bot_category
|
||||
FROM ml_detected_anomalies
|
||||
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
|
||||
WHERE {where_clause}
|
||||
@ -267,6 +280,9 @@ async def get_detections(
|
||||
reason=row[19] or "",
|
||||
asn_rep_label=row[20] or "",
|
||||
asn_score=_label_to_score(row[20] or ""),
|
||||
anubis_bot_name=row[21] or "",
|
||||
anubis_bot_action=row[22] or "",
|
||||
anubis_bot_category=row[23] or "",
|
||||
)
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
@ -238,27 +238,30 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
SELECT ip, ja4, country, asn_name, hits,
|
||||
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio
|
||||
SELECT ip, ja4, country, asn_name, total_hits AS hits,
|
||||
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
|
||||
missing_accept_enc_ratio, http_scheme_ratio
|
||||
FROM (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
any(ja4) AS ja4,
|
||||
any(src_country_code) AS country,
|
||||
any(src_as_name) AS asn_name,
|
||||
sum(hits) AS hits,
|
||||
sum(hits) AS total_hits,
|
||||
round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
|
||||
round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
|
||||
round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
|
||||
round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
|
||||
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio
|
||||
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
|
||||
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
|
||||
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY src_ip
|
||||
)
|
||||
WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3
|
||||
OR head_ratio > 0.1 OR tls12_ratio > 0.5
|
||||
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio) DESC
|
||||
OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR missing_accept_enc_ratio > 0.3
|
||||
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio + missing_accept_enc_ratio) DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
@ -273,8 +276,10 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
|
||||
"head_ratio": float(row[5] or 0),
|
||||
"sec_fetch_absence": float(row[6] or 0),
|
||||
"tls12_ratio": float(row[7] or 0),
|
||||
"generic_accept_ratio":float(row[8] or 0),
|
||||
"generic_accept_ratio": float(row[8] or 0),
|
||||
"http10_ratio": float(row[9] or 0),
|
||||
"missing_accept_enc_ratio":float(row[10] or 0),
|
||||
"http_scheme_ratio": float(row[11] or 0),
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user