feat(backend): champs Anubis dans les détections, fix alias ClickHouse
- models.py: ajout anubis_bot_name, anubis_bot_action, anubis_bot_category dans le modèle Pydantic Detection
- detections.py: fix ILLEGAL_AGGREGATION ClickHouse (argMin alias renommés en *_best), ajout des 3 champs Anubis dans les requêtes individuelle et groupée
- ml_features.py: fix alias sum(hits) AS total_hits (évite nested aggregate), ajout missing_accept_enc_ratio et http_scheme_ratio dans b-features

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -81,6 +81,9 @@ class Detection(BaseModel):
|
|||||||
last_seen: Optional[datetime] = None
|
last_seen: Optional[datetime] = None
|
||||||
unique_ja4s: Optional[List[str]] = None
|
unique_ja4s: Optional[List[str]] = None
|
||||||
unique_hosts: Optional[List[str]] = None
|
unique_hosts: Optional[List[str]] = None
|
||||||
|
anubis_bot_name: str = ""
|
||||||
|
anubis_bot_action: str = ""
|
||||||
|
anubis_bot_category: str = ""
|
||||||
|
|
||||||
|
|
||||||
class DetectionsListResponse(BaseModel):
|
class DetectionsListResponse(BaseModel):
|
||||||
|
|||||||
@ -97,15 +97,18 @@ async def get_detections(
|
|||||||
ip_data.unique_ja4s,
|
ip_data.unique_ja4s,
|
||||||
ip_data.unique_hosts,
|
ip_data.unique_hosts,
|
||||||
ip_data.min_score AS anomaly_score,
|
ip_data.min_score AS anomaly_score,
|
||||||
ip_data.threat_level,
|
ip_data.threat_level_best,
|
||||||
ip_data.model_name,
|
ip_data.model_name_best,
|
||||||
ip_data.country_code,
|
ip_data.country_code,
|
||||||
ip_data.asn_number,
|
ip_data.asn_number,
|
||||||
ip_data.asn_org,
|
ip_data.asn_org,
|
||||||
ip_data.hit_velocity,
|
ip_data.hit_velocity,
|
||||||
ip_data.hits,
|
ip_data.hits,
|
||||||
ip_data.asn_label,
|
ip_data.asn_label,
|
||||||
ar.label AS asn_rep_label
|
ar.label AS asn_rep_label,
|
||||||
|
ip_data.anubis_bot_name_best,
|
||||||
|
ip_data.anubis_bot_action_best,
|
||||||
|
ip_data.anubis_bot_category_best
|
||||||
FROM (
|
FROM (
|
||||||
SELECT
|
SELECT
|
||||||
src_ip,
|
src_ip,
|
||||||
@ -115,14 +118,17 @@ async def get_detections(
|
|||||||
groupUniqArray(5)(ja4) AS unique_ja4s,
|
groupUniqArray(5)(ja4) AS unique_ja4s,
|
||||||
groupUniqArray(5)(host) AS unique_hosts,
|
groupUniqArray(5)(host) AS unique_hosts,
|
||||||
min(anomaly_score) AS min_score,
|
min(anomaly_score) AS min_score,
|
||||||
argMin(threat_level, anomaly_score) AS threat_level,
|
argMin(threat_level, anomaly_score) AS threat_level_best,
|
||||||
argMin(model_name, anomaly_score) AS model_name,
|
argMin(model_name, anomaly_score) AS model_name_best,
|
||||||
any(country_code) AS country_code,
|
any(country_code) AS country_code,
|
||||||
any(asn_number) AS asn_number,
|
any(asn_number) AS asn_number,
|
||||||
any(asn_org) AS asn_org,
|
any(asn_org) AS asn_org,
|
||||||
max(hit_velocity) AS hit_velocity,
|
max(hit_velocity) AS hit_velocity,
|
||||||
sum(hits) AS hits,
|
sum(hits) AS hits,
|
||||||
any(asn_label) AS asn_label
|
any(asn_label) AS asn_label,
|
||||||
|
argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
|
||||||
|
argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
|
||||||
|
argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
|
||||||
FROM ml_detected_anomalies
|
FROM ml_detected_anomalies
|
||||||
WHERE {where_clause}
|
WHERE {where_clause}
|
||||||
GROUP BY src_ip
|
GROUP BY src_ip
|
||||||
@ -145,8 +151,9 @@ async def get_detections(
|
|||||||
detections = []
|
detections = []
|
||||||
for row in gresult.result_rows:
|
for row in gresult.result_rows:
|
||||||
# row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts,
|
# row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts,
|
||||||
# anomaly_score, threat_level, model_name, country_code, asn_number, asn_org,
|
# anomaly_score, threat_level_best, model_name_best, country_code, asn_number,
|
||||||
# hit_velocity, hits, asn_label, asn_rep_label
|
# asn_org, hit_velocity, hits, asn_label, asn_rep_label,
|
||||||
|
# anubis_bot_name_best, anubis_bot_action_best, anubis_bot_category_best
|
||||||
ja4s = list(row[4]) if row[4] else []
|
ja4s = list(row[4]) if row[4] else []
|
||||||
hosts = list(row[5]) if row[5] else []
|
hosts = list(row[5]) if row[5] else []
|
||||||
detections.append(Detection(
|
detections.append(Detection(
|
||||||
@ -176,6 +183,9 @@ async def get_detections(
|
|||||||
last_seen=row[2],
|
last_seen=row[2],
|
||||||
unique_ja4s=ja4s,
|
unique_ja4s=ja4s,
|
||||||
unique_hosts=hosts,
|
unique_hosts=hosts,
|
||||||
|
anubis_bot_name=row[16] or "",
|
||||||
|
anubis_bot_action=row[17] or "",
|
||||||
|
anubis_bot_category=row[18] or "",
|
||||||
))
|
))
|
||||||
|
|
||||||
total_pages = (total + page_size - 1) // page_size
|
total_pages = (total + page_size - 1) // page_size
|
||||||
@ -215,7 +225,10 @@ async def get_detections(
|
|||||||
fuzzing_index,
|
fuzzing_index,
|
||||||
post_ratio,
|
post_ratio,
|
||||||
reason,
|
reason,
|
||||||
ar.label AS asn_rep_label
|
ar.label AS asn_rep_label,
|
||||||
|
anubis_bot_name,
|
||||||
|
anubis_bot_action,
|
||||||
|
anubis_bot_category
|
||||||
FROM ml_detected_anomalies
|
FROM ml_detected_anomalies
|
||||||
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
|
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
|
||||||
WHERE {where_clause}
|
WHERE {where_clause}
|
||||||
@ -267,6 +280,9 @@ async def get_detections(
|
|||||||
reason=row[19] or "",
|
reason=row[19] or "",
|
||||||
asn_rep_label=row[20] or "",
|
asn_rep_label=row[20] or "",
|
||||||
asn_score=_label_to_score(row[20] or ""),
|
asn_score=_label_to_score(row[20] or ""),
|
||||||
|
anubis_bot_name=row[21] or "",
|
||||||
|
anubis_bot_action=row[22] or "",
|
||||||
|
anubis_bot_category=row[23] or "",
|
||||||
)
|
)
|
||||||
for row in result.result_rows
|
for row in result.result_rows
|
||||||
]
|
]
|
||||||
|
|||||||
@ -238,27 +238,30 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
sql = """
|
sql = """
|
||||||
SELECT ip, ja4, country, asn_name, hits,
|
SELECT ip, ja4, country, asn_name, total_hits AS hits,
|
||||||
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio
|
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
|
||||||
|
missing_accept_enc_ratio, http_scheme_ratio
|
||||||
FROM (
|
FROM (
|
||||||
SELECT
|
SELECT
|
||||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||||
any(ja4) AS ja4,
|
any(ja4) AS ja4,
|
||||||
any(src_country_code) AS country,
|
any(src_country_code) AS country,
|
||||||
any(src_as_name) AS asn_name,
|
any(src_as_name) AS asn_name,
|
||||||
sum(hits) AS hits,
|
sum(hits) AS total_hits,
|
||||||
round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
|
round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
|
||||||
round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
|
round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
|
||||||
round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
|
round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
|
||||||
round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
|
round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
|
||||||
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio
|
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
|
||||||
|
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
|
||||||
|
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
|
||||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||||
GROUP BY src_ip
|
GROUP BY src_ip
|
||||||
)
|
)
|
||||||
WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3
|
WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3
|
||||||
OR head_ratio > 0.1 OR tls12_ratio > 0.5
|
OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR missing_accept_enc_ratio > 0.3
|
||||||
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio) DESC
|
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio + missing_accept_enc_ratio) DESC
|
||||||
LIMIT %(limit)s
|
LIMIT %(limit)s
|
||||||
"""
|
"""
|
||||||
result = db.query(sql, {"limit": limit})
|
result = db.query(sql, {"limit": limit})
|
||||||
@ -275,6 +278,8 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
|
|||||||
"tls12_ratio": float(row[7] or 0),
|
"tls12_ratio": float(row[7] or 0),
|
||||||
"generic_accept_ratio": float(row[8] or 0),
|
"generic_accept_ratio": float(row[8] or 0),
|
||||||
"http10_ratio": float(row[9] or 0),
|
"http10_ratio": float(row[9] or 0),
|
||||||
|
"missing_accept_enc_ratio":float(row[10] or 0),
|
||||||
|
"http_scheme_ratio": float(row[11] or 0),
|
||||||
})
|
})
|
||||||
return {"items": items, "total": len(items)}
|
return {"items": items, "total": len(items)}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user