diff --git a/backend/models.py b/backend/models.py index fe59ded..b146a4c 100644 --- a/backend/models.py +++ b/backend/models.py @@ -81,6 +81,9 @@ class Detection(BaseModel): last_seen: Optional[datetime] = None unique_ja4s: Optional[List[str]] = None unique_hosts: Optional[List[str]] = None + anubis_bot_name: str = "" + anubis_bot_action: str = "" + anubis_bot_category: str = "" class DetectionsListResponse(BaseModel): diff --git a/backend/routes/detections.py b/backend/routes/detections.py index 9ce2f67..662e463 100644 --- a/backend/routes/detections.py +++ b/backend/routes/detections.py @@ -97,15 +97,18 @@ async def get_detections( ip_data.unique_ja4s, ip_data.unique_hosts, ip_data.min_score AS anomaly_score, - ip_data.threat_level, - ip_data.model_name, + ip_data.threat_level_best, + ip_data.model_name_best, ip_data.country_code, ip_data.asn_number, ip_data.asn_org, ip_data.hit_velocity, ip_data.hits, ip_data.asn_label, - ar.label AS asn_rep_label + ar.label AS asn_rep_label, + ip_data.anubis_bot_name_best, + ip_data.anubis_bot_action_best, + ip_data.anubis_bot_category_best FROM ( SELECT src_ip, @@ -115,14 +118,17 @@ async def get_detections( groupUniqArray(5)(ja4) AS unique_ja4s, groupUniqArray(5)(host) AS unique_hosts, min(anomaly_score) AS min_score, - argMin(threat_level, anomaly_score) AS threat_level, - argMin(model_name, anomaly_score) AS model_name, + argMin(threat_level, anomaly_score) AS threat_level_best, + argMin(model_name, anomaly_score) AS model_name_best, any(country_code) AS country_code, any(asn_number) AS asn_number, any(asn_org) AS asn_org, max(hit_velocity) AS hit_velocity, sum(hits) AS hits, - any(asn_label) AS asn_label + any(asn_label) AS asn_label, + argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best, + argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best, + argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best FROM ml_detected_anomalies WHERE {where_clause} GROUP BY src_ip @@ -145,8 +151,9 @@ async def get_detections( detections = [] for row in gresult.result_rows: # row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts, - # anomaly_score, threat_level, model_name, country_code, asn_number, asn_org, - # hit_velocity, hits, asn_label, asn_rep_label + # anomaly_score, threat_level_best, model_name_best, country_code, asn_number, + # asn_org, hit_velocity, hits, asn_label, asn_rep_label, + # anubis_bot_name, anubis_bot_action, anubis_bot_category ja4s = list(row[4]) if row[4] else [] hosts = list(row[5]) if row[5] else [] detections.append(Detection( @@ -176,6 +183,9 @@ async def get_detections( last_seen=row[2], unique_ja4s=ja4s, unique_hosts=hosts, + anubis_bot_name=row[16] or "", + anubis_bot_action=row[17] or "", + anubis_bot_category=row[18] or "", )) total_pages = (total + page_size - 1) // page_size @@ -215,7 +225,10 @@ async def get_detections( fuzzing_index, post_ratio, reason, - ar.label AS asn_rep_label + ar.label AS asn_rep_label, + anubis_bot_name, + anubis_bot_action, + anubis_bot_category FROM ml_detected_anomalies LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number) WHERE {where_clause} @@ -267,6 +280,9 @@ async def get_detections( reason=row[19] or "", asn_rep_label=row[20] or "", asn_score=_label_to_score(row[20] or ""), + anubis_bot_name=row[21] or "", + anubis_bot_action=row[22] or "", + anubis_bot_category=row[23] or "", ) for row in result.result_rows ] diff --git a/backend/routes/ml_features.py b/backend/routes/ml_features.py index 2868818..36954d9 100644 --- a/backend/routes/ml_features.py +++ b/backend/routes/ml_features.py @@ -238,43 +238,48 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)): """ try: sql = """ - SELECT ip, ja4, country, asn_name, hits, - head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio + SELECT ip, ja4, country, asn_name, total_hits AS hits, + head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio, + missing_accept_enc_ratio, http_scheme_ratio FROM ( SELECT - replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, - any(ja4) AS ja4, - any(src_country_code) AS country, - any(src_as_name) AS asn_name, - sum(hits) AS hits, - round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio, - round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence, - round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio, - round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio, - round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio + replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, + any(ja4) AS ja4, + any(src_country_code) AS country, + any(src_as_name) AS asn_name, + sum(hits) AS total_hits, + round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio, + round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence, + round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio, + round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio, + round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio, + round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio, + round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio FROM mabase_prod.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR GROUP BY src_ip ) WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3 - OR head_ratio > 0.1 OR tls12_ratio > 0.5 - ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio) DESC + OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR missing_accept_enc_ratio > 0.3 + ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio + missing_accept_enc_ratio) DESC LIMIT %(limit)s """ result = db.query(sql, {"limit": limit}) items = [] for row in result.result_rows: items.append({ - "ip": str(row[0]), - "ja4": str(row[1] or ""), - "country": str(row[2] or ""), - "asn_name": str(row[3] or ""), - "hits": int(row[4] or 0), - "head_ratio": float(row[5] or 0), - "sec_fetch_absence": float(row[6] or 0), - "tls12_ratio": float(row[7] or 0), - "generic_accept_ratio":float(row[8] or 0), - "http10_ratio": float(row[9] or 0), + "ip": str(row[0]), + "ja4": str(row[1] or ""), + "country": str(row[2] or ""), + "asn_name": str(row[3] or ""), + "hits": int(row[4] or 0), + "head_ratio": float(row[5] or 0), + "sec_fetch_absence": float(row[6] or 0), + "tls12_ratio": float(row[7] or 0), + "generic_accept_ratio": float(row[8] or 0), + "http10_ratio": float(row[9] or 0), + "missing_accept_enc_ratio":float(row[10] or 0), + "http_scheme_ratio": float(row[11] or 0), }) return {"items": items, "total": len(items)} except Exception as e: