feat: LEGITIMATE_BROWSER classification from JA4 + behavioral consistency
Add browser legitimacy classification (A9) to the bot detection pipeline:

- New features: is_known_browser (binary) and browser_consistency_score [0..5] combining 5 signals: JA4 browser match, modern_browser_score, Accept-Language, cookies, Sec-Fetch-* presence
- Post-scoring: sessions with known browser JA4 + consistency >= 4/5 + NORMAL/LOW threat level are reclassified as LEGITIMATE_BROWSER
- Spoofing detection: inconsistent behavior (known JA4 but low consistency) stays in normal anomaly scoring — prevents evasion via JA4 spoofing
- XGBoost treats LEGITIMATE_BROWSER as non-threat (negative label)
- ClickHouse: browser_family column added to ml_detected_anomalies and ml_all_scores
- Dashboard: browser_family filter/sort on detections and scores endpoints, legitimate_browsers count and browser_stats in overview
- 6 new unit tests covering classification threshold, spoofing, exclusion logic

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -26,13 +26,13 @@ _DETECTION_SORT_COLS = {
|
||||
"detected_at", "src_ip", "ja4", "host", "anomaly_score",
|
||||
"threat_level", "recurrence", "hits", "hit_velocity",
|
||||
"fuzzing_index", "post_ratio", "campaign_id",
|
||||
"asn_org", "country_code", "bot_name",
|
||||
"asn_org", "country_code", "bot_name", "browser_family",
|
||||
}
|
||||
_SCORE_SORT_COLS = {
|
||||
"detected_at", "window_start", "src_ip", "ja4", "host",
|
||||
"anomaly_score", "raw_anomaly_score", "threat_level",
|
||||
"hits", "hit_velocity", "xgb_prob", "ae_recon_error",
|
||||
"asn_org", "country_code",
|
||||
"asn_org", "country_code", "browser_family",
|
||||
}
|
||||
_TRAFFIC_SORT_COLS = {
|
||||
"time", "src_ip", "method", "host", "path", "http_version",
|
||||
@@ -111,6 +111,20 @@ async def overview() -> dict[str, Any]:
|
||||
"GROUP BY model_name"
|
||||
)
|
||||
|
||||
browser_stats = query(
|
||||
f"SELECT browser_family, count() AS cnt "
|
||||
f"FROM {_DB}.ml_all_scores "
|
||||
"WHERE detected_at >= now() - INTERVAL 1 DAY "
|
||||
"AND browser_family != '' "
|
||||
"GROUP BY browser_family ORDER BY cnt DESC"
|
||||
)
|
||||
|
||||
legitimate_browsers = query_scalar(
|
||||
f"SELECT count() FROM {_DB}.ml_all_scores "
|
||||
"WHERE detected_at >= now() - INTERVAL 1 DAY "
|
||||
"AND threat_level = 'LEGITIMATE_BROWSER'"
|
||||
) or 0
|
||||
|
||||
return {
|
||||
"detections_24h": detections_24h,
|
||||
"scored_24h": scored_24h,
|
||||
@@ -118,6 +132,8 @@ async def overview() -> dict[str, Any]:
|
||||
"unique_ips": unique_ips,
|
||||
"critical_count": critical_count,
|
||||
"high_count": high_count,
|
||||
"legitimate_browsers": legitimate_browsers,
|
||||
"browser_stats": browser_stats,
|
||||
"threat_distribution": threat_distribution,
|
||||
"top_ips": top_ips,
|
||||
"timeline": [{"hour": str(r["hour"]), "cnt": r["cnt"]} for r in timeline],
|
||||
@@ -143,6 +159,7 @@ async def detections(
|
||||
country_code: str | None = Query(None),
|
||||
ja4: str | None = Query(None),
|
||||
bot_name: str | None = Query(None),
|
||||
browser_family: str | None = Query(None),
|
||||
) -> dict[str, Any]:
|
||||
sort = _validate_sort(sort, _DETECTION_SORT_COLS, "detected_at")
|
||||
order = _validate_order(order)
|
||||
@@ -177,6 +194,10 @@ async def detections(
|
||||
where_clauses.append("bot_name = {bn:String}")
|
||||
params["bn"] = bot_name
|
||||
|
||||
if browser_family:
|
||||
where_clauses.append("browser_family = {bf:String}")
|
||||
params["bf"] = browser_family
|
||||
|
||||
where = " AND ".join(where_clauses)
|
||||
|
||||
try:
|
||||
@@ -219,6 +240,7 @@ async def scores(
|
||||
asn_org: str | None = Query(None),
|
||||
country_code: str | None = Query(None),
|
||||
ja4: str | None = Query(None),
|
||||
browser_family: str | None = Query(None),
|
||||
) -> dict[str, Any]:
|
||||
sort = _validate_sort(sort, _SCORE_SORT_COLS, "detected_at")
|
||||
order = _validate_order(order)
|
||||
@@ -249,6 +271,10 @@ async def scores(
|
||||
where_clauses.append("ja4 = {ja4:String}")
|
||||
params["ja4"] = ja4
|
||||
|
||||
if browser_family:
|
||||
where_clauses.append("browser_family = {bf:String}")
|
||||
params["bf"] = browser_family
|
||||
|
||||
where = " AND ".join(where_clauses)
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user