feat: browser JA4 detection, Anubis bot rules, worldwide ASN data
- Add generate_browser_ja4.py: 1,186 browser JA4 fingerprints from FoxIO + ja4db.com covering 11 families (Chromium, Firefox, Safari, Edge, Tor, Opera, Vivaldi, ...)
- Rewrite generate_bot_ip.py: Anubis YAML rules (Google, Bing, Apple, DuckDuckGo, OpenAI, Perplexity bots) + Tor exit nodes + cloud scanner IPs (3,555 entries)
- Rewrite generate_asn_data.py: worldwide iptoasn.com data (78,049 ASNs, 714K CIDRs)
- Add dict_browser_ja4 ClickHouse dictionary + browser_family in AI features views
- Add /api/browsers dashboard endpoint
- Fix CSV quoting for fields containing commas (User-Agent strings)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -18,6 +18,7 @@
|
||||
DROP TABLE IF EXISTS ja4_processing.ml_all_scores;
|
||||
DROP DICTIONARY IF EXISTS ja4_processing.dict_bot_ip;
|
||||
DROP DICTIONARY IF EXISTS ja4_processing.dict_bot_ja4;
|
||||
DROP DICTIONARY IF EXISTS ja4_processing.dict_browser_ja4;
|
||||
DROP DICTIONARY IF EXISTS ja4_processing.dict_asn_reputation;
|
||||
DROP TABLE IF EXISTS ja4_processing.ml_detected_anomalies;
|
||||
DROP VIEW IF EXISTS ja4_processing.view_ip_recurrence;
|
||||
@ -58,6 +59,10 @@ CREATE DICTIONARY ja4_processing.dict_asn_reputation (src_asn UInt64, label Stri
|
||||
PRIMARY KEY src_asn SOURCE(FILE(path '/var/lib/clickhouse/user_files/asn_reputation.csv' format 'CSV'))
|
||||
LAYOUT(HASHED()) LIFETIME(MIN 300 MAX 300);
|
||||
|
||||
-- Dictionary keyed by JA4 fingerprint string, exposing browser_family,
-- tls_library and context attributes read from browser_ja4.csv.
-- The key is a String, so a complex-key layout is used and lookups must pass
-- the key as a tuple, e.g. dictGetOrDefault(..., tuple(ja4), '').
-- LIFETIME(MIN 300 MAX 300): fixed 300-second refresh interval.
CREATE DICTIONARY ja4_processing.dict_browser_ja4
(
    ja4 String,
    browser_family String,
    tls_library String,
    context String
)
PRIMARY KEY ja4
SOURCE(FILE(path '/var/lib/clickhouse/user_files/browser_ja4.csv' format 'CSV'))
LAYOUT(COMPLEX_KEY_HASHED())
LIFETIME(MIN 300 MAX 300);
|
||||
|
||||
-- ============================================================================
|
||||
-- 3. TABLE D'AGRÉGATION COMPORTEMENTALE (L4 / L5 / L7)
|
||||
-- ============================================================================
|
||||
@ -291,6 +296,7 @@ WITH base_data AS (
|
||||
nullIf(dictGetOrDefault('ja4_processing.dict_bot_ja4', 'bot_name', tuple(a.ja4), ''), ''),
|
||||
''
|
||||
) AS bot_name,
|
||||
dictGetOrDefault('ja4_processing.dict_browser_ja4', 'browser_family', tuple(a.ja4), '') AS browser_family,
|
||||
a.hits AS hits,
|
||||
sum(a.hits) OVER (PARTITION BY a.src_ip) AS total_ip_hits,
|
||||
a.correlated AS correlated, a.tcp_jitter_variance AS tcp_jitter_variance,
|
||||
@ -437,6 +443,7 @@ WITH base_data AS (
|
||||
nullIf(dictGetOrDefault('ja4_processing.dict_bot_ja4', 'bot_name', tuple(a.ja4), ''), ''),
|
||||
''
|
||||
) AS bot_name,
|
||||
dictGetOrDefault('ja4_processing.dict_browser_ja4', 'browser_family', tuple(a.ja4), '') AS browser_family,
|
||||
a.hits AS hits,
|
||||
sum(a.hits) OVER (PARTITION BY a.src_ip) AS total_ip_hits,
|
||||
a.correlated AS correlated, a.tcp_jitter_variance AS tcp_jitter_variance,
|
||||
|
||||
@ -545,6 +545,43 @@ async def fingerprints() -> dict[str, Any]:
|
||||
return {"ja4_stats": [], "bot_ja4": []}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/browsers — Browser family distribution from JA4 fingerprints
|
||||
# ---------------------------------------------------------------------------
|
||||
@router.get("/browsers")
async def browsers() -> dict[str, Any]:
    """Return the browser-family breakdown derived from JA4 TLS fingerprints.

    All three queries read ``{_DB}.view_ai_features_1h``. Rows whose
    ``browser_family`` is the empty string are treated as "no browser match".

    Returns:
        A dict with three keys:

        * ``distribution`` - result of ``query()`` with one row per
          non-empty ``browser_family`` (``sessions``, ``unique_ips``,
          ``total_hits``), ordered by ``sessions`` descending.
        * ``unknown_sessions`` - number of rows with neither a browser
          family nor a bot name (``0`` when the scalar query yields a
          falsy value).
        * ``top_ja4_by_browser`` - result of ``query()`` with the most
          frequent JA4 fingerprints of each browser family, at most
          ``top_per_family`` rows per family.

        If any query raises, the error is logged with its traceback and the
        same structure is returned with empty lists and a zero count, so the
        endpoint never propagates a query failure to the caller.
    """
    # Maximum number of JA4 fingerprints reported for each browser family.
    top_per_family = 5

    try:
        distribution = query(
            "SELECT browser_family, count() AS sessions, "
            "uniqExact(src_ip) AS unique_ips, sum(hits) AS total_hits "
            f"FROM {_DB}.view_ai_features_1h "
            "WHERE browser_family != '' "
            "GROUP BY browser_family ORDER BY sessions DESC"
        )
        # Sessions that matched neither a known browser nor a known bot.
        unknown = query_scalar(
            f"SELECT count() FROM {_DB}.view_ai_features_1h "
            "WHERE browser_family = '' AND bot_name = ''"
        )
        # Top JA4 per browser family. LIMIT ... BY caps the rows of *each*
        # family; a plain LIMIT after ORDER BY browser_family would only keep
        # the alphabetically-first families and silently drop the others.
        top_ja4 = query(
            "SELECT browser_family, ja4, count() AS sessions "
            f"FROM {_DB}.view_ai_features_1h "
            "WHERE browser_family != '' "
            "GROUP BY browser_family, ja4 ORDER BY browser_family, sessions DESC "
            f"LIMIT {top_per_family} BY browser_family"
        )
    except Exception:
        # Best-effort dashboard endpoint: log the traceback, return an empty
        # payload rather than an HTTP error.
        logger.exception("browsers query failed")
        return {"distribution": [], "unknown_sessions": 0, "top_ja4_by_browser": []}

    return {
        "distribution": distribution,
        "unknown_sessions": unknown or 0,
        "top_ja4_by_browser": top_ja4,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/behavior — Feature scatter + distributions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user