feat: browser JA4 detection, Anubis bot rules, worldwide ASN data

- Add generate_browser_ja4.py: 1,186 browser JA4 fingerprints from FoxIO + ja4db.com
  covering 11 families (Chromium, Firefox, Safari, Edge, Tor, Opera, Vivaldi...)
- Rewrite generate_bot_ip.py: Anubis YAML rules (Google, Bing, Apple, DuckDuck,
  OpenAI, Perplexity bots) + Tor exit nodes + cloud scanner IPs (3,555 entries)
- Rewrite generate_asn_data.py: worldwide iptoasn.com data (78,049 ASNs, 714K CIDRs)
- Add dict_browser_ja4 ClickHouse dictionary + browser_family in AI features views
- Add /api/browsers dashboard endpoint
- Fix CSV quoting for fields containing commas (User-Agent strings)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-08 15:27:37 +02:00
parent b6184e6529
commit 7d09c614c3
15 changed files with 885900 additions and 3151 deletions

View File

@ -545,6 +545,43 @@ async def fingerprints() -> dict[str, Any]:
return {"ja4_stats": [], "bot_ja4": []}
# ---------------------------------------------------------------------------
# GET /api/browsers — Browser family distribution from JA4 fingerprints
# ---------------------------------------------------------------------------
@router.get("/browsers")
async def browsers() -> dict[str, Any]:
    """Browser identification via JA4 TLS fingerprint → browser_family dictionary.

    Runs three read-only queries against ``view_ai_features_1h``:

    * ``distribution`` — per ``browser_family`` aggregates (``sessions``,
      ``unique_ips``, ``total_hits``) for rows with a non-empty family,
      ordered by session count, highest first.
    * ``unknown_sessions`` — number of rows with neither a browser family
      nor a bot name; reported as ``0`` when the scalar query yields a
      falsy value.
    * ``top_ja4_by_browser`` — the most frequent JA4 fingerprints for each
      browser family (at most ``_TOP_JA4_PER_FAMILY`` rows per family).

    Returns:
        A dict with the three keys above. If any query raises, the error is
        logged with its traceback and an empty payload with the same keys is
        returned, so the endpoint does not propagate the failure.
    """
    # Cap is applied per family via ClickHouse's ``LIMIT n BY``. A plain
    # ``LIMIT`` after ``ORDER BY browser_family`` would keep only the
    # alphabetically first families and silently drop the others.
    _TOP_JA4_PER_FAMILY = 5
    features_view = f"{_DB}.view_ai_features_1h"

    try:
        distribution = query(
            "SELECT browser_family, count() AS sessions, "
            "uniqExact(src_ip) AS unique_ips, sum(hits) AS total_hits "
            f"FROM {features_view} "
            "WHERE browser_family != '' "
            "GROUP BY browser_family ORDER BY sessions DESC"
        )
        # Sessions that matched neither a known browser nor a known bot.
        unknown = query_scalar(
            f"SELECT count() FROM {features_view} "
            "WHERE browser_family = '' AND bot_name = ''"
        )
        # Top JA4 fingerprints within each browser family.
        top_ja4 = query(
            "SELECT browser_family, ja4, count() AS sessions "
            f"FROM {features_view} "
            "WHERE browser_family != '' "
            "GROUP BY browser_family, ja4 "
            "ORDER BY browser_family, sessions DESC "
            f"LIMIT {_TOP_JA4_PER_FAMILY} BY browser_family"
        )
        return {
            "distribution": distribution,
            "unknown_sessions": unknown or 0,
            "top_ja4_by_browser": top_ja4,
        }
    except Exception:
        # Endpoint boundary: log the full traceback and return an empty
        # payload with the usual keys instead of raising.
        logger.exception("browsers query failed")
        return {"distribution": [], "unknown_sessions": 0, "top_ja4_by_browser": []}
# ---------------------------------------------------------------------------
# GET /api/behavior — Feature scatter + distributions
# ---------------------------------------------------------------------------