feat: browser JA4 detection, Anubis bot rules, worldwide ASN data
- Add generate_browser_ja4.py: 1,186 browser JA4 fingerprints from FoxIO + ja4db.com covering 11 families (Chromium, Firefox, Safari, Edge, Tor, Opera, Vivaldi...) - Rewrite generate_bot_ip.py: Anubis YAML rules (Google, Bing, Apple, DuckDuck, OpenAI, Perplexity bots) + Tor exit nodes + cloud scanner IPs (3,555 entries) - Rewrite generate_asn_data.py: worldwide iptoasn.com data (78,049 ASNs, 714K CIDRs) - Add dict_browser_ja4 ClickHouse dictionary + browser_family in AI features views - Add /api/browsers dashboard endpoint - Fix CSV quoting for fields containing commas (User-Agent strings) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -40,6 +40,19 @@ SOURCE(FILE(path '/var/lib/clickhouse/user_files/asn_reputation.csv' format 'CSV
|
||||
LAYOUT(HASHED())
|
||||
LIFETIME(MIN 300 MAX 300);
|
||||
|
||||
-- dict_browser_ja4: lookup dictionary keyed by a JA4 fingerprint string that
-- returns the browser_family, tls_library and context recorded for it.
-- The dictionary is dropped first so that re-running this script always
-- recreates it with the definition below.
DROP DICTIONARY IF EXISTS ja4_processing.dict_browser_ja4;
|
||||
CREATE DICTIONARY ja4_processing.dict_browser_ja4
|
||||
(
|
||||
-- Key column: the JA4 fingerprint (see PRIMARY KEY below).
ja4 String,
|
||||
-- Attribute columns returned by dictGet / dictGetOrDefault lookups.
browser_family String,
|
||||
tls_library String,
|
||||
context String
|
||||
)
|
||||
PRIMARY KEY ja4
|
||||
-- Rows are read from a CSV file in the ClickHouse user_files directory.
-- NOTE(review): column order in the CSV is presumably ja4, browser_family,
-- tls_library, context, with no header row — confirm against the generator.
SOURCE(FILE(path '/var/lib/clickhouse/user_files/browser_ja4.csv' format 'CSV'))
|
||||
-- The key is a String rather than UInt64, so a complex-key layout is required;
-- callers must pass the key wrapped in a tuple, e.g. tuple(ja4).
LAYOUT(COMPLEX_KEY_HASHED())
|
||||
-- MIN and MAX are both 300 seconds, so the refresh interval is fixed at
-- 300 seconds instead of being randomized within a range.
LIFETIME(MIN 300 MAX 300);
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- agg_host_ip_ja4_1h — behavioral aggregation (L4/L5/L7)
|
||||
|
||||
@ -17,6 +17,7 @@ WITH base_data AS (
|
||||
nullIf(dictGetOrDefault('ja4_processing.dict_bot_ja4', 'bot_name', tuple(a.ja4), ''), ''),
|
||||
''
|
||||
) AS bot_name,
|
||||
dictGetOrDefault('ja4_processing.dict_browser_ja4', 'browser_family', tuple(a.ja4), '') AS browser_family,
|
||||
-- Anubis: combined UA+IP priority logic > UA only > IP only > ASN > Country
|
||||
CASE
|
||||
WHEN dictGet('ja4_processing.dict_anubis_ua', 'has_ip', a.first_ua) = '1'
|
||||
|
||||
Reference in New Issue
Block a user