-- =============================================================================
-- 03_anubis_tables.sql — Anubis crawler rule tables and dictionaries
-- Only IP/CIDR and ASN rules are used. UA and Country have been removed.
--
-- Layout of this file:
--   1. Source table for IP/CIDR rules
--   2. Source table for ASN rules
--   3. IP_TRIE dictionary built on top of (1)
--   4. FLAT dictionary built on top of (2)
--
-- The CREATE TABLE statements are idempotent (IF NOT EXISTS). The dictionaries
-- are dropped and recreated on every run so that definition changes take
-- effect; between DROP and CREATE the dictionary does not exist.
-- =============================================================================

-- -----------------------------------------------------------------------------
-- 1. SOURCE TABLE — IP/CIDR rules (for IP_TRIE dictionary)
--    Populated by fetch_rules.py from Anubis GitHub data.
--    One row per network prefix; `prefix` is the sorting key, so
--    ReplacingMergeTree collapses rows with the same prefix at merge time.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ja4_processing.anubis_ip_rules
(
    prefix   String,                  -- network prefix; used as the IP_TRIE key
    bot_name LowCardinality(String),
    action   LowCardinality(String),
    rule_id  UInt64,
    has_ua   UInt8,                   -- presumably 1 when the upstream rule also had a User-Agent condition — TODO confirm
    category LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY prefix;

-- -----------------------------------------------------------------------------
-- 2. SOURCE TABLE — ASN rules (for Flat dictionary)
--    Populated by fetch_rules.py from botPolicies.yaml.
--    One row per autonomous system number; `asn` is the sorting key, so
--    ReplacingMergeTree collapses rows with the same ASN at merge time.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ja4_processing.anubis_asn_rules
(
    asn      UInt32,
    bot_name LowCardinality(String),
    action   LowCardinality(String),
    category LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY asn;

-- -----------------------------------------------------------------------------
-- 3. DICTIONARY — IP IP_TRIE (active)
--    dictGetOrDefault('ja4_processing.dict_anubis_ip', 'bot_name', toIPv6(src_ip), '')
--
--    Reads ja4_processing.anubis_ip_rules through the ClickHouse source at
--    localhost:9000 and reloads at a random interval of 300–600 seconds.
--    NOTE: PASSWORD 'CHANGE_ME' is a placeholder; replace it with the real
--    credential for USER 'admin' before running this script.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS ja4_processing.dict_anubis_ip;

CREATE DICTIONARY ja4_processing.dict_anubis_ip
(
    prefix   String,
    bot_name String,
    action   String,
    rule_id  UInt64,
    has_ua   UInt8,
    category String
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(
    HOST 'localhost'
    PORT 9000
    USER 'admin'
    PASSWORD 'CHANGE_ME'
    DB 'ja4_processing'
    TABLE 'anubis_ip_rules'
))
LAYOUT(IP_TRIE())
LIFETIME(MIN 300 MAX 600);

-- -----------------------------------------------------------------------------
-- 4. DICTIONARY — ASN Flat (active)
--    dictGetOrDefault('ja4_processing.dict_anubis_asn', 'bot_name', src_asn, '')
--
--    Reads ja4_processing.anubis_asn_rules through the ClickHouse source at
--    localhost:9000 and reloads at a random interval of 300–600 seconds.
--    NOTE: PASSWORD 'CHANGE_ME' is a placeholder; replace it with the real
--    credential for USER 'admin' before running this script.
--    NOTE(review): the FLAT layout rejects keys above its max_array_size
--    (500,000 by default), while `asn` is UInt32. A rule with a larger ASN
--    would make the dictionary fail to load — confirm the rule set stays
--    below that bound or raise the limit / pick another layout.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS ja4_processing.dict_anubis_asn;

CREATE DICTIONARY ja4_processing.dict_anubis_asn
(
    asn      UInt32,
    bot_name String,
    action   String,
    category String
)
PRIMARY KEY asn
SOURCE(CLICKHOUSE(
    HOST 'localhost'
    PORT 9000
    USER 'admin'
    PASSWORD 'CHANGE_ME'
    DB 'ja4_processing'
    TABLE 'anubis_asn_rules'
))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 600);