-- =============================================================================
-- 03_anubis_tables.sql — Anubis crawler rule tables and dictionaries
-- Items 1–8 from bot_detector/anubis/deploy_schema.sql
--
-- Layout: each dictionary is backed by a ReplacingMergeTree source table in
-- mabase_prod and is reloaded every 300–600 seconds (LIFETIME MIN/MAX).
-- Source tables are created idempotently (IF NOT EXISTS); dictionaries are
-- dropped and recreated so that definition changes take effect on re-run.
--
-- Before deploying: replace every 'CHANGE_ME' below with the actual
-- ClickHouse admin password.
-- =============================================================================

-- -----------------------------------------------------------------------------
-- 1. TABLE SOURCE — User-Agent rules (for REGEXP_TREE dictionary)
--    Column set (id, parent_id, regexp, keys, values) is the shape the
--    REGEXP_TREE layout reads from a ClickHouse table source; the dictionary
--    attributes (bot_name, action) are supplied through keys/values.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_ua_rules
(
    id        UInt64,
    parent_id UInt64,
    regexp    String,
    keys      Array(String),
    values    Array(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY id;

-- -----------------------------------------------------------------------------
-- 2. TABLE SOURCE — IP/CIDR rules (for IP_TRIE dictionary)
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_ip_rules
(
    prefix   String,
    bot_name LowCardinality(String),
    action   LowCardinality(String),
    rule_id  UInt64,
    has_ua   UInt8,
    category LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY prefix;

-- -----------------------------------------------------------------------------
-- 3. DICTIONARY — UA REGEXP_TREE
--    dictGet('mabase_prod.dict_anubis_ua', 'bot_name', header_user_agent)
--    NOTE: Change 'CHANGE_ME' to the actual ClickHouse admin password before use.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_ua;

CREATE DICTIONARY mabase_prod.dict_anubis_ua
(
    regexp   String,
    bot_name String,
    action   String
)
PRIMARY KEY regexp
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'admin' PASSWORD 'CHANGE_ME' DB 'mabase_prod' TABLE 'anubis_ua_rules'))
LAYOUT(REGEXP_TREE)
LIFETIME(MIN 300 MAX 600);

-- -----------------------------------------------------------------------------
-- 4. DICTIONARY — IP IP_TRIE
--    dictGetOrDefault('mabase_prod.dict_anubis_ip', 'bot_name', toIPv6(src_ip), '')
--    NOTE: Change 'CHANGE_ME' to the actual ClickHouse admin password before use.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_ip;

CREATE DICTIONARY mabase_prod.dict_anubis_ip
(
    prefix   String,
    bot_name String,
    action   String,
    rule_id  UInt64,
    has_ua   UInt8,
    category String
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'admin' PASSWORD 'CHANGE_ME' DB 'mabase_prod' TABLE 'anubis_ip_rules'))
LAYOUT(IP_TRIE())
LIFETIME(MIN 300 MAX 600);

-- -----------------------------------------------------------------------------
-- 5. TABLE SOURCE — ASN rules (for Flat dictionary)
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_asn_rules
(
    asn      UInt32,
    bot_name LowCardinality(String),
    action   LowCardinality(String),
    category LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY asn;

-- -----------------------------------------------------------------------------
-- 6. TABLE SOURCE — Country rules (for COMPLEX_KEY_HASHED dictionary)
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_country_rules
(
    country_code LowCardinality(String),
    bot_name     LowCardinality(String),
    action       LowCardinality(String),
    category     LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY country_code;

-- -----------------------------------------------------------------------------
-- 7. DICTIONARY — ASN Flat
--    dictGetOrDefault('mabase_prod.dict_anubis_asn', 'bot_name', src_asn, '')
--    NOTE: Change 'CHANGE_ME' to the actual ClickHouse admin password before use.
--    NOTE(review): per the ClickHouse docs, FLAT expects a UInt64 key and
--    refuses to load keys above max_array_size (500,000 by default). Confirm
--    that every ASN in anubis_asn_rules stays below that limit, or raise
--    MAX_ARRAY_SIZE / switch to HASHED().
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_asn;

CREATE DICTIONARY mabase_prod.dict_anubis_asn
(
    asn      UInt32,
    bot_name String,
    action   String,
    category String
)
PRIMARY KEY asn
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'admin' PASSWORD 'CHANGE_ME' DB 'mabase_prod' TABLE 'anubis_asn_rules'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 600);

-- -----------------------------------------------------------------------------
-- 8. DICTIONARY — Country (String key, COMPLEX_KEY_HASHED)
--    dictGetOrDefault('mabase_prod.dict_anubis_country', 'bot_name', tuple(src_country_code), '')
--    The key is a String, which the FLAT layout cannot hold (FLAT is
--    UInt64-keyed only), so a composite-key layout with a single key element
--    is used; lookups pass the key wrapped in tuple().
--    NOTE: Change 'CHANGE_ME' to the actual ClickHouse admin password before use.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_country;

CREATE DICTIONARY mabase_prod.dict_anubis_country
(
    country_code String,
    bot_name     String,
    action       String,
    category     String
)
PRIMARY KEY country_code
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'admin' PASSWORD 'CHANGE_ME' DB 'mabase_prod' TABLE 'anubis_country_rules'))
LAYOUT(COMPLEX_KEY_HASHED())
LIFETIME(MIN 300 MAX 600);