feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized

Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 16:42:59 +02:00
commit d469e39da7
278 changed files with 1621301 additions and 0 deletions

View File

@ -0,0 +1,139 @@
-- =============================================================================
-- 03_anubis_tables.sql — Anubis crawler rule tables and dictionaries
-- Items 1-8 from bot_detector/anubis/deploy_schema.sql
-- =============================================================================
-- -----------------------------------------------------------------------------
-- 1. SOURCE TABLE — User-Agent rules
--    Read by the REGEXP_TREE dictionary mabase_prod.dict_anubis_ua.
--    The column set (id / parent_id / regexp / keys / values) is the shape
--    ClickHouse expects from a table-backed REGEXP_TREE source.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_ua_rules
(
    id        UInt64,          -- sorting key of the table
    parent_id UInt64,
    regexp    String,
    keys      Array(String),
    values    Array(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY id;
-- -----------------------------------------------------------------------------
-- 2. SOURCE TABLE — IP/CIDR rules
--    Read by the IP_TRIE dictionary mabase_prod.dict_anubis_ip, which uses
--    `prefix` as its primary key.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_ip_rules
(
    prefix   String,                  -- sorting key of the table
    bot_name LowCardinality(String),
    action   LowCardinality(String),
    rule_id  UInt64,
    has_ua   UInt8,                   -- presumably a 0/1 flag; confirm semantics with the loader
    category LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY prefix;
-- -----------------------------------------------------------------------------
-- 3. DICTIONARY — User-Agent matcher (REGEXP_TREE layout)
--    Example lookup:
--      dictGet('mabase_prod.dict_anubis_ua', 'bot_name', header_user_agent)
--    NOTE: replace the 'CHANGE_ME' placeholder with the real ClickHouse admin
--    password before running this file.
--    The dictionary is dropped and re-created on every run so that changes to
--    this definition take effect.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_ua;

CREATE DICTIONARY mabase_prod.dict_anubis_ua
(
    regexp   String,
    bot_name String,
    action   String
)
PRIMARY KEY regexp
SOURCE(CLICKHOUSE(
    HOST 'localhost'
    PORT 9000
    USER 'admin'
    PASSWORD 'CHANGE_ME'
    DB 'mabase_prod'
    TABLE 'anubis_ua_rules'
))
LAYOUT(REGEXP_TREE)
-- Reload interval bounds (MIN/MAX), in seconds.
LIFETIME(MIN 300 MAX 600);
-- -----------------------------------------------------------------------------
-- 4. DICTIONARY — IP/CIDR matcher (IP_TRIE layout)
--    Example lookup:
--      dictGetOrDefault('mabase_prod.dict_anubis_ip', 'bot_name', toIPv6(src_ip), '')
--    NOTE: replace the 'CHANGE_ME' placeholder with the real ClickHouse admin
--    password before running this file.
--    The dictionary is dropped and re-created on every run so that changes to
--    this definition take effect.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_ip;

CREATE DICTIONARY mabase_prod.dict_anubis_ip
(
    prefix   String,
    bot_name String,
    action   String,
    rule_id  UInt64,
    has_ua   UInt8,
    category String
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(
    HOST 'localhost'
    PORT 9000
    USER 'admin'
    PASSWORD 'CHANGE_ME'
    DB 'mabase_prod'
    TABLE 'anubis_ip_rules'
))
LAYOUT(IP_TRIE())
-- Reload interval bounds (MIN/MAX), in seconds.
LIFETIME(MIN 300 MAX 600);
-- -----------------------------------------------------------------------------
-- 5. SOURCE TABLE — ASN rules
--    Read by the dictionary mabase_prod.dict_anubis_asn, keyed on `asn`.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_asn_rules
(
    asn      UInt32,                  -- sorting key of the table
    bot_name LowCardinality(String),
    action   LowCardinality(String),
    category LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY asn;
-- -----------------------------------------------------------------------------
-- 6. SOURCE TABLE — Country rules
--    Read by the dictionary mabase_prod.dict_anubis_country, keyed on
--    `country_code`.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.anubis_country_rules
(
    country_code LowCardinality(String),  -- sorting key of the table
    bot_name     LowCardinality(String),
    action       LowCardinality(String),
    category     LowCardinality(String)
)
ENGINE = ReplacingMergeTree()
ORDER BY country_code;
-- -----------------------------------------------------------------------------
-- 7. DICTIONARY — ASN lookup (HASHED layout)
--    Example lookup:
--      dictGetOrDefault('mabase_prod.dict_anubis_asn', 'bot_name', src_asn, '')
--    HASHED is used rather than FLAT: FLAT refuses to load the dictionary when
--    any key exceeds its max_array_size (500,000 by default), while `asn` is a
--    UInt32 and 4-byte ASNs can be far larger. A single such rule would make
--    every reload fail. The lookup call is the same for both layouts.
--    NOTE: replace the 'CHANGE_ME' placeholder with the real ClickHouse admin
--    password before running this file.
--    The dictionary is dropped and re-created on every run so that changes to
--    this definition take effect.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_asn;

CREATE DICTIONARY mabase_prod.dict_anubis_asn
(
    asn      UInt32,
    bot_name String,
    action   String,
    category String
)
PRIMARY KEY asn
SOURCE(CLICKHOUSE(
    HOST 'localhost'
    PORT 9000
    USER 'admin'
    PASSWORD 'CHANGE_ME'
    DB 'mabase_prod'
    TABLE 'anubis_asn_rules'
))
LAYOUT(HASHED())
-- Reload interval bounds (MIN/MAX), in seconds.
LIFETIME(MIN 300 MAX 600);
-- -----------------------------------------------------------------------------
-- 8. DICTIONARY — Country lookup (COMPLEX_KEY_HASHED layout)
--    Example lookup (complex-key dictionaries take the key as a tuple):
--      dictGetOrDefault('mabase_prod.dict_anubis_country', 'bot_name', tuple(src_country_code), '')
--    The key `country_code` is a String. FLAT only supports unsigned-integer
--    keys, so a String-keyed dictionary needs a complex-key layout;
--    COMPLEX_KEY_HASHED is the hashed variant that accepts arbitrary key types.
--    NOTE: replace the 'CHANGE_ME' placeholder with the real ClickHouse admin
--    password before running this file.
--    The dictionary is dropped and re-created on every run so that changes to
--    this definition take effect.
-- -----------------------------------------------------------------------------
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_country;

CREATE DICTIONARY mabase_prod.dict_anubis_country
(
    country_code String,
    bot_name     String,
    action       String,
    category     String
)
PRIMARY KEY country_code
SOURCE(CLICKHOUSE(
    HOST 'localhost'
    PORT 9000
    USER 'admin'
    PASSWORD 'CHANGE_ME'
    DB 'mabase_prod'
    TABLE 'anubis_country_rules'
))
LAYOUT(COMPLEX_KEY_HASHED())
-- Reload interval bounds (MIN/MAX), in seconds.
LIFETIME(MIN 300 MAX 600);