Architecture: - ja4_logs: raw log ingestion (http_logs_raw, http_logs, mv_http_logs) - ja4_processing: analytics, aggregation, ML, dictionaries, audit Configuration (env vars): - CLICKHOUSE_DB_LOGS (default: ja4_logs) - CLICKHOUSE_DB_PROCESSING (default: ja4_processing) Changes: - SQL migrations (10 files): all mabase_prod refs → ja4_logs or ja4_processing with correct cross-database references (MVs, views, dicts) - deploy_schema.sh: substitutes DB names from env vars at deploy time - Python shared settings: added CLICKHOUSE_DB_LOGS + CLICKHOUSE_DB_PROCESSING - Dashboard routes (19 files): replaced ~80 hardcoded mabase_prod refs with settings.CLICKHOUSE_DB_LOGS / settings.CLICKHOUSE_DB_PROCESSING - Bot-detector: DB → CLICKHOUSE_DB_PROCESSING, fetch_rules.py configurable - Correlator: DSN example updated to ja4_logs - Docker-compose + .env files: new env vars with defaults - All documentation updated (14 markdown files) All tests pass: sentinel 10/10, correlator 67.1%, bot-detector 11, dashboard 20, ja4_common 18 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
91 lines
4.2 KiB
SQL
91 lines
4.2 KiB
SQL
-- =============================================================================
-- 06_ml_tables.sql — ML detection results tables and the IP recurrence view
-- Source: bot_detector/deploy_views.sql sections 6-6b + deploy_schema.sql items 11-12
-- =============================================================================
|
|
|
|
-- -----------------------------------------------------------------------------
-- ml_detected_anomalies — anomaly detections above threat threshold
--
-- Storage behaviour:
--   * ReplacingMergeTree(detected_at) sorted by src_ip: rows sharing a src_ip
--     are deduplicated by background merges, keeping the row with the highest
--     detected_at. Until a merge happens (or unless a reader uses FINAL),
--     several rows per src_ip can still be visible.
--   * Rows expire 30 days after detected_at (table TTL).
--
-- NOTE(review): view_ip_recurrence counts rows per src_ip in this table, while
-- this engine collapses rows per src_ip. The count therefore depends on merge
-- state — confirm this is intended.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ja4_processing.ml_detected_anomalies
(
    -- Detection identity and verdict
    detected_at                DateTime,
    src_ip                     IPv6,
    ja4                        String,
    host                       String,
    bot_name                   String,
    anomaly_score              Float32,
    threat_level               String,
    model_name                 String,
    recurrence                 UInt32,

    -- ASN / geo attributes
    asn_number                 String,
    asn_org                    String,
    asn_detail                 String,
    asn_domain                 String,
    country_code               String,
    asn_label                  String,

    -- Per-detection feature values
    -- (presumably the model inputs — TODO confirm against the bot-detector)
    hits                       UInt64,
    hit_velocity               Float32,
    fuzzing_index              Float32,
    post_ratio                 Float32,
    port_exhaustion_ratio      Float32,
    max_keepalives             UInt32,
    orphan_ratio               Float32,
    tcp_jitter_variance        Float32,
    tcp_shared_count           UInt32,
    true_window_size           UInt64,
    window_mss_ratio           Float32,
    alpn_http_mismatch         UInt8,
    is_alpn_missing            UInt8,
    sni_host_mismatch          UInt8,
    header_count               UInt16,
    has_accept_language        UInt8,
    has_cookie                 UInt8,
    has_referer                UInt8,
    modern_browser_score       UInt8,
    is_headless                UInt8,
    ua_ch_mismatch             UInt8,
    header_order_shared_count  UInt32,
    ip_id_zero_ratio           Float32,
    request_size_variance      Float32,
    multiplexing_efficiency    Float32,
    mss_mobile_mismatch        UInt8,
    correlated                 UInt8,
    reason                     String,
    asset_ratio                Float32,
    direct_access_ratio        Float32,
    is_ua_rotating             UInt8,
    distinct_ja4_count         UInt32,
    src_port_density           Float32,
    ja4_asn_concentration      Float32,
    ja4_country_concentration  Float32,
    is_rare_ja4                UInt8,
    header_order_confidence    Float32,
    distinct_header_orders     UInt32,
    temporal_entropy           Float32,
    path_diversity_ratio       Float32,
    url_depth_variance         Float32,
    anomalous_payload_ratio    Float32,

    -- v11 additions (explicit defaults, so an INSERT may omit them)
    campaign_id                Int32   DEFAULT -1,
    raw_anomaly_score          Float32 DEFAULT 0,

    -- Anubis enrichment (deploy_schema.sql item 11)
    anubis_bot_name            LowCardinality(String) DEFAULT '',
    anubis_bot_action          LowCardinality(String) DEFAULT '',
    anubis_bot_category        LowCardinality(String) DEFAULT ''
)
ENGINE = ReplacingMergeTree(detected_at)
ORDER BY (src_ip)
TTL detected_at + INTERVAL 30 DAY;
|
|
|
|
|
|
-- -----------------------------------------------------------------------------
-- ml_all_scores — all classifications (no threshold, for observability)
--
-- Storage behaviour:
--   * ReplacingMergeTree(detected_at): rows sharing the full sorting key
--     (window_start, src_ip, ja4, host, model_name) are deduplicated by
--     background merges, keeping the row with the highest detected_at.
--   * Rows expire 3 days after window_start (table TTL).
--
-- NOTE(review): campaign_id has no explicit DEFAULT here, so an INSERT that
-- omits it stores the type default 0, whereas ml_detected_anomalies declares
-- DEFAULT -1. Confirm whether the two tables should agree.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ja4_processing.ml_all_scores
(
    -- Timing
    detected_at          DateTime,
    window_start         DateTime,

    -- Classified entity
    src_ip               IPv6,
    ja4                  String,
    host                 String,
    bot_name             String,

    -- Verdict
    anomaly_score        Float32,
    raw_anomaly_score    Float32,
    threat_level         String,
    model_name           String,
    correlated           UInt8,

    -- ASN / geo attributes
    asn_number           String,
    asn_org              String,
    country_code         String,
    asn_label            String,

    -- Traffic features kept alongside the score
    hits                 UInt64,
    hit_velocity         Float32,
    fuzzing_index        Float32,
    post_ratio           Float32,

    campaign_id          Int32,

    -- Anubis enrichment (deploy_schema.sql item 12)
    anubis_bot_name      LowCardinality(String) DEFAULT '',
    anubis_bot_action    LowCardinality(String) DEFAULT '',
    anubis_bot_category  LowCardinality(String) DEFAULT ''
)
ENGINE = ReplacingMergeTree(detected_at)
ORDER BY (window_start, src_ip, ja4, host, model_name)
TTL window_start + INTERVAL 3 DAY
SETTINGS index_granularity = 8192;
|
|
|
|
|
|
-- -----------------------------------------------------------------------------
-- view_ip_recurrence — recurrence aggregation over ml_detected_anomalies
--
-- Produces one row per src_ip with:
--   recurrence          number of rows currently stored for that IP
--   first_seen          earliest detected_at
--   last_seen           latest detected_at
--   worst_score         minimum anomaly_score
--   worst_threat_level  threat_level of the row holding that minimum score
--
-- NOTE(review): "worst" is computed with min()/argMin(), which implies that a
-- lower anomaly_score means a more severe detection — confirm against the
-- model's scoring convention.
-- -----------------------------------------------------------------------------
CREATE OR REPLACE VIEW ja4_processing.view_ip_recurrence AS
SELECT
    src_ip,
    count()                              AS recurrence,
    min(detected_at)                     AS first_seen,
    max(detected_at)                     AS last_seen,
    min(anomaly_score)                   AS worst_score,
    argMin(threat_level, anomaly_score)  AS worst_threat_level
FROM ja4_processing.ml_detected_anomalies
GROUP BY src_ip;
|