feat: split ClickHouse into dual configurable databases (ja4_logs / ja4_processing)
Architecture:
- ja4_logs: raw log ingestion (http_logs_raw, http_logs, mv_http_logs)
- ja4_processing: analytics, aggregation, ML, dictionaries, audit

Configuration (env vars):
- CLICKHOUSE_DB_LOGS (default: ja4_logs)
- CLICKHOUSE_DB_PROCESSING (default: ja4_processing)

Changes:
- SQL migrations (10 files): all mabase_prod refs → ja4_logs or ja4_processing with correct cross-database references (MVs, views, dicts)
- deploy_schema.sh: substitutes DB names from env vars at deploy time
- Python shared settings: added CLICKHOUSE_DB_LOGS + CLICKHOUSE_DB_PROCESSING
- Dashboard routes (19 files): replaced ~80 hardcoded mabase_prod refs with settings.CLICKHOUSE_DB_LOGS / settings.CLICKHOUSE_DB_PROCESSING
- Bot-detector: DB → CLICKHOUSE_DB_PROCESSING, fetch_rules.py configurable
- Correlator: DSN example updated to ja4_logs
- Docker-compose + .env files: new env vars with defaults
- All documentation updated (14 markdown files)

All tests pass: sentinel 10/10, correlator 67.1%, bot-detector 11, dashboard 20, ja4_common 18

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -13,7 +13,7 @@ Usage (depuis le container dashboard_web) :
|
||||
python /tmp/fetch_rules.py
|
||||
|
||||
Variables d'environnement :
|
||||
CLICKHOUSE_HOST, CLICKHOUSE_DB, CLICKHOUSE_USER, CLICKHOUSE_PASSWORD
|
||||
CLICKHOUSE_HOST, CLICKHOUSE_DB, CLICKHOUSE_DB_PROCESSING, CLICKHOUSE_USER, CLICKHOUSE_PASSWORD
|
||||
"""
|
||||
|
||||
import json
|
||||
@ -336,17 +336,20 @@ def collect_all_rules() -> tuple[list, list, list, list]:
|
||||
def get_ch_client():
|
||||
return clickhouse_connect.get_client(
|
||||
host=os.environ.get("CLICKHOUSE_HOST", "clickhouse"),
|
||||
database=os.environ.get("CLICKHOUSE_DB", "mabase_prod"),
|
||||
database=os.environ.get("CLICKHOUSE_DB_PROCESSING", os.environ.get("CLICKHOUSE_DB", "ja4_processing")),
|
||||
username=os.environ.get("CLICKHOUSE_USER", "admin"),
|
||||
password=os.environ.get("CLICKHOUSE_PASSWORD", ""),
|
||||
)
|
||||
|
||||
|
||||
DB_PROC = os.environ.get("CLICKHOUSE_DB_PROCESSING", os.environ.get("CLICKHOUSE_DB", "ja4_processing"))
|
||||
|
||||
|
||||
def insert_ua_rules(client, rules: list[dict]) -> None:
|
||||
if not rules:
|
||||
print("[INFO] Aucune règle UA.")
|
||||
return
|
||||
client.command("TRUNCATE TABLE mabase_prod.anubis_ua_rules")
|
||||
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_ua_rules")
|
||||
# REGEXP_TREE format : id, parent_id, regexp, keys[], values[]
|
||||
# keys = ['bot_name', 'action', 'has_ip', 'rule_id', 'category']
|
||||
data = [
|
||||
@ -357,7 +360,7 @@ def insert_ua_rules(client, rules: list[dict]) -> None:
|
||||
]
|
||||
for r in rules
|
||||
]
|
||||
client.insert("mabase_prod.anubis_ua_rules", data,
|
||||
client.insert(f"{DB_PROC}.anubis_ua_rules", data,
|
||||
column_names=["id", "parent_id", "regexp", "keys", "values"])
|
||||
print(f"[OK] {len(rules)} règles UA insérées.")
|
||||
|
||||
@ -366,13 +369,13 @@ def insert_ip_rules(client, rules: list[dict]) -> None:
|
||||
if not rules:
|
||||
print("[INFO] Aucune règle IP.")
|
||||
return
|
||||
client.command("TRUNCATE TABLE mabase_prod.anubis_ip_rules")
|
||||
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_ip_rules")
|
||||
data = [
|
||||
[r["prefix"], r["bot_name"], r["action"],
|
||||
r["rule_id"], r["has_ua"], r["category"]]
|
||||
for r in rules
|
||||
]
|
||||
client.insert("mabase_prod.anubis_ip_rules", data,
|
||||
client.insert(f"{DB_PROC}.anubis_ip_rules", data,
|
||||
column_names=["prefix", "bot_name", "action", "rule_id", "has_ua", "category"])
|
||||
print(f"[OK] {len(rules)} règles IP insérées.")
|
||||
|
||||
@ -381,9 +384,9 @@ def insert_asn_rules(client, rules: list[dict]) -> None:
|
||||
if not rules:
|
||||
print("[INFO] Aucune règle ASN.")
|
||||
return
|
||||
client.command("TRUNCATE TABLE mabase_prod.anubis_asn_rules")
|
||||
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_asn_rules")
|
||||
data = [[r["asn"], r["bot_name"], r["action"], r["category"]] for r in rules]
|
||||
client.insert("mabase_prod.anubis_asn_rules", data,
|
||||
client.insert(f"{DB_PROC}.anubis_asn_rules", data,
|
||||
column_names=["asn", "bot_name", "action", "category"])
|
||||
print(f"[OK] {len(rules)} règles ASN insérées.")
|
||||
|
||||
@ -392,19 +395,19 @@ def insert_country_rules(client, rules: list[dict]) -> None:
|
||||
if not rules:
|
||||
print("[INFO] Aucune règle pays.")
|
||||
return
|
||||
client.command("TRUNCATE TABLE mabase_prod.anubis_country_rules")
|
||||
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_country_rules")
|
||||
data = [[r["country_code"], r["bot_name"], r["action"], r["category"]] for r in rules]
|
||||
client.insert("mabase_prod.anubis_country_rules", data,
|
||||
client.insert(f"{DB_PROC}.anubis_country_rules", data,
|
||||
column_names=["country_code", "bot_name", "action", "category"])
|
||||
print(f"[OK] {len(rules)} règles pays insérées.")
|
||||
|
||||
|
||||
def reload_dicts(client) -> None:
|
||||
dicts = [
|
||||
"mabase_prod.dict_anubis_ua",
|
||||
"mabase_prod.dict_anubis_ip",
|
||||
"mabase_prod.dict_anubis_asn",
|
||||
"mabase_prod.dict_anubis_country",
|
||||
f"{DB_PROC}.dict_anubis_ua",
|
||||
f"{DB_PROC}.dict_anubis_ip",
|
||||
f"{DB_PROC}.dict_anubis_asn",
|
||||
f"{DB_PROC}.dict_anubis_country",
|
||||
]
|
||||
for d in dicts:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user