feat: split ClickHouse into dual configurable databases (ja4_logs / ja4_processing)

Architecture:
- ja4_logs: raw log ingestion (http_logs_raw, http_logs, mv_http_logs)
- ja4_processing: analytics, aggregation, ML, dictionaries, audit

Configuration (env vars):
- CLICKHOUSE_DB_LOGS (default: ja4_logs)
- CLICKHOUSE_DB_PROCESSING (default: ja4_processing)

Changes:
- SQL migrations (10 files): all mabase_prod refs → ja4_logs or ja4_processing,
  with correct cross-database references (materialized views, views, dictionaries)
- deploy_schema.sh: substitutes DB names from env vars at deploy time
- Python shared settings: added CLICKHOUSE_DB_LOGS + CLICKHOUSE_DB_PROCESSING
- Dashboard routes (19 files): replaced ~80 hardcoded mabase_prod refs
  with settings.CLICKHOUSE_DB_LOGS / settings.CLICKHOUSE_DB_PROCESSING
- Bot-detector: DB → CLICKHOUSE_DB_PROCESSING; fetch_rules.py now reads
  CLICKHOUSE_DB_PROCESSING (falling back to the legacy CLICKHOUSE_DB, then to ja4_processing)
- Correlator: DSN example updated to ja4_logs
- Docker-compose + .env files: new env vars with defaults
- All documentation updated (14 markdown files)

All tests pass: sentinel 10/10, correlator 67.1%, bot-detector 11, dashboard 20, ja4_common 18

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 19:10:35 +02:00
parent b6391afbeb
commit 9f3e0621e5
46 changed files with 638 additions and 549 deletions

View File

@@ -13,7 +13,7 @@ Usage (depuis le container dashboard_web) :
python /tmp/fetch_rules.py
Variables d'environnement :
CLICKHOUSE_HOST, CLICKHOUSE_DB, CLICKHOUSE_USER, CLICKHOUSE_PASSWORD
CLICKHOUSE_HOST, CLICKHOUSE_DB, CLICKHOUSE_DB_PROCESSING, CLICKHOUSE_USER, CLICKHOUSE_PASSWORD
"""
import json
@@ -336,17 +336,20 @@ def collect_all_rules() -> tuple[list, list, list, list]:
def get_ch_client():
return clickhouse_connect.get_client(
host=os.environ.get("CLICKHOUSE_HOST", "clickhouse"),
database=os.environ.get("CLICKHOUSE_DB", "mabase_prod"),
database=os.environ.get("CLICKHOUSE_DB_PROCESSING", os.environ.get("CLICKHOUSE_DB", "ja4_processing")),
username=os.environ.get("CLICKHOUSE_USER", "admin"),
password=os.environ.get("CLICKHOUSE_PASSWORD", ""),
)
DB_PROC = os.environ.get("CLICKHOUSE_DB_PROCESSING", os.environ.get("CLICKHOUSE_DB", "ja4_processing"))
def insert_ua_rules(client, rules: list[dict]) -> None:
if not rules:
print("[INFO] Aucune règle UA.")
return
client.command("TRUNCATE TABLE mabase_prod.anubis_ua_rules")
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_ua_rules")
# REGEXP_TREE format : id, parent_id, regexp, keys[], values[]
# keys = ['bot_name', 'action', 'has_ip', 'rule_id', 'category']
data = [
@@ -357,7 +360,7 @@ def insert_ua_rules(client, rules: list[dict]) -> None:
]
for r in rules
]
client.insert("mabase_prod.anubis_ua_rules", data,
client.insert(f"{DB_PROC}.anubis_ua_rules", data,
column_names=["id", "parent_id", "regexp", "keys", "values"])
print(f"[OK] {len(rules)} règles UA insérées.")
@@ -366,13 +369,13 @@ def insert_ip_rules(client, rules: list[dict]) -> None:
if not rules:
print("[INFO] Aucune règle IP.")
return
client.command("TRUNCATE TABLE mabase_prod.anubis_ip_rules")
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_ip_rules")
data = [
[r["prefix"], r["bot_name"], r["action"],
r["rule_id"], r["has_ua"], r["category"]]
for r in rules
]
client.insert("mabase_prod.anubis_ip_rules", data,
client.insert(f"{DB_PROC}.anubis_ip_rules", data,
column_names=["prefix", "bot_name", "action", "rule_id", "has_ua", "category"])
print(f"[OK] {len(rules)} règles IP insérées.")
@@ -381,9 +384,9 @@ def insert_asn_rules(client, rules: list[dict]) -> None:
if not rules:
print("[INFO] Aucune règle ASN.")
return
client.command("TRUNCATE TABLE mabase_prod.anubis_asn_rules")
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_asn_rules")
data = [[r["asn"], r["bot_name"], r["action"], r["category"]] for r in rules]
client.insert("mabase_prod.anubis_asn_rules", data,
client.insert(f"{DB_PROC}.anubis_asn_rules", data,
column_names=["asn", "bot_name", "action", "category"])
print(f"[OK] {len(rules)} règles ASN insérées.")
@@ -392,19 +395,19 @@ def insert_country_rules(client, rules: list[dict]) -> None:
if not rules:
print("[INFO] Aucune règle pays.")
return
client.command("TRUNCATE TABLE mabase_prod.anubis_country_rules")
client.command(f"TRUNCATE TABLE {DB_PROC}.anubis_country_rules")
data = [[r["country_code"], r["bot_name"], r["action"], r["category"]] for r in rules]
client.insert("mabase_prod.anubis_country_rules", data,
client.insert(f"{DB_PROC}.anubis_country_rules", data,
column_names=["country_code", "bot_name", "action", "category"])
print(f"[OK] {len(rules)} règles pays insérées.")
def reload_dicts(client) -> None:
dicts = [
"mabase_prod.dict_anubis_ua",
"mabase_prod.dict_anubis_ip",
"mabase_prod.dict_anubis_asn",
"mabase_prod.dict_anubis_country",
f"{DB_PROC}.dict_anubis_ua",
f"{DB_PROC}.dict_anubis_ip",
f"{DB_PROC}.dict_anubis_asn",
f"{DB_PROC}.dict_anubis_country",
]
for d in dicts:
try: