refactor: replace hardcoded mabase_prod DB prefix with configurable settings

Replace every hardcoded 'mabase_prod.' table prefix in the dashboard
route SQL queries with a configurable database name read from settings:

- http_logs, http_logs_raw → settings.CLICKHOUSE_DB_LOGS
- All other tables → settings.CLICKHOUSE_DB_PROCESSING

Also qualify previously unqualified table references (bare FROM/JOIN
table_name) with the appropriate database prefix, so that every query
names its database explicitly.

Each route file now adds the import 'from ..config import settings' and
builds its queries as f-strings, using {settings.CLICKHOUSE_DB_PROCESSING}
or {settings.CLICKHOUSE_DB_LOGS} as the database qualifier for table names.

Files updated: analysis, attributes, audit, botnets, bruteforce,
clustering, detections, entities, fingerprints, header_fingerprint,
heatmap, incidents, investigation_summary, metrics, ml_features,
rotation, search, tcp_spoofing, variability (19 files).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 19:03:05 +02:00
parent dba2676fa7
commit b6391afbeb
19 changed files with 225 additions and 206 deletions

View File

@ -8,6 +8,7 @@ from ..models import (
VariabilityResponse, VariabilityAttributes, AttributeValue, Insight,
UserAgentsResponse, UserAgentValue
)
from ..config import settings
router = APIRouter(prefix="/api/variability", tags=["variability"])
@ -45,7 +46,7 @@ async def get_associated_ips(
query = f"""
SELECT src_ip, count() AS hit_count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
@ -64,7 +65,7 @@ async def get_associated_ips(
# Compter le total
count_query = f"""
SELECT uniq(src_ip) AS total
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -138,7 +139,7 @@ async def get_associated_attributes(
# country/asn/host: pivot via ml_detected_anomalies
ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)"""
ua_q = f"""
@ -160,7 +161,7 @@ async def get_associated_attributes(
{target_column} AS value,
count() AS count,
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND {target_column} != '' AND {target_column} IS NOT NULL
AND detected_at >= now() - INTERVAL 24 HOUR
@ -183,7 +184,7 @@ async def get_associated_attributes(
# Compter le total
count_query = f"""
SELECT uniq({target_column}) AS total
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND {target_column} != '' AND {target_column} IS NOT NULL
AND detected_at >= now() - INTERVAL 24 HOUR
@ -247,7 +248,7 @@ async def get_user_agents(
ml_col = {"country": "country_code", "asn": "asn_number", "host": "host"}[attr_type]
where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {ml_col} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
)"""
@ -467,7 +468,7 @@ async def get_variability(attr_type: str, value: str):
threat_level,
model_name,
anomaly_score
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
)
@ -480,7 +481,7 @@ async def get_variability(attr_type: str, value: str):
uniq(src_ip) AS unique_ips,
min(detected_at) AS first_seen,
max(detected_at) AS last_seen
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -506,12 +507,12 @@ async def get_variability(attr_type: str, value: str):
header_user_agent AS user_agent,
count() AS count,
round(count() * 100.0 / (
SELECT count() FROM mabase_prod.http_logs
SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
), 2) AS percentage,
min(time) AS first_seen,
max(time) AS last_seen
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where}
AND time >= now() - INTERVAL 24 HOUR
AND header_user_agent != '' AND header_user_agent IS NOT NULL
@ -527,12 +528,12 @@ async def get_variability(attr_type: str, value: str):
header_user_agent AS user_agent,
count() AS count,
round(count() * 100.0 / (
SELECT count() FROM mabase_prod.http_logs
SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
), 2) AS percentage,
min(time) AS first_seen,
max(time) AS last_seen
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where}
AND time >= now() - INTERVAL 24 HOUR
AND header_user_agent != '' AND header_user_agent IS NOT NULL
@ -545,7 +546,7 @@ async def get_variability(attr_type: str, value: str):
# country / asn / host: pivot via ml_detected_anomalies → IPs, puis view UA
_ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)"""
ua_query_simple = f"""