refactor: replace hardcoded mabase_prod DB prefix with configurable settings

Replace all hardcoded 'mabase_prod.' table prefixes in the dashboard
route SQL queries with configurable database names read from settings:

- http_logs, http_logs_raw → settings.CLICKHOUSE_DB_LOGS
- All other tables → settings.CLICKHOUSE_DB_PROCESSING

Also qualify previously unqualified table references (bare FROM/JOIN
table_name) with the appropriate database prefix for consistency.

Each route file now imports settings via 'from ..config import settings'
and builds its queries as f-strings, interpolating
{settings.CLICKHOUSE_DB_PROCESSING} or {settings.CLICKHOUSE_DB_LOGS}
to produce database-qualified table names.

Files updated: analysis, attributes, audit, botnets, bruteforce,
clustering, detections, entities, fingerprints, header_fingerprint,
heatmap, incidents, investigation_summary, metrics, ml_features,
rotation, search, tcp_spoofing, variability (19 files).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 19:03:05 +02:00
parent dba2676fa7
commit b6391afbeb
19 changed files with 225 additions and 206 deletions

View File

@ -13,6 +13,7 @@ from fastapi import APIRouter, HTTPException, Query
import re
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"])
@ -68,7 +69,7 @@ async def get_ja4_spoofing(
"""
try:
# Agrégation par JA4 avec tous les indicateurs de spoofing
query = """
query = f"""
SELECT
ja4,
count() AS total_detections,
@ -102,7 +103,7 @@ async def get_ja4_spoofing(
avg(ja4_country_concentration) AS avg_country_concentration,
argMax(threat_level, detected_at) AS last_threat_level
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4
@ -254,7 +255,7 @@ async def get_ja4_ua_matrix(
"""
try:
# Stats JA4 depuis ml_detected_anomalies
stats_query = """
stats_query = f"""
SELECT
ja4,
uniq(src_ip) AS unique_ips,
@ -264,7 +265,7 @@ async def get_ja4_ua_matrix(
countIf(is_rare_ja4 = true) AS rare_count,
countIf(is_ua_rotating = true) AS rotating_count,
argMax(threat_level, detected_at) AS last_threat
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4
@ -426,11 +427,11 @@ async def get_ua_analysis(
pass
# IPs avec is_ua_rotating depuis ml_detected_anomalies
rotating_query = """
rotating_query = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
avg(ua_ch_mismatch) AS avg_ua_ch_mismatch
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND is_ua_rotating = true
GROUP BY clean_ip
@ -522,7 +523,7 @@ async def get_ip_fingerprint_coherence(ip: str):
"""
try:
# Données depuis ml_detected_anomalies
ml_query = """
ml_query = f"""
SELECT
ja4,
ua_ch_mismatch,
@ -543,7 +544,7 @@ async def get_ip_fingerprint_coherence(ip: str):
window_mss_ratio,
tcp_jitter_variance,
multiplexing_efficiency
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
ORDER BY detected_at DESC
"""
@ -681,7 +682,7 @@ async def get_legitimate_ja4(
Utile comme whitelist pour réduire les faux positifs.
"""
try:
query = """
query = f"""
SELECT
ja4,
uniq(src_ip) AS unique_ips,
@ -691,7 +692,7 @@ async def get_legitimate_ja4(
countIf(is_rare_ja4 = true) AS rare_count,
round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct,
round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4
@ -753,7 +754,7 @@ async def get_ja4_asn_correlation(
try:
# Two-pass: first aggregate per (ja4, asn) to get IP counts per ASN,
# then aggregate per ja4 to compute concentration ratio
sql = """
sql = f"""
SELECT
ja4,
sum(ips_per_combo) AS unique_ips,
@ -774,7 +775,7 @@ async def get_ja4_asn_correlation(
uniq(src_ip) AS ips_per_combo,
uniq(src_ip) AS country_ips,
sum(hits) AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
AND ja4 != ''
GROUP BY ja4, src_asn, src_country_code