refactor: replace hardcoded mabase_prod DB prefix with configurable settings

Replace every hardcoded 'mabase_prod.' table prefix in the dashboard
route SQL queries with a configurable database name from settings:

- http_logs, http_logs_raw → settings.CLICKHOUSE_DB_LOGS
- All other tables → settings.CLICKHOUSE_DB_PROCESSING

Also qualify previously unqualified table references (bare FROM/JOIN
table_name) with the appropriate database prefix for consistency.

Each route file now contains the import 'from ..config import settings'
and builds its queries as f-strings that interpolate
{settings.CLICKHOUSE_DB_PROCESSING} or {settings.CLICKHOUSE_DB_LOGS} as
the database qualifier in front of each table name.

Files updated: analysis, attributes, audit, botnets, bruteforce,
clustering, detections, entities, fingerprints, header_fingerprint,
heatmap, incidents, investigation_summary, metrics, ml_features,
rotation, search, tcp_spoofing, variability (19 files).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 19:03:05 +02:00
parent dba2676fa7
commit b6391afbeb
19 changed files with 225 additions and 206 deletions

View File

@ -14,6 +14,7 @@ from ..models import (
ClassificationRecommendation, ClassificationLabel,
ClassificationCreate, Classification, ClassificationsListResponse
)
from ..config import settings
router = APIRouter(prefix="/api/analysis", tags=["analysis"])
@ -42,9 +43,9 @@ async def analyze_subnet(ip: str):
subnet_str = str(subnet)
# Récupérer les infos ASN pour cette IP
asn_query = """
asn_query = f"""
SELECT asn_number, asn_org
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
ORDER BY detected_at DESC
LIMIT 1
@ -60,9 +61,9 @@ async def analyze_subnet(ip: str):
asn_org = asn_result.result_rows[0][1] or "Unknown"
# IPs du même subnet /24
subnet_ips_query = """
subnet_ips_query = f"""
SELECT DISTINCT src_ip
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE toIPv4(src_ip) >= toIPv4(%(subnet_start)s)
AND toIPv4(src_ip) <= toIPv4(%(subnet_end)s)
AND detected_at >= now() - INTERVAL 24 HOUR
@ -78,9 +79,9 @@ async def analyze_subnet(ip: str):
# Total IPs du même ASN
if asn_number != "0":
asn_total_query = """
asn_total_query = f"""
SELECT uniq(src_ip)
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE asn_number = %(asn_number)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -114,9 +115,9 @@ async def analyze_ip_country(ip: str):
"""
try:
# Pays de l'IP
ip_country_query = """
ip_country_query = f"""
SELECT country_code, asn_number
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
ORDER BY detected_at DESC
LIMIT 1
@ -131,11 +132,11 @@ async def analyze_ip_country(ip: str):
asn_number = ip_result.result_rows[0][1]
# Répartition des autres pays du même ASN
asn_countries_query = """
asn_countries_query = f"""
SELECT
country_code,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE asn_number = %(asn_number)s
AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY country_code
@ -179,11 +180,11 @@ async def analyze_country(days: int = Query(1, ge=1, le=30)):
"""
try:
# Top pays
top_query = """
top_query = f"""
SELECT
country_code,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(days)s DAY
AND country_code != '' AND country_code IS NOT NULL
GROUP BY country_code
@ -206,11 +207,11 @@ async def analyze_country(days: int = Query(1, ge=1, le=30)):
]
# Baseline (7 derniers jours)
baseline_query = """
baseline_query = f"""
SELECT
country_code,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 7 DAY
AND country_code != '' AND country_code IS NOT NULL
GROUP BY country_code
@ -254,9 +255,9 @@ async def analyze_ja4(ip: str):
"""
try:
# JA4 de cette IP
ja4_query = """
ja4_query = f"""
SELECT ja4
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
AND ja4 != '' AND ja4 IS NOT NULL
ORDER BY detected_at DESC
@ -276,9 +277,9 @@ async def analyze_ja4(ip: str):
ja4 = ja4_result.result_rows[0][0]
# IPs avec le même JA4
shared_query = """
shared_query = f"""
SELECT uniq(src_ip)
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ja4 = %(ja4)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -287,11 +288,11 @@ async def analyze_ja4(ip: str):
shared_count = shared_result.result_rows[0][0] if shared_result.result_rows else 0
# Top subnets pour ce JA4 - Simplifié
subnets_query = """
subnets_query = f"""
SELECT
src_ip,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ja4 = %(ja4)s
AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
@ -315,9 +316,9 @@ async def analyze_ja4(ip: str):
]
# Autres JA4 pour cette IP
other_ja4_query = """
other_ja4_query = f"""
SELECT DISTINCT ja4
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
AND ja4 != '' AND ja4 IS NOT NULL
AND ja4 != %(current_ja4)s
@ -348,11 +349,11 @@ async def analyze_user_agents(ip: str):
"""
try:
# User-Agents pour cette IP (depuis http_logs)
ip_ua_query = """
ip_ua_query = f"""
SELECT
header_user_agent AS ua,
count() AS count
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE src_ip = %(ip)s
AND header_user_agent != '' AND header_user_agent IS NOT NULL
AND time >= now() - INTERVAL 24 HOUR
@ -361,9 +362,9 @@ async def analyze_user_agents(ip: str):
"""
# Total réel des requêtes (pour les pourcentages corrects)
ip_total_query = """
ip_total_query = f"""
SELECT count()
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE src_ip = %(ip)s
AND time >= now() - INTERVAL 24 HOUR
"""
@ -529,8 +530,8 @@ async def create_classification(data: ClassificationCreate):
if not data.ip and not data.ja4:
raise HTTPException(status_code=400, detail="IP ou JA4 requis")
query = """
INSERT INTO mabase_prod.classifications
query = f"""
INSERT INTO {settings.CLICKHOUSE_DB_PROCESSING}.classifications
(ip, ja4, label, tags, comment, confidence, features, analyst, created_at)
VALUES
(%(ip)s, %(ja4)s, %(label)s, %(tags)s, %(comment)s, %(confidence)s, %(features)s, %(analyst)s, now())
@ -551,7 +552,7 @@ async def create_classification(data: ClassificationCreate):
where_clause = "ip = %(entity)s" if data.ip else "ja4 = %(entity)s"
select_query = f"""
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause}
ORDER BY created_at DESC
LIMIT 1
@ -609,7 +610,7 @@ async def list_classifications(
query = f"""
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause}
ORDER BY created_at DESC
LIMIT %(limit)s
@ -635,7 +636,7 @@ async def list_classifications(
# Total
count_query = f"""
SELECT count()
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause}
"""
@ -657,13 +658,13 @@ async def get_classification_stats():
Statistiques des classifications
"""
try:
stats_query = """
stats_query = f"""
SELECT
label,
count() AS total,
uniq(ip) AS unique_ips,
avg(confidence) AS avg_confidence
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
GROUP BY label
ORDER BY total DESC
"""