refactor: replace hardcoded mabase_prod DB prefix with configurable settings
Replace all hardcoded 'mabase_prod.' table prefixes in the dashboard route
SQL queries with configurable database names from settings:
- http_logs, http_logs_raw → settings.CLICKHOUSE_DB_LOGS
- All other tables → settings.CLICKHOUSE_DB_PROCESSING
Also qualify previously unqualified table references (bare FROM/JOIN
table_name) with the appropriate database prefix for consistency.
Each route file now imports settings via 'from ..config import settings' and
builds its SQL with f-strings, using {settings.CLICKHOUSE_DB_PROCESSING} or
{settings.CLICKHOUSE_DB_LOGS} for database-qualified table names.
Files updated: analysis, attributes, audit, botnets, bruteforce,
clustering, detections, entities, fingerprints, header_fingerprint,
heatmap, incidents, investigation_summary, metrics, ml_features,
rotation, search, tcp_spoofing, variability (19 files).
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -4,6 +4,7 @@ Endpoints pour les features ML / IA (scores d'anomalies, radar, scatter)
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
from ..config import settings
|
||||
|
||||
router = APIRouter(prefix="/api/ml", tags=["ml_features"])
|
||||
|
||||
@@ -27,7 +28,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
|
||||
Query directe sur agg_host_ip_ja4_1h + LEFT JOIN agg_header_fingerprint_1h.
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
sql = f"""
|
||||
SELECT
|
||||
replaceRegexpAll(toString(a.src_ip), '^::ffff:', '') AS ip,
|
||||
any(a.ja4) AS ja4,
|
||||
@@ -45,13 +46,13 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
|
||||
any(a.src_as_name) AS asn_name,
|
||||
max(h.ua_ch_mismatch) AS ua_ch_mismatch,
|
||||
max(h.modern_browser_score) AS browser_score,
|
||||
dictGetOrDefault('mabase_prod.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label,
|
||||
dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label,
|
||||
coalesce(
|
||||
nullIf(dictGetOrDefault('mabase_prod.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''),
|
||||
nullIf(dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''),
|
||||
''
|
||||
) AS bot_name
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h a
|
||||
LEFT JOIN mabase_prod.agg_header_fingerprint_1h h
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h a
|
||||
LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h h
|
||||
ON a.src_ip = h.src_ip AND a.window_start = h.window_start
|
||||
WHERE a.window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY a.src_ip
|
||||
@@ -92,7 +93,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
|
||||
async def get_ip_radar(ip: str):
|
||||
"""Scores radar pour une IP spécifique (8 dimensions d'anomalie)."""
|
||||
try:
|
||||
sql = """
|
||||
sql = f"""
|
||||
SELECT
|
||||
avg(fuzzing_index) AS fuzzing_index,
|
||||
avg(hit_velocity) AS hit_velocity,
|
||||
@@ -102,7 +103,7 @@ async def get_ip_radar(ip: str):
|
||||
avg(orphan_ratio) AS orphan_ratio,
|
||||
avg(path_diversity_ratio) AS path_diversity_ratio,
|
||||
avg(anomalous_payload_ratio) AS anomalous_payload_ratio
|
||||
FROM mabase_prod.view_ai_features_1h
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
AND window_start >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
@@ -139,7 +140,7 @@ async def get_score_distribution():
|
||||
"""
|
||||
try:
|
||||
# Single scan — global totals + per-model breakdown via GROUPING SETS
|
||||
sql = """
|
||||
sql = f"""
|
||||
SELECT
|
||||
threat_level,
|
||||
model_name,
|
||||
@@ -149,7 +150,7 @@ async def get_score_distribution():
|
||||
countIf(threat_level = 'NORMAL') AS normal_count,
|
||||
countIf(threat_level NOT IN ('NORMAL','KNOWN_BOT')) AS anomaly_count,
|
||||
countIf(threat_level = 'KNOWN_BOT') AS bot_count
|
||||
FROM mabase_prod.ml_all_scores
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores
|
||||
WHERE detected_at >= now() - INTERVAL 3 DAY
|
||||
GROUP BY threat_level, model_name
|
||||
ORDER BY model_name, total DESC
|
||||
@@ -199,7 +200,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
|
||||
Retourne le score moyen et les counts par heure et par modèle.
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
sql = f"""
|
||||
SELECT
|
||||
toStartOfHour(window_start) AS hour,
|
||||
model_name,
|
||||
@@ -207,7 +208,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
|
||||
countIf(threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')) AS anomaly_count,
|
||||
countIf(threat_level = 'KNOWN_BOT') AS bot_count,
|
||||
round(avgIf(anomaly_score, threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')), 4) AS avg_anomaly_score
|
||||
FROM mabase_prod.ml_all_scores
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores
|
||||
WHERE window_start >= now() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY hour, model_name
|
||||
ORDER BY hour ASC, model_name
|
||||
@@ -237,7 +238,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
|
||||
Ces features sont calculées dans view_ai_features_1h mais jamais visualisées dans le dashboard.
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
sql = f"""
|
||||
SELECT ip, ja4, country, asn_name, total_hits AS hits,
|
||||
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
|
||||
missing_accept_enc_ratio, http_scheme_ratio
|
||||
@@ -255,7 +256,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
|
||||
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
|
||||
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
|
||||
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY src_ip
|
||||
)
|
||||
@@ -295,7 +296,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
|
||||
"""
|
||||
try:
|
||||
# First: check real campaigns
|
||||
campaign_sql = """
|
||||
campaign_sql = f"""
|
||||
SELECT
|
||||
campaign_id,
|
||||
count() AS total_detections,
|
||||
@@ -305,7 +306,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
|
||||
groupUniqArray(3)(bot_name) AS bot_names,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM mabase_prod.ml_detected_anomalies
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND campaign_id >= 0
|
||||
GROUP BY campaign_id
|
||||
@@ -330,7 +331,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
|
||||
|
||||
# Fallback: subnet-based clustering when DBSCAN has no campaigns
|
||||
if not campaigns:
|
||||
subnet_sql = """
|
||||
subnet_sql = f"""
|
||||
SELECT
|
||||
IPv4CIDRToRange(toIPv4(replaceRegexpAll(toString(src_ip),'^::ffff:','')), 24).1 AS subnet,
|
||||
count() AS total_detections,
|
||||
@@ -340,7 +341,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
|
||||
any(ja4) AS sample_ja4,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM mabase_prod.ml_detected_anomalies
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND threat_level IN ('HIGH','CRITICAL','MEDIUM')
|
||||
GROUP BY subnet
|
||||
@@ -381,7 +382,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
|
||||
async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
|
||||
"""Points scatter plot (fuzzing_index × hit_velocity) — bypass view_ai_features_1h."""
|
||||
try:
|
||||
sql = """
|
||||
sql = f"""
|
||||
SELECT
|
||||
ip,
|
||||
ja4,
|
||||
@@ -400,7 +401,7 @@ async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
|
||||
max(last_seen) AS max_last,
|
||||
sum(count_head) AS total_count_head,
|
||||
max(correlated_raw) AS correlated
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY src_ip
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user