From b6391afbeb37f3001e5740da763d9bdaff1dec34 Mon Sep 17 00:00:00 2001 From: toto Date: Tue, 7 Apr 2026 19:03:05 +0200 Subject: [PATCH] refactor: replace hardcoded mabase_prod DB prefix with configurable settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all hardcoded 'mabase_prod.' table prefixes in dashboard route SQL queries with configurable database names from settings: - http_logs, http_logs_raw → settings.CLICKHOUSE_DB_LOGS - All other tables → settings.CLICKHOUSE_DB_PROCESSING Also qualify previously unqualified table references (bare FROM/JOIN table_name) with the appropriate database prefix for consistency. Each route file now imports 'from ..config import settings' and uses f-strings with {settings.CLICKHOUSE_DB_PROCESSING} or {settings.CLICKHOUSE_DB_LOGS} for database-qualified table names. Files updated: analysis, attributes, audit, botnets, bruteforce, clustering, detections, entities, fingerprints, header_fingerprint, heatmap, incidents, investigation_summary, metrics, ml_features, rotation, search, tcp_spoofing, variability (19 files). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- services/dashboard/backend/routes/analysis.py | 67 ++++++++++--------- .../dashboard/backend/routes/attributes.py | 5 +- services/dashboard/backend/routes/audit.py | 15 +++-- services/dashboard/backend/routes/botnets.py | 13 ++-- .../dashboard/backend/routes/bruteforce.py | 17 ++--- .../dashboard/backend/routes/clustering.py | 15 +++-- .../dashboard/backend/routes/detections.py | 17 ++--- services/dashboard/backend/routes/entities.py | 31 ++++----- .../dashboard/backend/routes/fingerprints.py | 25 +++---- .../backend/routes/header_fingerprint.py | 13 ++-- services/dashboard/backend/routes/heatmap.py | 21 +++--- .../dashboard/backend/routes/incidents.py | 13 ++-- .../backend/routes/investigation_summary.py | 25 +++---- services/dashboard/backend/routes/metrics.py | 17 ++--- .../dashboard/backend/routes/ml_features.py | 39 +++++------ services/dashboard/backend/routes/rotation.py | 25 +++---- services/dashboard/backend/routes/search.py | 17 ++--- .../dashboard/backend/routes/tcp_spoofing.py | 29 ++++---- .../dashboard/backend/routes/variability.py | 27 ++++---- 19 files changed, 225 insertions(+), 206 deletions(-) diff --git a/services/dashboard/backend/routes/analysis.py b/services/dashboard/backend/routes/analysis.py index 3accb71..bd23bbb 100644 --- a/services/dashboard/backend/routes/analysis.py +++ b/services/dashboard/backend/routes/analysis.py @@ -14,6 +14,7 @@ from ..models import ( ClassificationRecommendation, ClassificationLabel, ClassificationCreate, Classification, ClassificationsListResponse ) +from ..config import settings router = APIRouter(prefix="/api/analysis", tags=["analysis"]) @@ -42,9 +43,9 @@ async def analyze_subnet(ip: str): subnet_str = str(subnet) # Récupérer les infos ASN pour cette IP - asn_query = """ + asn_query = f""" SELECT asn_number, asn_org - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE src_ip = %(ip)s 
ORDER BY detected_at DESC LIMIT 1 @@ -60,9 +61,9 @@ async def analyze_subnet(ip: str): asn_org = asn_result.result_rows[0][1] or "Unknown" # IPs du même subnet /24 - subnet_ips_query = """ + subnet_ips_query = f""" SELECT DISTINCT src_ip - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE toIPv4(src_ip) >= toIPv4(%(subnet_start)s) AND toIPv4(src_ip) <= toIPv4(%(subnet_end)s) AND detected_at >= now() - INTERVAL 24 HOUR @@ -78,9 +79,9 @@ async def analyze_subnet(ip: str): # Total IPs du même ASN if asn_number != "0": - asn_total_query = """ + asn_total_query = f""" SELECT uniq(src_ip) - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE asn_number = %(asn_number)s AND detected_at >= now() - INTERVAL 24 HOUR """ @@ -114,9 +115,9 @@ async def analyze_ip_country(ip: str): """ try: # Pays de l'IP - ip_country_query = """ + ip_country_query = f""" SELECT country_code, asn_number - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE src_ip = %(ip)s ORDER BY detected_at DESC LIMIT 1 @@ -131,11 +132,11 @@ async def analyze_ip_country(ip: str): asn_number = ip_result.result_rows[0][1] # Répartition des autres pays du même ASN - asn_countries_query = """ + asn_countries_query = f""" SELECT country_code, count() AS count - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE asn_number = %(asn_number)s AND detected_at >= now() - INTERVAL 24 HOUR GROUP BY country_code @@ -179,11 +180,11 @@ async def analyze_country(days: int = Query(1, ge=1, le=30)): """ try: # Top pays - top_query = """ + top_query = f""" SELECT country_code, count() AS count - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(days)s DAY AND country_code != '' AND country_code IS NOT NULL GROUP BY country_code @@ -206,11 +207,11 @@ async def 
analyze_country(days: int = Query(1, ge=1, le=30)): ] # Baseline (7 derniers jours) - baseline_query = """ + baseline_query = f""" SELECT country_code, count() AS count - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL 7 DAY AND country_code != '' AND country_code IS NOT NULL GROUP BY country_code @@ -254,9 +255,9 @@ async def analyze_ja4(ip: str): """ try: # JA4 de cette IP - ja4_query = """ + ja4_query = f""" SELECT ja4 - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE src_ip = %(ip)s AND ja4 != '' AND ja4 IS NOT NULL ORDER BY detected_at DESC @@ -276,9 +277,9 @@ async def analyze_ja4(ip: str): ja4 = ja4_result.result_rows[0][0] # IPs avec le même JA4 - shared_query = """ + shared_query = f""" SELECT uniq(src_ip) - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE ja4 = %(ja4)s AND detected_at >= now() - INTERVAL 24 HOUR """ @@ -287,11 +288,11 @@ async def analyze_ja4(ip: str): shared_count = shared_result.result_rows[0][0] if shared_result.result_rows else 0 # Top subnets pour ce JA4 - Simplifié - subnets_query = """ + subnets_query = f""" SELECT src_ip, count() AS count - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE ja4 = %(ja4)s AND detected_at >= now() - INTERVAL 24 HOUR GROUP BY src_ip @@ -315,9 +316,9 @@ async def analyze_ja4(ip: str): ] # Autres JA4 pour cette IP - other_ja4_query = """ + other_ja4_query = f""" SELECT DISTINCT ja4 - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE src_ip = %(ip)s AND ja4 != '' AND ja4 IS NOT NULL AND ja4 != %(current_ja4)s @@ -348,11 +349,11 @@ async def analyze_user_agents(ip: str): """ try: # User-Agents pour cette IP (depuis http_logs) - ip_ua_query = """ + ip_ua_query = f""" SELECT header_user_agent AS ua, count() AS count - FROM 
mabase_prod.http_logs + FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs WHERE src_ip = %(ip)s AND header_user_agent != '' AND header_user_agent IS NOT NULL AND time >= now() - INTERVAL 24 HOUR @@ -361,9 +362,9 @@ async def analyze_user_agents(ip: str): """ # Total réel des requêtes (pour les pourcentages corrects) - ip_total_query = """ + ip_total_query = f""" SELECT count() - FROM mabase_prod.http_logs + FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs WHERE src_ip = %(ip)s AND time >= now() - INTERVAL 24 HOUR """ @@ -529,8 +530,8 @@ async def create_classification(data: ClassificationCreate): if not data.ip and not data.ja4: raise HTTPException(status_code=400, detail="IP ou JA4 requis") - query = """ - INSERT INTO mabase_prod.classifications + query = f""" + INSERT INTO {settings.CLICKHOUSE_DB_PROCESSING}.classifications (ip, ja4, label, tags, comment, confidence, features, analyst, created_at) VALUES (%(ip)s, %(ja4)s, %(label)s, %(tags)s, %(comment)s, %(confidence)s, %(features)s, %(analyst)s, now()) @@ -551,7 +552,7 @@ async def create_classification(data: ClassificationCreate): where_clause = "ip = %(entity)s" if data.ip else "ja4 = %(entity)s" select_query = f""" SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at - FROM mabase_prod.classifications + FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications WHERE {where_clause} ORDER BY created_at DESC LIMIT 1 @@ -609,7 +610,7 @@ async def list_classifications( query = f""" SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at - FROM mabase_prod.classifications + FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications WHERE {where_clause} ORDER BY created_at DESC LIMIT %(limit)s @@ -635,7 +636,7 @@ async def list_classifications( # Total count_query = f""" SELECT count() - FROM mabase_prod.classifications + FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications WHERE {where_clause} """ @@ -657,13 +658,13 @@ async def get_classification_stats(): Statistiques 
des classifications """ try: - stats_query = """ + stats_query = f""" SELECT label, count() AS total, uniq(ip) AS unique_ips, avg(confidence) AS avg_confidence - FROM mabase_prod.classifications + FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications GROUP BY label ORDER BY total DESC """ diff --git a/services/dashboard/backend/routes/attributes.py b/services/dashboard/backend/routes/attributes.py index 0a929e3..58676bc 100644 --- a/services/dashboard/backend/routes/attributes.py +++ b/services/dashboard/backend/routes/attributes.py @@ -4,6 +4,7 @@ Endpoints pour la liste des attributs uniques from fastapi import APIRouter, HTTPException, Query from ..database import db from ..models import AttributeListResponse, AttributeListItem +from ..config import settings router = APIRouter(prefix="/api/attributes", tags=["attributes"]) @@ -42,7 +43,7 @@ async def get_attributes( SELECT {column} AS value, count() AS count - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL 24 HOUR """ @@ -53,7 +54,7 @@ async def get_attributes( query = f""" SELECT value, count FROM ( SELECT toString({column}) AS value, count() AS count - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL 24 HOUR GROUP BY {column} ) diff --git a/services/dashboard/backend/routes/audit.py b/services/dashboard/backend/routes/audit.py index 3b47e43..531ef01 100644 --- a/services/dashboard/backend/routes/audit.py +++ b/services/dashboard/backend/routes/audit.py @@ -6,6 +6,7 @@ from fastapi import APIRouter, HTTPException, Query, Request from typing import Optional from datetime import datetime from ..database import db +from ..config import settings router = APIRouter(prefix="/api/audit", tags=["audit"]) logger = logging.getLogger(__name__) @@ -29,8 +30,8 @@ async def create_audit_log( client_ip = request.client.host if request.client else "unknown" # 
Insérer dans ClickHouse - insert_query = """ - INSERT INTO mabase_prod.audit_logs + insert_query = f""" + INSERT INTO {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs (timestamp, user_name, action, entity_type, entity_id, entity_count, details, client_ip) VALUES (%(timestamp)s, %(user)s, %(action)s, %(entity_type)s, %(entity_id)s, %(entity_count)s, %(details)s, %(client_ip)s) @@ -105,7 +106,7 @@ async def get_audit_logs( entity_count, details, client_ip - FROM mabase_prod.audit_logs + FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs WHERE {where_clause} ORDER BY timestamp DESC LIMIT %(limit)s @@ -152,13 +153,13 @@ async def get_audit_stats( Statistiques d'audit """ try: - query = """ + query = f""" SELECT action, count() AS count, uniq(user_name) AS unique_users, sum(entity_count) AS total_entities - FROM mabase_prod.audit_logs + FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs WHERE timestamp >= now() - INTERVAL %(hours)s HOUR GROUP BY action ORDER BY count DESC @@ -198,14 +199,14 @@ async def get_user_activity( Activité par utilisateur """ try: - query = """ + query = f""" SELECT user_name, count() AS actions, uniq(action) AS action_types, min(timestamp) AS first_action, max(timestamp) AS last_action - FROM mabase_prod.audit_logs + FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs WHERE timestamp >= now() - INTERVAL %(hours)s HOUR GROUP BY user_name ORDER BY actions DESC diff --git a/services/dashboard/backend/routes/botnets.py b/services/dashboard/backend/routes/botnets.py index f3ed6ab..292104e 100644 --- a/services/dashboard/backend/routes/botnets.py +++ b/services/dashboard/backend/routes/botnets.py @@ -4,6 +4,7 @@ Endpoints pour l'analyse des botnets via la propagation des fingerprints JA4 from fastapi import APIRouter, HTTPException, Query from ..database import db +from ..config import settings router = APIRouter(prefix="/api/botnets", tags=["botnets"]) @@ -20,13 +21,13 @@ def _botnet_class(unique_countries: int) -> str: async def get_ja4_spread(): 
"""Propagation des JA4 fingerprints à travers les pays et les IPs.""" try: - sql = """ + sql = f""" SELECT ja4, unique_ips, unique_countries, targeted_hosts - FROM mabase_prod.view_host_ja4_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ja4_anomalies ORDER BY unique_countries DESC """ result = db.query(sql) @@ -56,12 +57,12 @@ async def get_ja4_spread(): async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)): """Top pays pour un JA4 donné depuis agg_host_ip_ja4_1h.""" try: - sql = """ + sql = f""" SELECT src_country_code AS country_code, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, sum(hits) AS hits - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE ja4 = %(ja4)s GROUP BY src_country_code ORDER BY unique_ips DESC @@ -85,13 +86,13 @@ async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)): async def get_botnets_summary(): """Statistiques globales sur les botnets détectés.""" try: - sql = """ + sql = f""" SELECT countIf(unique_countries > 100) AS total_global_botnets, sumIf(unique_ips, unique_countries > 50) AS total_ips_in_botnets, argMax(ja4, unique_countries) AS most_spread_ja4, argMax(ja4, unique_ips) AS most_ips_ja4 - FROM mabase_prod.view_host_ja4_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ja4_anomalies """ result = db.query(sql) row = result.result_rows[0] diff --git a/services/dashboard/backend/routes/bruteforce.py b/services/dashboard/backend/routes/bruteforce.py index 5085025..13d1ceb 100644 --- a/services/dashboard/backend/routes/bruteforce.py +++ b/services/dashboard/backend/routes/bruteforce.py @@ -4,6 +4,7 @@ Endpoints pour l'analyse des attaques par force brute sur les formulaires from fastapi import APIRouter, HTTPException, Query from ..database import db +from ..config import settings router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"]) @@ -12,14 +13,14 @@ router = 
APIRouter(prefix="/api/bruteforce", tags=["bruteforce"]) async def get_bruteforce_targets(): """Liste des hôtes ciblés par brute-force, triés par total_hits DESC.""" try: - sql = """ + sql = f""" SELECT host, uniq(src_ip) AS unique_ips, sum(hits) AS total_hits, sum(query_params_count) AS total_params, groupArray(3)(ja4) AS top_ja4s - FROM mabase_prod.view_form_bruteforce_detected + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected GROUP BY host ORDER BY total_hits DESC """ @@ -53,14 +54,14 @@ async def get_bruteforce_targets(): async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)): """Top IPs attaquantes triées par total_hits DESC.""" try: - sql = """ + sql = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, uniq(host) AS distinct_hosts, sum(hits) AS total_hits, sum(query_params_count) AS total_params, argMax(ja4, hits) AS ja4 - FROM mabase_prod.view_form_bruteforce_detected + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected GROUP BY src_ip ORDER BY total_hits DESC LIMIT %(limit)s @@ -84,12 +85,12 @@ async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)): async def get_bruteforce_timeline(): """Hits par heure (dernières 72h) depuis agg_host_ip_ja4_1h.""" try: - sql = """ + sql = f""" SELECT toHour(window_start) AS hour, sum(hits) AS hits, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS ips - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 72 HOUR GROUP BY hour ORDER BY hour ASC @@ -111,14 +112,14 @@ async def get_bruteforce_timeline(): async def get_host_attackers(host: str, limit: int = Query(20, ge=1, le=200)): """Top IPs attaquant un hôte spécifique, avec JA4 et type d'attaque.""" try: - sql = """ + sql = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, sum(hits) AS total_hits, sum(query_params_count) AS total_params, argMax(ja4, hits) AS ja4, 
max(hits) AS max_hits_per_window - FROM mabase_prod.view_form_bruteforce_detected + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected WHERE host = %(host)s GROUP BY src_ip ORDER BY total_hits DESC diff --git a/services/dashboard/backend/routes/clustering.py b/services/dashboard/backend/routes/clustering.py index a6d1388..f2da51f 100644 --- a/services/dashboard/backend/routes/clustering.py +++ b/services/dashboard/backend/routes/clustering.py @@ -24,6 +24,7 @@ from ..services.clustering_engine import ( name_cluster, risk_score_from_centroid, standardize, risk_to_gradient_color, ) +from ..config import settings log = logging.getLogger(__name__) router = APIRouter(prefix="/api/clustering", tags=["clustering"]) @@ -47,7 +48,7 @@ _EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering") # ─── SQL : TOUTES les IPs sans LIMIT ───────────────────────────────────────── -_SQL_ALL_IPS = """ +_SQL_ALL_IPS = f""" SELECT replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip, t.ja4, @@ -96,8 +97,8 @@ SELECT -- Cookie et Referer issus de la table dédiée aux empreintes any(hfp.hfp_cookie) AS hfp_cookie, any(hfp.hfp_referer) AS hfp_referer -FROM mabase_prod.agg_host_ip_ja4_1h t -LEFT JOIN mabase_prod.ml_detected_anomalies ml +FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h t +LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies ml ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4 AND ml.detected_at >= now() - INTERVAL %(hours)s HOUR LEFT JOIN ( @@ -107,7 +108,7 @@ LEFT JOIN ( any(arrayExists(x -> x LIKE '%%Accept-Encoding%%', client_headers)) AS hdr_enc, any(arrayExists(x -> x LIKE '%%Sec-Fetch%%', client_headers)) AS hdr_sec_fetch, any(length(splitByChar(',', client_headers[1]))) AS hdr_count - FROM mabase_prod.view_dashboard_entities + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE length(client_headers) > 0 AND log_date >= today() - 2 GROUP BY src_ip_v6, ja4 @@ -117,7 +118,7 @@ LEFT JOIN ( 
src_ip, avg(has_cookie) AS hfp_cookie, avg(has_referer) AS hfp_referer - FROM mabase_prod.agg_header_fingerprint_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h WHERE window_start >= now() - INTERVAL %(hours)s HOUR GROUP BY src_ip ) hfp ON t.src_ip = hfp.src_ip @@ -515,8 +516,8 @@ async def get_cluster_ips( any(ml.asn_org) AS asn_org, round(avg(ml.fuzzing_index), 2) AS fuzzing, round(avg(ml.hit_velocity), 2) AS velocity - FROM mabase_prod.agg_host_ip_ja4_1h t - LEFT JOIN mabase_prod.ml_detected_anomalies ml + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h t + LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies ml ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4 AND ml.detected_at >= now() - INTERVAL 24 HOUR WHERE t.window_start >= now() - INTERVAL 24 HOUR diff --git a/services/dashboard/backend/routes/detections.py b/services/dashboard/backend/routes/detections.py index 0b3e784..bfcd695 100644 --- a/services/dashboard/backend/routes/detections.py +++ b/services/dashboard/backend/routes/detections.py @@ -5,6 +5,7 @@ from fastapi import APIRouter, HTTPException, Query from typing import Optional, List from ..database import db from ..models import DetectionsListResponse, Detection +from ..config import settings router = APIRouter(prefix="/api/detections", tags=["detections"]) @@ -82,7 +83,7 @@ async def get_detections( # Requête de comptage count_query = f""" SELECT count() - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {where_clause} """ @@ -107,7 +108,7 @@ async def get_detections( # Count distinct IPs count_ip_query = f""" SELECT uniq(src_ip) - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {where_clause} """ cr = db.query(count_ip_query, params) @@ -154,11 +155,11 @@ async def get_detections( argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best, argMin(anubis_bot_action, anomaly_score) AS 
anubis_bot_action_best, argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {where_clause} GROUP BY src_ip ) ip_data - LEFT JOIN mabase_prod.asn_reputation ar + LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.asn_reputation ar ON ar.src_asn = toUInt32OrZero(ip_data.asn_number) ORDER BY {outer_sort} {sort_order} LIMIT %(limit)s OFFSET %(offset)s @@ -248,8 +249,8 @@ async def get_detections( anubis_bot_name, anubis_bot_action, anubis_bot_category - FROM ml_detected_anomalies - LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number) + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies + LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number) WHERE {where_clause} ORDER BY {sort_by} {sort_order} LIMIT %(limit)s OFFSET %(offset)s @@ -312,7 +313,7 @@ async def get_detection_details(detection_id: str): detection_id peut être une IP ou un identifiant """ try: - query = """ + query = f""" SELECT detected_at, src_ip, @@ -363,7 +364,7 @@ async def get_detection_details(detection_id: str): ja4_asn_concentration, ja4_country_concentration, is_rare_ja4 - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE src_ip = %(ip)s ORDER BY detected_at DESC LIMIT 1 diff --git a/services/dashboard/backend/routes/entities.py b/services/dashboard/backend/routes/entities.py index dc7038c..12bb347 100644 --- a/services/dashboard/backend/routes/entities.py +++ b/services/dashboard/backend/routes/entities.py @@ -11,6 +11,7 @@ from ..models import ( EntityRelatedAttributes, EntityAttributeValue ) +from ..config import settings router = APIRouter(prefix="/api/entities", tags=["Entities"]) @@ -24,7 +25,7 @@ def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Op """ Récupère les statistiques pour une entité donnée """ - query = 
""" + query = f""" SELECT entity_type, entity_value, @@ -32,7 +33,7 @@ def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Op sum(unique_ips) as unique_ips, min(log_date) as first_seen, max(log_date) as last_seen - FROM mabase_prod.view_dashboard_entities + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) @@ -64,13 +65,13 @@ def get_related_attributes(entity_type: str, entity_value: str, hours: int = 24) Récupère les attributs associés à une entité """ # Requête pour agréger tous les attributs associés - query = """ + query = f""" SELECT - (SELECT groupUniqArray(toString(src_ip)) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips, - (SELECT groupUniqArray(ja4) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s, - (SELECT groupUniqArray(host) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts, - (SELECT groupUniqArrayArray(asns) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns, - (SELECT groupUniqArrayArray(countries) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries + (SELECT groupUniqArray(toString(src_ip)) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = 
%(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips, + (SELECT groupUniqArray(ja4) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s, + (SELECT groupUniqArray(host) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts, + (SELECT groupUniqArrayArray(asns) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns, + (SELECT groupUniqArrayArray(countries) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries """ result = db.query(query, { @@ -110,7 +111,7 @@ def get_array_values(entity_type: str, entity_value: str, array_field: str, hour FROM ( SELECT arrayJoin({array_field}) as value - FROM mabase_prod.view_dashboard_entities + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) @@ -156,7 +157,7 @@ async def get_subnet_investigation( subnet_third = subnet_parts[2] # Stats globales du subnet - utilise ml_detected_anomalies + view_dashboard_entities pour UA - stats_query = """ + stats_query = f""" WITH cleaned_ips AS ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, @@ -165,7 +166,7 @@ async def get_subnet_investigation( host, country_code, asn_number - FROM ml_detected_anomalies + FROM 
{settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR ), subnet_filter AS ( @@ -180,7 +181,7 @@ async def get_subnet_investigation( SELECT entity_value AS ip, arrayJoin(user_agents) AS user_agent - FROM view_dashboard_entities + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = 'ip' AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s @@ -227,7 +228,7 @@ async def get_subnet_investigation( } # Liste des IPs avec détails - 2 requêtes séparées + fusion en Python - ips_query = """ + ips_query = f""" WITH cleaned_ips AS ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, @@ -237,7 +238,7 @@ async def get_subnet_investigation( asn_number, threat_level, anomaly_score - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR ), subnet_filter AS ( @@ -282,7 +283,7 @@ async def get_subnet_investigation( SELECT entity_value AS ip, uniq(arrayJoin(user_agents)) AS unique_ua - FROM view_dashboard_entities + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities PREWHERE entity_type = 'ip' WHERE entity_value IN ({ip_values}) AND log_date >= today() - INTERVAL 30 DAY diff --git a/services/dashboard/backend/routes/fingerprints.py b/services/dashboard/backend/routes/fingerprints.py index c195c08..e501205 100644 --- a/services/dashboard/backend/routes/fingerprints.py +++ b/services/dashboard/backend/routes/fingerprints.py @@ -13,6 +13,7 @@ from fastapi import APIRouter, HTTPException, Query import re from ..database import db +from ..config import settings router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"]) @@ -68,7 +69,7 @@ async def get_ja4_spoofing( """ try: # Agrégation par JA4 avec tous les indicateurs de spoofing - query = """ + query = f""" SELECT ja4, count() AS total_detections, @@ -102,7 
+103,7 @@ async def get_ja4_spoofing( avg(ja4_country_concentration) AS avg_country_concentration, argMax(threat_level, detected_at) AS last_threat_level - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR AND ja4 != '' AND ja4 IS NOT NULL GROUP BY ja4 @@ -254,7 +255,7 @@ async def get_ja4_ua_matrix( """ try: # Stats JA4 depuis ml_detected_anomalies - stats_query = """ + stats_query = f""" SELECT ja4, uniq(src_ip) AS unique_ips, @@ -264,7 +265,7 @@ async def get_ja4_ua_matrix( countIf(is_rare_ja4 = true) AS rare_count, countIf(is_ua_rotating = true) AS rotating_count, argMax(threat_level, detected_at) AS last_threat - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR AND ja4 != '' AND ja4 IS NOT NULL GROUP BY ja4 @@ -426,11 +427,11 @@ async def get_ua_analysis( pass # IPs avec is_ua_rotating depuis ml_detected_anomalies - rotating_query = """ + rotating_query = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, avg(ua_ch_mismatch) AS avg_ua_ch_mismatch - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR AND is_ua_rotating = true GROUP BY clean_ip @@ -522,7 +523,7 @@ async def get_ip_fingerprint_coherence(ip: str): """ try: # Données depuis ml_detected_anomalies - ml_query = """ + ml_query = f""" SELECT ja4, ua_ch_mismatch, @@ -543,7 +544,7 @@ async def get_ip_fingerprint_coherence(ip: str): window_mss_ratio, tcp_jitter_variance, multiplexing_efficiency - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE src_ip = %(ip)s ORDER BY detected_at DESC """ @@ -681,7 +682,7 @@ async def get_legitimate_ja4( Utile comme whitelist pour réduire les faux positifs. 
""" try: - query = """ + query = f""" SELECT ja4, uniq(src_ip) AS unique_ips, @@ -691,7 +692,7 @@ async def get_legitimate_ja4( countIf(is_rare_ja4 = true) AS rare_count, round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct, round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR AND ja4 != '' AND ja4 IS NOT NULL GROUP BY ja4 @@ -753,7 +754,7 @@ async def get_ja4_asn_correlation( try: # Two-pass: first aggregate per (ja4, asn) to get IP counts per ASN, # then aggregate per ja4 to compute concentration ratio - sql = """ + sql = f""" SELECT ja4, sum(ips_per_combo) AS unique_ips, @@ -774,7 +775,7 @@ async def get_ja4_asn_correlation( uniq(src_ip) AS ips_per_combo, uniq(src_ip) AS country_ips, sum(hits) AS total_hits - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR AND ja4 != '' GROUP BY ja4, src_asn, src_country_code diff --git a/services/dashboard/backend/routes/header_fingerprint.py b/services/dashboard/backend/routes/header_fingerprint.py index 927c5d3..82a7dad 100644 --- a/services/dashboard/backend/routes/header_fingerprint.py +++ b/services/dashboard/backend/routes/header_fingerprint.py @@ -4,6 +4,7 @@ Endpoints pour l'analyse des empreintes d'en-têtes HTTP from fastapi import APIRouter, HTTPException, Query from ..database import db +from ..config import settings router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"]) @@ -12,7 +13,7 @@ router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"]) async def get_header_clusters(limit: int = Query(50, ge=1, le=200)): """Clusters d'empreintes d'en-têtes groupés par header_order_hash.""" try: - sql = """ + sql = f""" SELECT header_order_hash AS hash, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS 
unique_ips, @@ -22,16 +23,16 @@ async def get_header_clusters(limit: int = Query(50, ge=1, le=200)): groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes, round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct, round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct - FROM mabase_prod.agg_header_fingerprint_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h GROUP BY header_order_hash ORDER BY unique_ips DESC LIMIT %(limit)s """ result = db.query(sql, {"limit": limit}) - total_sql = """ + total_sql = f""" SELECT uniq(header_order_hash) - FROM mabase_prod.agg_header_fingerprint_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h """ total_clusters = int(db.query(total_sql).result_rows[0][0]) @@ -73,14 +74,14 @@ async def get_header_clusters(limit: int = Query(50, ge=1, le=200)): async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)): """Liste des IPs appartenant à un cluster d'en-têtes donné.""" try: - sql = """ + sql = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, any(modern_browser_score) AS browser_score, any(ua_ch_mismatch) AS ua_ch_mismatch, any(sec_fetch_mode) AS sec_fetch_mode, any(sec_fetch_dest) AS sec_fetch_dest - FROM mabase_prod.agg_header_fingerprint_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h WHERE header_order_hash = %(hash)s GROUP BY src_ip ORDER BY browser_score DESC diff --git a/services/dashboard/backend/routes/heatmap.py b/services/dashboard/backend/routes/heatmap.py index eeee9c3..8c013d9 100644 --- a/services/dashboard/backend/routes/heatmap.py +++ b/services/dashboard/backend/routes/heatmap.py @@ -5,6 +5,7 @@ from collections import defaultdict from fastapi import APIRouter, HTTPException, Query from ..database import db +from ..config import settings router = APIRouter(prefix="/api/heatmap", tags=["heatmap"]) @@ -13,13 +14,13 @@ router = APIRouter(prefix="/api/heatmap", tags=["heatmap"]) async def 
get_heatmap_hourly(): """Hits agrégés par heure sur les 72 dernières heures.""" try: - sql = """ + sql = f""" SELECT toHour(window_start) AS hour, sum(hits) AS hits, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, max(max_requests_per_sec) AS max_rps - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 72 HOUR GROUP BY hour ORDER BY hour ASC @@ -44,13 +45,13 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)): """Hôtes les plus ciblés avec répartition horaire sur 24h.""" try: # Aggregate overall stats per host - agg_sql = """ + agg_sql = f""" SELECT host, sum(hits) AS total_hits, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, uniq(ja4) AS unique_ja4s - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 72 HOUR GROUP BY host ORDER BY total_hits DESC @@ -72,12 +73,12 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)): return {"items": []} # Hourly breakdown per host - hourly_sql = """ + hourly_sql = f""" SELECT host, toHour(window_start) AS hour, sum(hits) AS hits - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 72 HOUR AND host IN %(hosts)s GROUP BY host, hour @@ -106,9 +107,9 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)): async def get_heatmap_matrix(): """Matrice top-15 hôtes × 24 heures (sum hits) sur les 72 dernières heures.""" try: - top_sql = """ + top_sql = f""" SELECT host, sum(hits) AS total_hits - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 72 HOUR GROUP BY host ORDER BY total_hits DESC @@ -119,12 +120,12 @@ async def get_heatmap_matrix(): if not top_hosts: return {"hosts": [], "matrix": []} 
- cell_sql = """ + cell_sql = f""" SELECT host, toHour(window_start) AS hour, sum(hits) AS hits - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 72 HOUR AND host IN %(hosts)s GROUP BY host, hour diff --git a/services/dashboard/backend/routes/incidents.py b/services/dashboard/backend/routes/incidents.py index 78f25b2..e27814f 100644 --- a/services/dashboard/backend/routes/incidents.py +++ b/services/dashboard/backend/routes/incidents.py @@ -6,6 +6,7 @@ from fastapi import APIRouter, HTTPException, Query from typing import List, Optional from datetime import datetime from ..database import db +from ..config import settings router = APIRouter(prefix="/api/incidents", tags=["incidents"]) @@ -28,7 +29,7 @@ async def get_incident_clusters( # Cluster par subnet /24 avec une IP exemple # Note: src_ip est en IPv6, les IPv4 sont stockés comme ::ffff:x.x.x.x # toIPv4() convertit les IPv4-mapped, IPv4NumToString() retourne l'IPv4 en notation x.x.x.x - cluster_query = """ + cluster_query = f""" WITH cleaned_ips AS ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, @@ -38,7 +39,7 @@ async def get_incident_clusters( asn_number, threat_level, anomaly_score - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR ), subnet_groups AS ( @@ -84,13 +85,13 @@ async def get_incident_clusters( # Collect sample IPs to fetch real UA and trend data in bulk sample_ips = [row[10] for row in result.result_rows if row[10]] - # Fetch real primary UA per sample IP from view_dashboard_entities + # Fetch real primary UA per sample IP from view_dashboard_entities (in the settings.CLICKHOUSE_DB_PROCESSING database) ua_by_ip: dict = {} if sample_ips: ip_list_sql = ", ".join(f"'{ip}'" for ip in sample_ips[:50]) ua_query = f""" SELECT entity_value, arrayElement(user_agents, 1) AS top_ua - FROM view_dashboard_entities + 
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = 'ip' AND entity_value IN ({ip_list_sql}) AND notEmpty(user_agents) @@ -106,7 +107,7 @@ async def get_incident_clusters( pass # UA enrichment is best-effort # Compute real trend: compare current window vs previous window of same duration - trend_query = """ + trend_query = f""" WITH cleaned AS ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, @@ -116,7 +117,7 @@ async def get_incident_clusters( splitByChar('.', clean_ip)[2], '.', splitByChar('.', clean_ip)[3], '.0/24' ) AS subnet - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies ), current_window AS ( SELECT subnet, count() AS cnt diff --git a/services/dashboard/backend/routes/investigation_summary.py b/services/dashboard/backend/routes/investigation_summary.py index 7833748..5ec14bd 100644 --- a/services/dashboard/backend/routes/investigation_summary.py +++ b/services/dashboard/backend/routes/investigation_summary.py @@ -8,6 +8,7 @@ from fastapi import APIRouter, HTTPException from ..database import db from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua +from ..config import settings router = APIRouter(prefix="/api/investigation", tags=["investigation"]) @@ -25,7 +26,7 @@ async def get_ip_full_summary(ip: str): clean_ip = ip.replace("::ffff:", "").strip() try: # ── 1. 
Score ML / features ───────────────────────────────────────────── - ml_sql = """ + ml_sql = f""" SELECT max(abs(anomaly_score)) AS max_score, any(threat_level) AS threat_level, @@ -33,7 +34,7 @@ async def get_ip_full_summary(ip: str): count() AS total_detections, uniq(host) AS distinct_hosts, uniq(ja4) AS distinct_ja4 - FROM mabase_prod.ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s """ ml_res = db.query(ml_sql, {"ip": clean_ip}) @@ -48,13 +49,13 @@ async def get_ip_full_summary(ip: str): } # ── 2. Brute force ───────────────────────────────────────────────────── - bf_sql = """ + bf_sql = f""" SELECT uniq(host) AS hosts_attacked, sum(hits) AS total_hits, sum(query_params_count) AS total_params, groupArray(3)(host) AS top_hosts - FROM mabase_prod.view_form_bruteforce_detected + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s """ bf_res = db.query(bf_sql, {"ip": clean_ip}) @@ -68,14 +69,14 @@ async def get_ip_full_summary(ip: str): } # ── 3. TCP spoofing — fingerprinting multi-signal ───────────────────── - tcp_sql = """ + tcp_sql = f""" SELECT any(tcp_ttl_raw) AS ttl, any(tcp_win_raw) AS win, any(tcp_scale_raw) AS scale, any(tcp_mss_raw) AS mss, any(first_ua) AS ua - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s AND window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 @@ -109,9 +110,9 @@ async def get_ip_full_summary(ip: str): } # ── 4. 
JA4 rotation ──────────────────────────────────────────────────── - rot_sql = """ + rot_sql = f""" SELECT distinct_ja4_count, total_hits - FROM mabase_prod.view_host_ip_ja4_rotation + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s LIMIT 1 """ @@ -123,9 +124,9 @@ async def get_ip_full_summary(ip: str): rot_data = {"rotating": cnt > 1, "distinct_ja4_count": cnt, "total_hits": int(row[1] or 0)} # ── 5. Persistance ───────────────────────────────────────────────────── - pers_sql = """ + pers_sql = f""" SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen - FROM mabase_prod.view_ip_recurrence + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s LIMIT 1 """ @@ -143,12 +144,12 @@ async def get_ip_full_summary(ip: str): } # ── 6. Timeline 24h ──────────────────────────────────────────────────── - tl_sql = """ + tl_sql = f""" SELECT toHour(window_start) AS hour, sum(hits) AS hits, groupUniqArray(3)(ja4) AS ja4s - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s AND window_start >= now() - INTERVAL 24 HOUR GROUP BY hour diff --git a/services/dashboard/backend/routes/metrics.py b/services/dashboard/backend/routes/metrics.py index 9a2d4c6..f151094 100644 --- a/services/dashboard/backend/routes/metrics.py +++ b/services/dashboard/backend/routes/metrics.py @@ -4,6 +4,7 @@ Endpoints pour les métriques du dashboard from fastapi import APIRouter, HTTPException from ..database import db from ..models import MetricsResponse, MetricsSummary, TimeSeriesPoint +from ..config import settings router = APIRouter(prefix="/api/metrics", tags=["metrics"]) @@ -15,7 +16,7 @@ async def get_metrics(): """ try: # Résumé des métriques - summary_query = """ + summary_query = f""" SELECT count() AS 
total_detections, countIf(threat_level = 'CRITICAL') AS critical_count, @@ -25,7 +26,7 @@ async def get_metrics(): countIf(bot_name != '') AS known_bots_count, countIf(bot_name = '') AS anomalies_count, uniq(src_ip) AS unique_ips - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL 24 HOUR """ @@ -47,7 +48,7 @@ async def get_metrics(): ) # Série temporelle (par heure) - timeseries_query = """ + timeseries_query = f""" SELECT toStartOfHour(detected_at) AS hour, count() AS total, @@ -55,7 +56,7 @@ async def get_metrics(): countIf(threat_level = 'HIGH') AS high, countIf(threat_level = 'MEDIUM') AS medium, countIf(threat_level = 'LOW') AS low - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL 24 HOUR GROUP BY hour ORDER BY hour @@ -98,12 +99,12 @@ async def get_threat_distribution(): Récupère la répartition par niveau de menace """ try: - query = """ + query = f""" SELECT threat_level, count() AS count, round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL 24 HOUR GROUP BY threat_level ORDER BY count DESC @@ -128,7 +129,7 @@ async def get_metrics_baseline(): Compare les métriques actuelles (24h) vs hier (24h-48h) pour afficher les tendances. 
""" try: - query = """ + query = f""" SELECT countIf(detected_at >= now() - INTERVAL 24 HOUR) AS today_total, countIf(detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_total, @@ -136,7 +137,7 @@ async def get_metrics_baseline(): uniqIf(src_ip, detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_ips, countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 24 HOUR) AS today_critical, countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_critical - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL 48 HOUR """ r = db.query(query) diff --git a/services/dashboard/backend/routes/ml_features.py b/services/dashboard/backend/routes/ml_features.py index 36954d9..9e826e1 100644 --- a/services/dashboard/backend/routes/ml_features.py +++ b/services/dashboard/backend/routes/ml_features.py @@ -4,6 +4,7 @@ Endpoints pour les features ML / IA (scores d'anomalies, radar, scatter) from fastapi import APIRouter, HTTPException, Query from ..database import db +from ..config import settings router = APIRouter(prefix="/api/ml", tags=["ml_features"]) @@ -27,7 +28,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)): Query directe sur agg_host_ip_ja4_1h + LEFT JOIN agg_header_fingerprint_1h. 
""" try: - sql = """ + sql = f""" SELECT replaceRegexpAll(toString(a.src_ip), '^::ffff:', '') AS ip, any(a.ja4) AS ja4, @@ -45,13 +46,13 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)): any(a.src_as_name) AS asn_name, max(h.ua_ch_mismatch) AS ua_ch_mismatch, max(h.modern_browser_score) AS browser_score, - dictGetOrDefault('mabase_prod.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label, + dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label, coalesce( - nullIf(dictGetOrDefault('mabase_prod.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''), + nullIf(dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''), '' ) AS bot_name - FROM mabase_prod.agg_host_ip_ja4_1h a - LEFT JOIN mabase_prod.agg_header_fingerprint_1h h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h a + LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h h ON a.src_ip = h.src_ip AND a.window_start = h.window_start WHERE a.window_start >= now() - INTERVAL 24 HOUR GROUP BY a.src_ip @@ -92,7 +93,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)): async def get_ip_radar(ip: str): """Scores radar pour une IP spécifique (8 dimensions d'anomalie).""" try: - sql = """ + sql = f""" SELECT avg(fuzzing_index) AS fuzzing_index, avg(hit_velocity) AS hit_velocity, @@ -102,7 +103,7 @@ async def get_ip_radar(ip: str): avg(orphan_ratio) AS orphan_ratio, avg(path_diversity_ratio) AS path_diversity_ratio, avg(anomalous_payload_ratio) AS anomalous_payload_ratio - FROM mabase_prod.view_ai_features_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s AND window_start >= now() - INTERVAL 24 HOUR """ @@ -139,7 +140,7 @@ async def get_score_distribution(): """ try: # Single scan — global totals + per-model 
breakdown via GROUPING SETS - sql = """ + sql = f""" SELECT threat_level, model_name, @@ -149,7 +150,7 @@ async def get_score_distribution(): countIf(threat_level = 'NORMAL') AS normal_count, countIf(threat_level NOT IN ('NORMAL','KNOWN_BOT')) AS anomaly_count, countIf(threat_level = 'KNOWN_BOT') AS bot_count - FROM mabase_prod.ml_all_scores + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores WHERE detected_at >= now() - INTERVAL 3 DAY GROUP BY threat_level, model_name ORDER BY model_name, total DESC @@ -199,7 +200,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)): Retourne le score moyen et les counts par heure et par modèle. """ try: - sql = """ + sql = f""" SELECT toStartOfHour(window_start) AS hour, model_name, @@ -207,7 +208,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)): countIf(threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')) AS anomaly_count, countIf(threat_level = 'KNOWN_BOT') AS bot_count, round(avgIf(anomaly_score, threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')), 4) AS avg_anomaly_score - FROM mabase_prod.ml_all_scores + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores WHERE window_start >= now() - INTERVAL %(hours)s HOUR GROUP BY hour, model_name ORDER BY hour ASC, model_name @@ -237,7 +238,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)): Ces features sont calculées dans view_ai_features_1h mais jamais visualisées dans le dashboard. 
""" try: - sql = """ + sql = f""" SELECT ip, ja4, country, asn_name, total_hits AS hits, head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio, missing_accept_enc_ratio, http_scheme_ratio @@ -255,7 +256,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)): round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio, round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio, round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR GROUP BY src_ip ) @@ -295,7 +296,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu """ try: # First: check real campaigns - campaign_sql = """ + campaign_sql = f""" SELECT campaign_id, count() AS total_detections, @@ -305,7 +306,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu groupUniqArray(3)(bot_name) AS bot_names, min(detected_at) AS first_seen, max(detected_at) AS last_seen - FROM mabase_prod.ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR AND campaign_id >= 0 GROUP BY campaign_id @@ -330,7 +331,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu # Fallback: subnet-based clustering when DBSCAN has no campaigns if not campaigns: - subnet_sql = """ + subnet_sql = f""" SELECT IPv4CIDRToRange(toIPv4(replaceRegexpAll(toString(src_ip),'^::ffff:','')), 24).1 AS subnet, count() AS total_detections, @@ -340,7 +341,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu any(ja4) AS sample_ja4, min(detected_at) AS first_seen, max(detected_at) AS last_seen - FROM mabase_prod.ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE detected_at >= 
now() - INTERVAL %(hours)s HOUR AND threat_level IN ('HIGH','CRITICAL','MEDIUM') GROUP BY subnet @@ -381,7 +382,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)): """Points scatter plot (fuzzing_index × hit_velocity) — bypass view_ai_features_1h.""" try: - sql = """ + sql = f""" SELECT ip, ja4, @@ -400,7 +401,7 @@ async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)): max(last_seen) AS max_last, sum(count_head) AS total_count_head, max(correlated_raw) AS correlated - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR GROUP BY src_ip ) diff --git a/services/dashboard/backend/routes/rotation.py b/services/dashboard/backend/routes/rotation.py index a3633ea..cc1539d 100644 --- a/services/dashboard/backend/routes/rotation.py +++ b/services/dashboard/backend/routes/rotation.py @@ -4,6 +4,7 @@ Endpoints pour la détection de la rotation de fingerprints JA4 et des menaces p from fastapi import APIRouter, HTTPException, Query from ..database import db +from ..config import settings router = APIRouter(prefix="/api/rotation", tags=["rotation"]) @@ -12,12 +13,12 @@ router = APIRouter(prefix="/api/rotation", tags=["rotation"]) async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)): """IPs qui effectuent le plus de rotation de fingerprints JA4.""" try: - sql = """ + sql = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, distinct_ja4_count, total_hits - FROM mabase_prod.view_host_ip_ja4_rotation + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation ORDER BY distinct_ja4_count DESC LIMIT %(limit)s """ @@ -40,7 +41,7 @@ async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)): async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)): """Menaces persistantes triées par score de persistance.""" try: - sql = """ + 
sql = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, recurrence, @@ -48,7 +49,7 @@ async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)): worst_threat_level, first_seen, last_seen - FROM mabase_prod.view_ip_recurrence + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence ORDER BY (least(100, recurrence * 20 + worst_score * 50)) DESC LIMIT %(limit)s """ @@ -75,13 +76,13 @@ async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)): async def get_ip_ja4_history(ip: str): """Historique des JA4 utilisés par une IP donnée.""" try: - sql = """ + sql = f""" SELECT ja4, sum(hits) AS hits, min(window_start) AS first_seen, max(window_start) AS last_seen - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s GROUP BY ja4 ORDER BY hits DESC @@ -107,7 +108,7 @@ async def get_sophistication(limit: int = Query(50, ge=1, le=500)): Single SQL JOIN query — aucun traitement Python sur 34K entrées. 
""" try: - sql = """ + sql = f""" SELECT r.ip, r.distinct_ja4_count, @@ -122,20 +123,20 @@ async def get_sophistication(limit: int = Query(50, ge=1, le=500)): SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, distinct_ja4_count - FROM mabase_prod.view_host_ip_ja4_rotation + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation ) r LEFT JOIN ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, count() AS recurrence - FROM mabase_prod.ml_detected_anomalies FINAL + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies FINAL GROUP BY ip ) rec ON r.ip = rec.ip LEFT JOIN ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, sum(hits) AS bruteforce_hits - FROM mabase_prod.view_form_bruteforce_detected + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected GROUP BY ip ) bf ON r.ip = bf.ip ORDER BY sophistication_score DESC @@ -174,7 +175,7 @@ async def get_proactive_hunt( ): """IPs volant sous le radar : récurrentes mais sous le seuil de détection normal.""" try: - sql = """ + sql = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, recurrence, @@ -183,7 +184,7 @@ async def get_proactive_hunt( first_seen, last_seen, dateDiff('day', first_seen, last_seen) AS days_active - FROM mabase_prod.view_ip_recurrence + FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence WHERE recurrence >= %(min_recurrence)s AND abs(worst_score) < 0.5 AND dateDiff('day', first_seen, last_seen) >= %(min_days)s diff --git a/services/dashboard/backend/routes/search.py b/services/dashboard/backend/routes/search.py index 5723a0d..c9ca035 100644 --- a/services/dashboard/backend/routes/search.py +++ b/services/dashboard/backend/routes/search.py @@ -3,6 +3,7 @@ Endpoint de recherche globale rapide — utilisé par la barre Cmd+K """ from fastapi import APIRouter, Query from ..database import db +from ..config import settings router = APIRouter(prefix="/api/search", tags=["search"]) @@ -21,13 +22,13 @@ 
async def quick_search(q: str = Query(..., min_length=1, max_length=100)): # ── IPs ────────────────────────────────────────────────────────────────── ip_rows = db.query( - """ + f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, count() AS hits, max(detected_at) AS last_seen, any(threat_level) AS threat_level - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE ilike(toString(src_ip), %(p)s) AND detected_at >= now() - INTERVAL 24 HOUR GROUP BY clean_ip @@ -48,12 +49,12 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)): # ── JA4 fingerprints ───────────────────────────────────────────────────── ja4_rows = db.query( - """ + f""" SELECT ja4, count() AS hits, uniq(src_ip) AS unique_ips - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE ilike(ja4, %(p)s) AND ja4 != '' AND detected_at >= now() - INTERVAL 24 HOUR @@ -73,12 +74,12 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)): # ── Hosts ───────────────────────────────────────────────────────────────── host_rows = db.query( - """ + f""" SELECT host, count() AS hits, uniq(src_ip) AS unique_ips - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE ilike(host, %(p)s) AND host != '' AND detected_at >= now() - INTERVAL 24 HOUR @@ -98,13 +99,13 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)): # ── ASN ─────────────────────────────────────────────────────────────────── asn_rows = db.query( - """ + f""" SELECT asn_org, asn_number, count() AS hits, uniq(src_ip) AS unique_ips - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE (ilike(asn_org, %(p)s) OR ilike(asn_number, %(p)s)) AND asn_org != '' AND asn_number != '' AND detected_at >= now() - INTERVAL 24 HOUR diff --git a/services/dashboard/backend/routes/tcp_spoofing.py 
b/services/dashboard/backend/routes/tcp_spoofing.py index cdf3ebc..465a8c1 100644 --- a/services/dashboard/backend/routes/tcp_spoofing.py +++ b/services/dashboard/backend/routes/tcp_spoofing.py @@ -18,6 +18,7 @@ from ..services.tcp_fingerprint import ( detect_spoof, declared_os_from_ua, ) +from ..config import settings router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"]) @@ -26,7 +27,7 @@ router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"]) async def get_tcp_spoofing_overview(): """Statistiques globales avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale).""" try: - sql = """ + sql = f""" SELECT count() AS total_entries, uniq(src_ip) AS unique_ips, @@ -36,34 +37,34 @@ async def get_tcp_spoofing_overview(): countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp, countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp, countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR """ result = db.query(sql) row = result.result_rows[0] # Distribution TTL (top 15) - ttl_sql = """ + ttl_sql = f""" SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 GROUP BY ttl ORDER BY cnt DESC """ ttl_res = db.query(ttl_sql) # Distribution MSS — nouveau signal clé (top 12) - mss_sql = """ + mss_sql = f""" SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0 GROUP BY mss ORDER BY cnt DESC """ mss_res = db.query(mss_sql) # Distribution fenêtre (top 10) - win_sql = """ + win_sql = f""" SELECT tcp_win_raw AS win, count() AS cnt - 
FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 GROUP BY win ORDER BY cnt DESC """ @@ -105,17 +106,17 @@ async def get_tcp_spoofing_list( Inclut les champs enrichis : mss, win_scale, initial_ttl, hop_count, confidence, network_path, is_bot_tool. """ try: - count_sql = """ + count_sql = f""" SELECT count() FROM ( SELECT src_ip, ja4 - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 GROUP BY src_ip, ja4 ) """ total = int(db.query(count_sql).result_rows[0][0]) - sql = """ + sql = f""" SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip, ja4, @@ -125,7 +126,7 @@ async def get_tcp_spoofing_list( any(tcp_mss_raw) AS tcp_mss, any(first_ua) AS first_ua, sum(hits) AS hits - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 GROUP BY src_ip, ja4 ORDER BY hits DESC @@ -178,7 +179,7 @@ async def get_tcp_spoofing_list( async def get_tcp_spoofing_matrix(): """Matrice OS suspecté × OS déclaré avec fingerprinting multi-signal.""" try: - sql = """ + sql = f""" SELECT any(tcp_ttl_raw) AS ttl, any(tcp_win_raw) AS win, @@ -186,7 +187,7 @@ async def get_tcp_spoofing_matrix(): any(tcp_mss_raw) AS mss, any(first_ua) AS ua, count() AS cnt - FROM mabase_prod.agg_host_ip_ja4_1h + FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 GROUP BY src_ip, ja4 """ diff --git a/services/dashboard/backend/routes/variability.py b/services/dashboard/backend/routes/variability.py index e91d938..cad381c 100644 --- a/services/dashboard/backend/routes/variability.py +++ b/services/dashboard/backend/routes/variability.py @@ -8,6 +8,7 @@ from ..models import ( 
VariabilityResponse, VariabilityAttributes, AttributeValue, Insight, UserAgentsResponse, UserAgentValue ) +from ..config import settings router = APIRouter(prefix="/api/variability", tags=["variability"]) @@ -45,7 +46,7 @@ async def get_associated_ips( query = f""" SELECT src_ip, count() AS hit_count - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR GROUP BY src_ip @@ -64,7 +65,7 @@ async def get_associated_ips( # Compter le total count_query = f""" SELECT uniq(src_ip) AS total - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR """ @@ -138,7 +139,7 @@ async def get_associated_attributes( # country/asn/host: pivot via ml_detected_anomalies ua_where = f"""toString(src_ip) IN ( SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '') - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR )""" ua_q = f""" @@ -160,7 +161,7 @@ async def get_associated_attributes( {target_column} AS value, count() AS count, round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND {target_column} != '' AND {target_column} IS NOT NULL AND detected_at >= now() - INTERVAL 24 HOUR @@ -183,7 +184,7 @@ async def get_associated_attributes( # Compter le total count_query = f""" SELECT uniq({target_column}) AS total - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND {target_column} != '' AND {target_column} IS NOT NULL AND detected_at >= now() - INTERVAL 24 HOUR @@ -247,7 +248,7 @@ async def get_user_agents( ml_col = {"country": "country_code", "asn": 
"asn_number", "host": "host"}[attr_type] where = f"""toString(src_ip) IN ( SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '') - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {ml_col} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR )""" @@ -467,7 +468,7 @@ async def get_variability(attr_type: str, value: str): threat_level, model_name, anomaly_score - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR ) @@ -480,7 +481,7 @@ async def get_variability(attr_type: str, value: str): uniq(src_ip) AS unique_ips, min(detected_at) AS first_seen, max(detected_at) AS last_seen - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR """ @@ -506,12 +507,12 @@ async def get_variability(attr_type: str, value: str): header_user_agent AS user_agent, count() AS count, round(count() * 100.0 / ( - SELECT count() FROM mabase_prod.http_logs + SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR ), 2) AS percentage, min(time) AS first_seen, max(time) AS last_seen - FROM mabase_prod.http_logs + FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR AND header_user_agent != '' AND header_user_agent IS NOT NULL @@ -527,12 +528,12 @@ async def get_variability(attr_type: str, value: str): header_user_agent AS user_agent, count() AS count, round(count() * 100.0 / ( - SELECT count() FROM mabase_prod.http_logs + SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR ), 2) AS percentage, min(time) AS first_seen, max(time) AS last_seen - FROM mabase_prod.http_logs + FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs WHERE 
{_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR AND header_user_agent != '' AND header_user_agent IS NOT NULL @@ -545,7 +546,7 @@ async def get_variability(attr_type: str, value: str): # country / asn / host: pivot via ml_detected_anomalies → IPs, puis view UA _ua_where = f"""toString(src_ip) IN ( SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '') - FROM ml_detected_anomalies + FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR )""" ua_query_simple = f"""