refactor: replace hardcoded mabase_prod DB prefix with configurable settings

Replace all hardcoded 'mabase_prod.' table prefixes in the SQL queries of
the dashboard routes with configurable database names from settings:

- http_logs, http_logs_raw → settings.CLICKHOUSE_DB_LOGS
- All other tables → settings.CLICKHOUSE_DB_PROCESSING

Also qualify previously unqualified table references (bare FROM/JOIN
table_name) with the appropriate database prefix for consistency.

Each route file now imports 'from ..config import settings' and uses
f-strings with {settings.CLICKHOUSE_DB_PROCESSING} or
{settings.CLICKHOUSE_DB_LOGS} for database-qualified table names.

Files updated: analysis, attributes, audit, botnets, bruteforce,
clustering, detections, entities, fingerprints, header_fingerprint,
heatmap, incidents, investigation_summary, metrics, ml_features,
rotation, search, tcp_spoofing, variability (19 files).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 19:03:05 +02:00
parent dba2676fa7
commit b6391afbeb
19 changed files with 225 additions and 206 deletions

View File

@ -14,6 +14,7 @@ from ..models import (
ClassificationRecommendation, ClassificationLabel,
ClassificationCreate, Classification, ClassificationsListResponse
)
from ..config import settings
router = APIRouter(prefix="/api/analysis", tags=["analysis"])
@ -42,9 +43,9 @@ async def analyze_subnet(ip: str):
subnet_str = str(subnet)
# Récupérer les infos ASN pour cette IP
asn_query = """
asn_query = f"""
SELECT asn_number, asn_org
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
ORDER BY detected_at DESC
LIMIT 1
@ -60,9 +61,9 @@ async def analyze_subnet(ip: str):
asn_org = asn_result.result_rows[0][1] or "Unknown"
# IPs du même subnet /24
subnet_ips_query = """
subnet_ips_query = f"""
SELECT DISTINCT src_ip
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE toIPv4(src_ip) >= toIPv4(%(subnet_start)s)
AND toIPv4(src_ip) <= toIPv4(%(subnet_end)s)
AND detected_at >= now() - INTERVAL 24 HOUR
@ -78,9 +79,9 @@ async def analyze_subnet(ip: str):
# Total IPs du même ASN
if asn_number != "0":
asn_total_query = """
asn_total_query = f"""
SELECT uniq(src_ip)
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE asn_number = %(asn_number)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -114,9 +115,9 @@ async def analyze_ip_country(ip: str):
"""
try:
# Pays de l'IP
ip_country_query = """
ip_country_query = f"""
SELECT country_code, asn_number
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
ORDER BY detected_at DESC
LIMIT 1
@ -131,11 +132,11 @@ async def analyze_ip_country(ip: str):
asn_number = ip_result.result_rows[0][1]
# Répartition des autres pays du même ASN
asn_countries_query = """
asn_countries_query = f"""
SELECT
country_code,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE asn_number = %(asn_number)s
AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY country_code
@ -179,11 +180,11 @@ async def analyze_country(days: int = Query(1, ge=1, le=30)):
"""
try:
# Top pays
top_query = """
top_query = f"""
SELECT
country_code,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(days)s DAY
AND country_code != '' AND country_code IS NOT NULL
GROUP BY country_code
@ -206,11 +207,11 @@ async def analyze_country(days: int = Query(1, ge=1, le=30)):
]
# Baseline (7 derniers jours)
baseline_query = """
baseline_query = f"""
SELECT
country_code,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 7 DAY
AND country_code != '' AND country_code IS NOT NULL
GROUP BY country_code
@ -254,9 +255,9 @@ async def analyze_ja4(ip: str):
"""
try:
# JA4 de cette IP
ja4_query = """
ja4_query = f"""
SELECT ja4
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
AND ja4 != '' AND ja4 IS NOT NULL
ORDER BY detected_at DESC
@ -276,9 +277,9 @@ async def analyze_ja4(ip: str):
ja4 = ja4_result.result_rows[0][0]
# IPs avec le même JA4
shared_query = """
shared_query = f"""
SELECT uniq(src_ip)
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ja4 = %(ja4)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -287,11 +288,11 @@ async def analyze_ja4(ip: str):
shared_count = shared_result.result_rows[0][0] if shared_result.result_rows else 0
# Top subnets pour ce JA4 - Simplifié
subnets_query = """
subnets_query = f"""
SELECT
src_ip,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ja4 = %(ja4)s
AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
@ -315,9 +316,9 @@ async def analyze_ja4(ip: str):
]
# Autres JA4 pour cette IP
other_ja4_query = """
other_ja4_query = f"""
SELECT DISTINCT ja4
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
AND ja4 != '' AND ja4 IS NOT NULL
AND ja4 != %(current_ja4)s
@ -348,11 +349,11 @@ async def analyze_user_agents(ip: str):
"""
try:
# User-Agents pour cette IP (depuis http_logs)
ip_ua_query = """
ip_ua_query = f"""
SELECT
header_user_agent AS ua,
count() AS count
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE src_ip = %(ip)s
AND header_user_agent != '' AND header_user_agent IS NOT NULL
AND time >= now() - INTERVAL 24 HOUR
@ -361,9 +362,9 @@ async def analyze_user_agents(ip: str):
"""
# Total réel des requêtes (pour les pourcentages corrects)
ip_total_query = """
ip_total_query = f"""
SELECT count()
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE src_ip = %(ip)s
AND time >= now() - INTERVAL 24 HOUR
"""
@ -529,8 +530,8 @@ async def create_classification(data: ClassificationCreate):
if not data.ip and not data.ja4:
raise HTTPException(status_code=400, detail="IP ou JA4 requis")
query = """
INSERT INTO mabase_prod.classifications
query = f"""
INSERT INTO {settings.CLICKHOUSE_DB_PROCESSING}.classifications
(ip, ja4, label, tags, comment, confidence, features, analyst, created_at)
VALUES
(%(ip)s, %(ja4)s, %(label)s, %(tags)s, %(comment)s, %(confidence)s, %(features)s, %(analyst)s, now())
@ -551,7 +552,7 @@ async def create_classification(data: ClassificationCreate):
where_clause = "ip = %(entity)s" if data.ip else "ja4 = %(entity)s"
select_query = f"""
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause}
ORDER BY created_at DESC
LIMIT 1
@ -609,7 +610,7 @@ async def list_classifications(
query = f"""
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause}
ORDER BY created_at DESC
LIMIT %(limit)s
@ -635,7 +636,7 @@ async def list_classifications(
# Total
count_query = f"""
SELECT count()
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause}
"""
@ -657,13 +658,13 @@ async def get_classification_stats():
Statistiques des classifications
"""
try:
stats_query = """
stats_query = f"""
SELECT
label,
count() AS total,
uniq(ip) AS unique_ips,
avg(confidence) AS avg_confidence
FROM mabase_prod.classifications
FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
GROUP BY label
ORDER BY total DESC
"""

View File

@ -4,6 +4,7 @@ Endpoints pour la liste des attributs uniques
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..models import AttributeListResponse, AttributeListItem
from ..config import settings
router = APIRouter(prefix="/api/attributes", tags=["attributes"])
@ -42,7 +43,7 @@ async def get_attributes(
SELECT
{column} AS value,
count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR
"""
@ -53,7 +54,7 @@ async def get_attributes(
query = f"""
SELECT value, count FROM (
SELECT toString({column}) AS value, count() AS count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR
GROUP BY {column}
)

View File

@ -6,6 +6,7 @@ from fastapi import APIRouter, HTTPException, Query, Request
from typing import Optional
from datetime import datetime
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/audit", tags=["audit"])
logger = logging.getLogger(__name__)
@ -29,8 +30,8 @@ async def create_audit_log(
client_ip = request.client.host if request.client else "unknown"
# Insérer dans ClickHouse
insert_query = """
INSERT INTO mabase_prod.audit_logs
insert_query = f"""
INSERT INTO {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
(timestamp, user_name, action, entity_type, entity_id, entity_count, details, client_ip)
VALUES
(%(timestamp)s, %(user)s, %(action)s, %(entity_type)s, %(entity_id)s, %(entity_count)s, %(details)s, %(client_ip)s)
@ -105,7 +106,7 @@ async def get_audit_logs(
entity_count,
details,
client_ip
FROM mabase_prod.audit_logs
FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
WHERE {where_clause}
ORDER BY timestamp DESC
LIMIT %(limit)s
@ -152,13 +153,13 @@ async def get_audit_stats(
Statistiques d'audit
"""
try:
query = """
query = f"""
SELECT
action,
count() AS count,
uniq(user_name) AS unique_users,
sum(entity_count) AS total_entities
FROM mabase_prod.audit_logs
FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
GROUP BY action
ORDER BY count DESC
@ -198,14 +199,14 @@ async def get_user_activity(
Activité par utilisateur
"""
try:
query = """
query = f"""
SELECT
user_name,
count() AS actions,
uniq(action) AS action_types,
min(timestamp) AS first_action,
max(timestamp) AS last_action
FROM mabase_prod.audit_logs
FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
GROUP BY user_name
ORDER BY actions DESC

View File

@ -4,6 +4,7 @@ Endpoints pour l'analyse des botnets via la propagation des fingerprints JA4
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/botnets", tags=["botnets"])
@ -20,13 +21,13 @@ def _botnet_class(unique_countries: int) -> str:
async def get_ja4_spread():
"""Propagation des JA4 fingerprints à travers les pays et les IPs."""
try:
sql = """
sql = f"""
SELECT
ja4,
unique_ips,
unique_countries,
targeted_hosts
FROM mabase_prod.view_host_ja4_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ja4_anomalies
ORDER BY unique_countries DESC
"""
result = db.query(sql)
@ -56,12 +57,12 @@ async def get_ja4_spread():
async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)):
"""Top pays pour un JA4 donné depuis agg_host_ip_ja4_1h."""
try:
sql = """
sql = f"""
SELECT
src_country_code AS country_code,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE ja4 = %(ja4)s
GROUP BY src_country_code
ORDER BY unique_ips DESC
@ -85,13 +86,13 @@ async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)):
async def get_botnets_summary():
"""Statistiques globales sur les botnets détectés."""
try:
sql = """
sql = f"""
SELECT
countIf(unique_countries > 100) AS total_global_botnets,
sumIf(unique_ips, unique_countries > 50) AS total_ips_in_botnets,
argMax(ja4, unique_countries) AS most_spread_ja4,
argMax(ja4, unique_ips) AS most_ips_ja4
FROM mabase_prod.view_host_ja4_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ja4_anomalies
"""
result = db.query(sql)
row = result.result_rows[0]

View File

@ -4,6 +4,7 @@ Endpoints pour l'analyse des attaques par force brute sur les formulaires
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"])
@ -12,14 +13,14 @@ router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"])
async def get_bruteforce_targets():
"""Liste des hôtes ciblés par brute-force, triés par total_hits DESC."""
try:
sql = """
sql = f"""
SELECT
host,
uniq(src_ip) AS unique_ips,
sum(hits) AS total_hits,
sum(query_params_count) AS total_params,
groupArray(3)(ja4) AS top_ja4s
FROM mabase_prod.view_form_bruteforce_detected
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
GROUP BY host
ORDER BY total_hits DESC
"""
@ -53,14 +54,14 @@ async def get_bruteforce_targets():
async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)):
"""Top IPs attaquantes triées par total_hits DESC."""
try:
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
uniq(host) AS distinct_hosts,
sum(hits) AS total_hits,
sum(query_params_count) AS total_params,
argMax(ja4, hits) AS ja4
FROM mabase_prod.view_form_bruteforce_detected
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
GROUP BY src_ip
ORDER BY total_hits DESC
LIMIT %(limit)s
@ -84,12 +85,12 @@ async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)):
async def get_bruteforce_timeline():
"""Hits par heure (dernières 72h) depuis agg_host_ip_ja4_1h."""
try:
sql = """
sql = f"""
SELECT
toHour(window_start) AS hour,
sum(hits) AS hits,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS ips
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY hour
ORDER BY hour ASC
@ -111,14 +112,14 @@ async def get_bruteforce_timeline():
async def get_host_attackers(host: str, limit: int = Query(20, ge=1, le=200)):
"""Top IPs attaquant un hôte spécifique, avec JA4 et type d'attaque."""
try:
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
sum(hits) AS total_hits,
sum(query_params_count) AS total_params,
argMax(ja4, hits) AS ja4,
max(hits) AS max_hits_per_window
FROM mabase_prod.view_form_bruteforce_detected
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
WHERE host = %(host)s
GROUP BY src_ip
ORDER BY total_hits DESC

View File

@ -24,6 +24,7 @@ from ..services.clustering_engine import (
name_cluster, risk_score_from_centroid, standardize,
risk_to_gradient_color,
)
from ..config import settings
log = logging.getLogger(__name__)
router = APIRouter(prefix="/api/clustering", tags=["clustering"])
@ -47,7 +48,7 @@ _EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering")
# ─── SQL : TOUTES les IPs sans LIMIT ─────────────────────────────────────────
_SQL_ALL_IPS = """
_SQL_ALL_IPS = f"""
SELECT
replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip,
t.ja4,
@ -96,8 +97,8 @@ SELECT
-- Cookie et Referer issus de la table dédiée aux empreintes
any(hfp.hfp_cookie) AS hfp_cookie,
any(hfp.hfp_referer) AS hfp_referer
FROM mabase_prod.agg_host_ip_ja4_1h t
LEFT JOIN mabase_prod.ml_detected_anomalies ml
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h t
LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies ml
ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
AND ml.detected_at >= now() - INTERVAL %(hours)s HOUR
LEFT JOIN (
@ -107,7 +108,7 @@ LEFT JOIN (
any(arrayExists(x -> x LIKE '%%Accept-Encoding%%', client_headers)) AS hdr_enc,
any(arrayExists(x -> x LIKE '%%Sec-Fetch%%', client_headers)) AS hdr_sec_fetch,
any(length(splitByChar(',', client_headers[1]))) AS hdr_count
FROM mabase_prod.view_dashboard_entities
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE length(client_headers) > 0
AND log_date >= today() - 2
GROUP BY src_ip_v6, ja4
@ -117,7 +118,7 @@ LEFT JOIN (
src_ip,
avg(has_cookie) AS hfp_cookie,
avg(has_referer) AS hfp_referer
FROM mabase_prod.agg_header_fingerprint_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
WHERE window_start >= now() - INTERVAL %(hours)s HOUR
GROUP BY src_ip
) hfp ON t.src_ip = hfp.src_ip
@ -515,8 +516,8 @@ async def get_cluster_ips(
any(ml.asn_org) AS asn_org,
round(avg(ml.fuzzing_index), 2) AS fuzzing,
round(avg(ml.hit_velocity), 2) AS velocity
FROM mabase_prod.agg_host_ip_ja4_1h t
LEFT JOIN mabase_prod.ml_detected_anomalies ml
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h t
LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies ml
ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
AND ml.detected_at >= now() - INTERVAL 24 HOUR
WHERE t.window_start >= now() - INTERVAL 24 HOUR

View File

@ -5,6 +5,7 @@ from fastapi import APIRouter, HTTPException, Query
from typing import Optional, List
from ..database import db
from ..models import DetectionsListResponse, Detection
from ..config import settings
router = APIRouter(prefix="/api/detections", tags=["detections"])
@ -82,7 +83,7 @@ async def get_detections(
# Requête de comptage
count_query = f"""
SELECT count()
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {where_clause}
"""
@ -107,7 +108,7 @@ async def get_detections(
# Count distinct IPs
count_ip_query = f"""
SELECT uniq(src_ip)
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {where_clause}
"""
cr = db.query(count_ip_query, params)
@ -154,11 +155,11 @@ async def get_detections(
argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {where_clause}
GROUP BY src_ip
) ip_data
LEFT JOIN mabase_prod.asn_reputation ar
LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.asn_reputation ar
ON ar.src_asn = toUInt32OrZero(ip_data.asn_number)
ORDER BY {outer_sort} {sort_order}
LIMIT %(limit)s OFFSET %(offset)s
@ -248,8 +249,8 @@ async def get_detections(
anubis_bot_name,
anubis_bot_action,
anubis_bot_category
FROM ml_detected_anomalies
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
WHERE {where_clause}
ORDER BY {sort_by} {sort_order}
LIMIT %(limit)s OFFSET %(offset)s
@ -312,7 +313,7 @@ async def get_detection_details(detection_id: str):
detection_id peut être une IP ou un identifiant
"""
try:
query = """
query = f"""
SELECT
detected_at,
src_ip,
@ -363,7 +364,7 @@ async def get_detection_details(detection_id: str):
ja4_asn_concentration,
ja4_country_concentration,
is_rare_ja4
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
ORDER BY detected_at DESC
LIMIT 1

View File

@ -11,6 +11,7 @@ from ..models import (
EntityRelatedAttributes,
EntityAttributeValue
)
from ..config import settings
router = APIRouter(prefix="/api/entities", tags=["Entities"])
@ -24,7 +25,7 @@ def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Op
"""
Récupère les statistiques pour une entité donnée
"""
query = """
query = f"""
SELECT
entity_type,
entity_value,
@ -32,7 +33,7 @@ def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Op
sum(unique_ips) as unique_ips,
min(log_date) as first_seen,
max(log_date) as last_seen
FROM mabase_prod.view_dashboard_entities
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = %(entity_type)s
AND entity_value = %(entity_value)s
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
@ -64,13 +65,13 @@ def get_related_attributes(entity_type: str, entity_value: str, hours: int = 24)
Récupère les attributs associés à une entité
"""
# Requête pour agréger tous les attributs associés
query = """
query = f"""
SELECT
(SELECT groupUniqArray(toString(src_ip)) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips,
(SELECT groupUniqArray(ja4) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s,
(SELECT groupUniqArray(host) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts,
(SELECT groupUniqArrayArray(asns) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns,
(SELECT groupUniqArrayArray(countries) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries
(SELECT groupUniqArray(toString(src_ip)) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips,
(SELECT groupUniqArray(ja4) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s,
(SELECT groupUniqArray(host) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts,
(SELECT groupUniqArrayArray(asns) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns,
(SELECT groupUniqArrayArray(countries) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries
"""
result = db.query(query, {
@ -110,7 +111,7 @@ def get_array_values(entity_type: str, entity_value: str, array_field: str, hour
FROM (
SELECT
arrayJoin({array_field}) as value
FROM mabase_prod.view_dashboard_entities
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = %(entity_type)s
AND entity_value = %(entity_value)s
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
@ -156,7 +157,7 @@ async def get_subnet_investigation(
subnet_third = subnet_parts[2]
# Stats globales du subnet - utilise ml_detected_anomalies + view_dashboard_entities pour UA
stats_query = """
stats_query = f"""
WITH cleaned_ips AS (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -165,7 +166,7 @@ async def get_subnet_investigation(
host,
country_code,
asn_number
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
),
subnet_filter AS (
@ -180,7 +181,7 @@ async def get_subnet_investigation(
SELECT
entity_value AS ip,
arrayJoin(user_agents) AS user_agent
FROM view_dashboard_entities
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = 'ip'
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s
@ -227,7 +228,7 @@ async def get_subnet_investigation(
}
# Liste des IPs avec détails - 2 requêtes séparées + fusion en Python
ips_query = """
ips_query = f"""
WITH cleaned_ips AS (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -237,7 +238,7 @@ async def get_subnet_investigation(
asn_number,
threat_level,
anomaly_score
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
),
subnet_filter AS (
@ -282,7 +283,7 @@ async def get_subnet_investigation(
SELECT
entity_value AS ip,
uniq(arrayJoin(user_agents)) AS unique_ua
FROM view_dashboard_entities
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
PREWHERE entity_type = 'ip'
WHERE entity_value IN ({ip_values})
AND log_date >= today() - INTERVAL 30 DAY

View File

@ -13,6 +13,7 @@ from fastapi import APIRouter, HTTPException, Query
import re
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"])
@ -68,7 +69,7 @@ async def get_ja4_spoofing(
"""
try:
# Agrégation par JA4 avec tous les indicateurs de spoofing
query = """
query = f"""
SELECT
ja4,
count() AS total_detections,
@ -102,7 +103,7 @@ async def get_ja4_spoofing(
avg(ja4_country_concentration) AS avg_country_concentration,
argMax(threat_level, detected_at) AS last_threat_level
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4
@ -254,7 +255,7 @@ async def get_ja4_ua_matrix(
"""
try:
# Stats JA4 depuis ml_detected_anomalies
stats_query = """
stats_query = f"""
SELECT
ja4,
uniq(src_ip) AS unique_ips,
@ -264,7 +265,7 @@ async def get_ja4_ua_matrix(
countIf(is_rare_ja4 = true) AS rare_count,
countIf(is_ua_rotating = true) AS rotating_count,
argMax(threat_level, detected_at) AS last_threat
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4
@ -426,11 +427,11 @@ async def get_ua_analysis(
pass
# IPs avec is_ua_rotating depuis ml_detected_anomalies
rotating_query = """
rotating_query = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
avg(ua_ch_mismatch) AS avg_ua_ch_mismatch
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND is_ua_rotating = true
GROUP BY clean_ip
@ -522,7 +523,7 @@ async def get_ip_fingerprint_coherence(ip: str):
"""
try:
# Données depuis ml_detected_anomalies
ml_query = """
ml_query = f"""
SELECT
ja4,
ua_ch_mismatch,
@ -543,7 +544,7 @@ async def get_ip_fingerprint_coherence(ip: str):
window_mss_ratio,
tcp_jitter_variance,
multiplexing_efficiency
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s
ORDER BY detected_at DESC
"""
@ -681,7 +682,7 @@ async def get_legitimate_ja4(
Utile comme whitelist pour réduire les faux positifs.
"""
try:
query = """
query = f"""
SELECT
ja4,
uniq(src_ip) AS unique_ips,
@ -691,7 +692,7 @@ async def get_legitimate_ja4(
countIf(is_rare_ja4 = true) AS rare_count,
round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct,
round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4
@ -753,7 +754,7 @@ async def get_ja4_asn_correlation(
try:
# Two-pass: first aggregate per (ja4, asn) to get IP counts per ASN,
# then aggregate per ja4 to compute concentration ratio
sql = """
sql = f"""
SELECT
ja4,
sum(ips_per_combo) AS unique_ips,
@ -774,7 +775,7 @@ async def get_ja4_asn_correlation(
uniq(src_ip) AS ips_per_combo,
uniq(src_ip) AS country_ips,
sum(hits) AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
AND ja4 != ''
GROUP BY ja4, src_asn, src_country_code

View File

@ -4,6 +4,7 @@ Endpoints pour l'analyse des empreintes d'en-têtes HTTP
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"])
@ -12,7 +13,7 @@ router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"])
async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
"""Clusters d'empreintes d'en-têtes groupés par header_order_hash."""
try:
sql = """
sql = f"""
SELECT
header_order_hash AS hash,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
@ -22,16 +23,16 @@ async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes,
round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct,
round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct
FROM mabase_prod.agg_header_fingerprint_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
GROUP BY header_order_hash
ORDER BY unique_ips DESC
LIMIT %(limit)s
"""
result = db.query(sql, {"limit": limit})
total_sql = """
total_sql = f"""
SELECT uniq(header_order_hash)
FROM mabase_prod.agg_header_fingerprint_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
"""
total_clusters = int(db.query(total_sql).result_rows[0][0])
@ -73,14 +74,14 @@ async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)):
"""Liste des IPs appartenant à un cluster d'en-têtes donné."""
try:
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
any(modern_browser_score) AS browser_score,
any(ua_ch_mismatch) AS ua_ch_mismatch,
any(sec_fetch_mode) AS sec_fetch_mode,
any(sec_fetch_dest) AS sec_fetch_dest
FROM mabase_prod.agg_header_fingerprint_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
WHERE header_order_hash = %(hash)s
GROUP BY src_ip
ORDER BY browser_score DESC

View File

@ -5,6 +5,7 @@ from collections import defaultdict
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/heatmap", tags=["heatmap"])
@ -13,13 +14,13 @@ router = APIRouter(prefix="/api/heatmap", tags=["heatmap"])
async def get_heatmap_hourly():
"""Hits agrégés par heure sur les 72 dernières heures."""
try:
sql = """
sql = f"""
SELECT
toHour(window_start) AS hour,
sum(hits) AS hits,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
max(max_requests_per_sec) AS max_rps
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY hour
ORDER BY hour ASC
@ -44,13 +45,13 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
"""Hôtes les plus ciblés avec répartition horaire sur 24h."""
try:
# Aggregate overall stats per host
agg_sql = """
agg_sql = f"""
SELECT
host,
sum(hits) AS total_hits,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
uniq(ja4) AS unique_ja4s
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY host
ORDER BY total_hits DESC
@ -72,12 +73,12 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
return {"items": []}
# Hourly breakdown per host
hourly_sql = """
hourly_sql = f"""
SELECT
host,
toHour(window_start) AS hour,
sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR
AND host IN %(hosts)s
GROUP BY host, hour
@ -106,9 +107,9 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
async def get_heatmap_matrix():
"""Matrice top-15 hôtes × 24 heures (sum hits) sur les 72 dernières heures."""
try:
top_sql = """
top_sql = f"""
SELECT host, sum(hits) AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY host
ORDER BY total_hits DESC
@ -119,12 +120,12 @@ async def get_heatmap_matrix():
if not top_hosts:
return {"hosts": [], "matrix": []}
cell_sql = """
cell_sql = f"""
SELECT
host,
toHour(window_start) AS hour,
sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR
AND host IN %(hosts)s
GROUP BY host, hour

View File

@ -6,6 +6,7 @@ from fastapi import APIRouter, HTTPException, Query
from typing import List, Optional
from datetime import datetime
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/incidents", tags=["incidents"])
@ -28,7 +29,7 @@ async def get_incident_clusters(
# Cluster par subnet /24 avec une IP exemple
# Note: src_ip est en IPv6, les IPv4 sont stockés comme ::ffff:x.x.x.x
# toIPv4() convertit les IPv4-mapped, IPv4NumToString() retourne l'IPv4 en notation x.x.x.x
cluster_query = """
cluster_query = f"""
WITH cleaned_ips AS (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -38,7 +39,7 @@ async def get_incident_clusters(
asn_number,
threat_level,
anomaly_score
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
),
subnet_groups AS (
@ -84,13 +85,13 @@ async def get_incident_clusters(
# Collect sample IPs to fetch real UA and trend data in bulk
sample_ips = [row[10] for row in result.result_rows if row[10]]
# Fetch real primary UA per sample IP from view_dashboard_entities
# Fetch real primary UA per sample IP from view_dashboard_entities (in the processing database)
ua_by_ip: dict = {}
if sample_ips:
ip_list_sql = ", ".join(f"'{ip}'" for ip in sample_ips[:50])
ua_query = f"""
SELECT entity_value, arrayElement(user_agents, 1) AS top_ua
FROM view_dashboard_entities
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = 'ip'
AND entity_value IN ({ip_list_sql})
AND notEmpty(user_agents)
@ -106,7 +107,7 @@ async def get_incident_clusters(
pass # UA enrichment is best-effort
# Compute real trend: compare current window vs previous window of same duration
trend_query = """
trend_query = f"""
WITH cleaned AS (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -116,7 +117,7 @@ async def get_incident_clusters(
splitByChar('.', clean_ip)[2], '.',
splitByChar('.', clean_ip)[3], '.0/24'
) AS subnet
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
),
current_window AS (
SELECT subnet, count() AS cnt

View File

@ -8,6 +8,7 @@ from fastapi import APIRouter, HTTPException
from ..database import db
from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua
from ..config import settings
router = APIRouter(prefix="/api/investigation", tags=["investigation"])
@ -25,7 +26,7 @@ async def get_ip_full_summary(ip: str):
clean_ip = ip.replace("::ffff:", "").strip()
try:
# ── 1. Score ML / features ─────────────────────────────────────────────
ml_sql = """
ml_sql = f"""
SELECT
max(abs(anomaly_score)) AS max_score,
any(threat_level) AS threat_level,
@ -33,7 +34,7 @@ async def get_ip_full_summary(ip: str):
count() AS total_detections,
uniq(host) AS distinct_hosts,
uniq(ja4) AS distinct_ja4
FROM mabase_prod.ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
"""
ml_res = db.query(ml_sql, {"ip": clean_ip})
@ -48,13 +49,13 @@ async def get_ip_full_summary(ip: str):
}
# ── 2. Brute force ─────────────────────────────────────────────────────
bf_sql = """
bf_sql = f"""
SELECT
uniq(host) AS hosts_attacked,
sum(hits) AS total_hits,
sum(query_params_count) AS total_params,
groupArray(3)(host) AS top_hosts
FROM mabase_prod.view_form_bruteforce_detected
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
"""
bf_res = db.query(bf_sql, {"ip": clean_ip})
@ -68,14 +69,14 @@ async def get_ip_full_summary(ip: str):
}
# ── 3. TCP spoofing — fingerprinting multi-signal ─────────────────────
tcp_sql = """
tcp_sql = f"""
SELECT
any(tcp_ttl_raw) AS ttl,
any(tcp_win_raw) AS win,
any(tcp_scale_raw) AS scale,
any(tcp_mss_raw) AS mss,
any(first_ua) AS ua
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND window_start >= now() - INTERVAL 24 HOUR
AND tcp_ttl_raw > 0
@ -109,9 +110,9 @@ async def get_ip_full_summary(ip: str):
}
# ── 4. JA4 rotation ────────────────────────────────────────────────────
rot_sql = """
rot_sql = f"""
SELECT distinct_ja4_count, total_hits
FROM mabase_prod.view_host_ip_ja4_rotation
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
LIMIT 1
"""
@ -123,9 +124,9 @@ async def get_ip_full_summary(ip: str):
rot_data = {"rotating": cnt > 1, "distinct_ja4_count": cnt, "total_hits": int(row[1] or 0)}
# ── 5. Persistance ─────────────────────────────────────────────────────
pers_sql = """
pers_sql = f"""
SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen
FROM mabase_prod.view_ip_recurrence
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
LIMIT 1
"""
@ -143,12 +144,12 @@ async def get_ip_full_summary(ip: str):
}
# ── 6. Timeline 24h ────────────────────────────────────────────────────
tl_sql = """
tl_sql = f"""
SELECT
toHour(window_start) AS hour,
sum(hits) AS hits,
groupUniqArray(3)(ja4) AS ja4s
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND window_start >= now() - INTERVAL 24 HOUR
GROUP BY hour

View File

@ -4,6 +4,7 @@ Endpoints pour les métriques du dashboard
from fastapi import APIRouter, HTTPException
from ..database import db
from ..models import MetricsResponse, MetricsSummary, TimeSeriesPoint
from ..config import settings
router = APIRouter(prefix="/api/metrics", tags=["metrics"])
@ -15,7 +16,7 @@ async def get_metrics():
"""
try:
# Résumé des métriques
summary_query = """
summary_query = f"""
SELECT
count() AS total_detections,
countIf(threat_level = 'CRITICAL') AS critical_count,
@ -25,7 +26,7 @@ async def get_metrics():
countIf(bot_name != '') AS known_bots_count,
countIf(bot_name = '') AS anomalies_count,
uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR
"""
@ -47,7 +48,7 @@ async def get_metrics():
)
# Série temporelle (par heure)
timeseries_query = """
timeseries_query = f"""
SELECT
toStartOfHour(detected_at) AS hour,
count() AS total,
@ -55,7 +56,7 @@ async def get_metrics():
countIf(threat_level = 'HIGH') AS high,
countIf(threat_level = 'MEDIUM') AS medium,
countIf(threat_level = 'LOW') AS low
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR
GROUP BY hour
ORDER BY hour
@ -98,12 +99,12 @@ async def get_threat_distribution():
Récupère la répartition par niveau de menace
"""
try:
query = """
query = f"""
SELECT
threat_level,
count() AS count,
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR
GROUP BY threat_level
ORDER BY count DESC
@ -128,7 +129,7 @@ async def get_metrics_baseline():
Compare les métriques actuelles (24h) vs hier (24h-48h) pour afficher les tendances.
"""
try:
query = """
query = f"""
SELECT
countIf(detected_at >= now() - INTERVAL 24 HOUR) AS today_total,
countIf(detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_total,
@ -136,7 +137,7 @@ async def get_metrics_baseline():
uniqIf(src_ip, detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_ips,
countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 24 HOUR) AS today_critical,
countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_critical
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 48 HOUR
"""
r = db.query(query)

View File

@ -4,6 +4,7 @@ Endpoints pour les features ML / IA (scores d'anomalies, radar, scatter)
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/ml", tags=["ml_features"])
@ -27,7 +28,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
Query directe sur agg_host_ip_ja4_1h + LEFT JOIN agg_header_fingerprint_1h.
"""
try:
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(a.src_ip), '^::ffff:', '') AS ip,
any(a.ja4) AS ja4,
@ -45,13 +46,13 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
any(a.src_as_name) AS asn_name,
max(h.ua_ch_mismatch) AS ua_ch_mismatch,
max(h.modern_browser_score) AS browser_score,
dictGetOrDefault('mabase_prod.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label,
dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label,
coalesce(
nullIf(dictGetOrDefault('mabase_prod.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''),
nullIf(dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''),
''
) AS bot_name
FROM mabase_prod.agg_host_ip_ja4_1h a
LEFT JOIN mabase_prod.agg_header_fingerprint_1h h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h a
LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h h
ON a.src_ip = h.src_ip AND a.window_start = h.window_start
WHERE a.window_start >= now() - INTERVAL 24 HOUR
GROUP BY a.src_ip
@ -92,7 +93,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
async def get_ip_radar(ip: str):
"""Scores radar pour une IP spécifique (8 dimensions d'anomalie)."""
try:
sql = """
sql = f"""
SELECT
avg(fuzzing_index) AS fuzzing_index,
avg(hit_velocity) AS hit_velocity,
@ -102,7 +103,7 @@ async def get_ip_radar(ip: str):
avg(orphan_ratio) AS orphan_ratio,
avg(path_diversity_ratio) AS path_diversity_ratio,
avg(anomalous_payload_ratio) AS anomalous_payload_ratio
FROM mabase_prod.view_ai_features_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND window_start >= now() - INTERVAL 24 HOUR
"""
@ -139,7 +140,7 @@ async def get_score_distribution():
"""
try:
# Single scan — global totals + per-model breakdown via GROUPING SETS
sql = """
sql = f"""
SELECT
threat_level,
model_name,
@ -149,7 +150,7 @@ async def get_score_distribution():
countIf(threat_level = 'NORMAL') AS normal_count,
countIf(threat_level NOT IN ('NORMAL','KNOWN_BOT')) AS anomaly_count,
countIf(threat_level = 'KNOWN_BOT') AS bot_count
FROM mabase_prod.ml_all_scores
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores
WHERE detected_at >= now() - INTERVAL 3 DAY
GROUP BY threat_level, model_name
ORDER BY model_name, total DESC
@ -199,7 +200,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
Retourne le score moyen et les counts par heure et par modèle.
"""
try:
sql = """
sql = f"""
SELECT
toStartOfHour(window_start) AS hour,
model_name,
@ -207,7 +208,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
countIf(threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')) AS anomaly_count,
countIf(threat_level = 'KNOWN_BOT') AS bot_count,
round(avgIf(anomaly_score, threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')), 4) AS avg_anomaly_score
FROM mabase_prod.ml_all_scores
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores
WHERE window_start >= now() - INTERVAL %(hours)s HOUR
GROUP BY hour, model_name
ORDER BY hour ASC, model_name
@ -237,7 +238,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
Ces features sont calculées dans view_ai_features_1h mais jamais visualisées dans le dashboard.
"""
try:
sql = """
sql = f"""
SELECT ip, ja4, country, asn_name, total_hits AS hits,
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
missing_accept_enc_ratio, http_scheme_ratio
@ -255,7 +256,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
)
@ -295,7 +296,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
"""
try:
# First: check real campaigns
campaign_sql = """
campaign_sql = f"""
SELECT
campaign_id,
count() AS total_detections,
@ -305,7 +306,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
groupUniqArray(3)(bot_name) AS bot_names,
min(detected_at) AS first_seen,
max(detected_at) AS last_seen
FROM mabase_prod.ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND campaign_id >= 0
GROUP BY campaign_id
@ -330,7 +331,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
# Fallback: subnet-based clustering when DBSCAN has no campaigns
if not campaigns:
subnet_sql = """
subnet_sql = f"""
SELECT
IPv4CIDRToRange(toIPv4(replaceRegexpAll(toString(src_ip),'^::ffff:','')), 24).1 AS subnet,
count() AS total_detections,
@ -340,7 +341,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
any(ja4) AS sample_ja4,
min(detected_at) AS first_seen,
max(detected_at) AS last_seen
FROM mabase_prod.ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND threat_level IN ('HIGH','CRITICAL','MEDIUM')
GROUP BY subnet
@ -381,7 +382,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
"""Points scatter plot (fuzzing_index × hit_velocity) — bypass view_ai_features_1h."""
try:
sql = """
sql = f"""
SELECT
ip,
ja4,
@ -400,7 +401,7 @@ async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
max(last_seen) AS max_last,
sum(count_head) AS total_count_head,
max(correlated_raw) AS correlated
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
)

View File

@ -4,6 +4,7 @@ Endpoints pour la détection de la rotation de fingerprints JA4 et des menaces p
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/rotation", tags=["rotation"])
@ -12,12 +13,12 @@ router = APIRouter(prefix="/api/rotation", tags=["rotation"])
async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)):
"""IPs qui effectuent le plus de rotation de fingerprints JA4."""
try:
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
distinct_ja4_count,
total_hits
FROM mabase_prod.view_host_ip_ja4_rotation
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
ORDER BY distinct_ja4_count DESC
LIMIT %(limit)s
"""
@ -40,7 +41,7 @@ async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)):
async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
"""Menaces persistantes triées par score de persistance."""
try:
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
recurrence,
@ -48,7 +49,7 @@ async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
worst_threat_level,
first_seen,
last_seen
FROM mabase_prod.view_ip_recurrence
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
ORDER BY (least(100, recurrence * 20 + worst_score * 50)) DESC
LIMIT %(limit)s
"""
@ -75,13 +76,13 @@ async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
async def get_ip_ja4_history(ip: str):
"""Historique des JA4 utilisés par une IP donnée."""
try:
sql = """
sql = f"""
SELECT
ja4,
sum(hits) AS hits,
min(window_start) AS first_seen,
max(window_start) AS last_seen
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
GROUP BY ja4
ORDER BY hits DESC
@ -107,7 +108,7 @@ async def get_sophistication(limit: int = Query(50, ge=1, le=500)):
Single SQL JOIN query — aucun traitement Python sur 34K entrées.
"""
try:
sql = """
sql = f"""
SELECT
r.ip,
r.distinct_ja4_count,
@ -122,20 +123,20 @@ async def get_sophistication(limit: int = Query(50, ge=1, le=500)):
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
distinct_ja4_count
FROM mabase_prod.view_host_ip_ja4_rotation
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
) r
LEFT JOIN (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
count() AS recurrence
FROM mabase_prod.ml_detected_anomalies FINAL
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies FINAL
GROUP BY ip
) rec ON r.ip = rec.ip
LEFT JOIN (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
sum(hits) AS bruteforce_hits
FROM mabase_prod.view_form_bruteforce_detected
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
GROUP BY ip
) bf ON r.ip = bf.ip
ORDER BY sophistication_score DESC
@ -174,7 +175,7 @@ async def get_proactive_hunt(
):
"""IPs volant sous le radar : récurrentes mais sous le seuil de détection normal."""
try:
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
recurrence,
@ -183,7 +184,7 @@ async def get_proactive_hunt(
first_seen,
last_seen,
dateDiff('day', first_seen, last_seen) AS days_active
FROM mabase_prod.view_ip_recurrence
FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
WHERE recurrence >= %(min_recurrence)s
AND abs(worst_score) < 0.5
AND dateDiff('day', first_seen, last_seen) >= %(min_days)s

View File

@ -3,6 +3,7 @@ Endpoint de recherche globale rapide — utilisé par la barre Cmd+K
"""
from fastapi import APIRouter, Query
from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/search", tags=["search"])
@ -21,13 +22,13 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── IPs ──────────────────────────────────────────────────────────────────
ip_rows = db.query(
"""
f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
count() AS hits,
max(detected_at) AS last_seen,
any(threat_level) AS threat_level
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ilike(toString(src_ip), %(p)s)
AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY clean_ip
@ -48,12 +49,12 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── JA4 fingerprints ─────────────────────────────────────────────────────
ja4_rows = db.query(
"""
f"""
SELECT
ja4,
count() AS hits,
uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ilike(ja4, %(p)s)
AND ja4 != ''
AND detected_at >= now() - INTERVAL 24 HOUR
@ -73,12 +74,12 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── Hosts ─────────────────────────────────────────────────────────────────
host_rows = db.query(
"""
f"""
SELECT
host,
count() AS hits,
uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ilike(host, %(p)s)
AND host != ''
AND detected_at >= now() - INTERVAL 24 HOUR
@ -98,13 +99,13 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── ASN ───────────────────────────────────────────────────────────────────
asn_rows = db.query(
"""
f"""
SELECT
asn_org,
asn_number,
count() AS hits,
uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE (ilike(asn_org, %(p)s) OR ilike(asn_number, %(p)s))
AND asn_org != '' AND asn_number != ''
AND detected_at >= now() - INTERVAL 24 HOUR

View File

@ -18,6 +18,7 @@ from ..services.tcp_fingerprint import (
detect_spoof,
declared_os_from_ua,
)
from ..config import settings
router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"])
@ -26,7 +27,7 @@ router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"])
async def get_tcp_spoofing_overview():
"""Statistiques globales avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale)."""
try:
sql = """
sql = f"""
SELECT
count() AS total_entries,
uniq(src_ip) AS unique_ips,
@ -36,34 +37,34 @@ async def get_tcp_spoofing_overview():
countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp,
countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp,
countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
"""
result = db.query(sql)
row = result.result_rows[0]
# Distribution TTL (top 15)
ttl_sql = """
ttl_sql = f"""
SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY ttl ORDER BY cnt DESC
"""
ttl_res = db.query(ttl_sql)
# Distribution MSS — nouveau signal clé (top 12)
mss_sql = """
mss_sql = f"""
SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0
GROUP BY mss ORDER BY cnt DESC
"""
mss_res = db.query(mss_sql)
# Distribution fenêtre (top 10)
win_sql = """
win_sql = f"""
SELECT tcp_win_raw AS win, count() AS cnt
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY win ORDER BY cnt DESC
"""
@ -105,17 +106,17 @@ async def get_tcp_spoofing_list(
Inclut les champs enrichis : mss, win_scale, initial_ttl, hop_count, confidence, network_path, is_bot_tool.
"""
try:
count_sql = """
count_sql = f"""
SELECT count() FROM (
SELECT src_ip, ja4
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4
)
"""
total = int(db.query(count_sql).result_rows[0][0])
sql = """
sql = f"""
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip,
ja4,
@ -125,7 +126,7 @@ async def get_tcp_spoofing_list(
any(tcp_mss_raw) AS tcp_mss,
any(first_ua) AS first_ua,
sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4
ORDER BY hits DESC
@ -178,7 +179,7 @@ async def get_tcp_spoofing_list(
async def get_tcp_spoofing_matrix():
"""Matrice OS suspecté × OS déclaré avec fingerprinting multi-signal."""
try:
sql = """
sql = f"""
SELECT
any(tcp_ttl_raw) AS ttl,
any(tcp_win_raw) AS win,
@ -186,7 +187,7 @@ async def get_tcp_spoofing_matrix():
any(tcp_mss_raw) AS mss,
any(first_ua) AS ua,
count() AS cnt
FROM mabase_prod.agg_host_ip_ja4_1h
FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4
"""

View File

@ -8,6 +8,7 @@ from ..models import (
VariabilityResponse, VariabilityAttributes, AttributeValue, Insight,
UserAgentsResponse, UserAgentValue
)
from ..config import settings
router = APIRouter(prefix="/api/variability", tags=["variability"])
@ -45,7 +46,7 @@ async def get_associated_ips(
query = f"""
SELECT src_ip, count() AS hit_count
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY src_ip
@ -64,7 +65,7 @@ async def get_associated_ips(
# Compter le total
count_query = f"""
SELECT uniq(src_ip) AS total
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -138,7 +139,7 @@ async def get_associated_attributes(
# country/asn/host: pivot via ml_detected_anomalies
ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)"""
ua_q = f"""
@ -160,7 +161,7 @@ async def get_associated_attributes(
{target_column} AS value,
count() AS count,
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND {target_column} != '' AND {target_column} IS NOT NULL
AND detected_at >= now() - INTERVAL 24 HOUR
@ -183,7 +184,7 @@ async def get_associated_attributes(
# Compter le total
count_query = f"""
SELECT uniq({target_column}) AS total
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND {target_column} != '' AND {target_column} IS NOT NULL
AND detected_at >= now() - INTERVAL 24 HOUR
@ -247,7 +248,7 @@ async def get_user_agents(
ml_col = {"country": "country_code", "asn": "asn_number", "host": "host"}[attr_type]
where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {ml_col} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
)"""
@ -467,7 +468,7 @@ async def get_variability(attr_type: str, value: str):
threat_level,
model_name,
anomaly_score
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
)
@ -480,7 +481,7 @@ async def get_variability(attr_type: str, value: str):
uniq(src_ip) AS unique_ips,
min(detected_at) AS first_seen,
max(detected_at) AS last_seen
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR
"""
@ -506,12 +507,12 @@ async def get_variability(attr_type: str, value: str):
header_user_agent AS user_agent,
count() AS count,
round(count() * 100.0 / (
SELECT count() FROM mabase_prod.http_logs
SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
), 2) AS percentage,
min(time) AS first_seen,
max(time) AS last_seen
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where}
AND time >= now() - INTERVAL 24 HOUR
AND header_user_agent != '' AND header_user_agent IS NOT NULL
@ -527,12 +528,12 @@ async def get_variability(attr_type: str, value: str):
header_user_agent AS user_agent,
count() AS count,
round(count() * 100.0 / (
SELECT count() FROM mabase_prod.http_logs
SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
), 2) AS percentage,
min(time) AS first_seen,
max(time) AS last_seen
FROM mabase_prod.http_logs
FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where}
AND time >= now() - INTERVAL 24 HOUR
AND header_user_agent != '' AND header_user_agent IS NOT NULL
@ -545,7 +546,7 @@ async def get_variability(attr_type: str, value: str):
# country / asn / host: pivot via ml_detected_anomalies → IPs, puis view UA
_ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies
FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)"""
ua_query_simple = f"""