refactor: replace hardcoded mabase_prod DB prefix with configurable settings

Replace all hardcoded 'mabase_prod.' table prefixes in the dashboard
route SQL queries with configurable database names from settings:

- http_logs, http_logs_raw → settings.CLICKHOUSE_DB_LOGS
- All other tables → settings.CLICKHOUSE_DB_PROCESSING

Also qualify previously unqualified table references (bare FROM/JOIN
table_name) with the appropriate database prefix for consistency.

Each route file now imports settings ('from ..config import settings')
and builds its queries as f-strings, using
{settings.CLICKHOUSE_DB_PROCESSING} or {settings.CLICKHOUSE_DB_LOGS} as
the database qualifier; query values are still bound through %(name)s
parameters.

Files updated: analysis, attributes, audit, botnets, bruteforce,
clustering, detections, entities, fingerprints, header_fingerprint,
heatmap, incidents, investigation_summary, metrics, ml_features,
rotation, search, tcp_spoofing, variability (19 files).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 19:03:05 +02:00
parent dba2676fa7
commit b6391afbeb
19 changed files with 225 additions and 206 deletions

View File

@ -14,6 +14,7 @@ from ..models import (
ClassificationRecommendation, ClassificationLabel, ClassificationRecommendation, ClassificationLabel,
ClassificationCreate, Classification, ClassificationsListResponse ClassificationCreate, Classification, ClassificationsListResponse
) )
from ..config import settings
router = APIRouter(prefix="/api/analysis", tags=["analysis"]) router = APIRouter(prefix="/api/analysis", tags=["analysis"])
@ -42,9 +43,9 @@ async def analyze_subnet(ip: str):
subnet_str = str(subnet) subnet_str = str(subnet)
# Récupérer les infos ASN pour cette IP # Récupérer les infos ASN pour cette IP
asn_query = """ asn_query = f"""
SELECT asn_number, asn_org SELECT asn_number, asn_org
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
ORDER BY detected_at DESC ORDER BY detected_at DESC
LIMIT 1 LIMIT 1
@ -60,9 +61,9 @@ async def analyze_subnet(ip: str):
asn_org = asn_result.result_rows[0][1] or "Unknown" asn_org = asn_result.result_rows[0][1] or "Unknown"
# IPs du même subnet /24 # IPs du même subnet /24
subnet_ips_query = """ subnet_ips_query = f"""
SELECT DISTINCT src_ip SELECT DISTINCT src_ip
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE toIPv4(src_ip) >= toIPv4(%(subnet_start)s) WHERE toIPv4(src_ip) >= toIPv4(%(subnet_start)s)
AND toIPv4(src_ip) <= toIPv4(%(subnet_end)s) AND toIPv4(src_ip) <= toIPv4(%(subnet_end)s)
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
@ -78,9 +79,9 @@ async def analyze_subnet(ip: str):
# Total IPs du même ASN # Total IPs du même ASN
if asn_number != "0": if asn_number != "0":
asn_total_query = """ asn_total_query = f"""
SELECT uniq(src_ip) SELECT uniq(src_ip)
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE asn_number = %(asn_number)s WHERE asn_number = %(asn_number)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
""" """
@ -114,9 +115,9 @@ async def analyze_ip_country(ip: str):
""" """
try: try:
# Pays de l'IP # Pays de l'IP
ip_country_query = """ ip_country_query = f"""
SELECT country_code, asn_number SELECT country_code, asn_number
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
ORDER BY detected_at DESC ORDER BY detected_at DESC
LIMIT 1 LIMIT 1
@ -131,11 +132,11 @@ async def analyze_ip_country(ip: str):
asn_number = ip_result.result_rows[0][1] asn_number = ip_result.result_rows[0][1]
# Répartition des autres pays du même ASN # Répartition des autres pays du même ASN
asn_countries_query = """ asn_countries_query = f"""
SELECT SELECT
country_code, country_code,
count() AS count count() AS count
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE asn_number = %(asn_number)s WHERE asn_number = %(asn_number)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY country_code GROUP BY country_code
@ -179,11 +180,11 @@ async def analyze_country(days: int = Query(1, ge=1, le=30)):
""" """
try: try:
# Top pays # Top pays
top_query = """ top_query = f"""
SELECT SELECT
country_code, country_code,
count() AS count count() AS count
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(days)s DAY WHERE detected_at >= now() - INTERVAL %(days)s DAY
AND country_code != '' AND country_code IS NOT NULL AND country_code != '' AND country_code IS NOT NULL
GROUP BY country_code GROUP BY country_code
@ -206,11 +207,11 @@ async def analyze_country(days: int = Query(1, ge=1, le=30)):
] ]
# Baseline (7 derniers jours) # Baseline (7 derniers jours)
baseline_query = """ baseline_query = f"""
SELECT SELECT
country_code, country_code,
count() AS count count() AS count
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 7 DAY WHERE detected_at >= now() - INTERVAL 7 DAY
AND country_code != '' AND country_code IS NOT NULL AND country_code != '' AND country_code IS NOT NULL
GROUP BY country_code GROUP BY country_code
@ -254,9 +255,9 @@ async def analyze_ja4(ip: str):
""" """
try: try:
# JA4 de cette IP # JA4 de cette IP
ja4_query = """ ja4_query = f"""
SELECT ja4 SELECT ja4
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
AND ja4 != '' AND ja4 IS NOT NULL AND ja4 != '' AND ja4 IS NOT NULL
ORDER BY detected_at DESC ORDER BY detected_at DESC
@ -276,9 +277,9 @@ async def analyze_ja4(ip: str):
ja4 = ja4_result.result_rows[0][0] ja4 = ja4_result.result_rows[0][0]
# IPs avec le même JA4 # IPs avec le même JA4
shared_query = """ shared_query = f"""
SELECT uniq(src_ip) SELECT uniq(src_ip)
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ja4 = %(ja4)s WHERE ja4 = %(ja4)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
""" """
@ -287,11 +288,11 @@ async def analyze_ja4(ip: str):
shared_count = shared_result.result_rows[0][0] if shared_result.result_rows else 0 shared_count = shared_result.result_rows[0][0] if shared_result.result_rows else 0
# Top subnets pour ce JA4 - Simplifié # Top subnets pour ce JA4 - Simplifié
subnets_query = """ subnets_query = f"""
SELECT SELECT
src_ip, src_ip,
count() AS count count() AS count
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ja4 = %(ja4)s WHERE ja4 = %(ja4)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY src_ip GROUP BY src_ip
@ -315,9 +316,9 @@ async def analyze_ja4(ip: str):
] ]
# Autres JA4 pour cette IP # Autres JA4 pour cette IP
other_ja4_query = """ other_ja4_query = f"""
SELECT DISTINCT ja4 SELECT DISTINCT ja4
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
AND ja4 != '' AND ja4 IS NOT NULL AND ja4 != '' AND ja4 IS NOT NULL
AND ja4 != %(current_ja4)s AND ja4 != %(current_ja4)s
@ -348,11 +349,11 @@ async def analyze_user_agents(ip: str):
""" """
try: try:
# User-Agents pour cette IP (depuis http_logs) # User-Agents pour cette IP (depuis http_logs)
ip_ua_query = """ ip_ua_query = f"""
SELECT SELECT
header_user_agent AS ua, header_user_agent AS ua,
count() AS count count() AS count
FROM mabase_prod.http_logs FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
AND header_user_agent != '' AND header_user_agent IS NOT NULL AND header_user_agent != '' AND header_user_agent IS NOT NULL
AND time >= now() - INTERVAL 24 HOUR AND time >= now() - INTERVAL 24 HOUR
@ -361,9 +362,9 @@ async def analyze_user_agents(ip: str):
""" """
# Total réel des requêtes (pour les pourcentages corrects) # Total réel des requêtes (pour les pourcentages corrects)
ip_total_query = """ ip_total_query = f"""
SELECT count() SELECT count()
FROM mabase_prod.http_logs FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
AND time >= now() - INTERVAL 24 HOUR AND time >= now() - INTERVAL 24 HOUR
""" """
@ -529,8 +530,8 @@ async def create_classification(data: ClassificationCreate):
if not data.ip and not data.ja4: if not data.ip and not data.ja4:
raise HTTPException(status_code=400, detail="IP ou JA4 requis") raise HTTPException(status_code=400, detail="IP ou JA4 requis")
query = """ query = f"""
INSERT INTO mabase_prod.classifications INSERT INTO {settings.CLICKHOUSE_DB_PROCESSING}.classifications
(ip, ja4, label, tags, comment, confidence, features, analyst, created_at) (ip, ja4, label, tags, comment, confidence, features, analyst, created_at)
VALUES VALUES
(%(ip)s, %(ja4)s, %(label)s, %(tags)s, %(comment)s, %(confidence)s, %(features)s, %(analyst)s, now()) (%(ip)s, %(ja4)s, %(label)s, %(tags)s, %(comment)s, %(confidence)s, %(features)s, %(analyst)s, now())
@ -551,7 +552,7 @@ async def create_classification(data: ClassificationCreate):
where_clause = "ip = %(entity)s" if data.ip else "ja4 = %(entity)s" where_clause = "ip = %(entity)s" if data.ip else "ja4 = %(entity)s"
select_query = f""" select_query = f"""
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
FROM mabase_prod.classifications FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause} WHERE {where_clause}
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT 1 LIMIT 1
@ -609,7 +610,7 @@ async def list_classifications(
query = f""" query = f"""
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
FROM mabase_prod.classifications FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause} WHERE {where_clause}
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT %(limit)s LIMIT %(limit)s
@ -635,7 +636,7 @@ async def list_classifications(
# Total # Total
count_query = f""" count_query = f"""
SELECT count() SELECT count()
FROM mabase_prod.classifications FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
WHERE {where_clause} WHERE {where_clause}
""" """
@ -657,13 +658,13 @@ async def get_classification_stats():
Statistiques des classifications Statistiques des classifications
""" """
try: try:
stats_query = """ stats_query = f"""
SELECT SELECT
label, label,
count() AS total, count() AS total,
uniq(ip) AS unique_ips, uniq(ip) AS unique_ips,
avg(confidence) AS avg_confidence avg(confidence) AS avg_confidence
FROM mabase_prod.classifications FROM {settings.CLICKHOUSE_DB_PROCESSING}.classifications
GROUP BY label GROUP BY label
ORDER BY total DESC ORDER BY total DESC
""" """

View File

@ -4,6 +4,7 @@ Endpoints pour la liste des attributs uniques
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..models import AttributeListResponse, AttributeListItem from ..models import AttributeListResponse, AttributeListItem
from ..config import settings
router = APIRouter(prefix="/api/attributes", tags=["attributes"]) router = APIRouter(prefix="/api/attributes", tags=["attributes"])
@ -42,7 +43,7 @@ async def get_attributes(
SELECT SELECT
{column} AS value, {column} AS value,
count() AS count count() AS count
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR WHERE detected_at >= now() - INTERVAL 24 HOUR
""" """
@ -53,7 +54,7 @@ async def get_attributes(
query = f""" query = f"""
SELECT value, count FROM ( SELECT value, count FROM (
SELECT toString({column}) AS value, count() AS count SELECT toString({column}) AS value, count() AS count
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR WHERE detected_at >= now() - INTERVAL 24 HOUR
GROUP BY {column} GROUP BY {column}
) )

View File

@ -6,6 +6,7 @@ from fastapi import APIRouter, HTTPException, Query, Request
from typing import Optional from typing import Optional
from datetime import datetime from datetime import datetime
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/audit", tags=["audit"]) router = APIRouter(prefix="/api/audit", tags=["audit"])
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -29,8 +30,8 @@ async def create_audit_log(
client_ip = request.client.host if request.client else "unknown" client_ip = request.client.host if request.client else "unknown"
# Insérer dans ClickHouse # Insérer dans ClickHouse
insert_query = """ insert_query = f"""
INSERT INTO mabase_prod.audit_logs INSERT INTO {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
(timestamp, user_name, action, entity_type, entity_id, entity_count, details, client_ip) (timestamp, user_name, action, entity_type, entity_id, entity_count, details, client_ip)
VALUES VALUES
(%(timestamp)s, %(user)s, %(action)s, %(entity_type)s, %(entity_id)s, %(entity_count)s, %(details)s, %(client_ip)s) (%(timestamp)s, %(user)s, %(action)s, %(entity_type)s, %(entity_id)s, %(entity_count)s, %(details)s, %(client_ip)s)
@ -105,7 +106,7 @@ async def get_audit_logs(
entity_count, entity_count,
details, details,
client_ip client_ip
FROM mabase_prod.audit_logs FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
WHERE {where_clause} WHERE {where_clause}
ORDER BY timestamp DESC ORDER BY timestamp DESC
LIMIT %(limit)s LIMIT %(limit)s
@ -152,13 +153,13 @@ async def get_audit_stats(
Statistiques d'audit Statistiques d'audit
""" """
try: try:
query = """ query = f"""
SELECT SELECT
action, action,
count() AS count, count() AS count,
uniq(user_name) AS unique_users, uniq(user_name) AS unique_users,
sum(entity_count) AS total_entities sum(entity_count) AS total_entities
FROM mabase_prod.audit_logs FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
WHERE timestamp >= now() - INTERVAL %(hours)s HOUR WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
GROUP BY action GROUP BY action
ORDER BY count DESC ORDER BY count DESC
@ -198,14 +199,14 @@ async def get_user_activity(
Activité par utilisateur Activité par utilisateur
""" """
try: try:
query = """ query = f"""
SELECT SELECT
user_name, user_name,
count() AS actions, count() AS actions,
uniq(action) AS action_types, uniq(action) AS action_types,
min(timestamp) AS first_action, min(timestamp) AS first_action,
max(timestamp) AS last_action max(timestamp) AS last_action
FROM mabase_prod.audit_logs FROM {settings.CLICKHOUSE_DB_PROCESSING}.audit_logs
WHERE timestamp >= now() - INTERVAL %(hours)s HOUR WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
GROUP BY user_name GROUP BY user_name
ORDER BY actions DESC ORDER BY actions DESC

View File

@ -4,6 +4,7 @@ Endpoints pour l'analyse des botnets via la propagation des fingerprints JA4
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/botnets", tags=["botnets"]) router = APIRouter(prefix="/api/botnets", tags=["botnets"])
@ -20,13 +21,13 @@ def _botnet_class(unique_countries: int) -> str:
async def get_ja4_spread(): async def get_ja4_spread():
"""Propagation des JA4 fingerprints à travers les pays et les IPs.""" """Propagation des JA4 fingerprints à travers les pays et les IPs."""
try: try:
sql = """ sql = f"""
SELECT SELECT
ja4, ja4,
unique_ips, unique_ips,
unique_countries, unique_countries,
targeted_hosts targeted_hosts
FROM mabase_prod.view_host_ja4_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ja4_anomalies
ORDER BY unique_countries DESC ORDER BY unique_countries DESC
""" """
result = db.query(sql) result = db.query(sql)
@ -56,12 +57,12 @@ async def get_ja4_spread():
async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)): async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)):
"""Top pays pour un JA4 donné depuis agg_host_ip_ja4_1h.""" """Top pays pour un JA4 donné depuis agg_host_ip_ja4_1h."""
try: try:
sql = """ sql = f"""
SELECT SELECT
src_country_code AS country_code, src_country_code AS country_code,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
sum(hits) AS hits sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE ja4 = %(ja4)s WHERE ja4 = %(ja4)s
GROUP BY src_country_code GROUP BY src_country_code
ORDER BY unique_ips DESC ORDER BY unique_ips DESC
@ -85,13 +86,13 @@ async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)):
async def get_botnets_summary(): async def get_botnets_summary():
"""Statistiques globales sur les botnets détectés.""" """Statistiques globales sur les botnets détectés."""
try: try:
sql = """ sql = f"""
SELECT SELECT
countIf(unique_countries > 100) AS total_global_botnets, countIf(unique_countries > 100) AS total_global_botnets,
sumIf(unique_ips, unique_countries > 50) AS total_ips_in_botnets, sumIf(unique_ips, unique_countries > 50) AS total_ips_in_botnets,
argMax(ja4, unique_countries) AS most_spread_ja4, argMax(ja4, unique_countries) AS most_spread_ja4,
argMax(ja4, unique_ips) AS most_ips_ja4 argMax(ja4, unique_ips) AS most_ips_ja4
FROM mabase_prod.view_host_ja4_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ja4_anomalies
""" """
result = db.query(sql) result = db.query(sql)
row = result.result_rows[0] row = result.result_rows[0]

View File

@ -4,6 +4,7 @@ Endpoints pour l'analyse des attaques par force brute sur les formulaires
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"]) router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"])
@ -12,14 +13,14 @@ router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"])
async def get_bruteforce_targets(): async def get_bruteforce_targets():
"""Liste des hôtes ciblés par brute-force, triés par total_hits DESC.""" """Liste des hôtes ciblés par brute-force, triés par total_hits DESC."""
try: try:
sql = """ sql = f"""
SELECT SELECT
host, host,
uniq(src_ip) AS unique_ips, uniq(src_ip) AS unique_ips,
sum(hits) AS total_hits, sum(hits) AS total_hits,
sum(query_params_count) AS total_params, sum(query_params_count) AS total_params,
groupArray(3)(ja4) AS top_ja4s groupArray(3)(ja4) AS top_ja4s
FROM mabase_prod.view_form_bruteforce_detected FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
GROUP BY host GROUP BY host
ORDER BY total_hits DESC ORDER BY total_hits DESC
""" """
@ -53,14 +54,14 @@ async def get_bruteforce_targets():
async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)): async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)):
"""Top IPs attaquantes triées par total_hits DESC.""" """Top IPs attaquantes triées par total_hits DESC."""
try: try:
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
uniq(host) AS distinct_hosts, uniq(host) AS distinct_hosts,
sum(hits) AS total_hits, sum(hits) AS total_hits,
sum(query_params_count) AS total_params, sum(query_params_count) AS total_params,
argMax(ja4, hits) AS ja4 argMax(ja4, hits) AS ja4
FROM mabase_prod.view_form_bruteforce_detected FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
GROUP BY src_ip GROUP BY src_ip
ORDER BY total_hits DESC ORDER BY total_hits DESC
LIMIT %(limit)s LIMIT %(limit)s
@ -84,12 +85,12 @@ async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)):
async def get_bruteforce_timeline(): async def get_bruteforce_timeline():
"""Hits par heure (dernières 72h) depuis agg_host_ip_ja4_1h.""" """Hits par heure (dernières 72h) depuis agg_host_ip_ja4_1h."""
try: try:
sql = """ sql = f"""
SELECT SELECT
toHour(window_start) AS hour, toHour(window_start) AS hour,
sum(hits) AS hits, sum(hits) AS hits,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS ips uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS ips
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY hour GROUP BY hour
ORDER BY hour ASC ORDER BY hour ASC
@ -111,14 +112,14 @@ async def get_bruteforce_timeline():
async def get_host_attackers(host: str, limit: int = Query(20, ge=1, le=200)): async def get_host_attackers(host: str, limit: int = Query(20, ge=1, le=200)):
"""Top IPs attaquant un hôte spécifique, avec JA4 et type d'attaque.""" """Top IPs attaquant un hôte spécifique, avec JA4 et type d'attaque."""
try: try:
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
sum(hits) AS total_hits, sum(hits) AS total_hits,
sum(query_params_count) AS total_params, sum(query_params_count) AS total_params,
argMax(ja4, hits) AS ja4, argMax(ja4, hits) AS ja4,
max(hits) AS max_hits_per_window max(hits) AS max_hits_per_window
FROM mabase_prod.view_form_bruteforce_detected FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
WHERE host = %(host)s WHERE host = %(host)s
GROUP BY src_ip GROUP BY src_ip
ORDER BY total_hits DESC ORDER BY total_hits DESC

View File

@ -24,6 +24,7 @@ from ..services.clustering_engine import (
name_cluster, risk_score_from_centroid, standardize, name_cluster, risk_score_from_centroid, standardize,
risk_to_gradient_color, risk_to_gradient_color,
) )
from ..config import settings
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
router = APIRouter(prefix="/api/clustering", tags=["clustering"]) router = APIRouter(prefix="/api/clustering", tags=["clustering"])
@ -47,7 +48,7 @@ _EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering")
# ─── SQL : TOUTES les IPs sans LIMIT ───────────────────────────────────────── # ─── SQL : TOUTES les IPs sans LIMIT ─────────────────────────────────────────
_SQL_ALL_IPS = """ _SQL_ALL_IPS = f"""
SELECT SELECT
replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip,
t.ja4, t.ja4,
@ -96,8 +97,8 @@ SELECT
-- Cookie et Referer issus de la table dédiée aux empreintes -- Cookie et Referer issus de la table dédiée aux empreintes
any(hfp.hfp_cookie) AS hfp_cookie, any(hfp.hfp_cookie) AS hfp_cookie,
any(hfp.hfp_referer) AS hfp_referer any(hfp.hfp_referer) AS hfp_referer
FROM mabase_prod.agg_host_ip_ja4_1h t FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h t
LEFT JOIN mabase_prod.ml_detected_anomalies ml LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies ml
ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4 ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
AND ml.detected_at >= now() - INTERVAL %(hours)s HOUR AND ml.detected_at >= now() - INTERVAL %(hours)s HOUR
LEFT JOIN ( LEFT JOIN (
@ -107,7 +108,7 @@ LEFT JOIN (
any(arrayExists(x -> x LIKE '%%Accept-Encoding%%', client_headers)) AS hdr_enc, any(arrayExists(x -> x LIKE '%%Accept-Encoding%%', client_headers)) AS hdr_enc,
any(arrayExists(x -> x LIKE '%%Sec-Fetch%%', client_headers)) AS hdr_sec_fetch, any(arrayExists(x -> x LIKE '%%Sec-Fetch%%', client_headers)) AS hdr_sec_fetch,
any(length(splitByChar(',', client_headers[1]))) AS hdr_count any(length(splitByChar(',', client_headers[1]))) AS hdr_count
FROM mabase_prod.view_dashboard_entities FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE length(client_headers) > 0 WHERE length(client_headers) > 0
AND log_date >= today() - 2 AND log_date >= today() - 2
GROUP BY src_ip_v6, ja4 GROUP BY src_ip_v6, ja4
@ -117,7 +118,7 @@ LEFT JOIN (
src_ip, src_ip,
avg(has_cookie) AS hfp_cookie, avg(has_cookie) AS hfp_cookie,
avg(has_referer) AS hfp_referer avg(has_referer) AS hfp_referer
FROM mabase_prod.agg_header_fingerprint_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
WHERE window_start >= now() - INTERVAL %(hours)s HOUR WHERE window_start >= now() - INTERVAL %(hours)s HOUR
GROUP BY src_ip GROUP BY src_ip
) hfp ON t.src_ip = hfp.src_ip ) hfp ON t.src_ip = hfp.src_ip
@ -515,8 +516,8 @@ async def get_cluster_ips(
any(ml.asn_org) AS asn_org, any(ml.asn_org) AS asn_org,
round(avg(ml.fuzzing_index), 2) AS fuzzing, round(avg(ml.fuzzing_index), 2) AS fuzzing,
round(avg(ml.hit_velocity), 2) AS velocity round(avg(ml.hit_velocity), 2) AS velocity
FROM mabase_prod.agg_host_ip_ja4_1h t FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h t
LEFT JOIN mabase_prod.ml_detected_anomalies ml LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies ml
ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4 ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
AND ml.detected_at >= now() - INTERVAL 24 HOUR AND ml.detected_at >= now() - INTERVAL 24 HOUR
WHERE t.window_start >= now() - INTERVAL 24 HOUR WHERE t.window_start >= now() - INTERVAL 24 HOUR

View File

@ -5,6 +5,7 @@ from fastapi import APIRouter, HTTPException, Query
from typing import Optional, List from typing import Optional, List
from ..database import db from ..database import db
from ..models import DetectionsListResponse, Detection from ..models import DetectionsListResponse, Detection
from ..config import settings
router = APIRouter(prefix="/api/detections", tags=["detections"]) router = APIRouter(prefix="/api/detections", tags=["detections"])
@ -82,7 +83,7 @@ async def get_detections(
# Requête de comptage # Requête de comptage
count_query = f""" count_query = f"""
SELECT count() SELECT count()
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {where_clause} WHERE {where_clause}
""" """
@ -107,7 +108,7 @@ async def get_detections(
# Count distinct IPs # Count distinct IPs
count_ip_query = f""" count_ip_query = f"""
SELECT uniq(src_ip) SELECT uniq(src_ip)
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {where_clause} WHERE {where_clause}
""" """
cr = db.query(count_ip_query, params) cr = db.query(count_ip_query, params)
@ -154,11 +155,11 @@ async def get_detections(
argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best, argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best, argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {where_clause} WHERE {where_clause}
GROUP BY src_ip GROUP BY src_ip
) ip_data ) ip_data
LEFT JOIN mabase_prod.asn_reputation ar LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.asn_reputation ar
ON ar.src_asn = toUInt32OrZero(ip_data.asn_number) ON ar.src_asn = toUInt32OrZero(ip_data.asn_number)
ORDER BY {outer_sort} {sort_order} ORDER BY {outer_sort} {sort_order}
LIMIT %(limit)s OFFSET %(offset)s LIMIT %(limit)s OFFSET %(offset)s
@ -248,8 +249,8 @@ async def get_detections(
anubis_bot_name, anubis_bot_name,
anubis_bot_action, anubis_bot_action,
anubis_bot_category anubis_bot_category
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number) LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
WHERE {where_clause} WHERE {where_clause}
ORDER BY {sort_by} {sort_order} ORDER BY {sort_by} {sort_order}
LIMIT %(limit)s OFFSET %(offset)s LIMIT %(limit)s OFFSET %(offset)s
@ -312,7 +313,7 @@ async def get_detection_details(detection_id: str):
detection_id peut être une IP ou un identifiant detection_id peut être une IP ou un identifiant
""" """
try: try:
query = """ query = f"""
SELECT SELECT
detected_at, detected_at,
src_ip, src_ip,
@ -363,7 +364,7 @@ async def get_detection_details(detection_id: str):
ja4_asn_concentration, ja4_asn_concentration,
ja4_country_concentration, ja4_country_concentration,
is_rare_ja4 is_rare_ja4
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
ORDER BY detected_at DESC ORDER BY detected_at DESC
LIMIT 1 LIMIT 1

View File

@ -11,6 +11,7 @@ from ..models import (
EntityRelatedAttributes, EntityRelatedAttributes,
EntityAttributeValue EntityAttributeValue
) )
from ..config import settings
router = APIRouter(prefix="/api/entities", tags=["Entities"]) router = APIRouter(prefix="/api/entities", tags=["Entities"])
@ -24,7 +25,7 @@ def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Op
""" """
Récupère les statistiques pour une entité donnée Récupère les statistiques pour une entité donnée
""" """
query = """ query = f"""
SELECT SELECT
entity_type, entity_type,
entity_value, entity_value,
@ -32,7 +33,7 @@ def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Op
sum(unique_ips) as unique_ips, sum(unique_ips) as unique_ips,
min(log_date) as first_seen, min(log_date) as first_seen,
max(log_date) as last_seen max(log_date) as last_seen
FROM mabase_prod.view_dashboard_entities FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = %(entity_type)s WHERE entity_type = %(entity_type)s
AND entity_value = %(entity_value)s AND entity_value = %(entity_value)s
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
@ -64,13 +65,13 @@ def get_related_attributes(entity_type: str, entity_value: str, hours: int = 24)
Récupère les attributs associés à une entité Récupère les attributs associés à une entité
""" """
# Requête pour agréger tous les attributs associés # Requête pour agréger tous les attributs associés
query = """ query = f"""
SELECT SELECT
(SELECT groupUniqArray(toString(src_ip)) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips, (SELECT groupUniqArray(toString(src_ip)) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips,
(SELECT groupUniqArray(ja4) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s, (SELECT groupUniqArray(ja4) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s,
(SELECT groupUniqArray(host) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts, (SELECT groupUniqArray(host) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts,
(SELECT groupUniqArrayArray(asns) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns, (SELECT groupUniqArrayArray(asns) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns,
(SELECT groupUniqArrayArray(countries) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries (SELECT groupUniqArrayArray(countries) FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries
""" """
result = db.query(query, { result = db.query(query, {
@ -110,7 +111,7 @@ def get_array_values(entity_type: str, entity_value: str, array_field: str, hour
FROM ( FROM (
SELECT SELECT
arrayJoin({array_field}) as value arrayJoin({array_field}) as value
FROM mabase_prod.view_dashboard_entities FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = %(entity_type)s WHERE entity_type = %(entity_type)s
AND entity_value = %(entity_value)s AND entity_value = %(entity_value)s
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
@ -156,7 +157,7 @@ async def get_subnet_investigation(
subnet_third = subnet_parts[2] subnet_third = subnet_parts[2]
# Stats globales du subnet - utilise ml_detected_anomalies + view_dashboard_entities pour UA # Stats globales du subnet - utilise ml_detected_anomalies + view_dashboard_entities pour UA
stats_query = """ stats_query = f"""
WITH cleaned_ips AS ( WITH cleaned_ips AS (
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -165,7 +166,7 @@ async def get_subnet_investigation(
host, host,
country_code, country_code,
asn_number asn_number
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
), ),
subnet_filter AS ( subnet_filter AS (
@ -180,7 +181,7 @@ async def get_subnet_investigation(
SELECT SELECT
entity_value AS ip, entity_value AS ip,
arrayJoin(user_agents) AS user_agent arrayJoin(user_agents) AS user_agent
FROM view_dashboard_entities FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = 'ip' WHERE entity_type = 'ip'
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s
@ -227,7 +228,7 @@ async def get_subnet_investigation(
} }
# Liste des IPs avec détails - 2 requêtes séparées + fusion en Python # Liste des IPs avec détails - 2 requêtes séparées + fusion en Python
ips_query = """ ips_query = f"""
WITH cleaned_ips AS ( WITH cleaned_ips AS (
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -237,7 +238,7 @@ async def get_subnet_investigation(
asn_number, asn_number,
threat_level, threat_level,
anomaly_score anomaly_score
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
), ),
subnet_filter AS ( subnet_filter AS (
@ -282,7 +283,7 @@ async def get_subnet_investigation(
SELECT SELECT
entity_value AS ip, entity_value AS ip,
uniq(arrayJoin(user_agents)) AS unique_ua uniq(arrayJoin(user_agents)) AS unique_ua
FROM view_dashboard_entities FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
PREWHERE entity_type = 'ip' PREWHERE entity_type = 'ip'
WHERE entity_value IN ({ip_values}) WHERE entity_value IN ({ip_values})
AND log_date >= today() - INTERVAL 30 DAY AND log_date >= today() - INTERVAL 30 DAY

View File

@ -13,6 +13,7 @@ from fastapi import APIRouter, HTTPException, Query
import re import re
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"]) router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"])
@ -68,7 +69,7 @@ async def get_ja4_spoofing(
""" """
try: try:
# Agrégation par JA4 avec tous les indicateurs de spoofing # Agrégation par JA4 avec tous les indicateurs de spoofing
query = """ query = f"""
SELECT SELECT
ja4, ja4,
count() AS total_detections, count() AS total_detections,
@ -102,7 +103,7 @@ async def get_ja4_spoofing(
avg(ja4_country_concentration) AS avg_country_concentration, avg(ja4_country_concentration) AS avg_country_concentration,
argMax(threat_level, detected_at) AS last_threat_level argMax(threat_level, detected_at) AS last_threat_level
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4 GROUP BY ja4
@ -254,7 +255,7 @@ async def get_ja4_ua_matrix(
""" """
try: try:
# Stats JA4 depuis ml_detected_anomalies # Stats JA4 depuis ml_detected_anomalies
stats_query = """ stats_query = f"""
SELECT SELECT
ja4, ja4,
uniq(src_ip) AS unique_ips, uniq(src_ip) AS unique_ips,
@ -264,7 +265,7 @@ async def get_ja4_ua_matrix(
countIf(is_rare_ja4 = true) AS rare_count, countIf(is_rare_ja4 = true) AS rare_count,
countIf(is_ua_rotating = true) AS rotating_count, countIf(is_ua_rotating = true) AS rotating_count,
argMax(threat_level, detected_at) AS last_threat argMax(threat_level, detected_at) AS last_threat
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4 GROUP BY ja4
@ -426,11 +427,11 @@ async def get_ua_analysis(
pass pass
# IPs avec is_ua_rotating depuis ml_detected_anomalies # IPs avec is_ua_rotating depuis ml_detected_anomalies
rotating_query = """ rotating_query = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
avg(ua_ch_mismatch) AS avg_ua_ch_mismatch avg(ua_ch_mismatch) AS avg_ua_ch_mismatch
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND is_ua_rotating = true AND is_ua_rotating = true
GROUP BY clean_ip GROUP BY clean_ip
@ -522,7 +523,7 @@ async def get_ip_fingerprint_coherence(ip: str):
""" """
try: try:
# Données depuis ml_detected_anomalies # Données depuis ml_detected_anomalies
ml_query = """ ml_query = f"""
SELECT SELECT
ja4, ja4,
ua_ch_mismatch, ua_ch_mismatch,
@ -543,7 +544,7 @@ async def get_ip_fingerprint_coherence(ip: str):
window_mss_ratio, window_mss_ratio,
tcp_jitter_variance, tcp_jitter_variance,
multiplexing_efficiency multiplexing_efficiency
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE src_ip = %(ip)s WHERE src_ip = %(ip)s
ORDER BY detected_at DESC ORDER BY detected_at DESC
""" """
@ -681,7 +682,7 @@ async def get_legitimate_ja4(
Utile comme whitelist pour réduire les faux positifs. Utile comme whitelist pour réduire les faux positifs.
""" """
try: try:
query = """ query = f"""
SELECT SELECT
ja4, ja4,
uniq(src_ip) AS unique_ips, uniq(src_ip) AS unique_ips,
@ -691,7 +692,7 @@ async def get_legitimate_ja4(
countIf(is_rare_ja4 = true) AS rare_count, countIf(is_rare_ja4 = true) AS rare_count,
round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct, round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct,
round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND ja4 != '' AND ja4 IS NOT NULL AND ja4 != '' AND ja4 IS NOT NULL
GROUP BY ja4 GROUP BY ja4
@ -753,7 +754,7 @@ async def get_ja4_asn_correlation(
try: try:
# Two-pass: first aggregate per (ja4, asn) to get IP counts per ASN, # Two-pass: first aggregate per (ja4, asn) to get IP counts per ASN,
# then aggregate per ja4 to compute concentration ratio # then aggregate per ja4 to compute concentration ratio
sql = """ sql = f"""
SELECT SELECT
ja4, ja4,
sum(ips_per_combo) AS unique_ips, sum(ips_per_combo) AS unique_ips,
@ -774,7 +775,7 @@ async def get_ja4_asn_correlation(
uniq(src_ip) AS ips_per_combo, uniq(src_ip) AS ips_per_combo,
uniq(src_ip) AS country_ips, uniq(src_ip) AS country_ips,
sum(hits) AS total_hits sum(hits) AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR WHERE window_start >= now() - INTERVAL 24 HOUR
AND ja4 != '' AND ja4 != ''
GROUP BY ja4, src_asn, src_country_code GROUP BY ja4, src_asn, src_country_code

View File

@ -4,6 +4,7 @@ Endpoints pour l'analyse des empreintes d'en-têtes HTTP
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"]) router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"])
@ -12,7 +13,7 @@ router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"])
async def get_header_clusters(limit: int = Query(50, ge=1, le=200)): async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
"""Clusters d'empreintes d'en-têtes groupés par header_order_hash.""" """Clusters d'empreintes d'en-têtes groupés par header_order_hash."""
try: try:
sql = """ sql = f"""
SELECT SELECT
header_order_hash AS hash, header_order_hash AS hash,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
@ -22,16 +23,16 @@ async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes, groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes,
round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct, round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct,
round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct
FROM mabase_prod.agg_header_fingerprint_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
GROUP BY header_order_hash GROUP BY header_order_hash
ORDER BY unique_ips DESC ORDER BY unique_ips DESC
LIMIT %(limit)s LIMIT %(limit)s
""" """
result = db.query(sql, {"limit": limit}) result = db.query(sql, {"limit": limit})
total_sql = """ total_sql = f"""
SELECT uniq(header_order_hash) SELECT uniq(header_order_hash)
FROM mabase_prod.agg_header_fingerprint_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
""" """
total_clusters = int(db.query(total_sql).result_rows[0][0]) total_clusters = int(db.query(total_sql).result_rows[0][0])
@ -73,14 +74,14 @@ async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)): async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)):
"""Liste des IPs appartenant à un cluster d'en-têtes donné.""" """Liste des IPs appartenant à un cluster d'en-têtes donné."""
try: try:
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
any(modern_browser_score) AS browser_score, any(modern_browser_score) AS browser_score,
any(ua_ch_mismatch) AS ua_ch_mismatch, any(ua_ch_mismatch) AS ua_ch_mismatch,
any(sec_fetch_mode) AS sec_fetch_mode, any(sec_fetch_mode) AS sec_fetch_mode,
any(sec_fetch_dest) AS sec_fetch_dest any(sec_fetch_dest) AS sec_fetch_dest
FROM mabase_prod.agg_header_fingerprint_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h
WHERE header_order_hash = %(hash)s WHERE header_order_hash = %(hash)s
GROUP BY src_ip GROUP BY src_ip
ORDER BY browser_score DESC ORDER BY browser_score DESC

View File

@ -5,6 +5,7 @@ from collections import defaultdict
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/heatmap", tags=["heatmap"]) router = APIRouter(prefix="/api/heatmap", tags=["heatmap"])
@ -13,13 +14,13 @@ router = APIRouter(prefix="/api/heatmap", tags=["heatmap"])
async def get_heatmap_hourly(): async def get_heatmap_hourly():
"""Hits agrégés par heure sur les 72 dernières heures.""" """Hits agrégés par heure sur les 72 dernières heures."""
try: try:
sql = """ sql = f"""
SELECT SELECT
toHour(window_start) AS hour, toHour(window_start) AS hour,
sum(hits) AS hits, sum(hits) AS hits,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
max(max_requests_per_sec) AS max_rps max(max_requests_per_sec) AS max_rps
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY hour GROUP BY hour
ORDER BY hour ASC ORDER BY hour ASC
@ -44,13 +45,13 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
"""Hôtes les plus ciblés avec répartition horaire sur 24h.""" """Hôtes les plus ciblés avec répartition horaire sur 24h."""
try: try:
# Aggregate overall stats per host # Aggregate overall stats per host
agg_sql = """ agg_sql = f"""
SELECT SELECT
host, host,
sum(hits) AS total_hits, sum(hits) AS total_hits,
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
uniq(ja4) AS unique_ja4s uniq(ja4) AS unique_ja4s
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY host GROUP BY host
ORDER BY total_hits DESC ORDER BY total_hits DESC
@ -72,12 +73,12 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
return {"items": []} return {"items": []}
# Hourly breakdown per host # Hourly breakdown per host
hourly_sql = """ hourly_sql = f"""
SELECT SELECT
host, host,
toHour(window_start) AS hour, toHour(window_start) AS hour,
sum(hits) AS hits sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR WHERE window_start >= now() - INTERVAL 72 HOUR
AND host IN %(hosts)s AND host IN %(hosts)s
GROUP BY host, hour GROUP BY host, hour
@ -106,9 +107,9 @@ async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
async def get_heatmap_matrix(): async def get_heatmap_matrix():
"""Matrice top-15 hôtes × 24 heures (sum hits) sur les 72 dernières heures.""" """Matrice top-15 hôtes × 24 heures (sum hits) sur les 72 dernières heures."""
try: try:
top_sql = """ top_sql = f"""
SELECT host, sum(hits) AS total_hits SELECT host, sum(hits) AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR WHERE window_start >= now() - INTERVAL 72 HOUR
GROUP BY host GROUP BY host
ORDER BY total_hits DESC ORDER BY total_hits DESC
@ -119,12 +120,12 @@ async def get_heatmap_matrix():
if not top_hosts: if not top_hosts:
return {"hosts": [], "matrix": []} return {"hosts": [], "matrix": []}
cell_sql = """ cell_sql = f"""
SELECT SELECT
host, host,
toHour(window_start) AS hour, toHour(window_start) AS hour,
sum(hits) AS hits sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 72 HOUR WHERE window_start >= now() - INTERVAL 72 HOUR
AND host IN %(hosts)s AND host IN %(hosts)s
GROUP BY host, hour GROUP BY host, hour

View File

@ -6,6 +6,7 @@ from fastapi import APIRouter, HTTPException, Query
from typing import List, Optional from typing import List, Optional
from datetime import datetime from datetime import datetime
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/incidents", tags=["incidents"]) router = APIRouter(prefix="/api/incidents", tags=["incidents"])
@ -28,7 +29,7 @@ async def get_incident_clusters(
# Cluster par subnet /24 avec une IP exemple # Cluster par subnet /24 avec une IP exemple
# Note: src_ip est en IPv6, les IPv4 sont stockés comme ::ffff:x.x.x.x # Note: src_ip est en IPv6, les IPv4 sont stockés comme ::ffff:x.x.x.x
# toIPv4() convertit les IPv4-mapped, IPv4NumToString() retourne l'IPv4 en notation x.x.x.x # toIPv4() convertit les IPv4-mapped, IPv4NumToString() retourne l'IPv4 en notation x.x.x.x
cluster_query = """ cluster_query = f"""
WITH cleaned_ips AS ( WITH cleaned_ips AS (
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -38,7 +39,7 @@ async def get_incident_clusters(
asn_number, asn_number,
threat_level, threat_level,
anomaly_score anomaly_score
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
), ),
subnet_groups AS ( subnet_groups AS (
@ -84,13 +85,13 @@ async def get_incident_clusters(
# Collect sample IPs to fetch real UA and trend data in bulk # Collect sample IPs to fetch real UA and trend data in bulk
sample_ips = [row[10] for row in result.result_rows if row[10]] sample_ips = [row[10] for row in result.result_rows if row[10]]
# Fetch real primary UA per sample IP from view_dashboard_entities # Fetch real primary UA per sample IP from view_dashboard_entities (in the settings.CLICKHOUSE_DB_PROCESSING database)
ua_by_ip: dict = {} ua_by_ip: dict = {}
if sample_ips: if sample_ips:
ip_list_sql = ", ".join(f"'{ip}'" for ip in sample_ips[:50]) ip_list_sql = ", ".join(f"'{ip}'" for ip in sample_ips[:50])
ua_query = f""" ua_query = f"""
SELECT entity_value, arrayElement(user_agents, 1) AS top_ua SELECT entity_value, arrayElement(user_agents, 1) AS top_ua
FROM view_dashboard_entities FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_dashboard_entities
WHERE entity_type = 'ip' WHERE entity_type = 'ip'
AND entity_value IN ({ip_list_sql}) AND entity_value IN ({ip_list_sql})
AND notEmpty(user_agents) AND notEmpty(user_agents)
@ -106,7 +107,7 @@ async def get_incident_clusters(
pass # UA enrichment is best-effort pass # UA enrichment is best-effort
# Compute real trend: compare current window vs previous window of same duration # Compute real trend: compare current window vs previous window of same duration
trend_query = """ trend_query = f"""
WITH cleaned AS ( WITH cleaned AS (
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
@ -116,7 +117,7 @@ async def get_incident_clusters(
splitByChar('.', clean_ip)[2], '.', splitByChar('.', clean_ip)[2], '.',
splitByChar('.', clean_ip)[3], '.0/24' splitByChar('.', clean_ip)[3], '.0/24'
) AS subnet ) AS subnet
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
), ),
current_window AS ( current_window AS (
SELECT subnet, count() AS cnt SELECT subnet, count() AS cnt

View File

@ -8,6 +8,7 @@ from fastapi import APIRouter, HTTPException
from ..database import db from ..database import db
from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua
from ..config import settings
router = APIRouter(prefix="/api/investigation", tags=["investigation"]) router = APIRouter(prefix="/api/investigation", tags=["investigation"])
@ -25,7 +26,7 @@ async def get_ip_full_summary(ip: str):
clean_ip = ip.replace("::ffff:", "").strip() clean_ip = ip.replace("::ffff:", "").strip()
try: try:
# ── 1. Score ML / features ───────────────────────────────────────────── # ── 1. Score ML / features ─────────────────────────────────────────────
ml_sql = """ ml_sql = f"""
SELECT SELECT
max(abs(anomaly_score)) AS max_score, max(abs(anomaly_score)) AS max_score,
any(threat_level) AS threat_level, any(threat_level) AS threat_level,
@ -33,7 +34,7 @@ async def get_ip_full_summary(ip: str):
count() AS total_detections, count() AS total_detections,
uniq(host) AS distinct_hosts, uniq(host) AS distinct_hosts,
uniq(ja4) AS distinct_ja4 uniq(ja4) AS distinct_ja4
FROM mabase_prod.ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
""" """
ml_res = db.query(ml_sql, {"ip": clean_ip}) ml_res = db.query(ml_sql, {"ip": clean_ip})
@ -48,13 +49,13 @@ async def get_ip_full_summary(ip: str):
} }
# ── 2. Brute force ───────────────────────────────────────────────────── # ── 2. Brute force ─────────────────────────────────────────────────────
bf_sql = """ bf_sql = f"""
SELECT SELECT
uniq(host) AS hosts_attacked, uniq(host) AS hosts_attacked,
sum(hits) AS total_hits, sum(hits) AS total_hits,
sum(query_params_count) AS total_params, sum(query_params_count) AS total_params,
groupArray(3)(host) AS top_hosts groupArray(3)(host) AS top_hosts
FROM mabase_prod.view_form_bruteforce_detected FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
""" """
bf_res = db.query(bf_sql, {"ip": clean_ip}) bf_res = db.query(bf_sql, {"ip": clean_ip})
@ -68,14 +69,14 @@ async def get_ip_full_summary(ip: str):
} }
# ── 3. TCP spoofing — fingerprinting multi-signal ───────────────────── # ── 3. TCP spoofing — fingerprinting multi-signal ─────────────────────
tcp_sql = """ tcp_sql = f"""
SELECT SELECT
any(tcp_ttl_raw) AS ttl, any(tcp_ttl_raw) AS ttl,
any(tcp_win_raw) AS win, any(tcp_win_raw) AS win,
any(tcp_scale_raw) AS scale, any(tcp_scale_raw) AS scale,
any(tcp_mss_raw) AS mss, any(tcp_mss_raw) AS mss,
any(first_ua) AS ua any(first_ua) AS ua
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND window_start >= now() - INTERVAL 24 HOUR AND window_start >= now() - INTERVAL 24 HOUR
AND tcp_ttl_raw > 0 AND tcp_ttl_raw > 0
@ -109,9 +110,9 @@ async def get_ip_full_summary(ip: str):
} }
# ── 4. JA4 rotation ──────────────────────────────────────────────────── # ── 4. JA4 rotation ────────────────────────────────────────────────────
rot_sql = """ rot_sql = f"""
SELECT distinct_ja4_count, total_hits SELECT distinct_ja4_count, total_hits
FROM mabase_prod.view_host_ip_ja4_rotation FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
LIMIT 1 LIMIT 1
""" """
@ -123,9 +124,9 @@ async def get_ip_full_summary(ip: str):
rot_data = {"rotating": cnt > 1, "distinct_ja4_count": cnt, "total_hits": int(row[1] or 0)} rot_data = {"rotating": cnt > 1, "distinct_ja4_count": cnt, "total_hits": int(row[1] or 0)}
# ── 5. Persistance ───────────────────────────────────────────────────── # ── 5. Persistance ─────────────────────────────────────────────────────
pers_sql = """ pers_sql = f"""
SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen
FROM mabase_prod.view_ip_recurrence FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
LIMIT 1 LIMIT 1
""" """
@ -143,12 +144,12 @@ async def get_ip_full_summary(ip: str):
} }
# ── 6. Timeline 24h ──────────────────────────────────────────────────── # ── 6. Timeline 24h ────────────────────────────────────────────────────
tl_sql = """ tl_sql = f"""
SELECT SELECT
toHour(window_start) AS hour, toHour(window_start) AS hour,
sum(hits) AS hits, sum(hits) AS hits,
groupUniqArray(3)(ja4) AS ja4s groupUniqArray(3)(ja4) AS ja4s
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND window_start >= now() - INTERVAL 24 HOUR AND window_start >= now() - INTERVAL 24 HOUR
GROUP BY hour GROUP BY hour

View File

@ -4,6 +4,7 @@ Endpoints pour les métriques du dashboard
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from ..database import db from ..database import db
from ..models import MetricsResponse, MetricsSummary, TimeSeriesPoint from ..models import MetricsResponse, MetricsSummary, TimeSeriesPoint
from ..config import settings
router = APIRouter(prefix="/api/metrics", tags=["metrics"]) router = APIRouter(prefix="/api/metrics", tags=["metrics"])
@ -15,7 +16,7 @@ async def get_metrics():
""" """
try: try:
# Résumé des métriques # Résumé des métriques
summary_query = """ summary_query = f"""
SELECT SELECT
count() AS total_detections, count() AS total_detections,
countIf(threat_level = 'CRITICAL') AS critical_count, countIf(threat_level = 'CRITICAL') AS critical_count,
@ -25,7 +26,7 @@ async def get_metrics():
countIf(bot_name != '') AS known_bots_count, countIf(bot_name != '') AS known_bots_count,
countIf(bot_name = '') AS anomalies_count, countIf(bot_name = '') AS anomalies_count,
uniq(src_ip) AS unique_ips uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR WHERE detected_at >= now() - INTERVAL 24 HOUR
""" """
@ -47,7 +48,7 @@ async def get_metrics():
) )
# Série temporelle (par heure) # Série temporelle (par heure)
timeseries_query = """ timeseries_query = f"""
SELECT SELECT
toStartOfHour(detected_at) AS hour, toStartOfHour(detected_at) AS hour,
count() AS total, count() AS total,
@ -55,7 +56,7 @@ async def get_metrics():
countIf(threat_level = 'HIGH') AS high, countIf(threat_level = 'HIGH') AS high,
countIf(threat_level = 'MEDIUM') AS medium, countIf(threat_level = 'MEDIUM') AS medium,
countIf(threat_level = 'LOW') AS low countIf(threat_level = 'LOW') AS low
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR WHERE detected_at >= now() - INTERVAL 24 HOUR
GROUP BY hour GROUP BY hour
ORDER BY hour ORDER BY hour
@ -98,12 +99,12 @@ async def get_threat_distribution():
Récupère la répartition par niveau de menace Récupère la répartition par niveau de menace
""" """
try: try:
query = """ query = f"""
SELECT SELECT
threat_level, threat_level,
count() AS count, count() AS count,
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 24 HOUR WHERE detected_at >= now() - INTERVAL 24 HOUR
GROUP BY threat_level GROUP BY threat_level
ORDER BY count DESC ORDER BY count DESC
@ -128,7 +129,7 @@ async def get_metrics_baseline():
Compare les métriques actuelles (24h) vs hier (24h-48h) pour afficher les tendances. Compare les métriques actuelles (24h) vs hier (24h-48h) pour afficher les tendances.
""" """
try: try:
query = """ query = f"""
SELECT SELECT
countIf(detected_at >= now() - INTERVAL 24 HOUR) AS today_total, countIf(detected_at >= now() - INTERVAL 24 HOUR) AS today_total,
countIf(detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_total, countIf(detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_total,
@ -136,7 +137,7 @@ async def get_metrics_baseline():
uniqIf(src_ip, detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_ips, uniqIf(src_ip, detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_ips,
countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 24 HOUR) AS today_critical, countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 24 HOUR) AS today_critical,
countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_critical countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_critical
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL 48 HOUR WHERE detected_at >= now() - INTERVAL 48 HOUR
""" """
r = db.query(query) r = db.query(query)

View File

@ -4,6 +4,7 @@ Endpoints pour les features ML / IA (scores d'anomalies, radar, scatter)
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/ml", tags=["ml_features"]) router = APIRouter(prefix="/api/ml", tags=["ml_features"])
@ -27,7 +28,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
Query directe sur agg_host_ip_ja4_1h + LEFT JOIN agg_header_fingerprint_1h. Query directe sur agg_host_ip_ja4_1h + LEFT JOIN agg_header_fingerprint_1h.
""" """
try: try:
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(a.src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(a.src_ip), '^::ffff:', '') AS ip,
any(a.ja4) AS ja4, any(a.ja4) AS ja4,
@ -45,13 +46,13 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
any(a.src_as_name) AS asn_name, any(a.src_as_name) AS asn_name,
max(h.ua_ch_mismatch) AS ua_ch_mismatch, max(h.ua_ch_mismatch) AS ua_ch_mismatch,
max(h.modern_browser_score) AS browser_score, max(h.modern_browser_score) AS browser_score,
dictGetOrDefault('mabase_prod.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label, dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label,
coalesce( coalesce(
nullIf(dictGetOrDefault('mabase_prod.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''), nullIf(dictGetOrDefault('{settings.CLICKHOUSE_DB_PROCESSING}.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''),
'' ''
) AS bot_name ) AS bot_name
FROM mabase_prod.agg_host_ip_ja4_1h a FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h a
LEFT JOIN mabase_prod.agg_header_fingerprint_1h h LEFT JOIN {settings.CLICKHOUSE_DB_PROCESSING}.agg_header_fingerprint_1h h
ON a.src_ip = h.src_ip AND a.window_start = h.window_start ON a.src_ip = h.src_ip AND a.window_start = h.window_start
WHERE a.window_start >= now() - INTERVAL 24 HOUR WHERE a.window_start >= now() - INTERVAL 24 HOUR
GROUP BY a.src_ip GROUP BY a.src_ip
@ -92,7 +93,7 @@ async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
async def get_ip_radar(ip: str): async def get_ip_radar(ip: str):
"""Scores radar pour une IP spécifique (8 dimensions d'anomalie).""" """Scores radar pour une IP spécifique (8 dimensions d'anomalie)."""
try: try:
sql = """ sql = f"""
SELECT SELECT
avg(fuzzing_index) AS fuzzing_index, avg(fuzzing_index) AS fuzzing_index,
avg(hit_velocity) AS hit_velocity, avg(hit_velocity) AS hit_velocity,
@ -102,7 +103,7 @@ async def get_ip_radar(ip: str):
avg(orphan_ratio) AS orphan_ratio, avg(orphan_ratio) AS orphan_ratio,
avg(path_diversity_ratio) AS path_diversity_ratio, avg(path_diversity_ratio) AS path_diversity_ratio,
avg(anomalous_payload_ratio) AS anomalous_payload_ratio avg(anomalous_payload_ratio) AS anomalous_payload_ratio
FROM mabase_prod.view_ai_features_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ai_features_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND window_start >= now() - INTERVAL 24 HOUR AND window_start >= now() - INTERVAL 24 HOUR
""" """
@ -139,7 +140,7 @@ async def get_score_distribution():
""" """
try: try:
# Single scan — global totals + per-model breakdown via GROUPING SETS # Single scan — global totals + per-model breakdown via GROUPING SETS
sql = """ sql = f"""
SELECT SELECT
threat_level, threat_level,
model_name, model_name,
@ -149,7 +150,7 @@ async def get_score_distribution():
countIf(threat_level = 'NORMAL') AS normal_count, countIf(threat_level = 'NORMAL') AS normal_count,
countIf(threat_level NOT IN ('NORMAL','KNOWN_BOT')) AS anomaly_count, countIf(threat_level NOT IN ('NORMAL','KNOWN_BOT')) AS anomaly_count,
countIf(threat_level = 'KNOWN_BOT') AS bot_count countIf(threat_level = 'KNOWN_BOT') AS bot_count
FROM mabase_prod.ml_all_scores FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores
WHERE detected_at >= now() - INTERVAL 3 DAY WHERE detected_at >= now() - INTERVAL 3 DAY
GROUP BY threat_level, model_name GROUP BY threat_level, model_name
ORDER BY model_name, total DESC ORDER BY model_name, total DESC
@ -199,7 +200,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
Retourne le score moyen et les counts par heure et par modèle. Retourne le score moyen et les counts par heure et par modèle.
""" """
try: try:
sql = """ sql = f"""
SELECT SELECT
toStartOfHour(window_start) AS hour, toStartOfHour(window_start) AS hour,
model_name, model_name,
@ -207,7 +208,7 @@ async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
countIf(threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')) AS anomaly_count, countIf(threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')) AS anomaly_count,
countIf(threat_level = 'KNOWN_BOT') AS bot_count, countIf(threat_level = 'KNOWN_BOT') AS bot_count,
round(avgIf(anomaly_score, threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')), 4) AS avg_anomaly_score round(avgIf(anomaly_score, threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')), 4) AS avg_anomaly_score
FROM mabase_prod.ml_all_scores FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_all_scores
WHERE window_start >= now() - INTERVAL %(hours)s HOUR WHERE window_start >= now() - INTERVAL %(hours)s HOUR
GROUP BY hour, model_name GROUP BY hour, model_name
ORDER BY hour ASC, model_name ORDER BY hour ASC, model_name
@ -237,7 +238,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
Ces features sont calculées dans view_ai_features_1h mais jamais visualisées dans le dashboard. Ces features sont calculées dans view_ai_features_1h mais jamais visualisées dans le dashboard.
""" """
try: try:
sql = """ sql = f"""
SELECT ip, ja4, country, asn_name, total_hits AS hits, SELECT ip, ja4, country, asn_name, total_hits AS hits,
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio, head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
missing_accept_enc_ratio, http_scheme_ratio missing_accept_enc_ratio, http_scheme_ratio
@ -255,7 +256,7 @@ async def get_b_features(limit: int = Query(50, ge=1, le=200)):
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio, round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio, round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip GROUP BY src_ip
) )
@ -295,7 +296,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
""" """
try: try:
# First: check real campaigns # First: check real campaigns
campaign_sql = """ campaign_sql = f"""
SELECT SELECT
campaign_id, campaign_id,
count() AS total_detections, count() AS total_detections,
@ -305,7 +306,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
groupUniqArray(3)(bot_name) AS bot_names, groupUniqArray(3)(bot_name) AS bot_names,
min(detected_at) AS first_seen, min(detected_at) AS first_seen,
max(detected_at) AS last_seen max(detected_at) AS last_seen
FROM mabase_prod.ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND campaign_id >= 0 AND campaign_id >= 0
GROUP BY campaign_id GROUP BY campaign_id
@ -330,7 +331,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
# Fallback: subnet-based clustering when DBSCAN has no campaigns # Fallback: subnet-based clustering when DBSCAN has no campaigns
if not campaigns: if not campaigns:
subnet_sql = """ subnet_sql = f"""
SELECT SELECT
IPv4CIDRToRange(toIPv4(replaceRegexpAll(toString(src_ip),'^::ffff:','')), 24).1 AS subnet, IPv4CIDRToRange(toIPv4(replaceRegexpAll(toString(src_ip),'^::ffff:','')), 24).1 AS subnet,
count() AS total_detections, count() AS total_detections,
@ -340,7 +341,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
any(ja4) AS sample_ja4, any(ja4) AS sample_ja4,
min(detected_at) AS first_seen, min(detected_at) AS first_seen,
max(detected_at) AS last_seen max(detected_at) AS last_seen
FROM mabase_prod.ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
AND threat_level IN ('HIGH','CRITICAL','MEDIUM') AND threat_level IN ('HIGH','CRITICAL','MEDIUM')
GROUP BY subnet GROUP BY subnet
@ -381,7 +382,7 @@ async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Qu
async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)): async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
"""Points scatter plot (fuzzing_index × hit_velocity) — bypass view_ai_features_1h.""" """Points scatter plot (fuzzing_index × hit_velocity) — bypass view_ai_features_1h."""
try: try:
sql = """ sql = f"""
SELECT SELECT
ip, ip,
ja4, ja4,
@ -400,7 +401,7 @@ async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
max(last_seen) AS max_last, max(last_seen) AS max_last,
sum(count_head) AS total_count_head, sum(count_head) AS total_count_head,
max(correlated_raw) AS correlated max(correlated_raw) AS correlated
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY src_ip GROUP BY src_ip
) )

View File

@ -4,6 +4,7 @@ Endpoints pour la détection de la rotation de fingerprints JA4 et des menaces p
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/rotation", tags=["rotation"]) router = APIRouter(prefix="/api/rotation", tags=["rotation"])
@ -12,12 +13,12 @@ router = APIRouter(prefix="/api/rotation", tags=["rotation"])
async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)): async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)):
"""IPs qui effectuent le plus de rotation de fingerprints JA4.""" """IPs qui effectuent le plus de rotation de fingerprints JA4."""
try: try:
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
distinct_ja4_count, distinct_ja4_count,
total_hits total_hits
FROM mabase_prod.view_host_ip_ja4_rotation FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
ORDER BY distinct_ja4_count DESC ORDER BY distinct_ja4_count DESC
LIMIT %(limit)s LIMIT %(limit)s
""" """
@ -40,7 +41,7 @@ async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)):
async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)): async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
"""Menaces persistantes triées par score de persistance.""" """Menaces persistantes triées par score de persistance."""
try: try:
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
recurrence, recurrence,
@ -48,7 +49,7 @@ async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
worst_threat_level, worst_threat_level,
first_seen, first_seen,
last_seen last_seen
FROM mabase_prod.view_ip_recurrence FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
ORDER BY (least(100, recurrence * 20 + worst_score * 50)) DESC ORDER BY (least(100, recurrence * 20 + worst_score * 50)) DESC
LIMIT %(limit)s LIMIT %(limit)s
""" """
@ -75,13 +76,13 @@ async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
async def get_ip_ja4_history(ip: str): async def get_ip_ja4_history(ip: str):
"""Historique des JA4 utilisés par une IP donnée.""" """Historique des JA4 utilisés par une IP donnée."""
try: try:
sql = """ sql = f"""
SELECT SELECT
ja4, ja4,
sum(hits) AS hits, sum(hits) AS hits,
min(window_start) AS first_seen, min(window_start) AS first_seen,
max(window_start) AS last_seen max(window_start) AS last_seen
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
GROUP BY ja4 GROUP BY ja4
ORDER BY hits DESC ORDER BY hits DESC
@ -107,7 +108,7 @@ async def get_sophistication(limit: int = Query(50, ge=1, le=500)):
Single SQL JOIN query — aucun traitement Python sur 34K entrées. Single SQL JOIN query — aucun traitement Python sur 34K entrées.
""" """
try: try:
sql = """ sql = f"""
SELECT SELECT
r.ip, r.ip,
r.distinct_ja4_count, r.distinct_ja4_count,
@ -122,20 +123,20 @@ async def get_sophistication(limit: int = Query(50, ge=1, le=500)):
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
distinct_ja4_count distinct_ja4_count
FROM mabase_prod.view_host_ip_ja4_rotation FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_host_ip_ja4_rotation
) r ) r
LEFT JOIN ( LEFT JOIN (
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
count() AS recurrence count() AS recurrence
FROM mabase_prod.ml_detected_anomalies FINAL FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies FINAL
GROUP BY ip GROUP BY ip
) rec ON r.ip = rec.ip ) rec ON r.ip = rec.ip
LEFT JOIN ( LEFT JOIN (
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
sum(hits) AS bruteforce_hits sum(hits) AS bruteforce_hits
FROM mabase_prod.view_form_bruteforce_detected FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_form_bruteforce_detected
GROUP BY ip GROUP BY ip
) bf ON r.ip = bf.ip ) bf ON r.ip = bf.ip
ORDER BY sophistication_score DESC ORDER BY sophistication_score DESC
@ -174,7 +175,7 @@ async def get_proactive_hunt(
): ):
"""IPs volant sous le radar : récurrentes mais sous le seuil de détection normal.""" """IPs volant sous le radar : récurrentes mais sous le seuil de détection normal."""
try: try:
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
recurrence, recurrence,
@ -183,7 +184,7 @@ async def get_proactive_hunt(
first_seen, first_seen,
last_seen, last_seen,
dateDiff('day', first_seen, last_seen) AS days_active dateDiff('day', first_seen, last_seen) AS days_active
FROM mabase_prod.view_ip_recurrence FROM {settings.CLICKHOUSE_DB_PROCESSING}.view_ip_recurrence
WHERE recurrence >= %(min_recurrence)s WHERE recurrence >= %(min_recurrence)s
AND abs(worst_score) < 0.5 AND abs(worst_score) < 0.5
AND dateDiff('day', first_seen, last_seen) >= %(min_days)s AND dateDiff('day', first_seen, last_seen) >= %(min_days)s

View File

@ -3,6 +3,7 @@ Endpoint de recherche globale rapide — utilisé par la barre Cmd+K
""" """
from fastapi import APIRouter, Query from fastapi import APIRouter, Query
from ..database import db from ..database import db
from ..config import settings
router = APIRouter(prefix="/api/search", tags=["search"]) router = APIRouter(prefix="/api/search", tags=["search"])
@ -21,13 +22,13 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── IPs ────────────────────────────────────────────────────────────────── # ── IPs ──────────────────────────────────────────────────────────────────
ip_rows = db.query( ip_rows = db.query(
""" f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
count() AS hits, count() AS hits,
max(detected_at) AS last_seen, max(detected_at) AS last_seen,
any(threat_level) AS threat_level any(threat_level) AS threat_level
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ilike(toString(src_ip), %(p)s) WHERE ilike(toString(src_ip), %(p)s)
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY clean_ip GROUP BY clean_ip
@ -48,12 +49,12 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── JA4 fingerprints ───────────────────────────────────────────────────── # ── JA4 fingerprints ─────────────────────────────────────────────────────
ja4_rows = db.query( ja4_rows = db.query(
""" f"""
SELECT SELECT
ja4, ja4,
count() AS hits, count() AS hits,
uniq(src_ip) AS unique_ips uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ilike(ja4, %(p)s) WHERE ilike(ja4, %(p)s)
AND ja4 != '' AND ja4 != ''
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
@ -73,12 +74,12 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── Hosts ───────────────────────────────────────────────────────────────── # ── Hosts ─────────────────────────────────────────────────────────────────
host_rows = db.query( host_rows = db.query(
""" f"""
SELECT SELECT
host, host,
count() AS hits, count() AS hits,
uniq(src_ip) AS unique_ips uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE ilike(host, %(p)s) WHERE ilike(host, %(p)s)
AND host != '' AND host != ''
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
@ -98,13 +99,13 @@ async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
# ── ASN ─────────────────────────────────────────────────────────────────── # ── ASN ───────────────────────────────────────────────────────────────────
asn_rows = db.query( asn_rows = db.query(
""" f"""
SELECT SELECT
asn_org, asn_org,
asn_number, asn_number,
count() AS hits, count() AS hits,
uniq(src_ip) AS unique_ips uniq(src_ip) AS unique_ips
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE (ilike(asn_org, %(p)s) OR ilike(asn_number, %(p)s)) WHERE (ilike(asn_org, %(p)s) OR ilike(asn_number, %(p)s))
AND asn_org != '' AND asn_number != '' AND asn_org != '' AND asn_number != ''
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR

View File

@ -18,6 +18,7 @@ from ..services.tcp_fingerprint import (
detect_spoof, detect_spoof,
declared_os_from_ua, declared_os_from_ua,
) )
from ..config import settings
router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"]) router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"])
@ -26,7 +27,7 @@ router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"])
async def get_tcp_spoofing_overview(): async def get_tcp_spoofing_overview():
"""Statistiques globales avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale).""" """Statistiques globales avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale)."""
try: try:
sql = """ sql = f"""
SELECT SELECT
count() AS total_entries, count() AS total_entries,
uniq(src_ip) AS unique_ips, uniq(src_ip) AS unique_ips,
@ -36,34 +37,34 @@ async def get_tcp_spoofing_overview():
countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp, countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp,
countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp, countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp,
countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR WHERE window_start >= now() - INTERVAL 24 HOUR
""" """
result = db.query(sql) result = db.query(sql)
row = result.result_rows[0] row = result.result_rows[0]
# Distribution TTL (top 15) # Distribution TTL (top 15)
ttl_sql = """ ttl_sql = f"""
SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY ttl ORDER BY cnt DESC GROUP BY ttl ORDER BY cnt DESC
""" """
ttl_res = db.query(ttl_sql) ttl_res = db.query(ttl_sql)
# Distribution MSS — nouveau signal clé (top 12) # Distribution MSS — nouveau signal clé (top 12)
mss_sql = """ mss_sql = f"""
SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0 WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0
GROUP BY mss ORDER BY cnt DESC GROUP BY mss ORDER BY cnt DESC
""" """
mss_res = db.query(mss_sql) mss_res = db.query(mss_sql)
# Distribution fenêtre (top 10) # Distribution fenêtre (top 10)
win_sql = """ win_sql = f"""
SELECT tcp_win_raw AS win, count() AS cnt SELECT tcp_win_raw AS win, count() AS cnt
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY win ORDER BY cnt DESC GROUP BY win ORDER BY cnt DESC
""" """
@ -105,17 +106,17 @@ async def get_tcp_spoofing_list(
Inclut les champs enrichis : mss, win_scale, initial_ttl, hop_count, confidence, network_path, is_bot_tool. Inclut les champs enrichis : mss, win_scale, initial_ttl, hop_count, confidence, network_path, is_bot_tool.
""" """
try: try:
count_sql = """ count_sql = f"""
SELECT count() FROM ( SELECT count() FROM (
SELECT src_ip, ja4 SELECT src_ip, ja4
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4 GROUP BY src_ip, ja4
) )
""" """
total = int(db.query(count_sql).result_rows[0][0]) total = int(db.query(count_sql).result_rows[0][0])
sql = """ sql = f"""
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip,
ja4, ja4,
@ -125,7 +126,7 @@ async def get_tcp_spoofing_list(
any(tcp_mss_raw) AS tcp_mss, any(tcp_mss_raw) AS tcp_mss,
any(first_ua) AS first_ua, any(first_ua) AS first_ua,
sum(hits) AS hits sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4 GROUP BY src_ip, ja4
ORDER BY hits DESC ORDER BY hits DESC
@ -178,7 +179,7 @@ async def get_tcp_spoofing_list(
async def get_tcp_spoofing_matrix(): async def get_tcp_spoofing_matrix():
"""Matrice OS suspecté × OS déclaré avec fingerprinting multi-signal.""" """Matrice OS suspecté × OS déclaré avec fingerprinting multi-signal."""
try: try:
sql = """ sql = f"""
SELECT SELECT
any(tcp_ttl_raw) AS ttl, any(tcp_ttl_raw) AS ttl,
any(tcp_win_raw) AS win, any(tcp_win_raw) AS win,
@ -186,7 +187,7 @@ async def get_tcp_spoofing_matrix():
any(tcp_mss_raw) AS mss, any(tcp_mss_raw) AS mss,
any(first_ua) AS ua, any(first_ua) AS ua,
count() AS cnt count() AS cnt
FROM mabase_prod.agg_host_ip_ja4_1h FROM {settings.CLICKHOUSE_DB_PROCESSING}.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4 GROUP BY src_ip, ja4
""" """

View File

@ -8,6 +8,7 @@ from ..models import (
VariabilityResponse, VariabilityAttributes, AttributeValue, Insight, VariabilityResponse, VariabilityAttributes, AttributeValue, Insight,
UserAgentsResponse, UserAgentValue UserAgentsResponse, UserAgentValue
) )
from ..config import settings
router = APIRouter(prefix="/api/variability", tags=["variability"]) router = APIRouter(prefix="/api/variability", tags=["variability"])
@ -45,7 +46,7 @@ async def get_associated_ips(
query = f""" query = f"""
SELECT src_ip, count() AS hit_count SELECT src_ip, count() AS hit_count
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
GROUP BY src_ip GROUP BY src_ip
@ -64,7 +65,7 @@ async def get_associated_ips(
# Compter le total # Compter le total
count_query = f""" count_query = f"""
SELECT uniq(src_ip) AS total SELECT uniq(src_ip) AS total
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
""" """
@ -138,7 +139,7 @@ async def get_associated_attributes(
# country/asn/host: pivot via ml_detected_anomalies # country/asn/host: pivot via ml_detected_anomalies
ua_where = f"""toString(src_ip) IN ( ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '') SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)""" )"""
ua_q = f""" ua_q = f"""
@ -160,7 +161,7 @@ async def get_associated_attributes(
{target_column} AS value, {target_column} AS value,
count() AS count, count() AS count,
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s WHERE {column} = %(value)s
AND {target_column} != '' AND {target_column} IS NOT NULL AND {target_column} != '' AND {target_column} IS NOT NULL
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
@ -183,7 +184,7 @@ async def get_associated_attributes(
# Compter le total # Compter le total
count_query = f""" count_query = f"""
SELECT uniq({target_column}) AS total SELECT uniq({target_column}) AS total
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s WHERE {column} = %(value)s
AND {target_column} != '' AND {target_column} IS NOT NULL AND {target_column} != '' AND {target_column} IS NOT NULL
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
@ -247,7 +248,7 @@ async def get_user_agents(
ml_col = {"country": "country_code", "asn": "asn_number", "host": "host"}[attr_type] ml_col = {"country": "country_code", "asn": "asn_number", "host": "host"}[attr_type]
where = f"""toString(src_ip) IN ( where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '') SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {ml_col} = %(value)s WHERE {ml_col} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
)""" )"""
@ -467,7 +468,7 @@ async def get_variability(attr_type: str, value: str):
threat_level, threat_level,
model_name, model_name,
anomaly_score anomaly_score
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
) )
@ -480,7 +481,7 @@ async def get_variability(attr_type: str, value: str):
uniq(src_ip) AS unique_ips, uniq(src_ip) AS unique_ips,
min(detected_at) AS first_seen, min(detected_at) AS first_seen,
max(detected_at) AS last_seen max(detected_at) AS last_seen
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s WHERE {column} = %(value)s
AND detected_at >= now() - INTERVAL 24 HOUR AND detected_at >= now() - INTERVAL 24 HOUR
""" """
@ -506,12 +507,12 @@ async def get_variability(attr_type: str, value: str):
header_user_agent AS user_agent, header_user_agent AS user_agent,
count() AS count, count() AS count,
round(count() * 100.0 / ( round(count() * 100.0 / (
SELECT count() FROM mabase_prod.http_logs SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
), 2) AS percentage, ), 2) AS percentage,
min(time) AS first_seen, min(time) AS first_seen,
max(time) AS last_seen max(time) AS last_seen
FROM mabase_prod.http_logs FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} WHERE {_ua_logs_where}
AND time >= now() - INTERVAL 24 HOUR AND time >= now() - INTERVAL 24 HOUR
AND header_user_agent != '' AND header_user_agent IS NOT NULL AND header_user_agent != '' AND header_user_agent IS NOT NULL
@ -527,12 +528,12 @@ async def get_variability(attr_type: str, value: str):
header_user_agent AS user_agent, header_user_agent AS user_agent,
count() AS count, count() AS count,
round(count() * 100.0 / ( round(count() * 100.0 / (
SELECT count() FROM mabase_prod.http_logs SELECT count() FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
), 2) AS percentage, ), 2) AS percentage,
min(time) AS first_seen, min(time) AS first_seen,
max(time) AS last_seen max(time) AS last_seen
FROM mabase_prod.http_logs FROM {settings.CLICKHOUSE_DB_LOGS}.http_logs
WHERE {_ua_logs_where} WHERE {_ua_logs_where}
AND time >= now() - INTERVAL 24 HOUR AND time >= now() - INTERVAL 24 HOUR
AND header_user_agent != '' AND header_user_agent IS NOT NULL AND header_user_agent != '' AND header_user_agent IS NOT NULL
@ -545,7 +546,7 @@ async def get_variability(attr_type: str, value: str):
# country / asn / host: pivot via ml_detected_anomalies → IPs, puis view UA # country / asn / host: pivot via ml_detected_anomalies → IPs, puis view UA
_ua_where = f"""toString(src_ip) IN ( _ua_where = f"""toString(src_ip) IN (
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '') SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
FROM ml_detected_anomalies FROM {settings.CLICKHOUSE_DB_PROCESSING}.ml_detected_anomalies
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
)""" )"""
ua_query_simple = f""" ua_query_simple = f"""