feat: clustering multi-métriques + TCP fingerprinting amélioré

- TCP fingerprinting: 20 signatures OS (p0f-style), scoring multi-signal
  TTL/MSS/scale/fenêtre, détection Masscan (97 % de confiance), chemin réseau
  (Ethernet/PPPoE/VPN/Tunnel), estimation du nombre de sauts (hop count)

- Clustering IPs: K-means++ (Arthur & Vassilvitskii 2007) sur 21 features
  TCP stack + anomalie ML + TLS/protocole + navigateur + temporel
  PCA-2D par puissance itérative (Hotelling) pour positionnement

- Visualisation redesign: 2 vues lisibles
  - Tableau de bord: grille de cartes groupées par niveau de risque
    (Bots / Suspects / Légitimes), métriques clés + mini-barres
  - Graphe de relations: ReactFlow avec nœuds-cartes en colonnes
    par niveau de menace, arêtes colorées par similarité, légende
  - Sidebar: RadarChart comportemental + toutes métriques + export CSV

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
SOC Analyst
2026-03-18 18:22:57 +01:00
parent c887846af5
commit e2db8ca84e
9 changed files with 2430 additions and 202 deletions

View File

@ -13,7 +13,7 @@ import os
from .config import settings from .config import settings
from .database import db from .database import db
from .routes import metrics, detections, variability, attributes, analysis, entities, incidents, audit, reputation, fingerprints from .routes import metrics, detections, variability, attributes, analysis, entities, incidents, audit, reputation, fingerprints
from .routes import bruteforce, tcp_spoofing, header_fingerprint, heatmap, botnets, rotation, ml_features, investigation_summary, search from .routes import bruteforce, tcp_spoofing, header_fingerprint, heatmap, botnets, rotation, ml_features, investigation_summary, search, clustering
# Configuration logging # Configuration logging
logging.basicConfig( logging.basicConfig(
@ -84,6 +84,7 @@ app.include_router(rotation.router)
app.include_router(ml_features.router) app.include_router(ml_features.router)
app.include_router(investigation_summary.router) app.include_router(investigation_summary.router)
app.include_router(search.router) app.include_router(search.router)
app.include_router(clustering.router)
# Route pour servir le frontend # Route pour servir le frontend

View File

@ -0,0 +1,458 @@
"""
Clustering d'IPs multi-métriques — backend ReactFlow.
Features utilisées (21 dimensions) :
TCP stack : TTL initial, MSS, scale, fenêtre TCP
Comportement : vélocité, POST ratio, fuzzing, assets, accès direct
Anomalie ML : score, IP-ID zéro
TLS/Protocole: ALPN mismatch, ALPN absent, efficacité H2
Navigateur : browser score, headless, ordre headers, UA-CH mismatch
Temporel : entropie, diversité JA4, UA rotatif
Algorithme :
1. Échantillonnage top-N : paires (IP, JA4) triées par score d'anomalie puis par hits
2. Construction + normalisation des vecteurs de features
3. K-means++ (Arthur & Vassilvitskii, 2007)
4. PCA-2D par power iteration pour les positions ReactFlow
5. Nommage automatique par features dominantes du centroïde
6. Calcul des arêtes : k-NN dans l'espace des features
"""
from __future__ import annotations
import math
import time
import hashlib
from typing import Optional
from fastapi import APIRouter, HTTPException, Query
from ..database import db
from ..services.clustering_engine import (
FEATURES, FEATURE_KEYS, FEATURE_NORMS, FEATURE_NAMES, N_FEATURES,
build_feature_vector, kmeans_pp, pca_2d,
name_cluster, risk_score_from_centroid, _mean_vec,
)
router = APIRouter(prefix="/api/clustering", tags=["clustering"])
# ─── In-memory cache ──────────────────────────────────────────────────────────
# Module-level state shared by the endpoints of this router. The /clusters
# endpoint overwrites "cluster_ips" and "params" on every run; the drill-down
# endpoint reads them back to list the members of one cluster.
# NOTE(review): "assignments" is declared here but is not written anywhere in
# the visible code — confirm whether it is still needed.
_cache: dict = {
"assignments": {}, # ip+ja4 → cluster_idx
"cluster_ips": {}, # cluster_idx → [(ip, ja4)]
"params": {}, # k, ts
}
# ─── Colours ──────────────────────────────────────────────────────────────────
# Lower bound of each risk band → hex colour of the cluster node.
_THREAT_COLOR = {
    0.92: "#dc2626",  # Bot scanner
    0.70: "#ef4444",  # Critical
    0.45: "#f97316",  # High
    0.25: "#eab308",  # Moderate
    0.00: "#6b7280",  # Healthy / unknown
}

# Bands ordered from the highest threshold down, computed once at import time
# so that a colour lookup does not re-sort the table on every call.
_THREAT_BANDS = tuple(sorted(_THREAT_COLOR.items(), reverse=True))


def _risk_to_color(risk: float) -> str:
    """Return the hex colour of the highest risk band whose threshold ``risk`` reaches.

    Values that reach no band (negative numbers, NaN) fall back to the
    neutral grey ``#6b7280``.
    """
    for threshold, color in _THREAT_BANDS:
        if risk >= threshold:
            return color
    return "#6b7280"
# ─── SQL ──────────────────────────────────────────────────────────────────────
# Feature query: one row per (src_ip, ja4) seen in the last 24 hours with a
# non-zero raw TTL. TCP stack values and the hit count come from
# agg_host_ip_ja4_1h; the ML features are aggregated from the LEFT JOINed
# ml_detected_anomalies rows. Rows are ordered by average absolute anomaly
# score, then by hits, and capped by the %(limit)s parameter.
# NOTE(review): the LEFT JOIN runs before the GROUP BY, so when a pair has
# several matching ml rows sum(t.hits) presumably counts every t row once per
# ml row — confirm, and pre-aggregate both sides if exact hit counts matter.
_SQL_FEATURES = """
SELECT
replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip,
t.ja4,
any(t.tcp_ttl_raw) AS ttl,
any(t.tcp_win_raw) AS win,
any(t.tcp_scale_raw) AS scale,
any(t.tcp_mss_raw) AS mss,
any(t.first_ua) AS ua,
sum(t.hits) AS hits,
avg(abs(ml.anomaly_score)) AS avg_score,
avg(ml.hit_velocity) AS avg_velocity,
avg(ml.fuzzing_index) AS avg_fuzzing,
avg(ml.is_headless) AS pct_headless,
avg(ml.post_ratio) AS avg_post,
avg(ml.ip_id_zero_ratio) AS ip_id_zero,
avg(ml.temporal_entropy) AS entropy,
avg(ml.modern_browser_score) AS browser_score,
avg(ml.alpn_http_mismatch) AS alpn_mismatch,
avg(ml.is_alpn_missing) AS alpn_missing,
avg(ml.multiplexing_efficiency) AS h2_eff,
avg(ml.header_order_confidence) AS hdr_conf,
avg(ml.ua_ch_mismatch) AS ua_ch_mismatch,
avg(ml.asset_ratio) AS asset_ratio,
avg(ml.direct_access_ratio) AS direct_ratio,
avg(ml.distinct_ja4_count) AS ja4_count,
max(ml.is_ua_rotating) AS ua_rotating,
max(ml.threat_level) AS threat,
any(ml.country_code) AS country,
any(ml.asn_org) AS asn_org
FROM mabase_prod.agg_host_ip_ja4_1h t
LEFT JOIN mabase_prod.ml_detected_anomalies ml
ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
AND ml.detected_at >= now() - INTERVAL 24 HOUR
WHERE t.window_start >= now() - INTERVAL 24 HOUR
AND t.tcp_ttl_raw > 0
GROUP BY t.src_ip, t.ja4
ORDER BY
-- Stratégie : IPs anormales en premier, puis fort trafic
-- Cela garantit que les bots Masscan (anomalie=0.97, hits=1-2) sont inclus
avg(abs(ml.anomaly_score)) DESC,
sum(t.hits) DESC
LIMIT %(limit)s
"""
# Keys given to each result row, listed in the same order as the SELECT
# columns of _SQL_FEATURES (rows are mapped positionally).
_SQL_COLS = [
"ip", "ja4", "ttl", "win", "scale", "mss", "ua", "hits",
"avg_score", "avg_velocity", "avg_fuzzing", "pct_headless", "avg_post",
"ip_id_zero", "entropy", "browser_score", "alpn_mismatch", "alpn_missing",
"h2_eff", "hdr_conf", "ua_ch_mismatch", "asset_ratio", "direct_ratio",
"ja4_count", "ua_rotating", "threat", "country", "asn_org",
]
# ─── Endpoints ────────────────────────────────────────────────────────────────
def _mean_feature(rows: list[dict], key: str) -> float:
    """Return the arithmetic mean of ``key`` over ``rows``.

    Missing, NULL and other falsy values count as 0; an empty ``rows`` gives 0.0.
    """
    if not rows:
        return 0.0
    return sum(float(r.get(key) or 0) for r in rows) / len(rows)


def _most_common(values: list[str], n: int = 5) -> list[str]:
    """Return up to ``n`` most frequent non-empty strings, most frequent first."""
    from collections import Counter

    return [v for v, _ in Counter(values).most_common(n) if v]


@router.get("/clusters")
async def get_clusters(
    k: int = Query(14, ge=4, le=30, description="Nombre de clusters"),
    n_samples: int = Query(3000, ge=500, le=8000, description="Taille de l'échantillon"),
):
    """
    Multi-metric clustering of (IP, JA4) pairs.

    Runs the feature query, clusters the feature vectors with ``kmeans_pp``
    and returns ReactFlow-ready data:

    - ``nodes``: one entry per non-empty cluster, with its 2D position (mean of
      the members' ``pca_2d`` coordinates), the centroid values per feature
      (``radar``), raw feature means, dominant threat/countries/ASN orgs and a
      sample of up to 8 distinct IPs ranked by hits;
    - ``edges``: each node linked to its 2 nearest nodes in feature space;
    - ``stats``: global counters and the elapsed time;
    - ``feature_names``: the feature labels used by ``radar``.

    Side effect: overwrites ``_cache["cluster_ips"]`` and ``_cache["params"]``
    so the drill-down endpoint can list the members of a cluster.

    Raises:
        HTTPException 500: the database query failed.
        HTTPException 400: fewer rows than ``k`` were returned.
    """
    # NOTE(review): the query and the clustering run synchronously inside an
    # ``async def`` handler — confirm this is acceptable for the event loop.
    t0 = time.time()
    try:
        result = db.query(_SQL_FEATURES, {"limit": n_samples})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"ClickHouse: {e}") from e

    # ── Feature vectors ───────────────────────────────────────────────────
    rows: list[dict] = [
        {col: row[i] for i, col in enumerate(_SQL_COLS)}
        for row in result.result_rows
    ]
    if len(rows) < k:
        raise HTTPException(status_code=400, detail="Pas assez de données pour ce k")
    points = [build_feature_vector(r) for r in rows]

    # ── K-means++ ─────────────────────────────────────────────────────────
    km = kmeans_pp(points, k=k, max_iter=60, seed=42)

    # ── 2D projection ─────────────────────────────────────────────────────
    # Every point is projected, then each cluster is placed at the mean of
    # its members' projected coordinates.
    coords_all = pca_2d(points)

    # Single pass over the labels: group rows, members and 2D coordinates.
    cluster_rows: list[list[dict]] = [[] for _ in range(k)]
    cluster_members: list[list[tuple[str, str]]] = [[] for _ in range(k)]
    cluster_xs: list[list[float]] = [[] for _ in range(k)]
    cluster_ys: list[list[float]] = [[] for _ in range(k)]
    for i, assigned in enumerate(km.labels):
        row = rows[i]
        cluster_rows[assigned].append(row)
        cluster_members[assigned].append((row["ip"], row["ja4"]))
        cluster_xs[assigned].append(coords_all[i][0])
        cluster_ys[assigned].append(coords_all[i][1])

    centroid_2d: list[tuple[float, float]] = []
    for j in range(k):
        if cluster_xs[j]:
            cx = sum(cluster_xs[j]) / len(cluster_xs[j])
            cy = sum(cluster_ys[j]) / len(cluster_ys[j])
        else:
            # Empty cluster: placeholder position (no node is emitted for it).
            cx, cy = 0.5, 0.5
        centroid_2d.append((cx, cy))

    # Refresh the drill-down cache with this run's membership.
    _cache["cluster_ips"] = {j: cluster_members[j] for j in range(k)}
    _cache["params"] = {"k": k, "ts": t0}

    # ── ReactFlow nodes ───────────────────────────────────────────────────
    CANVAS_W, CANVAS_H = 1400, 780
    nodes = []
    for j in range(k):
        members = cluster_rows[j]
        if not members:
            continue

        # Raw (non-normalised) feature means
        mean_ttl = _mean_feature(members, "ttl")
        mean_mss = _mean_feature(members, "mss")
        mean_scale = _mean_feature(members, "scale")
        mean_win = _mean_feature(members, "win")
        mean_score = _mean_feature(members, "avg_score")
        mean_vel = _mean_feature(members, "avg_velocity")
        mean_fuzz = _mean_feature(members, "avg_fuzzing")
        mean_hless = _mean_feature(members, "pct_headless")
        mean_post = _mean_feature(members, "avg_post")
        mean_asset = _mean_feature(members, "asset_ratio")
        mean_direct = _mean_feature(members, "direct_ratio")
        mean_alpn = _mean_feature(members, "alpn_mismatch")
        mean_h2 = _mean_feature(members, "h2_eff")
        mean_hconf = _mean_feature(members, "hdr_conf")
        mean_ua_ch = _mean_feature(members, "ua_ch_mismatch")
        mean_entr = _mean_feature(members, "entropy")
        mean_ja4 = _mean_feature(members, "ja4_count")
        mean_ip_id = _mean_feature(members, "ip_id_zero")
        mean_brow = _mean_feature(members, "browser_score")
        mean_uarot = _mean_feature(members, "ua_rotating")

        ip_count = len({r["ip"] for r in members})
        hit_count = int(sum(float(r.get("hits") or 0) for r in members))

        # Dominant threat level / countries / ASN organisations
        threats = [str(r.get("threat") or "") for r in members if r.get("threat")]
        countries = [str(r.get("country") or "") for r in members if r.get("country")]
        orgs = [str(r.get("asn_org") or "") for r in members if r.get("asn_org")]
        top_threats = _most_common(threats, 1)

        raw_stats = {
            "mean_ttl": mean_ttl,
            "mean_mss": mean_mss,
            "mean_scale": mean_scale,
        }
        centroid = km.centroids[j]
        cluster_label = name_cluster(centroid, raw_stats)
        risk = risk_score_from_centroid(centroid)
        color = _risk_to_color(risk)

        # Radar profile: centroid value of every feature
        radar = [
            {"feature": name, "value": round(centroid[i], 4)}
            for i, name in enumerate(FEATURE_NAMES)
        ]

        # 2D position mapped to canvas pixels; y is flipped.
        # NOTE(review): assumes pca_2d returns coordinates in [0, 1] — confirm.
        px_x = centroid_2d[j][0] * CANVAS_W * 0.85 + 80
        px_y = (1 - centroid_2d[j][1]) * CANVAS_H * 0.85 + 50

        # Radius grows with sqrt(ip_count), clamped to [18, 90].
        # NOTE(review): with the 0.3 factor and n_samples <= 8000 the value
        # never exceeds 26 and stays at 18 below ~3600 IPs — confirm the scale.
        radius = max(18, min(90, int(math.sqrt(ip_count) * 0.3)))

        # Sample: up to 8 distinct IPs, highest hit counts first. An IP that
        # appears with several JA4s is listed once.
        ranked = sorted(members, key=lambda r: float(r.get("hits") or 0), reverse=True)
        sample_ips = list(dict.fromkeys(r["ip"] for r in ranked))[:8]
        sample_ua = str(members[0].get("ua") or "")

        cluster_id = f"c{j}_k{k}"
        nodes.append({
            "id": cluster_id,
            "label": cluster_label,
            "cluster_idx": j,
            "x": round(px_x, 1),
            "y": round(px_y, 1),
            "radius": radius,
            "color": color,
            "risk_score": risk,
            # TCP characteristics
            "mean_ttl": round(mean_ttl, 1),
            "mean_mss": round(mean_mss, 0),
            "mean_scale": round(mean_scale, 1),
            "mean_win": round(mean_win, 0),
            # HTTP behaviour
            "mean_score": round(mean_score, 4),
            "mean_velocity": round(mean_vel, 3),
            "mean_fuzzing": round(mean_fuzz, 3),
            "mean_headless": round(mean_hless, 3),
            "mean_post": round(mean_post, 3),
            "mean_asset": round(mean_asset, 3),
            "mean_direct": round(mean_direct, 3),
            # TLS / protocol
            "mean_alpn_mismatch": round(mean_alpn, 3),
            "mean_h2_eff": round(mean_h2, 3),
            "mean_hdr_conf": round(mean_hconf, 3),
            "mean_ua_ch": round(mean_ua_ch, 3),
            # Temporal
            "mean_entropy": round(mean_entr, 3),
            "mean_ja4_diversity": round(mean_ja4, 3),
            "mean_ip_id_zero": round(mean_ip_id, 3),
            "mean_browser_score": round(mean_brow, 1),
            "mean_ua_rotating": round(mean_uarot, 3),
            # Meta
            "ip_count": ip_count,
            "hit_count": hit_count,
            "top_threat": top_threats[0] if top_threats else "",
            "top_countries": _most_common(countries, 5),
            "top_orgs": _most_common(orgs, 5),
            "sample_ips": sample_ips,
            "sample_ua": sample_ua,
            # Radar profile for the visualisation
            "radar": radar,
        })

    # ── Edges: nearest neighbours in feature space ────────────────────────
    # Each node is linked to its 2 closest nodes; a pair is emitted only once.
    edges = []
    seen: set[frozenset] = set()
    centroids = km.centroids
    for i, ni in enumerate(nodes):
        ci = ni["cluster_idx"]
        # Squared Euclidean distance from this centroid to every other one
        dists = sorted(
            (
                (
                    other,
                    sum(
                        (centroids[ci][d] - centroids[nj["cluster_idx"]][d]) ** 2
                        for d in range(N_FEATURES)
                    ),
                )
                for other, nj in enumerate(nodes)
                if other != i
            ),
            key=lambda pair: pair[1],
        )
        for other, dist2 in dists[:2]:
            target_id = nodes[other]["id"]
            pair_key = frozenset([ni["id"], target_id])
            if pair_key in seen:
                continue
            seen.add(pair_key)
            similarity = round(1.0 / (1.0 + math.sqrt(dist2)), 3)
            edges.append({
                "id": f"e_{ni['id']}_{target_id}",
                "source": ni["id"],
                "target": target_id,
                "similarity": similarity,
                "weight": round(similarity * 5, 1),
            })

    # ── Global stats ──────────────────────────────────────────────────────
    total_ips = sum(n["ip_count"] for n in nodes)
    total_hits = sum(n["hit_count"] for n in nodes)
    bot_ips = sum(
        n["ip_count"] for n in nodes
        if n["risk_score"] > 0.40 or "🤖" in n["label"]
    )
    high_risk = sum(n["ip_count"] for n in nodes if n["risk_score"] > 0.20)
    elapsed = round(time.time() - t0, 2)

    return {
        "nodes": nodes,
        "edges": edges,
        "stats": {
            "total_clusters": len(nodes),
            "total_ips": total_ips,
            "total_hits": total_hits,
            "bot_ips": bot_ips,
            "high_risk_ips": high_risk,
            "n_samples": len(rows),
            "k": k,
            "elapsed_s": elapsed,
        },
        "feature_names": FEATURE_NAMES,
    }
@router.get("/cluster/{cluster_id}/ips")
async def get_cluster_ips(
    cluster_id: str,
    limit: int = Query(100, ge=1, le=500),
    offset: int = Query(0, ge=0),
):
    """
    List the (IP, JA4) members of one cluster from the last clustering run.

    Membership is read from the in-memory cache filled by the /clusters
    endpoint; per-member details are then fetched from the database for the
    requested page.

    Args:
        cluster_id: node id in the form ``c{idx}_k{k}``.
        limit: page size (number of cluster members).
        offset: index of the first member of the page.

    Returns:
        ``{"ips": [...], "total": <member count>, "cluster_id": cluster_id}``.

    Raises:
        HTTPException 404: the cache is empty, or the id was issued by a run
            with a different ``k`` than the cached one.
        HTTPException 400: the cluster index cannot be parsed.
        HTTPException 500: the database query failed.
    """
    cache_miss = "Cache expiré — appelez /api/clustering/clusters d'abord"
    if not _cache.get("cluster_ips"):
        raise HTTPException(status_code=404, detail=cache_miss)

    # The id has the form c{idx}_k{k}
    parts = cluster_id.split("_")
    try:
        idx = int(parts[0][1:])
    except (ValueError, IndexError):
        raise HTTPException(status_code=400, detail="cluster_id invalide") from None

    # An id from a run with a different k designates another partition: the
    # same index would silently return an unrelated cluster. Ids without a
    # parseable k suffix are still accepted, as before.
    if len(parts) > 1 and parts[1][:1] == "k" and parts[1][1:].isdigit():
        cached_k = (_cache.get("params") or {}).get("k")
        if cached_k is not None and int(parts[1][1:]) != cached_k:
            raise HTTPException(status_code=404, detail=cache_miss)

    members = _cache["cluster_ips"].get(idx, [])
    if not members:
        return {"ips": [], "total": 0, "cluster_id": cluster_id}
    total = len(members)

    page_members = members[offset: offset + limit]
    if not page_members:
        return {"ips": [], "total": total, "cluster_id": cluster_id}

    # Exact (ip, ja4) pairs of this page. The SQL filter below is by IP only,
    # so rows for other JA4s of the same IP are discarded after the query.
    wanted = {(str(ip), str(ja4 or "")) for ip, ja4 in page_members}

    # The IN list is interpolated into the SQL text, so only values made
    # exclusively of IPv4/IPv6 literal characters are allowed through.
    # Every IP of the page is kept (no hidden cap below `limit`).
    ip_chars = frozenset("0123456789abcdefABCDEF:.")
    safe_ips = [
        ip
        for ip in dict.fromkeys(pair[0] for pair in wanted)
        if ip and set(ip) <= ip_chars
    ]
    if not safe_ips:
        return {"ips": [], "total": total, "cluster_id": cluster_id}
    ip_filter = ", ".join(f"'{ip}'" for ip in safe_ips)

    sql = f"""
    SELECT
        replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS src_ip,
        t.ja4,
        any(t.tcp_ttl_raw) AS ttl,
        any(t.tcp_win_raw) AS win,
        any(t.tcp_scale_raw) AS scale,
        any(t.tcp_mss_raw) AS mss,
        sum(t.hits) AS hits,
        any(t.first_ua) AS ua,
        round(avg(abs(ml.anomaly_score)), 3) AS avg_score,
        max(ml.threat_level) AS threat_level,
        any(ml.country_code) AS country_code,
        any(ml.asn_org) AS asn_org,
        round(avg(ml.fuzzing_index), 2) AS fuzzing,
        round(avg(ml.hit_velocity), 2) AS velocity
    FROM mabase_prod.agg_host_ip_ja4_1h t
    LEFT JOIN mabase_prod.ml_detected_anomalies ml
        ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
        AND ml.detected_at >= now() - INTERVAL 24 HOUR
    WHERE t.window_start >= now() - INTERVAL 24 HOUR
        AND replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') IN ({ip_filter})
    GROUP BY t.src_ip, t.ja4
    ORDER BY hits DESC
    """
    try:
        result = db.query(sql)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    ips = []
    for row in result.result_rows:
        ip = str(row[0])
        ja4 = str(row[1] or "")
        if (ip, ja4) not in wanted:
            continue
        ips.append({
            "ip": ip,
            "ja4": ja4,
            "tcp_ttl": int(row[2] or 0),
            "tcp_win": int(row[3] or 0),
            "tcp_scale": int(row[4] or 0),
            "tcp_mss": int(row[5] or 0),
            "hits": int(row[6] or 0),
            "ua": str(row[7] or ""),
            "avg_score": float(row[8] or 0),
            "threat_level": str(row[9] or ""),
            "country_code": str(row[10] or ""),
            "asn_org": str(row[11] or ""),
            "fuzzing": float(row[12] or 0),
            "velocity": float(row[13] or 0),
        })
    return {"ips": ips, "total": total, "cluster_id": cluster_id}

View File

@ -7,6 +7,7 @@ agg_host_ip_ja4_1h (rotation JA4), view_ip_recurrence, view_ai_features_1h.
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from ..database import db from ..database import db
from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua
router = APIRouter(prefix="/api/investigation", tags=["investigation"]) router = APIRouter(prefix="/api/investigation", tags=["investigation"])
@ -62,32 +63,45 @@ async def get_ip_full_summary(ip: str):
"top_hosts": [str(h) for h in (bf_row[3] or [])] if bf_row else [], "top_hosts": [str(h) for h in (bf_row[3] or [])] if bf_row else [],
} }
# ── 3. TCP spoofing ──────────────────────────────────────────────────── # ── 3. TCP spoofing — fingerprinting multi-signal ─────────────────────
tcp_sql = """ tcp_sql = """
SELECT tcp_ttl, first_ua SELECT
FROM mabase_prod.view_tcp_spoofing_detected any(tcp_ttl_raw) AS ttl,
any(tcp_win_raw) AS win,
any(tcp_scale_raw) AS scale,
any(tcp_mss_raw) AS mss,
any(first_ua) AS ua
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND tcp_ttl > 0 AND window_start >= now() - INTERVAL 24 HOUR
AND tcp_ttl_raw > 0
LIMIT 1 LIMIT 1
""" """
tcp_res = db.query(tcp_sql, {"ip": clean_ip}) tcp_res = db.query(tcp_sql, {"ip": clean_ip})
tcp_data = {"detected": False, "tcp_ttl": None, "suspected_os": None} tcp_data = {"detected": False, "tcp_ttl": None, "suspected_os": None}
if tcp_res.result_rows: if tcp_res.result_rows:
ttl = int(tcp_res.result_rows[0][0]) r = tcp_res.result_rows[0]
if 52 <= ttl <= 65: ttl = int(r[0] or 0)
sus_os = "Linux/Mac" win = int(r[1] or 0)
elif 110 <= ttl <= 135: scale = int(r[2] or 0)
sus_os = "Windows" mss = int(r[3] or 0)
else: ua = str(r[4] or "")
sus_os = "Unknown" fp = fingerprint_os(ttl, win, scale, mss)
ua = str(tcp_res.result_rows[0][1] or "") dec_os = declared_os_from_ua(ua)
dec_os = "Windows" if "Windows" in ua else ("macOS" if "Mac OS X" in ua else "Linux/Android" if "Linux" in ua else "Unknown") spoof_res = detect_spoof(fp, dec_os)
spoof = sus_os != "Unknown" and dec_os != "Unknown" and sus_os != dec_os
tcp_data = { tcp_data = {
"detected": spoof, "detected": spoof_res.is_spoof,
"tcp_ttl": ttl, "tcp_ttl": ttl,
"suspected_os": sus_os, "tcp_mss": mss,
"tcp_win_scale": scale,
"initial_ttl": fp.initial_ttl,
"hop_count": fp.hop_count,
"suspected_os": fp.os_name,
"declared_os": dec_os, "declared_os": dec_os,
"confidence": fp.confidence,
"network_path": fp.network_path,
"is_bot_tool": fp.is_bot_tool,
"spoof_reason": spoof_res.reason,
} }
# ── 4. JA4 rotation ──────────────────────────────────────────────────── # ── 4. JA4 rotation ────────────────────────────────────────────────────
@ -146,7 +160,9 @@ async def get_ip_full_summary(ip: str):
risk = 0 risk = 0
risk += min(50, ml_data["max_score"] * 50) risk += min(50, ml_data["max_score"] * 50)
if bf_data["active"]: risk += 20 if bf_data["active"]: risk += 20
if tcp_data["detected"]: risk += 15 if tcp_data["detected"]:
if tcp_data.get("is_bot_tool"): risk += 30 # outil de scan connu
else: risk += 15 # spoof OS
if rot_data["rotating"]: risk += min(15, rot_data["distinct_ja4_count"] * 3) if rot_data["rotating"]: risk += min(15, rot_data["distinct_ja4_count"] * 3)
if pers_data["persistent"]: risk += min(10, pers_data["recurrence"] * 2) if pers_data["persistent"]: risk += min(10, pers_data["recurrence"] * 2)
risk = min(100, round(risk)) risk = min(100, round(risk))

View File

@ -1,130 +1,95 @@
""" """
Endpoints pour la détection du TCP spoofing (TTL / window size anormaux) Endpoints pour la détection du TCP spoofing / fingerprinting OS
Règle de corrélation : Approche multi-signal (p0f-style) :
- TTL=0 ou tcp_window_size=0 → données TCP absentes (proxy/LB) → pas de corrélation possible - TTL initial estimé → famille OS (Linux/Mac=64, Windows=128, Cisco/BSD=255)
- TTL 55-65 → fingerprint Linux/Mac (initial TTL 64) - MSS → type de réseau (Ethernet=1460, PPPoE=1452, VPN=1380-1420)
- TTL 120-135 → fingerprint Windows (initial TTL 128) - Taille de fenêtre → signature OS précise
- TTL 110-120 → fingerprint Windows (initial TTL 128, quelques sauts) - Facteur d'échelle affine la version kernel/stack TCP
- Toute autre valeur → OS indéterminé → pas de flag spoofing
- spoof_flag = True UNIQUEMENT si OS fingerprinting TCP possible ET incompatible avec l'UA Détection bots : signatures connues (Masscan/ZMap/Mirai) identifiées par combinaison
win+scale+mss indépendamment de l'UA.
""" """
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from ..database import db from ..database import db
from ..services.tcp_fingerprint import (
fingerprint_os,
detect_spoof,
declared_os_from_ua,
)
router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"]) router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"])
# Plages TTL qui permettent une corrélation fiable
_TTL_LINUX = (range(52, 66), "Linux/Mac") # initial 64, 1-12 sauts
_TTL_WINDOWS = (range(110, 136), "Windows") # initial 128, 1-18 sauts
_TTL_CISCO = (range(240, 256), "Cisco/BSD") # initial 255
def _suspected_os(ttl: int) -> str:
"""Retourne l'OS probable à partir du TTL observé.
Retourne 'Unknown' si le TTL ne permet pas une corrélation fiable
(TTL=0 = pas de données TCP, ou hors plage connue).
"""
if ttl <= 0:
return "Unknown" # Pas de données TCP (proxy/CDN)
for rng, name in (_TTL_LINUX, _TTL_WINDOWS, _TTL_CISCO):
if ttl in rng:
return name
return "Unknown"
def _declared_os(ua: str) -> str:
ua = ua or ""
if "Windows" in ua:
return "Windows"
if "Mac OS X" in ua:
return "macOS"
if "Linux" in ua or "Android" in ua:
return "Linux/Android"
return "Unknown"
def _is_spoof(suspected_os: str, declared_os: str) -> bool:
"""Spoof confirmé uniquement si on a un fingerprint TCP fiable ET une incompatibilité d'OS."""
if suspected_os == "Unknown" or declared_os == "Unknown":
return False # Pas de corrélation possible
# Linux/Mac fingerprint TCP mais UA déclare Windows
if suspected_os == "Linux/Mac" and declared_os == "Windows":
return True
# Windows fingerprint TCP mais UA déclare Linux/Android ou macOS
if suspected_os == "Windows" and declared_os in ("Linux/Android", "macOS"):
return True
return False
@router.get("/overview") @router.get("/overview")
async def get_tcp_spoofing_overview(): async def get_tcp_spoofing_overview():
"""Statistiques globales : seules les entrées avec données TCP valides sont analysées.""" """Statistiques globales avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale)."""
try: try:
sql = """ sql = """
SELECT SELECT
count() AS total_entries, count() AS total_entries,
uniq(src_ip) AS unique_ips, uniq(src_ip) AS unique_ips,
countIf(tcp_ttl = 0) AS no_tcp_data, countIf(tcp_ttl_raw = 0) AS no_tcp_data,
countIf(tcp_ttl > 0) AS with_tcp_data, countIf(tcp_ttl_raw > 0) AS with_tcp_data,
countIf(tcp_ttl BETWEEN 52 AND 65) AS linux_fingerprint, countIf(tcp_ttl_raw > 0 AND tcp_ttl_raw <= 64) AS linux_mac_fp,
countIf(tcp_ttl BETWEEN 110 AND 135) AS windows_fingerprint countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp,
FROM mabase_prod.view_tcp_spoofing_detected countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp,
countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
""" """
result = db.query(sql) result = db.query(sql)
row = result.result_rows[0] row = result.result_rows[0]
total_entries = int(row[0])
unique_ips = int(row[1])
no_tcp_data = int(row[2])
with_tcp_data = int(row[3])
linux_fp = int(row[4])
windows_fp = int(row[5])
# Distribution TTL uniquement pour les entrées avec données TCP valides # Distribution TTL (top 15)
ttl_sql = """ ttl_sql = """
SELECT SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips
tcp_ttl, FROM mabase_prod.agg_host_ip_ja4_1h
count() AS cnt, WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
uniq(src_ip) AS ips GROUP BY ttl ORDER BY cnt DESC LIMIT 15
FROM mabase_prod.view_tcp_spoofing_detected
WHERE tcp_ttl > 0
GROUP BY tcp_ttl
ORDER BY cnt DESC
LIMIT 15
""" """
ttl_res = db.query(ttl_sql) ttl_res = db.query(ttl_sql)
ttl_distribution = [
{"ttl": int(r[0]), "count": int(r[1]), "ips": int(r[2])}
for r in ttl_res.result_rows
]
# Distribution window_size pour entrées avec données TCP # Distribution MSS — nouveau signal clé (top 12)
mss_sql = """
SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0
GROUP BY mss ORDER BY cnt DESC LIMIT 12
"""
mss_res = db.query(mss_sql)
# Distribution fenêtre (top 10)
win_sql = """ win_sql = """
SELECT SELECT tcp_win_raw AS win, count() AS cnt
tcp_window_size, FROM mabase_prod.agg_host_ip_ja4_1h
count() AS cnt WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
FROM mabase_prod.view_tcp_spoofing_detected GROUP BY win ORDER BY cnt DESC LIMIT 10
WHERE tcp_ttl > 0
GROUP BY tcp_window_size
ORDER BY cnt DESC
LIMIT 10
""" """
win_res = db.query(win_sql) win_res = db.query(win_sql)
window_size_distribution = [
{"window_size": int(r[0]), "count": int(r[1])}
for r in win_res.result_rows
]
return { return {
"total_entries": total_entries, "total_entries": int(row[0]),
"unique_ips": unique_ips, "unique_ips": int(row[1]),
"no_tcp_data": no_tcp_data, "no_tcp_data": int(row[2]),
"with_tcp_data": with_tcp_data, "with_tcp_data": int(row[3]),
"linux_fingerprint": linux_fp, "linux_mac_fingerprint": int(row[4]),
"windows_fingerprint": windows_fp, "windows_fingerprint": int(row[5]),
"ttl_distribution": ttl_distribution, "cisco_bsd_fingerprint": int(row[6]),
"window_size_distribution": window_size_distribution, "bot_scanner_fingerprint": int(row[7]),
"ttl_distribution": [
{"ttl": int(r[0]), "count": int(r[1]), "ips": int(r[2])}
for r in ttl_res.result_rows
],
"mss_distribution": [
{"mss": int(r[0]), "count": int(r[1]), "ips": int(r[2])}
for r in mss_res.result_rows
],
"window_size_distribution": [
{"window_size": int(r[0]), "count": int(r[1])}
for r in win_res.result_rows
],
} }
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@ -134,30 +99,36 @@ async def get_tcp_spoofing_overview():
async def get_tcp_spoofing_list( async def get_tcp_spoofing_list(
limit: int = Query(100, ge=1, le=1000), limit: int = Query(100, ge=1, le=1000),
offset: int = Query(0, ge=0), offset: int = Query(0, ge=0),
spoof_only: bool = Query(False, description="Ne retourner que les vrais spoofs (TTL corrélable + OS mismatch)"), spoof_only: bool = Query(False, description="Retourner uniquement les spoofs/bots confirmés"),
): ):
"""Liste des entrées avec données TCP valides (tcp_ttl > 0). """Liste avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale).
Entrées sans données TCP (TTL=0) exclues : pas de corrélation possible. Inclut les champs enrichis : mss, win_scale, initial_ttl, hop_count, confidence, network_path, is_bot_tool.
Si spoof_only=True, retourne uniquement les entrées avec fingerprint OS identifiable (Linux/Mac TTL 52-65).
""" """
try: try:
# Filtre SQL : seules les entrées avec TTL valide, et si spoof_only les plages corrélables count_sql = """
if spoof_only: SELECT count() FROM (
# Seules les plages de TTL qui permettent une identification OS fiable SELECT src_ip, ja4
ttl_filter = "tcp_ttl BETWEEN 52 AND 65 OR tcp_ttl BETWEEN 110 AND 135 OR tcp_ttl BETWEEN 240 AND 255" FROM mabase_prod.agg_host_ip_ja4_1h
else: WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
ttl_filter = "tcp_ttl > 0" GROUP BY src_ip, ja4
)
count_sql = f"SELECT count() FROM mabase_prod.view_tcp_spoofing_detected WHERE {ttl_filter}" """
total = int(db.query(count_sql).result_rows[0][0]) total = int(db.query(count_sql).result_rows[0][0])
sql = f""" sql = """
SELECT SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip, replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip,
ja4, tcp_ttl, tcp_window_size, first_ua ja4,
FROM mabase_prod.view_tcp_spoofing_detected any(tcp_ttl_raw) AS tcp_ttl,
WHERE {ttl_filter} any(tcp_win_raw) AS tcp_window_size,
ORDER BY tcp_ttl ASC any(tcp_scale_raw) AS tcp_win_scale,
any(tcp_mss_raw) AS tcp_mss,
any(first_ua) AS first_ua,
sum(hits) AS hits
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4
ORDER BY hits DESC
LIMIT %(limit)s OFFSET %(offset)s LIMIT %(limit)s OFFSET %(offset)s
""" """
result = db.query(sql, {"limit": limit, "offset": offset}) result = db.query(sql, {"limit": limit, "offset": offset})
@ -165,23 +136,38 @@ async def get_tcp_spoofing_list(
for row in result.result_rows: for row in result.result_rows:
ip = str(row[0]) ip = str(row[0])
ja4 = str(row[1] or "") ja4 = str(row[1] or "")
ttl = int(row[2]) ttl = int(row[2] or 0)
window_size = int(row[3]) win = int(row[3] or 0)
ua = str(row[4] or "") scale = int(row[4] or 0)
sus_os = _suspected_os(ttl) mss = int(row[5] or 0)
dec_os = _declared_os(ua) ua = str(row[6] or "")
spoof_flag = _is_spoof(sus_os, dec_os) hits = int(row[7] or 0)
if spoof_only and not spoof_flag:
fp = fingerprint_os(ttl, win, scale, mss)
dec_os = declared_os_from_ua(ua)
spoof_res = detect_spoof(fp, dec_os)
if spoof_only and not spoof_res.is_spoof:
continue continue
items.append({ items.append({
"ip": ip, "ip": ip,
"ja4": ja4, "ja4": ja4,
"tcp_ttl": ttl, "tcp_ttl": ttl,
"tcp_window_size": window_size, "tcp_window_size": win,
"tcp_win_scale": scale,
"tcp_mss": mss,
"hits": hits,
"first_ua": ua, "first_ua": ua,
"suspected_os": sus_os, "suspected_os": fp.os_name,
"initial_ttl": fp.initial_ttl,
"hop_count": fp.hop_count,
"confidence": fp.confidence,
"network_path": fp.network_path,
"is_bot_tool": fp.is_bot_tool,
"declared_os": dec_os, "declared_os": dec_os,
"spoof_flag": spoof_flag, "spoof_flag": spoof_res.is_spoof,
"spoof_reason": spoof_res.reason,
}) })
return {"items": items, "total": total} return {"items": items, "total": total}
except Exception as e: except Exception as e:
@ -190,29 +176,44 @@ async def get_tcp_spoofing_list(
@router.get("/matrix") @router.get("/matrix")
async def get_tcp_spoofing_matrix(): async def get_tcp_spoofing_matrix():
"""Matrice suspected_os × declared_os — uniquement entrées avec TTL valide.""" """Matrice OS suspecté × OS déclaré avec fingerprinting multi-signal."""
try: try:
sql = """ sql = """
SELECT tcp_ttl, first_ua SELECT
FROM mabase_prod.view_tcp_spoofing_detected any(tcp_ttl_raw) AS ttl,
WHERE tcp_ttl > 0 any(tcp_win_raw) AS win,
any(tcp_scale_raw) AS scale,
any(tcp_mss_raw) AS mss,
any(first_ua) AS ua,
count() AS cnt
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
GROUP BY src_ip, ja4
""" """
result = db.query(sql) result = db.query(sql)
counts: dict = {} counts: dict = {}
for row in result.result_rows: for row in result.result_rows:
ttl = int(row[0]) ttl = int(row[0] or 0)
ua = str(row[1] or "") win = int(row[1] or 0)
sus_os = _suspected_os(ttl) scale = int(row[2] or 0)
dec_os = _declared_os(ua) mss = int(row[3] or 0)
key = (sus_os, dec_os) ua = str(row[4] or "")
counts[key] = counts.get(key, 0) + 1 cnt = int(row[5] or 1)
fp = fingerprint_os(ttl, win, scale, mss)
dec_os = declared_os_from_ua(ua)
spoof_res = detect_spoof(fp, dec_os)
key = (fp.os_name, dec_os, spoof_res.is_spoof, fp.is_bot_tool)
counts[key] = counts.get(key, 0) + cnt
matrix = [ matrix = [
{ {
"suspected_os": k[0], "suspected_os": k[0],
"declared_os": k[1], "declared_os": k[1],
"count": v, "count": v,
"is_spoof": _is_spoof(k[0], k[1]), "is_spoof": k[2],
"is_bot_tool": k[3],
} }
for k, v in counts.items() for k, v in counts.items()
] ]

View File

@ -0,0 +1,328 @@
"""
Moteur de clustering K-means++ multi-métriques (pur Python).
Ref: Arthur & Vassilvitskii (2007) — k-means++: The Advantages of Careful Seeding
Hotelling (1933) — PCA par puissance itérative (deflation)
Features (21 dimensions, normalisées [0,1]) :
0 ttl_n : TTL initial normalisé (hops-count estimé)
1 mss_n : MSS normalisé → type réseau (Ethernet/PPPoE/VPN)
2 scale_n : facteur de mise à l'échelle TCP
3 win_n : fenêtre TCP normalisée
4 score_n : score anomalie ML (abs)
5 velocity_n : vélocité de requêtes (log1p)
6 fuzzing_n : index de fuzzing (log1p)
7 headless_n : ratio sessions headless
8 post_n : ratio POST/total
9 ip_id_zero_n : ratio IP-ID=0 (Linux/spoofé)
10 entropy_n : entropie temporelle
11 browser_n : score navigateur moderne (normalisé max 50)
12 alpn_n : mismatch ALPN/protocole
13 alpn_absent_n : ratio ALPN absent
14 h2_n : efficacité H2 multiplexing (log1p)
15 hdr_conf_n : confiance ordre headers
16 ua_ch_n : mismatch User-Agent-Client-Hints
17 asset_n : ratio assets statiques
18 direct_n : ratio accès directs
19 ja4_div_n : diversité JA4 (log1p)
20 ua_rot_n : UA rotatif (booléen)
"""
from __future__ import annotations
import math
import random
from dataclasses import dataclass, field
# ─── Feature definitions ──────────────────────────────────────────────────────
# Each entry is (SQL key, human-readable name, normalisation function).
# Every normaliser treats a missing/None value as 0 and caps its result at 1.0
# (the last one returns a strict 0.0/1.0 flag instead).
# NOTE(review): none of the normalisers clamps at 0, so a negative raw value
# stays negative — presumably inputs are non-negative; confirm with the caller.
FEATURES = [
    # TCP stack
    ("ttl", "TTL Initial", lambda v: min(1.0, (v or 0) / 255.0)),
    ("mss", "MSS Réseau", lambda v: min(1.0, (v or 0) / 1460.0)),
    ("scale", "Scale TCP", lambda v: min(1.0, (v or 0) / 14.0)),
    ("win", "Fenêtre TCP", lambda v: min(1.0, (v or 0) / 65535.0)),
    # ML anomaly signals (log1p-scaled features saturate at the divisor's argument)
    ("avg_score", "Score Anomalie", lambda v: min(1.0, float(v or 0))),
    ("avg_velocity", "Vélocité (rps)", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(100))),
    ("avg_fuzzing", "Fuzzing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(300))),
    ("pct_headless", "Headless", lambda v: min(1.0, float(v or 0))),
    ("avg_post", "Ratio POST", lambda v: min(1.0, float(v or 0))),
    # IP-ID
    ("ip_id_zero", "IP-ID Zéro", lambda v: min(1.0, float(v or 0))),
    # Temporal
    ("entropy", "Entropie Temporelle", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(10))),
    # Browser
    ("browser_score","Score Navigateur", lambda v: min(1.0, float(v or 0) / 50.0)),
    # TLS / protocol
    ("alpn_mismatch","ALPN Mismatch", lambda v: min(1.0, float(v or 0))),
    ("alpn_missing", "ALPN Absent", lambda v: min(1.0, float(v or 0))),
    ("h2_eff", "H2 Multiplexing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(20))),
    ("hdr_conf", "Ordre Headers", lambda v: min(1.0, float(v or 0))),
    ("ua_ch_mismatch","UA-CH Mismatch", lambda v: min(1.0, float(v or 0))),
    # HTTP behaviour
    ("asset_ratio", "Ratio Assets", lambda v: min(1.0, float(v or 0))),
    ("direct_ratio", "Accès Direct", lambda v: min(1.0, float(v or 0))),
    # JA4 diversity
    ("ja4_count", "Diversité JA4", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(30))),
    # Rotating User-Agent (boolean flag: any value > 0 becomes 1.0)
    ("ua_rotating", "UA Rotatif", lambda v: 1.0 if float(v or 0) > 0 else 0.0),
]
# Parallel views of the table, index-aligned with the feature vector positions.
FEATURE_KEYS = [f[0] for f in FEATURES]
FEATURE_NAMES = [f[1] for f in FEATURES]
FEATURE_NORMS = [f[2] for f in FEATURES]
N_FEATURES = len(FEATURES)
# ─── Utilitaires vectoriels (pur Python) ──────────────────────────────────────
def _dist2(a: list[float], b: list[float]) -> float:
return sum((x - y) ** 2 for x, y in zip(a, b))
def _mean_vec(vecs: list[list[float]]) -> list[float]:
    """Return the component-wise mean of ``vecs`` over the first N_FEATURES dimensions.

    An empty input yields the zero vector of length N_FEATURES.
    """
    count = len(vecs)
    if not count:
        return [0.0] * N_FEATURES
    means = []
    for dim in range(N_FEATURES):
        means.append(sum(vec[dim] for vec in vecs) / count)
    return means
# ─── Construction du vecteur de features ─────────────────────────────────────
def build_feature_vector(row: dict) -> list[float]:
    """Turn a dict of SQL columns into a normalised feature vector.

    Each feature key is looked up with ``row.get`` (missing keys give ``None``)
    and passed through its normalisation function, in FEATURES order.
    """
    vector = []
    for key, normalise in zip(FEATURE_KEYS, FEATURE_NORMS):
        vector.append(normalise(row.get(key)))
    return vector
# ─── K-means++ ───────────────────────────────────────────────────────────────
@dataclass
class KMeansResult:
    """Outcome of a k-means run as returned by ``kmeans_pp``."""
    # Final cluster centres, one vector per cluster.
    centroids: list[list[float]]
    # For each input point, the index of its assigned centroid.
    labels: list[int]
    # Sum of squared distances between each point and its assigned centroid.
    inertia: float
    # Number of assignment/update iterations performed.
    n_iter: int
def kmeans_pp(
    points: list[list[float]],
    k: int,
    max_iter: int = 60,
    seed: int = 42,
    n_init: int = 3,
) -> KMeansResult:
    """Cluster ``points`` with k-means using k-means++ seeding.

    Seeding follows Arthur & Vassilvitskii (2007): the first centre is drawn
    uniformly, each further centre with probability proportional to its squared
    distance to the nearest centre already chosen. The whole procedure is run
    ``n_init`` times (at least once) and the run with the lowest inertia wins.

    Args:
        points: Feature vectors to cluster.
        k: Requested number of clusters. Fewer centroids are returned when
            every point already coincides with a chosen centre.
        max_iter: Maximum number of assignment/update iterations per run.
        seed: Seed of the private random generator (results are deterministic).
        n_init: Number of independent restarts; values below 1 are treated as 1.

    Returns:
        The best ``KMeansResult``. For empty ``points`` an empty result
        (no centroids, no labels, inertia 0.0, n_iter 0) is returned.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be >= 1")
    if not points:
        # Nothing to cluster: avoid rng.randrange(0) blowing up below.
        return KMeansResult(centroids=[], labels=[], inertia=0.0, n_iter=0)

    rng = random.Random(seed)
    n_points = len(points)
    best: KMeansResult | None = None

    for _attempt in range(max(1, n_init)):
        # ── k-means++ seeding ───────────────────────────────────────────
        centroids = [points[rng.randrange(n_points)][:]]
        nearest_d2: list[float] | None = None
        for _step in range(k - 1):
            # Maintain, per point, the squared distance to its nearest centre;
            # only the newest centre can lower it, so no full recomputation.
            newest = centroids[-1]
            if nearest_d2 is None:
                nearest_d2 = [_dist2(p, newest) for p in points]
            else:
                nearest_d2 = [
                    min(prev, _dist2(p, newest))
                    for prev, p in zip(nearest_d2, points)
                ]
            total = sum(nearest_d2)
            if total == 0:
                # Every point sits on an existing centre: no further seeds.
                break
            threshold = rng.random() * total
            cumul = 0.0
            chosen = None
            for i, d in enumerate(nearest_d2):
                cumul += d
                if cumul >= threshold:
                    chosen = i
                    break
            if chosen is None:
                # Rounding kept the cumulative sum below the threshold.
                chosen = rng.randrange(n_points)
            centroids.append(points[chosen][:])

        # ── Lloyd iterations ────────────────────────────────────────────
        labels: list[int] = [0] * n_points
        n_iter = 0  # stays 0 when max_iter <= 0
        for iteration in range(max_iter):
            n_iter = iteration + 1
            # Assignment step: nearest centroid for every point.
            new_labels = [
                min(range(len(centroids)), key=lambda c: _dist2(p, centroids[c]))
                for p in points
            ]
            # The first pass always updates centroids, even if labels are all 0.
            if new_labels == labels and iteration > 0:
                break
            labels = new_labels
            # Update step: empty clusters keep their previous centroid.
            clusters: list[list[list[float]]] = [[] for _ in centroids]
            for i, label in enumerate(labels):
                clusters[label].append(points[i])
            for j, members in enumerate(clusters):
                if members:
                    centroids[j] = _mean_vec(members)

        inertia = sum(_dist2(points[i], centroids[labels[i]]) for i in range(n_points))
        if best is None or inertia < best.inertia:
            best = KMeansResult(
                centroids=centroids,
                labels=labels,
                inertia=inertia,
                n_iter=n_iter,
            )
    return best  # type: ignore
# ─── PCA 2D par puissance itérative ──────────────────────────────────────────
def pca_2d(points: list[list[float]]) -> list[tuple[float, float]]:
    """Project ``points`` onto their first two principal axes.

    Both axes are found by power iteration on the centred data; the second one
    is computed after removing the first component from the data (deflation).
    Each coordinate is min-max rescaled to [0, 1]. An empty input returns [].
    """
    if len(points) == 0:
        return []

    dims = range(N_FEATURES)

    # Centre the data on its mean vector.
    centre = _mean_vec(points)
    centred = [[pt[j] - centre[j] for j in dims] for pt in points]

    def _project(rows: list[list[float]], axis: list[float]) -> list[float]:
        """Dot product of every row with ``axis``."""
        return [sum(r[j] * axis[j] for j in dims) for r in rows]

    def _leading_axis(rows: list[list[float]], n_iter: int = 30) -> list[float]:
        """Approximate the dominant eigenvector of rows^T rows by power iteration."""
        axis = [1.0 / math.sqrt(N_FEATURES)] * N_FEATURES
        for _ in range(n_iter):
            scores = _project(rows, axis)  # rows @ axis
            gram = [
                sum(rows[i][j] * scores[i] for i in range(len(rows)))
                for j in dims
            ]  # rows^T @ scores
            # A zero vector is divided by a tiny constant instead of zero.
            length = math.sqrt(sum(g ** 2 for g in gram)) or 1e-10
            axis = [g / length for g in gram]
        return axis

    def _rescale(vals: list[float]) -> list[float]:
        """Min-max scale to [0, 1]; a constant series maps to all zeros."""
        lo = min(vals)
        hi = max(vals)
        span = (hi - lo) or 1e-10
        return [(v - lo) / span for v in vals]

    # First principal component.
    axis1 = _leading_axis(centred)
    coords1 = _project(centred, axis1)

    # Deflation: subtract each point's PC1 component before searching PC2.
    deflated = [
        [row[j] - c1 * axis1[j] for j in dims]
        for row, c1 in zip(centred, coords1)
    ]

    # Second principal component.
    axis2 = _leading_axis(deflated)
    coords2 = _project(deflated, axis2)

    return list(zip(_rescale(coords1), _rescale(coords2)))
# ─── Nommage automatique des clusters ────────────────────────────────────────
def name_cluster(centroid: list[float], raw_stats: dict | None = None) -> str:
    """Build a human-readable cluster label.

    Rules are evaluated from the most to the least discriminating signal and
    the first one that fires decides the label.

    Args:
        centroid: Normalised centroid, indexed like the feature table.
        raw_stats: Optional raw means; the keys read here are ``mean_mss``,
            ``mean_ttl`` and ``mean_scale``. When ``mean_ttl`` is missing or
            falsy the TTL is recovered from ``centroid[0] * 255``.

    Returns:
        The label string (emoji prefix included).
    """
    stats = raw_stats or {}

    anomaly = centroid[4]      # ML anomaly score
    velocity = centroid[5]     # request velocity
    fuzzing = centroid[6]      # fuzzing index (log1p-normalised)
    headless = centroid[7]     # headless ratio
    post_ratio = centroid[8]   # POST ratio
    alpn_bad = centroid[12]    # ALPN mismatch
    h2_eff = centroid[14]      # H2 multiplexing efficiency
    ch_mismatch = centroid[16] # UA / Client-Hints mismatch
    ja4_spread = centroid[19]  # JA4 diversity
    rotating_ua = centroid[20] # rotating User-Agent flag

    mss_raw = stats.get("mean_mss", 0)
    ttl_raw = stats.get("mean_ttl", 0) or (centroid[0] * 255)
    scale_raw = stats.get("mean_scale", 0)

    # ── Strong, deterministic signals ────────────────────────────────────
    if mss_raw and 1440 <= mss_raw <= 1460 and scale_raw and 3 <= scale_raw <= 5 and ttl_raw < 60:
        # Masscan-like TCP stack: MSS around 1452, scale around 4, low TTL.
        label = "🤖 Masscan / Scanner IP"
    elif fuzzing > 0.35:
        label = "🤖 Bot Fuzzer / Scanner"
    elif rotating_ua > 0.5 and ch_mismatch > 0.7:
        label = "🤖 Bot UA Rotatif + CH Mismatch"
    elif ch_mismatch > 0.8:
        label = "⚠️ Bot UA-CH Incohérent"
    # ── Moderate ML score refined by a behavioural signal ────────────────
    elif anomaly > 0.20:
        if headless > 0.3:
            label = "⚠️ Navigateur Headless Suspect"
        elif velocity > 0.25:
            label = "⚠️ Bot Haute Vélocité"
        elif post_ratio > 0.4:
            label = "⚠️ Bot POST Automatisé"
        elif alpn_bad > 0.5 or h2_eff > 0.5:
            label = "⚠️ TLS/H2 Anormal"
        elif ch_mismatch > 0.4:
            label = "⚠️ Anomalie UA-CH"
        else:
            label = "⚠️ Anomalie ML Modérée"
    # ── Weak signals ──────────────────────────────────────────────────────
    elif ch_mismatch > 0.4:
        label = "🔎 UA-CH Incohérent"
    elif ja4_spread > 0.5:
        label = "🔄 Client Multi-Fingerprint"
    # ── Network / OS classification ──────────────────────────────────────
    elif mss_raw and mss_raw < 1360:
        # A low MSS points at a VPN or tunnel.
        label = "🌐 VPN / Tunnel"
    elif ttl_raw < 70:
        label = "🐧 Linux / Mobile"
    elif ttl_raw > 110:
        label = "🪟 Windows"
    else:
        label = "✅ Trafic Légitime"
    return label
def risk_score_from_centroid(centroid: list[float]) -> float:
    """Return a weighted risk score capped at 1.0 for a normalised centroid.

    The ML anomaly score (index 4) is first stretched by dividing by 0.5 and
    capping at 1.0; every other contribution is the raw centroid component
    multiplied by its weight. Terms are accumulated in a fixed order.
    """
    # ML anomaly score stretched so that 0–0.5 covers the whole 0–1 range.
    stretched_score = min(1.0, centroid[4] / 0.5)

    # (weight, centroid component), in accumulation order.
    weighted_terms = (
        (0.20, centroid[16]),  # UA-CH mismatch
        (0.15, centroid[6]),   # fuzzing
        (0.12, centroid[20]),  # rotating UA
        (0.10, centroid[7]),   # headless
        (0.07, centroid[5]),   # velocity
        (0.04, centroid[9]),   # IP-ID zero
        (0.04, centroid[12]),  # ALPN mismatch
        (0.03, centroid[19]),  # JA4 diversity
        (0.03, centroid[8]),   # POST ratio
    )

    total = 0.25 * stretched_score
    for weight, component in weighted_terms:
        total = total + weight * component
    return min(1.0, total)

View File

@ -0,0 +1,436 @@
"""
Service de fingerprinting OS par signature TCP — approche multi-signal inspirée de p0f.
Signaux utilisés (par ordre de poids) :
1. TTL initial estimé (→ famille OS : Linux/Mac=64, Windows=128, Cisco/BSD=255)
2. MSS (→ type de réseau : Ethernet=1460, PPPoE=1452, VPN=1380-1420)
3. Taille de fenêtre (→ signature OS précise)
4. Facteur d'échelle (→ affine la version du kernel/stack TCP)
Références :
- p0f v3 (Michal Zalewski) — passive OS fingerprinting
- Nmap OS detection (Gordon Lyon)
- "OS Fingerprinting Revisited" (Beverly, 2004)
- "Passive OS fingerprinting" (Orebaugh, Ramirez)
- Recherche sur Masscan/ZMap : signatures SYN craftées connues
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
# ─── Constants ────────────────────────────────────────────────────────────────
# Standard initial TTL values, in ascending order, used to guess the sender's
# initial TTL from the observed one.
_INITIAL_TTLS = (64, 128, 255)
# MSS → network path type (MSS = MTU minus 40 bytes of IP+TCP headers).
# Ranges are checked in order; the first range containing the MSS wins.
_MSS_PATH: list[tuple[range, str]] = [
    (range(1461, 9001), "Ethernet/Jumbo"), # jumbo frames (CDN/datacenter)
    (range(1460, 1461), "Ethernet directe"), # standard MTU 1500
    (range(1453, 1460), "Ethernet directe"), # slightly reduced (padding)
    (range(1452, 1453), "PPPoE/DSL"), # MTU 1492
    (range(1436, 1452), "PPPoE/DSL ajusté"), # DSL variations
    (range(1420, 1436), "VPN léger"), # WireGuard / IPsec transport
    (range(1380, 1420), "VPN/Tunnel"), # OpenVPN / L2TP
    (range(1300, 1380), "VPN double ou mobile"),
    (range(0, 1300), "Lien bas débit / GPRS"),
]
# ─── OS signature database ─────────────────────────────────────────────────────
#
# Format: each entry is a dict with:
# ttl : int — expected initial TTL (64 | 128 | 255)
# win : set[int]|None — expected window sizes (None = ignore)
# scale : set[int]|None — expected window-scale factors (None = ignore)
# mss : set[int]|None — expected MSS values (None = ignore)
# name : str — displayed label
# conf : float — base confidence weight (0–1)
# bot : bool — known scanning tool / bot
_SIGNATURES: list[dict] = [
    # ══════════════════════════════════════════════════════
    # KNOWN SCANNING TOOLS AND BOTS (listed first)
    # ══════════════════════════════════════════════════════
    # Masscan / custom scanner with a modified Linux stack (PPPoE MSS=1452)
    # Pattern heavily present in the data: ~111k requests, UA spoofed as macOS/Windows
    {
        "ttl": 64, "win": {5808}, "scale": {4}, "mss": {1452},
        "name": "Bot-Scanner/Masscan", "conf": 0.97, "bot": True,
    },
    # Masscan with TTL=255 (direct mode, no hop)
    {
        "ttl": 255, "win": {1024}, "scale": {0}, "mss": None,
        "name": "Bot-ZMap/Masscan", "conf": 0.96, "bot": True,
    },
    # Mirai variant (small window, no scale, Linux TTL)
    {
        "ttl": 64, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
        "name": "Bot-Mirai", "conf": 0.92, "bot": True,
    },
    # Mirai variant (small window, Windows TTL)
    {
        "ttl": 128, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
        "name": "Bot-Mirai/Win", "conf": 0.92, "bot": True,
    },
    # Scapy / hand-crafted packets (window exactly 8192 + TTL 64 + no scale)
    {
        "ttl": 64, "win": {8192}, "scale": {0}, "mss": {1460},
        "name": "Bot-Scapy/Forge", "conf": 0.85, "bot": True,
    },
    # Window=1 probe with TTL 64 (scale and MSS ignored), labelled Bot-ZMap.
    # NOTE(review): the original comment described an Nmap SYN scan
    # (window=1024, MSS=1460, TTL=64 or 128), which does not match this
    # entry — confirm which signature was intended.
    {
        "ttl": 64, "win": {1}, "scale": None, "mss": None,
        "name": "Bot-ZMap", "conf": 0.95, "bot": True,
    },
    # ══════════════════════════════════════════════════════
    # WINDOWS
    # ══════════════════════════════════════════════════════
    # Windows 10 / 11 — standard signature (direct LAN)
    {
        "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1460},
        "name": "Windows 10/11", "conf": 0.93, "bot": False,
    },
    # Windows 10/11 — behind a VPN/proxy (reduced MSS)
    {
        "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1380, 1400, 1412, 1420, 1440},
        "name": "Windows 10/11 (VPN)", "conf": 0.90, "bot": False,
    },
    # Windows Server 2019/2022 — large window
    {
        "ttl": 128, "win": {65535, 131072}, "scale": {8, 9}, "mss": {1460},
        "name": "Windows Server", "conf": 0.88, "bot": False,
    },
    # Windows 7/8.1
    {
        "ttl": 128, "win": {8192, 65535}, "scale": {4, 8}, "mss": {1460},
        "name": "Windows 7/8", "conf": 0.83, "bot": False,
    },
    # Generic Windows (TTL=128, scale=8, any MSS)
    {
        "ttl": 128, "win": None, "scale": {8}, "mss": None,
        "name": "Windows", "conf": 0.70, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # ANDROID (modern BBRv2 / CUBIC stack)
    # ══════════════════════════════════════════════════════
    # Android 10+ — scale=9 or 10, large window (BBRv2)
    {
        "ttl": 64, "win": {65535, 131072, 42340, 35844}, "scale": {9, 10}, "mss": {1460},
        "name": "Android 10+", "conf": 0.82, "bot": False,
    },
    # Android via a TTL=128 proxy (Facebook, TikTok, etc. apps going through infrastructure)
    {
        "ttl": 128, "win": {62727, 65535}, "scale": {7}, "mss": {1460},
        "name": "Android/App (proxy)", "conf": 0.75, "bot": False,
    },
    # Android behind a VPN (reduced MSS)
    {
        "ttl": 64, "win": {65535, 59640, 63940}, "scale": {8, 9, 10}, "mss": {1380, 1390, 1400, 1418, 1420},
        "name": "Android (VPN/mobile)", "conf": 0.78, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # iOS / macOS
    # ══════════════════════════════════════════════════════
    # iOS 14+ / macOS Monterey+ — scale=6, win=65535 (XNU signature)
    {
        "ttl": 64, "win": {65535, 32768}, "scale": {6}, "mss": {1460},
        "name": "iOS/macOS", "conf": 0.87, "bot": False,
    },
    # macOS Sonoma+ / iOS 17+ (scale=9, larger window)
    {
        "ttl": 64, "win": {65535, 32768}, "scale": {9}, "mss": {1460},
        "name": "macOS Sonoma+/iOS 17+", "conf": 0.83, "bot": False,
    },
    # macOS behind a VPN (reduced MSS)
    {
        "ttl": 64, "win": {65535}, "scale": {6, 9}, "mss": {1380, 1400, 1412, 1436},
        "name": "iOS/macOS (VPN)", "conf": 0.80, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # LINUX (desktop/server)
    # ══════════════════════════════════════════════════════
    # Linux 5.x+ — scale=7, win=64240 or 65320 (kernel ≥ 4.19)
    {
        "ttl": 64, "win": {64240, 65320}, "scale": {7}, "mss": {1460},
        "name": "Linux 5.x+", "conf": 0.86, "bot": False,
    },
    # Linux 4.x / ChromeOS
    {
        "ttl": 64, "win": {29200, 65535, 43690, 32120}, "scale": {7}, "mss": {1460},
        "name": "Linux 4.x/ChromeOS", "conf": 0.83, "bot": False,
    },
    # Linux behind a VPN (reduced MSS)
    {
        "ttl": 64, "win": {64240, 65535, 42600}, "scale": {7}, "mss": {1380, 1400, 1420, 1436},
        "name": "Linux (VPN)", "conf": 0.80, "bot": False,
    },
    # Linux 2.6.x (old — win=5840/14600)
    {
        "ttl": 64, "win": {5840, 14600, 16384}, "scale": {4, 5}, "mss": {1460},
        "name": "Linux 2.6", "conf": 0.78, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # BSD / NETWORK EQUIPMENT / CDN
    # ══════════════════════════════════════════════════════
    # FreeBSD / OpenBSD (initial TTL=64)
    {
        "ttl": 64, "win": {65535}, "scale": {6}, "mss": {512, 1460},
        "name": "FreeBSD/OpenBSD", "conf": 0.74, "bot": False,
    },
    # Cisco IOS / network equipment (initial TTL=255, small window)
    {
        "ttl": 255, "win": {4096, 4128, 8760}, "scale": {0, 1, 2}, "mss": {512, 1460},
        "name": "Cisco/Réseau", "conf": 0.87, "bot": False,
    },
    # CDN / Applebot (TTL=255, jumbo MSS, large window)
    {
        "ttl": 255, "win": {26883, 65535, 59640}, "scale": {7, 8}, "mss": {8373, 8365, 1460},
        "name": "CDN/Applebot (jumbo)", "conf": 0.85, "bot": False,
    },
    # Generic BSD/Unix (TTL=255)
    {
        "ttl": 255, "win": None, "scale": {6, 7, 8}, "mss": {1460},
        "name": "BSD/Unix", "conf": 0.68, "bot": False,
    },
]
# ─── Data classes ──────────────────────────────────────────────────────────────
@dataclass
class OsFingerprint:
    """Result of the TCP-based OS fingerprinting (see ``fingerprint_os``)."""
    # Label of the best matching signature, a TTL-only fallback label, or "Unknown".
    os_name: str
    # Estimated initial TTL; 0 when it could not be estimated.
    initial_ttl: int
    # Estimated hop count (initial TTL minus observed TTL); -1 when unknown.
    hop_count: int
    # Confidence in [0, 1], rounded to two decimals.
    confidence: float
    # True when the matched signature is flagged as a scanning tool / bot.
    is_bot_tool: bool
    # Network path label inferred from the MSS.
    network_path: str
@dataclass
class SpoofResult:
    """Verdict of the TCP-vs-User-Agent consistency check (see ``detect_spoof``)."""
    # True when the TCP fingerprint contradicts the declared OS, or a bot tool matched.
    is_spoof: bool
    # True when the verdict comes from a known scanning-tool signature.
    is_bot_tool: bool
    # Short human-readable explanation of the verdict.
    reason: str
# ─── Fonctions utilitaires ─────────────────────────────────────────────────────
def _estimate_initial_ttl(observed_ttl: int) -> tuple[int, int]:
    """Estimate the sender's initial TTL and the hop count.

    The initial TTL is the lowest standard value (see ``_INITIAL_TTLS``) that
    is greater than or equal to ``observed_ttl``; the hop count is the
    difference between the two.

    The previous implementation skipped candidates implying more than 45 hops,
    but every larger candidate implies even more hops, so the search always
    ended on 255 with an absurd distance (e.g. TTL 10 → 255 with 245 hops).
    Long paths are now reported against the nearest standard TTL and left to
    the caller to penalise.

    Args:
        observed_ttl: TTL seen on the received packet.

    Returns:
        ``(initial_ttl, hop_count)``, or ``(0, -1)`` when the observed TTL is
        not usable (non-positive, or above the largest standard TTL, which
        would otherwise give a negative hop count).
    """
    if observed_ttl <= 0 or observed_ttl > _INITIAL_TTLS[-1]:
        return 0, -1
    for initial in _INITIAL_TTLS:
        if observed_ttl <= initial:
            return initial, initial - observed_ttl
    # Not reachable while _INITIAL_TTLS is sorted ascending; kept as a safe default.
    return 0, -1
def _infer_network_path(mss: int) -> str:
    """Map an MSS value to the label of the first matching ``_MSS_PATH`` range.

    Non-positive values and values outside every range give "Inconnu".
    """
    if mss > 0:
        for span, path_label in _MSS_PATH:
            if mss in span:
                return path_label
    return "Inconnu"
def _os_family(os_name: str) -> str:
"""Réduit un nom OS détaillé à sa famille pour comparaison avec l'UA."""
n = os_name.lower()
if "windows" in n:
return "Windows"
if "android" in n:
return "Android"
if "ios" in n or "macos" in n or "iphone" in n or "ipad" in n:
return "Apple"
if "linux" in n or "chromeos" in n:
return "Linux"
if "bsd" in n or "cisco" in n or "cdn" in n or "réseau" in n:
return "Network"
if "bot" in n or "scanner" in n or "mirai" in n or "zmap" in n:
return "Bot"
return "Unknown"
def _ua_os_family(declared_os: str) -> str:
"""Réduit l'OS déclaré (UA) à sa famille."""
mapping = {
"Windows": "Windows",
"Android": "Android",
"iOS": "Apple",
"macOS": "Apple",
"Linux": "Linux",
"ChromeOS": "Linux",
"BSD": "Network",
}
return mapping.get(declared_os, "Unknown")
# ─── Fonctions publiques ───────────────────────────────────────────────────────
def declared_os_from_ua(ua: str) -> str:
    """Infer the operating system declared by a User-Agent string.

    Matching is case-insensitive and ordered from the most to the least
    specific marker: Android is tested before Linux and iOS before macOS,
    because their User-Agents also contain the generic marker.

    Args:
        ua: Raw User-Agent header; ``None`` or an empty string is accepted.

    Returns:
        One of "Windows", "Android", "iOS", "macOS", "ChromeOS", "Linux",
        "BSD" or "Unknown".
    """
    import re  # local import: keeps this function self-contained

    ul = (ua or "").lower()
    if not ul:
        return "Unknown"
    if "windows nt" in ul:
        return "Windows"
    if "android" in ul:
        return "Android"
    if "iphone" in ul or "ipad" in ul:
        return "iOS"
    if "mac os x" in ul or "macos" in ul:
        return "macOS"
    # "cros" must be a whole token: as a bare substring it also matches
    # "microsoft" (e.g. "Microsoft-CryptoAPI/10.0"), which is not ChromeOS.
    if "chromeos" in ul or re.search(r"\bcros\b", ul):
        return "ChromeOS"
    if "linux" in ul:
        return "Linux"
    if "freebsd" in ul or "openbsd" in ul or "netbsd" in ul:
        return "BSD"
    return "Unknown"
def fingerprint_os(ttl: int, win: int, scale: int, mss: int) -> OsFingerprint:
    """Fingerprint the sender's OS from TCP SYN parameters with weighted scoring.

    Only signatures whose initial TTL equals the estimated one are considered.
    Each starts at 0.40 (TTL match) and gains or loses points per declared field:
      - MSS    : +0.30 on match, -0.12 on mismatch
      - window : +0.20 on match, -0.08 on mismatch
      - scale  : +0.10 on match, -0.04 on mismatch
    Fields set to ``None`` in a signature are ignored. A hop count above 30
    costs a further 0.05. The highest score wins (first signature on ties) and
    is accepted from 0.38 upwards; otherwise a TTL-only fallback is returned.

    Signatures flagged as bot tools must match *every* field they declare.
    They describe crafted packets, and a bot verdict is treated as spoofing
    regardless of confidence, so a partial match (for instance TTL 64 with
    MSS 1460 only) must not label ordinary hosts as scanners merely because
    bot signatures come first in the table.

    Args:
        ttl: Observed IP TTL.
        win: TCP window size.
        scale: TCP window-scale factor.
        mss: TCP maximum segment size.

    Returns:
        An ``OsFingerprint``; ``os_name`` is "Unknown" with confidence 0.0 when
        the initial TTL cannot be estimated.
    """
    initial_ttl, hop_count = _estimate_initial_ttl(ttl)
    network_path = _infer_network_path(mss)
    if initial_ttl == 0:
        return OsFingerprint(
            os_name="Unknown", initial_ttl=0, hop_count=-1,
            confidence=0.0, is_bot_tool=False, network_path=network_path,
        )

    observed = {"mss": mss, "win": win, "scale": scale}
    best_score: float = -1.0
    best_sig: Optional[dict] = None
    for sig in _SIGNATURES:
        # TTL is a strict filter: no match, skip the signature.
        if sig["ttl"] != initial_ttl:
            continue
        # Bot-tool signatures are exact: any declared field that differs
        # disqualifies the signature instead of merely lowering its score.
        if sig["bot"] and any(
            sig[name] is not None and value not in sig[name]
            for name, value in observed.items()
        ):
            continue
        score: float = 0.40  # base score for the TTL match
        # MSS (weight 0.30)
        if sig["mss"] is not None:
            score += 0.30 if mss in sig["mss"] else -0.12
        # Window (weight 0.20)
        if sig["win"] is not None:
            score += 0.20 if win in sig["win"] else -0.08
        # Scale (weight 0.10)
        if sig["scale"] is not None:
            score += 0.10 if scale in sig["scale"] else -0.04
        # Penalty for an abnormally long path (> 30 hops)
        if hop_count > 30:
            score -= 0.05
        if score > best_score:
            best_score = score
            best_sig = sig

    if best_sig and best_score >= 0.38:
        # Final confidence = match score weighted by the signature's own weight.
        raw_conf = best_score * best_sig["conf"]
        confidence = round(min(max(raw_conf, 0.0), 1.0), 2)
        return OsFingerprint(
            os_name=best_sig["name"],
            initial_ttl=initial_ttl,
            hop_count=hop_count,
            confidence=confidence,
            is_bot_tool=best_sig["bot"],
            network_path=network_path,
        )

    # Fallback: classification from the TTL alone, with low confidence.
    fallback = {64: "Linux/macOS", 128: "Windows", 255: "Cisco/BSD"}
    return OsFingerprint(
        os_name=fallback.get(initial_ttl, "Unknown"),
        initial_ttl=initial_ttl,
        hop_count=hop_count,
        confidence=round(0.40 * 0.65, 2),  # low confidence
        is_bot_tool=False,
        network_path=network_path,
    )
def detect_spoof(fp: OsFingerprint, declared_os: str) -> SpoofResult:
    """Compare the TCP fingerprint with the OS declared by the User-Agent.

    Checks, in order:
      1. Known scanning tool → spoof/bot, whatever the User-Agent says.
      2. Confidence below 0.50, or either side "Unknown" → not decidable.
      3. Network/CDN TCP stack with a User-Agent of another known family → spoof.
      4. Either family "Unknown" → not decidable.
      5. Android User-Agent behind a proxy signature → tolerated.
      6. Different families → spoof; otherwise consistent.
    """
    def _clean(reason: str) -> SpoofResult:
        return SpoofResult(is_spoof=False, is_bot_tool=False, reason=reason)

    def _spoofed(reason: str, *, bot: bool = False) -> SpoofResult:
        return SpoofResult(is_spoof=True, is_bot_tool=bot, reason=reason)

    if fp.is_bot_tool:
        return _spoofed(f"Outil de scan détecté ({fp.os_name})", bot=True)

    undecidable = (
        fp.confidence < 0.50
        or fp.os_name == "Unknown"
        or declared_os == "Unknown"
    )
    if undecidable:
        return _clean("Corrélation insuffisante")

    tcp_family = _os_family(fp.os_name)
    ua_family = _ua_os_family(declared_os)

    # A network-equipment/CDN stack is suspicious when the UA claims another OS.
    if tcp_family == "Network" and ua_family not in ("Network", "Unknown"):
        return _spoofed(f"Équipement réseau/CDN (TCP) vs {declared_os} (UA)")

    if "Unknown" in (tcp_family, ua_family):
        return _clean("OS indéterminé")

    # Android app going through proxy infrastructure: noted, not flagged.
    # NOTE(review): the only label containing "proxy" in the signature table,
    # "Android/App (proxy)", is mapped to the "Android" family by _os_family,
    # so this branch looks unreachable as written — confirm the intent.
    via_proxy = (
        declared_os == "Android"
        and tcp_family == "Windows"
        and "proxy" in fp.os_name.lower()
    )
    if via_proxy:
        return _clean("App mobile via proxy infra")

    if tcp_family != ua_family:
        return _spoofed(f"TCP→{tcp_family} vs UA→{ua_family}")
    return _clean("Cohérent")

View File

@ -19,6 +19,7 @@ import { BruteForceView } from './components/BruteForceView';
import { TcpSpoofingView } from './components/TcpSpoofingView'; import { TcpSpoofingView } from './components/TcpSpoofingView';
import { HeaderFingerprintView } from './components/HeaderFingerprintView'; import { HeaderFingerprintView } from './components/HeaderFingerprintView';
import { MLFeaturesView } from './components/MLFeaturesView'; import { MLFeaturesView } from './components/MLFeaturesView';
import ClusteringView from './components/ClusteringView';
import { useTheme } from './ThemeContext'; import { useTheme } from './ThemeContext';
// ─── Types ──────────────────────────────────────────────────────────────────── // ─── Types ────────────────────────────────────────────────────────────────────
@ -79,6 +80,7 @@ function Sidebar({ counts }: { counts: AlertCounts | null }) {
const advancedLinks = [ const advancedLinks = [
{ path: '/bruteforce', label: 'Brute Force', icon: '🔥', aliases: [] }, { path: '/bruteforce', label: 'Brute Force', icon: '🔥', aliases: [] },
{ path: '/tcp-spoofing', label: 'TCP Spoofing', icon: '🧬', aliases: [] }, { path: '/tcp-spoofing', label: 'TCP Spoofing', icon: '🧬', aliases: [] },
{ path: '/clustering', label: 'Clustering IPs', icon: '🔬', aliases: [] },
{ path: '/headers', label: 'Header Fingerprint', icon: '📡', aliases: [] }, { path: '/headers', label: 'Header Fingerprint', icon: '📡', aliases: [] },
{ path: '/ml-features', label: 'Features ML', icon: '🤖', aliases: [] }, { path: '/ml-features', label: 'Features ML', icon: '🤖', aliases: [] },
]; ];
@ -238,6 +240,7 @@ function TopHeader({ counts }: { counts: AlertCounts | null }) {
if (p.startsWith('/bulk-classify')) return 'Classification en masse'; if (p.startsWith('/bulk-classify')) return 'Classification en masse';
if (p.startsWith('/bruteforce')) return 'Brute Force & Credential Stuffing'; if (p.startsWith('/bruteforce')) return 'Brute Force & Credential Stuffing';
if (p.startsWith('/tcp-spoofing')) return 'Spoofing TCP/OS'; if (p.startsWith('/tcp-spoofing')) return 'Spoofing TCP/OS';
if (p.startsWith('/clustering')) return 'Clustering IPs';
if (p.startsWith('/headers')) return 'Header Fingerprint Clustering'; if (p.startsWith('/headers')) return 'Header Fingerprint Clustering';
if (p.startsWith('/ml-features')) return 'Features ML / Radar'; if (p.startsWith('/ml-features')) return 'Features ML / Radar';
return ''; return '';
@ -370,6 +373,7 @@ export default function App() {
<Route path="/threat-intel" element={<ThreatIntelView />} /> <Route path="/threat-intel" element={<ThreatIntelView />} />
<Route path="/bruteforce" element={<BruteForceView />} /> <Route path="/bruteforce" element={<BruteForceView />} />
<Route path="/tcp-spoofing" element={<TcpSpoofingView />} /> <Route path="/tcp-spoofing" element={<TcpSpoofingView />} />
<Route path="/clustering" element={<ClusteringView />} />
<Route path="/headers" element={<HeaderFingerprintView />} /> <Route path="/headers" element={<HeaderFingerprintView />} />
<Route path="/heatmap" element={<Navigate to="/" replace />} /> <Route path="/heatmap" element={<Navigate to="/" replace />} />
<Route path="/botnets" element={<Navigate to="/campaigns" replace />} /> <Route path="/botnets" element={<Navigate to="/campaigns" replace />} />

View File

@ -0,0 +1,847 @@
/**
* Clustering IPs — visualisation multi-métriques
*
* Deux vues :
* 1. "Cartes" (défaut) — grille de cartes triées par risque, toujours lisibles
* 2. "Graphe" — ReactFlow avec nœuds-cartes et disposition par colonne de menace
*
* Chaque cluster affiche :
* • Label + emoji de menace
* • Compteur IPs / hits
* • Score de risque (barre colorée)
* • 4 métriques clés (barres horizontales)
* • Top pays + ASN
* • Radar dans la sidebar
*/
import { useCallback, useEffect, useState, useMemo } from 'react';
import ReactFlow, {
Background, Controls, MiniMap, ReactFlowProvider,
useNodesState, useEdgesState, useReactFlow,
Node, Edge, Handle, Position, NodeProps,
Panel,
} from 'reactflow';
import 'reactflow/dist/style.css';
import {
RadarChart, Radar, PolarGrid, PolarAngleAxis, PolarRadiusAxis,
ResponsiveContainer, Tooltip as RechartsTooltip,
} from 'recharts';
// ─── Types ────────────────────────────────────────────────────────────────────
/**
 * One cluster as displayed in the cards and in the graph.
 * The `mean_*` fields are per-cluster averages of the corresponding metric.
 */
interface ClusterNode {
  id: string;
  // Display label of the cluster
  label: string;
  cluster_idx: number;
  // Position of the node in the graph view
  x: number; y: number;
  radius: number;
  color: string;
  // Risk score, rendered as a percentage (treated as a 0–1 ratio by the UI)
  risk_score: number;
  ip_count: number;
  hit_count: number;
  mean_score: number;
  mean_ua_ch: number;
  mean_ua_rotating: number;
  mean_fuzzing: number;
  mean_headless: number;
  mean_velocity: number;
  mean_ttl: number;
  mean_mss: number;
  mean_scale: number;
  mean_alpn_mismatch: number;
  mean_ip_id_zero: number;
  mean_browser_score: number;
  mean_entropy: number;
  mean_ja4_diversity: number;
  top_threat: string;
  top_countries: string[];
  top_orgs: string[];
  sample_ips: string[];
  sample_ua: string;
  // One entry per radar axis: feature name and its value
  radar: { feature: string; value: number }[];
}
/** Full clustering payload: cluster nodes, similarity edges and global statistics. */
interface ClusteringData {
  nodes: ClusterNode[];
  // Links between clusters; `source` and `target` are node ids
  edges: { id: string; source: string; target: string; similarity: number; weight: number }[];
  stats: {
    total_clusters: number;
    total_ips: number;
    total_hits: number;
    bot_ips: number;
    high_risk_ips: number;
    n_samples: number;
    // Number of clusters requested from the clustering algorithm (presumably — confirm with the API)
    k: number;
    // Computation time, in seconds judging by the `_s` suffix (confirm with the API)
    elapsed_s: number;
  };
}
/** One IP row belonging to a cluster, with its TCP, traffic and enrichment fields. */
interface ClusterIP {
  ip: string; ja4: string; tcp_ttl: number; tcp_mss: number;
  hits: number; ua: string; avg_score: number;
  threat_level: string; country_code: string; asn_org: string;
  fuzzing: number; velocity: number;
}
// ─── Helpers ──────────────────────────────────────────────────────────────────
// Background CSS class of the threat badge, keyed by threat level.
const THREAT_BADGE_CLASS: Record<string, string> = {
  CRITICAL: 'bg-red-600', HIGH: 'bg-orange-500',
  MEDIUM: 'bg-yellow-500', LOW: 'bg-green-600',
};
// Feature display names selected for the radar chart.
const RADAR_FEATURES = [
  'Score Anomalie', 'Vélocité (rps)', 'Fuzzing', 'Headless',
  'ALPN Mismatch', 'H2 Multiplexing', 'UA-CH Mismatch', 'UA Rotatif',
  'IP-ID Zéro', 'Entropie Temporelle',
];
/** Small coloured pill showing a threat level; renders nothing for an empty level. */
function ThreatBadge({ level }: { level: string }) {
  if (!level) return null;
  // Unknown levels fall back to a neutral grey background.
  const badgeColor = THREAT_BADGE_CLASS[level] || 'bg-gray-600';
  return (
    <span className={`text-[10px] font-bold px-1.5 py-0.5 rounded-full text-white ${badgeColor}`}>
      {level}
    </span>
  );
}
/**
 * Thin horizontal gauge with an optional label on the left and the
 * percentage on the right. `value` is clamped to [0, 1] before display.
 */
function MiniBar({ value, color = '#6366f1', label }: { value: number; color?: string; label?: string }) {
  const clamped = Math.min(1, Math.max(0, value));
  const pct = Math.round(clamped * 100);
  const fillStyle = { width: `${pct}%`, backgroundColor: color };
  return (
    <div className="flex items-center gap-2">
      {label && <span className="text-text-disabled text-[10px] w-24 flex-shrink-0 truncate">{label}</span>}
      <div className="flex-1 bg-gray-700/60 rounded-full h-1.5">
        <div className="h-1.5 rounded-full transition-all" style={fillStyle} />
      </div>
      <span className="text-[10px] text-text-secondary w-8 text-right">{pct}%</span>
    </div>
  );
}
/** Map a risk score to its display colour (red, orange, yellow, green by decreasing risk). */
function riskColor(risk: number): string {
  return risk >= 0.45 ? '#dc2626'
    : risk >= 0.30 ? '#f97316'
    : risk >= 0.15 ? '#eab308'
    : '#22c55e';
}
/** Map a risk score to its display label, using the same thresholds as the colour scale. */
function riskLabel(risk: number): string {
  return risk >= 0.45 ? 'CRITIQUE'
    : risk >= 0.30 ? 'ÉLEVÉ'
    : risk >= 0.15 ? 'MODÉRÉ'
    : 'SAIN';
}
// ─── Carte cluster (réutilisée dans les 2 vues) ────────────────────────────
/**
 * Clickable summary card for one cluster: label, IP/request counts, risk badge,
 * risk bar, four key behaviour gauges, mean TCP stack values, top countries and
 * the first ASN organisation.
 *
 * @param node     cluster to display
 * @param selected whether the card is the current selection (adds a coloured ring)
 * @param onClick  called when the card is clicked
 */
function ClusterCard({
  node, selected, onClick,
}: {
  node: ClusterNode;
  selected: boolean;
  onClick: () => void;
}) {
  const rc = riskColor(node.risk_score);
  const rl = riskLabel(node.risk_score);
  // Anomaly scores are small in practice, so stretch them over /0.5 for the gauge.
  const scoreN = Math.min(1, node.mean_score / 0.5);
  // Clamp like MiniBar does, so an out-of-range score cannot overflow the track.
  const riskPct = Math.min(100, Math.max(0, node.risk_score * 100));
  const stateClasses = selected
    ? 'ring-2 ring-offset-1 ring-offset-background-card shadow-lg scale-[1.01]'
    : 'hover:border-gray-500 hover:shadow-md';
  return (
    <button
      // Explicit type: a bare <button> would submit an enclosing form.
      type="button"
      onClick={onClick}
      className={`w-full text-left rounded-xl border transition-all duration-150 overflow-hidden ${stateClasses}`}
      style={{
        borderColor: selected ? rc : '#374151',
        '--tw-ring-color': rc,
      } as React.CSSProperties}
    >
      {/* Colour strip at the top */}
      <div
        className="h-1.5 w-full"
        style={{ backgroundColor: rc }}
      />
      <div className="p-3 bg-background-card space-y-2.5">
        {/* Header */}
        <div className="flex items-start justify-between gap-2">
          <div className="flex-1 min-w-0">
            <p className="font-bold text-sm text-text-primary leading-tight">{node.label}</p>
            <p className="text-xs text-text-secondary mt-0.5">
              <span className="font-semibold text-text-primary">{node.ip_count.toLocaleString()}</span> IPs
              {' · '}
              <span>{node.hit_count.toLocaleString()}</span> req
            </p>
          </div>
          <div className="flex-shrink-0 text-right">
            <span
              className="text-xs font-bold px-2 py-0.5 rounded-full text-white"
              style={{ backgroundColor: rc }}
            >
              {rl}
            </span>
            <p className="text-[10px] text-text-disabled mt-0.5">
              risque {Math.round(node.risk_score * 100)}%
            </p>
          </div>
        </div>
        {/* Risk bar */}
        <div className="w-full bg-gray-700/60 rounded-full h-1">
          <div
            className="h-1 rounded-full transition-all"
            style={{ width: `${riskPct}%`, backgroundColor: rc }}
          />
        </div>
        {/* Four key metrics */}
        <div className="space-y-1">
          <MiniBar
            value={scoreN}
            color={scoreN > 0.5 ? '#dc2626' : '#f97316'}
            label="Score anomalie"
          />
          <MiniBar
            value={node.mean_ua_ch}
            color={node.mean_ua_ch > 0.7 ? '#dc2626' : '#f97316'}
            label="UA-CH mismatch"
          />
          <MiniBar
            value={Math.min(1, node.mean_fuzzing * 3)}
            color="#8b5cf6"
            label="Fuzzing"
          />
          <MiniBar
            value={node.mean_ua_rotating}
            color="#ec4899"
            label="UA rotatif"
          />
        </div>
        {/* TCP stack */}
        <div className="flex gap-2 text-[10px] text-text-disabled">
          <span>TTL <b className="text-text-secondary">{Math.round(node.mean_ttl)}</b></span>
          <span>MSS <b className="text-text-secondary">{Math.round(node.mean_mss)}</b></span>
          {node.mean_scale > 0 && <span>Scale <b className="text-text-secondary">{node.mean_scale.toFixed(0)}</b></span>}
        </div>
        {/* Countries + ASN */}
        {node.top_countries.length > 0 && (
          <p className="text-[10px] text-text-disabled truncate">
            🌍 {node.top_countries.join(' · ')}
          </p>
        )}
        {/* Only the first organisation is shown on the card. */}
        {node.top_orgs.slice(0, 1).map((org, i) => (
          <p key={i} className="text-[10px] text-text-disabled truncate">🏢 {org}</p>
        ))}
      </div>
    </button>
  );
}
// ─── Vue Cartes (défaut) ──────────────────────────────────────────────────────
/**
 * One titled section of the card grid. Declared at module level so its identity
 * is stable across renders: a component declared inside another component is a
 * new type on every render, which makes React unmount and remount its subtree.
 */
function ClusterCardGroup({
  title, color, nodes, selectedId, onSelect,
}: {
  title: string;
  color: string;
  nodes: ClusterNode[];
  selectedId: string | null;
  onSelect: (n: ClusterNode) => void;
}) {
  if (nodes.length === 0) return null;
  return (
    <div>
      <div className="flex items-center gap-2 mb-3">
        <div className="h-0.5 flex-1 rounded" style={{ backgroundColor: color }} />
        <h3 className="text-xs font-bold uppercase tracking-widest" style={{ color }}>
          {title} ({nodes.length})
        </h3>
        <div className="h-0.5 flex-1 rounded" style={{ backgroundColor: color }} />
      </div>
      <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-3">
        {nodes.map(n => (
          <ClusterCard
            key={n.id}
            node={n}
            selected={selectedId === n.id}
            onClick={() => onSelect(n)}
          />
        ))}
      </div>
    </div>
  );
}

/**
 * Dashboard view: cluster cards sorted by descending risk and split into three
 * sections (bots / suspects / legitimate).
 *
 * @param nodes      clusters to display
 * @param selectedId id of the selected cluster, or null
 * @param onSelect   called with the cluster whose card was clicked
 */
function CardGridView({
  nodes, selectedId, onSelect,
}: {
  nodes: ClusterNode[];
  selectedId: string | null;
  onSelect: (n: ClusterNode) => void;
}) {
  // Single pass using the same rule as the graph view's columns, so every node
  // lands in exactly one group (none is dropped when no range test matches).
  const groups = useMemo(() => {
    const bots: ClusterNode[] = [];
    const warn: ClusterNode[] = [];
    const safe: ClusterNode[] = [];
    const byRisk = [...nodes].sort((a, b) => b.risk_score - a.risk_score);
    for (const n of byRisk) {
      if (n.risk_score >= 0.45 || n.label.includes('🤖')) bots.push(n);
      else if (n.risk_score >= 0.15) warn.push(n);
      else safe.push(n);
    }
    return { bots, warn, safe };
  }, [nodes]);

  return (
    <div className="overflow-y-auto flex-1 p-5 space-y-6">
      <ClusterCardGroup title="Bots & Menaces Confirmées" color="#dc2626" nodes={groups.bots} selectedId={selectedId} onSelect={onSelect} />
      <ClusterCardGroup title="Comportements Suspects" color="#f97316" nodes={groups.warn} selectedId={selectedId} onSelect={onSelect} />
      <ClusterCardGroup title="Trafic Légitime" color="#22c55e" nodes={groups.safe} selectedId={selectedId} onSelect={onSelect} />
    </div>
  );
}
// ─── Nœud ReactFlow (pour la vue Graphe) ─────────────────────────────────────
/**
 * Custom ReactFlow node: a compact cluster card with risk badge, risk bar and
 * three mini gauges, flanked by invisible left/right connection handles.
 */
function GraphCardNode({ data }: NodeProps) {
  const tone = riskColor(data.risk_score);
  const badgeText = riskLabel(data.risk_score);
  // Same /0.5 stretch as the dashboard cards.
  const anomaly = Math.min(1, data.mean_score / 0.5);
  const glow = data.risk_score > 0.40 ? `0 0 16px ${tone}55` : 'none';
  // [label, ratio, colour] for each mini gauge.
  const gauges: [string, number, string][] = [
    ['Anomalie', anomaly, anomaly > 0.5 ? '#dc2626' : '#f97316'],
    ['UA-CH', data.mean_ua_ch, '#f97316'],
    ['Fuzzing', Math.min(1, data.mean_fuzzing * 3), '#8b5cf6'],
  ];
  const hiddenHandle = { opacity: 0 };
  return (
    <>
      <Handle type="target" position={Position.Left} style={hiddenHandle} />
      <div
        className="rounded-xl border-2 overflow-hidden shadow-lg cursor-pointer select-none"
        style={{
          borderColor: tone,
          width: 220,
          backgroundColor: '#1e2533',
          boxShadow: glow,
        }}
      >
        <div className="h-1" style={{ backgroundColor: tone }} />
        <div className="p-3 space-y-2">
          <div className="flex justify-between items-start gap-1">
            <p className="text-xs font-bold text-white leading-tight flex-1">{data.label}</p>
            <span
              className="text-[9px] font-bold px-1.5 py-0.5 rounded text-white flex-shrink-0"
              style={{ backgroundColor: tone }}
            >
              {badgeText}
            </span>
          </div>
          <p className="text-[10px] text-gray-400">
            <b className="text-white">{data.ip_count.toLocaleString()}</b> IPs ·{' '}
            {data.hit_count.toLocaleString()} req
          </p>
          {/* Risk bar */}
          <div className="w-full bg-gray-700 rounded-full h-1">
            <div
              className="h-1 rounded-full"
              style={{ width: `${data.risk_score * 100}%`, backgroundColor: tone }}
            />
          </div>
          {/* Mini gauges */}
          <div className="space-y-1">
            {gauges.map(([name, ratio, hex]) => (
              <div key={name} className="flex items-center gap-1.5">
                <span className="text-gray-500 text-[9px] w-14">{name}</span>
                <div className="flex-1 bg-gray-700 rounded-full h-1">
                  <div className="h-1 rounded-full" style={{ width: `${ratio * 100}%`, backgroundColor: hex }} />
                </div>
                <span className="text-gray-400 text-[9px] w-7 text-right">{Math.round(ratio * 100)}%</span>
              </div>
            ))}
          </div>
          {data.top_countries?.length > 0 && (
            <p className="text-[9px] text-gray-500 truncate">🌍 {data.top_countries.slice(0, 4).join(' · ')}</p>
          )}
        </div>
      </div>
      <Handle type="source" position={Position.Right} style={hiddenHandle} />
    </>
  );
}
// Custom node renderers handed to <ReactFlow>, keyed by the `type` set on each
// node ('graphCard').
const nodeTypes = { graphCard: GraphCardNode };
// ─── Vue Graphe ───────────────────────────────────────────────────────────────
/**
 * Relationship graph: clusters laid out in three columns by threat level
 * (bots, suspects, legitimate), with edges styled by similarity.
 * Must be rendered inside a ReactFlowProvider because it calls useReactFlow().
 *
 * @param data       clustering result (nodes and edges)
 * @param selectedId id of the selected cluster, or null
 * @param onSelect   called with the cluster whose node was clicked
 */
function GraphView({
  data, selectedId, onSelect,
}: {
  data: ClusteringData;
  selectedId: string | null;
  onSelect: (n: ClusterNode) => void;
}) {
  const [nodes, setNodes, onNodesChange] = useNodesState([]);
  const [edges, setEdges, onEdgesChange] = useEdgesState([]);
  const { fitView } = useReactFlow();

  // Layout: recomputed only when the clustering data changes. Selection is
  // handled by a separate effect below, so clicking a node neither discards
  // manually dragged positions nor refits the viewport.
  useEffect(() => {
    if (!data) return;
    // Column 0 → bots (red), column 1 → suspects (orange), column 2 → legitimate (green)
    const sorted = [...data.nodes].sort((a, b) => b.risk_score - a.risk_score);
    const col: ClusterNode[][] = [[], [], []];
    for (const n of sorted) {
      if (n.risk_score >= 0.45 || n.label.includes('🤖')) col[0].push(n);
      else if (n.risk_score >= 0.15) col[1].push(n);
      else col[2].push(n);
    }
    const NODE_W = 240;
    const NODE_H = 170;
    const PAD_X = 80;
    const PAD_Y = 40;
    const COL_GAP = 80;
    const rfNodes: Node[] = [];
    col.forEach((group, ci) => {
      group.forEach((n, ri) => {
        rfNodes.push({
          id: n.id,
          type: 'graphCard',
          position: {
            x: ci * (NODE_W + COL_GAP) + PAD_X,
            y: ri * (NODE_H + PAD_Y) + PAD_Y,
          },
          data: n,
          draggable: true,
          selected: n.id === selectedId,
        });
      });
    });
    // Edges: colour, dash pattern, label and animation all derive from similarity.
    const rfEdges: Edge[] = data.edges.map(e => {
      const sim = e.similarity;
      return {
        id: e.id,
        source: e.source,
        target: e.target,
        style: {
          stroke: sim > 0.6 ? '#f97316' : sim > 0.4 ? '#6b7280' : '#374151',
          strokeWidth: Math.max(1, e.weight * 0.5),
          strokeDasharray: sim < 0.4 ? '4 4' : undefined,
        },
        label: sim > 0.55 ? `${Math.round(sim * 100)}%` : undefined,
        labelStyle: { fontSize: 9, fill: '#9ca3af' },
        labelBgStyle: { fill: '#0f1117aa', borderRadius: 3 },
        animated: sim > 0.6,
      };
    });
    setNodes(rfNodes);
    setEdges(rfEdges);
    // Refit shortly after the nodes are set; cancel the timer if the data
    // changes again or the view unmounts first.
    const fitTimer = setTimeout(() => fitView({ padding: 0.08 }), 120);
    return () => clearTimeout(fitTimer);
    // selectedId is deliberately not a dependency (see the effect below).
  }, [data]);

  // Selection: only flip the `selected` flag on the existing nodes, keeping
  // their current positions.
  useEffect(() => {
    setNodes(current =>
      current.map(n => {
        const isSelected = n.id === selectedId;
        return n.selected === isSelected ? n : { ...n, selected: isSelected };
      }),
    );
  }, [selectedId]);

  return (
    <div className="flex-1 relative">
      <ReactFlow
        nodes={nodes}
        edges={edges}
        onNodesChange={onNodesChange}
        onEdgesChange={onEdgesChange}
        onNodeClick={(_, node) => onSelect(node.data as ClusterNode)}
        nodeTypes={nodeTypes}
        fitView
        minZoom={0.12}
        maxZoom={2.5}
        attributionPosition="bottom-right"
      >
        <Background color="#ffffff07" gap={30} />
        <Controls />
        <MiniMap
          nodeColor={n => riskColor((n.data as any)?.risk_score ?? 0)}
          style={{ background: '#0f1117', border: '1px solid #374151' }}
        />
        {/* Column legend */}
        <Panel position="top-center">
          <div className="flex gap-6 text-xs text-white/70 bg-background-card/90 rounded-lg px-5 py-2 shadow">
            {[
              { color: '#dc2626', label: '🤖 Bots / Menaces' },
              { color: '#f97316', label: '⚠️ Suspects' },
              { color: '#22c55e', label: '✅ Légitimes' },
            ].map(({ color, label }) => (
              <div key={label} className="flex items-center gap-1.5">
                <div className="w-3 h-3 rounded-sm" style={{ backgroundColor: color }} />
                <span>{label}</span>
              </div>
            ))}
            <span className="text-white/40 ml-2"> similaire · - - différent · animé=fort</span>
          </div>
        </Panel>
        <Panel position="top-left">
          <div className="text-[10px] text-text-disabled bg-background-card/80 rounded p-2 space-y-0.5">
            <p className="font-semibold text-text-secondary">K-means++ · 21 features</p>
            <p>Colonnes : niveau de risque</p>
            <p>Arêtes : similarité des centroides</p>
          </div>
        </Panel>
      </ReactFlow>
    </div>
  );
}
// ─── Sidebar détail cluster ────────────────────────────────────────────────────
// Set form of RADAR_FEATURES, used by ClusterSidebar to keep only the radar
// entries whose feature name is in that list.
const RADAR_FEATURES_SET = new Set(RADAR_FEATURES);
/**
 * Detail panel for the selected cluster: risk header, behavioural radar, all
 * metrics, mean TCP stack values, metadata, copy/CSV actions and the list of
 * member IPs fetched from `/api/clustering/cluster/<id>/ips`.
 *
 * @param cluster cluster whose details are shown
 * @param onClose called when the close button is clicked
 */
function ClusterSidebar({ cluster, onClose }: { cluster: ClusterNode; onClose: () => void }) {
  const [ips, setIPs] = useState<ClusterIP[]>([]);
  const [total, setTotal] = useState(0);
  const [loading, setLoading] = useState(false);
  const [copied, setCopied] = useState(false);

  useEffect(() => {
    // Abort the request when the cluster changes or the panel closes, so a slow
    // response for a previous cluster can never overwrite the current list.
    const controller = new AbortController();
    setLoading(true);
    // Drop the previous cluster's IPs right away: the copy/CSV buttons stay
    // clickable while loading and must not export stale data.
    setIPs([]);
    setTotal(0);
    fetch(`/api/clustering/cluster/${encodeURIComponent(cluster.id)}/ips?limit=80`, { signal: controller.signal })
      .then(r => {
        if (!r.ok) throw new Error(`HTTP ${r.status}`);
        return r.json();
      })
      .then(d => {
        if (controller.signal.aborted) return;
        setIPs(d.ips || []);
        setTotal(d.total || 0);
      })
      .catch(err => {
        // An aborted request is expected; anything else is logged and the list
        // stays empty (best-effort, the rest of the panel still works).
        if (controller.signal.aborted) return;
        console.error('Failed to load cluster IPs', err);
      })
      .finally(() => {
        // A superseded request must not clear the spinner of its successor.
        if (!controller.signal.aborted) setLoading(false);
      });
    return () => controller.abort();
  }, [cluster.id]);

  // Copies the loaded IPs (one per line); the "copied" feedback is shown only
  // when the clipboard write actually succeeded.
  const copyIPs = async () => {
    try {
      await navigator.clipboard.writeText(ips.map(i => i.ip).join('\n'));
      setCopied(true);
      setTimeout(() => setCopied(false), 2000);
    } catch (err) {
      // Clipboard API missing (insecure context) or permission denied.
      console.error('Failed to copy IPs to clipboard', err);
    }
  };

  // Exports the loaded IPs as a CSV download.
  const downloadCSV = () => {
    // Quote a cell when it contains a delimiter, quote or line break, doubling
    // embedded quotes, so free-text fields such as the ASN name keep rows intact.
    const cell = (v: unknown): string => {
      const s = String(v ?? '');
      return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
    };
    const header = 'IP,JA4,TTL,MSS,Hits,Score,Menace,Pays,ASN,Fuzzing,Vélocité\n';
    const rows = ips.map(i =>
      [i.ip, i.ja4, i.tcp_ttl, i.tcp_mss, i.hits,
        i.avg_score.toFixed(3), i.threat_level, i.country_code,
        i.asn_org, i.fuzzing.toFixed(2), i.velocity.toFixed(2)].map(cell).join(',')
    ).join('\n');
    const blob = new Blob([header + rows], { type: 'text/csv' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = `cluster_${cluster.id}.csv`;
    a.click();
    // Release the blob once the click has been dispatched.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  };

  const rc = riskColor(cluster.risk_score);
  // Clamp the bar width so an out-of-range score cannot overflow the track.
  const riskPct = Math.min(100, Math.max(0, cluster.risk_score * 100));
  const radarData = cluster.radar
    .filter(r => RADAR_FEATURES_SET.has(r.feature))
    .map(r => ({ subject: r.feature.replace('Vélocité (rps)', 'Vélocité'), val: Math.round(r.value * 100) }));

  return (
    <div className="w-[420px] flex-shrink-0 bg-background-card border-l border-gray-700 shadow-2xl flex flex-col overflow-hidden">
      {/* Header */}
      <div className="p-4 border-b border-gray-700 flex-shrink-0" style={{ borderLeftWidth: 4, borderLeftColor: rc }}>
        <div className="flex items-start justify-between">
          <div>
            <p className="font-bold text-base text-text-primary">{cluster.label}</p>
            <p className="text-sm text-text-secondary mt-1">
              <b className="text-text-primary">{cluster.ip_count.toLocaleString()}</b> IPs ·{' '}
              <b className="text-text-primary">{cluster.hit_count.toLocaleString()}</b> requêtes
            </p>
          </div>
          <button onClick={onClose} className="text-text-secondary hover:text-text-primary text-lg leading-none ml-4 mt-1"></button>
        </div>
        {/* Risk */}
        <div className="mt-3">
          <div className="flex justify-between text-xs text-text-secondary mb-1">
            <span>Score de risque</span>
            <span className="font-bold" style={{ color: rc }}>{Math.round(cluster.risk_score * 100)}% {riskLabel(cluster.risk_score)}</span>
          </div>
          <div className="w-full bg-gray-700 rounded-full h-2">
            <div className="h-2 rounded-full" style={{ width: `${riskPct}%`, backgroundColor: rc }} />
          </div>
        </div>
      </div>
      <div className="overflow-y-auto flex-1 p-4 space-y-5">
        {/* Radar */}
        <div>
          <p className="text-xs font-semibold text-text-secondary mb-2 uppercase tracking-wider">Profil Comportemental</p>
          <ResponsiveContainer width="100%" height={210}>
            <RadarChart data={radarData} margin={{ top: 10, right: 25, bottom: 10, left: 25 }}>
              <PolarGrid stroke="#ffffff18" />
              <PolarAngleAxis dataKey="subject" tick={{ fontSize: 9, fill: '#9ca3af' }} />
              <PolarRadiusAxis angle={90} domain={[0, 100]} tick={false} axisLine={false} />
              <Radar dataKey="val" stroke={rc} fill={rc} fillOpacity={0.30} />
              <RechartsTooltip
                contentStyle={{ background: '#1e2533', border: 'none', fontSize: 11, borderRadius: 8 }}
                formatter={(v: number) => [`${v}%`]}
              />
            </RadarChart>
          </ResponsiveContainer>
        </div>
        {/* Metrics */}
        <div>
          <p className="text-xs font-semibold text-text-secondary mb-2 uppercase tracking-wider">Toutes les métriques</p>
          <div className="space-y-1.5">
            {[
              ['Score anomalie ML', Math.min(1, cluster.mean_score / 0.5), rc],
              ['UA-CH mismatch', cluster.mean_ua_ch, '#f97316'],
              ['UA rotatif', cluster.mean_ua_rotating, '#ec4899'],
              ['Fuzzing', Math.min(1, cluster.mean_fuzzing * 3), '#8b5cf6'],
              ['Headless', cluster.mean_headless, '#dc2626'],
              ['Vélocité', cluster.mean_velocity, '#6366f1'],
              ['ALPN mismatch', cluster.mean_alpn_mismatch, '#14b8a6'],
              ['IP-ID zéro', cluster.mean_ip_id_zero, '#f59e0b'],
              ['Entropie temporelle', cluster.mean_entropy, '#06b6d4'],
              ['Browser score', Math.min(1, cluster.mean_browser_score / 50), '#22c55e'],
            ].map(([lbl, val, col]: any) => (
              <MiniBar key={String(lbl)} label={String(lbl)} value={val as number} color={col as string} />
            ))}
          </div>
        </div>
        {/* TCP */}
        <div>
          <p className="text-xs font-semibold text-text-secondary mb-2 uppercase tracking-wider">Stack TCP</p>
          <div className="grid grid-cols-3 gap-2">
            {[
              ['TTL Initial', Math.round(cluster.mean_ttl)],
              ['MSS', Math.round(cluster.mean_mss)],
              ['Scale', cluster.mean_scale.toFixed(1)],
            ].map(([k, v]) => (
              <div key={String(k)} className="bg-background-secondary rounded-lg p-2 text-center">
                <p className="text-[10px] text-text-disabled">{k}</p>
                <p className="font-bold text-text-primary">{v}</p>
              </div>
            ))}
          </div>
        </div>
        {/* Meta */}
        <div className="space-y-2 text-xs">
          {cluster.top_threat && (
            <div className="flex items-center gap-2">
              <span className="text-text-disabled">Menace dominante</span>
              <ThreatBadge level={cluster.top_threat} />
            </div>
          )}
          {cluster.top_countries.length > 0 && (
            <p><span className="text-text-disabled">Pays : </span>
              <span className="text-text-primary">{cluster.top_countries.join(', ')}</span></p>
          )}
          {cluster.top_orgs.length > 0 && (
            <div className="space-y-0.5">
              <span className="text-text-disabled">ASN :</span>
              {cluster.top_orgs.slice(0, 3).map((org, i) => (
                <p key={i} className="text-text-secondary pl-2"> {org}</p>
              ))}
            </div>
          )}
          {cluster.sample_ua && (
            <div>
              <span className="text-text-disabled">User-Agent type : </span>
              <p className="text-text-secondary break-all text-[10px] mt-1 pl-2 border-l border-gray-600">{cluster.sample_ua}</p>
            </div>
          )}
        </div>
        {/* Actions */}
        <div className="flex gap-2 sticky bottom-0 bg-background-card py-2">
          <button onClick={copyIPs}
            className="flex-1 py-2 text-xs rounded-lg bg-accent-primary text-white hover:opacity-80">
            {copied ? '✓ Copié !' : `📋 Copier IPs (${total.toLocaleString()})`}
          </button>
          <button onClick={downloadCSV}
            className="flex-1 py-2 text-xs rounded-lg bg-gray-700 text-white hover:bg-gray-600">
            CSV
          </button>
        </div>
        {/* IP list */}
        <div>
          <p className="text-xs font-semibold text-text-secondary mb-2 uppercase tracking-wider">
            Adresses IP ({loading ? '…' : `${ips.length} / ${total.toLocaleString()}`})
          </p>
          {loading ? (
            <p className="text-text-disabled text-xs">Chargement</p>
          ) : (
            <div className="space-y-1">
              {ips.map((ip, i) => (
                <div key={i} className="bg-background-secondary rounded-lg p-2 text-xs">
                  <div className="flex justify-between items-center">
                    <span className="font-mono text-accent-primary">{ip.ip}</span>
                    <div className="flex gap-1 items-center">
                      <ThreatBadge level={ip.threat_level} />
                      {ip.country_code && <span className="text-text-disabled">{ip.country_code}</span>}
                    </div>
                  </div>
                  <div className="flex gap-3 mt-1 text-text-disabled text-[10px]">
                    <span>TTL {ip.tcp_ttl}</span>
                    <span>MSS {ip.tcp_mss}</span>
                    <span>{ip.hits.toLocaleString()} req</span>
                    {ip.avg_score > 0.1 && (
                      <span className="text-orange-400"> {(ip.avg_score * 100).toFixed(0)}%</span>
                    )}
                    {ip.asn_org && <span className="truncate max-w-[100px]">{ip.asn_org}</span>}
                  </div>
                </div>
              ))}
            </div>
          )}
        </div>
      </div>
    </div>
  );
}
// ─── Vue Graphe (wrapper avec ReactFlowProvider) ───────────────────────────────
/**
 * Wraps GraphView in a ReactFlowProvider (GraphView calls useReactFlow) and
 * forwards its three props unchanged.
 */
function GraphViewWrapper(props: {
  data: ClusteringData;
  selectedId: string | null;
  onSelect: (n: ClusterNode) => void;
}) {
  const { data, selectedId, onSelect } = props;
  const graph = <GraphView data={data} selectedId={selectedId} onSelect={onSelect} />;
  return <ReactFlowProvider>{graph}</ReactFlowProvider>;
}
// ─── Composant principal ─────────────────────────────────────────────────────
/**
 * Page component for IP clustering: a control bar (k slider, view tabs, stats),
 * error and loading overlays, then either the card dashboard or the relationship
 * graph, with a detail sidebar for the selected cluster.
 */
export default function ClusteringView() {
  const [data, setData] = useState<ClusteringData | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState('');
  const [k, setK] = useState(14);
  const [pendingK, setPendingK] = useState(14);
  const [view, setView] = useState<'cards' | 'graph'>('cards');
  const [selected, setSelected] = useState<ClusterNode | null>(null);

  // Loads a clustering for the given k; clears the selection and any previous error.
  const fetchData = useCallback(async (kVal: number) => {
    setLoading(true);
    setError('');
    setSelected(null);
    try {
      const response = await fetch(`/api/clustering/clusters?k=${kVal}&n_samples=3000`);
      if (!response.ok) {
        throw new Error(await response.text());
      }
      const payload = await response.json();
      setData(payload);
    } catch (e: any) {
      setError(e.message || 'Erreur réseau');
    } finally {
      setLoading(false);
    }
  }, []);

  // Initial load with the default k.
  useEffect(() => {
    fetchData(k);
  }, []);

  // Commits the slider value and recomputes.
  const applyK = () => {
    setK(pendingK);
    fetchData(pendingK);
  };

  // Clicking the already-selected cluster deselects it.
  const toggleSelected = (n: ClusterNode) => {
    setSelected(prev => (prev?.id === n.id ? null : n));
  };

  const stats = data?.stats;
  const selectedId = selected?.id ?? null;
  const tabs = ['cards', 'graph'] as const;

  return (
    <div className="relative flex flex-col h-full bg-background overflow-hidden">
      {/* ── Control bar ── */}
      <div className="flex-none px-5 py-2.5 bg-background-card border-b border-gray-700 flex flex-wrap items-center gap-4 z-10">
        {/* k slider */}
        <div className="flex items-center gap-2">
          <span className="text-sm text-text-secondary">k =</span>
          <input
            type="range"
            min={4}
            max={30}
            value={pendingK}
            onChange={e => setPendingK(Number(e.target.value))}
            className="w-24 accent-indigo-500"
          />
          <span className="text-sm font-bold text-text-primary w-6">{pendingK}</span>
          <button
            onClick={applyK}
            disabled={loading}
            className="text-xs px-3 py-1.5 rounded-lg bg-accent-primary text-white hover:opacity-80 disabled:opacity-40"
          >
            {loading ? '⏳ …' : '▶ Calculer'}
          </button>
        </div>
        {/* View tabs */}
        <div className="flex rounded-lg overflow-hidden border border-gray-600">
          {tabs.map(v => {
            const tone = view === v
              ? 'bg-accent-primary text-white'
              : 'bg-background-secondary text-text-secondary hover:text-text-primary';
            return (
              <button
                key={v}
                onClick={() => setView(v)}
                className={`text-xs px-3 py-1.5 transition-colors ${tone}`}
              >
                {v === 'cards' ? '⊞ Tableau de bord' : '⬡ Graphe de relations'}
              </button>
            );
          })}
        </div>
        {/* Stats */}
        {stats && !loading && (
          <div className="flex gap-4 ml-auto text-xs flex-wrap">
            <Stat label="clusters" value={stats.total_clusters} />
            <Stat label="IPs" value={stats.total_ips.toLocaleString()} />
            <Stat label="bots" value={stats.bot_ips.toLocaleString()} color="text-red-400" />
            <Stat label="suspects" value={stats.high_risk_ips.toLocaleString()} color="text-orange-400" />
            <span className="text-text-disabled">{stats.elapsed_s}s</span>
          </div>
        )}
      </div>
      {/* ── Error ── */}
      {error && (
        <div className="absolute inset-0 flex items-center justify-center z-20">
          <div className="bg-red-900/90 text-white rounded-2xl p-8 text-center max-w-sm">
            <p className="text-4xl mb-3"></p>
            <p className="font-bold">Erreur de clustering</p>
            <p className="text-sm text-red-300 mt-2">{error}</p>
            <button onClick={() => fetchData(k)} className="mt-4 text-sm px-4 py-2 bg-red-600 rounded-lg hover:bg-red-500">
              Réessayer
            </button>
          </div>
        </div>
      )}
      {/* ── Loading ── */}
      {loading && (
        <div className="absolute inset-0 flex items-center justify-center z-20 bg-background/75 backdrop-blur-sm">
          <div className="text-center">
            <div className="text-5xl animate-spin mb-4"></div>
            <p className="text-text-primary font-semibold">Calcul K-means++ en cours</p>
            <p className="text-text-disabled text-sm mt-1">Normalisation 21 features · PCA-2D · Nommage automatique</p>
          </div>
        </div>
      )}
      {/* ── Main content ── */}
      {data && !loading && (
        <div className="flex-1 flex overflow-hidden">
          {view === 'cards' ? (
            <CardGridView
              nodes={data.nodes}
              selectedId={selectedId}
              onSelect={toggleSelected}
            />
          ) : (
            <GraphViewWrapper
              data={data}
              selectedId={selectedId}
              onSelect={toggleSelected}
            />
          )}
          {/* Sidebar */}
          {selected && (
            <ClusterSidebar
              cluster={selected}
              onClose={() => setSelected(null)}
            />
          )}
        </div>
      )}
    </div>
  );
}
// ─── Petit composant stat ─────────────────────────────────────────────────────
/** Inline "value label" pair for the control bar; the value is bold and tinted with `color`. */
function Stat(
  { label, value, color = 'text-text-primary' }: { label: string; value: string | number; color?: string },
) {
  const emphasised = <b className={color}>{value}</b>;
  return (
    <span className="text-text-secondary">
      {emphasised} {label}
    </span>
  );
}

View File

@ -9,9 +9,12 @@ interface TcpSpoofingOverview {
unique_ips: number; unique_ips: number;
no_tcp_data: number; no_tcp_data: number;
with_tcp_data: number; with_tcp_data: number;
linux_fingerprint: number; linux_mac_fingerprint: number;
windows_fingerprint: number; windows_fingerprint: number;
cisco_bsd_fingerprint: number;
bot_scanner_fingerprint: number;
ttl_distribution: { ttl: number; count: number; ips: number }[]; ttl_distribution: { ttl: number; count: number; ips: number }[];
mss_distribution: { mss: number; count: number; ips: number }[];
window_size_distribution: { window_size: number; count: number }[]; window_size_distribution: { window_size: number; count: number }[];
} }
@ -20,10 +23,19 @@ interface TcpSpoofingItem {
ja4: string; ja4: string;
tcp_ttl: number; tcp_ttl: number;
tcp_window_size: number; tcp_window_size: number;
tcp_win_scale: number;
tcp_mss: number;
hits: number;
first_ua: string; first_ua: string;
suspected_os: string; suspected_os: string;
initial_ttl: number;
hop_count: number;
confidence: number;
network_path: string;
is_bot_tool: boolean;
declared_os: string; declared_os: string;
spoof_flag: boolean; spoof_flag: boolean;
spoof_reason: string;
} }
interface OsMatrixEntry { interface OsMatrixEntry {
@ -31,6 +43,7 @@ interface OsMatrixEntry {
declared_os: string; declared_os: string;
count: number; count: number;
is_spoof: boolean; is_spoof: boolean;
is_bot_tool: boolean;
} }
type ActiveTab = 'detections' | 'matrix'; type ActiveTab = 'detections' | 'matrix';
@ -41,12 +54,49 @@ function formatNumber(n: number): string {
return n.toLocaleString('fr-FR'); return n.toLocaleString('fr-FR');
} }
function ttlColor(ttl: number): string { function confidenceBar(conf: number): JSX.Element {
if (ttl === 0) return 'text-threat-critical'; const pct = Math.round(conf * 100);
if (ttl < 48 || ttl > 200) return 'text-threat-critical'; const color =
if (ttl < 60 || (ttl > 70 && ttl <= 80)) return 'text-threat-medium'; pct >= 85 ? 'bg-threat-low' :
if (ttl >= 60 && ttl <= 70) return 'text-threat-low'; pct >= 65 ? 'bg-threat-medium' :
return 'text-text-secondary'; pct >= 45 ? 'bg-accent-primary' :
'bg-text-disabled';
return (
<div className="flex items-center gap-2">
<div className="h-1.5 w-16 bg-background-secondary rounded-full overflow-hidden">
<div className={`h-full rounded-full ${color}`} style={{ width: `${pct}%` }} />
</div>
<span className="text-xs text-text-secondary">{pct}%</span>
</div>
);
}
function mssLabel(mss: number): string {
if (mss >= 1460) return 'Ethernet';
if (mss >= 1452) return 'PPPoE';
if (mss >= 1420) return 'VPN';
if (mss >= 1380) return 'VPN/Tunnel';
if (mss > 0) return 'Bas débit';
return '—';
}
function mssColor(mss: number): string {
if (mss >= 1460) return 'text-threat-low';
if (mss >= 1436) return 'text-text-secondary';
if (mss >= 1380) return 'text-threat-medium';
return 'text-threat-critical';
}
function osIcon(name: string): string {
const n = name.toLowerCase();
if (n.includes('bot') || n.includes('scanner') || n.includes('mirai') || n.includes('zmap')) return '🤖';
if (n.includes('windows')) return '🪟';
if (n.includes('ios') || n.includes('macos')) return '🍎';
if (n.includes('android')) return '🤖';
if (n.includes('linux')) return '🐧';
if (n.includes('cisco') || n.includes('cdn') || n.includes('réseau')) return '🔌';
if (n.includes('bsd')) return '😈';
return '❓';
} }
// ─── Sub-components ─────────────────────────────────────────────────────────── // ─── Sub-components ───────────────────────────────────────────────────────────
@ -92,50 +142,84 @@ function TcpDetectionsTable({
render: (v: string) => <span className="font-mono text-xs text-text-primary">{v}</span>, render: (v: string) => <span className="font-mono text-xs text-text-primary">{v}</span>,
}, },
{ {
key: 'ja4', key: 'tcp_ttl',
label: 'JA4', label: 'TTL obs. / init.',
render: (v: string) => ( align: 'right',
<span className="font-mono text-xs text-text-secondary"> render: (_: number, row: TcpSpoofingItem) => (
{v ? `${v.slice(0, 14)}` : '—'} <span className="font-mono text-xs">
<span className="text-text-secondary">{row.tcp_ttl}</span>
<span className="text-text-disabled mx-1">/</span>
<span className="text-accent-primary font-semibold">{row.initial_ttl}</span>
{row.hop_count >= 0 && (
<span className="text-text-disabled ml-1 text-[10px]">({row.hop_count} hops)</span>
)}
</span> </span>
), ),
}, },
{ {
key: 'tcp_ttl', key: 'tcp_mss',
label: 'TTL observé', label: 'MSS',
align: 'right', align: 'right',
render: (v: number) => ( render: (v: number) => (
<span className={`font-mono font-semibold ${ttlColor(v)}`}>{v}</span> <span className={`font-mono text-xs ${mssColor(v)}`} title={mssLabel(v)}>
{v || '—'} <span className="text-[10px] text-text-disabled">{mssLabel(v)}</span>
</span>
), ),
}, },
{ {
key: 'tcp_window_size', key: 'tcp_win_scale',
label: 'Fenêtre TCP', label: 'Scale',
align: 'right', align: 'right',
render: (v: number) => ( render: (v: number) => (
<span className="text-text-secondary text-xs">{formatNumber(v)}</span> <span className="font-mono text-xs text-text-secondary">{v}</span>
), ),
}, },
{ {
key: 'suspected_os', key: 'suspected_os',
label: 'OS suspecté', label: 'OS suspecté (TCP)',
render: (v: string) => <span className="text-text-primary text-xs">{v || '—'}</span>, render: (v: string, row: TcpSpoofingItem) => (
<span className={`text-xs flex items-center gap-1 ${row.is_bot_tool ? 'text-threat-critical font-semibold' : 'text-text-primary'}`}>
<span>{osIcon(v)}</span>
<span>{v || '—'}</span>
</span>
),
},
{
key: 'confidence',
label: 'Confiance',
render: (v: number) => confidenceBar(v),
},
{
key: 'network_path',
label: 'Réseau',
render: (v: string) => <span className="text-xs text-text-secondary">{v || '—'}</span>,
}, },
{ {
key: 'declared_os', key: 'declared_os',
label: 'OS déclaré', label: 'OS déclaré (UA)',
render: (v: string) => <span className="text-text-secondary text-xs">{v || '—'}</span>, render: (v: string) => <span className="text-xs text-text-secondary">{v || '—'}</span>,
}, },
{ {
key: 'spoof_flag', key: 'spoof_flag',
label: 'Spoof', label: 'Verdict',
sortable: false, sortable: false,
render: (v: boolean) => render: (v: boolean, row: TcpSpoofingItem) => {
v ? ( if (row.is_bot_tool) {
<span className="bg-threat-critical/20 text-threat-critical text-xs px-2 py-0.5 rounded-full"> return (
<span className="bg-threat-critical/20 text-threat-critical text-xs px-2 py-0.5 rounded-full whitespace-nowrap" title={row.spoof_reason}>
🤖 Bot/Scanner
</span>
);
}
if (v) {
return (
<span className="bg-threat-high/20 text-threat-high text-xs px-2 py-0.5 rounded-full whitespace-nowrap" title={row.spoof_reason}>
🚨 Spoof 🚨 Spoof
</span> </span>
) : null, );
}
return null;
},
}, },
{ {
key: '_actions', key: '_actions',
@ -157,7 +241,7 @@ function TcpDetectionsTable({
data={items} data={items}
columns={columns} columns={columns}
rowKey="ip" rowKey="ip"
defaultSortKey="tcp_ttl" defaultSortKey="hits"
emptyMessage="Aucune détection" emptyMessage="Aucune détection"
compact compact
/> />
@ -248,11 +332,12 @@ export function TcpSpoofingView() {
const filteredItems = items.filter( const filteredItems = items.filter(
(item) => (item) =>
(!spoofOnly || item.spoof_flag) && (!spoofOnly || item.spoof_flag || item.is_bot_tool) &&
(!filterText || (!filterText ||
item.ip.includes(filterText) || item.ip.includes(filterText) ||
item.suspected_os.toLowerCase().includes(filterText.toLowerCase()) || item.suspected_os.toLowerCase().includes(filterText.toLowerCase()) ||
item.declared_os.toLowerCase().includes(filterText.toLowerCase())) item.declared_os.toLowerCase().includes(filterText.toLowerCase()) ||
item.network_path.toLowerCase().includes(filterText.toLowerCase()))
); );
// Build matrix axes // Build matrix axes
@ -280,7 +365,7 @@ export function TcpSpoofingView() {
<div> <div>
<h1 className="text-2xl font-bold text-text-primary">🧬 Spoofing TCP/OS</h1> <h1 className="text-2xl font-bold text-text-primary">🧬 Spoofing TCP/OS</h1>
<p className="text-text-secondary mt-1"> <p className="text-text-secondary mt-1">
Détection des incohérences entre TTL/fenêtre TCP et l'OS déclaré. Fingerprinting multi-signal (TTL + MSS + fenêtre + scale) détection bots, spoofs et anomalies TCP.
</p> </p>
</div> </div>
@ -292,15 +377,60 @@ export function TcpSpoofingView() {
) : overview ? ( ) : overview ? (
<> <>
<div className="grid grid-cols-4 gap-4"> <div className="grid grid-cols-4 gap-4">
<StatCard label="Total entrées" value={formatNumber(overview.total_entries)} accent="text-text-primary" /> <StatCard label="Avec données TCP" value={formatNumber(overview.with_tcp_data)} accent="text-text-primary" />
<StatCard label="Avec données TCP" value={formatNumber(overview.with_tcp_data)} accent="text-threat-medium" /> <StatCard label="Fingerprint Linux/Mac" value={formatNumber(overview.linux_mac_fingerprint)} accent="text-threat-low" />
<StatCard label="Fingerprint Linux" value={formatNumber(overview.linux_fingerprint)} accent="text-threat-low" />
<StatCard label="Fingerprint Windows" value={formatNumber(overview.windows_fingerprint)} accent="text-accent-primary" /> <StatCard label="Fingerprint Windows" value={formatNumber(overview.windows_fingerprint)} accent="text-accent-primary" />
<StatCard label="🤖 Bots/Scanners détectés" value={formatNumber(overview.bot_scanner_fingerprint)} accent="text-threat-critical" />
</div>
<div className="grid grid-cols-2 gap-4">
{/* Distribution MSS */}
<div className="bg-background-card border border-border rounded-lg p-4">
<h3 className="text-sm font-semibold text-text-primary mb-3">Distribution MSS (type de réseau)</h3>
<div className="space-y-1.5">
{overview.mss_distribution.map((m) => {
const label = m.mss >= 1460 ? 'Ethernet' : m.mss >= 1452 ? 'PPPoE' : m.mss >= 1420 ? 'VPN léger' : m.mss >= 1380 ? 'VPN/Tunnel' : 'Bas débit';
const color = m.mss >= 1460 ? 'bg-threat-low' : m.mss >= 1436 ? 'bg-accent-primary' : m.mss >= 1380 ? 'bg-threat-medium' : 'bg-threat-critical';
const maxCount = overview.mss_distribution[0]?.count || 1;
return (
<div key={m.mss} className="flex items-center gap-2 text-xs">
<span className="text-text-disabled w-12 text-right font-mono">{m.mss}</span>
<div className="flex-1 h-2 bg-background-secondary rounded-full overflow-hidden">
<div className={`h-full rounded-full ${color}`} style={{ width: `${(m.count / maxCount) * 100}%` }} />
</div>
<span className="text-text-secondary w-16">{formatNumber(m.count)}</span>
<span className="text-text-disabled w-20">{label}</span>
</div>
);
})}
</div>
</div>
{/* Distribution TTL */}
<div className="bg-background-card border border-border rounded-lg p-4">
<h3 className="text-sm font-semibold text-text-primary mb-3">Distribution TTL observé</h3>
<div className="space-y-1.5">
{overview.ttl_distribution.map((t) => {
const family = t.ttl <= 64 ? 'Linux/Mac' : t.ttl <= 128 ? 'Windows' : 'Cisco/BSD';
const color = t.ttl <= 64 ? 'bg-threat-low' : t.ttl <= 128 ? 'bg-accent-primary' : 'bg-threat-medium';
const maxCount = overview.ttl_distribution[0]?.count || 1;
return (
<div key={t.ttl} className="flex items-center gap-2 text-xs">
<span className="text-text-disabled w-8 text-right font-mono">{t.ttl}</span>
<div className="flex-1 h-2 bg-background-secondary rounded-full overflow-hidden">
<div className={`h-full rounded-full ${color}`} style={{ width: `${(t.count / maxCount) * 100}%` }} />
</div>
<span className="text-text-secondary w-16">{formatNumber(t.count)}</span>
<span className="text-text-disabled w-20">{family}</span>
</div>
);
})}
</div>
</div>
</div> </div>
<div className="bg-background-card border border-border rounded-lg px-4 py-3 text-sm text-text-secondary flex items-center gap-2"> <div className="bg-background-card border border-border rounded-lg px-4 py-3 text-sm text-text-secondary flex items-center gap-2">
<span className="text-threat-medium"></span> <span className="text-threat-medium"></span>
<span> <span>
<strong className="text-text-primary">{formatNumber(overview.no_tcp_data)}</strong> entrées sans données TCP (TTL=0, passées par proxy/CDN) — exclues de l'analyse de corrélation. <strong className="text-text-primary">{formatNumber(overview.no_tcp_data)}</strong> entrées sans données TCP (passées par proxy/CDN) exclues.{' '}
<strong className="text-threat-critical">{formatNumber(overview.bot_scanner_fingerprint)}</strong> entrées avec signature Masscan/scanner identifiée (win=5808, mss=1452, scale=4).
</span> </span>
</div> </div>
</> </>
@ -341,7 +471,7 @@ export function TcpSpoofingView() {
onChange={(e) => setSpoofOnly(e.target.checked)} onChange={(e) => setSpoofOnly(e.target.checked)}
className="accent-accent-primary" className="accent-accent-primary"
/> />
Spoofs uniquement (TTL corrélé + OS mismatch) Spoofs &amp; Bots uniquement (corrélation confirmée)
</label> </label>
</div> </div>
<div className="bg-background-secondary rounded-lg border border-border overflow-hidden"> <div className="bg-background-secondary rounded-lg border border-border overflow-hidden">
@ -398,17 +528,24 @@ export function TcpSpoofingView() {
const dos = declaredOSes[ci]; const dos = declaredOSes[ci];
const entry = matrix.find((e) => e.suspected_os === sos && e.declared_os === dos); const entry = matrix.find((e) => e.suspected_os === sos && e.declared_os === dos);
const isSpoofCell = entry?.is_spoof ?? false; const isSpoofCell = entry?.is_spoof ?? false;
const isBotCell = entry?.is_bot_tool ?? false;
return ( return (
<td <td
key={ci} key={ci}
className={`px-3 py-2 text-center border border-border font-mono ${ className={`px-3 py-2 text-center border border-border font-mono ${
isSpoofCell && count > 0 isBotCell && count > 0
? 'bg-threat-critical/25 text-threat-critical font-bold' ? 'bg-threat-critical/30 text-threat-critical font-bold'
: isSpoofCell && count > 0
? 'bg-threat-high/25 text-threat-high font-bold'
: matrixCellColor(count) + (count > 0 ? ' text-text-primary' : ' text-text-disabled') : matrixCellColor(count) + (count > 0 ? ' text-text-primary' : ' text-text-disabled')
}`} }`}
title={isSpoofCell ? '🚨 OS mismatch confirmé' : undefined} title={isBotCell ? '🤖 Outil de scan/bot' : isSpoofCell ? '🚨 OS mismatch confirmé' : undefined}
> >
{count > 0 ? (isSpoofCell ? `🚨 ${formatNumber(count)}` : formatNumber(count)) : '—'} {count > 0
? isBotCell ? `🤖 ${formatNumber(count)}`
: isSpoofCell ? `🚨 ${formatNumber(count)}`
: formatNumber(count)
: '—'}
</td> </td>
); );
})} })}