From e2db8ca84e4e3090cb790988c5ba49d840453e36 Mon Sep 17 00:00:00 2001 From: SOC Analyst Date: Wed, 18 Mar 2026 18:22:57 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20clustering=20multi-m=C3=A9triques=20+?= =?UTF-8?q?=20TCP=20fingerprinting=20am=C3=A9lior=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TCP fingerprinting: 20 signatures OS (p0f-style), scoring multi-signal TTL/MSS/scale/fenêtre, détection Masscan 97% confiance, réseau path (Ethernet/PPPoE/VPN/Tunnel), estimation hop-count - Clustering IPs: K-means++ (Arthur & Vassilvitskii 2007) sur 21 features TCP stack + anomalie ML + TLS/protocole + navigateur + temporel PCA-2D par puissance itérative (Hotelling) pour positionnement - Visualisation redesign: 2 vues lisibles - Tableau de bord: grille de cartes groupées par niveau de risque (Bots / Suspects / Légitimes), métriques clés + mini-barres - Graphe de relations: ReactFlow avec nœuds-cartes en colonnes par niveau de menace, arêtes colorées par similarité, légende - Sidebar: RadarChart comportemental + toutes métriques + export CSV Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- backend/main.py | 3 +- backend/routes/clustering.py | 458 +++++++++++ backend/routes/investigation_summary.py | 54 +- backend/routes/tcp_spoofing.py | 279 +++---- backend/services/clustering_engine.py | 328 ++++++++ backend/services/tcp_fingerprint.py | 436 ++++++++++ frontend/src/App.tsx | 6 +- frontend/src/components/ClusteringView.tsx | 847 ++++++++++++++++++++ frontend/src/components/TcpSpoofingView.tsx | 221 ++++- 9 files changed, 2430 insertions(+), 202 deletions(-) create mode 100644 backend/routes/clustering.py create mode 100644 backend/services/clustering_engine.py create mode 100644 backend/services/tcp_fingerprint.py create mode 100644 frontend/src/components/ClusteringView.tsx diff --git a/backend/main.py b/backend/main.py index 3c94346..9afeffc 100644 --- a/backend/main.py +++ b/backend/main.py @@ 
-13,7 +13,7 @@ import os from .config import settings from .database import db from .routes import metrics, detections, variability, attributes, analysis, entities, incidents, audit, reputation, fingerprints -from .routes import bruteforce, tcp_spoofing, header_fingerprint, heatmap, botnets, rotation, ml_features, investigation_summary, search +from .routes import bruteforce, tcp_spoofing, header_fingerprint, heatmap, botnets, rotation, ml_features, investigation_summary, search, clustering # Configuration logging logging.basicConfig( @@ -84,6 +84,7 @@ app.include_router(rotation.router) app.include_router(ml_features.router) app.include_router(investigation_summary.router) app.include_router(search.router) +app.include_router(clustering.router) # Route pour servir le frontend diff --git a/backend/routes/clustering.py b/backend/routes/clustering.py new file mode 100644 index 0000000..611f7fe --- /dev/null +++ b/backend/routes/clustering.py @@ -0,0 +1,458 @@ +""" +Clustering d'IPs multi-métriques — backend ReactFlow. + +Features utilisées (21 dimensions) : + TCP stack : TTL initial, MSS, scale, fenêtre TCP + Comportement : vélocité, POST ratio, fuzzing, assets, accès direct + Anomalie ML : score, IP-ID zéro + TLS/Protocole: ALPN mismatch, ALPN absent, efficacité H2 + Navigateur : browser score, headless, ordre headers, UA-CH mismatch + Temporel : entropie, diversité JA4, UA rotatif + +Algorithme : + 1. Échantillonnage stratifié (top détections + top hits) + 2. Construction + normalisation des vecteurs de features + 3. K-means++ (Arthur & Vassilvitskii, 2007) + 4. PCA-2D par power iteration pour les positions ReactFlow + 5. Nommage automatique par features dominantes du centroïde + 6. 
Calcul des arêtes : k-NN dans l'espace des features +""" +from __future__ import annotations +import math +import time +import hashlib +from typing import Optional +from fastapi import APIRouter, HTTPException, Query + +from ..database import db +from ..services.clustering_engine import ( + FEATURES, FEATURE_KEYS, FEATURE_NORMS, FEATURE_NAMES, N_FEATURES, + build_feature_vector, kmeans_pp, pca_2d, + name_cluster, risk_score_from_centroid, _mean_vec, +) + +router = APIRouter(prefix="/api/clustering", tags=["clustering"]) + +# ─── Cache en mémoire ───────────────────────────────────────────────────────── +# Stocke (cluster_id → liste d'IPs) pour le drill-down +# + timestamp de dernière mise à jour +_cache: dict = { + "assignments": {}, # ip+ja4 → cluster_idx + "cluster_ips": {}, # cluster_idx → [(ip, ja4)] + "params": {}, # k, ts +} + +# ─── Couleurs ───────────────────────────────────────────────────────────────── +_THREAT_COLOR = { + 0.92: "#dc2626", # Bot scanner + 0.70: "#ef4444", # Critique + 0.45: "#f97316", # Élevé + 0.25: "#eab308", # Modéré + 0.00: "#6b7280", # Sain / inconnu +} + +def _risk_to_color(risk: float) -> str: + for threshold, color in sorted(_THREAT_COLOR.items(), reverse=True): + if risk >= threshold: + return color + return "#6b7280" + + +# ─── SQL ────────────────────────────────────────────────────────────────────── + +_SQL_FEATURES = """ +SELECT + replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip, + t.ja4, + any(t.tcp_ttl_raw) AS ttl, + any(t.tcp_win_raw) AS win, + any(t.tcp_scale_raw) AS scale, + any(t.tcp_mss_raw) AS mss, + any(t.first_ua) AS ua, + sum(t.hits) AS hits, + + avg(abs(ml.anomaly_score)) AS avg_score, + avg(ml.hit_velocity) AS avg_velocity, + avg(ml.fuzzing_index) AS avg_fuzzing, + avg(ml.is_headless) AS pct_headless, + avg(ml.post_ratio) AS avg_post, + avg(ml.ip_id_zero_ratio) AS ip_id_zero, + avg(ml.temporal_entropy) AS entropy, + avg(ml.modern_browser_score) AS browser_score, + avg(ml.alpn_http_mismatch) AS 
alpn_mismatch, + avg(ml.is_alpn_missing) AS alpn_missing, + avg(ml.multiplexing_efficiency) AS h2_eff, + avg(ml.header_order_confidence) AS hdr_conf, + avg(ml.ua_ch_mismatch) AS ua_ch_mismatch, + avg(ml.asset_ratio) AS asset_ratio, + avg(ml.direct_access_ratio) AS direct_ratio, + avg(ml.distinct_ja4_count) AS ja4_count, + max(ml.is_ua_rotating) AS ua_rotating, + + max(ml.threat_level) AS threat, + any(ml.country_code) AS country, + any(ml.asn_org) AS asn_org +FROM mabase_prod.agg_host_ip_ja4_1h t +LEFT JOIN mabase_prod.ml_detected_anomalies ml + ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4 + AND ml.detected_at >= now() - INTERVAL 24 HOUR +WHERE t.window_start >= now() - INTERVAL 24 HOUR + AND t.tcp_ttl_raw > 0 +GROUP BY t.src_ip, t.ja4 +ORDER BY + -- Stratégie : IPs anormales en premier, puis fort trafic + -- Cela garantit que les bots Masscan (anomalie=0.97, hits=1-2) sont inclus + avg(abs(ml.anomaly_score)) DESC, + sum(t.hits) DESC +LIMIT %(limit)s +""" + +# Noms des colonnes SQL dans l'ordre +_SQL_COLS = [ + "ip", "ja4", "ttl", "win", "scale", "mss", "ua", "hits", + "avg_score", "avg_velocity", "avg_fuzzing", "pct_headless", "avg_post", + "ip_id_zero", "entropy", "browser_score", "alpn_mismatch", "alpn_missing", + "h2_eff", "hdr_conf", "ua_ch_mismatch", "asset_ratio", "direct_ratio", + "ja4_count", "ua_rotating", "threat", "country", "asn_org", +] + + +# ─── Endpoints ──────────────────────────────────────────────────────────────── + +@router.get("/clusters") +async def get_clusters( + k: int = Query(14, ge=4, le=30, description="Nombre de clusters"), + n_samples: int = Query(3000, ge=500, le=8000, description="Taille de l'échantillon"), +): + """ + Clustering multi-métriques des IPs. 
+ + Retourne les nœuds (clusters) + arêtes pour ReactFlow, avec : + - positions 2D issues de PCA sur les 21 features + - profil radar des features par cluster (normalisé [0,1]) + - statistiques détaillées (moyennes brutes des features) + - sample d'IPs représentatives + """ + t0 = time.time() + try: + result = db.query(_SQL_FEATURES, {"limit": n_samples}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"ClickHouse: {e}") + + # ── Construction des vecteurs de features ───────────────────────────── + rows: list[dict] = [] + for row in result.result_rows: + d = {col: row[i] for i, col in enumerate(_SQL_COLS)} + rows.append(d) + + if len(rows) < k: + raise HTTPException(status_code=400, detail="Pas assez de données pour ce k") + + points = [build_feature_vector(r) for r in rows] + + # ── K-means++ ──────────────────────────────────────────────────────── + km = kmeans_pp(points, k=k, max_iter=60, seed=42) + + # ── PCA-2D sur les centroïdes ───────────────────────────────────────── + # On projette les centroïdes dans l'espace PCA des données + # → les positions relatives reflètent la variance des données + coords_all = pca_2d(points) + # Moyenne des positions PCA par cluster = position 2D du centroïde + cluster_xs: list[list[float]] = [[] for _ in range(k)] + cluster_ys: list[list[float]] = [[] for _ in range(k)] + for i, label in enumerate(km.labels): + cluster_xs[label].append(coords_all[i][0]) + cluster_ys[label].append(coords_all[i][1]) + + centroid_2d: list[tuple[float, float]] = [] + for j in range(k): + if cluster_xs[j]: + cx = sum(cluster_xs[j]) / len(cluster_xs[j]) + cy = sum(cluster_ys[j]) / len(cluster_ys[j]) + else: + cx, cy = 0.5, 0.5 + centroid_2d.append((cx, cy)) + + # ── Agrégation des statistiques par cluster ─────────────────────────── + cluster_rows: list[list[dict]] = [[] for _ in range(k)] + cluster_members: list[list[tuple[str, str]]] = [[] for _ in range(k)] + for i, label in enumerate(km.labels): + 
cluster_rows[label].append(rows[i]) + cluster_members[label].append((rows[i]["ip"], rows[i]["ja4"])) + + # Mise à jour du cache pour le drill-down + _cache["cluster_ips"] = {j: cluster_members[j] for j in range(k)} + _cache["params"] = {"k": k, "ts": t0} + + # ── Construction des nœuds ReactFlow ───────────────────────────────── + CANVAS_W, CANVAS_H = 1400, 780 + + nodes = [] + for j in range(k): + if not cluster_rows[j]: + continue + + # Statistiques brutes moyennées + def avg_feat(key: str) -> float: + vals = [float(r.get(key) or 0) for r in cluster_rows[j]] + return sum(vals) / len(vals) if vals else 0.0 + + mean_ttl = avg_feat("ttl") + mean_mss = avg_feat("mss") + mean_scale = avg_feat("scale") + mean_win = avg_feat("win") + mean_score = avg_feat("avg_score") + mean_vel = avg_feat("avg_velocity") + mean_fuzz = avg_feat("avg_fuzzing") + mean_hless = avg_feat("pct_headless") + mean_post = avg_feat("avg_post") + mean_asset = avg_feat("asset_ratio") + mean_direct= avg_feat("direct_ratio") + mean_alpn = avg_feat("alpn_mismatch") + mean_h2 = avg_feat("h2_eff") + mean_hconf = avg_feat("hdr_conf") + mean_ua_ch = avg_feat("ua_ch_mismatch") + mean_entr = avg_feat("entropy") + mean_ja4 = avg_feat("ja4_count") + mean_ip_id = avg_feat("ip_id_zero") + mean_brow = avg_feat("browser_score") + mean_uarot = avg_feat("ua_rotating") + + ip_count = len(set(r["ip"] for r in cluster_rows[j])) + hit_count = int(sum(float(r.get("hits") or 0) for r in cluster_rows[j])) + + # Pays / ASN / Menace dominants + threats = [str(r.get("threat") or "") for r in cluster_rows[j] if r.get("threat")] + countries = [str(r.get("country") or "") for r in cluster_rows[j] if r.get("country")] + orgs = [str(r.get("asn_org") or "") for r in cluster_rows[j] if r.get("asn_org")] + + def topk(lst: list[str], n: int = 5) -> list[str]: + from collections import Counter + return [v for v, _ in Counter(lst).most_common(n) if v] + + raw_stats = { + "mean_ttl": mean_ttl, "mean_mss": mean_mss, + "mean_scale": 
mean_scale, + } + label = name_cluster(km.centroids[j], raw_stats) + risk = risk_score_from_centroid(km.centroids[j]) + color = _risk_to_color(risk) + + # Profil radar normalisé (valeurs centroïde [0,1]) + radar = [ + {"feature": name, "value": round(km.centroids[j][i], 4)} + for i, name in enumerate(FEATURE_NAMES) + ] + + # Position 2D (PCA normalisée → pixels ReactFlow) + px_x = centroid_2d[j][0] * CANVAS_W * 0.85 + 80 + px_y = (1 - centroid_2d[j][1]) * CANVAS_H * 0.85 + 50 # inverser y (haut=risque) + + # Rayon ∝ √ip_count + radius = max(18, min(90, int(math.sqrt(ip_count) * 0.3))) + + # Sample IPs (top 8 par hits) + sample_rows = sorted(cluster_rows[j], key=lambda r: float(r.get("hits") or 0), reverse=True)[:8] + sample_ips = [r["ip"] for r in sample_rows] + sample_ua = str(cluster_rows[j][0].get("ua") or "") + + cluster_id = f"c{j}_k{k}" + + nodes.append({ + "id": cluster_id, + "label": label, + "cluster_idx": j, + "x": round(px_x, 1), + "y": round(px_y, 1), + "radius": radius, + "color": color, + "risk_score": risk, + + # Caractéristiques TCP + "mean_ttl": round(mean_ttl, 1), + "mean_mss": round(mean_mss, 0), + "mean_scale": round(mean_scale, 1), + "mean_win": round(mean_win, 0), + + # Comportement HTTP + "mean_score": round(mean_score, 4), + "mean_velocity": round(mean_vel, 3), + "mean_fuzzing": round(mean_fuzz, 3), + "mean_headless": round(mean_hless, 3), + "mean_post": round(mean_post, 3), + "mean_asset": round(mean_asset, 3), + "mean_direct": round(mean_direct, 3), + + # TLS / Protocole + "mean_alpn_mismatch": round(mean_alpn, 3), + "mean_h2_eff": round(mean_h2, 3), + "mean_hdr_conf": round(mean_hconf, 3), + "mean_ua_ch": round(mean_ua_ch, 3), + + # Temporel + "mean_entropy": round(mean_entr, 3), + "mean_ja4_diversity": round(mean_ja4, 3), + "mean_ip_id_zero": round(mean_ip_id, 3), + "mean_browser_score": round(mean_brow, 1), + "mean_ua_rotating": round(mean_uarot, 3), + + # Meta + "ip_count": ip_count, + "hit_count": hit_count, + "top_threat": 
topk(threats, 1)[0] if topk(threats, 1) else "", + "top_countries": topk(countries, 5), + "top_orgs": topk(orgs, 5), + "sample_ips": sample_ips, + "sample_ua": sample_ua, + + # Profil radar pour visualisation + "radar": radar, + }) + + # ── Arêtes : k-NN dans l'espace des features ────────────────────────── + # Chaque cluster est connecté à ses 2 voisins les plus proches + edges = [] + seen: set[frozenset] = set() + centroids = km.centroids + + for i, ni in enumerate(nodes): + ci = ni["cluster_idx"] + # Distance² aux autres centroïdes + dists = [ + (j, nj["cluster_idx"], + sum((centroids[ci][d] - centroids[nj["cluster_idx"]][d]) ** 2 + for d in range(N_FEATURES))) + for j, nj in enumerate(nodes) if j != i + ] + dists.sort(key=lambda x: x[2]) + # 2 voisins les plus proches + for j, cj, dist2 in dists[:2]: + key = frozenset([ni["id"], nodes[j]["id"]]) + if key in seen: + continue + seen.add(key) + similarity = round(1.0 / (1.0 + math.sqrt(dist2)), 3) + edges.append({ + "id": f"e_{ni['id']}_{nodes[j]['id']}", + "source": ni["id"], + "target": nodes[j]["id"], + "similarity": similarity, + "weight": round(similarity * 5, 1), + }) + + # ── Stats globales ──────────────────────────────────────────────────── + total_ips = sum(n["ip_count"] for n in nodes) + total_hits = sum(n["hit_count"] for n in nodes) + bot_ips = sum(n["ip_count"] for n in nodes if n["risk_score"] > 0.40 or "🤖" in n["label"]) + high_risk = sum(n["ip_count"] for n in nodes if n["risk_score"] > 0.20) + + elapsed = round(time.time() - t0, 2) + + return { + "nodes": nodes, + "edges": edges, + "stats": { + "total_clusters": len(nodes), + "total_ips": total_ips, + "total_hits": total_hits, + "bot_ips": bot_ips, + "high_risk_ips": high_risk, + "n_samples": len(rows), + "k": k, + "elapsed_s": elapsed, + }, + "feature_names": FEATURE_NAMES, + } + + +@router.get("/cluster/{cluster_id}/ips") +async def get_cluster_ips( + cluster_id: str, + limit: int = Query(100, ge=1, le=500), + offset: int = Query(0, ge=0), +): 
+ """ + IPs appartenant à un cluster (depuis le cache de la dernière exécution). + Si le cache est expiré, retourne une erreur guidant vers /clusters. + """ + if not _cache.get("cluster_ips"): + raise HTTPException( + status_code=404, + detail="Cache expiré — appelez /api/clustering/clusters d'abord" + ) + + # Extrait l'index cluster depuis l'id (format: c{idx}_k{k}) + try: + idx = int(cluster_id.split("_")[0][1:]) + except (ValueError, IndexError): + raise HTTPException(status_code=400, detail="cluster_id invalide") + + members = _cache["cluster_ips"].get(idx, []) + if not members: + return {"ips": [], "total": 0, "cluster_id": cluster_id} + + total = len(members) + page_members = members[offset: offset + limit] + + # Requête SQL pour les détails de ces IPs spécifiques + ip_list = [m[0] for m in page_members] + ja4_list = [m[1] for m in page_members] + + if not ip_list: + return {"ips": [], "total": total, "cluster_id": cluster_id} + + # On ne peut pas facilement passer une liste en paramètre ClickHouse — + # on la construit directement (valeurs nettoyées) + safe_ips = [ip.replace("'", "") for ip in ip_list[:100]] + ip_filter = ", ".join(f"'{ip}'" for ip in safe_ips) + + sql = f""" + SELECT + replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS src_ip, + t.ja4, + any(t.tcp_ttl_raw) AS ttl, + any(t.tcp_win_raw) AS win, + any(t.tcp_scale_raw) AS scale, + any(t.tcp_mss_raw) AS mss, + sum(t.hits) AS hits, + any(t.first_ua) AS ua, + round(avg(abs(ml.anomaly_score)), 3) AS avg_score, + max(ml.threat_level) AS threat_level, + any(ml.country_code) AS country_code, + any(ml.asn_org) AS asn_org, + round(avg(ml.fuzzing_index), 2) AS fuzzing, + round(avg(ml.hit_velocity), 2) AS velocity + FROM mabase_prod.agg_host_ip_ja4_1h t + LEFT JOIN mabase_prod.ml_detected_anomalies ml + ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4 + AND ml.detected_at >= now() - INTERVAL 24 HOUR + WHERE t.window_start >= now() - INTERVAL 24 HOUR + AND replaceRegexpAll(toString(t.src_ip), '^::ffff:', 
'') IN ({ip_filter}) + GROUP BY t.src_ip, t.ja4 + ORDER BY hits DESC + """ + try: + result = db.query(sql) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + ips = [] + for row in result.result_rows: + ips.append({ + "ip": str(row[0]), + "ja4": str(row[1] or ""), + "tcp_ttl": int(row[2] or 0), + "tcp_win": int(row[3] or 0), + "tcp_scale": int(row[4] or 0), + "tcp_mss": int(row[5] or 0), + "hits": int(row[6] or 0), + "ua": str(row[7] or ""), + "avg_score": float(row[8] or 0), + "threat_level": str(row[9] or ""), + "country_code": str(row[10] or ""), + "asn_org": str(row[11] or ""), + "fuzzing": float(row[12] or 0), + "velocity": float(row[13] or 0), + }) + + return {"ips": ips, "total": total, "cluster_id": cluster_id} diff --git a/backend/routes/investigation_summary.py b/backend/routes/investigation_summary.py index 46bad70..d778a33 100644 --- a/backend/routes/investigation_summary.py +++ b/backend/routes/investigation_summary.py @@ -7,6 +7,7 @@ agg_host_ip_ja4_1h (rotation JA4), view_ip_recurrence, view_ai_features_1h. from fastapi import APIRouter, HTTPException from ..database import db +from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua router = APIRouter(prefix="/api/investigation", tags=["investigation"]) @@ -62,32 +63,45 @@ async def get_ip_full_summary(ip: str): "top_hosts": [str(h) for h in (bf_row[3] or [])] if bf_row else [], } - # ── 3. TCP spoofing ──────────────────────────────────────────────────── + # ── 3. 
TCP spoofing — fingerprinting multi-signal ───────────────────── tcp_sql = """ - SELECT tcp_ttl, first_ua - FROM mabase_prod.view_tcp_spoofing_detected + SELECT + any(tcp_ttl_raw) AS ttl, + any(tcp_win_raw) AS win, + any(tcp_scale_raw) AS scale, + any(tcp_mss_raw) AS mss, + any(first_ua) AS ua + FROM mabase_prod.agg_host_ip_ja4_1h WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s - AND tcp_ttl > 0 + AND window_start >= now() - INTERVAL 24 HOUR + AND tcp_ttl_raw > 0 LIMIT 1 """ tcp_res = db.query(tcp_sql, {"ip": clean_ip}) tcp_data = {"detected": False, "tcp_ttl": None, "suspected_os": None} if tcp_res.result_rows: - ttl = int(tcp_res.result_rows[0][0]) - if 52 <= ttl <= 65: - sus_os = "Linux/Mac" - elif 110 <= ttl <= 135: - sus_os = "Windows" - else: - sus_os = "Unknown" - ua = str(tcp_res.result_rows[0][1] or "") - dec_os = "Windows" if "Windows" in ua else ("macOS" if "Mac OS X" in ua else "Linux/Android" if "Linux" in ua else "Unknown") - spoof = sus_os != "Unknown" and dec_os != "Unknown" and sus_os != dec_os + r = tcp_res.result_rows[0] + ttl = int(r[0] or 0) + win = int(r[1] or 0) + scale = int(r[2] or 0) + mss = int(r[3] or 0) + ua = str(r[4] or "") + fp = fingerprint_os(ttl, win, scale, mss) + dec_os = declared_os_from_ua(ua) + spoof_res = detect_spoof(fp, dec_os) tcp_data = { - "detected": spoof, - "tcp_ttl": ttl, - "suspected_os": sus_os, - "declared_os": dec_os, + "detected": spoof_res.is_spoof, + "tcp_ttl": ttl, + "tcp_mss": mss, + "tcp_win_scale": scale, + "initial_ttl": fp.initial_ttl, + "hop_count": fp.hop_count, + "suspected_os": fp.os_name, + "declared_os": dec_os, + "confidence": fp.confidence, + "network_path": fp.network_path, + "is_bot_tool": fp.is_bot_tool, + "spoof_reason": spoof_res.reason, } # ── 4. 
JA4 rotation ──────────────────────────────────────────────────── @@ -146,7 +160,9 @@ async def get_ip_full_summary(ip: str): risk = 0 risk += min(50, ml_data["max_score"] * 50) if bf_data["active"]: risk += 20 - if tcp_data["detected"]: risk += 15 + if tcp_data["detected"]: + if tcp_data.get("is_bot_tool"): risk += 30 # outil de scan connu + else: risk += 15 # spoof OS if rot_data["rotating"]: risk += min(15, rot_data["distinct_ja4_count"] * 3) if pers_data["persistent"]: risk += min(10, pers_data["recurrence"] * 2) risk = min(100, round(risk)) diff --git a/backend/routes/tcp_spoofing.py b/backend/routes/tcp_spoofing.py index e4b3b64..90cd196 100644 --- a/backend/routes/tcp_spoofing.py +++ b/backend/routes/tcp_spoofing.py @@ -1,130 +1,95 @@ """ -Endpoints pour la détection du TCP spoofing (TTL / window size anormaux) +Endpoints pour la détection du TCP spoofing / fingerprinting OS -Règle de corrélation : - - TTL=0 ou tcp_window_size=0 → données TCP absentes (proxy/LB) → pas de corrélation possible - - TTL 55-65 → fingerprint Linux/Mac (initial TTL 64) - - TTL 120-135 → fingerprint Windows (initial TTL 128) - - TTL 110-120 → fingerprint Windows (initial TTL 128, quelques sauts) - - Toute autre valeur → OS indéterminé → pas de flag spoofing - - spoof_flag = True UNIQUEMENT si OS fingerprinting TCP possible ET incompatible avec l'UA +Approche multi-signal (p0f-style) : + - TTL initial estimé → famille OS (Linux/Mac=64, Windows=128, Cisco/BSD=255) + - MSS → type de réseau (Ethernet=1460, PPPoE=1452, VPN=1380-1420) + - Taille de fenêtre → signature OS précise + - Facteur d'échelle → affine la version kernel/stack TCP + +Détection bots : signatures connues (Masscan/ZMap/Mirai) identifiées par combinaison +win+scale+mss indépendamment de l'UA. 
""" from fastapi import APIRouter, HTTPException, Query from ..database import db +from ..services.tcp_fingerprint import ( + fingerprint_os, + detect_spoof, + declared_os_from_ua, +) router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"]) -# Plages TTL qui permettent une corrélation fiable -_TTL_LINUX = (range(52, 66), "Linux/Mac") # initial 64, 1-12 sauts -_TTL_WINDOWS = (range(110, 136), "Windows") # initial 128, 1-18 sauts -_TTL_CISCO = (range(240, 256), "Cisco/BSD") # initial 255 - - -def _suspected_os(ttl: int) -> str: - """Retourne l'OS probable à partir du TTL observé. - Retourne 'Unknown' si le TTL ne permet pas une corrélation fiable - (TTL=0 = pas de données TCP, ou hors plage connue). - """ - if ttl <= 0: - return "Unknown" # Pas de données TCP (proxy/CDN) - for rng, name in (_TTL_LINUX, _TTL_WINDOWS, _TTL_CISCO): - if ttl in rng: - return name - return "Unknown" - - -def _declared_os(ua: str) -> str: - ua = ua or "" - if "Windows" in ua: - return "Windows" - if "Mac OS X" in ua: - return "macOS" - if "Linux" in ua or "Android" in ua: - return "Linux/Android" - return "Unknown" - - -def _is_spoof(suspected_os: str, declared_os: str) -> bool: - """Spoof confirmé uniquement si on a un fingerprint TCP fiable ET une incompatibilité d'OS.""" - if suspected_os == "Unknown" or declared_os == "Unknown": - return False # Pas de corrélation possible - # Linux/Mac fingerprint TCP mais UA déclare Windows - if suspected_os == "Linux/Mac" and declared_os == "Windows": - return True - # Windows fingerprint TCP mais UA déclare Linux/Android ou macOS - if suspected_os == "Windows" and declared_os in ("Linux/Android", "macOS"): - return True - return False - @router.get("/overview") async def get_tcp_spoofing_overview(): - """Statistiques globales : seules les entrées avec données TCP valides sont analysées.""" + """Statistiques globales avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale).""" try: sql = """ SELECT - count() AS total_entries, - 
uniq(src_ip) AS unique_ips, - countIf(tcp_ttl = 0) AS no_tcp_data, - countIf(tcp_ttl > 0) AS with_tcp_data, - countIf(tcp_ttl BETWEEN 52 AND 65) AS linux_fingerprint, - countIf(tcp_ttl BETWEEN 110 AND 135) AS windows_fingerprint - FROM mabase_prod.view_tcp_spoofing_detected + count() AS total_entries, + uniq(src_ip) AS unique_ips, + countIf(tcp_ttl_raw = 0) AS no_tcp_data, + countIf(tcp_ttl_raw > 0) AS with_tcp_data, + countIf(tcp_ttl_raw > 0 AND tcp_ttl_raw <= 64) AS linux_mac_fp, + countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp, + countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp, + countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp + FROM mabase_prod.agg_host_ip_ja4_1h + WHERE window_start >= now() - INTERVAL 24 HOUR """ result = db.query(sql) row = result.result_rows[0] - total_entries = int(row[0]) - unique_ips = int(row[1]) - no_tcp_data = int(row[2]) - with_tcp_data = int(row[3]) - linux_fp = int(row[4]) - windows_fp = int(row[5]) - # Distribution TTL uniquement pour les entrées avec données TCP valides + # Distribution TTL (top 15) ttl_sql = """ - SELECT - tcp_ttl, - count() AS cnt, - uniq(src_ip) AS ips - FROM mabase_prod.view_tcp_spoofing_detected - WHERE tcp_ttl > 0 - GROUP BY tcp_ttl - ORDER BY cnt DESC - LIMIT 15 + SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips + FROM mabase_prod.agg_host_ip_ja4_1h + WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 + GROUP BY ttl ORDER BY cnt DESC LIMIT 15 """ ttl_res = db.query(ttl_sql) - ttl_distribution = [ - {"ttl": int(r[0]), "count": int(r[1]), "ips": int(r[2])} - for r in ttl_res.result_rows - ] - # Distribution window_size pour entrées avec données TCP + # Distribution MSS — nouveau signal clé (top 12) + mss_sql = """ + SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips + FROM mabase_prod.agg_host_ip_ja4_1h + WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0 + GROUP BY mss ORDER BY cnt DESC LIMIT 12 + 
""" + mss_res = db.query(mss_sql) + + # Distribution fenêtre (top 10) win_sql = """ - SELECT - tcp_window_size, - count() AS cnt - FROM mabase_prod.view_tcp_spoofing_detected - WHERE tcp_ttl > 0 - GROUP BY tcp_window_size - ORDER BY cnt DESC - LIMIT 10 + SELECT tcp_win_raw AS win, count() AS cnt + FROM mabase_prod.agg_host_ip_ja4_1h + WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 + GROUP BY win ORDER BY cnt DESC LIMIT 10 """ win_res = db.query(win_sql) - window_size_distribution = [ - {"window_size": int(r[0]), "count": int(r[1])} - for r in win_res.result_rows - ] return { - "total_entries": total_entries, - "unique_ips": unique_ips, - "no_tcp_data": no_tcp_data, - "with_tcp_data": with_tcp_data, - "linux_fingerprint": linux_fp, - "windows_fingerprint": windows_fp, - "ttl_distribution": ttl_distribution, - "window_size_distribution": window_size_distribution, + "total_entries": int(row[0]), + "unique_ips": int(row[1]), + "no_tcp_data": int(row[2]), + "with_tcp_data": int(row[3]), + "linux_mac_fingerprint": int(row[4]), + "windows_fingerprint": int(row[5]), + "cisco_bsd_fingerprint": int(row[6]), + "bot_scanner_fingerprint": int(row[7]), + "ttl_distribution": [ + {"ttl": int(r[0]), "count": int(r[1]), "ips": int(r[2])} + for r in ttl_res.result_rows + ], + "mss_distribution": [ + {"mss": int(r[0]), "count": int(r[1]), "ips": int(r[2])} + for r in mss_res.result_rows + ], + "window_size_distribution": [ + {"window_size": int(r[0]), "count": int(r[1])} + for r in win_res.result_rows + ], } except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -134,54 +99,75 @@ async def get_tcp_spoofing_overview(): async def get_tcp_spoofing_list( limit: int = Query(100, ge=1, le=1000), offset: int = Query(0, ge=0), - spoof_only: bool = Query(False, description="Ne retourner que les vrais spoofs (TTL corrélable + OS mismatch)"), + spoof_only: bool = Query(False, description="Retourner uniquement les spoofs/bots confirmés"), ): - """Liste 
des entrées avec données TCP valides (tcp_ttl > 0). - Entrées sans données TCP (TTL=0) exclues : pas de corrélation possible. - Si spoof_only=True, retourne uniquement les entrées avec fingerprint OS identifiable (Linux/Mac TTL 52-65). + """Liste avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale). + Inclut les champs enrichis : mss, win_scale, initial_ttl, hop_count, confidence, network_path, is_bot_tool. """ try: - # Filtre SQL : seules les entrées avec TTL valide, et si spoof_only les plages corrélables - if spoof_only: - # Seules les plages de TTL qui permettent une identification OS fiable - ttl_filter = "tcp_ttl BETWEEN 52 AND 65 OR tcp_ttl BETWEEN 110 AND 135 OR tcp_ttl BETWEEN 240 AND 255" - else: - ttl_filter = "tcp_ttl > 0" - - count_sql = f"SELECT count() FROM mabase_prod.view_tcp_spoofing_detected WHERE {ttl_filter}" + count_sql = """ + SELECT count() FROM ( + SELECT src_ip, ja4 + FROM mabase_prod.agg_host_ip_ja4_1h + WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 + GROUP BY src_ip, ja4 + ) + """ total = int(db.query(count_sql).result_rows[0][0]) - sql = f""" + sql = """ SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip, - ja4, tcp_ttl, tcp_window_size, first_ua - FROM mabase_prod.view_tcp_spoofing_detected - WHERE {ttl_filter} - ORDER BY tcp_ttl ASC + ja4, + any(tcp_ttl_raw) AS tcp_ttl, + any(tcp_win_raw) AS tcp_window_size, + any(tcp_scale_raw) AS tcp_win_scale, + any(tcp_mss_raw) AS tcp_mss, + any(first_ua) AS first_ua, + sum(hits) AS hits + FROM mabase_prod.agg_host_ip_ja4_1h + WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 + GROUP BY src_ip, ja4 + ORDER BY hits DESC LIMIT %(limit)s OFFSET %(offset)s """ result = db.query(sql, {"limit": limit, "offset": offset}) items = [] for row in result.result_rows: - ip = str(row[0]) - ja4 = str(row[1] or "") - ttl = int(row[2]) - window_size = int(row[3]) - ua = str(row[4] or "") - sus_os = _suspected_os(ttl) - dec_os = _declared_os(ua) - 
spoof_flag = _is_spoof(sus_os, dec_os) - if spoof_only and not spoof_flag: + ip = str(row[0]) + ja4 = str(row[1] or "") + ttl = int(row[2] or 0) + win = int(row[3] or 0) + scale = int(row[4] or 0) + mss = int(row[5] or 0) + ua = str(row[6] or "") + hits = int(row[7] or 0) + + fp = fingerprint_os(ttl, win, scale, mss) + dec_os = declared_os_from_ua(ua) + spoof_res = detect_spoof(fp, dec_os) + + if spoof_only and not spoof_res.is_spoof: continue + items.append({ "ip": ip, "ja4": ja4, "tcp_ttl": ttl, - "tcp_window_size": window_size, + "tcp_window_size": win, + "tcp_win_scale": scale, + "tcp_mss": mss, + "hits": hits, "first_ua": ua, - "suspected_os": sus_os, + "suspected_os": fp.os_name, + "initial_ttl": fp.initial_ttl, + "hop_count": fp.hop_count, + "confidence": fp.confidence, + "network_path": fp.network_path, + "is_bot_tool": fp.is_bot_tool, "declared_os": dec_os, - "spoof_flag": spoof_flag, + "spoof_flag": spoof_res.is_spoof, + "spoof_reason": spoof_res.reason, }) return {"items": items, "total": total} except Exception as e: @@ -190,29 +176,44 @@ async def get_tcp_spoofing_list( @router.get("/matrix") async def get_tcp_spoofing_matrix(): - """Matrice suspected_os × declared_os — uniquement entrées avec TTL valide.""" + """Matrice OS suspecté × OS déclaré avec fingerprinting multi-signal.""" try: sql = """ - SELECT tcp_ttl, first_ua - FROM mabase_prod.view_tcp_spoofing_detected - WHERE tcp_ttl > 0 + SELECT + any(tcp_ttl_raw) AS ttl, + any(tcp_win_raw) AS win, + any(tcp_scale_raw) AS scale, + any(tcp_mss_raw) AS mss, + any(first_ua) AS ua, + count() AS cnt + FROM mabase_prod.agg_host_ip_ja4_1h + WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0 + GROUP BY src_ip, ja4 """ result = db.query(sql) counts: dict = {} for row in result.result_rows: - ttl = int(row[0]) - ua = str(row[1] or "") - sus_os = _suspected_os(ttl) - dec_os = _declared_os(ua) - key = (sus_os, dec_os) - counts[key] = counts.get(key, 0) + 1 + ttl = int(row[0] or 0) + win = 
int(row[1] or 0) + scale = int(row[2] or 0) + mss = int(row[3] or 0) + ua = str(row[4] or "") + cnt = int(row[5] or 1) + + fp = fingerprint_os(ttl, win, scale, mss) + dec_os = declared_os_from_ua(ua) + spoof_res = detect_spoof(fp, dec_os) + + key = (fp.os_name, dec_os, spoof_res.is_spoof, fp.is_bot_tool) + counts[key] = counts.get(key, 0) + cnt matrix = [ { "suspected_os": k[0], "declared_os": k[1], "count": v, - "is_spoof": _is_spoof(k[0], k[1]), + "is_spoof": k[2], + "is_bot_tool": k[3], } for k, v in counts.items() ] diff --git a/backend/services/clustering_engine.py b/backend/services/clustering_engine.py new file mode 100644 index 0000000..213425c --- /dev/null +++ b/backend/services/clustering_engine.py @@ -0,0 +1,328 @@ +""" +Moteur de clustering K-means++ multi-métriques (pur Python). + +Ref: Arthur & Vassilvitskii (2007) — k-means++: The Advantages of Careful Seeding + Hotelling (1933) — PCA par puissance itérative (deflation) + +Features (21 dimensions, normalisées [0,1]) : + 0 ttl_n : TTL initial normalisé (hops-count estimé) + 1 mss_n : MSS normalisé → type réseau (Ethernet/PPPoE/VPN) + 2 scale_n : facteur de mise à l'échelle TCP + 3 win_n : fenêtre TCP normalisée + 4 score_n : score anomalie ML (abs) + 5 velocity_n : vélocité de requêtes (log1p) + 6 fuzzing_n : index de fuzzing (log1p) + 7 headless_n : ratio sessions headless + 8 post_n : ratio POST/total + 9 ip_id_zero_n : ratio IP-ID=0 (Linux/spoofé) + 10 entropy_n : entropie temporelle + 11 browser_n : score navigateur moderne (normalisé max 50) + 12 alpn_n : mismatch ALPN/protocole + 13 alpn_absent_n : ratio ALPN absent + 14 h2_n : efficacité H2 multiplexing (log1p) + 15 hdr_conf_n : confiance ordre headers + 16 ua_ch_n : mismatch User-Agent-Client-Hints + 17 asset_n : ratio assets statiques + 18 direct_n : ratio accès directs + 19 ja4_div_n : diversité JA4 (log1p) + 20 ua_rot_n : UA rotatif (booléen) +""" +from __future__ import annotations +import math +import random +from dataclasses import 
dataclass, field + +# ─── Définition des features ────────────────────────────────────────────────── + +# (clé SQL, nom lisible, fonction de normalisation) +FEATURES = [ + # TCP stack + ("ttl", "TTL Initial", lambda v: min(1.0, (v or 0) / 255.0)), + ("mss", "MSS Réseau", lambda v: min(1.0, (v or 0) / 1460.0)), + ("scale", "Scale TCP", lambda v: min(1.0, (v or 0) / 14.0)), + ("win", "Fenêtre TCP", lambda v: min(1.0, (v or 0) / 65535.0)), + # Anomalie ML + ("avg_score", "Score Anomalie", lambda v: min(1.0, float(v or 0))), + ("avg_velocity", "Vélocité (rps)", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(100))), + ("avg_fuzzing", "Fuzzing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(300))), + ("pct_headless", "Headless", lambda v: min(1.0, float(v or 0))), + ("avg_post", "Ratio POST", lambda v: min(1.0, float(v or 0))), + # IP-ID + ("ip_id_zero", "IP-ID Zéro", lambda v: min(1.0, float(v or 0))), + # Temporel + ("entropy", "Entropie Temporelle", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(10))), + # Navigateur + ("browser_score","Score Navigateur", lambda v: min(1.0, float(v or 0) / 50.0)), + # TLS / Protocole + ("alpn_mismatch","ALPN Mismatch", lambda v: min(1.0, float(v or 0))), + ("alpn_missing", "ALPN Absent", lambda v: min(1.0, float(v or 0))), + ("h2_eff", "H2 Multiplexing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(20))), + ("hdr_conf", "Ordre Headers", lambda v: min(1.0, float(v or 0))), + ("ua_ch_mismatch","UA-CH Mismatch", lambda v: min(1.0, float(v or 0))), + # Comportement HTTP + ("asset_ratio", "Ratio Assets", lambda v: min(1.0, float(v or 0))), + ("direct_ratio", "Accès Direct", lambda v: min(1.0, float(v or 0))), + # Diversité JA4 + ("ja4_count", "Diversité JA4", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(30))), + # UA rotatif + ("ua_rotating", "UA Rotatif", lambda v: 1.0 if float(v or 0) > 0 else 0.0), +] + +FEATURE_KEYS = [f[0] for f in FEATURES] +FEATURE_NAMES = [f[1] for f in 
FEATURES] +FEATURE_NORMS = [f[2] for f in FEATURES] +N_FEATURES = len(FEATURES) + + +# ─── Utilitaires vectoriels (pur Python) ────────────────────────────────────── + +def _dist2(a: list[float], b: list[float]) -> float: + return sum((x - y) ** 2 for x, y in zip(a, b)) + +def _mean_vec(vecs: list[list[float]]) -> list[float]: + n = len(vecs) + if n == 0: + return [0.0] * N_FEATURES + return [sum(v[i] for v in vecs) / n for i in range(N_FEATURES)] + + +# ─── Construction du vecteur de features ───────────────────────────────────── + +def build_feature_vector(row: dict) -> list[float]: + """Normalise un dict de colonnes SQL → vecteur [0,1]^N_FEATURES.""" + return [fn(row.get(key)) for key, fn in zip(FEATURE_KEYS, FEATURE_NORMS)] + + +# ─── K-means++ ─────────────────────────────────────────────────────────────── + +@dataclass +class KMeansResult: + centroids: list[list[float]] + labels: list[int] + inertia: float + n_iter: int + + +def kmeans_pp( + points: list[list[float]], + k: int, + max_iter: int = 60, + seed: int = 42, + n_init: int = 3, +) -> KMeansResult: + """ + K-means avec initialisation k-means++ (Arthur & Vassilvitskii, 2007). + Lance `n_init` fois et retourne le meilleur résultat (inertie minimale). 
+ """ + rng = random.Random(seed) + best: KMeansResult | None = None + + for attempt in range(n_init): + # ── Initialisation k-means++ ──────────────────────────────────── + first_idx = rng.randrange(len(points)) + centroids = [points[first_idx][:]] + + for _ in range(k - 1): + d2 = [min(_dist2(p, c) for c in centroids) for p in points] + total = sum(d2) + if total == 0: + break + r = rng.random() * total + cumul = 0.0 + for i, d in enumerate(d2): + cumul += d + if cumul >= r: + centroids.append(points[i][:]) + break + else: + centroids.append(points[rng.randrange(len(points))][:]) + + # ── Itérations EM ─────────────────────────────────────────────── + labels: list[int] = [0] * len(points) + for iteration in range(max_iter): + # E-step : affectation + new_labels = [ + min(range(len(centroids)), key=lambda c: _dist2(p, centroids[c])) + for p in points + ] + if new_labels == labels and iteration > 0: + break + labels = new_labels + + # M-step : mise à jour + clusters: list[list[list[float]]] = [[] for _ in range(k)] + for i, l in enumerate(labels): + clusters[l].append(points[i]) + for j in range(k): + if clusters[j]: + centroids[j] = _mean_vec(clusters[j]) + + inertia = sum(_dist2(points[i], centroids[labels[i]]) for i in range(len(points))) + result = KMeansResult( + centroids=centroids, + labels=labels, + inertia=inertia, + n_iter=iteration + 1, + ) + if best is None or inertia < best.inertia: + best = result + + return best # type: ignore + + +# ─── PCA 2D par puissance itérative ────────────────────────────────────────── + +def pca_2d(points: list[list[float]]) -> list[tuple[float, float]]: + """ + Projection PCA 2D par puissance itérative avec déflation (Hotelling). + Retourne les coordonnées (pc1, pc2) normalisées dans [0,1]. 
+ """ + n = len(points) + if n == 0: + return [] + + # Centrage + mean = _mean_vec(points) + X = [[p[i] - mean[i] for i in range(N_FEATURES)] for p in points] + + def power_iter(X_centered: list[list[float]], n_iter: int = 30) -> list[float]: + """Trouve le premier vecteur propre de X^T X par puissance itérative.""" + v = [1.0 / math.sqrt(N_FEATURES)] * N_FEATURES + for _ in range(n_iter): + # Xv = X @ v + Xv = [sum(row[j] * v[j] for j in range(N_FEATURES)) for row in X_centered] + # Xtxv = X^T @ Xv + xtxv = [sum(X_centered[i][j] * Xv[i] for i in range(len(X_centered))) for j in range(N_FEATURES)] + norm = math.sqrt(sum(x ** 2 for x in xtxv)) or 1e-10 + v = [x / norm for x in xtxv] + return v + + # PC1 + v1 = power_iter(X) + proj1 = [sum(row[j] * v1[j] for j in range(N_FEATURES)) for row in X] + + # Déflation : retire la composante PC1 de X + X2 = [ + [X[i][j] - proj1[i] * v1[j] for j in range(N_FEATURES)] + for i in range(n) + ] + + # PC2 + v2 = power_iter(X2) + proj2 = [sum(row[j] * v2[j] for j in range(N_FEATURES)) for row in X2] + + # Normalisation [0,1] + def _norm01(vals: list[float]) -> list[float]: + lo, hi = min(vals), max(vals) + rng = hi - lo or 1e-10 + return [(v - lo) / rng for v in vals] + + p1 = _norm01(proj1) + p2 = _norm01(proj2) + + return list(zip(p1, p2)) + + +# ─── Nommage automatique des clusters ──────────────────────────────────────── + +def name_cluster(centroid: list[float], raw_stats: dict | None = None) -> str: + """ + Génère un nom lisible à partir du centroïde normalisé et de statistiques brutes. + Priorité : signaux les plus discriminants en premier. 
+ """ + score = centroid[4] # anomalie ML + vel = centroid[5] # vélocité + fuzz = centroid[6] # fuzzing (log1p normalisé, >0.35 ≈ fuzzing_index > 100) + hless = centroid[7] # headless + post = centroid[8] # POST ratio + alpn = centroid[12] # ALPN mismatch + h2 = centroid[14] # H2 eff + ua_ch = centroid[16] # UA-CH mismatch + ja4d = centroid[19] # JA4 diversité + ua_rot = centroid[20] # UA rotatif + + raw_mss = (raw_stats or {}).get("mean_mss", 0) + raw_ttl = (raw_stats or {}).get("mean_ttl", 0) or (centroid[0] * 255) + raw_scale = (raw_stats or {}).get("mean_scale", 0) + + # ── Signaux forts (déterministes) ──────────────────────────────────── + + # Pattern Masscan : mss≈1452, scale≈4, TTL 48-57 + if raw_mss and 1440 <= raw_mss <= 1460 and raw_scale and 3 <= raw_scale <= 5 and raw_ttl < 60: + return "🤖 Masscan / Scanner IP" + + # Fuzzer agressif (fuzzing_index normalisé > 0.35 ≈ valeur brute > 100) + if fuzz > 0.35: + return "🤖 Bot Fuzzer / Scanner" + + # UA rotatif + UA-CH mismatch : bot sophistiqué simulant un navigateur + if ua_rot > 0.5 and ua_ch > 0.7: + return "🤖 Bot UA Rotatif + CH Mismatch" + + # UA-CH mismatch fort seul (navigateur simulé sans headers CH) + if ua_ch > 0.8: + return "⚠️ Bot UA-CH Incohérent" + + # ── Score ML modéré + signal comportemental ────────────────────────── + + if score > 0.20: + if hless > 0.3: + return "⚠️ Navigateur Headless Suspect" + if vel > 0.25: + return "⚠️ Bot Haute Vélocité" + if post > 0.4: + return "⚠️ Bot POST Automatisé" + if alpn > 0.5 or h2 > 0.5: + return "⚠️ TLS/H2 Anormal" + if ua_ch > 0.4: + return "⚠️ Anomalie UA-CH" + return "⚠️ Anomalie ML Modérée" + + # ── Signaux faibles ─────────────────────────────────────────────────── + + if ua_ch > 0.4: + return "🔎 UA-CH Incohérent" + + if ja4d > 0.5: + return "🔄 Client Multi-Fingerprint" + + # ── Classification réseau / OS ──────────────────────────────────────── + + # MSS bas → VPN ou tunnel + if raw_mss and raw_mss < 1360: + return "🌐 VPN / Tunnel" + + if raw_ttl < 
70: + return "🐧 Linux / Mobile" + if raw_ttl > 110: + return "🪟 Windows" + + return "✅ Trafic Légitime" + + +def risk_score_from_centroid(centroid: list[float]) -> float: + """Score de risque [0,1] pondéré. Calibré pour les valeurs observées (score ML ~0.3).""" + # Normalisation de score ML : x / 0.5 pour étendre la plage utile (0-0.5 → 0-1) + score_n = min(1.0, centroid[4] / 0.5) + fuzz_n = centroid[6] + ua_ch_n = centroid[16] + ua_rot_n = centroid[20] + vel_n = centroid[5] + hless_n = centroid[7] + ip_id_n = centroid[9] + alpn_n = centroid[12] + ja4d_n = centroid[19] + post_n = centroid[8] + + return min(1.0, + 0.25 * score_n + + 0.20 * ua_ch_n + + 0.15 * fuzz_n + + 0.12 * ua_rot_n + + 0.10 * hless_n + + 0.07 * vel_n + + 0.04 * ip_id_n + + 0.04 * alpn_n + + 0.03 * ja4d_n + + 0.03 * post_n + ) diff --git a/backend/services/tcp_fingerprint.py b/backend/services/tcp_fingerprint.py new file mode 100644 index 0000000..da51470 --- /dev/null +++ b/backend/services/tcp_fingerprint.py @@ -0,0 +1,436 @@ +""" +Service de fingerprinting OS par signature TCP — approche multi-signal inspirée de p0f. + +Signaux utilisés (par ordre de poids) : + 1. TTL initial estimé (→ famille OS : Linux/Mac=64, Windows=128, Cisco/BSD=255) + 2. MSS (→ type de réseau : Ethernet=1460, PPPoE=1452, VPN=1380-1420) + 3. Taille de fenêtre (→ signature OS précise) + 4. 
Facteur d'échelle (→ affine la version du kernel/stack TCP) + +Références : + - p0f v3 (Michal Zalewski) — passive OS fingerprinting + - Nmap OS detection (Gordon Lyon) + - "OS Fingerprinting Revisited" (Beverly, 2004) + - "Passive OS fingerprinting" (Orebaugh, Ramirez) + - Recherche sur Masscan/ZMap : signatures SYN craftées connues +""" + +from __future__ import annotations +from dataclasses import dataclass +from typing import Optional + + +# ─── Constantes ─────────────────────────────────────────────────────────────── + +_INITIAL_TTLS = (64, 128, 255) + +# MSS → type de chemin réseau (MTU - 40 octets d'en-têtes IP+TCP) +_MSS_PATH: list[tuple[range, str]] = [ + (range(1461, 9001), "Ethernet/Jumbo"), # jumbo frames (CDN/datacenter) + (range(1460, 1461), "Ethernet directe"), # MTU 1500 standard + (range(1453, 1460), "Ethernet directe"), # légèrement réduit (padding) + (range(1452, 1453), "PPPoE/DSL"), # MTU 1492 + (range(1436, 1452), "PPPoE/DSL ajusté"), # variations DSL + (range(1420, 1436), "VPN léger"), # WireGuard / IPsec transport + (range(1380, 1420), "VPN/Tunnel"), # OpenVPN / L2TP + (range(1300, 1380), "VPN double ou mobile"), + (range(0, 1300), "Lien bas débit / GPRS"), +] + + +# ─── Base de signatures OS ───────────────────────────────────────────────────── +# +# Format : chaque entrée est un dict avec : +# ttl : int — TTL initial attendu (64 | 128 | 255) +# win : set[int]|None — tailles de fenêtre attendues (None = ignorer) +# scale : set[int]|None — facteurs d'échelle attendus (None = ignorer) +# mss : set[int]|None — valeurs MSS attendues (None = ignorer) +# name : str — libellé affiché +# conf : float — poids de confiance de base (0–1) +# bot : bool — outil de scan/bot connu + +_SIGNATURES: list[dict] = [ + # ══════════════════════════════════════════════════════ + # OUTILS DE SCAN ET BOTS CONNUS (priorité maximale) + # ══════════════════════════════════════════════════════ + + # Masscan / scanner personnalisé avec stack Linux modifiée (PPPoE 
MSS=1452) + # Pattern très présent dans les données : ~111k requêtes, UA spoofé macOS/Windows + { + "ttl": 64, "win": {5808}, "scale": {4}, "mss": {1452}, + "name": "Bot-Scanner/Masscan", "conf": 0.97, "bot": True, + }, + # Masscan TTL=255 (mode direct, pas de hop) + { + "ttl": 255, "win": {1024}, "scale": {0}, "mss": None, + "name": "Bot-ZMap/Masscan", "conf": 0.96, "bot": True, + }, + # Mirai variant (petite fenêtre, pas de scale, TTL Linux) + { + "ttl": 64, "win": {1024, 2048}, "scale": {0}, "mss": {1460}, + "name": "Bot-Mirai", "conf": 0.92, "bot": True, + }, + # Mirai variant (petite fenêtre Windows) + { + "ttl": 128, "win": {1024, 2048}, "scale": {0}, "mss": {1460}, + "name": "Bot-Mirai/Win", "conf": 0.92, "bot": True, + }, + # Scapy / forge manuelle (fenêtre 8192 exactement + TTL 64 + pas de scale) + { + "ttl": 64, "win": {8192}, "scale": {0}, "mss": {1460}, + "name": "Bot-Scapy/Forge", "conf": 0.85, "bot": True, + }, + # Nmap SYN scan (window=1024, MSS=1460, TTL=64 ou 128) + { + "ttl": 64, "win": {1}, "scale": None, "mss": None, + "name": "Bot-ZMap", "conf": 0.95, "bot": True, + }, + + # ══════════════════════════════════════════════════════ + # WINDOWS + # ══════════════════════════════════════════════════════ + + # Windows 10 / 11 — signature standard (LAN direct) + { + "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1460}, + "name": "Windows 10/11", "conf": 0.93, "bot": False, + }, + # Windows 10/11 — derrière VPN/proxy (MSS réduit) + { + "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1380, 1400, 1412, 1420, 1440}, + "name": "Windows 10/11 (VPN)", "conf": 0.90, "bot": False, + }, + # Windows Server 2019/2022 — grande fenêtre + { + "ttl": 128, "win": {65535, 131072}, "scale": {8, 9}, "mss": {1460}, + "name": "Windows Server", "conf": 0.88, "bot": False, + }, + # Windows 7/8.1 + { + "ttl": 128, "win": {8192, 65535}, "scale": {4, 8}, "mss": {1460}, + "name": "Windows 7/8", "conf": 0.83, "bot": False, + }, + # Windows générique (TTL=128, scale=8, tout 
MSS) + { + "ttl": 128, "win": None, "scale": {8}, "mss": None, + "name": "Windows", "conf": 0.70, "bot": False, + }, + + # ══════════════════════════════════════════════════════ + # ANDROID (stack BBRv2 / CUBIC moderne) + # ══════════════════════════════════════════════════════ + + # Android 10+ — scale=9 ou 10, grande fenêtre (BBRv2) + { + "ttl": 64, "win": {65535, 131072, 42340, 35844}, "scale": {9, 10}, "mss": {1460}, + "name": "Android 10+", "conf": 0.82, "bot": False, + }, + # Android via proxy TTL=128 (app Facebook, TikTok etc. passant par infra) + { + "ttl": 128, "win": {62727, 65535}, "scale": {7}, "mss": {1460}, + "name": "Android/App (proxy)", "conf": 0.75, "bot": False, + }, + # Android derrière VPN (MSS réduit) + { + "ttl": 64, "win": {65535, 59640, 63940}, "scale": {8, 9, 10}, "mss": {1380, 1390, 1400, 1418, 1420}, + "name": "Android (VPN/mobile)", "conf": 0.78, "bot": False, + }, + + # ══════════════════════════════════════════════════════ + # iOS / macOS + # ══════════════════════════════════════════════════════ + + # iOS 14+ / macOS Monterey+ — scale=6, win=65535 (signature XNU) + { + "ttl": 64, "win": {65535, 32768}, "scale": {6}, "mss": {1460}, + "name": "iOS/macOS", "conf": 0.87, "bot": False, + }, + # macOS Sonoma+ / iOS 17+ (scale=9, fenêtre plus grande) + { + "ttl": 64, "win": {65535, 32768}, "scale": {9}, "mss": {1460}, + "name": "macOS Sonoma+/iOS 17+", "conf": 0.83, "bot": False, + }, + # macOS derrière VPN (MSS réduit) + { + "ttl": 64, "win": {65535}, "scale": {6, 9}, "mss": {1380, 1400, 1412, 1436}, + "name": "iOS/macOS (VPN)", "conf": 0.80, "bot": False, + }, + + # ══════════════════════════════════════════════════════ + # LINUX (desktop/serveur) + # ══════════════════════════════════════════════════════ + + # Linux 5.x+ — scale=7, win=64240 ou 65535 (kernel ≥ 4.19) + { + "ttl": 64, "win": {64240, 65320}, "scale": {7}, "mss": {1460}, + "name": "Linux 5.x+", "conf": 0.86, "bot": False, + }, + # Linux 4.x / ChromeOS + { + "ttl": 64, "win": 
{29200, 65535, 43690, 32120}, "scale": {7}, "mss": {1460}, + "name": "Linux 4.x/ChromeOS", "conf": 0.83, "bot": False, + }, + # Linux derrière VPN (MSS réduit) + { + "ttl": 64, "win": {64240, 65535, 42600}, "scale": {7}, "mss": {1380, 1400, 1420, 1436}, + "name": "Linux (VPN)", "conf": 0.80, "bot": False, + }, + # Linux 2.6.x (ancien — win=5840/14600) + { + "ttl": 64, "win": {5840, 14600, 16384}, "scale": {4, 5}, "mss": {1460}, + "name": "Linux 2.6", "conf": 0.78, "bot": False, + }, + + # ══════════════════════════════════════════════════════ + # BSD / ÉQUIPEMENTS RÉSEAU / CDN + # ══════════════════════════════════════════════════════ + + # FreeBSD / OpenBSD (initial TTL=64) + { + "ttl": 64, "win": {65535}, "scale": {6}, "mss": {512, 1460}, + "name": "FreeBSD/OpenBSD", "conf": 0.74, "bot": False, + }, + # Cisco IOS / équipements réseau (initial TTL=255, fenêtre petite) + { + "ttl": 255, "win": {4096, 4128, 8760}, "scale": {0, 1, 2}, "mss": {512, 1460}, + "name": "Cisco/Réseau", "conf": 0.87, "bot": False, + }, + # CDN / Applebot (TTL=255, jumbo MSS, fenêtre élevée) + { + "ttl": 255, "win": {26883, 65535, 59640}, "scale": {7, 8}, "mss": {8373, 8365, 1460}, + "name": "CDN/Applebot (jumbo)", "conf": 0.85, "bot": False, + }, + # BSD/Unix générique (TTL=255) + { + "ttl": 255, "win": None, "scale": {6, 7, 8}, "mss": {1460}, + "name": "BSD/Unix", "conf": 0.68, "bot": False, + }, +] + + +# ─── Data classes ────────────────────────────────────────────────────────────── + +@dataclass +class OsFingerprint: + os_name: str + initial_ttl: int + hop_count: int + confidence: float + is_bot_tool: bool + network_path: str + + +@dataclass +class SpoofResult: + is_spoof: bool + is_bot_tool: bool + reason: str + + +# ─── Fonctions utilitaires ───────────────────────────────────────────────────── + +def _estimate_initial_ttl(observed_ttl: int) -> tuple[int, int]: + """Retourne (initial_ttl, hop_count). + Cherche le TTL standard le plus bas >= observed_ttl. 
+ Rejette les hop counts > 45 (réseau légitimement long = peu probable). + """ + if observed_ttl <= 0: + return 0, -1 + for initial in _INITIAL_TTLS: + if observed_ttl <= initial: + hop = initial - observed_ttl + if hop <= 45: + return initial, hop + return 255, 255 - observed_ttl # TTL > 255 impossible, fallback + + +def _infer_network_path(mss: int) -> str: + """Retourne le type de chemin réseau probable à partir du MSS.""" + if mss <= 0: + return "Inconnu" + for rng, label in _MSS_PATH: + if mss in rng: + return label + return "Inconnu" + + +def _os_family(os_name: str) -> str: + """Réduit un nom OS détaillé à sa famille pour comparaison avec l'UA.""" + n = os_name.lower() + if "windows" in n: + return "Windows" + if "android" in n: + return "Android" + if "ios" in n or "macos" in n or "iphone" in n or "ipad" in n: + return "Apple" + if "linux" in n or "chromeos" in n: + return "Linux" + if "bsd" in n or "cisco" in n or "cdn" in n or "réseau" in n: + return "Network" + if "bot" in n or "scanner" in n or "mirai" in n or "zmap" in n: + return "Bot" + return "Unknown" + + +def _ua_os_family(declared_os: str) -> str: + """Réduit l'OS déclaré (UA) à sa famille.""" + mapping = { + "Windows": "Windows", + "Android": "Android", + "iOS": "Apple", + "macOS": "Apple", + "Linux": "Linux", + "ChromeOS": "Linux", + "BSD": "Network", + } + return mapping.get(declared_os, "Unknown") + + +# ─── Fonctions publiques ─────────────────────────────────────────────────────── + +def declared_os_from_ua(ua: str) -> str: + """Infère l'OS déclaré à partir du User-Agent.""" + ua = ua or "" + ul = ua.lower() + if not ul: + return "Unknown" + if "windows nt" in ul: + return "Windows" + if "android" in ul: + return "Android" + if "iphone" in ul or "ipad" in ul or "cpu iphone" in ul or "cpu ipad" in ul: + return "iOS" + if "mac os x" in ul or "macos" in ul: + return "macOS" + if "cros" in ul or "chromeos" in ul: + return "ChromeOS" + if "linux" in ul: + return "Linux" + if "freebsd" in ul or 
"openbsd" in ul or "netbsd" in ul: + return "BSD" + return "Unknown" + + +def fingerprint_os(ttl: int, win: int, scale: int, mss: int) -> OsFingerprint: + """Fingerprint OS multi-signal avec scoring pondéré. + + Poids des signaux : + - TTL initial 40 % (discriminant principal : famille OS) + - MSS 30 % (type de réseau ET OS) + - Fenêtre TCP 20 % (version/distrib précise) + - Scale 10 % (affine la version kernel) + """ + initial_ttl, hop_count = _estimate_initial_ttl(ttl) + network_path = _infer_network_path(mss) + + if initial_ttl == 0: + return OsFingerprint( + os_name="Unknown", initial_ttl=0, hop_count=-1, + confidence=0.0, is_bot_tool=False, network_path=network_path, + ) + + best_score: float = -1.0 + best_sig: Optional[dict] = None + + for sig in _SIGNATURES: + # Le TTL est un filtre strict — pas de correspondance, on passe + if sig["ttl"] != initial_ttl: + continue + + score: float = 0.40 # Score de base pour correspondance TTL + + # MSS (poids 0.30) + if sig["mss"] is not None: + score += 0.30 if mss in sig["mss"] else -0.12 + + # Fenêtre (poids 0.20) + if sig["win"] is not None: + score += 0.20 if win in sig["win"] else -0.08 + + # Scale (poids 0.10) + if sig["scale"] is not None: + score += 0.10 if scale in sig["scale"] else -0.04 + + # Pénalité si hop count anormalement élevé (>30 hops) + if hop_count > 30: + score -= 0.05 + + if score > best_score: + best_score = score + best_sig = sig + + if best_sig and best_score >= 0.38: + # Pondérer la confiance finale par le score et le conf de la signature + raw_conf = best_score * best_sig["conf"] + confidence = round(min(max(raw_conf, 0.0), 1.0), 2) + return OsFingerprint( + os_name=best_sig["name"], + initial_ttl=initial_ttl, + hop_count=hop_count, + confidence=confidence, + is_bot_tool=best_sig["bot"], + network_path=network_path, + ) + + # Repli : classification TTL seule (confiance minimale) + fallback = {64: "Linux/macOS", 128: "Windows", 255: "Cisco/BSD"} + return OsFingerprint( + 
os_name=fallback.get(initial_ttl, "Unknown"), + initial_ttl=initial_ttl, + hop_count=hop_count, + confidence=round(0.40 * 0.65, 2), # confiance faible + is_bot_tool=False, + network_path=network_path, + ) + + +def detect_spoof(fp: OsFingerprint, declared_os: str) -> SpoofResult: + """Détecte les incohérences OS entre TCP et UA. + + Règles : + 1. Outil de scan connu → spoof/bot, quelle que soit l'UA + 2. Confiance < 0.50 → indéterminable + 3. OS incompatibles → spoof confirmé + 4. Cohérent → OK + """ + if fp.is_bot_tool: + return SpoofResult( + is_spoof=True, + is_bot_tool=True, + reason=f"Outil de scan détecté ({fp.os_name})", + ) + + if fp.confidence < 0.50 or fp.os_name == "Unknown" or declared_os == "Unknown": + return SpoofResult( + is_spoof=False, + is_bot_tool=False, + reason="Corrélation insuffisante", + ) + + tcp_family = _os_family(fp.os_name) + ua_family = _ua_os_family(declared_os) + + # Les familles Network/Bot sont toujours suspectes si l'UA prétend être un navigateur + if tcp_family == "Network" and ua_family not in ("Network", "Unknown"): + return SpoofResult( + is_spoof=True, + is_bot_tool=False, + reason=f"Équipement réseau/CDN (TCP) vs {declared_os} (UA)", + ) + + if tcp_family == "Unknown" or ua_family == "Unknown": + return SpoofResult(is_spoof=False, is_bot_tool=False, reason="OS indéterminé") + + # Android passant par un proxy infra (ex: Facebook app → proxy Windows) + # → pas forcément un spoof, noté mais non flaggé + if declared_os == "Android" and tcp_family == "Windows" and "proxy" in fp.os_name.lower(): + return SpoofResult(is_spoof=False, is_bot_tool=False, reason="App mobile via proxy infra") + + if tcp_family != ua_family: + return SpoofResult( + is_spoof=True, + is_bot_tool=False, + reason=f"TCP→{tcp_family} vs UA→{ua_family}", + ) + + return SpoofResult(is_spoof=False, is_bot_tool=False, reason="Cohérent") diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 7af1b1c..82784a6 100644 --- a/frontend/src/App.tsx +++ 
b/frontend/src/App.tsx @@ -19,6 +19,7 @@ import { BruteForceView } from './components/BruteForceView'; import { TcpSpoofingView } from './components/TcpSpoofingView'; import { HeaderFingerprintView } from './components/HeaderFingerprintView'; import { MLFeaturesView } from './components/MLFeaturesView'; +import ClusteringView from './components/ClusteringView'; import { useTheme } from './ThemeContext'; // ─── Types ──────────────────────────────────────────────────────────────────── @@ -79,6 +80,7 @@ function Sidebar({ counts }: { counts: AlertCounts | null }) { const advancedLinks = [ { path: '/bruteforce', label: 'Brute Force', icon: '🔥', aliases: [] }, { path: '/tcp-spoofing', label: 'TCP Spoofing', icon: '🧬', aliases: [] }, + { path: '/clustering', label: 'Clustering IPs', icon: '🔬', aliases: [] }, { path: '/headers', label: 'Header Fingerprint', icon: '📡', aliases: [] }, { path: '/ml-features', label: 'Features ML', icon: '🤖', aliases: [] }, ]; @@ -237,7 +239,8 @@ function TopHeader({ counts }: { counts: AlertCounts | null }) { if (p.startsWith('/pivot')) return 'Pivot / Corrélation'; if (p.startsWith('/bulk-classify')) return 'Classification en masse'; if (p.startsWith('/bruteforce')) return 'Brute Force & Credential Stuffing'; - if (p.startsWith('/tcp-spoofing')) return 'Spoofing TCP/OS'; + if (p.startsWith('/tcp-spoofing')) return 'Spoofing TCP/OS'; + if (p.startsWith('/clustering')) return 'Clustering IPs'; if (p.startsWith('/headers')) return 'Header Fingerprint Clustering'; if (p.startsWith('/ml-features')) return 'Features ML / Radar'; return ''; @@ -370,6 +373,7 @@ export default function App() { } /> } /> } /> + } /> } /> } /> } /> diff --git a/frontend/src/components/ClusteringView.tsx b/frontend/src/components/ClusteringView.tsx new file mode 100644 index 0000000..d44d3ae --- /dev/null +++ b/frontend/src/components/ClusteringView.tsx @@ -0,0 +1,847 @@ +/** + * Clustering IPs — visualisation multi-métriques + * + * Deux vues : + * 1. 
"Cartes" (défaut) — grille de cartes triées par risque, toujours lisibles + * 2. "Graphe" — ReactFlow avec nœuds-cartes et disposition par colonne de menace + * + * Chaque cluster affiche : + * • Label + emoji de menace + * • Compteur IPs / hits + * • Score de risque (barre colorée) + * • 4 métriques clés (barres horizontales) + * • Top pays + ASN + * • Radar dans la sidebar + */ +import { useCallback, useEffect, useState, useMemo } from 'react'; +import ReactFlow, { + Background, Controls, MiniMap, ReactFlowProvider, + useNodesState, useEdgesState, useReactFlow, + Node, Edge, Handle, Position, NodeProps, + Panel, +} from 'reactflow'; +import 'reactflow/dist/style.css'; +import { + RadarChart, Radar, PolarGrid, PolarAngleAxis, PolarRadiusAxis, + ResponsiveContainer, Tooltip as RechartsTooltip, +} from 'recharts'; + +// ─── Types ──────────────────────────────────────────────────────────────────── + +interface ClusterNode { + id: string; + label: string; + cluster_idx: number; + x: number; y: number; + radius: number; + color: string; + risk_score: number; + ip_count: number; + hit_count: number; + mean_score: number; + mean_ua_ch: number; + mean_ua_rotating: number; + mean_fuzzing: number; + mean_headless: number; + mean_velocity: number; + mean_ttl: number; + mean_mss: number; + mean_scale: number; + mean_alpn_mismatch: number; + mean_ip_id_zero: number; + mean_browser_score: number; + mean_entropy: number; + mean_ja4_diversity: number; + top_threat: string; + top_countries: string[]; + top_orgs: string[]; + sample_ips: string[]; + sample_ua: string; + radar: { feature: string; value: number }[]; +} + +interface ClusteringData { + nodes: ClusterNode[]; + edges: { id: string; source: string; target: string; similarity: number; weight: number }[]; + stats: { + total_clusters: number; + total_ips: number; + total_hits: number; + bot_ips: number; + high_risk_ips: number; + n_samples: number; + k: number; + elapsed_s: number; + }; +} + +interface ClusterIP { + ip: 
string; ja4: string; tcp_ttl: number; tcp_mss: number; + hits: number; ua: string; avg_score: number; + threat_level: string; country_code: string; asn_org: string; + fuzzing: number; velocity: number; +} + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +const THREAT_BADGE_CLASS: Record = { + CRITICAL: 'bg-red-600', HIGH: 'bg-orange-500', + MEDIUM: 'bg-yellow-500', LOW: 'bg-green-600', +}; + +const RADAR_FEATURES = [ + 'Score Anomalie', 'Vélocité (rps)', 'Fuzzing', 'Headless', + 'ALPN Mismatch', 'H2 Multiplexing', 'UA-CH Mismatch', 'UA Rotatif', + 'IP-ID Zéro', 'Entropie Temporelle', +]; + +function ThreatBadge({ level }: { level: string }) { + if (!level) return null; + return ( + + {level} + + ); +} + +function MiniBar({ value, color = '#6366f1', label }: { value: number; color?: string; label?: string }) { + const pct = Math.round(Math.min(1, Math.max(0, value)) * 100); + return ( +
+ {label && {label}} +
+
+
+ {pct}% +
+ ); +} + +function riskColor(risk: number): string { + if (risk >= 0.45) return '#dc2626'; + if (risk >= 0.30) return '#f97316'; + if (risk >= 0.15) return '#eab308'; + return '#22c55e'; +} + +function riskLabel(risk: number): string { + if (risk >= 0.45) return 'CRITIQUE'; + if (risk >= 0.30) return 'ÉLEVÉ'; + if (risk >= 0.15) return 'MODÉRÉ'; + return 'SAIN'; +} + +// ─── Carte cluster (réutilisée dans les 2 vues) ──────────────────────────── + +function ClusterCard({ + node, selected, onClick, +}: { + node: ClusterNode; + selected: boolean; + onClick: () => void; +}) { + const rc = riskColor(node.risk_score); + const rl = riskLabel(node.risk_score); + + // Normalisation anomaly_score pour la barre (valeurs ~0.3 max → étirer sur /0.5) + const scoreN = Math.min(1, node.mean_score / 0.5); + + return ( + + ); +} + +// ─── Vue Cartes (défaut) ────────────────────────────────────────────────────── + +function CardGridView({ + nodes, selectedId, onSelect, +}: { + nodes: ClusterNode[]; + selectedId: string | null; + onSelect: (n: ClusterNode) => void; +}) { + const sorted = useMemo( + () => [...nodes].sort((a, b) => b.risk_score - a.risk_score), + [nodes], + ); + + // Groupes par niveau de risque + const groups = useMemo(() => { + const bots = sorted.filter(n => n.risk_score >= 0.45 || n.label.includes('🤖')); + const warn = sorted.filter(n => n.risk_score >= 0.15 && n.risk_score < 0.45 && !n.label.includes('🤖')); + const safe = sorted.filter(n => n.risk_score < 0.15 && !n.label.includes('🤖')); + return { bots, warn, safe }; + }, [sorted]); + + function Group({ title, color, nodes: gn }: { title: string; color: string; nodes: ClusterNode[] }) { + if (gn.length === 0) return null; + return ( +
+
+
+

+ {title} ({gn.length}) +

+
+
+
+ {gn.map(n => ( + onSelect(n)} + /> + ))} +
+
+ ); + } + + return ( +
+ + + +
+ ); +} + +// ─── Nœud ReactFlow (pour la vue Graphe) ───────────────────────────────────── + +function GraphCardNode({ data }: NodeProps) { + const rc = riskColor(data.risk_score); + const rl = riskLabel(data.risk_score); + const scoreN = Math.min(1, data.mean_score / 0.5); + + return ( + <> + +
0.40 ? `0 0 16px ${rc}55` : 'none', + }} + > +
+
+
+

{data.label}

+ + {rl} + +
+

+ {data.ip_count.toLocaleString()} IPs ·{' '} + {data.hit_count.toLocaleString()} req +

+ {/* Barre risque */} +
+
+
+ {/* Mini métriques */} +
+ {[ + ['Anomalie', scoreN, scoreN > 0.5 ? '#dc2626' : '#f97316'], + ['UA-CH', data.mean_ua_ch, '#f97316'], + ['Fuzzing', Math.min(1, data.mean_fuzzing * 3), '#8b5cf6'], + ].map(([l, v, c]: any) => ( +
+ {l} +
+
+
+ {Math.round(v * 100)}% +
+ ))} +
+ {data.top_countries?.length > 0 && ( +

🌍 {data.top_countries.slice(0, 4).join(' · ')}

+ )} +
+
+
+
+  );
+}
+
+const nodeTypes = { graphCard: GraphCardNode };
+
+// ─── Graph view ───────────────────────────────────────────────────────────────
+
+/**
+ * Builds the ReactFlow nodes and edges for the clustering result.
+ *
+ * Nodes are laid out in three columns by threat level and edges are styled by
+ * centroid similarity. `selectedId` only drives the `selected` flag of the nodes;
+ * `onSelect` is consumed by the rendered markup.
+ */
+function GraphView({
+  data, selectedId, onSelect,
+}: {
+  data: ClusteringData;
+  selectedId: string | null;
+  onSelect: (n: ClusterNode) => void;
+}) {
+  const [nodes, setNodes, onNodesChange] = useNodesState([]);
+  const [edges, setEdges, onEdgesChange] = useEdgesState([]);
+  const { fitView } = useReactFlow();
+
+  // Layout and edges are rebuilt only when the clustering result changes.
+  // Depending on selectedId here would reset every dragged node to its grid
+  // position and re-fit the viewport on each click.
+  useEffect(() => {
+    if (!data) return;
+
+    // Column layout by threat level:
+    // col 0 → bots (red), col 1 → suspects (orange), col 2 → legitimate (green)
+    const sorted = [...data.nodes].sort((a, b) => b.risk_score - a.risk_score);
+
+    const col: ClusterNode[][] = [[], [], []];
+    for (const n of sorted) {
+      if (n.risk_score >= 0.45 || n.label.includes('🤖')) col[0].push(n);
+      else if (n.risk_score >= 0.15) col[1].push(n);
+      else col[2].push(n);
+    }
+
+    const NODE_W = 240;
+    const NODE_H = 170;
+    const PAD_X = 80;
+    const PAD_Y = 40;
+    const COL_GAP = 80;
+
+    const rfNodes: Node[] = [];
+    col.forEach((group, ci) => {
+      group.forEach((n, ri) => {
+        rfNodes.push({
+          id: n.id,
+          type: 'graphCard',
+          position: {
+            x: ci * (NODE_W + COL_GAP) + PAD_X,
+            y: ri * (NODE_H + PAD_Y) + PAD_Y,
+          },
+          data: n,
+          draggable: true,
+          selected: n.id === selectedId,
+        });
+      });
+    });
+
+    // Edges coloured by similarity
+    const rfEdges: Edge[] = data.edges.map(e => {
+      const sim = e.similarity;
+      return {
+        id: e.id,
+        source: e.source,
+        target: e.target,
+        style: {
+          stroke: sim > 0.6 ? '#f97316' : sim > 0.4 ? '#6b7280' : '#374151',
+          strokeWidth: Math.max(1, e.weight * 0.5),
+          strokeDasharray: sim < 0.4 ? '4 4' : undefined,
+        },
+        label: sim > 0.55 ? `${Math.round(sim * 100)}%` : undefined,
+        labelStyle: { fontSize: 9, fill: '#9ca3af' },
+        labelBgStyle: { fill: '#0f1117aa', borderRadius: 3 },
+        animated: sim > 0.6,
+      };
+    });
+
+    setNodes(rfNodes);
+    setEdges(rfEdges);
+    // Same 120 ms delay as before, but the timer is cancelled if the effect
+    // re-runs or the component unmounts first.
+    const fitTimer = setTimeout(() => fitView({ padding: 0.08 }), 120);
+    return () => clearTimeout(fitTimer);
+    // selectedId is intentionally omitted: selection is synced by the effect below.
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [data]);
+
+  // A selection change only toggles the `selected` flag; positions are preserved.
+  useEffect(() => {
+    setNodes(prev => prev.map(n => {
+      const isSelected = n.id === selectedId;
+      return Boolean(n.selected) === isSelected ? n : { ...n, selected: isSelected };
+    }));
+  }, [selectedId, setNodes]);
+
+  return (
+
+ onSelect(node.data as ClusterNode)} + nodeTypes={nodeTypes} + fitView + minZoom={0.12} + maxZoom={2.5} + attributionPosition="bottom-right" + > + + + riskColor((n.data as any)?.risk_score ?? 0)} + style={{ background: '#0f1117', border: '1px solid #374151' }} + /> + {/* Légende colonnes */} + +
+ {[ + { color: '#dc2626', label: '🤖 Bots / Menaces', col: 0 }, + { color: '#f97316', label: '⚠️ Suspects', col: 1 }, + { color: '#22c55e', label: '✅ Légitimes', col: 2 }, + ].map(({ color, label }) => ( +
+
+ {label} +
+ ))} + ── similaire · - - différent · animé=fort +
+ + +
+

K-means++ · 21 features

+

Colonnes : niveau de risque

+

Arêtes : similarité des centroides

+
+
+ +
+  );
+}
+
+// ─── Cluster detail sidebar ───────────────────────────────────────────────────
+
+const RADAR_FEATURES_SET = new Set(RADAR_FEATURES);
+
+/**
+ * Detail panel for one cluster: loads up to 80 member IPs and offers
+ * copy-to-clipboard and CSV export of the loaded list.
+ */
+function ClusterSidebar({ cluster, onClose }: { cluster: ClusterNode; onClose: () => void }) {
+  const [ips, setIPs] = useState([]);
+  const [total, setTotal] = useState(0);
+  const [loading, setLoading] = useState(false);
+  const [copied, setCopied] = useState(false);
+
+  useEffect(() => {
+    // Abort the request when the cluster changes or the sidebar unmounts, so a
+    // slow response for a previous cluster can never overwrite the current list.
+    const controller = new AbortController();
+    setLoading(true);
+    fetch(`/api/clustering/cluster/${encodeURIComponent(cluster.id)}/ips?limit=80`, { signal: controller.signal })
+      .then(r => {
+        if (!r.ok) throw new Error(`HTTP ${r.status}`);
+        return r.json();
+      })
+      .then(d => {
+        if (controller.signal.aborted) return;
+        setIPs(d.ips || []);
+        setTotal(d.total || 0);
+      })
+      .catch(err => {
+        if (controller.signal.aborted) return;
+        // Never keep showing (or exporting) the previous cluster's IPs after a failure.
+        console.error('Cluster IP fetch failed', err);
+        setIPs([]);
+        setTotal(0);
+      })
+      .finally(() => { if (!controller.signal.aborted) setLoading(false); });
+    return () => controller.abort();
+  }, [cluster.id]);
+
+  const copyIPs = async () => {
+    try {
+      await navigator.clipboard.writeText(ips.map(i => i.ip).join('\n'));
+      // Only report success once the clipboard write has actually resolved.
+      setCopied(true);
+      setTimeout(() => setCopied(false), 2000);
+    } catch (err) {
+      // Clipboard API missing (insecure context) or permission denied.
+      console.error('Clipboard write failed', err);
+    }
+  };
+
+  const downloadCSV = () => {
+    // Always-quoted field with embedded quotes doubled.
+    const csvQuote = (v: unknown) => `"${String(v ?? '').replace(/"/g, '""')}"`;
+    // Quote only when the value contains a delimiter, quote or line break.
+    const csvField = (v: unknown) => {
+      const s = String(v ?? '');
+      return /[",\r\n]/.test(s) ? csvQuote(s) : s;
+    };
+    const header = 'IP,JA4,TTL,MSS,Hits,Score,Menace,Pays,ASN,Fuzzing,Vélocité\n';
+    const rows = ips.map(i =>
+      [csvField(i.ip), csvField(i.ja4), i.tcp_ttl, i.tcp_mss, i.hits,
+       i.avg_score.toFixed(3), csvField(i.threat_level), csvField(i.country_code),
+       csvQuote(i.asn_org), i.fuzzing.toFixed(2), i.velocity.toFixed(2)].join(',')
+    ).join('\n');
+    const blob = new Blob([header + rows], { type: 'text/csv' });
+    const url = URL.createObjectURL(blob);
+    const a = document.createElement('a');
+    a.href = url;
+    a.download = `cluster_${cluster.id}.csv`;
+    a.click();
+    // Release the blob once the download has been handed to the browser.
+    setTimeout(() => URL.revokeObjectURL(url), 0);
+  };
+
+  const rc = riskColor(cluster.risk_score);
+  // Keep only the features listed in RADAR_FEATURES; values are scaled to percentages.
+  const radarData = cluster.radar
+    .filter(r => RADAR_FEATURES_SET.has(r.feature))
+    .map(r => ({ subject: r.feature.replace('Vélocité (rps)', 'Vélocité'), val: Math.round(r.value * 100) }));
+
+  return (
+
+ {/* Header */} +
+
+
+

{cluster.label}

+

+ {cluster.ip_count.toLocaleString()} IPs ·{' '} + {cluster.hit_count.toLocaleString()} requêtes +

+
+ +
+ {/* Risque */} +
+
+ Score de risque + {Math.round(cluster.risk_score * 100)}% — {riskLabel(cluster.risk_score)} +
+
+
+
+
+
+ +
+ {/* Radar */} +
+

Profil Comportemental

+ + + + + + + [`${v}%`]} + /> + + +
+ + {/* Métriques */} +
+

Toutes les métriques

+
+ {[ + ['Score anomalie ML', Math.min(1, cluster.mean_score / 0.5), rc], + ['UA-CH mismatch', cluster.mean_ua_ch, '#f97316'], + ['UA rotatif', cluster.mean_ua_rotating, '#ec4899'], + ['Fuzzing', Math.min(1, cluster.mean_fuzzing * 3), '#8b5cf6'], + ['Headless', cluster.mean_headless, '#dc2626'], + ['Vélocité', cluster.mean_velocity, '#6366f1'], + ['ALPN mismatch', cluster.mean_alpn_mismatch, '#14b8a6'], + ['IP-ID zéro', cluster.mean_ip_id_zero, '#f59e0b'], + ['Entropie temporelle',cluster.mean_entropy, '#06b6d4'], + ['Browser score', Math.min(1, cluster.mean_browser_score / 50), '#22c55e'], + ].map(([lbl, val, col]: any) => ( + + ))} +
+
+ + {/* TCP */} +
+

Stack TCP

+
+ {[ + ['TTL Initial', Math.round(cluster.mean_ttl)], + ['MSS', Math.round(cluster.mean_mss)], + ['Scale', cluster.mean_scale.toFixed(1)], + ].map(([k, v]) => ( +
+

{k}

+

{v}

+
+ ))} +
+
+ + {/* Meta */} +
+ {cluster.top_threat && ( +
+ Menace dominante + +
+ )} + {cluster.top_countries.length > 0 && ( +

Pays : + {cluster.top_countries.join(', ')}

+ )} + {cluster.top_orgs.length > 0 && ( +
+ ASN : + {cluster.top_orgs.slice(0, 3).map((org, i) => ( +

• {org}

+ ))} +
+ )} + {cluster.sample_ua && ( +
+ User-Agent type : +

{cluster.sample_ua}

+
+ )} +
+ + {/* Actions */} +
+ + +
+ + {/* Liste IPs */} +
+

+ Adresses IP ({loading ? '…' : `${ips.length} / ${total.toLocaleString()}`}) +

+ {loading ? ( +

Chargement…

+ ) : ( +
+ {ips.map((ip, i) => ( +
+
+ {ip.ip} +
+ + {ip.country_code && {ip.country_code}} +
+
+
+ TTL {ip.tcp_ttl} + MSS {ip.tcp_mss} + {ip.hits.toLocaleString()} req + {ip.avg_score > 0.1 && ( + ⚠ {(ip.avg_score * 100).toFixed(0)}% + )} + {ip.asn_org && {ip.asn_org}} +
+
+ ))} +
+ )} +
+
+
+  );
+}
+
+// ─── Graph view (wrapper with ReactFlowProvider) ──────────────────────────────
+
+// Thin wrapper around the graph view. Its markup is not visible here; per the
+// section header it supplies the ReactFlowProvider context.
+function GraphViewWrapper({
+  data, selectedId, onSelect,
+}: {
+  data: ClusteringData;
+  selectedId: string | null;
+  onSelect: (n: ClusterNode) => void;
+}) {
+  return (
+
+
+  );
+}
+
+// ─── Main component ───────────────────────────────────────────────────────────
+
+// Top-level clustering screen: owns the clustering result, the chosen k,
+// the active view ('cards' or 'graph') and the currently selected cluster.
+export default function ClusteringView() {
+  const [data, setData] = useState(null);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState('');
+  // k is the committed value; pendingK is the value being edited before applyK commits it.
+  const [k, setK] = useState(14);
+  const [pendingK, setPendingK] = useState(14);
+  const [view, setView] = useState<'cards' | 'graph'>('cards');
+  const [selected, setSelected] = useState(null);
+
+  // Fetches the clustering for kVal (n_samples fixed at 3000). Clears the previous
+  // error and selection first; a non-OK response is surfaced with the response body
+  // as the error message.
+  // NOTE(review): overlapping calls are not cancelled, so a slower earlier response
+  // can overwrite a newer one — confirm whether a request guard is needed.
+  const fetchData = useCallback(async (kVal: number) => {
+    setLoading(true);
+    setError('');
+    setSelected(null);
+    try {
+      const r = await fetch(`/api/clustering/clusters?k=${kVal}&n_samples=3000`);
+      if (!r.ok) throw new Error(await r.text());
+      setData(await r.json());
+    } catch (e: any) {
+      setError(e.message || 'Erreur réseau');
+    } finally {
+      setLoading(false);
+    }
+  }, []);
+
+  // Initial load only (empty dependency list), using the initial k.
+  useEffect(() => { fetchData(k); }, []);
+
+  // Commit the pending k and refetch with it.
+  const applyK = () => { setK(pendingK); fetchData(pendingK); };
+
+  // Undefined until the first successful fetch.
+  const stats = data?.stats;
+
+  return (
+
+ {/* ── Barre de contrôle ── */} +
+ {/* Slider k */} +
+ k = + setPendingK(Number(e.target.value))} + className="w-24 accent-indigo-500" /> + {pendingK} + +
+ + {/* Onglets vue */} +
+ {(['cards', 'graph'] as const).map(v => ( + + ))} +
+ + {/* Stats */} + {stats && !loading && ( +
+ + + + + {stats.elapsed_s}s +
+ )} +
+ + {/* ── Erreur ── */} + {error && ( +
+
+

⚠️

+

Erreur de clustering

+

{error}

+ +
+
+ )} + + {/* ── Chargement ── */} + {loading && ( +
+
+
⚙️
+

Calcul K-means++ en cours…

+

Normalisation 21 features · PCA-2D · Nommage automatique

+
+
+ )} + + {/* ── Contenu principal ── */} + {data && !loading && ( +
+ {view === 'cards' ? ( + setSelected(prev => prev?.id === n.id ? null : n)} + /> + ) : ( + setSelected(prev => prev?.id === n.id ? null : n)} + /> + )} + + {/* Sidebar */} + {selected && ( + setSelected(null)} + /> + )} +
+ )} +
+ ); +} + +// ─── Petit composant stat ───────────────────────────────────────────────────── + +function Stat({ label, value, color = 'text-text-primary' }: { label: string; value: string | number; color?: string }) { + return ( + + {value} {label} + + ); +} diff --git a/frontend/src/components/TcpSpoofingView.tsx b/frontend/src/components/TcpSpoofingView.tsx index 16a88d0..2e60076 100644 --- a/frontend/src/components/TcpSpoofingView.tsx +++ b/frontend/src/components/TcpSpoofingView.tsx @@ -9,9 +9,12 @@ interface TcpSpoofingOverview { unique_ips: number; no_tcp_data: number; with_tcp_data: number; - linux_fingerprint: number; + linux_mac_fingerprint: number; windows_fingerprint: number; + cisco_bsd_fingerprint: number; + bot_scanner_fingerprint: number; ttl_distribution: { ttl: number; count: number; ips: number }[]; + mss_distribution: { mss: number; count: number; ips: number }[]; window_size_distribution: { window_size: number; count: number }[]; } @@ -20,10 +23,19 @@ interface TcpSpoofingItem { ja4: string; tcp_ttl: number; tcp_window_size: number; + tcp_win_scale: number; + tcp_mss: number; + hits: number; first_ua: string; suspected_os: string; + initial_ttl: number; + hop_count: number; + confidence: number; + network_path: string; + is_bot_tool: boolean; declared_os: string; spoof_flag: boolean; + spoof_reason: string; } interface OsMatrixEntry { @@ -31,6 +43,7 @@ interface OsMatrixEntry { declared_os: string; count: number; is_spoof: boolean; + is_bot_tool: boolean; } type ActiveTab = 'detections' | 'matrix'; @@ -41,12 +54,49 @@ function formatNumber(n: number): string { return n.toLocaleString('fr-FR'); } -function ttlColor(ttl: number): string { - if (ttl === 0) return 'text-threat-critical'; - if (ttl < 48 || ttl > 200) return 'text-threat-critical'; - if (ttl < 60 || (ttl > 70 && ttl <= 80)) return 'text-threat-medium'; - if (ttl >= 60 && ttl <= 70) return 'text-threat-low'; - return 'text-text-secondary'; +function confidenceBar(conf: number): 
JSX.Element { + const pct = Math.round(conf * 100); + const color = + pct >= 85 ? 'bg-threat-low' : + pct >= 65 ? 'bg-threat-medium' : + pct >= 45 ? 'bg-accent-primary' : + 'bg-text-disabled'; + return ( +
+
+
+
+ {pct}% +
+  );
+}
+
+/** Network-path label for a TCP MSS value; '—' when no MSS is available (0 or less). */
+function mssLabel(mss: number): string {
+  if (mss >= 1460) return 'Ethernet';
+  if (mss >= 1452) return 'PPPoE';
+  if (mss >= 1420) return 'VPN';
+  if (mss >= 1380) return 'VPN/Tunnel';
+  if (mss > 0) return 'Bas débit';
+  return '—';
+}
+
+/** Text colour class for a TCP MSS value. */
+function mssColor(mss: number): string {
+  // mssLabel treats 0 as "no data"; do not paint a missing value as critical.
+  if (mss <= 0) return 'text-text-disabled';
+  if (mss >= 1460) return 'text-threat-low';
+  if (mss >= 1436) return 'text-text-secondary';
+  if (mss >= 1380) return 'text-threat-medium';
+  return 'text-threat-critical';
+}
+
+/**
+ * Emoji for an OS / signature name, matched case-insensitively by substring.
+ * Returns '❓' for an empty, missing or unrecognised name.
+ */
+function osIcon(name: string): string {
+  // Tolerate a missing name instead of throwing.
+  const n = (name ?? '').toLowerCase();
+  if (n.includes('bot') || n.includes('scanner') || n.includes('mirai') || n.includes('zmap')) return '🤖';
+  if (n.includes('windows')) return '🪟';
+  // "cisco ios" contains the substring "ios": keep it out of the Apple branch
+  // so it reaches the network-equipment check below.
+  if ((n.includes('ios') && !n.includes('cisco')) || n.includes('macos')) return '🍎';
+  if (n.includes('android')) return '🤖';
+  if (n.includes('linux')) return '🐧';
+  if (n.includes('cisco') || n.includes('cdn') || n.includes('réseau')) return '🔌';
+  if (n.includes('bsd')) return '😈';
+  return '❓';
+}
 // ─── Sub-components ─────────────────────────────────────────────────────────── @@ -92,50 +142,84 @@ function TcpDetectionsTable({ render: (v: string) => {v}, }, { - key: 'ja4', - label: 'JA4', - render: (v: string) => ( - - {v ? `${v.slice(0, 14)}…` : '—'} + key: 'tcp_ttl', + label: 'TTL obs. 
/ init.', + align: 'right', + render: (_: number, row: TcpSpoofingItem) => ( + + {row.tcp_ttl} + / + {row.initial_ttl} + {row.hop_count >= 0 && ( + ({row.hop_count} hops) + )} ), }, { - key: 'tcp_ttl', - label: 'TTL observé', + key: 'tcp_mss', + label: 'MSS', align: 'right', render: (v: number) => ( - {v} + + {v || '—'} {mssLabel(v)} + ), }, { - key: 'tcp_window_size', - label: 'Fenêtre TCP', + key: 'tcp_win_scale', + label: 'Scale', align: 'right', render: (v: number) => ( - {formatNumber(v)} + {v} ), }, { key: 'suspected_os', - label: 'OS suspecté', - render: (v: string) => {v || '—'}, + label: 'OS suspecté (TCP)', + render: (v: string, row: TcpSpoofingItem) => ( + + {osIcon(v)} + {v || '—'} + + ), + }, + { + key: 'confidence', + label: 'Confiance', + render: (v: number) => confidenceBar(v), + }, + { + key: 'network_path', + label: 'Réseau', + render: (v: string) => {v || '—'}, }, { key: 'declared_os', - label: 'OS déclaré', - render: (v: string) => {v || '—'}, + label: 'OS déclaré (UA)', + render: (v: string) => {v || '—'}, }, { key: 'spoof_flag', - label: 'Spoof', + label: 'Verdict', sortable: false, - render: (v: boolean) => - v ? 
( - - 🚨 Spoof - - ) : null, + render: (v: boolean, row: TcpSpoofingItem) => { + if (row.is_bot_tool) { + return ( + + 🤖 Bot/Scanner + + ); + } + if (v) { + return ( + + 🚨 Spoof + + ); + } + return null; + }, }, { key: '_actions', @@ -157,7 +241,7 @@ function TcpDetectionsTable({ data={items} columns={columns} rowKey="ip" - defaultSortKey="tcp_ttl" + defaultSortKey="hits" emptyMessage="Aucune détection" compact /> @@ -248,11 +332,12 @@ export function TcpSpoofingView() { const filteredItems = items.filter( (item) => - (!spoofOnly || item.spoof_flag) && + (!spoofOnly || item.spoof_flag || item.is_bot_tool) && (!filterText || item.ip.includes(filterText) || item.suspected_os.toLowerCase().includes(filterText.toLowerCase()) || - item.declared_os.toLowerCase().includes(filterText.toLowerCase())) + item.declared_os.toLowerCase().includes(filterText.toLowerCase()) || + item.network_path.toLowerCase().includes(filterText.toLowerCase())) ); // Build matrix axes @@ -280,7 +365,7 @@ export function TcpSpoofingView() {

🧬 Spoofing TCP/OS

- Détection des incohérences entre TTL/fenêtre TCP et l'OS déclaré. + Fingerprinting multi-signal (TTL + MSS + fenêtre + scale) — détection bots, spoofs et anomalies TCP.

@@ -292,15 +377,60 @@ export function TcpSpoofingView() { ) : overview ? ( <>
- - - + + + +
+
+ {/* Distribution MSS */} +
+

Distribution MSS (type de réseau)

+
+ {overview.mss_distribution.map((m) => { + const label = m.mss >= 1460 ? 'Ethernet' : m.mss >= 1452 ? 'PPPoE' : m.mss >= 1420 ? 'VPN léger' : m.mss >= 1380 ? 'VPN/Tunnel' : 'Bas débit'; + const color = m.mss >= 1460 ? 'bg-threat-low' : m.mss >= 1436 ? 'bg-accent-primary' : m.mss >= 1380 ? 'bg-threat-medium' : 'bg-threat-critical'; + const maxCount = overview.mss_distribution[0]?.count || 1; + return ( +
+ {m.mss} +
+
+
+ {formatNumber(m.count)} + {label} +
+ ); + })} +
+
+ {/* Distribution TTL */} +
+

Distribution TTL observé

+
+ {overview.ttl_distribution.map((t) => { + const family = t.ttl <= 64 ? 'Linux/Mac' : t.ttl <= 128 ? 'Windows' : 'Cisco/BSD'; + const color = t.ttl <= 64 ? 'bg-threat-low' : t.ttl <= 128 ? 'bg-accent-primary' : 'bg-threat-medium'; + const maxCount = overview.ttl_distribution[0]?.count || 1; + return ( +
+ {t.ttl} +
+
+
+ {formatNumber(t.count)} + {family} +
+ ); + })} +
+
⚠️ - {formatNumber(overview.no_tcp_data)} entrées sans données TCP (TTL=0, passées par proxy/CDN) — exclues de l'analyse de corrélation. + {formatNumber(overview.no_tcp_data)} entrées sans données TCP (passées par proxy/CDN) — exclues.{' '} + {formatNumber(overview.bot_scanner_fingerprint)} entrées avec signature Masscan/scanner identifiée (win=5808, mss=1452, scale=4).
@@ -341,7 +471,7 @@ export function TcpSpoofingView() { onChange={(e) => setSpoofOnly(e.target.checked)} className="accent-accent-primary" /> - Spoofs uniquement (TTL corrélé + OS mismatch) + Spoofs & Bots uniquement (corrélation confirmée)
@@ -398,17 +528,24 @@ export function TcpSpoofingView() { const dos = declaredOSes[ci]; const entry = matrix.find((e) => e.suspected_os === sos && e.declared_os === dos); const isSpoofCell = entry?.is_spoof ?? false; + const isBotCell = entry?.is_bot_tool ?? false; return ( 0 - ? 'bg-threat-critical/25 text-threat-critical font-bold' + isBotCell && count > 0 + ? 'bg-threat-critical/30 text-threat-critical font-bold' + : isSpoofCell && count > 0 + ? 'bg-threat-high/25 text-threat-high font-bold' : matrixCellColor(count) + (count > 0 ? ' text-text-primary' : ' text-text-disabled') }`} - title={isSpoofCell ? '🚨 OS mismatch confirmé' : undefined} + title={isBotCell ? '🤖 Outil de scan/bot' : isSpoofCell ? '🚨 OS mismatch confirmé' : undefined} > - {count > 0 ? (isSpoofCell ? `🚨 ${formatNumber(count)}` : formatNumber(count)) : '—'} + {count > 0 + ? isBotCell ? `🤖 ${formatNumber(count)}` + : isSpoofCell ? `🚨 ${formatNumber(count)}` + : formatNumber(count) + : '—'} ); })}