feat: clustering multi-métriques + TCP fingerprinting amélioré

- TCP fingerprinting: 20 signatures OS (p0f-style), scoring multi-signal TTL/MSS/scale/fenêtre, détection Masscan 97% confiance, réseau path (Ethernet/PPPoE/VPN/Tunnel), estimation hop-count
- Clustering IPs: K-means++ (Arthur & Vassilvitskii 2007) sur 21 features TCP stack + anomalie ML + TLS/protocole + navigateur + temporel PCA-2D par puissance itérative (Hotelling) pour positionnement
- Visualisation redesign: 2 vues lisibles
  - Tableau de bord: grille de cartes groupées par niveau de risque (Bots / Suspects / Légitimes), métriques clés + mini-barres
  - Graphe de relations: ReactFlow avec nœuds-cartes en colonnes par niveau de menace, arêtes colorées par similarité, légende
  - Sidebar: RadarChart comportemental + toutes métriques + export CSV

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-03-18 18:22:57 +01:00
parent c887846af5
commit e2db8ca84e
9 changed files with 2430 additions and 202 deletions
--- a/backend/services/clustering_engine.py
+++ b/backend/services/clustering_engine.py
@ -0,0 +1,328 @@
+"""
+Moteur de clustering K-means++ multi-métriques (pur Python).
+
+Ref: Arthur & Vassilvitskii (2007) — k-means++: The Advantages of Careful Seeding
+     Hotelling (1933) — PCA par puissance itérative (deflation)
+
+Features (21 dimensions, normalisées [0,1]) :
+  0  ttl_n          : TTL initial normalisé (hops-count estimé)
+  1  mss_n          : MSS normalisé → type réseau (Ethernet/PPPoE/VPN)
+  2  scale_n        : facteur de mise à l'échelle TCP
+  3  win_n          : fenêtre TCP normalisée
+  4  score_n        : score anomalie ML (abs)
+  5  velocity_n     : vélocité de requêtes (log1p)
+  6  fuzzing_n      : index de fuzzing (log1p)
+  7  headless_n     : ratio sessions headless
+  8  post_n         : ratio POST/total
+  9  ip_id_zero_n   : ratio IP-ID=0 (Linux/spoofé)
+  10 entropy_n      : entropie temporelle
+  11 browser_n      : score navigateur moderne (normalisé max 50)
+  12 alpn_n         : mismatch ALPN/protocole
+  13 alpn_absent_n  : ratio ALPN absent
+  14 h2_n           : efficacité H2 multiplexing (log1p)
+  15 hdr_conf_n     : confiance ordre headers
+  16 ua_ch_n        : mismatch User-Agent-Client-Hints
+  17 asset_n        : ratio assets statiques
+  18 direct_n       : ratio accès directs
+  19 ja4_div_n      : diversité JA4 (log1p)
+  20 ua_rot_n       : UA rotatif (booléen)
+"""
+from __future__ import annotations
+import math
+import random
+from dataclasses import dataclass, field
+
+# ─── Définition des features ──────────────────────────────────────────────────
+
+# One (SQL column key, display name, normaliser) triple per feature. The list
+# order fixes the index of each feature in every vector built from it.
+# Normalisers cap their result at 1.0; none of them applies a lower bound.
+
+def _scaled(ceiling):
+    """Return a normaliser computing ``(v or 0) / ceiling``, capped at 1.0."""
+    return lambda v: min(1.0, (v or 0) / ceiling)
+
+
+def _ratio(v):
+    """Cap a value that is already a ratio at 1.0 (None and 0 give 0.0)."""
+    return min(1.0, float(v or 0))
+
+
+def _log_scaled(reference):
+    """Return a log1p normaliser that reaches 1.0 at ``reference`` (capped)."""
+    return lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(reference))
+
+
+FEATURES = [
+    # TCP stack
+    ("ttl", "TTL Initial", _scaled(255.0)),
+    ("mss", "MSS Réseau", _scaled(1460.0)),
+    ("scale", "Scale TCP", _scaled(14.0)),
+    ("win", "Fenêtre TCP", _scaled(65535.0)),
+    # ML anomaly signals
+    ("avg_score", "Score Anomalie", _ratio),
+    ("avg_velocity", "Vélocité (rps)", _log_scaled(100)),
+    ("avg_fuzzing", "Fuzzing", _log_scaled(300)),
+    ("pct_headless", "Headless", _ratio),
+    ("avg_post", "Ratio POST", _ratio),
+    # IP-ID
+    ("ip_id_zero", "IP-ID Zéro", _ratio),
+    # Timing
+    ("entropy", "Entropie Temporelle", _log_scaled(10)),
+    # Browser
+    ("browser_score", "Score Navigateur", lambda v: min(1.0, float(v or 0) / 50.0)),
+    # TLS / protocol
+    ("alpn_mismatch", "ALPN Mismatch", _ratio),
+    ("alpn_missing", "ALPN Absent", _ratio),
+    ("h2_eff", "H2 Multiplexing", _log_scaled(20)),
+    ("hdr_conf", "Ordre Headers", _ratio),
+    ("ua_ch_mismatch", "UA-CH Mismatch", _ratio),
+    # HTTP behaviour
+    ("asset_ratio", "Ratio Assets", _ratio),
+    ("direct_ratio", "Accès Direct", _ratio),
+    # JA4 diversity
+    ("ja4_count", "Diversité JA4", _log_scaled(30)),
+    # Rotating User-Agent (boolean flag)
+    ("ua_rotating", "UA Rotatif", lambda v: 1.0 if float(v or 0) > 0 else 0.0),
+]
+
+# Column-wise views of the table, plus the vector dimensionality.
+FEATURE_KEYS, FEATURE_NAMES, FEATURE_NORMS = (list(column) for column in zip(*FEATURES))
+N_FEATURES = len(FEATURES)
+
+
+# ─── Utilitaires vectoriels (pur Python) ──────────────────────────────────────
+
+def _dist2(a: list[float], b: list[float]) -> float:
+    """Return the squared Euclidean distance between ``a`` and ``b``.
+
+    Components are paired with ``zip``, so extra trailing components of the
+    longer vector are ignored.
+    """
+    squared_gaps = ((p - q) ** 2 for p, q in zip(a, b))
+    return sum(squared_gaps)
+
+def _mean_vec(vecs: list[list[float]]) -> list[float]:
+    """Return the component-wise mean of ``vecs`` over N_FEATURES dimensions.
+
+    An empty input yields the zero vector of length N_FEATURES.
+    """
+    count = len(vecs)
+    if not count:
+        return [0.0] * N_FEATURES
+    return [sum(vec[dim] for vec in vecs) / count for dim in range(N_FEATURES)]
+
+
+# ─── Construction du vecteur de features ─────────────────────────────────────
+
+def build_feature_vector(row: dict) -> list[float]:
+    """Turn a row of SQL columns into a feature vector of length N_FEATURES.
+
+    Each feature's normaliser receives ``row.get(key)``, so a missing column
+    is passed as None (which the normalisers treat as 0).
+    """
+    vector = []
+    for key, normalise in zip(FEATURE_KEYS, FEATURE_NORMS):
+        vector.append(normalise(row.get(key)))
+    return vector
+
+
+# ─── K-means++ ───────────────────────────────────────────────────────────────
+
+# Outcome of one k-means run, as returned by kmeans_pp.
+@dataclass
+class KMeansResult:
+    # Final centroid vectors, one per cluster.
+    centroids: list[list[float]]
+    # Cluster index assigned to each input point (index into `centroids`).
+    labels:    list[int]
+    # Sum of squared distances between each point and its assigned centroid.
+    inertia:   float
+    # Number of assignment/update iterations performed by the retained run.
+    n_iter:    int
+
+
+def kmeans_pp(
+    points:   list[list[float]],
+    k:        int,
+    max_iter: int = 60,
+    seed:     int = 42,
+    n_init:   int = 3,
+) -> KMeansResult:
+    """
+    K-means with k-means++ seeding (Arthur & Vassilvitskii, 2007).
+
+    Runs the algorithm `n_init` times from a single seeded RNG and returns
+    the run with the lowest inertia.
+
+    Args:
+        points:   input vectors; may be empty.
+        k:        requested number of clusters (values below 1 are treated as 1).
+        max_iter: maximum assignment/update iterations per run (at least 1).
+        seed:     seed of the random generator, for reproducible results.
+        n_init:   number of independent restarts (at least 1).
+
+    Returns:
+        The best KMeansResult. Fewer than `k` centroids are returned when the
+        seeding runs out of points at a non-zero distance from the centroids
+        already chosen. An empty `points` gives an empty result
+        (no centroids, no labels, inertia 0.0, n_iter 0).
+    """
+    n_points = len(points)
+    if n_points == 0:
+        # Nothing to cluster: previously this crashed inside rng.randrange(0).
+        return KMeansResult(centroids=[], labels=[], inertia=0.0, n_iter=0)
+
+    # Degenerate parameters used to raise IndexError / NameError or return
+    # None; clamp them to the smallest meaningful value instead.
+    k = max(1, k)
+    max_iter = max(1, max_iter)
+    n_init = max(1, n_init)
+
+    rng = random.Random(seed)
+    best: KMeansResult | None = None
+
+    for _attempt in range(n_init):
+        # ── k-means++ seeding ───────────────────────────────────────────
+        centroids = [points[rng.randrange(n_points)][:]]
+
+        for _ in range(k - 1):
+            # Squared distance of every point to its nearest chosen centroid.
+            d2 = [min(_dist2(p, c) for c in centroids) for p in points]
+            total = sum(d2)
+            if total == 0:
+                # Every point coincides with a centroid: no more distinct seeds.
+                break
+            # Draw the next centroid with probability proportional to d2.
+            r = rng.random() * total
+            cumul = 0.0
+            for i, d in enumerate(d2):
+                cumul += d
+                if cumul >= r:
+                    centroids.append(points[i][:])
+                    break
+            else:
+                # Rounding left the running sum below r: pick uniformly.
+                centroids.append(points[rng.randrange(n_points)][:])
+
+        # ── Lloyd iterations ────────────────────────────────────────────
+        labels: list[int] = [0] * n_points
+        n_iter = 0
+        for iteration in range(max_iter):
+            n_iter = iteration + 1
+
+            # Assignment step: nearest centroid for each point.
+            new_labels = [
+                min(range(len(centroids)), key=lambda c: _dist2(p, centroids[c]))
+                for p in points
+            ]
+            if iteration > 0 and new_labels == labels:
+                break  # assignments are stable: converged
+            labels = new_labels
+
+            # Update step: move each non-empty cluster's centroid to its mean.
+            clusters: list[list[list[float]]] = [[] for _ in centroids]
+            for point, label in zip(points, labels):
+                clusters[label].append(point)
+            for j, members in enumerate(clusters):
+                if members:
+                    centroids[j] = _mean_vec(members)
+
+        inertia = sum(_dist2(p, centroids[label]) for p, label in zip(points, labels))
+        if best is None or inertia < best.inertia:
+            best = KMeansResult(
+                centroids=centroids,
+                labels=labels,
+                inertia=inertia,
+                n_iter=n_iter,
+            )
+
+    return best  # type: ignore  # n_init >= 1 guarantees at least one run
+
+
+# ─── PCA 2D par puissance itérative ──────────────────────────────────────────
+
+def pca_2d(points: list[list[float]]) -> list[tuple[float, float]]:
+    """
+    Project `points` onto two axes found by power iteration with deflation.
+
+    The data is mean-centred, the first axis is estimated by 30 power
+    iterations on X^T X starting from the uniform unit vector, its component
+    is subtracted from the data, and the second axis is estimated the same
+    way on the residual. Each coordinate is then min-max rescaled to [0, 1].
+
+    Returns one (pc1, pc2) pair per input point, or [] for an empty input.
+    """
+    if len(points) == 0:
+        return []
+
+    dims = range(N_FEATURES)
+
+    def project(rows: list[list[float]], axis: list[float]) -> list[float]:
+        """Dot product of every row with `axis`."""
+        return [sum(row[d] * axis[d] for d in dims) for row in rows]
+
+    def leading_axis(rows: list[list[float]], n_iter: int = 30) -> list[float]:
+        """Approximate the dominant eigenvector of rows^T rows."""
+        axis = [1.0 / math.sqrt(N_FEATURES)] * N_FEATURES
+        for _ in range(n_iter):
+            scores = project(rows, axis)
+            # rows^T @ scores, one component per feature.
+            gram = [sum(rows[i][d] * scores[i] for i in range(len(rows))) for d in dims]
+            # A zero vector (no variance left) is divided by 1e-10 instead of 0.
+            length = math.sqrt(sum(g ** 2 for g in gram)) or 1e-10
+            axis = [g / length for g in gram]
+        return axis
+
+    def rescale(vals: list[float]) -> list[float]:
+        """Min-max rescale; a zero span is replaced by 1e-10."""
+        lo, hi = min(vals), max(vals)
+        span = hi - lo or 1e-10
+        return [(v - lo) / span for v in vals]
+
+    # Centre the data on its mean.
+    centre = _mean_vec(points)
+    centred = [[pt[d] - centre[d] for d in dims] for pt in points]
+
+    # First component.
+    axis1 = leading_axis(centred)
+    coords1 = project(centred, axis1)
+
+    # Deflation: remove the first component from every row.
+    residual = [
+        [row[d] - coord * axis1[d] for d in dims]
+        for row, coord in zip(centred, coords1)
+    ]
+
+    # Second component, measured on the deflated data.
+    axis2 = leading_axis(residual)
+    coords2 = project(residual, axis2)
+
+    return list(zip(rescale(coords1), rescale(coords2)))
+
+
+# ─── Nommage automatique des clusters ────────────────────────────────────────
+
+def name_cluster(centroid: list[float], raw_stats: dict | None = None) -> str:
+    """
+    Build a readable cluster label from a normalised centroid.
+
+    Rules are evaluated in priority order and the first match wins:
+    deterministic bot signals, then a moderate ML score combined with a
+    behavioural signal, then weak signals, then a network / OS guess.
+
+    Args:
+        centroid:  normalised feature vector (indices follow FEATURES).
+        raw_stats: optional raw means under the keys "mean_mss", "mean_ttl"
+                   and "mean_scale". A missing or falsy "mean_ttl" is
+                   replaced by the centroid's TTL feature times 255.
+
+    Returns:
+        The label string of the first matching rule.
+    """
+    # Normalised signals, read from their fixed positions in the vector.
+    ml_score      = centroid[4]
+    velocity      = centroid[5]
+    fuzzing       = centroid[6]
+    headless      = centroid[7]
+    post_ratio    = centroid[8]
+    alpn_mismatch = centroid[12]
+    h2_eff        = centroid[14]
+    ua_ch         = centroid[16]
+    ja4_diversity = centroid[19]
+    ua_rotating   = centroid[20]
+
+    stats = raw_stats or {}
+    mss   = stats.get("mean_mss", 0)
+    ttl   = stats.get("mean_ttl", 0) or (centroid[0] * 255)
+    scale = stats.get("mean_scale", 0)
+
+    # ── Strong, deterministic signals ────────────────────────────────────
+
+    # Masscan-like stack: MSS 1440-1460, scale 3-5, TTL below 60.
+    if mss and 1440 <= mss <= 1460 and scale and 3 <= scale <= 5 and ttl < 60:
+        return "🤖 Masscan / Scanner IP"
+
+    # Aggressive fuzzer. NOTE: with the log1p(v)/log1p(300) normalisation
+    # declared in FEATURES, 0.35 corresponds to a raw fuzzing index of about
+    # 6 (not 100, as an earlier comment claimed).
+    if fuzzing > 0.35:
+        return "🤖 Bot Fuzzer / Scanner"
+
+    # Rotating UA together with a Client-Hints mismatch.
+    if ua_rotating > 0.5 and ua_ch > 0.7:
+        return "🤖 Bot UA Rotatif + CH Mismatch"
+
+    # Strong Client-Hints mismatch on its own.
+    if ua_ch > 0.8:
+        return "⚠️ Bot UA-CH Incohérent"
+
+    # ── Moderate ML score refined by a behavioural signal ────────────────
+
+    if ml_score > 0.20:
+        if headless > 0.3:
+            label = "⚠️ Navigateur Headless Suspect"
+        elif velocity > 0.25:
+            label = "⚠️ Bot Haute Vélocité"
+        elif post_ratio > 0.4:
+            label = "⚠️ Bot POST Automatisé"
+        elif alpn_mismatch > 0.5 or h2_eff > 0.5:
+            label = "⚠️ TLS/H2 Anormal"
+        elif ua_ch > 0.4:
+            label = "⚠️ Anomalie UA-CH"
+        else:
+            label = "⚠️ Anomalie ML Modérée"
+        return label
+
+    # ── Weak signals ─────────────────────────────────────────────────────
+
+    if ua_ch > 0.4:
+        return "🔎 UA-CH Incohérent"
+
+    if ja4_diversity > 0.5:
+        return "🔄 Client Multi-Fingerprint"
+
+    # ── Network / OS classification ──────────────────────────────────────
+
+    # Low MSS: VPN or tunnel.
+    if mss and mss < 1360:
+        return "🌐 VPN / Tunnel"
+
+    # TTL bands; an unknown TTL (0) falls into the first band.
+    if ttl < 70:
+        return "🐧 Linux / Mobile"
+    return "🪟 Windows" if ttl > 110 else "✅ Trafic Légitime"
+
+
+def risk_score_from_centroid(centroid: list[float]) -> float:
+    """Return a weighted risk score for a normalised centroid, capped at 1.0.
+
+    The ML anomaly score (index 4) is first stretched by dividing by 0.5 and
+    capped at 1.0; every other signal is used as stored in the centroid.
+    The weights add up to 1.03, hence the final cap.
+    """
+    ml_term = min(1.0, centroid[4] / 0.5)
+
+    # (weight, signal) pairs, accumulated in this exact order.
+    weighted_signals = (
+        (0.25, ml_term),        # ML anomaly score
+        (0.20, centroid[16]),   # UA / Client-Hints mismatch
+        (0.15, centroid[6]),    # fuzzing
+        (0.12, centroid[20]),   # rotating UA
+        (0.10, centroid[7]),    # headless
+        (0.07, centroid[5]),    # velocity
+        (0.04, centroid[9]),    # IP-ID zero
+        (0.04, centroid[12]),   # ALPN mismatch
+        (0.03, centroid[19]),   # JA4 diversity
+        (0.03, centroid[8]),    # POST ratio
+    )
+
+    total = 0.0
+    for weight, signal in weighted_signals:
+        total += weight * signal
+    return min(1.0, total)
--- a/backend/services/tcp_fingerprint.py
+++ b/backend/services/tcp_fingerprint.py
@ -0,0 +1,436 @@
+"""
+Service de fingerprinting OS par signature TCP — approche multi-signal inspirée de p0f.
+
+Signaux utilisés (par ordre de poids) :
+  1. TTL initial estimé  (→ famille OS : Linux/Mac=64, Windows=128, Cisco/BSD=255)
+  2. MSS                 (→ type de réseau : Ethernet=1460, PPPoE=1452, VPN=1380-1420)
+  3. Taille de fenêtre   (→ signature OS précise)
+  4. Facteur d'échelle   (→ affine la version du kernel/stack TCP)
+
+Références :
+  - p0f v3 (Michal Zalewski) — passive OS fingerprinting
+  - Nmap OS detection (Gordon Lyon)
+  - "OS Fingerprinting Revisited" (Beverly, 2004)
+  - "Passive OS fingerprinting" (Orebaugh, Ramirez)
+  - Recherche sur Masscan/ZMap : signatures SYN craftées connues
+"""
+
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Optional
+
+
+# ─── Constantes ───────────────────────────────────────────────────────────────
+
+# Standard initial TTL values, in ascending order (tried in this order by
+# _estimate_initial_ttl).
+_INITIAL_TTLS = (64, 128, 255)
+
+# MSS → network path type (MSS = MTU minus 40 bytes of IP+TCP headers).
+# The ranges are disjoint; an MSS above 9000 matches none of them.
+_MSS_PATH: list[tuple[range, str]] = [
+    (range(1461, 9001), "Ethernet/Jumbo"),   # jumbo frames (CDN/datacenter)
+    (range(1460, 1461), "Ethernet directe"),  # standard MTU 1500
+    (range(1453, 1460), "Ethernet directe"),  # slightly reduced (padding)
+    (range(1452, 1453), "PPPoE/DSL"),         # MTU 1492
+    (range(1436, 1452), "PPPoE/DSL ajusté"), # DSL variations
+    (range(1420, 1436), "VPN léger"),         # WireGuard / IPsec transport
+    (range(1380, 1420), "VPN/Tunnel"),        # OpenVPN / L2TP
+    (range(1300, 1380), "VPN double ou mobile"),
+    (range(0,    1300), "Lien bas débit / GPRS"),
+]
+
+
+# ─── Base de signatures OS ─────────────────────────────────────────────────────
+#
+# Format: each entry is a dict with:
+#   ttl   : int            — expected initial TTL (64 | 128 | 255)
+#   win   : set[int]|None  — expected window sizes   (None = ignore)
+#   scale : set[int]|None  — expected scale factors  (None = ignore)
+#   mss   : set[int]|None  — expected MSS values     (None = ignore)
+#   name  : str            — displayed label
+#   conf  : float          — base confidence weight (0–1)
+#   bot   : bool           — known scanning tool / bot
+#
+# Order matters: fingerprint_os keeps the first entry among equal scores.
+
+_SIGNATURES: list[dict] = [
+    # ══════════════════════════════════════════════════════
+    # KNOWN SCANNING TOOLS AND BOTS (listed first = highest priority)
+    # ══════════════════════════════════════════════════════
+
+    # Masscan / custom scanner with a modified Linux stack (PPPoE MSS=1452).
+    # Very common pattern in the data: ~111k requests, spoofed macOS/Windows UA.
+    {
+        "ttl": 64, "win": {5808}, "scale": {4}, "mss": {1452},
+        "name": "Bot-Scanner/Masscan", "conf": 0.97, "bot": True,
+    },
+    # Masscan with TTL=255 (direct mode, no hop)
+    {
+        "ttl": 255, "win": {1024}, "scale": {0}, "mss": None,
+        "name": "Bot-ZMap/Masscan", "conf": 0.96, "bot": True,
+    },
+    # Mirai variant (small window, no scale, Linux TTL)
+    {
+        "ttl": 64, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
+        "name": "Bot-Mirai", "conf": 0.92, "bot": True,
+    },
+    # Mirai variant (small window, Windows TTL)
+    {
+        "ttl": 128, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
+        "name": "Bot-Mirai/Win", "conf": 0.92, "bot": True,
+    },
+    # Scapy / hand-crafted packets (window exactly 8192 + TTL 64 + no scale)
+    {
+        "ttl": 64, "win": {8192}, "scale": {0}, "mss": {1460},
+        "name": "Bot-Scapy/Forge", "conf": 0.85, "bot": True,
+    },
+    # Scanner sending a window of exactly 1 with initial TTL 64 (labelled ZMap).
+    # NOTE(review): the original comment described an "Nmap SYN scan
+    # (window=1024, MSS=1460, TTL=64 or 128)", which does not match this
+    # entry — confirm which tool and values are intended.
+    {
+        "ttl": 64, "win": {1}, "scale": None, "mss": None,
+        "name": "Bot-ZMap", "conf": 0.95, "bot": True,
+    },
+
+    # ══════════════════════════════════════════════════════
+    # WINDOWS
+    # ══════════════════════════════════════════════════════
+
+    # Windows 10 / 11 — standard signature (direct LAN)
+    {
+        "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1460},
+        "name": "Windows 10/11", "conf": 0.93, "bot": False,
+    },
+    # Windows 10/11 — behind a VPN/proxy (reduced MSS)
+    {
+        "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1380, 1400, 1412, 1420, 1440},
+        "name": "Windows 10/11 (VPN)", "conf": 0.90, "bot": False,
+    },
+    # Windows Server 2019/2022 — large window
+    {
+        "ttl": 128, "win": {65535, 131072}, "scale": {8, 9}, "mss": {1460},
+        "name": "Windows Server", "conf": 0.88, "bot": False,
+    },
+    # Windows 7/8.1
+    {
+        "ttl": 128, "win": {8192, 65535}, "scale": {4, 8}, "mss": {1460},
+        "name": "Windows 7/8", "conf": 0.83, "bot": False,
+    },
+    # Generic Windows (TTL=128, scale=8, any MSS)
+    {
+        "ttl": 128, "win": None, "scale": {8}, "mss": None,
+        "name": "Windows", "conf": 0.70, "bot": False,
+    },
+
+    # ══════════════════════════════════════════════════════
+    # ANDROID (modern BBRv2 / CUBIC stack)
+    # ══════════════════════════════════════════════════════
+
+    # Android 10+ — scale=9 or 10, large window (BBRv2)
+    {
+        "ttl": 64, "win": {65535, 131072, 42340, 35844}, "scale": {9, 10}, "mss": {1460},
+        "name": "Android 10+", "conf": 0.82, "bot": False,
+    },
+    # Android through a TTL=128 proxy (Facebook, TikTok apps etc. going through infra)
+    {
+        "ttl": 128, "win": {62727, 65535}, "scale": {7}, "mss": {1460},
+        "name": "Android/App (proxy)", "conf": 0.75, "bot": False,
+    },
+    # Android behind a VPN (reduced MSS)
+    {
+        "ttl": 64, "win": {65535, 59640, 63940}, "scale": {8, 9, 10}, "mss": {1380, 1390, 1400, 1418, 1420},
+        "name": "Android (VPN/mobile)", "conf": 0.78, "bot": False,
+    },
+
+    # ══════════════════════════════════════════════════════
+    # iOS / macOS
+    # ══════════════════════════════════════════════════════
+
+    # iOS 14+ / macOS Monterey+ — scale=6, win=65535 (XNU signature)
+    {
+        "ttl": 64, "win": {65535, 32768}, "scale": {6}, "mss": {1460},
+        "name": "iOS/macOS", "conf": 0.87, "bot": False,
+    },
+    # macOS Sonoma+ / iOS 17+ (scale=9, larger window)
+    {
+        "ttl": 64, "win": {65535, 32768}, "scale": {9}, "mss": {1460},
+        "name": "macOS Sonoma+/iOS 17+", "conf": 0.83, "bot": False,
+    },
+    # macOS behind a VPN (reduced MSS)
+    {
+        "ttl": 64, "win": {65535}, "scale": {6, 9}, "mss": {1380, 1400, 1412, 1436},
+        "name": "iOS/macOS (VPN)", "conf": 0.80, "bot": False,
+    },
+
+    # ══════════════════════════════════════════════════════
+    # LINUX (desktop/server)
+    # ══════════════════════════════════════════════════════
+
+    # Linux 5.x+ — scale=7, win=64240 or 65320 (kernel ≥ 4.19).
+    # NOTE(review): the original comment said 65535; the set holds 65320.
+    {
+        "ttl": 64, "win": {64240, 65320}, "scale": {7}, "mss": {1460},
+        "name": "Linux 5.x+", "conf": 0.86, "bot": False,
+    },
+    # Linux 4.x / ChromeOS
+    {
+        "ttl": 64, "win": {29200, 65535, 43690, 32120}, "scale": {7}, "mss": {1460},
+        "name": "Linux 4.x/ChromeOS", "conf": 0.83, "bot": False,
+    },
+    # Linux behind a VPN (reduced MSS)
+    {
+        "ttl": 64, "win": {64240, 65535, 42600}, "scale": {7}, "mss": {1380, 1400, 1420, 1436},
+        "name": "Linux (VPN)", "conf": 0.80, "bot": False,
+    },
+    # Linux 2.6.x (old — win=5840/14600)
+    {
+        "ttl": 64, "win": {5840, 14600, 16384}, "scale": {4, 5}, "mss": {1460},
+        "name": "Linux 2.6", "conf": 0.78, "bot": False,
+    },
+
+    # ══════════════════════════════════════════════════════
+    # BSD / NETWORK EQUIPMENT / CDN
+    # ══════════════════════════════════════════════════════
+
+    # FreeBSD / OpenBSD (initial TTL=64)
+    {
+        "ttl": 64, "win": {65535}, "scale": {6}, "mss": {512, 1460},
+        "name": "FreeBSD/OpenBSD", "conf": 0.74, "bot": False,
+    },
+    # Cisco IOS / network equipment (initial TTL=255, small window)
+    {
+        "ttl": 255, "win": {4096, 4128, 8760}, "scale": {0, 1, 2}, "mss": {512, 1460},
+        "name": "Cisco/Réseau", "conf": 0.87, "bot": False,
+    },
+    # CDN / Applebot (TTL=255, jumbo MSS, large window)
+    {
+        "ttl": 255, "win": {26883, 65535, 59640}, "scale": {7, 8}, "mss": {8373, 8365, 1460},
+        "name": "CDN/Applebot (jumbo)", "conf": 0.85, "bot": False,
+    },
+    # Generic BSD/Unix (TTL=255)
+    {
+        "ttl": 255, "win": None, "scale": {6, 7, 8}, "mss": {1460},
+        "name": "BSD/Unix", "conf": 0.68, "bot": False,
+    },
+]
+
+
+# ─── Data classes ──────────────────────────────────────────────────────────────
+
+# Result of fingerprint_os: the OS guess derived from TCP-level signals.
+@dataclass
+class OsFingerprint:
+    # Matched signature name, a TTL-only fallback label, or "Unknown".
+    os_name:      str
+    # Estimated initial TTL (64, 128 or 255); 0 when it cannot be estimated.
+    initial_ttl:  int
+    # initial_ttl minus the observed TTL; -1 when it cannot be estimated.
+    hop_count:    int
+    # Match score multiplied by the signature's base confidence, rounded to 2 decimals.
+    confidence:   float
+    # True when the matched signature is flagged as a scanning tool / bot.
+    is_bot_tool:  bool
+    # Network path label inferred from the MSS (see _infer_network_path).
+    network_path: str
+
+# Verdict of detect_spoof: does the TCP fingerprint contradict the declared OS?
+@dataclass
+class SpoofResult:
+    # True when the TCP-level OS and the UA-declared OS are inconsistent,
+    # or when a scanning tool was recognised.
+    is_spoof:    bool
+    # True when the verdict comes from a known scanning-tool signature.
+    is_bot_tool: bool
+    # Human-readable explanation of the verdict.
+    reason:      str
+
+
+# ─── Fonctions utilitaires ─────────────────────────────────────────────────────
+
+def _estimate_initial_ttl(observed_ttl: int) -> tuple[int, int]:
+    """Return (initial_ttl, hop_count) estimated from an observed TTL.
+
+    Picks the lowest standard initial TTL (64, 128, 255) that is
+    >= observed_ttl and at most 45 hops away. When no standard value is
+    within 45 hops, falls back to (255, 255 - observed_ttl).
+
+    An observed TTL outside 1..255 cannot come from a real IP header and
+    yields (0, -1); previously values above 255 produced a negative
+    hop count.
+    """
+    if observed_ttl <= 0 or observed_ttl > 255:
+        return 0, -1
+    for initial in _INITIAL_TTLS:
+        hop = initial - observed_ttl
+        # hop < 0 means the packet started above this standard value.
+        if 0 <= hop <= 45:
+            return initial, hop
+    # No standard TTL within 45 hops: assume the largest initial value.
+    return 255, 255 - observed_ttl
+
+
+def _infer_network_path(mss: int) -> str:
+    """Map an MSS value to the label of the _MSS_PATH range containing it.
+
+    Returns "Inconnu" for a non-positive MSS or one outside every range.
+    """
+    if mss > 0:
+        for span, label in _MSS_PATH:
+            if mss in span:
+                return label
+    return "Inconnu"
+
+
+def _os_family(os_name: str) -> str:
+    """Reduce a detailed OS label to its family, for comparison with the UA.
+
+    Families are tested in order and the first one with a keyword contained
+    in the lower-cased name wins, so "Linux/macOS" resolves to "Apple".
+    """
+    lowered = os_name.lower()
+    families = (
+        ("Windows", ("windows",)),
+        ("Android", ("android",)),
+        ("Apple", ("ios", "macos", "iphone", "ipad")),
+        ("Linux", ("linux", "chromeos")),
+        ("Network", ("bsd", "cisco", "cdn", "réseau")),
+        ("Bot", ("bot", "scanner", "mirai", "zmap")),
+    )
+    for family, keywords in families:
+        if any(keyword in lowered for keyword in keywords):
+            return family
+    return "Unknown"
+
+
+def _ua_os_family(declared_os: str) -> str:
+    """Reduce the OS declared by the User-Agent to its family.
+
+    Any value other than the ones listed below maps to "Unknown".
+    """
+    if declared_os == "Windows":
+        return "Windows"
+    if declared_os == "Android":
+        return "Android"
+    if declared_os in ("iOS", "macOS"):
+        return "Apple"
+    if declared_os in ("Linux", "ChromeOS"):
+        return "Linux"
+    if declared_os == "BSD":
+        return "Network"
+    return "Unknown"
+
+
+# ─── Fonctions publiques ───────────────────────────────────────────────────────
+
+def declared_os_from_ua(ua: str) -> str:
+    """Infer the declared OS from a User-Agent string.
+
+    Matching is case-insensitive and order-sensitive: Android is tested
+    before Linux and iPhone/iPad before macOS, because those User-Agents
+    also contain the later keywords.
+
+    Returns one of "Windows", "Android", "iOS", "macOS", "ChromeOS",
+    "Linux", "BSD" or "Unknown" (also for an empty or None input).
+    """
+    ul = (ua or "").lower()
+    if not ul:
+        return "Unknown"
+    if "windows nt" in ul:
+        return "Windows"
+    if "android" in ul:
+        return "Android"
+    if "iphone" in ul or "ipad" in ul:
+        return "iOS"
+    if "mac os x" in ul or "macos" in ul:
+        return "macOS"
+    # "cros" must be a whole token: as a bare substring it also matches
+    # "microsoft", which misclassified e.g. "Microsoft-CryptoAPI/10.0".
+    tokens = ul.translate(str.maketrans("();,/", "     ")).split()
+    if "cros" in tokens or "chromeos" in ul:
+        return "ChromeOS"
+    if "linux" in ul:
+        return "Linux"
+    if "freebsd" in ul or "openbsd" in ul or "netbsd" in ul:
+        return "BSD"
+    return "Unknown"
+
+
+def fingerprint_os(ttl: int, win: int, scale: int, mss: int) -> OsFingerprint:
+    """Fingerprint the OS from TCP signals using a weighted score.
+
+    Only signatures whose initial TTL equals the estimated one are scored.
+    Starting from 0.40 for the TTL match, each constrained field adds its
+    weight on a match or subtracts a smaller penalty on a mismatch:
+
+      - MSS      +0.30 / -0.12
+      - Window   +0.20 / -0.08
+      - Scale    +0.10 / -0.04
+
+    A hop count above 30 costs a further 0.05. The best-scoring signature is
+    kept (the first one on ties) if it reaches 0.38; the reported confidence
+    is that score multiplied by the signature's base confidence.
+
+    Signatures flagged as scanning tools are only eligible when every field
+    they constrain matches exactly. A partial match used to be enough, which
+    marked ordinary hosts as bot tools (e.g. any TTL-255 host with scale 0).
+
+    Args:
+        ttl:   observed IP TTL.
+        win:   TCP window size.
+        scale: TCP window scale factor.
+        mss:   TCP maximum segment size.
+
+    Returns:
+        An OsFingerprint; "Unknown" with confidence 0.0 when the TTL cannot
+        be interpreted, or a TTL-only fallback with confidence 0.26 when no
+        signature scores high enough.
+    """
+    initial_ttl, hop_count = _estimate_initial_ttl(ttl)
+    network_path = _infer_network_path(mss)
+
+    if initial_ttl == 0:
+        return OsFingerprint(
+            os_name="Unknown", initial_ttl=0, hop_count=-1,
+            confidence=0.0, is_bot_tool=False, network_path=network_path,
+        )
+
+    # (signature key, observed value, bonus on match, penalty on mismatch),
+    # applied in this order.
+    signals = (
+        ("mss", mss, 0.30, 0.12),
+        ("win", win, 0.20, 0.08),
+        ("scale", scale, 0.10, 0.04),
+    )
+
+    best_score: float = -1.0
+    best_sig: Optional[dict] = None
+
+    for sig in _SIGNATURES:
+        # The TTL is a strict filter: no match, skip the signature.
+        if sig["ttl"] != initial_ttl:
+            continue
+
+        score: float = 0.40  # base score for the TTL match
+        exact = True
+        for key, observed, bonus, penalty in signals:
+            expected = sig[key]
+            if expected is None:
+                continue  # field not constrained by this signature
+            if observed in expected:
+                score += bonus
+            else:
+                score -= penalty
+                exact = False
+
+        # A scanning-tool verdict needs the complete signature.
+        if sig["bot"] and not exact:
+            continue
+
+        # Penalise an abnormally long path (> 30 hops).
+        if hop_count > 30:
+            score -= 0.05
+
+        if score > best_score:
+            best_score = score
+            best_sig = sig
+
+    if best_sig and best_score >= 0.38:
+        # Final confidence: match score weighted by the signature's own confidence.
+        raw_conf = best_score * best_sig["conf"]
+        confidence = round(min(max(raw_conf, 0.0), 1.0), 2)
+        return OsFingerprint(
+            os_name=best_sig["name"],
+            initial_ttl=initial_ttl,
+            hop_count=hop_count,
+            confidence=confidence,
+            is_bot_tool=best_sig["bot"],
+            network_path=network_path,
+        )
+
+    # Fallback: classification from the TTL alone (minimal confidence).
+    fallback = {64: "Linux/macOS", 128: "Windows", 255: "Cisco/BSD"}
+    return OsFingerprint(
+        os_name=fallback.get(initial_ttl, "Unknown"),
+        initial_ttl=initial_ttl,
+        hop_count=hop_count,
+        confidence=round(0.40 * 0.65, 2),  # low confidence
+        is_bot_tool=False,
+        network_path=network_path,
+    )
+
+
+def detect_spoof(fp: OsFingerprint, declared_os: str) -> SpoofResult:
+    """Detect inconsistencies between the TCP-level OS and the UA-declared OS.
+
+    Checks, in order (the first one that applies decides):
+      1. Known scanning tool → spoof/bot, whatever the UA says.
+      2. Confidence below 0.50, or either side "Unknown" → inconclusive.
+      3. TCP family "Network" while the UA declares a regular OS → spoof.
+      4. Either family unresolved → undetermined.
+      5. Android UA over a Windows-family fingerprint whose name contains
+         "proxy" → tolerated.
+      6. Different families → spoof; otherwise consistent.
+    """
+    def verdict(spoofed: bool, reason: str, bot: bool = False) -> SpoofResult:
+        return SpoofResult(is_spoof=spoofed, is_bot_tool=bot, reason=reason)
+
+    if fp.is_bot_tool:
+        return verdict(True, f"Outil de scan détecté ({fp.os_name})", bot=True)
+
+    inconclusive = (
+        fp.confidence < 0.50
+        or fp.os_name == "Unknown"
+        or declared_os == "Unknown"
+    )
+    if inconclusive:
+        return verdict(False, "Corrélation insuffisante")
+
+    tcp_family = _os_family(fp.os_name)
+    ua_family = _ua_os_family(declared_os)
+
+    # Network equipment / CDN stack behind a UA that claims a regular OS.
+    if tcp_family == "Network" and ua_family not in ("Network", "Unknown"):
+        return verdict(True, f"Équipement réseau/CDN (TCP) vs {declared_os} (UA)")
+
+    if "Unknown" in (tcp_family, ua_family):
+        return verdict(False, "OS indéterminé")
+
+    # Mobile app going through proxy infrastructure: noted, not flagged.
+    # NOTE(review): the "Android/App (proxy)" signature resolves to the
+    # Android family, so this branch does not fire for it — confirm intent.
+    via_proxy = "proxy" in fp.os_name.lower()
+    if declared_os == "Android" and tcp_family == "Windows" and via_proxy:
+        return verdict(False, "App mobile via proxy infra")
+
+    if tcp_family != ua_family:
+        return verdict(True, f"TCP→{tcp_family} vs UA→{ua_family}")
+
+    return verdict(False, "Cohérent")