Files
dashboard/backend/services/tcp_fingerprint.py
SOC Analyst e2db8ca84e feat: clustering multi-métriques + TCP fingerprinting amélioré
- TCP fingerprinting: 20 signatures OS (p0f-style), scoring multi-signal
  TTL/MSS/scale/fenêtre, détection Masscan 97% confiance, réseau path
  (Ethernet/PPPoE/VPN/Tunnel), estimation hop-count

- Clustering IPs: K-means++ (Arthur & Vassilvitskii 2007) sur 21 features
  TCP stack + anomalie ML + TLS/protocole + navigateur + temporel
  PCA-2D par puissance itérative (Hotelling) pour positionnement

- Visualisation redesign: 2 vues lisibles
  - Tableau de bord: grille de cartes groupées par niveau de risque
    (Bots / Suspects / Légitimes), métriques clés + mini-barres
  - Graphe de relations: ReactFlow avec nœuds-cartes en colonnes
    par niveau de menace, arêtes colorées par similarité, légende
  - Sidebar: RadarChart comportemental + toutes métriques + export CSV

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-03-18 18:22:57 +01:00

437 lines
17 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Service de fingerprinting OS par signature TCP — approche multi-signal inspirée de p0f.
Signaux utilisés (par ordre de poids) :
1. TTL initial estimé (→ famille OS : Linux/Mac=64, Windows=128, Cisco/BSD=255)
2. MSS (→ type de réseau : Ethernet=1460, PPPoE=1452, VPN=1380-1420)
3. Taille de fenêtre (→ signature OS précise)
4. Facteur d'échelle (→ affine la version du kernel/stack TCP)
Références :
- p0f v3 (Michal Zalewski) — passive OS fingerprinting
- Nmap OS detection (Gordon Lyon)
- "OS Fingerprinting Revisited" (Beverly, 2004)
- "Passive OS fingerprinting" (Orebaugh, Ramirez)
- Recherche sur Masscan/ZMap : signatures SYN craftées connues
"""
from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Optional
# ─── Constants ────────────────────────────────────────────────────────────────
# Standard initial TTL values tried, in this order, by _estimate_initial_ttl.
_INITIAL_TTLS = (64, 128, 255)
# MSS → network path type (MSS = MTU - 40 bytes of IP+TCP headers).
# _infer_network_path scans the entries in order and returns the label of the
# first range containing the MSS; the ranges below do not overlap.
_MSS_PATH: list[tuple[range, str]] = [
    (range(1461, 9001), "Ethernet/Jumbo"),  # jumbo frames (CDN/datacenter)
    (range(1460, 1461), "Ethernet directe"),  # standard MTU 1500
    (range(1453, 1460), "Ethernet directe"),  # slightly reduced (padding)
    (range(1452, 1453), "PPPoE/DSL"),  # MTU 1492
    (range(1436, 1452), "PPPoE/DSL ajusté"),  # DSL variations
    (range(1420, 1436), "VPN léger"),  # WireGuard / IPsec transport
    (range(1380, 1420), "VPN/Tunnel"),  # OpenVPN / L2TP
    (range(1300, 1380), "VPN double ou mobile"),
    (range(0, 1300), "Lien bas débit / GPRS"),
]
# ─── OS signature database ────────────────────────────────────────────────────
#
# Format: each entry is a dict with:
#   ttl   : int            — expected initial TTL (64 | 128 | 255)
#   win   : set[int]|None  — expected window sizes (None = ignore)
#   scale : set[int]|None  — expected window scale factors (None = ignore)
#   mss   : set[int]|None  — expected MSS values (None = ignore)
#   name  : str            — displayed label
#   conf  : float          — base confidence weight (0–1)
#   bot   : bool           — known scanning tool / bot
#
# Order matters: fingerprint_os only replaces its current best match on a
# strictly higher score, so on a tie the earliest entry in this list wins.
_SIGNATURES: list[dict] = [
    # ══════════════════════════════════════════════════════
    # KNOWN SCANNING TOOLS AND BOTS (highest priority)
    # ══════════════════════════════════════════════════════
    # Masscan / custom scanner with a modified Linux stack (PPPoE MSS=1452)
    # Pattern very common in the data: ~111k requests, spoofed macOS/Windows UA
    {
        "ttl": 64, "win": {5808}, "scale": {4}, "mss": {1452},
        "name": "Bot-Scanner/Masscan", "conf": 0.97, "bot": True,
    },
    # Masscan TTL=255 (direct mode, no hop)
    {
        "ttl": 255, "win": {1024}, "scale": {0}, "mss": None,
        "name": "Bot-ZMap/Masscan", "conf": 0.96, "bot": True,
    },
    # Mirai variant (small window, no scale, Linux TTL)
    {
        "ttl": 64, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
        "name": "Bot-Mirai", "conf": 0.92, "bot": True,
    },
    # Mirai variant (small window, Windows TTL)
    {
        "ttl": 128, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
        "name": "Bot-Mirai/Win", "conf": 0.92, "bot": True,
    },
    # Scapy / hand-crafted packets (window exactly 8192 + TTL 64 + no scale)
    {
        "ttl": 64, "win": {8192}, "scale": {0}, "mss": {1460},
        "name": "Bot-Scapy/Forge", "conf": 0.85, "bot": True,
    },
    # Minimal window (win=1) with TTL 64; scale and MSS are ignored.
    # NOTE(review): the previous comment described an "Nmap SYN scan
    # (window=1024, MSS=1460, TTL=64 or 128)", which matches neither the values
    # nor the "Bot-ZMap" label of this entry — confirm which one is intended.
    {
        "ttl": 64, "win": {1}, "scale": None, "mss": None,
        "name": "Bot-ZMap", "conf": 0.95, "bot": True,
    },
    # ══════════════════════════════════════════════════════
    # WINDOWS
    # ══════════════════════════════════════════════════════
    # Windows 10 / 11 — standard signature (direct LAN)
    {
        "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1460},
        "name": "Windows 10/11", "conf": 0.93, "bot": False,
    },
    # Windows 10/11 — behind VPN/proxy (reduced MSS)
    {
        "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1380, 1400, 1412, 1420, 1440},
        "name": "Windows 10/11 (VPN)", "conf": 0.90, "bot": False,
    },
    # Windows Server 2019/2022 — large window
    {
        "ttl": 128, "win": {65535, 131072}, "scale": {8, 9}, "mss": {1460},
        "name": "Windows Server", "conf": 0.88, "bot": False,
    },
    # Windows 7/8.1
    {
        "ttl": 128, "win": {8192, 65535}, "scale": {4, 8}, "mss": {1460},
        "name": "Windows 7/8", "conf": 0.83, "bot": False,
    },
    # Generic Windows (TTL=128, scale=8, any window and any MSS)
    {
        "ttl": 128, "win": None, "scale": {8}, "mss": None,
        "name": "Windows", "conf": 0.70, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # ANDROID (modern BBRv2 / CUBIC stack)
    # ══════════════════════════════════════════════════════
    # Android 10+ — scale=9 or 10, large window (BBRv2)
    {
        "ttl": 64, "win": {65535, 131072, 42340, 35844}, "scale": {9, 10}, "mss": {1460},
        "name": "Android 10+", "conf": 0.82, "bot": False,
    },
    # Android via proxy, TTL=128 (Facebook, TikTok apps etc. going through infrastructure)
    {
        "ttl": 128, "win": {62727, 65535}, "scale": {7}, "mss": {1460},
        "name": "Android/App (proxy)", "conf": 0.75, "bot": False,
    },
    # Android behind VPN (reduced MSS)
    {
        "ttl": 64, "win": {65535, 59640, 63940}, "scale": {8, 9, 10}, "mss": {1380, 1390, 1400, 1418, 1420},
        "name": "Android (VPN/mobile)", "conf": 0.78, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # iOS / macOS
    # ══════════════════════════════════════════════════════
    # iOS 14+ / macOS Monterey+ — scale=6, win=65535 or 32768 (XNU signature)
    {
        "ttl": 64, "win": {65535, 32768}, "scale": {6}, "mss": {1460},
        "name": "iOS/macOS", "conf": 0.87, "bot": False,
    },
    # macOS Sonoma+ / iOS 17+ (scale=9, larger window)
    {
        "ttl": 64, "win": {65535, 32768}, "scale": {9}, "mss": {1460},
        "name": "macOS Sonoma+/iOS 17+", "conf": 0.83, "bot": False,
    },
    # macOS behind VPN (reduced MSS)
    {
        "ttl": 64, "win": {65535}, "scale": {6, 9}, "mss": {1380, 1400, 1412, 1436},
        "name": "iOS/macOS (VPN)", "conf": 0.80, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # LINUX (desktop/server)
    # ══════════════════════════════════════════════════════
    # Linux 5.x+ — scale=7, win=64240 or 65320 (kernel ≥ 4.19)
    {
        "ttl": 64, "win": {64240, 65320}, "scale": {7}, "mss": {1460},
        "name": "Linux 5.x+", "conf": 0.86, "bot": False,
    },
    # Linux 4.x / ChromeOS
    {
        "ttl": 64, "win": {29200, 65535, 43690, 32120}, "scale": {7}, "mss": {1460},
        "name": "Linux 4.x/ChromeOS", "conf": 0.83, "bot": False,
    },
    # Linux behind VPN (reduced MSS)
    {
        "ttl": 64, "win": {64240, 65535, 42600}, "scale": {7}, "mss": {1380, 1400, 1420, 1436},
        "name": "Linux (VPN)", "conf": 0.80, "bot": False,
    },
    # Linux 2.6.x (old — win=5840/14600/16384)
    {
        "ttl": 64, "win": {5840, 14600, 16384}, "scale": {4, 5}, "mss": {1460},
        "name": "Linux 2.6", "conf": 0.78, "bot": False,
    },
    # ══════════════════════════════════════════════════════
    # BSD / NETWORK EQUIPMENT / CDN
    # ══════════════════════════════════════════════════════
    # FreeBSD / OpenBSD (initial TTL=64)
    # Same TTL/window/scale as the "iOS/macOS" entry above; with MSS=1460 both
    # score equally and the earlier iOS/macOS entry is the one retained.
    {
        "ttl": 64, "win": {65535}, "scale": {6}, "mss": {512, 1460},
        "name": "FreeBSD/OpenBSD", "conf": 0.74, "bot": False,
    },
    # Cisco IOS / network equipment (initial TTL=255, small window)
    {
        "ttl": 255, "win": {4096, 4128, 8760}, "scale": {0, 1, 2}, "mss": {512, 1460},
        "name": "Cisco/Réseau", "conf": 0.87, "bot": False,
    },
    # CDN / Applebot (TTL=255, jumbo MSS, large window)
    {
        "ttl": 255, "win": {26883, 65535, 59640}, "scale": {7, 8}, "mss": {8373, 8365, 1460},
        "name": "CDN/Applebot (jumbo)", "conf": 0.85, "bot": False,
    },
    # Generic BSD/Unix (TTL=255)
    {
        "ttl": 255, "win": None, "scale": {6, 7, 8}, "mss": {1460},
        "name": "BSD/Unix", "conf": 0.68, "bot": False,
    },
]
# ─── Data classes ──────────────────────────────────────────────────────────────
@dataclass
class OsFingerprint:
    """Result of a passive TCP OS fingerprint, as built by fingerprint_os."""

    # Label of the best-matching signature, a TTL-only fallback label,
    # or "Unknown" when the initial TTL could not be estimated.
    os_name: str
    # Estimated initial TTL of the sender (0 when it could not be estimated).
    initial_ttl: int
    # Estimated initial TTL minus observed TTL (-1 when it could not be estimated).
    hop_count: int
    # Match confidence; fingerprint_os clamps it to 0..1 and rounds to 2 decimals.
    confidence: float
    # True when the matched signature is flagged as a known scanning tool / bot.
    is_bot_tool: bool
    # Network path label inferred from the MSS ("Inconnu" when not inferable).
    network_path: str
@dataclass
class SpoofResult:
    """Verdict of detect_spoof: TCP fingerprint versus OS declared by the UA."""

    # True when the TCP-level OS contradicts the declared OS, or a scan tool matched.
    is_spoof: bool
    # True when the verdict comes from a known scanning tool / bot signature.
    is_bot_tool: bool
    # Human-readable explanation of the verdict.
    reason: str
# ─── Fonctions utilitaires ─────────────────────────────────────────────────────
def _estimate_initial_ttl(observed_ttl: int) -> tuple[int, int]:
    """Estimate the sender's initial TTL and the hop count from an observed TTL.

    The standard initial TTLs in ``_INITIAL_TTLS`` are tried from lowest to
    highest; the first one that is >= ``observed_ttl`` and implies at most
    45 hops is selected (longer legitimate paths are considered unlikely).

    When no candidate is within 45 hops, the lowest standard TTL that is
    >= ``observed_ttl`` is returned anyway, so the result always carries the
    smallest possible hop count instead of jumping to the highest TTL.

    Args:
        observed_ttl: TTL value read from the received packet.

    Returns:
        ``(initial_ttl, hop_count)``, or ``(0, -1)`` when ``observed_ttl`` is
        not positive or exceeds the largest standard initial TTL.
    """
    # A TTL outside 1..max(standard TTLs) cannot be mapped to any initial
    # value without producing a negative hop count: report it as unknown.
    if observed_ttl <= 0 or observed_ttl > max(_INITIAL_TTLS):
        return 0, -1

    reachable = sorted(
        initial for initial in _INITIAL_TTLS if initial >= observed_ttl
    )
    for initial in reachable:
        hop = initial - observed_ttl
        if hop <= 45:
            return initial, hop

    # No plausible candidate: keep the nearest standard TTL above the
    # observed value (fewest hops) rather than the largest one.
    nearest = reachable[0]
    return nearest, nearest - observed_ttl
def _infer_network_path(mss: int) -> str:
    """Map an MSS value to the label of the probable network path.

    Returns the label of the first ``_MSS_PATH`` range containing ``mss``,
    or "Inconnu" when ``mss`` is not positive or falls in no range.
    """
    unknown = "Inconnu"
    if mss <= 0:
        return unknown
    matching_labels = (label for span, label in _MSS_PATH if mss in span)
    return next(matching_labels, unknown)
def _os_family(os_name: str) -> str:
    """Reduce a detailed OS label to its family, for comparison with the UA.

    The label is lower-cased and tested against keyword groups in a fixed
    order; the first group with a keyword contained in the label decides the
    family. Returns "Unknown" when no keyword matches.
    """
    lowered = os_name.lower()
    # Order is significant: e.g. a label containing both "cdn" and "bot"
    # resolves to "Network" because that group is tested first.
    keyword_groups = (
        ("Windows", ("windows",)),
        ("Android", ("android",)),
        ("Apple", ("ios", "macos", "iphone", "ipad")),
        ("Linux", ("linux", "chromeos")),
        ("Network", ("bsd", "cisco", "cdn", "réseau")),
        ("Bot", ("bot", "scanner", "mirai", "zmap")),
    )
    for family, keywords in keyword_groups:
        if any(keyword in lowered for keyword in keywords):
            return family
    return "Unknown"
def _ua_os_family(declared_os: str) -> str:
    """Reduce the OS declared by the User-Agent to its family.

    Matching is exact and case-sensitive; any label that is not listed
    below yields "Unknown".
    """
    members_by_family = {
        "Windows": ("Windows",),
        "Android": ("Android",),
        "Apple": ("iOS", "macOS"),
        "Linux": ("Linux", "ChromeOS"),
        "Network": ("BSD",),
    }
    for family, members in members_by_family.items():
        if declared_os in members:
            return family
    return "Unknown"
# ─── Fonctions publiques ───────────────────────────────────────────────────────
def declared_os_from_ua(ua: Optional[str]) -> str:
    """Infer the OS a client claims to run from its User-Agent string.

    Matching is case-insensitive and the checks are ordered so that the more
    specific platform wins: Android before Linux (Android UAs also contain
    "Linux"), iPhone/iPad before macOS (iOS UAs contain "like Mac OS X"),
    and ChromeOS before Linux.

    Args:
        ua: Raw User-Agent header; ``None`` or an empty string is accepted.

    Returns:
        One of "Windows", "Android", "iOS", "macOS", "ChromeOS", "Linux",
        "BSD" or "Unknown".
    """
    lowered = (ua or "").lower()
    if not lowered:
        return "Unknown"
    if "windows nt" in lowered:
        return "Windows"
    if "android" in lowered:
        return "Android"
    if "iphone" in lowered or "ipad" in lowered:
        return "iOS"
    if "mac os x" in lowered or "macos" in lowered:
        return "macOS"
    # "cros" must be matched as a whole token: as a plain substring it also
    # matches "microsoft" (mi-cros-oft), which mislabels UAs such as
    # "Microsoft-CryptoAPI/10.0" as ChromeOS.
    if "chromeos" in lowered or re.search(r"\bcros\b", lowered):
        return "ChromeOS"
    if "linux" in lowered:
        return "Linux"
    if "freebsd" in lowered or "openbsd" in lowered or "netbsd" in lowered:
        return "BSD"
    return "Unknown"
def fingerprint_os(ttl: int, win: int, scale: int, mss: int) -> OsFingerprint:
    """Fingerprint the OS from TCP parameters using weighted multi-signal scoring.

    Signal weights:
    - Initial TTL 40 % (hard filter: signatures with another TTL are skipped)
    - MSS         30 % (-0.12 on mismatch)
    - TCP window  20 % (-0.08 on mismatch)
    - Scale       10 % (-0.04 on mismatch)

    A signature field set to ``None`` is ignored (no bonus, no penalty), and
    0.05 is subtracted when the estimated hop count exceeds 30. The best
    signature is kept when its score is at least 0.38; the reported
    confidence is that score multiplied by the signature's base confidence,
    clamped to 0..1 and rounded to 2 decimals. Otherwise a TTL-only label is
    returned with a low fixed confidence.

    Args:
        ttl: Observed TTL.
        win: Observed TCP window size.
        scale: Observed window scale factor.
        mss: Observed MSS.

    Returns:
        The resulting ``OsFingerprint``.
    """
    origin_ttl, hops = _estimate_initial_ttl(ttl)
    path_label = _infer_network_path(mss)

    if origin_ttl == 0:
        return OsFingerprint(
            os_name="Unknown",
            initial_ttl=0,
            hop_count=-1,
            confidence=0.0,
            is_bot_tool=False,
            network_path=path_label,
        )

    # (signature key, observed value, bonus on match, penalty on mismatch),
    # applied in this order so the accumulated float score is reproducible.
    weighted_signals = (
        ("mss", mss, 0.30, -0.12),
        ("win", win, 0.20, -0.08),
        ("scale", scale, 0.10, -0.04),
    )

    def rate(entry: dict) -> float:
        """Score one signature whose TTL already matches."""
        total: float = 0.40  # base score for the TTL match
        for key, observed, bonus, penalty in weighted_signals:
            expected = entry[key]
            if expected is None:
                continue
            total += bonus if observed in expected else penalty
        # Unusually long path (>30 hops) lowers the score.
        if hops > 30:
            total -= 0.05
        return total

    rated = [
        (rate(entry), entry)
        for entry in _SIGNATURES
        if entry["ttl"] == origin_ttl
    ]
    # max() keeps the first of several equal scores, i.e. the earliest signature.
    top_score, top_entry = (
        max(rated, key=lambda pair: pair[0]) if rated else (-1.0, None)
    )

    if top_entry and top_score >= 0.38:
        # Final confidence combines the match score and the signature's own weight.
        weighted = top_score * top_entry["conf"]
        return OsFingerprint(
            os_name=top_entry["name"],
            initial_ttl=origin_ttl,
            hop_count=hops,
            confidence=round(min(max(weighted, 0.0), 1.0), 2),
            is_bot_tool=top_entry["bot"],
            network_path=path_label,
        )

    # Fallback: classification from the TTL alone (minimal confidence).
    ttl_only_labels = {64: "Linux/macOS", 128: "Windows", 255: "Cisco/BSD"}
    return OsFingerprint(
        os_name=ttl_only_labels.get(origin_ttl, "Unknown"),
        initial_ttl=origin_ttl,
        hop_count=hops,
        confidence=round(0.40 * 0.65, 2),  # low confidence
        is_bot_tool=False,
        network_path=path_label,
    )
def detect_spoof(fp: OsFingerprint, declared_os: str) -> SpoofResult:
    """Detect OS inconsistencies between the TCP fingerprint and the User-Agent.

    Rules, evaluated in order:
    1. Known scanning tool → flagged as spoof/bot, whatever the UA says.
    2. Confidence < 0.50, or either OS is "Unknown" → undeterminable.
    3. Network/CDN TCP stack while the UA declares a known non-network OS → spoof.
    4. Either OS family unresolved → undeterminable.
    5. UA declares Android and the matched signature is a "proxy" one → not a spoof.
    6. Families differ → spoof.
    7. Otherwise → coherent.

    Args:
        fp: TCP-level fingerprint of the client.
        declared_os: OS label derived from the User-Agent.

    Returns:
        The ``SpoofResult`` verdict with a human-readable reason.
    """
    if fp.is_bot_tool:
        return SpoofResult(
            is_spoof=True,
            is_bot_tool=True,
            reason=f"Outil de scan détecté ({fp.os_name})",
        )
    if fp.confidence < 0.50 or fp.os_name == "Unknown" or declared_os == "Unknown":
        return SpoofResult(
            is_spoof=False,
            is_bot_tool=False,
            reason="Corrélation insuffisante",
        )

    tcp_family = _os_family(fp.os_name)
    ua_family = _ua_os_family(declared_os)

    # A network/CDN stack is always suspicious when the UA claims to be a browser OS.
    if tcp_family == "Network" and ua_family not in ("Network", "Unknown"):
        return SpoofResult(
            is_spoof=True,
            is_bot_tool=False,
            reason=f"Équipement réseau/CDN (TCP) vs {declared_os} (UA)",
        )
    if tcp_family == "Unknown" or ua_family == "Unknown":
        return SpoofResult(is_spoof=False, is_bot_tool=False, reason="OS indéterminé")

    # Android app traffic relayed by proxy infrastructure (e.g. Facebook app →
    # Windows proxy): not necessarily a spoof, so it is noted but not flagged.
    # The proxy signature label itself contains "android", so _os_family maps
    # it to "Android"; testing for the "Windows" family alone made this branch
    # unreachable, hence both families are accepted here.
    matched_proxy_signature = "proxy" in fp.os_name.lower()
    if (
        declared_os == "Android"
        and matched_proxy_signature
        and tcp_family in ("Windows", "Android")
    ):
        return SpoofResult(is_spoof=False, is_bot_tool=False, reason="App mobile via proxy infra")

    if tcp_family != ua_family:
        return SpoofResult(
            is_spoof=True,
            is_bot_tool=False,
            reason=f"TCP→{tcp_family} vs UA→{ua_family}",
        )
    return SpoofResult(is_spoof=False, is_bot_tool=False, reason="Cohérent")