"""OS fingerprinting service based on TCP signatures (multi-signal, p0f-inspired).

Signals used, by decreasing weight:
  1. Estimated initial TTL (OS family: Linux/Mac=64, Windows=128, Cisco/BSD=255)
  2. MSS                   (network type: Ethernet=1460, PPPoE=1452, VPN=1380-1420)
  3. Window size           (precise OS signature)
  4. Window scale factor   (refines the kernel / TCP stack version)

References:
  - p0f v3 (Michal Zalewski) - passive OS fingerprinting
  - Nmap OS detection (Gordon Lyon)
  - "OS Fingerprinting Revisited" (Beverly, 2004)
  - "Passive OS fingerprinting" (Orebaugh, Ramirez)
  - Research on Masscan/ZMap: known crafted SYN signatures
"""
from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Optional

# --- Constants ---------------------------------------------------------------

# Standard initial TTL values, in ascending order (the order matters for the
# "lowest candidate first" search in _estimate_initial_ttl).
_INITIAL_TTLS = (64, 128, 255)

# A candidate initial TTL is rejected when it would imply more hops than this.
_MAX_PLAUSIBLE_HOPS = 45

# Above this hop count every signature score receives a small penalty.
_HIGH_HOP_THRESHOLD = 30
_HIGH_HOP_PENALTY = 0.05

# Scoring: base score granted by the (mandatory) TTL match, then for each
# optional signal (field, bonus on match, penalty on mismatch).
# The tuple order is the evaluation order and must stay MSS -> window -> scale
# so that floating-point sums are reproducible.
_TTL_BASE_SCORE = 0.40
_SIGNAL_WEIGHTS = (
    ("mss", 0.30, 0.12),
    ("win", 0.20, 0.08),
    ("scale", 0.10, 0.04),
)

# Minimum score for the best signature to be reported instead of the
# TTL-only fallback.
_MIN_MATCH_SCORE = 0.38

# "CrOS" platform token of ChromeOS user agents, matched as a whole word so
# that it is not found inside unrelated words such as "microsoft".
_CROS_TOKEN = re.compile(r"\bcros\b")

# MSS -> network path type (MTU minus 40 bytes of IP+TCP headers).
_MSS_PATH: list[tuple[range, str]] = [
    (range(1461, 9001), "Ethernet/Jumbo"),       # jumbo frames (CDN/datacenter)
    (range(1460, 1461), "Ethernet directe"),     # standard MTU 1500
    (range(1453, 1460), "Ethernet directe"),     # slightly reduced (padding)
    (range(1452, 1453), "PPPoE/DSL"),            # MTU 1492
    (range(1436, 1452), "PPPoE/DSL ajusté"),     # DSL variations
    (range(1420, 1436), "VPN léger"),            # WireGuard / IPsec transport
    (range(1380, 1420), "VPN/Tunnel"),           # OpenVPN / L2TP
    (range(1300, 1380), "VPN double ou mobile"),
    (range(0, 1300), "Lien bas débit / GPRS"),
]

# --- OS signature database ---------------------------------------------------
#
# Each entry is a dict with:
#   ttl   : int            - expected initial TTL (64 | 128 | 255)
#   win   : set[int]|None  - expected window sizes (None = ignore)
#   scale : set[int]|None  - expected window scale factors (None = ignore)
#   mss   : set[int]|None  - expected MSS values (None = ignore)
#   name  : str            - displayed label
#   conf  : float          - base confidence weight (0-1)
#   bot   : bool           - known scanning tool / bot
#
# Order matters: on equal score the earliest entry wins, which is why the
# scanner/bot signatures come first.
_SIGNATURES: list[dict] = [
    # ======================================================
    # KNOWN SCANNING TOOLS AND BOTS (highest priority)
    # ======================================================
    # Masscan / custom scanner with a modified Linux stack (PPPoE MSS=1452).
    # Very common pattern in the data: ~111k requests, spoofed macOS/Windows UA.
    {
        "ttl": 64, "win": {5808}, "scale": {4}, "mss": {1452},
        "name": "Bot-Scanner/Masscan", "conf": 0.97, "bot": True,
    },
    # Masscan TTL=255 (direct mode, no hop)
    {
        "ttl": 255, "win": {1024}, "scale": {0}, "mss": None,
        "name": "Bot-ZMap/Masscan", "conf": 0.96, "bot": True,
    },
    # Mirai variant (small window, no scale, Linux TTL)
    {
        "ttl": 64, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
        "name": "Bot-Mirai", "conf": 0.92, "bot": True,
    },
    # Mirai variant (small window, Windows TTL)
    {
        "ttl": 128, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
        "name": "Bot-Mirai/Win", "conf": 0.92, "bot": True,
    },
    # Scapy / hand-forged packets (window exactly 8192 + TTL 64 + no scale)
    {
        "ttl": 64, "win": {8192}, "scale": {0}, "mss": {1460},
        "name": "Bot-Scapy/Forge", "conf": 0.85, "bot": True,
    },
    # Scanner sending a 1-byte window (labelled ZMap), TTL=64; scale and MSS
    # are ignored.
    # NOTE(review): the previous comment here described an "Nmap SYN scan
    # (window=1024, MSS=1460, TTL=64 or 128)", which does not match this
    # entry - confirm which of the comment or the data was intended.
    {
        "ttl": 64, "win": {1}, "scale": None, "mss": None,
        "name": "Bot-ZMap", "conf": 0.95, "bot": True,
    },
    # ======================================================
    # WINDOWS
    # ======================================================
    # Windows 10 / 11 - standard signature (direct LAN)
    {
        "ttl": 128, "win": {64240}, "scale": {8}, "mss": {1460},
        "name": "Windows 10/11", "conf": 0.93, "bot": False,
    },
    # Windows 10/11 - behind VPN/proxy (reduced MSS)
    {
        "ttl": 128, "win": {64240}, "scale": {8},
        "mss": {1380, 1400, 1412, 1420, 1440},
        "name": "Windows 10/11 (VPN)", "conf": 0.90, "bot": False,
    },
    # Windows Server 2019/2022 - large window
    {
        "ttl": 128, "win": {65535, 131072}, "scale": {8, 9}, "mss": {1460},
        "name": "Windows Server", "conf": 0.88, "bot": False,
    },
    # Windows 7/8.1
    {
        "ttl": 128, "win": {8192, 65535}, "scale": {4, 8}, "mss": {1460},
        "name": "Windows 7/8", "conf": 0.83, "bot": False,
    },
    # Generic Windows (TTL=128, scale=8, any MSS)
    {
        "ttl": 128, "win": None, "scale": {8}, "mss": None,
        "name": "Windows", "conf": 0.70, "bot": False,
    },
    # ======================================================
    # ANDROID (modern BBRv2 / CUBIC stack)
    # ======================================================
    # Android 10+ - scale=9 or 10, large window (BBRv2)
    {
        "ttl": 64, "win": {65535, 131072, 42340, 35844}, "scale": {9, 10},
        "mss": {1460},
        "name": "Android 10+", "conf": 0.82, "bot": False,
    },
    # Android through a TTL=128 proxy (Facebook, TikTok... apps going through
    # their own infrastructure)
    {
        "ttl": 128, "win": {62727, 65535}, "scale": {7}, "mss": {1460},
        "name": "Android/App (proxy)", "conf": 0.75, "bot": False,
    },
    # Android behind VPN (reduced MSS)
    {
        "ttl": 64, "win": {65535, 59640, 63940}, "scale": {8, 9, 10},
        "mss": {1380, 1390, 1400, 1418, 1420},
        "name": "Android (VPN/mobile)", "conf": 0.78, "bot": False,
    },
    # ======================================================
    # iOS / macOS
    # ======================================================
    # iOS 14+ / macOS Monterey+ - scale=6, win=65535 (XNU signature)
    {
        "ttl": 64, "win": {65535, 32768}, "scale": {6}, "mss": {1460},
        "name": "iOS/macOS", "conf": 0.87, "bot": False,
    },
    # macOS Sonoma+ / iOS 17+ (scale=9)
    {
        "ttl": 64, "win": {65535, 32768}, "scale": {9}, "mss": {1460},
        "name": "macOS Sonoma+/iOS 17+", "conf": 0.83, "bot": False,
    },
    # macOS behind VPN (reduced MSS)
    {
        "ttl": 64, "win": {65535}, "scale": {6, 9},
        "mss": {1380, 1400, 1412, 1436},
        "name": "iOS/macOS (VPN)", "conf": 0.80, "bot": False,
    },
    # ======================================================
    # LINUX (desktop/server)
    # ======================================================
    # Linux 5.x+ - scale=7 (kernel >= 4.19)
    # NOTE(review): the previous comment said "win=64240 or 65535" while the
    # set below holds 65320 - confirm the intended value.
    {
        "ttl": 64, "win": {64240, 65320}, "scale": {7}, "mss": {1460},
        "name": "Linux 5.x+", "conf": 0.86, "bot": False,
    },
    # Linux 4.x / ChromeOS
    {
        "ttl": 64, "win": {29200, 65535, 43690, 32120}, "scale": {7},
        "mss": {1460},
        "name": "Linux 4.x/ChromeOS", "conf": 0.83, "bot": False,
    },
    # Linux behind VPN (reduced MSS)
    {
        "ttl": 64, "win": {64240, 65535, 42600}, "scale": {7},
        "mss": {1380, 1400, 1420, 1436},
        "name": "Linux (VPN)", "conf": 0.80, "bot": False,
    },
    # Linux 2.6.x (old - win=5840/14600)
    {
        "ttl": 64, "win": {5840, 14600, 16384}, "scale": {4, 5}, "mss": {1460},
        "name": "Linux 2.6", "conf": 0.78, "bot": False,
    },
    # ======================================================
    # BSD / NETWORK EQUIPMENT / CDN
    # ======================================================
    # FreeBSD / OpenBSD (initial TTL=64)
    {
        "ttl": 64, "win": {65535}, "scale": {6}, "mss": {512, 1460},
        "name": "FreeBSD/OpenBSD", "conf": 0.74, "bot": False,
    },
    # Cisco IOS / network equipment (initial TTL=255, small window)
    {
        "ttl": 255, "win": {4096, 4128, 8760}, "scale": {0, 1, 2},
        "mss": {512, 1460},
        "name": "Cisco/Réseau", "conf": 0.87, "bot": False,
    },
    # CDN / Applebot (TTL=255, jumbo MSS, large window)
    {
        "ttl": 255, "win": {26883, 65535, 59640}, "scale": {7, 8},
        "mss": {8373, 8365, 1460},
        "name": "CDN/Applebot (jumbo)", "conf": 0.85, "bot": False,
    },
    # Generic BSD/Unix (TTL=255)
    {
        "ttl": 255, "win": None, "scale": {6, 7, 8}, "mss": {1460},
        "name": "BSD/Unix", "conf": 0.68, "bot": False,
    },
]

# Keyword -> OS family table used by _os_family. Evaluated in order; the first
# family with a keyword contained in the lower-cased name wins.
_FAMILY_KEYWORDS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("Windows", ("windows",)),
    ("Android", ("android",)),
    ("Apple", ("ios", "macos", "iphone", "ipad")),
    ("Linux", ("linux", "chromeos")),
    ("Network", ("bsd", "cisco", "cdn", "réseau")),
    ("Bot", ("bot", "scanner", "mirai", "zmap")),
)

# Declared OS (as returned by declared_os_from_ua) -> OS family.
_UA_FAMILY = {
    "Windows": "Windows",
    "Android": "Android",
    "iOS": "Apple",
    "macOS": "Apple",
    "Linux": "Linux",
    "ChromeOS": "Linux",
    "BSD": "Network",
}

# OS label used when no signature scores high enough (TTL-only classification).
_TTL_ONLY_FALLBACK = {64: "Linux/macOS", 128: "Windows", 255: "Cisco/BSD"}


# --- Data classes ------------------------------------------------------------

@dataclass
class OsFingerprint:
    """Result of fingerprint_os."""

    os_name: str        # label of the matched signature, or a TTL-only fallback
    initial_ttl: int    # estimated initial TTL (0 when it could not be estimated)
    hop_count: int      # initial_ttl - observed TTL (-1 when unknown)
    confidence: float   # 0.0-1.0, rounded to two decimals
    is_bot_tool: bool   # True when the matched signature is flagged "bot"
    network_path: str   # network path label derived from the MSS


@dataclass
class SpoofResult:
    """Result of detect_spoof."""

    is_spoof: bool      # True when TCP evidence contradicts the declared OS
    is_bot_tool: bool   # True when the fingerprint matched a bot signature
    reason: str         # human-readable explanation


# --- Helpers -----------------------------------------------------------------

def _estimate_initial_ttl(observed_ttl: int) -> tuple[int, int]:
    """Return ``(initial_ttl, hop_count)`` estimated from an observed TTL.

    The lowest standard initial TTL that is >= ``observed_ttl`` and implies at
    most ``_MAX_PLAUSIBLE_HOPS`` hops is selected. When every candidate implies
    more hops than that, the nearest standard TTL above the observed value is
    used anyway (rather than jumping to 255), so that the estimate stays in
    the closest OS family.

    Returns ``(0, -1)`` when ``observed_ttl`` is outside 1..255, which is not
    a valid value for the 8-bit IP TTL field.
    """
    if observed_ttl <= 0 or observed_ttl > _INITIAL_TTLS[-1]:
        return 0, -1

    # Non-empty: observed_ttl <= 255, and 255 is a standard initial TTL.
    candidates = [ttl for ttl in _INITIAL_TTLS if ttl >= observed_ttl]
    for initial in candidates:
        hops = initial - observed_ttl
        if hops <= _MAX_PLAUSIBLE_HOPS:
            return initial, hops

    # No plausible candidate: keep the closest one instead of a negative or
    # wildly inflated hop count.
    nearest = candidates[0]
    return nearest, nearest - observed_ttl


def _infer_network_path(mss: int) -> str:
    """Return the probable network path label for ``mss``.

    Returns ``"Inconnu"`` when ``mss`` is <= 0 or falls in no known range.
    """
    if mss <= 0:
        return "Inconnu"
    return next((label for span, label in _MSS_PATH if mss in span), "Inconnu")


def _os_family(os_name: str) -> str:
    """Reduce a detailed OS label to its family, for comparison with the UA.

    Matching is a case-insensitive substring search evaluated in the order of
    ``_FAMILY_KEYWORDS``; returns ``"Unknown"`` when nothing matches.
    """
    lowered = os_name.lower()
    for family, keywords in _FAMILY_KEYWORDS:
        if any(keyword in lowered for keyword in keywords):
            return family
    return "Unknown"


def _ua_os_family(declared_os: str) -> str:
    """Reduce the OS declared by the user agent to its family."""
    return _UA_FAMILY.get(declared_os, "Unknown")


def _score_signature(sig: dict, win: int, scale: int, mss: int) -> float:
    """Score ``sig`` against the observed values, assuming its TTL matched.

    Starts from the TTL base score, then for each signal the signature
    constrains, adds the bonus on a match or subtracts the penalty on a
    mismatch. Signals set to ``None`` in the signature are ignored.
    """
    observed = {"mss": mss, "win": win, "scale": scale}
    score = _TTL_BASE_SCORE
    for field, bonus, penalty in _SIGNAL_WEIGHTS:
        expected = sig[field]
        if expected is None:
            continue
        score += bonus if observed[field] in expected else -penalty
    return score


# --- Public functions --------------------------------------------------------

def declared_os_from_ua(ua: str) -> str:
    """Infer the declared OS from a User-Agent string.

    Returns one of ``"Windows"``, ``"Android"``, ``"iOS"``, ``"macOS"``,
    ``"ChromeOS"``, ``"Linux"``, ``"BSD"`` or ``"Unknown"``. A ``None`` or
    empty user agent yields ``"Unknown"``.

    Checks are ordered from most to least specific: Android user agents also
    contain "linux", and iPhone/iPad ones also contain "mac os x".
    """
    lowered = (ua or "").lower()
    if not lowered:
        return "Unknown"
    if "windows nt" in lowered:
        return "Windows"
    if "android" in lowered:
        return "Android"
    if "iphone" in lowered or "ipad" in lowered:
        return "iOS"
    if "mac os x" in lowered or "macos" in lowered:
        return "macOS"
    # "cros" must be matched as a whole token: a bare substring test also
    # fires on "microsoft".
    if _CROS_TOKEN.search(lowered) or "chromeos" in lowered:
        return "ChromeOS"
    if "linux" in lowered:
        return "Linux"
    if "freebsd" in lowered or "openbsd" in lowered or "netbsd" in lowered:
        return "BSD"
    return "Unknown"


def fingerprint_os(ttl: int, win: int, scale: int, mss: int) -> OsFingerprint:
    """Fingerprint the OS from TCP SYN values using weighted multi-signal scoring.

    Signal weights:
      - initial TTL  40 %  (main discriminant: OS family; strict filter)
      - MSS          30 %  (network type AND OS)
      - TCP window   20 %  (precise version/distribution)
      - scale        10 %  (refines the kernel version)

    Args:
        ttl: observed IP TTL.
        win: observed TCP window size.
        scale: observed TCP window scale factor.
        mss: observed TCP MSS.

    Returns:
        An ``OsFingerprint``. When the TTL is invalid the OS is ``"Unknown"``
        with confidence 0.0; when no signature scores at least
        ``_MIN_MATCH_SCORE`` a low-confidence TTL-only label is returned.
    """
    initial_ttl, hop_count = _estimate_initial_ttl(ttl)
    network_path = _infer_network_path(mss)

    if initial_ttl == 0:
        return OsFingerprint(
            os_name="Unknown",
            initial_ttl=0,
            hop_count=-1,
            confidence=0.0,
            is_bot_tool=False,
            network_path=network_path,
        )

    best_score: float = -1.0
    best_sig: Optional[dict] = None
    for sig in _SIGNATURES:
        # The TTL is a strict filter: no match, no candidate.
        if sig["ttl"] != initial_ttl:
            continue
        score = _score_signature(sig, win, scale, mss)
        # Penalty when the hop count is abnormally high.
        if hop_count > _HIGH_HOP_THRESHOLD:
            score -= _HIGH_HOP_PENALTY
        # Strict ">" keeps the earliest signature on ties (bots are listed first).
        if score > best_score:
            best_score = score
            best_sig = sig

    if best_sig is not None and best_score >= _MIN_MATCH_SCORE:
        # Final confidence = match score weighted by the signature's own
        # confidence, clamped to [0, 1].
        raw_conf = best_score * best_sig["conf"]
        return OsFingerprint(
            os_name=best_sig["name"],
            initial_ttl=initial_ttl,
            hop_count=hop_count,
            confidence=round(min(max(raw_conf, 0.0), 1.0), 2),
            is_bot_tool=best_sig["bot"],
            network_path=network_path,
        )

    # Fallback: TTL-only classification (minimal confidence).
    return OsFingerprint(
        os_name=_TTL_ONLY_FALLBACK.get(initial_ttl, "Unknown"),
        initial_ttl=initial_ttl,
        hop_count=hop_count,
        confidence=round(0.40 * 0.65, 2),  # low confidence
        is_bot_tool=False,
        network_path=network_path,
    )


def detect_spoof(fp: OsFingerprint, declared_os: str) -> SpoofResult:
    """Detect OS inconsistencies between the TCP fingerprint and the UA.

    Rules, in order:
      1. Known scanning tool      -> spoof/bot, whatever the UA says
      2. Confidence < 0.50, unknown TCP OS or unknown declared OS
                                  -> undeterminable
      3. Network/CDN TCP family with a UA declaring another known family
                                  -> spoof
      4. Incompatible OS families -> spoof
      5. Otherwise                -> consistent

    Args:
        fp: fingerprint returned by ``fingerprint_os``.
        declared_os: OS label returned by ``declared_os_from_ua``.
    """
    if fp.is_bot_tool:
        return SpoofResult(
            is_spoof=True,
            is_bot_tool=True,
            reason=f"Outil de scan détecté ({fp.os_name})",
        )

    if fp.confidence < 0.50 or fp.os_name == "Unknown" or declared_os == "Unknown":
        return SpoofResult(
            is_spoof=False,
            is_bot_tool=False,
            reason="Corrélation insuffisante",
        )

    tcp_family = _os_family(fp.os_name)
    ua_family = _ua_os_family(declared_os)

    # A network-equipment TCP stack is suspicious as soon as the UA claims a
    # regular client OS.
    if tcp_family == "Network" and ua_family not in ("Network", "Unknown"):
        return SpoofResult(
            is_spoof=True,
            is_bot_tool=False,
            reason=f"Équipement réseau/CDN (TCP) vs {declared_os} (UA)",
        )

    if tcp_family == "Unknown" or ua_family == "Unknown":
        return SpoofResult(is_spoof=False, is_bot_tool=False, reason="OS indéterminé")

    # Android going through an infrastructure proxy (e.g. Facebook app ->
    # Windows proxy): not necessarily a spoof, so it is not flagged.
    # NOTE(review): the only built-in signature containing "proxy" is
    # "Android/App (proxy)", which _os_family maps to "Android", so this
    # branch is only reachable for fingerprints built outside fingerprint_os.
    if (
        declared_os == "Android"
        and tcp_family == "Windows"
        and "proxy" in fp.os_name.lower()
    ):
        return SpoofResult(
            is_spoof=False, is_bot_tool=False, reason="App mobile via proxy infra"
        )

    if tcp_family != ua_family:
        return SpoofResult(
            is_spoof=True,
            is_bot_tool=False,
            reason=f"TCP→{tcp_family} vs UA→{ua_family}",
        )

    return SpoofResult(is_spoof=False, is_bot_tool=False, reason="Cohérent")