feat: clustering multi-métriques + TCP fingerprinting amélioré

- TCP fingerprinting: 20 signatures OS (p0f-style), scoring multi-signal
  TTL/MSS/scale/fenêtre, détection Masscan à 97% de confiance, chemin
  réseau (Ethernet/PPPoE/VPN/Tunnel), estimation du hop-count

- Clustering IPs: K-means++ (Arthur & Vassilvitskii 2007) sur 21 features:
  TCP stack + anomalie ML + TLS/protocole + navigateur + temporel;
  PCA-2D par itération de la puissance (Hotelling) pour le positionnement

- Refonte de la visualisation: 2 vues lisibles
  - Tableau de bord: grille de cartes groupées par niveau de risque
    (Bots / Suspects / Légitimes), métriques clés + mini-barres
  - Graphe de relations: ReactFlow avec nœuds-cartes en colonnes
    par niveau de menace, arêtes colorées par similarité, légende
  - Sidebar: RadarChart comportemental + toutes métriques + export CSV

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
SOC Analyst
2026-03-18 18:22:57 +01:00
parent c887846af5
commit e2db8ca84e
9 changed files with 2430 additions and 202 deletions

View File

@ -7,6 +7,7 @@ agg_host_ip_ja4_1h (rotation JA4), view_ip_recurrence, view_ai_features_1h.
from fastapi import APIRouter, HTTPException
from ..database import db
from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua
router = APIRouter(prefix="/api/investigation", tags=["investigation"])
@ -62,32 +63,45 @@ async def get_ip_full_summary(ip: str):
"top_hosts": [str(h) for h in (bf_row[3] or [])] if bf_row else [],
}
# ── 3. TCP spoofing ────────────────────────────────────────────────────
# ── 3. TCP spoofing — fingerprinting multi-signal ─────────────────────
tcp_sql = """
SELECT tcp_ttl, first_ua
FROM mabase_prod.view_tcp_spoofing_detected
SELECT
any(tcp_ttl_raw) AS ttl,
any(tcp_win_raw) AS win,
any(tcp_scale_raw) AS scale,
any(tcp_mss_raw) AS mss,
any(first_ua) AS ua
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
AND tcp_ttl > 0
AND window_start >= now() - INTERVAL 24 HOUR
AND tcp_ttl_raw > 0
LIMIT 1
"""
tcp_res = db.query(tcp_sql, {"ip": clean_ip})
tcp_data = {"detected": False, "tcp_ttl": None, "suspected_os": None}
if tcp_res.result_rows:
ttl = int(tcp_res.result_rows[0][0])
if 52 <= ttl <= 65:
sus_os = "Linux/Mac"
elif 110 <= ttl <= 135:
sus_os = "Windows"
else:
sus_os = "Unknown"
ua = str(tcp_res.result_rows[0][1] or "")
dec_os = "Windows" if "Windows" in ua else ("macOS" if "Mac OS X" in ua else "Linux/Android" if "Linux" in ua else "Unknown")
spoof = sus_os != "Unknown" and dec_os != "Unknown" and sus_os != dec_os
r = tcp_res.result_rows[0]
ttl = int(r[0] or 0)
win = int(r[1] or 0)
scale = int(r[2] or 0)
mss = int(r[3] or 0)
ua = str(r[4] or "")
fp = fingerprint_os(ttl, win, scale, mss)
dec_os = declared_os_from_ua(ua)
spoof_res = detect_spoof(fp, dec_os)
tcp_data = {
"detected": spoof,
"tcp_ttl": ttl,
"suspected_os": sus_os,
"declared_os": dec_os,
"detected": spoof_res.is_spoof,
"tcp_ttl": ttl,
"tcp_mss": mss,
"tcp_win_scale": scale,
"initial_ttl": fp.initial_ttl,
"hop_count": fp.hop_count,
"suspected_os": fp.os_name,
"declared_os": dec_os,
"confidence": fp.confidence,
"network_path": fp.network_path,
"is_bot_tool": fp.is_bot_tool,
"spoof_reason": spoof_res.reason,
}
# ── 4. JA4 rotation ────────────────────────────────────────────────────
@ -146,7 +160,9 @@ async def get_ip_full_summary(ip: str):
risk = 0
risk += min(50, ml_data["max_score"] * 50)
if bf_data["active"]: risk += 20
if tcp_data["detected"]: risk += 15
if tcp_data["detected"]:
if tcp_data.get("is_bot_tool"): risk += 30 # outil de scan connu
else: risk += 15 # spoof OS
if rot_data["rotating"]: risk += min(15, rot_data["distinct_ja4_count"] * 3)
if pers_data["persistent"]: risk += min(10, pers_data["recurrence"] * 2)
risk = min(100, round(risk))