feat(clustering): palette diversifiée, suppression scores anomalie/robot, visualisation éclatée
- Suppression de 'Score Anomalie' (avg_score) des features de clustering (31 → 30)
- Suppression de 'Score de détection robot' (mean_score) de la sidebar et de l'API
- Suppression de bot_ips / high_risk_ips des stats (métriques dérivées des scores supprimés)
- Redistribution des poids dans risk_score_from_centroid (nouveaux poids) : UA-CH mismatch 0.07→0.17, fuzzing 0.09→0.14, headless 0.06→0.10, vélocité 0.05→0.09, ip_id_zero 0.05→0.07
- Mise à jour des indices de features dans name_cluster et risk_score_from_centroid
- Palette de 24 couleurs spectrales (cluster_color) : bleu/violet/rose/teal/amber/cyan/lime… Les couleurs identifient les clusters, non leur niveau de risque
- Remplacement de la légende CRITICAL/HIGH/MEDIUM/LOW par la liste des clusters actifs
- Ajout de spread_clusters() : répulsion itérative des centroïdes trop proches (appelée avec n_iter=60, min_dist=0.16 ; valeurs par défaut : 50 et 0.14) → les clusters se repoussent mutuellement → visualisation plus lisible
- Interface TypeScript mise à jour (suppression de mean_score, bot_ips, high_risk_ips)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -24,6 +24,7 @@ from ..services.clustering_engine import (
|
|||||||
FEATURE_KEYS, FEATURE_NAMES, FEATURE_NORMS, N_FEATURES,
|
FEATURE_KEYS, FEATURE_NAMES, FEATURE_NORMS, N_FEATURES,
|
||||||
build_feature_vector, kmeans_pp, pca_2d, compute_hulls,
|
build_feature_vector, kmeans_pp, pca_2d, compute_hulls,
|
||||||
name_cluster, risk_score_from_centroid, standardize,
|
name_cluster, risk_score_from_centroid, standardize,
|
||||||
|
cluster_color, spread_clusters,
|
||||||
)
|
)
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@ -42,19 +43,9 @@ _CACHE_TTL = 1800 # 30 minutes
|
|||||||
_LOCK = threading.Lock()
|
_LOCK = threading.Lock()
|
||||||
_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering")
|
_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering")
|
||||||
|
|
||||||
# ─── Couleurs menace ──────────────────────────────────────────────────────────
|
# ─── Palette de couleurs (remplace l'ancienne logique menace) ─────────────────
|
||||||
_THREAT_COLOR = {
|
# Les couleurs sont désormais attribuées par index de cluster pour maximiser
|
||||||
0.70: "#dc2626", # Critique
|
# la distinction visuelle, indépendamment du niveau de risque.
|
||||||
0.45: "#f97316", # Élevé
|
|
||||||
0.25: "#eab308", # Modéré
|
|
||||||
0.00: "#22c55e", # Sain
|
|
||||||
}
|
|
||||||
|
|
||||||
def _risk_to_color(risk: float) -> str:
|
|
||||||
for threshold, color in sorted(_THREAT_COLOR.items(), reverse=True):
|
|
||||||
if risk >= threshold:
|
|
||||||
return color
|
|
||||||
return "#6b7280"
|
|
||||||
|
|
||||||
|
|
||||||
# ─── SQL : TOUTES les IPs sans LIMIT ─────────────────────────────────────────
|
# ─── SQL : TOUTES les IPs sans LIMIT ─────────────────────────────────────────
|
||||||
@ -205,7 +196,11 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
|
|||||||
# ── 5. PCA-2D sur les features ORIGINALES (normalisées [0,1]) ────
|
# ── 5. PCA-2D sur les features ORIGINALES (normalisées [0,1]) ────
|
||||||
coords = pca_2d(X64) # (n, 2), normalisé [0,1]
|
coords = pca_2d(X64) # (n, 2), normalisé [0,1]
|
||||||
|
|
||||||
# ── 5b. Enveloppes convexes par cluster ──────────────────────────
|
# ── 5b. Dispersion — repousse les clusters trop proches ──────────
|
||||||
|
coords = spread_clusters(coords, km.labels, k_actual,
|
||||||
|
n_iter=60, min_dist=0.16)
|
||||||
|
|
||||||
|
# ── 5c. Enveloppes convexes par cluster ──────────────────────────
|
||||||
hulls = compute_hulls(coords, km.labels, k_actual)
|
hulls = compute_hulls(coords, km.labels, k_actual)
|
||||||
|
|
||||||
# ── 6. Agrégation par cluster ─────────────────────────────────────
|
# ── 6. Agrégation par cluster ─────────────────────────────────────
|
||||||
@ -242,7 +237,7 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
|
|||||||
raw_stats = {"mean_ttl": mean_ttl, "mean_mss": mean_mss, "mean_scale": mean_scale}
|
raw_stats = {"mean_ttl": mean_ttl, "mean_mss": mean_mss, "mean_scale": mean_scale}
|
||||||
label_name = name_cluster(centroids_orig[j], raw_stats)
|
label_name = name_cluster(centroids_orig[j], raw_stats)
|
||||||
risk = float(risk_score_from_centroid(centroids_orig[j]))
|
risk = float(risk_score_from_centroid(centroids_orig[j]))
|
||||||
color = _risk_to_color(risk)
|
color = cluster_color(j)
|
||||||
|
|
||||||
# Centroïde 2D = moyenne des coords du cluster
|
# Centroïde 2D = moyenne des coords du cluster
|
||||||
cxy = np.mean(cluster_coords[j], axis=0).tolist() if cluster_coords[j] else [0.5, 0.5]
|
cxy = np.mean(cluster_coords[j], axis=0).tolist() if cluster_coords[j] else [0.5, 0.5]
|
||||||
@ -282,7 +277,6 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
|
|||||||
"mean_mss": round(mean_mss, 0),
|
"mean_mss": round(mean_mss, 0),
|
||||||
"mean_scale": round(mean_scale, 1),
|
"mean_scale": round(mean_scale, 1),
|
||||||
"mean_win": round(mean_win, 0),
|
"mean_win": round(mean_win, 0),
|
||||||
"mean_score": round(avg_f("avg_score"), 4),
|
|
||||||
"mean_velocity":round(avg_f("avg_velocity"),3),
|
"mean_velocity":round(avg_f("avg_velocity"),3),
|
||||||
"mean_fuzzing": round(avg_f("avg_fuzzing"), 3),
|
"mean_fuzzing": round(avg_f("avg_fuzzing"), 3),
|
||||||
"mean_headless":round(avg_f("pct_headless"),3),
|
"mean_headless":round(avg_f("pct_headless"),3),
|
||||||
@ -338,8 +332,6 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
|
|||||||
# ── 9. Stockage résultat + cache IPs ─────────────────────────────
|
# ── 9. Stockage résultat + cache IPs ─────────────────────────────
|
||||||
total_ips = sum(n_["ip_count"] for n_ in nodes)
|
total_ips = sum(n_["ip_count"] for n_ in nodes)
|
||||||
total_hits = sum(n_["hit_count"] for n_ in nodes)
|
total_hits = sum(n_["hit_count"] for n_ in nodes)
|
||||||
bot_ips = sum(n_["ip_count"] for n_ in nodes if n_["risk_score"] > 0.45 or "🤖" in n_["label"])
|
|
||||||
high_ips = sum(n_["ip_count"] for n_ in nodes if n_["risk_score"] > 0.25)
|
|
||||||
elapsed = round(time.time() - t0, 2)
|
elapsed = round(time.time() - t0, 2)
|
||||||
|
|
||||||
result_dict = {
|
result_dict = {
|
||||||
@ -349,8 +341,6 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
|
|||||||
"total_clusters": len(nodes),
|
"total_clusters": len(nodes),
|
||||||
"total_ips": total_ips,
|
"total_ips": total_ips,
|
||||||
"total_hits": total_hits,
|
"total_hits": total_hits,
|
||||||
"bot_ips": bot_ips,
|
|
||||||
"high_risk_ips": high_ips,
|
|
||||||
"n_samples": n,
|
"n_samples": n,
|
||||||
"k": k_actual,
|
"k": k_actual,
|
||||||
"k_base": k,
|
"k_base": k,
|
||||||
|
|||||||
@ -127,9 +127,7 @@ FEATURES: list[tuple[str, str, object]] = [
|
|||||||
("scale", "Scale TCP", lambda v: min(1.0, (v or 0) / 14.0)),
|
("scale", "Scale TCP", lambda v: min(1.0, (v or 0) / 14.0)),
|
||||||
("win", "Fenêtre TCP", lambda v: min(1.0, (v or 0) / 65535.0)),
|
("win", "Fenêtre TCP", lambda v: min(1.0, (v or 0) / 65535.0)),
|
||||||
# Anomalie ML
|
# Anomalie ML
|
||||||
("avg_score", "Score Anomalie", lambda v: min(1.0, float(v or 0))),
|
("avg_velocity", "Vélocité (rps)", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(100))), ("avg_fuzzing", "Fuzzing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(300))),
|
||||||
("avg_velocity", "Vélocité (rps)", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(100))),
|
|
||||||
("avg_fuzzing", "Fuzzing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(300))),
|
|
||||||
("pct_headless", "Headless", lambda v: min(1.0, float(v or 0))),
|
("pct_headless", "Headless", lambda v: min(1.0, float(v or 0))),
|
||||||
("avg_post", "Ratio POST", lambda v: min(1.0, float(v or 0))),
|
("avg_post", "Ratio POST", lambda v: min(1.0, float(v or 0))),
|
||||||
# IP-ID
|
# IP-ID
|
||||||
@ -353,51 +351,48 @@ def name_cluster(centroid: np.ndarray, raw_stats: dict) -> str:
|
|||||||
n = len(s)
|
n = len(s)
|
||||||
ttl_raw = float(raw_stats.get("mean_ttl", 0))
|
ttl_raw = float(raw_stats.get("mean_ttl", 0))
|
||||||
mss_raw = float(raw_stats.get("mean_mss", 0))
|
mss_raw = float(raw_stats.get("mean_mss", 0))
|
||||||
country_risk_v = s[21] if n > 21 else 0.0
|
country_risk_v = s[20] if n > 20 else 0.0
|
||||||
asn_cloud = s[22] if n > 22 else 0.0
|
asn_cloud = s[21] if n > 21 else 0.0
|
||||||
accept_lang = s[23] if n > 23 else 1.0
|
accept_lang = s[22] if n > 22 else 1.0
|
||||||
accept_enc = s[24] if n > 24 else 1.0
|
accept_enc = s[23] if n > 23 else 1.0
|
||||||
sec_fetch = s[25] if n > 25 else 0.0
|
sec_fetch = s[24] if n > 24 else 0.0
|
||||||
hdr_count = s[26] if n > 26 else 0.5
|
hdr_count = s[25] if n > 25 else 0.5
|
||||||
hfp_popular = s[27] if n > 27 else 0.5
|
hfp_popular = s[26] if n > 26 else 0.5
|
||||||
hfp_rotating = s[28] if n > 28 else 0.0
|
hfp_rotating = s[27] if n > 27 else 0.0
|
||||||
|
|
||||||
# Scanner pur : aucun header browser, fingerprint rare, peu de headers
|
# Scanner pur : aucun header browser, fingerprint rare, peu de headers
|
||||||
if accept_lang < 0.15 and accept_enc < 0.15 and hdr_count < 0.25:
|
if accept_lang < 0.15 and accept_enc < 0.15 and hdr_count < 0.25:
|
||||||
return "🤖 Scanner pur (no headers)"
|
return "🤖 Scanner pur (no headers)"
|
||||||
# Fingerprint tournant ET suspect : bot qui change de profil headers
|
# Fingerprint tournant : bot qui change de profil headers
|
||||||
if hfp_rotating > 0.6 and s[4] > 0.15:
|
if hfp_rotating > 0.6:
|
||||||
return "🔄 Bot fingerprint tournant"
|
return "🔄 Bot fingerprint tournant"
|
||||||
# Fingerprint très rare et anomalie : bot artisanal unique
|
# Fingerprint très rare : bot artisanal unique
|
||||||
if hfp_popular < 0.15 and s[4] > 0.20:
|
if hfp_popular < 0.15:
|
||||||
return "🕵️ Fingerprint rare suspect"
|
return "🕵️ Fingerprint rare suspect"
|
||||||
# Scanners Masscan
|
# Scanners Masscan
|
||||||
if s[0] > 0.16 and s[0] < 0.25 and mss_raw in range(1440, 1460) and s[2] > 0.25:
|
if s[0] > 0.16 and s[0] < 0.25 and mss_raw in range(1440, 1460) and s[2] > 0.25:
|
||||||
return "🤖 Masscan Scanner"
|
return "🤖 Masscan Scanner"
|
||||||
# Bots offensifs agressifs (fuzzing + anomalie)
|
# Bots offensifs agressifs (fuzzing élevé)
|
||||||
if s[4] > 0.40 and s[6] > 0.3:
|
if s[4] > 0.40 and s[5] > 0.3:
|
||||||
return "🤖 Bot agressif"
|
return "🤖 Bot agressif"
|
||||||
# Bot qui simule un navigateur mais sans les vrais headers
|
# Bot qui simule un navigateur mais sans les vrais headers
|
||||||
if s[16] > 0.40 and sec_fetch < 0.2 and accept_lang < 0.3:
|
if s[15] > 0.40 and sec_fetch < 0.2 and accept_lang < 0.3:
|
||||||
return "🤖 Bot UA simulé"
|
return "🤖 Bot UA simulé"
|
||||||
# Pays à très haut risque avec trafic anormal
|
# Pays à très haut risque avec infrastructure cloud
|
||||||
if country_risk_v > 0.75 and (s[4] > 0.10 or asn_cloud > 0.5):
|
if country_risk_v > 0.75 and asn_cloud > 0.5:
|
||||||
return "🌏 Source pays risqué"
|
return "🌏 Source pays risqué"
|
||||||
# Cloud + UA-CH mismatch
|
# Cloud + UA-CH mismatch
|
||||||
if s[16] > 0.50 and asn_cloud > 0.70:
|
if s[15] > 0.50 and asn_cloud > 0.70:
|
||||||
return "☁️ Bot cloud UA-CH"
|
return "☁️ Bot cloud UA-CH"
|
||||||
if s[16] > 0.60:
|
if s[15] > 0.60:
|
||||||
return "🤖 UA-CH Mismatch"
|
return "🤖 UA-CH Mismatch"
|
||||||
# Headless browser (Puppeteer/Playwright) : a les headers Sec-Fetch mais headless
|
# Headless browser (Puppeteer/Playwright) : a les headers Sec-Fetch mais headless
|
||||||
if s[7] > 0.50 and sec_fetch > 0.5:
|
if s[6] > 0.50 and sec_fetch > 0.5:
|
||||||
return "🤖 Headless Browser"
|
return "🤖 Headless Browser"
|
||||||
if s[7] > 0.50:
|
if s[6] > 0.50:
|
||||||
return "🤖 Headless (no Sec-Fetch)"
|
return "🤖 Headless (no Sec-Fetch)"
|
||||||
# Anomalie ML significative
|
|
||||||
if s[4] > 0.35:
|
|
||||||
return "⚠️ Anomalie ML"
|
|
||||||
# Cloud pur (CDN/crawler légitime ?)
|
# Cloud pur (CDN/crawler légitime ?)
|
||||||
if asn_cloud > 0.85 and s[4] < 0.15:
|
if asn_cloud > 0.85:
|
||||||
return "☁️ Infrastructure cloud"
|
return "☁️ Infrastructure cloud"
|
||||||
# Pays à risque élevé sans autre signal
|
# Pays à risque élevé sans autre signal
|
||||||
if country_risk_v > 0.60:
|
if country_risk_v > 0.60:
|
||||||
@ -413,9 +408,9 @@ def name_cluster(centroid: np.ndarray, raw_stats: dict) -> str:
|
|||||||
return "🐧 Linux"
|
return "🐧 Linux"
|
||||||
if mss_raw < 1380 and mss_raw > 0:
|
if mss_raw < 1380 and mss_raw > 0:
|
||||||
return "🌐 Tunnel réseau"
|
return "🌐 Tunnel réseau"
|
||||||
if s[5] > 0.40:
|
if s[4] > 0.40:
|
||||||
return "⚡ Trafic rapide"
|
return "⚡ Trafic rapide"
|
||||||
if s[4] < 0.08 and s[5] < 0.10 and asn_cloud < 0.30:
|
if s[4] < 0.10 and asn_cloud < 0.30:
|
||||||
return "✅ Trafic sain"
|
return "✅ Trafic sain"
|
||||||
return "📊 Cluster mixte"
|
return "📊 Cluster mixte"
|
||||||
|
|
||||||
@ -423,34 +418,130 @@ def name_cluster(centroid: np.ndarray, raw_stats: dict) -> str:
|
|||||||
def risk_score_from_centroid(centroid: np.ndarray) -> float:
|
def risk_score_from_centroid(centroid: np.ndarray) -> float:
|
||||||
"""
|
"""
|
||||||
Score de risque [0,1] depuis le centroïde (espace original [0,1]).
|
Score de risque [0,1] depuis le centroïde (espace original [0,1]).
|
||||||
31 features — poids calibrés pour sommer à 1.0.
|
30 features (avg_score supprimé) — poids calibrés pour sommer à 1.0.
|
||||||
|
Indices décalés de -1 après suppression de avg_score (ancien idx 4).
|
||||||
"""
|
"""
|
||||||
s = centroid
|
s = centroid
|
||||||
n = len(s)
|
n = len(s)
|
||||||
country_risk_v = s[21] if n > 21 else 0.0
|
country_risk_v = s[20] if n > 20 else 0.0
|
||||||
asn_cloud = s[22] if n > 22 else 0.0
|
asn_cloud = s[21] if n > 21 else 0.0
|
||||||
no_accept_lang = 1.0 - (s[23] if n > 23 else 1.0)
|
no_accept_lang = 1.0 - (s[22] if n > 22 else 1.0)
|
||||||
no_encoding = 1.0 - (s[24] if n > 24 else 1.0)
|
no_encoding = 1.0 - (s[23] if n > 23 else 1.0)
|
||||||
no_sec_fetch = 1.0 - (s[25] if n > 25 else 0.0)
|
no_sec_fetch = 1.0 - (s[24] if n > 24 else 0.0)
|
||||||
few_headers = 1.0 - (s[26] if n > 26 else 0.5)
|
few_headers = 1.0 - (s[25] if n > 25 else 0.5)
|
||||||
# Fingerprint rare = suspect (faible popularité), fingerprint tournant = bot
|
hfp_rare = 1.0 - (s[26] if n > 26 else 0.5)
|
||||||
hfp_rare = 1.0 - (s[27] if n > 27 else 0.5)
|
hfp_rotating = s[27] if n > 27 else 0.0
|
||||||
hfp_rotating = s[28] if n > 28 else 0.0
|
|
||||||
|
|
||||||
|
# [4]=vélocité [5]=fuzzing [6]=headless [8]=ip_id_zero [15]=ua_ch_mismatch
|
||||||
|
# Poids redistribués depuis l'ancien score ML anomalie (0.25) vers les signaux restants
|
||||||
return float(np.clip(
|
return float(np.clip(
|
||||||
0.25 * s[4] + # score ML anomalie (principal)
|
0.14 * s[5] + # fuzzing
|
||||||
0.09 * s[6] + # fuzzing
|
0.17 * s[15] + # UA-CH mismatch (fort signal impersonation navigateur)
|
||||||
0.07 * s[16] + # UA-CH mismatch
|
0.10 * s[6] + # headless
|
||||||
0.06 * s[7] + # headless
|
0.09 * s[4] + # vélocité (rps)
|
||||||
0.05 * s[5] + # vélocité
|
0.07 * s[8] + # IP-ID zéro
|
||||||
0.05 * s[9] + # IP-ID zéro
|
|
||||||
0.09 * country_risk_v+ # risque pays source
|
0.09 * country_risk_v+ # risque pays source
|
||||||
0.06 * asn_cloud + # infrastructure cloud/VPN
|
0.06 * asn_cloud + # infrastructure cloud/VPN
|
||||||
0.04 * no_accept_lang+ # absence Accept-Language
|
0.04 * no_accept_lang+ # absence Accept-Language
|
||||||
0.04 * no_encoding + # absence Accept-Encoding
|
0.04 * no_encoding + # absence Accept-Encoding
|
||||||
0.04 * no_sec_fetch + # absence Sec-Fetch (pas un vrai navigateur)
|
0.04 * no_sec_fetch + # absence Sec-Fetch
|
||||||
0.04 * few_headers + # très peu de headers (scanner/curl)
|
0.04 * few_headers + # très peu de headers
|
||||||
0.06 * hfp_rare + # fingerprint headers rare = suspect
|
0.06 * hfp_rare + # fingerprint rare = suspect
|
||||||
0.06 * hfp_rotating, # rotation de fingerprint = bot
|
0.06 * hfp_rotating, # rotation de fingerprint = bot
|
||||||
0.0, 1.0
|
0.0, 1.0
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Palette de couleurs diversifiée (non liée au risque) ────────────────────
|
||||||
|
# 24 couleurs couvrant tout le spectre HSL pour distinguer les clusters visuellement.
|
||||||
|
# Choix: teintes espacées de ~15° avec alternance de saturation/luminosité.
|
||||||
|
|
||||||
|
_CLUSTER_PALETTE: list[str] = [
|
||||||
|
"#3b82f6", # blue
|
||||||
|
"#8b5cf6", # violet
|
||||||
|
"#ec4899", # pink
|
||||||
|
"#14b8a6", # teal
|
||||||
|
"#f59e0b", # amber
|
||||||
|
"#06b6d4", # cyan
|
||||||
|
"#a3e635", # lime
|
||||||
|
"#f97316", # orange
|
||||||
|
"#6366f1", # indigo
|
||||||
|
"#10b981", # emerald
|
||||||
|
"#e879f9", # fuchsia
|
||||||
|
"#fbbf24", # yellow
|
||||||
|
"#60a5fa", # light blue
|
||||||
|
"#c084fc", # light purple
|
||||||
|
"#fb7185", # rose
|
||||||
|
"#34d399", # light green
|
||||||
|
"#38bdf8", # sky
|
||||||
|
"#a78bfa", # lavender
|
||||||
|
"#fdba74", # peach
|
||||||
|
"#4ade80", # green
|
||||||
|
"#f472b6", # light pink
|
||||||
|
"#67e8f9", # light cyan
|
||||||
|
"#d97706", # dark amber
|
||||||
|
"#7c3aed", # dark violet
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def cluster_color(cluster_idx: int) -> str:
    """Return the palette colour assigned to a cluster index.

    The index wraps around the palette length, so every integer index maps
    to one of the palette entries.
    """
    palette_size = len(_CLUSTER_PALETTE)
    return _CLUSTER_PALETTE[cluster_idx % palette_size]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Dispersion des clusters dans l'espace 2D ────────────────────────────────
|
||||||
|
|
||||||
|
def spread_clusters(coords_2d: np.ndarray, labels: np.ndarray, k: int,
                    n_iter: int = 50, min_dist: float = 0.14) -> np.ndarray:
    """
    Push apart 2D cluster centroids that are closer than ``min_dist``.

    Centroids repel each other iteratively; every point is then shifted by
    the total displacement of its own centroid, and the result is rescaled
    per axis to [0, 1]. The input array is never modified.

    Parameters
    ----------
    coords_2d : 2D coordinates, one row per point.
    labels    : cluster index of each point (array-like, values in [0, k)).
    k         : number of cluster slots; slots with no point are ignored.
    n_iter    : maximum number of repulsion iterations.
    min_dist  : desired minimum distance between centroids. Increase it for
                a more spread-out layout.

    Returns
    -------
    A new array of displaced coordinates rescaled to [0, 1] on each axis.
    Empty input is returned as an empty array.
    """
    # Work on a floating-point copy: integer input would otherwise reject the
    # in-place addition of fractional offsets further down.
    points = np.array(coords_2d)
    if not np.issubdtype(points.dtype, np.floating):
        points = points.astype(float)
    if points.size == 0:
        # Nothing to move, and min()/max() below would fail on an empty array.
        return points
    labels = np.asarray(labels)

    # Fixed seed so that the jitter used for coincident centroids is
    # reproducible from one call to the next.
    rng = np.random.default_rng(0)

    centroids = np.zeros((k, 2))
    counts = np.zeros(k, dtype=int)
    masks = [labels == j for j in range(k)]
    for j, mask in enumerate(masks):
        if mask.any():
            centroids[j] = points[mask].mean(axis=0)
            counts[j] = int(mask.sum())

    # Only clusters that own at least one point take part in the repulsion.
    active = [j for j in range(k) if counts[j] > 0]
    orig = centroids.copy()

    for _ in range(n_iter):
        forces = np.zeros_like(centroids)
        interacted = False
        for i in active:
            for j in active:
                if i == j:
                    continue
                delta = centroids[i] - centroids[j]
                dist = float(np.linalg.norm(delta))
                if dist < 1e-8:
                    # Coincident centroids have no direction: pick a small
                    # random one so they can separate.
                    delta = rng.uniform(-0.02, 0.02, size=2)
                    dist = float(np.linalg.norm(delta)) + 1e-8
                    interacted = True
                if dist < min_dist:
                    # Repulsion grows linearly as the gap shrinks below min_dist.
                    magnitude = (min_dist - dist) / min_dist
                    forces[i] += magnitude * (delta / dist)
                    interacted = True
        if not interacted:
            # No pair is within range: the layout is stable and every further
            # iteration would leave the centroids unchanged.
            break
        centroids += forces * 0.10

    # Shift every point by the displacement of its own centroid.
    for j in active:
        points[masks[j]] += centroids[j] - orig[j]

    # Rescale each axis to [0, 1]; a degenerate (constant) axis keeps a unit
    # divisor to avoid dividing by zero.
    lo, hi = points.min(axis=0), points.max(axis=0)
    span = hi - lo
    span[span < 1e-8] = 1.0
    return (points - lo) / span
|
||||||
|
|||||||
@ -35,7 +35,6 @@ interface ClusterNode {
|
|||||||
hit_count: number;
|
hit_count: number;
|
||||||
mean_ttl: number;
|
mean_ttl: number;
|
||||||
mean_mss: number;
|
mean_mss: number;
|
||||||
mean_score: number;
|
|
||||||
mean_velocity: number;
|
mean_velocity: number;
|
||||||
mean_fuzzing: number;
|
mean_fuzzing: number;
|
||||||
mean_headless: number;
|
mean_headless: number;
|
||||||
@ -60,8 +59,6 @@ interface ClusterStats {
|
|||||||
total_clusters: number;
|
total_clusters: number;
|
||||||
total_ips: number;
|
total_ips: number;
|
||||||
total_hits: number;
|
total_hits: number;
|
||||||
bot_ips: number;
|
|
||||||
high_risk_ips: number;
|
|
||||||
n_samples: number;
|
n_samples: number;
|
||||||
k: number;
|
k: number;
|
||||||
elapsed_s: number;
|
elapsed_s: number;
|
||||||
@ -427,8 +424,6 @@ export default function ClusteringView() {
|
|||||||
<div className="font-semibold text-sm mb-2">Résultats</div>
|
<div className="font-semibold text-sm mb-2">Résultats</div>
|
||||||
<Stat label="Clusters" value={data.stats.total_clusters} tooltip={TIPS.k_actual} />
|
<Stat label="Clusters" value={data.stats.total_clusters} tooltip={TIPS.k_actual} />
|
||||||
<Stat label="IPs totales" value={data.stats.total_ips.toLocaleString()} tooltip={TIPS.pca_2d} />
|
<Stat label="IPs totales" value={data.stats.total_ips.toLocaleString()} tooltip={TIPS.pca_2d} />
|
||||||
<Stat label="IPs bots 🤖" value={data.stats.bot_ips.toLocaleString()} color="text-red-400" tooltip={TIPS.ips_bots} />
|
|
||||||
<Stat label="Risque élevé" value={data.stats.high_risk_ips.toLocaleString()} color="text-orange-400" tooltip={TIPS.high_risk} />
|
|
||||||
<Stat label="Hits totaux" value={data.stats.total_hits.toLocaleString()} tooltip={TIPS.total_hits} />
|
<Stat label="Hits totaux" value={data.stats.total_hits.toLocaleString()} tooltip={TIPS.total_hits} />
|
||||||
<Stat label="Calcul" value={`${data.stats.elapsed_s}s`} tooltip={TIPS.calc_time} />
|
<Stat label="Calcul" value={`${data.stats.elapsed_s}s`} tooltip={TIPS.calc_time} />
|
||||||
</div>
|
</div>
|
||||||
@ -504,7 +499,7 @@ export default function ClusteringView() {
|
|||||||
</div>
|
</div>
|
||||||
<p className="text-white font-semibold text-lg tracking-wide">Clustering en cours…</p>
|
<p className="text-white font-semibold text-lg tracking-wide">Clustering en cours…</p>
|
||||||
<p className="text-text-secondary text-sm mt-1">
|
<p className="text-text-secondary text-sm mt-1">
|
||||||
K-means++ · 31 features · {Math.round(k * sensitivity)} clusters · toutes les IPs
|
K-means++ · 30 features · {Math.round(k * sensitivity)} clusters · toutes les IPs
|
||||||
</p>
|
</p>
|
||||||
<p className="text-text-disabled text-xs mt-2 animate-pulse">Mise à jour automatique toutes les 3 secondes</p>
|
<p className="text-text-disabled text-xs mt-2 animate-pulse">Mise à jour automatique toutes les 3 secondes</p>
|
||||||
</div>
|
</div>
|
||||||
@ -527,19 +522,18 @@ export default function ClusteringView() {
|
|||||||
{/* Légende overlay */}
|
{/* Légende overlay */}
|
||||||
<div style={{ position: 'absolute', bottom: 16, left: 16, pointerEvents: 'all' }}>
|
<div style={{ position: 'absolute', bottom: 16, left: 16, pointerEvents: 'all' }}>
|
||||||
<div className="bg-black/70 rounded-lg p-2 text-xs flex flex-col gap-1">
|
<div className="bg-black/70 rounded-lg p-2 text-xs flex flex-col gap-1">
|
||||||
{([
|
<div className="text-white/50 text-[10px] uppercase tracking-wide mb-1">Clusters</div>
|
||||||
['#dc2626', 'CRITICAL', TIPS.risk_critical],
|
{data?.nodes?.slice(0, 6).map((n) => (
|
||||||
['#f97316', 'HIGH', TIPS.risk_high],
|
<div key={n.id} className="flex items-center gap-2">
|
||||||
['#eab308', 'MEDIUM', TIPS.risk_medium],
|
<span className="w-3 h-3 rounded-full flex-shrink-0" style={{ background: n.color }} />
|
||||||
['#22c55e', 'LOW', TIPS.risk_low],
|
<span className="text-white/70 truncate max-w-[120px]">{n.label}</span>
|
||||||
] as const).map(([c, l, tip]) => (
|
|
||||||
<div key={l} className="flex items-center gap-2" title={tip}>
|
|
||||||
<span className="w-3 h-3 rounded-full flex-shrink-0" style={{ background: c }} />
|
|
||||||
<span className="text-white/80 cursor-help">{l}</span>
|
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
|
{(data?.nodes?.length ?? 0) > 6 && (
|
||||||
|
<div className="text-white/30 text-[10px]">+{(data?.nodes?.length ?? 0) - 6} autres…</div>
|
||||||
|
)}
|
||||||
<div className="mt-1 pt-1 border-t border-white/10 text-white/40 text-[10px] cursor-help" title={TIPS.features_31}>
|
<div className="mt-1 pt-1 border-t border-white/10 text-white/40 text-[10px] cursor-help" title={TIPS.features_31}>
|
||||||
31 features · PCA 2D ⓘ
|
30 features · PCA 2D ⓘ
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -666,7 +660,6 @@ function ClusterSidebar({ node, ipDetails, ipTotal, ipPage, clusterPoints, onClo
|
|||||||
<div className="font-semibold mb-2">Stack TCP</div>
|
<div className="font-semibold mb-2">Stack TCP</div>
|
||||||
<Stat label="TTL moyen" value={node.mean_ttl} tooltip={TIPS.mean_ttl} />
|
<Stat label="TTL moyen" value={node.mean_ttl} tooltip={TIPS.mean_ttl} />
|
||||||
<Stat label="MSS moyen" value={node.mean_mss} tooltip={TIPS.mean_mss} />
|
<Stat label="MSS moyen" value={node.mean_mss} tooltip={TIPS.mean_mss} />
|
||||||
<Stat label="Score ML" value={`${(node.mean_score * 100).toFixed(1)}%`} tooltip={TIPS.mean_score} />
|
|
||||||
<Stat label="Vélocité" value={node.mean_velocity?.toFixed ? `${node.mean_velocity.toFixed(2)} rps` : '-'} tooltip={TIPS.mean_velocity} />
|
<Stat label="Vélocité" value={node.mean_velocity?.toFixed ? `${node.mean_velocity.toFixed(2)} rps` : '-'} tooltip={TIPS.mean_velocity} />
|
||||||
<Stat label="Headless" value={node.mean_headless ? `${(node.mean_headless * 100).toFixed(0)}%` : '-'} tooltip={TIPS.mean_headless} />
|
<Stat label="Headless" value={node.mean_headless ? `${(node.mean_headless * 100).toFixed(0)}%` : '-'} tooltip={TIPS.mean_headless} />
|
||||||
<Stat label="UA-CH Mismatch" value={node.mean_ua_ch ? `${(node.mean_ua_ch * 100).toFixed(0)}%` : '-'} tooltip={TIPS.mean_ua_ch} />
|
<Stat label="UA-CH Mismatch" value={node.mean_ua_ch ? `${(node.mean_ua_ch * 100).toFixed(0)}%` : '-'} tooltip={TIPS.mean_ua_ch} />
|
||||||
|
|||||||
Reference in New Issue
Block a user