feat(clustering): diversified color palette, drop anomaly/bot scores, spread-out cluster visualization
- Removed 'Score Anomalie' (avg_score) from the clustering features (31 → 30)
- Removed 'Score de détection robot' (mean_score) from the sidebar and the API
- Removed bot_ips / high_risk_ips from the stats (metrics derived from the removed scores)
- Redistributed the weights in risk_score_from_centroid: UA-CH mismatch +17%, fuzzing +14%, headless +10%, velocity +9%, ip_id_zero +7% (see the weighted-sum sketch below)
- Updated the feature indices in name_cluster and risk_score_from_centroid
- 24-color spectral palette (cluster_color) → blue/violet/pink/teal/amber/cyan/lime... Colors identify clusters, not their risk level
- Replaced the CRITICAL/HIGH/MEDIUM/LOW legend with the list of active clusters
- Added spread_clusters(): iterative repulsion of centroids that are too close (50 iterations, min_dist=0.16) → clusters push each other apart → more readable visualization
- Updated the TypeScript interface (removed mean_score, bot_ips, high_risk_ips)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
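The weight redistribution above reads as if risk_score_from_centroid computes a weighted sum over the normalized centroid features. A minimal sketch of that idea, assuming hypothetical feature indices (the real order is defined by FEATURE_KEYS in clustering_engine) and treating the quoted percentages as illustrative weights, not the project's actual coefficients:

import numpy as np

# Hypothetical index → weight mapping; the real feature order lives in
# clustering_engine (FEATURE_KEYS). Weights mirror the commit message.
_RISK_WEIGHTS = {
    5: 0.17,   # UA-CH mismatch
    11: 0.14,  # fuzzing
    17: 0.10,  # headless
    22: 0.09,  # velocity
    26: 0.07,  # ip_id_zero
}

def risk_score_sketch(centroid: np.ndarray) -> float:
    """Weighted sum of normalized [0, 1] centroid features, clipped to [0, 1].

    The remaining weight is assumed to be spread over the other features.
    """
    score = sum(w * float(centroid[i]) for i, w in _RISK_WEIGHTS.items())
    return float(np.clip(score, 0.0, 1.0))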
@@ -24,6 +24,7 @@ from ..services.clustering_engine import (
     FEATURE_KEYS, FEATURE_NAMES, FEATURE_NORMS, N_FEATURES,
     build_feature_vector, kmeans_pp, pca_2d, compute_hulls,
     name_cluster, risk_score_from_centroid, standardize,
+    cluster_color, spread_clusters,
 )
 
 log = logging.getLogger(__name__)
@@ -42,19 +43,9 @@ _CACHE_TTL = 1800 # 30 minutes
 _LOCK = threading.Lock()
 _EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering")
 
-# ─── Couleurs menace ──────────────────────────────────────────────────────────
-_THREAT_COLOR = {
-    0.70: "#dc2626",  # Critique
-    0.45: "#f97316",  # Élevé
-    0.25: "#eab308",  # Modéré
-    0.00: "#22c55e",  # Sain
-}
-
-def _risk_to_color(risk: float) -> str:
-    for threshold, color in sorted(_THREAT_COLOR.items(), reverse=True):
-        if risk >= threshold:
-            return color
-    return "#6b7280"
+# ─── Palette de couleurs (remplace l'ancienne logique menace) ─────────────────
+# Les couleurs sont désormais attribuées par index de cluster pour maximiser
+# la distinction visuelle, indépendamment du niveau de risque.
 
 
 # ─── SQL : TOUTES les IPs sans LIMIT ─────────────────────────────────────────
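A sketch of how the index-based coloring described in the comment above might be implemented; the helper name, hex values, and wrap-around behavior are assumptions, not the project's actual 24-color spectral palette in cluster_color:

# Index-based palette lookup (hypothetical colors, chosen for contrast).
# Colors identify clusters, not their risk level.
_PALETTE = [
    "#3b82f6",  # blue
    "#8b5cf6",  # violet
    "#ec4899",  # pink
    "#14b8a6",  # teal
    "#f59e0b",  # amber
    "#06b6d4",  # cyan
    "#84cc16",  # lime
]

def cluster_color_sketch(j: int) -> str:
    # Wrap around if there are more clusters than palette entries.
    return _PALETTE[j % len(_PALETTE)]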
@@ -205,7 +196,11 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
     # ── 5. PCA-2D sur les features ORIGINALES (normalisées [0,1]) ────
     coords = pca_2d(X64)  # (n, 2), normalisé [0,1]
 
-    # ── 5b. Enveloppes convexes par cluster ──────────────────────────
+    # ── 5b. Dispersion — repousse les clusters trop proches ──────────
+    coords = spread_clusters(coords, km.labels, k_actual,
+                             n_iter=60, min_dist=0.16)
+
+    # ── 5c. Enveloppes convexes par cluster ──────────────────────────
     hulls = compute_hulls(coords, km.labels, k_actual)
 
     # ── 6. Agrégation par cluster ─────────────────────────────────────
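A minimal sketch of the kind of iterative repulsion the spread_clusters call above appears to perform, assuming coords are the (n, 2) PCA coordinates in [0, 1] and labels are the k-means assignments; the helper name and every detail below are assumptions, not the project's implementation:

import numpy as np

def spread_clusters_sketch(coords: np.ndarray, labels: np.ndarray, k: int,
                           n_iter: int = 50, min_dist: float = 0.16) -> np.ndarray:
    """Iteratively push apart cluster centroids that lie closer than min_dist."""
    coords = coords.copy()
    for _ in range(n_iter):
        # Current 2D centroid of each cluster (fallback for empty clusters).
        centers = np.array([coords[labels == j].mean(axis=0) if np.any(labels == j)
                            else np.array([0.5, 0.5]) for j in range(k)])
        shifts = np.zeros_like(centers)
        for a in range(k):
            for b in range(a + 1, k):
                delta = centers[a] - centers[b]
                dist = float(np.linalg.norm(delta))
                if 0.0 < dist < min_dist:
                    # Push both clusters apart along the line joining them.
                    push = (min_dist - dist) / 2.0 * (delta / dist)
                    shifts[a] += push
                    shifts[b] -= push
                elif dist == 0.0:
                    # Coincident centroids: nudge in a fixed direction.
                    shifts[a] += np.array([min_dist / 2.0, 0.0])
                    shifts[b] -= np.array([min_dist / 2.0, 0.0])
        # Move every point of a cluster by the same vector so the cluster's
        # internal shape is preserved while the groups separate.
        for j in range(k):
            coords[labels == j] += shifts[j]
    return np.clip(coords, 0.0, 1.0)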
@@ -242,7 +237,7 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
         raw_stats = {"mean_ttl": mean_ttl, "mean_mss": mean_mss, "mean_scale": mean_scale}
         label_name = name_cluster(centroids_orig[j], raw_stats)
         risk = float(risk_score_from_centroid(centroids_orig[j]))
-        color = _risk_to_color(risk)
+        color = cluster_color(j)
 
         # Centroïde 2D = moyenne des coords du cluster
         cxy = np.mean(cluster_coords[j], axis=0).tolist() if cluster_coords[j] else [0.5, 0.5]
@@ -282,7 +277,6 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
             "mean_mss": round(mean_mss, 0),
             "mean_scale": round(mean_scale, 1),
             "mean_win": round(mean_win, 0),
-            "mean_score": round(avg_f("avg_score"), 4),
             "mean_velocity":round(avg_f("avg_velocity"),3),
             "mean_fuzzing": round(avg_f("avg_fuzzing"), 3),
             "mean_headless":round(avg_f("pct_headless"),3),
@@ -338,8 +332,6 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
     # ── 9. Stockage résultat + cache IPs ─────────────────────────────
     total_ips = sum(n_["ip_count"] for n_ in nodes)
     total_hits = sum(n_["hit_count"] for n_ in nodes)
-    bot_ips = sum(n_["ip_count"] for n_ in nodes if n_["risk_score"] > 0.45 or "🤖" in n_["label"])
-    high_ips = sum(n_["ip_count"] for n_ in nodes if n_["risk_score"] > 0.25)
     elapsed = round(time.time() - t0, 2)
 
     result_dict = {
@@ -349,8 +341,6 @@ def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
         "total_clusters": len(nodes),
         "total_ips": total_ips,
         "total_hits": total_hits,
-        "bot_ips": bot_ips,
-        "high_risk_ips": high_ips,
         "n_samples": n,
         "k": k_actual,
         "k_base": k,