Add run-e2e-test.sh with CLI parameters (--hits, --http-ratio, --dns, --tls, --src-ips, --keep-analysis, --up) for configurable traffic generation. Traffic runs from VM endpoints with multiple source IPs (alias IPs on eth0) to produce distinct sessions for the ML pipeline. Fix curl TLS flags (--tlsv1.2 instead of --tls-v1-2), skip redundant local verification in distributed mode, and fix dashboard is_available() cache that never retried after ClickHouse recovery. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
176 lines
6.7 KiB
Python
176 lines
6.7 KiB
Python
"""Performance metrics module for the ML pipeline.

Records a summary of each cycle in ml_performance_metrics:
  - Overall anomaly rate and per-level counts (CRITICAL/HIGH/MEDIUM/LOW)
  - Correlation rate (correlated=1 vs 0)
  - Drift rate, latency, baseline size, adaptive threshold
  - Automatic log alerts on calibration, drift, correlation, latency

Usage in cycle.py:
    from .metrics import record_cycle_metrics
    record_cycle_metrics(client, db, cycle_id, model_name, ...)
"""
|
|
import time
|
|
import logging
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
import pandas as pd
|
|
|
|
logger = logging.getLogger(__name__)

# Alert thresholds. Each one can be overridden through the environment
# variable of the same name; the literal below is the fallback value.
import os

# Anomaly rate above which a calibration alert is logged.
ALERT_ANOMALY_RATE_HIGH = float(os.environ.get('ALERT_ANOMALY_RATE_HIGH', '0.10'))
# Non-zero anomaly rate below which a calibration alert is logged.
ALERT_ANOMALY_RATE_LOW = float(os.environ.get('ALERT_ANOMALY_RATE_LOW', '0.005'))
# Drift rate above which a drift alert is raised.
ALERT_DRIFT_RATE = float(os.environ.get('ALERT_DRIFT_RATE', '0.30'))
# Correlation rate below which a correlation alert is logged.
ALERT_CORRELATION_RATE = float(os.environ.get('ALERT_CORRELATION_RATE', '0.50'))
# Cycle latency (milliseconds) above which a performance alert is logged.
ALERT_LATENCY_MS = int(os.environ.get('ALERT_LATENCY_MS', '300000'))
|
|
|
|
|
|
def record_cycle_metrics(
    client,
    db: str,
    cycle_id: str,
    model_name: str,
    df_all: pd.DataFrame,
    anomalies: pd.DataFrame,
    all_scored: pd.DataFrame,
    drift_rate: float,
    cycle_start_time: float,
    baseline_size: int,
    threshold: float,
    valid_features: int,
    total_features: int,
    meta_learner_active: bool = False,
) -> None:
    """Record one cycle's summary row in ``ml_performance_metrics``.

    Alerts are also written to the log when thresholds are exceeded. They
    are emitted before the database is touched, so they fire even when the
    table is missing or the insert fails.

    Args:
        client: Active ClickHouse client (must expose ``query`` and ``insert``).
        db: Name of the ja4_processing database.
        cycle_id: Cycle identifier (timestamp); only used in the debug log.
        model_name: 'Complet' or 'Applicatif' (or a 24h variant).
        df_all: Full cycle DataFrame (before scoring).
        anomalies: DataFrame of detected anomalies.
        all_scored: DataFrame holding every score.
        drift_rate: Share of drifting features, in [0, 1].
        cycle_start_time: ``time.time()`` timestamp taken at cycle start.
        baseline_size: Number of sessions in the human baseline.
        threshold: Adaptive threshold used.
        valid_features: Number of valid features.
        total_features: Total number of requested features.
        meta_learner_active: True if the meta-learner was used this cycle.

    Returns:
        None. Errors raised by the table check or the insert are logged as
        warnings and not propagated.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Strip the tzinfo so
    # the inserted value stays a naive UTC datetime, exactly as before.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    latency_ms = int((time.time() - cycle_start_time) * 1000)

    # Real session count goes into the row; the clamped value is used only
    # as a denominator so that an empty cycle is recorded as 0 sessions
    # instead of 1.
    n_sessions = len(df_all)
    rate_denominator = max(n_sessions, 1)
    n_scored = max(len(all_scored), 1)

    # Correlation rate (0.0 when the column is not present).
    if 'correlated' in df_all.columns:
        n_correlated = int((df_all['correlated'] == 1).sum())
        correlated_rate = n_correlated / rate_denominator
    else:
        correlated_rate = 0.0

    # Anomaly counts per threat level.
    n_critical = n_high = n_medium = n_low = 0
    if not anomalies.empty and 'threat_level' in anomalies.columns:
        levels = anomalies['threat_level'].value_counts()
        n_critical = int(levels.get('CRITICAL', 0))
        n_high = int(levels.get('HIGH', 0))
        n_medium = int(levels.get('MEDIUM', 0))
        n_low = int(levels.get('LOW', 0))

    # Known bots, Anubis denials and legitimate browsers.
    n_known_bot = 0
    n_anubis_deny = 0
    n_legit_browser = 0
    if not df_all.empty:
        if 'bot_name' in df_all.columns:
            # A missing (NaN) bot name counts as "no bot", same as ''.
            n_known_bot = int((df_all['bot_name'].fillna('') != '').sum())
        if 'anubis_bot_action' in df_all.columns:
            n_anubis_deny = int((df_all['anubis_bot_action'] == 'DENY').sum())
        if 'browser_confidence' in df_all.columns:
            # Imported lazily, only when the column is actually present.
            from .config import BROWSER_CONFIDENCE_THRESHOLD
            n_legit_browser = int(
                (df_all['browser_confidence'] >= BROWSER_CONFIDENCE_THRESHOLD).sum()
            )

    anomaly_rate = (n_critical + n_high + n_medium + n_low) / n_scored
    drift_alert = 1 if drift_rate > ALERT_DRIFT_RATE else 0

    # Log alerts first: they must not depend on the database being reachable.
    _emit_alerts(model_name, anomaly_rate, drift_rate, correlated_rate, latency_ms, drift_alert)

    try:
        # The metrics table is optional: skip silently when it is absent.
        # NOTE(review): `db` is interpolated into the SQL text; it is assumed
        # to come from trusted configuration — confirm against the caller.
        table_check = client.query(
            f"SELECT name FROM system.tables WHERE database = '{db}' AND name = 'ml_performance_metrics'"
        )
        if not table_check.result_rows:
            logger.debug("[Metrics] Table ml_performance_metrics absente — métriques non enregistrées")
            return

        # Values are positional; their order presumably mirrors the table's
        # column order (schema not visible here — verify before reordering).
        client.insert(
            f"{db}.ml_performance_metrics",
            [[
                now,
                model_name,
                n_sessions,
                round(float(correlated_rate), 4),
                round(float(anomaly_rate), 4),
                n_critical,
                n_high,
                n_medium,
                n_low,
                n_known_bot,
                n_anubis_deny,
                n_legit_browser,
                round(float(drift_rate), 4),
                drift_alert,
                latency_ms,
                valid_features,
                total_features,
                baseline_size,
                round(float(threshold), 6),
                1 if meta_learner_active else 0,
            ]]
        )
        logger.debug(f"[Metrics] Cycle {cycle_id} enregistré ({latency_ms}ms)")
    except Exception as e:
        # Best effort: a metrics failure must never break the ML cycle.
        logger.warning(f"[Metrics] Erreur d'enregistrement des métriques : {e}")
|
|
|
|
|
|
def _emit_alerts(model_name: str, anomaly_rate: float, drift_rate: float,
                 correlated_rate: float, latency_ms: int, drift_alert: int) -> None:
    """Log one warning per metric that crosses its alert threshold.

    Checks run in a fixed order: calibration (anomaly rate too high, or
    non-zero but too low), drift, correlation, then latency. Nothing is
    returned; the only effect is ``logger.warning`` calls.

    Args:
        model_name: Label placed in brackets at the start of each message.
        anomaly_rate: Anomaly rate compared against both calibration bounds.
        drift_rate: Drift rate, only used to format the drift message.
        correlated_rate: Correlation rate compared against its lower bound.
        latency_ms: Cycle latency in milliseconds.
        drift_alert: Truthy when the drift warning must be emitted.
    """
    pending = []

    # Calibration: at most one of the two messages; a rate of exactly 0 is
    # never reported as "too low".
    if anomaly_rate > ALERT_ANOMALY_RATE_HIGH:
        pending.append(
            f"[{model_name}] ⚠ ALERTE CALIBRATION : taux d'anomalie élevé "
            f"({anomaly_rate:.1%} > {ALERT_ANOMALY_RATE_HIGH:.1%})"
        )
    elif 0 < anomaly_rate < ALERT_ANOMALY_RATE_LOW:
        pending.append(
            f"[{model_name}] ⚠ ALERTE CALIBRATION : taux d'anomalie très bas "
            f"({anomaly_rate:.3%} < {ALERT_ANOMALY_RATE_LOW:.1%})"
        )

    # Drift: the decision was already taken by the caller.
    if drift_alert:
        pending.append(
            f"[{model_name}] ⚠ ALERTE DRIFT : {drift_rate:.1%} des features en dérive "
            f"(seuil {ALERT_DRIFT_RATE:.1%})"
        )

    # Correlation below the configured minimum.
    if correlated_rate < ALERT_CORRELATION_RATE:
        pending.append(
            f"[{model_name}] ⚠ ALERTE CORRÉLATION : taux de corrélation bas "
            f"({correlated_rate:.1%} < {ALERT_CORRELATION_RATE:.1%}) — "
            "vérifier ja4sentinel/logcorrelator"
        )

    # Cycle duration above the configured maximum.
    if latency_ms > ALERT_LATENCY_MS:
        pending.append(
            f"[{model_name}] ⚠ ALERTE PERFORMANCE : latence cycle {latency_ms}ms "
            f"> {ALERT_LATENCY_MS}ms"
        )

    for message in pending:
        logger.warning(message)
|