feat(e2e): add distributed E2E test framework with parametric traffic generation
Add run-e2e-test.sh with CLI parameters (--hits, --http-ratio, --dns, --tls, --src-ips, --keep-analysis, --up) for configurable traffic generation. Traffic runs from VM endpoints with multiple source IPs (alias IPs on eth0) to produce distinct sessions for the ML pipeline. Fix curl TLS flags (--tlsv1.2 instead of --tls-v1-2), skip redundant local verification in distributed mode, and fix dashboard is_available() cache that never retried after ClickHouse recovery. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -72,6 +72,11 @@ HEALTH_PORT = int(os.getenv('HEALTH_PORT', '8080'))
|
||||
DEDUP_TTL_MIN = int(os.getenv('DEDUP_TTL_MIN', '60'))
|
||||
RECURRENCE_WEIGHT = _require_float('RECURRENCE_WEIGHT', 0.005)
|
||||
|
||||
# ─── Baseline minimum — nombre minimum de sessions humaines pour l'IF ─────
|
||||
MIN_HUMAN_BASELINE = int(os.getenv('MIN_HUMAN_BASELINE', '500'))
|
||||
# En mode test, les IPs privées n'ont pas d'ASN 'isp' — utiliser 'unknown' comme fallback
|
||||
BASELINE_ACCEPT_UNKNOWN = os.getenv('BASELINE_ACCEPT_UNKNOWN', 'false').lower() == 'true'
|
||||
|
||||
# ─── Autoencoder (AE) — second scorer parallèle ────────────────────────────
|
||||
|
||||
AE_WEIGHT = _require_float('AE_WEIGHT', 0.30, 0, 1)
|
||||
@ -79,6 +84,9 @@ AE_EPOCHS = int(os.getenv('AE_EPOCHS', '50'))
|
||||
AE_LATENT_DIM = int(os.getenv('AE_LATENT_DIM', '16'))
|
||||
AE_LEARNING_RATE = float(os.getenv('AE_LEARNING_RATE', '1e-3'))
|
||||
|
||||
# ─── NFEnsemble — Deep Ensemble (M=5) incertitude ──────────────────────────
|
||||
NF_UNCERTAINTY_THRESHOLD = float(os.getenv('NF_UNCERTAINTY_THRESHOLD', '1.0'))
|
||||
|
||||
SESSION_TRANSFORMER_PATH = os.getenv(
|
||||
'SESSION_TRANSFORMER_PATH',
|
||||
os.path.join(MODEL_DIR, 'session_transformer.pt')
|
||||
|
||||
@ -218,16 +218,28 @@ def fetch_and_analyze():
|
||||
if not unknown_h2.empty:
|
||||
n_unknown = len(unknown_h2)
|
||||
# Insérer les fingerprints inconnus dans la table ClickHouse
|
||||
client.command(
|
||||
"INSERT INTO ja4_processing.unknown_h2_fingerprints "
|
||||
"(observed_at, src_ip, ja4, h2_fingerprint, h2_settings_fp, "
|
||||
"h2_window_update, h2_pseudo_order, h2_has_priority, "
|
||||
"browser_confidence_score, header_user_agent, tls_version) "
|
||||
"SELECT now(), src_ip, ja4, h2_fingerprint, h2_settings_fp, "
|
||||
"h2_window_update, h2_pseudo_order, h2_has_priority, "
|
||||
"browser_confidence, header_user_agent, tls_version "
|
||||
"FROM input"
|
||||
)
|
||||
cols = [
|
||||
'observed_at', 'src_ip', 'ja4', 'h2_fingerprint', 'h2_settings_fp',
|
||||
'h2_window_update', 'h2_pseudo_order', 'h2_has_priority',
|
||||
'browser_confidence_score', 'header_user_agent', 'tls_version',
|
||||
]
|
||||
rows = []
|
||||
for _, row in unknown_h2.iterrows():
|
||||
rows.append({
|
||||
'observed_at': row.get('time', ''),
|
||||
'src_ip': row.get('src_ip', ''),
|
||||
'ja4': row.get('ja4', ''),
|
||||
'h2_fingerprint': row.get('h2_fingerprint', ''),
|
||||
'h2_settings_fp': row.get('h2_settings_fp', ''),
|
||||
'h2_window_update': int(row.get('h2_window_update', 0)),
|
||||
'h2_pseudo_order': row.get('h2_pseudo_order', ''),
|
||||
'h2_has_priority': int(row.get('h2_has_priority', 0)),
|
||||
'browser_confidence_score': float(row.get('browser_confidence', 0.0)),
|
||||
'header_user_agent': row.get('header_user_agent', ''),
|
||||
'tls_version': row.get('tls_version', ''),
|
||||
})
|
||||
client.insert('ja4_processing.unknown_h2_fingerprints', rows,
|
||||
column_names=cols)
|
||||
log_info(f'[H2 Queue] {n_unknown} fingerprint(s) H2 inconnu(s) mis en file d\'examen.')
|
||||
except Exception as e:
|
||||
log_info(f'[H2 Queue] Erreur insertion unknown_h2_fingerprints : {e}')
|
||||
@ -324,8 +336,12 @@ def fetch_and_analyze():
|
||||
log_info('')
|
||||
log_info(f'── Modèle Applicatif (L7 seul, non-corrélé) : {len(df_uncorr)} sessions, {len(feats)} features ──')
|
||||
anom_b, scored_b = run_semi_supervised_logic(df_uncorr, feats, 'Applicatif', cycle_id, recurrence_map)
|
||||
all_anom = pd.concat([anom_a, anom_b], ignore_index=True)
|
||||
all_scored = pd.concat([scored_a, scored_b], ignore_index=True)
|
||||
_anom_dfs = [df for df in [anom_a, anom_b]
|
||||
if df is not None and not df.empty]
|
||||
all_anom = pd.concat(_anom_dfs, ignore_index=True) if _anom_dfs else pd.DataFrame()
|
||||
_scored_dfs = [df for df in [scored_a, scored_b]
|
||||
if df is not None and not df.empty]
|
||||
all_scored = pd.concat(_scored_dfs, ignore_index=True) if _scored_dfs else pd.DataFrame()
|
||||
|
||||
# ── A3 : Analyse fenêtre 24h (optionnelle) ────────────────────────────────
|
||||
if ENABLE_MULTIWINDOW:
|
||||
@ -336,8 +352,12 @@ def fetch_and_analyze():
|
||||
log_info(f"[24h] {len(df_24h)} sessions dans la fenêtre 24h.")
|
||||
anom_c, scored_c = run_semi_supervised_logic(df_24h[df_24h['correlated'] == 1].copy(), feats_complet, 'Complet_24h', cycle_id, recurrence_map)
|
||||
anom_d, scored_d = run_semi_supervised_logic(df_24h[df_24h['correlated'] == 0].copy(), feats, 'Applicatif_24h', cycle_id, recurrence_map)
|
||||
all_anom_24h = pd.concat([anom_c, anom_d], ignore_index=True)
|
||||
all_scored_24h = pd.concat([scored_c, scored_d], ignore_index=True)
|
||||
_anom_24h_dfs = [df for df in [anom_c, anom_d]
|
||||
if df is not None and not df.empty]
|
||||
all_anom_24h = pd.concat(_anom_24h_dfs, ignore_index=True) if _anom_24h_dfs else pd.DataFrame()
|
||||
_scored_24h_dfs = [df for df in [scored_c, scored_d]
|
||||
if df is not None and not df.empty]
|
||||
all_scored_24h = pd.concat(_scored_24h_dfs, ignore_index=True) if _scored_24h_dfs else pd.DataFrame()
|
||||
# Fusion : pour les IPs présentes dans les deux fenêtres, conserver le score le plus bas
|
||||
if not all_anom_24h.empty:
|
||||
all_anom = pd.concat([all_anom, all_anom_24h], ignore_index=True)
|
||||
|
||||
@ -106,31 +106,40 @@ def record_cycle_metrics(
|
||||
_emit_alerts(model_name, anomaly_rate, drift_rate, correlated_rate, latency_ms, drift_alert)
|
||||
|
||||
try:
|
||||
client.execute(
|
||||
f"INSERT INTO {db}.ml_performance_metrics VALUES",
|
||||
[{
|
||||
'cycle_at': now,
|
||||
'model_name': model_name,
|
||||
'total_sessions': n_total,
|
||||
'correlated_rate': round(float(correlated_rate), 4),
|
||||
'anomaly_rate': round(float(anomaly_rate), 4),
|
||||
'critical_count': n_critical,
|
||||
'high_count': n_high,
|
||||
'medium_count': n_medium,
|
||||
'low_count': n_low,
|
||||
'known_bot_count': n_known_bot,
|
||||
'anubis_deny_count': n_anubis_deny,
|
||||
'legit_browser_count': n_legit_browser,
|
||||
'drift_rate': round(float(drift_rate), 4),
|
||||
'drift_alert': drift_alert,
|
||||
'cycle_latency_ms': latency_ms,
|
||||
'features_valid': valid_features,
|
||||
'features_total': total_features,
|
||||
'baseline_size': baseline_size,
|
||||
'threshold': round(float(threshold), 6),
|
||||
'meta_learner_active': 1 if meta_learner_active else 0,
|
||||
}]
|
||||
# Vérifier que la table existe avant d'insérer (optionnelle)
|
||||
table_check = client.query(
|
||||
f"SELECT name FROM system.tables WHERE database = '{db}' AND name = 'ml_performance_metrics'"
|
||||
)
|
||||
if not table_check.result_rows:
|
||||
logger.debug("[Metrics] Table ml_performance_metrics absente — métriques non enregistrées")
|
||||
return
|
||||
|
||||
client.insert(
|
||||
f"{db}.ml_performance_metrics",
|
||||
[[
|
||||
now,
|
||||
model_name,
|
||||
n_total,
|
||||
round(float(correlated_rate), 4),
|
||||
round(float(anomaly_rate), 4),
|
||||
n_critical,
|
||||
n_high,
|
||||
n_medium,
|
||||
n_low,
|
||||
n_known_bot,
|
||||
n_anubis_deny,
|
||||
n_legit_browser,
|
||||
round(float(drift_rate), 4),
|
||||
drift_alert,
|
||||
latency_ms,
|
||||
valid_features,
|
||||
total_features,
|
||||
baseline_size,
|
||||
round(float(threshold), 6),
|
||||
1 if meta_learner_active else 0,
|
||||
]]
|
||||
)
|
||||
logger.debug(f"[Metrics] Cycle {cycle_id} enregistré ({latency_ms}ms)")
|
||||
except Exception as e:
|
||||
logger.warning(f"[Metrics] Erreur d'enregistrement des métriques : {e}")
|
||||
|
||||
|
||||
@ -203,6 +203,78 @@ class TrafficNormalizingFlow:
|
||||
return nf
|
||||
|
||||
|
||||
class NFEnsemble:
    """Deep ensemble of Normalizing Flows used for uncertainty quantification.

    Each member is an independent ``TrafficNormalizingFlow`` trained on a
    bootstrap sample (drawn with replacement) of the human baseline. The
    uncertainty (inter-model variance) is intended to separate organic drift
    (low variance, the members agree) from adversarial drift (high variance,
    the members disagree about the novelty).

    Reference: Lakshminarayanan et al., 2017 — "Simple and Scalable Predictive
    Uncertainty Estimation using Deep Ensembles" (NeurIPS).
    """

    # Number of members created by ``__init__``.
    ENSEMBLE_SIZE = 5

    def __init__(self, n_features: int):
        """Create ``ENSEMBLE_SIZE`` untrained flows over ``n_features`` inputs.

        Raises:
            RuntimeError: if PyTorch is not available.
        """
        if not TORCH_AVAILABLE:
            raise RuntimeError("PyTorch non disponible — NFEnsemble désactivé.")
        self.n_features = n_features
        self.models = [TrafficNormalizingFlow(n_features) for _ in range(self.ENSEMBLE_SIZE)]

    def fit(self, X: np.ndarray, epochs: int = AE_EPOCHS, lr: float = AE_LEARNING_RATE,
            batch_size: int = 256) -> dict:
        """Train every member on its own bootstrap resample of ``X``.

        Args:
            X: training samples, indexed along the first axis.
            epochs, lr, batch_size: forwarded unchanged to each member's ``fit``.

        Returns:
            dict with the per-member final losses (``final_losses``), their
            mean (``mean_loss``), the number of members (``ensemble_size``)
            and the number of input samples (``n_samples``).
        """
        n = len(X)
        all_losses = []
        for nf in self.models:
            # Bootstrap: draw n row indices with replacement so that each
            # member sees a different resample of the same baseline.
            idx = np.random.choice(n, size=n, replace=True)
            stats = nf.fit(X[idx], epochs=epochs, lr=lr, batch_size=batch_size)
            all_losses.append(stats['final_loss'])
        return {
            'final_losses': all_losses,
            'mean_loss': float(np.mean(all_losses)),
            # Report the number of members actually held, which may differ
            # from ENSEMBLE_SIZE for an ensemble restored from disk.
            'ensemble_size': len(self.models),
            'n_samples': n,
        }

    def predict_anomalies(self, X: np.ndarray) -> tuple:
        """Return ``(mean_score, uncertainty_score)`` as a tuple of np.ndarray.

        mean_score: mean of the members' ``score_samples(X)`` (-log p(x)).
        uncertainty_score: variance of those scores across the members.
        """
        scores = np.stack([nf.score_samples(X) for nf in self.models], axis=0)
        return scores.mean(axis=0), scores.var(axis=0)

    def score_samples(self, X: np.ndarray) -> np.ndarray:
        """Compatibility shim: return only the mean score (like TrafficNormalizingFlow)."""
        mean, _ = self.predict_anomalies(X)
        return mean

    def encode(self, X: np.ndarray) -> np.ndarray:
        """Return the members' latent encodings of ``X`` averaged over the ensemble."""
        latents = np.stack([nf.encode(X) for nf in self.models], axis=0)
        return latents.mean(axis=0)

    def state_dict(self) -> dict:
        """Return a serializable snapshot: member count, feature count and member states."""
        return {
            'ensemble_size': len(self.models),
            'n_features': self.n_features,
            'members': [nf.state_dict() for nf in self.models],
        }

    @classmethod
    def load_state_dict(cls, state: dict) -> 'NFEnsemble':
        """Rebuild an ensemble from a dict produced by ``state_dict``.

        The member list is rebuilt from ``state['members']`` as a whole rather
        than written index-by-index into the freshly constructed models, so a
        saved ensemble whose size differs from ``ENSEMBLE_SIZE`` neither raises
        ``IndexError`` nor leaves untrained members mixed in with trained ones.

        Raises:
            KeyError: if ``n_features`` or ``members`` is missing from ``state``
                (e.g. a checkpoint that is not an ensemble snapshot).
            ValueError: if ``members`` is empty.
            RuntimeError: if PyTorch is not available.
        """
        ens = cls(state['n_features'])
        members = state['members']
        if not members:
            raise ValueError("NFEnsemble state contains no members.")
        ens.models = [TrafficNormalizingFlow.load_state_dict(member_state)
                      for member_state in members]
        return ens
|
||||
|
||||
|
||||
def _ae_model_path(name: str, version_id: str) -> str:
    """Build the path of the serialized ``ae_<name>_<version_id>.pt`` file under MODEL_DIR."""
    filename = f'ae_{name}_{version_id}.pt'
    return os.path.join(MODEL_DIR, filename)
|
||||
|
||||
@ -411,7 +483,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
|
||||
dérive, entraîne un nouveau modèle sur ``human_baseline``, le sérialise sur disque,
|
||||
met à jour le fichier pointeur et purge les anciennes versions.
|
||||
|
||||
Retourne (IsolationForest, TrafficNormalizingFlow|None, list[str] features).
|
||||
Retourne (IsolationForest, NFEnsemble|None, list[str] features).
|
||||
"""
|
||||
model_path, meta = _get_current_version(name)
|
||||
if model_path and meta:
|
||||
@ -455,8 +527,8 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
|
||||
ae_path = _ae_model_path(name, meta['version_id'])
|
||||
if os.path.exists(ae_path):
|
||||
try:
|
||||
ae_loaded = TrafficNormalizingFlow.load_state_dict(torch.load(ae_path, weights_only=False))
|
||||
log_info(f"[{name}] Normalizing Flow v{meta['version_id']} rechargé.")
|
||||
ae_loaded = NFEnsemble.load_state_dict(torch.load(ae_path, weights_only=False))
|
||||
log_info(f"[{name}] NFEnsemble v{meta['version_id']} rechargé (M={NFEnsemble.ENSEMBLE_SIZE}).")
|
||||
except Exception as exc:
|
||||
log_info(f"[{name}] Erreur chargement AE : {exc} — AE désactivé ce cycle.")
|
||||
return joblib.load(model_path), ae_loaded, meta.get('features', features)
|
||||
@ -519,7 +591,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
|
||||
ae_prev_path = _ae_model_path(name, meta.get('version_id', ''))
|
||||
if os.path.exists(ae_prev_path):
|
||||
try:
|
||||
ae_prev = TrafficNormalizingFlow.load_state_dict(torch.load(ae_prev_path, weights_only=False))
|
||||
ae_prev = NFEnsemble.load_state_dict(torch.load(ae_prev_path, weights_only=False))
|
||||
except Exception:
|
||||
pass
|
||||
return joblib.load(model_path), ae_prev, meta.get('features', features)
|
||||
@ -539,17 +611,17 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
|
||||
new_meta_path = os.path.join(MODEL_DIR, f'model_{name}_{version_id}.meta.json')
|
||||
joblib.dump(model, new_model_path)
|
||||
|
||||
# Entraînement du Normalizing Flow en parallèle (si PyTorch disponible et AE_WEIGHT > 0)
|
||||
# Entraînement du NFEnsemble (M=5) en parallèle (si PyTorch disponible et AE_WEIGHT > 0)
|
||||
ae_model = None
|
||||
if TORCH_AVAILABLE and AE_WEIGHT > 0:
|
||||
try:
|
||||
ae_model = TrafficNormalizingFlow(n_features=len(features))
|
||||
ae_model = NFEnsemble(n_features=len(features))
|
||||
ae_stats = ae_model.fit(X_train.values)
|
||||
ae_path = _ae_model_path(name, version_id)
|
||||
torch.save(ae_model.state_dict(), ae_path)
|
||||
log_info(f"[{name}] Normalizing Flow entraîné : NLL={ae_stats['final_loss']:.6f}, epochs={ae_stats['epochs']}")
|
||||
log_info(f"[{name}] NFEnsemble entraîné (M={NFEnsemble.ENSEMBLE_SIZE}) : NLL moyen={ae_stats['mean_loss']:.6f}")
|
||||
except Exception as exc:
|
||||
log_info(f"[{name}] Normalizing Flow training échoué : {exc} — NF désactivé.")
|
||||
log_info(f"[{name}] NFEnsemble training échoué : {exc} — NF désactivé.")
|
||||
ae_model = None
|
||||
|
||||
previous_version = meta.get('version_id', None) if meta else None
|
||||
|
||||
@ -11,11 +11,12 @@ from .config import (
|
||||
ANOMALY_THRESHOLD, ANOMALY_PERCENTILE, ENABLE_CLUSTERING,
|
||||
ENABLE_SHAP, EIF_AVAILABLE, TORCH_AVAILABLE, XGB_AVAILABLE,
|
||||
BROWSER_CONFIDENCE_THRESHOLD, BROWSER_COHORT_RATIO,
|
||||
MIN_VALID_FEATURE_RATIO, STRUCTURAL_EXCLUDED_FEATURES,
|
||||
MIN_VALID_FEATURE_RATIO, MIN_HUMAN_BASELINE, BASELINE_ACCEPT_UNKNOWN, STRUCTURAL_EXCLUDED_FEATURES,
|
||||
NF_UNCERTAINTY_THRESHOLD,
|
||||
)
|
||||
from .log import log_info, log_decision
|
||||
from .infra import score_to_threat_level, get_client
|
||||
from .models import load_or_train_model, load_or_train_xgb, TrafficNormalizingFlow
|
||||
from .models import load_or_train_model, load_or_train_xgb, TrafficNormalizingFlow, NFEnsemble
|
||||
from .scoring import (
|
||||
validate_features, compute_adaptive_threshold, normalize_scores,
|
||||
compute_shap_top_features, build_reason, cluster_anomalies,
|
||||
@ -51,13 +52,18 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
# Les DENY sont TOUJOURS inclus dans les threats, indépendamment du seuil IF.
|
||||
unknown_traffic = rest[rest['anubis_bot_action'] != 'ALLOW'].copy()
|
||||
human_baseline = unknown_traffic[unknown_traffic['asn_label'] == 'isp']
|
||||
# En mode test (BASELINE_ACCEPT_UNKNOWN), les IPs sans ASN 'isp' utilisent 'unknown' comme fallback
|
||||
if len(human_baseline) < MIN_HUMAN_BASELINE and BASELINE_ACCEPT_UNKNOWN:
|
||||
unknown_baseline = unknown_traffic[unknown_traffic['asn_label'] == 'unknown']
|
||||
if len(unknown_baseline) > len(human_baseline):
|
||||
human_baseline = unknown_baseline
|
||||
|
||||
log_info(f'[{name}] ── Triage ──────────────────────────────────────')
|
||||
log_info(f'[{name}] Total sessions : {len(df):>6}')
|
||||
log_info(f'[{name}] Bots connus (dict) : {len(known_bots):>6}')
|
||||
log_info(f'[{name}] Anubis ALLOW : {len(anubis_allow):>6}')
|
||||
log_info(f'[{name}] Trafic à scorer (IF) : {len(unknown_traffic):>6}')
|
||||
log_info(f'[{name}] Baseline ISP (human) : {len(human_baseline):>6} (seuil min=500)')
|
||||
log_info(f'[{name}] Baseline ISP (human) : {len(human_baseline):>6} (seuil min={MIN_HUMAN_BASELINE})')
|
||||
|
||||
# §3 — Exclure les sessions ISP à faible cohérence de fingerprint de la baseline humaine
|
||||
# Ces sessions ISP avec un fingerprint incohérent sont probablement des proxies résidentiels
|
||||
@ -81,8 +87,8 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
if valid_features is None:
|
||||
return pd.DataFrame(), pd.DataFrame()
|
||||
|
||||
if len(human_baseline) < 500:
|
||||
log_info(f"[{name}] ⚠ Données humaines insuffisantes ({len(human_baseline)} < 500) — cycle ignoré.")
|
||||
if len(human_baseline) < MIN_HUMAN_BASELINE:
|
||||
log_info(f"[{name}] ⚠ Données humaines insuffisantes ({len(human_baseline)} < {MIN_HUMAN_BASELINE}) — cycle ignoré.")
|
||||
log_info(f"[{name}] Distribution asn_label dans le trafic à scorer :")
|
||||
if 'asn_label' in unknown_traffic.columns:
|
||||
for label, cnt in unknown_traffic['asn_label'].value_counts().head(8).items():
|
||||
@ -115,17 +121,38 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
|
||||
log_info(f'[{name}] Scoring EIF : {len(X_test)} sessions scorées (min={raw_scores.min():.4f}, max={raw_scores.max():.4f}, mean={raw_scores.mean():.4f})')
|
||||
|
||||
# Combinaison EIF + Normalizing Flow si disponible
|
||||
# Combinaison EIF + NFEnsemble (Deep Ensemble M=5) si disponible
|
||||
# Score final = (1-α) * eif_norm + α * nf_norm où α = AE_WEIGHT
|
||||
# Incertitude = variance inter-modèles → détection adversariale
|
||||
unknown_traffic['nf_uncertainty'] = 0.0
|
||||
unknown_traffic['is_adversarial_drift'] = False
|
||||
if ae_model is not None and AE_WEIGHT > 0:
|
||||
try:
|
||||
nf_neg_ll = ae_model.score_samples(X_test.values) # -log p(x)
|
||||
if isinstance(ae_model, NFEnsemble):
|
||||
nf_neg_ll, nf_uncertainty = ae_model.predict_anomalies(X_test.values)
|
||||
else:
|
||||
nf_neg_ll = ae_model.score_samples(X_test.values)
|
||||
nf_uncertainty = np.zeros(len(nf_neg_ll))
|
||||
nf_norm = normalize_scores(-nf_neg_ll) # plus élevé = plus anomal
|
||||
eif_norm = normalize_scores(raw_scores)
|
||||
combined_norm = (1 - AE_WEIGHT) * eif_norm + AE_WEIGHT * nf_norm
|
||||
unknown_traffic['ae_recon_error'] = nf_neg_ll # nom conservé pour rétro-compatibilité
|
||||
unknown_traffic['nf_uncertainty'] = nf_uncertainty
|
||||
adversarial_mask = nf_uncertainty > NF_UNCERTAINTY_THRESHOLD
|
||||
unknown_traffic['is_adversarial_drift'] = adversarial_mask
|
||||
n_adversarial = int(adversarial_mask.sum())
|
||||
unknown_traffic['anomaly_score'] = combined_norm
|
||||
log_info(f"[{name}] Score combiné EIF+NF (α={AE_WEIGHT}): nf_mean={nf_neg_ll.mean():.6f}")
|
||||
log_info(
|
||||
f"[{name}] Score combiné EIF+NF (α={AE_WEIGHT}): nf_mean={nf_neg_ll.mean():.6f}, "
|
||||
f"uncertainty_mean={nf_uncertainty.mean():.6f}, adversarial={n_adversarial}"
|
||||
)
|
||||
if n_adversarial > 0:
|
||||
log_decision('ADVERSARIAL_DRIFT_NF', cycle_id, name, {
|
||||
'n_adversarial': n_adversarial,
|
||||
'uncertainty_threshold': NF_UNCERTAINTY_THRESHOLD,
|
||||
'uncertainty_mean': round(float(nf_uncertainty.mean()), 6),
|
||||
'uncertainty_max': round(float(nf_uncertainty.max()), 6),
|
||||
})
|
||||
except Exception as exc:
|
||||
log_info(f"[{name}] NF scoring échoué : {exc} — utilisation EIF seul.")
|
||||
unknown_traffic['ae_recon_error'] = 0.0
|
||||
@ -435,12 +462,13 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
'effective_threshold': round(effective_threshold, 4), 'reason': row.get('reason', '')
|
||||
})
|
||||
|
||||
threats = pd.concat([df for df in [
|
||||
_threat_dfs = [df for df in [
|
||||
anomalies if not anomalies.empty else None,
|
||||
known_bots if not known_bots.empty else None,
|
||||
anubis_allow if not anubis_allow.empty else None,
|
||||
anubis_deny if not anubis_deny.empty else None,
|
||||
] if df is not None], ignore_index=True)
|
||||
] if df is not None]
|
||||
threats = pd.concat(_threat_dfs, ignore_index=True) if _threat_dfs else pd.DataFrame()
|
||||
|
||||
# Propager campaign_id des anomalies clusterisées vers all_scored
|
||||
# (all_scored a été capturé avant clustering, ses campaign_id sont tous -1)
|
||||
|
||||
@ -12,6 +12,7 @@ Regroupe les fonctions de scoring utilisées par le pipeline de détection :
|
||||
"""
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import torch
|
||||
|
||||
from .config import (
|
||||
ANOMALY_THRESHOLD, ANOMALY_PERCENTILE,
|
||||
@ -107,7 +108,7 @@ class ADWINDriftMonitor:
|
||||
for feat, value in feature_means.items():
|
||||
if feat in self._detectors:
|
||||
self._detectors[feat].update(value)
|
||||
detected = self._detectors[feat].detected_change()
|
||||
detected = self._detectors[feat].drift_detected
|
||||
changes[feat] = detected
|
||||
if detected:
|
||||
self._last_changes[feat] = True
|
||||
|
||||
Reference in New Issue
Block a user