feat(ml): replace Autoencoder with RealNVP Normalizing Flow and add SessionTransformer embeddings

Replace TrafficAutoEncoder (MSE reconstruction scoring) with TrafficNormalizingFlow
(RealNVP via FrEIA, 4 affine coupling blocks, anomaly score = -log p(x)) for
mathematically rigorous density estimation. Add a SessionTransformer module that produces
32-dimensional sequence embeddings from raw HTTP request sequences (path, method,
timing) via a lightweight TransformerEncoder; these embeddings replace the
path_transition_entropy and cadence_cv features. Update thesis documentation sections
2.4.2b and 3.8 accordingly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-13 15:11:21 +02:00
parent 0e5f94dd0d
commit c1821dcbc4
14 changed files with 515 additions and 3590 deletions

View File

@ -15,7 +15,7 @@ from .config import (
)
from .log import log_info, log_decision
from .infra import score_to_threat_level, get_client
from .models import load_or_train_model, load_or_train_xgb, TrafficAutoEncoder
from .models import load_or_train_model, load_or_train_xgb, TrafficNormalizingFlow
from .scoring import (
validate_features, compute_adaptive_threshold, normalize_scores,
compute_shap_top_features, build_reason, cluster_anomalies,
@ -115,19 +115,19 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
log_info(f'[{name}] Scoring EIF : {len(X_test)} sessions scorées (min={raw_scores.min():.4f}, max={raw_scores.max():.4f}, mean={raw_scores.mean():.4f})')
# Combinaison EIF + Autoencoder si disponible
# Score final = (1-α) * eif_norm + α * ae_norm où α = AE_WEIGHT
# Combinaison EIF + Normalizing Flow si disponible
# Score final = (1-α) * eif_norm + α * nf_norm où α = AE_WEIGHT
if ae_model is not None and AE_WEIGHT > 0:
try:
ae_recon_errors = ae_model.score_samples(X_test.values)
ae_norm = normalize_scores(-ae_recon_errors) # plus élevé = plus anomal
nf_neg_ll = ae_model.score_samples(X_test.values) # -log p(x)
nf_norm = normalize_scores(-nf_neg_ll) # plus élevé = plus anomal
eif_norm = normalize_scores(raw_scores)
combined_norm = (1 - AE_WEIGHT) * eif_norm + AE_WEIGHT * ae_norm
unknown_traffic['ae_recon_error'] = ae_recon_errors
combined_norm = (1 - AE_WEIGHT) * eif_norm + AE_WEIGHT * nf_norm
unknown_traffic['ae_recon_error'] = nf_neg_ll # nom conservé pour rétro-compatibilité
unknown_traffic['anomaly_score'] = combined_norm
log_info(f"[{name}] Score combiné EIF+AE (α={AE_WEIGHT}): ae_mean={ae_recon_errors.mean():.6f}")
log_info(f"[{name}] Score combiné EIF+NF (α={AE_WEIGHT}): nf_mean={nf_neg_ll.mean():.6f}")
except Exception as exc:
log_info(f"[{name}] AE scoring échoué : {exc} — utilisation EIF seul.")
log_info(f"[{name}] NF scoring échoué : {exc} — utilisation EIF seul.")
unknown_traffic['ae_recon_error'] = 0.0
unknown_traffic['anomaly_score'] = normalize_scores(raw_scores)
else: