feat(e2e): add distributed E2E test framework with parametric traffic generation
Add run-e2e-test.sh with CLI parameters (--hits, --http-ratio, --dns, --tls, --src-ips, --keep-analysis, --up) for configurable traffic generation. Traffic is generated from the VM endpoints using multiple source IPs (alias IPs on eth0) so that the ML pipeline sees distinct sessions. Fix the curl TLS flag (--tlsv1.2 instead of --tls-v1-2), skip the redundant local verification in distributed mode, and fix the dashboard is_available() cache, which never retried after ClickHouse recovered. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -11,11 +11,12 @@ from .config import (
|
||||
ANOMALY_THRESHOLD, ANOMALY_PERCENTILE, ENABLE_CLUSTERING,
|
||||
ENABLE_SHAP, EIF_AVAILABLE, TORCH_AVAILABLE, XGB_AVAILABLE,
|
||||
BROWSER_CONFIDENCE_THRESHOLD, BROWSER_COHORT_RATIO,
|
||||
MIN_VALID_FEATURE_RATIO, STRUCTURAL_EXCLUDED_FEATURES,
|
||||
MIN_VALID_FEATURE_RATIO, MIN_HUMAN_BASELINE, BASELINE_ACCEPT_UNKNOWN, STRUCTURAL_EXCLUDED_FEATURES,
|
||||
NF_UNCERTAINTY_THRESHOLD,
|
||||
)
|
||||
from .log import log_info, log_decision
|
||||
from .infra import score_to_threat_level, get_client
|
||||
from .models import load_or_train_model, load_or_train_xgb, TrafficNormalizingFlow
|
||||
from .models import load_or_train_model, load_or_train_xgb, TrafficNormalizingFlow, NFEnsemble
|
||||
from .scoring import (
|
||||
validate_features, compute_adaptive_threshold, normalize_scores,
|
||||
compute_shap_top_features, build_reason, cluster_anomalies,
|
||||
@ -51,13 +52,18 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
# Les DENY sont TOUJOURS inclus dans les threats, indépendamment du seuil IF.
|
||||
unknown_traffic = rest[rest['anubis_bot_action'] != 'ALLOW'].copy()
|
||||
human_baseline = unknown_traffic[unknown_traffic['asn_label'] == 'isp']
|
||||
# En mode test (BASELINE_ACCEPT_UNKNOWN), les IPs sans ASN 'isp' utilisent 'unknown' comme fallback
|
||||
if len(human_baseline) < MIN_HUMAN_BASELINE and BASELINE_ACCEPT_UNKNOWN:
|
||||
unknown_baseline = unknown_traffic[unknown_traffic['asn_label'] == 'unknown']
|
||||
if len(unknown_baseline) > len(human_baseline):
|
||||
human_baseline = unknown_baseline
|
||||
|
||||
log_info(f'[{name}] ── Triage ──────────────────────────────────────')
|
||||
log_info(f'[{name}] Total sessions : {len(df):>6}')
|
||||
log_info(f'[{name}] Bots connus (dict) : {len(known_bots):>6}')
|
||||
log_info(f'[{name}] Anubis ALLOW : {len(anubis_allow):>6}')
|
||||
log_info(f'[{name}] Trafic à scorer (IF) : {len(unknown_traffic):>6}')
|
||||
log_info(f'[{name}] Baseline ISP (human) : {len(human_baseline):>6} (seuil min=500)')
|
||||
log_info(f'[{name}] Baseline ISP (human) : {len(human_baseline):>6} (seuil min={MIN_HUMAN_BASELINE})')
|
||||
|
||||
# §3 — Exclure les sessions ISP à faible cohérence de fingerprint de la baseline humaine
|
||||
# Ces sessions ISP avec un fingerprint incohérent sont probablement des proxies résidentiels
|
||||
@ -81,8 +87,8 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
if valid_features is None:
|
||||
return pd.DataFrame(), pd.DataFrame()
|
||||
|
||||
if len(human_baseline) < 500:
|
||||
log_info(f"[{name}] ⚠ Données humaines insuffisantes ({len(human_baseline)} < 500) — cycle ignoré.")
|
||||
if len(human_baseline) < MIN_HUMAN_BASELINE:
|
||||
log_info(f"[{name}] ⚠ Données humaines insuffisantes ({len(human_baseline)} < {MIN_HUMAN_BASELINE}) — cycle ignoré.")
|
||||
log_info(f"[{name}] Distribution asn_label dans le trafic à scorer :")
|
||||
if 'asn_label' in unknown_traffic.columns:
|
||||
for label, cnt in unknown_traffic['asn_label'].value_counts().head(8).items():
|
||||
@ -115,17 +121,38 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
|
||||
log_info(f'[{name}] Scoring EIF : {len(X_test)} sessions scorées (min={raw_scores.min():.4f}, max={raw_scores.max():.4f}, mean={raw_scores.mean():.4f})')
|
||||
|
||||
# Combinaison EIF + Normalizing Flow si disponible
|
||||
# Combinaison EIF + NFEnsemble (Deep Ensemble M=5) si disponible
|
||||
# Score final = (1-α) * eif_norm + α * nf_norm où α = AE_WEIGHT
|
||||
# Incertitude = variance inter-modèles → détection adversariale
|
||||
unknown_traffic['nf_uncertainty'] = 0.0
|
||||
unknown_traffic['is_adversarial_drift'] = False
|
||||
if ae_model is not None and AE_WEIGHT > 0:
|
||||
try:
|
||||
nf_neg_ll = ae_model.score_samples(X_test.values) # -log p(x)
|
||||
if isinstance(ae_model, NFEnsemble):
|
||||
nf_neg_ll, nf_uncertainty = ae_model.predict_anomalies(X_test.values)
|
||||
else:
|
||||
nf_neg_ll = ae_model.score_samples(X_test.values)
|
||||
nf_uncertainty = np.zeros(len(nf_neg_ll))
|
||||
nf_norm = normalize_scores(-nf_neg_ll) # plus élevé = plus anomal
|
||||
eif_norm = normalize_scores(raw_scores)
|
||||
combined_norm = (1 - AE_WEIGHT) * eif_norm + AE_WEIGHT * nf_norm
|
||||
unknown_traffic['ae_recon_error'] = nf_neg_ll # nom conservé pour rétro-compatibilité
|
||||
unknown_traffic['nf_uncertainty'] = nf_uncertainty
|
||||
adversarial_mask = nf_uncertainty > NF_UNCERTAINTY_THRESHOLD
|
||||
unknown_traffic['is_adversarial_drift'] = adversarial_mask
|
||||
n_adversarial = int(adversarial_mask.sum())
|
||||
unknown_traffic['anomaly_score'] = combined_norm
|
||||
log_info(f"[{name}] Score combiné EIF+NF (α={AE_WEIGHT}): nf_mean={nf_neg_ll.mean():.6f}")
|
||||
log_info(
|
||||
f"[{name}] Score combiné EIF+NF (α={AE_WEIGHT}): nf_mean={nf_neg_ll.mean():.6f}, "
|
||||
f"uncertainty_mean={nf_uncertainty.mean():.6f}, adversarial={n_adversarial}"
|
||||
)
|
||||
if n_adversarial > 0:
|
||||
log_decision('ADVERSARIAL_DRIFT_NF', cycle_id, name, {
|
||||
'n_adversarial': n_adversarial,
|
||||
'uncertainty_threshold': NF_UNCERTAINTY_THRESHOLD,
|
||||
'uncertainty_mean': round(float(nf_uncertainty.mean()), 6),
|
||||
'uncertainty_max': round(float(nf_uncertainty.max()), 6),
|
||||
})
|
||||
except Exception as exc:
|
||||
log_info(f"[{name}] NF scoring échoué : {exc} — utilisation EIF seul.")
|
||||
unknown_traffic['ae_recon_error'] = 0.0
|
||||
@ -435,12 +462,13 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
'effective_threshold': round(effective_threshold, 4), 'reason': row.get('reason', '')
|
||||
})
|
||||
|
||||
threats = pd.concat([df for df in [
|
||||
_threat_dfs = [df for df in [
|
||||
anomalies if not anomalies.empty else None,
|
||||
known_bots if not known_bots.empty else None,
|
||||
anubis_allow if not anubis_allow.empty else None,
|
||||
anubis_deny if not anubis_deny.empty else None,
|
||||
] if df is not None], ignore_index=True)
|
||||
] if df is not None]
|
||||
threats = pd.concat(_threat_dfs, ignore_index=True) if _threat_dfs else pd.DataFrame()
|
||||
|
||||
# Propager campaign_id des anomalies clusterisées vers all_scored
|
||||
# (all_scored a été capturé avant clustering, ses campaign_id sont tous -1)
|
||||
|
||||
Reference in New Issue
Block a user