feat: multi-distro VM tests, ja4ebpf eBPF improvements, bot-detector scoring
ja4ebpf:
- Refactor BPF TC capture with improved SYN offset handling and TCP option parsing
- Enhance TLS uprobe SSL hooking for better key extraction
- Add ClickHouse writer improvements for HTTP log materialized views
- Update RPM spec for Rocky Linux 8/9/10, fix systemd service
- Simplify loader with cleaner bpf2go integration

bot-detector:
- Add H2 SETTINGS per-parameter comparison in browser_matcher
- Enhance browser signatures and scoring pipeline
- Improve preprocessing and cycle detection

infra:
- Multi-distro Vagrantfile (centos8, rocky9, rocky10) with per-distro provisioning
- New Makefile targets: vm-up-all, test-vm-matrix, test-vm-centos8/rocky10
- Add debug helpers and run-test-from-host.sh for host-driven VM testing
- Update run-tests-vm.sh for cross-distro compatibility
- Remove accidental binary blob (\004)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -140,6 +140,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
|
||||
# XGBoost supervisé — troisième voix (si labels historiques disponibles)
|
||||
unknown_traffic['xgb_prob'] = 0.0
|
||||
xgb_model_ref = None # Référence pour SHAP TreeExplainer (§2.4.5)
|
||||
if XGB_AVAILABLE and XGB_WEIGHT > 0:
|
||||
try:
|
||||
xgb_client = get_client()
|
||||
@ -150,6 +151,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
X_xgb = unknown_traffic[xgb_cols].replace([np.inf, -np.inf], np.nan).fillna(0)
|
||||
xgb_probs = xgb_model.predict_proba(X_xgb.values)[:, 1]
|
||||
unknown_traffic['xgb_prob'] = xgb_probs
|
||||
xgb_model_ref = xgb_model
|
||||
log_info(f"[{name}] XGBoost : xgb_mean={xgb_probs.mean():.4f}")
|
||||
except Exception as exc:
|
||||
log_info(f"[{name}] XGBoost scoring échoué : {exc} — EIF+AE seuls.")
|
||||
@ -187,9 +189,9 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
except Exception as exc:
|
||||
log_info(f"[{name}] MetaLearner entraînement échoué : {exc}")
|
||||
|
||||
# §7 — ExIFFI : importance de features pour l'EIF (quand SHAP désactivé)
|
||||
# §7 — ExIFFI : importance de features pour l'EIF (toujours actif en complément de SHAP)
|
||||
exiffi_tops: list = [{}] * len(unknown_traffic)
|
||||
if not ENABLE_SHAP and len(unknown_traffic) > 0:
|
||||
if len(unknown_traffic) > 0:
|
||||
try:
|
||||
exiffi_tops = compute_exiffi_importance(model, X_test, scoring_features)
|
||||
except Exception:
|
||||
@ -376,9 +378,10 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
log_info(f"[{name}] ALERT: {len(anomalies)} anomalies détectées (seuil={effective_threshold:.4f}).")
|
||||
anomalies['recurrence'] = anomalies['src_ip'].map(recurrence_map).fillna(0).astype(int) + 1
|
||||
|
||||
# A4 — Explainabilité SHAP : top features responsables de chaque anomalie
|
||||
# A4 — Explainabilité SHAP : TreeExplainer sur XGBoost si dispo, sinon EIF
|
||||
X_anomalies = X_test.loc[anomalies.index]
|
||||
shap_tops = compute_shap_top_features(model, X_anomalies, valid_features)
|
||||
shap_tops = compute_shap_top_features(model, X_anomalies, valid_features,
|
||||
xgb_model=xgb_model_ref)
|
||||
|
||||
# §7 — ExIFFI : utiliser les tops ExIFFI précalculés quand SHAP est inactif
|
||||
# Construire un mapping index → exiffi_top pour accès rapide
|
||||
|
||||
Reference in New Issue
Block a user