feat: multi-distro VM tests, ja4ebpf eBPF improvements, bot-detector scoring

ja4ebpf:
- Refactor BPF TC capture with improved SYN offset handling and TCP option parsing
- Enhance TLS uprobe SSL hooking for better key extraction
- Add ClickHouse writer improvements for HTTP log materialized views
- Update RPM spec for Rocky Linux 8/9/10, fix systemd service
- Simplify loader with cleaner bpf2go integration

bot-detector:
- Add H2 SETTINGS per-parameter comparison in browser_matcher
- Enhance browser signatures and scoring pipeline
- Improve preprocessing and cycle detection

infra:
- Multi-distro Vagrantfile (centos8, rocky9, rocky10) with per-distro provisioning
- New Makefile targets: vm-up-all, test-vm-matrix, test-vm-centos8/rocky10
- Add debug helpers and run-test-from-host.sh for host-driven VM testing
- Update run-tests-vm.sh for cross-distro compatibility
- Remove accidental binary blob (\004)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-13 01:09:33 +02:00
parent d81463a589
commit d75825278e
32 changed files with 2148 additions and 890 deletions

View File

@@ -140,6 +140,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
# XGBoost supervisé — troisième voix (si labels historiques disponibles)
unknown_traffic['xgb_prob'] = 0.0
xgb_model_ref = None # Référence pour SHAP TreeExplainer (§2.4.5)
if XGB_AVAILABLE and XGB_WEIGHT > 0:
try:
xgb_client = get_client()
@@ -150,6 +151,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
X_xgb = unknown_traffic[xgb_cols].replace([np.inf, -np.inf], np.nan).fillna(0)
xgb_probs = xgb_model.predict_proba(X_xgb.values)[:, 1]
unknown_traffic['xgb_prob'] = xgb_probs
xgb_model_ref = xgb_model
log_info(f"[{name}] XGBoost : xgb_mean={xgb_probs.mean():.4f}")
except Exception as exc:
log_info(f"[{name}] XGBoost scoring échoué : {exc} — EIF+AE seuls.")
@@ -187,9 +189,9 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
except Exception as exc:
log_info(f"[{name}] MetaLearner entraînement échoué : {exc}")
# §7 — ExIFFI : importance de features pour l'EIF (quand SHAP désactivé)
# §7 — ExIFFI : importance de features pour l'EIF (toujours actif en complément de SHAP)
exiffi_tops: list = [{}] * len(unknown_traffic)
if not ENABLE_SHAP and len(unknown_traffic) > 0:
if len(unknown_traffic) > 0:
try:
exiffi_tops = compute_exiffi_importance(model, X_test, scoring_features)
except Exception:
@@ -376,9 +378,10 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
log_info(f"[{name}] ALERT: {len(anomalies)} anomalies détectées (seuil={effective_threshold:.4f}).")
anomalies['recurrence'] = anomalies['src_ip'].map(recurrence_map).fillna(0).astype(int) + 1
# A4 — Explainabilité SHAP : top features responsables de chaque anomalie
# A4 — Explainabilité SHAP : TreeExplainer sur XGBoost si dispo, sinon EIF
X_anomalies = X_test.loc[anomalies.index]
shap_tops = compute_shap_top_features(model, X_anomalies, valid_features)
shap_tops = compute_shap_top_features(model, X_anomalies, valid_features,
xgb_model=xgb_model_ref)
# §7 — ExIFFI : utiliser les tops ExIFFI précalculés quand SHAP est inactif
# Construire un mapping index → exiffi_top pour accès rapide