feat: multi-distro VM tests, ja4ebpf eBPF improvements, bot-detector scoring
ja4ebpf:
- Refactor BPF TC capture with improved SYN offset handling and TCP option parsing
- Enhance TLS uprobe SSL hooking for better key extraction
- Add ClickHouse writer improvements for HTTP log materialized views
- Update RPM spec for Rocky Linux 8/9/10, fix systemd service
- Simplify loader with cleaner bpf2go integration

bot-detector:
- Add H2 SETTINGS per-parameter comparison in browser_matcher
- Enhance browser signatures and scoring pipeline
- Improve preprocessing and cycle detection

infra:
- Multi-distro Vagrantfile (centos8, rocky9, rocky10) with per-distro provisioning
- New Makefile targets: vm-up-all, test-vm-matrix, test-vm-centos8/rocky10
- Add debug helpers and run-test-from-host.sh for host-driven VM testing
- Update run-tests-vm.sh for cross-distro compatibility
- Remove accidental binary blob (\004)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -18,6 +18,7 @@ from .infra import get_client, set_healthy
|
||||
from .preprocessing import preprocess_df, FEATURES, FEATURES_COMPLET
|
||||
from .pipeline import run_semi_supervised_logic
|
||||
from .fleet import enrich_with_fleet_score
|
||||
from .browser_signatures import reload_signatures_from_clickhouse
|
||||
from .metrics import record_cycle_metrics
|
||||
|
||||
|
||||
@@ -120,6 +121,13 @@ def fetch_and_analyze():
|
||||
|
||||
client = get_client()
|
||||
|
||||
# §3.9.5 — Rechargement périodique des signatures H2 depuis ClickHouse
|
||||
try:
|
||||
if reload_signatures_from_clickhouse(client):
|
||||
log_info('[Signatures] Signatures H2 rechargées depuis browser_h2_signatures.')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Récupération du trafic (fenêtre 1h) ──────────────────────────────────
|
||||
try:
|
||||
df = client.query_df(f'SELECT * FROM {DB}.view_ai_features_1h')
|
||||
@@ -171,6 +179,43 @@ def fetch_and_analyze():
|
||||
except Exception as e:
|
||||
log_info(f'[Fleet §5] Enrichissement de flotte échoué : {e}')
|
||||
|
||||
# §3.9.5 — Queue unknown_h2_fingerprints : sessions H2 inconnues mais navigateur-like
|
||||
try:
|
||||
bm_col = 'bm_score' if 'bm_score' in df.columns else None
|
||||
bc_col = 'browser_confidence' if 'browser_confidence' in df.columns else None
|
||||
h2_col = 'h2_settings_known' if 'h2_settings_known' in df.columns else None
|
||||
tls_col = 'tls_version' if 'tls_version' in df.columns else None
|
||||
|
||||
if bm_col and h2_col:
|
||||
# Conditions : H2 inconnu + comportement navigateur + TLS 1.3
|
||||
unknown_h2_mask = (
|
||||
(df[h2_col] == 0) # H2 SETTINGS inconnu
|
||||
& (
|
||||
(df[bm_col] < 0.45) # browser_matcher ne reconnaît pas
|
||||
| (bc_col and df[bc_col] >= 0.55) # mais browser_confidence élevé
|
||||
)
|
||||
)
|
||||
if tls_col:
|
||||
unknown_h2_mask = unknown_h2_mask & (df[tls_col].astype(str).str.startswith('TLSv1.3'))
|
||||
|
||||
unknown_h2 = df[unknown_h2_mask]
|
||||
if not unknown_h2.empty:
|
||||
n_unknown = len(unknown_h2)
|
||||
# Insérer les fingerprints inconnus dans la table ClickHouse
|
||||
client.command(
|
||||
"INSERT INTO ja4_processing.unknown_h2_fingerprints "
|
||||
"(observed_at, src_ip, ja4, h2_fingerprint, h2_settings_fp, "
|
||||
"h2_window_update, h2_pseudo_order, h2_has_priority, "
|
||||
"browser_confidence_score, header_user_agent, tls_version) "
|
||||
"SELECT now(), src_ip, ja4, h2_fingerprint, h2_settings_fp, "
|
||||
"h2_window_update, h2_pseudo_order, h2_has_priority, "
|
||||
"browser_confidence, header_user_agent, tls_version "
|
||||
"FROM input"
|
||||
)
|
||||
log_info(f'[H2 Queue] {n_unknown} fingerprint(s) H2 inconnu(s) mis en file d\'examen.')
|
||||
except Exception as e:
|
||||
log_info(f'[H2 Queue] Erreur insertion unknown_h2_fingerprints : {e}')
|
||||
|
||||
# ── Résumé des données chargées ───────────────────────────────────────────
|
||||
n_total = len(df)
|
||||
n_correlated = int((df.get('correlated', pd.Series()) == 1).sum())
|
||||
|
||||
Reference in New Issue
Block a user