feat(e2e): add distributed E2E test framework with parametric traffic generation
Add run-e2e-test.sh with CLI parameters (--hits, --http-ratio, --dns, --tls, --src-ips, --keep-analysis, --up) for configurable traffic generation. Traffic runs from VM endpoints using multiple source IPs (alias IPs on eth0) so that the ML pipeline sees distinct sessions. Fix the curl TLS flag (--tlsv1.2 instead of --tls-v1-2), skip the redundant local verification in distributed mode, and fix the dashboard is_available() cache, which never retried after ClickHouse recovered.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -218,16 +218,28 @@ def fetch_and_analyze():
|
||||
if not unknown_h2.empty:
|
||||
n_unknown = len(unknown_h2)
|
||||
# Insérer les fingerprints inconnus dans la table ClickHouse
|
||||
client.command(
|
||||
"INSERT INTO ja4_processing.unknown_h2_fingerprints "
|
||||
"(observed_at, src_ip, ja4, h2_fingerprint, h2_settings_fp, "
|
||||
"h2_window_update, h2_pseudo_order, h2_has_priority, "
|
||||
"browser_confidence_score, header_user_agent, tls_version) "
|
||||
"SELECT now(), src_ip, ja4, h2_fingerprint, h2_settings_fp, "
|
||||
"h2_window_update, h2_pseudo_order, h2_has_priority, "
|
||||
"browser_confidence, header_user_agent, tls_version "
|
||||
"FROM input"
|
||||
)
|
||||
cols = [
|
||||
'observed_at', 'src_ip', 'ja4', 'h2_fingerprint', 'h2_settings_fp',
|
||||
'h2_window_update', 'h2_pseudo_order', 'h2_has_priority',
|
||||
'browser_confidence_score', 'header_user_agent', 'tls_version',
|
||||
]
|
||||
rows = []
|
||||
for _, row in unknown_h2.iterrows():
|
||||
rows.append({
|
||||
'observed_at': row.get('time', ''),
|
||||
'src_ip': row.get('src_ip', ''),
|
||||
'ja4': row.get('ja4', ''),
|
||||
'h2_fingerprint': row.get('h2_fingerprint', ''),
|
||||
'h2_settings_fp': row.get('h2_settings_fp', ''),
|
||||
'h2_window_update': int(row.get('h2_window_update', 0)),
|
||||
'h2_pseudo_order': row.get('h2_pseudo_order', ''),
|
||||
'h2_has_priority': int(row.get('h2_has_priority', 0)),
|
||||
'browser_confidence_score': float(row.get('browser_confidence', 0.0)),
|
||||
'header_user_agent': row.get('header_user_agent', ''),
|
||||
'tls_version': row.get('tls_version', ''),
|
||||
})
|
||||
client.insert('ja4_processing.unknown_h2_fingerprints', rows,
|
||||
column_names=cols)
|
||||
log_info(f'[H2 Queue] {n_unknown} fingerprint(s) H2 inconnu(s) mis en file d\'examen.')
|
||||
except Exception as e:
|
||||
log_info(f'[H2 Queue] Erreur insertion unknown_h2_fingerprints : {e}')
|
||||
@ -324,8 +336,12 @@ def fetch_and_analyze():
|
||||
log_info('')
|
||||
log_info(f'── Modèle Applicatif (L7 seul, non-corrélé) : {len(df_uncorr)} sessions, {len(feats)} features ──')
|
||||
anom_b, scored_b = run_semi_supervised_logic(df_uncorr, feats, 'Applicatif', cycle_id, recurrence_map)
|
||||
all_anom = pd.concat([anom_a, anom_b], ignore_index=True)
|
||||
all_scored = pd.concat([scored_a, scored_b], ignore_index=True)
|
||||
_anom_dfs = [df for df in [anom_a, anom_b]
|
||||
if df is not None and not df.empty]
|
||||
all_anom = pd.concat(_anom_dfs, ignore_index=True) if _anom_dfs else pd.DataFrame()
|
||||
_scored_dfs = [df for df in [scored_a, scored_b]
|
||||
if df is not None and not df.empty]
|
||||
all_scored = pd.concat(_scored_dfs, ignore_index=True) if _scored_dfs else pd.DataFrame()
|
||||
|
||||
# ── A3 : Analyse fenêtre 24h (optionnelle) ────────────────────────────────
|
||||
if ENABLE_MULTIWINDOW:
|
||||
@ -336,8 +352,12 @@ def fetch_and_analyze():
|
||||
log_info(f"[24h] {len(df_24h)} sessions dans la fenêtre 24h.")
|
||||
anom_c, scored_c = run_semi_supervised_logic(df_24h[df_24h['correlated'] == 1].copy(), feats_complet, 'Complet_24h', cycle_id, recurrence_map)
|
||||
anom_d, scored_d = run_semi_supervised_logic(df_24h[df_24h['correlated'] == 0].copy(), feats, 'Applicatif_24h', cycle_id, recurrence_map)
|
||||
all_anom_24h = pd.concat([anom_c, anom_d], ignore_index=True)
|
||||
all_scored_24h = pd.concat([scored_c, scored_d], ignore_index=True)
|
||||
_anom_24h_dfs = [df for df in [anom_c, anom_d]
|
||||
if df is not None and not df.empty]
|
||||
all_anom_24h = pd.concat(_anom_24h_dfs, ignore_index=True) if _anom_24h_dfs else pd.DataFrame()
|
||||
_scored_24h_dfs = [df for df in [scored_c, scored_d]
|
||||
if df is not None and not df.empty]
|
||||
all_scored_24h = pd.concat(_scored_24h_dfs, ignore_index=True) if _scored_24h_dfs else pd.DataFrame()
|
||||
# Fusion : pour les IPs présentes dans les deux fenêtres, conserver le score le plus bas
|
||||
if not all_anom_24h.empty:
|
||||
all_anom = pd.concat([all_anom, all_anom_24h], ignore_index=True)
|
||||
|
||||
Reference in New Issue
Block a user