feat: nouvelles techniques de détection et page tactiques SOC
SQL:
- Ajout de 5 colonnes d'agrégation (count_xff, count_unusual_ct, count_non_std_port, count_login_post, sec_ch_mobile_mismatch)
- Exposition de 5 features calculées dans view_ai_features_1h
- Migration ALTER TABLE pour les déploiements existants

Bot-detector:
- 7 nouvelles features ML (has_xff, unusual_content_type_ratio, non_standard_port_ratio, login_post_concentration, sec_ch_mobile_mismatch, true_window_size, window_mss_ratio)
- Propagation de campaign_id vers ml_all_scores (valait toujours -1)
- Escalade de campagne : HIGH→CRITICAL si le cluster compte ≥ 5 membres

Dashboard:
- Page Tactiques SOC : brute-force, rotation JA4, récurrence, alertes temps réel — 4 KPIs + 4 panneaux + infobulles doc
- Ajout du helper global fmtDate()
- Mise à jour de la navigation de la sidebar

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -109,7 +109,8 @@ STRUCTURAL_EXCLUDED_FEATURES: dict[str, list] = {
|
||||
'request_size_variance', 'mss_mobile_mismatch',
|
||||
'ja3_diversity_ratio', 'syn_timing_cv', 'tls12_ratio', 'ip_df_variance',
|
||||
'avg_ttl', 'ttl_std', 'no_window_scale_ratio',
|
||||
'ja4_drift_ratio'],
|
||||
'ja4_drift_ratio',
|
||||
'true_window_size', 'window_mss_ratio'],
|
||||
}
|
||||
|
||||
# ─── Imports optionnels (bibliothèques lourdes) ────────────────────────────
|
||||
|
||||
@ -306,6 +306,23 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
if ENABLE_CLUSTERING:
|
||||
anomalies = cluster_anomalies(anomalies, scoring_features, ae_model=ae_model)
|
||||
|
||||
# P2 — Escalade par taille de campagne : les IPs dans un cluster
|
||||
# coordonné de grande taille sont plus menaçantes que des IPs isolées.
|
||||
# Escalader HIGH → CRITICAL si cluster_size ≥ 5.
|
||||
if 'campaign_id' in anomalies.columns:
|
||||
cid_counts = anomalies['campaign_id'].value_counts()
|
||||
for cid, size in cid_counts.items():
|
||||
if cid < 0:
|
||||
continue
|
||||
if size >= 5:
|
||||
mask = (anomalies['campaign_id'] == cid) & (anomalies['threat_level'] == 'HIGH')
|
||||
n_escalated = mask.sum()
|
||||
if n_escalated > 0:
|
||||
anomalies.loc[mask, 'threat_level'] = 'CRITICAL'
|
||||
anomalies.loc[mask, 'reason'] = anomalies.loc[mask, 'reason'] + \
|
||||
f' [Escalade campagne #{cid}, {size} IPs coordonnées]'
|
||||
log_info(f"[{name}] Escalade campagne #{cid}: {n_escalated} IP(s) HIGH→CRITICAL ({size} membres)")
|
||||
|
||||
anomalies['ja4'] = anomalies['ja4'].replace({'': 'HTTP_CLEAR_TEXT'})
|
||||
for _, row in anomalies.iterrows():
|
||||
log_decision('ANOMALY', cycle_id, name, {
|
||||
@ -330,6 +347,14 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
|
||||
anubis_deny if not anubis_deny.empty else None,
|
||||
] if df is not None], ignore_index=True)
|
||||
|
||||
# Propager campaign_id des anomalies clusterisées vers all_scored
|
||||
# (all_scored a été capturé avant clustering, ses campaign_id sont tous -1)
|
||||
if not anomalies.empty and 'campaign_id' in anomalies.columns:
|
||||
cid_map = anomalies.set_index(anomalies.index)['campaign_id']
|
||||
matched = all_scored.index.isin(cid_map.index)
|
||||
if matched.any():
|
||||
all_scored.loc[matched, 'campaign_id'] = cid_map
|
||||
|
||||
# Inclure anubis_allow dans all_scored pour traçabilité dans ml_all_scores.
|
||||
# Ces IPs sont exclues de l'analyse IF mais doivent apparaître dans la table
|
||||
# de scores avec threat_level='KNOWN_BOT' et anomaly_score=0.0.
|
||||
|
||||
@ -41,6 +41,12 @@ FEATURES = [
|
||||
'cadence_cv', 'burst_ratio', 'pause_ratio',
|
||||
'lag1_autocorrelation', 'benford_deviation',
|
||||
'host_diversity', 'host_sweep_speed', 'host_coverage_uniformity',
|
||||
# P0+P1 : features sous-exploitées (SQL existant ou ajouté)
|
||||
'is_fake_navigation',
|
||||
'true_window_size', 'window_mss_ratio',
|
||||
# P1 : nouvelles features de détection
|
||||
'has_xff', 'unusual_content_type_ratio', 'non_standard_port_ratio',
|
||||
'login_post_concentration', 'sec_ch_mobile_mismatch',
|
||||
]
|
||||
|
||||
# Features supplémentaires pour le modèle Complet (données TCP/TLS requises)
|
||||
@ -100,6 +106,7 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
|
||||
'has_accept_language', 'has_cookie', 'has_referer', 'ua_ch_mismatch',
|
||||
'is_ua_rotating', 'is_alpn_missing', 'sni_host_mismatch', 'alpn_http_mismatch',
|
||||
'mss_mobile_mismatch', 'anubis_is_flagged', 'is_rare_ja4',
|
||||
'is_fake_navigation', 'has_xff', 'sec_ch_mobile_mismatch',
|
||||
}
|
||||
for col in df.columns:
|
||||
if col in binary_features:
|
||||
|
||||
Reference in New Issue
Block a user