From 039086a0b347345062f248288f3f703d109d64da Mon Sep 17 00:00:00 2001 From: toto Date: Thu, 9 Apr 2026 14:29:18 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20nouvelles=20techniques=20de=20d=C3=A9te?= =?UTF-8?q?ction=20et=20page=20tactiques=20SOC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SQL: - Ajout 5 colonnes d'agrégation (count_xff, count_unusual_ct, count_non_std_port, count_login_post, sec_ch_mobile_mismatch) - Exposition de 5 features calculées dans view_ai_features_1h - Migration ALTER TABLE pour déploiements existants Bot-detector: - 7 nouvelles features ML (has_xff, unusual_content_type_ratio, non_standard_port_ratio, login_post_concentration, sec_ch_mobile_mismatch, true_window_size, window_mss_ratio) - Propagation campaign_id vers ml_all_scores (était toujours -1) - Escalade campagne : HIGH→CRITICAL si cluster ≥5 membres Dashboard: - Page Tactiques SOC : brute-force, rotation JA4, récurrence, alertes temps réel — 4 KPIs + 4 panneaux + infobulles doc - Ajout fmtDate() helper global - Navigation sidebar mise à jour Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- services/bot-detector/bot_detector/config.py | 3 +- .../bot-detector/bot_detector/pipeline.py | 25 +++ .../bot_detector/preprocessing.py | 7 + .../sql/migrations/02_detection_features.sql | 17 ++ services/dashboard/backend/routes/pages.py | 5 + .../dashboard/backend/templates/base.html | 8 + .../dashboard/backend/templates/tactics.html | 199 ++++++++++++++++++ shared/clickhouse/05_aggregation_tables.sql | 20 +- shared/clickhouse/07_ai_features_view.sql | 16 +- 9 files changed, 295 insertions(+), 5 deletions(-) create mode 100644 services/correlator/sql/migrations/02_detection_features.sql create mode 100644 services/dashboard/backend/templates/tactics.html diff --git a/services/bot-detector/bot_detector/config.py b/services/bot-detector/bot_detector/config.py index 3d9256d..10c8746 100644 --- 
a/services/bot-detector/bot_detector/config.py +++ b/services/bot-detector/bot_detector/config.py @@ -109,7 +109,8 @@ STRUCTURAL_EXCLUDED_FEATURES: dict[str, list] = { 'request_size_variance', 'mss_mobile_mismatch', 'ja3_diversity_ratio', 'syn_timing_cv', 'tls12_ratio', 'ip_df_variance', 'avg_ttl', 'ttl_std', 'no_window_scale_ratio', - 'ja4_drift_ratio'], + 'ja4_drift_ratio', + 'true_window_size', 'window_mss_ratio'], } # ─── Imports optionnels (bibliothèques lourdes) ──────────────────────────── diff --git a/services/bot-detector/bot_detector/pipeline.py b/services/bot-detector/bot_detector/pipeline.py index d62f2a8..a61b5bf 100644 --- a/services/bot-detector/bot_detector/pipeline.py +++ b/services/bot-detector/bot_detector/pipeline.py @@ -306,6 +306,23 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map): if ENABLE_CLUSTERING: anomalies = cluster_anomalies(anomalies, scoring_features, ae_model=ae_model) + # P2 — Escalade par taille de campagne : les IPs dans un cluster + # coordonné de grande taille sont plus menaçantes que des IPs isolées. + # Escalader HIGH → CRITICAL si cluster_size ≥ 5. 
+ if 'campaign_id' in anomalies.columns: + cid_counts = anomalies['campaign_id'].value_counts() + for cid, size in cid_counts.items(): + if cid < 0: + continue + if size >= 5: + mask = (anomalies['campaign_id'] == cid) & (anomalies['threat_level'] == 'HIGH') + n_escalated = mask.sum() + if n_escalated > 0: + anomalies.loc[mask, 'threat_level'] = 'CRITICAL' + anomalies.loc[mask, 'reason'] = anomalies.loc[mask, 'reason'] + \ + f' [Escalade campagne #{cid}, {size} IPs coordonnées]' + log_info(f"[{name}] Escalade campagne #{cid}: {n_escalated} IP(s) HIGH→CRITICAL ({size} membres)") + anomalies['ja4'] = anomalies['ja4'].replace({'': 'HTTP_CLEAR_TEXT'}) for _, row in anomalies.iterrows(): log_decision('ANOMALY', cycle_id, name, { @@ -330,6 +347,14 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map): anubis_deny if not anubis_deny.empty else None, ] if df is not None], ignore_index=True) + # Propager campaign_id des anomalies clusterisées vers all_scored + # (all_scored a été capturé avant clustering, ses campaign_id sont tous -1) + if not anomalies.empty and 'campaign_id' in anomalies.columns: + cid_map = anomalies.set_index(anomalies.index)['campaign_id'] + matched = all_scored.index.isin(cid_map.index) + if matched.any(): + all_scored.loc[matched, 'campaign_id'] = cid_map + # Inclure anubis_allow dans all_scored pour traçabilité dans ml_all_scores. # Ces IPs sont exclues de l'analyse IF mais doivent apparaître dans la table # de scores avec threat_level='KNOWN_BOT' et anomaly_score=0.0. 
diff --git a/services/bot-detector/bot_detector/preprocessing.py b/services/bot-detector/bot_detector/preprocessing.py index 4adf085..d55ddc7 100644 --- a/services/bot-detector/bot_detector/preprocessing.py +++ b/services/bot-detector/bot_detector/preprocessing.py @@ -41,6 +41,12 @@ FEATURES = [ 'cadence_cv', 'burst_ratio', 'pause_ratio', 'lag1_autocorrelation', 'benford_deviation', 'host_diversity', 'host_sweep_speed', 'host_coverage_uniformity', + # P0+P1 : features sous-exploitées (SQL existant ou ajouté) + 'is_fake_navigation', + 'true_window_size', 'window_mss_ratio', + # P1 : nouvelles features de détection + 'has_xff', 'unusual_content_type_ratio', 'non_standard_port_ratio', + 'login_post_concentration', 'sec_ch_mobile_mismatch', ] # Features supplémentaires pour le modèle Complet (données TCP/TLS requises) @@ -100,6 +106,7 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame: 'has_accept_language', 'has_cookie', 'has_referer', 'ua_ch_mismatch', 'is_ua_rotating', 'is_alpn_missing', 'sni_host_mismatch', 'alpn_http_mismatch', 'mss_mobile_mismatch', 'anubis_is_flagged', 'is_rare_ja4', + 'is_fake_navigation', 'has_xff', 'sec_ch_mobile_mismatch', } for col in df.columns: if col in binary_features: diff --git a/services/correlator/sql/migrations/02_detection_features.sql b/services/correlator/sql/migrations/02_detection_features.sql new file mode 100644 index 0000000..cf82863 --- /dev/null +++ b/services/correlator/sql/migrations/02_detection_features.sql @@ -0,0 +1,17 @@ +-- ============================================================================= +-- 02_detection_features.sql — Ajout des features de détection P0+P1 +-- Colonnes supplémentaires dans agg_host_ip_ja4_1h et agg_header_fingerprint_1h +-- NOTE : les MVs doivent être recréées (DROP + CREATE) car ALTER VIEW n'existe pas. +-- Exécuter deploy_schema.sh pour recréer les MVs, ou relancer le schema complet. 
+-- ============================================================================= + +-- agg_host_ip_ja4_1h : nouvelles colonnes de comptage +ALTER TABLE ja4_processing.agg_host_ip_ja4_1h + ADD COLUMN IF NOT EXISTS count_xff SimpleAggregateFunction(sum, UInt64) AFTER count_http_scheme, + ADD COLUMN IF NOT EXISTS count_unusual_ct SimpleAggregateFunction(sum, UInt64) AFTER count_xff, + ADD COLUMN IF NOT EXISTS count_non_std_port SimpleAggregateFunction(sum, UInt64) AFTER count_unusual_ct, + ADD COLUMN IF NOT EXISTS count_login_post SimpleAggregateFunction(sum, UInt64) AFTER count_non_std_port; + +-- agg_header_fingerprint_1h : mismatch mobile Sec-CH-UA +ALTER TABLE ja4_processing.agg_header_fingerprint_1h + ADD COLUMN IF NOT EXISTS sec_ch_mobile_mismatch SimpleAggregateFunction(max, UInt8) AFTER ua_ch_mismatch; diff --git a/services/dashboard/backend/routes/pages.py b/services/dashboard/backend/routes/pages.py index 0e28ba7..511324d 100644 --- a/services/dashboard/backend/routes/pages.py +++ b/services/dashboard/backend/routes/pages.py @@ -71,3 +71,8 @@ async def ja4_detail_page(request: Request, fingerprint: str): @router.get("/cluster/{cid}") async def cluster_detail_page(request: Request, cid: int): return templates.TemplateResponse("cluster_detail.html", _ctx(request, "cluster_detail", cid=cid)) + + +@router.get("/tactics") +async def tactics_page(request: Request): + return templates.TemplateResponse("tactics.html", _ctx(request, "tactics")) diff --git a/services/dashboard/backend/templates/base.html b/services/dashboard/backend/templates/base.html index b1bbddf..df4ce41 100644 --- a/services/dashboard/backend/templates/base.html +++ b/services/dashboard/backend/templates/base.html @@ -151,6 +151,10 @@ Campagnes + + + Tactiques + @@ -324,6 +328,10 @@ if (diff < 86400) return Math.round(diff/3600) + 'h'; return Math.round(diff/86400) + 'j'; } + function fmtDate(d) { + if (!d) return '—'; + return String(d).substring(0, 16).replace('T', ' '); + } // ── ECharts 
helpers ── const EC_COLORS = ['#6366f1','#22c55e','#f97316','#ef4444','#3b82f6','#eab308','#ec4899','#14b8a6','#8b5cf6','#f43f5e']; diff --git a/services/dashboard/backend/templates/tactics.html b/services/dashboard/backend/templates/tactics.html new file mode 100644 index 0000000..03b1470 --- /dev/null +++ b/services/dashboard/backend/templates/tactics.html @@ -0,0 +1,199 @@ +{% extends "base.html" %} +{% block page_title %}Tactiques d'attaque{% endblock %} + +{% block content %} + + + +
+
Brute-force IPs
+
JA4 rotation IPs
+
IPs récurrentes
+
Alertes 24h
+
+ + +
+
+ + + Brute-force / Credential Stuffing + +
+ +
+ Détection brute-force
+ IPs envoyant ≥10 requêtes POST/heure sur des endpoints d'authentification. + Signale les tentatives de credential stuffing, brute-force de mots de passe, + ou abus d'API. Données issues de view_form_bruteforce_detected. +
+
+
+
+
+ + + +
IPHostPOST/hPaths distinctsPremière vueDernière vue
Chargement…
+
+
+
+ + +
+
+ + + Rotation de fingerprint JA4 + +
+ +
+ Détection d'évasion JA4
+ IPs utilisant ≥2 fingerprints JA4 distincts en 24h. Technique d'évasion + classique : rotation de la configuration TLS pour contourner les blocages + par fingerprint. Données issues de view_host_ip_ja4_rotation. +
+
+
+
+
+ + + +
IPHostJA4 distinctsFingerprintsHitsFenêtre
Chargement…
+
+
+
+ + +
+
+ + + Menaces persistantes (récurrence) + +
+ +
+ IPs récurrentes
+ IPs détectées comme anomalies sur plusieurs fenêtres temporelles distinctes. + La récurrence augmente la confiance dans la classification malveillante. + Score aggravé par log1p(recurrence) × 0.005. + Données issues de view_ip_recurrence. 
+
+
+
+
+ + + +
IPRécurrencePire scorePire menacePremière vueDernière vueJA4 topHost top
Chargement…
+
+
+
+ + +
+
+ + + Alertes récentes (24h) + +
+ +
+ Flux d'alertes
+ Dernières détections CRITICAL, HIGH et KNOWN_BOT sur les 24 dernières heures. + Chaque alerte inclut le score, la raison SHAP, et le lien vers l'investigation IP. +
+
+
+
+
+ + + +
DateIPScoreMenaceJA4HostASNHitsRaison
Chargement…
+
+
+
+ + +{% endblock %} diff --git a/shared/clickhouse/05_aggregation_tables.sql b/shared/clickhouse/05_aggregation_tables.sql index 3b60c61..d17a1f7 100644 --- a/shared/clickhouse/05_aggregation_tables.sql +++ b/shared/clickhouse/05_aggregation_tables.sql @@ -112,6 +112,11 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_host_ip_ja4_1h -- HTTP features count_no_accept_enc SimpleAggregateFunction(sum, UInt64), count_http_scheme SimpleAggregateFunction(sum, UInt64), + -- P1 : nouvelles features de détection + count_xff SimpleAggregateFunction(sum, UInt64), + count_unusual_ct SimpleAggregateFunction(sum, UInt64), + count_non_std_port SimpleAggregateFunction(sum, UInt64), + count_login_post SimpleAggregateFunction(sum, UInt64), -- Projection pour les requêtes d'investigation par IP : -- ORDER BY actuel (window_start, src_ip, ...) est optimal pour heatmap @@ -157,7 +162,7 @@ SELECT sum(IF(match(src.path, '(?i)\.(png|jpg|jpeg|gif|css|js|ico|woff2|svg|eot)$'), 1, 0)) AS count_assets, sum(IF(position(src.client_headers, 'Referer') = 0, 1, 0)) AS count_no_referer, uniqState(src.header_user_agent) AS uniq_ua, - 0 AS max_requests_per_sec, + 0 AS max_requests_per_sec, -- TODO(P0): calculer via sous-requête par seconde (impossible dans un seul GROUP BY) varPopState(toFloat64(length(replaceAll(src.path, '/', '//')) - length(src.path))) AS url_depth_variance, sum(IF(src.ip_meta_total_length < 60 OR src.ip_meta_total_length > 1500, 1, 0)) AS count_anomalous_payload, uniqState(src.ja3) AS uniq_ja3, @@ -173,7 +178,13 @@ SELECT sum(IF(src.tcp_meta_window_scale = 0 AND src.correlated = 1, 1, 0)) AS count_no_wscale, sum(toUInt64(src.correlated)) AS count_correlated, sum(IF(length(src.header_accept_encoding) = 0, 1, 0)) AS count_no_accept_enc, - sum(IF(src.scheme = 'http', 1, 0)) AS count_http_scheme + sum(IF(src.scheme = 'http', 1, 0)) AS count_http_scheme, + -- P1 : nouvelles features + sum(IF(length(src.header_x_forwarded_for) > 0, 1, 0)) AS count_xff, + sum(IF(src.method = 'POST' AND 
length(src.header_content_type) > 0 + AND NOT match(src.header_content_type, '(?i)(form-urlencoded|multipart|json|xml|text/plain|grpc|protobuf)'), 1, 0)) AS count_unusual_ct, + sum(IF(src.dst_port NOT IN (80, 443, 8080, 8443), 1, 0)) AS count_non_std_port, + sum(IF(src.method = 'POST' AND match(src.path, '(?i)(login|signin|auth|token|session|wp-login|connect|oauth)'), 1, 0)) AS count_login_post FROM ja4_logs.http_logs AS src GROUP BY window_start, src_ip, ja4, host, src_asn; @@ -192,6 +203,7 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_header_fingerprint_1h has_referer SimpleAggregateFunction(max, UInt8), modern_browser_score SimpleAggregateFunction(max, UInt8), ua_ch_mismatch SimpleAggregateFunction(max, UInt8), + sec_ch_mobile_mismatch SimpleAggregateFunction(max, UInt8), sec_fetch_mode SimpleAggregateFunction(any, String), sec_fetch_dest SimpleAggregateFunction(any, String) ) @@ -212,6 +224,10 @@ SELECT max(toUInt8(if(position(src.client_headers, 'Referer') > 0, 1, 0))) AS has_referer, max(toUInt8(if(length(src.header_sec_ch_ua) > 0, 100, if(length(src.header_user_agent) > 0, 50, 0)))) AS modern_browser_score, max(toUInt8(if((position(src.header_user_agent, 'Windows') > 0 AND position(src.header_sec_ch_ua_platform, 'Windows') == 0) OR (position(src.header_user_agent, 'iPhone') > 0 AND position(src.header_sec_ch_ua_platform, 'iOS') == 0), 1, 0))) AS ua_ch_mismatch, + max(toUInt8(if( + (src.header_sec_ch_ua_mobile = '?1' AND position(src.header_user_agent, 'Mobile') == 0 AND position(src.header_user_agent, 'Android') == 0 AND position(src.header_user_agent, 'iPhone') == 0) + OR (src.header_sec_ch_ua_mobile = '?0' AND (position(src.header_user_agent, 'iPhone') > 0 OR position(src.header_user_agent, 'Android') > 0)), + 1, 0))) AS sec_ch_mobile_mismatch, any(src.header_sec_fetch_mode) AS sec_fetch_mode, any(src.header_sec_fetch_dest) AS sec_fetch_dest FROM ja4_logs.http_logs AS src diff --git a/shared/clickhouse/07_ai_features_view.sql 
b/shared/clickhouse/07_ai_features_view.sql index 800d6ac..5456de8 100644 --- a/shared/clickhouse/07_ai_features_view.sql +++ b/shared/clickhouse/07_ai_features_view.sql @@ -127,7 +127,13 @@ WITH base_data AS ( sqrt(a.ttl_variance_val) AS ttl_std, IF(a.count_correlated_val > 0, a.count_no_wscale_val / a.count_correlated_val, 0) AS no_window_scale_ratio, a.count_no_accept_enc_val / (a.hits + 1) AS missing_accept_enc_ratio, - a.count_http_scheme_val / (a.hits + 1) AS http_scheme_ratio + a.count_http_scheme_val / (a.hits + 1) AS http_scheme_ratio, + -- P1 : nouvelles features de détection + IF(a.count_xff_val > 0, 1, 0) AS has_xff, + a.count_unusual_ct_val / greatest(a.count_post, 1) AS unusual_content_type_ratio, + a.count_non_std_port_val / (a.hits + 1) AS non_standard_port_ratio, + a.count_login_post_val / greatest(a.count_post, 1) AS login_post_concentration, + h.sec_ch_mobile_mismatch AS sec_ch_mobile_mismatch FROM ( SELECT window_start, src_ip, ja4, host, src_asn, @@ -162,7 +168,12 @@ WITH base_data AS ( sum(count_no_wscale) AS count_no_wscale_val, sum(count_correlated) AS count_correlated_val, sum(count_no_accept_enc) AS count_no_accept_enc_val, - sum(count_http_scheme) AS count_http_scheme_val + sum(count_http_scheme) AS count_http_scheme_val, + -- P1 : nouvelles features de détection + sum(count_xff) AS count_xff_val, + sum(count_unusual_ct) AS count_unusual_ct_val, + sum(count_non_std_port) AS count_non_std_port_val, + sum(count_login_post) AS count_login_post_val FROM ja4_processing.agg_host_ip_ja4_1h WHERE window_start >= now() - INTERVAL 24 HOUR GROUP BY window_start, src_ip, ja4, host, src_asn @@ -173,6 +184,7 @@ WITH base_data AS ( max(header_count) AS header_count, max(has_accept_language) AS has_accept_language, max(has_cookie) AS has_cookie, max(has_referer) AS has_referer, max(modern_browser_score) AS modern_browser_score, max(ua_ch_mismatch) AS ua_ch_mismatch, + max(sec_ch_mobile_mismatch) AS sec_ch_mobile_mismatch, any(sec_fetch_mode) AS 
sec_fetch_mode, any(sec_fetch_dest) AS sec_fetch_dest FROM ja4_processing.agg_header_fingerprint_1h WHERE window_start >= now() - INTERVAL 24 HOUR