diff --git a/services/bot-detector/bot_detector/config.py b/services/bot-detector/bot_detector/config.py
index 3d9256d..10c8746 100644
--- a/services/bot-detector/bot_detector/config.py
+++ b/services/bot-detector/bot_detector/config.py
@@ -109,7 +109,8 @@ STRUCTURAL_EXCLUDED_FEATURES: dict[str, list] = {
'request_size_variance', 'mss_mobile_mismatch',
'ja3_diversity_ratio', 'syn_timing_cv', 'tls12_ratio', 'ip_df_variance',
'avg_ttl', 'ttl_std', 'no_window_scale_ratio',
- 'ja4_drift_ratio'],
+ 'ja4_drift_ratio',
+ 'true_window_size', 'window_mss_ratio'],
}
# ─── Imports optionnels (bibliothèques lourdes) ────────────────────────────
diff --git a/services/bot-detector/bot_detector/pipeline.py b/services/bot-detector/bot_detector/pipeline.py
index d62f2a8..a61b5bf 100644
--- a/services/bot-detector/bot_detector/pipeline.py
+++ b/services/bot-detector/bot_detector/pipeline.py
@@ -306,6 +306,23 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
if ENABLE_CLUSTERING:
anomalies = cluster_anomalies(anomalies, scoring_features, ae_model=ae_model)
+ # P2 — Escalade par taille de campagne : les IPs dans un cluster
+ # coordonné de grande taille sont plus menaçantes que des IPs isolées.
+ # Escalader HIGH → CRITICAL si cluster_size ≥ 5.
+ if 'campaign_id' in anomalies.columns:
+ cid_counts = anomalies['campaign_id'].value_counts()
+ for cid, size in cid_counts.items():
+ if cid < 0:
+ continue
+ if size >= 5:
+ mask = (anomalies['campaign_id'] == cid) & (anomalies['threat_level'] == 'HIGH')
+ n_escalated = mask.sum()
+ if n_escalated > 0:
+ anomalies.loc[mask, 'threat_level'] = 'CRITICAL'
+ anomalies.loc[mask, 'reason'] = anomalies.loc[mask, 'reason'] + \
+ f' [Escalade campagne #{cid}, {size} IPs coordonnées]'
+ log_info(f"[{name}] Escalade campagne #{cid}: {n_escalated} IP(s) HIGH→CRITICAL ({size} membres)")
+
anomalies['ja4'] = anomalies['ja4'].replace({'': 'HTTP_CLEAR_TEXT'})
for _, row in anomalies.iterrows():
log_decision('ANOMALY', cycle_id, name, {
@@ -330,6 +347,14 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
anubis_deny if not anubis_deny.empty else None,
] if df is not None], ignore_index=True)
+ # Propager campaign_id des anomalies clusterisées vers all_scored
+ # (all_scored a été capturé avant clustering, ses campaign_id sont tous -1)
+ if not anomalies.empty and 'campaign_id' in anomalies.columns:
+ cid_map = anomalies.set_index(anomalies.index)['campaign_id']
+ matched = all_scored.index.isin(cid_map.index)
+ if matched.any():
+ all_scored.loc[matched, 'campaign_id'] = cid_map
+
# Inclure anubis_allow dans all_scored pour traçabilité dans ml_all_scores.
# Ces IPs sont exclues de l'analyse IF mais doivent apparaître dans la table
# de scores avec threat_level='KNOWN_BOT' et anomaly_score=0.0.
diff --git a/services/bot-detector/bot_detector/preprocessing.py b/services/bot-detector/bot_detector/preprocessing.py
index 4adf085..d55ddc7 100644
--- a/services/bot-detector/bot_detector/preprocessing.py
+++ b/services/bot-detector/bot_detector/preprocessing.py
@@ -41,6 +41,12 @@ FEATURES = [
'cadence_cv', 'burst_ratio', 'pause_ratio',
'lag1_autocorrelation', 'benford_deviation',
'host_diversity', 'host_sweep_speed', 'host_coverage_uniformity',
+ # P0+P1 : features sous-exploitées (SQL existant ou ajouté)
+ 'is_fake_navigation',
+ 'true_window_size', 'window_mss_ratio',
+ # P1 : nouvelles features de détection
+ 'has_xff', 'unusual_content_type_ratio', 'non_standard_port_ratio',
+ 'login_post_concentration', 'sec_ch_mobile_mismatch',
]
# Features supplémentaires pour le modèle Complet (données TCP/TLS requises)
@@ -100,6 +106,7 @@ def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
'has_accept_language', 'has_cookie', 'has_referer', 'ua_ch_mismatch',
'is_ua_rotating', 'is_alpn_missing', 'sni_host_mismatch', 'alpn_http_mismatch',
'mss_mobile_mismatch', 'anubis_is_flagged', 'is_rare_ja4',
+ 'is_fake_navigation', 'has_xff', 'sec_ch_mobile_mismatch',
}
for col in df.columns:
if col in binary_features:
diff --git a/services/correlator/sql/migrations/02_detection_features.sql b/services/correlator/sql/migrations/02_detection_features.sql
new file mode 100644
index 0000000..cf82863
--- /dev/null
+++ b/services/correlator/sql/migrations/02_detection_features.sql
@@ -0,0 +1,17 @@
+-- =============================================================================
+-- 02_detection_features.sql — Adds the P0+P1 detection feature columns
+-- Extra columns in agg_host_ip_ja4_1h and agg_header_fingerprint_1h
+-- NOTE: this script only alters the two tables below. The materialized views (MVs)
+-- must be recreated (DROP + CREATE) because there is no ALTER VIEW statement.
+-- Run deploy_schema.sh to recreate the MVs, or re-apply the full schema.
+-- NOTE(review): ClickHouse also documents ALTER TABLE ... MODIFY QUERY for
+-- materialized views — confirm whether it is usable for this deployment.
+-- =============================================================================
+
+-- agg_host_ip_ja4_1h: new counter columns (summed on merge)
+ALTER TABLE ja4_processing.agg_host_ip_ja4_1h
+ ADD COLUMN IF NOT EXISTS count_xff SimpleAggregateFunction(sum, UInt64) AFTER count_http_scheme,
+ ADD COLUMN IF NOT EXISTS count_unusual_ct SimpleAggregateFunction(sum, UInt64) AFTER count_xff,
+ ADD COLUMN IF NOT EXISTS count_non_std_port SimpleAggregateFunction(sum, UInt64) AFTER count_unusual_ct,
+ ADD COLUMN IF NOT EXISTS count_login_post SimpleAggregateFunction(sum, UInt64) AFTER count_non_std_port;
+
+-- agg_header_fingerprint_1h: Sec-CH-UA mobile mismatch flag (max on merge)
+ALTER TABLE ja4_processing.agg_header_fingerprint_1h
+ ADD COLUMN IF NOT EXISTS sec_ch_mobile_mismatch SimpleAggregateFunction(max, UInt8) AFTER ua_ch_mismatch;
diff --git a/services/dashboard/backend/routes/pages.py b/services/dashboard/backend/routes/pages.py
index 0e28ba7..511324d 100644
--- a/services/dashboard/backend/routes/pages.py
+++ b/services/dashboard/backend/routes/pages.py
@@ -71,3 +71,8 @@ async def ja4_detail_page(request: Request, fingerprint: str):
@router.get("/cluster/{cid}")
async def cluster_detail_page(request: Request, cid: int):
return templates.TemplateResponse("cluster_detail.html", _ctx(request, "cluster_detail", cid=cid))
+
+
+@router.get("/tactics")
+async def tactics_page(request: Request):
+ return templates.TemplateResponse("tactics.html", _ctx(request, "tactics"))
diff --git a/services/dashboard/backend/templates/base.html b/services/dashboard/backend/templates/base.html
index b1bbddf..df4ce41 100644
--- a/services/dashboard/backend/templates/base.html
+++ b/services/dashboard/backend/templates/base.html
@@ -151,6 +151,10 @@
Campagnes
+
+
+ Tactiques
+
Investigation
@@ -324,6 +328,10 @@
if (diff < 86400) return Math.round(diff/3600) + 'h';
return Math.round(diff/86400) + 'j';
}
+ function fmtDate(d) {
+ if (!d) return '—';
+ return String(d).substring(0, 16).replace('T', ' ');
+ }
// ── ECharts helpers ──
const EC_COLORS = ['#6366f1','#22c55e','#f97316','#ef4444','#3b82f6','#eab308','#ec4899','#14b8a6','#8b5cf6','#f43f5e'];
diff --git a/services/dashboard/backend/templates/tactics.html b/services/dashboard/backend/templates/tactics.html
new file mode 100644
index 0000000..03b1470
--- /dev/null
+++ b/services/dashboard/backend/templates/tactics.html
@@ -0,0 +1,199 @@
+{% extends "base.html" %}
+{% block page_title %}Tactiques d'attaque{% endblock %}
+
+{% block content %}
+
+
+
+
+
+
+
+
+
+
+
+ | IP | Host | POST/h | Paths distincts | Première vue | Dernière vue |
+ | Chargement… |
+
+
+
+
+
+
+
+
+
+
+
+ | IP | Host | JA4 distincts | Fingerprints | Hits | Fenêtre |
+ | Chargement… |
+
+
+
+
+
+
+
+
+
+
+
+ | IP | Récurrence | Pire score | Pire menace | Première vue | Dernière vue | JA4 top | Host top |
+ | Chargement… |
+
+
+
+
+
+
+
+
+
+
+
+ | Date | IP | Score | Menace | JA4 | Host | ASN | Hits | Raison |
+ | Chargement… |
+
+
+
+
+
+
+{% endblock %}
diff --git a/shared/clickhouse/05_aggregation_tables.sql b/shared/clickhouse/05_aggregation_tables.sql
index 3b60c61..d17a1f7 100644
--- a/shared/clickhouse/05_aggregation_tables.sql
+++ b/shared/clickhouse/05_aggregation_tables.sql
@@ -112,6 +112,11 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_host_ip_ja4_1h
-- HTTP features
count_no_accept_enc SimpleAggregateFunction(sum, UInt64),
count_http_scheme SimpleAggregateFunction(sum, UInt64),
+ -- P1 : nouvelles features de détection
+ count_xff SimpleAggregateFunction(sum, UInt64),
+ count_unusual_ct SimpleAggregateFunction(sum, UInt64),
+ count_non_std_port SimpleAggregateFunction(sum, UInt64),
+ count_login_post SimpleAggregateFunction(sum, UInt64),
-- Projection pour les requêtes d'investigation par IP :
-- ORDER BY actuel (window_start, src_ip, ...) est optimal pour heatmap
@@ -157,7 +162,7 @@ SELECT
sum(IF(match(src.path, '(?i)\.(png|jpg|jpeg|gif|css|js|ico|woff2|svg|eot)$'), 1, 0)) AS count_assets,
sum(IF(position(src.client_headers, 'Referer') = 0, 1, 0)) AS count_no_referer,
uniqState(src.header_user_agent) AS uniq_ua,
- 0 AS max_requests_per_sec,
+ 0 AS max_requests_per_sec, -- TODO(P0): calculer via sous-requête par seconde (impossible dans un seul GROUP BY)
varPopState(toFloat64(length(replaceAll(src.path, '/', '//')) - length(src.path))) AS url_depth_variance,
sum(IF(src.ip_meta_total_length < 60 OR src.ip_meta_total_length > 1500, 1, 0)) AS count_anomalous_payload,
uniqState(src.ja3) AS uniq_ja3,
@@ -173,7 +178,13 @@ SELECT
sum(IF(src.tcp_meta_window_scale = 0 AND src.correlated = 1, 1, 0)) AS count_no_wscale,
sum(toUInt64(src.correlated)) AS count_correlated,
sum(IF(length(src.header_accept_encoding) = 0, 1, 0)) AS count_no_accept_enc,
- sum(IF(src.scheme = 'http', 1, 0)) AS count_http_scheme
+ sum(IF(src.scheme = 'http', 1, 0)) AS count_http_scheme,
+ -- P1: new detection counters (each adds 1 per matching request row)
+ -- count_xff: rows with a non-empty header_x_forwarded_for
+ sum(IF(length(src.header_x_forwarded_for) > 0, 1, 0)) AS count_xff,
+ -- count_unusual_ct: POST rows whose non-empty Content-Type matches none of the listed common types
+ sum(IF(src.method = 'POST' AND length(src.header_content_type) > 0
+ AND NOT match(src.header_content_type, '(?i)(form-urlencoded|multipart|json|xml|text/plain|grpc|protobuf)'), 1, 0)) AS count_unusual_ct,
+ -- count_non_std_port: rows whose dst_port is not 80, 443, 8080 or 8443
+ sum(IF(src.dst_port NOT IN (80, 443, 8080, 8443), 1, 0)) AS count_non_std_port,
+ -- count_login_post: POST rows whose path contains one of the listed authentication keywords
+ -- NOTE(review): the pattern is an unanchored, case-insensitive substring match, so 'auth' also matches e.g. '/author' — confirm this is acceptable.
+ sum(IF(src.method = 'POST' AND match(src.path, '(?i)(login|signin|auth|token|session|wp-login|connect|oauth)'), 1, 0)) AS count_login_post
FROM ja4_logs.http_logs AS src
GROUP BY window_start, src_ip, ja4, host, src_asn;
@@ -192,6 +203,7 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_header_fingerprint_1h
has_referer SimpleAggregateFunction(max, UInt8),
modern_browser_score SimpleAggregateFunction(max, UInt8),
ua_ch_mismatch SimpleAggregateFunction(max, UInt8),
+ sec_ch_mobile_mismatch SimpleAggregateFunction(max, UInt8),
sec_fetch_mode SimpleAggregateFunction(any, String),
sec_fetch_dest SimpleAggregateFunction(any, String)
)
@@ -212,6 +224,10 @@ SELECT
max(toUInt8(if(position(src.client_headers, 'Referer') > 0, 1, 0))) AS has_referer,
max(toUInt8(if(length(src.header_sec_ch_ua) > 0, 100, if(length(src.header_user_agent) > 0, 50, 0)))) AS modern_browser_score,
max(toUInt8(if((position(src.header_user_agent, 'Windows') > 0 AND position(src.header_sec_ch_ua_platform, 'Windows') == 0) OR (position(src.header_user_agent, 'iPhone') > 0 AND position(src.header_sec_ch_ua_platform, 'iOS') == 0), 1, 0))) AS ua_ch_mismatch,
+ -- Sec-CH-UA-Mobile vs User-Agent consistency flag (1 if any row in the group mismatches):
+ --   '?1' (mobile) while the UA carries none of the Mobile / Android / iPhone tokens, or
+ --   '?0' (not mobile) while the UA claims a phone: iPhone, or Android together with 'Mobile'.
+ -- Android alone is not enough for the '?0' case: Android tablets send '?0' with an
+ -- 'Android' UA that lacks the 'Mobile' token, and must not be flagged.
+ max(toUInt8(if(
+ (src.header_sec_ch_ua_mobile = '?1' AND position(src.header_user_agent, 'Mobile') == 0 AND position(src.header_user_agent, 'Android') == 0 AND position(src.header_user_agent, 'iPhone') == 0)
+ OR (src.header_sec_ch_ua_mobile = '?0' AND (position(src.header_user_agent, 'iPhone') > 0 OR (position(src.header_user_agent, 'Android') > 0 AND position(src.header_user_agent, 'Mobile') > 0))),
+ 1, 0))) AS sec_ch_mobile_mismatch,
any(src.header_sec_fetch_mode) AS sec_fetch_mode,
any(src.header_sec_fetch_dest) AS sec_fetch_dest
FROM ja4_logs.http_logs AS src
diff --git a/shared/clickhouse/07_ai_features_view.sql b/shared/clickhouse/07_ai_features_view.sql
index 800d6ac..5456de8 100644
--- a/shared/clickhouse/07_ai_features_view.sql
+++ b/shared/clickhouse/07_ai_features_view.sql
@@ -127,7 +127,13 @@ WITH base_data AS (
sqrt(a.ttl_variance_val) AS ttl_std,
IF(a.count_correlated_val > 0, a.count_no_wscale_val / a.count_correlated_val, 0) AS no_window_scale_ratio,
a.count_no_accept_enc_val / (a.hits + 1) AS missing_accept_enc_ratio,
- a.count_http_scheme_val / (a.hits + 1) AS http_scheme_ratio
+ a.count_http_scheme_val / (a.hits + 1) AS http_scheme_ratio,
+ -- P1: new detection features
+ -- has_xff: 1 when at least one request in the window was counted in count_xff
+ IF(a.count_xff_val > 0, 1, 0) AS has_xff,
+ -- POST-based ratios: greatest(..., 1) avoids a division by zero when count_post is 0
+ a.count_unusual_ct_val / greatest(a.count_post, 1) AS unusual_content_type_ratio,
+ -- Same (hits + 1) denominator as the other per-hit ratios above
+ a.count_non_std_port_val / (a.hits + 1) AS non_standard_port_ratio,
+ a.count_login_post_val / greatest(a.count_post, 1) AS login_post_concentration,
+ -- Passed through unchanged from the header-fingerprint side (alias h)
+ h.sec_ch_mobile_mismatch AS sec_ch_mobile_mismatch
FROM (
SELECT
window_start, src_ip, ja4, host, src_asn,
@@ -162,7 +168,12 @@ WITH base_data AS (
sum(count_no_wscale) AS count_no_wscale_val,
sum(count_correlated) AS count_correlated_val,
sum(count_no_accept_enc) AS count_no_accept_enc_val,
- sum(count_http_scheme) AS count_http_scheme_val
+ sum(count_http_scheme) AS count_http_scheme_val,
+ -- P1 : nouvelles features de détection
+ sum(count_xff) AS count_xff_val,
+ sum(count_unusual_ct) AS count_unusual_ct_val,
+ sum(count_non_std_port) AS count_non_std_port_val,
+ sum(count_login_post) AS count_login_post_val
FROM ja4_processing.agg_host_ip_ja4_1h
WHERE window_start >= now() - INTERVAL 24 HOUR
GROUP BY window_start, src_ip, ja4, host, src_asn
@@ -173,6 +184,7 @@ WITH base_data AS (
max(header_count) AS header_count, max(has_accept_language) AS has_accept_language,
max(has_cookie) AS has_cookie, max(has_referer) AS has_referer,
max(modern_browser_score) AS modern_browser_score, max(ua_ch_mismatch) AS ua_ch_mismatch,
+ max(sec_ch_mobile_mismatch) AS sec_ch_mobile_mismatch,
any(sec_fetch_mode) AS sec_fetch_mode, any(sec_fetch_dest) AS sec_fetch_dest
FROM ja4_processing.agg_header_fingerprint_1h
WHERE window_start >= now() - INTERVAL 24 HOUR