diff --git a/services/bot-detector/bot_detector/bot_detector.py b/services/bot-detector/bot_detector/bot_detector.py
index 5781381..6047856 100644
--- a/services/bot-detector/bot_detector/bot_detector.py
+++ b/services/bot-detector/bot_detector/bot_detector.py
@@ -538,7 +538,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
dérive, entraîne un nouveau modèle sur ``human_baseline``, le sérialise sur disque,
met à jour le fichier pointeur et purge les anciennes versions.
- Retourne (IsolationForest, TrafficAutoEncoder|None).
+ Retourne (IsolationForest, TrafficAutoEncoder|None, list[str] features).
"""
model_path, meta = _get_current_version(name)
if model_path and meta:
@@ -575,7 +575,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
log_info(f"[{name}] Autoencoder v{meta['version_id']} rechargé.")
except Exception as exc:
log_info(f"[{name}] Erreur chargement AE : {exc} — AE désactivé ce cycle.")
- return joblib.load(model_path), ae_loaded
+ return joblib.load(model_path), ae_loaded, meta.get('features', features)
elif not drift_forced:
log_info(f"[{name}] Modèle v{meta['version_id']} expiré ({age_h:.1f}h ≥ {RETRAIN_INTERVAL_H}h) — retraining.")
@@ -593,7 +593,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
log_info(f"[{name}] Élagage : {len(low_var_features)} feature(s) à variance < {PRUNE_VARIANCE_THRESHOLD} retirées : {low_var_features}")
X = X.drop(columns=low_var_features)
features = [f for f in features if f not in low_var_features]
- log_decision('FEATURE_PRUNED', name, '', {'pruned': low_var_features, 'remaining': len(features)})
+ log_decision('FEATURE_PRUNED', cycle_id, name, {'pruned': low_var_features, 'remaining': len(features)})
# Validation split : réserver 20% pour évaluation offline
val_size = max(1, int(len(X) * 0.2))
@@ -620,7 +620,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
VAL_ANOMALY_GATE = float(os.getenv('VAL_ANOMALY_GATE', '0.20'))
if val_anomaly_rate > VAL_ANOMALY_GATE:
log_info(f"[{name}] ⚠ REJET : val_anomaly_rate={val_anomaly_rate:.2%} > gate={VAL_ANOMALY_GATE:.0%} — baseline probablement contaminée.")
- log_decision('MODEL_REJECTED', name, '', {
+ log_decision('MODEL_REJECTED', cycle_id, name, {
'val_anomaly_rate': round(val_anomaly_rate, 4), 'gate': VAL_ANOMALY_GATE,
'val_mean_score': round(val_mean_score, 4), 'version_id': version_id,
})
@@ -635,7 +635,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
ae_prev = TrafficAutoEncoder.load_state_dict(torch.load(ae_prev_path, weights_only=False))
except Exception:
pass
- return joblib.load(model_path), ae_prev
+ return joblib.load(model_path), ae_prev, meta.get('features', features)
log_info(f"[{name}] Aucun modèle précédent — utilisation du modèle rejeté par défaut.")
# A1 — Sauvegarder les statistiques de distribution avec quantile digest pour drift detection
@@ -694,7 +694,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
'human_samples': len(human_baseline), 'next_retrain_in_h': RETRAIN_INTERVAL_H,
'history_kept': MODEL_HISTORY_COUNT
})
- return model, ae_model
+ return model, ae_model, features
# ═══════════════════════════════════════════════════════════════════════════════
# A1 — DÉTECTION DE DÉRIVE CONCEPTUELLE (CONCEPT DRIFT)
@@ -1006,10 +1006,12 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
log_info(f'[{name}] Features validées : {len(valid_features)}/{len(features)} ({", ".join(valid_features[:5])}{"…" if len(valid_features) > 5 else ""})')
# A1 — Dérive conceptuelle intégrée dans load_or_train_model
- model, ae_model = load_or_train_model(name, human_baseline, valid_features, cycle_id)
+ model, ae_model, model_features = load_or_train_model(name, human_baseline, valid_features, cycle_id)
+ # Utiliser les features du modèle (possiblement différentes après pruning/chargement)
+ scoring_features = [f for f in model_features if f in unknown_traffic.columns]
unknown_traffic = unknown_traffic.copy()
- X_test = unknown_traffic[valid_features].replace([np.inf, -np.inf], np.nan)
+ X_test = unknown_traffic[scoring_features].replace([np.inf, -np.inf], np.nan)
X_test = X_test.fillna(X_test.median())
raw_scores = model.decision_function(X_test)
log_info(f'[{name}] Scoring EIF : {len(X_test)} sessions scorées (min={raw_scores.min():.4f}, max={raw_scores.max():.4f}, mean={raw_scores.mean():.4f})')
@@ -1135,6 +1137,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
anubis_allow['ae_recon_error'] = 0.0
anubis_allow['xgb_prob'] = 0.0
anubis_allow['threat_level'] = 'KNOWN_BOT'
+ anubis_allow['bot_name'] = anubis_allow['anubis_bot_name']
anubis_allow['model_name'] = name
anubis_allow['campaign_id'] = -1
anubis_allow['reason'] = '[Anubis ALLOW] ' + anubis_allow['anubis_bot_name']
@@ -1189,7 +1192,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
# A8 — Clustering DBSCAN pour identifier les campagnes coordonnées
if ENABLE_CLUSTERING:
- anomalies = _cluster_anomalies(anomalies, valid_features, ae_model=ae_model)
+ anomalies = _cluster_anomalies(anomalies, scoring_features, ae_model=ae_model)
anomalies['ja4'] = anomalies['ja4'].replace({'': 'HTTP_CLEAR_TEXT'})
for _, row in anomalies.iterrows():
@@ -1623,6 +1626,7 @@ def fetch_and_analyze():
cols = [
'detected_at', 'src_ip', 'ja4', 'host', 'bot_name', 'browser_family', 'anomaly_score',
+ 'raw_anomaly_score', 'campaign_id',
'threat_level', 'model_name', 'recurrence',
'asn_number', 'asn_org', 'asn_detail', 'asn_domain', 'country_code', 'asn_label',
'hits', 'hit_velocity', 'fuzzing_index', 'post_ratio', 'port_exhaustion_ratio', 'max_keepalives', 'orphan_ratio',
diff --git a/services/dashboard/backend/routes/api.py b/services/dashboard/backend/routes/api.py
index 459c368..611b879 100644
--- a/services/dashboard/backend/routes/api.py
+++ b/services/dashboard/backend/routes/api.py
@@ -314,7 +314,7 @@ async def traffic(
order: str = Query("DESC"),
method: str | None = Query(None),
host: str | None = Query(None),
- status: str | None = Query(None),
+ http_version: str | None = Query(None),
) -> dict[str, Any]:
sort = _validate_sort(sort, _TRAFFIC_SORT_COLS, "time")
order = _validate_order(order)
@@ -331,9 +331,9 @@ async def traffic(
where_clauses.append("host LIKE {host:String}")
params["host"] = f"%{host}%"
- if status is not None:
- where_clauses.append("http_version = {status:String}")
- params["status"] = status
+ if http_version is not None:
+ where_clauses.append("http_version = {http_version:String}")
+ params["http_version"] = http_version
where = " AND ".join(where_clauses)
@@ -651,7 +651,7 @@ async def behavior() -> dict[str, Any]:
async def heatmap() -> dict[str, Any]:
try:
cells = query(
- f"SELECT toDayOfWeek(time) AS dow, toHour(time) AS hour, count() AS cnt "
+ f"SELECT toDayOfWeek(time) - 1 AS dow, toHour(time) AS hour, count() AS cnt "
f"FROM {_DB_LOGS}.http_logs "
"WHERE time >= now() - INTERVAL 7 DAY "
"GROUP BY dow, hour ORDER BY dow, hour"
diff --git a/services/dashboard/backend/templates/base.html b/services/dashboard/backend/templates/base.html
index 84fd630..c18c893 100644
--- a/services/dashboard/backend/templates/base.html
+++ b/services/dashboard/backend/templates/base.html
@@ -119,10 +119,15 @@
};
return `${level}`;
}
+ function escapeHtml(s) { // Return `s` HTML-escaped by round-tripping it through a detached <div>.
+ const d = document.createElement('div'); // scratch element; never attached to the document in this function
+ d.textContent = s; // assigned as plain text, so the value is not parsed as markup
+ return d.innerHTML; // serialized (escaped) form of that text. NOTE(review): innerHTML serialization presumably leaves quote characters unescaped — confirm before interpolating the result into an attribute value
+ }
function fmtIP(ip) {
if (!ip) return '';
let s = String(ip).replace('::ffff:','');
- return `${s}`;
+ return `${escapeHtml(s)}`;
}
function fmtScore(v) {
let n = parseFloat(v);
@@ -134,24 +139,24 @@
// ── Navigation helpers ──
function fmtASN(org) {
if (!org) return '';
- return `${org}`;
+ return `${escapeHtml(org)}`;
}
function fmtCountry(cc) {
if (!cc) return '';
const flags = {'FR':'🇫🇷','DE':'🇩🇪','NL':'🇳🇱','GB':'🇬🇧','ES':'🇪🇸','US':'🇺🇸','RU':'🇷🇺','IT':'🇮🇹','JP':'🇯🇵','CN':'🇨🇳','KR':'🇰🇷','BR':'🇧🇷','AU':'🇦🇺','CA':'🇨🇦','IN':'🇮🇳'};
- return `${flags[cc]||'🏳️'} ${cc}`;
+ return `${flags[cc]||'🏳️'} ${escapeHtml(cc)}`;
}
function fmtJA4(ja4) {
if (!ja4) return '';
- return `${ja4.substring(0,20)}…`;
+ return `${escapeHtml(ja4.substring(0,20))}…`;
}
function fmtJA4Full(ja4) {
if (!ja4) return '';
- return `${ja4}`;
+ return `${escapeHtml(ja4)}`;
}
function fmtBotName(name) {
if (!name) return '';
- return `${name}`;
+ return `${escapeHtml(name)}`;
}
function fmtThreatLink(level) {
if (!level) return '';
@@ -160,7 +165,7 @@
function fmtLabel(label) {
if (!label) return '';
const colors = {human:'text-green-400 bg-green-500/10',datacenter:'text-red-400 bg-red-500/10',hosting:'text-orange-400 bg-orange-500/10'};
- return `${label}`;
+ return `${escapeHtml(label)}`;
}
// ── ECharts helpers ──
diff --git a/services/dashboard/backend/templates/ip_detail.html b/services/dashboard/backend/templates/ip_detail.html
index c7a4562..6d6cafc 100644
--- a/services/dashboard/backend/templates/ip_detail.html
+++ b/services/dashboard/backend/templates/ip_detail.html
@@ -69,7 +69,7 @@
{% endblock %}
{% block scripts %}