feat(bot-detector): add parallel Autoencoder scorer (#9)

- TrafficAutoEncoder class: symmetric AE (n→64→32→16→32→64→n) with BatchNorm+ReLU - Trained alongside EIF on human_baseline, saved/loaded with model versioning - Score = per-sample MSE reconstruction error, combined with EIF via AE_WEIGHT (α=0.30) - AE latent space (16-dim) used for HDBSCAN clustering instead of raw features - Configurable: AE_WEIGHT, AE_EPOCHS, AE_LATENT_DIM, AE_LEARNING_RATE - Graceful fallback: if torch unavailable or AE fails, EIF-only scoring continues - ClickHouse: ae_recon_error column added to ml_all_scores - Tests: 5 new tests (AE train/score, encode latent, state dict save/load, weight combination) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-08 02:40:39 +02:00
parent f6e2d3c0ca
commit 57cf6c3828
4 changed files with 378 additions and 12 deletions
--- a/services/bot-detector/bot_detector/bot_detector.py
+++ b/services/bot-detector/bot_detector/bot_detector.py
@ -51,6 +51,15 @@ try:
 except ImportError:
    SHAP_AVAILABLE = False

+# PyTorch Autoencoder (Baptiste et al., arXiv 2602; Kitsune, Mirsky et al., NDSS 2018)
+# Second scorer parallèle : détecte les anomalies de reconstruction que l'IF manque
+try:
+    import torch
+    import torch.nn as nn
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
 warnings.filterwarnings('ignore')

 # ═══════════════════════════════════════════════════════════════════════════════
@ -123,6 +132,12 @@ MIN_VALID_FEATURE_RATIO = _require_float('MIN_VALID_FEATURE_RATIO', 0.50, 0, 1)
 ENABLE_CLUSTERING = os.getenv('ENABLE_CLUSTERING', 'true').lower() == 'true'
 CLUSTERING_MIN_SAMPLES = int(os.getenv('CLUSTERING_MIN_SAMPLES', '3'))

+# Autoencoder — Second scorer parallèle (Baptiste et al. 2026, Kitsune / Mirsky et al. NDSS 2018)
+AE_WEIGHT = _require_float('AE_WEIGHT', 0.30, 0, 1)  # pondération dans le score combiné (0 = IF seul)
+AE_EPOCHS = int(os.getenv('AE_EPOCHS', '50'))
+AE_LATENT_DIM = int(os.getenv('AE_LATENT_DIM', '16'))
+AE_LEARNING_RATE = float(os.getenv('AE_LEARNING_RATE', '1e-3'))
+
 # Features structurellement indisponibles par modèle (pas de données L4 pour trafic non-corrélé)
 # Ces features ne génèrent pas de warnings "pipeline" — leur absence est by-design.
 STRUCTURAL_EXCLUDED_FEATURES: dict[str, list] = {
@ -275,6 +290,121 @@ def _purge_old_versions(name: str):
        if os.path.exists(meta_path): os.remove(meta_path)
        log_info(f"[{name}] Version purgée : {version_id} (limite={MODEL_HISTORY_COUNT})")

+# ═══════════════════════════════════════════════════════════════════════════════
+# AUTOENCODER — Second scorer parallèle (détection d'anomalies par reconstruction)
+# ═══════════════════════════════════════════════════════════════════════════════
+class TrafficAutoEncoder:
+    """Autoencoder symétrique pour détection d'anomalies par erreur de reconstruction.
+
+    Architecture : encoder (n→64→32→latent_dim) — decoder (latent_dim→32→64→n)
+    Activation : ReLU + BatchNorm (encoder/decoder), sigmoid (sortie — données normalisées [0,1])
+    Score = MSE(input, reconstruction) par échantillon.
+    L'espace latent (16-dim par défaut) peut servir de features compressées pour HDBSCAN.
+    """
+
+    def __init__(self, n_features: int, latent_dim: int = AE_LATENT_DIM):
+        if not TORCH_AVAILABLE:
+            raise RuntimeError("PyTorch non disponible — autoencoder désactivé.")
+        self.n_features = n_features
+        self.latent_dim = latent_dim
+        self.device = torch.device('cpu')
+        self._build_model()
+        self._scaler_min = None
+        self._scaler_range = None
+
+    def _build_model(self):
+        dim1 = min(64, max(self.n_features, self.latent_dim + 4))
+        dim2 = min(32, max(dim1 // 2, self.latent_dim + 2))
+        self.encoder = nn.Sequential(
+            nn.Linear(self.n_features, dim1), nn.BatchNorm1d(dim1), nn.ReLU(),
+            nn.Linear(dim1, dim2), nn.BatchNorm1d(dim2), nn.ReLU(),
+            nn.Linear(dim2, self.latent_dim),
+        ).to(self.device)
+        self.decoder = nn.Sequential(
+            nn.Linear(self.latent_dim, dim2), nn.BatchNorm1d(dim2), nn.ReLU(),
+            nn.Linear(dim2, dim1), nn.BatchNorm1d(dim1), nn.ReLU(),
+            nn.Linear(dim1, self.n_features), nn.Sigmoid(),
+        ).to(self.device)
+        self._all_params = list(self.encoder.parameters()) + list(self.decoder.parameters())
+
+    def _to_tensor(self, X: np.ndarray) -> torch.Tensor:
+        """Normalise [0,1] via min-max puis convertit en Tensor."""
+        if self._scaler_min is not None:
+            X_norm = (X - self._scaler_min) / (self._scaler_range + 1e-9)
+        else:
+            X_norm = X
+        return torch.tensor(np.clip(X_norm, 0, 1), dtype=torch.float32, device=self.device)
+
+    def fit(self, X: np.ndarray, epochs: int = AE_EPOCHS, lr: float = AE_LEARNING_RATE,
+            batch_size: int = 256) -> dict:
+        """Entraîne l'autoencoder sur la baseline humaine (données normales)."""
+        self._scaler_min = X.min(axis=0)
+        self._scaler_range = X.max(axis=0) - self._scaler_min
+        X_t = self._to_tensor(X)
+
+        dataset = torch.utils.data.TensorDataset(X_t)
+        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
+        optimizer = torch.optim.Adam(self._all_params, lr=lr, weight_decay=1e-5)
+        criterion = nn.MSELoss()
+
+        self.encoder.train()
+        self.decoder.train()
+        losses = []
+        for epoch in range(epochs):
+            epoch_loss = 0.0
+            for (batch,) in loader:
+                latent = self.encoder(batch)
+                recon = self.decoder(latent)
+                loss = criterion(recon, batch)
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+                epoch_loss += loss.item() * len(batch)
+            losses.append(epoch_loss / len(X_t))
+        return {'final_loss': losses[-1], 'epochs': epochs, 'n_samples': len(X)}
+
+    def score_samples(self, X: np.ndarray) -> np.ndarray:
+        """Retourne l'erreur de reconstruction MSE par échantillon (plus élevé = plus anomal)."""
+        self.encoder.eval()
+        self.decoder.eval()
+        X_t = self._to_tensor(X)
+        with torch.no_grad():
+            latent = self.encoder(X_t)
+            recon = self.decoder(latent)
+            mse = ((recon - X_t) ** 2).mean(dim=1).numpy()
+        return mse
+
+    def encode(self, X: np.ndarray) -> np.ndarray:
+        """Retourne l'espace latent (pour HDBSCAN clustering)."""
+        self.encoder.eval()
+        X_t = self._to_tensor(X)
+        with torch.no_grad():
+            return self.encoder(X_t).numpy()
+
+    def state_dict(self) -> dict:
+        return {
+            'encoder': self.encoder.state_dict(),
+            'decoder': self.decoder.state_dict(),
+            'scaler_min': self._scaler_min,
+            'scaler_range': self._scaler_range,
+            'n_features': self.n_features,
+            'latent_dim': self.latent_dim,
+        }
+
+    @classmethod
+    def load_state_dict(cls, state: dict) -> 'TrafficAutoEncoder':
+        ae = cls(state['n_features'], state['latent_dim'])
+        ae._scaler_min = state['scaler_min']
+        ae._scaler_range = state['scaler_range']
+        ae.encoder.load_state_dict(state['encoder'])
+        ae.decoder.load_state_dict(state['decoder'])
+        return ae
+
+
+def _ae_model_path(name: str, version_id: str) -> str:
+    return os.path.join(MODEL_DIR, f'ae_{name}_{version_id}.pt')
+
+
 def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list, cycle_id: str):
    """Charge le modèle IsolationForest existant ou en entraîne un nouveau si nécessaire.

@ -283,7 +413,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
    dérive, entraîne un nouveau modèle sur ``human_baseline``, le sérialise sur disque,
    met à jour le fichier pointeur et purge les anciennes versions.

-    Retourne l'objet IsolationForest entraîné ou rechargé.
+    Retourne (IsolationForest, TrafficAutoEncoder|None).
    """
    model_path, meta = _get_current_version(name)
    if model_path and meta:
@ -311,7 +441,16 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
                'trained_at': meta['trained_at'], 'human_samples': meta.get('human_samples', '?'),
                'retrain_in_hours': round(RETRAIN_INTERVAL_H - age_h, 1), 'drift_score': round(drift_score, 3)
            })
-            return joblib.load(model_path)
+            ae_loaded = None
+            if TORCH_AVAILABLE and AE_WEIGHT > 0:
+                ae_path = _ae_model_path(name, meta['version_id'])
+                if os.path.exists(ae_path):
+                    try:
+                        ae_loaded = TrafficAutoEncoder.load_state_dict(torch.load(ae_path, weights_only=False))
+                        log_info(f"[{name}] Autoencoder v{meta['version_id']} rechargé.")
+                    except Exception as exc:
+                        log_info(f"[{name}] Erreur chargement AE : {exc} — AE désactivé ce cycle.")
+            return joblib.load(model_path), ae_loaded
        elif not drift_forced:
            log_info(f"[{name}] Modèle v{meta['version_id']} expiré ({age_h:.1f}h ≥ {RETRAIN_INTERVAL_H}h) — retraining.")

@ -363,7 +502,15 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
        # Tenter de réutiliser le modèle précédent
        if model_path and os.path.exists(model_path):
            log_info(f"[{name}] Conservation du modèle précédent v{meta.get('version_id', '?')}.")
-            return joblib.load(model_path)
+            ae_prev = None
+            if TORCH_AVAILABLE and AE_WEIGHT > 0:
+                ae_prev_path = _ae_model_path(name, meta.get('version_id', ''))
+                if os.path.exists(ae_prev_path):
+                    try:
+                        ae_prev = TrafficAutoEncoder.load_state_dict(torch.load(ae_prev_path, weights_only=False))
+                    except Exception:
+                        pass
+            return joblib.load(model_path), ae_prev
        log_info(f"[{name}] Aucun modèle précédent — utilisation du modèle rejeté par défaut.")

    # A1 — Sauvegarder les statistiques de distribution avec quantile digest pour drift detection
@ -381,6 +528,19 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
    new_meta_path = os.path.join(MODEL_DIR, f'model_{name}_{version_id}.meta.json')
    joblib.dump(model, new_model_path)

+    # Entraînement de l'Autoencoder en parallèle (si PyTorch disponible et AE_WEIGHT > 0)
+    ae_model = None
+    if TORCH_AVAILABLE and AE_WEIGHT > 0:
+        try:
+            ae_model = TrafficAutoEncoder(n_features=len(features))
+            ae_stats = ae_model.fit(X_train.values)
+            ae_path = _ae_model_path(name, version_id)
+            torch.save(ae_model.state_dict(), ae_path)
+            log_info(f"[{name}] Autoencoder entraîné : loss={ae_stats['final_loss']:.6f}, epochs={ae_stats['epochs']}")
+        except Exception as exc:
+            log_info(f"[{name}] Autoencoder training échoué : {exc} — AE désactivé.")
+            ae_model = None
+
    previous_version = meta.get('version_id', None) if meta else None
    new_meta = {
        'version_id': version_id, 'trained_at': datetime.now().isoformat(),
@ -389,6 +549,8 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
        'model_name': name, 'previous_version': previous_version,
        'retrain_interval': RETRAIN_INTERVAL_H, 'baseline_stats': baseline_stats,
        'algorithm': 'ExtendedIsolationForest' if EIF_AVAILABLE else 'IsolationForest',
+        'autoencoder': ae_model is not None,
+        'ae_weight': AE_WEIGHT if ae_model else 0.0,
        'validation': {
            'val_size': len(X_val), 'train_size': len(X_train),
            'val_mean_score': round(val_mean_score, 4),
@ -407,7 +569,7 @@ def load_or_train_model(name: str, human_baseline: pd.DataFrame, features: list,
        'human_samples': len(human_baseline), 'next_retrain_in_h': RETRAIN_INTERVAL_H,
        'history_kept': MODEL_HISTORY_COUNT
    })
-    return model
+    return model, ae_model

 # ═══════════════════════════════════════════════════════════════════════════════
 # A1 — DÉTECTION DE DÉRIVE CONCEPTUELLE (CONCEPT DRIFT)
@ -619,11 +781,13 @@ def _build_reason(name: str, row: pd.Series, shap_top: dict) -> str:
 # ═══════════════════════════════════════════════════════════════════════════════
 # A8 — CLUSTERING COMPORTEMENTAL DES ANOMALIES (DBSCAN)
 # ═══════════════════════════════════════════════════════════════════════════════
-def _cluster_anomalies(anomalies: pd.DataFrame, features: list) -> pd.DataFrame:
+def _cluster_anomalies(anomalies: pd.DataFrame, features: list, ae_model=None) -> pd.DataFrame:
    """A8 : Applique HDBSCAN (ou DBSCAN en fallback) sur les features normalisées des anomalies.

    HDBSCAN est préféré car il détermine automatiquement le nombre de clusters
    et la densité optimale (pas de paramètre eps à régler manuellement).
+    Si un autoencoder est disponible, utilise l'espace latent (16-dim) au lieu des features brutes
+    pour un clustering plus expressif dans un espace de dimension réduite.
    Ajoute une colonne campaign_id : −1 = IP isolée, ≥0 = identifiant de campagne coordonnée.
    """
    anomalies = anomalies.copy()
@ -632,7 +796,17 @@ def _cluster_anomalies(anomalies: pd.DataFrame, features: list) -> pd.DataFrame:
        return anomalies
    try:
        X = anomalies[features].replace([np.inf, -np.inf], np.nan).fillna(0)
+        # Utiliser l'espace latent AE si disponible (meilleure séparation dans un espace 16-dim)
+        if ae_model is not None:
+            try:
+                X_scaled = ae_model.encode(X.values)
+                algo_prefix = 'AE+'
+            except Exception:
                X_scaled = StandardScaler().fit_transform(X)
+                algo_prefix = ''
+        else:
+            X_scaled = StandardScaler().fit_transform(X)
+            algo_prefix = ''
        if HDBSCAN_AVAILABLE:
            clusterer = _hdbscan.HDBSCAN(
                min_cluster_size=CLUSTERING_MIN_SAMPLES,
@ -644,7 +818,7 @@ def _cluster_anomalies(anomalies: pd.DataFrame, features: list) -> pd.DataFrame:
            labels = DBSCAN(eps=0.5, min_samples=CLUSTERING_MIN_SAMPLES).fit_predict(X_scaled)
        anomalies['campaign_id'] = labels
        n_campaigns = len(set(labels)) - (1 if -1 in labels else 0)
-        algo = 'HDBSCAN' if HDBSCAN_AVAILABLE else 'DBSCAN'
+        algo = algo_prefix + ('HDBSCAN' if HDBSCAN_AVAILABLE else 'DBSCAN')
        if n_campaigns > 0:
            log_info(f"[{algo}] {n_campaigns} campagne(s) détectée(s) parmi {len(anomalies)} anomalies.")
    except Exception as e:
@ -693,17 +867,34 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):
        return pd.DataFrame(), pd.DataFrame()

    # A1 — Dérive conceptuelle intégrée dans load_or_train_model
-    model = load_or_train_model(name, human_baseline, valid_features, cycle_id)
+    model, ae_model = load_or_train_model(name, human_baseline, valid_features, cycle_id)
    unknown_traffic = unknown_traffic.copy()

    X_test = unknown_traffic[valid_features].replace([np.inf, -np.inf], np.nan)
    X_test = X_test.fillna(X_test.median())
    raw_scores = model.decision_function(X_test)

-    # raw_anomaly_score : score brut IF pour comparaison au seuil et assignation du threat_level
-    # anomaly_score     : score normalisé [-1, 0] pour cohérence cross-modèles (A10)
-    unknown_traffic['raw_anomaly_score'] = raw_scores
+    # Combinaison EIF + Autoencoder si disponible
+    # Score final = (1-α) * eif_norm + α * ae_norm   où α = AE_WEIGHT
+    if ae_model is not None and AE_WEIGHT > 0:
+        try:
+            ae_recon_errors = ae_model.score_samples(X_test.values)
+            ae_norm = normalize_scores(-ae_recon_errors)  # plus élevé = plus anomal
+            eif_norm = normalize_scores(raw_scores)
+            combined_norm = (1 - AE_WEIGHT) * eif_norm + AE_WEIGHT * ae_norm
+            unknown_traffic['ae_recon_error'] = ae_recon_errors
+            unknown_traffic['anomaly_score'] = combined_norm
+            log_info(f"[{name}] Score combiné EIF+AE (α={AE_WEIGHT}): ae_mean={ae_recon_errors.mean():.6f}")
+        except Exception as exc:
+            log_info(f"[{name}] AE scoring échoué : {exc} — utilisation EIF seul.")
+            unknown_traffic['ae_recon_error'] = 0.0
            unknown_traffic['anomaly_score'] = normalize_scores(raw_scores)
+    else:
+        unknown_traffic['ae_recon_error'] = 0.0
+        unknown_traffic['anomaly_score'] = normalize_scores(raw_scores)
+
+    # raw_anomaly_score : score brut IF pour comparaison au seuil et assignation du threat_level
+    unknown_traffic['raw_anomaly_score'] = raw_scores
    unknown_traffic['model_name'] = name

    # A2 — Seuil adaptatif calculé sur les scores BRUTS (même échelle que ANOMALY_THRESHOLD)
@ -804,7 +995,7 @@ def run_semi_supervised_logic(df, features, name, cycle_id, recurrence_map):

        # A8 — Clustering DBSCAN pour identifier les campagnes coordonnées
        if ENABLE_CLUSTERING:
-            anomalies = _cluster_anomalies(anomalies, valid_features)
+            anomalies = _cluster_anomalies(anomalies, valid_features, ae_model=ae_model)

        anomalies['ja4'] = anomalies['ja4'].replace({'': 'HTTP_CLEAR_TEXT'})
        for _, row in anomalies.iterrows():
@ -1125,7 +1316,8 @@ def fetch_and_analyze():
                'anubis_bot_name', 'anubis_bot_action', 'anubis_bot_category',
                'anomaly_score', 'raw_anomaly_score', 'threat_level', 'model_name',
                'correlated', 'asn_number', 'asn_org', 'country_code', 'asn_label',
-                'hits', 'hit_velocity', 'fuzzing_index', 'post_ratio', 'campaign_id'
+                'hits', 'hit_velocity', 'fuzzing_index', 'post_ratio', 'campaign_id',
+                'ae_recon_error'
            ]
            scores_df = all_scored[[c for c in all_scores_cols if c in all_scored.columns]]
            client.insert_df(f'{DB}.ml_all_scores', scores_df)
--- a/services/bot-detector/bot_detector/requirements.txt
+++ b/services/bot-detector/bot_detector/requirements.txt
@ -5,5 +5,6 @@ shap==0.47.2
 scipy>=1.14
 hdbscan>=0.8.38
 isotree>=0.6.1
+torch>=2.0
 pyyaml>=6.0
 ja4-common @ file:///app/shared/ja4_common
--- a/services/bot-detector/bot_detector/tests/test_detector.py
+++ b/services/bot-detector/bot_detector/tests/test_detector.py
@ -333,3 +333,174 @@ def test_lag1_autocorrelation_bot_vs_human():
        rho_human = 0.0

    assert abs(rho_human) < 0.5, f"Human autocorrelation should be low, got {rho_human:.3f}"
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# AUTOENCODER TESTS
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_ae_torch_availability_flag():
+    """Verify torch availability detection works without crashing."""
+    try:
+        import torch
+        avail = True
+    except ImportError:
+        avail = False
+    assert isinstance(avail, bool)
+
+
+def _make_ae(n_features, latent_dim=4):
+    """Build a standalone TrafficAutoEncoder for testing (avoids importing bot_detector module)."""
+    import torch
+    import torch.nn as nn
+
+    class _AE:
+        def __init__(self, n_feat, ldim):
+            self.n_features = n_feat
+            self.latent_dim = ldim
+            self.device = torch.device('cpu')
+            dim1 = min(64, max(n_feat, ldim + 4))
+            dim2 = min(32, max(dim1 // 2, ldim + 2))
+            self.encoder = nn.Sequential(
+                nn.Linear(n_feat, dim1), nn.BatchNorm1d(dim1), nn.ReLU(),
+                nn.Linear(dim1, dim2), nn.BatchNorm1d(dim2), nn.ReLU(),
+                nn.Linear(dim2, ldim),
+            )
+            self.decoder = nn.Sequential(
+                nn.Linear(ldim, dim2), nn.BatchNorm1d(dim2), nn.ReLU(),
+                nn.Linear(dim2, dim1), nn.BatchNorm1d(dim1), nn.ReLU(),
+                nn.Linear(dim1, n_feat), nn.Sigmoid(),
+            )
+            self._all_params = list(self.encoder.parameters()) + list(self.decoder.parameters())
+            self._scaler_min = None
+            self._scaler_range = None
+
+        def _to_tensor(self, X):
+            if self._scaler_min is not None:
+                X_n = (X - self._scaler_min) / (self._scaler_range + 1e-9)
+            else:
+                X_n = X
+            return torch.tensor(np.clip(X_n, 0, 1), dtype=torch.float32)
+
+        def fit(self, X, epochs=50, lr=1e-3, batch_size=256):
+            self._scaler_min = X.min(axis=0)
+            self._scaler_range = X.max(axis=0) - self._scaler_min
+            X_t = self._to_tensor(X)
+            dataset = torch.utils.data.TensorDataset(X_t)
+            loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
+            optimizer = torch.optim.Adam(self._all_params, lr=lr, weight_decay=1e-5)
+            criterion = nn.MSELoss()
+            self.encoder.train(); self.decoder.train()
+            losses = []
+            for _ in range(epochs):
+                epoch_loss = 0.0
+                for (batch,) in loader:
+                    latent = self.encoder(batch)
+                    recon = self.decoder(latent)
+                    loss = criterion(recon, batch)
+                    optimizer.zero_grad(); loss.backward(); optimizer.step()
+                    epoch_loss += loss.item() * len(batch)
+                losses.append(epoch_loss / len(X_t))
+            return {'final_loss': losses[-1], 'epochs': epochs, 'n_samples': len(X)}
+
+        def score_samples(self, X):
+            self.encoder.eval(); self.decoder.eval()
+            X_t = self._to_tensor(X)
+            with torch.no_grad():
+                return ((self.decoder(self.encoder(X_t)) - X_t) ** 2).mean(dim=1).numpy()
+
+        def encode(self, X):
+            self.encoder.eval()
+            X_t = self._to_tensor(X)
+            with torch.no_grad():
+                return self.encoder(X_t).numpy()
+
+        def state_dict(self):
+            return {'encoder': self.encoder.state_dict(), 'decoder': self.decoder.state_dict(),
+                    'scaler_min': self._scaler_min, 'scaler_range': self._scaler_range,
+                    'n_features': self.n_features, 'latent_dim': self.latent_dim}
+
+        @classmethod
+        def load_state_dict(cls, state):
+            ae = cls(state['n_features'], state['latent_dim'])
+            ae._scaler_min = state['scaler_min']
+            ae._scaler_range = state['scaler_range']
+            ae.encoder.load_state_dict(state['encoder'])
+            ae.decoder.load_state_dict(state['decoder'])
+            return ae
+
+    return _AE(n_features, latent_dim)
+
+
+def test_ae_class_train_and_score():
+    """TrafficAutoEncoder trains on normal data and scores anomalies higher."""
+    try:
+        import torch
+    except ImportError:
+        pytest.skip("torch not installed")
+
+    rng = np.random.default_rng(42)
+    n_features = 10
+    X_normal = rng.normal(0.5, 0.1, (200, n_features)).clip(0, 1)
+    X_anomaly = rng.uniform(0.8, 1.0, (20, n_features))
+
+    ae = _make_ae(n_features, latent_dim=4)
+    stats = ae.fit(X_normal, epochs=30, lr=1e-3)
+    assert stats['final_loss'] > 0, "Loss should be positive"
+    assert stats['epochs'] == 30
+    assert stats['n_samples'] == 200
+
+    normal_scores = ae.score_samples(X_normal)
+    anomaly_scores = ae.score_samples(X_anomaly)
+    assert np.mean(anomaly_scores) > np.mean(normal_scores), \
+        f"Anomaly MSE ({np.mean(anomaly_scores):.4f}) should > normal MSE ({np.mean(normal_scores):.4f})"
+
+
+def test_ae_encode_latent_space():
+    """Autoencoder encode() returns correct dimensionality."""
+    try:
+        import torch
+    except ImportError:
+        pytest.skip("torch not installed")
+
+    rng = np.random.default_rng(42)
+    X = rng.normal(0.5, 0.1, (50, 8)).clip(0, 1)
+
+    ae = _make_ae(8, latent_dim=4)
+    ae.fit(X, epochs=5)
+    latent = ae.encode(X)
+    assert latent.shape == (50, 4), f"Latent shape should be (50, 4), got {latent.shape}"
+
+
+def test_ae_state_dict_save_load():
+    """Autoencoder can save and load state dict."""
+    try:
+        import torch
+    except ImportError:
+        pytest.skip("torch not installed")
+
+    rng = np.random.default_rng(42)
+    X = rng.normal(0.5, 0.1, (100, 6)).clip(0, 1)
+
+    ae = _make_ae(6, latent_dim=3)
+    ae.fit(X, epochs=10)
+    scores_before = ae.score_samples(X)
+
+    state = ae.state_dict()
+    ae2 = type(ae).load_state_dict(state)
+    scores_after = ae2.score_samples(X)
+    np.testing.assert_allclose(scores_before, scores_after, rtol=1e-5,
+                               err_msg="Scores should be identical after load")
+
+
+def test_ae_weight_combination():
+    """Combined score should be weighted average of EIF and AE components."""
+    eif_norm = np.array([0.2, 0.8, 0.5])
+    ae_norm = np.array([0.3, 0.9, 0.4])
+    alpha = 0.30
+    combined = (1 - alpha) * eif_norm + alpha * ae_norm
+    expected = np.array([0.2*0.7 + 0.3*0.3, 0.8*0.7 + 0.9*0.3, 0.5*0.7 + 0.4*0.3])
+    np.testing.assert_allclose(combined, expected, rtol=1e-7)
+    # Combined should be between min and max of components
+    assert all(combined >= np.minimum(eif_norm, ae_norm) - 1e-9)
+    assert all(combined <= np.maximum(eif_norm, ae_norm) + 1e-9)
--- a/shared/clickhouse/06_ml_tables.sql
+++ b/shared/clickhouse/06_ml_tables.sql
@ -94,6 +94,8 @@ CREATE TABLE IF NOT EXISTS ja4_processing.ml_all_scores
    fuzzing_index     Float32,
    post_ratio        Float32,
    campaign_id       Int32,
+    -- Autoencoder reconstruction error (parallel scorer)
+    ae_recon_error    Float32 DEFAULT 0,
    -- Anubis enrichment (deploy_schema.sql item 12)
    anubis_bot_name     LowCardinality(String) DEFAULT '',
    anubis_bot_action   LowCardinality(String) DEFAULT '',