feat(ml): replace Autoencoder with RealNVP Normalizing Flow and add SessionTransformer embeddings

Replace TrafficAutoEncoder (MSE reconstruction scoring) with TrafficNormalizingFlow (RealNVP via FrEIA, 4 affine coupling blocks, anomaly score = -log p(x)) for mathematically rigorous density estimation.

Add SessionTransformer module producing 32-dimensional sequence embeddings from raw HTTP request sequences (path, method, timing) via a lightweight TransformerEncoder, replacing path_transition_entropy and cadence_cv features.

Update thesis documentation sections 2.4.2b and 3.8 accordingly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -349,31 +349,39 @@ def test_ae_torch_availability_flag():
|
||||
assert isinstance(avail, bool)
|
||||
|
||||
|
||||
def _make_ae(n_features, latent_dim=4):
|
||||
"""Build a standalone TrafficAutoEncoder for testing (avoids importing bot_detector module)."""
|
||||
def _make_nf(n_features):
|
||||
"""Build a standalone TrafficNormalizingFlow for testing (avoids importing bot_detector module)."""
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
class _AE:
|
||||
def __init__(self, n_feat, ldim):
|
||||
class _NF:
|
||||
def __init__(self, n_feat):
|
||||
self.n_features = n_feat
|
||||
self.latent_dim = ldim
|
||||
self.device = torch.device('cpu')
|
||||
dim1 = min(64, max(n_feat, ldim + 4))
|
||||
dim2 = min(32, max(dim1 // 2, ldim + 2))
|
||||
self.encoder = nn.Sequential(
|
||||
nn.Linear(n_feat, dim1), nn.BatchNorm1d(dim1), nn.ReLU(),
|
||||
nn.Linear(dim1, dim2), nn.BatchNorm1d(dim2), nn.ReLU(),
|
||||
nn.Linear(dim2, ldim),
|
||||
)
|
||||
self.decoder = nn.Sequential(
|
||||
nn.Linear(ldim, dim2), nn.BatchNorm1d(dim2), nn.ReLU(),
|
||||
nn.Linear(dim2, dim1), nn.BatchNorm1d(dim1), nn.ReLU(),
|
||||
nn.Linear(dim1, n_feat), nn.Sigmoid(),
|
||||
)
|
||||
self._all_params = list(self.encoder.parameters()) + list(self.decoder.parameters())
|
||||
self._scaler_min = None
|
||||
self._scaler_range = None
|
||||
self._build_model()
|
||||
|
||||
def _subnet_fc(self, c_in, c_out):
|
||||
return nn.Sequential(
|
||||
nn.Linear(c_in, 64), nn.ReLU(),
|
||||
nn.Linear(64, 64), nn.ReLU(),
|
||||
nn.Linear(64, c_out),
|
||||
)
|
||||
|
||||
def _build_model(self):
|
||||
import FrEIA.framework as Ff
|
||||
import FrEIA.modules as Fm
|
||||
nodes = [Ff.InputNode(self.n_features, name='input')]
|
||||
for i in range(4):
|
||||
nodes.append(Ff.Node(
|
||||
nodes[-1],
|
||||
Fm.AllInOneBlock,
|
||||
{'subnet_constructor': self._subnet_fc, 'affine_clamping': 2.0},
|
||||
name=f'coupling_{i}',
|
||||
))
|
||||
nodes.append(Ff.OutputNode(nodes[-1], name='output'))
|
||||
self.flow = Ff.GraphINN(nodes, verbose=False).to(self.device)
|
||||
|
||||
def _to_tensor(self, X):
|
||||
if self._scaler_min is not None:
|
||||
@ -382,119 +390,134 @@ def _make_ae(n_features, latent_dim=4):
|
||||
X_n = X
|
||||
return torch.tensor(np.clip(X_n, 0, 1), dtype=torch.float32)
|
||||
|
||||
def log_likelihood(self, x):
|
||||
z, log_det = self.flow(x)
|
||||
log_pz = -0.5 * (z ** 2).sum(dim=1) - 0.5 * self.n_features * np.log(2 * np.pi)
|
||||
return log_pz + log_det
|
||||
|
||||
def fit(self, X, epochs=50, lr=1e-3, batch_size=256):
|
||||
self._scaler_min = X.min(axis=0)
|
||||
self._scaler_range = X.max(axis=0) - self._scaler_min
|
||||
X_t = self._to_tensor(X)
|
||||
dataset = torch.utils.data.TensorDataset(X_t)
|
||||
loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
|
||||
optimizer = torch.optim.Adam(self._all_params, lr=lr, weight_decay=1e-5)
|
||||
criterion = nn.MSELoss()
|
||||
self.encoder.train(); self.decoder.train()
|
||||
optimizer = torch.optim.Adam(self.flow.parameters(), lr=lr, weight_decay=1e-5)
|
||||
self.flow.train()
|
||||
losses = []
|
||||
for _ in range(epochs):
|
||||
epoch_loss = 0.0
|
||||
for (batch,) in loader:
|
||||
latent = self.encoder(batch)
|
||||
recon = self.decoder(latent)
|
||||
loss = criterion(recon, batch)
|
||||
log_p = self.log_likelihood(batch)
|
||||
loss = -log_p.mean()
|
||||
optimizer.zero_grad(); loss.backward(); optimizer.step()
|
||||
epoch_loss += loss.item() * len(batch)
|
||||
losses.append(epoch_loss / len(X_t))
|
||||
return {'final_loss': losses[-1], 'epochs': epochs, 'n_samples': len(X)}
|
||||
|
||||
def score_samples(self, X):
|
||||
self.encoder.eval(); self.decoder.eval()
|
||||
self.flow.eval()
|
||||
X_t = self._to_tensor(X)
|
||||
with torch.no_grad():
|
||||
return ((self.decoder(self.encoder(X_t)) - X_t) ** 2).mean(dim=1).numpy()
|
||||
return -self.log_likelihood(X_t).numpy()
|
||||
|
||||
def encode(self, X):
|
||||
self.encoder.eval()
|
||||
self.flow.eval()
|
||||
X_t = self._to_tensor(X)
|
||||
with torch.no_grad():
|
||||
return self.encoder(X_t).numpy()
|
||||
z, _ = self.flow(X_t)
|
||||
return z.numpy()
|
||||
|
||||
def state_dict(self):
|
||||
return {'encoder': self.encoder.state_dict(), 'decoder': self.decoder.state_dict(),
|
||||
return {'flow': self.flow.state_dict(),
|
||||
'scaler_min': self._scaler_min, 'scaler_range': self._scaler_range,
|
||||
'n_features': self.n_features, 'latent_dim': self.latent_dim}
|
||||
'n_features': self.n_features}
|
||||
|
||||
@classmethod
|
||||
def load_state_dict(cls, state):
|
||||
ae = cls(state['n_features'], state['latent_dim'])
|
||||
ae._scaler_min = state['scaler_min']
|
||||
ae._scaler_range = state['scaler_range']
|
||||
ae.encoder.load_state_dict(state['encoder'])
|
||||
ae.decoder.load_state_dict(state['decoder'])
|
||||
return ae
|
||||
nf = cls(state['n_features'])
|
||||
nf._scaler_min = state['scaler_min']
|
||||
nf._scaler_range = state['scaler_range']
|
||||
nf.flow.load_state_dict(state['flow'])
|
||||
return nf
|
||||
|
||||
return _AE(n_features, latent_dim)
|
||||
return _NF(n_features)
|
||||
|
||||
|
||||
def test_ae_class_train_and_score():
|
||||
"""TrafficAutoEncoder trains on normal data and scores anomalies higher."""
|
||||
def test_nf_class_train_and_score():
|
||||
"""TrafficNormalizingFlow trains on normal data and scores anomalies higher."""
|
||||
try:
|
||||
import torch
|
||||
except ImportError:
|
||||
pytest.skip("torch not installed")
|
||||
try:
|
||||
import FrEIA
|
||||
except ImportError:
|
||||
pytest.skip("FrEIA not installed")
|
||||
|
||||
rng = np.random.default_rng(42)
|
||||
n_features = 10
|
||||
X_normal = rng.normal(0.5, 0.1, (200, n_features)).clip(0, 1)
|
||||
X_anomaly = rng.uniform(0.8, 1.0, (20, n_features))
|
||||
|
||||
ae = _make_ae(n_features, latent_dim=4)
|
||||
stats = ae.fit(X_normal, epochs=30, lr=1e-3)
|
||||
assert stats['final_loss'] > 0, "Loss should be positive"
|
||||
nf = _make_nf(n_features)
|
||||
stats = nf.fit(X_normal, epochs=30, lr=1e-3)
|
||||
assert stats['final_loss'] > 0, "NLL should be positive"
|
||||
assert stats['epochs'] == 30
|
||||
assert stats['n_samples'] == 200
|
||||
|
||||
normal_scores = ae.score_samples(X_normal)
|
||||
anomaly_scores = ae.score_samples(X_anomaly)
|
||||
normal_scores = nf.score_samples(X_normal) # -log p(x)
|
||||
anomaly_scores = nf.score_samples(X_anomaly)
|
||||
assert np.mean(anomaly_scores) > np.mean(normal_scores), \
|
||||
f"Anomaly MSE ({np.mean(anomaly_scores):.4f}) should > normal MSE ({np.mean(normal_scores):.4f})"
|
||||
f"Anomaly -logp ({np.mean(anomaly_scores):.4f}) should > normal -logp ({np.mean(normal_scores):.4f})"
|
||||
|
||||
|
||||
def test_ae_encode_latent_space():
|
||||
"""Autoencoder encode() returns correct dimensionality."""
|
||||
def test_nf_encode_latent_space():
|
||||
"""Normalizing Flow encode() returns same dimensionality as input (bijection)."""
|
||||
try:
|
||||
import torch
|
||||
except ImportError:
|
||||
pytest.skip("torch not installed")
|
||||
try:
|
||||
import FrEIA
|
||||
except ImportError:
|
||||
pytest.skip("FrEIA not installed")
|
||||
|
||||
rng = np.random.default_rng(42)
|
||||
X = rng.normal(0.5, 0.1, (50, 8)).clip(0, 1)
|
||||
|
||||
ae = _make_ae(8, latent_dim=4)
|
||||
ae.fit(X, epochs=5)
|
||||
latent = ae.encode(X)
|
||||
assert latent.shape == (50, 4), f"Latent shape should be (50, 4), got {latent.shape}"
|
||||
nf = _make_nf(8)
|
||||
nf.fit(X, epochs=5)
|
||||
z = nf.encode(X)
|
||||
assert z.shape == (50, 8), f"Latent shape should be (50, 8), got {z.shape}"
|
||||
|
||||
|
||||
def test_ae_state_dict_save_load():
|
||||
"""Autoencoder can save and load state dict."""
|
||||
def test_nf_state_dict_save_load():
|
||||
"""Normalizing Flow can save and load state dict."""
|
||||
try:
|
||||
import torch
|
||||
except ImportError:
|
||||
pytest.skip("torch not installed")
|
||||
try:
|
||||
import FrEIA
|
||||
except ImportError:
|
||||
pytest.skip("FrEIA not installed")
|
||||
|
||||
rng = np.random.default_rng(42)
|
||||
X = rng.normal(0.5, 0.1, (100, 6)).clip(0, 1)
|
||||
|
||||
ae = _make_ae(6, latent_dim=3)
|
||||
ae.fit(X, epochs=10)
|
||||
scores_before = ae.score_samples(X)
|
||||
nf = _make_nf(6)
|
||||
nf.fit(X, epochs=10)
|
||||
scores_before = nf.score_samples(X)
|
||||
|
||||
state = ae.state_dict()
|
||||
ae2 = type(ae).load_state_dict(state)
|
||||
scores_after = ae2.score_samples(X)
|
||||
state = nf.state_dict()
|
||||
nf2 = type(nf).load_state_dict(state)
|
||||
scores_after = nf2.score_samples(X)
|
||||
np.testing.assert_allclose(scores_before, scores_after, rtol=1e-5,
|
||||
err_msg="Scores should be identical after load")
|
||||
|
||||
|
||||
def test_ae_weight_combination():
|
||||
"""Combined score should be weighted average of EIF and AE components."""
|
||||
def test_nf_weight_combination():
|
||||
"""Combined score should be weighted average of EIF and NF components."""
|
||||
eif_norm = np.array([0.2, 0.8, 0.5])
|
||||
ae_norm = np.array([0.3, 0.9, 0.4])
|
||||
alpha = 0.30
|
||||
|
||||
Reference in New Issue
Block a user