feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized
Services: - ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap) - logcorrelator: JA4 log correlation engine (Go, ClickHouse) - mod_reqin_log: Apache module (C, JSON request logging) - bot_detector: ML bot detection pipeline (Python) - dashboard: FastAPI/Streamlit analytics UI (Python) Shared libraries: - shared/go/ja4common: logger, config, shutdown, ipfilter (Go module) - shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package) - shared/clickhouse/: canonical SQL migrations (10 files) Build & packaging: - Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10) - go.work workspace linking sentinel, correlator, ja4common - Makefile with test-all, build-all, rpm-* targets Fixes applied: - go.work: 1.21 → 1.24.6 (required by sentinel) - correlator Dockerfiles: golang:1.21 → golang:1.24 - replace directives in go.mod for ja4common local path - pyproject.toml: setuptools.backends → setuptools.build_meta - Removed static libpcap linking (unavailable on Rocky 9) - Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32) - Rewrote corrupted test files (logger_test.go × 2) Test coverage: - correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%) - sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse) Documentation: - README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
17
services/bot-detector/bot_detector/tests/conftest.py
Normal file
17
services/bot-detector/bot_detector/tests/conftest.py
Normal file
@ -0,0 +1,17 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
@pytest.fixture
def mock_ch_client():
    """Provide a MagicMock standing in for a ClickHouse client.

    ``query()`` returns a mock whose ``result_rows`` is an empty list and
    ``command()`` returns ``None``.
    """
    empty_result = MagicMock(result_rows=[])
    stub = MagicMock()
    stub.configure_mock(
        **{
            "query.return_value": empty_result,
            "command.return_value": None,
        }
    )
    return stub
|
||||
|
||||
|
||||
@pytest.fixture
def mock_get_client(mock_ch_client):
    """Patch ``ja4_common.clickhouse.get_client`` to return the mock client.

    Opt-in fixture (not autouse): the patch is active only while the
    requesting test runs and is undone afterwards.

    NOTE(review): this replaces the attribute on ``ja4_common.clickhouse``
    only; a module that bound ``get_client`` by name before the patch would
    presumably keep the real function -- confirm against the callers.
    """
    target = "ja4_common.clickhouse.get_client"
    with patch(target, return_value=mock_ch_client):
        yield mock_ch_client
|
||||
166
services/bot-detector/bot_detector/tests/test_detector.py
Normal file
166
services/bot-detector/bot_detector/tests/test_detector.py
Normal file
@ -0,0 +1,166 @@
|
||||
import os
|
||||
import pytest
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
def test_settings_from_env(monkeypatch):
    """ClickHouseSettings picks up CLICKHOUSE_HOST from the environment."""
    # Set the variable before importing/instantiating the settings class.
    monkeypatch.setenv("CLICKHOUSE_HOST", "testhost")

    from ja4_common.settings import ClickHouseSettings

    settings = ClickHouseSettings()
    assert settings.CLICKHOUSE_HOST == "testhost"
|
||||
|
||||
|
||||
def test_feature_dataframe_validation():
    """A frame with too few populated features falls below MIN_VALID_FEATURE_RATIO."""
    MIN_VALID_FEATURE_RATIO = 0.5

    # One populated feature out of four -> 25% non-null.
    sparse = pd.DataFrame({"f1": [1.0], "f2": [None], "f3": [None], "f4": [None]})

    per_feature_fill = sparse.notna().mean()
    non_null_ratio = per_feature_fill.mean()

    assert non_null_ratio < MIN_VALID_FEATURE_RATIO, "Should detect insufficient features"
|
||||
|
||||
|
||||
def test_anomaly_threshold():
    """Only scores strictly below ANOMALY_THRESHOLD count as anomalies."""
    ANOMALY_THRESHOLD = -0.1
    scores = np.array([-0.5, -0.3, 0.1, 0.2])

    below_threshold = scores < ANOMALY_THRESHOLD
    flagged = scores[below_threshold]

    assert len(flagged) == 2, "Should detect 2 anomalies"
|
||||
|
||||
|
||||
def test_dedup_logic():
    """A repeat detection inside DEDUP_TTL_MIN is skipped; after it, re-inserted."""
    from datetime import datetime, timedelta

    DEDUP_TTL_MIN = 60
    last_inserted = {}

    def should_insert(ip: str, now: datetime) -> bool:
        # Guard clause: an entry younger than the TTL suppresses the insert
        # and leaves the cached timestamp untouched.
        previous = last_inserted.get(ip)
        if previous is not None:
            age_seconds = (now - previous).total_seconds()
            if age_seconds < DEDUP_TTL_MIN * 60:
                return False
        last_inserted[ip] = now
        return True

    start = datetime(2024, 1, 1, 12, 0, 0)
    assert should_insert("1.2.3.4", start) is True
    assert should_insert("1.2.3.4", start + timedelta(minutes=30)) is False  # within TTL
    assert should_insert("1.2.3.4", start + timedelta(minutes=61)) is True  # past TTL
|
||||
|
||||
|
||||
def test_health_check():
    """Health check endpoint returns 200.

    Starts a throwaway HTTP server on an ephemeral loopback port, serves
    exactly one request from a background thread, and checks the status code.
    The response, the worker thread and the listening socket are all released
    even when the request or the assertion fails.
    """
    import threading
    import urllib.request
    from http.server import HTTPServer, BaseHTTPRequestHandler

    class HealthHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_response(200)
            self.end_headers()

        def log_message(self, *args):
            # Keep the test output free of per-request access logs.
            pass

    # The server is a context manager; leaving the block calls server_close().
    with HTTPServer(("127.0.0.1", 0), HealthHandler) as server:
        port = server.server_address[1]
        # daemon=True so a request that never arrives cannot block interpreter exit.
        worker = threading.Thread(target=server.handle_request, daemon=True)
        worker.start()
        try:
            # A timeout turns a hung server into a test failure instead of a stall.
            with urllib.request.urlopen(f"http://127.0.0.1:{port}/", timeout=5) as resp:
                assert resp.status == 200
        finally:
            worker.join(timeout=5)
|
||||
|
||||
|
||||
def test_dedup_different_ips_are_independent():
    """Each IP has its own dedup entry; one IP never suppresses another."""
    from datetime import datetime, timedelta

    DEDUP_TTL_MIN = 60
    seen_at = {}

    def should_insert(ip: str, now: datetime) -> bool:
        # Suppress only when this same IP was recorded less than the TTL ago.
        if ip in seen_at and (now - seen_at[ip]).total_seconds() < DEDUP_TTL_MIN * 60:
            return False
        seen_at[ip] = now
        return True

    t0 = datetime(2024, 1, 1, 12, 0, 0)
    half_hour_later = t0 + timedelta(minutes=30)

    assert should_insert("1.1.1.1", t0) is True
    assert should_insert("2.2.2.2", t0) is True  # Different IP, should be allowed
    assert should_insert("1.1.1.1", half_hour_later) is False  # Same IP within TTL
    assert should_insert("2.2.2.2", half_hour_later) is False  # Same IP within TTL
|
||||
|
||||
|
||||
def test_dedup_exact_ttl_boundary():
    """Dedup: a detection exactly DEDUP_TTL_MIN after the cached one is inserted.

    The TTL check is a strict ``<``, so the suppression window is half-open:
    anything younger than the TTL is blocked, while an entry whose age equals
    the TTL has already expired.
    """
    from datetime import datetime, timedelta

    DEDUP_TTL_MIN = 60
    dedup_cache = {}

    def should_insert(ip: str, now: datetime) -> bool:
        if ip in dedup_cache:
            if (now - dedup_cache[ip]).total_seconds() < DEDUP_TTL_MIN * 60:
                return False
        dedup_cache[ip] = now
        return True

    now = datetime(2024, 1, 1, 12, 0, 0)
    assert should_insert("1.2.3.4", now) is True
    # One second short of the TTL is still inside the window (3599 < 3600).
    assert should_insert("1.2.3.4", now + timedelta(minutes=59, seconds=59)) is False
    # Exactly at 60 minutes: 3600 < 3600 is False, so the insert goes through.
    assert should_insert("1.2.3.4", now + timedelta(minutes=60)) is True
|
||||
|
||||
|
||||
def test_anomaly_threshold_no_anomalies():
    """Nothing is flagged when every score sits at or above the threshold."""
    import numpy as np

    ANOMALY_THRESHOLD = -0.1
    all_normal = np.array([0.0, 0.1, 0.5, 1.0])

    is_anomalous = all_normal < ANOMALY_THRESHOLD
    flagged = all_normal[is_anomalous]

    assert len(flagged) == 0
|
||||
|
||||
|
||||
def test_anomaly_threshold_all_anomalies():
    """Every item is flagged when every score is below the threshold."""
    import numpy as np

    ANOMALY_THRESHOLD = -0.1
    all_suspicious = np.array([-0.5, -0.3, -0.2, -0.15])

    is_anomalous = all_suspicious < ANOMALY_THRESHOLD
    flagged = all_suspicious[is_anomalous]

    assert len(flagged) == 4
|
||||
|
||||
|
||||
def test_feature_dataframe_all_valid():
    """A fully populated feature frame meets MIN_VALID_FEATURE_RATIO."""
    import pandas as pd

    MIN_VALID_FEATURE_RATIO = 0.5

    # Every feature has a value -> 100% non-null.
    dense = pd.DataFrame({"f1": [1.0], "f2": [2.0], "f3": [3.0], "f4": [4.0]})

    per_feature_fill = dense.notna().mean()
    non_null_ratio = per_feature_fill.mean()

    assert non_null_ratio >= MIN_VALID_FEATURE_RATIO
|
||||
|
||||
|
||||
def test_health_check_returns_correct_status():
    """Health check endpoint body is readable.

    Serves a single request from a throwaway loopback HTTP server that answers
    200 with a small JSON payload, then checks both the status code and the
    body. The response, the worker thread and the listening socket are all
    released even when the request or an assertion fails.
    """
    import threading
    import urllib.request
    from http.server import HTTPServer, BaseHTTPRequestHandler

    class StatusHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_response(200)
            self.send_header("Content-Type", "application/json")
            self.end_headers()
            self.wfile.write(b'{"status": "ok"}')

        def log_message(self, *args):
            # Keep the test output free of per-request access logs.
            pass

    # The server is a context manager; leaving the block calls server_close().
    with HTTPServer(("127.0.0.1", 0), StatusHandler) as server:
        port = server.server_address[1]
        # daemon=True so a request that never arrives cannot block interpreter exit.
        worker = threading.Thread(target=server.handle_request, daemon=True)
        worker.start()
        try:
            # A timeout turns a hung server into a test failure instead of a stall.
            with urllib.request.urlopen(
                f"http://127.0.0.1:{port}/health", timeout=5
            ) as resp:
                assert resp.status == 200
                body = resp.read()
            assert b"ok" in body
        finally:
            worker.join(timeout=5)
|
||||
Reference in New Issue
Block a user