feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized

Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (including api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 16:42:59 +02:00
commit d469e39da7
278 changed files with 1621301 additions and 0 deletions

View File

@ -0,0 +1,17 @@
import pytest
from unittest.mock import MagicMock, patch
@pytest.fixture
def mock_ch_client():
    """Provide a ``MagicMock`` standing in for a ClickHouse client.

    The mock is pre-configured so that ``query()`` returns an object whose
    ``result_rows`` attribute is an empty list and ``command()`` returns
    ``None``.
    """
    empty_result = MagicMock(result_rows=[])
    # Dotted keyword names configure attributes of child mocks in one call.
    return MagicMock(
        **{
            "query.return_value": empty_result,
            "command.return_value": None,
        }
    )
@pytest.fixture(autouse=False)
def mock_get_client(mock_ch_client):
    """Patch ``ja4_common.clickhouse.get_client`` to return the mock client.

    The fixture is opt-in (not autouse). The patch is installed before the
    requesting test runs and removed afterwards, even if the test fails.

    Yields:
        The same mock client object supplied by ``mock_ch_client``.
    """
    patcher = patch("ja4_common.clickhouse.get_client", return_value=mock_ch_client)
    patcher.start()
    try:
        yield mock_ch_client
    finally:
        patcher.stop()

View File

@ -0,0 +1,166 @@
import os
import pytest
import pandas as pd
import numpy as np
from unittest.mock import patch, MagicMock
def test_settings_from_env(monkeypatch):
    """``ClickHouseSettings`` picks up ``CLICKHOUSE_HOST`` from the environment."""
    expected_host = "testhost"
    monkeypatch.setenv("CLICKHOUSE_HOST", expected_host)

    # The import is deliberately performed after the variable has been set.
    from ja4_common.settings import ClickHouseSettings

    settings = ClickHouseSettings()
    assert settings.CLICKHOUSE_HOST == expected_host
def test_feature_dataframe_validation():
    """A frame whose non-null share is below MIN_VALID_FEATURE_RATIO is flagged."""
    MIN_VALID_FEATURE_RATIO = 0.5

    # Only one of the four feature columns carries a value (ratio 0.25).
    features = pd.DataFrame(
        {"f1": [1.0], "f2": [None], "f3": [None], "f4": [None]}
    )
    per_column_ratio = features.notna().mean()
    non_null_ratio = per_column_ratio.mean()

    assert non_null_ratio < MIN_VALID_FEATURE_RATIO, "Should detect insufficient features"
def test_anomaly_threshold():
    """Only scores strictly below ANOMALY_THRESHOLD count as anomalies."""
    ANOMALY_THRESHOLD = -0.1
    anomaly_scores = np.array([-0.5, -0.3, 0.1, 0.2])

    # Boolean mask of the entries that fall under the threshold.
    flagged = anomaly_scores < ANOMALY_THRESHOLD

    assert np.count_nonzero(flagged) == 2, "Should detect 2 anomalies"
def test_dedup_logic():
    """A repeat detection for the same IP is skipped until DEDUP_TTL_MIN has elapsed."""
    from datetime import datetime, timedelta

    DEDUP_TTL_MIN = 60
    ttl = timedelta(minutes=DEDUP_TTL_MIN)
    last_seen = {}

    def should_insert(ip: str, now: datetime) -> bool:
        """Return True and record ``now`` unless ``ip`` was recorded less than one TTL ago."""
        previous = last_seen.get(ip)
        if previous is not None and now - previous < ttl:
            return False
        last_seen[ip] = now
        return True

    start = datetime(2024, 1, 1, 12, 0, 0)
    first = should_insert("1.2.3.4", start)
    within_ttl = should_insert("1.2.3.4", start + timedelta(minutes=30))
    past_ttl = should_insert("1.2.3.4", start + timedelta(minutes=61))

    assert first is True
    assert within_ttl is False  # within TTL
    assert past_ttl is True  # past TTL
def test_health_check():
    """A GET against a throwaway local HTTP server answers with status 200.

    The server is bound to an ephemeral loopback port and serves exactly one
    request on a background thread. All resources (response, worker thread,
    listening socket) are released even when the assertion or the request
    fails, so a failure cannot leave a thread blocked in ``accept()``.
    """
    import threading
    import urllib.request
    from http.server import BaseHTTPRequestHandler, HTTPServer

    class HealthHandler(BaseHTTPRequestHandler):
        """Minimal handler: every GET yields an empty 200 response."""

        def do_GET(self):
            self.send_response(200)
            self.end_headers()

        def log_message(self, *args):
            # Silence the default per-request logging to stderr.
            pass

    # The context manager closes the listening socket on exit.
    with HTTPServer(("127.0.0.1", 0), HealthHandler) as server:
        # Bound the wait in handle_request() so the worker cannot block
        # forever if the client never connects.
        server.timeout = 5
        port = server.server_address[1]
        worker = threading.Thread(target=server.handle_request, daemon=True)
        worker.start()
        try:
            # An empty ProxyHandler keeps the loopback request from being
            # routed through a proxy configured in the environment.
            opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
            with opener.open(f"http://127.0.0.1:{port}/", timeout=5) as resp:
                assert resp.status == 200
        finally:
            worker.join(timeout=5)
def test_dedup_different_ips_are_independent():
    """Each IP has its own dedup entry; one IP never blocks another."""
    from datetime import datetime, timedelta

    DEDUP_TTL_MIN = 60
    ttl_seconds = DEDUP_TTL_MIN * 60
    seen_at = {}

    def should_insert(ip: str, now: datetime) -> bool:
        """Return False for a repeat of ``ip`` inside the TTL, else record it and return True."""
        if ip not in seen_at:
            seen_at[ip] = now
            return True
        elapsed = (now - seen_at[ip]).total_seconds()
        if elapsed < ttl_seconds:
            return False
        seen_at[ip] = now
        return True

    start = datetime(2024, 1, 1, 12, 0, 0)
    half_hour_later = start + timedelta(minutes=30)

    assert should_insert("1.1.1.1", start) is True
    assert should_insert("2.2.2.2", start) is True  # Different IP, should be allowed
    assert should_insert("1.1.1.1", half_hour_later) is False  # Same IP within TTL
    assert should_insert("2.2.2.2", half_hour_later) is False  # Same IP within TTL
def test_dedup_exact_ttl_boundary():
    """Dedup: an insertion exactly at the TTL boundary is allowed again.

    The helper blocks a repeat only while the elapsed time is strictly less
    than the TTL, so one second before the boundary is still blocked while
    exactly ``DEDUP_TTL_MIN`` minutes later is accepted.
    """
    from datetime import datetime, timedelta

    DEDUP_TTL_MIN = 60
    dedup_cache = {}

    def should_insert(ip: str, now: datetime) -> bool:
        """Return False for a repeat of ``ip`` inside the TTL, else record it and return True."""
        if ip in dedup_cache:
            if (now - dedup_cache[ip]).total_seconds() < DEDUP_TTL_MIN * 60:
                return False
        dedup_cache[ip] = now
        return True

    now = datetime(2024, 1, 1, 12, 0, 0)
    assert should_insert("1.2.3.4", now) is True
    # One second short of the TTL: elapsed < TTL, so still blocked.
    assert should_insert("1.2.3.4", now + timedelta(minutes=59, seconds=59)) is False
    # Exactly at 60 minutes: elapsed == TTL is not < TTL, so it is allowed.
    assert should_insert("1.2.3.4", now + timedelta(minutes=60)) is True
def test_anomaly_threshold_no_anomalies():
    """Nothing is flagged when every score sits at or above the threshold."""
    import numpy as np

    ANOMALY_THRESHOLD = -0.1
    scores = np.array([0.0, 0.1, 0.5, 1.0])

    below_threshold = scores < ANOMALY_THRESHOLD
    flagged = scores[below_threshold]

    assert flagged.shape[0] == 0
def test_anomaly_threshold_all_anomalies():
    """Every item is flagged when every score is below the threshold."""
    import numpy as np

    ANOMALY_THRESHOLD = -0.1
    scores = np.array([-0.5, -0.3, -0.2, -0.15])

    below_threshold = scores < ANOMALY_THRESHOLD
    flagged = scores[below_threshold]

    assert flagged.shape[0] == 4
def test_feature_dataframe_all_valid():
    """A fully populated feature frame satisfies the minimum valid ratio."""
    import pandas as pd

    MIN_VALID_FEATURE_RATIO = 0.5

    # Every feature column holds a value.
    features = pd.DataFrame(
        {"f1": [1.0], "f2": [2.0], "f3": [3.0], "f4": [4.0]}
    )
    per_column_ratio = features.notna().mean()
    non_null_ratio = per_column_ratio.mean()

    assert non_null_ratio >= MIN_VALID_FEATURE_RATIO
def test_health_check_returns_correct_status():
    """A GET on ``/health`` answers 200 and the body can be read and contains ``ok``.

    The server is bound to an ephemeral loopback port and serves exactly one
    request on a background thread. All resources (response, worker thread,
    listening socket) are released even when an assertion or the request
    fails, so a failure cannot leave a thread blocked in ``accept()``.
    """
    import threading
    import urllib.request
    from http.server import BaseHTTPRequestHandler, HTTPServer

    class StatusHandler(BaseHTTPRequestHandler):
        """Minimal handler: every GET yields a 200 with a small JSON body."""

        def do_GET(self):
            self.send_response(200)
            self.send_header("Content-Type", "application/json")
            self.end_headers()
            self.wfile.write(b'{"status": "ok"}')

        def log_message(self, *args):
            # Silence the default per-request logging to stderr.
            pass

    # The context manager closes the listening socket on exit.
    with HTTPServer(("127.0.0.1", 0), StatusHandler) as server:
        # Bound the wait in handle_request() so the worker cannot block
        # forever if the client never connects.
        server.timeout = 5
        port = server.server_address[1]
        worker = threading.Thread(target=server.handle_request, daemon=True)
        worker.start()
        try:
            # An empty ProxyHandler keeps the loopback request from being
            # routed through a proxy configured in the environment.
            opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
            with opener.open(f"http://127.0.0.1:{port}/health", timeout=5) as resp:
                assert resp.status == 200
                body = resp.read()
            assert b"ok" in body
        finally:
            worker.join(timeout=5)