feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized

Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (including api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 16:42:59 +02:00
commit d469e39da7
278 changed files with 1621301 additions and 0 deletions

View File

@ -0,0 +1,110 @@
from unittest.mock import patch, MagicMock
from ja4_common.clickhouse import ClickHouseClient, get_client
import ja4_common.clickhouse as ch_module
def test_get_client_singleton():
    """get_client() hands back the same object on consecutive calls.

    ``ch_module._client`` is reset to ``None`` before the calls and again
    afterwards. The trailing reset keeps whatever get_client() stored while
    the driver factory was mocked from leaking into tests that run later
    (presumably ``_client`` is where the singleton is cached - confirm against
    ``ja4_common.clickhouse``).
    """
    ch_module._client = None
    try:
        with patch("ja4_common.clickhouse.clickhouse_connect.get_client") as mock_gc:
            mock_client = MagicMock()
            mock_client.ping.return_value = True
            mock_gc.return_value = mock_client
            c1 = get_client()
            c2 = get_client()
            assert c1 is c2
    finally:
        # Always restore the pristine state, even when the assertion fails.
        ch_module._client = None
def test_client_reconnects_on_ping_fail():
    """connect() asks the driver factory for a client when the held one fails ping."""
    wrapper = ClickHouseClient()
    with patch("ja4_common.clickhouse.clickhouse_connect.get_client") as driver_factory:
        stale = MagicMock()
        stale.ping.side_effect = Exception("connection lost")
        driver_factory.return_value = stale
        # Pretend a dead connection is already installed on the wrapper.
        wrapper._client = stale
        wrapper.connect()
        # At least one call to the patched factory proves a (re)connect happened.
        assert driver_factory.call_count >= 1
def test_get_client_returns_same_instance_on_second_call():
    """get_client() is a singleton: returns the same object on repeated calls.

    Also checks that the patched driver factory is invoked exactly once across
    both calls. ``ch_module._client`` is reset to ``None`` on the way out so
    the mock-backed instance created here does not leak into later tests.
    """
    ch_module._client = None
    try:
        with patch("ja4_common.clickhouse.clickhouse_connect.get_client") as mock_gc:
            mock_inner = MagicMock()
            mock_inner.ping.return_value = True
            mock_gc.return_value = mock_inner
            c1 = get_client()
            c2 = get_client()
            assert c1 is c2
            # The driver factory is hit once for c1; c2 reuses the same instance.
            assert mock_gc.call_count == 1
    finally:
        # Always restore the pristine state, even when an assertion fails.
        ch_module._client = None
def test_client_query_delegates_to_inner():
    """query() forwards the SQL to the wrapped client and returns its result."""
    wrapper = ClickHouseClient()
    with patch("ja4_common.clickhouse.clickhouse_connect.get_client") as driver_factory:
        driver = MagicMock()
        driver.ping.return_value = True
        driver.query.return_value = "result"
        driver_factory.return_value = driver

        outcome = wrapper.query("SELECT 1")

        assert outcome == "result"
        # With no params supplied, the inner call receives None as second argument.
        driver.query.assert_called_once_with("SELECT 1", None)
def test_client_query_with_params():
    """query() hands the params mapping through to the wrapped client unchanged."""
    wrapper = ClickHouseClient()
    with patch("ja4_common.clickhouse.clickhouse_connect.get_client") as driver_factory:
        driver = MagicMock()
        driver.ping.return_value = True
        driver_factory.return_value = driver

        wrapper.query("SELECT %(val)s", {"val": 42})

        driver.query.assert_called_once_with("SELECT %(val)s", {"val": 42})
def test_client_close_sets_client_to_none():
    """close() leaves the wrapper's ``_client`` attribute set to None."""
    wrapper = ClickHouseClient()
    with patch("ja4_common.clickhouse.clickhouse_connect.get_client") as driver_factory:
        driver = MagicMock()
        driver.ping.return_value = True
        driver_factory.return_value = driver

        # Precondition: after connect() a client reference is held.
        wrapper.connect()
        assert wrapper._client is not None

        wrapper.close()
        assert wrapper._client is None
def test_client_close_when_already_none():
    """close() does not raise when ``_client`` is already None."""
    wrapper = ClickHouseClient()
    # Force the "no connection" state explicitly.
    wrapper._client = None
    # The test passes as long as this call completes without an exception.
    wrapper.close()
def test_ping_returns_false_on_exception():
    """_ping() yields False when the wrapped client's ping() raises."""
    wrapper = ClickHouseClient()
    broken = MagicMock()
    broken.ping.side_effect = Exception("conn reset")
    wrapper._client = broken

    outcome = wrapper._ping()

    assert outcome is False
def test_ping_returns_true_on_success():
    """_ping() yields True when the wrapped client's ping() returns True."""
    wrapper = ClickHouseClient()
    healthy = MagicMock()
    healthy.ping.return_value = True
    wrapper._client = healthy

    outcome = wrapper._ping()

    assert outcome is True
def test_ping_returns_false_when_no_client():
    """_ping() yields False when the wrapper holds no client (``_client`` is None)."""
    wrapper = ClickHouseClient()
    wrapper._client = None

    outcome = wrapper._ping()

    assert outcome is False

View File

@ -0,0 +1,64 @@
import os
import pytest
from ja4_common.settings import ClickHouseSettings
def test_default_settings():
    """ClickHouseSettings() exposes the expected defaults when no env overrides exist.

    The CLICKHOUSE_* variables are removed for the duration of the test, so the
    result does not depend on the shell or CI environment running it. The other
    tests in this module show these variables override the settings.
    """
    overridable = (
        "CLICKHOUSE_HOST",
        "CLICKHOUSE_PORT",
        "CLICKHOUSE_DB",
        "CLICKHOUSE_USER",
        "CLICKHOUSE_PASSWORD",
    )
    # MonkeyPatch.context() restores the original environment on exit.
    with pytest.MonkeyPatch.context() as mp:
        for name in overridable:
            mp.delenv(name, raising=False)
        s = ClickHouseSettings()
        assert s.CLICKHOUSE_HOST == "clickhouse"
        assert s.CLICKHOUSE_PORT == 8123
        assert s.CLICKHOUSE_DB == "mabase_prod"
        assert s.CLICKHOUSE_USER == "admin"
        assert s.CLICKHOUSE_PASSWORD == ""
def test_settings_from_env(monkeypatch):
    """Host, port and database are taken from CLICKHOUSE_* environment variables."""
    overrides = {
        "CLICKHOUSE_HOST": "myhost",
        "CLICKHOUSE_PORT": "9000",
        "CLICKHOUSE_DB": "testdb",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)

    settings = ClickHouseSettings()

    assert settings.CLICKHOUSE_HOST == "myhost"
    # The port is set as a string but compared against an integer.
    assert settings.CLICKHOUSE_PORT == 9000
    assert settings.CLICKHOUSE_DB == "testdb"
def test_settings_password_default_empty():
    """CLICKHOUSE_PASSWORD defaults to the empty string when the env var is unset.

    The variable is removed for the duration of the test so an exported
    password in the surrounding environment cannot make the check fail.
    """
    # MonkeyPatch.context() restores the original environment on exit.
    with pytest.MonkeyPatch.context() as mp:
        mp.delenv("CLICKHOUSE_PASSWORD", raising=False)
        s = ClickHouseSettings()
        assert s.CLICKHOUSE_PASSWORD == ""
def test_settings_user_default():
    """CLICKHOUSE_USER defaults to "admin" when the env var is unset.

    The variable is removed for the duration of the test so an exported user
    name in the surrounding environment cannot make the check fail.
    """
    # MonkeyPatch.context() restores the original environment on exit.
    with pytest.MonkeyPatch.context() as mp:
        mp.delenv("CLICKHOUSE_USER", raising=False)
        s = ClickHouseSettings()
        assert s.CLICKHOUSE_USER == "admin"
def test_settings_password_from_env(monkeypatch):
    """A CLICKHOUSE_PASSWORD environment variable is reflected in the settings."""
    expected = "secret"
    monkeypatch.setenv("CLICKHOUSE_PASSWORD", expected)

    settings = ClickHouseSettings()

    assert settings.CLICKHOUSE_PASSWORD == expected
def test_settings_port_is_int():
    """CLICKHOUSE_PORT on a freshly built settings object is an int."""
    port = ClickHouseSettings().CLICKHOUSE_PORT
    assert isinstance(port, int)
def test_settings_port_from_env_string(monkeypatch):
    """A CLICKHOUSE_PORT env value of "9100" reads back equal to the integer 9100."""
    monkeypatch.setenv("CLICKHOUSE_PORT", "9100")

    settings = ClickHouseSettings()

    assert settings.CLICKHOUSE_PORT == 9100
def test_settings_all_fields_overridable(monkeypatch):
    """Every ClickHouse setting checked here can be overridden via its env variable."""
    overrides = {
        "CLICKHOUSE_HOST": "h1",
        "CLICKHOUSE_PORT": "1234",
        "CLICKHOUSE_DB": "mydb",
        "CLICKHOUSE_USER": "myuser",
        "CLICKHOUSE_PASSWORD": "mypass",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)

    settings = ClickHouseSettings()

    assert settings.CLICKHOUSE_HOST == "h1"
    # The port is set as a string but compared against an integer.
    assert settings.CLICKHOUSE_PORT == 1234
    assert settings.CLICKHOUSE_DB == "mydb"
    assert settings.CLICKHOUSE_USER == "myuser"
    assert settings.CLICKHOUSE_PASSWORD == "mypass"