feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized

Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 16:42:59 +02:00
commit d469e39da7
278 changed files with 1621301 additions and 0 deletions

View File

@ -0,0 +1,19 @@
# Build outputs
dist/
# Dependency directories
vendor/
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db
# Aider cache
.aider*

View File

@ -0,0 +1,2 @@
# correlator configuration — DO NOT COMMIT real values
LOGCORRELATOR_CLICKHOUSE_DSN=clickhouse://data_writer:ChangeMe@clickhouse:9000/mabase_prod

View File

@ -0,0 +1,73 @@
# CI for the logcorrelator service: unit tests with a coverage gate, a static
# Linux binary build, and a Docker image smoke test.
name: Build and Test

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  # Runs the unit tests with the race detector and enforces a minimum coverage.
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          # Keep in sync with the golang:1.24 builder image used by the Dockerfiles.
          go-version: '1.24'

      - name: Download dependencies
        run: go mod download

      - name: Run tests with coverage
        # The gate is 60%, the same threshold the Dockerfile builder stage
        # enforces, so CI and the Docker build agree on what "passing" means.
        run: |
          go test -race -coverprofile=coverage.txt -covermode=atomic ./...
          TOTAL=$(go tool cover -func=coverage.txt | grep total | awk '{gsub(/%/, "", $3); print $3}')
          echo "Coverage: ${TOTAL}%"
          if (( $(echo "$TOTAL < 60" | bc -l) )); then
            echo "Coverage ${TOTAL}% is below 60% threshold"
            exit 1
          fi

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.txt

  # Builds the stripped, statically linked production binary and publishes it
  # as a workflow artifact.
  build:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          # Keep in sync with the golang:1.24 builder image used by the Dockerfiles.
          go-version: '1.24'

      - name: Build binary
        run: |
          CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
            -ldflags="-w -s" \
            -o logcorrelator \
            ./cmd/logcorrelator

      - name: Upload binary artifact
        uses: actions/upload-artifact@v4
        with:
          name: logcorrelator-linux-amd64
          path: logcorrelator

  # Builds the Docker image and checks that the entrypoint starts.
  docker:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v4

      - name: Build Docker image
        # NOTE(review): the service Dockerfile copies go.work and shared/go/ja4common/,
        # i.e. it expects the monorepo root as build context — confirm that "."
        # is the monorepo root where this workflow runs.
        run: docker build -t logcorrelator:latest .

      - name: Run tests in Docker
        # Smoke test only: "|| true" means a non-zero exit never fails the job.
        run: |
          docker run --rm logcorrelator:latest --help || true

32
services/correlator/.gitignore vendored Normal file
View File

@ -0,0 +1,32 @@
# Build directory
/build/
/dist/
# Binaries
*.exe
*.exe~
*.dll
*.so
*.dylib
/logcorrelator
# Test binary
*.test
# Output of the go coverage tool
*.out
# Dependency directories
vendor/
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db
.aider*

View File

@ -0,0 +1,43 @@
# syntax=docker/dockerfile:1
# =============================================================================
# correlator — build/test image (builder) and minimal runtime image (runtime).
# Build context: the paths copied below (go.work, shared/go/ja4common/,
# services/...) are relative to the monorepo root.
# =============================================================================

# -----------------------------------------------------------------------------
# Stage "builder": downloads dependencies, optionally runs the tests with a
# coverage gate, then compiles a static binary.
# -----------------------------------------------------------------------------
FROM golang:1.24 AS builder
WORKDIR /build

# bc is needed by the coverage gate below (floating-point comparison).
RUN apt-get update && apt-get install -y --no-install-recommends git bc && rm -rf /var/lib/apt/lists/*

# Copy the Go workspace file, the shared module and the go.mod/go.sum files
# first so the dependency download layer is cached independently of the sources.
COPY go.work go.work.sum* ./
COPY shared/go/ja4common/ ./shared/go/ja4common/
COPY services/sentinel/go.mod services/sentinel/go.sum* ./services/sentinel/
COPY services/correlator/go.mod services/correlator/go.sum* ./services/correlator/

WORKDIR /build/services/correlator
RUN --mount=type=cache,target=/go/pkg/mod go mod download

COPY services/correlator/ /build/services/correlator/

# Pass --build-arg SKIP_TESTS=true to skip the test + coverage step.
ARG SKIP_TESTS=false

# Run the tests and fail the build when total coverage is below 60%.
# The comparison uses the POSIX `[ ... -eq 1 ]` form on bc's 0/1 output: RUN
# executes under /bin/sh, where the bash-only `(( ... ))` construct is not an
# arithmetic test and would let the gate pass unconditionally.
RUN --mount=type=cache,target=/go/pkg/mod \
    if [ "$SKIP_TESTS" = "false" ]; then \
        go test -race -coverprofile=coverage.txt -covermode=atomic ./... && \
        echo "=== Coverage Report ===" && \
        go tool cover -func=coverage.txt | grep total && \
        TOTAL=$(go tool cover -func=coverage.txt | grep total | awk '{gsub(/%/, "", $3); print $3}') && \
        echo "Total coverage: ${TOTAL}%" && \
        if [ "$(echo "$TOTAL < 60" | bc -l)" -eq 1 ]; then \
            echo "ERROR: Coverage ${TOTAL}% is below 60% threshold"; \
            exit 1; \
        fi && \
        echo "Coverage check passed!"; \
    else \
        echo "Skipping tests (SKIP_TESTS=true)"; \
    fi

# Static (CGO disabled), stripped linux/amd64 binary.
RUN --mount=type=cache,target=/go/pkg/mod \
    CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
    -ldflags="-w -s" -o /usr/bin/correlator ./cmd/logcorrelator

# -----------------------------------------------------------------------------
# Stage "runtime": empty base image holding only the binary and a default
# configuration (the example config shipped with the service).
# -----------------------------------------------------------------------------
FROM scratch AS runtime
COPY --from=builder /usr/bin/correlator /usr/bin/correlator
COPY --from=builder /build/services/correlator/config.example.yml /etc/correlator/correlator.yml
ENTRYPOINT ["/usr/bin/correlator"]
CMD ["-config", "/etc/correlator/correlator.yml"]

View File

@ -0,0 +1,110 @@
# syntax=docker/dockerfile:1
# =============================================================================
# correlator — RPM packaging Dockerfile (Rocky Linux 8/9, AlmaLinux 10)
# Build context: monorepo root (ja4-platform/)
# Pipeline: 1 Go builder → 1 rpm-builder (rpmbuild, 3 × dist) → 1 alpine output
# =============================================================================
# =============================================================================
# Stage 1: builder — compiles the Go binary
# Official golang:1.24 image (CGO_ENABLED=0 → statically linked, portable binary)
# =============================================================================
FROM golang:1.24 AS builder
WORKDIR /build
RUN apt-get update && apt-get install -y --no-install-recommends git bc && \
rm -rf /var/lib/apt/lists/*
# Copy the Go workspace and the shared module first (better layer caching)
COPY go.work go.work.sum* ./
COPY shared/go/ja4common/ ./shared/go/ja4common/
COPY services/sentinel/go.mod services/sentinel/go.sum* ./services/sentinel/
COPY services/correlator/go.mod services/correlator/go.sum* ./services/correlator/
WORKDIR /build/services/correlator
RUN --mount=type=cache,target=/go/pkg/mod go mod download
COPY services/correlator/ /build/services/correlator/
# VERSION is injected into the binary through -X main.Version (defaults to "dev")
ARG VERSION=dev
RUN --mount=type=cache,target=/go/pkg/mod \
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
go build -ldflags="-w -s -X main.Version=${VERSION}" \
-o /tmp/correlator \
./cmd/logcorrelator
# =============================================================================
# Stage 2: rpm-builder — builds the RPMs with rpmbuild
# A single stage, three successive rpmbuild invocations (el8, el9, el10).
# The spec reads its files from %{_builddir} (rpmbuild's BUILD directory).
# =============================================================================
FROM rockylinux:9 AS rpm-builder
WORKDIR /package
# Re-declared here: ARG values do not carry over from one stage to the next
ARG VERSION=dev
RUN dnf install -y rpm-build rpmdevtools && dnf clean all
# rpmbuild tree plus one output directory per target distribution
RUN mkdir -p /root/rpmbuild/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} && \
mkdir -p /packages/rpm/{el8,el9,el10}
# File layout under BUILD/ (expected by the correlator spec)
RUN mkdir -p /root/rpmbuild/BUILD/usr/bin \
/root/rpmbuild/BUILD/etc/logcorrelator \
/root/rpmbuild/BUILD/etc/systemd/system \
/root/rpmbuild/BUILD/etc/logrotate.d
COPY --from=builder /tmp/correlator /root/rpmbuild/BUILD/usr/bin/logcorrelator
# The same example config is staged twice: as the live config and as a .example copy
COPY services/correlator/config.example.yml /root/rpmbuild/BUILD/etc/logcorrelator/logcorrelator.yml
COPY services/correlator/config.example.yml /root/rpmbuild/BUILD/etc/logcorrelator/logcorrelator.yml.example
COPY services/correlator/logcorrelator.service /root/rpmbuild/BUILD/etc/systemd/system/logcorrelator.service
COPY services/correlator/packaging/rpm/logrotate /root/rpmbuild/BUILD/etc/logrotate.d/logcorrelator
# Executable binary, group-readable configs (640), world-readable unit and logrotate files
RUN chmod 755 /root/rpmbuild/BUILD/usr/bin/logcorrelator && \
chmod 640 /root/rpmbuild/BUILD/etc/logcorrelator/logcorrelator.yml && \
chmod 640 /root/rpmbuild/BUILD/etc/logcorrelator/logcorrelator.yml.example && \
chmod 644 /root/rpmbuild/BUILD/etc/systemd/system/logcorrelator.service && \
chmod 644 /root/rpmbuild/BUILD/etc/logrotate.d/logcorrelator
COPY services/correlator/packaging/rpm/logcorrelator.spec /root/rpmbuild/SPECS/logcorrelator.spec
# el8 — only the "dist" macro differs between the three builds
RUN rpmbuild --define "_topdir /root/rpmbuild" \
--define "dist .el8" \
--define "version ${VERSION}" \
--target x86_64 \
-bb /root/rpmbuild/SPECS/logcorrelator.spec && \
cp /root/rpmbuild/RPMS/x86_64/*.el8.x86_64.rpm /packages/rpm/el8/
# el9
RUN rpmbuild --define "_topdir /root/rpmbuild" \
--define "dist .el9" \
--define "version ${VERSION}" \
--target x86_64 \
-bb /root/rpmbuild/SPECS/logcorrelator.spec && \
cp /root/rpmbuild/RPMS/x86_64/*.el9.x86_64.rpm /packages/rpm/el9/
# el10
RUN rpmbuild --define "_topdir /root/rpmbuild" \
--define "dist .el10" \
--define "version ${VERSION}" \
--target x86_64 \
-bb /root/rpmbuild/SPECS/logcorrelator.spec && \
cp /root/rpmbuild/RPMS/x86_64/*.el10.x86_64.rpm /packages/rpm/el10/
# =============================================================================
# Stage 3: output — final image containing only the RPMs
# =============================================================================
FROM alpine:latest AS output
WORKDIR /packages
COPY --from=rpm-builder /packages/rpm/el8/*.rpm /packages/rpm/el8/
COPY --from=rpm-builder /packages/rpm/el9/*.rpm /packages/rpm/el9/
COPY --from=rpm-builder /packages/rpm/el10/*.rpm /packages/rpm/el10/
# Default command: list the RPMs produced for each distribution
CMD ["sh", "-c", \
"echo '=== RPM el8 ===' && ls -la /packages/rpm/el8/ && \
echo '' && echo '=== RPM el9 ===' && ls -la /packages/rpm/el9/ && \
echo '' && echo '=== RPM el10 ===' && ls -la /packages/rpm/el10/"]

View File

@ -0,0 +1,148 @@
# Makefile for the logcorrelator service: local build/test/lint, Docker images
# and RPM packaging. Lines starting with "##" are printed by `make help`.

.PHONY: build build-docker test test-docker lint fmt clean help \
	docker-build-dev docker-build-dev-no-test docker-build-runtime \
	package package-rpm package-rpm-sequential \
	test-package test-package-rpm \
	ci ci-test ci-build ci-package ci-package-test

# Docker parameters
DOCKER=docker
# Plain `docker build` and the buildx front-end
DOCKER_BUILD=$(DOCKER) build
DOCKER_BUILDX=$(DOCKER) buildx
DOCKER_RUN=$(DOCKER) run

# Docker build context. Dockerfile and Dockerfile.package COPY go.work,
# shared/go/ja4common/ and services/correlator/..., so the context must be the
# monorepo root rather than this directory.
# NOTE(review): assumes this Makefile lives in services/correlator/ — override
# BUILD_CONTEXT on the command line if the layout differs.
BUILD_CONTEXT ?= ../..

# Image names
DEV_IMAGE=logcorrelator-dev:latest
RUNTIME_IMAGE=logcorrelator:latest
PACKAGER_IMAGE=logcorrelator-packager:latest
# Per-distribution image names: no longer used by any target (Dockerfile.package
# builds all three dists in one stage); kept so existing overrides stay valid.
PACKAGER_IMAGE_EL8=logcorrelator-packager-el8:latest
PACKAGER_IMAGE_EL9=logcorrelator-packager-el9:latest
PACKAGER_IMAGE_EL10=logcorrelator-packager-el10:latest

# Binary name
BINARY_NAME=logcorrelator
DIST_DIR=dist

# Package version (passed to Dockerfile.package as the VERSION build arg)
PKG_VERSION ?= 1.1.22

# Enable BuildKit for better performance
export DOCKER_BUILDKIT=1

## build: Build the logcorrelator binary locally
build:
	mkdir -p $(DIST_DIR)
	go build -ldflags="-w -s" -o $(DIST_DIR)/$(BINARY_NAME) ./cmd/$(BINARY_NAME)

## docker-build-dev: Build the development Docker image (with tests and coverage)
docker-build-dev:
	$(DOCKER_BUILD) --target builder -t $(DEV_IMAGE) -f Dockerfile $(BUILD_CONTEXT)

## docker-build-dev-no-test: Build the development Docker image WITHOUT tests (faster)
docker-build-dev-no-test:
	$(DOCKER_BUILD) --target builder --no-cache --build-arg SKIP_TESTS=true -t $(DEV_IMAGE) -f Dockerfile $(BUILD_CONTEXT)

## docker-build-runtime: Build the runtime Docker image (fast, no tests)
docker-build-runtime:
	$(DOCKER_BUILD) --target runtime -t $(RUNTIME_IMAGE) -f Dockerfile $(BUILD_CONTEXT)

## test: Run unit tests locally
test:
	go test -race -coverprofile=coverage.out ./...

## test-docker: Run unit tests inside Docker container
test-docker: docker-build-dev
	@echo "Tests already run in builder stage"

## lint: Run linters
lint:
	go vet ./...
	gofmt -l .

## fmt: Format all Go files
fmt:
	gofmt -w .

## package: Build RPM packages for all target distributions
package: package-rpm

## package-rpm: Build RPM packages for Rocky Linux 8/9, AlmaLinux 10 (requires Docker)
## Uses buildx; el8, el9 and el10 RPMs all come from the single rpm-builder stage
package-rpm:
	mkdir -p $(DIST_DIR)/rpm/el8 $(DIST_DIR)/rpm/el9 $(DIST_DIR)/rpm/el10
	@echo "Starting parallel RPM builds for el8, el9, el10..."
# --load imports the buildx result into the local image store so that the
# extraction step below can run it.
	$(DOCKER_BUILDX) build --target output -t $(PACKAGER_IMAGE) \
		--build-arg VERSION=$(PKG_VERSION) \
		-f Dockerfile.package $(BUILD_CONTEXT) \
		--load
	@echo "Extracting RPM packages from Docker image..."
# CURDIR is set by make itself, unlike PWD which is inherited from the environment.
	$(DOCKER_RUN) --rm -v $(CURDIR)/$(DIST_DIR)/rpm:/output/rpm $(PACKAGER_IMAGE) sh -c \
		"cp -r /packages/rpm/el8 /output/rpm/ && \
		cp -r /packages/rpm/el9 /output/rpm/ && \
		cp -r /packages/rpm/el10 /output/rpm/"
	@echo "RPM packages created:"
	@echo " Enterprise Linux 8 (el8):"
	ls -la $(DIST_DIR)/rpm/el8/ 2>/dev/null || echo " (no packages)"
	@echo " Enterprise Linux 9 (el9):"
	ls -la $(DIST_DIR)/rpm/el9/ 2>/dev/null || echo " (no packages)"
	@echo " Enterprise Linux 10 (el10):"
	ls -la $(DIST_DIR)/rpm/el10/ 2>/dev/null || echo " (no packages)"

## package-rpm-sequential: Build RPM packages with plain docker build (fallback if buildx fails)
# Dockerfile.package only defines the stages builder, rpm-builder and output, so
# this fallback builds the same "output" target as package-rpm, just without
# buildx, and extracts the RPMs by running the resulting image.
package-rpm-sequential:
	mkdir -p $(DIST_DIR)/rpm/el8 $(DIST_DIR)/rpm/el9 $(DIST_DIR)/rpm/el10
	@echo "Building RPMs for el8, el9, el10..."
	$(DOCKER_BUILD) --target output -t $(PACKAGER_IMAGE) \
		--build-arg VERSION=$(PKG_VERSION) \
		-f Dockerfile.package $(BUILD_CONTEXT)
	@echo "Extracting RPM packages..."
	$(DOCKER_RUN) --rm -v $(CURDIR)/$(DIST_DIR)/rpm:/output/rpm $(PACKAGER_IMAGE) sh -c \
		"cp -r /packages/rpm/el8 /output/rpm/ && \
		cp -r /packages/rpm/el9 /output/rpm/ && \
		cp -r /packages/rpm/el10 /output/rpm/"

## test-package-rpm: Test RPM package installation in Docker
test-package-rpm: package-rpm
	./packaging/test/test-rpm.sh

## test-package: Test RPM package installation
test-package: test-package-rpm

## ci: Full CI pipeline (tests, build, packages, package tests)
ci: ci-test ci-build ci-package ci-package-test

## ci-test: Run all tests for CI
ci-test: test lint

## ci-build: Build for CI (production binary)
ci-build: build

## ci-package: Build all packages for CI
ci-package: package

## ci-package-test: Test all packages for CI
ci-package-test: test-package

## clean: Clean build artifacts and Docker images
clean:
	rm -rf $(DIST_DIR)/
	rm -f coverage.out
	$(DOCKER) rmi $(DEV_IMAGE) 2>/dev/null || true
	$(DOCKER) rmi $(RUNTIME_IMAGE) 2>/dev/null || true
	$(DOCKER) rmi $(PACKAGER_IMAGE) 2>/dev/null || true

## help: Show this help message
help:
	@echo "Usage: make [target]"
	@echo ""
	@echo "Targets:"
	@sed -n 's/^##//p' $(MAKEFILE_LIST) | column -t -s ':' | sed -e 's/^/ /'

View File

@ -0,0 +1,426 @@
# logcorrelator
Service de corrélation de logs HTTP et réseau écrit en Go.
## Description
**logcorrelator** reçoit deux flux de logs JSON via des sockets Unix datagrammes (SOCK_DGRAM) :
- **Source A** : logs HTTP applicatifs (Apache, reverse proxy)
- **Source B** : logs réseau (métadonnées IP/TCP, JA3/JA4, etc.)
Il corrèle les événements sur la base de `src_ip + src_port` dans une fenêtre temporelle configurable, et produit des logs corrélés vers :
- Un fichier local (JSON lines)
- ClickHouse (pour analyse et archivage)
Les logs opérationnels du service (démarrage, erreurs, métriques) sont écrits sur **stderr** et collectés par journald. Aucune donnée corrélée n'apparaît sur stdout.
## Architecture
```
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
│ Source A │────▶│ │────▶│ File Sink │
│ HTTP/Apache │ │ Correlation │ │ (JSON lines) │
│ (Unix DGRAM) │ │ Service │ └─────────────────┘
└─────────────────┘ │ │
│ - Buffers │ ┌─────────────────┐
┌─────────────────┐ │ - Time Window │────▶│ ClickHouse │
│ Source B │────▶│ - Orphan Policy │ │ Sink │
│ Réseau/JA4 │ │ - Keep-Alive │ └─────────────────┘
│ (Unix DGRAM) │ └──────────────────┘
└─────────────────┘
```
Architecture hexagonale : domaine pur (`internal/domain`), ports abstraits (`internal/ports`), adaptateurs (`internal/adapters`), orchestration (`internal/app`).
## Build (100% Docker)
Tout le build, les tests et le packaging RPM s'exécutent dans des conteneurs :
```bash
# Build complet avec tests (builder stage)
make docker-build-dev
# Packaging RPM (el8, el9, el10)
make package-rpm
# Build rapide sans tests
make docker-build-dev-no-test
# Tests en local (nécessite Go 1.24+)
make test
```
### Prérequis
- Docker 20.10+
## Installation
### Packages RPM
```bash
# Générer les packages
make package-rpm
# Installer (Rocky Linux / AlmaLinux)
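# (une seule des trois commandes, selon la distribution cible ; la version correspond à PKG_VERSION du Makefile)
sudo dnf install -y dist/rpm/el8/logcorrelator-1.1.22-1.el8.x86_64.rpm
sudo dnf install -y dist/rpm/el9/logcorrelator-1.1.22-1.el9.x86_64.rpm
sudo dnf install -y dist/rpm/el10/logcorrelator-1.1.22-1.el10.x86_64.rpm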
# Démarrer
sudo systemctl enable --now logcorrelator
sudo systemctl status logcorrelator
```
### Build manuel
```bash
# Binaire local (nécessite Go 1.24+)
go build -o logcorrelator ./cmd/logcorrelator
./logcorrelator -config config.example.yml
```
## Configuration
Fichier YAML. Voir `config.example.yml` pour un exemple complet.
```yaml
log:
level: INFO # DEBUG, INFO, WARN, ERROR
inputs:
unix_sockets:
- name: http
source_type: A # Source HTTP
path: /var/run/logcorrelator/http.socket
format: json
socket_permissions: "0666"
- name: network
source_type: B # Source réseau
path: /var/run/logcorrelator/network.socket
format: json
socket_permissions: "0666"
outputs:
file:
path: /var/log/logcorrelator/correlated.log
clickhouse:
enabled: false
dsn: clickhouse://user:pass@localhost:9000/db
table: http_logs_raw
batch_size: 500
flush_interval_ms: 200
max_buffer_size: 5000
drop_on_overflow: true
timeout_ms: 1000
stdout:
enabled: false # no-op pour les données ; logs opérationnels toujours sur stderr
correlation:
time_window:
value: 10
unit: s
orphan_policy:
apache_always_emit: true
apache_emit_delay_ms: 500 # délai avant émission orphelin A (ms)
network_emit: false
matching:
mode: one_to_many # Keep-Alive : un B peut corréler plusieurs A successifs
buffers:
max_http_items: 10000
max_network_items: 20000
ttl:
network_ttl_s: 120 # TTL remis à zéro à chaque corrélation (Keep-Alive)
# Exclure des IPs source (IPs uniques ou plages CIDR)
exclude_source_ips:
- 10.0.0.1
- 172.16.0.0/12
# Restreindre la corrélation à certains ports de destination (optionnel)
# Si la liste est vide, tous les ports sont corrélés
include_dest_ports:
- 80
- 443
metrics:
enabled: false
addr: ":8080"
```
### Format du DSN ClickHouse
```
clickhouse://username:password@host:port/database
```
Ports : `9000` (natif, recommandé) ou `8123` (HTTP).
## Format des logs
### Source A (HTTP)
```json
{
"src_ip": "192.168.1.1", "src_port": 8080,
"dst_ip": "10.0.0.1", "dst_port": 443,
"timestamp": 1704110400000000000,
"method": "GET", "path": "/api/test"
}
```
### Source B (Réseau)
```json
{
"src_ip": "192.168.1.1", "src_port": 8080,
"dst_ip": "10.0.0.1", "dst_port": 443,
"ja3": "abc123", "ja4": "xyz789"
}
```
### Log corrélé (sortie)
Structure JSON plate — tous les champs A et B sont fusionnés à la racine :
```json
{
"timestamp": "2024-01-01T12:00:00Z",
"src_ip": "192.168.1.1", "src_port": 8080,
"dst_ip": "10.0.0.1", "dst_port": 443,
"correlated": true,
"method": "GET", "path": "/api/test",
"ja3": "abc123", "ja4": "xyz789"
}
```
En cas de collision de champ entre A et B, les deux valeurs sont conservées avec préfixes `a_` et `b_`.
Les orphelins A (sans B correspondant) sont émis avec `"correlated": false, "orphan_side": "A"`.
## Schema ClickHouse
Le fichier `sql/init.sql` contient le schéma complet prêt à l'emploi.
```bash
clickhouse-client --multiquery < sql/init.sql
```
### Architecture des tables
```
http_logs_raw ← inserts du service (raw_json String)
└─ mv_http_logs ← vue matérialisée (parse JSON → colonnes typées)
http_logs ← table requêtable par les analystes
```
### Table `http_logs` — colonnes
| Groupe | Colonnes |
|---|---|
| Temporel | `time` DateTime, `log_date` Date |
| Réseau | `src_ip` IPv4, `src_port` UInt16, `dst_ip` IPv4, `dst_port` UInt16 |
| HTTP | `method`, `scheme`, `host`, `path`, `query`, `http_version` (LowCardinality) |
| Corrélation | `orphan_side`, `correlated` UInt8, `keepalives` UInt16, `a_timestamp`/`b_timestamp` UInt64, `conn_id` |
| IP meta | `ip_meta_df` UInt8, `ip_meta_id` UInt16, `ip_meta_total_length` UInt16, `ip_meta_ttl` UInt8 |
| TCP meta | `tcp_meta_options`, `tcp_meta_window_size` UInt32, `tcp_meta_mss` UInt16, `tcp_meta_window_scale` UInt8, `syn_to_clienthello_ms` Int32 |
| TLS / fingerprint | `tls_version`, `tls_sni`, `tls_alpn` (LowCardinality), `ja3`, `ja3_hash`, `ja4` |
| En-têtes HTTP | `header_user_agent`, `header_accept`, `header_accept_encoding`, `header_accept_language`, `header_x_request_id`, `header_x_trace_id`, `header_x_forwarded_for`, `header_sec_ch_ua*`, `header_sec_fetch_*` |
### Utilisateurs et permissions
```sql
-- data_writer : compte du service — INSERT sur http_logs_raw, plus SELECT (requis par la vue matérialisée)
GRANT INSERT ON mabase_prod.http_logs_raw TO data_writer;
GRANT SELECT ON mabase_prod.http_logs_raw TO data_writer;
-- analyst : lecture sur la table parsée
GRANT SELECT ON mabase_prod.http_logs TO analyst;
```
### Vérification de l'ingestion
```sql
-- Données brutes reçues
SELECT count(*), min(ingest_time), max(ingest_time) FROM mabase_prod.http_logs_raw;
-- Données parsées par la vue matérialisée
SELECT count(*), min(time), max(time) FROM mabase_prod.http_logs;
-- Derniers logs corrélés
SELECT time, src_ip, dst_ip, method, host, path, ja4
FROM mabase_prod.http_logs
WHERE correlated = 1
ORDER BY time DESC LIMIT 10;
```
## Signaux
| Signal | Comportement |
|--------|--------------|
| `SIGINT` / `SIGTERM` | Arrêt gracieux (drain buffers, flush sinks) |
| `SIGHUP` | Réouverture des fichiers de sortie (log rotation) |
## Logs internes
Les logs opérationnels vont sur **stderr** :
```bash
# Systemd
journalctl -u logcorrelator -f
# Docker
docker logs -f logcorrelator
```
## Structure du projet
```
cmd/logcorrelator/ # Point d'entrée
internal/
adapters/
inbound/unixsocket/ # Lecture SOCK_DGRAM → NormalizedEvent
outbound/
clickhouse/ # Sink ClickHouse (batch, retry, logging complet)
file/ # Sink fichier (JSON lines, SIGHUP reopen)
multi/ # Fan-out vers plusieurs sinks
stdout/ # No-op pour les données (logs opérationnels sur stderr)
app/ # Orchestrator (sources → corrélation → sinks)
config/ # Chargement/validation YAML
domain/ # CorrelationService, NormalizedEvent, CorrelatedLog
observability/ # Logger, métriques, serveur HTTP /metrics /health
ports/ # Interfaces EventSource, CorrelatedLogSink, CorrelationProcessor
config.example.yml # Exemple de configuration
Dockerfile # Build multi-stage (builder, runtime)
Dockerfile.package # Packaging RPM multi-distros (el8, el9, el10)
Makefile # Cibles de build
architecture.yml # Spécification architecture
logcorrelator.service # Unité systemd
```
## Débogage
### Logs DEBUG
```yaml
log:
level: DEBUG
```
Exemples de logs produits :
```
[unixsocket:http] DEBUG event received: source=A src_ip=192.168.1.1 src_port=8080
[correlation] DEBUG processing A event: key=192.168.1.1:8080
[correlation] DEBUG correlation found: A(src_ip=... src_port=... ts=...) + B(...)
[correlation] DEBUG A event has no matching B key in buffer: key=...
[correlation] DEBUG event excluded by IP filter: source=A src_ip=10.0.0.1 src_port=8080
[correlation] DEBUG event excluded by dest port filter: source=A dst_port=22
[correlation] DEBUG TTL reset for B event (Keep-Alive): key=... new_ttl=120s
[clickhouse] DEBUG batch sent: rows=42 table=http_logs_raw
```
### Serveur de métriques
```yaml
metrics:
enabled: true
addr: ":8080"
```
`GET /health` → `{"status":"healthy"}`
`GET /metrics` :
```json
{
"events_received_a": 1542, "events_received_b": 1498,
"correlations_success": 1450, "correlations_failed": 92,
"failed_no_match_key": 45, "failed_time_window": 23,
"failed_buffer_eviction": 5, "failed_ttl_expired": 12,
"failed_ip_excluded": 7, "failed_dest_port_filtered": 3,
"buffer_a_size": 23, "buffer_b_size": 18,
"orphans_emitted_a": 92, "orphans_pending_a": 4,
"keepalive_resets": 892
}
```
### Diagnostic par métriques
| Métrique élevée | Cause | Solution |
|---|---|---|
| `failed_no_match_key` | A et B n'ont pas le même `src_ip:src_port` | Vérifier les deux sources |
| `failed_time_window` | Timestamps trop éloignés | Augmenter `time_window.value` ou vérifier NTP |
| `failed_ttl_expired` | B expire avant corrélation | Augmenter `ttl.network_ttl_s` |
| `failed_buffer_eviction` | Buffers trop petits | Augmenter `buffers.max_http_items` / `max_network_items` |
| `failed_ip_excluded` | Trafic depuis des IPs exclues | Normal si attendu |
| `failed_dest_port_filtered` | Trafic sur des ports non listés | Vérifier `include_dest_ports` |
| `orphans_emitted_a` élevé | Beaucoup de A sans B | Vérifier que la source B envoie des événements |
### Filtrage par IP source
```yaml
correlation:
exclude_source_ips:
- 10.0.0.1 # IP unique (health checks)
- 172.16.0.0/12 # Plage CIDR
```
Les événements depuis ces IPs sont silencieusement ignorés (non corrélés, non émis en orphelin). La métrique `failed_ip_excluded` comptabilise les exclusions.
### Filtrage par port de destination
```yaml
correlation:
include_dest_ports:
- 80 # HTTP
- 443 # HTTPS
- 8080
- 8443
```
Si la liste est non vide, seuls les événements dont le `dst_port` est dans la liste participent à la corrélation. Les autres sont silencieusement ignorés. Liste vide = tous les ports corrélés (comportement par défaut). La métrique `failed_dest_port_filtered` comptabilise les exclusions.
### Scripts de test
```bash
# Script Bash (simple)
./scripts/test-correlation.sh -c 10 -v
# Script Python (scénarios complets : basic, time window, keepalive, différentes IPs)
pip install requests
python3 scripts/test-correlation-advanced.py --all
```
## Troubleshooting
### ClickHouse : erreurs d'insertion
- **`No such column`** : vérifier que la table `http_logs_raw` utilise la colonne unique `raw_json` (pas de colonnes séparées)
- **`ACCESS_DENIED`** : `GRANT INSERT ON mabase_prod.http_logs_raw TO data_writer;`
- Les erreurs de flush sont loggées en ERROR dans les logs du service
### Vue matérialisée vide
Si `http_logs_raw` a des données mais `http_logs` est vide :
```sql
-- Vérifier la vue
SHOW CREATE TABLE mabase_prod.mv_http_logs;
-- Vérifier les permissions (la MV s'exécute sous le compte du service)
GRANT SELECT ON mabase_prod.http_logs_raw TO data_writer;
```
### Sockets Unix : permission denied
Vérifier que `socket_permissions: "0666"` est configuré et que le répertoire `/var/run/logcorrelator` appartient à l'utilisateur `logcorrelator`.
### Service systemd ne démarre pas
```bash
journalctl -u logcorrelator -n 50 --no-pager
/usr/bin/logcorrelator -config /etc/logcorrelator/logcorrelator.yml
```
## License
MIT

View File

@ -0,0 +1,974 @@
service:
name: logcorrelator
context: http-network-correlation
language: go
pattern: hexagonal
description: >
logcorrelator est un service système (lancé par systemd) écrit en Go, chargé
de recevoir deux flux de logs JSON via des sockets Unix, de corréler les
événements HTTP applicatifs (source A, typiquement Apache ou reverse proxy)
avec des événements réseau (source B, métadonnées IP/TCP, JA3/JA4, etc.)
sur la base de la combinaison strictement définie src_ip + src_port, avec
une fenêtre temporelle configurable. Le service supporte les connexions
HTTP Keep-Alive : un log réseau peut être corrélé à plusieurs logs HTTP
successifs (stratégie 1-à-N). La rétention en mémoire est bornée par des
tailles de caches configurables et un TTL dynamique pour la source B. Le
service émet toujours les événements A même lorsqu'aucun événement B n'est
disponible, n'émet jamais de logs B seuls, et pousse les résultats vers
ClickHouse et/ou un fichier local.
Fonctionnalités de débogage incluses :
- Serveur de métriques HTTP (/metrics, /health)
- Logs DEBUG détaillés avec raisons des échecs de corrélation
- Filtrage des IPs source (exclude_source_ips)
- Scripts de test (Bash et Python)
- Métriques : événements reçus, corrélations, échecs par raison, buffers, orphelins
runtime:
deployment:
unit_type: systemd
description: >
logcorrelator est livré sous forme de binaire autonome, exécuté comme un
service systemd. L'unité systemd assure le démarrage automatique au boot,
le redémarrage en cas de crash, et une intégration standard dans l'écosystème
Linux.
binary_path: /usr/bin/logcorrelator
config_path: /etc/logcorrelator/logcorrelator.yml
user: logcorrelator
group: logcorrelator
restart: on-failure
systemd_unit:
path: /etc/systemd/system/logcorrelator.service
content_example: |
[Unit]
Description=logcorrelator service
After=network.target
[Service]
Type=simple
User=logcorrelator
Group=logcorrelator
ExecStart=/usr/bin/logcorrelator -config /etc/logcorrelator/logcorrelator.yml
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5
# Security hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/log/logcorrelator /var/run/logcorrelator /etc/logcorrelator
# Resource limits
LimitNOFILE=65536
# Systemd timeouts
TimeoutStartSec=10
TimeoutStopSec=30
[Install]
WantedBy=multi-user.target
os:
supported:
- rocky-linux-8
- rocky-linux-9
- almalinux-10
- autres-linux-recentes
logs:
stdout_stderr: journald
structured: true
description: >
Les logs internes du service (erreurs, messages d'information) sont envoyés
vers stdout/stderr et collectés par journald. Ils sont structurés et ne
contiennent pas de données personnelles.
signals:
graceful_shutdown:
- SIGINT
- SIGTERM
reload:
- SIGHUP
description: >
SIGINT/SIGTERM : arrêt propre (arrêt des sockets, vidage des buffers, fermeture
des sinks). SIGHUP : réouverture des fichiers de sortie (utile pour la
rotation des logs via logrotate) sans arrêter le service.
filesystem:
description: >
Permissions et propriété des fichiers et répertoires utilisés par logcorrelator.
directories:
- path: /var/run/logcorrelator
owner: logcorrelator:logcorrelator
permissions: "0755"
purpose: >
Contient les sockets Unix (http.socket, network.socket).
Les sockets sont créés avec des permissions 0666 (world read/write).
- path: /var/log/logcorrelator
owner: logcorrelator:logcorrelator
permissions: "0750"
purpose: >
Contient les logs corrélés (correlated.log).
- path: /var/lib/logcorrelator
owner: logcorrelator:logcorrelator
permissions: "0750"
purpose: >
Répertoire home du service (données internes).
- path: /etc/logcorrelator
owner: logcorrelator:logcorrelator
permissions: "0750"
purpose: >
Contient la configuration (logcorrelator.yml, logcorrelator.yml.example).
files:
- path: /etc/logcorrelator/logcorrelator.yml
owner: logcorrelator:logcorrelator
permissions: "0640"
rpm_directive: "%config(noreplace)"
- path: /etc/logcorrelator/logcorrelator.yml.example
owner: logcorrelator:logcorrelator
permissions: "0640"
- path: /etc/systemd/system/logcorrelator.service
owner: root:root
permissions: "0644"
- path: /etc/logrotate.d/logcorrelator
owner: root:root
permissions: "0644"
rpm_directive: "%config(noreplace)"
sockets:
- path: /var/run/logcorrelator/http.socket
owner: logcorrelator:logcorrelator
permissions: "0666"
type: unix_datagram
purpose: "Source A - logs HTTP applicatifs"
- path: /var/run/logcorrelator/network.socket
owner: logcorrelator:logcorrelator
permissions: "0666"
type: unix_datagram
purpose: "Source B - logs réseau"
packaging:
description: >
logcorrelator est distribué sous forme de packages .rpm (Rocky Linux, AlmaLinux,
RHEL), construits intégralement dans des conteneurs. Le changelog RPM est mis
à jour à chaque changement de version.
Tous les numéros de version doivent être cohérents entre le spec RPM, le Makefile
(PKG_VERSION), le CHANGELOG.md et les tags git.
Politique de mise à jour de la configuration :
- Le fichier logcorrelator.yml est marqué %config(noreplace) : il n'est JAMAIS
écrasé lors d'une mise à jour. La configuration existante est préservée.
- Le fichier logcorrelator.yml.example est TOUJOURS mis à jour pour refléter
les nouvelles options de configuration disponibles.
- Lors de la première installation, si logcorrelator.yml n'existe pas, il est
créé à partir de logcorrelator.yml.example.
formats:
- rpm
target_distros:
- rocky-linux-8
- rocky-linux-9
- almalinux-10
- rhel-8
- rhel-9
- rhel-10
rpm:
tool: fpm
changelog:
source: git # ou CHANGELOG.md
description: >
À chaque build, un script génère un fichier de changelog RPM à partir de
l'historique (tags/commits) et le passe à fpm (option --rpm-changelog).
contents:
- path: /usr/bin/logcorrelator
type: binary
- path: /etc/logcorrelator/logcorrelator.yml
type: config
directives: "%config(noreplace)"
behavior: >
Jamais écrasé lors des mises à jour. Préservé automatiquement par RPM.
Créé uniquement lors de la première installation s'il n'existe pas.
- path: /etc/logcorrelator/logcorrelator.yml.example
type: doc
behavior: >
TOUJOURS mis à jour lors des mises à jour. Sert de référence pour les
nouvelles options de configuration disponibles.
- path: /etc/systemd/system/logcorrelator.service
type: systemd_unit
- path: /etc/logrotate.d/logcorrelator
type: logrotate_script
directives: "%config(noreplace)"
logrotate_example: |
/var/log/logcorrelator/correlated.log {
daily
rotate 7
compress
delaycompress
missingok
notifempty
create 0640 logcorrelator logcorrelator
sharedscripts
postrotate
/bin/systemctl reload logcorrelator > /dev/null 2>&1 || true
endscript
}
config:
format: yaml
location: /etc/logcorrelator/logcorrelator.yml
reload_strategy: signal_sighup_for_files
description: >
Toute la configuration est centralisée dans un fichier YAML lisible. Le RPM
fournit aussi un fichier d'exemple mis à jour à chaque version.
example: |
# /etc/logcorrelator/logcorrelator.yml
log:
level: INFO # DEBUG, INFO, WARN, ERROR
inputs:
unix_sockets:
# Source HTTP (A) : logs applicatifs en JSON, 1 datagramme = 1 log.
- name: http
source_type: A
path: /var/run/logcorrelator/http.socket
format: json
socket_permissions: "0666"
# Source réseau (B) : logs IP/TCP/JA3... en JSON, 1 datagramme = 1 log.
- name: network
source_type: B
path: /var/run/logcorrelator/network.socket
format: json
socket_permissions: "0666"
outputs:
file:
enabled: true
path: /var/log/logcorrelator/correlated.log
clickhouse:
enabled: false
dsn: clickhouse://user:pass@localhost:9000/db
table: correlated_logs_http_network
batch_size: 500
flush_interval_ms: 200
max_buffer_size: 5000
drop_on_overflow: true
async_insert: true
timeout_ms: 1000
stdout:
enabled: false
level: INFO # Sans effet sur les données : le sink stdout est no-op (aucun log corrélé ou orphelin n'est écrit sur stdout)
correlation:
# Fenêtre de corrélation : si le log HTTP arrive avant le réseau, il attend
# au plus cette durée (sauf éviction du cache HTTP).
# Augmentée à 10s pour supporter le Keep-Alive HTTP.
time_window:
value: 10
unit: s
orphan_policy:
apache_always_emit: true # Toujours émettre les événements A, même sans correspondance B
network_emit: false # Ne jamais émettre les événements B seuls
matching:
mode: one_to_many # KeepAlive : un B peut corréler plusieurs A.
buffers:
# Tailles max des caches en mémoire (en nombre de logs).
max_http_items: 10000
max_network_items: 20000
ttl:
# Durée de vie standard d'un log réseau (B) en mémoire. Chaque corrélation
# réussie avec un A réinitialise ce TTL.
# Augmenté à 120s pour supporter les sessions HTTP Keep-Alive longues.
network_ttl_s: 120
# Filtrage des IPs source à exclure (optionnel)
exclude_source_ips:
- 10.0.0.1 # IP unique
- 172.16.0.0/12 # Plage CIDR
# Les événements depuis ces IPs sont silencieusement ignorés
# Serveur de métriques HTTP (optionnel, pour débogage et monitoring)
metrics:
enabled: false
addr: ":8080" # Adresse d'écoute du serveur HTTP
# Endpoints:
# GET /metrics - Retourne les métriques de corrélation en JSON
# GET /health - Health check
inputs:
description: >
Deux flux de logs JSON via sockets Unix datagram (SOCK_DGRAM). Chaque datagramme
contient un JSON complet. Le champ source_type ("A" ou "B") devrait être spécifié
pour chaque socket ; s'il est absent, la source est déduite automatiquement
(présence de headers = source A, sinon source B).
unix_sockets:
- name: http
id: A
description: >
Source A, logs HTTP applicatifs (Apache, reverse proxy, etc.). Schéma JSON
variable, champ timestamp (int64, nanosecondes) obligatoire, headers dynamiques (header_*).
path: /var/run/logcorrelator/http.socket
source_type: A
permissions: "0666"
protocol: unix
socket_type: dgram
mode: datagram
format: json
framing: message
max_datagram_bytes: 65535
retry_on_error: true
- name: network
id: B
description: >
Source B, logs réseau (métadonnées IP/TCP, JA3/JA4, etc.). Seuls src_ip
et src_port sont requis pour la corrélation. Le champ timestamp est optionnel ;
s'il est absent, l'heure de réception est utilisée.
path: /var/run/logcorrelator/network.socket
source_type: B
permissions: "0666"
protocol: unix
socket_type: dgram
mode: datagram
format: json
framing: message
max_datagram_bytes: 65535
retry_on_error: true
outputs:
description: >
Les logs corrélés sont envoyés vers un ou plusieurs sinks (MultiSink).
sinks:
file:
enabled: true
description: >
Sink fichier local. Un JSON par ligne. Rotation gérée par logrotate,
réouverture du fichier sur SIGHUP. Le champ `enabled: false` coupe
complètement l'écriture du fichier (le sink n'est pas créé).
path: /var/log/logcorrelator/correlated.log
format: json_lines
rotate_managed_by: external_logrotate
clickhouse:
enabled: false
description: >
Sink principal pour l'archivage et l'analyse quasi temps réel. Inserts
batch asynchrones, drop en cas de saturation. Le service insère uniquement
dans une table RAW (raw_json String, ingest_time DateTime DEFAULT now()).
La table parsée et la vue matérialisée sont gérées en externe (DDL séparés).
Toutes les erreurs de connexion, de flush et de retry sont loggées :
INFO à la connexion, ERROR sur échec de flush, WARN sur drop/retry, DEBUG sur envoi réussi.
dsn: clickhouse://user:pass@host:9000/db
table: correlated_logs_http_network
batch_size: 500
flush_interval_ms: 200
max_buffer_size: 5000
drop_on_overflow: true
async_insert: true
timeout_ms: 1000
stdout:
enabled: false
description: >
Sink no-op pour les données. Aucune donnée corrélée ou orpheline n'est
jamais écrite sur stdout. Ce sink existe uniquement pour satisfaire
l'interface CorrelatedLogSink. Les logs opérationnels du service
(démarrage, erreurs, métriques de débogage) sont toujours sur stderr
via observability.Logger, indépendamment de ce sink.
correlation:
description: >
Corrélation stricte basée sur src_ip + src_port et une fenêtre temporelle
configurable. Aucun autre champ n'est utilisé pour la décision de corrélation.
key:
- src_ip
- src_port
time_window:
value: 10
unit: s
description: >
Fenêtre de temps appliquée aux timestamps de A et B. Si B n'arrive pas dans
ce délai, A est émis comme orphelin. Augmentée à 10s pour le Keep-Alive.
retention_limits:
max_http_items: 10000
max_network_items: 20000
description: >
Limites des caches. Si max_http_items est atteint, le plus ancien A est
évincé et émis orphelin. Si max_network_items est atteint, le plus ancien B
est supprimé silencieusement.
ttl_management:
network_ttl_s: 120
description: >
TTL des logs réseau. Chaque fois qu'un B est corrélé à un A (Keep-Alive),
son TTL est remis à cette valeur. Augmenté à 120s pour les sessions longues.
timestamp_source:
apache: timestamp (champ int64, nanosecondes)
network: timestamp (champ int64, nanosecondes) si présent, sinon time (RFC3339),
sinon reception_time (time.Now())
orphan_policy:
apache_always_emit: true
network_emit: false
matching:
mode: one_to_many
description: >
Stratégie 1-à-N : un log réseau peut être utilisé pour plusieurs logs HTTP
successifs tant qu'il n'a pas expiré ni été évincé.
ip_filtering:
directive: exclude_source_ips
description: >
Liste d'IPs source (exactes ou plages CIDR) à ignorer silencieusement.
Événements non corrélés, non émis en orphelin. Métrique : failed_ip_excluded.
dest_port_filtering:
directive: include_dest_ports
description: >
Liste blanche de ports de destination. Si non vide, seuls les événements
dont le dst_port est dans la liste participent à la corrélation. Les autres
sont silencieusement ignorés (non corrélés, non émis en orphelin).
Liste vide = tous les ports autorisés (comportement par défaut).
Métrique : failed_dest_port_filtered.
example:
include_dest_ports: [80, 443, 8080, 8443]
schema:
description: >
Schémas variables pour A et B. Quelques champs seulement sont obligatoires
pour la corrélation, les autres sont acceptés sans modification de code.
source_A:
description: >
Logs HTTP applicatifs au format JSON.
required_fields:
- name: src_ip
type: string
- name: src_port
type: int
- name: timestamp
type: int64
unit: ns
optional_fields:
- name: dst_ip
type: string
- name: dst_port
type: int
- name: method
type: string
- name: path
type: string
- name: host
type: string
- name: http_version
type: string
dynamic_fields:
- pattern: header_*
target_map: headers
- pattern: "*"
target_map: extra
source_B:
description: Logs réseau JSON (IP/TCP, JA3/JA4...).
required_fields:
- name: src_ip
type: string
- name: src_port
type: int
optional_fields:
- name: dst_ip
type: string
- name: dst_port
type: int
- name: timestamp
type: int64
unit: ns
- name: time
type: string
format: RFC3339 ou RFC3339Nano
dynamic_fields:
- pattern: "*"
target_map: extra
normalized_event:
description: >
Représentation interne unifiée des événements A/B.
fields:
- name: source
type: enum("A","B")
- name: timestamp
type: time.Time
- name: src_ip
type: string
- name: src_port
type: int
- name: dst_ip
type: string
optional: true
- name: dst_port
type: int
optional: true
- name: headers
type: map[string]string
optional: true
- name: extra
type: map[string]any
correlated_log:
description: >
Structure du log corrélé émis vers les sinks.
fields:
- name: timestamp
type: time.Time
- name: src_ip
type: string
- name: src_port
type: int
- name: dst_ip
type: string
optional: true
- name: dst_port
type: int
optional: true
- name: correlated
type: bool
- name: orphan_side
type: string
- name: "*"
type: map[string]any
clickhouse_schema:
strategy: external_ddls
database: mabase_prod
description: >
La table ClickHouse est gérée en dehors du service. Le service insère dans une
table RAW avec une seule colonne raw_json contenant le log corrélé complet
sérialisé en JSON. La colonne ingest_time utilise DEFAULT now().
Toute extraction de champs (table parsée, vue matérialisée) est gérée en externe
via des DDL séparés, non implémentés dans le service.
tables:
- name: http_logs_raw
description: >
Table d'ingestion brute. Une seule colonne raw_json contient le log corrélé
complet sérialisé en JSON. La colonne ingest_time est auto-générée avec
DEFAULT now(). Partitionnée par jour pour optimiser le TTL.
engine: MergeTree
partition_by: toDate(ingest_time)
order_by: ingest_time
columns:
- name: raw_json
type: String
- name: ingest_time
type: DateTime
default: now()
insert_format: |
INSERT INTO mabase_prod.http_logs_raw (raw_json) VALUES
('{...log corrélé sérialisé en JSON...}')
notes: >
Le service utilise l'API native clickhouse-go/v2 (PrepareBatch + Append + Send).
La colonne ingest_time n'est PAS explicitement insérée (DEFAULT now() est utilisé).
- name: http_logs
description: >
Table parsée (optionnelle, gérée en externe). Le service n'implémente PAS
l'extraction des champs suivants. Si cette table est utilisée, elle doit être
alimentée par une vue matérialisée ou un traitement ETL externe.
engine: MergeTree
partition_by: log_date
order_by: (time, src_ip, dst_ip, ja4)
columns:
- name: time
type: DateTime
- name: log_date
type: Date
default: toDate(time)
- name: src_ip
type: IPv4
- name: src_port
type: UInt16
- name: dst_ip
type: IPv4
- name: dst_port
type: UInt16
- name: method
type: LowCardinality(String)
- name: scheme
type: LowCardinality(String)
- name: host
type: LowCardinality(String)
- name: path
type: String
- name: query
type: String
- name: http_version
type: LowCardinality(String)
- name: orphan_side
type: LowCardinality(String)
- name: correlated
type: UInt8
- name: keepalives
type: UInt16
status: non_implémenté
- name: a_timestamp
type: UInt64
status: non_implémenté
- name: b_timestamp
type: UInt64
status: non_implémenté
- name: conn_id
type: String
status: non_implémenté
- name: ip_meta_df
type: UInt8
status: non_implémenté
- name: ip_meta_id
type: UInt32
status: non_implémenté
- name: ip_meta_total_length
type: UInt32
status: non_implémenté
- name: ip_meta_ttl
type: UInt8
status: non_implémenté
- name: tcp_meta_options
type: LowCardinality(String)
status: non_implémenté
- name: tcp_meta_window_size
type: UInt32
status: non_implémenté
- name: syn_to_clienthello_ms
type: Int32
status: non_implémenté
- name: tls_version
type: LowCardinality(String)
status: non_implémenté
- name: tls_sni
type: LowCardinality(String)
status: non_implémenté
- name: ja3
type: String
status: non_implémenté
- name: ja3_hash
type: String
status: non_implémenté
- name: ja4
type: String
status: non_implémenté
- name: header_user_agent
type: String
status: non_implémenté
- name: header_accept
type: String
status: non_implémenté
- name: header_accept_encoding
type: String
status: non_implémenté
- name: header_accept_language
type: String
status: non_implémenté
- name: header_x_request_id
type: String
status: non_implémenté
- name: header_x_trace_id
type: String
status: non_implémenté
- name: header_x_forwarded_for
type: String
status: non_implémenté
- name: header_sec_ch_ua
type: String
status: non_implémenté
- name: header_sec_ch_ua_mobile
type: String
status: non_implémenté
- name: header_sec_ch_ua_platform
type: String
status: non_implémenté
- name: header_sec_fetch_dest
type: String
status: non_implémenté
- name: header_sec_fetch_mode
type: String
status: non_implémenté
- name: header_sec_fetch_site
type: String
status: non_implémenté
notes: >
Cette table et la vue matérialisée associée sont gérées en externe (DDL séparés).
Le service se contente d'insérer le JSON brut dans http_logs_raw.
Les champs marqués "non_implémenté" ne sont PAS extraits par le service.
users:
description: >
La gestion des utilisateurs ClickHouse est externe au service. Le DSN est
configuré dans le fichier de configuration YAML.
notes: >
Cette section est fournie à titre indicatif pour l'administration ClickHouse.
migration:
description: >
Aucune migration n'est implémentée dans le service. La gestion des schémas
(tables, vues matérialisées) est entièrement externe (DDL séparés).
architecture:
description: >
Architecture hexagonale : domaine de corrélation indépendant, ports abstraits
pour les sources/sinks, adaptateurs pour sockets Unix, fichier, ClickHouse et
stdout, couche application d'orchestration, et modules infra (config, observabilité).
modules:
- name: cmd/logcorrelator
type: entrypoint
responsibilities:
- Chargement de la configuration YAML.
- Initialisation des adaptateurs d'entrée/sortie.
- Création du CorrelationService.
- Démarrage de l'orchestrateur.
- Gestion des signaux (SIGINT, SIGTERM, SIGHUP).
- Versioning via -ldflags (main.Version).
- name: internal/domain
type: domain
responsibilities:
- Modèles NormalizedEvent et CorrelatedLog.
- CorrelationService (fenêtre, TTL, buffers bornés, one-to-many/Keep-Alive, orphelins).
- Filtrage par IP source (exclude_source_ips, CIDR).
- Filtrage par port destination (include_dest_ports, liste blanche).
- Custom JSON marshaling pour CorrelatedLog (structure plate).
- name: internal/ports
type: ports
responsibilities:
- Interfaces EventSource, CorrelatedLogSink, CorrelationProcessor.
- name: internal/app
type: application
responsibilities:
- Orchestrator : EventSource → CorrelationService → MultiSink.
- Gestion du contexte de shutdown et drain des événements.
- name: internal/adapters/inbound/unixsocket
type: adapter_inbound
responsibilities:
- Lecture Unix datagram (SOCK_DGRAM) et parsing JSON → NormalizedEvent.
- Détection automatique de la source (A/B) via source_type ou headers.
- Gestion des permissions de socket (défaut 0666).
- Cleanup du fichier socket à l'arrêt.
- name: internal/adapters/outbound/file
type: adapter_outbound
responsibilities:
- Écriture JSON lines.
- Réouverture du fichier sur SIGHUP (log rotation).
- Validation des chemins (répertoire autorisé).
- name: internal/adapters/outbound/clickhouse
type: adapter_outbound
responsibilities:
- Bufferisation + inserts batch asynchrones.
- Gestion du drop_on_overflow.
- Retry avec backoff exponentiel (MaxRetries=3).
- API native clickhouse-go/v2 (PrepareBatch + Append + Send).
- Logging complet via observability.Logger (SetLogger) : INFO à la connexion,
DEBUG sur envoi réussi (rows/table), WARN sur drop buffer et retries,
ERROR sur échec de flush (périodique, batch, fermeture).
- name: internal/adapters/outbound/stdout
type: adapter_outbound
responsibilities:
- Sink no-op pour les données corrélées.
- Write/Flush/Close ne font rien : les données ne passent jamais par stdout.
- Les logs opérationnels sont sur stderr via observability.Logger (indépendant de ce sink).
- name: internal/adapters/outbound/multi
type: adapter_outbound
responsibilities:
- Fan-out vers plusieurs sinks.
- Implémentation de Reopen() pour la rotation des logs.
- name: internal/config
type: infrastructure
responsibilities:
- Chargement/validation de la configuration YAML.
- Valeurs par défaut et fallback pour champs dépréciés.
- name: internal/observability
type: infrastructure
responsibilities:
- Logger structuré avec niveaux (DEBUG, INFO, WARN, ERROR).
- CorrelationMetrics : suivi des statistiques de corrélation.
- MetricsServer : serveur HTTP pour exposition des métriques (/metrics, /health).
- Traçage des événements exclus (exclude_source_ips).
- Logs pour : événements reçus, corrélations, orphelins, buffer plein.
testing:
unit:
description: >
Tests unitaires table-driven, couverture cible ≥ 80 %. La couverture actuelle
est d'environ 74-80% selon les versions. Les tests se concentrent sur la logique
de corrélation, les caches, les sinks et le parsing des datagrammes.
coverage_minimum: 0.8
coverage_actual: ~0.74-0.80
focus:
- CorrelationService (fenêtre, TTL, évictions, one-to-many/Keep-Alive)
- Parsing A/B → NormalizedEvent (datagrammes JSON)
- ClickHouseSink (batching, retry, overflow, logging erreurs/succès)
- FileSink (réouverture sur SIGHUP)
- MultiSink (fan-out)
- StdoutSink (no-op data, test stdout reste vide)
- Config (validation, valeurs par défaut, exclude_source_ips)
- UnixSocketSource (lecture, permissions, cleanup)
- CorrelationMetrics (suivi des statistiques)
- MetricsServer (endpoints /metrics et /health)
integration:
description: >
Tests d'intégration limités. Le flux complet A+B → corrélation → sinks est
testé via des tests unitaires avec mocks. ClickHouse est mocké (pas de tests
avec vrai ClickHouse). Scénarios Keep-Alive testés dans correlation_service_test.go.
Scripts de test fournis : scripts/test-correlation.sh et scripts/test-correlation-advanced.py.
docker:
description: >
Build, tests et packaging RPM sont exécutés intégralement dans des conteneurs
via un multi-stage build. Deux Dockerfiles : Dockerfile (build + runtime + dev)
et Dockerfile.package (RPM multi-distros : el8, el9, el10).
build_pipeline:
multi_stage: true
stages:
- name: builder
base: golang:1.24
description: >
go test -race -coverprofile=coverage.txt ./... avec vérification de couverture
(échec si < 80 %). Compilation d'un binaire statique (CGO_ENABLED=0,
GOOS=linux, GOARCH=amd64).
- name: runtime
base: scratch
description: >
Image minimale contenant uniquement le binaire et la config exemple.
- name: rpm_builder_el8
base: rockylinux:8
description: >
Installation de fpm (via Ruby), construction RPM pour Enterprise Linux 8.
- name: rpm_builder_el9
base: rockylinux:9
description: >
Installation de fpm (via Ruby), construction RPM pour Enterprise Linux 9.
- name: rpm_builder_el10
base: almalinux:10
description: >
Installation de fpm (via Ruby), construction RPM pour Enterprise Linux 10.
- name: output_export
base: alpine:latest
description: >
Export des paquets RPM produits pour les 3 distributions (el8, el9, el10).
files:
- path: Dockerfile
description: Build principal (builder, runtime, dev) et packaging RPM mono-distro.
- path: Dockerfile.package
description: Packaging RPM multi-distros (el8, el9, el10) avec scripts post/preun/postun.
observability:
description: >
Le service inclut des fonctionnalités complètes de débogage et de monitoring
pour diagnostiquer les problèmes de corrélation et surveiller les performances.
logging:
levels:
- DEBUG: Tous les événements reçus, tentatives de corrélation, raisons des échecs
- INFO: Événements corrélés, démarrage/arrêt du service
- WARN: Orphelins émis, buffer plein, TTL expiré
- ERROR: Erreurs de parsing, échecs de sink, erreurs critiques
debug_logs:
- "event received: source=A src_ip=192.168.1.1 src_port=8080 timestamp=..."
- "processing A event: key=192.168.1.1:8080 timestamp=..."
- "correlation found: A(src_ip=... src_port=... ts=...) + B(src_ip=... src_port=... ts=...)"
- "A event has no matching B key in buffer: key=..."
- "A event has same key as B but outside time window: key=... time_diff=15s window=10s"
- "event excluded by IP filter: source=A src_ip=10.0.0.1 src_port=8080"
- "event excluded by dest port filter: source=A dst_port=22"
- "TTL reset for B event (Keep-Alive): key=... new_ttl=120s"
- "[clickhouse] DEBUG batch sent: rows=42 table=correlated_logs_http_network"
info_logs:
- "[clickhouse] INFO connected to ClickHouse: table=... batch_size=500 flush_interval_ms=200"
warn_logs:
- "[clickhouse] WARN buffer full, dropping log: table=... buffer_size=5000"
- "[clickhouse] WARN retrying batch insert: attempt=2/3 delay=100ms rows=42 err=connection refused"
error_logs:
- "[clickhouse] ERROR periodic flush failed: ..."
- "[clickhouse] ERROR batch flush failed: ..."
- "[clickhouse] ERROR final flush on close failed: ..."
metrics_server:
enabled: true
endpoints:
- path: /metrics
method: GET
description: Retourne les métriques de corrélation au format JSON
response_example: |
{
"events_received_a": 1542,
"events_received_b": 1498,
"correlations_success": 1450,
"correlations_failed": 92,
"failed_no_match_key": 45,
"failed_time_window": 23,
"failed_buffer_eviction": 5,
"failed_ttl_expired": 12,
"failed_ip_excluded": 7,
"failed_dest_port_filtered": 3,
"buffer_a_size": 23,
"buffer_b_size": 18,
"orphans_emitted_a": 92,
"keepalive_resets": 892
}
- path: /health
method: GET
description: Health check
response_example: |
{"status":"healthy"}
metrics_tracked:
events_received:
- events_received_a: Nombre d'événements HTTP (source A) reçus
- events_received_b: Nombre d'événements réseau (source B) reçus
correlations:
- correlations_success: Corrélations réussies
- correlations_failed: Échecs de corrélation
failure_reasons:
- failed_no_match_key: Clé src_ip:src_port non trouvée dans le buffer
- failed_time_window: Événements hors fenêtre temporelle
- failed_buffer_eviction: Buffer plein, événement évincé
- failed_ttl_expired: TTL de l'événement B expiré
- failed_ip_excluded: Événement exclu par filtre IP (exclude_source_ips)
- failed_dest_port_filtered: Événement exclu par filtre port destination (include_dest_ports)
buffers:
- buffer_a_size: Taille actuelle du buffer HTTP
- buffer_b_size: Taille actuelle du buffer réseau
orphans:
- orphans_emitted_a: Orphelins A émis (sans correspondance B)
- orphans_emitted_b: Orphelins B émis (toujours 0, policy: network_emit=false)
- orphans_pending_a: Orphelins A en attente (délai avant émission)
- pending_orphan_match: B a corrélé avec un orphelin A en attente
keepalive:
- keepalive_resets: Resets TTL pour mode Keep-Alive (one-to-many)
troubleshooting:
description: >
Guide de diagnostic basé sur les métriques et logs
common_issues:
- symptom: failed_no_match_key élevé
cause: Les logs A et B n'ont pas le même src_ip + src_port
solution: Vérifier que les deux sources utilisent la même combinaison IP/port
- symptom: failed_time_window élevé
cause: Timestamps trop éloignés (> time_window.value)
solution: Augmenter correlation.time_window.value ou synchroniser les horloges (NTP)
- symptom: failed_ttl_expired élevé
cause: Les événements B expirent avant corrélation
solution: Augmenter correlation.ttl.network_ttl_s
- symptom: failed_buffer_eviction élevé
cause: Buffers trop petits pour le volume de logs
solution: Augmenter correlation.buffers.max_http_items et max_network_items
- symptom: failed_ip_excluded élevé
cause: Trafic depuis des IPs configurées dans exclude_source_ips
solution: Vérifier la configuration, c'est normal si attendu
- symptom: failed_dest_port_filtered élevé
cause: Trafic sur des ports non listés dans include_dest_ports
solution: Vérifier la configuration include_dest_ports, ou vider la liste pour tout accepter
- symptom: orphans_emitted_a élevé
cause: Beaucoup de logs A sans correspondance B
solution: Vérifier que la source B envoie bien les événements attendus
test_scripts:
- name: scripts/test-correlation.sh
description: Script Bash pour tester la corrélation avec des événements synthétiques
features:
- Envoi de paires A+B avec mêmes src_ip:src_port
- Vérification des métriques avant/après
- Options: -c (count), -d (delay), -v (verbose), -m (metrics-url)
- name: scripts/test-correlation-advanced.py
description: Script Python avancé avec multiples scénarios de test
features:
- Basic test: corrélations simples
- Time window test: vérifie l'expiration de la fenêtre temporelle
- Different IP test: vérifie non-corrélation avec IPs différentes
- Keep-Alive test: vérifie le mode one-to-many
- Métriques en temps réel

View File

@ -0,0 +1,202 @@
package main
import (
"context"
"flag"
"fmt"
"os"
"os/signal"
"syscall"
"time"
"github.com/antitbone/ja4/correlator/internal/adapters/inbound/unixsocket"
"github.com/antitbone/ja4/correlator/internal/adapters/outbound/clickhouse"
"github.com/antitbone/ja4/correlator/internal/adapters/outbound/file"
"github.com/antitbone/ja4/correlator/internal/adapters/outbound/multi"
"github.com/antitbone/ja4/correlator/internal/adapters/outbound/stdout"
"github.com/antitbone/ja4/correlator/internal/app"
"github.com/antitbone/ja4/correlator/internal/config"
"github.com/antitbone/ja4/correlator/internal/domain"
"github.com/antitbone/ja4/correlator/internal/observability"
"github.com/antitbone/ja4/correlator/internal/ports"
)
// Version is the build identifier printed by the -version flag. It defaults to
// "dev"; the project's architecture notes describe overriding it at build time
// via -ldflags (main.Version).
var Version = "dev"
// main is the logcorrelator entry point.
//
// It parses the command-line flags, loads the configuration file, builds the
// Unix-socket sources and every enabled sink, creates the correlation service
// and (optionally) the metrics server, and hands everything to the
// orchestrator. It then blocks on OS signals: SIGHUP asks the sinks to reopen
// (log rotation) and keeps the process running, any other registered signal
// triggers shutdown of the orchestrator and of the metrics server.
func main() {
	var (
		configPath  string
		showVersion bool
	)
	flag.StringVar(&configPath, "config", "config.yml", "path to configuration file")
	flag.BoolVar(&showVersion, "version", false, "print version and exit")
	flag.Parse()

	if showVersion {
		fmt.Println(Version)
		os.Exit(0)
	}

	// The logger does not exist yet, so configuration errors go to stderr.
	cfg, err := config.Load(configPath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error loading configuration: %v\n", err)
		os.Exit(1)
	}

	// Logger uses the level taken from the configuration.
	logger := observability.NewLoggerWithLevel("logcorrelator", cfg.Log.GetLevel())
	logger.Info(fmt.Sprintf("Starting logcorrelator version %s (log_level=%s)", Version, cfg.Log.GetLevel()))

	// fatal reports a startup failure through the logger and exits with status 1.
	fatal := func(msg string, cause error) {
		logger.Error(msg, cause)
		os.Exit(1)
	}

	// One event source per configured Unix socket.
	sources := make([]ports.EventSource, 0, len(cfg.Inputs.UnixSockets))
	for _, in := range cfg.Inputs.UnixSockets {
		src := unixsocket.NewUnixSocketSource(unixsocket.Config{
			Name:              in.Name,
			Path:              in.Path,
			SourceType:        in.SourceType,
			SocketPermissions: in.GetSocketPermissions(),
		})
		src.SetLogger(logger)
		sources = append(sources, src)
		logger.Info(fmt.Sprintf("Configured input source: name=%s, path=%s, permissions=%o", in.Name, in.Path, in.GetSocketPermissions()))
	}

	// Sinks: file, ClickHouse and stdout, each only when enabled.
	sinks := []ports.CorrelatedLogSink{}

	if fileOut := cfg.Outputs.File; fileOut.Enabled && fileOut.Path != "" {
		fileSink, sinkErr := file.NewFileSink(file.Config{Path: fileOut.Path})
		if sinkErr != nil {
			fatal("Failed to create file sink", sinkErr)
		}
		sinks = append(sinks, fileSink)
		logger.Info(fmt.Sprintf("Configured file sink: path=%s", fileOut.Path))
	}

	if ch := cfg.Outputs.ClickHouse; ch.Enabled {
		chSink, sinkErr := clickhouse.NewClickHouseSink(clickhouse.Config{
			DSN:             ch.DSN,
			Table:           ch.Table,
			BatchSize:       ch.BatchSize,
			FlushIntervalMs: ch.FlushIntervalMs,
			MaxBufferSize:   ch.MaxBufferSize,
			DropOnOverflow:  ch.DropOnOverflow,
			AsyncInsert:     ch.AsyncInsert,
			TimeoutMs:       ch.TimeoutMs,
		})
		if sinkErr != nil {
			fatal("Failed to create ClickHouse sink", sinkErr)
		}
		chSink.SetLogger(logger)
		sinks = append(sinks, chSink)
		logger.Info(fmt.Sprintf("Configured ClickHouse sink: table=%s", ch.Table))
	}

	if cfg.Outputs.Stdout.Enabled {
		sinks = append(sinks, stdout.NewStdoutSink(stdout.Config{Enabled: true}))
		logger.Info("Configured stdout sink (operational logs on stderr)")
	}

	// Fan-out wrapper over all configured sinks.
	multiSink := multi.NewMultiSink(sinks...)

	// Correlation service; NetworkEmit is fixed to false here.
	corr := cfg.Correlation
	correlationSvc := domain.NewCorrelationService(domain.CorrelationConfig{
		TimeWindow:           corr.GetTimeWindow(),
		ApacheAlwaysEmit:     corr.GetApacheAlwaysEmit(),
		ApacheEmitDelayMs:    corr.GetApacheEmitDelayMs(),
		NetworkEmit:          false,
		MaxHTTPBufferSize:    corr.GetMaxHTTPBufferSize(),
		MaxNetworkBufferSize: corr.GetMaxNetworkBufferSize(),
		NetworkTTLS:          corr.GetNetworkTTLS(),
		MatchingMode:         corr.GetMatchingMode(),
		ExcludeSourceIPs:     corr.GetExcludeSourceIPs(),
		IncludeDestPorts:     corr.GetIncludeDestPorts(),
	}, &domain.RealTimeProvider{})
	correlationSvc.SetLogger(logger.WithFields(map[string]any{"component": "correlation"}))

	logger.Info(fmt.Sprintf("Correlation service initialized: time_window=%s, emit_orphans=%v, emit_delay_ms=%d",
		corr.GetTimeWindow().String(),
		corr.GetApacheAlwaysEmit(),
		corr.GetApacheEmitDelayMs()))

	// Optional metrics server; stays nil when disabled.
	var metricsServer *observability.MetricsServer
	if cfg.Metrics.Enabled {
		listenAddr := cfg.Metrics.Addr
		if listenAddr == "" {
			listenAddr = ":8080" // Default address
		}

		metricsServer, err = observability.NewMetricsServer(listenAddr, correlationSvc.GetMetricsSnapshot)
		if err != nil {
			fatal("Failed to create metrics server", err)
		}
		if err = metricsServer.Start(); err != nil {
			fatal("Failed to start metrics server", err)
		}
		logger.Info(fmt.Sprintf("Metrics server started: addr=%s", metricsServer.Addr()))
		logger.Info("Metrics endpoints: /metrics (JSON), /health")
	}

	// Orchestrator ties sources, sink and correlation service together.
	orchestrator := app.NewOrchestrator(app.OrchestratorConfig{
		Sources: sources,
		Sink:    multiSink,
	}, correlationSvc)

	if startErr := orchestrator.Start(); startErr != nil {
		fatal("Failed to start orchestrator", startErr)
	}
	logger.Info("logcorrelator started successfully")

	// Signal loop: SIGHUP reopens the sinks, anything else leaves the loop.
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)
	for received := range signals {
		if received != syscall.SIGHUP {
			logger.Info(fmt.Sprintf("Shutdown signal received: %v", received))
			break
		}

		logger.Info("SIGHUP received, reopening file sinks...")
		if reopenErr := multiSink.Reopen(); reopenErr != nil {
			logger.Error("Error reopening file sinks", reopenErr)
			continue
		}
		logger.Info("File sinks reopened successfully")
	}

	// Shutdown: orchestrator first, then the metrics server.
	if stopErr := orchestrator.Stop(); stopErr != nil {
		logger.Error("Error during shutdown", stopErr)
	}

	if metricsServer != nil {
		// Bound the metrics server shutdown to five seconds.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		if stopErr := metricsServer.Stop(shutdownCtx); stopErr != nil {
			logger.Error("Error stopping metrics server", stopErr)
		}
	}

	logger.Info("logcorrelator stopped")
}

View File

@ -0,0 +1,92 @@
# logcorrelator configuration file
# Format: YAML
# Logging configuration
log:
level: INFO # DEBUG, INFO, WARN, ERROR
inputs:
unix_sockets:
- name: http
source_type: A
path: /var/run/logcorrelator/http.socket
format: json
socket_permissions: "0666" # world read/write
- name: network
source_type: B
path: /var/run/logcorrelator/network.socket
format: json
socket_permissions: "0666"
outputs:
file:
enabled: true
path: /var/log/logcorrelator/correlated.log
clickhouse:
enabled: false
dsn: clickhouse://user:pass@localhost:9000/db
table: correlated_logs_http_network
batch_size: 500
flush_interval_ms: 200
max_buffer_size: 5000
drop_on_overflow: true
async_insert: true
timeout_ms: 1000
stdout:
enabled: false
correlation:
# Time window for correlation (A and B must be within this window)
# Increased to 10s to support HTTP Keep-Alive scenarios
time_window:
value: 10
unit: s
# Orphan policy: what to do when no match is found
orphan_policy:
apache_always_emit: true # Always emit A events, even without B match
apache_emit_delay_ms: 500 # Wait 500ms before emitting as orphan (allows B to arrive)
network_emit: false # Never emit B events alone
# Matching mode: one_to_one or one_to_many (Keep-Alive)
matching:
mode: one_to_many
# Buffer limits (max events in memory)
buffers:
max_http_items: 10000
max_network_items: 20000
# TTL for network events (source B)
# Increased to 120s to support long-lived HTTP Keep-Alive sessions
ttl:
network_ttl_s: 120
# Exclude specific source IPs or CIDR ranges from correlation
# Events from these IPs will be silently dropped (not correlated, not emitted)
# Useful for excluding health checks, internal traffic, or known bad actors
exclude_source_ips:
- 10.0.0.1 # Single IP
- 192.168.1.100 # Another single IP
- 172.16.0.0/12 # CIDR range (private network)
- 10.10.10.0/24 # Another CIDR range
# Restrict correlation to specific destination ports (optional)
# If non-empty, only events whose dst_port matches one of these values will be correlated
# Events on other ports are silently ignored (not correlated, not emitted as orphans)
# Useful to focus on HTTP/HTTPS traffic only and ignore unrelated connections
# include_dest_ports:
# - 80 # HTTP
# - 443 # HTTPS
# - 8080 # HTTP alt
# - 8443 # HTTPS alt
# Metrics server configuration (optional, for debugging/monitoring)
metrics:
enabled: false
addr: ":8080" # Address to listen on (e.g., ":8080", "localhost:8080")
# Endpoints:
# GET /metrics - Returns correlation metrics as JSON
# GET /health - Health check endpoint

View File

@ -0,0 +1,224 @@
# Architecture de détection — logcorrelator
## Vue d'ensemble
Le système de détection est composé de **trois couches** qui s'enchaînent en pipeline :
```
Trafic HTTP/TLS capturé
┌───────────────────┐
│ ClickHouse │ Stockage, agrégation, vues heuristiques
│ (SQL pipeline) │
└────────┬──────────┘
┌───────────────────┐
│ bot_detector.py │ Modèle IA (Isolation Forest, cycle 5 min)
│ (Python / ML) │
└────────┬──────────┘
┌───────────────────┐
│ ml_detected_ │ Table de résultats (ReplacingMergeTree)
│ anomalies │
└───────────────────┘
```
---
## 1. Ingestion des logs (`http_logs_raw` → `http_logs`)
Les logs bruts arrivent en JSON dans la table `http_logs_raw`. Une **vue matérialisée** (`mv_http_logs`) les parse en temps réel et alimente la table `http_logs`, qui contient les champs structurés suivants :
| Catégorie | Champs clés |
|---|---|
| Réseau | `src_ip`, `src_port`, `dst_ip`, `dst_port` |
| Enrichissement | `src_asn`, `src_country_code`, `src_as_name` (via dictionnaire IPLocate) |
| HTTP | `method`, `host`, `path`, `query`, `http_version` |
| Corrélation | `correlated`, `orphan_side`, `conn_id`, `keepalives` |
| Métadonnées IP | `ip_meta_ttl`, `ip_meta_id`, `ip_meta_df`, `ip_meta_total_length` |
| Métadonnées TCP | `tcp_meta_window_size`, `tcp_meta_mss`, `tcp_meta_window_scale`, `tcp_meta_options` |
| TLS / Fingerprint | `tls_version`, `tls_sni`, `tls_alpn`, `ja3`, `ja3_hash`, `ja4` |
| En-têtes HTTP | `header_user_agent`, `header_sec_ch_ua*`, `header_sec_fetch_*`, … |
L'enrichissement IP est réalisé via le dictionnaire `dict_iplocate_asn` (fichier CSV chargé en mémoire, rechargé toutes les 1-2 heures).
---
## 2. Agrégation comportementale (fenêtre horaire)
Deux tables d'agrégation `AggregatingMergeTree` sont alimentées en continu par des vues matérialisées.
### 2.1 `agg_host_ip_ja4_1h` — Comportement réseau & applicatif
Agrège par quadruplet **(window_start, src_ip, ja4, host)** toutes les heures :
| Métrique agrégée | Signification |
|---|---|
| `hits` | Nombre total de requêtes |
| `count_post` | Requêtes POST |
| `uniq_paths` | Chemins distincts visités |
| `uniq_query_params` | Paramètres de query distincts |
| `unique_src_ports` | Ports sources distincts |
| `unique_conn_id` | Connexions TCP distinctes |
| `max_keepalives` | Réutilisation maximale d'une connexion |
| `orphan_count` | Requêtes sans corrélation TCP complète |
| `ip_id_zero_count` | Paquets avec IP ID = 0 (spoofing potentiel) |
| `tcp_fp_raw` | Hash de l'empreinte TCP (window, MSS, scale, options) |
| `tcp_jitter_variance` | Variance du délai SYN→ClientHello (jitter TLS) |
| `total_ip_length_var` | Variance de la taille des paquets IP |
| `mss_1460_count` | Requêtes avec MSS = 1460 (signature Ethernet/desktop) |
### 2.2 `agg_header_fingerprint_1h` — Empreinte des en-têtes HTTP
Agrège par **(window_start, src_ip)** :
| Métrique | Signification |
|---|---|
| `header_order_hash` | Hash de l'ordre des en-têtes (fingerprint JA4H) |
| `header_count` | Nombre d'en-têtes distincts |
| `has_accept_language` | Présence de `Accept-Language` |
| `has_cookie` | Présence de `Cookie` |
| `has_referer` | Présence de `Referer` |
| `modern_browser_score` | Score 0/50/100 selon présence UA et `Sec-CH-UA` |
| `ua_ch_mismatch` | Incohérence entre `User-Agent` et `Sec-CH-UA-Platform` |
| `sec_fetch_mode/dest` | Contexte de navigation déclaré |
---
## 3. Exclusions (listes blanches)
Avant toute analyse, les tables suivantes permettent d'**exclure les robots légitimes** connus :
- `bot_ip` (fichier `bot_ip.csv`) — IPs à ignorer (crawlers, monitoring…)
- `bot_ja4` (fichier `bot_ja4.csv`) — Fingerprints JA4 à ignorer
- `ref_bot_networks` — Réseaux CIDR IPv4/IPv6 catégorisés (légitimes ou malveillants)
Ces exclusions sont appliquées dans la vue `view_ai_features_1h`.
---
## 4. Vue IA : `view_ai_features_1h`
Cette vue consolidée **sur 24 heures glissantes** calcule les **28 features** passées au modèle ML. Elle joint les deux tables d'agrégation et dérive les métriques suivantes :
| Feature | Calcul | Signal détecté |
|---|---|---|
| `hit_velocity` | `hits / durée_en_secondes` | Volume de requêtes anormalement élevé |
| `fuzzing_index` | `uniq_query_params / uniq_paths` | Exploration paramétrique (fuzzing) |
| `post_ratio` | `count_post / hits` | Soumission de formulaires en masse |
| `port_exhaustion_ratio` | `unique_src_ports / hits` | Rotation de ports (scan) |
| `orphan_ratio` | `orphan_count / hits` | Requêtes sans handshake complet |
| `ip_id_zero_ratio` | `ip_id_zero_count / hits` | Spoofing d'adresse IP |
| `multiplexing_efficiency` | `hits / unique_conn_id` | Réutilisation des connexions (H2/H3) |
| `true_window_size` | `tcp_win * 2^tcp_scale` | Taille réelle de la fenêtre TCP |
| `window_mss_ratio` | `tcp_win / tcp_mss` | Cohérence TCP stack |
| `tcp_jitter_variance` | Variance SYN→ClientHello | Irrégularité du timing TLS |
| `alpn_http_mismatch` | ALPN=h2 mais HTTP/1.1 | Négociation TLS mensongère |
| `is_alpn_missing` | ALPN absent ou `00` | Client non-standard |
| `sni_host_mismatch` | SNI ≠ Host header | Proxy transparent / bot |
| `mss_mobile_mismatch` | MSS=1460 + score navigateur élevé | Client mobile simulé depuis desktop |
| `is_fake_navigation` | `sec_fetch_mode=navigate` mais `sec_fetch_dest≠document` | Navigation simulée |
| `tcp_shared_count` | Nb d'IPs partageant la même empreinte TCP | Infrastructure partagée / botnet |
| `header_order_shared_count` | Nb d'IPs partageant le même ordre d'en-têtes | Outil automatisé commun |
---
## 5. Modèle IA : Isolation Forest (`bot_detector.py`)
### Cycle d'exécution
Le service tourne en boucle avec un **cycle de 5 minutes** :
```
fetch_and_analyze()
├─ Requête SELECT * FROM view_ai_features_1h
├─ Nettoyage des données (fillna)
├─ Dual-Model routing :
│ ├─ [Complet] correlated=1 → 23 features (réseau + TLS + headers)
│ └─ [Applicatif] correlated=0 → 19 features (headers + comportement)
└─ INSERT INTO ml_detected_anomalies
```
### Paramétrage du modèle
| Paramètre | Valeur | Signification |
|---|---|---|
| `n_estimators` | 200 | Nombre d'arbres d'isolation |
| `contamination` | 0.2% | Proportion de bots attendue dans le trafic |
| `seuil de score` | < -0.05 | Score en dessous duquel une session est marquée anomalie |
| `volume minimum` | 500 sessions | En dessous, le modèle est ignoré (trop peu de données) |
### Dual-Model routing
Le trafic est **séparé en deux populations** selon le champ `correlated` :
- **Modèle Complet** (`correlated=1`) : la corrélation TCP↔HTTP est disponible ; les features réseau (TTL, jitter TLS, ALPN, SNI) sont donc fiables et ajoutées à l'analyse.
- **Modèle Applicatif** (`correlated=0`) : seule la couche HTTP est disponible ; l'analyse se concentre donc sur le comportement applicatif (headers, paths, POST ratio…).
---
## 6. Vues heuristiques statiques
En parallèle du modèle IA, cinq vues SQL fournissent des **détections déterministes** sans ML, sur fenêtre 24h :
| Vue | Règle de détection |
|---|---|
| `view_host_ip_ja4_rotation` | IP avec 5 fingerprints JA4 distincts et > 100 requêtes → rotation d'identité |
| `view_host_ja4_anomalies` | Fingerprint JA4 vu depuis ≥ 20 IPs sur ≥ 3 hôtes → outil de scan distribué |
| `view_form_bruteforce_detected` | ≥ 10 query params distincts et ≥ 20 hits → brute-force de formulaire |
| `view_alpn_mismatch_detected` | HTTP/1.1 avec ALPN h2 ou h3 et ≥ 10 hits → négociation TLS frauduleuse |
| `view_tcp_spoofing_detected` | TTL ≤ 64 avec User-Agent Windows ou iPhone → empreinte OS incohérente |
---
## 7. Résultats : `ml_detected_anomalies`
Les anomalies détectées sont stockées dans une table `ReplacingMergeTree(detected_at)` avec **TTL 30 jours**. La clé d'ordre `(src_ip, ja4, host)` garantit que chaque triplet ne conserve que la **détection la plus récente** (dédoublonnage automatique).
Chaque enregistrement contient :
- Les scores et features ayant conduit à la détection
- Le champ `reason` : texte lisible avec score, vélocité, et indice de fuzzing
- Le champ `is_headless` : déduit de l'incohérence `sec_fetch_mode`
---
## 8. Schéma de flux complet
```
┌─────────────────────────────────────┐
│ http_logs_raw (JSON) │
└──────────────┬──────────────────────┘
│ mv_http_logs (MV)
┌─────────────────────────────────────┐
│ http_logs (parsée) │
└────────┬──────────────┬─────────────┘
│ │
mv_agg_host_ip_ja4 │ │ mv_agg_header_fingerprint
▼ ▼
┌──────────────────┐ ┌──────────────────────────┐
│ agg_host_ip_ja4 │ │ agg_header_fingerprint │
│ _1h │ │ _1h │
└────────┬─────────┘ └──────────┬──────────────┘
│ │
└──────────┬─────────────┘
│ view_ai_features_1h (JOIN + calculs)
┌─────────────────────────────────────┐
│ bot_detector.py (Isolation Forest) │
│ Cycle : 5 min | Fenêtre : 24h │
└──────────────┬──────────────────────┘
┌─────────────────────────────────────┐
│ ml_detected_anomalies │
│ (ReplacingMergeTree, TTL 30j) │
└─────────────────────────────────────┘
```

View File

@ -0,0 +1,29 @@
module github.com/antitbone/ja4/correlator
go 1.21
require (
github.com/ClickHouse/clickhouse-go/v2 v2.23.0
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/ClickHouse/ch-go v0.61.5 // indirect
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/go-faster/city v1.0.1 // indirect
github.com/go-faster/errors v0.7.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/compress v1.17.7 // indirect
github.com/paulmach/orb v0.11.1 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/segmentio/asm v1.2.0 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/trace v1.24.0 // indirect
golang.org/x/sys v0.18.0 // indirect
)
require github.com/antitbone/ja4/ja4common v0.1.0
replace github.com/antitbone/ja4/ja4common => ../../shared/go/ja4common

110
services/correlator/go.sum Normal file
View File

@ -0,0 +1,110 @@
github.com/ClickHouse/ch-go v0.61.5 h1:zwR8QbYI0tsMiEcze/uIMK+Tz1D3XZXLdNrlaOpeEI4=
github.com/ClickHouse/ch-go v0.61.5/go.mod h1:s1LJW/F/LcFs5HJnuogFMta50kKDO0lf9zzfrbl0RQg=
github.com/ClickHouse/clickhouse-go/v2 v2.23.0 h1:srmRrkS0BR8gEut87u8jpcZ7geOob6nGj9ifrb+aKmg=
github.com/ClickHouse/clickhouse-go/v2 v2.23.0/go.mod h1:tBhdF3f3RdP7sS59+oBAtTyhWpy0024ZxDMhgxra0QE=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw=
github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw=
github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg=
github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg=
github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU=
github.com/paulmach/orb v0.11.1/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU=
github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g=
go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo=
go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo=
go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI=
go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -0,0 +1,111 @@
time
log_date
src_ip
- ip source de la connexion
src_port
- port source de la connexion
dst_ip
- ip de destination de la connexion
dst_port
- port de destination de la connexion
src_asn
- Numero d'AS de l'ip source
src_country_code
- Code Pays de l'ip source
src_as_name
- Nom de l'AS de l'ip source
src_org
- Organisation de l'AS source
src_domain
- domaine de l'AS de l'ip source
method
- Methode HTTP [GET, POST, ... ]
scheme
- Type de connexion http [http, https]
host
- Hostname demandé dans l'url
path
- Path demandé dans l'url
query
- Query demandé dans l'url
http_version
- Version du protocol http utilisé
orphan_side
- Indique si le log HTTP a pu être enrichi avec les informations ip_, tcp_, ja3_ et ja4_
- "A" indique que seul le log HTTP est present, sans enrichissement
correlated
- Indique si l'algorithme de corrélation entre le log HTTP et les paramètres TCP a réussi (TCP + JA3/JA4)
keepalives
- Numéro de séquence de la requête dans une connexion HTTP avec keepalive.
a_timestamp
b_timestamp
conn_id
ip_meta_df
- Flag Don't Fragment (DF)
ip_meta_id
- id du packet ip
ip_meta_total_length
- Taille totale du paquet IP (champ Total Length)
ip_meta_ttl
- TTL du packet ip vu par le serveur destinataire du packet
tcp_meta_options
- options du packet TCP vu par le serveur destinataire du packet
tcp_meta_window_size
- TCP window size vu par le serveur destinataire du packet
tcp_meta_mss
- TCP mss vu par le serveur destinataire du packet
tcp_meta_window_scale
- TCP windows scale vu par le serveur destinataire du packet
syn_to_clienthello_ms
- durée en ms entre le 1er packet SYN et le ClientHello du TLS
tls_version
- Version de TLS negocié avec le serveur destinataire du packet
tls_sni
- SNI, nom de domaine demandé pour le certificat TLS
tls_alpn
- ALPN annoncé lors du TLS
ja3
- liste des algos utilisés pour la signature ja3
ja3_hash
- hash ja3
ja4
- hash ja4
client_headers
- liste des headers envoyés par le client http sous forme de liste Header,Header2,Header3,...
header_user_agent
- Header HTTP User-Agent
header_accept
- Header HTTP Accept
header_accept_encoding
- Header HTTP Accept-Encoding
header_accept_language
- Header HTTP Accept-Language
header_content_type
- Header Content-Type
header_x_request_id
- Header X-Request-ID
header_x_trace_id
- Header X-Trace-ID
header_x_forwarded_for
- Header X-Forwarded-For
header_sec_ch_ua
- Header Sec-Ch-UA
header_sec_ch_ua_mobile
- Header Sec-Ch-UA-Mobile
header_sec_ch_ua_platform
- Header Sec-Ch-UA-Platform
header_sec_fetch_dest
- Header Sec-Fetch-Dest
header_sec_fetch_mode
- Header Sec-Fetch-Mode
header_sec_fetch_site
- Header Sec-Fetch-Site

View File

@ -0,0 +1,30 @@
1. Incohérences de Signatures (Spoofing)
User-Agent vs TLS : Le header_user_agent prétend être un navigateur (Chrome/Safari) mais le ja3/ja4 correspond à un outil de script.
User-Agent vs Headers modernes : Le header_user_agent indique un navigateur récent, mais les headers header_sec_ch_ua_* sont vides ou absents de client_headers.
User-Agent vs ALPN : Le navigateur déclaré ne correspond pas au protocole négocié dans tls_alpn (ex: Chrome sans h2).
OS vs TTL TCP : L'OS déclaré dans le header_user_agent (ex: Windows) contredit la valeur de ip_meta_ttl (ex: 64, typique de Linux).
Host vs SNI : Le nom de domaine dans le header host ne correspond pas au tls_sni demandé lors du handshake TLS.
2. Anomalies de Headers (HTTP Fingerprinting)
Empreinte d'ordre (Fingerprint) : Apparition soudaine d'une disposition de client_headers (ordre exact) très rare, générant beaucoup de trafic.
Pauvreté des headers : Le nombre total de headers dans client_headers est anormalement bas (ex: < 5), typique des scripts basiques.
Absence de headers vitaux : Le trafic prétend être humain mais n'envoie pas header_accept_language ou header_accept_encoding.
Combinaison fatale : Le croisement d'un ja4 spécifique avec un ordre de client_headers inédit (détection de bots modifiant leur TLS mais trahis par l'applicatif).
3. Anomalies Réseau et TCP (Couche 3 & 4)
Mécanique TCP de masse : Une même combinaison (tcp_meta_window_size, tcp_meta_window_scale, tcp_meta_mss) vue sur des milliers d'IP différentes.
Handshake robotique : Un délai syn_to_clienthello_ms anormalement constant (variance quasi nulle) sur un grand nombre de connexions, typique d'un bot en datacenter.
Options TCP atypiques : Des paramètres tcp_meta_options inhabituels pour le trafic web classique de tes vrais utilisateurs.
4. Anomalies Comportementales et Volumétriques (Côté Requête)
Rafale de requêtes (Spike) : Volume d'appels (count) par src_ip ou par ja4 dépassant drastiquement le 99ème percentile historique sur 5 minutes.
Scraping furtif distribué : Un même ja4 (non standard) utilisé par des centaines de src_ip différentes, chacune faisant très peu de requêtes.
Balayage aveugle (Scanner) : Un volume anormal de path uniques (ou path + query) visités par une même IP ou un même ja4 en quelques minutes (remplace la détection des erreurs 404).
Acharnement sur cible (Brute force aveugle) : Une concentration extrême de requêtes ciblant uniquement les path sensibles (login, API, password-reset) sans navigation normale sur le reste du site (remplace la détection des 401/403).
Méthodes suspectes : Utilisation massive ou inhabituelle de method non standards (PUT, DELETE, OPTIONS, TRACE) par rapport à la baseline.
Payloads suspects : Présence de patterns d'injection ou de caractères très inhabituels dans query ou path (longueur extrême, encodages multiples).
Bot "Low and Slow" : IP ou ja4 qui passe sous les radars sur 5 minutes, mais dont le volume cumulé sur 24h ou 7 jours est mathématiquement improbable pour un humain.

View File

@ -0,0 +1,521 @@
# 🛡️ Manuel de Référence Technique : Moteur de Détection Antispam & Bot
Ce document détaille les algorithmes de détection implémentés dans les vues ClickHouse pour la plateforme.
---
## 1. Analyse de la Couche Transport (L4) : La "Trace Physique"
Avant même d'analyser l'URL, le moteur inspecte la manière dont la connexion a été établie. C'est la couche la plus difficile à falsifier pour un attaquant.
### A. Fingerprint de la Pile TCP (`tcp_fingerprint`)
* **Fonctionnement :** Nous utilisons `cityHash64` pour créer un identifiant unique basé sur trois paramètres immuables du handshake : le **MSS** (Maximum Segment Size), la **Window Size** et le **Window Scale**.
* **Ce que ça détecte :** L'unicité logicielle. Un bot tournant sur une image Alpine Linux aura une signature TCP différente d'un utilisateur sur iOS 17 ou Windows 11.
* **Détection de botnet :** Si 500 IPs différentes partagent exactement le même `tcp_fingerprint` ET le même `ja4`, il y a une probabilité de 99% qu'il s'agisse d'un cluster de bots clonés.
### B. Analyse de la gigue (Jitter) et Handshake
* **Fonctionnement :** On calcule la variance (`varPop`) du délai entre le `SYN` et le `ClientHello` TLS.
* **Ce que ça détecte :** La stabilité robotique.
* **Humain :** Latence variable (4G, Wi-Fi, mouvements). La variance est élevée.
* **Bot Datacenter :** Latence ultra-stable (fibre optique dédiée). Une variance proche de 0 indique une connexion automatisée depuis une infrastructure cloud.
---
## 2. Analyse de la Session (L5) : Le "Passeport TLS"
Le handshake TLS est une mine d'or pour identifier la bibliothèque logicielle (OpenSSL, Go-TLS, etc.).
### A. Incohérence UA vs JA4
* **Fonctionnement :** Le moteur croise le `header_user_agent` (déclaratif) avec le `ja4` (structurel).
* **Ce que ça détecte :** Le **Spoofing de Browser**. Un script Python peut facilement écrire `User-Agent: Mozilla/5.0...Chrome/120`, mais il ne peut pas simuler l'ordre exact des extensions TLS et des algorithmes de chiffrement d'un vrai Chrome sans une ingénierie complexe (comme `utls`).
* **Logique de score :** Si UA = Chrome mais JA4 != Signature_Chrome -> **+50 points de risque**.
### B. Discordance Host vs SNI
* **Fonctionnement :** Comparaison entre le champ `tls_sni` (négocié en clair lors du handshake) et le header `Host` (envoyé plus tard dans la requête chiffrée).
* **Ce que ça détecte :** Le **Domain Fronting** ou les attaques par tunnel. Un bot peut demander un certificat pour `domaine-innocent.com` (SNI) mais tenter d'attaquer `api-critique.com` (Host).
---
## 3. Analyse Applicative (L7) : Le "Comportement HTTP"
Une fois le tunnel établi, on analyse la structure de la requête HTTP.
### A. Empreinte d'ordre des Headers (`http_fp`)
* **Fonctionnement :** Nous hashons la liste ordonnée des clés de headers (`Accept`, `User-Agent`, `Connection`, etc.).
* **Ce que ça détecte :** La signature du moteur de rendu. Chaque navigateur (Firefox, Safari, Chromium) a un ordre immuable pour envoyer ses headers.
* **Détection :** Si un client envoie les headers dans un ordre inhabituel ou minimaliste (pauvreté des headers < 6), il est marqué comme suspect.
### B. Analyse des Payloads et Entropie
* **Fonctionnement :** Recherche de patterns via regex dans `query` et `path` (détection SQLi, XSS, Path Traversal).
* **Complexité :** Nous détectons les encodages multiples (ex: `%2520`) qui tentent de tromper les pare-feux simples.
---
## 4. Corrélation Temporelle & Baseline : Le "Voisinage Statistique"
Le score final dépend du passé de la signature TLS.
### A. Le Malus de Nouveauté (`agg_novelty`)
* **Logique :** Une signature (JA4 + FP) vue pour la première fois aujourd'hui est "froide".
* **Traitement :** On applique un malus si `first_seen` date de moins de 2 heures. Un botnet qui vient de lancer une campagne de rotation de signatures sera immédiatement pénalisé par son manque d'historique.
### B. Le Dépassement de Baseline (`tbl_baseline_ja4_7d`)
* **Fonctionnement :** On compare les `hits` actuels au 99ème percentile (`p99`) historique de cette signature précise.
* **Exemple :** Si le JA4 de "Chrome 122" fait habituellement 10 requêtes/min/IP sur votre site, et qu'une IP en fait soudainement 300, le score explose même si la requête est techniquement parfaite.
---
## 5. Synthèse du Scoring (Le Verdict)
| Algorithme | Signal | Impact Score |
| :--- | :--- | :--- |
| **Fingerprint Mismatch** | UA vs TLS (Spoofing) | **Haut (50)** |
| **L4 Anomaly** | Variance latence < 0.5ms | **Moyen (30)** |
| **Path Sensitivity** | Hit sur `/admin` ou `/config` | **Haut (40)** |
| **Payload Security** | Caractères d'injection (SQL/XSS) | **Critique (60)** |
| **Mass Distribution** | 1 JA4 sur > 50 IPs différentes | **Moyen (30)** |
---
## 6. Identification des Hosts par IP et JA4 (sql/hosts.sql)
Cette section détaille les vues d'agrégation et de détection pour identifier quels hosts sont associés à quelles signatures (IP + JA4).
### A. Agrégats de Base
| Table | Granularité | Description |
|-------|-------------|-------------|
| `agg_host_ip_ja4_1h` | heure | Hits, paths uniques, query params, méthodes par (IP, JA4, host) |
| `agg_host_ip_ja4_24h` | jour | Rollup quotidien pour historique long terme |
### B. Vues d'Identification
**`view_host_identification`** - Top hosts par signature
```sql
-- Quel host est associé à cette IP/JA4 ?
SELECT src_ip, ja4, host, total_hits, unique_paths, user_agent
FROM mabase_prod.view_host_identification
WHERE src_ip = '1.2.3.4'
ORDER BY total_hits DESC;
```
**`view_host_ja4_anomalies`** - JA4 partagé par plusieurs hosts (botnet)
```sql
-- Ce JA4 est-il utilisé par plusieurs hosts différents ?
SELECT ja4, hosts, unique_hosts, unique_ips
FROM mabase_prod.view_host_ja4_anomalies
WHERE unique_hosts >= 3;
-- Interprétation : 1 JA4 sur 3+ hosts = botnet cloné probable
```
**`view_host_ip_ja4_rotation`** - IP avec rotation de fingerprints
```sql
-- Cette IP change-t-elle de JA4 fréquemment ?
SELECT src_ip, ja4s, unique_ja4s
FROM mabase_prod.view_host_ip_ja4_rotation
WHERE unique_ja4s >= 5;
-- Interprétation : 1 IP avec 5+ JA4 différents = fingerprint spoofing
```
---
## 7. Détection de Brute Force (sql/hosts.sql)
### A. Brute Force sur POST (endpoints sensibles)
**Table :** `agg_bruteforce_post_5m` - Fenêtres de 5 minutes
**Vue :** `view_bruteforce_post_detected`
```sql
-- Détecter les tentatives de brute force sur les login
SELECT window, src_ip, ja4, host, path, attempts, attempts_per_minute
FROM mabase_prod.view_bruteforce_post_detected
WHERE host = 'api.example.com'
ORDER BY attempts DESC;
-- Threshold : ≥10 POST en 5 minutes sur endpoints sensibles
-- Endpoints ciblés : login, auth, signin, password, admin, wp-login, etc.
```
### B. Brute Force sur Formulaire (Query params variables)
**Table :** `agg_form_bruteforce_5m`
**Vue :** `view_form_bruteforce_detected`
```sql
-- Détecter les requêtes avec query params hautement variables
SELECT window, src_ip, ja4, host, path, requests, unique_query_patterns
FROM mabase_prod.view_form_bruteforce_detected
WHERE requests >= 20 AND unique_query_patterns >= 10;
-- Interprétation : 20+ requêtes avec 10+ patterns query différents
-- = tentative de fuzzing ou brute force sur paramètres
```
---
## 8. Header Fingerprinting (sql/hosts.sql)
Le champ `client_headers` contient la liste comma-separated des headers présents.
Exemple : `"Accept,Accept-Encoding,Sec-CH-UA,Sec-Fetch-Dest,User-Agent"`
### A. Signature par Ordre de Headers
**Table :** `agg_header_fingerprint_1h`
| Champ | Description |
|-------|-------------|
| `header_count` | Nombre total de headers (virgules + 1) |
| `has_*` | Flags pour chaque header moderne (Sec-CH-UA, Sec-Fetch-*, etc.) |
| `header_order_hash` | MD5(client_headers) = signature unique de l'ordre |
| `modern_browser_score` | Score 0-100 basé sur les headers modernes présents |
### B. Vues de Détection
**`view_header_missing_modern_headers`** - Headers modernes manquants
```sql
-- Navigateurs "modernes" avec headers manquants
SELECT src_ip, ja4, header_user_agent, modern_browser_score, header_count
FROM mabase_prod.view_header_missing_modern_headers
WHERE header_user_agent ILIKE '%Chrome%';
-- Threshold : score < 70 pour Chrome/Firefox = suspect
-- Un vrai Chrome envoie automatiquement Sec-CH-UA, Sec-Fetch-*, etc.
```
**`view_header_ua_order_mismatch`** - Spoofing détecté
```sql
-- Même User-Agent avec ordre de headers différent
SELECT header_user_agent, ja4, unique_hashes, unique_ips
FROM mabase_prod.view_header_ua_order_mismatch
WHERE unique_hashes > 1;
-- Interprétation : 1 UA avec 2+ ordres de headers = spoofing ou outil custom
```
**`view_header_minimalist_count`** - Bot minimaliste
```sql
-- Clients avec trop peu de headers
SELECT src_ip, ja4, header_count, header_user_agent
FROM mabase_prod.view_header_minimalist_count
WHERE header_count < 6;
-- Threshold : < 6 headers = bot scripté (curl, Python requests, etc.)
```
**`view_header_sec_ch_missing`** - Incohérence Chrome
```sql
-- Chrome sans Sec-CH-UA (impossible pour un vrai Chrome)
SELECT src_ip, ja4, header_user_agent
FROM mabase_prod.view_header_sec_ch_missing
WHERE header_user_agent ILIKE '%Chrome/%';
```
**`view_header_known_bot_signature`** - Signature botnet
```sql
-- Même ordre de headers sur 10+ IPs différentes
SELECT header_order_hash, header_user_agent, unique_ips, total_hits
FROM mabase_prod.view_header_known_bot_signature
WHERE unique_ips >= 10;
-- Interprétation : 1 signature sur 10+ IPs = cluster de bots clonés
```
---
## 9. ALPN Mismatch Detection (sql/hosts.sql)
### Principe
ALPN (Application-Layer Protocol Negotiation) est une extension TLS qui négocie le protocole HTTP **avant** la requête.
| ALPN déclaré | HTTP réel | Interprétation |
|--------------|-----------|----------------|
| `h2` | `HTTP/2` | ✅ Normal |
| `h2` | `HTTP/1.1` | ❌ Bot mal configuré |
| `http/1.1` | `HTTP/1.1` | ✅ Normal |
### Vue de Détection
**`view_alpn_mismatch_detected`**
```sql
-- Clients déclarant h2 mais parlant HTTP/1.1
SELECT src_ip, ja4, declared_alpn, actual_http_version, mismatches, mismatch_pct
FROM mabase_prod.view_alpn_mismatch_detected
WHERE mismatch_pct >= 80;
-- Threshold : ≥5 requêtes avec ≥80% d'incohérence
-- Cause : curl mal configuré, Python requests, bots spoofant ALPN
```
---
## 10. Rate Limiting & Burst Detection (sql/hosts.sql)
### A. Rate Limiting (1 minute)
**Table :** `agg_rate_limit_1m`
**Vue :** `view_rate_limit_exceeded`
```sql
-- IPs dépassant 50 requêtes/minute
SELECT minute, src_ip, ja4, requests_per_min, unique_paths
FROM mabase_prod.view_rate_limit_exceeded
ORDER BY requests_per_min DESC;
-- Threshold : > 50 req/min = trafic automatisé
-- Un humain ne peut pas soutenir 50+ req/min de manière cohérente
```
### B. Burst Detection (10 secondes)
**Table :** `agg_burst_10s`
**Vue :** `view_burst_detected`
```sql
-- Pics soudains de trafic
SELECT window, src_ip, ja4, burst_count
FROM mabase_prod.view_burst_detected
WHERE burst_count > 20;
-- Threshold : > 20 requêtes en 10 secondes = burst suspect
-- Utile pour détecter les attaques par vagues
```
---
## 11. Path Enumeration / Scanning (sql/hosts.sql)
### Vue de Détection
**`view_path_scan_detected`**
```sql
-- Détection de scanning de paths sensibles
SELECT window, src_ip, ja4, host, sensitive_hits, sensitive_ratio
FROM mabase_prod.view_path_scan_detected
WHERE sensitive_hits >= 5;
-- Paths surveillés : admin, backup, config, .env, .git, wp-admin,
-- phpinfo, test, debug, log, sql, dump, passwd, shadow, htaccess, etc.
-- Threshold : ≥5 paths sensibles en 5 minutes = scanning
```
### Exemple de Résultat
| src_ip | ja4 | host | sensitive_hits | sensitive_ratio |
|--------|-----|------|----------------|-----------------|
| 1.2.3.4 | t13d... | api.example.com | 47 | 94.00 |
| 5.6.7.8 | t13d... | www.example.com | 12 | 80.00 |
**Interprétation :** Ces IPs testent systématiquement les paths sensibles = outils comme Nikto, Dirb, Gobuster.
---
## 12. Payload Attack Detection (sql/hosts.sql)
### A. Types d'Attaques Détectées
| Type | Patterns Détectés |
|------|-------------------|
| **SQL Injection** | `UNION SELECT`, `OR 1=1`, `DROP TABLE`, `; --`, `/* */`, `WAITFOR DELAY`, `SLEEP()` |
| **XSS** | `<script>`, `javascript:`, `onerror=`, `onload=`, `<img src=data:`, `<svg onload>` |
| **Path Traversal** | `../`, `..\\`, `%2e%2e%2f`, `%252e%252e`, `%%32%65%%32%65` |
### Vue de Détection
**`view_payload_attacks_detected`**
```sql
-- Toutes les tentatives d'injection
SELECT window, src_ip, ja4, host, path,
sqli_attempts, xss_attempts, traversal_attempts
FROM mabase_prod.view_payload_attacks_detected
ORDER BY sqli_attempts DESC, xss_attempts DESC, traversal_attempts DESC;
-- Threshold : ≥1 tentative = alerte (zero tolerance)
```
---
## 13. JA4 Botnet Detection (sql/hosts.sql)
### Principe
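Un JA4 identifie une pile TLS (bibliothèque ou navigateur + version), pas un individu : les navigateurs grand public partagent donc légitimement leur JA4 entre de très nombreuses IPs. Le signal utile est un JA4 rare ou non standard réutilisé à l'identique : un bot déployé sur 100 machines aura le **même JA4** sur ces 100 IPs.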
### Vue de Détection
**`view_ja4_botnet_suspected`**
```sql
-- JA4 partagé par 20+ IPs différentes
SELECT ja4, ja3_hash, unique_ips, unique_asns, unique_countries, total_hits
FROM mabase_prod.view_ja4_botnet_suspected
WHERE unique_ips >= 20;
-- Threshold : ≥20 IPs avec le même JA4 = botnet cloné
```
### Exemple de Résultat
| ja4 | ja3_hash | unique_ips | unique_asns | unique_countries |
|-----|----------|------------|-------------|------------------|
| t13d1512... | a3b5c7... | 147 | 12 | 8 |
| t13d0918... | f1e2d3... | 52 | 3 | 2 |
**Interprétation :** 147 IPs différentes avec le même fingerprint = cluster de bots clonés.
---
## 14. Correlation Quality (sql/hosts.sql)
### Principe
Mesure le ratio d'événements non-corrélés (orphelins). Un trafic légitime a une bonne corrélation HTTP/TCP.
### Vue de Détection
**`view_high_orphan_ratio`**
```sql
-- Trafic avec >80% d'événements non-corrélés
SELECT hour, src_ip, ja4, host, correlated, orphans, orphan_pct
FROM mabase_prod.view_high_orphan_ratio
ORDER BY orphan_pct DESC;
-- Threshold : orphan_pct > 80% = trafic suspect
-- Peut indiquer du trafic généré artificiellement
```
---
## 15. Maintenance et Faux Positifs
### Exceptions Connues
| Source | Faux Positif | Solution |
|--------|--------------|----------|
| **Googlebot/Bingbot** | Scan agressif mais légitime | Filtrer par ASN + Reverse DNS |
| **Monitoring interne** | Rate limit élevé | Whitelist par IP/ASN |
| **CDN/Proxy** | JA4 partagé (clients derrière proxy) | Vérifier ASN (Cloudflare, Akamai) |
| **Navigateurs anciens** | Headers modernes manquants | Vérifier UA version |
### Reset des Scores
Les agrégats sont automatiquement purgés par TTL :
- `agg_*_1h` : TTL 7 jours
- `agg_*_5m` : TTL 1 jour
- `agg_*_1m` : TTL 1 jour
Une IP bloquée par erreur retrouvera un score normal après expiration du TTL.
---
## 16. Synthèse des Vues de Détection
| Vue | Détection | Threshold | Impact |
|-----|-----------|-----------|--------|
| `view_bruteforce_post_detected` | POST endpoints sensibles | ≥10 en 5min | 🔴 Haut |
| `view_form_bruteforce_detected` | Query params variables | ≥20 req, ≥10 patterns | 🔴 Haut |
| `view_header_missing_modern_headers` | Headers modernes manquants | score < 70 | 🔴 Haut |
| `view_header_ua_order_mismatch` | UA spoofing (ordre) | >1 hash | 🔴 Haut |
| `view_header_minimalist_count` | Bot minimaliste | < 6 headers | 🔴 Haut |
| `view_header_sec_ch_missing` | Chrome sans Sec-CH | absent | 🟡 Moyen |
| `view_header_known_bot_signature` | Signature connue (botnet) | 10+ IPs | 🔴 Haut |
| `view_alpn_mismatch_detected` | h2 déclaré, HTTP/1.1 parlé | 80% mismatch | 🔴 Haut |
| `view_rate_limit_exceeded` | Rate limit dépassé | >50 req/min | 🔴 Haut |
| `view_burst_detected` | Burst soudain | >20 req/10s | 🟡 Moyen |
| `view_path_scan_detected` | Scanning de paths | ≥5 sensibles | 🔴 Haut |
| `view_payload_attacks_detected` | Injections SQLi/XSS | ≥1 tentative | 🔴 Critique |
| `view_ja4_botnet_suspected` | JA4 partagé (botnet) | ≥20 IPs | 🔴 Haut |
| `view_high_orphan_ratio` | Trafic non-corrélé | >80% orphans | 🟡 Moyen |
| `view_host_ja4_anomalies` | JA4 sur plusieurs hosts | ≥3 hosts | 🟡 Moyen |
| `view_host_ip_ja4_rotation` | Rotation de JA4 par une même IP | ≥5 JA4 | 🟡 Moyen |
---
## 17. Exemples de Requêtes d'Investigation
### Top 10 des IPs les plus suspectes (score cumulé)
```sql
WITH threats AS (
SELECT src_ip, ja4, 'bruteforce' AS type, sum(attempts) AS score
FROM mabase_prod.view_bruteforce_post_detected GROUP BY src_ip, ja4
UNION ALL
SELECT src_ip, ja4, 'path_scan', sum(sensitive_hits)
FROM mabase_prod.view_path_scan_detected GROUP BY src_ip, ja4
UNION ALL
SELECT src_ip, ja4, 'payload', sum(sqli_attempts + xss_attempts + traversal_attempts)
FROM mabase_prod.view_payload_attacks_detected GROUP BY src_ip, ja4
)
SELECT src_ip, ja4, sum(score) AS total_score, groupArray(type) AS threat_types
FROM threats
GROUP BY src_ip, ja4
ORDER BY total_score DESC
LIMIT 10;
```
### Historique d'une IP suspecte
```sql
SELECT
hour,
host,
countMerge(hits) AS requests,
uniqMerge(uniq_paths) AS unique_paths
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE src_ip = '1.2.3.4'
AND hour >= now() - INTERVAL 24 HOUR
GROUP BY hour, host
ORDER BY hour DESC;
```
### Corrélation JA4 → User-Agent → Hosts
```sql
SELECT
ja4,
any(first_ua) AS user_agent,
groupArray(DISTINCT host) AS hosts,
countMerge(hits) AS total_requests
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE hour >= now() - INTERVAL 1 HOUR
GROUP BY ja4
ORDER BY total_requests DESC
LIMIT 20;
```
---
## 18. Installation et Maintenance
### Installation
```bash
# Exécuter après init.sql
clickhouse-client --multiquery < sql/hosts.sql
```
### Vérification
```sql
-- Compter les enregistrements
SELECT count(*) FROM mabase_prod.agg_host_ip_ja4_1h;
SELECT count(*) FROM mabase_prod.agg_header_fingerprint_1h;
-- Tester les vues
SELECT * FROM mabase_prod.view_host_identification LIMIT 10;
SELECT * FROM mabase_prod.view_bruteforce_post_detected LIMIT 10;
SELECT * FROM mabase_prod.view_payload_attacks_detected LIMIT 10;
```
### Monitoring
```sql
-- Vues les plus actives (dernière heure)
SELECT
'bruteforce_post' AS view_name, count() AS alerts
FROM mabase_prod.view_bruteforce_post_detected
UNION ALL
SELECT 'path_scan', count() FROM mabase_prod.view_path_scan_detected
UNION ALL
SELECT 'payload_attacks', count() FROM mabase_prod.view_payload_attacks_detected
UNION ALL
SELECT 'ja4_botnet', count() FROM mabase_prod.view_ja4_botnet_suspected
ORDER BY alerts DESC;
```

View File

@ -0,0 +1,376 @@
package unixsocket
import (
"context"
"encoding/json"
"fmt"
"math"
"net"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/antitbone/ja4/correlator/internal/domain"
"github.com/antitbone/ja4/correlator/internal/observability"
)
// Limits used by the Unix datagram source.
const (
// MaxDatagramSize is the size, in bytes, of the receive buffer that
// readDatagrams allocates for a single JSON datagram (65535, just under 64 KiB).
MaxDatagramSize = 65535
// MaxEventsPerSecond is the intended cap on events accepted per second.
// NOTE(review): nothing in this file references this constant — confirm
// whether the rate limit is enforced elsewhere or is still to be implemented.
MaxEventsPerSecond = 10000
)
// Config holds the Unix socket source configuration.
type Config struct {
// Name identifies the source; it is returned by Name() and appended to the
// "unixsocket:" logger prefix.
Name string
// Path is the filesystem path of the Unix datagram socket to bind.
// Start rejects an empty or whitespace-only path.
Path string
SourceType string // "A" for Apache/HTTP, "B" for Network, "" for auto-detect
// SocketPermissions is the file mode applied to the socket after it is
// created; a zero value makes Start fall back to 0666.
SocketPermissions os.FileMode
}
// UnixSocketSource reads JSON events from a Unix datagram socket.
// Start binds the socket and spawns one reader goroutine; Stop closes the
// socket, waits for that goroutine and removes the socket file.
type UnixSocketSource struct {
config Config // settings passed to NewUnixSocketSource
mu sync.Mutex // held by Stop for the whole shutdown sequence
conn *net.UnixConn // bound socket; nil until Start has created it
done chan struct{} // closed by Stop to make the reader goroutine exit
wg sync.WaitGroup // tracks the reader goroutine so Stop can wait for it
stopOnce sync.Once // ensures the shutdown sequence in Stop runs only once
logger *observability.Logger // receives warnings and debug traces
}
// NewUnixSocketSource returns a source configured with config. The source is
// idle until Start is called; its logger is named "unixsocket:<config.Name>".
func NewUnixSocketSource(config Config) *UnixSocketSource {
	src := new(UnixSocketSource)
	src.config = config
	src.done = make(chan struct{})
	src.logger = observability.NewLogger("unixsocket:" + config.Name)
	return src
}
// SetLogger replaces the source's logger with one derived from logger that
// carries a "source" field set to the source name (used for debug mode).
func (s *UnixSocketSource) SetLogger(logger *observability.Logger) {
	fields := map[string]any{"source": s.config.Name}
	s.logger = logger.WithFields(fields)
}
// Name reports the configured name of this source.
func (s *UnixSocketSource) Name() string {
	name := s.config.Name
	return name
}
// Start binds the Unix datagram socket at config.Path and launches the
// goroutine that reads datagrams and forwards parsed events to eventChan.
//
// Steps, in order: validate the path, create the parent directory (0755),
// remove a stale socket file (refusing to touch anything that is not a
// socket), bind the socket, apply config.SocketPermissions (0666 when zero),
// then publish the connection and start the reader.
//
// The connection is published under s.mu, and only once it is fully set up,
// because Stop reads s.conn under the same mutex; a failed Start therefore
// never leaves a closed connection behind. Start returns an error if the
// source has already been stopped, instead of binding a socket whose reader
// would exit immediately and that nothing would ever close.
//
// The reader stops when ctx is cancelled or Stop is called.
func (s *UnixSocketSource) Start(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) error {
	if strings.TrimSpace(s.config.Path) == "" {
		return fmt.Errorf("socket path cannot be empty")
	}

	// Create parent directory if it doesn't exist
	socketDir := filepath.Dir(s.config.Path)
	if err := os.MkdirAll(socketDir, 0755); err != nil {
		return fmt.Errorf("failed to create socket directory %s: %w", socketDir, err)
	}

	// Remove a stale socket file left by a previous run; never delete a
	// path that is something other than a socket.
	if info, err := os.Stat(s.config.Path); err == nil {
		if info.Mode()&os.ModeSocket == 0 {
			return fmt.Errorf("path exists but is not a socket: %s", s.config.Path)
		}
		if err := os.Remove(s.config.Path); err != nil {
			return fmt.Errorf("failed to remove existing socket: %w", err)
		}
	}

	// Create Unix datagram socket
	addr, err := net.ResolveUnixAddr("unixgram", s.config.Path)
	if err != nil {
		return fmt.Errorf("failed to resolve unix socket address: %w", err)
	}
	conn, err := net.ListenUnixgram("unixgram", addr)
	if err != nil {
		return fmt.Errorf("failed to create unix datagram socket: %w", err)
	}

	// Set permissions - fail if we can't
	permissions := s.config.SocketPermissions
	if permissions == 0 {
		permissions = 0666 // default
	}
	if err := os.Chmod(s.config.Path, permissions); err != nil {
		_ = conn.Close()
		_ = os.Remove(s.config.Path)
		return fmt.Errorf("failed to set socket permissions: %w", err)
	}

	// Publish the connection and register the reader atomically with
	// respect to Stop, which takes the same mutex.
	s.mu.Lock()
	select {
	case <-s.done:
		s.mu.Unlock()
		_ = conn.Close()
		_ = os.Remove(s.config.Path)
		return fmt.Errorf("source %s has already been stopped", s.config.Name)
	default:
	}
	s.conn = conn
	s.wg.Add(1)
	s.mu.Unlock()

	go func() {
		defer s.wg.Done()
		s.readDatagrams(ctx, eventChan)
	}()
	return nil
}
// readDatagrams is the receive loop run by the goroutine that Start spawns.
// It reads one datagram at a time from s.conn, parses it with parseJSONEvent
// and forwards the resulting event to eventChan. Datagrams that fail to parse
// are logged and dropped.
//
// The loop exits when s.done is closed (Stop) or ctx is cancelled. Both are
// checked before every read, after a non-timeout read error, and while
// waiting to deliver an event. Watching s.done during delivery matters: Stop
// waits for this goroutine, so a send blocked on a full eventChan would
// otherwise make Stop hang until ctx is cancelled.
func (s *UnixSocketSource) readDatagrams(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) {
	buf := make([]byte, MaxDatagramSize)
	for {
		select {
		case <-s.done:
			return
		case <-ctx.Done():
			return
		default:
		}

		// Set read deadline to allow periodic context checks
		_ = s.conn.SetReadDeadline(time.Now().Add(100 * time.Millisecond))
		n, _, err := s.conn.ReadFromUnix(buf)
		if err != nil {
			if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
				// Read timeout, continue to check context
				continue
			}
			// Other errors (e.g., closed socket)
			select {
			case <-s.done:
				return
			case <-ctx.Done():
				return
			default:
				s.logger.Warnf("read error: %v", err)
				continue
			}
		}
		if n == 0 {
			continue
		}

		// Copy out of the shared buffer, which the next read overwrites.
		data := make([]byte, n)
		copy(data, buf[:n])

		event, err := parseJSONEvent(data, s.config.SourceType)
		if err != nil {
			// Log parse errors with the raw data for debugging
			s.logger.Warnf("parse error: %v | raw: %s", err, string(data))
			continue
		}

		// Debug: log raw events with all key details
		s.logger.Debugf("event received: source=%s src_ip=%s src_port=%d timestamp=%v raw_timestamp=%v",
			event.Source, event.SrcIP, event.SrcPort, event.Timestamp, event.Raw["timestamp"])

		select {
		case eventChan <- event:
		case <-s.done:
			// Stop was requested while the consumer is not draining.
			return
		case <-ctx.Done():
			return
		}
	}
}
// resolveSource maps the configured source type to an event source.
// Matching ignores case and surrounding whitespace: "a", "apache" and "http"
// select source A; "b", "network" and "net" select source B. Any other value
// (including the empty string) falls back to auto-detection: an event with at
// least one extracted header is treated as source A, otherwise as source B.
func resolveSource(sourceType string, headers map[string]string) domain.EventSource {
	kind := strings.ToLower(strings.TrimSpace(sourceType))
	if kind == "a" || kind == "apache" || kind == "http" {
		return domain.SourceA
	}
	if kind == "b" || kind == "network" || kind == "net" {
		return domain.SourceB
	}

	// fallback compat: infer the source from the presence of headers
	if len(headers) == 0 {
		return domain.SourceB
	}
	return domain.SourceA
}
// parseJSONEvent decodes one JSON object into a NormalizedEvent.
//
// Field handling:
//   - "header_*" keys with string values go to Headers, keyed without the prefix;
//   - src_ip (non-blank string) and src_port (integer in 1..65535) are required;
//   - dst_ip is optional; dst_port is optional and must be in 0..65535 when present;
//   - every key that is neither a header nor one of src_ip, src_port, dst_ip,
//     dst_port, timestamp, time is copied to Extra.
//
// The source is resolved from sourceType (see resolveSource) and decides the
// timestamp contract: source A requires a numeric "timestamp", interpreted as
// Unix nanoseconds; source B uses "timestamp" when it is numeric, else an
// RFC 3339 "time" string, else the current time.
//
// NOTE(review): numbers decoded into map[string]any arrive as float64, so
// nanosecond timestamps above 2^53 may be rounded — confirm this precision is
// acceptable for correlation.
func parseJSONEvent(data []byte, sourceType string) (*domain.NormalizedEvent, error) {
	var payload map[string]any
	if err := json.Unmarshal(data, &payload); err != nil {
		return nil, fmt.Errorf("invalid JSON: %w", err)
	}

	evt := &domain.NormalizedEvent{
		Raw:     payload,
		Extra:   make(map[string]any),
		Headers: make(map[string]string),
	}

	// One pass over the payload: headers to Headers, unknown keys to Extra.
	const headerPrefix = "header_"
	for key, value := range payload {
		if strings.HasPrefix(key, headerPrefix) {
			if text, isString := value.(string); isString {
				evt.Headers[strings.TrimPrefix(key, headerPrefix)] = text
			}
			continue
		}
		switch key {
		case "src_ip", "src_port", "dst_ip", "dst_port", "timestamp", "time":
			// Mapped to dedicated fields below.
		default:
			evt.Extra[key] = value
		}
	}

	// The timestamp contract depends on the source, so resolve it first.
	evt.Source = resolveSource(sourceType, evt.Headers)

	// src_ip: required, non-blank.
	srcIP, hasSrcIP := getString(payload, "src_ip")
	if !hasSrcIP {
		return nil, fmt.Errorf("missing required field: src_ip")
	}
	srcIP = strings.TrimSpace(srcIP)
	if srcIP == "" {
		return nil, fmt.Errorf("src_ip cannot be empty")
	}
	evt.SrcIP = srcIP

	// src_port: required, valid port number.
	srcPort, hasSrcPort := getInt(payload, "src_port")
	if !hasSrcPort {
		return nil, fmt.Errorf("missing required field: src_port")
	}
	if srcPort < 1 || srcPort > 65535 {
		return nil, fmt.Errorf("src_port must be between 1 and 65535, got %d", srcPort)
	}
	evt.SrcPort = srcPort

	// dst_ip / dst_port: optional.
	if dstIP, hasDstIP := getString(payload, "dst_ip"); hasDstIP {
		evt.DstIP = strings.TrimSpace(dstIP)
	}
	if dstPort, hasDstPort := getInt(payload, "dst_port"); hasDstPort {
		if dstPort < 0 || dstPort > 65535 {
			return nil, fmt.Errorf("dst_port must be between 0 and 65535, got %d", dstPort)
		}
		evt.DstPort = dstPort
	}

	// Timestamp, according to the source contract.
	nanos, hasNanos := getInt64(payload, "timestamp")
	switch evt.Source {
	case domain.SourceA:
		if !hasNanos {
			return nil, fmt.Errorf("missing required numeric field: timestamp for source A")
		}
		evt.Timestamp = time.Unix(0, nanos)
	case domain.SourceB:
		if hasNanos {
			evt.Timestamp = time.Unix(0, nanos)
			break
		}
		parsed := false
		if timeStr, hasTime := getString(payload, "time"); hasTime {
			for _, layout := range []string{time.RFC3339, time.RFC3339Nano} {
				if t, err := time.Parse(layout, timeStr); err == nil {
					evt.Timestamp = t
					parsed = true
					break
				}
			}
		}
		if !parsed {
			// No usable timestamp in the payload: use reception time.
			evt.Timestamp = time.Now()
		}
	default:
		return nil, fmt.Errorf("unsupported source type: %s", evt.Source)
	}

	return evt, nil
}
// getString returns m[key] when it holds a string. The boolean is false when
// the key is absent or its value is not a string (including nil).
func getString(m map[string]any, key string) (string, bool) {
	text, isString := m[key].(string)
	return text, isString
}
// getInt returns m[key] as an int. Accepted representations are a float64
// with no fractional part, an int, an int64, or a base-10 string. The boolean
// is false when the key is absent, the value has another type, or the value
// cannot be represented exactly as an int.
//
// Range checks are explicit because converting an out-of-range float64 to an
// integer yields an implementation-dependent value in Go rather than an error.
func getInt(m map[string]any, key string) (int, bool) {
	v, ok := m[key]
	if !ok {
		return 0, false
	}
	switch val := v.(type) {
	case float64:
		// Trunc(val) != val rejects fractions and NaN; the bounds reject
		// infinities and anything outside [MinInt, MaxInt]. The upper bound is
		// written as -MinInt (2^63 on 64-bit) because MaxInt itself is not
		// exactly representable as a float64.
		if math.Trunc(val) != val || val < math.MinInt || val >= -math.MinInt {
			return 0, false
		}
		return int(val), true
	case int:
		return val, true
	case int64:
		// Guards against truncation on platforms where int is 32 bits.
		if int64(int(val)) != val {
			return 0, false
		}
		return int(val), true
	case string:
		if i, err := strconv.Atoi(val); err == nil {
			return i, true
		}
	}
	return 0, false
}
// getInt64 returns m[key] as an int64. Accepted representations are a float64
// with no fractional part, an int, an int64, or a base-10 string. The boolean
// is false when the key is absent, the value has another type, or the value
// does not fit in an int64.
//
// The float64 range check is explicit because converting an out-of-range
// float64 to an integer yields an implementation-dependent value in Go.
func getInt64(m map[string]any, key string) (int64, bool) {
	v, ok := m[key]
	if !ok {
		return 0, false
	}
	switch val := v.(type) {
	case float64:
		// Trunc(val) != val rejects fractions and NaN; the bounds reject
		// infinities and anything outside [-2^63, 2^63). The upper bound is
		// written as -MinInt64 (2^63) because MaxInt64 is not exactly
		// representable as a float64.
		if math.Trunc(val) != val || val < math.MinInt64 || val >= -math.MinInt64 {
			return 0, false
		}
		return int64(val), true
	case int:
		return int64(val), true
	case int64:
		return val, true
	case string:
		if i, err := strconv.ParseInt(val, 10, 64); err == nil {
			return i, true
		}
	}
	return 0, false
}
// Stop shuts the source down: it closes the done channel, closes the socket,
// waits for the reader goroutine to exit and removes the socket file.
// The shutdown sequence runs at most once; later calls return nil.
//
// The socket file is removed only when this source actually bound a socket
// (s.conn is set). A source that was never started, or whose Start failed
// before binding, does not own whatever sits at config.Path — possibly a
// regular file that Start refused to touch — and must not delete it.
//
// It returns an error only when the socket file exists but cannot be removed.
func (s *UnixSocketSource) Stop() error {
	var stopErr error
	s.stopOnce.Do(func() {
		s.mu.Lock()
		defer s.mu.Unlock()

		close(s.done)
		if s.conn == nil {
			// Nothing was bound: no socket to close, no reader to wait
			// for, and no file that belongs to this source.
			return
		}

		// Closing the socket unblocks a reader waiting in ReadFromUnix.
		_ = s.conn.Close()
		s.wg.Wait()

		// Clean up socket file
		if err := os.Remove(s.config.Path); err != nil && !os.IsNotExist(err) {
			stopErr = fmt.Errorf("failed to remove socket file: %w", err)
		}
	})
	return stopErr
}

View File

@ -0,0 +1,596 @@
package unixsocket
import (
"context"
"fmt"
"net"
"os"
"testing"
"time"
"github.com/antitbone/ja4/correlator/internal/domain"
)
// TestParseJSONEvent_Apache parses a complete source-A (HTTP) payload and
// checks the addressing fields, the extracted headers, the resolved source
// and the nanosecond timestamp.
func TestParseJSONEvent_Apache(t *testing.T) {
	payload := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"dst_ip": "10.0.0.1",
"dst_port": 80,
"timestamp": 1704110400000000000,
"method": "GET",
"path": "/api/test",
"header_host": "example.com",
"header_user_agent": "Mozilla/5.0"
}`)

	evt, parseErr := parseJSONEvent(payload, "A")
	if parseErr != nil {
		t.Fatalf("unexpected error: %v", parseErr)
	}

	if got := evt.SrcIP; got != "192.168.1.1" {
		t.Errorf("expected src_ip 192.168.1.1, got %s", got)
	}
	if got := evt.SrcPort; got != 8080 {
		t.Errorf("expected src_port 8080, got %d", got)
	}
	if got := evt.Headers["host"]; got != "example.com" {
		t.Errorf("expected header host example.com, got %s", got)
	}
	if got := evt.Headers["user_agent"]; got != "Mozilla/5.0" {
		t.Errorf("expected header_user_agent Mozilla/5.0, got %s", got)
	}
	if got := evt.Source; got != domain.SourceA {
		t.Errorf("expected source A, got %s", got)
	}

	want := time.Unix(0, 1704110400000000000)
	if got := evt.Timestamp; !got.Equal(want) {
		t.Errorf("expected timestamp %v, got %v", want, got)
	}
}
// TestParseJSONEvent_Network parses a source-B (network) payload and checks
// that non-reserved fields land in Extra and that the payload timestamp is used.
func TestParseJSONEvent_Network(t *testing.T) {
	payload := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"dst_ip": "10.0.0.1",
"dst_port": 443,
"timestamp": 1704110400000000000,
"ja3": "abc123def456",
"ja4": "xyz789",
"tcp_meta_flags": "SYN"
}`)

	evt, parseErr := parseJSONEvent(payload, "B")
	if parseErr != nil {
		t.Fatalf("unexpected error: %v", parseErr)
	}

	if got := evt.SrcIP; got != "192.168.1.1" {
		t.Errorf("expected src_ip 192.168.1.1, got %s", got)
	}
	if got := evt.Extra["ja3"]; got != "abc123def456" {
		t.Errorf("expected ja3 abc123def456, got %v", got)
	}
	if got := evt.Source; got != domain.SourceB {
		t.Errorf("expected source B, got %s", got)
	}

	// Network source now uses payload timestamp if available
	want := time.Unix(0, 1704110400000000000)
	if got := evt.Timestamp; !got.Equal(want) {
		t.Errorf("expected network timestamp %v, got %v", want, got)
	}
}
// TestParseJSONEvent_InvalidJSON checks that malformed JSON is reported as an error.
func TestParseJSONEvent_InvalidJSON(t *testing.T) {
	if _, parseErr := parseJSONEvent([]byte(`{invalid json}`), ""); parseErr == nil {
		t.Error("expected error for invalid JSON")
	}
}
// TestParseJSONEvent_MissingFields checks that a payload without the required
// src_ip/src_port fields is rejected.
func TestParseJSONEvent_MissingFields(t *testing.T) {
	if _, parseErr := parseJSONEvent([]byte(`{"other_field": "value"}`), ""); parseErr == nil {
		t.Error("expected error for missing src_ip/src_port")
	}
}
// TestParseJSONEvent_SourceARequiresNumericTimestamp checks that source A
// rejects a payload that only carries a textual "time" field.
func TestParseJSONEvent_SourceARequiresNumericTimestamp(t *testing.T) {
	payload := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"time": "2024-01-01T12:00:00Z"
}`)

	if _, parseErr := parseJSONEvent(payload, "A"); parseErr == nil {
		t.Fatal("expected error for source A without numeric timestamp")
	}
}
// TestParseJSONEvent_SourceBUsesPayloadTimestamp checks that source B takes
// its timestamp from the numeric "timestamp" field when present.
func TestParseJSONEvent_SourceBUsesPayloadTimestamp(t *testing.T) {
	const wantNanos int64 = 1704110400000000000
	payload := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"timestamp": 1704110400000000000
}`)

	evt, parseErr := parseJSONEvent(payload, "B")
	if parseErr != nil {
		t.Fatalf("unexpected error: %v", parseErr)
	}

	want := time.Unix(0, wantNanos)
	if got := evt.Timestamp; !got.Equal(want) {
		t.Errorf("expected source B to use payload timestamp %v, got %v", want, got)
	}
}
// TestParseJSONEvent_SourceBUsesTimeField checks that source B falls back to
// the RFC 3339 "time" field when no numeric timestamp is present.
func TestParseJSONEvent_SourceBUsesTimeField(t *testing.T) {
	payload := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"time": "2024-01-01T12:00:00Z"
}`)

	evt, parseErr := parseJSONEvent(payload, "B")
	if parseErr != nil {
		t.Fatalf("unexpected error: %v", parseErr)
	}

	want := time.Unix(0, 1704110400000000000)
	if got := evt.Timestamp; !got.Equal(want) {
		t.Errorf("expected source B to use time field %v, got %v", want, got)
	}
}
// TestParseJSONEvent_SourceBFallbackToNow checks that source B stamps the
// event with the current time when the payload carries no timestamp at all.
func TestParseJSONEvent_SourceBFallbackToNow(t *testing.T) {
	payload := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080
}`)

	startedAt := time.Now()
	evt, parseErr := parseJSONEvent(payload, "B")
	finishedAt := time.Now()
	if parseErr != nil {
		t.Fatalf("unexpected error: %v", parseErr)
	}

	// Allow two seconds of slack on either side of the call.
	tooEarly := evt.Timestamp.Before(startedAt.Add(-2 * time.Second))
	tooLate := evt.Timestamp.After(finishedAt.Add(2 * time.Second))
	if tooEarly || tooLate {
		t.Errorf("expected source B timestamp near now, got %v", evt.Timestamp)
	}
}
// TestParseJSONEvent_ExplicitSourceType covers source resolution: explicit
// short and long source names, plus auto-detection based on the presence of
// header_* fields when no source type is configured.
func TestParseJSONEvent_ExplicitSourceType(t *testing.T) {
	type sourceCase struct {
		name       string
		data       string
		sourceType string
		expected   domain.EventSource
	}

	cases := []sourceCase{
		{"explicit A", `{"src_ip": "192.168.1.1", "src_port": 8080, "timestamp": 1704110400000000000}`, "A", domain.SourceA},
		{"explicit B", `{"src_ip": "192.168.1.1", "src_port": 8080}`, "B", domain.SourceB},
		{"explicit apache", `{"src_ip": "192.168.1.1", "src_port": 8080, "timestamp": 1704110400000000000}`, "apache", domain.SourceA},
		{"explicit network", `{"src_ip": "192.168.1.1", "src_port": 8080}`, "network", domain.SourceB},
		{"auto-detect A with headers", `{"src_ip": "192.168.1.1", "src_port": 8080, "timestamp": 1704110400000000000, "header_host": "example.com"}`, "", domain.SourceA},
		{"auto-detect B without headers", `{"src_ip": "192.168.1.1", "src_port": 8080, "ja3": "abc"}`, "", domain.SourceB},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			evt, parseErr := parseJSONEvent([]byte(tc.data), tc.sourceType)
			if parseErr != nil {
				t.Fatalf("unexpected error: %v", parseErr)
			}
			if got := evt.Source; got != tc.expected {
				t.Errorf("expected source %s, got %s", tc.expected, got)
			}
		})
	}
}
func TestUnixSocketSource_Name(t *testing.T) {
source := NewUnixSocketSource(Config{
Name: "test_source",
Path: "/tmp/test.sock",
})
if source.Name() != "test_source" {
t.Errorf("expected name 'test_source', got %s", source.Name())
}
}
func TestUnixSocketSource_StopWithoutStart(t *testing.T) {
source := NewUnixSocketSource(Config{
Name: "test_source",
Path: "/tmp/test.sock",
})
// Should not panic
err := source.Stop()
if err != nil {
t.Errorf("expected no error on stop without start, got %v", err)
}
}
func TestUnixSocketSource_EmptyPath(t *testing.T) {
source := NewUnixSocketSource(Config{
Name: "test_source",
Path: "",
})
ctx := context.Background()
eventChan := make(chan *domain.NormalizedEvent, 10)
err := source.Start(ctx, eventChan)
if err == nil {
t.Error("expected error for empty path")
}
}
// TestGetString checks getString against a string value, a non-string value
// and a key that is absent from the map.
func TestGetString(t *testing.T) {
	fields := map[string]any{
		"string": "hello",
		"int":    42,
		"nil":    nil,
	}

	if got, found := getString(fields, "string"); !found || got != "hello" {
		t.Errorf("expected 'hello', got %v, %v", got, found)
	}
	if _, found := getString(fields, "int"); found {
		t.Error("expected false for int")
	}
	if _, found := getString(fields, "missing"); found {
		t.Error("expected false for missing key")
	}
}
// TestGetInt checks which value kinds getInt accepts: integers and numeric
// strings are expected to convert, while a fractional float, a non-numeric
// string, nil and a missing key are expected to be rejected.
func TestGetInt(t *testing.T) {
	values := map[string]any{
		"float":  42.5,
		"int":    42,
		"int64":  int64(42),
		"string": "42",
		"bad":    "not a number",
		"nil":    nil,
	}

	type expectation struct {
		key      string
		expected int
		ok       bool
	}
	expectations := []expectation{
		{key: "float", expected: 0, ok: false},
		{key: "int", expected: 42, ok: true},
		{key: "int64", expected: 42, ok: true},
		{key: "string", expected: 42, ok: true},
		{key: "bad", expected: 0, ok: false},
		{key: "nil", expected: 0, ok: false},
		{key: "missing", expected: 0, ok: false},
	}

	for _, want := range expectations {
		t.Run(want.key, func(t *testing.T) {
			got, found := getInt(values, want.key)
			if found != want.ok {
				t.Errorf("getInt(%q) ok = %v, want %v", want.key, found, want.ok)
			}
			if got != want.expected {
				t.Errorf("getInt(%q) = %v, want %v", want.key, got, want.expected)
			}
		})
	}
}
// TestGetInt64 mirrors TestGetInt for the 64-bit accessor: integers and
// numeric strings are expected to convert, everything else to be rejected.
func TestGetInt64(t *testing.T) {
	values := map[string]any{
		"float":  42.5,
		"int":    42,
		"int64":  int64(42),
		"string": "42",
		"bad":    "not a number",
		"nil":    nil,
	}

	type expectation struct {
		key      string
		expected int64
		ok       bool
	}
	expectations := []expectation{
		{key: "float", expected: 0, ok: false},
		{key: "int", expected: 42, ok: true},
		{key: "int64", expected: 42, ok: true},
		{key: "string", expected: 42, ok: true},
		{key: "bad", expected: 0, ok: false},
		{key: "nil", expected: 0, ok: false},
		{key: "missing", expected: 0, ok: false},
	}

	for _, want := range expectations {
		t.Run(want.key, func(t *testing.T) {
			got, found := getInt64(values, want.key)
			if found != want.ok {
				t.Errorf("getInt64(%q) ok = %v, want %v", want.key, found, want.ok)
			}
			if got != want.expected {
				t.Errorf("getInt64(%q) = %v, want %v", want.key, got, want.expected)
			}
		})
	}
}
// TestParseJSONEvent_PortValidation checks the port range rules enforced by
// parseJSONEvent: src_port must be a non-zero port no greater than 65535,
// while dst_port may be zero but must not exceed the valid range.
func TestParseJSONEvent_PortValidation(t *testing.T) {
	type testCase struct {
		name       string
		data       string
		sourceType string
		wantErr    bool
	}

	cases := []testCase{
		{name: "valid src_port", data: `{"src_ip": "192.168.1.1", "src_port": 8080}`, sourceType: "B", wantErr: false},
		{name: "src_port zero", data: `{"src_ip": "192.168.1.1", "src_port": 0}`, sourceType: "B", wantErr: true},
		{name: "src_port negative", data: `{"src_ip": "192.168.1.1", "src_port": -1}`, sourceType: "B", wantErr: true},
		{name: "src_port too high", data: `{"src_ip": "192.168.1.1", "src_port": 70000}`, sourceType: "B", wantErr: true},
		{name: "valid dst_port zero", data: `{"src_ip": "192.168.1.1", "src_port": 8080, "dst_port": 0}`, sourceType: "B", wantErr: false},
		{name: "dst_port too high", data: `{"src_ip": "192.168.1.1", "src_port": 8080, "dst_port": 70000}`, sourceType: "B", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			_, err := parseJSONEvent([]byte(tc.data), tc.sourceType)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("parseJSONEvent() error = %v, wantErr %v", err, tc.wantErr)
			}
		})
	}
}
// TestParseJSONEvent_TimestampFallback checks that a source B event without a
// timestamp field still ends up with a non-zero timestamp.
func TestParseJSONEvent_TimestampFallback(t *testing.T) {
	payload := []byte(`{"src_ip": "192.168.1.1", "src_port": 8080}`)

	parsed, err := parseJSONEvent(payload, "B")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// For source B the timestamp is taken at reception time.
	if stamp := parsed.Timestamp; stamp.IsZero() {
		t.Error("expected non-zero timestamp")
	}
}
// TestUnixSocketSource_StartStopDatagram verifies that Start creates the
// socket file at the configured path and that Stop removes it again.
func TestUnixSocketSource_StartStopDatagram(t *testing.T) {
	// Use a private temporary directory instead of a fixed /tmp path so that
	// concurrent or aborted runs cannot collide on, or leave behind, the
	// socket file. The short prefix keeps the Unix socket path well within
	// the platform's path length limit.
	dir, err := os.MkdirTemp("", "lc")
	if err != nil {
		t.Fatalf("failed to create temp dir: %v", err)
	}
	t.Cleanup(func() { _ = os.RemoveAll(dir) })
	tmpPath := dir + "/datagram.sock"

	source := NewUnixSocketSource(Config{
		Name:              "test_datagram",
		Path:              tmpPath,
		SourceType:        "B",
		SocketPermissions: 0666,
	})

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	eventChan := make(chan *domain.NormalizedEvent, 10)
	if err := source.Start(ctx, eventChan); err != nil {
		t.Fatalf("failed to start source: %v", err)
	}

	// Give socket time to start
	time.Sleep(100 * time.Millisecond)

	// Verify socket file exists
	if _, err := os.Stat(tmpPath); os.IsNotExist(err) {
		t.Error("socket file should exist")
	}

	// Stop the source
	if err := source.Stop(); err != nil {
		t.Errorf("failed to stop source: %v", err)
	}

	// Socket file should be cleaned up
	time.Sleep(100 * time.Millisecond)
	if _, err := os.Stat(tmpPath); !os.IsNotExist(err) {
		t.Error("socket file should be removed after stop")
	}
}
// TestUnixSocketSource_SendDatagram starts a datagram source, sends one JSON
// datagram to its socket and checks that the matching normalized event is
// delivered on the event channel.
func TestUnixSocketSource_SendDatagram(t *testing.T) {
	// Use a private temporary directory instead of a fixed /tmp path so that
	// concurrent or aborted runs cannot collide on the socket file.
	dir, err := os.MkdirTemp("", "lc")
	if err != nil {
		t.Fatalf("failed to create temp dir: %v", err)
	}
	t.Cleanup(func() { _ = os.RemoveAll(dir) })
	tmpPath := dir + "/send.sock"

	source := NewUnixSocketSource(Config{
		Name:              "test_send",
		Path:              tmpPath,
		SourceType:        "B",
		SocketPermissions: 0666,
	})

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	eventChan := make(chan *domain.NormalizedEvent, 10)
	if err := source.Start(ctx, eventChan); err != nil {
		t.Fatalf("failed to start source: %v", err)
	}
	// Make sure the source is stopped even when the test aborts early via
	// t.Fatalf; the flag avoids a second Stop after the explicit one below.
	stopped := false
	t.Cleanup(func() {
		if !stopped {
			_ = source.Stop()
		}
	})

	// Give socket time to start
	time.Sleep(100 * time.Millisecond)

	// Connect and send a datagram
	conn, err := net.Dial("unixgram", tmpPath)
	if err != nil {
		t.Fatalf("failed to dial socket: %v", err)
	}
	defer conn.Close()

	data := []byte(`{"src_ip": "192.168.1.1", "src_port": 8080, "ja3": "test"}`)
	if _, err := conn.Write(data); err != nil {
		t.Fatalf("failed to write: %v", err)
	}

	// Wait for event
	select {
	case event := <-eventChan:
		if event.SrcIP != "192.168.1.1" {
			t.Errorf("expected src_ip 192.168.1.1, got %s", event.SrcIP)
		}
		if event.SrcPort != 8080 {
			t.Errorf("expected src_port 8080, got %d", event.SrcPort)
		}
	case <-time.After(2 * time.Second):
		t.Error("timeout waiting for event")
	case <-ctx.Done():
		t.Error("context cancelled")
	}

	stopped = true
	if err := source.Stop(); err != nil {
		t.Errorf("failed to stop source: %v", err)
	}
}
// TestUnixSocketSource_MultipleDatagrams sends five datagrams over a single
// connection and checks that five events arrive on the event channel.
func TestUnixSocketSource_MultipleDatagrams(t *testing.T) {
	const datagrams = 5

	// Use a private temporary directory instead of a fixed /tmp path so that
	// concurrent or aborted runs cannot collide on the socket file.
	dir, err := os.MkdirTemp("", "lc")
	if err != nil {
		t.Fatalf("failed to create temp dir: %v", err)
	}
	t.Cleanup(func() { _ = os.RemoveAll(dir) })
	tmpPath := dir + "/multi.sock"

	source := NewUnixSocketSource(Config{
		Name:              "test_multi",
		Path:              tmpPath,
		SourceType:        "B",
		SocketPermissions: 0666,
	})

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	eventChan := make(chan *domain.NormalizedEvent, 100)
	if err := source.Start(ctx, eventChan); err != nil {
		t.Fatalf("failed to start source: %v", err)
	}
	// Make sure the source is stopped even when the test aborts early via
	// t.Fatalf; the flag avoids a second Stop after the explicit one below.
	stopped := false
	t.Cleanup(func() {
		if !stopped {
			_ = source.Stop()
		}
	})

	// Give socket time to start
	time.Sleep(100 * time.Millisecond)

	// Connect and send multiple datagrams
	conn, err := net.Dial("unixgram", tmpPath)
	if err != nil {
		t.Fatalf("failed to dial socket: %v", err)
	}
	defer conn.Close()

	for i := 0; i < datagrams; i++ {
		data := []byte(fmt.Sprintf(`{"src_ip": "192.168.1.%d", "src_port": %d, "ja3": "test%d"}`, i+1, 8080+i, i))
		if _, err := conn.Write(data); err != nil {
			t.Fatalf("failed to write datagram %d: %v", i, err)
		}
	}

	// Wait for all events, giving up on timeout or context expiry.
	received := 0
	timeout := time.After(3 * time.Second)
collect:
	for received < datagrams {
		select {
		case event := <-eventChan:
			received++
			t.Logf("received event %d: src_ip=%s", received, event.SrcIP)
		case <-timeout:
			t.Errorf("timeout waiting for events, received %d/5", received)
			break collect
		case <-ctx.Done():
			t.Error("context cancelled")
			break collect
		}
	}

	stopped = true
	if err := source.Stop(); err != nil {
		t.Errorf("failed to stop source: %v", err)
	}
}

View File

@ -0,0 +1,391 @@
package clickhouse
import (
"context"
"encoding/json"
"errors"
"fmt"
"net"
"strings"
"sync"
"time"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/antitbone/ja4/correlator/internal/domain"
"github.com/antitbone/ja4/correlator/internal/observability"
)
const (
// DefaultBatchSize is the BatchSize applied when the configured value is
// not positive. Reaching this many buffered logs triggers a flush signal.
DefaultBatchSize = 500
// DefaultFlushIntervalMs is the period, in milliseconds, of the periodic
// flush ticker when Config.FlushIntervalMs is not positive.
DefaultFlushIntervalMs = 200
// DefaultMaxBufferSize is the maximum number of buffered logs when
// Config.MaxBufferSize is not positive.
DefaultMaxBufferSize = 5000
// DefaultTimeoutMs is the timeout, in milliseconds, applied to Write
// back-pressure waits and to each flush when Config.TimeoutMs is not positive.
DefaultTimeoutMs = 1000
// DefaultPingTimeoutMs is the timeout, in milliseconds, for the connection
// ping performed by NewClickHouseSink.
DefaultPingTimeoutMs = 5000
// MaxRetries is the total number of insert attempts made for one batch,
// including the first attempt.
MaxRetries = 3
// RetryBaseDelay is the wait before the second attempt; the wait doubles
// for each further attempt.
RetryBaseDelay = 100 * time.Millisecond
)
// Config holds the ClickHouse sink configuration.
// Non-positive numeric fields are replaced by the Default* constants in
// NewClickHouseSink.
type Config struct {
// DSN is the connection string handed to clickhouse.ParseDSN; required.
DSN string
// Table is the target table name, interpolated into the INSERT statement; required.
Table string
// BatchSize is the buffered-log count at which Write signals a flush.
BatchSize int
// FlushIntervalMs is the period of the periodic flush, in milliseconds.
FlushIntervalMs int
// MaxBufferSize is the maximum number of logs held in memory.
MaxBufferSize int
// DropOnOverflow makes Write drop the log (and return nil) when the buffer
// is full instead of waiting for space.
DropOnOverflow bool
// AsyncInsert is not referenced anywhere in this file.
// NOTE(review): confirm whether it is consumed elsewhere or is dead configuration.
AsyncInsert bool
// TimeoutMs bounds, in milliseconds, how long Write waits for buffer space
// and how long each flush may take.
TimeoutMs int
}
// ClickHouseSink writes correlated logs to ClickHouse.
// Logs are accumulated in an in-memory buffer by Write and inserted in batches
// by a background goroutine (flushLoop) or by an explicit Flush/Close.
type ClickHouseSink struct {
// config is the configuration with defaults already applied by NewClickHouseSink.
config Config
// conn is the ClickHouse connection; it stays nil for sinks that were not
// built through NewClickHouseSink (as done in tests).
conn clickhouse.Conn
// mu guards buffer.
mu sync.Mutex
// buffer holds the logs waiting to be flushed.
buffer []domain.CorrelatedLog
// flushChan carries a non-blocking signal from Write to flushLoop that the
// buffer reached BatchSize.
flushChan chan struct{}
// done is closed by Close to stop flushLoop.
done chan struct{}
// wg tracks the flushLoop goroutine so Close can wait for it.
wg sync.WaitGroup
// closeOnce makes Close safe to call more than once.
closeOnce sync.Once
// logger is the sink logger; log() installs a default one when it is nil.
logger *observability.Logger
}
// SetLogger sets the logger used by the sink, tagging it with the
// "sink": "clickhouse" field. A nil logger is ignored so that the sink keeps
// its current logger (or the default one installed lazily by log()) instead
// of dereferencing a nil pointer.
func (s *ClickHouseSink) SetLogger(logger *observability.Logger) {
	if logger == nil {
		return
	}
	s.logger = logger.WithFields(map[string]any{"sink": "clickhouse"})
}
// NewClickHouseSink creates a new ClickHouse sink.
//
// It validates that DSN and Table are non-blank, replaces non-positive numeric
// settings with the Default* constants, opens the connection, verifies it with
// a ping bounded by DefaultPingTimeoutMs and starts the background flush
// goroutine. An error is returned when validation, DSN parsing, opening or
// pinging fails; in that case no goroutine is left running.
func NewClickHouseSink(config Config) (*ClickHouseSink, error) {
	if strings.TrimSpace(config.DSN) == "" {
		return nil, fmt.Errorf("clickhouse DSN is required")
	}
	if strings.TrimSpace(config.Table) == "" {
		return nil, fmt.Errorf("clickhouse table is required")
	}

	// Apply defaults
	if config.BatchSize <= 0 {
		config.BatchSize = DefaultBatchSize
	}
	if config.FlushIntervalMs <= 0 {
		config.FlushIntervalMs = DefaultFlushIntervalMs
	}
	if config.MaxBufferSize <= 0 {
		config.MaxBufferSize = DefaultMaxBufferSize
	}
	if config.TimeoutMs <= 0 {
		config.TimeoutMs = DefaultTimeoutMs
	}

	// Write never lets the buffer grow beyond MaxBufferSize, so a larger
	// BatchSize could never be reached and the size-based flush would never
	// fire. Clamp it so a full buffer always triggers a flush.
	requestedBatchSize := config.BatchSize
	if config.BatchSize > config.MaxBufferSize {
		config.BatchSize = config.MaxBufferSize
	}

	s := &ClickHouseSink{
		config:    config,
		buffer:    make([]domain.CorrelatedLog, 0, config.BatchSize),
		flushChan: make(chan struct{}, 1),
		done:      make(chan struct{}),
		logger:    observability.NewLogger("clickhouse"),
	}
	if requestedBatchSize != config.BatchSize {
		s.log().Warnf("batch_size exceeds max_buffer_size, clamping: requested=%d batch_size=%d",
			requestedBatchSize, config.BatchSize)
	}

	// Parse DSN and create options
	options, err := clickhouse.ParseDSN(config.DSN)
	if err != nil {
		return nil, fmt.Errorf("failed to parse ClickHouse DSN: %w", err)
	}

	// Connect to ClickHouse using native API
	conn, err := clickhouse.Open(options)
	if err != nil {
		return nil, fmt.Errorf("failed to connect to ClickHouse: %w", err)
	}

	// Ping with timeout to verify connection
	pingCtx, pingCancel := context.WithTimeout(context.Background(), time.Duration(DefaultPingTimeoutMs)*time.Millisecond)
	defer pingCancel()
	if err := conn.Ping(pingCtx); err != nil {
		_ = conn.Close()
		return nil, fmt.Errorf("failed to ping ClickHouse: %w", err)
	}

	s.conn = conn
	s.log().Infof("connected to ClickHouse: table=%s batch_size=%d flush_interval_ms=%d",
		config.Table, config.BatchSize, config.FlushIntervalMs)

	// Start flush goroutine
	s.wg.Add(1)
	go s.flushLoop()

	return s, nil
}
// Name identifies this sink; it always reports "clickhouse".
func (s *ClickHouseSink) Name() string {
	const sinkName = "clickhouse"
	return sinkName
}
// log hands back the sink's logger. When none has been set (for example on a
// sink assembled by hand in tests) a default "clickhouse" logger is created,
// stored on the sink and returned.
func (s *ClickHouseSink) log() *observability.Logger {
	if s.logger != nil {
		return s.logger
	}
	s.logger = observability.NewLogger("clickhouse")
	return s.logger
}
// Reopen does nothing for the ClickHouse sink and always returns nil; the
// method performs no work on the connection.
func (s *ClickHouseSink) Reopen() error { return nil }
// Write queues a log for insertion.
//
// While the buffer holds fewer than MaxBufferSize entries the log is appended
// and, once BatchSize entries are buffered, the flush goroutine is signalled
// without blocking. When the buffer is full the behaviour depends on
// DropOnOverflow: if set, the log is dropped with a warning and nil is
// returned; otherwise Write polls every 10ms until space appears, ctx is done
// (ctx.Err() is returned) or TimeoutMs has elapsed (an error is returned).
func (s *ClickHouseSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	giveUpAt := time.Now().Add(time.Duration(s.config.TimeoutMs) * time.Millisecond)

	// enqueue appends the log under the lock and reports whether there was room.
	enqueue := func() bool {
		s.mu.Lock()
		defer s.mu.Unlock()

		if len(s.buffer) >= s.config.MaxBufferSize {
			return false
		}
		s.buffer = append(s.buffer, log)
		if len(s.buffer) >= s.config.BatchSize {
			// Non-blocking: a pending signal is enough to trigger a flush.
			select {
			case s.flushChan <- struct{}{}:
			default:
			}
		}
		return true
	}

	for !enqueue() {
		if s.config.DropOnOverflow {
			s.log().Warnf("buffer full, dropping log: table=%s buffer_size=%d", s.config.Table, s.config.MaxBufferSize)
			return nil
		}
		if time.Now().After(giveUpAt) {
			return fmt.Errorf("buffer full, timeout exceeded")
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(10 * time.Millisecond):
		}
	}
	return nil
}
// Flush synchronously sends whatever is currently buffered to ClickHouse by
// delegating to doFlush, and returns its result.
func (s *ClickHouseSink) Flush(ctx context.Context) error {
	err := s.doFlush(ctx)
	return err
}
// Close shuts the sink down exactly once: it stops the flush goroutine, waits
// for it to exit, performs one last flush bounded by TimeoutMs and then closes
// the connection if there is one. The flush error takes precedence over the
// connection-close error. Calls after the first are no-ops that return nil.
func (s *ClickHouseSink) Close() error {
	var result error

	s.closeOnce.Do(func() {
		if s.done != nil {
			close(s.done)
		}
		s.wg.Wait()

		timeout := time.Duration(s.config.TimeoutMs) * time.Millisecond
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel()

		result = s.doFlush(ctx)

		if s.conn == nil {
			return
		}
		if err := s.conn.Close(); result == nil && err != nil {
			result = err
		}
	})

	return result
}
// flushLoop is the background goroutine that drives flushing. It reacts to
// three events: the done channel being closed (one final flush, then exit),
// the periodic ticker (flush if anything is buffered) and a signal on
// flushChan (flush if at least BatchSize logs are buffered). Every flush gets
// its own context bounded by TimeoutMs; failures are logged, not returned.
func (s *ClickHouseSink) flushLoop() {
	defer s.wg.Done()

	interval := time.Duration(s.config.FlushIntervalMs) * time.Millisecond
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	// flush runs one bounded flush and logs failureMsg if it fails.
	flush := func(failureMsg string) {
		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
		defer cancel()
		if err := s.doFlush(ctx); err != nil {
			s.log().Error(failureMsg, err)
		}
	}

	// buffered reports the current buffer length under the lock.
	buffered := func() int {
		s.mu.Lock()
		defer s.mu.Unlock()
		return len(s.buffer)
	}

	for {
		select {
		case <-s.done:
			flush("final flush on close failed")
			return

		case <-ticker.C:
			if buffered() > 0 {
				flush("periodic flush failed")
			}

		case <-s.flushChan:
			if buffered() >= s.config.BatchSize {
				flush("batch flush failed")
			}
		}
	}
}
// doFlush drains the in-memory buffer and inserts its contents into ClickHouse
// as one batch, retrying transient failures with exponential backoff.
//
// It returns nil when the buffer is empty. When the connection is not
// initialized it returns an error and leaves the buffer untouched. Otherwise
// the drained logs are passed to executeBatch up to MaxRetries times, waiting
// RetryBaseDelay, 2*RetryBaseDelay, ... between attempts. The drained batch is
// not put back if every attempt fails, if the error is not retryable, or if
// ctx is done while waiting between attempts (ctx.Err() is returned then).
func (s *ClickHouseSink) doFlush(ctx context.Context) error {
	s.mu.Lock()
	if len(s.buffer) == 0 {
		s.mu.Unlock()
		return nil
	}
	// Check the connection before draining, so a sink without a connection
	// reports the problem without silently discarding the buffered logs.
	if s.conn == nil {
		s.mu.Unlock()
		return fmt.Errorf("clickhouse connection is not initialized")
	}
	// Take ownership of the filled slice and give writers a fresh one; the
	// old backing array is no longer shared, so no copy is required.
	pending := s.buffer
	s.buffer = make([]domain.CorrelatedLog, 0, s.config.BatchSize)
	s.mu.Unlock()

	batchSize := len(pending)

	// Retry logic with exponential backoff
	var lastErr error
	for attempt := 0; attempt < MaxRetries; attempt++ {
		if attempt > 0 {
			delay := RetryBaseDelay * time.Duration(1<<uint(attempt-1))
			s.log().Warnf("retrying batch insert: attempt=%d/%d delay=%s rows=%d err=%v",
				attempt+1, MaxRetries, delay, batchSize, lastErr)
			select {
			case <-time.After(delay):
			case <-ctx.Done():
				return ctx.Err()
			}
		}

		lastErr = s.executeBatch(ctx, pending)
		if lastErr == nil {
			s.log().Debugf("batch sent: rows=%d table=%s", batchSize, s.config.Table)
			return nil
		}
		if !isRetryableError(lastErr) {
			return fmt.Errorf("non-retryable error: %w", lastErr)
		}
	}

	return fmt.Errorf("failed after %d retries (batch size: %d): %w", MaxRetries, batchSize, lastErr)
}
// executeBatch inserts the given logs into the configured table in a single
// batch. Each log is serialized to JSON and stored in the raw_json column.
//
// It returns an error when the connection is not initialized, when a log
// cannot be marshalled, or when preparing, appending to or sending the batch
// fails. The table name is taken verbatim from configuration and interpolated
// into the statement, so it must never come from untrusted input.
func (s *ClickHouseSink) executeBatch(ctx context.Context, buffer []domain.CorrelatedLog) error {
	if s.conn == nil {
		return fmt.Errorf("clickhouse connection is not initialized")
	}

	// Serialize every row before preparing the batch, so that a marshalling
	// failure can never leave a prepared batch that is neither sent nor aborted.
	rows := make([]string, len(buffer))
	for i := range buffer {
		logJSON, err := json.Marshal(buffer[i])
		if err != nil {
			return fmt.Errorf("failed to marshal log %d to JSON: %w", i, err)
		}
		rows[i] = string(logJSON)
	}

	// Table schema: http_logs_raw (raw_json String)
	// Single column insert - the entire log is serialized as JSON string
	query := fmt.Sprintf(`INSERT INTO %s (raw_json)`, s.config.Table)

	// Prepare batch using native clickhouse-go/v2 API
	batch, err := s.conn.PrepareBatch(ctx, query)
	if err != nil {
		return fmt.Errorf("failed to prepare batch: %w", err)
	}

	for i, row := range rows {
		if err := batch.Append(row); err != nil {
			return fmt.Errorf("failed to append log %d to batch: %w", i, err)
		}
	}

	// Nothing is written until the batch is sent.
	if err := batch.Send(); err != nil {
		return fmt.Errorf("failed to send batch (%d rows): %w", len(buffer), err)
	}
	return nil
}
// isRetryableError checks if an error is retryable.
func isRetryableError(err error) bool {
if err == nil {
return false
}
if errors.Is(err, context.DeadlineExceeded) {
return true
}
if errors.Is(err, context.Canceled) {
return false
}
var netErr net.Error
if errors.As(err, &netErr) {
if netErr.Timeout() {
return true
}
}
errStr := strings.ToLower(err.Error())
// Explicit non-retryable SQL/schema errors
if strings.Contains(errStr, "syntax error") ||
strings.Contains(errStr, "unknown table") ||
strings.Contains(errStr, "unknown column") ||
(strings.Contains(errStr, "table") && strings.Contains(errStr, "not found")) {
return false
}
// Fallback network/transient errors
retryableErrors := []string{
"connection refused",
"connection reset",
"timeout",
"temporary failure",
"network is unreachable",
"broken pipe",
"no route to host",
}
for _, re := range retryableErrors {
if strings.Contains(errStr, re) {
return true
}
}
return false
}

View File

@ -0,0 +1,538 @@
package clickhouse
import (
"context"
"testing"
"time"
"github.com/antitbone/ja4/correlator/internal/domain"
"github.com/antitbone/ja4/correlator/internal/observability"
)
func TestClickHouseSink_Name(t *testing.T) {
sink := &ClickHouseSink{
config: Config{
DSN: "clickhouse://test:test@localhost:9000/test",
Table: "test_table",
},
}
if sink.Name() != "clickhouse" {
t.Errorf("expected name 'clickhouse', got %s", sink.Name())
}
}
// TestClickHouseSink_ConfigDefaults replays the defaulting rules on a Config
// whose numeric fields are zero and checks that each ends up at its Default*
// constant. The rules are re-implemented here because the constructor cannot
// be exercised without a reachable ClickHouse server.
func TestClickHouseSink_ConfigDefaults(t *testing.T) {
	// orDefault returns fallback when value is not positive.
	orDefault := func(value, fallback int) int {
		if value <= 0 {
			return fallback
		}
		return value
	}

	config := Config{
		DSN:   "clickhouse://test:test@localhost:9000/test",
		Table: "test_table",
		// All remaining fields stay at their zero value.
	}
	config.BatchSize = orDefault(config.BatchSize, DefaultBatchSize)
	config.FlushIntervalMs = orDefault(config.FlushIntervalMs, DefaultFlushIntervalMs)
	config.MaxBufferSize = orDefault(config.MaxBufferSize, DefaultMaxBufferSize)
	config.TimeoutMs = orDefault(config.TimeoutMs, DefaultTimeoutMs)

	if got := config.BatchSize; got != DefaultBatchSize {
		t.Errorf("expected BatchSize %d, got %d", DefaultBatchSize, got)
	}
	if got := config.FlushIntervalMs; got != DefaultFlushIntervalMs {
		t.Errorf("expected FlushIntervalMs %d, got %d", DefaultFlushIntervalMs, got)
	}
	if got := config.MaxBufferSize; got != DefaultMaxBufferSize {
		t.Errorf("expected MaxBufferSize %d, got %d", DefaultMaxBufferSize, got)
	}
	if got := config.TimeoutMs; got != DefaultTimeoutMs {
		t.Errorf("expected TimeoutMs %d, got %d", DefaultTimeoutMs, got)
	}
}
// TestClickHouseSink_Write_BufferOverflow exercises Write past the buffer
// limit on a sink without a connection: with DropOnOverflow set, every Write
// must succeed, the buffer must stop growing at MaxBufferSize, and reaching
// BatchSize must leave a flush signal pending.
func TestClickHouseSink_Write_BufferOverflow(t *testing.T) {
	config := Config{
		DSN:             "clickhouse://test:test@localhost:9000/test",
		Table:           "test_table",
		BatchSize:       10,
		MaxBufferSize:   10,
		DropOnOverflow:  true,
		TimeoutMs:       100,
		FlushIntervalMs: 1000,
	}
	if config.BatchSize > config.MaxBufferSize {
		t.Fatal("BatchSize should not exceed MaxBufferSize")
	}

	s := &ClickHouseSink{
		config:    config,
		buffer:    make([]domain.CorrelatedLog, 0, config.MaxBufferSize),
		flushChan: make(chan struct{}, 1),
	}
	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	ctx := context.Background()

	// Write more logs than the buffer can hold; the surplus must be dropped.
	const surplus = 5
	for i := 0; i < config.MaxBufferSize+surplus; i++ {
		if err := s.Write(ctx, entry); err != nil {
			t.Fatalf("unexpected error on Write %d: %v", i, err)
		}
	}

	if got := len(s.buffer); got != config.MaxBufferSize {
		t.Errorf("expected buffer capped at %d, got %d", config.MaxBufferSize, got)
	}

	select {
	case <-s.flushChan:
	default:
		t.Error("expected a pending flush signal once BatchSize was reached")
	}
}
// TestClickHouseSink_IsRetryableError runs isRetryableError over a table of
// message-based errors covering transient network failures (retryable) and
// SQL/schema failures (not retryable), plus the nil error.
func TestClickHouseSink_IsRetryableError(t *testing.T) {
	type testCase struct {
		name     string
		err      error
		expected bool
	}

	cases := []testCase{
		{name: "nil error", err: nil, expected: false},
		{name: "connection refused", err: &mockError{"connection refused"}, expected: true},
		{name: "connection reset", err: &mockError{"connection reset by peer"}, expected: true},
		{name: "timeout", err: &mockError{"timeout waiting for response"}, expected: true},
		{name: "network unreachable", err: &mockError{"network is unreachable"}, expected: true},
		{name: "broken pipe", err: &mockError{"broken pipe"}, expected: true},
		{name: "syntax error", err: &mockError{"syntax error in SQL"}, expected: false},
		{name: "table not found", err: &mockError{"table test not found"}, expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := isRetryableError(tc.err); got != tc.expected {
				t.Errorf("expected %v, got %v", tc.expected, got)
			}
		})
	}
}
func TestClickHouseSink_FlushEmpty(t *testing.T) {
// Test that flushing an empty buffer doesn't cause issues
// (We can't test actual ClickHouse operations without a real instance)
s := &ClickHouseSink{
config: Config{
DSN: "clickhouse://test:test@localhost:9000/test",
Table: "test_table",
},
buffer: make([]domain.CorrelatedLog, 0),
}
// Should not panic or error on empty flush
ctx := context.Background()
err := s.Flush(ctx)
if err != nil {
t.Errorf("expected no error on empty flush, got %v", err)
}
}
func TestClickHouseSink_CloseWithoutConnect(t *testing.T) {
// Test that closing without connecting doesn't panic
s := &ClickHouseSink{
config: Config{
DSN: "clickhouse://test:test@localhost:9000/test",
Table: "test_table",
},
buffer: make([]domain.CorrelatedLog, 0),
done: make(chan struct{}),
}
err := s.Close()
if err != nil {
t.Errorf("expected no error on close without connect, got %v", err)
}
}
// TestClickHouseSink_Constants guards against a default or retry constant
// being set to zero or a negative value.
func TestClickHouseSink_Constants(t *testing.T) {
	checks := []struct {
		invalid bool
		message string
	}{
		{DefaultBatchSize <= 0, "DefaultBatchSize should be positive"},
		{DefaultFlushIntervalMs <= 0, "DefaultFlushIntervalMs should be positive"},
		{DefaultMaxBufferSize <= 0, "DefaultMaxBufferSize should be positive"},
		{DefaultTimeoutMs <= 0, "DefaultTimeoutMs should be positive"},
		{DefaultPingTimeoutMs <= 0, "DefaultPingTimeoutMs should be positive"},
		{MaxRetries <= 0, "MaxRetries should be positive"},
		{RetryBaseDelay <= 0, "RetryBaseDelay should be positive"},
	}

	for _, check := range checks {
		if check.invalid {
			t.Error(check.message)
		}
	}
}
// mockError is a minimal error whose message is fully controlled by the test.
type mockError struct {
	msg string
}

// Error returns the configured message.
func (m *mockError) Error() string { return m.msg }
// Test the doFlush function with empty buffer (no actual DB connection)
func TestClickHouseSink_DoFlushEmpty(t *testing.T) {
s := &ClickHouseSink{
config: Config{
DSN: "clickhouse://test:test@localhost:9000/test",
Table: "test_table",
},
buffer: make([]domain.CorrelatedLog, 0),
}
ctx := context.Background()
err := s.doFlush(ctx)
if err != nil {
t.Errorf("expected no error when flushing empty buffer, got %v", err)
}
}
// TestClickHouseSink_BufferManagement checks that Write appends to a buffer
// that still has room: a sink pre-loaded with one log holds two afterwards.
func TestClickHouseSink_BufferManagement(t *testing.T) {
	entry := domain.CorrelatedLog{
		SrcIP:      "192.168.1.1",
		SrcPort:    8080,
		Correlated: true,
	}
	cfg := Config{
		DSN:            "clickhouse://test:test@localhost:9000/test",
		Table:          "test_table",
		MaxBufferSize:  100, // leaves room for more than one element
		DropOnOverflow: false,
		TimeoutMs:      1000,
	}
	sink := &ClickHouseSink{
		config: cfg,
		buffer: []domain.CorrelatedLog{entry},
	}

	// Sanity check on the starting state.
	if n := len(sink.buffer); n != 1 {
		t.Fatalf("expected buffer length 1, got %d", n)
	}

	if err := sink.Write(context.Background(), entry); err != nil {
		t.Errorf("unexpected error on Write: %v", err)
	}

	if n := len(sink.buffer); n != 2 {
		t.Errorf("expected buffer length 2 after Write, got %d", n)
	}
}
// TestClickHouseSink_Write_ContextCancel checks that Write on a full buffer
// (without DropOnOverflow) returns context.Canceled when the caller's context
// is already cancelled. The timeout is kept generous so that the buffer-full
// deadline cannot fire first and mask the cancellation path.
func TestClickHouseSink_Write_ContextCancel(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  1,
			DropOnOverflow: false,
			TimeoutMs:      1000,
		},
		buffer: make([]domain.CorrelatedLog, 0, 1),
	}

	// Fill the buffer
	log := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	s.buffer = append(s.buffer, log)

	// Try to write with cancelled context
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // Cancel immediately

	err := s.Write(ctx, log)
	if err == nil {
		t.Fatal("expected error when writing with cancelled context")
	}
	// Write returns ctx.Err() unwrapped, so a direct comparison is sufficient.
	if err != context.Canceled {
		t.Errorf("expected context.Canceled, got %v", err)
	}
	if got := len(s.buffer); got != 1 {
		t.Errorf("expected buffer to stay at length 1, got %d", got)
	}
}
// TestClickHouseSink_Write_DropOnOverflow checks that, with DropOnOverflow
// set, Write on a full buffer returns nil and really drops the log, i.e. the
// buffer does not grow beyond MaxBufferSize.
func TestClickHouseSink_Write_DropOnOverflow(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  1,
			DropOnOverflow: true,
			TimeoutMs:      10,
		},
		buffer: make([]domain.CorrelatedLog, 0, 1),
	}

	// Fill the buffer
	log := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	s.buffer = append(s.buffer, log)

	// Try to write when buffer is full - should drop silently
	ctx := context.Background()
	if err := s.Write(ctx, log); err != nil {
		t.Errorf("expected no error when DropOnOverflow is true, got %v", err)
	}

	// The overflowing log must not have been stored.
	if got := len(s.buffer); got != 1 {
		t.Errorf("expected dropped log to leave buffer at length 1, got %d", got)
	}
}
// TestIsRetryableError_ContextDeadlineExceeded: an expired deadline counts as
// a transient failure and must be retried.
func TestIsRetryableError_ContextDeadlineExceeded(t *testing.T) {
	retry := isRetryableError(context.DeadlineExceeded)
	if !retry {
		t.Error("context.DeadlineExceeded should be retryable")
	}
}
// TestIsRetryableError_ContextCanceled: an explicit cancellation must not be
// retried.
func TestIsRetryableError_ContextCanceled(t *testing.T) {
	retry := isRetryableError(context.Canceled)
	if retry {
		t.Error("context.Canceled should not be retryable")
	}
}
// TestIsRetryableError_NetTimeout: a net.Error whose Timeout() reports true
// must be retried, regardless of its (empty) message.
func TestIsRetryableError_NetTimeout(t *testing.T) {
	var timedOut error = &mockNetError{timeout: true, temporary: false}
	if retry := isRetryableError(timedOut); !retry {
		t.Error("net.Error with Timeout()=true should be retryable")
	}
}
// TestIsRetryableError_NetNoTimeout: a net.Error whose Timeout() reports false
// and whose message matches no transient phrase must not be retried.
func TestIsRetryableError_NetNoTimeout(t *testing.T) {
	var notTimedOut error = &mockNetError{timeout: false, temporary: false}
	if retry := isRetryableError(notTimedOut); retry {
		t.Error("net.Error with Timeout()=false should not be retryable (unless msg matches)")
	}
}
// TestIsRetryableError_UnknownTable tests "unknown table" is NOT retryable.
func TestIsRetryableError_UnknownTable(t *testing.T) {
if isRetryableError(&mockError{"unknown table users"}) {
t.Error("unknown table error should not be retryable")
}
}
// TestIsRetryableError_UnknownColumn tests "unknown column" is NOT retryable.
func TestIsRetryableError_UnknownColumn(t *testing.T) {
if isRetryableError(&mockError{"unknown column foo"}) {
t.Error("unknown column error should not be retryable")
}
}
// TestIsRetryableError_RandomError tests a random error is NOT retryable.
func TestIsRetryableError_RandomError(t *testing.T) {
if isRetryableError(&mockError{"some random unrecognized error"}) {
t.Error("random error should not be retryable")
}
}
// TestIsRetryableError_NoRouteToHost tests "no route to host" is retryable.
func TestIsRetryableError_NoRouteToHost(t *testing.T) {
if !isRetryableError(&mockError{"no route to host"}) {
t.Error("'no route to host' should be retryable")
}
}
// TestIsRetryableError_TemporaryFailure tests "temporary failure" is retryable.
func TestIsRetryableError_TemporaryFailure(t *testing.T) {
if !isRetryableError(&mockError{"temporary failure in name resolution"}) {
t.Error("'temporary failure' should be retryable")
}
}
// mockNetError is a configurable net.Error: the test chooses what Timeout and
// Temporary report as well as the error message.
type mockNetError struct {
	timeout   bool
	temporary bool
	msg       string
}

// Error returns the configured message.
func (m *mockNetError) Error() string {
	return m.msg
}

// Timeout reports the configured timeout flag.
func (m *mockNetError) Timeout() bool {
	return m.timeout
}

// Temporary reports the configured temporary flag.
func (m *mockNetError) Temporary() bool {
	return m.temporary
}
// TestNewClickHouseSink_EmptyDSN tests that empty DSN returns error.
func TestNewClickHouseSink_EmptyDSN(t *testing.T) {
_, err := NewClickHouseSink(Config{
DSN: "",
Table: "test_table",
})
if err == nil {
t.Error("expected error for empty DSN")
}
}
// TestNewClickHouseSink_WhitespaceDSN tests that whitespace DSN returns error.
func TestNewClickHouseSink_WhitespaceDSN(t *testing.T) {
_, err := NewClickHouseSink(Config{
DSN: " ",
Table: "test_table",
})
if err == nil {
t.Error("expected error for whitespace-only DSN")
}
}
// TestNewClickHouseSink_EmptyTable tests that empty Table returns error.
func TestNewClickHouseSink_EmptyTable(t *testing.T) {
_, err := NewClickHouseSink(Config{
DSN: "clickhouse://localhost:9000/test",
Table: "",
})
if err == nil {
t.Error("expected error for empty Table")
}
}
// TestNewClickHouseSink_WhitespaceTable tests that whitespace Table returns error.
func TestNewClickHouseSink_WhitespaceTable(t *testing.T) {
_, err := NewClickHouseSink(Config{
DSN: "clickhouse://localhost:9000/test",
Table: " ",
})
if err == nil {
t.Error("expected error for whitespace-only Table")
}
}
// TestNewClickHouseSink_InvalidDSN tests that an invalid DSN (no real connection) returns error.
func TestNewClickHouseSink_InvalidDSN(t *testing.T) {
_, err := NewClickHouseSink(Config{
DSN: "not-a-valid-dsn",
Table: "test_table",
})
if err == nil {
t.Error("expected error for invalid DSN")
}
}
// TestClickHouseSink_SetLogger checks that SetLogger leaves the sink with a
// non-nil logger.
func TestClickHouseSink_SetLogger(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{Table: "test_table"},
		buffer: make([]domain.CorrelatedLog, 0),
	}

	sink.SetLogger(observability.NewLogger("test"))

	if sink.logger == nil {
		t.Error("expected logger to be set")
	}
}
// TestClickHouseSink_LogNilLogger checks that log() creates a logger on demand
// when the sink has none.
func TestClickHouseSink_LogNilLogger(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{Table: "test_table"},
		buffer: make([]domain.CorrelatedLog, 0),
		logger: nil,
	}

	// log() is expected to initialize the logger lazily.
	if got := sink.log(); got == nil {
		t.Error("expected non-nil logger from log()")
	}
}
// TestClickHouseSink_Reopen checks that Reopen reports no error.
func TestClickHouseSink_Reopen(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{Table: "test_table"},
		buffer: make([]domain.CorrelatedLog, 0),
	}

	err := sink.Reopen()
	if err != nil {
		t.Errorf("Reopen() should return nil, got: %v", err)
	}
}
// TestClickHouseSink_DoFlushNilConn checks that doFlush fails when there is
// something to flush but the sink has no connection.
func TestClickHouseSink_DoFlushNilConn(t *testing.T) {
	pending := []domain.CorrelatedLog{
		{SrcIP: "1.2.3.4", SrcPort: 1234},
	}
	sink := &ClickHouseSink{
		config: Config{
			Table:     "test_table",
			BatchSize: DefaultBatchSize,
		},
		buffer: pending,
		conn:   nil,
	}

	if err := sink.doFlush(context.Background()); err == nil {
		t.Error("expected error from doFlush when conn is nil")
	}
}
// TestClickHouseSink_CloseTwice checks that Close can be called repeatedly:
// neither the first nor the second call may panic or return an error.
func TestClickHouseSink_CloseTwice(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{
			Table:     "test_table",
			TimeoutMs: DefaultTimeoutMs,
		},
		buffer: make([]domain.CorrelatedLog, 0),
		done:   make(chan struct{}),
	}

	firstErr := sink.Close()
	if firstErr != nil {
		t.Errorf("first Close() should not error, got: %v", firstErr)
	}

	secondErr := sink.Close()
	if secondErr != nil {
		t.Errorf("second Close() should not error (closeOnce), got: %v", secondErr)
	}
}
// TestClickHouseSink_WriteTimeout tests that Write returns error when buffer is full and timeout exceeded.
func TestClickHouseSink_Write_Timeout(t *testing.T) {
s := &ClickHouseSink{
config: Config{
Table: "test_table",
MaxBufferSize: 1,
DropOnOverflow: false,
TimeoutMs: 1, // 1ms timeout
},
buffer: make([]domain.CorrelatedLog, 0, 1),
}
log := domain.CorrelatedLog{SrcIP: "1.2.3.4", SrcPort: 1234}
// Fill the buffer
s.buffer = append(s.buffer, log)
ctx := context.Background()
err := s.Write(ctx, log)
if err == nil {
t.Error("expected error when buffer full and timeout exceeded")
}
}
// BenchmarkClickHouseSink_Write measures the cost of buffering one log through
// Write on a sink without a connection.
//
// The buffer is emptied whenever it fills up so that every iteration takes the
// append path; otherwise, once MaxBufferSize logs were written, all further
// iterations would measure the drop-and-log-a-warning path instead.
func BenchmarkClickHouseSink_Write(b *testing.B) {
	const capacity = 10000

	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  capacity,
			DropOnOverflow: true,
		},
		buffer: make([]domain.CorrelatedLog, 0, capacity),
	}

	log := domain.CorrelatedLog{
		Timestamp:  time.Now(),
		SrcIP:      "192.168.1.1",
		SrcPort:    8080,
		Correlated: true,
	}

	ctx := context.Background()

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if len(s.buffer) >= capacity {
			// Reuse the backing array; no flush goroutine is running here.
			s.buffer = s.buffer[:0]
		}
		if err := s.Write(ctx, log); err != nil {
			b.Fatalf("unexpected error on Write: %v", err)
		}
	}
}

View File

@ -0,0 +1,191 @@
package file
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/antitbone/ja4/correlator/internal/domain"
)
const (
// DefaultFilePermissions is the mode passed to os.OpenFile when the output
// file is created.
DefaultFilePermissions os.FileMode = 0644
// DefaultDirPermissions is the mode passed to os.MkdirAll when the output
// file's parent directories are created.
DefaultDirPermissions os.FileMode = 0750
)
// Config holds the file sink configuration.
type Config struct {
// Path is the output file location; it is checked by validateFilePath
// before the file is opened.
Path string
}
// FileSink writes correlated logs to a file as JSON lines.
type FileSink struct {
// config holds the sink settings, notably the output path.
config Config
// mu serializes Write, Flush, Reopen and Close, which all touch file.
mu sync.Mutex
// file is the open output file; nil after Close, in which case Write
// reopens it on demand.
file *os.File
}
// NewFileSink builds a file sink for config.Path. The path is validated and
// the file is opened (and created if necessary) immediately, so configuration
// problems surface at construction time rather than on the first write.
func NewFileSink(config Config) (*FileSink, error) {
	if err := validateFilePath(config.Path); err != nil {
		return nil, fmt.Errorf("invalid file path: %w", err)
	}

	sink := &FileSink{config: config}
	err := sink.openFile()
	if err != nil {
		return nil, err
	}
	return sink, nil
}
// Name reports the identifier of this sink type, "file".
func (s *FileSink) Name() string {
	const sinkName = "file"
	return sinkName
}
// Reopen closes the current file handle, if any, and opens config.Path again
// (for log rotation on SIGHUP).
//
// The old handle is dropped before anything else can fail, so neither a
// failed close nor a failed reopen leaves s.file pointing at a closed
// descriptor. In both cases s.file stays nil and the next Write retries
// openFile instead of writing to a dead handle.
//
// Returns the wrapped close error, or the error from openFile.
func (s *FileSink) Reopen() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.file != nil {
		closeErr := s.file.Close()
		// Forget the handle regardless of the close result: it is unusable
		// either way.
		s.file = nil
		if closeErr != nil {
			return fmt.Errorf("failed to close file: %w", closeErr)
		}
	}
	return s.openFile()
}
// Write appends log to the file as one JSON document followed by a newline
// and syncs the file before returning. If the sink currently has no open
// file it is opened first. The ctx argument is not consulted.
func (s *FileSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Lazily (re)open the file, e.g. after Close.
	if s.file == nil {
		if openErr := s.openFile(); openErr != nil {
			return openErr
		}
	}

	payload, err := json.Marshal(log)
	if err != nil {
		return fmt.Errorf("failed to marshal log: %w", err)
	}
	payload = append(payload, '\n')

	_, err = s.file.Write(payload)
	if err != nil {
		return fmt.Errorf("failed to write log line: %w", err)
	}

	err = s.file.Sync()
	if err != nil {
		return fmt.Errorf("failed to sync log line: %w", err)
	}
	return nil
}
// Flush syncs the open file. It returns nil when no file is open.
// The ctx argument is not consulted.
func (s *FileSink) Flush(ctx context.Context) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.file == nil {
		return nil
	}
	return s.file.Sync()
}
// Close closes the open file, if any, and forgets the handle so that a
// second Close is a no-op returning nil.
func (s *FileSink) Close() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.file == nil {
		return nil
	}
	handle := s.file
	s.file = nil
	return handle.Close()
}
// openFile validates config.Path, creates the parent directory if needed and
// opens the file for appending (creating it when missing). On success the
// handle is stored in s.file. It does not take s.mu itself.
func (s *FileSink) openFile() error {
	target := s.config.Path

	// The path is validated on every open, not only at construction time.
	if err := validateFilePath(target); err != nil {
		return fmt.Errorf("invalid file path: %w", err)
	}

	if err := os.MkdirAll(filepath.Dir(target), DefaultDirPermissions); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	const flags = os.O_APPEND | os.O_CREATE | os.O_WRONLY
	handle, err := os.OpenFile(target, flags, DefaultFilePermissions)
	if err != nil {
		return fmt.Errorf("failed to open file: %w", err)
	}

	s.file = handle
	return nil
}
// validateFilePath reports whether path may be used as the sink's output
// file.
//
// Rules, in order:
//   - an empty or whitespace-only path is rejected;
//   - a relative path (after filepath.Clean) is accepted as-is, for
//     testing/dev;
//   - an absolute path is accepted only if it equals, or lies beneath, one
//     of the allowed root directories.
//
// The check is purely lexical (filepath.Clean / filepath.Rel).
func validateFilePath(path string) error {
	if strings.TrimSpace(path) == "" {
		return fmt.Errorf("path cannot be empty")
	}

	cleaned := filepath.Clean(path)
	if !filepath.IsAbs(cleaned) {
		return nil
	}

	absPath, err := filepath.Abs(cleaned)
	if err != nil {
		return fmt.Errorf("failed to resolve absolute path: %w", err)
	}

	allowedRoots := []string{
		"/var/log/logcorrelator",
		"/var/log",
		"/tmp",
	}

	// A relative path that is ".." or starts with "../" points outside root.
	parentPrefix := ".." + string(os.PathSeparator)
	for _, root := range allowedRoots {
		absRoot, rootErr := filepath.Abs(filepath.Clean(root))
		if rootErr != nil {
			continue
		}
		rel, relErr := filepath.Rel(absRoot, absPath)
		if relErr != nil {
			continue
		}
		escapesRoot := rel == ".." || strings.HasPrefix(rel, parentPrefix)
		if !escapesRoot {
			return nil
		}
	}

	return fmt.Errorf("path must be under allowed directories: %v", allowedRoots)
}

View File

@ -0,0 +1,524 @@
package file
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/antitbone/ja4/correlator/internal/domain"
)
// TestFileSink_Write checks that one Write followed by Flush leaves a
// non-empty file on disk.
func TestFileSink_Write(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	ctx := context.Background()
	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080, Correlated: true}

	if err := sink.Write(ctx, entry); err != nil {
		t.Fatalf("failed to write: %v", err)
	}
	if err := sink.Flush(ctx); err != nil {
		t.Fatalf("failed to flush: %v", err)
	}

	// The file must exist and hold the written record.
	contents, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}
	if len(contents) == 0 {
		t.Error("expected non-empty file")
	}
}
// TestFileSink_WriteImmediatePersist_NoFlushNeeded checks that data written
// with Write can be read back from the file without calling Flush.
func TestFileSink_WriteImmediatePersist_NoFlushNeeded(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080, Correlated: true}
	if err := sink.Write(context.Background(), entry); err != nil {
		t.Fatalf("failed to write: %v", err)
	}

	// No Flush on purpose: the record must already be visible.
	contents, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}
	if len(contents) == 0 {
		t.Error("expected data to be present immediately after Write without Flush")
	}
}
// TestFileSink_MultipleWrites writes five records and expects exactly five
// newline-terminated lines in the file.
func TestFileSink_MultipleWrites(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	for offset := 0; offset < 5; offset++ {
		entry := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080 + offset}
		if err := sink.Write(context.Background(), entry); err != nil {
			t.Fatalf("failed to write: %v", err)
		}
	}
	sink.Close()

	contents, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}

	// One newline per record.
	newlines := 0
	for idx := range contents {
		if contents[idx] == '\n' {
			newlines++
		}
	}
	if newlines != 5 {
		t.Errorf("expected 5 lines, got %d", newlines)
	}
}
// TestFileSink_Name checks that the sink reports the name "file".
//
// The sink is created inside t.TempDir() and closed on exit so the test
// neither leaks a file descriptor nor leaves a stray file in the shared /tmp
// directory.
func TestFileSink_Name(t *testing.T) {
	sink, err := NewFileSink(Config{Path: filepath.Join(t.TempDir(), "test.log")})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	if got := sink.Name(); got != "file" {
		t.Errorf("expected name 'file', got %s", got)
	}
}
// TestFileSink_ValidateFilePath runs validateFilePath against a table of
// accepted and rejected paths.
func TestFileSink_ValidateFilePath(t *testing.T) {
	type pathCase struct {
		name    string
		path    string
		wantErr bool
	}
	cases := []pathCase{
		{name: "empty path", path: "", wantErr: true},
		{name: "valid /var/log/logcorrelator", path: "/var/log/logcorrelator/test.log", wantErr: false},
		{name: "valid /var/log", path: "/var/log/test.log", wantErr: false},
		{name: "valid /tmp", path: "/tmp/test.log", wantErr: false},
		{name: "reject lookalike /var/logevil", path: "/var/logevil/test.log", wantErr: true},
		{name: "invalid directory", path: "/etc/logcorrelator/test.log", wantErr: true},
		{name: "relative path", path: "test.log", wantErr: false}, // Allowed for testing
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := validateFilePath(tc.path)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("validateFilePath(%q) error = %v, wantErr %v", tc.path, err, tc.wantErr)
			}
		})
	}
}
// TestFileSink_OpenFile calls openFile on a sink whose target lives in a
// not-yet-existing subdirectory and expects the handle to be set.
func TestFileSink_OpenFile(t *testing.T) {
	target := filepath.Join(t.TempDir(), "subdir", "test.log")
	sink := &FileSink{config: Config{Path: target}}

	if err := sink.openFile(); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	defer sink.Close()

	if sink.file == nil {
		t.Error("expected file to be opened")
	}
}
func TestFileSink_WriteBeforeOpen(t *testing.T) {
tmpDir := t.TempDir()
testPath := filepath.Join(tmpDir, "test.log")
sink, err := NewFileSink(Config{Path: testPath})
if err != nil {
t.Fatalf("failed to create sink: %v", err)
}
defer sink.Close()
// Write should open file automatically
log := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
err = sink.Write(context.Background(), log)
if err != nil {
t.Fatalf("failed to write: %v", err)
}
// Verify file was created
if _, err := os.Stat(testPath); os.IsNotExist(err) {
t.Error("expected file to be created")
}
}
func TestFileSink_FlushBeforeOpen(t *testing.T) {
tmpDir := t.TempDir()
testPath := filepath.Join(tmpDir, "test.log")
sink, err := NewFileSink(Config{Path: testPath})
if err != nil {
t.Fatalf("failed to create sink: %v", err)
}
defer sink.Close()
// Flush before any write should not error
err = sink.Flush(context.Background())
if err != nil {
t.Errorf("expected no error on flush before open, got %v", err)
}
}
// TestFileSink_InvalidPath expects NewFileSink to reject an absolute path
// that lies outside the allowed directories.
func TestFileSink_InvalidPath(t *testing.T) {
	const outsidePath = "/etc/../passwd"

	if _, err := NewFileSink(Config{Path: outsidePath}); err == nil {
		t.Error("expected error for invalid path")
	}
}
// TestFileSink_Reopen writes one record, calls Reopen, writes another and
// expects both lines to be present in the file.
func TestFileSink_Reopen(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")
	ctx := context.Background()

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}

	first := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	if err := sink.Write(ctx, first); err != nil {
		t.Fatalf("failed to write: %v", err)
	}

	// Reopen closes the handle and opens the same path again.
	if err = sink.Reopen(); err != nil {
		t.Errorf("expected no error on Reopen, got %v", err)
	}

	second := domain.CorrelatedLog{SrcIP: "192.168.1.2", SrcPort: 8081}
	if err := sink.Write(ctx, second); err != nil {
		t.Fatalf("failed to write after reopen: %v", err)
	}
	sink.Close()

	contents, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}

	newlines := 0
	for idx := range contents {
		if contents[idx] == '\n' {
			newlines++
		}
	}
	if newlines != 2 {
		t.Errorf("expected 2 lines after reopen, got %d", newlines)
	}
}
// TestFileSink_Close checks that Close succeeds on a freshly created sink.
//
// A Write after Close is then attempted; either outcome is tolerated here
// (the error is only logged). Because that Write may reopen the file, the
// sink is closed again on exit so no file descriptor is leaked.
func TestFileSink_Close(t *testing.T) {
	tmpDir := t.TempDir()
	testPath := filepath.Join(tmpDir, "test.log")

	sink, err := NewFileSink(Config{Path: testPath})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	// Close is a no-op on an already-closed sink, so this is always safe.
	defer sink.Close()

	// Close should succeed
	if err := sink.Close(); err != nil {
		t.Errorf("expected no error on Close, got %v", err)
	}

	// Write after close should fail or reopen
	log := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	if err := sink.Write(context.Background(), log); err != nil {
		// Expected - file was closed
		t.Logf("write after close returned error (expected): %v", err)
	}
}
func TestFileSink_EmptyPath(t *testing.T) {
_, err := NewFileSink(Config{Path: ""})
if err == nil {
t.Error("expected error for empty path")
}
}
func TestFileSink_WhitespacePath(t *testing.T) {
_, err := NewFileSink(Config{Path: " "})
if err == nil {
t.Error("expected error for whitespace-only path")
}
}
// TestFileSink_ValidateFilePath_AllowedRoots checks that paths beneath the
// allowed roots, and relative paths, pass validateFilePath.
func TestFileSink_ValidateFilePath_AllowedRoots(t *testing.T) {
	accepted := [...]string{
		"/var/log/logcorrelator/correlated.log",
		"/var/log/test.log",
		"/tmp/test.log",
		"/tmp/subdir/test.log",
		"relative/path/test.log",
		"./test.log",
	}

	for idx := range accepted {
		candidate := accepted[idx]
		if err := validateFilePath(candidate); err != nil {
			t.Errorf("validateFilePath(%q) unexpected error: %v", candidate, err)
		}
	}
}
// TestFileSink_ValidateFilePath_RejectedPaths checks that blank paths and
// absolute paths outside the allowed roots fail validateFilePath.
func TestFileSink_ValidateFilePath_RejectedPaths(t *testing.T) {
	refused := [...]string{
		"",
		" ",
		"/etc/passwd",
		"/etc/logcorrelator/test.log",
		"/root/test.log",
		"/home/user/test.log",
		"/var/logevil/test.log",
	}

	for idx := range refused {
		candidate := refused[idx]
		if validateFilePath(candidate) == nil {
			t.Errorf("validateFilePath(%q) should have been rejected", candidate)
		}
	}
}
// TestFileSink_ConcurrentWrites issues ten Writes from separate goroutines
// and expects ten complete lines in the file.
//
// Each goroutine reports its Write result on a buffered channel so that a
// failing Write is surfaced as such, instead of showing up only indirectly
// as a wrong line count.
func TestFileSink_ConcurrentWrites(t *testing.T) {
	tmpDir := t.TempDir()
	testPath := filepath.Join(tmpDir, "test.log")

	sink, err := NewFileSink(Config{Path: testPath})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	const writers = 10
	// Buffered to the number of writers so no goroutine blocks on send.
	results := make(chan error, writers)
	for i := 0; i < writers; i++ {
		go func(n int) {
			log := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080 + n}
			results <- sink.Write(context.Background(), log)
		}(i)
	}

	for i := 0; i < writers; i++ {
		if writeErr := <-results; writeErr != nil {
			t.Errorf("concurrent write failed: %v", writeErr)
		}
	}

	// Verify all writes completed
	data, err := os.ReadFile(testPath)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}

	lines := 0
	for _, b := range data {
		if b == '\n' {
			lines++
		}
	}
	if lines != writers {
		t.Errorf("expected 10 lines from concurrent writes, got %d", lines)
	}
}
// TestFileSink_Flush checks that Flush returns nil after a successful Write.
func TestFileSink_Flush(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")
	ctx := context.Background()

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	if err := sink.Write(ctx, entry); err != nil {
		t.Fatalf("failed to write: %v", err)
	}

	if flushErr := sink.Flush(ctx); flushErr != nil {
		t.Errorf("expected no error on Flush, got %v", flushErr)
	}
}
// TestFileSink_MarshalError expects Write to fail when the log carries a
// value that encoding/json cannot marshal (a channel).
func TestFileSink_MarshalError(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	unmarshalable := map[string]any{"chan": make(chan int)}
	entry := domain.CorrelatedLog{
		SrcIP:   "192.168.1.1",
		SrcPort: 8080,
		Fields:  unmarshalable,
	}

	if writeErr := sink.Write(context.Background(), entry); writeErr == nil {
		t.Error("expected error when marshaling unmarshalable data")
	}
}
// TestFileSink_CloseTwice checks that Close can be called twice in a row
// without returning an error either time.
func TestFileSink_CloseTwice(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}

	firstErr := sink.Close()
	if firstErr != nil {
		t.Errorf("first Close() should not error, got: %v", firstErr)
	}

	// The first Close reset the handle to nil, so this one has nothing to do.
	secondErr := sink.Close()
	if secondErr != nil {
		t.Errorf("second Close() on already-closed sink should not error, got: %v", secondErr)
	}
}
// TestFileSink_WriteAfterCloseReopens tests that Write after Close re-opens
// the file and persists the record.
//
// The sink is closed again on exit: the Write below reopens the file, and
// without that second Close the handle would be leaked.
func TestFileSink_WriteAfterCloseReopens(t *testing.T) {
	tmpDir := t.TempDir()
	testPath := filepath.Join(tmpDir, "test.log")

	sink, err := NewFileSink(Config{Path: testPath})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	// Close is a no-op when nothing is open, so this is always safe.
	defer sink.Close()

	if err := sink.Close(); err != nil {
		t.Fatalf("Close() failed: %v", err)
	}

	// Write after close: FileSink.Write reopens the file when file == nil
	log := domain.CorrelatedLog{SrcIP: "1.2.3.4", SrcPort: 80}
	if err := sink.Write(context.Background(), log); err != nil {
		t.Errorf("Write after close should succeed (auto-reopen), got: %v", err)
	}

	// Verify data was written
	data, err := os.ReadFile(testPath)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}
	if len(data) == 0 {
		t.Error("expected data to be present after write on re-opened file")
	}
}
// TestFileSink_ReopenThenWrite writes, reopens (as on log rotation), writes
// again and expects two lines in the file.
func TestFileSink_ReopenThenWrite(t *testing.T) {
	target := filepath.Join(t.TempDir(), "test.log")
	ctx := context.Background()

	sink, err := NewFileSink(Config{Path: target})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()

	before := domain.CorrelatedLog{SrcIP: "1.1.1.1", SrcPort: 80}
	if err := sink.Write(ctx, before); err != nil {
		t.Fatalf("first Write failed: %v", err)
	}

	// Simulate log rotation.
	if err := sink.Reopen(); err != nil {
		t.Fatalf("Reopen failed: %v", err)
	}

	after := domain.CorrelatedLog{SrcIP: "2.2.2.2", SrcPort: 443}
	if err := sink.Write(ctx, after); err != nil {
		t.Fatalf("second Write failed: %v", err)
	}
	sink.Close()

	contents, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}

	newlines := 0
	for idx := range contents {
		if contents[idx] == '\n' {
			newlines++
		}
	}
	if newlines != 2 {
		t.Errorf("expected 2 lines after reopen+write, got %d", newlines)
	}
}

View File

@ -0,0 +1,137 @@
package multi
import (
"context"
"sync"
"github.com/antitbone/ja4/correlator/internal/domain"
"github.com/antitbone/ja4/correlator/internal/ports"
)
// MultiSink fans out correlated logs to multiple sinks.
type MultiSink struct {
// mu guards sinks: AddSink appends under the write lock, while Write,
// Flush, Close and Reopen read the slice under the read lock.
mu sync.RWMutex
sinks []ports.CorrelatedLogSink
}
// NewMultiSink returns a MultiSink that fans out to the given sinks. The
// variadic slice is stored as passed; calling it with no arguments yields a
// MultiSink with no sinks.
func NewMultiSink(sinks ...ports.CorrelatedLogSink) *MultiSink {
	fanout := new(MultiSink)
	fanout.sinks = sinks
	return fanout
}
// Name reports the identifier of this sink type, "multi".
func (s *MultiSink) Name() string {
	const sinkName = "multi"
	return sinkName
}
// AddSink appends sink to the fan-out list under the write lock.
func (s *MultiSink) AddSink(sink ports.CorrelatedLogSink) {
	s.mu.Lock()
	defer s.mu.Unlock()

	extended := append(s.sinks, sink)
	s.sinks = extended
}
// Write writes a correlated log to all sinks concurrently.
//
// The sink list is snapshotted under the read lock, then one goroutine per
// sink calls its Write. Write returns when either:
//   - every sink has finished: the result is one of the errors reported by
//     the sinks (whichever was queued first, which depends on goroutine
//     scheduling), or nil if none failed; or
//   - ctx is done first: ctx.Err() is returned immediately and the
//     per-sink goroutines still in flight are left to finish on their own.
//
// With no sinks configured it returns nil.
func (s *MultiSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	s.mu.RLock()
	sinks := make([]ports.CorrelatedLogSink, len(s.sinks))
	copy(sinks, s.sinks)
	s.mu.RUnlock()

	if len(sinks) == 0 {
		return nil
	}

	// One slot per sink: each goroutine sends at most once, so a send can
	// never block, even after this function has returned on cancellation.
	errChan := make(chan error, len(sinks))

	var wg sync.WaitGroup
	wg.Add(len(sinks))
	for _, sink := range sinks {
		go func(sk ports.CorrelatedLogSink) {
			defer wg.Done()
			if err := sk.Write(ctx, log); err != nil {
				errChan <- err
			}
		}(sink)
	}

	// Turn WaitGroup completion into a channel so it can be selected on
	// together with ctx.Done().
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()

	select {
	case <-done:
		// All sends happened before wg.Wait returned; take the first queued
		// error if there is one.
		select {
		case err := <-errChan:
			return err
		default:
			return nil
		}
	case <-ctx.Done():
		return ctx.Err()
	}
}
// Flush flushes all sinks.
//
// Every sink is attempted even if an earlier one fails, matching Close and
// Reopen; the first error encountered (in sink order) is returned, or nil if
// all flushes succeeded.
func (s *MultiSink) Flush(ctx context.Context) error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var firstErr error
	for _, sink := range s.sinks {
		if err := sink.Flush(ctx); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Close closes every sink in order and returns the first error seen, or nil.
// A failing sink does not stop the remaining sinks from being closed.
func (s *MultiSink) Close() error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var result error
	for idx := range s.sinks {
		err := s.sinks[idx].Close()
		if err == nil || result != nil {
			continue
		}
		result = err
	}
	return result
}
// Reopen calls Reopen on every sink in order (for log rotation on SIGHUP)
// and returns the first error seen, or nil. A failing sink does not stop the
// remaining sinks from being reopened.
func (s *MultiSink) Reopen() error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var result error
	for idx := range s.sinks {
		err := s.sinks[idx].Reopen()
		if err == nil || result != nil {
			continue
		}
		result = err
	}
	return result
}

View File

@ -0,0 +1,233 @@
package multi
import (
"context"
"sync"
"testing"
"github.com/antitbone/ja4/correlator/internal/domain"
)
// mockSink is a test double whose behaviour is supplied through function
// fields. writeFunc, flushFunc and closeFunc are called unconditionally and
// must be set; reopenFunc is optional.
type mockSink struct {
	name       string
	mu         sync.Mutex // serialises calls to writeFunc
	writeFunc  func(domain.CorrelatedLog) error
	flushFunc  func() error
	closeFunc  func() error
	reopenFunc func() error
}

// Name returns the configured name.
func (m *mockSink) Name() string {
	return m.name
}

// Write delegates to writeFunc while holding mu.
func (m *mockSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.writeFunc(log)
}

// Flush delegates to flushFunc.
func (m *mockSink) Flush(ctx context.Context) error {
	return m.flushFunc()
}

// Close delegates to closeFunc.
func (m *mockSink) Close() error {
	return m.closeFunc()
}

// Reopen delegates to reopenFunc, or returns nil when it is not set.
func (m *mockSink) Reopen() error {
	if m.reopenFunc == nil {
		return nil
	}
	return m.reopenFunc()
}
func TestMultiSink_Write(t *testing.T) {
var mu sync.Mutex
writeCount := 0
sink1 := &mockSink{
name: "sink1",
writeFunc: func(log domain.CorrelatedLog) error {
mu.Lock()
writeCount++
mu.Unlock()
return nil
},
flushFunc: func() error { return nil },
closeFunc: func() error { return nil },
}
sink2 := &mockSink{
name: "sink2",
writeFunc: func(log domain.CorrelatedLog) error {
mu.Lock()
writeCount++
mu.Unlock()
return nil
},
flushFunc: func() error { return nil },
closeFunc: func() error { return nil },
}
ms := NewMultiSink(sink1, sink2)
log := domain.CorrelatedLog{SrcIP: "192.168.1.1"}
err := ms.Write(context.Background(), log)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if writeCount != 2 {
t.Errorf("expected 2 writes, got %d", writeCount)
}
}
// TestMultiSink_Write_OneFails checks that Write reports an error when one
// of two sinks fails.
func TestMultiSink_Write_OneFails(t *testing.T) {
	succeed := func() error { return nil }

	healthy := &mockSink{
		name:      "sink1",
		writeFunc: func(domain.CorrelatedLog) error { return nil },
		flushFunc: succeed,
		closeFunc: succeed,
	}
	failing := &mockSink{
		name:      "sink2",
		writeFunc: func(domain.CorrelatedLog) error { return context.Canceled },
		flushFunc: succeed,
		closeFunc: succeed,
	}

	ms := NewMultiSink(healthy, failing)
	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1"}

	if err := ms.Write(context.Background(), entry); err == nil {
		t.Error("expected error when one sink fails")
	}
}
func TestMultiSink_AddSink(t *testing.T) {
ms := NewMultiSink()
sink := &mockSink{
name: "dynamic",
writeFunc: func(log domain.CorrelatedLog) error { return nil },
flushFunc: func() error { return nil },
closeFunc: func() error { return nil },
}
ms.AddSink(sink)
log := domain.CorrelatedLog{SrcIP: "192.168.1.1"}
err := ms.Write(context.Background(), log)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
// TestMultiSink_Name checks that the MultiSink reports the name "multi".
func TestMultiSink_Name(t *testing.T) {
	got := NewMultiSink().Name()
	if got == "multi" {
		return
	}
	t.Errorf("expected name 'multi', got %s", got)
}
// TestMultiSink_Flush checks that Flush on the MultiSink invokes Flush on
// the underlying sink.
func TestMultiSink_Flush(t *testing.T) {
	flushed := false
	recorder := &mockSink{
		name:      "test",
		writeFunc: func(domain.CorrelatedLog) error { return nil },
		closeFunc: func() error { return nil },
		flushFunc: func() error {
			flushed = true
			return nil
		},
	}

	ms := NewMultiSink(recorder)
	if err := ms.Flush(context.Background()); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !flushed {
		t.Error("expected sink to be flushed")
	}
}
// TestMultiSink_Flush_Error checks that a sink's Flush error is returned
// unchanged by the MultiSink.
func TestMultiSink_Flush_Error(t *testing.T) {
	failing := &mockSink{
		name:      "test",
		writeFunc: func(domain.CorrelatedLog) error { return nil },
		flushFunc: func() error { return context.Canceled },
		closeFunc: func() error { return nil },
	}

	got := NewMultiSink(failing).Flush(context.Background())
	if got != context.Canceled {
		t.Errorf("expected context.Canceled error, got %v", got)
	}
}
// TestMultiSink_Close checks that Close on the MultiSink invokes Close on
// the underlying sink.
func TestMultiSink_Close(t *testing.T) {
	closed := false
	recorder := &mockSink{
		name:      "test",
		writeFunc: func(domain.CorrelatedLog) error { return nil },
		flushFunc: func() error { return nil },
		closeFunc: func() error {
			closed = true
			return nil
		},
	}

	ms := NewMultiSink(recorder)
	if err := ms.Close(); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !closed {
		t.Error("expected sink to be closed")
	}
}
// TestMultiSink_Close_Error checks that a sink's Close error is returned
// unchanged by the MultiSink.
func TestMultiSink_Close_Error(t *testing.T) {
	failing := &mockSink{
		name:      "test",
		writeFunc: func(domain.CorrelatedLog) error { return nil },
		flushFunc: func() error { return nil },
		closeFunc: func() error { return context.Canceled },
	}

	got := NewMultiSink(failing).Close()
	if got != context.Canceled {
		t.Errorf("expected context.Canceled error, got %v", got)
	}
}
// TestMultiSink_Write_EmptySinks checks that Write on a MultiSink with no
// sinks returns nil.
func TestMultiSink_Write_EmptySinks(t *testing.T) {
	empty := NewMultiSink()
	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1"}

	if err := empty.Write(context.Background(), entry); err != nil {
		t.Fatalf("unexpected error with empty sinks: %v", err)
	}
}
// TestMultiSink_Write_ContextCancelled checks that Write returns
// context.Canceled when its context is already cancelled and the sink has
// not finished.
//
// The mock sink blocks on a release channel that is closed when the test
// returns. Blocking on context.Background().Done() instead would wait on a
// nil channel forever and leak the sink goroutine for the rest of the test
// binary's lifetime.
func TestMultiSink_Write_ContextCancelled(t *testing.T) {
	release := make(chan struct{})
	defer close(release)

	sink := &mockSink{
		name: "test",
		writeFunc: func(log domain.CorrelatedLog) error {
			// Stay blocked for the duration of the test so that only the
			// cancelled context can end MultiSink.Write.
			<-release
			return nil
		},
		flushFunc: func() error { return nil },
		closeFunc: func() error { return nil },
	}

	ms := NewMultiSink(sink)

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	log := domain.CorrelatedLog{SrcIP: "192.168.1.1"}
	err := ms.Write(ctx, log)
	if err != context.Canceled {
		t.Errorf("expected context.Canceled error, got %v", err)
	}
}

View File

@ -0,0 +1,46 @@
package stdout
import (
"context"
"github.com/antitbone/ja4/correlator/internal/domain"
)
// Config holds the stdout sink configuration.
type Config struct {
// Enabled is not read by NewStdoutSink in this file.
Enabled bool
}
// StdoutSink is a no-op data sink. Operational logs are written to stderr
// by the observability.Logger; correlated data must never appear on stdout.
// The type carries no state; every method returns immediately.
type StdoutSink struct{}
// NewStdoutSink creates a new stdout sink. The config argument is accepted
// but not used.
func NewStdoutSink(config Config) *StdoutSink {
return &StdoutSink{}
}
// Name returns the sink name, "stdout".
func (s *StdoutSink) Name() string {
return "stdout"
}
// Reopen is a no-op for stdout and always returns nil.
func (s *StdoutSink) Reopen() error {
return nil
}
// Write is a no-op: correlated data must never be written to stdout.
// Both arguments are ignored and the result is always nil.
func (s *StdoutSink) Write(_ context.Context, _ domain.CorrelatedLog) error {
return nil
}
// Flush is a no-op for stdout and always returns nil.
func (s *StdoutSink) Flush(_ context.Context) error {
return nil
}
// Close is a no-op for stdout and always returns nil.
func (s *StdoutSink) Close() error {
return nil
}

View File

@ -0,0 +1,81 @@
package stdout
import (
"bytes"
"context"
"os"
"testing"
"time"
"github.com/antitbone/ja4/correlator/internal/domain"
)
// makeLog builds a fixed sample log whose Correlated flag is set from the
// argument.
func makeLog(correlated bool) domain.CorrelatedLog {
	var entry domain.CorrelatedLog
	entry.Timestamp = time.Unix(1700000000, 0)
	entry.SrcIP = "1.2.3.4"
	entry.SrcPort = 12345
	entry.Correlated = correlated
	return entry
}
// captureStdout replaces os.Stdout temporarily and returns what was written.
func captureStdout(t *testing.T, fn func()) string {
t.Helper()
r, w, err := os.Pipe()
if err != nil {
t.Fatalf("os.Pipe: %v", err)
}
old := os.Stdout
os.Stdout = w
fn()
w.Close()
os.Stdout = old
var buf bytes.Buffer
buf.ReadFrom(r)
r.Close()
return buf.String()
}
// TestStdoutSink_Name checks that the sink reports the name "stdout".
func TestStdoutSink_Name(t *testing.T) {
	got := NewStdoutSink(Config{Enabled: true}).Name()
	if got == "stdout" {
		return
	}
	t.Errorf("expected name 'stdout', got %q", got)
}
// TestStdoutSink_WriteDoesNotProduceOutput writes one correlated and one
// orphan log while stdout is captured and expects nothing to be emitted.
func TestStdoutSink_WriteDoesNotProduceOutput(t *testing.T) {
	sink := NewStdoutSink(Config{Enabled: true})
	ctx := context.Background()

	writeBoth := func() {
		err := sink.Write(ctx, makeLog(true))
		if err != nil {
			t.Fatalf("Write(correlated) returned error: %v", err)
		}
		err = sink.Write(ctx, makeLog(false))
		if err != nil {
			t.Fatalf("Write(orphan) returned error: %v", err)
		}
	}

	if captured := captureStdout(t, writeBoth); captured != "" {
		t.Errorf("stdout must be empty but got: %q", captured)
	}
}
// TestStdoutSink_NoopMethods checks that Flush, Close and Reopen all return
// nil.
func TestStdoutSink_NoopMethods(t *testing.T) {
	sink := NewStdoutSink(Config{Enabled: true})

	flushErr := sink.Flush(context.Background())
	if flushErr != nil {
		t.Errorf("Flush returned error: %v", flushErr)
	}

	closeErr := sink.Close()
	if closeErr != nil {
		t.Errorf("Close returned error: %v", closeErr)
	}

	reopenErr := sink.Reopen()
	if reopenErr != nil {
		t.Errorf("Reopen returned error: %v", reopenErr)
	}
}

View File

@ -0,0 +1,160 @@
package app
import (
"context"
"sync"
"sync/atomic"
"time"
"github.com/antitbone/ja4/correlator/internal/domain"
"github.com/antitbone/ja4/correlator/internal/ports"
)
const (
// DefaultEventChannelBufferSize is the capacity of the buffered event
// channel that Start creates for each source.
DefaultEventChannelBufferSize = 1000
// OrphanTickInterval is how often the orchestrator drains pending orphans.
// Set to half the default emit delay (500ms/2) so orphans are emitted promptly
// even when no new events arrive.
OrphanTickInterval = 250 * time.Millisecond
)
// OrchestratorConfig holds the orchestrator configuration.
type OrchestratorConfig struct {
// Sources are the event sources started by Orchestrator.Start, each with
// its own event channel.
Sources []ports.EventSource
// Sink receives every correlated log produced by the correlation service
// and is closed by Orchestrator.Stop.
Sink ports.CorrelatedLogSink
}
// Orchestrator connects sources to the correlation service and sinks.
type Orchestrator struct {
config OrchestratorConfig
correlationSvc ports.CorrelationProcessor
// ctx is passed to sources and sink writes; cancel is called by Stop.
ctx context.Context
cancel context.CancelFunc
// wg counts the goroutines launched by Start.
wg sync.WaitGroup
// running makes Start and Stop no-ops when called in the wrong state.
running atomic.Bool
}
// NewOrchestrator builds an orchestrator around the given configuration and
// correlation service. It creates a cancellable context derived from
// context.Background(); nothing is started until Start is called.
func NewOrchestrator(config OrchestratorConfig, correlationSvc ports.CorrelationProcessor) *Orchestrator {
	orch := &Orchestrator{
		config:         config,
		correlationSvc: correlationSvc,
	}
	orch.ctx, orch.cancel = context.WithCancel(context.Background())
	return orch
}
// Start begins the orchestration.
//
// For every configured source it creates a buffered event channel and
// launches a goroutine that runs the source and forwards its events through
// the correlation service to the sink. It also launches one ticker goroutine
// that emits pending orphans every OrphanTickInterval. All goroutines are
// tracked by o.wg and stop when the orchestrator context is cancelled.
//
// Calling Start while already running is a no-op. Start always returns nil.
func (o *Orchestrator) Start() error {
	if !o.running.CompareAndSwap(false, true) {
		return nil // Already running
	}

	// Start each source
	for _, source := range o.config.Sources {
		eventChan := make(chan *domain.NormalizedEvent, DefaultEventChannelBufferSize)
		o.wg.Add(1)
		go func(src ports.EventSource, evChan chan *domain.NormalizedEvent) {
			defer o.wg.Done()

			// Run the source in a separate goroutine. Its result is always
			// delivered, nil included, so the receive below cannot block
			// forever once the source has returned. The buffer of one lets
			// the sender finish before anyone is receiving.
			sourceErr := make(chan error, 1)
			go func() {
				sourceErr <- src.Start(o.ctx, evChan)
			}()

			// Process events in the current goroutine until the context is
			// cancelled or the channel is closed.
			o.processEvents(evChan)

			// Wait for the source to return before marking this worker done.
			// No logger is available here, so a source error is observed but
			// not reported.
			<-sourceErr
		}(source, eventChan)
	}

	// Start a periodic ticker to drain pending orphan A events independently of the
	// event flow. Without this, orphans are only emitted when a new event arrives,
	// causing them to accumulate silently when the source goes quiet.
	o.wg.Add(1)
	go func() {
		defer o.wg.Done()
		ticker := time.NewTicker(OrphanTickInterval)
		defer ticker.Stop()
		for {
			select {
			case <-o.ctx.Done():
				return
			case <-ticker.C:
				logs := o.correlationSvc.EmitPendingOrphans()
				for _, log := range logs {
					o.config.Sink.Write(o.ctx, log) //nolint:errcheck
				}
			}
		}
	}()

	return nil
}
// processEvents consumes eventChan until it is closed or the orchestrator
// context is cancelled. Each event is passed to the correlation service and
// every resulting log is written to the sink; write errors are ignored.
//
// On cancellation, events already buffered in the channel are still
// processed (using the cancelled context for the sink writes) and the
// function returns as soon as the channel is empty or closed.
func (o *Orchestrator) processEvents(eventChan <-chan *domain.NormalizedEvent) {
	// forward runs one event through correlation and writes the results.
	forward := func(ev *domain.NormalizedEvent) {
		for _, entry := range o.correlationSvc.ProcessEvent(ev) {
			_ = o.config.Sink.Write(o.ctx, entry)
		}
	}

	for {
		select {
		case ev, open := <-eventChan:
			if !open {
				return
			}
			forward(ev)

		case <-o.ctx.Done():
			// Drain whatever is already buffered, without blocking.
			for {
				select {
				case ev, open := <-eventChan:
					if !open {
						return
					}
					forward(ev)
				default:
					return
				}
			}
		}
	}
}
// Stop stops the orchestrator.
//
// It cancels the orchestrator context, which signals the sources and worker
// goroutines to stop, and then closes the sink immediately without waiting
// for queue drainage or for the goroutines to exit. systemd TimeoutStopSec
// handles forced termination if needed.
//
// Calling Stop when not running is a no-op returning nil. Otherwise the
// result is the error returned by the sink's Close, so a failed close is
// reported to the caller instead of being silently dropped.
func (o *Orchestrator) Stop() error {
	if !o.running.CompareAndSwap(true, false) {
		return nil // Not running
	}

	// Cancel context to stop accepting new events immediately
	o.cancel()

	// Close sink (flush skipped - in-flight events are dropped)
	return o.config.Sink.Close()
}

View File

@ -0,0 +1,300 @@
package app
import (
"context"
"sync"
"testing"
"time"
"github.com/antitbone/ja4/correlator/internal/domain"
"github.com/antitbone/ja4/correlator/internal/ports"
)
// mockEventSource is a test double for an event source. Start records the
// channel it was given and blocks until its context is cancelled.
type mockEventSource struct {
	name      string
	mu        sync.RWMutex // guards the fields below
	eventChan chan<- *domain.NormalizedEvent
	started   bool
	stopped   bool
}

// Name returns the configured name.
func (m *mockEventSource) Name() string {
	return m.name
}

// Start stores eventChan, marks the source started, waits for ctx to be
// done, marks it stopped and returns nil.
func (m *mockEventSource) Start(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) error {
	m.mu.Lock()
	m.eventChan = eventChan
	m.started = true
	m.mu.Unlock()

	<-ctx.Done()

	m.mu.Lock()
	m.stopped = true
	m.mu.Unlock()
	return nil
}

// Stop does nothing and returns nil.
func (m *mockEventSource) Stop() error {
	return nil
}

// getEventChan returns the channel recorded by Start, or nil before Start
// has run.
func (m *mockEventSource) getEventChan() chan<- *domain.NormalizedEvent {
	m.mu.RLock()
	ch := m.eventChan
	m.mu.RUnlock()
	return ch
}

// isStarted reports whether Start has been entered.
func (m *mockEventSource) isStarted() bool {
	m.mu.RLock()
	state := m.started
	m.mu.RUnlock()
	return state
}
// mockSink is a test double that records every log passed to Write.
type mockSink struct {
	mu      sync.Mutex // guards written
	written []domain.CorrelatedLog
}

// Name returns "mock".
func (m *mockSink) Name() string {
	return "mock"
}

// Write appends log to the recorded list and returns nil.
func (m *mockSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.written = append(m.written, log)
	return nil
}

// Flush does nothing and returns nil.
func (m *mockSink) Flush(ctx context.Context) error {
	return nil
}

// Close does nothing and returns nil.
func (m *mockSink) Close() error {
	return nil
}

// Reopen does nothing and returns nil.
func (m *mockSink) Reopen() error {
	return nil
}

// getWritten returns a copy of the logs recorded so far.
func (m *mockSink) getWritten() []domain.CorrelatedLog {
	m.mu.Lock()
	defer m.mu.Unlock()

	snapshot := make([]domain.CorrelatedLog, len(m.written))
	copy(snapshot, m.written)
	return snapshot
}
// TestOrchestrator_StartStop starts the orchestrator with one mock source,
// lets it run for 100ms, stops it and checks that the source was started.
func TestOrchestrator_StartStop(t *testing.T) {
	src := &mockEventSource{name: "test"}

	svc := domain.NewCorrelationService(domain.CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
		NetworkEmit:      false,
	}, &domain.RealTimeProvider{})

	cfg := OrchestratorConfig{
		Sources: []ports.EventSource{src},
		Sink:    &mockSink{},
	}
	orch := NewOrchestrator(cfg, svc)

	if err := orch.Start(); err != nil {
		t.Fatalf("failed to start: %v", err)
	}

	// Give the source goroutine time to enter Start.
	time.Sleep(100 * time.Millisecond)

	if err := orch.Stop(); err != nil {
		t.Fatalf("failed to stop: %v", err)
	}
	if !src.isStarted() {
		t.Error("expected source to be started")
	}
}
// TestOrchestrator_ProcessEvent pushes one source-A event through a running
// orchestrator and expects at least one log to reach the sink.
func TestOrchestrator_ProcessEvent(t *testing.T) {
	src := &mockEventSource{name: "test"}
	recorder := &mockSink{}

	svc := domain.NewCorrelationService(domain.CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
		NetworkEmit:      false,
	}, &domain.RealTimeProvider{})

	orch := NewOrchestrator(OrchestratorConfig{
		Sources: []ports.EventSource{src},
		Sink:    recorder,
	}, svc)

	if err := orch.Start(); err != nil {
		t.Fatalf("failed to start: %v", err)
	}

	// Poll (up to 50 x 10ms) until the source has been handed its channel.
	var input chan<- *domain.NormalizedEvent
	for attempt := 0; attempt < 50; attempt++ {
		if input = src.getEventChan(); input != nil {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}
	if input == nil {
		t.Fatal("source did not start properly")
	}

	// Inject one event as if the source had produced it.
	input <- &domain.NormalizedEvent{
		Source:    domain.SourceA,
		Timestamp: time.Now(),
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		Raw:       map[string]any{"method": "GET"},
	}

	// Give the orchestrator time to process it.
	time.Sleep(100 * time.Millisecond)

	if err := orch.Stop(); err != nil {
		t.Fatalf("failed to stop: %v", err)
	}

	// Should have written at least one log (the orphan A)
	if len(recorder.getWritten()) == 0 {
		t.Error("expected at least one log to be written")
	}
}
// TestOrchestrator_StartTwice tests that calling Start() twice is a no-op (already running).
// The final Stop() is checked as well so a failing shutdown is not silently ignored.
func TestOrchestrator_StartTwice(t *testing.T) {
	source := &mockEventSource{name: "test"}
	sink := &mockSink{}
	corrConfig := domain.CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
	}
	correlationSvc := domain.NewCorrelationService(corrConfig, &domain.RealTimeProvider{})
	o := NewOrchestrator(OrchestratorConfig{
		Sources: []ports.EventSource{source},
		Sink:    sink,
	}, correlationSvc)

	if err := o.Start(); err != nil {
		t.Fatalf("first Start() failed: %v", err)
	}
	if err := o.Start(); err != nil {
		t.Errorf("second Start() should be no-op, got: %v", err)
	}
	if err := o.Stop(); err != nil {
		t.Errorf("Stop() failed: %v", err)
	}
}
// TestOrchestrator_StopTwice tests that calling Stop() twice is a no-op.
// Start() must succeed first; otherwise the Stop() results would say nothing
// about a running orchestrator, so a start failure aborts the test.
func TestOrchestrator_StopTwice(t *testing.T) {
	source := &mockEventSource{name: "test"}
	sink := &mockSink{}
	corrConfig := domain.CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
	}
	correlationSvc := domain.NewCorrelationService(corrConfig, &domain.RealTimeProvider{})
	o := NewOrchestrator(OrchestratorConfig{
		Sources: []ports.EventSource{source},
		Sink:    sink,
	}, correlationSvc)

	if err := o.Start(); err != nil {
		t.Fatalf("Start() failed: %v", err)
	}
	if err := o.Stop(); err != nil {
		t.Errorf("first Stop() failed: %v", err)
	}
	if err := o.Stop(); err != nil {
		t.Errorf("second Stop() should be no-op, got: %v", err)
	}
}
// TestOrchestrator_NoSources tests that Orchestrator works with no sources.
func TestOrchestrator_NoSources(t *testing.T) {
sink := &mockSink{}
corrConfig := domain.CorrelationConfig{TimeWindow: time.Second}
correlationSvc := domain.NewCorrelationService(corrConfig, &domain.RealTimeProvider{})
o := NewOrchestrator(OrchestratorConfig{
Sources: []ports.EventSource{},
Sink: sink,
}, correlationSvc)
if err := o.Start(); err != nil {
t.Fatalf("Start() with no sources failed: %v", err)
}
time.Sleep(50 * time.Millisecond)
if err := o.Stop(); err != nil {
t.Errorf("Stop() failed: %v", err)
}
}
// TestOrchestrator_OrphanEmission tests that orphan A events are emitted via tick.
// A single source A event is sent with no matching B event; after waiting and
// stopping the orchestrator, the sink must contain at least one log.
func TestOrchestrator_OrphanEmission(t *testing.T) {
	source := &mockEventSource{name: "test"}
	sink := &mockSink{}
	corrConfig := domain.CorrelationConfig{
		TimeWindow:        50 * time.Millisecond,
		ApacheAlwaysEmit:  true,
		ApacheEmitDelayMs: 10, // Very short delay so orphans emit quickly
	}
	correlationSvc := domain.NewCorrelationService(corrConfig, &domain.RealTimeProvider{})
	o := NewOrchestrator(OrchestratorConfig{
		Sources: []ports.EventSource{source},
		Sink:    sink,
	}, correlationSvc)

	if err := o.Start(); err != nil {
		t.Fatalf("Start() failed: %v", err)
	}

	// Wait for source to be ready (up to 50 polls, 5ms apart).
	var eventChan chan<- *domain.NormalizedEvent
	for i := 0; i < 50; i++ {
		eventChan = source.getEventChan()
		if eventChan != nil {
			break
		}
		time.Sleep(5 * time.Millisecond)
	}
	if eventChan == nil {
		t.Fatal("source did not start")
	}

	// Send a source A event (Apache/HTTP)
	eventChan <- &domain.NormalizedEvent{
		Source:    domain.SourceA,
		Timestamp: time.Now(),
		SrcIP:     "10.0.0.1",
		SrcPort:   12345,
		Raw:       map[string]any{"method": "GET"},
	}

	// Allow time for the orphan ticker (OrphanTickInterval) to fire and for
	// the 10ms emit delay configured above to elapse.
	time.Sleep(600 * time.Millisecond)

	// Report a failed shutdown instead of discarding the error, but still
	// inspect the sink afterwards.
	if err := o.Stop(); err != nil {
		t.Errorf("Stop() failed: %v", err)
	}

	written := sink.getWritten()
	if len(written) == 0 {
		t.Error("expected at least one orphan log to be emitted")
	}
}
// TestOrchestrator_Constants guards against the package constants being set
// to zero or negative values.
func TestOrchestrator_Constants(t *testing.T) {
	if !(DefaultEventChannelBufferSize > 0) {
		t.Error("DefaultEventChannelBufferSize should be positive")
	}

	if !(OrphanTickInterval > 0) {
		t.Error("OrphanTickInterval should be positive")
	}
}

View File

@ -0,0 +1,406 @@
package config
import (
"fmt"
"net"
"os"
"strconv"
"strings"
"time"
"github.com/antitbone/ja4/correlator/internal/domain"
"gopkg.in/yaml.v3"
)
// Config holds the complete application configuration.
// Load fills it from a YAML file, starting from the values produced by
// defaultConfig, and then checks it with Validate.
type Config struct {
Log LogConfig `yaml:"log"` // logging settings
Inputs InputsConfig `yaml:"inputs"` // event sources
Outputs OutputsConfig `yaml:"outputs"` // output sinks
Correlation CorrelationConfig `yaml:"correlation"` // correlation tuning
Metrics MetricsConfig `yaml:"metrics"` // metrics server settings
}
// MetricsConfig holds metrics server configuration.
// Neither field is given a value by defaultConfig or checked by Validate.
type MetricsConfig struct {
Enabled bool `yaml:"enabled"`
Addr string `yaml:"addr"` // e.g., ":8080", "localhost:8080"
}
// LogConfig holds logging configuration.
// defaultConfig sets Level to "INFO"; read it through GetLevel, which also
// upper-cases the value.
type LogConfig struct {
Level string `yaml:"level"` // DEBUG, INFO, WARN, ERROR
}
// GetLevel returns the configured log level in upper case, or "INFO" when no
// level is set.
func (c *LogConfig) GetLevel() string {
	if level := c.Level; level != "" {
		return strings.ToUpper(level)
	}
	return "INFO"
}
// ServiceConfig holds service-level configuration.
// NOTE(review): Config does not embed or reference this type in this file —
// confirm whether it is still used elsewhere.
type ServiceConfig struct {
Name string `yaml:"name"`
Language string `yaml:"language"`
}
// InputsConfig holds input sources configuration.
// Validate requires at least two unix socket entries, each with a unique
// name and a unique path.
type InputsConfig struct {
UnixSockets []UnixSocketConfig `yaml:"unix_sockets"`
}
// UnixSocketConfig holds a Unix socket source configuration.
// Validate requires Name and Path to be non-blank; Format and SourceType are
// not checked there.
type UnixSocketConfig struct {
Name string `yaml:"name"` // required, must be unique across inputs
Path string `yaml:"path"` // required, must be unique across inputs
Format string `yaml:"format"`
SourceType string `yaml:"source_type"` // "A" for Apache/HTTP, "B" for Network
SocketPermissions string `yaml:"socket_permissions"` // octal string, e.g., "0660", "0666"; parsed by GetSocketPermissions
}
// OutputsConfig holds output sinks configuration.
// Validate requires at least one of the three sinks to be enabled.
type OutputsConfig struct {
File FileOutputConfig `yaml:"file"`
ClickHouse ClickHouseOutputConfig `yaml:"clickhouse"`
Stdout StdoutOutputConfig `yaml:"stdout"`
}
// FileOutputConfig holds file sink configuration.
// defaultConfig enables it with Path "/var/log/logcorrelator/correlated.log".
// Validate only counts it as an enabled output when Path is non-empty.
type FileOutputConfig struct {
Enabled bool `yaml:"enabled"`
Path string `yaml:"path"`
}
// ClickHouseOutputConfig holds ClickHouse sink configuration.
// The defaults noted below come from defaultConfig; the "required" and "> 0"
// constraints are enforced by Validate only when Enabled is true.
type ClickHouseOutputConfig struct {
Enabled bool `yaml:"enabled"` // default false
DSN string `yaml:"dsn"` // required (non-blank) when enabled
Table string `yaml:"table"` // required (non-blank) when enabled
BatchSize int `yaml:"batch_size"` // default 500; must be > 0 when enabled
FlushIntervalMs int `yaml:"flush_interval_ms"` // default 200; not checked by Validate
MaxBufferSize int `yaml:"max_buffer_size"` // default 5000; must be > 0 when enabled
DropOnOverflow bool `yaml:"drop_on_overflow"` // default true
AsyncInsert bool `yaml:"async_insert"` // default true
TimeoutMs int `yaml:"timeout_ms"` // default 1000; must be > 0 when enabled
}
// StdoutOutputConfig holds stdout sink configuration.
// defaultConfig leaves it disabled.
type StdoutOutputConfig struct {
Enabled bool `yaml:"enabled"`
Level string `yaml:"level"` // DEBUG, INFO, WARN, ERROR - filters output verbosity
}
// CorrelationConfig holds correlation configuration.
// Most values should be read through the Get* accessors on this type, which
// apply fallbacks to the deprecated fields and to defaults from the domain
// package.
type CorrelationConfig struct {
TimeWindow TimeWindowConfig `yaml:"time_window"`
OrphanPolicy OrphanPolicyConfig `yaml:"orphan_policy"`
Matching MatchingConfig `yaml:"matching"`
Buffers BuffersConfig `yaml:"buffers"`
TTL TTLConfig `yaml:"ttl"`
ExcludeSourceIPs []string `yaml:"exclude_source_ips"` // List of source IPs or CIDR ranges to exclude
IncludeDestPorts []int `yaml:"include_dest_ports"` // If non-empty, only correlate events matching these destination ports
// Deprecated: Use TimeWindow.Value instead
// (defaultConfig sets this to 1; GetTimeWindow falls back to it).
TimeWindowS int `yaml:"time_window_s"`
// Deprecated: Use OrphanPolicy.ApacheAlwaysEmit instead
// (defaultConfig sets this to true; GetApacheAlwaysEmit falls back to it).
EmitOrphans bool `yaml:"emit_orphans"`
}
// TimeWindowConfig holds time window configuration.
// GetDuration converts it to a time.Duration.
type TimeWindowConfig struct {
Value int `yaml:"value"` // values <= 0 are treated as 1 by GetDuration
Unit string `yaml:"unit"` // "ms"/"millisecond"/"milliseconds" for milliseconds; anything else is read as seconds
}
// GetDuration converts the configured window into a time.Duration.
// A Value of zero or less is treated as 1. Unit "ms", "millisecond" or
// "milliseconds" selects milliseconds; every other unit string, including an
// empty or unrecognized one, is interpreted as seconds.
func (c *TimeWindowConfig) GetDuration() time.Duration {
	amount := c.Value
	if amount <= 0 {
		amount = 1
	}

	unit := time.Second
	switch c.Unit {
	case "ms", "millisecond", "milliseconds":
		unit = time.Millisecond
	}
	return time.Duration(amount) * unit
}
// OrphanPolicyConfig holds orphan event policy configuration.
type OrphanPolicyConfig struct {
ApacheAlwaysEmit bool `yaml:"apache_always_emit"` // read via GetApacheAlwaysEmit, which also honours the deprecated emit_orphans
ApacheEmitDelayMs int `yaml:"apache_emit_delay_ms"` // Delay in ms before emitting orphan A; values <= 0 fall back to domain.DefaultApacheEmitDelayMs
NetworkEmit bool `yaml:"network_emit"`
}
// MatchingConfig holds matching mode configuration.
// An empty Mode is reported as "one_to_many" by GetMatchingMode.
type MatchingConfig struct {
Mode string `yaml:"mode"` // one_to_one or one_to_many
}
// BuffersConfig holds buffer size configuration.
// Values <= 0 make the corresponding accessor return the domain default
// (domain.DefaultMaxHTTPBufferSize / domain.DefaultMaxNetworkBufferSize).
type BuffersConfig struct {
MaxHTTPItems int `yaml:"max_http_items"`
MaxNetworkItems int `yaml:"max_network_items"`
}
// TTLConfig holds TTL configuration.
type TTLConfig struct {
NetworkTTLS int `yaml:"network_ttl_s"` // seconds; values <= 0 fall back to domain.DefaultNetworkTTLS in GetNetworkTTLS
}
// Load reads the YAML file at path, decodes it on top of the values from
// defaultConfig, and validates the result.
// It returns a wrapped error if the file cannot be read, cannot be parsed,
// or fails Validate.
func Load(path string) (*Config, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read config file: %w", readErr)
	}

	// Decode into a pre-populated struct so defaults are the starting point.
	loaded := defaultConfig()
	parseErr := yaml.Unmarshal(raw, loaded)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse config file: %w", parseErr)
	}

	validationErr := loaded.Validate()
	if validationErr != nil {
		return nil, fmt.Errorf("invalid config: %w", validationErr)
	}
	return loaded, nil
}
// defaultConfig builds the baseline configuration that Load decodes the
// YAML file into: INFO logging, no inputs, file output enabled, ClickHouse
// and stdout outputs disabled, a 1 second legacy time window and orphan
// emission turned on.
func defaultConfig() *Config {
	cfg := &Config{}

	cfg.Log.Level = "INFO"
	cfg.Inputs.UnixSockets = []UnixSocketConfig{}

	cfg.Outputs.File = FileOutputConfig{
		Enabled: true,
		Path:    "/var/log/logcorrelator/correlated.log",
	}
	cfg.Outputs.ClickHouse = ClickHouseOutputConfig{
		Enabled:         false,
		BatchSize:       500,
		FlushIntervalMs: 200,
		MaxBufferSize:   5000,
		DropOnOverflow:  true,
		AsyncInsert:     true,
		TimeoutMs:       1000,
	}
	cfg.Outputs.Stdout = StdoutOutputConfig{Enabled: false}

	cfg.Correlation.TimeWindowS = 1
	cfg.Correlation.EmitOrphans = true

	return cfg
}
// Validate checks that the configuration is usable and returns the first
// problem found, or nil when everything is acceptable.
//
// Checks performed:
//   - at least two unix socket inputs, each with a non-blank name and path,
//     and no duplicated name or path;
//   - at least one enabled output (the file output only counts when its
//     path is non-empty);
//   - when ClickHouse is enabled: non-blank DSN and table, and positive
//     batch_size, max_buffer_size and timeout_ms;
//   - a positive correlation time window, taken from either
//     time_window.value or the deprecated time_window_s.
func (c *Config) Validate() error {
	if len(c.Inputs.UnixSockets) < 2 {
		return fmt.Errorf("at least two unix socket inputs are required")
	}

	seenNames := make(map[string]struct{}, len(c.Inputs.UnixSockets))
	seenPaths := make(map[string]struct{}, len(c.Inputs.UnixSockets))
	for i, input := range c.Inputs.UnixSockets {
		if strings.TrimSpace(input.Name) == "" {
			return fmt.Errorf("inputs.unix_sockets[%d].name is required", i)
		}
		if strings.TrimSpace(input.Path) == "" {
			return fmt.Errorf("inputs.unix_sockets[%d].path is required", i)
		}
		if _, exists := seenNames[input.Name]; exists {
			return fmt.Errorf("duplicate unix socket input name: %s", input.Name)
		}
		seenNames[input.Name] = struct{}{}
		if _, exists := seenPaths[input.Path]; exists {
			return fmt.Errorf("duplicate unix socket input path: %s", input.Path)
		}
		seenPaths[input.Path] = struct{}{}
	}

	// At least one output must be enabled
	hasOutput := (c.Outputs.File.Enabled && c.Outputs.File.Path != "") ||
		c.Outputs.ClickHouse.Enabled ||
		c.Outputs.Stdout.Enabled
	if !hasOutput {
		return fmt.Errorf("at least one output must be enabled (file, clickhouse, or stdout)")
	}

	if c.Outputs.ClickHouse.Enabled {
		if strings.TrimSpace(c.Outputs.ClickHouse.DSN) == "" {
			return fmt.Errorf("clickhouse DSN is required when enabled")
		}
		if strings.TrimSpace(c.Outputs.ClickHouse.Table) == "" {
			return fmt.Errorf("clickhouse table is required when enabled")
		}
		if c.Outputs.ClickHouse.BatchSize <= 0 {
			return fmt.Errorf("clickhouse batch_size must be > 0")
		}
		if c.Outputs.ClickHouse.MaxBufferSize <= 0 {
			return fmt.Errorf("clickhouse max_buffer_size must be > 0")
		}
		if c.Outputs.ClickHouse.TimeoutMs <= 0 {
			return fmt.Errorf("clickhouse timeout_ms must be > 0")
		}
	}

	// GetTimeWindow prefers time_window.value over the deprecated
	// time_window_s, so the legacy field is only required to be positive
	// when the newer setting is not in use.
	if c.Correlation.TimeWindow.Value <= 0 && c.Correlation.TimeWindowS <= 0 {
		return fmt.Errorf("correlation.time_window_s must be > 0")
	}

	return nil
}
// GetTimeWindow returns the correlation time window as a duration.
// A positive TimeWindow.Value wins; otherwise the deprecated TimeWindowS is
// used as a number of seconds, and one second is returned when that is not
// positive either.
// Deprecated: Use TimeWindow.GetDuration() instead.
func (c *CorrelationConfig) GetTimeWindow() time.Duration {
	switch {
	case c.TimeWindow.Value > 0:
		// New config takes precedence
		return c.TimeWindow.GetDuration()
	case c.TimeWindowS > 0:
		// Fallback to deprecated field
		return time.Duration(c.TimeWindowS) * time.Second
	default:
		return time.Second
	}
}
// GetApacheAlwaysEmit reports whether Apache events should always be emitted.
// It is true when either OrphanPolicy.ApacheAlwaysEmit or the deprecated
// EmitOrphans flag is set. Because defaultConfig sets EmitOrphans to true,
// setting only apache_always_emit to false does not turn emission off;
// emit_orphans has to be false as well.
func (c *CorrelationConfig) GetApacheAlwaysEmit() bool {
	return c.OrphanPolicy.ApacheAlwaysEmit || c.EmitOrphans
}
// GetApacheEmitDelayMs returns the delay, in milliseconds, applied before an
// orphan A event is emitted. A configured value of zero or less yields
// domain.DefaultApacheEmitDelayMs.
func (c *CorrelationConfig) GetApacheEmitDelayMs() int {
	configured := c.OrphanPolicy.ApacheEmitDelayMs
	if configured <= 0 {
		return domain.DefaultApacheEmitDelayMs
	}
	return configured
}
// GetMatchingMode returns the configured matching mode, or "one_to_many"
// (the Keep-Alive default) when none is set.
func (c *CorrelationConfig) GetMatchingMode() string {
	mode := c.Matching.Mode
	if mode == "" {
		mode = "one_to_many"
	}
	return mode
}
// GetMaxHTTPBufferSize returns Buffers.MaxHTTPItems when it is positive and
// domain.DefaultMaxHTTPBufferSize otherwise.
func (c *CorrelationConfig) GetMaxHTTPBufferSize() int {
	limit := c.Buffers.MaxHTTPItems
	if limit <= 0 {
		return domain.DefaultMaxHTTPBufferSize
	}
	return limit
}
// GetMaxNetworkBufferSize returns Buffers.MaxNetworkItems when it is positive
// and domain.DefaultMaxNetworkBufferSize otherwise.
func (c *CorrelationConfig) GetMaxNetworkBufferSize() int {
	limit := c.Buffers.MaxNetworkItems
	if limit <= 0 {
		return domain.DefaultMaxNetworkBufferSize
	}
	return limit
}
// GetNetworkTTLS returns the network TTL in seconds: TTL.NetworkTTLS when it
// is positive, domain.DefaultNetworkTTLS otherwise.
func (c *CorrelationConfig) GetNetworkTTLS() int {
	ttl := c.TTL.NetworkTTLS
	if ttl <= 0 {
		return domain.DefaultNetworkTTLS
	}
	return ttl
}
// GetSocketPermissions converts SocketPermissions into an os.FileMode.
// The string is trimmed and parsed as an octal number (e.g. "0660", "660",
// "0666"). An empty string, or one that does not parse as octal, silently
// yields the default 0666 (world read/write).
func (c *UnixSocketConfig) GetSocketPermissions() os.FileMode {
	const fallback os.FileMode = 0666

	spec := strings.TrimSpace(c.SocketPermissions)
	if spec == "" {
		return fallback
	}
	if bits, err := strconv.ParseUint(spec, 8, 32); err == nil {
		return os.FileMode(bits)
	}
	return fallback
}
// GetIncludeDestPorts returns the configured destination-port allow list
// exactly as stored (the same slice, not a copy).
// An empty list means all ports are allowed.
func (c *CorrelationConfig) GetIncludeDestPorts() []int {
	allowed := c.IncludeDestPorts
	return allowed
}
// GetExcludeSourceIPs returns the configured exclusion entries (plain IPs or
// CIDR ranges) exactly as stored (the same slice, not a copy).
func (c *CorrelationConfig) GetExcludeSourceIPs() []string {
	excluded := c.ExcludeSourceIPs
	return excluded
}
// IsSourceIPExcluded reports whether ip matches any entry of
// ExcludeSourceIPs. Entries containing "/" are treated as CIDR ranges; all
// others are compared as single addresses.
// It returns false when the list is empty or when ip is not a parseable IP
// address. Entries that fail to parse are skipped, not reported.
func (c *CorrelationConfig) IsSourceIPExcluded(ip string) bool {
	if len(c.ExcludeSourceIPs) == 0 {
		return false
	}

	// Parse the candidate once; an unparseable address is never excluded.
	candidate := net.ParseIP(ip)
	if candidate == nil {
		return false
	}

	for _, entry := range c.ExcludeSourceIPs {
		if strings.Contains(entry, "/") {
			// CIDR entry: an invalid range is ignored.
			if _, network, err := net.ParseCIDR(entry); err == nil && network.Contains(candidate) {
				return true
			}
			continue
		}

		// Single address: cheap textual match first.
		if entry == ip {
			return true
		}
		// Then compare parsed forms so that different spellings of the same
		// address (e.g. "::1" and "0:0:0:0:0:0:0:1") still match.
		if addr := net.ParseIP(entry); addr != nil && addr.Equal(candidate) {
			return true
		}
	}
	return false
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,151 @@
package domain
import (
"encoding/json"
"reflect"
"time"
)
// CorrelatedLog represents the output correlated log entry.
// All fields are flattened into a single-level structure.
// JSON encoding goes through the MarshalJSON method on this type, which
// builds the flat object and decides which keys are omitted.
type CorrelatedLog struct {
Timestamp time.Time `json:"timestamp"`
SrcIP string `json:"src_ip"`
SrcPort int `json:"src_port"`
DstIP string `json:"dst_ip,omitempty"`
DstPort int `json:"dst_port,omitempty"`
Correlated bool `json:"correlated"` // true for a matched pair, false for an orphan
OrphanSide string `json:"orphan_side,omitempty"` // set by NewCorrelatedLogFromEvent; empty for correlated logs
Fields map[string]any `json:"-"` // Additional fields, merged at marshal time
}
// MarshalJSON encodes the log as one flat JSON object.
// Entries from Fields are copied first, skipping any key that collides with
// a core field name so that additional data can never override core values.
// The core fields are then written: timestamp, src_ip, src_port and
// correlated always; dst_ip, dst_port and orphan_side only when non-zero.
func (c CorrelatedLog) MarshalJSON() ([]byte, error) {
	out := make(map[string]any, len(c.Fields)+7)

	for key, val := range c.Fields {
		switch key {
		case "timestamp", "src_ip", "src_port", "dst_ip", "dst_port", "correlated", "orphan_side":
			// Reserved for core fields.
			continue
		}
		out[key] = val
	}

	out["timestamp"] = c.Timestamp
	out["src_ip"] = c.SrcIP
	out["src_port"] = c.SrcPort
	out["correlated"] = c.Correlated
	if c.DstIP != "" {
		out["dst_ip"] = c.DstIP
	}
	if c.DstPort != 0 {
		out["dst_port"] = c.DstPort
	}
	if c.OrphanSide != "" {
		out["orphan_side"] = c.OrphanSide
	}

	return json.Marshal(out)
}
// NewCorrelatedLogFromEvent builds an uncorrelated (orphan) log from a single
// event. The event's Raw data is copied into Fields, a "keepalives" field is
// added when KeepAliveSeq is positive, and orphanSide is recorded as given.
func NewCorrelatedLogFromEvent(event *NormalizedEvent, orphanSide string) CorrelatedLog {
	extras := extractFields(event)
	if seq := event.KeepAliveSeq; seq > 0 {
		extras["keepalives"] = seq
	}

	var entry CorrelatedLog
	entry.Timestamp = event.Timestamp
	entry.SrcIP = event.SrcIP
	entry.SrcPort = event.SrcPort
	entry.DstIP = event.DstIP
	entry.DstPort = event.DstPort
	entry.Correlated = false
	entry.OrphanSide = orphanSide
	entry.Fields = extras
	return entry
}
// NewCorrelatedLog builds a correlated log from a matched pair of events.
// The later of the two timestamps is used (the Apache one on a tie), the
// source address comes from the Apache event, and destination values come
// from the Apache event unless they are zero, in which case the network
// event supplies them. Raw fields are merged via mergeFields, and
// "keepalives" is added when the Apache event's KeepAliveSeq is positive.
func NewCorrelatedLog(apacheEvent, networkEvent *NormalizedEvent) CorrelatedLog {
	latest := apacheEvent.Timestamp
	if candidate := networkEvent.Timestamp; candidate.After(latest) {
		latest = candidate
	}

	merged := mergeFields(apacheEvent, networkEvent)
	if seq := apacheEvent.KeepAliveSeq; seq > 0 {
		merged["keepalives"] = seq
	}

	dstIP := coalesceString(apacheEvent.DstIP, networkEvent.DstIP)
	dstPort := coalesceInt(apacheEvent.DstPort, networkEvent.DstPort)

	return CorrelatedLog{
		Timestamp:  latest,
		SrcIP:      apacheEvent.SrcIP,
		SrcPort:    apacheEvent.SrcPort,
		DstIP:      dstIP,
		DstPort:    dstPort,
		Correlated: true,
		OrphanSide: "",
		Fields:     merged,
	}
}
// extractFields returns a new map holding a shallow copy of e.Raw. The
// result is never nil, even when Raw is nil or empty.
func extractFields(e *NormalizedEvent) map[string]any {
	copied := make(map[string]any, len(e.Raw))
	for key, val := range e.Raw {
		copied[key] = val
	}
	return copied
}
// mergeFields combines the Raw maps of a and b into a new map.
// Keys present on only one side are copied as-is. When both sides carry the
// same key with deeply-equal values, a single entry is kept. When the values
// differ, the plain key is removed and both values are stored under
// "a_<key>" and "b_<key>".
func mergeFields(a, b *NormalizedEvent) map[string]any {
	merged := make(map[string]any)
	for key, val := range a.Raw {
		merged[key] = val
	}

	for key, bVal := range b.Raw {
		aVal, clash := merged[key]
		switch {
		case !clash:
			merged[key] = bVal
		case reflect.DeepEqual(aVal, bVal):
			// Same value on both sides: keep the single existing entry.
		default:
			// Collision with different values: keep both with prefixes
			delete(merged, key)
			merged["a_"+key] = aVal
			merged["b_"+key] = bVal
		}
	}
	return merged
}
// coalesceString returns a unless it is empty, in which case it returns b.
func coalesceString(a, b string) string {
	if a == "" {
		return b
	}
	return a
}
// coalesceInt returns a unless it is zero, in which case it returns b.
func coalesceInt(a, b int) int {
	if a == 0 {
		return b
	}
	return a
}

View File

@ -0,0 +1,365 @@
package domain
import (
"encoding/json"
"testing"
"time"
)
// TestNormalizedEvent_CorrelationKey checks that the correlation key is
// rendered as "<src_ip>:<src_port>", including for port zero.
func TestNormalizedEvent_CorrelationKey(t *testing.T) {
	type keyCase struct {
		name string
		ip   string
		port int
		want string
	}
	cases := []keyCase{
		{name: "basic key", ip: "192.168.1.1", port: 8080, want: "192.168.1.1:8080"},
		{name: "different port", ip: "10.0.0.1", port: 443, want: "10.0.0.1:443"},
		{name: "port zero", ip: "127.0.0.1", port: 0, want: "127.0.0.1:0"},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			ev := &NormalizedEvent{SrcIP: tc.ip, SrcPort: tc.port}
			if got := ev.CorrelationKey(); got != tc.want {
				t.Errorf("expected %s, got %s", tc.want, got)
			}
		})
	}
}
func TestNewCorrelatedLogFromEvent(t *testing.T) {
event := &NormalizedEvent{
Source: SourceA,
Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC),
SrcIP: "192.168.1.1",
SrcPort: 8080,
DstIP: "10.0.0.1",
DstPort: 80,
Raw: map[string]any{
"method": "GET",
"path": "/api/test",
},
}
log := NewCorrelatedLogFromEvent(event, "A")
if log.Correlated {
t.Error("expected correlated to be false")
}
if log.OrphanSide != "A" {
t.Errorf("expected orphan_side A, got %s", log.OrphanSide)
}
if log.SrcIP != "192.168.1.1" {
t.Errorf("expected src_ip 192.168.1.1, got %s", log.SrcIP)
}
if log.Fields == nil {
t.Error("expected fields to be non-nil")
}
}
// TestNewCorrelatedLog checks the basic shape of a correlated log built from
// a matching A/B pair: correlated flag set, no orphan side, Fields populated.
func TestNewCorrelatedLog(t *testing.T) {
	httpSide := &NormalizedEvent{
		Source:    SourceA,
		Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC),
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		DstIP:     "10.0.0.1",
		DstPort:   80,
		Raw:       map[string]any{"method": "GET"},
	}
	netSide := &NormalizedEvent{
		Source:    SourceB,
		Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 500000000, time.UTC),
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		DstIP:     "10.0.0.1",
		DstPort:   80,
		Raw:       map[string]any{"ja3": "abc123"},
	}

	paired := NewCorrelatedLog(httpSide, netSide)

	if !paired.Correlated {
		t.Error("expected correlated to be true")
	}
	if side := paired.OrphanSide; side != "" {
		t.Errorf("expected orphan_side to be empty, got %s", side)
	}
	if paired.Fields == nil {
		t.Error("expected fields to be non-nil")
	}
}
// TestNewCorrelatedLog_TimestampSelectionAEarlier checks that when the A
// event is the earlier one, the log carries B's (later) timestamp.
func TestNewCorrelatedLog_TimestampSelectionAEarlier(t *testing.T) {
	earlier := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	later := time.Date(2024, 1, 1, 12, 0, 1, 0, time.UTC) // B is later

	evA := &NormalizedEvent{Source: SourceA, Timestamp: earlier, SrcIP: "1.1.1.1", SrcPort: 100, Raw: map[string]any{}}
	evB := &NormalizedEvent{Source: SourceB, Timestamp: later, SrcIP: "1.1.1.1", SrcPort: 100, Raw: map[string]any{}}

	got := NewCorrelatedLog(evA, evB).Timestamp
	if !got.Equal(later) {
		t.Errorf("expected timestamp to be B's (later), got %v", got)
	}
}
// TestNewCorrelatedLog_TimestampSelectionBEarlier checks that when the B
// event is the earlier one, the log keeps A's (later) timestamp.
func TestNewCorrelatedLog_TimestampSelectionBEarlier(t *testing.T) {
	later := time.Date(2024, 1, 1, 12, 0, 1, 0, time.UTC) // A is later
	earlier := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)

	evA := &NormalizedEvent{Source: SourceA, Timestamp: later, SrcIP: "1.1.1.1", SrcPort: 100, Raw: map[string]any{}}
	evB := &NormalizedEvent{Source: SourceB, Timestamp: earlier, SrcIP: "1.1.1.1", SrcPort: 100, Raw: map[string]any{}}

	// The later timestamp wins. Since B is not After A, A's timestamp is kept.
	got := NewCorrelatedLog(evA, evB).Timestamp
	if !got.Equal(later) {
		t.Errorf("expected timestamp to be A's (later), got %v", got)
	}
}
// TestNewCorrelatedLog_TimestampEqual checks that when both events share the
// same timestamp, the log carries that timestamp.
func TestNewCorrelatedLog_TimestampEqual(t *testing.T) {
	shared := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)

	evA := &NormalizedEvent{Source: SourceA, Timestamp: shared, SrcIP: "1.1.1.1", SrcPort: 100, Raw: map[string]any{}}
	evB := &NormalizedEvent{Source: SourceB, Timestamp: shared, SrcIP: "1.1.1.1", SrcPort: 100, Raw: map[string]any{}}

	got := NewCorrelatedLog(evA, evB).Timestamp
	if !got.Equal(shared) {
		t.Errorf("expected timestamp to be equal to both events' timestamp, got %v", got)
	}
}
// TestNewCorrelatedLogFromEvent_WithKeepAlive checks that a positive
// KeepAliveSeq is exposed as the "keepalives" field of an orphan log.
func TestNewCorrelatedLogFromEvent_WithKeepAlive(t *testing.T) {
	ev := &NormalizedEvent{
		Source:       SourceA,
		Timestamp:    time.Now(),
		SrcIP:        "1.1.1.1",
		SrcPort:      9999,
		KeepAliveSeq: 3,
		Raw:          map[string]any{"method": "GET"},
	}

	fields := NewCorrelatedLogFromEvent(ev, "A").Fields
	if got := fields["keepalives"]; got != 3 {
		t.Errorf("expected keepalives=3, got %v", got)
	}
}
// TestNewCorrelatedLogFromEvent_NoKeepAlive checks that no "keepalives"
// field is added to an orphan log when KeepAliveSeq is zero.
func TestNewCorrelatedLogFromEvent_NoKeepAlive(t *testing.T) {
	ev := &NormalizedEvent{
		Source:       SourceA,
		Timestamp:    time.Now(),
		SrcIP:        "1.1.1.1",
		SrcPort:      9999,
		KeepAliveSeq: 0,
		Raw:          map[string]any{"method": "GET"},
	}

	fields := NewCorrelatedLogFromEvent(ev, "A").Fields
	_, present := fields["keepalives"]
	if present {
		t.Error("keepalives field should not be present when KeepAliveSeq == 0")
	}
}
// TestMergeFields_NoCollision verifies fields from A and B are merged without conflict.
func TestMergeFields_NoCollision(t *testing.T) {
a := &NormalizedEvent{Raw: map[string]any{"method": "GET", "path": "/foo"}}
b := &NormalizedEvent{Raw: map[string]any{"ja4": "abc123", "proto": "TLS"}}
fields := mergeFields(a, b)
if fields["method"] != "GET" {
t.Errorf("expected method=GET, got %v", fields["method"])
}
if fields["ja4"] != "abc123" {
t.Errorf("expected ja4=abc123, got %v", fields["ja4"])
}
}
// TestMergeFields_SameValueNoPrefix verifies same-value fields are not prefixed.
func TestMergeFields_SameValueNoPrefix(t *testing.T) {
a := &NormalizedEvent{Raw: map[string]any{"proto": "TCP"}}
b := &NormalizedEvent{Raw: map[string]any{"proto": "TCP"}}
fields := mergeFields(a, b)
if fields["proto"] != "TCP" {
t.Errorf("expected proto=TCP (no prefix), got %v", fields["proto"])
}
if _, ok := fields["a_proto"]; ok {
t.Error("a_proto should not exist for same-value collision")
}
if _, ok := fields["b_proto"]; ok {
t.Error("b_proto should not exist for same-value collision")
}
}
// TestMergeFields_DifferentValuePrefix verifies different-value fields get a_/b_ prefix.
func TestMergeFields_DifferentValuePrefix(t *testing.T) {
a := &NormalizedEvent{Raw: map[string]any{"port": 80}}
b := &NormalizedEvent{Raw: map[string]any{"port": 443}}
fields := mergeFields(a, b)
if fields["a_port"] != 80 {
t.Errorf("expected a_port=80, got %v", fields["a_port"])
}
if fields["b_port"] != 443 {
t.Errorf("expected b_port=443, got %v", fields["b_port"])
}
if _, ok := fields["port"]; ok {
t.Error("original 'port' key should be removed on collision")
}
}
// TestCoalesceString_EmptyA checks that an empty first argument yields the
// second argument.
func TestCoalesceString_EmptyA(t *testing.T) {
	if got := coalesceString("", "fallback"); got != "fallback" {
		t.Errorf("expected 'fallback', got %q", got)
	}
}
// TestCoalesceString_NonEmptyA checks that a non-empty first argument is
// returned unchanged.
func TestCoalesceString_NonEmptyA(t *testing.T) {
	if got := coalesceString("primary", "fallback"); got != "primary" {
		t.Errorf("expected 'primary', got %q", got)
	}
}
// TestCoalesceInt_ZeroA checks that a zero first argument yields the second
// argument.
func TestCoalesceInt_ZeroA(t *testing.T) {
	if got := coalesceInt(0, 443); got != 443 {
		t.Errorf("expected 443, got %d", got)
	}
}
// TestCoalesceInt_NonZeroA checks that a non-zero first argument is returned
// unchanged.
func TestCoalesceInt_NonZeroA(t *testing.T) {
	if got := coalesceInt(80, 443); got != 80 {
		t.Errorf("expected 80, got %d", got)
	}
}
// TestMarshalJSON_ReservedKeyProtection checks that entries in Fields using
// core key names cannot replace the core values in the JSON output, while
// other entries are passed through.
func TestMarshalJSON_ReservedKeyProtection(t *testing.T) {
	entry := CorrelatedLog{
		Timestamp:  time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC),
		SrcIP:      "1.2.3.4",
		SrcPort:    1234,
		Correlated: true,
		Fields: map[string]any{
			"src_ip":     "EVIL_OVERRIDE", // should be ignored
			"correlated": false,           // should be ignored
			"extra":      "value",
		},
	}

	encoded, marshalErr := json.Marshal(entry)
	if marshalErr != nil {
		t.Fatalf("MarshalJSON failed: %v", marshalErr)
	}

	decoded := map[string]any{}
	if unmarshalErr := json.Unmarshal(encoded, &decoded); unmarshalErr != nil {
		t.Fatalf("Unmarshal failed: %v", unmarshalErr)
	}

	if got := decoded["src_ip"]; got != "1.2.3.4" {
		t.Errorf("reserved key src_ip should not be overwritten, got %v", got)
	}
	if got := decoded["correlated"]; got != true {
		t.Errorf("reserved key correlated should not be overwritten, got %v", got)
	}
	if got := decoded["extra"]; got != "value" {
		t.Errorf("non-reserved key extra should be present, got %v", got)
	}
}
// TestMarshalJSON_OptionalFieldsOmittedWhenZero checks that dst_ip, dst_port
// and orphan_side do not appear in the JSON output when left at their zero
// values.
func TestMarshalJSON_OptionalFieldsOmittedWhenZero(t *testing.T) {
	entry := CorrelatedLog{
		Timestamp:  time.Now(),
		SrcIP:      "1.2.3.4",
		SrcPort:    1234,
		Correlated: false,
	}

	encoded, marshalErr := json.Marshal(entry)
	if marshalErr != nil {
		t.Fatalf("MarshalJSON failed: %v", marshalErr)
	}

	decoded := map[string]any{}
	if unmarshalErr := json.Unmarshal(encoded, &decoded); unmarshalErr != nil {
		t.Fatalf("Unmarshal failed: %v", unmarshalErr)
	}

	if _, present := decoded["dst_ip"]; present {
		t.Error("dst_ip should be omitted when empty")
	}
	if _, present := decoded["dst_port"]; present {
		t.Error("dst_port should be omitted when zero")
	}
	if _, present := decoded["orphan_side"]; present {
		t.Error("orphan_side should be omitted when empty")
	}
}
// TestExtractFields_Basic verifies extractFields copies Raw fields.
func TestExtractFields_Basic(t *testing.T) {
e := &NormalizedEvent{
Raw: map[string]any{"key1": "val1", "key2": 42},
}
fields := extractFields(e)
if fields["key1"] != "val1" {
t.Errorf("expected key1=val1, got %v", fields["key1"])
}
if fields["key2"] != 42 {
t.Errorf("expected key2=42, got %v", fields["key2"])
}
}
// TestNewCorrelatedLog_KeepAliveSeq checks that the Apache event's
// KeepAliveSeq is exposed as the "keepalives" field of a correlated log.
func TestNewCorrelatedLog_KeepAliveSeq(t *testing.T) {
	httpSide := &NormalizedEvent{
		Source: SourceA, Timestamp: time.Now(), SrcIP: "1.1.1.1", SrcPort: 100,
		KeepAliveSeq: 5,
		Raw:          map[string]any{},
	}
	netSide := &NormalizedEvent{
		Source: SourceB, Timestamp: time.Now(), SrcIP: "1.1.1.1", SrcPort: 100,
		Raw: map[string]any{},
	}

	fields := NewCorrelatedLog(httpSide, netSide).Fields
	if got := fields["keepalives"]; got != 5 {
		t.Errorf("expected keepalives=5, got %v", got)
	}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,33 @@
package domain
import (
"strconv"
"time"
)
// EventSource identifies the source of an event.
// It is a string type whose known values are the SourceA and SourceB
// constants below.
type EventSource string
const (
SourceA EventSource = "A" // Apache/HTTP source
SourceB EventSource = "B" // Network source
)
// NormalizedEvent represents a unified internal event from either source.
// SrcIP and SrcPort together form the correlation key (see CorrelationKey).
type NormalizedEvent struct {
Source EventSource // SourceA or SourceB
Timestamp time.Time
SrcIP string
SrcPort int
DstIP string
DstPort int
Headers map[string]string // NOTE(review): not read by the log constructors in this package — confirm who consumes it
Extra map[string]any // NOTE(review): not read by the log constructors in this package — confirm who consumes it
Raw map[string]any // Original raw data; copied into CorrelatedLog.Fields when a log is built
KeepAliveSeq int // Request sequence number within the Keep-Alive connection (1-based)
}
// CorrelationKey returns the string used to pair events from both sources:
// the source IP and the decimal source port joined by a colon, e.g.
// "192.168.1.1:8080".
func (e *NormalizedEvent) CorrelationKey() string {
	port := strconv.Itoa(e.SrcPort)
	return e.SrcIP + ":" + port
}

View File

@ -0,0 +1,25 @@
// Package observability provides structured logging for the correlator service.
// Implementation is delegated to shared/go/ja4common/logger to avoid duplication.
package observability
import jalogger "github.com/antitbone/ja4/ja4common/logger"
// Type aliases — all existing correlator code compiles unchanged.
// These are aliases, not new types: observability.Logger and
// observability.LogLevel are identical to jalogger.Logger and
// jalogger.LogLevel, so values can be passed between the packages freely.
type Logger = jalogger.Logger
type LogLevel = jalogger.LogLevel
// Log level constants, re-exported from jalogger under the same names.
const (
DEBUG LogLevel = jalogger.DEBUG
INFO LogLevel = jalogger.INFO
WARN LogLevel = jalogger.WARN
ERROR LogLevel = jalogger.ERROR
)
// NewLogger creates a new Logger with INFO level.
// It delegates to jalogger.New with the given prefix.
func NewLogger(prefix string) *Logger {
	return jalogger.New(prefix)
}
// NewLoggerWithLevel creates a new Logger with the specified minimum level.
// It delegates to jalogger.NewWithLevel with the given prefix and level name.
func NewLoggerWithLevel(prefix, level string) *Logger {
	return jalogger.NewWithLevel(prefix, level)
}
// ParseLogLevel converts a string to LogLevel.
// It delegates to jalogger.ParseLogLevel.
func ParseLogLevel(level string) LogLevel {
	return jalogger.ParseLogLevel(level)
}

View File

@ -0,0 +1,296 @@
// Package observability tests — behavioral tests for the Logger type alias.
// Since Logger = jalogger.Logger, we test the observable API only.
package observability_test
import (
"testing"
"github.com/antitbone/ja4/correlator/internal/observability"
)
// TestNewLogger_NonNil checks that NewLogger never hands back a nil logger.
func TestNewLogger_NonNil(t *testing.T) {
	if l := observability.NewLogger("test"); l == nil {
		t.Fatal("expected non-nil logger")
	}
}
// TestLogger_DefaultLevel_IsInfo checks that a logger from NewLogger logs at
// INFO but not at DEBUG.
func TestLogger_DefaultLevel_IsInfo(t *testing.T) {
	l := observability.NewLogger("test")

	infoOn := l.ShouldLog(observability.INFO)
	if !infoOn {
		t.Error("INFO should be enabled by default")
	}

	debugOn := l.ShouldLog(observability.DEBUG)
	if debugOn {
		t.Error("DEBUG should be disabled by default")
	}
}
// TestLogger_Info_NoPanic checks that an INFO-level logger reports INFO as
// enabled and that calling Info does not panic.
func TestLogger_Info_NoPanic(t *testing.T) {
	l := observability.NewLoggerWithLevel("test", "INFO")
	if enabled := l.ShouldLog(observability.INFO); !enabled {
		t.Error("INFO should be enabled")
	}
	l.Info("test message")
}
// TestLogger_Error_NoPanic checks that an ERROR-level logger reports ERROR
// as enabled and that calling Error with a nil error does not panic.
func TestLogger_Error_NoPanic(t *testing.T) {
	l := observability.NewLoggerWithLevel("test", "ERROR")
	if enabled := l.ShouldLog(observability.ERROR); !enabled {
		t.Error("ERROR should be enabled")
	}
	l.Error("error message", nil)
}
// TestLogger_Debug_NoPanic checks that SetLevel("DEBUG") enables DEBUG and
// that calling Debug afterwards does not panic.
func TestLogger_Debug_NoPanic(t *testing.T) {
	l := observability.NewLogger("test")
	l.SetLevel("DEBUG")

	if enabled := l.ShouldLog(observability.DEBUG); !enabled {
		t.Error("DEBUG should be enabled after SetLevel(DEBUG)")
	}
	l.Debug("test message")
}
// TestLogger_SetLevel walks one logger through DEBUG, INFO, WARN and ERROR
// and, after each change, probes which levels ShouldLog reports as enabled.
func TestLogger_SetLevel(t *testing.T) {
	type probe struct {
		level observability.LogLevel
		want  bool
		msg   string
	}
	stages := []struct {
		set    string
		probes []probe
	}{
		{"DEBUG", []probe{
			{observability.DEBUG, true, "DEBUG should be enabled after SetLevel(DEBUG)"},
		}},
		{"INFO", []probe{
			{observability.DEBUG, false, "DEBUG should be disabled after SetLevel(INFO)"},
		}},
		{"WARN", []probe{
			{observability.INFO, false, "INFO should be disabled after SetLevel(WARN)"},
			{observability.WARN, true, "WARN should be enabled after SetLevel(WARN)"},
		}},
		{"ERROR", []probe{
			{observability.WARN, false, "WARN should be disabled after SetLevel(ERROR)"},
			{observability.ERROR, true, "ERROR should be enabled after SetLevel(ERROR)"},
		}},
	}

	l := observability.NewLogger("test")
	for _, stage := range stages {
		l.SetLevel(stage.set)
		for _, p := range stage.probes {
			if l.ShouldLog(p.level) != p.want {
				t.Error(p.msg)
			}
		}
	}
}
// TestParseLogLevel checks the string-to-level mapping of ParseLogLevel,
// including lower-case spellings, the WARNING alias, and the INFO result
// expected for empty or unknown input.
func TestParseLogLevel(t *testing.T) {
	type row struct {
		in   string
		want observability.LogLevel
	}
	table := []row{
		{in: "DEBUG", want: observability.DEBUG},
		{in: "debug", want: observability.DEBUG},
		{in: "INFO", want: observability.INFO},
		{in: "info", want: observability.INFO},
		{in: "WARN", want: observability.WARN},
		{in: "warn", want: observability.WARN},
		{in: "WARNING", want: observability.WARN},
		{in: "ERROR", want: observability.ERROR},
		{in: "error", want: observability.ERROR},
		{in: "", want: observability.INFO},
		{in: "invalid", want: observability.INFO},
	}

	for _, r := range table {
		t.Run(r.in, func(t *testing.T) {
			if got := observability.ParseLogLevel(r.in); got != r.want {
				t.Errorf("ParseLogLevel(%q) = %v, want %v", r.in, got, r.want)
			}
		})
	}
}
// TestLogger_WithFields_NoPanic checks that WithFields returns a logger
// distinct from its parent and that logging through it does not panic.
func TestLogger_WithFields_NoPanic(t *testing.T) {
	parent := observability.NewLogger("test")
	fields := map[string]any{"key1": "value1", "key2": 42}

	derived := parent.WithFields(fields)
	if derived == parent {
		t.Error("expected different logger instance")
	}

	derived.Info("message with fields")
}
// TestLogLevel_String checks the textual form of each log level.
func TestLogLevel_String(t *testing.T) {
	type row struct {
		lvl  observability.LogLevel
		want string
	}
	table := []row{
		{lvl: observability.DEBUG, want: "DEBUG"},
		{lvl: observability.INFO, want: "INFO"},
		{lvl: observability.WARN, want: "WARN"},
		{lvl: observability.ERROR, want: "ERROR"},
	}

	for _, r := range table {
		t.Run(r.want, func(t *testing.T) {
			got := r.lvl.String()
			if got != r.want {
				t.Errorf("LogLevel(%d).String() = %q, want %q", r.lvl, got, r.want)
			}
		})
	}
}
// TestLogger_Warn_NoPanic checks that a WARN-level logger reports WARN as
// enabled and that calling Warn does not panic.
func TestLogger_Warn_NoPanic(t *testing.T) {
	l := observability.NewLoggerWithLevel("test", "WARN")

	if enabled := l.ShouldLog(observability.WARN); !enabled {
		t.Error("WARN should be enabled")
	}

	l.Warn("warning message")
}
// TestLogger_Formatted_NoPanic calls the printf-style helpers (Warnf, Infof,
// Debugf) on a DEBUG-level logger and passes as long as none of them panics.
func TestLogger_Formatted_NoPanic(t *testing.T) {
	const (
		format = "formatted %s %d"
		word   = "message"
		number = 42
	)

	l := observability.NewLoggerWithLevel("test", "DEBUG")
	l.Warnf(format, word, number)
	l.Infof(format, word, number)
	l.Debugf(format, word, number)
}
func TestLogger_Error_WithError(t *testing.T) {
logger := observability.NewLoggerWithLevel("test", "ERROR")
logger.Error("error occurred", &testErr{"test error"})
}
// TestLogger_ShouldLog_Concurrent calls ShouldLog from ten goroutines at once
// and waits for all of them; it is meaningful mainly under the race detector.
func TestLogger_ShouldLog_Concurrent(t *testing.T) {
	const workers = 10

	l := observability.NewLoggerWithLevel("test", "DEBUG")
	finished := make(chan struct{})

	for w := 0; w < workers; w++ {
		go func() {
			_ = l.ShouldLog(observability.DEBUG)
			finished <- struct{}{}
		}()
	}

	// One receive per worker: the test returns only after every goroutine ran.
	for w := 0; w < workers; w++ {
		<-finished
	}
}
// TestLogger_Log_Concurrent emits a Debugf message from ten goroutines at once
// and waits for all of them; it is meaningful mainly under the race detector.
func TestLogger_Log_Concurrent(t *testing.T) {
	const workers = 10

	l := observability.NewLoggerWithLevel("test", "DEBUG")
	finished := make(chan struct{})

	for w := 0; w < workers; w++ {
		go func(id int) {
			l.Debugf("message %d", id)
			finished <- struct{}{}
		}(w)
	}

	// One receive per worker: the test returns only after every goroutine ran.
	for w := 0; w < workers; w++ {
		<-finished
	}
}
// TestLogger_WithFields_Concurrent derives child loggers from one parent in
// ten goroutines at once and waits for all of them to finish.
func TestLogger_WithFields_Concurrent(t *testing.T) {
	const workers = 10

	parent := observability.NewLogger("test")
	finished := make(chan struct{})

	for w := 0; w < workers; w++ {
		go func(id int) {
			_ = parent.WithFields(map[string]any{"key": id})
			finished <- struct{}{}
		}(w)
	}

	// One receive per worker: the test returns only after every goroutine ran.
	for w := 0; w < workers; w++ {
		<-finished
	}
}
// TestLogger_SetLevel_Concurrent flips the level of a shared logger between
// DEBUG and INFO from ten goroutines at once and waits for all of them.
func TestLogger_SetLevel_Concurrent(t *testing.T) {
	const workers = 10

	l := observability.NewLogger("test")
	finished := make(chan struct{})

	for w := 0; w < workers; w++ {
		go func() {
			for _, lvl := range []string{"DEBUG", "INFO"} {
				l.SetLevel(lvl)
			}
			finished <- struct{}{}
		}()
	}

	// One receive per worker: the test returns only after every goroutine ran.
	for w := 0; w < workers; w++ {
		<-finished
	}
}
// testErr is a minimal error implementation used as a fixture by the tests in
// this file.
type testErr struct {
	msg string
}

// Error returns the message the fixture was built with.
func (e *testErr) Error() string {
	return e.msg
}
// TestNewLoggerWithLevel_AllLevels checks that NewLoggerWithLevel returns a
// logger for every known level name as well as for unknown and empty names.
func TestNewLoggerWithLevel_AllLevels(t *testing.T) {
	for _, name := range []string{"DEBUG", "INFO", "WARN", "WARNING", "ERROR", "invalid", ""} {
		t.Run(name, func(t *testing.T) {
			if l := observability.NewLoggerWithLevel("test", name); l == nil {
				t.Errorf("NewLoggerWithLevel(%q) returned nil", name)
			}
		})
	}
}
// TestLogLevel_Constants checks that the level constants are strictly ordered
// DEBUG < INFO < WARN < ERROR.
func TestLogLevel_Constants(t *testing.T) {
	ascending := []struct {
		name  string
		value observability.LogLevel
	}{
		{"DEBUG", observability.DEBUG},
		{"INFO", observability.INFO},
		{"WARN", observability.WARN},
		{"ERROR", observability.ERROR},
	}

	// Compare each level with its immediate successor.
	for i := 0; i+1 < len(ascending); i++ {
		lower, higher := ascending[i], ascending[i+1]
		if lower.value >= higher.value {
			t.Error(lower.name + " should be less than " + higher.name)
		}
	}
}
// TestLogger_ShouldLog_AllLevels checks ShouldLog for combinations of the
// logger's minimum level and the level of the message being tested.
func TestLogger_ShouldLog_AllLevels(t *testing.T) {
	type row struct {
		min     string
		msgLvl  observability.LogLevel
		enabled bool
	}
	table := []row{
		{min: "DEBUG", msgLvl: observability.DEBUG, enabled: true},
		{min: "DEBUG", msgLvl: observability.INFO, enabled: true},
		{min: "DEBUG", msgLvl: observability.WARN, enabled: true},
		{min: "DEBUG", msgLvl: observability.ERROR, enabled: true},
		{min: "INFO", msgLvl: observability.DEBUG, enabled: false},
		{min: "INFO", msgLvl: observability.INFO, enabled: true},
		{min: "INFO", msgLvl: observability.WARN, enabled: true},
		{min: "WARN", msgLvl: observability.INFO, enabled: false},
		{min: "WARN", msgLvl: observability.WARN, enabled: true},
		{min: "WARN", msgLvl: observability.ERROR, enabled: true},
		{min: "ERROR", msgLvl: observability.WARN, enabled: false},
		{min: "ERROR", msgLvl: observability.ERROR, enabled: true},
	}

	for _, r := range table {
		subtest := r.min + "_" + r.msgLvl.String()
		t.Run(subtest, func(t *testing.T) {
			l := observability.NewLoggerWithLevel("test", r.min)
			if got := l.ShouldLog(r.msgLvl); got != r.enabled {
				t.Errorf("ShouldLog(%v) with min=%s: expected %v, got %v",
					r.msgLvl, r.min, r.enabled, got)
			}
		})
	}
}
// TestParseLogLevel_WarningAlias checks that "WARNING" is parsed as WARN.
func TestParseLogLevel_WarningAlias(t *testing.T) {
	if lvl := observability.ParseLogLevel("WARNING"); lvl != observability.WARN {
		t.Errorf("ParseLogLevel(WARNING) = %v, want WARN", lvl)
	}
}
func TestLogger_Errorf_NoPanic(t *testing.T) {
logger := observability.NewLoggerWithLevel("test", "DEBUG")
// Errorf is not defined in the interface, but Warnf/Infof/Debugf are tested
// Just ensure Error with a formatted message doesn't panic
logger.Error("formatted error", &testErr{"err detail"})
}
// TestNewLogger_PrefixIsUsed builds a logger with a custom prefix and checks
// that it is non-nil and can emit a formatted message. The emitted output is
// not inspected.
func TestNewLogger_PrefixIsUsed(t *testing.T) {
	const prefix = "my-prefix"

	l := observability.NewLogger(prefix)
	if l == nil {
		t.Fatal("expected non-nil logger")
	}

	l.Infof("hello from %s", prefix)
}

View File

@ -0,0 +1,176 @@
package observability
import (
"encoding/json"
"fmt"
"strings"
"sync"
"sync/atomic"
)
// CorrelationMetrics holds the counters and gauges describing the correlation
// pipeline. Every value is an atomic.Int64, so each one can be updated and
// read without taking a lock.
type CorrelationMetrics struct {
	// mu is not used by the methods defined alongside this type.
	mu sync.RWMutex

	// Events received, per source.
	eventsReceivedA, eventsReceivedB atomic.Int64

	// Correlation outcomes.
	correlationsSuccess, correlationsFailed atomic.Int64

	// Failure breakdown, one counter per reason.
	failedNoMatchKey     atomic.Int64 // no event with the same key in the buffer
	failedTimeWindow     atomic.Int64 // key found but outside the time window
	failedBufferEviction atomic.Int64 // event evicted because the buffer was full
	failedTTLExpired     atomic.Int64 // B event TTL expired before a match
	failedIPExcluded     atomic.Int64 // event excluded by the IP filter

	// Latest buffer sizes reported through UpdateBufferSizes.
	bufferASize, bufferBSize atomic.Int64

	// Orphan statistics.
	orphansEmittedA, orphansEmittedB atomic.Int64
	orphansPendingA                  atomic.Int64
	pendingOrphanMatch               atomic.Int64 // B matched a pending orphan A

	// Keep-Alive statistics.
	keepAliveResets atomic.Int64 // number of TTL resets (one-to-many mode)
}
// NewCorrelationMetrics returns a metrics tracker whose counters all start at
// zero.
func NewCorrelationMetrics() *CorrelationMetrics {
	return new(CorrelationMetrics)
}
// RecordEventReceived counts one event received from the given source.
// Only the sources "A" and "B" are counted; any other value is ignored.
func (m *CorrelationMetrics) RecordEventReceived(source string) {
	switch source {
	case "A":
		m.eventsReceivedA.Add(1)
	case "B":
		m.eventsReceivedB.Add(1)
	}
}
// RecordCorrelationSuccess counts one successful correlation.
func (m *CorrelationMetrics) RecordCorrelationSuccess() {
	const one = 1
	m.correlationsSuccess.Add(one)
}
// RecordCorrelationFailed counts one failed correlation attempt.
//
// The total failure counter is always incremented. In addition, the counter
// matching reason is incremented when reason is one of "no_match_key",
// "time_window", "buffer_eviction", "ttl_expired" or "ip_excluded"; any other
// reason only contributes to the total.
func (m *CorrelationMetrics) RecordCorrelationFailed(reason string) {
	m.correlationsFailed.Add(1)

	// Pick the per-reason counter, if the reason is a known one.
	var perReason *atomic.Int64
	switch reason {
	case "no_match_key":
		perReason = &m.failedNoMatchKey
	case "time_window":
		perReason = &m.failedTimeWindow
	case "buffer_eviction":
		perReason = &m.failedBufferEviction
	case "ttl_expired":
		perReason = &m.failedTTLExpired
	case "ip_excluded":
		perReason = &m.failedIPExcluded
	}

	if perReason != nil {
		perReason.Add(1)
	}
}
// RecordBufferEviction is a hook for recording an event evicted from a buffer.
// It is currently a no-op: source is ignored and no counter is updated. The
// failedBufferEviction counter is incremented by
// RecordCorrelationFailed("buffer_eviction"), not by this method.
func (m *CorrelationMetrics) RecordBufferEviction(source string) {
// Intentionally empty; can be used for additional tracking if needed.
}
// RecordOrphanEmitted counts one orphan event emitted for the given source.
// Only the sources "A" and "B" are counted; any other value is ignored.
func (m *CorrelationMetrics) RecordOrphanEmitted(source string) {
	switch source {
	case "A":
		m.orphansEmittedA.Add(1)
	case "B":
		m.orphansEmittedB.Add(1)
	}
}
// RecordPendingOrphan counts one A event added to the pending orphans.
// The counter only ever grows here; nothing in this method decrements it.
func (m *CorrelationMetrics) RecordPendingOrphan() {
	const one = 1
	m.orphansPendingA.Add(one)
}
// RecordPendingOrphanMatch counts one B event that matched a pending orphan A.
func (m *CorrelationMetrics) RecordPendingOrphanMatch() {
	const one = 1
	m.pendingOrphanMatch.Add(one)
}
// RecordKeepAliveReset counts one TTL reset performed for Keep-Alive handling.
func (m *CorrelationMetrics) RecordKeepAliveReset() {
	const one = 1
	m.keepAliveResets.Add(one)
}
// UpdateBufferSizes overwrites the stored buffer sizes with the given values.
// The two values are stored one after the other, not as a single atomic unit.
func (m *CorrelationMetrics) UpdateBufferSizes(sizeA, sizeB int64) {
	gaugeA, gaugeB := &m.bufferASize, &m.bufferBSize
	gaugeA.Store(sizeA)
	gaugeB.Store(sizeB)
}
// Snapshot copies the current value of every counter into a MetricsSnapshot.
// Each counter is loaded atomically but independently, so values that change
// while the snapshot is being taken may not be mutually consistent.
func (m *CorrelationMetrics) Snapshot() MetricsSnapshot {
	var snap MetricsSnapshot

	snap.EventsReceivedA = m.eventsReceivedA.Load()
	snap.EventsReceivedB = m.eventsReceivedB.Load()

	snap.CorrelationsSuccess = m.correlationsSuccess.Load()
	snap.CorrelationsFailed = m.correlationsFailed.Load()

	snap.FailedNoMatchKey = m.failedNoMatchKey.Load()
	snap.FailedTimeWindow = m.failedTimeWindow.Load()
	snap.FailedBufferEviction = m.failedBufferEviction.Load()
	snap.FailedTTLExpired = m.failedTTLExpired.Load()
	snap.FailedIPExcluded = m.failedIPExcluded.Load()

	snap.BufferASize = m.bufferASize.Load()
	snap.BufferBSize = m.bufferBSize.Load()

	snap.OrphansEmittedA = m.orphansEmittedA.Load()
	snap.OrphansEmittedB = m.orphansEmittedB.Load()
	snap.OrphansPendingA = m.orphansPendingA.Load()
	snap.PendingOrphanMatch = m.pendingOrphanMatch.Load()

	snap.KeepAliveResets = m.keepAliveResets.Load()

	return snap
}
// MetricsSnapshot is a point-in-time snapshot of metrics.
// It is a plain value type filled by CorrelationMetrics.Snapshot; the struct
// tags define the key names used when the snapshot is encoded as JSON.
type MetricsSnapshot struct {
// Events received, per source.
EventsReceivedA int64 `json:"events_received_a"`
EventsReceivedB int64 `json:"events_received_b"`
// Correlation outcomes.
CorrelationsSuccess int64 `json:"correlations_success"`
CorrelationsFailed int64 `json:"correlations_failed"`
// Failure breakdown, one counter per reason.
FailedNoMatchKey int64 `json:"failed_no_match_key"`
FailedTimeWindow int64 `json:"failed_time_window"`
FailedBufferEviction int64 `json:"failed_buffer_eviction"`
FailedTTLExpired int64 `json:"failed_ttl_expired"`
FailedIPExcluded int64 `json:"failed_ip_excluded"`
// Buffer sizes as last reported.
BufferASize int64 `json:"buffer_a_size"`
BufferBSize int64 `json:"buffer_b_size"`
// Orphan statistics.
OrphansEmittedA int64 `json:"orphans_emitted_a"`
OrphansEmittedB int64 `json:"orphans_emitted_b"`
OrphansPendingA int64 `json:"orphans_pending_a"`
PendingOrphanMatch int64 `json:"pending_orphan_match"`
// Keep-Alive statistics.
KeepAliveResets int64 `json:"keepalive_resets"`
}
// MarshalJSON implements json.Marshaler by encoding a fresh Snapshot, so the
// JSON form of the tracker uses the key names declared on MetricsSnapshot.
func (m *CorrelationMetrics) MarshalJSON() ([]byte, error) {
	snap := m.Snapshot()
	return json.Marshal(snap)
}
// String renders a fresh Snapshot as a multi-line, human-readable report:
// a title line followed by one line per metric group.
func (m *CorrelationMetrics) String() string {
	snap := m.Snapshot()
	totalReceived := snap.EventsReceivedA + snap.EventsReceivedB

	// Each entry already carries its trailing newline.
	report := []string{
		"Correlation Metrics:\n",
		fmt.Sprintf(" Events Received: A=%d B=%d Total=%d\n",
			snap.EventsReceivedA, snap.EventsReceivedB, totalReceived),
		fmt.Sprintf(" Correlations: Success=%d Failed=%d\n",
			snap.CorrelationsSuccess, snap.CorrelationsFailed),
		fmt.Sprintf(" Failure Reasons: no_match_key=%d time_window=%d buffer_eviction=%d ttl_expired=%d ip_excluded=%d\n",
			snap.FailedNoMatchKey, snap.FailedTimeWindow, snap.FailedBufferEviction,
			snap.FailedTTLExpired, snap.FailedIPExcluded),
		fmt.Sprintf(" Buffer Sizes: A=%d B=%d\n", snap.BufferASize, snap.BufferBSize),
		fmt.Sprintf(" Orphans: Emitted A=%d B=%d Pending A=%d\n",
			snap.OrphansEmittedA, snap.OrphansEmittedB, snap.OrphansPendingA),
		fmt.Sprintf(" Pending Orphan Match: %d\n", snap.PendingOrphanMatch),
		fmt.Sprintf(" Keep-Alive Resets: %d\n", snap.KeepAliveResets),
	}
	return strings.Join(report, "")
}

View File

@ -0,0 +1,128 @@
package observability
import (
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"sync"
"time"
)
// MetricsServer exposes correlation metrics via HTTP.
// It is built by NewMetricsServer, which registers the /metrics and /health
// handlers, and is controlled through Start and Stop.
type MetricsServer struct {
// mu is locked by Start, Stop, IsRunning and Addr around listener and running.
mu sync.Mutex
// server is the HTTP server configured by NewMetricsServer.
server *http.Server
// listener is the TCP listener opened by Start; it is nil before that.
listener net.Listener
// metricsFunc is called on every /metrics request to obtain the values served.
metricsFunc func() MetricsSnapshot
// running is set by Start and cleared by Stop.
running bool
}
// NewMetricsServer builds a metrics HTTP server for addr without starting it.
//
// metricsFunc supplies the snapshot served on /metrics and must not be nil;
// a nil metricsFunc yields an error. The returned server also answers on
// /health and uses a 5 second read timeout and a 10 second write timeout.
func NewMetricsServer(addr string, metricsFunc func() MetricsSnapshot) (*MetricsServer, error) {
	if metricsFunc == nil {
		return nil, fmt.Errorf("metricsFunc cannot be nil")
	}

	ms := new(MetricsServer)
	ms.metricsFunc = metricsFunc

	// Route table: one handler per endpoint.
	router := http.NewServeMux()
	router.HandleFunc("/metrics", ms.handleMetrics)
	router.HandleFunc("/health", ms.handleHealth)

	const (
		readTimeout  = 5 * time.Second
		writeTimeout = 10 * time.Second
	)
	ms.server = &http.Server{
		Addr:         addr,
		Handler:      router,
		ReadTimeout:  readTimeout,
		WriteTimeout: writeTimeout,
	}

	return ms, nil
}
// Start binds the configured address and serves requests from a background
// goroutine.
//
// Calling Start while the server is already running is a no-op that returns
// nil. An error is returned when the TCP listener cannot be created. Once the
// background Serve call returns, for whatever reason, the running flag is
// cleared so that IsRunning does not keep reporting a server that no longer
// serves.
//
// NOTE(review): per the net/http documentation a Server cannot be reused after
// Shutdown, so a Start that follows Stop will not serve again — confirm that
// no caller relies on restarting the same instance.
func (ms *MetricsServer) Start() error {
	ms.mu.Lock()
	defer ms.mu.Unlock()

	if ms.running {
		return nil
	}

	listener, err := net.Listen("tcp", ms.server.Addr)
	if err != nil {
		return fmt.Errorf("failed to start metrics server: %w", err)
	}

	ms.listener = listener
	ms.running = true

	go func() {
		// Serve blocks until the server is shut down or the listener fails and
		// always returns a non-nil error. No logger is available here, so the
		// error itself is dropped; the state change below is what matters.
		_ = ms.server.Serve(listener)

		// This Lock waits for Start (or a concurrent Stop) to release mu.
		ms.mu.Lock()
		ms.running = false
		ms.mu.Unlock()
	}()

	return nil
}
// Stop shuts the metrics server down through http.Server.Shutdown, using ctx
// to bound the wait. It returns nil without doing anything when the server is
// not running; otherwise it returns the result of Shutdown. The running flag
// is cleared before Shutdown is called.
func (ms *MetricsServer) Stop(ctx context.Context) error {
	ms.mu.Lock()
	defer ms.mu.Unlock()

	if ms.running {
		ms.running = false
		return ms.server.Shutdown(ctx)
	}
	return nil
}
// handleMetrics serves the current metrics snapshot as a JSON document.
//
// Only GET is accepted; other methods receive 405. The snapshot is encoded
// before anything is written to the response, so an encoding failure produces
// a clean 500 and a failed write to the client is never followed by a second
// attempt to send an error page.
func (ms *MetricsServer) handleMetrics(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	payload, err := json.Marshal(ms.metricsFunc())
	if err != nil {
		http.Error(w, "Failed to encode metrics", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	// The trailing newline keeps the body identical to what json.Encoder
	// produced. A write error means the client is gone; nothing more can be
	// sent to it, so the error is deliberately ignored.
	_, _ = w.Write(append(payload, '\n'))
}
// handleHealth answers GET requests with status 200 and the fixed JSON body
// {"status":"healthy"}; any other method receives 405.
func (ms *MetricsServer) handleHealth(w http.ResponseWriter, r *http.Request) {
	if r.Method == http.MethodGet {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		fmt.Fprint(w, `{"status":"healthy"}`)
		return
	}

	http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
}
// IsRunning reports the current value of the running flag, read under the
// server's mutex.
func (ms *MetricsServer) IsRunning() bool {
	ms.mu.Lock()
	state := ms.running
	ms.mu.Unlock()

	return state
}
// Addr returns the address of the listener opened by Start, or the empty
// string when no listener has been opened yet. The listener field is not
// reset by Stop, so the last address is still returned after a stop.
func (ms *MetricsServer) Addr() string {
	ms.mu.Lock()
	defer ms.mu.Unlock()

	if l := ms.listener; l != nil {
		return l.Addr().String()
	}
	return ""
}

View File

@ -0,0 +1,57 @@
package ports
import (
"context"
"github.com/antitbone/ja4/correlator/internal/domain"
)
// EventSource defines the interface for log sources.
// A source delivers normalized events on a channel supplied by the caller.
type EventSource interface {
// Start begins reading events and sending them to the channel.
// The channel is send-only from the source's point of view.
// Returns an error if the source cannot be started.
Start(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) error
// Stop gracefully stops the source.
Stop() error
// Name returns the source name.
Name() string
}
// CorrelatedLogSink defines the interface for correlated log destinations.
type CorrelatedLogSink interface {
// Write sends a correlated log to the sink.
Write(ctx context.Context, log domain.CorrelatedLog) error
// Flush flushes any buffered logs.
Flush(ctx context.Context) error
// Close closes the sink.
Close() error
// Name returns the sink name.
Name() string
// Reopen closes and reopens the sink (for log rotation on SIGHUP).
// NOTE(review): Reopen is a regular member of this interface, so every sink
// has to provide it; presumably only FileSink does real work here and the
// other sinks implement it as a no-op — confirm against the implementations.
Reopen() error
}
// CorrelationProcessor defines the interface for the correlation service.
// This allows for easier testing and alternative implementations.
type CorrelationProcessor interface {
// ProcessEvent processes an incoming event and returns correlated logs.
// The result is a slice, so a single call can hand back several logs.
ProcessEvent(event *domain.NormalizedEvent) []domain.CorrelatedLog
// Flush forces emission of remaining buffered events.
Flush() []domain.CorrelatedLog
// EmitPendingOrphans emits orphan A events whose delay has expired.
// Called periodically by the Orchestrator ticker so orphans are not blocked
// waiting for the next incoming event.
EmitPendingOrphans() []domain.CorrelatedLog
// GetBufferSizes returns the current buffer sizes for monitoring.
// NOTE(review): the order of the two values is not stated here; presumably
// buffer A first, then buffer B — confirm against the implementation.
GetBufferSizes() (int, int)
}

View File

@ -0,0 +1,34 @@
[Unit]
Description=logcorrelator service
After=network.target
[Service]
Type=simple
User=logcorrelator
Group=logcorrelator
ExecStart=/usr/bin/logcorrelator -config /etc/logcorrelator/logcorrelator.yml
# Reload sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5
# Runtime directory: systemd creates /run/logcorrelator (= /var/run/logcorrelator)
# with the right owner (logcorrelator:logcorrelator) on every start/restart,
# which keeps the sockets from ending up as root:root after a reboot (tmpfs wiped).
RuntimeDirectory=logcorrelator
RuntimeDirectoryMode=0755
# Security hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
# With ProtectSystem=strict only the paths listed here stay writable.
# NOTE(review): /etc/logcorrelator is writable by the service while
# /var/lib/logcorrelator is not listed - confirm both choices are intended.
ReadWritePaths=/var/log/logcorrelator /etc/logcorrelator
# Resource limits
LimitNOFILE=65536
# Systemd timeouts
TimeoutStartSec=10
TimeoutStopSec=30
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,383 @@
# logcorrelator RPM spec file
# Compatible with CentOS 7, Rocky Linux 8, 9, 10
# Built with rpmbuild (not FPM)
Name: logcorrelator
# NOTE(review): the version macro is not defined anywhere in this spec;
# presumably the build script passes it to rpmbuild with --define - confirm.
Version: %{version}
Release: 1%{?dist}
Summary: Log correlation service for HTTP and network events
License: MIT
URL: https://github.com/logcorrelator/logcorrelator
Vendor: logcorrelator <dev@example.com>
Packager: logcorrelator <dev@example.com>
BuildArch: x86_64
# Dependencies: systemd is needed at runtime and by the post, preun and
# postun scriptlets, which call systemctl.
Requires: systemd
Requires(post): systemd
Requires(preun): systemd
Requires(postun): systemd
%description
logcorrelator est un service système écrit en Go qui reçoit deux flux de logs JSON
via des sockets Unix, corrèle les événements HTTP applicatifs avec des événements
réseau, et produit des logs corrélés en temps réel vers ClickHouse et/ou fichier local.
Notes de sécurité :
- Le service s'exécute sous l'utilisateur logcorrelator (non-root)
- Les sockets Unix sont créés avec des permissions 0666 (world read/write)
- Les répertoires critiques sont protégés : /var/log (750), /var/lib (750), /etc (750)
- /var/run/logcorrelator est en 755 pour permettre la création de sockets
%prep
# Nothing to unpack: the files are already in the BUILD directory
# (copied there by build-rpm.sh).
# The listing below only records the BUILD directory content in the build log.
echo "Files available in BUILD directory:"
ls -la %{_builddir}/
%install
# Stage only the directories that end up in the package payload.
# /var/run/logcorrelator is created at service start by RuntimeDirectory= in
# the unit file, and no tmpfiles.d snippet is shipped any more, so neither
# directory is created in the buildroot.
mkdir -p %{buildroot}/usr/bin
mkdir -p %{buildroot}/etc/logcorrelator
mkdir -p %{buildroot}/var/log/logcorrelator
mkdir -p %{buildroot}/var/lib/logcorrelator
mkdir -p %{buildroot}/etc/systemd/system
mkdir -p %{buildroot}/etc/logrotate.d
# Install binary (from BUILD directory)
install -m 0755 %{_builddir}/usr/bin/logcorrelator %{buildroot}/usr/bin/logcorrelator
# Install config files (0640: not world-readable)
install -m 0640 %{_builddir}/etc/logcorrelator/logcorrelator.yml %{buildroot}/etc/logcorrelator/logcorrelator.yml
install -m 0640 %{_builddir}/etc/logcorrelator/logcorrelator.yml.example %{buildroot}/etc/logcorrelator/logcorrelator.yml.example
# Install systemd service
install -m 0644 %{_builddir}/etc/systemd/system/logcorrelator.service %{buildroot}/etc/systemd/system/logcorrelator.service
# Install logrotate config
install -m 0644 %{_builddir}/etc/logrotate.d/logcorrelator %{buildroot}/etc/logrotate.d/logcorrelator
%post
# Make sure the dedicated system group and account exist.
getent group logcorrelator >/dev/null 2>&1 || groupadd --system logcorrelator
getent passwd logcorrelator >/dev/null 2>&1 || useradd --system \
    --gid logcorrelator \
    --home-dir /var/lib/logcorrelator \
    --no-create-home \
    --shell /usr/sbin/nologin \
    logcorrelator

# State and log directories. /var/run/logcorrelator is not handled here:
# systemd creates it through RuntimeDirectory= in the unit file.
mkdir -p /var/lib/logcorrelator
mkdir -p /var/log/logcorrelator

# Hand the state, log and configuration directories to the service account,
# then restrict them to owner and group.
for dir in /var/lib/logcorrelator /var/log/logcorrelator /etc/logcorrelator; do
    chown -R logcorrelator:logcorrelator "$dir"
done
for dir in /var/lib/logcorrelator /var/log/logcorrelator /etc/logcorrelator; do
    chmod 750 "$dir"
done

# Seed the configuration from the example when no configuration is present.
[ -f /etc/logcorrelator/logcorrelator.yml ] || {
    cp /etc/logcorrelator/logcorrelator.yml.example /etc/logcorrelator/logcorrelator.yml
    chown logcorrelator:logcorrelator /etc/logcorrelator/logcorrelator.yml
    chmod 640 /etc/logcorrelator/logcorrelator.yml
}

# Register, enable and start the service when systemctl is available.
[ -x /bin/systemctl ] && {
    systemctl daemon-reload
    systemctl enable logcorrelator.service
    systemctl start logcorrelator.service
}

# Never fail the transaction because of this scriptlet.
exit 0
%preun
# The first argument is 0 on final removal (not on upgrade): only then is the
# service stopped and disabled, and only when systemctl is available.
if [ $1 -eq 0 ] && [ -x /bin/systemctl ]; then
    systemctl stop logcorrelator.service
    systemctl disable logcorrelator.service
fi
exit 0
%postun
# Without systemctl there is nothing to reload or restart.
[ -x /bin/systemctl ] || exit 0

systemctl daemon-reload

# A first argument of 1 or more means a version of the package is still
# installed (upgrade): restart the service if it is currently running.
if [ $1 -ge 1 ]; then
    systemctl try-restart logcorrelator.service
fi
exit 0
%files
/usr/bin/logcorrelator
# The live configuration is never overwritten by an upgrade.
%config(noreplace) /etc/logcorrelator/logcorrelator.yml
/etc/logcorrelator/logcorrelator.yml.example
# Own the log and state directories themselves, not whatever ends up inside.
%dir /var/log/logcorrelator
%dir /var/lib/logcorrelator
/etc/systemd/system/logcorrelator.service
%config(noreplace) /etc/logrotate.d/logcorrelator
%changelog
* Wed Mar 11 2026 logcorrelator <dev@example.com> - 1.1.22-1
- Feat(outputs): file output enabled/disabled toggle
Ajout du champ enabled: true/false dans outputs.file de la configuration.
Le sink fichier n'est cree que si enabled: true ET path: defini.
Permet de desactiver completement la sortie fichier tout en gardant stdout/clickhouse.
Tests: TestValidate_FileOutputDisabled, TestLoadConfig_FileOutputDisabled
- Fix(systemd): arret immediat sans vidage de queue
orchestrator.Stop() ne vide plus les buffers (events en transit perdus).
Suppression de ShutdownTimeout et de la logique de flush/attente.
systemd TimeoutStopSec=30 gere l'arret force si besoin.
Simplification: cancel() + Close() uniquement.
- Feat(sql): TTL et compression ZSTD sur tables ClickHouse
http_logs_raw: TTL 1 jour, compression ZSTD sur raw_json
http_logs: TTL 7 jours, compression ZSTD sur champs texte volumineux
Parametre ttl_only_drop_parts = 1 pour optimiser les suppressions
* Mon Mar 09 2026 logcorrelator <dev@example.com> - 1.1.21-1
- Update: vues ClickHouse et schema SQL
Ajout de bots.sql pour l'identification des bots (User-Agent parsing)
Ajout de tables.sql pour les tables de reference
Mise a jour de mv1.sql (vue materialisee) avec nouvelle structure de correlation
Documentation views.md enrichie avec exemples de requetes et schema complet
* Mon Mar 09 2026 logcorrelator <dev@example.com> - 1.1.20-1
- Fix(rpm): suppression de systemd-tmpfiles.conf redondant
RuntimeDirectory=logcorrelator dans le service systemd gere deja /run/logcorrelator
automatiquement. La commande systemd-tmpfiles --create causait des erreurs sur
les systemes avec /var/lib/mysql existant (fichier au lieu de repertoire).
Suppression de /usr/lib/tmpfiles.d/logcorrelator.conf et de systemd-tmpfiles --create.
* Mon Mar 09 2026 logcorrelator <dev@example.com> - 1.1.19-1
- Fix(systemd): stop/restart immediat sans attendre vidage queue
L'arret du service ne vide plus les buffers (events en transit perdus).
systemd TimeoutStopSec=30 gere deja l'arret force si besoin.
Simplification de orchestrator.Stop() : cancel() + Close() uniquement.
Suppression de ShutdownTimeout devenu inutile.
* Mon Mar 09 2026 logcorrelator <dev@example.com> - 1.1.18-1
- Fix(outputs): file output enabled: false ne coupait pas l ecriture du fichier
Le champ Enabled manquait dans FileOutputConfig. Le sink fichier etait cree
meme avec enabled: false tant que path etait defini. Desormais, la condition
verifie explicitement enabled && path != "" dans main.go et Validate().
Test: TestValidate_FileOutputDisabled et TestLoadConfig_FileOutputDisabled ajoutes.
* Fri Mar 06 2026 logcorrelator <dev@example.com> - 1.1.17-1
- Fix(correlation): champ keepalives non peuple dans ClickHouse
Le champ KeepAliveSeq de NormalizedEvent n'etait pas transfere dans les Fields
de CorrelatedLog. La vue materialisee ClickHouse extrayait keepalives du JSON
mais trouvait toujours 0. Desormais, NewCorrelatedLog et NewCorrelatedLogFromEvent
ajoutent explicitement keepalives = KeepAliveSeq dans les Fields.
* Fri Mar 06 2026 logcorrelator <dev@example.com> - 1.1.16-1
- Feat(correlation): emettre les evenements A filtrés par include_dest_ports vers ClickHouse
Quand un evenement A (HTTP) etait exclu par le filtre include_dest_ports, il etait
silencieusement ignore. Desormais, si ApacheAlwaysEmit=true, l evenement est emis comme
non-correle (orphan_side=A) afin d apparaitre dans ClickHouse. Les evenements B restent
ignores. Test: TestCorrelationService_IncludeDestPorts_FilteredPort mis a jour +
TestCorrelationService_IncludeDestPorts_FilteredPort_NoAlwaysEmit ajoute.
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.15-1
- Fix(correlation/bug3): perte de donnees quand B expire avec des orphelins en attente
cleanNetworkBufferByTTL supprimait les pendingOrphans sans les emettre (perte silencieuse).
Desormais, les orphelins A sont retournes immediatement a l'appelant quand B expire,
et cleanExpired/ProcessEvent propagent ces resultats vers le sink.
Test: TestBTTLExpiry_PurgesPendingOrphans etendu pour verifier l'emission effective.
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.14-1
- Fix(correlation/bug1): Keep-Alive sessions au-dela de TimeWindow ne correlent plus en orphelins
Le matcher dans processSourceA utilisait eventsMatch (comparaison de timestamps) en mode
one_to_many. Apres ~10s, B.Timestamp_original depasse la TimeWindow et toutes les requetes
suivantes devenaient orphelines. Nouveau matcher bEventHasValidTTL : un B event est valide
tant que son TTL n'a pas expire (le TTL est reset a chaque correlation Keep-Alive).
- Fix(correlation/bug4): checkPendingOrphansForCorrelation utilisait eventsMatch (meme bug)
En mode one_to_many, un B arrivant avec un vieux timestamp ne matchait plus les pending orphans
pour la meme cle. Remplace par une verification de cle uniquement (meme cle = meme connexion).
- Fix(correlation/bug3): pendingOrphans non purges quand le B expire (cleanNetworkBufferByTTL)
Quand un B event expirait (TTL), les pending orphan A associes etaient bloques indefiniment.
Ils sont desormais emis immediatement lors de l'expiration du B correspondant.
- Fix(correlation/bug2): orphans emis uniquement sur reception d'evenement (pas de timer dedie)
EmitPendingOrphans() est maintenant une methode publique thread-safe. L'Orchestrateur
demarre un goroutine ticker (250ms) qui appelle EmitPendingOrphans() independamment du flux,
garantissant l'emission meme en l'absence de nouveaux evenements.
- Feat(ports): ajout de EmitPendingOrphans() dans l'interface CorrelationProcessor
- Test: 4 nouveaux tests de non-regression (Bug #1, #2, #3, #4)
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.13-1
- Fix: Unix sockets ne passent plus en root:root lors des restarts du service
- Fix: Ajout de RuntimeDirectory=logcorrelator dans le service systemd (systemd gère /run/logcorrelator avec le bon propriétaire à chaque démarrage/restart)
- Fix: Ajout de /usr/lib/tmpfiles.d/logcorrelator.conf pour recréer /run/logcorrelator au boot
- Chore: Retrait de /var/run/logcorrelator du RPM %files (géré par tmpfiles.d)
- Fix(correlation): emitPendingOrphans - corruption de slice lors de l expiration simultanée de plusieurs orphelins pour la même clé (slice aliasing bug, émissions en double)
- Fix(correlation): rotateOldestA - l événement rotaté était perdu silencieusement même avec ApacheAlwaysEmit=true (retourne désormais le CorrelatedLog)
- Fix(correlation): Keep-Alive cassé dans le chemin pending-orphan-then-B - le B event n était pas bufferisé en mode one_to_many, bloquant la corrélation des requêtes A2+ du même Keep-Alive
- Chore(correlation): suppression du champ mort timer *time.Timer dans pendingOrphan
- Feat(correlation): ajout de keepalive_seq dans les logs orphelins pour faciliter le debug (numéro de requête dans la connexion Keep-Alive, 1-based)
- Test: 4 nouveaux tests de non-régression pour les bugs de corrélation
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.12-1
- Feat: New config directive include_dest_ports - restrict correlation to specific destination ports
- Feat: If include_dest_ports is non-empty, events on unlisted ports are silently ignored (not correlated, not emitted as orphan)
- Feat: New metric failed_dest_port_filtered for monitoring filtered traffic
- Feat: Debug log for filtered events: "event excluded by dest port filter: source=A dst_port=22"
- Test: New unit tests for include_dest_ports (allowed port, filtered port, empty=all)
- Docs: README.md updated with include_dest_ports section and current version references
- Docs: architecture.yml updated with include_dest_ports
- Fix: config.example.yml - removed obsolete stdout.level field
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.11-1
- Fix: StdoutSink no longer writes correlated/orphan JSON to stdout
- Fix: stdout sink is now a no-op for data; operational logs go to stderr via logger
- Fix: ClickHouse sink had no logger - all flush errors were silently discarded
- Fix: Periodic, batch and final-close flush errors are now logged at ERROR level
- Fix: Buffer overflow with DropOnOverflow=true is now logged at WARN level
- Fix: Retry attempts are now logged at WARN level with attempt number, delay and error
- Feat: ClickHouse connection success logged at INFO (table, batch_size, flush_interval_ms)
- Feat: Successful batch sends logged at DEBUG (rows count, table)
- Feat: SetLogger() method added to ClickHouseSink for external logger injection
- Test: New unit tests for StdoutSink asserting stdout remains empty for all log types
* Wed Mar 04 2026 logcorrelator <dev@example.com> - 1.1.10-1
- Feat: IP exclusion filter - exclude specific source IPs or CIDR ranges
- Feat: Configuration exclude_source_ips supports single IPs and CIDR notation
- Feat: Debug logging for excluded IPs
- Feat: New metric failed_ip_excluded for monitoring filtered traffic
- Feat: Architecture documentation updated with observability section
- Use cases: exclude health checks, internal traffic, known bad actors
- Docs: README.md updated with IP exclusion documentation
- Docs: architecture.yml updated with metrics and troubleshooting guide
* Wed Mar 04 2026 logcorrelator <dev@example.com> - 1.1.9-1
- Feat: Debug logging - detailed DEBUG logs for correlation troubleshooting
- Feat: Correlation metrics server (HTTP endpoint /metrics and /health)
- Feat: New metrics: events_received, correlations_success/failed, failure reasons
- Feat: Failure reason tracking: no_match_key, time_window, buffer_eviction, ttl_expired
- Feat: Buffer size monitoring (buffer_a_size, buffer_b_size)
- Feat: Orphan tracking (orphans_emitted, orphans_pending, pending_orphan_match)
- Feat: Keep-Alive reset counter for connection tracking
- Feat: Test scripts added (test-correlation.sh, test-correlation-advanced.py)
- Change: Config example updated with metrics section
- Docs: README.md updated with debugging guide and troubleshooting table
* Tue Mar 03 2026 logcorrelator <dev@example.com> - 1.1.8-1
- Migrated from FPM to rpmbuild (native RPM build)
- Reduced build image size by 200MB (-40%)
- Removed FPM gem dependency (use rpmbuild directly)
- Scripts post/preun/postun now inline in spec file
- Build image: rockylinux:8 instead of ruby:3.2-bookworm
* Tue Mar 03 2026 logcorrelator <dev@example.com> - 1.1.7-1
- Fix: Critical Keep-Alive bug - network events evicted based on original timestamp instead of reset TTL
- Fix: Correlation time window increased from 1s to 10s for HTTP Keep-Alive support
- Fix: Network source now uses payload timestamp if available (fallback to reception time)
- Change: Default network TTL increased from 30s to 120s for long Keep-Alive sessions
- Test: Added comprehensive Keep-Alive tests (TTL reset, long session scenarios)
* Tue Mar 03 2026 logcorrelator <dev@example.com> - 1.1.6-1
- Docs: Update ClickHouse schema documentation (http_logs_raw + http_logs tables)
- Fix: ClickHouse insertion uses single raw_json column (FORMAT JSONEachRow)
- Fix: ClickHouse native API (clickhouse-go/v2 PrepareBatch + Append + Send)
* Tue Mar 03 2026 logcorrelator <dev@example.com> - 1.1.5-1
- Fix: ClickHouse insertion using native clickhouse-go/v2 API (PrepareBatch + Append + Send)
- Fix: Replaced database/sql wrapper with clickhouse.Open() and clickhouse.Conn
- Fix: Proper batch sending to avoid ATTEMPT_TO_READ_AFTER_EOF errors
- Fix: Set correct permissions (755) on /var/run/logcorrelator in RPM post-install
* Mon Mar 02 2026 logcorrelator <dev@example.com> - 1.1.4-1
- Fix: Log raw JSON data on parse errors for debugging
* Mon Mar 02 2026 logcorrelator <dev@example.com> - 1.1.3-1
- Refactor: Switch Unix sockets from STREAM to DGRAM mode (SOCK_DGRAM)
- Test: Comprehensive tests added - coverage improved to 74.4%
- Fix: Example config file installed to /etc/logcorrelator/logcorrelator.yml.example
- Change: Default socket permissions from 0660 to 0666 (world read/write)
* Mon Mar 02 2026 logcorrelator <dev@example.com> - 1.1.2-1
- Fix: Example config file installed to /etc/logcorrelator/logcorrelator.yml.example
- Change: Default socket permissions from 0660 to 0666 (world read/write)
* Mon Mar 02 2026 logcorrelator <dev@example.com> - 1.1.1-1
- Fix: Move logcorrelator.yml.example from /usr/share/logcorrelator/ to /etc/logcorrelator/
* Mon Mar 02 2026 logcorrelator <dev@example.com> - 1.1.0-1
- Feat: Keep-Alive support (one-to-many correlation mode)
- Feat: Dynamic TTL for network events (source B)
- Feat: Separate buffer sizes for HTTP and network events
- Feat: SIGHUP signal handling for log rotation
- Feat: File sink Reopen() method for log rotation
- Feat: logrotate configuration included
- Feat: ExecReload added to systemd service
- Feat: New YAML config structure (time_window, orphan_policy, matching, buffers, ttl)
- Docs: Updated architecture.yml and config.example.yml
* Sat Feb 28 2026 logcorrelator <dev@example.com> - 1.0.7-1
- Added: Log levels DEBUG, INFO, WARN, ERROR configurable via log.level
- Added: Warn and Warnf methods for warning messages
- Added: Debug logs for events received from sockets and correlations
- Added: Warning logs for orphan events and buffer overflow
- Changed: Configuration log.enabled replaced by log.level
- Changed: Orphan events and buffer overflow now logged as WARN instead of DEBUG
* Sat Feb 28 2026 logcorrelator <dev@example.com> - 1.0.6-1
- Changed: Configuration YAML simplified, removed service.name, service.language
- Changed: Correlation config simplified, time_window_s instead of nested object
- Changed: Orphan policy simplified to emit_orphans boolean
- Changed: Apache socket renamed to http.socket
- Added: socket_permissions option on unix sockets
* Sat Feb 28 2026 logcorrelator <dev@example.com> - 1.0.5-1
- Added: Systemd service auto-start after RPM installation
- Added: Systemd service hardening (TimeoutStartSec, TimeoutStopSec, ReadWritePaths)
- Fixed: Systemd service unit correct config path (.yml instead of .conf)
- Fixed: CI workflow branch name main to master
- Changed: RPM packaging generic el8/el9/el10 directory naming
* Sat Feb 28 2026 logcorrelator <dev@example.com> - 1.0.4-1
- Breaking: Flattened JSON output structure - removed apache and network subdivisions
- All log fields now merged into single-level JSON structure
- ClickHouse schema: replaced apache JSON and network JSON columns with fields JSON column
- Custom MarshalJSON() implementation for flat output
* Sat Feb 28 2026 logcorrelator <dev@example.com> - 1.0.3-1
- Fix: Added missing ClickHouse driver dependency
- Fix: Fixed race condition in orchestrator
- Security: Added explicit source_type configuration for Unix socket sources
- Added: Comprehensive test suite improvements
- Added: Test coverage improved from 50.6% to 62.0%
* Sat Feb 28 2026 logcorrelator <dev@example.com> - 1.0.2-1
- Added: Initial RPM packaging support for Rocky Linux 8/9 and AlmaLinux 10
- Added: Docker multi-stage build pipeline
- Added: Hexagonal architecture implementation
- Added: Unix socket input sources (JSON line protocol)
- Added: File output sink (JSON lines)
- Added: ClickHouse output sink with batching and retry logic
- Added: Time-window based correlation on src_ip + src_port
- Added: Graceful shutdown with signal handling (SIGINT, SIGTERM)
* Sat Feb 28 2026 logcorrelator <dev@example.com> - 1.0.1-1
- Initial package for CentOS 7, Rocky Linux 8, 9, 10

View File

@ -0,0 +1,13 @@
# logrotate policy for the logcorrelator correlated-events log file.
/var/log/logcorrelator/correlated.log {
# Rotate once per day and keep seven rotated files.
daily
rotate 7
# Compress rotated files, but postpone compressing the newest one by one cycle.
compress
delaycompress
# A missing log is not an error, and an empty log is not rotated.
missingok
notifempty
# Recreate the log after rotation as mode 0640, owned by logcorrelator:logcorrelator.
create 0640 logcorrelator logcorrelator
sharedscripts
# After rotation ask systemd to reload the service; output is discarded and
# a failing reload never fails the rotation itself.
postrotate
/bin/systemctl reload logcorrelator > /dev/null 2>&1 || true
endscript
}

View File

@ -0,0 +1,258 @@
#!/bin/bash
# RPM smoke test for the logcorrelator package.
#
# Usage: ./packaging/test/test-rpm.sh [el8|el9|el10]
#
# The newest RPM built for the chosen distribution is installed inside a
# throw-away Docker container, where the script verifies that:
# - Installation succeeds
# - File permissions are correct
# - Service starts properly
# - Sockets are created with correct ownership
set -e

# This script lives two directories below the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PACKAGING_DIR="$(dirname "$SCRIPT_DIR")"
PROJECT_ROOT="$(dirname "$PACKAGING_DIR")"
RPM_DIR="${PROJECT_ROOT}/dist/rpm"

# Target distribution; el8 is used when no argument is given.
DISTRO="${1:-el8}"

printf '%s\n' \
    "=========================================" \
    "Testing logcorrelator RPM for ${DISTRO}" \
    "========================================="

# Map the distribution name to its RPM directory and container base image.
case "${DISTRO}" in
    el8|rocky8)  RPM_PATH="${RPM_DIR}/el8";  BASE_IMAGE="rockylinux:8" ;;
    el9|rocky9)  RPM_PATH="${RPM_DIR}/el9";  BASE_IMAGE="rockylinux:9" ;;
    el10|alma10) RPM_PATH="${RPM_DIR}/el10"; BASE_IMAGE="almalinux:10" ;;
    *)
        echo "Unknown distribution: ${DISTRO}"
        echo "Valid options: el8, el9, el10"
        exit 1
        ;;
esac

# Pick the most recently modified RPM; the variable stays empty when none exists.
RPM_FILE=$(ls -t "${RPM_PATH}"/logcorrelator-*.rpm 2>/dev/null | head -n 1)
[[ -n "${RPM_FILE}" ]] || {
    echo "ERROR: No RPM file found in ${RPM_PATH}"
    echo "Please run 'make package-rpm' first"
    exit 1
}

echo "Testing RPM: ${RPM_FILE}"
echo "Base image: ${BASE_IMAGE}"
echo ""
# Create test script
# This is the script executed *inside* the container. The quoted heredoc
# delimiter disables expansion here, so every $VAR below is evaluated in the
# container and not by this wrapper.
TEST_SCRIPT=$(cat <<'EOF'
#!/bin/bash
set -e

# check_owned_path KIND PATH MODE
# KIND is "dir" or "file". Aborts the whole run with exit 1 unless PATH exists
# as that kind, is owned by logcorrelator:logcorrelator and has octal MODE.
check_owned_path() {
    local kind="$1" path="$2" want_mode="$3" owner perms
    if [ "$kind" = "dir" ]; then
        if [ ! -d "$path" ]; then
            echo "FAIL: $path does not exist"
            exit 1
        fi
    else
        if [ ! -f "$path" ]; then
            echo "FAIL: $path does not exist"
            exit 1
        fi
    fi
    owner=$(stat -c '%U:%G' "$path")
    perms=$(stat -c '%a' "$path")
    if [ "$owner" != "logcorrelator:logcorrelator" ]; then
        echo "FAIL: $path owner is $owner (expected logcorrelator:logcorrelator)"
        exit 1
    fi
    if [ "$perms" != "$want_mode" ]; then
        echo "FAIL: $path permissions are $perms (expected $want_mode)"
        exit 1
    fi
    echo "OK: $path - owner=$owner, permissions=$perms"
}

# check_socket PATH
# Reports the state of a Unix socket. Problems are warnings only, because the
# service may be unable to start in a container without a full systemd.
check_socket() {
    local sock="$1" owner perms
    if [ -S "$sock" ]; then
        owner=$(stat -c '%U:%G' "$sock")
        perms=$(stat -c '%a' "$sock")
        echo "OK: $sock exists - owner=$owner, permissions=$perms"
        if [ "$perms" != "666" ]; then
            echo "WARN: socket permissions are $perms (expected 666)"
        fi
    else
        echo "WARN: $sock not found (service may not have started)"
    fi
}

echo "=== Installing logcorrelator RPM ==="
rpm -ivh /tmp/logcorrelator.rpm
echo ""
echo "=== Checking user and group ==="
if ! getent group logcorrelator >/dev/null; then
    echo "FAIL: logcorrelator group not created"
    exit 1
fi
echo "OK: logcorrelator group exists"
if ! getent passwd logcorrelator >/dev/null; then
    echo "FAIL: logcorrelator user not created"
    exit 1
fi
echo "OK: logcorrelator user exists"
echo ""
echo "=== Checking directory permissions ==="
check_owned_path dir /var/run/logcorrelator 755
check_owned_path dir /var/log/logcorrelator 750
check_owned_path dir /var/lib/logcorrelator 750
echo ""
echo "=== Checking config files ==="
check_owned_path file /etc/logcorrelator/logcorrelator.yml 640
check_owned_path file /etc/logcorrelator/logcorrelator.yml.example 640
echo ""
echo "=== Checking systemd service ==="
if [ ! -f /etc/systemd/system/logcorrelator.service ]; then
    echo "FAIL: systemd service file not found"
    exit 1
fi
echo "OK: systemd service file exists"
echo ""
echo "=== Checking logrotate config ==="
if [ ! -f /etc/logrotate.d/logcorrelator ]; then
    echo "FAIL: logrotate config not found"
    exit 1
fi
echo "OK: logrotate config exists"
echo ""
echo "=== Testing service start ==="
# Try to start the service (may fail in container without full systemd)
if command -v systemctl >/dev/null 2>&1; then
    systemctl daemon-reload || true
    if systemctl start logcorrelator.service 2>/dev/null; then
        echo "OK: service started successfully"
        # Wait for sockets to be created
        sleep 2
        echo ""
        echo "=== Checking sockets ==="
        check_socket /var/run/logcorrelator/http.socket
        check_socket /var/run/logcorrelator/network.socket
        systemctl stop logcorrelator.service || true
    else
        echo "WARN: service failed to start (expected in minimal container)"
    fi
else
    echo "WARN: systemctl not available (minimal container)"
fi
echo ""
echo "========================================="
echo "All tests passed!"
echo "========================================="
EOF
)
# Run test in Docker container
echo "Running tests in Docker container..."
echo ""
# TEST_SCRIPT holds the script *text*, not a path, so it cannot be used as the
# source of a bind mount. Write it to a temporary file and mount that instead.
TEST_SCRIPT_FILE="$(mktemp)"
trap 'rm -f "${TEST_SCRIPT_FILE}"' EXIT
printf '%s\n' "${TEST_SCRIPT}" > "${TEST_SCRIPT_FILE}"
# mktemp creates the file with mode 0600; relax it so the container user can
# read it whatever UID mapping is in effect.
chmod 0644 "${TEST_SCRIPT_FILE}"
docker run --rm \
    -v "${RPM_FILE}:/tmp/logcorrelator.rpm:ro" \
    -v "${TEST_SCRIPT_FILE}:/test.sh:ro" \
    "${BASE_IMAGE}" \
    bash /test.sh
echo ""
echo "Test completed successfully for ${DISTRO}"

View File

@ -0,0 +1,101 @@
#!/bin/bash
# Architecture compliance audit for the logcorrelator sources mounted at /src.
# Each check prints a one-line verdict; the audit is purely informational and
# does not fail the script when a check reports a problem.
set -e

# check_grep PATTERN FILE OK_MSG FAIL_MSG
# Prints OK_MSG when PATTERN (basic regex) occurs in FILE, FAIL_MSG otherwise
# (including when FILE cannot be read).
check_grep() {
    if grep -q -- "$1" "$2"; then
        echo "$3"
    else
        echo "$4"
    fi
}

# check_exists TEST_FLAG PATH OK_MSG FAIL_MSG
# TEST_FLAG is a unary test operator such as -f or -d.
check_exists() {
    if [ "$1" "$2" ]; then
        echo "$3"
    else
        echo "$4"
    fi
}

UNIX_SOURCE=/src/internal/adapters/inbound/unixsocket/source.go
FILE_SINK=/src/internal/adapters/outbound/file/sink.go
CLICKHOUSE_SINK=/src/internal/adapters/outbound/clickhouse/sink.go
MULTI_SINK=/src/internal/adapters/outbound/multi/sink.go
CORRELATION=/src/internal/domain/correlation_service.go
EVENT=/src/internal/domain/event.go
CONFIG_EXAMPLE=/src/config.example.yml
SERVICE_UNIT=/src/logcorrelator.service

echo "=== AUDIT ARCHITECTURE COMPLIANCE ==="
echo ""
# 1. Runtime - systemd service
echo "1. RUNTIME - SYSTEMD SERVICE"
if [ -f "$SERVICE_UNIT" ]; then
    echo "✅ logcorrelator.service exists"
    check_grep "ExecStart=/usr/bin/logcorrelator" "$SERVICE_UNIT" " ✅ ExecStart correct" " ❌ ExecStart incorrect"
    check_grep "ExecReload=" "$SERVICE_UNIT" " ✅ ExecReload present" " ❌ ExecReload missing"
    check_grep "Restart=on-failure" "$SERVICE_UNIT" " ✅ Restart policy correct" " ❌ Restart policy incorrect"
else
    echo "❌ logcorrelator.service missing"
fi
# Check signal handling in code
echo ""
check_grep "SIGINT\|SIGTERM\|SIGHUP" /src/cmd/logcorrelator/main.go "✅ Signal handling (SIGINT/SIGTERM/SIGHUP) implemented" "❌ Signal handling missing"
# 2. Packaging - RPM
echo ""
echo "2. PACKAGING - RPM"
check_exists -f /src/packaging/rpm/logcorrelator.spec "✅ RPM spec file exists" "❌ RPM spec missing"
# Packaging moved from FPM to native rpmbuild in 1.1.8 (see the spec
# changelog), so the build image is expected to invoke rpmbuild, not fpm.
check_grep "rpmbuild" /src/Dockerfile.package "✅ rpmbuild used for packaging" "❌ rpmbuild not found"
# 3. Config - YAML
echo ""
echo "3. CONFIG - YAML"
check_exists -f "$CONFIG_EXAMPLE" "✅ config.example.yml exists" "❌ config.example.yml missing"
check_grep "log:" "$CONFIG_EXAMPLE" " ✅ log section present" " ❌ log section missing"
check_grep "inputs:" "$CONFIG_EXAMPLE" " ✅ inputs section present" " ❌ inputs section missing"
check_grep "outputs:" "$CONFIG_EXAMPLE" " ✅ outputs section present" " ❌ outputs section missing"
check_grep "correlation:" "$CONFIG_EXAMPLE" " ✅ correlation section present" " ❌ correlation section missing"
# 4. Inputs - Unix datagram sockets
echo ""
echo "4. INPUTS - UNIX DATAGRAM SOCKETS"
check_grep "ListenUnixgram" "$UNIX_SOURCE" "✅ Using ListenUnixgram (SOCK_DGRAM)" "❌ Not using SOCK_DGRAM"
check_grep "ReadFromUnix" "$UNIX_SOURCE" "✅ Using ReadFromUnix for datagrams" "❌ Not using ReadFromUnix"
check_grep "MaxDatagramSize = 65535" "$UNIX_SOURCE" "✅ max_datagram_bytes = 65535" "❌ max_datagram_bytes incorrect"
check_grep "0666" "$UNIX_SOURCE" "✅ Default socket permissions 0666" "❌ Socket permissions not 0666"
# Check socket paths in config
check_grep "http.socket" "$CONFIG_EXAMPLE" " ✅ http.socket path configured" " ❌ http.socket path missing"
check_grep "network.socket" "$CONFIG_EXAMPLE" " ✅ network.socket path configured" " ❌ network.socket path missing"
# 5. Outputs - Sinks
echo ""
echo "5. OUTPUTS - SINKS"
check_exists -f "$FILE_SINK" "✅ File sink exists" "❌ File sink missing"
check_exists -f "$CLICKHOUSE_SINK" "✅ ClickHouse sink exists" "❌ ClickHouse sink missing"
check_exists -f "$MULTI_SINK" "✅ MultiSink exists" "❌ MultiSink missing"
# Check SIGHUP reopen in file sink
check_grep "Reopen" "$FILE_SINK" " ✅ FileSink.Reopen() for SIGHUP" " ❌ FileSink.Reopen() missing"
# Check ClickHouse batching
check_grep "batch" "$CLICKHOUSE_SINK" " ✅ ClickHouse batching implemented" " ❌ ClickHouse batching missing"
check_grep "drop_on_overflow\|DropOnOverflow" "$CLICKHOUSE_SINK" " ✅ drop_on_overflow implemented" " ❌ drop_on_overflow missing"
# 6. Correlation
echo ""
echo "6. CORRELATION"
check_grep "src_ip" "$CORRELATION" "✅ src_ip in correlation key" "❌ src_ip missing"
check_grep "src_port" "$CORRELATION" "✅ src_port in correlation key" "❌ src_port missing"
check_grep "MatchingMode" "$CORRELATION" "✅ MatchingMode (one_to_one/one_to_many) implemented" "❌ MatchingMode missing"
check_grep "ApacheAlwaysEmit" "$CORRELATION" "✅ apache_always_emit orphan policy" "❌ apache_always_emit missing"
check_grep "network_ttl\|NetworkTTLS" "$CORRELATION" "✅ TTL management for network events" "❌ TTL management missing"
check_grep "max_http_items\|maxHttpItems\|MaxHTTPItems" "$CORRELATION" "✅ Buffer limit max_http_items" " ⚠️ Buffer limit naming may differ"
check_grep "max_network_items\|maxNetworkItems\|MaxNetworkItems" "$CORRELATION" "✅ Buffer limit max_network_items" " ⚠️ Buffer limit naming may differ"
# 7. Schema - Source A and B
echo ""
echo "7. SCHEMA - SOURCE A AND B"
check_grep "timestamp" "$UNIX_SOURCE" "✅ timestamp field for Source A" "❌ timestamp missing for Source A"
check_grep "SourceA\|SourceB" "$EVENT" "✅ EventSource enum (A/B)" "❌ EventSource enum missing"
check_grep "header_" "$UNIX_SOURCE" "✅ header_* dynamic fields" "❌ header_* fields missing"
check_grep "Extra" "$EVENT" "✅ Extra fields map" "❌ Extra fields missing"
# 8. Architecture modules
echo ""
echo "8. ARCHITECTURE MODULES"
for module in \
    internal/domain \
    internal/ports \
    internal/app \
    internal/adapters/inbound \
    internal/adapters/outbound \
    internal/config \
    internal/observability \
    cmd/logcorrelator
do
    check_exists -d "/src/${module}" "✅ ${module}" "❌ ${module} missing"
done
# 9. Testing
echo ""
echo "9. TESTING"
echo "Running tests with coverage..."
# Only the per-package summary lines are shown; a failing test run must not
# abort the audit, hence the trailing "|| true".
cd /src && go test ./... -cover 2>&1 | grep -E "^(ok|FAIL|\?)" || true
echo ""
echo "=== AUDIT COMPLETE ==="

View File

@ -0,0 +1,582 @@
#!/usr/bin/env python3
"""
test-correlation-advanced.py - Advanced correlation testing tool
This script provides comprehensive testing for the logcorrelator service,
including various scenarios to debug correlation issues.
Usage:
python3 test-correlation-advanced.py [options]
Requirements:
- Python 3.6+
- requests library (for metrics): pip install requests
"""
import argparse
import json
import socket
import sys
import time
from datetime import datetime
from typing import Dict, Any, Optional, Tuple
try:
import requests
HAS_REQUESTS = True
except ImportError:
HAS_REQUESTS = False
class Colors:
    """Namespace of ANSI escape sequences used to decorate terminal output."""
    # Foreground colours.
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'
    BLUE = '\033[0;34m'
    # Text attributes.
    BOLD = '\033[1m'
    NC = '\033[0m'  # "No Color": resets every attribute
def colorize(text: str, color: str) -> str:
    """Return ``text`` preceded by ``color`` and followed by the reset code."""
    return "{}{}{}".format(color, text, Colors.NC)
def info(text: str):
    """Print ``text`` behind a blue ``[INFO]`` tag."""
    tag = colorize("[INFO] ", Colors.BLUE)
    print(tag + text)
def success(text: str):
    """Print ``text`` behind a green ``[OK]`` tag."""
    tag = colorize("[OK] ", Colors.GREEN)
    print(tag + text)
def warn(text: str):
    """Print ``text`` behind a yellow ``[WARN]`` tag."""
    tag = colorize("[WARN] ", Colors.YELLOW)
    print(tag + text)
def error(text: str):
    """Print ``text`` behind a red ``[ERROR]`` tag (on standard output)."""
    tag = colorize("[ERROR] ", Colors.RED)
    print(tag + text)
def debug(text: str, verbose: bool = False):
    """Print ``text`` behind a blue ``[DEBUG]`` tag, but only when ``verbose`` is truthy."""
    if not verbose:
        return
    tag = colorize("[DEBUG] ", Colors.BLUE)
    print(tag + text)
class CorrelationTester:
    """Drives the logcorrelator service through its two Unix datagram sockets.

    HTTP (source A) and network (source B) events are sent as JSON datagrams.
    When metrics are enabled, counters are read from ``metrics_url`` before and
    after a scenario so the effect of the sent events can be observed.
    """

    def __init__(
        self,
        http_socket: str = "/var/run/logcorrelator/http.socket",
        network_socket: str = "/var/run/logcorrelator/network.socket",
        metrics_url: str = "http://localhost:8080/metrics",
        verbose: bool = False,
        skip_metrics: bool = False
    ) -> None:
        """Store the configuration; no socket is opened until ``connect()``."""
        self.http_socket = http_socket
        self.network_socket = network_socket
        self.metrics_url = metrics_url
        self.verbose = verbose
        self.skip_metrics = skip_metrics
        self.http_sock: Optional[socket.socket] = None
        self.network_sock: Optional[socket.socket] = None

    def connect(self) -> bool:
        """Connect to both Unix sockets.

        Returns True on success. On any failure the error is reported, every
        socket opened so far is closed again, and False is returned.
        """
        try:
            # HTTP socket
            self.http_sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
            self.http_sock.connect(self.http_socket)
            debug(f"Connected to HTTP socket: {self.http_socket}", self.verbose)
            # Network socket
            self.network_sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
            self.network_sock.connect(self.network_socket)
            debug(f"Connected to Network socket: {self.network_socket}", self.verbose)
            return True
        except FileNotFoundError as e:
            error(f"Socket not found: {e}")
        except Exception as e:
            error(f"Connection error: {e}")
        # Do not leak a half-established pair of sockets.
        self.close()
        return False

    def close(self) -> None:
        """Close socket connections. Safe to call more than once."""
        if self.http_sock:
            self.http_sock.close()
            self.http_sock = None
        if self.network_sock:
            self.network_sock.close()
            self.network_sock = None

    def send_http_event(
        self,
        src_ip: str,
        src_port: int,
        timestamp: int,
        method: str = "GET",
        path: str = "/test",
        host: str = "example.com",
        extra_headers: Optional[Dict[str, str]] = None
    ) -> Dict[str, Any]:
        """Send an HTTP (source A) event and return the dict that was serialised.

        Each ``extra_headers`` entry is added under the key ``header_<name>``.
        Nothing is sent when the HTTP socket is not connected.
        """
        event = {
            "src_ip": src_ip,
            "src_port": src_port,
            "dst_ip": "10.0.0.1",
            "dst_port": 443,
            "timestamp": timestamp,
            "method": method,
            "path": path,
            "host": host,
            "http_version": "HTTP/1.1",
            "header_user_agent": "TestAgent/1.0",
            "header_accept": "*/*"
        }
        if extra_headers:
            for key, value in extra_headers.items():
                event[f"header_{key}"] = value
        json_data = json.dumps(event)
        if self.http_sock:
            self.http_sock.sendall(json_data.encode())
            debug(f"Sent HTTP event: {src_ip}:{src_port} ts={timestamp}", self.verbose)
        return event

    def send_network_event(
        self,
        src_ip: str,
        src_port: int,
        timestamp: int,
        ja3: str = "abc123",
        ja4: str = "def456",
        tls_version: str = "TLS1.3",
        tls_sni: str = "example.com"
    ) -> Dict[str, Any]:
        """Send a Network (source B) event and return the dict that was serialised.

        Nothing is sent when the network socket is not connected.
        """
        event = {
            "src_ip": src_ip,
            "src_port": src_port,
            "dst_ip": "10.0.0.1",
            "dst_port": 443,
            "timestamp": timestamp,
            "ja3": ja3,
            "ja4": ja4,
            "tls_version": tls_version,
            "tls_sni": tls_sni
        }
        json_data = json.dumps(event)
        if self.network_sock:
            self.network_sock.sendall(json_data.encode())
            debug(f"Sent Network event: {src_ip}:{src_port} ts={timestamp}", self.verbose)
        return event

    def get_metrics(self) -> Dict[str, Any]:
        """Fetch metrics from the metrics server.

        Returns an empty dict when metrics are skipped, when the ``requests``
        library is unavailable, or when the request or JSON decoding fails.
        """
        if self.skip_metrics:
            return {}
        if not HAS_REQUESTS:
            warn("requests library not installed, skipping metrics")
            return {}
        try:
            response = requests.get(self.metrics_url, timeout=5)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            warn(f"Failed to fetch metrics: {e}")
            return {}

    def print_metrics(self, metrics: Dict[str, Any], title: str = "Metrics"):
        """Print the known counters present in ``metrics``; no-op for an empty dict."""
        if not metrics:
            return
        print(f"\n{colorize(f'=== {title} ===', Colors.BOLD)}")
        keys_to_show = [
            ("events_received_a", "Events A"),
            ("events_received_b", "Events B"),
            ("correlations_success", "Correlations"),
            ("correlations_failed", "Failures"),
            ("failed_no_match_key", " - No match key"),
            ("failed_time_window", " - Time window"),
            ("failed_buffer_eviction", " - Buffer eviction"),
            ("failed_ttl_expired", " - TTL expired"),
            ("buffer_a_size", "Buffer A size"),
            ("buffer_b_size", "Buffer B size"),
            ("orphans_emitted_a", "Orphans A"),
            ("orphans_emitted_b", "Orphans B"),
            ("pending_orphan_match", "Pending orphan matches"),
            ("keepalive_resets", "Keep-Alive resets"),
        ]
        for key, label in keys_to_show:
            if key in metrics:
                print(f" {label}: {metrics[key]}")

    def check_sockets(self) -> bool:
        """Check that both configured paths exist and are Unix sockets.

        Every problem is reported through ``error``; returns True only when
        both paths pass.
        """
        import os
        import stat
        errors = 0
        for name, path in [("HTTP", self.http_socket), ("Network", self.network_socket)]:
            try:
                mode = os.stat(path).st_mode
            except OSError:
                # Same notion of "missing" as os.path.exists(): any stat
                # failure counts as not found.
                error(f"{name} socket not found: {path}")
                errors += 1
                continue
            if not stat.S_ISSOCK(mode):
                error(f"{name} path exists but is not a socket: {path}")
                errors += 1
            else:
                debug(f"{name} socket found: {path}", self.verbose)
        return errors == 0

    def run_basic_test(self, count: int = 10, delay_ms: int = 100) -> Tuple[bool, Dict[str, int]]:
        """
        Run basic correlation test.

        Sends N pairs of A+B events with matching src_ip:src_port and timestamps.
        All should correlate successfully.

        Returns ``(passed, results)``. ``results`` holds the number of pairs
        sent and the metric deltas observed. The verdict is derived from the
        metrics, so it is False whenever metrics are unavailable.
        """
        info(f"Running basic correlation test with {count} pairs...")
        # Get initial metrics
        initial_metrics = self.get_metrics()
        self.print_metrics(initial_metrics, "Initial Metrics")
        initial_success = initial_metrics.get("correlations_success", 0)
        initial_failed = initial_metrics.get("correlations_failed", 0)
        initial_a = initial_metrics.get("events_received_a", 0)
        initial_b = initial_metrics.get("events_received_b", 0)
        # Send test events
        print(f"\nSending {count} event pairs...")
        base_timestamp = time.time_ns()
        sent = 0
        for i in range(1, count + 1):
            src_ip = f"192.168.1.{(i % 254) + 1}"
            src_port = 8000 + i
            # Same timestamp for perfect correlation
            timestamp = base_timestamp + (i * 1_000_000)
            self.send_http_event(src_ip, src_port, timestamp)
            self.send_network_event(src_ip, src_port, timestamp)
            sent += 1
            if delay_ms > 0:
                time.sleep(delay_ms / 1000.0)
        success(f"Sent {sent} event pairs")
        # Wait for processing
        info("Waiting for processing (2 seconds)...")
        time.sleep(2)
        # Get final metrics
        final_metrics = self.get_metrics()
        self.print_metrics(final_metrics, "Final Metrics")
        # Calculate deltas
        delta_success = final_metrics.get("correlations_success", 0) - initial_success
        delta_failed = final_metrics.get("correlations_failed", 0) - initial_failed
        delta_a = final_metrics.get("events_received_a", 0) - initial_a
        delta_b = final_metrics.get("events_received_b", 0) - initial_b
        results = {
            "sent": sent,
            "received_a": delta_a,
            "received_b": delta_b,
            "correlations": delta_success,
            "failures": delta_failed
        }
        # Print results
        print(f"\n{colorize('=== Results ===', Colors.BOLD)}")
        print(f" Events A sent: {delta_a} (expected: {sent})")
        print(f" Events B sent: {delta_b} (expected: {sent})")
        print(f" Correlations: {delta_success}")
        print(f" Failures: {delta_failed}")
        # Validation
        test_passed = True
        if delta_a != sent:
            error(f"Event A count mismatch: got {delta_a}, expected {sent}")
            test_passed = False
        if delta_b != sent:
            error(f"Event B count mismatch: got {delta_b}, expected {sent}")
            test_passed = False
        if delta_success != sent:
            error(f"Correlation count mismatch: got {delta_success}, expected {sent}")
            test_passed = False
        if delta_failed > 0:
            # Reported, but does not change the verdict.
            warn(f"Unexpected correlation failures: {delta_failed}")
        if test_passed:
            success("All tests passed! Correlation is working correctly.")
        else:
            error("Some tests failed. Check logs for details.")
        return test_passed, results

    def run_time_window_test(self) -> bool:
        """Test time window expiration.

        Informational only: an A event is sent, then a matching B event 11
        seconds later. The outcome must be read from the metrics; the method
        always returns True.
        """
        info("Running time window test...")
        src_ip = "192.168.100.1"
        src_port = 9999
        # Send A event
        ts_a = time.time_ns()
        self.send_http_event(src_ip, src_port, ts_a)
        info(f"Sent A event at {ts_a}")
        # Wait for time window to expire (default 10s)
        info("Waiting 11 seconds (time window should expire)...")
        time.sleep(11)
        # Send B event
        ts_b = time.time_ns()
        self.send_network_event(src_ip, src_port, ts_b)
        info(f"Sent B event at {ts_b}")
        time_diff_sec = (ts_b - ts_a) / 1_000_000_000
        info(f"Time difference: {time_diff_sec:.1f} seconds")
        info("Expected: time_window failure (check metrics)")
        return True

    def run_different_ip_test(self) -> bool:
        """Test different IP (should not correlate).

        Informational only: always returns True.
        """
        info("Running different IP test...")
        ts = time.time_ns()
        # Send A with IP 192.168.200.1
        self.send_http_event("192.168.200.1", 7777, ts)
        info("Sent A event from 192.168.200.1:7777")
        # Send B with different IP
        self.send_network_event("192.168.200.2", 7777, ts)
        info("Sent B event from 192.168.200.2:7777 (different IP)")
        info("Expected: no_match_key failure (different src_ip)")
        return True

    def run_keepalive_test(self, count: int = 5) -> bool:
        """Test Keep-Alive mode (one B correlates with multiple A).

        Always returns True: too few resets only produce a warning, because
        the counter depends on timing.
        """
        info(f"Running Keep-Alive test with {count} HTTP requests on same connection...")
        src_ip = "192.168.50.1"
        src_port = 6000
        # Send one B event first (network/TCP connection)
        ts_b = time.time_ns()
        self.send_network_event(src_ip, src_port, ts_b)
        info(f"Sent B event (connection): {src_ip}:{src_port}")
        # Send multiple A events (HTTP requests) on same connection
        for i in range(count):
            ts_a = time.time_ns() + (i * 100_000_000)  # 100ms apart
            self.send_http_event(src_ip, src_port, ts_a, path=f"/request{i}")
            info(f"Sent A event (request {i}): {src_ip}:{src_port}")
            time.sleep(0.05)  # 50ms delay
        time.sleep(2)  # Wait for processing
        # Check metrics
        metrics = self.get_metrics()
        keepalive_resets = metrics.get("keepalive_resets", 0)
        info(f"Keep-Alive resets: {keepalive_resets} (expected: {count - 1})")
        if keepalive_resets >= count - 1:
            success("Keep-Alive test passed!")
        else:
            warn("Keep-Alive resets lower than expected. This may be normal depending on timing.")
        return True

    def run_all_tests(self) -> bool:
        """Run all test scenarios and print a summary.

        Only the basic correlation test can fail the run; the other scenarios
        are recorded as passed.
        """
        results = []
        # Basic test
        passed, _ = self.run_basic_test(count=10)
        results.append(("Basic correlation", passed))
        print("\n" + "=" * 50 + "\n")
        # Time window test
        self.run_time_window_test()
        results.append(("Time window", True))  # Informational
        print("\n" + "=" * 50 + "\n")
        # Different IP test
        self.run_different_ip_test()
        results.append(("Different IP", True))  # Informational
        print("\n" + "=" * 50 + "\n")
        # Keep-Alive test
        self.run_keepalive_test()
        results.append(("Keep-Alive", True))
        # Summary
        print(f"\n{colorize('=== Test Summary ===', Colors.BOLD)}")
        for name, passed in results:
            status = colorize("PASS", Colors.GREEN) if passed else colorize("FAIL", Colors.RED)
            print(f" {name}: {status}")
        return all(r[1] for r in results)
def main():
    """Command-line entry point.

    Parses the options, verifies and connects to the sockets, runs the selected
    scenario and terminates the process via ``sys.exit``: status 1 when the
    socket check or connection fails, or when ``--all`` reports a failure;
    status 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Advanced correlation testing tool for logcorrelator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Run basic test with 20 pairs
python3 test-correlation-advanced.py -c 20
# Run all tests with verbose output
python3 test-correlation-advanced.py --all -v
# Test with custom socket paths
python3 test-correlation-advanced.py -H /tmp/http.sock -N /tmp/network.sock
# Skip metrics check
python3 test-correlation-advanced.py --skip-metrics
"""
    )
    parser.add_argument(
        "-H", "--http-socket",
        default="/var/run/logcorrelator/http.socket",
        help="Path to HTTP Unix socket (default: /var/run/logcorrelator/http.socket)"
    )
    parser.add_argument(
        "-N", "--network-socket",
        default="/var/run/logcorrelator/network.socket",
        help="Path to Network Unix socket (default: /var/run/logcorrelator/network.socket)"
    )
    parser.add_argument(
        "-m", "--metrics-url",
        default="http://localhost:8080/metrics",
        help="Metrics server URL (default: http://localhost:8080/metrics)"
    )
    parser.add_argument(
        "-c", "--count",
        type=int,
        default=10,
        help="Number of test pairs to send (default: 10)"
    )
    parser.add_argument(
        "-d", "--delay",
        type=int,
        default=100,
        help="Delay between pairs in milliseconds (default: 100)"
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Enable verbose output"
    )
    parser.add_argument(
        "--skip-metrics",
        action="store_true",
        help="Skip metrics check"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Run all test scenarios"
    )
    parser.add_argument(
        "--time-window",
        action="store_true",
        help="Run time window test only"
    )
    parser.add_argument(
        "--different-ip",
        action="store_true",
        help="Run different IP test only"
    )
    parser.add_argument(
        "--keepalive",
        action="store_true",
        help="Run Keep-Alive test only"
    )
    args = parser.parse_args()
    # Create tester
    tester = CorrelationTester(
        http_socket=args.http_socket,
        network_socket=args.network_socket,
        metrics_url=args.metrics_url,
        verbose=args.verbose,
        skip_metrics=args.skip_metrics
    )
    # Check sockets
    if not tester.check_sockets():
        error("Socket check failed. Is logcorrelator running?")
        sys.exit(1)
    success("Socket check passed")
    # Connect
    if not tester.connect():
        error("Failed to connect to sockets")
        sys.exit(1)
    try:
        # The outcome is kept in `passed`, never in a local named `success`:
        # assigning to that name anywhere in this function would make it a
        # local variable and break the success(...) call above.
        if args.all:
            passed = tester.run_all_tests()
        elif args.time_window:
            tester.run_time_window_test()
            passed = True
        elif args.different_ip:
            tester.run_different_ip_test()
            passed = True
        elif args.keepalive:
            tester.run_keepalive_test()
            passed = True
        else:
            # NOTE(review): the verdict of the basic test is deliberately not
            # propagated (as before), so this mode always exits 0.
            _, _ = tester.run_basic_test(count=args.count, delay_ms=args.delay)
            passed = True
        sys.exit(0 if passed else 1)
    finally:
        tester.close()


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,404 @@
#!/bin/bash
#
# test-correlation.sh - Test script for log correlation debugging
#
# This script sends test HTTP (A) and Network (B) events to the logcorrelator
# Unix sockets and verifies that correlation is working correctly.
#
# Usage:
# ./test-correlation.sh [options]
#
# Options:
# -h, --http-socket PATH Path to HTTP socket (default: /var/run/logcorrelator/http.socket)
# -n, --network-socket PATH Path to Network socket (default: /var/run/logcorrelator/network.socket)
# -c, --count NUM Number of test pairs to send (default: 10)
# -d, --delay MS Delay between pairs in milliseconds (default: 100)
# -v, --verbose Enable verbose output
# -m, --metrics-url URL Metrics server URL (default: http://localhost:8080/metrics)
# --skip-metrics Skip metrics check
# --help Show this help message
#
# Abort the script as soon as any command exits with a non-zero status.
set -e
# Default values (each one can be overridden by the options parsed further down)
HTTP_SOCKET="/var/run/logcorrelator/http.socket"
NETWORK_SOCKET="/var/run/logcorrelator/network.socket"
# Number of A/B event pairs to send
COUNT=10
# Pause between two pairs, in milliseconds
DELAY_MS=100
VERBOSE=false
METRICS_URL="http://localhost:8080/metrics"
SKIP_METRICS=false
# Colors for output (ANSI escape sequences, rendered by `echo -e` in the print helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Print functions
# Print an informational message on stdout, prefixed with a blue [INFO] tag.
#   $1 - message text (backslash escapes are interpreted by `echo -e`)
info() {
    local message="$1"
    echo -e "${BLUE}[INFO]${NC} ${message}"
}
# Print a success message on stdout, prefixed with a green [OK] tag.
#   $1 - message text (backslash escapes are interpreted by `echo -e`)
success() {
    local message="$1"
    echo -e "${GREEN}[OK]${NC} ${message}"
}
# Print a warning, prefixed with a yellow [WARN] tag.
#   $1 - message text (backslash escapes are interpreted by `echo -e`)
# The message goes to stderr so that it never ends up inside the output of a
# function whose stdout is captured with $(...), e.g. the metrics JSON.
warn() {
    echo -e "${YELLOW}[WARN]${NC} $1" >&2
}
# Print an error message, prefixed with a red [ERROR] tag.
#   $1 - message text (backslash escapes are interpreted by `echo -e`)
# Errors are diagnostics, so they are written to stderr: stdout stays clean
# when the script output is piped or captured.
error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Print a debug message on stdout, but only when VERBOSE is "true".
#   $1 - message text (backslash escapes are interpreted by `echo -e`)
# Always returns 0 so that a silent call never trips `set -e`.
verbose() {
    if [ "$VERBOSE" != true ]; then
        return 0
    fi
    echo -e "${BLUE}[DEBUG]${NC} $1"
}
# Show help: print lines 4 to 20 of this script (the usage part of the header
# comment) with the leading "#" and one following space removed, then exit 0.
show_help() {
    sed -n '4,20p' "$0" | sed -e 's/^#//' -e 's/^ //'
    exit 0
}
# Parse arguments.
#
# Options that take a value are checked for a missing value up front: without
# the check, `shift 2` fails on a trailing option and `set -e` kills the
# script without any message.
#
# --test-time-window / --test-different-ip are the flags advertised at the end
# of the script; they are accepted here (instead of being rejected as unknown)
# and put back as positional parameters after parsing, because the checks at
# the end of the script look for them in "$@".
EXTRA_TEST_FLAGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--http-socket|-n|--network-socket|-c|--count|-d|--delay|-m|--metrics-url)
            if [[ $# -lt 2 ]]; then
                error "Option $1 requires a value"
                echo "Use --help for usage information"
                exit 1
            fi
            case "$1" in
                -h|--http-socket)    HTTP_SOCKET="$2" ;;
                -n|--network-socket) NETWORK_SOCKET="$2" ;;
                -c|--count)          COUNT="$2" ;;
                -d|--delay)          DELAY_MS="$2" ;;
                -m|--metrics-url)    METRICS_URL="$2" ;;
            esac
            shift 2
            ;;
        -v|--verbose)
            VERBOSE=true
            shift
            ;;
        --skip-metrics)
            SKIP_METRICS=true
            shift
            ;;
        --test-time-window|--test-different-ip)
            EXTRA_TEST_FLAGS+=("$1")
            shift
            ;;
        --help)
            show_help
            ;;
        *)
            error "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done
# COUNT and DELAY_MS are used in integer comparisons and arithmetic later on,
# so reject anything that is not a non-negative integer right away.
if ! [[ "$COUNT" =~ ^[0-9]+$ ]]; then
    error "Invalid --count value: $COUNT (expected a non-negative integer)"
    exit 1
fi
if ! [[ "$DELAY_MS" =~ ^[0-9]+$ ]]; then
    error "Invalid --delay value: $DELAY_MS (expected a non-negative integer)"
    exit 1
fi
# Restore the additional-test flags as the positional parameters.
set -- "${EXTRA_TEST_FLAGS[@]}"
# Pick the tool used to write to the Unix sockets: socat is preferred, nc is
# the fallback. The script stops with installation hints when neither exists.
SEND_CMD=""
if command -v nc &> /dev/null; then
    SEND_CMD="nc"
fi
if command -v socat &> /dev/null; then
    # Evaluated last so that socat wins when both tools are installed.
    SEND_CMD="socat"
fi
if [ -z "$SEND_CMD" ]; then
    error "Neither socat nor nc (netcat) found. Please install one of them."
    echo " Ubuntu/Debian: apt-get install socat OR apt-get install netcat-openbsd"
    echo " RHEL/CentOS: yum install socat OR yum install nc"
    exit 1
fi
# Send one line of data to a Unix socket using the tool selected in SEND_CMD.
#   $1 - socket path
#   $2 - payload (a trailing newline is appended by echo)
# stderr of the sending tool is discarded; the function's exit status is the
# exit status of that tool.
send_to_socket() {
    local socket_path="$1"
    local payload="$2"
    case "$SEND_CMD" in
        socat)
            echo "$payload" | socat - "UNIX-SENDTO:$socket_path" 2>/dev/null
            ;;
        *)
            echo "$payload" | nc -U -u "$socket_path" 2>/dev/null
            ;;
    esac
}
# Print the current time as seconds since the epoch immediately followed by
# the nanosecond field (date's %s and %N formats).
get_timestamp_ns() {
    date '+%s%N'
}
# Build a JSON HTTP (A) event and send it to HTTP_SOCKET.
#   $1 - source IP
#   $2 - source port (inserted unquoted, i.e. as a JSON number)
#   $3 - timestamp (inserted unquoted, i.e. as a JSON number)
#   $4 - HTTP method (default: GET)
#   $5 - request path (default: /test)
#   $6 - host (default: example.com)
# Destination, HTTP version and the two headers are fixed test values.
send_http_event() {
    local src_ip="$1"
    local src_port="$2"
    local timestamp="$3"
    local method="${4:-GET}"
    local path="${5:-/test}"
    local host="${6:-example.com}"
    local payload
    printf -v payload '{"src_ip":"%s","src_port":%s,"dst_ip":"10.0.0.1","dst_port":443,"timestamp":%s,"method":"%s","path":"%s","host":"%s","http_version":"HTTP/1.1","header_user_agent":"TestAgent/1.0","header_accept":"*/*"}' \
        "$src_ip" "$src_port" "$timestamp" "$method" "$path" "$host"
    verbose "Sending HTTP event: $payload"
    send_to_socket "$HTTP_SOCKET" "$payload"
}
# Build a JSON Network (B) event and send it to NETWORK_SOCKET.
#   $1 - source IP
#   $2 - source port (inserted unquoted, i.e. as a JSON number)
#   $3 - timestamp (inserted unquoted, i.e. as a JSON number)
#   $4 - JA3 value (default: abc123)
#   $5 - JA4 value (default: def456)
# Destination, TLS version and SNI are fixed test values.
send_network_event() {
    local src_ip="$1"
    local src_port="$2"
    local timestamp="$3"
    local ja3="${4:-abc123}"
    local ja4="${5:-def456}"
    local payload
    printf -v payload '{"src_ip":"%s","src_port":%s,"dst_ip":"10.0.0.1","dst_port":443,"timestamp":%s,"ja3":"%s","ja4":"%s","tls_version":"TLS1.3","tls_sni":"example.com"}' \
        "$src_ip" "$src_port" "$timestamp" "$ja3" "$ja4"
    verbose "Sending Network event: $payload"
    send_to_socket "$NETWORK_SOCKET" "$payload"
}
# Verify that HTTP_SOCKET and NETWORK_SOCKET both exist and are sockets.
# Every missing socket is reported; if at least one is missing the script
# exits with status 1, otherwise a success line is printed.
check_sockets() {
    local missing=0
    if [ -S "$HTTP_SOCKET" ]; then
        verbose "HTTP socket found: $HTTP_SOCKET"
    else
        error "HTTP socket not found: $HTTP_SOCKET"
        missing=$((missing + 1))
    fi
    if [ -S "$NETWORK_SOCKET" ]; then
        verbose "Network socket found: $NETWORK_SOCKET"
    else
        error "Network socket not found: $NETWORK_SOCKET"
        missing=$((missing + 1))
    fi
    if [ "$missing" -ne 0 ]; then
        error "$missing socket(s) not found. Is logcorrelator running?"
        exit 1
    fi
    success "Sockets check passed"
}
# Fetch the metrics document from METRICS_URL and print it on stdout.
# Prints "{}" (an empty JSON object) whenever no metrics are available:
#   - SKIP_METRICS is "true",
#   - the request fails (including HTTP error statuses),
#   - neither curl nor wget is installed.
# stdout carries the JSON only; the warning is sent to stderr because callers
# capture this function's output with $(...). Always returns 0.
get_metrics() {
    if [ "$SKIP_METRICS" = true ]; then
        echo "{}"
        return 0
    fi
    if command -v curl &> /dev/null; then
        # -f: treat HTTP errors as failures instead of returning the error page
        curl -sf "$METRICS_URL" 2>/dev/null || echo "{}"
    elif command -v wget &> /dev/null; then
        wget -qO- "$METRICS_URL" 2>/dev/null || echo "{}"
    else
        warn "Neither curl nor wget found. Skipping metrics check." >&2
        echo "{}"
    fi
}
# Extract the value of a top-level key from a JSON document.
#   $1 - JSON text
#   $2 - key name
# Prints the value, or 0 when the key is missing, null, or the JSON cannot be
# parsed. Uses jq when available; otherwise falls back to a grep-based lookup
# that only understands flat JSON with unsigned integer values.
get_json_value() {
    local json="$1"
    local key="$2"
    local value=""
    if command -v jq &> /dev/null; then
        # jq fails on invalid or empty input: treat that as "no value"
        value=$(echo "$json" | jq -r ".$key // 0" 2>/dev/null) || value=""
    else
        # Fallback: simple grep (works for flat JSON); whitespace around the
        # colon is tolerated and only the first match is used.
        value=$(echo "$json" \
            | grep -o "\"$key\"[[:space:]]*:[[:space:]]*[0-9][0-9]*" \
            | head -n 1 \
            | sed 's/.*:[[:space:]]*//') || value=""
    fi
    # The pipeline status cannot be used to detect "not found" (the last
    # command of the pipeline succeeds on empty input), so default here.
    echo "${value:-0}"
}
# Read one counter from a metrics JSON document for use in shell arithmetic.
#   $1 - JSON text, $2 - key name
# Prints the value when it is an integer, 0 otherwise.
_metric_or_zero() {
    local value
    value=$(get_json_value "$1" "$2") || true
    if [[ "$value" =~ ^-?[0-9]+$ ]]; then
        echo "$value"
    else
        echo 0
    fi
}

# Main test function.
# Sends COUNT pairs of HTTP (A) and Network (B) events that share source IP,
# source port and timestamp, then compares the metrics counters read before
# and after sending.
# Returns 0 when all counters increased by the number of pairs sent (or when
# the metrics check is skipped), 1 otherwise. The function returns instead of
# exiting so that the caller can run further scenarios afterwards; because the
# script runs under `set -e`, a non-zero return from a plain top-level call
# still terminates the script with that status.
run_test() {
    info "Starting correlation test..."
    info "Configuration:"
    echo " HTTP Socket: $HTTP_SOCKET"
    echo " Network Socket: $NETWORK_SOCKET"
    echo " Test pairs: $COUNT"
    echo " Delay between: ${DELAY_MS}ms"
    echo " Metrics URL: $METRICS_URL"
    echo " Send command: $SEND_CMD"
    echo ""
    # Get initial metrics (not needed when the metrics check is skipped)
    local initial_success=0 initial_failed=0 initial_a=0 initial_b=0
    if [ "$SKIP_METRICS" != true ]; then
        info "Fetching initial metrics..."
        local initial_metrics
        initial_metrics=$(get_metrics) || initial_metrics="{}"
        initial_success=$(_metric_or_zero "$initial_metrics" "correlations_success")
        initial_failed=$(_metric_or_zero "$initial_metrics" "correlations_failed")
        initial_a=$(_metric_or_zero "$initial_metrics" "events_received_a")
        initial_b=$(_metric_or_zero "$initial_metrics" "events_received_b")
        info "Initial metrics:"
        echo " Events A: $initial_a"
        echo " Events B: $initial_b"
        echo " Success: $initial_success"
        echo " Failed: $initial_failed"
        echo ""
    fi
    # Send test events
    info "Sending $COUNT test event pairs..."
    local base_timestamp
    base_timestamp=$(get_timestamp_ns)
    local sent=0
    # Delay in seconds with millisecond precision, computed without bc
    local delay_s
    delay_s=$(printf '%d.%03d' $((DELAY_MS / 1000)) $((DELAY_MS % 1000)))
    local i
    for ((i = 1; i <= COUNT; i++)); do
        local src_ip="192.168.1.$((i % 254 + 1))"
        local src_port=$((8000 + i))
        # Send A and B with same timestamp (should correlate);
        # consecutive pairs are spaced by 1000000 timestamp units.
        local ts_a=$((base_timestamp + i * 1000000))
        local ts_b=$ts_a # Same timestamp for perfect correlation
        if ! send_http_event "$src_ip" "$src_port" "$ts_a"; then
            error "Failed to send HTTP event for pair $i ($src_ip:$src_port)"
            return 1
        fi
        if ! send_network_event "$src_ip" "$src_port" "$ts_b"; then
            error "Failed to send Network event for pair $i ($src_ip:$src_port)"
            return 1
        fi
        sent=$((sent + 1))
        verbose "Sent pair $i: $src_ip:$src_port"
        if [ "$DELAY_MS" -gt 0 ]; then
            sleep "$delay_s"
        fi
    done
    success "Sent $sent event pairs"
    echo ""
    # Without metrics there is nothing to compare against: report and stop
    # instead of failing the validation on counters that were never read.
    if [ "$SKIP_METRICS" = true ]; then
        warn "Metrics check skipped: correlation results were not verified"
        return 0
    fi
    # Wait for processing
    info "Waiting for processing (2 seconds)..."
    sleep 2
    # Get final metrics
    info "Fetching final metrics..."
    local final_metrics
    final_metrics=$(get_metrics) || final_metrics="{}"
    local final_success final_failed final_a final_b
    final_success=$(_metric_or_zero "$final_metrics" "correlations_success")
    final_failed=$(_metric_or_zero "$final_metrics" "correlations_failed")
    final_a=$(_metric_or_zero "$final_metrics" "events_received_a")
    final_b=$(_metric_or_zero "$final_metrics" "events_received_b")
    # Calculate deltas
    local delta_success=$((final_success - initial_success))
    local delta_failed=$((final_failed - initial_failed))
    local delta_a=$((final_a - initial_a))
    local delta_b=$((final_b - initial_b))
    echo ""
    info "Results:"
    echo " Events A sent: $delta_a (expected: $sent)"
    echo " Events B sent: $delta_b (expected: $sent)"
    echo " Correlations: $delta_success"
    echo " Failures: $delta_failed"
    echo ""
    # Validation
    local test_passed=true
    if [ "$delta_a" -ne "$sent" ]; then
        error "Event A count mismatch: got $delta_a, expected $sent"
        test_passed=false
    fi
    if [ "$delta_b" -ne "$sent" ]; then
        error "Event B count mismatch: got $delta_b, expected $sent"
        test_passed=false
    fi
    if [ "$delta_success" -ne "$sent" ]; then
        error "Correlation count mismatch: got $delta_success, expected $sent"
        test_passed=false
    fi
    # Failures are reported but do not fail the test on their own
    if [ "$delta_failed" -ne 0 ]; then
        warn "Unexpected correlation failures: $delta_failed"
    fi
    if [ "$test_passed" = true ]; then
        success "All tests passed! Correlation is working correctly."
        return 0
    fi
    error "Some tests failed. Check the logs for details."
    return 1
}
# Time window scenario: send an A event, wait 11 seconds, then send the
# matching B event (same source IP and port). The 11 s wait is meant to exceed
# the correlator's time window (10 s by default according to the original
# author). Nothing is asserted here; the outcome has to be read from metrics.
run_time_window_test() {
    local client_ip="192.168.100.1"
    local client_port="9999"
    info "Running time window test (B arrives after time window)..."
    # A side first
    local http_ts=$(get_timestamp_ns)
    send_http_event "$client_ip" "$client_port" "$http_ts"
    info "Sent A event at timestamp $http_ts"
    # Let the time window expire
    info "Waiting 11 seconds (time window should expire)..."
    sleep 11
    # B side afterwards
    local network_ts=$(get_timestamp_ns)
    send_network_event "$client_ip" "$client_port" "$network_ts"
    info "Sent B event at timestamp $network_ts"
    info "This should result in a time_window failure (check metrics)"
}
# Different source IP scenario: A and B share port and timestamp but come from
# two different source IPs, so they are not expected to correlate. Nothing is
# asserted here; the outcome has to be read from logs/metrics.
run_different_ip_test() {
    local http_ip="192.168.200.1"
    local network_ip="192.168.200.2"
    local shared_port="7777"
    info "Running different IP test (should NOT correlate)..."
    local shared_ts=$(get_timestamp_ns)
    # A event from the first address
    send_http_event "$http_ip" "$shared_port" "$shared_ts"
    info "Sent A event from 192.168.200.1:7777"
    # B event from the second address
    send_network_event "$network_ip" "$shared_port" "$shared_ts"
    info "Sent B event from 192.168.200.2:7777 (different IP)"
    info "These should NOT correlate (different src_ip)"
}
# Entry point: verify the sockets, run the main correlation test, then list
# and optionally run the additional scenarios.
check_sockets
echo ""
# Main test
run_test
echo ""
info "Additional tests available:"
echo " --test-time-window Test time window expiration"
echo " --test-different-ip Test different IP (no correlation)"
# An additional scenario runs when its flag appears anywhere in the
# positional parameters (joined into one string for the substring match).
case "$*" in
    *"--test-time-window"*)
        echo ""
        run_time_window_test
        ;;
esac
case "$*" in
    *"--test-different-ip"*)
        echo ""
        run_different_ip_test
        ;;
esac

View File

@ -0,0 +1,21 @@
-- Reference table of known bot networks (re-created from scratch on each run).
DROP TABLE IF EXISTS mabase_prod.ref_bot_networks;
CREATE TABLE mabase_prod.ref_bot_networks (
    -- CIDR block in text form, e.g. '1.2.3.0/24' or '2001:db8::/32'.
    -- ClickHouse has no CIDR data type (only IPv4 / IPv6 address types), so
    -- the block is stored as a String; match an address against it with
    -- isIPAddressInRange(address, network).
    network       String,
    bot_name      LowCardinality(String),
    is_legitimate UInt8,
    -- Version column: ReplacingMergeTree keeps the row with the greatest
    -- last_update for each (network, bot_name) key.
    last_update   DateTime
) ENGINE = ReplacingMergeTree(last_update)
ORDER BY (network, bot_name);
-- Table backed by the CSV file of bot IPs (File engine, one `ip` column).
-- IF NOT EXISTS keeps the script re-runnable: without it a second run stops
-- here because the table already exists.
CREATE TABLE IF NOT EXISTS mabase_prod.bot_ip (
    ip String
) ENGINE = File(CSV, 'bot_ip.csv');
-- Table backed by the CSV file of bot JA4 signatures (File engine, one `ja4`
-- column). IF NOT EXISTS keeps the script re-runnable: without it a second
-- run stops here because the table already exists.
CREATE TABLE IF NOT EXISTS mabase_prod.bot_ja4 (
    ja4 String
) ENGINE = File(CSV, 'bot_ja4.csv');

View File

@ -0,0 +1,234 @@
-- =============================================================================
-- logcorrelator - Initialisation ClickHouse
-- =============================================================================
-- Ce fichier crée la base de données, les tables, la vue matérialisée
-- et les utilisateurs nécessaires au fonctionnement de logcorrelator.
--
-- Usage :
-- clickhouse-client --multiquery < sql/init.sql
-- =============================================================================
-- -----------------------------------------------------------------------------
-- Database (created only if it does not exist yet)
-- -----------------------------------------------------------------------------
CREATE DATABASE IF NOT EXISTS mabase_prod;
-- -----------------------------------------------------------------------------
-- Raw table: direct target of the service's inserts.
-- The service only inserts into this table (raw_json column); ingest_time is
-- filled in by its DEFAULT. Rows are partitioned by ingestion day and expire
-- one day after ingestion (whole parts are dropped).
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.http_logs_raw
(
    raw_json    String   CODEC(ZSTD(3)),
    ingest_time DateTime DEFAULT now()
)
ENGINE = MergeTree
PARTITION BY toDate(ingest_time)
ORDER BY ingest_time
TTL ingest_time + INTERVAL 1 DAY
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
-- -----------------------------------------------------------------------------
-- Parsed table: filled automatically by the materialized view.
-- IF NOT EXISTS matches the other objects of this file and keeps the script
-- re-runnable (a plain CREATE TABLE fails on the second run).
-- Rows are partitioned by log_date and expire 7 days after log_date.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.http_logs
(
    -- Time
    `time` DateTime,
    `log_date` Date DEFAULT toDate(time),
    -- Network
    `src_ip` IPv4,
    `src_port` UInt16,
    `dst_ip` IPv4,
    `dst_port` UInt16,
    -- IPLocate enrichment
    `src_asn` UInt32,
    `src_country_code` LowCardinality(String),
    `src_as_name` LowCardinality(String),
    `src_org` LowCardinality(String),
    `src_domain` LowCardinality(String),
    -- HTTP
    `method` LowCardinality(String),
    `scheme` LowCardinality(String),
    `host` LowCardinality(String),
    `path` String CODEC(ZSTD(3)),
    `query` String CODEC(ZSTD(3)),
    `http_version` LowCardinality(String),
    -- Correlation
    `orphan_side` LowCardinality(String),
    `correlated` UInt8,
    `keepalives` UInt16,
    `a_timestamp` UInt64,
    `b_timestamp` UInt64,
    `conn_id` String CODEC(ZSTD(3)),
    -- IP metadata
    `ip_meta_df` UInt8,
    `ip_meta_id` UInt16,
    `ip_meta_total_length` UInt16,
    `ip_meta_ttl` UInt8,
    -- TCP metadata
    `tcp_meta_options` LowCardinality(String),
    `tcp_meta_window_size` UInt32,
    `tcp_meta_mss` UInt16,
    `tcp_meta_window_scale` UInt8,
    `syn_to_clienthello_ms` Int32,
    -- TLS / fingerprint
    `tls_version` LowCardinality(String),
    `tls_sni` LowCardinality(String),
    `tls_alpn` LowCardinality(String),
    `ja3` String CODEC(ZSTD(3)),
    `ja3_hash` String CODEC(ZSTD(3)),
    `ja4` String CODEC(ZSTD(3)),
    -- HTTP headers
    `client_headers` String CODEC(ZSTD(3)),
    `header_user_agent` String CODEC(ZSTD(3)),
    `header_accept` String CODEC(ZSTD(3)),
    `header_accept_encoding` String CODEC(ZSTD(3)),
    `header_accept_language` String CODEC(ZSTD(3)),
    `header_content_type` String CODEC(ZSTD(3)),
    `header_x_request_id` String CODEC(ZSTD(3)),
    `header_x_trace_id` String CODEC(ZSTD(3)),
    `header_x_forwarded_for` String CODEC(ZSTD(3)),
    `header_sec_ch_ua` String CODEC(ZSTD(3)),
    `header_sec_ch_ua_mobile` String CODEC(ZSTD(3)),
    `header_sec_ch_ua_platform` String CODEC(ZSTD(3)),
    `header_sec_fetch_dest` String CODEC(ZSTD(3)),
    `header_sec_fetch_mode` String CODEC(ZSTD(3)),
    `header_sec_fetch_site` String CODEC(ZSTD(3))
)
ENGINE = MergeTree
PARTITION BY log_date
ORDER BY (time, src_ip, dst_ip, ja4)
TTL log_date + INTERVAL 7 DAY
SETTINGS
    index_granularity = 8192,
    ttl_only_drop_parts = 1;
-- -----------------------------------------------------------------------------
-- Materialized view: parses the JSON of http_logs_raw into http_logs.
--
-- The view is dropped and re-created so that a changed definition is applied.
--
-- JSONExtractString returns an empty string (not NULL) when a key is missing,
-- so a coalesce() default around it is never used. The three conversions that
-- throw on an empty/invalid string therefore use their non-throwing variants:
--   time   -> parseDateTimeBestEffortOrZero (1970-01-01 00:00:00 on failure)
--   src_ip -> toIPv4OrDefault               (0.0.0.0 on failure)
--   dst_ip -> toIPv4OrDefault               (0.0.0.0 on failure)
-- Otherwise a single event without these fields makes the view fail and the
-- insert into http_logs_raw is rejected.
--
-- The src_* enrichment columns are looked up in the
-- mabase_prod.dict_iplocate_asn dictionary, which must exist beforehand.
-- -----------------------------------------------------------------------------
DROP VIEW IF EXISTS mabase_prod.mv_http_logs;
CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.mv_http_logs
TO mabase_prod.http_logs
AS
SELECT
    parseDateTimeBestEffortOrZero(JSONExtractString(raw_json, 'time')) AS time,
    toDate(time) AS log_date,
    toIPv4OrDefault(JSONExtractString(raw_json, 'src_ip')) AS src_ip,
    toUInt16(coalesce(JSONExtractUInt(raw_json, 'src_port'), 0)) AS src_port,
    toIPv4OrDefault(JSONExtractString(raw_json, 'dst_ip')) AS dst_ip,
    toUInt16(coalesce(JSONExtractUInt(raw_json, 'dst_port'), 0)) AS dst_port,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'asn',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        toUInt32(0)
    ) AS src_asn,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'country_code',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_country_code,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'name',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_as_name,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'org',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_org,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'domain',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_domain,
    coalesce(JSONExtractString(raw_json, 'method'), '') AS method,
    coalesce(JSONExtractString(raw_json, 'scheme'), '') AS scheme,
    coalesce(JSONExtractString(raw_json, 'host'), '') AS host,
    coalesce(JSONExtractString(raw_json, 'path'), '') AS path,
    coalesce(JSONExtractString(raw_json, 'query'), '') AS query,
    coalesce(JSONExtractString(raw_json, 'http_version'), '') AS http_version,
    coalesce(JSONExtractString(raw_json, 'orphan_side'), '') AS orphan_side,
    toUInt8(coalesce(JSONExtractBool(raw_json, 'correlated'), 0)) AS correlated,
    toUInt16(coalesce(JSONExtractUInt(raw_json, 'keepalives'), 0)) AS keepalives,
    coalesce(JSONExtractUInt(raw_json, 'a_timestamp'), 0) AS a_timestamp,
    coalesce(JSONExtractUInt(raw_json, 'b_timestamp'), 0) AS b_timestamp,
    coalesce(JSONExtractString(raw_json, 'conn_id'), '') AS conn_id,
    toUInt8(coalesce(JSONExtractBool(raw_json, 'ip_meta_df'), 0)) AS ip_meta_df,
    toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_id'), 0)) AS ip_meta_id,
    toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_total_length'), 0)) AS ip_meta_total_length,
    toUInt8(coalesce(JSONExtractUInt(raw_json, 'ip_meta_ttl'), 0)) AS ip_meta_ttl,
    coalesce(JSONExtractString(raw_json, 'tcp_meta_options'), '') AS tcp_meta_options,
    toUInt32(coalesce(JSONExtractUInt(raw_json, 'tcp_meta_window_size'), 0)) AS tcp_meta_window_size,
    toUInt16(coalesce(JSONExtractUInt(raw_json, 'tcp_meta_mss'), 0)) AS tcp_meta_mss,
    toUInt8(coalesce(JSONExtractUInt(raw_json, 'tcp_meta_window_scale'), 0)) AS tcp_meta_window_scale,
    toInt32(coalesce(JSONExtractInt(raw_json, 'syn_to_clienthello_ms'), 0)) AS syn_to_clienthello_ms,
    coalesce(JSONExtractString(raw_json, 'tls_version'), '') AS tls_version,
    coalesce(JSONExtractString(raw_json, 'tls_sni'), '') AS tls_sni,
    coalesce(JSONExtractString(raw_json, 'tls_alpn'), '') AS tls_alpn,
    coalesce(JSONExtractString(raw_json, 'ja3'), '') AS ja3,
    coalesce(JSONExtractString(raw_json, 'ja3_hash'), '') AS ja3_hash,
    coalesce(JSONExtractString(raw_json, 'ja4'), '') AS ja4,
    coalesce(JSONExtractString(raw_json, 'client_headers'), '') AS client_headers,
    coalesce(JSONExtractString(raw_json, 'header_User-Agent'), '') AS header_user_agent,
    coalesce(JSONExtractString(raw_json, 'header_Accept'), '') AS header_accept,
    coalesce(JSONExtractString(raw_json, 'header_Accept-Encoding'), '') AS header_accept_encoding,
    coalesce(JSONExtractString(raw_json, 'header_Accept-Language'), '') AS header_accept_language,
    coalesce(JSONExtractString(raw_json, 'header_Content-Type'), '') AS header_content_type,
    coalesce(JSONExtractString(raw_json, 'header_X-Request-Id'), '') AS header_x_request_id,
    coalesce(JSONExtractString(raw_json, 'header_X-Trace-Id'), '') AS header_x_trace_id,
    coalesce(JSONExtractString(raw_json, 'header_X-Forwarded-For'), '') AS header_x_forwarded_for,
    coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA'), '') AS header_sec_ch_ua,
    coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA-Mobile'), '') AS header_sec_ch_ua_mobile,
    coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA-Platform'), '') AS header_sec_ch_ua_platform,
    coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Dest'), '') AS header_sec_fetch_dest,
    coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Mode'), '') AS header_sec_fetch_mode,
    coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Site'), '') AS header_sec_fetch_site
FROM mabase_prod.http_logs_raw;
-- -----------------------------------------------------------------------------
-- Users and permissions
--
-- 'ChangeMe' is a placeholder: replace it before running this file against a
-- real server. sha256_password is used instead of plaintext_password so that
-- the server stores a hash rather than the clear-text password; clients still
-- authenticate with the same password.
-- -----------------------------------------------------------------------------
CREATE USER IF NOT EXISTS data_writer IDENTIFIED WITH sha256_password BY 'ChangeMe';
CREATE USER IF NOT EXISTS analyst IDENTIFIED WITH sha256_password BY 'ChangeMe';
-- data_writer: INSERT and SELECT on the raw table only
GRANT INSERT ON mabase_prod.http_logs_raw TO data_writer;
GRANT SELECT ON mabase_prod.http_logs_raw TO data_writer;
-- analyst: read access to the parsed table
GRANT SELECT ON mabase_prod.http_logs TO analyst;
-- -----------------------------------------------------------------------------
-- Vérifications post-installation
-- -----------------------------------------------------------------------------
-- SELECT count(*), min(ingest_time), max(ingest_time) FROM mabase_prod.http_logs_raw;
-- SELECT count(*), min(time), max(time) FROM mabase_prod.http_logs;
-- SELECT time, src_ip, dst_ip, method, host, path, ja4 FROM mabase_prod.http_logs ORDER BY time DESC LIMIT 10;

View File

@ -0,0 +1,29 @@
-- IP-to-ASN dictionary loaded from the IPLocate CSV file (with header row).
-- It is dropped first so that a changed definition is applied on re-run.
-- Keyed by network prefix with the IP_TRIE layout; the source is reloaded at
-- an interval between 3600 and 7200 seconds.
DROP DICTIONARY IF EXISTS mabase_prod.dict_iplocate_asn;
CREATE DICTIONARY IF NOT EXISTS mabase_prod.dict_iplocate_asn
(
    network      String,
    asn          UInt32,
    country_code String,
    name         String,
    org          String,
    domain       String
)
PRIMARY KEY network
SOURCE(FILE(
    path '/var/lib/clickhouse/user_files/iplocate-ip-to-asn.csv'
    format 'CSVWithNames'
))
LAYOUT(IP_TRIE())
LIFETIME(MIN 3600 MAX 7200);
-- Suppression si existe pour reconfiguration
DROP TABLE IF EXISTS mabase_prod.ref_bot_networks;
-- Table optimisée pour le filtrage binaire de CIDR
CREATE TABLE mabase_prod.ref_bot_networks (
network IPv6CIDR, -- Gère nativement '1.2.3.0/24' et '2001:db8::/32'
bot_name LowCardinality(String),
is_legitimate UInt8, -- 1 = Whitelist, 0 = Blacklist
last_update DateTime
) ENGINE = ReplacingMergeTree(last_update)
ORDER BY (network, bot_name)