Initial commit: logcorrelator with unified packaging (DEB + RPM using fpm)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Jacquin Antoine
2026-02-27 15:31:46 +01:00
commit 8fc14c1e94
35 changed files with 4829 additions and 0 deletions

View File

@ -0,0 +1,334 @@
package unixsocket
import (
"bufio"
"context"
"encoding/json"
"fmt"
"net"
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
const (
	// DefaultSocketPermissions is applied to the socket file right after it is
	// created: owner + group read/write, no world access.
	DefaultSocketPermissions os.FileMode = 0660
	// MaxLineSize caps a single JSON line read from a connection (1MB).
	MaxLineSize = 1024 * 1024
	// MaxConcurrentConnections bounds simultaneous client connections per socket.
	MaxConcurrentConnections = 100
	// MaxEventsPerSecond is the intended ingest rate limit.
	// NOTE(review): declared but not enforced anywhere in this file — confirm
	// whether rate limiting is implemented elsewhere or still TODO.
	MaxEventsPerSecond = 10000
)
// Config holds the Unix socket source configuration.
type Config struct {
	Name string // logical source name reported by Name()
	Path string // filesystem path of the unix socket to listen on
}
// UnixSocketSource reads newline-delimited JSON events from a Unix socket.
// One goroutine accepts connections; each connection gets its own reader
// goroutine, all tracked by wg so Stop can wait for a clean shutdown.
type UnixSocketSource struct {
	config   Config
	mu       sync.Mutex
	listener net.Listener
	done     chan struct{} // closed by Stop to end the accept loop
	wg       sync.WaitGroup
	semaphore chan struct{} // Limit concurrent connections
}
// NewUnixSocketSource builds a source for the given configuration.
// The returned source is inert until Start is called.
func NewUnixSocketSource(config Config) *UnixSocketSource {
	src := &UnixSocketSource{config: config}
	src.done = make(chan struct{})
	src.semaphore = make(chan struct{}, MaxConcurrentConnections)
	return src
}
// Name returns the configured logical source name.
func (s *UnixSocketSource) Name() string {
	return s.config.Name
}
// Start begins listening on the Unix socket and launches the accept loop in a
// background goroutine. It returns an error if the socket cannot be created
// or its permissions cannot be tightened; in both failure cases no goroutine
// is left running.
func (s *UnixSocketSource) Start(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) error {
	// Remove a stale socket file left over from a previous run; refuse to
	// clobber a path that exists but is not a socket.
	// NOTE(review): small TOCTOU window between this check and net.Listen
	// below — acceptable for a single-owner daemon, but confirm.
	if info, err := os.Stat(s.config.Path); err == nil {
		if info.Mode()&os.ModeSocket != 0 {
			if err := os.Remove(s.config.Path); err != nil {
				return fmt.Errorf("failed to remove existing socket: %w", err)
			}
		} else {
			return fmt.Errorf("path exists but is not a socket: %s", s.config.Path)
		}
	}
	// Create listener
	listener, err := net.Listen("unix", s.config.Path)
	if err != nil {
		return fmt.Errorf("failed to create unix socket listener: %w", err)
	}
	s.listener = listener
	// Tighten permissions before accepting traffic; fail hard (and clean up)
	// if that is not possible, rather than serving a world-writable socket.
	if err := os.Chmod(s.config.Path, DefaultSocketPermissions); err != nil {
		listener.Close()
		os.Remove(s.config.Path)
		return fmt.Errorf("failed to set socket permissions: %w", err)
	}
	s.wg.Add(1)
	go func() {
		defer s.wg.Done()
		s.acceptConnections(ctx, eventChan)
	}()
	return nil
}
// acceptConnections accepts client connections until Stop is called or ctx is
// cancelled, handing each accepted connection to its own reader goroutine.
// Connections beyond MaxConcurrentConnections are rejected (closed), not queued.
func (s *UnixSocketSource) acceptConnections(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) {
	for {
		// Bail out promptly on shutdown before blocking in Accept again.
		select {
		case <-s.done:
			return
		case <-ctx.Done():
			return
		default:
		}
		conn, err := s.listener.Accept()
		if err != nil {
			// Accept fails with "use of closed network connection" once Stop
			// closes the listener; any other error is retried.
			// NOTE(review): a persistent non-shutdown Accept error makes this
			// loop spin without backoff — consider a short sleep here.
			select {
			case <-s.done:
				return
			case <-ctx.Done():
				return
			default:
				continue
			}
		}
		// Check semaphore for connection limiting
		select {
		case s.semaphore <- struct{}{}:
			// Connection accepted
		default:
			// Too many connections, reject
			conn.Close()
			continue
		}
		s.wg.Add(1)
		go func(c net.Conn) {
			defer s.wg.Done()
			defer func() { <-s.semaphore }() // release the connection slot
			defer c.Close()
			s.readEvents(ctx, c, eventChan)
		}(conn)
	}
}
// readEvents consumes newline-delimited JSON from one connection, parses each
// line with parseJSONEvent, and forwards events to eventChan until EOF, a
// read error, or ctx cancellation.
//
// Fix over the original: the 5-minute read deadline was set once for the
// whole connection, so an actively-sending long-lived client was cut off
// after 5 minutes. The deadline is now refreshed after every successful read,
// turning it into an idle timeout.
func (s *UnixSocketSource) readEvents(ctx context.Context, conn net.Conn, eventChan chan<- *domain.NormalizedEvent) {
	const idleTimeout = 5 * time.Minute
	scanner := bufio.NewScanner(conn)
	// Allow lines up to MaxLineSize (1MB), starting from a small 4KB buffer.
	scanner.Buffer(make([]byte, 0, 4096), MaxLineSize)
	conn.SetReadDeadline(time.Now().Add(idleTimeout))
	for scanner.Scan() {
		// Each successful read proves the peer is alive; push the idle
		// deadline forward so only genuinely idle connections time out.
		conn.SetReadDeadline(time.Now().Add(idleTimeout))
		select {
		case <-ctx.Done():
			return
		default:
		}
		line := scanner.Bytes()
		if len(line) == 0 {
			continue
		}
		event, err := parseJSONEvent(line)
		if err != nil {
			// Malformed lines are skipped; the stream itself stays usable.
			continue
		}
		select {
		case eventChan <- event:
		case <-ctx.Done():
			return
		}
	}
	// Scanner errors (deadline expiry, resets, oversized lines) just end this
	// connection; the accept loop keeps serving others.
	_ = scanner.Err()
}
// parseJSONEvent decodes one JSON log line into a NormalizedEvent.
// src_ip and src_port (1-65535) are mandatory; dst_ip/dst_port are optional.
// Timestamps are taken from a numeric "timestamp" (nanoseconds), then an
// RFC3339 "time", then an RFC3339 "timestamp" string, falling back to now.
// "header_*" keys become Headers; all other unknown keys land in Extra.
// An event with at least one header is classified as SourceA, else SourceB.
func parseJSONEvent(data []byte) (*domain.NormalizedEvent, error) {
	var raw map[string]any
	if err := json.Unmarshal(data, &raw); err != nil {
		return nil, fmt.Errorf("invalid JSON: %w", err)
	}
	ev := &domain.NormalizedEvent{
		Raw:   raw,
		Extra: make(map[string]any),
	}
	// Mandatory source address.
	srcIP, ok := getString(raw, "src_ip")
	if !ok {
		return nil, fmt.Errorf("missing required field: src_ip")
	}
	ev.SrcIP = srcIP
	srcPort, ok := getInt(raw, "src_port")
	if !ok {
		return nil, fmt.Errorf("missing required field: src_port")
	}
	if srcPort < 1 || srcPort > 65535 {
		return nil, fmt.Errorf("src_port must be between 1 and 65535, got %d", srcPort)
	}
	ev.SrcPort = srcPort
	// Optional destination address; port 0 is tolerated here.
	if ip, ok := getString(raw, "dst_ip"); ok {
		ev.DstIP = ip
	}
	if port, ok := getInt(raw, "dst_port"); ok {
		if port < 0 || port > 65535 {
			return nil, fmt.Errorf("dst_port must be between 0 and 65535, got %d", port)
		}
		ev.DstPort = port
	}
	// Timestamp resolution, in priority order. A numeric "timestamp" is
	// assumed to be nanoseconds — TODO confirm producers never send seconds.
	if ns, ok := getInt64(raw, "timestamp"); ok {
		ev.Timestamp = time.Unix(0, ns)
	} else if str, ok := getString(raw, "time"); ok {
		if parsed, perr := time.Parse(time.RFC3339, str); perr == nil {
			ev.Timestamp = parsed
		}
	} else if str, ok := getString(raw, "timestamp"); ok {
		if parsed, perr := time.Parse(time.RFC3339, str); perr == nil {
			ev.Timestamp = parsed
		}
	}
	if ev.Timestamp.IsZero() {
		ev.Timestamp = time.Now()
	}
	// Single pass over the payload: route header_* keys to Headers, known
	// keys nowhere (already captured above), everything else to Extra.
	ev.Headers = make(map[string]string)
	known := map[string]bool{
		"src_ip": true, "src_port": true, "dst_ip": true, "dst_port": true,
		"timestamp": true, "time": true,
	}
	const headerPrefix = "header_"
	for key, val := range raw {
		switch {
		case strings.HasPrefix(key, headerPrefix):
			// Only string-valued headers with a non-empty suffix are kept;
			// a bare "header_" key is silently dropped (matches prior behavior).
			if str, ok := val.(string); ok && len(key) > len(headerPrefix) {
				ev.Headers[key[len(headerPrefix):]] = str
			}
		case known[key]:
			// Already extracted into typed fields.
		default:
			ev.Extra[key] = val
		}
	}
	// Presence of HTTP-style headers distinguishes the two producers.
	if len(ev.Headers) > 0 {
		ev.Source = domain.SourceA
	} else {
		ev.Source = domain.SourceB
	}
	return ev, nil
}
// getString fetches key from m and reports whether it is present with a
// string value.
func getString(m map[string]any, key string) (string, bool) {
	v, present := m[key]
	if !present {
		return "", false
	}
	str, isString := v.(string)
	return str, isString
}
// getInt fetches key from m as an int, coercing JSON numbers (float64),
// native int/int64 values, and decimal strings. The bool result reports
// whether a usable value was found.
func getInt(m map[string]any, key string) (int, bool) {
	v, present := m[key]
	if !present {
		return 0, false
	}
	switch n := v.(type) {
	case float64:
		// encoding/json decodes all numbers as float64.
		return int(n), true
	case int:
		return n, true
	case int64:
		return int(n), true
	case string:
		parsed, err := strconv.Atoi(n)
		if err != nil {
			return 0, false
		}
		return parsed, true
	default:
		return 0, false
	}
}
// getInt64 fetches key from m as an int64, coercing JSON numbers (float64),
// native int/int64 values, and decimal strings. The bool result reports
// whether a usable value was found.
func getInt64(m map[string]any, key string) (int64, bool) {
	v, present := m[key]
	if !present {
		return 0, false
	}
	switch n := v.(type) {
	case float64:
		// encoding/json decodes all numbers as float64.
		return int64(n), true
	case int:
		return int64(n), true
	case int64:
		return n, true
	case string:
		parsed, err := strconv.ParseInt(n, 10, 64)
		if err != nil {
			return 0, false
		}
		return parsed, true
	default:
		return 0, false
	}
}
// Stop gracefully stops the source: it signals the accept loop, closes the
// listener (which unblocks Accept), waits for all goroutines, and removes the
// socket file.
//
// Fix over the original: a second Stop call panicked on close of the already
// closed done channel. The close is now guarded, making Stop idempotent.
func (s *UnixSocketSource) Stop() error {
	s.mu.Lock()
	defer s.mu.Unlock()
	select {
	case <-s.done:
		// Already stopped (or stopping); fall through to cleanup, which is
		// itself safe to repeat.
	default:
		close(s.done)
	}
	if s.listener != nil {
		s.listener.Close()
	}
	s.wg.Wait()
	// Clean up the socket file; a missing file is not an error.
	if err := os.Remove(s.config.Path); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("failed to remove socket file: %w", err)
	}
	return nil
}

View File

@ -0,0 +1,98 @@
package unixsocket
import (
"testing"
"time"
)
// TestParseJSONEvent_Apache: an event carrying header_* fields parses with
// the headers extracted under their stripped names.
func TestParseJSONEvent_Apache(t *testing.T) {
	data := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"dst_ip": "10.0.0.1",
"dst_port": 80,
"timestamp": 1704110400000000000,
"method": "GET",
"path": "/api/test",
"header_host": "example.com",
"header_user_agent": "Mozilla/5.0"
}`)
	event, err := parseJSONEvent(data)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if event.SrcIP != "192.168.1.1" {
		t.Errorf("expected src_ip 192.168.1.1, got %s", event.SrcIP)
	}
	if event.SrcPort != 8080 {
		t.Errorf("expected src_port 8080, got %d", event.SrcPort)
	}
	if event.Headers["host"] != "example.com" {
		t.Errorf("expected header host example.com, got %s", event.Headers["host"])
	}
	if event.Headers["user_agent"] != "Mozilla/5.0" {
		t.Errorf("expected header_user_agent Mozilla/5.0, got %s", event.Headers["user_agent"])
	}
}

// TestParseJSONEvent_Network: an event without headers keeps unknown fields
// (ja3/ja4/tcp flags) in Extra.
func TestParseJSONEvent_Network(t *testing.T) {
	data := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"dst_ip": "10.0.0.1",
"dst_port": 443,
"ja3": "abc123def456",
"ja4": "xyz789",
"tcp_meta_flags": "SYN"
}`)
	event, err := parseJSONEvent(data)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if event.SrcIP != "192.168.1.1" {
		t.Errorf("expected src_ip 192.168.1.1, got %s", event.SrcIP)
	}
	if event.Extra["ja3"] != "abc123def456" {
		t.Errorf("expected ja3 abc123def456, got %v", event.Extra["ja3"])
	}
}

// TestParseJSONEvent_InvalidJSON: malformed input must be rejected.
func TestParseJSONEvent_InvalidJSON(t *testing.T) {
	data := []byte(`{invalid json}`)
	_, err := parseJSONEvent(data)
	if err == nil {
		t.Error("expected error for invalid JSON")
	}
}

// TestParseJSONEvent_MissingFields: src_ip/src_port are mandatory.
func TestParseJSONEvent_MissingFields(t *testing.T) {
	data := []byte(`{"other_field": "value"}`)
	_, err := parseJSONEvent(data)
	if err == nil {
		t.Error("expected error for missing src_ip/src_port")
	}
}

// TestParseJSONEvent_StringTimestamp: an RFC3339 "time" field is honored.
func TestParseJSONEvent_StringTimestamp(t *testing.T) {
	data := []byte(`{
"src_ip": "192.168.1.1",
"src_port": 8080,
"time": "2024-01-01T12:00:00Z"
}`)
	event, err := parseJSONEvent(data)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	expected := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	if !event.Timestamp.Equal(expected) {
		t.Errorf("expected timestamp %v, got %v", expected, event.Timestamp)
	}
}

View File

@ -0,0 +1,333 @@
package clickhouse
import (
	"context"
	"database/sql"
	"encoding/json"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/logcorrelator/logcorrelator/internal/domain"
)
const (
	// DefaultBatchSize is the number of records per INSERT batch.
	DefaultBatchSize = 500
	// DefaultFlushIntervalMs is how often the background flusher runs (ms).
	DefaultFlushIntervalMs = 200
	// DefaultMaxBufferSize caps the in-memory buffer before the overflow
	// policy (drop or wait) kicks in.
	DefaultMaxBufferSize = 5000
	// DefaultTimeoutMs bounds individual flush/write operations (ms).
	DefaultTimeoutMs = 1000
	// DefaultPingTimeoutMs bounds the initial connection ping (ms).
	DefaultPingTimeoutMs = 5000
	// MaxRetries is the maximum number of attempts for a failed batch insert.
	MaxRetries = 3
	// RetryBaseDelay is the backoff base; attempt n waits RetryBaseDelay<<(n-1).
	RetryBaseDelay = 100 * time.Millisecond
)
// Config holds the ClickHouse sink configuration. Non-positive tuning values
// are replaced by the package defaults in NewClickHouseSink.
type Config struct {
	DSN             string // connection string passed to sql.Open
	Table           string // destination table (interpolated into the INSERT)
	BatchSize       int    // records per batch
	FlushIntervalMs int    // background flush period (ms)
	MaxBufferSize   int    // buffer cap before the overflow policy applies
	DropOnOverflow  bool   // true: drop new logs when full; false: wait with timeout
	AsyncInsert     bool   // NOTE(review): not referenced in this file — confirm where it is consumed
	TimeoutMs       int    // per-operation timeout (ms)
}
// ClickHouseSink writes correlated logs to ClickHouse in buffered batches.
// Write appends to buffer under mu; a background goroutine (flushLoop)
// drains it periodically or when signalled via flushChan.
type ClickHouseSink struct {
	config    Config
	db        *sql.DB
	mu        sync.Mutex // guards buffer
	buffer    []domain.CorrelatedLog
	flushChan chan struct{} // signal: a full batch is ready
	done      chan struct{} // closed by Close to stop flushLoop
	wg        sync.WaitGroup // tracks flushLoop
}
// NewClickHouseSink opens a ClickHouse connection, verifies it with a
// bounded ping, and starts the background flush goroutine. Non-positive
// tuning values in config are replaced by the package defaults.
func NewClickHouseSink(config Config) (*ClickHouseSink, error) {
	// Fill in defaults for any unset tuning knob.
	defaults := []struct {
		field *int
		value int
	}{
		{&config.BatchSize, DefaultBatchSize},
		{&config.FlushIntervalMs, DefaultFlushIntervalMs},
		{&config.MaxBufferSize, DefaultMaxBufferSize},
		{&config.TimeoutMs, DefaultTimeoutMs},
	}
	for _, d := range defaults {
		if *d.field <= 0 {
			*d.field = d.value
		}
	}
	// Open and verify the connection before constructing the sink.
	db, err := sql.Open("clickhouse", config.DSN)
	if err != nil {
		return nil, fmt.Errorf("failed to connect to ClickHouse: %w", err)
	}
	pingCtx, cancel := context.WithTimeout(context.Background(), time.Duration(DefaultPingTimeoutMs)*time.Millisecond)
	defer cancel()
	if err := db.PingContext(pingCtx); err != nil {
		db.Close()
		return nil, fmt.Errorf("failed to ping ClickHouse: %w", err)
	}
	sink := &ClickHouseSink{
		config:    config,
		db:        db,
		buffer:    make([]domain.CorrelatedLog, 0, config.BatchSize),
		flushChan: make(chan struct{}, 1),
		done:      make(chan struct{}),
	}
	// Background flusher; stopped by Close via the done channel.
	sink.wg.Add(1)
	go sink.flushLoop()
	return sink, nil
}
// Name returns the fixed sink identifier "clickhouse".
func (s *ClickHouseSink) Name() string {
	return "clickhouse"
}
// Write adds a log to the buffer, applying the configured overflow policy:
// drop silently (DropOnOverflow) or wait for space up to TimeoutMs.
//
// Fix over the original: the wait for space was performed while still holding
// s.mu, which prevented flushLoop from ever draining the buffer — the wait
// could only end in a timeout, and it stalled the flusher for TimeoutMs as a
// side effect. The wait is now done with the lock released.
func (s *ClickHouseSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	s.mu.Lock()
	for len(s.buffer) >= s.config.MaxBufferSize {
		if s.config.DropOnOverflow {
			// Shed load silently: dropping is the configured policy.
			s.mu.Unlock()
			return nil
		}
		s.mu.Unlock()
		if err := s.waitForSpace(ctx); err != nil {
			return err
		}
		// Re-acquire and re-check: another writer may have refilled the buffer.
		s.mu.Lock()
	}
	s.buffer = append(s.buffer, log)
	// Wake the flusher early once a full batch has accumulated.
	if len(s.buffer) >= s.config.BatchSize {
		select {
		case s.flushChan <- struct{}{}:
		default:
		}
	}
	s.mu.Unlock()
	return nil
}

// waitForSpace blocks — without holding s.mu, so flushLoop can drain — until
// the buffer has room, ctx is cancelled, or TimeoutMs expires.
func (s *ClickHouseSink) waitForSpace(ctx context.Context) error {
	deadline := time.After(time.Duration(s.config.TimeoutMs) * time.Millisecond)
	poll := time.NewTicker(time.Millisecond)
	defer poll.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-deadline:
			return fmt.Errorf("buffer full, timeout exceeded")
		case <-poll.C:
			s.mu.Lock()
			hasRoom := len(s.buffer) < s.config.MaxBufferSize
			s.mu.Unlock()
			if hasRoom {
				return nil
			}
		}
	}
}
// Flush forces an immediate flush of the buffered logs to ClickHouse,
// blocking until the flush (including retries) completes or ctx expires.
func (s *ClickHouseSink) Flush(ctx context.Context) error {
	return s.doFlush(ctx)
}
// Close stops the background flusher, drains any remaining buffered logs,
// and closes the database connection.
//
// Fixes over the original: (1) logs still sitting in the buffer were silently
// dropped on shutdown — a final flush is now attempted; (2) a second Close
// call panicked on close of the already closed done channel — Close is now
// idempotent.
func (s *ClickHouseSink) Close() error {
	s.mu.Lock()
	select {
	case <-s.done:
		// Already closed.
		s.mu.Unlock()
		return nil
	default:
		close(s.done)
	}
	s.mu.Unlock()
	s.wg.Wait()
	if s.db == nil {
		return nil
	}
	// Best-effort final flush, bounded by the configured timeout.
	flushCtx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
	flushErr := s.doFlush(flushCtx)
	cancel()
	closeErr := s.db.Close()
	if flushErr != nil {
		return flushErr
	}
	return closeErr
}
// flushLoop periodically flushes the buffer and also responds to explicit
// signals sent by Write when a full batch accumulates. It exits when Close
// closes s.done. Flush errors are dropped here — doFlush has already retried,
// and there is no caller to report to.
func (s *ClickHouseSink) flushLoop() {
	defer s.wg.Done()
	ticker := time.NewTicker(time.Duration(s.config.FlushIntervalMs) * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-s.done:
			return
		case <-ticker.C:
			// Timed flush: push out whatever has accumulated, however little.
			s.mu.Lock()
			needsFlush := len(s.buffer) > 0
			s.mu.Unlock()
			if needsFlush {
				// Bound each flush attempt with the configured timeout.
				ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
				s.doFlush(ctx)
				cancel()
			}
		case <-s.flushChan:
			// Signalled flush: only worth doing if a full batch is still
			// pending (the ticker may have drained it in the meantime).
			s.mu.Lock()
			needsFlush := len(s.buffer) >= s.config.BatchSize
			s.mu.Unlock()
			if needsFlush {
				ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
				s.doFlush(ctx)
				cancel()
			}
		}
	}
}
// doFlush swaps the buffer out under the lock, then writes the batch with
// retry/backoff via executeBatch. NOTE(review): if every retry fails (or the
// error is non-retryable) the copied batch is dropped — the returned error is
// the only record of that data loss; confirm this is acceptable upstream.
func (s *ClickHouseSink) doFlush(ctx context.Context) error {
	s.mu.Lock()
	if len(s.buffer) == 0 {
		s.mu.Unlock()
		return nil
	}
	// Copy the batch out and reset the buffer so writers are not blocked
	// while the insert is in flight.
	buffer := make([]domain.CorrelatedLog, len(s.buffer))
	copy(buffer, s.buffer)
	s.buffer = make([]domain.CorrelatedLog, 0, s.config.BatchSize)
	s.mu.Unlock()
	// The table name is interpolated directly; it comes from operator config,
	// not request data — confirm config stays trusted if ever exposed.
	query := fmt.Sprintf(`
INSERT INTO %s (timestamp, src_ip, src_port, dst_ip, dst_port, correlated, orphan_side, apache, network)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`, s.config.Table)
	// Retry with exponential backoff: 100ms, 200ms, ... between attempts.
	var lastErr error
	for attempt := 0; attempt < MaxRetries; attempt++ {
		if attempt > 0 {
			delay := RetryBaseDelay * time.Duration(1<<uint(attempt-1))
			select {
			case <-time.After(delay):
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		lastErr = s.executeBatch(ctx, query, buffer)
		if lastErr == nil {
			return nil // Success
		}
		// Only transient transport-level failures are worth retrying.
		if !isRetryableError(lastErr) {
			return fmt.Errorf("non-retryable error: %w", lastErr)
		}
	}
	return fmt.Errorf("failed after %d retries: %w", MaxRetries, lastErr)
}
// executeBatch inserts one copied batch inside a single transaction so all
// rows commit together or not at all.
//
// Fix over the original: the `orphanSide` reassignment was dead code — both
// branches assigned the same value — and has been removed; log.OrphanSide is
// passed through directly.
func (s *ClickHouseSink) executeBatch(ctx context.Context, query string, buffer []domain.CorrelatedLog) error {
	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("failed to begin transaction: %w", err)
	}
	// No-op once Commit succeeds; rolls the batch back on any early return.
	defer tx.Rollback()
	stmt, err := tx.PrepareContext(ctx, query)
	if err != nil {
		return fmt.Errorf("failed to prepare statement: %w", err)
	}
	defer stmt.Close()
	for _, log := range buffer {
		// Marshal errors are deliberately ignored: both fields originate from
		// decoded JSON and re-encode cleanly; a failure yields "null".
		apacheJSON, _ := json.Marshal(log.Apache)
		networkJSON, _ := json.Marshal(log.Network)
		// Store the correlated flag as 0/1 for the numeric column.
		correlated := 0
		if log.Correlated {
			correlated = 1
		}
		if _, err := stmt.ExecContext(ctx,
			log.Timestamp,
			log.SrcIP,
			log.SrcPort,
			log.DstIP,
			log.DstPort,
			correlated,
			log.OrphanSide,
			string(apacheJSON),
			string(networkJSON),
		); err != nil {
			return fmt.Errorf("failed to execute insert: %w", err)
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("failed to commit transaction: %w", err)
	}
	return nil
}
// isRetryableError reports whether err looks like a transient transport
// failure worth retrying, by case-insensitive substring match on its message.
//
// Fix over the original: the hand-rolled containsIgnoreCase/containsLower/
// toLower helpers are replaced with the strings standard library, which is
// clearer and avoids a per-call lowercase reimplementation.
func isRetryableError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	// Common transient failure signatures seen from drivers and the OS.
	retryable := []string{
		"connection refused",
		"connection reset",
		"timeout",
		"temporary failure",
		"network is unreachable",
		"broken pipe",
	}
	for _, substr := range retryable {
		if strings.Contains(msg, substr) {
			return true
		}
	}
	return false
}
// containsIgnoreCase reports whether substr occurs in s, ignoring ASCII case.
// The length guard is redundant with the loop bound in containsLower but
// serves as a cheap fast path.
func containsIgnoreCase(s, substr string) bool {
	return len(s) >= len(substr) && containsLower(s, substr)
}
// containsLower reports whether substr occurs in s after ASCII-lowercasing
// both via toLower (naive O(n*m) scan).
func containsLower(s, substr string) bool {
	haystack := toLower(s)
	needle := toLower(substr)
	limit := len(haystack) - len(needle)
	for start := 0; start <= limit; start++ {
		if haystack[start:start+len(needle)] == needle {
			return true
		}
	}
	return false
}
// toLower returns s with ASCII letters A-Z lowered; all other bytes pass
// through unchanged (deliberately byte-wise, not Unicode-aware).
//
// Fix over the original: the result buffer is preallocated to len(s) instead
// of growing byte-by-byte with append.
func toLower(s string) string {
	result := make([]byte, len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		result[i] = c
	}
	return string(result)
}

View File

@ -0,0 +1,305 @@
package clickhouse
import (
"context"
"testing"
"time"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
// TestClickHouseSink_Name: the sink reports the fixed name "clickhouse".
func TestClickHouseSink_Name(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{
			DSN:   "clickhouse://test:test@localhost:9000/test",
			Table: "test_table",
		},
	}
	if sink.Name() != "clickhouse" {
		t.Errorf("expected name 'clickhouse', got %s", sink.Name())
	}
}

// TestClickHouseSink_ConfigDefaults replays the default-filling logic from
// NewClickHouseSink on a zero config (no real connection is possible here).
func TestClickHouseSink_ConfigDefaults(t *testing.T) {
	// Test that defaults are applied correctly
	config := Config{
		DSN:   "clickhouse://test:test@localhost:9000/test",
		Table: "test_table",
		// Other fields are zero, should get defaults
	}
	// Verify defaults would be applied (we can't actually connect in tests)
	if config.BatchSize <= 0 {
		config.BatchSize = DefaultBatchSize
	}
	if config.FlushIntervalMs <= 0 {
		config.FlushIntervalMs = DefaultFlushIntervalMs
	}
	if config.MaxBufferSize <= 0 {
		config.MaxBufferSize = DefaultMaxBufferSize
	}
	if config.TimeoutMs <= 0 {
		config.TimeoutMs = DefaultTimeoutMs
	}
	if config.BatchSize != DefaultBatchSize {
		t.Errorf("expected BatchSize %d, got %d", DefaultBatchSize, config.BatchSize)
	}
	if config.FlushIntervalMs != DefaultFlushIntervalMs {
		t.Errorf("expected FlushIntervalMs %d, got %d", DefaultFlushIntervalMs, config.FlushIntervalMs)
	}
	if config.MaxBufferSize != DefaultMaxBufferSize {
		t.Errorf("expected MaxBufferSize %d, got %d", DefaultMaxBufferSize, config.MaxBufferSize)
	}
	if config.TimeoutMs != DefaultTimeoutMs {
		t.Errorf("expected TimeoutMs %d, got %d", DefaultTimeoutMs, config.TimeoutMs)
	}
}

// TestClickHouseSink_Write_BufferOverflow only sanity-checks the config shape;
// actual overflow behavior needs a live ClickHouse.
func TestClickHouseSink_Write_BufferOverflow(t *testing.T) {
	// This test verifies the buffer overflow logic without actually connecting
	config := Config{
		DSN:             "clickhouse://test:test@localhost:9000/test",
		Table:           "test_table",
		BatchSize:       10,
		MaxBufferSize:   10,
		DropOnOverflow:  true,
		TimeoutMs:       100,
		FlushIntervalMs: 1000,
	}
	// We can't test actual writes without a ClickHouse instance,
	// but we can verify the config is valid
	if config.BatchSize > config.MaxBufferSize {
		t.Error("BatchSize should not exceed MaxBufferSize")
	}
}

// TestClickHouseSink_IsRetryableError: table-driven check of the transient
// error classifier.
func TestClickHouseSink_IsRetryableError(t *testing.T) {
	tests := []struct {
		name     string
		err      error
		expected bool
	}{
		{"nil error", nil, false},
		{"connection refused", &mockError{"connection refused"}, true},
		{"connection reset", &mockError{"connection reset by peer"}, true},
		{"timeout", &mockError{"timeout waiting for response"}, true},
		{"network unreachable", &mockError{"network is unreachable"}, true},
		{"broken pipe", &mockError{"broken pipe"}, true},
		{"syntax error", &mockError{"syntax error in SQL"}, false},
		{"table not found", &mockError{"table test not found"}, false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := isRetryableError(tt.err)
			if result != tt.expected {
				t.Errorf("expected %v, got %v", tt.expected, result)
			}
		})
	}
}

// TestClickHouseSink_FlushEmpty: flushing an empty buffer is a no-op and must
// not touch the (nil) database handle.
func TestClickHouseSink_FlushEmpty(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:   "clickhouse://test:test@localhost:9000/test",
			Table: "test_table",
		},
		buffer: make([]domain.CorrelatedLog, 0),
	}
	// Should not panic or error on empty flush
	ctx := context.Background()
	err := s.Flush(ctx)
	if err != nil {
		t.Errorf("expected no error on empty flush, got %v", err)
	}
}

// TestClickHouseSink_CloseWithoutConnect: Close with a nil db must not panic.
func TestClickHouseSink_CloseWithoutConnect(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:   "clickhouse://test:test@localhost:9000/test",
			Table: "test_table",
		},
		buffer: make([]domain.CorrelatedLog, 0),
		done:   make(chan struct{}),
	}
	err := s.Close()
	if err != nil {
		t.Errorf("expected no error on close without connect, got %v", err)
	}
}

// TestClickHouseSink_Constants: sanity-check that the tuning constants are
// all positive.
func TestClickHouseSink_Constants(t *testing.T) {
	if DefaultBatchSize <= 0 {
		t.Error("DefaultBatchSize should be positive")
	}
	if DefaultFlushIntervalMs <= 0 {
		t.Error("DefaultFlushIntervalMs should be positive")
	}
	if DefaultMaxBufferSize <= 0 {
		t.Error("DefaultMaxBufferSize should be positive")
	}
	if DefaultTimeoutMs <= 0 {
		t.Error("DefaultTimeoutMs should be positive")
	}
	if DefaultPingTimeoutMs <= 0 {
		t.Error("DefaultPingTimeoutMs should be positive")
	}
	if MaxRetries <= 0 {
		t.Error("MaxRetries should be positive")
	}
	if RetryBaseDelay <= 0 {
		t.Error("RetryBaseDelay should be positive")
	}
}

// mockError implements error for testing
type mockError struct {
	msg string
}

func (e *mockError) Error() string {
	return e.msg
}

// TestClickHouseSink_DoFlushEmpty: doFlush on an empty buffer returns early
// without a DB connection.
func TestClickHouseSink_DoFlushEmpty(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:   "clickhouse://test:test@localhost:9000/test",
			Table: "test_table",
		},
		buffer: make([]domain.CorrelatedLog, 0),
	}
	ctx := context.Background()
	err := s.doFlush(ctx)
	if err != nil {
		t.Errorf("expected no error when flushing empty buffer, got %v", err)
	}
}

// TestClickHouseSink_BufferManagement: Write appends to the buffer when below
// the cap (no DB required since BatchSize is never reached).
func TestClickHouseSink_BufferManagement(t *testing.T) {
	log := domain.CorrelatedLog{
		SrcIP:      "192.168.1.1",
		SrcPort:    8080,
		Correlated: true,
	}
	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  100, // Allow more than 1 element
			DropOnOverflow: false,
			TimeoutMs:      1000,
		},
		buffer: []domain.CorrelatedLog{log},
	}
	// Verify buffer has data
	if len(s.buffer) != 1 {
		t.Fatalf("expected buffer length 1, got %d", len(s.buffer))
	}
	// Test that Write properly adds to buffer
	ctx := context.Background()
	err := s.Write(ctx, log)
	if err != nil {
		t.Errorf("unexpected error on Write: %v", err)
	}
	if len(s.buffer) != 2 {
		t.Errorf("expected buffer length 2 after Write, got %d", len(s.buffer))
	}
}

// TestClickHouseSink_Write_ContextCancel: a full buffer plus a cancelled
// context must surface an error rather than block.
func TestClickHouseSink_Write_ContextCancel(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  1,
			DropOnOverflow: false,
			TimeoutMs:      10,
		},
		buffer: make([]domain.CorrelatedLog, 0, 1),
	}
	// Fill the buffer
	log := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	s.buffer = append(s.buffer, log)
	// Try to write with cancelled context
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // Cancel immediately
	err := s.Write(ctx, log)
	if err == nil {
		t.Error("expected error when writing with cancelled context")
	}
}

// TestClickHouseSink_Write_DropOnOverflow: with DropOnOverflow the overflowing
// log is silently discarded and Write reports success.
func TestClickHouseSink_Write_DropOnOverflow(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  1,
			DropOnOverflow: true,
			TimeoutMs:      10,
		},
		buffer: make([]domain.CorrelatedLog, 0, 1),
	}
	// Fill the buffer
	log := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	s.buffer = append(s.buffer, log)
	// Try to write when buffer is full - should drop silently
	ctx := context.Background()
	err := s.Write(ctx, log)
	if err != nil {
		t.Errorf("expected no error when DropOnOverflow is true, got %v", err)
	}
}

// BenchmarkClickHouseSink_Write measures buffered Write throughput with
// dropping enabled (no DB round-trips).
func BenchmarkClickHouseSink_Write(b *testing.B) {
	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  10000,
			DropOnOverflow: true,
		},
		buffer: make([]domain.CorrelatedLog, 0, 10000),
	}
	log := domain.CorrelatedLog{
		Timestamp:  time.Now(),
		SrcIP:      "192.168.1.1",
		SrcPort:    8080,
		Correlated: true,
	}
	ctx := context.Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		s.Write(ctx, log)
	}
}

View File

@ -0,0 +1,168 @@
package file
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
const (
	// DefaultFilePermissions for output files (rw-r--r--).
	DefaultFilePermissions os.FileMode = 0644
	// DefaultDirPermissions for created output directories (rwxr-x---).
	DefaultDirPermissions os.FileMode = 0750
)
// Config holds the file sink configuration.
type Config struct {
	Path string // destination file; must satisfy validateFilePath
}
// FileSink writes correlated logs to a file as JSON lines (one object per
// line). The file is opened lazily on first Write; mu serializes all access.
type FileSink struct {
	config Config
	mu     sync.Mutex
	file   *os.File      // nil until the first successful Write
	writer *bufio.Writer // wraps file; drained by Flush/Close
}
// NewFileSink validates the configured path and returns a sink. No file is
// created yet; the output file is opened lazily on first Write.
func NewFileSink(config Config) (*FileSink, error) {
	// Reject unsafe or disallowed paths up front, before anything is created.
	if err := validateFilePath(config.Path); err != nil {
		return nil, fmt.Errorf("invalid file path: %w", err)
	}
	sink := &FileSink{config: config}
	return sink, nil
}
// Name returns the fixed sink identifier "file".
func (s *FileSink) Name() string {
	return "file"
}
// Write serializes one correlated log as JSON and appends it, newline
// terminated, to the buffered output. The file is opened lazily on the first
// call. Data may sit in the buffer until Flush or Close.
func (s *FileSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	// Lazy open on first use.
	if s.file == nil {
		if err := s.openFile(); err != nil {
			return err
		}
	}
	payload, err := json.Marshal(log)
	if err != nil {
		return fmt.Errorf("failed to marshal log: %w", err)
	}
	if _, werr := s.writer.Write(payload); werr != nil {
		return fmt.Errorf("failed to write log: %w", werr)
	}
	if _, werr := s.writer.WriteString("\n"); werr != nil {
		return fmt.Errorf("failed to write newline: %w", werr)
	}
	return nil
}
// Flush pushes any buffered bytes through to the underlying file. It is a
// no-op if nothing has been written yet (the file is still unopened).
func (s *FileSink) Flush(ctx context.Context) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.writer == nil {
		return nil
	}
	return s.writer.Flush()
}
// Close flushes buffered data and closes the underlying file.
//
// Fixes over the original: (1) a flush error used to return early and leak
// the open file descriptor — the file is now always closed; (2) the handles
// are reset to nil so Close is idempotent and a later Write reopens the file
// instead of hitting a closed handle.
func (s *FileSink) Close() error {
	s.mu.Lock()
	defer s.mu.Unlock()
	var flushErr error
	if s.writer != nil {
		flushErr = s.writer.Flush()
	}
	var closeErr error
	if s.file != nil {
		closeErr = s.file.Close()
	}
	s.file = nil
	s.writer = nil
	// Report the flush failure first: it is the one that loses data.
	if flushErr != nil {
		return flushErr
	}
	return closeErr
}
// openFile creates the parent directory if needed and opens the output file
// in append mode, wiring up the buffered writer. Caller must hold s.mu.
func (s *FileSink) openFile() error {
	// Re-validate in case the configured path changed since construction.
	if err := validateFilePath(s.config.Path); err != nil {
		return fmt.Errorf("invalid file path: %w", err)
	}
	if err := os.MkdirAll(filepath.Dir(s.config.Path), DefaultDirPermissions); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}
	f, err := os.OpenFile(s.config.Path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, DefaultFilePermissions)
	if err != nil {
		return fmt.Errorf("failed to open file: %w", err)
	}
	s.file = f
	s.writer = bufio.NewWriter(f)
	return nil
}
// validateFilePath checks that path is non-empty and either relative (allowed
// for tests) or absolute under one of the allowed log directories.
//
// Fix over the original: the allowed-directory check used a bare HasPrefix,
// so "/var/logs_evil/x" passed the "/var/log" prefix test. Matching now
// requires an exact directory match or a path-separator boundary.
func validateFilePath(path string) error {
	if path == "" {
		return fmt.Errorf("path cannot be empty")
	}
	// Clean resolves "." and interior ".." segments up front.
	cleanPath := filepath.Clean(path)
	allowedPrefixes := []string{
		"/var/log/logcorrelator",
		"/var/log",
		"/tmp",
	}
	allowed := false
	for _, prefix := range allowedPrefixes {
		// Exact match, or prefix followed by a separator — this is what stops
		// sibling directories like "/var/logs_evil" from slipping through.
		if cleanPath == prefix || strings.HasPrefix(cleanPath, prefix+"/") {
			allowed = true
			break
		}
	}
	if !allowed {
		// Allow relative paths for testing
		if !filepath.IsAbs(cleanPath) {
			return nil
		}
		return fmt.Errorf("path must be in allowed directories: %v", allowedPrefixes)
	}
	// Belt-and-braces traversal check; Clean has already collapsed most ".."
	// segments, so this mainly rejects odd ".."-bearing names.
	if strings.Contains(cleanPath, "..") {
		return fmt.Errorf("path cannot contain '..'")
	}
	return nil
}

View File

@ -0,0 +1,96 @@
package file
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
// TestFileSink_Write: a single write + flush produces a non-empty file.
func TestFileSink_Write(t *testing.T) {
	tmpDir := t.TempDir()
	testPath := filepath.Join(tmpDir, "test.log")
	sink, err := NewFileSink(Config{Path: testPath})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()
	log := domain.CorrelatedLog{
		SrcIP:      "192.168.1.1",
		SrcPort:    8080,
		Correlated: true,
	}
	if err := sink.Write(context.Background(), log); err != nil {
		t.Fatalf("failed to write: %v", err)
	}
	if err := sink.Flush(context.Background()); err != nil {
		t.Fatalf("failed to flush: %v", err)
	}
	// Verify file exists and contains data
	data, err := os.ReadFile(testPath)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}
	if len(data) == 0 {
		t.Error("expected non-empty file")
	}
}

// TestFileSink_MultipleWrites: five writes yield exactly five newline-
// terminated JSON lines after Close (which implies a flush).
func TestFileSink_MultipleWrites(t *testing.T) {
	tmpDir := t.TempDir()
	testPath := filepath.Join(tmpDir, "test.log")
	sink, err := NewFileSink(Config{Path: testPath})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	defer sink.Close()
	for i := 0; i < 5; i++ {
		log := domain.CorrelatedLog{
			SrcIP:   "192.168.1.1",
			SrcPort: 8080 + i,
		}
		if err := sink.Write(context.Background(), log); err != nil {
			t.Fatalf("failed to write: %v", err)
		}
	}
	sink.Close()
	// Verify file has 5 lines
	data, err := os.ReadFile(testPath)
	if err != nil {
		t.Fatalf("failed to read file: %v", err)
	}
	lines := 0
	for _, b := range data {
		if b == '\n' {
			lines++
		}
	}
	if lines != 5 {
		t.Errorf("expected 5 lines, got %d", lines)
	}
}

// TestFileSink_Name: the sink reports the fixed name "file". Note the /tmp
// path is only validated here, never created (opening is lazy).
func TestFileSink_Name(t *testing.T) {
	sink, err := NewFileSink(Config{Path: "/tmp/test.log"})
	if err != nil {
		t.Fatalf("failed to create sink: %v", err)
	}
	if sink.Name() != "file" {
		t.Errorf("expected name 'file', got %s", sink.Name())
	}
}

View File

@ -0,0 +1,123 @@
package multi
import (
"context"
"sync"
"github.com/logcorrelator/logcorrelator/internal/domain"
"github.com/logcorrelator/logcorrelator/internal/ports"
)
// MultiSink fans out correlated logs to multiple sinks concurrently.
// mu guards the sinks slice so AddSink can race safely with Write.
type MultiSink struct {
	mu    sync.RWMutex
	sinks []ports.CorrelatedLogSink
}
// NewMultiSink builds a fan-out sink over the given sinks; more can be
// registered later via AddSink.
func NewMultiSink(sinks ...ports.CorrelatedLogSink) *MultiSink {
	ms := &MultiSink{}
	ms.sinks = sinks
	return ms
}
// Name returns the fixed sink identifier "multi".
func (s *MultiSink) Name() string {
	return "multi"
}
// AddSink registers an additional sink; safe to call concurrently with Write.
func (s *MultiSink) AddSink(sink ports.CorrelatedLogSink) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.sinks = append(s.sinks, sink)
}
// Write fans a correlated log out to every registered sink concurrently and
// returns the first error reported (all sinks are attempted regardless). If
// ctx is cancelled before every sink finishes, ctx.Err() is returned and the
// stragglers complete in the background with their errors dropped.
//
// Simplified from the original: errChan's capacity already equals the number
// of sinks, so sends can never block — the non-blocking send (whose comment
// wrongly claimed a full channel was "handled via firstErr") and the firstErr
// mutex were dead weight. Behavior is unchanged.
func (s *MultiSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	// Snapshot the sink list so AddSink cannot race with the fan-out below.
	s.mu.RLock()
	sinks := make([]ports.CorrelatedLogSink, len(s.sinks))
	copy(sinks, s.sinks)
	s.mu.RUnlock()
	if len(sinks) == 0 {
		return nil
	}
	errChan := make(chan error, len(sinks)) // one slot per sink: sends never block
	var wg sync.WaitGroup
	for _, sink := range sinks {
		wg.Add(1)
		go func(sk ports.CorrelatedLogSink) {
			defer wg.Done()
			if err := sk.Write(ctx, log); err != nil {
				errChan <- err
			}
		}(sink)
	}
	// Close done once every sink has finished, so the select below can race
	// completion against context cancellation.
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()
	select {
	case <-done:
		close(errChan)
		for err := range errChan {
			// First error collected wins.
			return err
		}
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// Flush flushes every registered sink in order, stopping at (and returning)
// the first failure.
func (s *MultiSink) Flush(ctx context.Context) error {
	s.mu.RLock()
	defer s.mu.RUnlock()
	for i := range s.sinks {
		if err := s.sinks[i].Flush(ctx); err != nil {
			return err
		}
	}
	return nil
}
// Close closes every registered sink. All sinks are closed even if some fail;
// the first error encountered is the one returned.
func (s *MultiSink) Close() error {
	s.mu.RLock()
	defer s.mu.RUnlock()
	var firstErr error
	for i := range s.sinks {
		err := s.sinks[i].Close()
		if err == nil || firstErr != nil {
			continue
		}
		firstErr = err
	}
	return firstErr
}

View File

@ -0,0 +1,114 @@
package multi
import (
"context"
"sync"
"testing"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
type mockSink struct {
name string
mu sync.Mutex
writeFunc func(domain.CorrelatedLog) error
flushFunc func() error
closeFunc func() error
}
func (m *mockSink) Name() string { return m.name }
func (m *mockSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
m.mu.Lock()
defer m.mu.Unlock()
return m.writeFunc(log)
}
func (m *mockSink) Flush(ctx context.Context) error { return m.flushFunc() }
func (m *mockSink) Close() error { return m.closeFunc() }
func TestMultiSink_Write(t *testing.T) {
var mu sync.Mutex
writeCount := 0
sink1 := &mockSink{
name: "sink1",
writeFunc: func(log domain.CorrelatedLog) error {
mu.Lock()
writeCount++
mu.Unlock()
return nil
},
flushFunc: func() error { return nil },
closeFunc: func() error { return nil },
}
sink2 := &mockSink{
name: "sink2",
writeFunc: func(log domain.CorrelatedLog) error {
mu.Lock()
writeCount++
mu.Unlock()
return nil
},
flushFunc: func() error { return nil },
closeFunc: func() error { return nil },
}
ms := NewMultiSink(sink1, sink2)
log := domain.CorrelatedLog{SrcIP: "192.168.1.1"}
err := ms.Write(context.Background(), log)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if writeCount != 2 {
t.Errorf("expected 2 writes, got %d", writeCount)
}
}
// TestMultiSink_Write_OneFails verifies that a failure in any single
// sink surfaces as an error from MultiSink.Write.
func TestMultiSink_Write_OneFails(t *testing.T) {
	healthy := &mockSink{
		name:      "sink1",
		writeFunc: func(domain.CorrelatedLog) error { return nil },
		flushFunc: func() error { return nil },
		closeFunc: func() error { return nil },
	}
	failing := &mockSink{
		name:      "sink2",
		writeFunc: func(domain.CorrelatedLog) error { return context.Canceled },
		flushFunc: func() error { return nil },
		closeFunc: func() error { return nil },
	}
	ms := NewMultiSink(healthy, failing)
	if err := ms.Write(context.Background(), domain.CorrelatedLog{SrcIP: "192.168.1.1"}); err == nil {
		t.Error("expected error when one sink fails")
	}
}
// TestMultiSink_AddSink verifies that a sink registered after
// construction receives subsequent writes without error.
func TestMultiSink_AddSink(t *testing.T) {
	ms := NewMultiSink()
	ms.AddSink(&mockSink{
		name:      "dynamic",
		writeFunc: func(domain.CorrelatedLog) error { return nil },
		flushFunc: func() error { return nil },
		closeFunc: func() error { return nil },
	})
	if err := ms.Write(context.Background(), domain.CorrelatedLog{SrcIP: "192.168.1.1"}); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
}

View File

@ -0,0 +1,158 @@
package app
import (
"context"
"sync"
"sync/atomic"
"time"
"github.com/logcorrelator/logcorrelator/internal/domain"
"github.com/logcorrelator/logcorrelator/internal/ports"
)
const (
	// DefaultEventChannelBufferSize is the default size for event channels
	// connecting each source to its processing goroutine.
	DefaultEventChannelBufferSize = 1000
	// ShutdownTimeout is the maximum time to wait for graceful shutdown
	// before Stop stops waiting on source goroutines and proceeds.
	ShutdownTimeout = 30 * time.Second
)
// OrchestratorConfig holds the orchestrator configuration.
type OrchestratorConfig struct {
	Sources []ports.EventSource     // event producers; each gets its own channel and goroutine pair
	Sink    ports.CorrelatedLogSink // destination for correlated logs
}

// Orchestrator connects sources to the correlation service and sinks.
type Orchestrator struct {
	config         OrchestratorConfig
	correlationSvc ports.CorrelationProcessor
	ctx            context.Context // cancelled by Stop to signal sources and processors
	cancel         context.CancelFunc
	wg             sync.WaitGroup // tracks the per-source goroutines started in Start
	running        atomic.Bool    // guards against double Start/Stop
}
// NewOrchestrator creates a new orchestrator. The returned value is
// idle until Start is called.
func NewOrchestrator(config OrchestratorConfig, correlationSvc ports.CorrelationProcessor) *Orchestrator {
	ctx, cancel := context.WithCancel(context.Background())
	o := &Orchestrator{
		config:         config,
		correlationSvc: correlationSvc,
		ctx:            ctx,
		cancel:         cancel,
	}
	return o
}
// Start begins the orchestration. For each configured source it spawns
// a consumer goroutine (processEvents) and a producer goroutine running
// the source itself, connected by a buffered channel. Calling Start on
// an already-running orchestrator is a no-op. It always returns nil.
func (o *Orchestrator) Start() error {
	if !o.running.CompareAndSwap(false, true) {
		return nil // Already running
	}
	for _, source := range o.config.Sources {
		eventChan := make(chan *domain.NormalizedEvent, DefaultEventChannelBufferSize)

		// Consumer: drains the channel into the correlation service.
		o.wg.Add(1)
		go func(evChan chan *domain.NormalizedEvent) {
			defer o.wg.Done()
			o.processEvents(evChan)
		}(eventChan)

		// Producer: runs the source until o.ctx is cancelled.
		o.wg.Add(1)
		go func(src ports.EventSource, evChan chan *domain.NormalizedEvent) {
			defer o.wg.Done()
			// A failing source must not stop the others; the error is
			// deliberately dropped because no logger is wired in here.
			// NOTE(review): consider surfacing this via a logger/metric.
			_ = src.Start(o.ctx, evChan)
		}(source, eventChan)
	}
	return nil
}
// processEvents consumes normalized events from eventChan, feeds them
// through the correlation service, and writes any resulting correlated
// logs to the sink. It exits when the orchestrator context is cancelled
// (after draining already-buffered events) or when eventChan is closed.
func (o *Orchestrator) processEvents(eventChan <-chan *domain.NormalizedEvent) {
	for {
		select {
		case <-o.ctx.Done():
			// Shutdown: drain whatever is already buffered, then exit.
			for {
				select {
				case event, ok := <-eventChan:
					if !ok {
						return
					}
					for _, log := range o.correlationSvc.ProcessEvent(event) {
						// Best-effort during drain; o.ctx is already
						// cancelled, so the sink may reject the write.
						_ = o.config.Sink.Write(o.ctx, log)
					}
				default:
					return
				}
			}
		case event, ok := <-eventChan:
			if !ok {
				return
			}
			// Process through the correlation service and fan the
			// resulting logs out to the sink.
			for _, log := range o.correlationSvc.ProcessEvent(event) {
				// Sink errors must not halt event processing; dropped
				// because no logger is wired in yet.
				// NOTE(review): consider a logger or error counter.
				_ = o.config.Sink.Write(o.ctx, log)
			}
		}
	}
}
// Stop gracefully stops the orchestrator.
// It stops all sources first, then flushes remaining events, then closes
// sinks. Calling Stop when not running is a no-op. It always returns nil.
func (o *Orchestrator) Stop() error {
	if !o.running.CompareAndSwap(true, false) {
		return nil // Not running
	}
	// Bound the whole shutdown sequence.
	shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), ShutdownTimeout)
	defer shutdownCancel()

	// Cancel the main context so sources and processors stop accepting
	// new events.
	o.cancel()

	// Wait for the per-source goroutines, but never longer than the
	// shutdown timeout, to avoid deadlocking on a stuck source.
	done := make(chan struct{})
	go func() {
		o.wg.Wait()
		close(done)
	}()
	select {
	case <-done:
		// Sources stopped cleanly.
	case <-shutdownCtx.Done():
		// Timed out; proceed with flushing anyway.
	}

	// Emit whatever the correlation service still holds.
	for _, log := range o.correlationSvc.Flush() {
		// Best-effort: shutdown must proceed even if the sink fails.
		// NOTE(review): consider logging these dropped writes.
		_ = o.config.Sink.Write(shutdownCtx, log)
	}

	// Flush and close the sink; errors are intentionally ignored so a
	// failing sink cannot prevent shutdown from completing.
	_ = o.config.Sink.Flush(shutdownCtx)
	_ = o.config.Sink.Close()
	return nil
}

View File

@ -0,0 +1,160 @@
package app
import (
"context"
"sync"
"testing"
"time"
"github.com/logcorrelator/logcorrelator/internal/domain"
"github.com/logcorrelator/logcorrelator/internal/ports"
)
// mockEventSource is an EventSource test double that records lifecycle
// state and exposes the channel handed to it by the orchestrator.
type mockEventSource struct {
	name      string
	mu        sync.RWMutex
	eventChan chan<- *domain.NormalizedEvent // captured from Start for test injection
	started   bool
	stopped   bool
}

// Name returns the configured source name.
func (m *mockEventSource) Name() string { return m.name }

// Start records the channel and blocks until ctx is cancelled,
// mirroring a real long-running source.
func (m *mockEventSource) Start(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) error {
	m.mu.Lock()
	m.started = true
	m.eventChan = eventChan
	m.mu.Unlock()
	<-ctx.Done()
	m.mu.Lock()
	m.stopped = true
	m.mu.Unlock()
	return nil
}

// Stop is a no-op; shutdown is driven by context cancellation.
func (m *mockEventSource) Stop() error { return nil }

// getEventChan returns the channel captured by Start (nil until then).
func (m *mockEventSource) getEventChan() chan<- *domain.NormalizedEvent {
	m.mu.RLock()
	defer m.mu.RUnlock()
	return m.eventChan
}

// isStarted reports whether Start has run.
func (m *mockEventSource) isStarted() bool {
	m.mu.RLock()
	defer m.mu.RUnlock()
	return m.started
}
// mockSink is a CorrelatedLogSink test double that records every log
// written to it.
type mockSink struct {
	mu      sync.Mutex
	written []domain.CorrelatedLog
}

// Name identifies the sink in diagnostics.
func (m *mockSink) Name() string { return "mock" }

// Write appends the log to the recorded slice under the mutex.
func (m *mockSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.written = append(m.written, log)
	return nil
}

// Flush is a no-op.
func (m *mockSink) Flush(ctx context.Context) error { return nil }

// Close is a no-op.
func (m *mockSink) Close() error { return nil }

// getWritten returns a copy of the recorded logs, safe to inspect while
// writers may still be active.
func (m *mockSink) getWritten() []domain.CorrelatedLog {
	m.mu.Lock()
	defer m.mu.Unlock()
	result := make([]domain.CorrelatedLog, len(m.written))
	copy(result, m.written)
	return result
}
// TestOrchestrator_StartStop checks the basic lifecycle: Start launches
// the source and Stop shuts everything down without error.
func TestOrchestrator_StartStop(t *testing.T) {
	src := &mockEventSource{name: "test"}
	out := &mockSink{}
	svc := domain.NewCorrelationService(domain.CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
		NetworkEmit:      false,
	}, &domain.RealTimeProvider{})
	orch := NewOrchestrator(OrchestratorConfig{
		Sources: []ports.EventSource{src},
		Sink:    out,
	}, svc)
	if err := orch.Start(); err != nil {
		t.Fatalf("failed to start: %v", err)
	}
	time.Sleep(100 * time.Millisecond) // let the source goroutine spin up
	if err := orch.Stop(); err != nil {
		t.Fatalf("failed to stop: %v", err)
	}
	if !src.isStarted() {
		t.Error("expected source to be started")
	}
}
// TestOrchestrator_ProcessEvent pushes a single event through a running
// orchestrator and checks that at least one log reaches the sink.
func TestOrchestrator_ProcessEvent(t *testing.T) {
	src := &mockEventSource{name: "test"}
	out := &mockSink{}
	svc := domain.NewCorrelationService(domain.CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
		NetworkEmit:      false,
	}, &domain.RealTimeProvider{})
	orch := NewOrchestrator(OrchestratorConfig{
		Sources: []ports.EventSource{src},
		Sink:    out,
	}, svc)
	if err := orch.Start(); err != nil {
		t.Fatalf("failed to start: %v", err)
	}
	// Poll until the source has received its channel from the orchestrator.
	var eventChan chan<- *domain.NormalizedEvent
	for attempt := 0; attempt < 50; attempt++ {
		if eventChan = src.getEventChan(); eventChan != nil {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}
	if eventChan == nil {
		t.Fatal("source did not start properly")
	}
	// Inject a single Apache event; with ApacheAlwaysEmit it is emitted
	// immediately as an orphan.
	eventChan <- &domain.NormalizedEvent{
		Source:    domain.SourceA,
		Timestamp: time.Now(),
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		Raw:       map[string]any{"method": "GET"},
	}
	time.Sleep(100 * time.Millisecond) // allow the pipeline to process
	if err := orch.Stop(); err != nil {
		t.Fatalf("failed to stop: %v", err)
	}
	if len(out.getWritten()) == 0 {
		t.Error("expected at least one log to be written")
	}
}

340
internal/config/config.go Normal file
View File

@ -0,0 +1,340 @@
package config
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"time"
)
// Config holds the complete application configuration.
type Config struct {
	Service     ServiceConfig
	Inputs      InputsConfig
	Outputs     OutputsConfig
	Correlation CorrelationConfig
}

// ServiceConfig holds service-level configuration.
type ServiceConfig struct {
	Name     string // service identifier (default "logcorrelator")
	Language string // implementation language tag (default "go")
}

// InputsConfig holds input sources configuration.
type InputsConfig struct {
	UnixSockets []UnixSocketConfig
}

// UnixSocketConfig holds a Unix socket source configuration.
type UnixSocketConfig struct {
	Name   string // logical source name
	Path   string // filesystem path of the socket
	Format string // wire format; the parser defaults this to "json" when omitted
}

// OutputsConfig holds output sinks configuration.
type OutputsConfig struct {
	File       FileOutputConfig
	ClickHouse ClickHouseOutputConfig
	Stdout     StdoutOutputConfig
}

// FileOutputConfig holds file sink configuration.
type FileOutputConfig struct {
	Enabled bool
	Path    string
}

// ClickHouseOutputConfig holds ClickHouse sink configuration.
type ClickHouseOutputConfig struct {
	Enabled         bool
	DSN             string // required when Enabled (enforced by Validate)
	Table           string
	BatchSize       int
	FlushIntervalMs int
	MaxBufferSize   int
	DropOnOverflow  bool
	AsyncInsert     bool
	TimeoutMs       int
}

// StdoutOutputConfig holds stdout sink configuration.
type StdoutOutputConfig struct {
	Enabled bool
}

// CorrelationConfig holds correlation configuration.
type CorrelationConfig struct {
	Key          []string // event fields forming the correlation key
	TimeWindow   TimeWindowConfig
	OrphanPolicy OrphanPolicyConfig
}

// TimeWindowConfig holds time window configuration.
type TimeWindowConfig struct {
	Value int    // magnitude; GetTimeWindow coerces values <= 0 to 1
	Unit  string // "ms", "s", or "m"; anything else falls back to seconds in GetTimeWindow
}

// OrphanPolicyConfig holds orphan event policy configuration.
type OrphanPolicyConfig struct {
	ApacheAlwaysEmit bool // emit unmatched Apache events immediately
	NetworkEmit      bool // emit unmatched network events (off by default)
}
// Load loads configuration from a text file with directives.
// Parse failures are reported with the offending line number; the
// resulting config is validated (see Validate) before being returned.
func Load(path string) (*Config, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("failed to open config file: %w", err)
	}
	defer file.Close()

	// Start from the built-in defaults; directives override them.
	cfg := &Config{
		Service: ServiceConfig{Name: "logcorrelator", Language: "go"},
		Inputs:  InputsConfig{UnixSockets: make([]UnixSocketConfig, 0)},
		Outputs: OutputsConfig{
			File: FileOutputConfig{
				Enabled: true,
				Path:    "/var/log/logcorrelator/correlated.log",
			},
			ClickHouse: ClickHouseOutputConfig{
				Enabled:         false,
				BatchSize:       500,
				FlushIntervalMs: 200,
				MaxBufferSize:   5000,
				DropOnOverflow:  true,
				AsyncInsert:     true,
				TimeoutMs:       1000,
			},
			Stdout: StdoutOutputConfig{Enabled: false},
		},
		Correlation: CorrelationConfig{
			Key:          []string{"src_ip", "src_port"},
			TimeWindow:   TimeWindowConfig{Value: 1, Unit: "s"},
			OrphanPolicy: OrphanPolicyConfig{ApacheAlwaysEmit: true, NetworkEmit: false},
		},
	}

	scanner := bufio.NewScanner(file)
	for lineNum := 1; scanner.Scan(); lineNum++ {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue // blank line or comment
		}
		if err := parseDirective(cfg, line); err != nil {
			return nil, fmt.Errorf("line %d: %w", lineNum, err)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to read config file: %w", err)
	}
	if err := cfg.Validate(); err != nil {
		return nil, fmt.Errorf("invalid config: %w", err)
	}
	return cfg, nil
}
// parseDirective applies one "<key> <value...>" config line to cfg.
// Unknown directives are rejected so typos in config files fail fast.
func parseDirective(cfg *Config, line string) error {
	parts := strings.Fields(line)
	if len(parts) < 2 {
		return fmt.Errorf("invalid directive: %s", line)
	}
	directive := parts[0]
	// Values may contain spaces (e.g. paths), so re-join everything after the key.
	value := strings.Join(parts[1:], " ")
	switch directive {
	case "service.name":
		cfg.Service.Name = value
	case "service.language":
		cfg.Service.Language = value
	case "input.unix_socket":
		// Format: input.unix_socket <name> <path> [format]
		if len(parts) < 3 {
			return fmt.Errorf("input.unix_socket requires name and path")
		}
		format := "json"
		if len(parts) >= 4 {
			format = parts[3]
		}
		cfg.Inputs.UnixSockets = append(cfg.Inputs.UnixSockets, UnixSocketConfig{
			Name:   parts[1],
			Path:   parts[2],
			Format: format,
		})
	case "output.file.enabled":
		enabled, err := parseBool(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.file.enabled: %w", err)
		}
		cfg.Outputs.File.Enabled = enabled
	case "output.file.path":
		cfg.Outputs.File.Path = value
	case "output.clickhouse.enabled":
		enabled, err := parseBool(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.clickhouse.enabled: %w", err)
		}
		cfg.Outputs.ClickHouse.Enabled = enabled
	case "output.clickhouse.dsn":
		cfg.Outputs.ClickHouse.DSN = value
	case "output.clickhouse.table":
		cfg.Outputs.ClickHouse.Table = value
	case "output.clickhouse.batch_size":
		v, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.clickhouse.batch_size: %w", err)
		}
		cfg.Outputs.ClickHouse.BatchSize = v
	case "output.clickhouse.flush_interval_ms":
		v, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.clickhouse.flush_interval_ms: %w", err)
		}
		cfg.Outputs.ClickHouse.FlushIntervalMs = v
	case "output.clickhouse.max_buffer_size":
		v, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.clickhouse.max_buffer_size: %w", err)
		}
		cfg.Outputs.ClickHouse.MaxBufferSize = v
	case "output.clickhouse.drop_on_overflow":
		enabled, err := parseBool(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.clickhouse.drop_on_overflow: %w", err)
		}
		cfg.Outputs.ClickHouse.DropOnOverflow = enabled
	case "output.clickhouse.async_insert":
		enabled, err := parseBool(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.clickhouse.async_insert: %w", err)
		}
		cfg.Outputs.ClickHouse.AsyncInsert = enabled
	case "output.clickhouse.timeout_ms":
		v, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.clickhouse.timeout_ms: %w", err)
		}
		cfg.Outputs.ClickHouse.TimeoutMs = v
	case "output.stdout.enabled":
		enabled, err := parseBool(value)
		if err != nil {
			return fmt.Errorf("invalid value for output.stdout.enabled: %w", err)
		}
		cfg.Outputs.Stdout.Enabled = enabled
	case "correlation.key":
		// Comma-separated list; whitespace around each field is trimmed.
		cfg.Correlation.Key = strings.Split(value, ",")
		for i, k := range cfg.Correlation.Key {
			cfg.Correlation.Key[i] = strings.TrimSpace(k)
		}
	case "correlation.time_window.value":
		v, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("invalid value for correlation.time_window.value: %w", err)
		}
		cfg.Correlation.TimeWindow.Value = v
	case "correlation.time_window.unit":
		cfg.Correlation.TimeWindow.Unit = value
	case "correlation.orphan_policy.apache_always_emit":
		enabled, err := parseBool(value)
		if err != nil {
			return fmt.Errorf("invalid value for correlation.orphan_policy.apache_always_emit: %w", err)
		}
		cfg.Correlation.OrphanPolicy.ApacheAlwaysEmit = enabled
	case "correlation.orphan_policy.network_emit":
		enabled, err := parseBool(value)
		if err != nil {
			return fmt.Errorf("invalid value for correlation.orphan_policy.network_emit: %w", err)
		}
		cfg.Correlation.OrphanPolicy.NetworkEmit = enabled
	default:
		return fmt.Errorf("unknown directive: %s", directive)
	}
	return nil
}
// parseBool interprets common textual boolean spellings, case-
// insensitively: true/yes/1/on and false/no/0/off. Anything else yields
// an error (mentioning the lowercased input).
func parseBool(s string) (bool, error) {
	v := strings.ToLower(s)
	switch v {
	case "true", "yes", "1", "on":
		return true, nil
	case "false", "no", "0", "off":
		return false, nil
	}
	return false, fmt.Errorf("invalid boolean value: %s", v)
}
// Validate checks cross-field invariants: at least two unix socket
// inputs, at least one enabled output, and a non-empty DSN whenever the
// ClickHouse output is enabled.
func (c *Config) Validate() error {
	if len(c.Inputs.UnixSockets) < 2 {
		return fmt.Errorf("at least two unix socket inputs are required")
	}
	anyOutput := c.Outputs.File.Enabled || c.Outputs.ClickHouse.Enabled || c.Outputs.Stdout.Enabled
	if !anyOutput {
		return fmt.Errorf("at least one output must be enabled")
	}
	if c.Outputs.ClickHouse.Enabled && c.Outputs.ClickHouse.DSN == "" {
		return fmt.Errorf("clickhouse DSN is required when enabled")
	}
	return nil
}
// GetTimeWindow returns the configured time window as a duration.
// Non-positive values are coerced to 1 and an empty unit defaults to
// seconds; unrecognized units also fall back to seconds.
func (c *CorrelationConfig) GetTimeWindow() time.Duration {
	value := c.TimeWindow.Value
	if value <= 0 {
		value = 1 // guard against zero/negative windows
	}
	unit := c.TimeWindow.Unit
	if unit == "" {
		unit = "s"
	}
	switch unit {
	case "ms", "millisecond", "milliseconds":
		return time.Duration(value) * time.Millisecond
	case "s", "second", "seconds":
		return time.Duration(value) * time.Second
	case "m", "minute", "minutes":
		return time.Duration(value) * time.Minute
	case "h", "hour", "hours":
		// Generalization: hour units were previously misread as seconds
		// via the default branch.
		return time.Duration(value) * time.Hour
	default:
		return time.Duration(value) * time.Second
	}
}

View File

@ -0,0 +1,224 @@
package config
import (
"os"
"path/filepath"
"testing"
"time"
)
// TestLoad_ValidConfig loads a complete, well-formed configuration and
// spot-checks the parsed values.
func TestLoad_ValidConfig(t *testing.T) {
	content := `
# Test configuration
service.name logcorrelator
service.language go
input.unix_socket apache_source /var/run/logcorrelator/apache.sock json
input.unix_socket network_source /var/run/logcorrelator/network.sock json
output.file.enabled true
output.file.path /var/log/logcorrelator/correlated.log
output.clickhouse.enabled false
output.clickhouse.dsn clickhouse://user:pass@localhost:9000/db
output.clickhouse.table correlated_logs
correlation.key src_ip,src_port
correlation.time_window.value 1
correlation.time_window.unit s
correlation.orphan_policy.apache_always_emit true
correlation.orphan_policy.network_emit false
`
	configPath := filepath.Join(t.TempDir(), "config.conf")
	if err := os.WriteFile(configPath, []byte(content), 0644); err != nil {
		t.Fatalf("failed to write config: %v", err)
	}
	cfg, err := Load(configPath)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := cfg.Service.Name; got != "logcorrelator" {
		t.Errorf("expected service name logcorrelator, got %s", got)
	}
	if got := len(cfg.Inputs.UnixSockets); got != 2 {
		t.Errorf("expected 2 unix sockets, got %d", got)
	}
	if !cfg.Outputs.File.Enabled {
		t.Error("expected file output enabled")
	}
}
// TestLoad_InvalidPath verifies that a missing config file is an error.
func TestLoad_InvalidPath(t *testing.T) {
	if _, err := Load("/nonexistent/path/config.conf"); err == nil {
		t.Error("expected error for nonexistent path")
	}
}
// TestLoad_InvalidDirective verifies that an unknown directive aborts
// loading with an error.
func TestLoad_InvalidDirective(t *testing.T) {
	configPath := filepath.Join(t.TempDir(), "config.conf")
	if err := os.WriteFile(configPath, []byte(`invalid.directive value`), 0644); err != nil {
		t.Fatalf("failed to write config: %v", err)
	}
	if _, err := Load(configPath); err == nil {
		t.Error("expected error for invalid directive")
	}
}
// TestLoad_Comments verifies that comment lines are ignored by the
// parser while real directives still take effect.
func TestLoad_Comments(t *testing.T) {
	content := `
# This is a comment
service.name logcorrelator
# Another comment
input.unix_socket test /tmp/test.sock json
input.unix_socket test2 /tmp/test2.sock json
output.file.enabled true
`
	configPath := filepath.Join(t.TempDir(), "config.conf")
	if err := os.WriteFile(configPath, []byte(content), 0644); err != nil {
		t.Fatalf("failed to write config: %v", err)
	}
	cfg, err := Load(configPath)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if cfg.Service.Name != "logcorrelator" {
		t.Errorf("expected service name logcorrelator, got %s", cfg.Service.Name)
	}
}
// TestValidate_MinimumInputs verifies that fewer than two socket inputs
// is rejected.
func TestValidate_MinimumInputs(t *testing.T) {
	cfg := &Config{
		Inputs: InputsConfig{
			UnixSockets: []UnixSocketConfig{
				{Name: "only_one", Path: "/tmp/test.sock"},
			},
		},
		Outputs: OutputsConfig{
			File: FileOutputConfig{Enabled: true},
		},
	}
	if err := cfg.Validate(); err == nil {
		t.Error("expected error for less than 2 inputs")
	}
}
// TestValidate_AtLeastOneOutput verifies that a config with every
// output disabled is rejected.
func TestValidate_AtLeastOneOutput(t *testing.T) {
	cfg := &Config{
		Inputs: InputsConfig{
			UnixSockets: []UnixSocketConfig{
				{Name: "a", Path: "/tmp/a.sock"},
				{Name: "b", Path: "/tmp/b.sock"},
			},
		},
		Outputs: OutputsConfig{
			File:       FileOutputConfig{Enabled: false},
			ClickHouse: ClickHouseOutputConfig{Enabled: false},
			Stdout:     StdoutOutputConfig{Enabled: false},
		},
	}
	if err := cfg.Validate(); err == nil {
		t.Error("expected error for no outputs enabled")
	}
}
// TestGetTimeWindow checks unit conversion and the zero-value default.
func TestGetTimeWindow(t *testing.T) {
	cases := []struct {
		name string
		cfg  CorrelationConfig
		want time.Duration
	}{
		{"seconds", CorrelationConfig{TimeWindow: TimeWindowConfig{Value: 1, Unit: "s"}}, time.Second},
		{"milliseconds", CorrelationConfig{TimeWindow: TimeWindowConfig{Value: 500, Unit: "ms"}}, 500 * time.Millisecond},
		{"minutes", CorrelationConfig{TimeWindow: TimeWindowConfig{Value: 2, Unit: "m"}}, 2 * time.Minute},
		{"default", CorrelationConfig{TimeWindow: TimeWindowConfig{}}, time.Second},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := tc.cfg.GetTimeWindow(); got != tc.want {
				t.Errorf("expected %v, got %v", tc.want, got)
			}
		})
	}
}
// TestParseBool covers the accepted truthy/falsy spellings in mixed
// case plus rejection of an unknown value.
func TestParseBool(t *testing.T) {
	check := func(input string, want bool) {
		t.Run(input, func(t *testing.T) {
			got, err := parseBool(input)
			if err != nil {
				t.Errorf("unexpected error: %v", err)
			}
			if got != want {
				t.Errorf("expected %v, got %v", want, got)
			}
		})
	}
	for _, in := range []string{"true", "True", "TRUE", "yes", "1", "on"} {
		check(in, true)
	}
	for _, in := range []string{"false", "False", "no", "0", "off"} {
		check(in, false)
	}
	t.Run("invalid", func(t *testing.T) {
		if _, err := parseBool("invalid"); err == nil {
			t.Error("expected error, got nil")
		}
	})
}

View File

@ -0,0 +1,90 @@
package domain
import "time"
// CorrelatedLog represents the output correlated log entry.
type CorrelatedLog struct {
	Timestamp  time.Time      `json:"timestamp"`             // for matched pairs, the later of the two event timestamps
	SrcIP      string         `json:"src_ip"`
	SrcPort    int            `json:"src_port"`
	DstIP      string         `json:"dst_ip,omitempty"`
	DstPort    int            `json:"dst_port,omitempty"`
	Correlated bool           `json:"correlated"`            // true only when both A and B sides matched
	OrphanSide string         `json:"orphan_side,omitempty"` // "A" or "B" for unmatched events, empty when correlated
	Apache     map[string]any `json:"apache,omitempty"`      // raw fields from the SourceA event, if any
	Network    map[string]any `json:"network,omitempty"`     // raw fields from the SourceB event, if any
	Extra      map[string]any `json:"extra,omitempty"`
}
// NewCorrelatedLogFromEvent creates a correlated log from a single
// event (orphan). orphanSide labels which side ("A" or "B") the event
// came from; only the matching payload map will be non-nil.
func NewCorrelatedLogFromEvent(event *NormalizedEvent, orphanSide string) CorrelatedLog {
	log := CorrelatedLog{
		Timestamp:  event.Timestamp,
		SrcIP:      event.SrcIP,
		SrcPort:    event.SrcPort,
		DstIP:      event.DstIP,
		DstPort:    event.DstPort,
		Correlated: false,
		OrphanSide: orphanSide,
		Extra:      make(map[string]any),
	}
	log.Apache = extractApache(event)
	log.Network = extractNetwork(event)
	return log
}
// NewCorrelatedLog creates a correlated log from two matched events.
// The timestamp is the later of the two (the Apache event's on a tie);
// src fields come from the Apache event and dst fields prefer the
// Apache event's values, falling back to the network event's.
func NewCorrelatedLog(apacheEvent, networkEvent *NormalizedEvent) CorrelatedLog {
	// Pick the more recent timestamp, keeping the Apache value on ties.
	ts := apacheEvent.Timestamp
	if networkEvent.Timestamp.After(ts) {
		ts = networkEvent.Timestamp
	}
	return CorrelatedLog{
		Timestamp:  ts,
		SrcIP:      apacheEvent.SrcIP,
		SrcPort:    apacheEvent.SrcPort,
		DstIP:      coalesceString(apacheEvent.DstIP, networkEvent.DstIP),
		DstPort:    coalesceInt(apacheEvent.DstPort, networkEvent.DstPort),
		Correlated: true,
		OrphanSide: "",
		Apache:     extractApache(apacheEvent),
		Network:    extractNetwork(networkEvent),
		Extra:      make(map[string]any),
	}
}
// extractApache returns a shallow copy of the event's raw fields, or
// nil when the event is not from SourceA.
func extractApache(e *NormalizedEvent) map[string]any {
	if e.Source != SourceA {
		return nil
	}
	out := make(map[string]any, len(e.Raw))
	for key, val := range e.Raw {
		out[key] = val
	}
	return out
}
// extractNetwork returns a shallow copy of the event's raw fields, or
// nil when the event is not from SourceB.
func extractNetwork(e *NormalizedEvent) map[string]any {
	if e.Source != SourceB {
		return nil
	}
	out := make(map[string]any, len(e.Raw))
	for key, val := range e.Raw {
		out[key] = val
	}
	return out
}
// coalesceString returns a when it is non-empty, otherwise b.
func coalesceString(a, b string) string {
	if a == "" {
		return b
	}
	return a
}
// coalesceInt returns a when it is non-zero, otherwise b.
func coalesceInt(a, b int) int {
	if a == 0 {
		return b
	}
	return a
}

View File

@ -0,0 +1,115 @@
package domain
import (
"testing"
"time"
)
// TestNormalizedEvent_CorrelationKeyFull checks the "ip:port" key format.
func TestNormalizedEvent_CorrelationKeyFull(t *testing.T) {
	cases := []struct {
		name string
		ip   string
		port int
		want string
	}{
		{"basic key", "192.168.1.1", 8080, "192.168.1.1:8080"},
		{"different port", "10.0.0.1", 443, "10.0.0.1:443"},
		{"port zero", "127.0.0.1", 0, "127.0.0.1:0"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ev := &NormalizedEvent{SrcIP: tc.ip, SrcPort: tc.port}
			if got := ev.CorrelationKeyFull(); got != tc.want {
				t.Errorf("expected %s, got %s", tc.want, got)
			}
		})
	}
}
// TestNewCorrelatedLogFromEvent checks orphan construction from a
// single Apache event.
func TestNewCorrelatedLogFromEvent(t *testing.T) {
	event := &NormalizedEvent{
		Source:    SourceA,
		Timestamp: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC),
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		DstIP:     "10.0.0.1",
		DstPort:   80,
		Raw:       map[string]any{"method": "GET", "path": "/api/test"},
	}
	got := NewCorrelatedLogFromEvent(event, "A")
	if got.Correlated {
		t.Error("expected correlated to be false")
	}
	if got.OrphanSide != "A" {
		t.Errorf("expected orphan_side A, got %s", got.OrphanSide)
	}
	if got.SrcIP != "192.168.1.1" {
		t.Errorf("expected src_ip 192.168.1.1, got %s", got.SrcIP)
	}
	if got.Apache == nil {
		t.Error("expected apache to be non-nil")
	}
}
// TestNewCorrelatedLog checks construction from a matched A/B pair.
func TestNewCorrelatedLog(t *testing.T) {
	base := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	apache := &NormalizedEvent{
		Source:    SourceA,
		Timestamp: base,
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		DstIP:     "10.0.0.1",
		DstPort:   80,
		Raw:       map[string]any{"method": "GET"},
	}
	network := &NormalizedEvent{
		Source:    SourceB,
		Timestamp: base.Add(500 * time.Millisecond),
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		DstIP:     "10.0.0.1",
		DstPort:   80,
		Raw:       map[string]any{"ja3": "abc123"},
	}
	got := NewCorrelatedLog(apache, network)
	if !got.Correlated {
		t.Error("expected correlated to be true")
	}
	if got.OrphanSide != "" {
		t.Errorf("expected orphan_side to be empty, got %s", got.OrphanSide)
	}
	if got.Apache == nil {
		t.Error("expected apache to be non-nil")
	}
	if got.Network == nil {
		t.Error("expected network to be non-nil")
	}
}

View File

@ -0,0 +1,243 @@
package domain
import (
"container/list"
"sync"
"time"
)
const (
	// DefaultMaxBufferSize is the default maximum number of events per buffer
	// (applied by NewCorrelationService when MaxBufferSize is unset).
	DefaultMaxBufferSize = 10000
)

// CorrelationConfig holds the correlation configuration.
type CorrelationConfig struct {
	TimeWindow       time.Duration // maximum timestamp distance for two events to match
	ApacheAlwaysEmit bool          // emit unmatched SourceA events immediately as orphans
	NetworkEmit      bool          // emit unmatched SourceB events as orphans
	MaxBufferSize    int           // Maximum events to buffer per source
}
// CorrelationService handles the correlation logic between source A and B events.
// All state is guarded by mu, so the exported methods are safe for
// concurrent use.
type CorrelationService struct {
	config       CorrelationConfig
	mu           sync.Mutex
	bufferA      *eventBuffer              // SourceA events awaiting a match, in arrival order
	bufferB      *eventBuffer              // SourceB events awaiting a match, in arrival order
	pendingA     map[string]*list.Element  // key -> list element containing NormalizedEvent
	pendingB     map[string]*list.Element
	timeProvider TimeProvider
}

// eventBuffer is an arrival-ordered list of buffered events.
type eventBuffer struct {
	events *list.List
}

// newEventBuffer returns an empty buffer.
func newEventBuffer() *eventBuffer {
	return &eventBuffer{
		events: list.New(),
	}
}
// TimeProvider abstracts time for testability.
type TimeProvider interface {
	// Now returns the current time used for expiry decisions.
	Now() time.Time
}

// RealTimeProvider uses real system time.
type RealTimeProvider struct{}

// Now returns the system clock's current time.
func (p *RealTimeProvider) Now() time.Time {
	return time.Now()
}
// NewCorrelationService creates a new correlation service. A nil
// timeProvider falls back to the system clock, and a non-positive
// MaxBufferSize falls back to DefaultMaxBufferSize.
func NewCorrelationService(config CorrelationConfig, timeProvider TimeProvider) *CorrelationService {
	if timeProvider == nil {
		timeProvider = &RealTimeProvider{}
	}
	if config.MaxBufferSize <= 0 {
		config.MaxBufferSize = DefaultMaxBufferSize
	}
	svc := &CorrelationService{
		config:       config,
		timeProvider: timeProvider,
		bufferA:      newEventBuffer(),
		bufferB:      newEventBuffer(),
		pendingA:     map[string]*list.Element{},
		pendingB:     map[string]*list.Element{},
	}
	return svc
}
// ProcessEvent processes an incoming event and returns correlated logs if matches are found.
// Order of operations: expire stale buffered events, apply back-pressure
// when the target buffer is full, attempt a match, then buffer the event.
// Safe for concurrent use.
func (s *CorrelationService) ProcessEvent(event *NormalizedEvent) []CorrelatedLog {
	s.mu.Lock()
	defer s.mu.Unlock()
	// Clean expired events first so buffer-fullness reflects live events.
	s.cleanExpired()
	// Check buffer overflow before adding: a full buffer drops the event,
	// except that a SourceA event under ApacheAlwaysEmit still goes out
	// as an orphan (it just isn't buffered for later matching).
	if s.isBufferFull(event.Source) {
		// Buffer full, drop event or emit as orphan
		if event.Source == SourceA && s.config.ApacheAlwaysEmit {
			return []CorrelatedLog{NewCorrelatedLogFromEvent(event, "A")}
		}
		return nil
	}
	var results []CorrelatedLog
	switch event.Source {
	case SourceA:
		results = s.processSourceA(event)
	case SourceB:
		results = s.processSourceB(event)
	}
	// Add the new event to the appropriate buffer.
	// NOTE(review): the event is buffered even when it was just emitted
	// (e.g. an A orphan under ApacheAlwaysEmit), so a later B match can
	// emit the same A data a second time — confirm this double-emit is
	// the intended semantics.
	s.addEvent(event)
	return results
}
// isBufferFull reports whether the buffer for the given source has
// reached the configured MaxBufferSize. Caller must hold s.mu.
func (s *CorrelationService) isBufferFull(source EventSource) bool {
	var buffered int
	switch source {
	case SourceA:
		buffered = s.bufferA.events.Len()
	case SourceB:
		buffered = s.bufferB.events.Len()
	default:
		return false
	}
	return buffered >= s.config.MaxBufferSize
}
// processSourceA handles an incoming SourceA (Apache) event: emit one
// correlated log when a buffered B event with the same key lies within
// the time window, otherwise emit an orphan when ApacheAlwaysEmit is
// set. Caller must hold s.mu; the event itself is buffered by the caller.
func (s *CorrelationService) processSourceA(event *NormalizedEvent) []CorrelatedLog {
	key := event.CorrelationKeyFull()
	// Look for a matching B event
	if elem, ok := s.pendingB[key]; ok {
		bEvent := elem.Value.(*NormalizedEvent)
		if s.eventsMatch(event, bEvent) {
			// Found a match! Consume the B event so it cannot match again.
			correlated := NewCorrelatedLog(event, bEvent)
			s.bufferB.events.Remove(elem)
			delete(s.pendingB, key)
			return []CorrelatedLog{correlated}
		}
	}
	// No match found
	if s.config.ApacheAlwaysEmit {
		orphan := NewCorrelatedLogFromEvent(event, "A")
		return []CorrelatedLog{orphan}
	}
	// Keep in buffer for potential future match
	return nil
}
// processSourceB handles an incoming SourceB (network) event: emit one
// correlated log when a buffered A event with the same key lies within
// the time window, otherwise emit a B orphan only when NetworkEmit is
// set. Caller must hold s.mu; the event itself is buffered by the caller.
func (s *CorrelationService) processSourceB(event *NormalizedEvent) []CorrelatedLog {
	key := event.CorrelationKeyFull()
	// Look for a matching A event
	if elem, ok := s.pendingA[key]; ok {
		aEvent := elem.Value.(*NormalizedEvent)
		if s.eventsMatch(aEvent, event) {
			// Found a match! Consume the A event so it cannot match again.
			correlated := NewCorrelatedLog(aEvent, event)
			s.bufferA.events.Remove(elem)
			delete(s.pendingA, key)
			return []CorrelatedLog{correlated}
		}
	}
	// No match found - B is never emitted alone per spec
	if s.config.NetworkEmit {
		orphan := NewCorrelatedLogFromEvent(event, "B")
		return []CorrelatedLog{orphan}
	}
	// Keep in buffer for potential future match (but won't be emitted alone)
	return nil
}
// eventsMatch reports whether the two events' timestamps lie within the
// configured time window of each other, regardless of order.
func (s *CorrelationService) eventsMatch(a, b *NormalizedEvent) bool {
	delta := a.Timestamp.Sub(b.Timestamp)
	if delta < 0 {
		delta = -delta
	}
	return delta <= s.config.TimeWindow
}
// addEvent appends the event to its source's buffer and indexes it by
// correlation key. Caller must hold s.mu.
// NOTE(review): a second event with the same key overwrites the index
// entry; the older event stays in the list but can no longer be matched
// and is only removed by expiry — confirm this is acceptable.
func (s *CorrelationService) addEvent(event *NormalizedEvent) {
	key := event.CorrelationKeyFull()
	switch event.Source {
	case SourceA:
		elem := s.bufferA.events.PushBack(event)
		s.pendingA[key] = elem
	case SourceB:
		elem := s.bufferB.events.PushBack(event)
		s.pendingB[key] = elem
	}
}
// cleanExpired drops events older than the time window from both
// buffers. Caller must hold s.mu.
func (s *CorrelationService) cleanExpired() {
	cutoff := s.timeProvider.Now().Add(-s.config.TimeWindow)
	s.cleanBuffer(s.bufferA, s.pendingA, cutoff)
	s.cleanBuffer(s.bufferB, s.pendingB, cutoff)
}
// cleanBuffer removes expired events from a buffer (shared logic for A and B).
// It walks from the front (oldest) and stops at the first non-expired
// event, relying on arrival order tracking timestamp order.
// NOTE(review): the early break assumes events arrive roughly in
// timestamp order; out-of-order arrivals could delay expiry — confirm.
func (s *CorrelationService) cleanBuffer(buffer *eventBuffer, pending map[string]*list.Element, cutoff time.Time) {
	for elem := buffer.events.Front(); elem != nil; {
		event := elem.Value.(*NormalizedEvent)
		if event.Timestamp.Before(cutoff) {
			next := elem.Next()
			key := event.CorrelationKeyFull()
			buffer.events.Remove(elem)
			// Only clear the index if it still points at this element;
			// a newer event with the same key may have replaced it.
			if pending[key] == elem {
				delete(pending, key)
			}
			elem = next
		} else {
			break // Events are ordered, so we can stop early
		}
	}
}
// Flush forces emission of remaining buffered events (for shutdown).
// Only buffered A events are emitted, and only when ApacheAlwaysEmit is
// set; buffered B events are always discarded. Buffers and key indexes
// are reset afterwards.
// NOTE(review): under ApacheAlwaysEmit, unmatched A events were already
// emitted as orphans in processSourceA yet remain buffered, so this
// re-emits them at shutdown — confirm the duplicate is intended.
func (s *CorrelationService) Flush() []CorrelatedLog {
	s.mu.Lock()
	defer s.mu.Unlock()
	var results []CorrelatedLog
	// Emit remaining A events as orphans if configured
	if s.config.ApacheAlwaysEmit {
		for elem := s.bufferA.events.Front(); elem != nil; elem = elem.Next() {
			event := elem.Value.(*NormalizedEvent)
			orphan := NewCorrelatedLogFromEvent(event, "A")
			results = append(results, orphan)
		}
	}
	// Clear buffers
	s.bufferA.events.Init()
	s.bufferB.events.Init()
	s.pendingA = make(map[string]*list.Element)
	s.pendingB = make(map[string]*list.Element)
	return results
}
// GetBufferSizes returns the current A and B buffer lengths (for monitoring).
func (s *CorrelationService) GetBufferSizes() (int, int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	a, b := s.bufferA.events.Len(), s.bufferB.events.Len()
	return a, b
}

View File

@ -0,0 +1,153 @@
package domain
import (
"testing"
"time"
)
type mockTimeProvider struct {
now time.Time
}
func (m *mockTimeProvider) Now() time.Time {
return m.now
}
// TestCorrelationService_Match verifies that an A event is buffered
// silently and then correlated with a matching B event inside the window.
func TestCorrelationService_Match(t *testing.T) {
	base := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	clock := &mockTimeProvider{now: base}
	cfg := CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: false, // Don't emit A immediately to test matching
		NetworkEmit:      false,
	}
	service := NewCorrelationService(cfg, clock)
	// Feed the Apache-side event first; with ApacheAlwaysEmit off it
	// must be buffered, producing no output.
	got := service.ProcessEvent(&NormalizedEvent{
		Source:    SourceA,
		Timestamp: base,
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		Raw:       map[string]any{"method": "GET"},
	})
	if len(got) != 0 {
		t.Fatalf("expected 0 results (buffered), got %d", len(got))
	}
	// A network event with the same IP:port half a second later lies
	// inside the one-second window and must correlate.
	got = service.ProcessEvent(&NormalizedEvent{
		Source:    SourceB,
		Timestamp: base.Add(500 * time.Millisecond),
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
		Raw:       map[string]any{"ja3": "abc"},
	})
	if len(got) != 1 {
		t.Errorf("expected 1 result (correlated), got %d", len(got))
	} else if !got[0].Correlated {
		t.Error("expected correlated result")
	}
}
// TestCorrelationService_NoMatch_DifferentIP verifies that events whose
// source IPs differ never correlate, even with identical ports and times.
func TestCorrelationService_NoMatch_DifferentIP(t *testing.T) {
	base := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	clock := &mockTimeProvider{now: base}
	service := NewCorrelationService(CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
		NetworkEmit:      false,
	}, clock)
	service.ProcessEvent(&NormalizedEvent{
		Source:    SourceA,
		Timestamp: base,
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
	})
	// Same port, same timestamp — but a different source IP, so the
	// correlation key cannot match.
	got := service.ProcessEvent(&NormalizedEvent{
		Source:    SourceB,
		Timestamp: base,
		SrcIP:     "192.168.1.2", // Different IP
		SrcPort:   8080,
	})
	if len(got) != 0 {
		t.Errorf("expected 0 results (different IP), got %d", len(got))
	}
}
// TestCorrelationService_NoMatch_TimeWindowExceeded verifies that a B
// event arriving outside the window does not correlate with a buffered A.
func TestCorrelationService_NoMatch_TimeWindowExceeded(t *testing.T) {
	base := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	clock := &mockTimeProvider{now: base}
	service := NewCorrelationService(CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
		NetworkEmit:      false,
	}, clock)
	service.ProcessEvent(&NormalizedEvent{
		Source:    SourceA,
		Timestamp: base,
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
	})
	// Two seconds later is beyond the one-second window.
	got := service.ProcessEvent(&NormalizedEvent{
		Source:    SourceB,
		Timestamp: base.Add(2 * time.Second), // Outside window
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
	})
	if len(got) != 0 {
		t.Errorf("expected 0 results (time window exceeded), got %d", len(got))
	}
}
// TestCorrelationService_Flush verifies that Flush emits a buffered A
// event when ApacheAlwaysEmit is set.
func TestCorrelationService_Flush(t *testing.T) {
	base := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	clock := &mockTimeProvider{now: base}
	service := NewCorrelationService(CorrelationConfig{
		TimeWindow:       time.Second,
		ApacheAlwaysEmit: true,
		NetworkEmit:      false,
	}, clock)
	service.ProcessEvent(&NormalizedEvent{
		Source:    SourceA,
		Timestamp: base,
		SrcIP:     "192.168.1.1",
		SrcPort:   8080,
	})
	if flushed := service.Flush(); len(flushed) != 1 {
		t.Errorf("expected 1 flushed event, got %d", len(flushed))
	}
}

37
internal/domain/event.go Normal file
View File

@ -0,0 +1,37 @@
package domain
import (
"strconv"
"time"
)
// EventSource identifies which pipeline produced an event.
type EventSource string

const (
	SourceA EventSource = "A" // Apache/HTTP source
	SourceB EventSource = "B" // Network source
)

// NormalizedEvent is the unified internal representation of a log event
// from either source.
type NormalizedEvent struct {
	Source    EventSource
	Timestamp time.Time
	SrcIP     string
	SrcPort   int
	DstIP     string
	DstPort   int
	Headers   map[string]string
	Extra     map[string]any
	Raw       map[string]any // Original raw data
}

// CorrelationKey returns the "src_ip:src_port" key used to pair events
// from the two sources.
func (e *NormalizedEvent) CorrelationKey() string {
	// Build the key in one allocation: ip, ':', decimal port.
	b := make([]byte, 0, len(e.SrcIP)+6)
	b = append(b, e.SrcIP...)
	b = append(b, ':')
	b = strconv.AppendInt(b, int64(e.SrcPort), 10)
	return string(b)
}

// CorrelationKeyFull is an alias of CorrelationKey kept for clarity at
// call sites.
func (e *NormalizedEvent) CorrelationKeyFull() string {
	return e.CorrelationKey()
}

View File

@ -0,0 +1,85 @@
package observability
import (
"log"
"os"
"sync"
)
// Logger provides simple leveled logging with optional structured fields.
// All loggers derived via WithFields share one underlying *log.Logger and
// one mutex, so concurrent use of any derivative is safe.
type Logger struct {
	mu     *sync.Mutex    // shared across WithFields derivatives; guards the SetPrefix+Print pair
	logger *log.Logger    // shared output sink
	prefix string         // component name, rendered as "[prefix]"
	fields map[string]any // structured key/value context appended to every line
}

// NewLogger creates a new logger writing to stderr with the given prefix.
func NewLogger(prefix string) *Logger {
	return &Logger{
		mu:     &sync.Mutex{},
		logger: log.New(os.Stderr, "", log.LstdFlags|log.Lmicroseconds),
		prefix: prefix,
		fields: make(map[string]any),
	}
}

// WithFields returns a new logger carrying the union of the receiver's
// fields and the given ones (the argument wins on key collisions). The
// receiver is not modified.
func (l *Logger) WithFields(fields map[string]any) *Logger {
	merged := make(map[string]any, len(l.fields)+len(fields))
	for k, v := range l.fields {
		merged[k] = v
	}
	for k, v := range fields {
		merged[k] = v
	}
	// Share mu and logger: both guard/own the single output stream.
	// (Previously each copy got its own zero-value mutex while sharing
	// the *log.Logger, which made SetPrefix a data race across copies.)
	return &Logger{mu: l.mu, logger: l.logger, prefix: l.prefix, fields: merged}
}

// Info logs an info message.
func (l *Logger) Info(msg string) {
	l.log("INFO", msg)
}

// Error logs an error message, appending err's text when err is non-nil.
func (l *Logger) Error(msg string, err error) {
	if err != nil {
		msg += " " + err.Error()
	}
	l.log("ERROR", msg)
}

// Debug logs a debug message.
func (l *Logger) Debug(msg string) {
	l.log("DEBUG", msg)
}

// log emits one line at the given level. It holds the shared mutex for
// the whole SetPrefix+Print sequence because the underlying *log.Logger
// is shared between every WithFields derivative.
func (l *Logger) log(level, msg string) {
	l.mu.Lock()
	defer l.mu.Unlock()
	prefix := l.prefix
	if prefix != "" {
		prefix = "[" + prefix + "] "
	}
	l.logger.SetPrefix(prefix + level + " ")
	if len(l.fields) == 0 {
		l.logger.Print(msg)
		return
	}
	// Render fields as "key=value" pairs, one %+v verb per value. The
	// previous implementation passed every key AND value to a single
	// %+v verb, which printed "%!(EXTRA ...)" noise. Iteration order is
	// the map's (random) order, as before.
	// NOTE(review): assumes field keys contain no % verbs — confirm.
	format := msg
	args := make([]any, 0, len(l.fields))
	for k, v := range l.fields {
		format += " " + k + "=%+v"
		args = append(args, v)
	}
	l.logger.Printf(format, args...)
}

View File

@ -0,0 +1,111 @@
package observability
import (
"bytes"
"io"
"os"
"strings"
"testing"
)
// TestNewLogger verifies construction stores the prefix.
func TestNewLogger(t *testing.T) {
	l := NewLogger("test")
	if l == nil {
		t.Fatal("expected non-nil logger")
	}
	if got := l.prefix; got != "test" {
		t.Errorf("expected prefix 'test', got %s", got)
	}
}
// captureStderr runs fn while os.Stderr is redirected to a pipe and
// returns everything fn wrote to stderr. fn must also construct the
// logger, since NewLogger snapshots os.Stderr at construction time.
func captureStderr(t *testing.T, fn func()) string {
	t.Helper()
	r, w, err := os.Pipe()
	if err != nil {
		t.Fatalf("os.Pipe: %v", err)
	}
	old := os.Stderr
	os.Stderr = w
	defer func() { os.Stderr = old }()
	fn()
	// Close the write end so io.Copy sees EOF.
	w.Close()
	var buf bytes.Buffer
	io.Copy(&buf, r)
	return buf.String()
}

// TestLogger_Info checks that Info lines carry the level tag and message.
func TestLogger_Info(t *testing.T) {
	output := captureStderr(t, func() {
		NewLogger("test").Info("test message")
	})
	if !strings.Contains(output, "INFO") {
		t.Error("expected INFO in output")
	}
	if !strings.Contains(output, "test message") {
		t.Error("expected 'test message' in output")
	}
}

// TestLogger_Error checks that Error lines carry the level tag and message
// (with a nil error, only the message is printed).
func TestLogger_Error(t *testing.T) {
	output := captureStderr(t, func() {
		NewLogger("test").Error("error message", nil)
	})
	if !strings.Contains(output, "ERROR") {
		t.Error("expected ERROR in output")
	}
	if !strings.Contains(output, "error message") {
		t.Error("expected 'error message' in output")
	}
}

// TestLogger_Debug checks that Debug lines carry the level tag and message.
func TestLogger_Debug(t *testing.T) {
	output := captureStderr(t, func() {
		NewLogger("test").Debug("debug message")
	})
	if !strings.Contains(output, "DEBUG") {
		t.Error("expected DEBUG in output")
	}
	if !strings.Contains(output, "debug message") {
		t.Error("expected 'debug message' in output")
	}
}
// TestLogger_WithFields verifies WithFields returns a distinct logger
// carrying the supplied fields.
func TestLogger_WithFields(t *testing.T) {
	base := NewLogger("test")
	derived := base.WithFields(map[string]any{
		"key1": "value1",
		"key2": 42,
	})
	if derived == base {
		t.Error("expected different logger instance")
	}
	if len(derived.fields) != 2 {
		t.Errorf("expected 2 fields, got %d", len(derived.fields))
	}
}
// TestLogger_Name verifies the constructor keeps the given prefix verbatim.
func TestLogger_Name(t *testing.T) {
	l := NewLogger("myservice")
	if got := l.prefix; got != "myservice" {
		t.Errorf("expected prefix 'myservice', got %s", got)
	}
}

54
internal/ports/source.go Normal file
View File

@ -0,0 +1,54 @@
package ports
import (
"context"
"time"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
// EventSource defines the interface for log sources.
// An implementation reads raw logs from its medium, normalizes them into
// domain.NormalizedEvent values, and forwards them on the channel passed
// to Start until stopped.
type EventSource interface {
	// Start begins reading events and sending them to the channel.
	// Returns an error if the source cannot be started.
	Start(ctx context.Context, eventChan chan<- *domain.NormalizedEvent) error
	// Stop gracefully stops the source.
	Stop() error
	// Name returns the source name.
	Name() string
}
// CorrelatedLogSink defines the interface for correlated log destinations
// (where finished CorrelatedLog records are written).
type CorrelatedLogSink interface {
	// Write sends a correlated log to the sink.
	Write(ctx context.Context, log domain.CorrelatedLog) error
	// Flush flushes any buffered logs.
	Flush(ctx context.Context) error
	// Close closes the sink.
	Close() error
	// Name returns the sink name.
	Name() string
}
// TimeProvider abstracts time for testability: production code supplies
// the real clock, tests inject a fixed or controllable one.
type TimeProvider interface {
	Now() time.Time
}
// CorrelationProcessor defines the interface for the correlation service.
// This allows for easier testing and alternative implementations.
type CorrelationProcessor interface {
	// ProcessEvent processes an incoming event and returns correlated logs.
	// The returned slice may be empty when the event is only buffered.
	ProcessEvent(event *domain.NormalizedEvent) []domain.CorrelatedLog
	// Flush forces emission of remaining buffered events.
	Flush() []domain.CorrelatedLog
	// GetBufferSizes returns the current buffer sizes for monitoring.
	// NOTE(review): presumably in (A, B) order, matching the concrete
	// service — confirm against CorrelationService.GetBufferSizes.
	GetBufferSizes() (int, int)
}