Initial commit: logcorrelator with unified packaging (DEB + RPM using fpm)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Jacquin Antoine
2026-02-27 15:31:46 +01:00
commit 8fc14c1e94
35 changed files with 4829 additions and 0 deletions

View File

@ -0,0 +1,333 @@
package clickhouse
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"sync"
"time"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
const (
	// DefaultBatchSize is the default number of records per batch insert.
	DefaultBatchSize = 500
	// DefaultFlushIntervalMs is the default flush interval in milliseconds.
	DefaultFlushIntervalMs = 200
	// DefaultMaxBufferSize is the default maximum number of buffered records.
	DefaultMaxBufferSize = 5000
	// DefaultTimeoutMs is the default timeout for flush operations in milliseconds.
	DefaultTimeoutMs = 1000
	// DefaultPingTimeoutMs is the timeout for the initial connection ping.
	DefaultPingTimeoutMs = 5000
	// MaxRetries is the maximum number of attempts for failed batch inserts.
	MaxRetries = 3
	// RetryBaseDelay is the base delay between retries; doFlush doubles it
	// on each subsequent attempt (exponential backoff: 100ms, 200ms, 400ms).
	RetryBaseDelay = 100 * time.Millisecond
)
// Config holds the ClickHouse sink configuration.
// Zero values for the numeric tuning fields are replaced with the
// package defaults by NewClickHouseSink.
type Config struct {
	DSN             string // ClickHouse DSN passed to sql.Open
	Table           string // destination table; interpolated (not parameterized) into the INSERT statement
	BatchSize       int    // records per batch insert; <=0 means DefaultBatchSize
	FlushIntervalMs int    // periodic flush interval in ms; <=0 means DefaultFlushIntervalMs
	MaxBufferSize   int    // maximum buffered records; <=0 means DefaultMaxBufferSize
	DropOnOverflow  bool   // when the buffer is full: true = drop new logs silently, false = block with timeout
	AsyncInsert     bool   // NOTE(review): declared but not referenced in this file — confirm intended use
	TimeoutMs       int    // per-flush/write-wait timeout in ms; <=0 means DefaultTimeoutMs
}
// ClickHouseSink writes correlated logs to ClickHouse.
// Logs are buffered in memory and flushed in batches by a background
// goroutine; Close stops that goroutine and releases the connection.
type ClickHouseSink struct {
	config    Config
	db        *sql.DB                 // opened in NewClickHouseSink; nil if never connected
	mu        sync.Mutex              // guards buffer
	buffer    []domain.CorrelatedLog  // pending records, drained by doFlush
	flushChan chan struct{}           // capacity-1 signal: a full batch is ready
	done      chan struct{}           // closed by Close to stop flushLoop
	wg        sync.WaitGroup          // tracks the flushLoop goroutine
}
// NewClickHouseSink creates a new ClickHouse sink: it applies defaults
// for unset tuning options, opens and pings the connection with a
// bounded timeout, and starts the background flush goroutine.
//
// The returned sink must be released with Close.
func NewClickHouseSink(config Config) (*ClickHouseSink, error) {
	// Apply defaults for any unset (non-positive) tuning knobs.
	if config.BatchSize <= 0 {
		config.BatchSize = DefaultBatchSize
	}
	if config.FlushIntervalMs <= 0 {
		config.FlushIntervalMs = DefaultFlushIntervalMs
	}
	if config.MaxBufferSize <= 0 {
		config.MaxBufferSize = DefaultMaxBufferSize
	}
	if config.TimeoutMs <= 0 {
		config.TimeoutMs = DefaultTimeoutMs
	}
	if config.BatchSize > config.MaxBufferSize {
		// A batch larger than the buffer can never accumulate, which would
		// make the size-triggered flush in Write unreachable; clamp it so
		// batching still works (the interval flush remains a backstop).
		config.BatchSize = config.MaxBufferSize
	}
	s := &ClickHouseSink{
		config:    config,
		buffer:    make([]domain.CorrelatedLog, 0, config.BatchSize),
		flushChan: make(chan struct{}, 1),
		done:      make(chan struct{}),
	}
	// sql.Open only validates its arguments; the ping below establishes
	// and verifies the actual connection.
	db, err := sql.Open("clickhouse", config.DSN)
	if err != nil {
		return nil, fmt.Errorf("failed to connect to ClickHouse: %w", err)
	}
	// Ping with a bounded timeout so a dead server fails fast.
	pingCtx, pingCancel := context.WithTimeout(context.Background(), time.Duration(DefaultPingTimeoutMs)*time.Millisecond)
	defer pingCancel()
	if err := db.PingContext(pingCtx); err != nil {
		db.Close()
		return nil, fmt.Errorf("failed to ping ClickHouse: %w", err)
	}
	s.db = db
	// Start the background flush goroutine; Close waits for it via wg.
	s.wg.Add(1)
	go s.flushLoop()
	return s, nil
}
// Name reports the identifier of this sink implementation.
func (s *ClickHouseSink) Name() string {
	const sinkName = "clickhouse"
	return sinkName
}
// Write buffers a correlated log for asynchronous insertion.
//
// When the buffer is at MaxBufferSize, behaviour depends on config:
// with DropOnOverflow the log is silently discarded; otherwise Write
// waits until space frees up, the context is cancelled, or TimeoutMs
// elapses.
func (s *ClickHouseSink) Write(ctx context.Context, log domain.CorrelatedLog) error {
	s.mu.Lock()
	if len(s.buffer) >= s.config.MaxBufferSize {
		if s.config.DropOnOverflow {
			// Drop the log silently.
			s.mu.Unlock()
			return nil
		}
		// Wait for the flush goroutine to free space. The mutex MUST be
		// released while waiting: doFlush needs it to drain the buffer,
		// so holding it here would deadlock and guarantee a timeout
		// (this was the previous behaviour).
		timeout := time.NewTimer(time.Duration(s.config.TimeoutMs) * time.Millisecond)
		defer timeout.Stop()
		poll := time.NewTicker(time.Millisecond)
		defer poll.Stop()
		for len(s.buffer) >= s.config.MaxBufferSize {
			s.mu.Unlock()
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-timeout.C:
				return fmt.Errorf("buffer full, timeout exceeded")
			case <-poll.C:
				// Re-check buffer occupancy under the lock.
			}
			s.mu.Lock()
		}
	}
	s.buffer = append(s.buffer, log)
	// Signal the flush goroutine when a full batch has accumulated;
	// non-blocking send because one pending signal is enough.
	if len(s.buffer) >= s.config.BatchSize {
		select {
		case s.flushChan <- struct{}{}:
		default:
		}
	}
	s.mu.Unlock()
	return nil
}
// Flush synchronously drains the current buffer to ClickHouse.
// It is a no-op (and never touches the database) when the buffer is empty.
func (s *ClickHouseSink) Flush(ctx context.Context) error {
	return s.doFlush(ctx)
}
// Close stops the background flush goroutine, performs a final
// best-effort flush of any still-buffered logs, and closes the database
// connection. It must be called at most once: a second call would panic
// closing the done channel.
func (s *ClickHouseSink) Close() error {
	close(s.done)
	s.wg.Wait()
	if s.db == nil {
		return nil
	}
	// Final flush so buffered logs are not silently dropped on shutdown.
	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
	flushErr := s.doFlush(ctx)
	cancel()
	if err := s.db.Close(); err != nil {
		return err
	}
	return flushErr
}
// flushLoop runs in the background until Close signals done. It flushes
// on a fixed interval when anything is buffered, and immediately when
// Write signals that a full batch has accumulated.
func (s *ClickHouseSink) flushLoop() {
	defer s.wg.Done()
	ticker := time.NewTicker(time.Duration(s.config.FlushIntervalMs) * time.Millisecond)
	defer ticker.Stop()
	// flushIfAtLeast flushes (with a bounded timeout) only when the
	// buffer holds at least threshold records.
	flushIfAtLeast := func(threshold int) {
		s.mu.Lock()
		pending := len(s.buffer)
		s.mu.Unlock()
		if pending < threshold {
			return
		}
		ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
		defer cancel()
		s.doFlush(ctx)
	}
	for {
		select {
		case <-s.done:
			return
		case <-ticker.C:
			flushIfAtLeast(1)
		case <-s.flushChan:
			flushIfAtLeast(s.config.BatchSize)
		}
	}
}
// doFlush atomically swaps out the current buffer and inserts its
// contents into ClickHouse, retrying transient failures with
// exponential backoff.
//
// NOTE(review): once the buffer is swapped out, records are lost if all
// retries fail or a non-retryable error occurs — they are not re-queued.
func (s *ClickHouseSink) doFlush(ctx context.Context) error {
	s.mu.Lock()
	if len(s.buffer) == 0 {
		s.mu.Unlock()
		return nil
	}
	// Copy the buffer and reset it so the lock is not held during the
	// (potentially slow) insert; concurrent Writes continue unblocked.
	buffer := make([]domain.CorrelatedLog, len(s.buffer))
	copy(buffer, s.buffer)
	s.buffer = make([]domain.CorrelatedLog, 0, s.config.BatchSize)
	s.mu.Unlock()
	// Prepare batch insert with retry. The table name is interpolated,
	// not parameterized, so it must come from trusted configuration.
	query := fmt.Sprintf(`
		INSERT INTO %s (timestamp, src_ip, src_port, dst_ip, dst_port, correlated, orphan_side, apache, network)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
	`, s.config.Table)
	// Retry logic with exponential backoff (RetryBaseDelay doubling
	// before attempts 2..MaxRetries).
	var lastErr error
	for attempt := 0; attempt < MaxRetries; attempt++ {
		if attempt > 0 {
			// Exponential backoff, abandoned early on context cancellation.
			delay := RetryBaseDelay * time.Duration(1<<uint(attempt-1))
			select {
			case <-time.After(delay):
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		lastErr = s.executeBatch(ctx, query, buffer)
		if lastErr == nil {
			return nil // Success
		}
		// Only keep retrying for transient (network-ish) failures.
		if !isRetryableError(lastErr) {
			return fmt.Errorf("non-retryable error: %w", lastErr)
		}
	}
	return fmt.Errorf("failed after %d retries: %w", MaxRetries, lastErr)
}
// executeBatch inserts buffer into ClickHouse inside a single
// transaction with one prepared statement reused per row. On any row
// failure the whole transaction is rolled back.
func (s *ClickHouseSink) executeBatch(ctx context.Context, query string, buffer []domain.CorrelatedLog) error {
	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("failed to begin transaction: %w", err)
	}
	// Rollback is a no-op after a successful Commit.
	defer tx.Rollback()
	stmt, err := tx.PrepareContext(ctx, query)
	if err != nil {
		return fmt.Errorf("failed to prepare statement: %w", err)
	}
	defer stmt.Close()
	for _, log := range buffer {
		// Marshal errors are deliberately ignored: a best-effort JSON
		// column ("null" on failure) beats aborting the whole batch.
		apacheJSON, _ := json.Marshal(log.Apache)
		networkJSON, _ := json.Marshal(log.Network)
		// Encode the boolean as 0/1 for the numeric `correlated` column.
		correlated := 0
		if log.Correlated {
			correlated = 1
		}
		// (Removed dead code: the previous version copied log.OrphanSide
		// into a local and then re-assigned the same value when
		// !log.Correlated — both branches were identical.)
		_, err := stmt.ExecContext(ctx,
			log.Timestamp,
			log.SrcIP,
			log.SrcPort,
			log.DstIP,
			log.DstPort,
			correlated,
			log.OrphanSide,
			string(apacheJSON),
			string(networkJSON),
		)
		if err != nil {
			return fmt.Errorf("failed to execute insert: %w", err)
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("failed to commit transaction: %w", err)
	}
	return nil
}
// isRetryableError reports whether err looks like a transient failure
// (network hiccup or timeout) worth retrying, based on case-insensitive
// substring matching against known error markers.
func isRetryableError(err error) bool {
	if err == nil {
		return false
	}
	msg := err.Error()
	for _, marker := range [...]string{
		"connection refused",
		"connection reset",
		"timeout",
		"temporary failure",
		"network is unreachable",
		"broken pipe",
	} {
		if containsIgnoreCase(msg, marker) {
			return true
		}
	}
	return false
}
// containsIgnoreCase reports whether substr occurs in s, ignoring ASCII case.
func containsIgnoreCase(s, substr string) bool {
	if len(s) < len(substr) {
		return false
	}
	return containsLower(s, substr)
}
// containsLower lowercases both strings (ASCII only) and scans for
// needle at every possible start offset in haystack.
func containsLower(s, substr string) bool {
	haystack := toLower(s)
	needle := toLower(substr)
	limit := len(haystack) - len(needle)
	for start := 0; start <= limit; start++ {
		if haystack[start:start+len(needle)] == needle {
			return true
		}
	}
	return false
}
// toLower returns s with ASCII letters 'A'-'Z' mapped to lowercase.
// Deliberately ASCII-only: the retryable-error markers are plain ASCII.
// Uses a single up-front []byte allocation instead of the previous
// byte-by-byte append, which repeatedly regrew the slice.
func toLower(s string) string {
	b := []byte(s)
	for i := 0; i < len(b); i++ {
		if c := b[i]; c >= 'A' && c <= 'Z' {
			b[i] = c + ('a' - 'A')
		}
	}
	return string(b)
}

View File

@ -0,0 +1,305 @@
package clickhouse
import (
"context"
"testing"
"time"
"github.com/logcorrelator/logcorrelator/internal/domain"
)
// TestClickHouseSink_Name verifies the sink reports its fixed identifier.
func TestClickHouseSink_Name(t *testing.T) {
	s := &ClickHouseSink{
		config: Config{
			DSN:   "clickhouse://test:test@localhost:9000/test",
			Table: "test_table",
		},
	}
	got := s.Name()
	if got != "clickhouse" {
		t.Errorf("expected name 'clickhouse', got %s", got)
	}
}
// TestClickHouseSink_ConfigDefaults mirrors the defaulting logic of
// NewClickHouseSink (which cannot be invoked here without a reachable
// ClickHouse instance) and checks every default lands.
func TestClickHouseSink_ConfigDefaults(t *testing.T) {
	cfg := Config{
		DSN:   "clickhouse://test:test@localhost:9000/test",
		Table: "test_table",
		// Remaining fields stay zero so every default path is exercised.
	}
	if cfg.BatchSize <= 0 {
		cfg.BatchSize = DefaultBatchSize
	}
	if cfg.FlushIntervalMs <= 0 {
		cfg.FlushIntervalMs = DefaultFlushIntervalMs
	}
	if cfg.MaxBufferSize <= 0 {
		cfg.MaxBufferSize = DefaultMaxBufferSize
	}
	if cfg.TimeoutMs <= 0 {
		cfg.TimeoutMs = DefaultTimeoutMs
	}
	if cfg.BatchSize != DefaultBatchSize {
		t.Errorf("expected BatchSize %d, got %d", DefaultBatchSize, cfg.BatchSize)
	}
	if cfg.FlushIntervalMs != DefaultFlushIntervalMs {
		t.Errorf("expected FlushIntervalMs %d, got %d", DefaultFlushIntervalMs, cfg.FlushIntervalMs)
	}
	if cfg.MaxBufferSize != DefaultMaxBufferSize {
		t.Errorf("expected MaxBufferSize %d, got %d", DefaultMaxBufferSize, cfg.MaxBufferSize)
	}
	if cfg.TimeoutMs != DefaultTimeoutMs {
		t.Errorf("expected TimeoutMs %d, got %d", DefaultTimeoutMs, cfg.TimeoutMs)
	}
}
// TestClickHouseSink_Write_BufferOverflow exercises the overflow path of
// Write directly. No ClickHouse connection is needed: Write only touches
// the in-memory buffer. (The previous version only validated the config
// and never actually tested overflow.)
func TestClickHouseSink_Write_BufferOverflow(t *testing.T) {
	config := Config{
		DSN:             "clickhouse://test:test@localhost:9000/test",
		Table:           "test_table",
		BatchSize:       10,
		MaxBufferSize:   10,
		DropOnOverflow:  true,
		TimeoutMs:       100,
		FlushIntervalMs: 1000,
	}
	if config.BatchSize > config.MaxBufferSize {
		t.Error("BatchSize should not exceed MaxBufferSize")
	}
	s := &ClickHouseSink{
		config:    config,
		buffer:    make([]domain.CorrelatedLog, 0, config.MaxBufferSize),
		flushChan: make(chan struct{}, 1),
	}
	log := domain.CorrelatedLog{SrcIP: "10.0.0.1", SrcPort: 1234}
	for i := 0; i < config.MaxBufferSize; i++ {
		if err := s.Write(context.Background(), log); err != nil {
			t.Fatalf("unexpected error filling buffer: %v", err)
		}
	}
	// Buffer is now full; with DropOnOverflow the next write is dropped
	// silently and the buffer does not grow.
	if err := s.Write(context.Background(), log); err != nil {
		t.Errorf("expected overflow write to be dropped silently, got %v", err)
	}
	if len(s.buffer) != config.MaxBufferSize {
		t.Errorf("expected buffer to stay at %d, got %d", config.MaxBufferSize, len(s.buffer))
	}
}
// TestClickHouseSink_IsRetryableError checks the transient-vs-permanent
// classification across representative error messages.
func TestClickHouseSink_IsRetryableError(t *testing.T) {
	cases := []struct {
		name     string
		err      error
		expected bool
	}{
		{"nil error", nil, false},
		{"connection refused", &mockError{"connection refused"}, true},
		{"connection reset", &mockError{"connection reset by peer"}, true},
		{"timeout", &mockError{"timeout waiting for response"}, true},
		{"network unreachable", &mockError{"network is unreachable"}, true},
		{"broken pipe", &mockError{"broken pipe"}, true},
		{"syntax error", &mockError{"syntax error in SQL"}, false},
		{"table not found", &mockError{"table test not found"}, false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := isRetryableError(tc.err); got != tc.expected {
				t.Errorf("expected %v, got %v", tc.expected, got)
			}
		})
	}
}
// TestClickHouseSink_FlushEmpty verifies that flushing an empty buffer
// is a safe no-op: doFlush returns before touching the (nil) database.
func TestClickHouseSink_FlushEmpty(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{
			DSN:   "clickhouse://test:test@localhost:9000/test",
			Table: "test_table",
		},
		buffer: []domain.CorrelatedLog{},
	}
	if err := sink.Flush(context.Background()); err != nil {
		t.Errorf("expected no error on empty flush, got %v", err)
	}
}
func TestClickHouseSink_CloseWithoutConnect(t *testing.T) {
// Test that closing without connecting doesn't panic
s := &ClickHouseSink{
config: Config{
DSN: "clickhouse://test:test@localhost:9000/test",
Table: "test_table",
},
buffer: make([]domain.CorrelatedLog, 0),
done: make(chan struct{}),
}
err := s.Close()
if err != nil {
t.Errorf("expected no error on close without connect, got %v", err)
}
}
// TestClickHouseSink_Constants sanity-checks that every tuning constant
// is positive; a zero or negative value would disable batching, flushing,
// or retries entirely.
func TestClickHouseSink_Constants(t *testing.T) {
	// Verify constants have reasonable values
	if DefaultBatchSize <= 0 {
		t.Error("DefaultBatchSize should be positive")
	}
	if DefaultFlushIntervalMs <= 0 {
		t.Error("DefaultFlushIntervalMs should be positive")
	}
	if DefaultMaxBufferSize <= 0 {
		t.Error("DefaultMaxBufferSize should be positive")
	}
	if DefaultTimeoutMs <= 0 {
		t.Error("DefaultTimeoutMs should be positive")
	}
	if DefaultPingTimeoutMs <= 0 {
		t.Error("DefaultPingTimeoutMs should be positive")
	}
	if MaxRetries <= 0 {
		t.Error("MaxRetries should be positive")
	}
	if RetryBaseDelay <= 0 {
		t.Error("RetryBaseDelay should be positive")
	}
}
// mockError implements error with a fixed message, used to drive the
// retryability-classification tests.
type mockError struct {
	msg string // exact string returned by Error()
}
// Error returns the fixed message verbatim.
func (e *mockError) Error() string {
	return e.msg
}
// TestClickHouseSink_DoFlushEmpty calls doFlush directly with nothing
// buffered; it must short-circuit before any database access.
func TestClickHouseSink_DoFlushEmpty(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{
			DSN:   "clickhouse://test:test@localhost:9000/test",
			Table: "test_table",
		},
		buffer: []domain.CorrelatedLog{},
	}
	if err := sink.doFlush(context.Background()); err != nil {
		t.Errorf("expected no error when flushing empty buffer, got %v", err)
	}
}
// TestClickHouseSink_BufferManagement verifies Write appends to the
// in-memory buffer (no DB operations are needed below MaxBufferSize).
func TestClickHouseSink_BufferManagement(t *testing.T) {
	entry := domain.CorrelatedLog{
		SrcIP:      "192.168.1.1",
		SrcPort:    8080,
		Correlated: true,
	}
	sink := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  100, // plenty of headroom; overflow never triggers
			DropOnOverflow: false,
			TimeoutMs:      1000,
		},
		buffer: []domain.CorrelatedLog{entry},
	}
	if got := len(sink.buffer); got != 1 {
		t.Fatalf("expected buffer length 1, got %d", got)
	}
	if err := sink.Write(context.Background(), entry); err != nil {
		t.Errorf("unexpected error on Write: %v", err)
	}
	if got := len(sink.buffer); got != 2 {
		t.Errorf("expected buffer length 2 after Write, got %d", got)
	}
}
// TestClickHouseSink_Write_ContextCancel verifies that a write into a
// full buffer aborts with an error when the caller's context is already
// cancelled, instead of waiting for the timeout.
func TestClickHouseSink_Write_ContextCancel(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  1,
			DropOnOverflow: false,
			TimeoutMs:      10,
		},
		buffer: make([]domain.CorrelatedLog, 0, 1),
	}
	// Pre-fill the single-slot buffer so the next Write must wait.
	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	sink.buffer = append(sink.buffer, entry)
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // already cancelled before the write
	if err := sink.Write(ctx, entry); err == nil {
		t.Error("expected error when writing with cancelled context")
	}
}
// TestClickHouseSink_Write_DropOnOverflow verifies that with
// DropOnOverflow enabled, a write into a full buffer is discarded
// silently (nil error) rather than blocking or failing.
func TestClickHouseSink_Write_DropOnOverflow(t *testing.T) {
	sink := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  1,
			DropOnOverflow: true,
			TimeoutMs:      10,
		},
		buffer: make([]domain.CorrelatedLog, 0, 1),
	}
	// Fill the single-slot buffer.
	entry := domain.CorrelatedLog{SrcIP: "192.168.1.1", SrcPort: 8080}
	sink.buffer = append(sink.buffer, entry)
	if err := sink.Write(context.Background(), entry); err != nil {
		t.Errorf("expected no error when DropOnOverflow is true, got %v", err)
	}
}
// BenchmarkClickHouseSink_Write measures the in-memory buffering fast
// path of Write (no DB involved). The buffer is reset whenever it fills
// so every iteration measures the append path; previously, once the
// 10000-slot buffer filled, all remaining iterations measured the
// drop-on-overflow path instead, skewing the result.
func BenchmarkClickHouseSink_Write(b *testing.B) {
	s := &ClickHouseSink{
		config: Config{
			DSN:            "clickhouse://test:test@localhost:9000/test",
			Table:          "test_table",
			MaxBufferSize:  10000,
			DropOnOverflow: true,
		},
		buffer: make([]domain.CorrelatedLog, 0, 10000),
	}
	log := domain.CorrelatedLog{
		Timestamp:  time.Now(),
		SrcIP:      "192.168.1.1",
		SrcPort:    8080,
		Correlated: true,
	}
	ctx := context.Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if len(s.buffer) >= s.config.MaxBufferSize {
			// Reuse the backing array; keeps us on the append path.
			s.buffer = s.buffer[:0]
		}
		if err := s.Write(ctx, log); err != nil {
			b.Fatal(err)
		}
	}
}