fix: durcir la validation et fiabiliser flush/arrêt idempotents

Co-authored-by: aider (openrouter/openai/gpt-5.3-codex) <aider@aider.chat>
This commit is contained in:
Jacquin Antoine
2026-02-28 20:10:28 +01:00
parent 81849b16d8
commit 7e9535122e
5 changed files with 239 additions and 123 deletions

View File

@ -5,6 +5,7 @@ import (
"database/sql"
"encoding/json"
"fmt"
"strings"
"sync"
"time"
@ -49,10 +50,18 @@ type ClickHouseSink struct {
flushChan chan struct{}
done chan struct{}
wg sync.WaitGroup
closeOnce sync.Once
}
// NewClickHouseSink creates a new ClickHouse sink.
func NewClickHouseSink(config Config) (*ClickHouseSink, error) {
if strings.TrimSpace(config.DSN) == "" {
return nil, fmt.Errorf("clickhouse DSN is required")
}
if strings.TrimSpace(config.Table) == "" {
return nil, fmt.Errorf("clickhouse table is required")
}
// Apply defaults
if config.BatchSize <= 0 {
config.BatchSize = DefaultBatchSize
@ -85,7 +94,7 @@ func NewClickHouseSink(config Config) (*ClickHouseSink, error) {
defer pingCancel()
if err := db.PingContext(pingCtx); err != nil {
db.Close()
_ = db.Close()
return nil, fmt.Errorf("failed to ping ClickHouse: %w", err)
}
@ -143,13 +152,28 @@ func (s *ClickHouseSink) Flush(ctx context.Context) error {
// Close shuts the sink down and releases its resources.
//
// The shutdown sequence is guarded by closeOnce and therefore runs at most
// once: the done channel is closed (which makes flushLoop return), Close
// waits on the sink's WaitGroup, performs one more flush bounded by
// config.TimeoutMs milliseconds, and finally closes the database handle.
//
// The first error produced by the final flush or by closing the database is
// returned. Calls after the first do nothing and return nil.
func (s *ClickHouseSink) Close() error {
	var closeErr error

	s.closeOnce.Do(func() {
		// Closing a channel twice panics; the sync.Once guard is what makes
		// repeated Close calls safe.
		if s.done != nil {
			close(s.done)
		}
		s.wg.Wait()

		flushCtx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
		defer cancel()

		if err := s.doFlush(flushCtx); err != nil {
			closeErr = err
		}

		if s.db != nil {
			// Keep the flush error if there is one; it is the more useful
			// of the two for the caller.
			if err := s.db.Close(); err != nil && closeErr == nil {
				closeErr = err
			}
		}
	})

	return closeErr
}
func (s *ClickHouseSink) flushLoop() {
@ -161,25 +185,30 @@ func (s *ClickHouseSink) flushLoop() {
for {
select {
case <-s.done:
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
_ = s.doFlush(ctx)
cancel()
return
case <-ticker.C:
s.mu.Lock()
needsFlush := len(s.buffer) > 0
s.mu.Unlock()
if needsFlush {
// Use timeout context for flush
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
s.doFlush(ctx)
_ = s.doFlush(ctx)
cancel()
}
case <-s.flushChan:
s.mu.Lock()
needsFlush := len(s.buffer) >= s.config.BatchSize
s.mu.Unlock()
if needsFlush {
// Use timeout context for flush
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.config.TimeoutMs)*time.Millisecond)
s.doFlush(ctx)
_ = s.doFlush(ctx)
cancel()
}
}
@ -199,7 +228,10 @@ func (s *ClickHouseSink) doFlush(ctx context.Context) error {
s.buffer = make([]domain.CorrelatedLog, 0, s.config.BatchSize)
s.mu.Unlock()
// Prepare batch insert with retry
if s.db == nil {
return fmt.Errorf("clickhouse connection is not initialized")
}
query := fmt.Sprintf(`
INSERT INTO %s (timestamp, src_ip, src_port, dst_ip, dst_port, correlated, orphan_side, apache, network)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
@ -209,7 +241,6 @@ func (s *ClickHouseSink) doFlush(ctx context.Context) error {
var lastErr error
for attempt := 0; attempt < MaxRetries; attempt++ {
if attempt > 0 {
// Exponential backoff
delay := RetryBaseDelay * time.Duration(1<<uint(attempt-1))
select {
case <-time.After(delay):
@ -220,10 +251,9 @@ func (s *ClickHouseSink) doFlush(ctx context.Context) error {
lastErr = s.executeBatch(ctx, query, buffer)
if lastErr == nil {
return nil // Success
return nil
}
// Check if error is retryable
if !isRetryableError(lastErr) {
return fmt.Errorf("non-retryable error: %w", lastErr)
}
@ -249,11 +279,6 @@ func (s *ClickHouseSink) executeBatch(ctx context.Context, query string, buffer
apacheJSON, _ := json.Marshal(log.Apache)
networkJSON, _ := json.Marshal(log.Network)
orphanSide := log.OrphanSide
if !log.Correlated {
orphanSide = log.OrphanSide
}
correlated := 0
if log.Correlated {
correlated = 1
@ -266,7 +291,7 @@ func (s *ClickHouseSink) executeBatch(ctx context.Context, query string, buffer
log.DstIP,
log.DstPort,
correlated,
orphanSide,
log.OrphanSide,
string(apacheJSON),
string(networkJSON),
)
@ -287,8 +312,7 @@ func isRetryableError(err error) bool {
if err == nil {
return false
}
errStr := err.Error()
// Common retryable errors
errStr := strings.ToLower(err.Error())
retryableErrors := []string{
"connection refused",
"connection reset",
@ -298,36 +322,9 @@ func isRetryableError(err error) bool {
"broken pipe",
}
for _, re := range retryableErrors {
if containsIgnoreCase(errStr, re) {
if strings.Contains(errStr, re) {
return true
}
}
return false
}
// containsIgnoreCase reports whether substr occurs in s when both are
// compared through containsLower. A substr longer than s can never match,
// so that case is rejected before any lowering is done.
func containsIgnoreCase(s, substr string) bool {
	if len(s) < len(substr) {
		return false
	}
	return containsLower(s, substr)
}
// containsLower reports whether substr occurs in s after both strings have
// been passed through toLower. An empty substr always matches.
func containsLower(s, substr string) bool {
	// strings.Contains replaces the previous hand-written byte-window scan;
	// the result is the same for every input.
	return strings.Contains(toLower(s), toLower(substr))
}
// toLower returns a copy of s in which the ASCII letters 'A'-'Z' are mapped
// to 'a'-'z'. Every other byte, including the bytes of multi-byte UTF-8
// sequences, is copied through unchanged.
func toLower(s string) string {
	// The output has exactly len(s) bytes, so allocate it once instead of
	// growing a slice through repeated append.
	lowered := make([]byte, len(s))
	for i := 0; i < len(s); i++ {
		b := s[i]
		if 'A' <= b && b <= 'Z' {
			b += 'a' - 'A'
		}
		lowered[i] = b
	}
	return string(lowered)
}