feat: observability, IP filtering, stdout/clickhouse fixes (v1.1.11)

- feat(observability): metrics server with /metrics and /health endpoints
- feat(observability): correlation metrics (events, success/failed, reasons, buffers)
- feat(correlation): IP exclusion filter (exact IPs and CIDR ranges)
- feat(correlation): pending orphan delay for late-arriving B events
- fix(stdout): sink is now a no-op for data; JSON must never appear on stdout
- fix(clickhouse): all flush errors were silently discarded, now properly logged
- fix(clickhouse): buffer overflow with DropOnOverflow now logged at WARN
- fix(clickhouse): retry attempts logged at WARN with attempt/delay/error context
- feat(clickhouse): connection success logged at INFO, batch sends at DEBUG
- feat(clickhouse): SetLogger() for external logger injection
- test(stdout): assert stdout remains empty for correlated and orphan logs
- chore(rpm): bump version to 1.1.11, update changelog
- docs: README and architecture.yml updated

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-03-05 11:40:54 +01:00
parent 68f0fcf810
commit e9dcd8ea51
16 changed files with 2035 additions and 116 deletions

View File

@ -2,6 +2,7 @@ package config
import (
"fmt"
"net"
"os"
"strconv"
"strings"
@ -17,6 +18,13 @@ type Config struct {
Inputs InputsConfig `yaml:"inputs"`
Outputs OutputsConfig `yaml:"outputs"`
Correlation CorrelationConfig `yaml:"correlation"`
Metrics MetricsConfig `yaml:"metrics"`
}
// MetricsConfig holds metrics server configuration.
type MetricsConfig struct {
Enabled bool `yaml:"enabled"`
Addr string `yaml:"addr"` // e.g., ":8080", "localhost:8080"
}
// LogConfig holds logging configuration.
@ -85,11 +93,12 @@ type StdoutOutputConfig struct {
// CorrelationConfig holds correlation configuration.
type CorrelationConfig struct {
TimeWindow TimeWindowConfig `yaml:"time_window"`
OrphanPolicy OrphanPolicyConfig `yaml:"orphan_policy"`
Matching MatchingConfig `yaml:"matching"`
Buffers BuffersConfig `yaml:"buffers"`
TTL TTLConfig `yaml:"ttl"`
TimeWindow TimeWindowConfig `yaml:"time_window"`
OrphanPolicy OrphanPolicyConfig `yaml:"orphan_policy"`
Matching MatchingConfig `yaml:"matching"`
Buffers BuffersConfig `yaml:"buffers"`
TTL TTLConfig `yaml:"ttl"`
ExcludeSourceIPs []string `yaml:"exclude_source_ips"` // List of source IPs or CIDR ranges to exclude
// Deprecated: Use TimeWindow.Value instead
TimeWindowS int `yaml:"time_window_s"`
// Deprecated: Use OrphanPolicy.ApacheAlwaysEmit instead
@ -341,3 +350,48 @@ func (c *UnixSocketConfig) GetSocketPermissions() os.FileMode {
return os.FileMode(perms)
}
// GetExcludeSourceIPs returns the list of excluded source IPs or CIDR ranges.
func (c *CorrelationConfig) GetExcludeSourceIPs() []string {
return c.ExcludeSourceIPs
}
// IsSourceIPExcluded checks if a source IP should be excluded.
// Supports both exact IP matches and CIDR ranges.
func (c *CorrelationConfig) IsSourceIPExcluded(ip string) bool {
if len(c.ExcludeSourceIPs) == 0 {
return false
}
// Parse the IP once
parsedIP := net.ParseIP(ip)
if parsedIP == nil {
return false // Invalid IP
}
for _, exclude := range c.ExcludeSourceIPs {
// Try CIDR first
if strings.Contains(exclude, "/") {
_, cidr, err := net.ParseCIDR(exclude)
if err != nil {
continue // Invalid CIDR, skip
}
if cidr.Contains(parsedIP) {
return true
}
} else {
// Exact IP match
if exclude == ip {
return true
}
// Also try parsing as IP (handles different formats like 192.168.1.1 vs 192.168.001.001)
if excludeIP := net.ParseIP(exclude); excludeIP != nil {
if excludeIP.Equal(parsedIP) {
return true
}
}
}
}
return false
}