- feat(observability): metrics server with /metrics and /health endpoints - feat(observability): correlation metrics (events, success/failed, reasons, buffers) - feat(correlation): IP exclusion filter (exact IPs and CIDR ranges) - feat(correlation): pending orphan delay for late-arriving B events - fix(stdout): sink is now a no-op for data; JSON must never appear on stdout - fix(clickhouse): all flush errors were silently discarded, now properly logged - fix(clickhouse): buffer overflow with DropOnOverflow now logged at WARN - fix(clickhouse): retry attempts logged at WARN with attempt/delay/error context - feat(clickhouse): connection success logged at INFO, batch sends at DEBUG - feat(clickhouse): SetLogger() for external logger injection - test(stdout): assert stdout remains empty for correlated and orphan logs - chore(rpm): bump version to 1.1.11, update changelog - docs: README and architecture.yml updated Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
398 lines
11 KiB
Go
398 lines
11 KiB
Go
package config
|
|
|
|
import (
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/logcorrelator/logcorrelator/internal/domain"
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// Config holds the complete application configuration.
|
|
type Config struct {
|
|
Log LogConfig `yaml:"log"`
|
|
Inputs InputsConfig `yaml:"inputs"`
|
|
Outputs OutputsConfig `yaml:"outputs"`
|
|
Correlation CorrelationConfig `yaml:"correlation"`
|
|
Metrics MetricsConfig `yaml:"metrics"`
|
|
}
|
|
|
|
// MetricsConfig is the "metrics" section: settings for the metrics server.
type MetricsConfig struct {
	// Enabled is read from the "enabled" key.
	Enabled bool `yaml:"enabled"`
	// Addr is the listen address, e.g. ":8080" or "localhost:8080".
	Addr string `yaml:"addr"`
}
|
|
|
|
// LogConfig is the "log" section: logging settings.
type LogConfig struct {
	// Level is the raw configured level: DEBUG, INFO, WARN or ERROR.
	// Use GetLevel to obtain the effective value.
	Level string `yaml:"level"`
}
|
|
|
|
// GetLogLevel returns the log level, defaulting to INFO if not set.
|
|
func (c *LogConfig) GetLevel() string {
|
|
if c.Level == "" {
|
|
return "INFO"
|
|
}
|
|
return strings.ToUpper(c.Level)
|
|
}
|
|
|
|
// ServiceConfig describes service-level settings.
type ServiceConfig struct {
	// Name is read from the "name" key.
	Name string `yaml:"name"`
	// Language is read from the "language" key.
	Language string `yaml:"language"`
}
|
|
|
|
// InputsConfig holds input sources configuration.
|
|
type InputsConfig struct {
|
|
UnixSockets []UnixSocketConfig `yaml:"unix_sockets"`
|
|
}
|
|
|
|
// UnixSocketConfig describes a single Unix socket source.
type UnixSocketConfig struct {
	// Name identifies the source; Config.Validate requires it to be
	// non-blank and unique.
	Name string `yaml:"name"`
	// Path is the socket path; Config.Validate requires it to be non-blank
	// and unique.
	Path string `yaml:"path"`
	// Format is read from the "format" key.
	Format string `yaml:"format"`
	// SourceType is "A" for Apache/HTTP or "B" for Network.
	SourceType string `yaml:"source_type"`
	// SocketPermissions is an octal string such as "0660" or "0666".
	// See GetSocketPermissions for how it is interpreted.
	SocketPermissions string `yaml:"socket_permissions"`
}
|
|
|
|
// OutputsConfig holds output sinks configuration.
|
|
type OutputsConfig struct {
|
|
File FileOutputConfig `yaml:"file"`
|
|
ClickHouse ClickHouseOutputConfig `yaml:"clickhouse"`
|
|
Stdout StdoutOutputConfig `yaml:"stdout"`
|
|
}
|
|
|
|
// FileOutputConfig is the "file" sink section.
type FileOutputConfig struct {
	// Path is the output file path. Config.Validate counts the file sink as
	// enabled whenever Path is non-empty.
	Path string `yaml:"path"`
}
|
|
|
|
// ClickHouseOutputConfig is the "clickhouse" sink section.
//
// When Enabled is true, Config.Validate requires a non-blank DSN and Table
// and positive BatchSize, MaxBufferSize and TimeoutMs.
type ClickHouseOutputConfig struct {
	// Enabled turns the sink on.
	Enabled bool `yaml:"enabled"`
	// DSN is read from the "dsn" key.
	DSN string `yaml:"dsn"`
	// Table is read from the "table" key.
	Table string `yaml:"table"`
	// BatchSize is read from the "batch_size" key.
	BatchSize int `yaml:"batch_size"`
	// FlushIntervalMs is read from the "flush_interval_ms" key (milliseconds).
	FlushIntervalMs int `yaml:"flush_interval_ms"`
	// MaxBufferSize is read from the "max_buffer_size" key.
	MaxBufferSize int `yaml:"max_buffer_size"`
	// DropOnOverflow is read from the "drop_on_overflow" key.
	DropOnOverflow bool `yaml:"drop_on_overflow"`
	// AsyncInsert is read from the "async_insert" key.
	AsyncInsert bool `yaml:"async_insert"`
	// TimeoutMs is read from the "timeout_ms" key (milliseconds).
	TimeoutMs int `yaml:"timeout_ms"`
}
|
|
|
|
// StdoutOutputConfig is the "stdout" sink section.
type StdoutOutputConfig struct {
	// Enabled turns the sink on.
	Enabled bool `yaml:"enabled"`
	// Level is DEBUG, INFO, WARN or ERROR and filters output verbosity.
	Level string `yaml:"level"`
}
|
|
|
|
// CorrelationConfig holds correlation configuration.
|
|
type CorrelationConfig struct {
|
|
TimeWindow TimeWindowConfig `yaml:"time_window"`
|
|
OrphanPolicy OrphanPolicyConfig `yaml:"orphan_policy"`
|
|
Matching MatchingConfig `yaml:"matching"`
|
|
Buffers BuffersConfig `yaml:"buffers"`
|
|
TTL TTLConfig `yaml:"ttl"`
|
|
ExcludeSourceIPs []string `yaml:"exclude_source_ips"` // List of source IPs or CIDR ranges to exclude
|
|
// Deprecated: Use TimeWindow.Value instead
|
|
TimeWindowS int `yaml:"time_window_s"`
|
|
// Deprecated: Use OrphanPolicy.ApacheAlwaysEmit instead
|
|
EmitOrphans bool `yaml:"emit_orphans"`
|
|
}
|
|
|
|
// TimeWindowConfig is the "time_window" sub-section: a magnitude plus a unit.
type TimeWindowConfig struct {
	// Value is the magnitude of the window.
	Value int `yaml:"value"`
	// Unit is the unit of Value (s, ms, etc.); see GetDuration for the
	// accepted spellings.
	Unit string `yaml:"unit"`
}
|
|
|
|
// GetDuration returns the time window as a duration.
|
|
func (c *TimeWindowConfig) GetDuration() time.Duration {
|
|
value := c.Value
|
|
if value <= 0 {
|
|
value = 1
|
|
}
|
|
switch c.Unit {
|
|
case "ms", "millisecond", "milliseconds":
|
|
return time.Duration(value) * time.Millisecond
|
|
case "s", "sec", "second", "seconds":
|
|
fallthrough
|
|
default:
|
|
return time.Duration(value) * time.Second
|
|
}
|
|
}
|
|
|
|
// OrphanPolicyConfig is the "orphan_policy" sub-section: how orphan events
// are handled.
type OrphanPolicyConfig struct {
	// ApacheAlwaysEmit is read from the "apache_always_emit" key.
	ApacheAlwaysEmit bool `yaml:"apache_always_emit"`
	// ApacheEmitDelayMs is the delay in milliseconds before emitting an
	// orphan A event.
	ApacheEmitDelayMs int `yaml:"apache_emit_delay_ms"`
	// NetworkEmit is read from the "network_emit" key.
	NetworkEmit bool `yaml:"network_emit"`
}
|
|
|
|
// MatchingConfig is the "matching" sub-section.
type MatchingConfig struct {
	// Mode is the matching mode: one_to_one or one_to_many.
	Mode string `yaml:"mode"`
}
|
|
|
|
// BuffersConfig is the "buffers" sub-section: buffer size limits.
type BuffersConfig struct {
	// MaxHTTPItems is read from the "max_http_items" key.
	MaxHTTPItems int `yaml:"max_http_items"`
	// MaxNetworkItems is read from the "max_network_items" key.
	MaxNetworkItems int `yaml:"max_network_items"`
}
|
|
|
|
// TTLConfig is the "ttl" sub-section.
type TTLConfig struct {
	// NetworkTTLS is the network TTL in seconds.
	NetworkTTLS int `yaml:"network_ttl_s"`
}
|
|
|
|
// Load loads configuration from a YAML file.
|
|
func Load(path string) (*Config, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to read config file: %w", err)
|
|
}
|
|
|
|
cfg := defaultConfig()
|
|
|
|
if err := yaml.Unmarshal(data, cfg); err != nil {
|
|
return nil, fmt.Errorf("failed to parse config file: %w", err)
|
|
}
|
|
|
|
if err := cfg.Validate(); err != nil {
|
|
return nil, fmt.Errorf("invalid config: %w", err)
|
|
}
|
|
|
|
return cfg, nil
|
|
}
|
|
|
|
// defaultConfig returns a Config with default values.
|
|
func defaultConfig() *Config {
|
|
return &Config{
|
|
Log: LogConfig{
|
|
Level: "INFO",
|
|
},
|
|
Inputs: InputsConfig{
|
|
UnixSockets: make([]UnixSocketConfig, 0),
|
|
},
|
|
Outputs: OutputsConfig{
|
|
File: FileOutputConfig{
|
|
Path: "/var/log/logcorrelator/correlated.log",
|
|
},
|
|
ClickHouse: ClickHouseOutputConfig{
|
|
Enabled: false,
|
|
BatchSize: 500,
|
|
FlushIntervalMs: 200,
|
|
MaxBufferSize: 5000,
|
|
DropOnOverflow: true,
|
|
AsyncInsert: true,
|
|
TimeoutMs: 1000,
|
|
},
|
|
Stdout: StdoutOutputConfig{Enabled: false},
|
|
},
|
|
Correlation: CorrelationConfig{
|
|
TimeWindowS: 1,
|
|
EmitOrphans: true,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Validate validates the configuration.
|
|
func (c *Config) Validate() error {
|
|
if len(c.Inputs.UnixSockets) < 2 {
|
|
return fmt.Errorf("at least two unix socket inputs are required")
|
|
}
|
|
|
|
seenNames := make(map[string]struct{}, len(c.Inputs.UnixSockets))
|
|
seenPaths := make(map[string]struct{}, len(c.Inputs.UnixSockets))
|
|
|
|
for i, input := range c.Inputs.UnixSockets {
|
|
if strings.TrimSpace(input.Name) == "" {
|
|
return fmt.Errorf("inputs.unix_sockets[%d].name is required", i)
|
|
}
|
|
if strings.TrimSpace(input.Path) == "" {
|
|
return fmt.Errorf("inputs.unix_sockets[%d].path is required", i)
|
|
}
|
|
|
|
if _, exists := seenNames[input.Name]; exists {
|
|
return fmt.Errorf("duplicate unix socket input name: %s", input.Name)
|
|
}
|
|
seenNames[input.Name] = struct{}{}
|
|
|
|
if _, exists := seenPaths[input.Path]; exists {
|
|
return fmt.Errorf("duplicate unix socket input path: %s", input.Path)
|
|
}
|
|
seenPaths[input.Path] = struct{}{}
|
|
}
|
|
|
|
// At least one output must be enabled
|
|
hasOutput := false
|
|
if c.Outputs.File.Path != "" {
|
|
hasOutput = true
|
|
}
|
|
if c.Outputs.ClickHouse.Enabled {
|
|
hasOutput = true
|
|
}
|
|
if c.Outputs.Stdout.Enabled {
|
|
hasOutput = true
|
|
}
|
|
|
|
if !hasOutput {
|
|
return fmt.Errorf("at least one output must be enabled (file, clickhouse, or stdout)")
|
|
}
|
|
|
|
if c.Outputs.ClickHouse.Enabled {
|
|
if strings.TrimSpace(c.Outputs.ClickHouse.DSN) == "" {
|
|
return fmt.Errorf("clickhouse DSN is required when enabled")
|
|
}
|
|
if strings.TrimSpace(c.Outputs.ClickHouse.Table) == "" {
|
|
return fmt.Errorf("clickhouse table is required when enabled")
|
|
}
|
|
if c.Outputs.ClickHouse.BatchSize <= 0 {
|
|
return fmt.Errorf("clickhouse batch_size must be > 0")
|
|
}
|
|
if c.Outputs.ClickHouse.MaxBufferSize <= 0 {
|
|
return fmt.Errorf("clickhouse max_buffer_size must be > 0")
|
|
}
|
|
if c.Outputs.ClickHouse.TimeoutMs <= 0 {
|
|
return fmt.Errorf("clickhouse timeout_ms must be > 0")
|
|
}
|
|
}
|
|
|
|
if c.Correlation.TimeWindowS <= 0 {
|
|
return fmt.Errorf("correlation.time_window_s must be > 0")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetTimeWindow returns the time window as a duration.
|
|
// Deprecated: Use TimeWindow.GetDuration() instead.
|
|
func (c *CorrelationConfig) GetTimeWindow() time.Duration {
|
|
// New config takes precedence
|
|
if c.TimeWindow.Value > 0 {
|
|
return c.TimeWindow.GetDuration()
|
|
}
|
|
// Fallback to deprecated field
|
|
value := c.TimeWindowS
|
|
if value <= 0 {
|
|
value = 1
|
|
}
|
|
return time.Duration(value) * time.Second
|
|
}
|
|
|
|
// GetApacheAlwaysEmit returns whether to always emit Apache events.
|
|
func (c *CorrelationConfig) GetApacheAlwaysEmit() bool {
|
|
if c.OrphanPolicy.ApacheAlwaysEmit {
|
|
return true
|
|
}
|
|
// Fallback to deprecated field
|
|
return c.EmitOrphans
|
|
}
|
|
|
|
// GetApacheEmitDelayMs returns the delay in milliseconds before emitting orphan A events.
|
|
func (c *CorrelationConfig) GetApacheEmitDelayMs() int {
|
|
if c.OrphanPolicy.ApacheEmitDelayMs > 0 {
|
|
return c.OrphanPolicy.ApacheEmitDelayMs
|
|
}
|
|
return domain.DefaultApacheEmitDelayMs // Default: 500ms
|
|
}
|
|
|
|
// GetMatchingMode returns the matching mode.
|
|
func (c *CorrelationConfig) GetMatchingMode() string {
|
|
if c.Matching.Mode != "" {
|
|
return c.Matching.Mode
|
|
}
|
|
return "one_to_many" // Default to Keep-Alive
|
|
}
|
|
|
|
// GetMaxHTTPBufferSize returns the max HTTP buffer size.
|
|
func (c *CorrelationConfig) GetMaxHTTPBufferSize() int {
|
|
if c.Buffers.MaxHTTPItems > 0 {
|
|
return c.Buffers.MaxHTTPItems
|
|
}
|
|
return domain.DefaultMaxHTTPBufferSize
|
|
}
|
|
|
|
// GetMaxNetworkBufferSize returns the max network buffer size.
|
|
func (c *CorrelationConfig) GetMaxNetworkBufferSize() int {
|
|
if c.Buffers.MaxNetworkItems > 0 {
|
|
return c.Buffers.MaxNetworkItems
|
|
}
|
|
return domain.DefaultMaxNetworkBufferSize
|
|
}
|
|
|
|
// GetNetworkTTLS returns the network TTL in seconds.
|
|
func (c *CorrelationConfig) GetNetworkTTLS() int {
|
|
if c.TTL.NetworkTTLS > 0 {
|
|
return c.TTL.NetworkTTLS
|
|
}
|
|
return domain.DefaultNetworkTTLS
|
|
}
|
|
|
|
// GetSocketPermissions returns the socket permissions as os.FileMode.
|
|
// Default is 0666 (world read/write).
|
|
func (c *UnixSocketConfig) GetSocketPermissions() os.FileMode {
|
|
trimmed := strings.TrimSpace(c.SocketPermissions)
|
|
if trimmed == "" {
|
|
return 0666
|
|
}
|
|
|
|
// Parse octal string (e.g., "0660", "660", "0666")
|
|
perms, err := strconv.ParseUint(trimmed, 8, 32)
|
|
if err != nil {
|
|
return 0666
|
|
}
|
|
|
|
return os.FileMode(perms)
|
|
}
|
|
|
|
// GetExcludeSourceIPs returns the list of excluded source IPs or CIDR ranges.
|
|
func (c *CorrelationConfig) GetExcludeSourceIPs() []string {
|
|
return c.ExcludeSourceIPs
|
|
}
|
|
|
|
// IsSourceIPExcluded checks if a source IP should be excluded.
|
|
// Supports both exact IP matches and CIDR ranges.
|
|
func (c *CorrelationConfig) IsSourceIPExcluded(ip string) bool {
|
|
if len(c.ExcludeSourceIPs) == 0 {
|
|
return false
|
|
}
|
|
|
|
// Parse the IP once
|
|
parsedIP := net.ParseIP(ip)
|
|
if parsedIP == nil {
|
|
return false // Invalid IP
|
|
}
|
|
|
|
for _, exclude := range c.ExcludeSourceIPs {
|
|
// Try CIDR first
|
|
if strings.Contains(exclude, "/") {
|
|
_, cidr, err := net.ParseCIDR(exclude)
|
|
if err != nil {
|
|
continue // Invalid CIDR, skip
|
|
}
|
|
if cidr.Contains(parsedIP) {
|
|
return true
|
|
}
|
|
} else {
|
|
// Exact IP match
|
|
if exclude == ip {
|
|
return true
|
|
}
|
|
// Also try parsing as IP (handles different formats like 192.168.1.1 vs 192.168.001.001)
|
|
if excludeIP := net.ParseIP(exclude); excludeIP != nil {
|
|
if excludeIP.Equal(parsedIP) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|