feat: observability, IP filtering, stdout/clickhouse fixes (v1.1.11)
- feat(observability): metrics server with /metrics and /health endpoints
- feat(observability): correlation metrics (events, success/failed, reasons, buffers)
- feat(correlation): IP exclusion filter (exact IPs and CIDR ranges)
- feat(correlation): pending orphan delay for late-arriving B events
- fix(stdout): sink is now a no-op for data; JSON must never appear on stdout
- fix(clickhouse): all flush errors were silently discarded, now properly logged
- fix(clickhouse): buffer overflow with DropOnOverflow now logged at WARN
- fix(clickhouse): retry attempts logged at WARN with attempt/delay/error context
- feat(clickhouse): connection success logged at INFO, batch sends at DEBUG
- feat(clickhouse): SetLogger() for external logger injection
- test(stdout): assert stdout remains empty for correlated and orphan logs
- chore(rpm): bump version to 1.1.11, update changelog
- docs: README and architecture.yml updated

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -1,11 +1,13 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/logcorrelator/logcorrelator/internal/adapters/inbound/unixsocket"
|
||||
"github.com/logcorrelator/logcorrelator/internal/adapters/outbound/clickhouse"
|
||||
@@ -88,17 +90,15 @@ func main() {
|
||||
logger.Error("Failed to create ClickHouse sink", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
clickHouseSink.SetLogger(logger)
|
||||
sinks = append(sinks, clickHouseSink)
|
||||
logger.Info(fmt.Sprintf("Configured ClickHouse sink: table=%s", cfg.Outputs.ClickHouse.Table))
|
||||
}
|
||||
|
||||
if cfg.Outputs.Stdout.Enabled {
|
||||
stdoutSink := stdout.NewStdoutSink(stdout.Config{
|
||||
Enabled: true,
|
||||
Level: cfg.Outputs.Stdout.Level,
|
||||
})
|
||||
stdoutSink := stdout.NewStdoutSink(stdout.Config{Enabled: true})
|
||||
sinks = append(sinks, stdoutSink)
|
||||
logger.Info(fmt.Sprintf("Configured stdout sink: level=%s", cfg.Outputs.Stdout.Level))
|
||||
logger.Info("Configured stdout sink (operational logs on stderr)")
|
||||
}
|
||||
|
||||
// Create multi-sink wrapper
|
||||
@@ -106,14 +106,15 @@ func main() {
|
||||
|
||||
// Create correlation service
|
||||
correlationSvc := domain.NewCorrelationService(domain.CorrelationConfig{
|
||||
TimeWindow: cfg.Correlation.GetTimeWindow(),
|
||||
ApacheAlwaysEmit: cfg.Correlation.GetApacheAlwaysEmit(),
|
||||
ApacheEmitDelayMs: cfg.Correlation.GetApacheEmitDelayMs(),
|
||||
NetworkEmit: false,
|
||||
MaxHTTPBufferSize: cfg.Correlation.GetMaxHTTPBufferSize(),
|
||||
TimeWindow: cfg.Correlation.GetTimeWindow(),
|
||||
ApacheAlwaysEmit: cfg.Correlation.GetApacheAlwaysEmit(),
|
||||
ApacheEmitDelayMs: cfg.Correlation.GetApacheEmitDelayMs(),
|
||||
NetworkEmit: false,
|
||||
MaxHTTPBufferSize: cfg.Correlation.GetMaxHTTPBufferSize(),
|
||||
MaxNetworkBufferSize: cfg.Correlation.GetMaxNetworkBufferSize(),
|
||||
NetworkTTLS: cfg.Correlation.GetNetworkTTLS(),
|
||||
MatchingMode: cfg.Correlation.GetMatchingMode(),
|
||||
NetworkTTLS: cfg.Correlation.GetNetworkTTLS(),
|
||||
MatchingMode: cfg.Correlation.GetMatchingMode(),
|
||||
ExcludeSourceIPs: cfg.Correlation.GetExcludeSourceIPs(),
|
||||
}, &domain.RealTimeProvider{})
|
||||
|
||||
// Set logger for correlation service
|
||||
@@ -124,6 +125,27 @@ func main() {
|
||||
cfg.Correlation.GetApacheAlwaysEmit(),
|
||||
cfg.Correlation.GetApacheEmitDelayMs()))
|
||||
|
||||
// Start metrics server if enabled
|
||||
var metricsServer *observability.MetricsServer
|
||||
if cfg.Metrics.Enabled {
|
||||
addr := cfg.Metrics.Addr
|
||||
if addr == "" {
|
||||
addr = ":8080" // Default address
|
||||
}
|
||||
var err error
|
||||
metricsServer, err = observability.NewMetricsServer(addr, correlationSvc.GetMetricsSnapshot)
|
||||
if err != nil {
|
||||
logger.Error("Failed to create metrics server", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := metricsServer.Start(); err != nil {
|
||||
logger.Error("Failed to start metrics server", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
logger.Info(fmt.Sprintf("Metrics server started: addr=%s", metricsServer.Addr()))
|
||||
logger.Info("Metrics endpoints: /metrics (JSON), /health")
|
||||
}
|
||||
|
||||
// Create orchestrator
|
||||
orchestrator := app.NewOrchestrator(app.OrchestratorConfig{
|
||||
Sources: sources,
|
||||
@@ -166,5 +188,14 @@ func main() {
|
||||
logger.Error("Error during shutdown", err)
|
||||
}
|
||||
|
||||
// Stop metrics server
|
||||
if metricsServer != nil {
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if err := metricsServer.Stop(shutdownCtx); err != nil {
|
||||
logger.Error("Error stopping metrics server", err)
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("logcorrelator stopped")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user