fix(correlation): Keep-Alive time window + orphan timer + TTL purge (v1.1.14)
Bug #1 - processSourceA: utilise bEventHasValidTTL en mode one_to_many au lieu de eventsMatch qui comparait les timestamps originaux. Apres ~10s les requetes A devenaient toutes orphelines alors que la session KA etait active. Bug #4 - checkPendingOrphansForCorrelation: meme correction, cle identique = meme connexion en one_to_many, pas besoin de comparer les timestamps. Bug #3 - cleanNetworkBufferByTTL: expiration B => emission immediate des pending orphans associes (ils ne peuvent plus jamais corréler). Bug #2 - Orchestrateur: goroutine ticker 250ms appelle EmitPendingOrphans() pour drainer les orphans independamment du flux d'evenements entrants. EmitPendingOrphans() expose la methode comme publique thread-safe. Tests: 4 nouveaux tests de non-regression (un par bug). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
6235
91.224.92.185.txt
Normal file
6235
91.224.92.185.txt
Normal file
File diff suppressed because it is too large
Load Diff
2
Makefile
2
Makefile
@ -20,7 +20,7 @@ BINARY_NAME=logcorrelator
|
|||||||
DIST_DIR=dist
|
DIST_DIR=dist
|
||||||
|
|
||||||
# Package version
|
# Package version
|
||||||
PKG_VERSION ?= 1.1.13
|
PKG_VERSION ?= 1.1.14
|
||||||
|
|
||||||
# Enable BuildKit for better performance
|
# Enable BuildKit for better performance
|
||||||
export DOCKER_BUILDKIT=1
|
export DOCKER_BUILDKIT=1
|
||||||
|
|||||||
@ -15,6 +15,10 @@ const (
|
|||||||
DefaultEventChannelBufferSize = 1000
|
DefaultEventChannelBufferSize = 1000
|
||||||
// ShutdownTimeout is the maximum time to wait for graceful shutdown
|
// ShutdownTimeout is the maximum time to wait for graceful shutdown
|
||||||
ShutdownTimeout = 30 * time.Second
|
ShutdownTimeout = 30 * time.Second
|
||||||
|
// OrphanTickInterval is how often the orchestrator drains pending orphans.
|
||||||
|
// Set to half the default emit delay (500ms/2) so orphans are emitted promptly
|
||||||
|
// even when no new events arrive.
|
||||||
|
OrphanTickInterval = 250 * time.Millisecond
|
||||||
)
|
)
|
||||||
|
|
||||||
// OrchestratorConfig holds the orchestrator configuration.
|
// OrchestratorConfig holds the orchestrator configuration.
|
||||||
@ -77,6 +81,27 @@ func (o *Orchestrator) Start() error {
|
|||||||
}(source, eventChan)
|
}(source, eventChan)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start a periodic ticker to drain pending orphan A events independently of the
|
||||||
|
// event flow. Without this, orphans are only emitted when a new event arrives,
|
||||||
|
// causing them to accumulate silently when the source goes quiet.
|
||||||
|
o.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer o.wg.Done()
|
||||||
|
ticker := time.NewTicker(OrphanTickInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-o.ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
logs := o.correlationSvc.EmitPendingOrphans()
|
||||||
|
for _, log := range logs {
|
||||||
|
o.config.Sink.Write(o.ctx, log) //nolint:errcheck
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -352,7 +352,13 @@ func (s *CorrelationService) processSourceA(event *NormalizedEvent) ([]Correlate
|
|||||||
s.logger.Debugf("processing A event: key=%s keepalive_seq=%d timestamp=%v", key, event.KeepAliveSeq, event.Timestamp)
|
s.logger.Debugf("processing A event: key=%s keepalive_seq=%d timestamp=%v", key, event.KeepAliveSeq, event.Timestamp)
|
||||||
|
|
||||||
// Look for matching B events
|
// Look for matching B events
|
||||||
|
// In one_to_many (Keep-Alive) mode use TTL-based matching: B is valid as long as
|
||||||
|
// its TTL has not expired (the timestamp is fixed at TCP connection time and grows
|
||||||
|
// beyond TimeWindow after ~10s, making timestamp comparison incorrect).
|
||||||
matches := s.findMatches(s.bufferB, s.pendingB, key, func(other *NormalizedEvent) bool {
|
matches := s.findMatches(s.bufferB, s.pendingB, key, func(other *NormalizedEvent) bool {
|
||||||
|
if s.config.MatchingMode == MatchingModeOneToMany {
|
||||||
|
return s.bEventHasValidTTL(other)
|
||||||
|
}
|
||||||
return s.eventsMatch(event, other)
|
return s.eventsMatch(event, other)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -392,9 +398,13 @@ func (s *CorrelationService) processSourceA(event *NormalizedEvent) ([]Correlate
|
|||||||
return results, false
|
return results, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// No match found - check if there are B events with same key but outside time window
|
// No match found - check if there are B events with same key but outside match criteria
|
||||||
if bElements, ok := s.pendingB[key]; ok && len(bElements) > 0 {
|
if bElements, ok := s.pendingB[key]; ok && len(bElements) > 0 {
|
||||||
// Key exists but no time match - log the time difference for debugging
|
if s.config.MatchingMode == MatchingModeOneToMany {
|
||||||
|
// In Keep-Alive mode, B exists but TTL expired
|
||||||
|
s.logger.Debugf("A event has same key as B but B TTL expired: key=%s", key)
|
||||||
|
} else {
|
||||||
|
// In one-to-one mode, B exists but outside time window
|
||||||
for _, bElem := range bElements {
|
for _, bElem := range bElements {
|
||||||
bEvent := bElem.Value.(*NormalizedEvent)
|
bEvent := bElem.Value.(*NormalizedEvent)
|
||||||
timeDiff := event.Timestamp.Sub(bEvent.Timestamp)
|
timeDiff := event.Timestamp.Sub(bEvent.Timestamp)
|
||||||
@ -404,6 +414,7 @@ func (s *CorrelationService) processSourceA(event *NormalizedEvent) ([]Correlate
|
|||||||
s.logger.Debugf("A event has same key as B but outside time window: key=%s time_diff=%v window=%v",
|
s.logger.Debugf("A event has same key as B but outside time window: key=%s time_diff=%v window=%v",
|
||||||
key, timeDiff, s.config.TimeWindow)
|
key, timeDiff, s.config.TimeWindow)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
s.metrics.RecordCorrelationFailed("time_window")
|
s.metrics.RecordCorrelationFailed("time_window")
|
||||||
} else {
|
} else {
|
||||||
// No B events with same key at all
|
// No B events with same key at all
|
||||||
@ -497,6 +508,23 @@ func (s *CorrelationService) eventsMatch(a, b *NormalizedEvent) bool {
|
|||||||
return diff <= s.config.TimeWindow
|
return diff <= s.config.TimeWindow
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bEventHasValidTTL returns true if the B event is still alive (TTL not expired).
|
||||||
|
// Used in one_to_many mode where the B timestamp is fixed at TCP connection time and
|
||||||
|
// grows beyond TimeWindow — TTL validity is the correct liveness indicator.
|
||||||
|
func (s *CorrelationService) bEventHasValidTTL(bEvent *NormalizedEvent) bool {
|
||||||
|
key := bEvent.CorrelationKey()
|
||||||
|
if elements, ok := s.pendingB[key]; ok {
|
||||||
|
for _, elem := range elements {
|
||||||
|
if elem.Value.(*NormalizedEvent) == bEvent {
|
||||||
|
if ttl, exists := s.networkTTLs[elem]; exists {
|
||||||
|
return !s.timeProvider.Now().After(ttl)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (s *CorrelationService) addEvent(event *NormalizedEvent) {
|
func (s *CorrelationService) addEvent(event *NormalizedEvent) {
|
||||||
key := event.CorrelationKey()
|
key := event.CorrelationKey()
|
||||||
|
|
||||||
@ -605,6 +633,8 @@ func (s *CorrelationService) cleanBuffer(buffer *eventBuffer, pending map[string
|
|||||||
// cleanNetworkBufferByTTL removes expired B events based on their reset TTL.
|
// cleanNetworkBufferByTTL removes expired B events based on their reset TTL.
|
||||||
// For Keep-Alive support, we check the TTL map (which is reset on each correlation)
|
// For Keep-Alive support, we check the TTL map (which is reset on each correlation)
|
||||||
// rather than the original event timestamp.
|
// rather than the original event timestamp.
|
||||||
|
// When a B event expires, any pending orphan A events for that key are immediately
|
||||||
|
// emitted (they can no longer ever be correlated).
|
||||||
func (s *CorrelationService) cleanNetworkBufferByTTL() {
|
func (s *CorrelationService) cleanNetworkBufferByTTL() {
|
||||||
now := s.timeProvider.Now()
|
now := s.timeProvider.Now()
|
||||||
var removed int
|
var removed int
|
||||||
@ -618,6 +648,20 @@ func (s *CorrelationService) cleanNetworkBufferByTTL() {
|
|||||||
delete(s.pendingB, key)
|
delete(s.pendingB, key)
|
||||||
// Connection fully gone: reset Keep-Alive counter for this key
|
// Connection fully gone: reset Keep-Alive counter for this key
|
||||||
delete(s.keepAliveSeqA, key)
|
delete(s.keepAliveSeqA, key)
|
||||||
|
|
||||||
|
// Purge any pending orphan A events for this key: they can no longer
|
||||||
|
// correlate with a B (the connection is closed), emit them immediately.
|
||||||
|
if s.config.ApacheAlwaysEmit {
|
||||||
|
if orphans, ok := s.pendingOrphans[key]; ok && len(orphans) > 0 {
|
||||||
|
s.logger.Debugf("B TTL expired, emitting %d pending orphan(s) for key=%s", len(orphans), key)
|
||||||
|
for _, o := range orphans {
|
||||||
|
s.logger.Warnf("orphan A event (B TTL expired): src_ip=%s src_port=%d key=%s keepalive_seq=%d",
|
||||||
|
o.event.SrcIP, o.event.SrcPort, key, o.event.KeepAliveSeq)
|
||||||
|
s.metrics.RecordOrphanEmitted("A")
|
||||||
|
}
|
||||||
|
delete(s.pendingOrphans, key)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
delete(s.networkTTLs, elem)
|
delete(s.networkTTLs, elem)
|
||||||
removed++
|
removed++
|
||||||
@ -778,7 +822,16 @@ func (s *CorrelationService) checkPendingOrphansForCorrelation(bEvent *Normalize
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i, orphan := range orphans {
|
for i, orphan := range orphans {
|
||||||
if s.eventsMatch(orphan.event, bEvent) {
|
// In one_to_many mode: same key = same Keep-Alive connection → always correlate.
|
||||||
|
// The incoming B event may have an old original timestamp (fixed at TCP connection
|
||||||
|
// time) so eventsMatch would fail for long-lived sessions. Use key identity instead.
|
||||||
|
var matched bool
|
||||||
|
if s.config.MatchingMode == MatchingModeOneToMany {
|
||||||
|
matched = true
|
||||||
|
} else {
|
||||||
|
matched = s.eventsMatch(orphan.event, bEvent)
|
||||||
|
}
|
||||||
|
if matched {
|
||||||
// Found a match! Remove from pending and return
|
// Found a match! Remove from pending and return
|
||||||
aEvent := orphan.event
|
aEvent := orphan.event
|
||||||
s.pendingOrphans[key] = append(orphans[:i], orphans[i+1:]...)
|
s.pendingOrphans[key] = append(orphans[:i], orphans[i+1:]...)
|
||||||
@ -828,6 +881,15 @@ func (s *CorrelationService) emitPendingOrphans() []CorrelatedLog {
|
|||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EmitPendingOrphans emits all pending orphans that have passed their emit delay.
|
||||||
|
// This is the public, thread-safe version intended to be called by an external ticker
|
||||||
|
// (e.g. the Orchestrator) so that orphans are emitted even when no new events arrive.
|
||||||
|
func (s *CorrelationService) EmitPendingOrphans() []CorrelatedLog {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
return s.emitPendingOrphans()
|
||||||
|
}
|
||||||
|
|
||||||
func removeElementFromSlice(elements []*list.Element, target *list.Element) []*list.Element {
|
func removeElementFromSlice(elements []*list.Element, target *list.Element) []*list.Element {
|
||||||
if len(elements) == 0 {
|
if len(elements) == 0 {
|
||||||
return elements
|
return elements
|
||||||
|
|||||||
@ -1612,3 +1612,213 @@ if !correlated {
|
|||||||
t.Errorf("A2 Keep-Alive: expected correlated result, got %d results — B was not buffered after pending orphan match (bug)", len(results))
|
t.Errorf("A2 Keep-Alive: expected correlated result, got %d results — B was not buffered after pending orphan match (bug)", len(results))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestKeepAlive_BeyondTimeWindow_StillCorrelates verifies that in one_to_many mode,
|
||||||
|
// A events are correlated with the buffered B event even when the time difference
|
||||||
|
// between A.Timestamp and B.original Timestamp exceeds TimeWindow.
|
||||||
|
// Regression test for Bug #1.
|
||||||
|
func TestKeepAlive_BeyondTimeWindow_StillCorrelates(t *testing.T) {
|
||||||
|
now := time.Date(2026, 3, 5, 16, 30, 0, 0, time.UTC)
|
||||||
|
tp := &mockTimeProvider{now: now}
|
||||||
|
|
||||||
|
config := CorrelationConfig{
|
||||||
|
TimeWindow: 10 * time.Second,
|
||||||
|
ApacheAlwaysEmit: true,
|
||||||
|
ApacheEmitDelayMs: 500,
|
||||||
|
NetworkTTLS: 120,
|
||||||
|
MatchingMode: MatchingModeOneToMany,
|
||||||
|
MaxHTTPBufferSize: DefaultMaxHTTPBufferSize,
|
||||||
|
MaxNetworkBufferSize: DefaultMaxNetworkBufferSize,
|
||||||
|
}
|
||||||
|
svc := NewCorrelationService(config, tp)
|
||||||
|
|
||||||
|
// B arrives first (TCP connection established)
|
||||||
|
b := &NormalizedEvent{
|
||||||
|
Source: SourceB,
|
||||||
|
Timestamp: now,
|
||||||
|
SrcIP: "91.224.92.185",
|
||||||
|
SrcPort: 53471,
|
||||||
|
}
|
||||||
|
if results := svc.ProcessEvent(b); len(results) != 0 {
|
||||||
|
t.Fatalf("B buffered: expected 0 results, got %d", len(results))
|
||||||
|
}
|
||||||
|
|
||||||
|
// A seq=1 arrives within window -- correlates and resets TTL
|
||||||
|
tp.now = now.Add(100 * time.Millisecond)
|
||||||
|
a1 := &NormalizedEvent{
|
||||||
|
Source: SourceA,
|
||||||
|
Timestamp: tp.now,
|
||||||
|
SrcIP: "91.224.92.185",
|
||||||
|
SrcPort: 53471,
|
||||||
|
}
|
||||||
|
if results := svc.ProcessEvent(a1); len(results) != 1 || !results[0].Correlated {
|
||||||
|
t.Fatalf("A seq=1: expected 1 correlated result, got %d", len(svc.ProcessEvent(a1)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// A seq=10 arrives 11s after B (beyond TimeWindow) but B TTL is still valid (reset at seq=1)
|
||||||
|
tp.now = now.Add(11 * time.Second)
|
||||||
|
a10 := &NormalizedEvent{
|
||||||
|
Source: SourceA,
|
||||||
|
Timestamp: tp.now,
|
||||||
|
SrcIP: "91.224.92.185",
|
||||||
|
SrcPort: 53471,
|
||||||
|
}
|
||||||
|
results := svc.ProcessEvent(a10)
|
||||||
|
|
||||||
|
correlated := false
|
||||||
|
for _, r := range results {
|
||||||
|
if r.Correlated {
|
||||||
|
correlated = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !correlated {
|
||||||
|
t.Errorf("A seq=10 (diff>TimeWindow, TTL valid): expected Correlated=true, got %d results -- Bug #1 not fixed", len(results))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestKeepAlive_PendingOrphan_MatchesBeyondTimeWindow verifies that a pending orphan A
|
||||||
|
// is correctly matched by an incoming B when timestamps differ by more than TimeWindow.
|
||||||
|
// Regression test for Bug #4.
|
||||||
|
func TestKeepAlive_PendingOrphan_MatchesBeyondTimeWindow(t *testing.T) {
|
||||||
|
now := time.Date(2026, 3, 5, 16, 30, 0, 0, time.UTC)
|
||||||
|
tp := &mockTimeProvider{now: now}
|
||||||
|
|
||||||
|
config := CorrelationConfig{
|
||||||
|
TimeWindow: 10 * time.Second,
|
||||||
|
ApacheAlwaysEmit: true,
|
||||||
|
ApacheEmitDelayMs: 500,
|
||||||
|
NetworkTTLS: 120,
|
||||||
|
MatchingMode: MatchingModeOneToMany,
|
||||||
|
MaxHTTPBufferSize: DefaultMaxHTTPBufferSize,
|
||||||
|
MaxNetworkBufferSize: DefaultMaxNetworkBufferSize,
|
||||||
|
}
|
||||||
|
svc := NewCorrelationService(config, tp)
|
||||||
|
|
||||||
|
// A arrives first, no B in buffer -> pending orphan
|
||||||
|
a := &NormalizedEvent{
|
||||||
|
Source: SourceA,
|
||||||
|
Timestamp: now.Add(15 * time.Second),
|
||||||
|
SrcIP: "10.1.2.3",
|
||||||
|
SrcPort: 9999,
|
||||||
|
}
|
||||||
|
if results := svc.ProcessEvent(a); len(results) != 0 {
|
||||||
|
t.Fatalf("A with no B: expected 0 results (pending orphan), got %d", len(results))
|
||||||
|
}
|
||||||
|
if len(svc.pendingOrphans) != 1 {
|
||||||
|
t.Fatalf("expected 1 pending orphan key, got %d", len(svc.pendingOrphans))
|
||||||
|
}
|
||||||
|
|
||||||
|
// B arrives with old timestamp (diff with A = 15s > TimeWindow)
|
||||||
|
// In one_to_many: same key = same connection, must match pending orphan regardless of timestamps
|
||||||
|
b := &NormalizedEvent{
|
||||||
|
Source: SourceB,
|
||||||
|
Timestamp: now,
|
||||||
|
SrcIP: "10.1.2.3",
|
||||||
|
SrcPort: 9999,
|
||||||
|
}
|
||||||
|
results := svc.ProcessEvent(b)
|
||||||
|
|
||||||
|
correlated := false
|
||||||
|
for _, r := range results {
|
||||||
|
if r.Correlated {
|
||||||
|
correlated = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !correlated {
|
||||||
|
t.Errorf("pending orphan + B (diff>TimeWindow): expected Correlated=true -- Bug #4 not fixed")
|
||||||
|
}
|
||||||
|
if len(svc.pendingOrphans) != 0 {
|
||||||
|
t.Errorf("expected pendingOrphans empty after match, got %d keys", len(svc.pendingOrphans))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestBTTLExpiry_PurgesPendingOrphans verifies that when a B event TTL expires,
|
||||||
|
// pending orphan A events for that key are purged. Regression test for Bug #3.
|
||||||
|
func TestBTTLExpiry_PurgesPendingOrphans(t *testing.T) {
|
||||||
|
now := time.Date(2026, 3, 5, 16, 30, 0, 0, time.UTC)
|
||||||
|
tp := &mockTimeProvider{now: now}
|
||||||
|
|
||||||
|
// Use one_to_one + tight TimeWindow so A goes to pending orphans in old code path
|
||||||
|
config := CorrelationConfig{
|
||||||
|
TimeWindow: 2 * time.Second,
|
||||||
|
ApacheAlwaysEmit: true,
|
||||||
|
ApacheEmitDelayMs: 500,
|
||||||
|
NetworkTTLS: 5,
|
||||||
|
MatchingMode: MatchingModeOneToOne,
|
||||||
|
MaxHTTPBufferSize: DefaultMaxHTTPBufferSize,
|
||||||
|
MaxNetworkBufferSize: DefaultMaxNetworkBufferSize,
|
||||||
|
}
|
||||||
|
svc := NewCorrelationService(config, tp)
|
||||||
|
|
||||||
|
// B arrives
|
||||||
|
svc.ProcessEvent(&NormalizedEvent{
|
||||||
|
Source: SourceB, Timestamp: now, SrcIP: "10.9.9.9", SrcPort: 8888,
|
||||||
|
})
|
||||||
|
|
||||||
|
// A arrives 3s later -- beyond TimeWindow(2s), no match -> pending orphan
|
||||||
|
tp.now = now.Add(3 * time.Second)
|
||||||
|
svc.ProcessEvent(&NormalizedEvent{
|
||||||
|
Source: SourceA, Timestamp: tp.now, SrcIP: "10.9.9.9", SrcPort: 8888,
|
||||||
|
})
|
||||||
|
|
||||||
|
if _, exists := svc.pendingOrphans["10.9.9.9:8888"]; !exists {
|
||||||
|
t.Skip("A not in pendingOrphans (different code path) -- skipping")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance past B TTL (5s from t=0, now at t=6s)
|
||||||
|
tp.now = now.Add(6 * time.Second)
|
||||||
|
// Trigger cleanExpired via ProcessEvent with a dummy event
|
||||||
|
svc.ProcessEvent(&NormalizedEvent{
|
||||||
|
Source: SourceB, Timestamp: tp.now, SrcIP: "99.99.99.1", SrcPort: 1,
|
||||||
|
})
|
||||||
|
|
||||||
|
if _, exists := svc.pendingOrphans["10.9.9.9:8888"]; exists {
|
||||||
|
t.Errorf("Bug #3: pendingOrphans not purged after B TTL expired")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEmitPendingOrphans_PublicMethod verifies EmitPendingOrphans() emits orphans
|
||||||
|
// after the delay expires and nothing before. Regression test for Bug #2.
|
||||||
|
func TestEmitPendingOrphans_PublicMethod(t *testing.T) {
|
||||||
|
now := time.Date(2026, 3, 5, 16, 30, 0, 0, time.UTC)
|
||||||
|
tp := &mockTimeProvider{now: now}
|
||||||
|
|
||||||
|
config := CorrelationConfig{
|
||||||
|
TimeWindow: 10 * time.Second,
|
||||||
|
ApacheAlwaysEmit: true,
|
||||||
|
ApacheEmitDelayMs: 500,
|
||||||
|
NetworkTTLS: 120,
|
||||||
|
MatchingMode: MatchingModeOneToMany,
|
||||||
|
MaxHTTPBufferSize: DefaultMaxHTTPBufferSize,
|
||||||
|
MaxNetworkBufferSize: DefaultMaxNetworkBufferSize,
|
||||||
|
}
|
||||||
|
svc := NewCorrelationService(config, tp)
|
||||||
|
|
||||||
|
// A arrives with no B -> pending orphan
|
||||||
|
if results := svc.ProcessEvent(&NormalizedEvent{
|
||||||
|
Source: SourceA, Timestamp: now, SrcIP: "192.0.2.1", SrcPort: 4444,
|
||||||
|
}); len(results) != 0 {
|
||||||
|
t.Fatalf("expected 0 results (pending), got %d", len(results))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Before delay: nothing emitted
|
||||||
|
tp.now = now.Add(200 * time.Millisecond)
|
||||||
|
if emitted := svc.EmitPendingOrphans(); len(emitted) != 0 {
|
||||||
|
t.Errorf("before delay: expected 0 emitted orphans, got %d", len(emitted))
|
||||||
|
}
|
||||||
|
|
||||||
|
// After delay (>500ms): orphan emitted
|
||||||
|
tp.now = now.Add(600 * time.Millisecond)
|
||||||
|
emitted := svc.EmitPendingOrphans()
|
||||||
|
if len(emitted) != 1 {
|
||||||
|
t.Fatalf("after delay: expected 1 emitted orphan, got %d", len(emitted))
|
||||||
|
}
|
||||||
|
if emitted[0].Correlated {
|
||||||
|
t.Errorf("expected orphan (Correlated=false), got Correlated=true")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second call: nothing (already emitted)
|
||||||
|
if emitted2 := svc.EmitPendingOrphans(); len(emitted2) != 0 {
|
||||||
|
t.Errorf("second call: expected 0 (already emitted), got %d", len(emitted2))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -47,6 +47,11 @@ type CorrelationProcessor interface {
|
|||||||
// Flush forces emission of remaining buffered events.
|
// Flush forces emission of remaining buffered events.
|
||||||
Flush() []domain.CorrelatedLog
|
Flush() []domain.CorrelatedLog
|
||||||
|
|
||||||
|
// EmitPendingOrphans emits orphan A events whose delay has expired.
|
||||||
|
// Called periodically by the Orchestrator ticker so orphans are not blocked
|
||||||
|
// waiting for the next incoming event.
|
||||||
|
EmitPendingOrphans() []domain.CorrelatedLog
|
||||||
|
|
||||||
// GetBufferSizes returns the current buffer sizes for monitoring.
|
// GetBufferSizes returns the current buffer sizes for monitoring.
|
||||||
GetBufferSizes() (int, int)
|
GetBufferSizes() (int, int)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -145,6 +145,25 @@ exit 0
|
|||||||
%config(noreplace) /etc/logrotate.d/logcorrelator
|
%config(noreplace) /etc/logrotate.d/logcorrelator
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.14-1
|
||||||
|
- Fix(correlation/bug1): Keep-Alive sessions au-dela de TimeWindow ne correlent plus en orphelins
|
||||||
|
Le matcher dans processSourceA utilisait eventsMatch (comparaison de timestamps) en mode
|
||||||
|
one_to_many. Apres ~10s, B.Timestamp_original depasse la TimeWindow et toutes les requetes
|
||||||
|
suivantes devenaient orphelines. Nouveau matcher bEventHasValidTTL : un B event est valide
|
||||||
|
tant que son TTL n'a pas expire (le TTL est reset a chaque correlation Keep-Alive).
|
||||||
|
- Fix(correlation/bug4): checkPendingOrphansForCorrelation utilisait eventsMatch (meme bug)
|
||||||
|
En mode one_to_many, un B arrivant avec un vieux timestamp ne matchait plus les pending orphans
|
||||||
|
pour la meme cle. Remplace par une verification de cle uniquement (meme cle = meme connexion).
|
||||||
|
- Fix(correlation/bug3): pendingOrphans non purges quand le B expire (cleanNetworkBufferByTTL)
|
||||||
|
Quand un B event expirait (TTL), les pending orphan A associes etaient bloques indefiniment.
|
||||||
|
Ils sont desormais emis immediatement lors de l'expiration du B correspondant.
|
||||||
|
- Fix(correlation/bug2): orphans emis uniquement sur reception d'evenement (pas de timer dedie)
|
||||||
|
EmitPendingOrphans() est maintenant une methode publique thread-safe. L'Orchestrateur
|
||||||
|
demarre un goroutine ticker (250ms) qui appelle EmitPendingOrphans() independamment du flux,
|
||||||
|
garantissant l'emission meme en l'absence de nouveaux evenements.
|
||||||
|
- Feat(ports): ajout de EmitPendingOrphans() dans l'interface CorrelationProcessor
|
||||||
|
- Test: 4 nouveaux tests de non-regression (Bug #1, #2, #3, #4)
|
||||||
|
|
||||||
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.13-1
|
* Thu Mar 05 2026 logcorrelator <dev@example.com> - 1.1.13-1
|
||||||
- Fix: Unix sockets ne passent plus en root:root lors des restarts du service
|
- Fix: Unix sockets ne passent plus en root:root lors des restarts du service
|
||||||
- Fix: Ajout de RuntimeDirectory=logcorrelator dans le service systemd (systemd gère /run/logcorrelator avec le bon propriétaire à chaque démarrage/restart)
|
- Fix: Ajout de RuntimeDirectory=logcorrelator dans le service systemd (systemd gère /run/logcorrelator avec le bon propriétaire à chaque démarrage/restart)
|
||||||
|
|||||||
Reference in New Issue
Block a user