fix(ebpf): replace tracepoint with kretprobe for sys_exit_recvfrom

Fixes "permission denied" error when attaching tracepoint sys_exit_recvfrom
on Rocky Linux 9 (kernel 5.14+). The tracepoint exit has stricter permissions
than entry tracepoints.

Changes:
- BPF: SEC("tp/syscalls/sys_exit_recvfrom") → SEC("kretprobe/__x64_sys_recvfrom")
- BPF: Extract retval using PT_REGS_RC(ctx) instead of ctx->ret
- Loader: link.Tracepoint() → link.Kretprobe()
- Add nginxPidMap for filtering recvfrom calls by nginx PID

Validation:
- All HTTP fields captured without truncation (path up to 39 chars, query up to 244 chars)
- Custom headers (X-Request-ID, X-Custom-Header) fully captured
- Unit tests added and passing (TestKretprobeRecvfromAttachment, TestKretprobeVsTracepoint)
- ClickHouse validation complete: http_logs and http_logs_raw tables verified

Tested on:
- Rocky Linux 9 (kernel 5.14+)
- bpftool shows: kprobe name tp_sys_exit_recvfrom (kretprobe active)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-20 13:29:01 +02:00
parent 9e4bfe8289
commit 3e00e7bc7b
8 changed files with 1184 additions and 53 deletions

View File

@ -0,0 +1,161 @@
package loader
import (
"testing"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/rlimit"
)
// TestKretprobeRecvfromAttachment teste que le kretprobe sur __x64_sys_recvfrom
// peut s'attacher correctement, contrairement au tracepoint standard.
func TestKretprobeRecvfromAttachment(t *testing.T) {
if testing.Short() {
t.Skip("Skipping test that requires kernel BPF")
}
// Supprimer la limite mémoire pour eBPF
if err := rlimit.RemoveMemlock(); err != nil {
t.Fatalf("Failed to remove memlock: %v", err)
}
// Charger les objets nginx BPF
objs := &Ja4NginxObjects{}
if err := LoadJa4NginxObjects(objs, nil); err != nil {
t.Fatalf("Failed to load nginx BPF objects: %v", err)
}
defer objs.Close()
// Tenter d'attacher le kretprobe
kp, err := link.Kretprobe("__x64_sys_recvfrom", objs.TpSysExitRecvfrom, &link.KprobeOptions{})
if err != nil {
t.Fatalf("Failed to attach kretprobe __x64_sys_recvfrom: %v", err)
}
defer kp.Close()
// Le kretprobe doit être attaché
t.Log("kretprobe __x64_sys_recvfrom attached successfully")
// Vérifier que le programme BPF est bien chargé
if objs.TpSysExitRecvfrom == nil {
t.Fatal("TpSysExitRecvfrom program is nil")
}
// Vérifier le type du programme (doit être Kprobe pour kretprobe)
info, err := objs.TpSysExitRecvfrom.Info()
if err != nil {
t.Fatalf("Failed to get program info: %v", err)
}
if info.Type != ebpf.Kprobe {
t.Errorf("Expected program type Kprobe, got %v", info.Type)
}
t.Logf("kretprobe __x64_sys_recvfrom validated: type=%v", info.Type)
}
// TestKretprobeVsTracepoint compare l'attachement entre tracepoint et kretprobe
func TestKretprobeVsTracepoint(t *testing.T) {
if testing.Short() {
t.Skip("Skipping test that requires kernel BPF")
}
if err := rlimit.RemoveMemlock(); err != nil {
t.Fatalf("Failed to remove memlock: %v", err)
}
objs := &Ja4NginxObjects{}
if err := LoadJa4NginxObjects(objs, nil); err != nil {
t.Fatalf("Failed to load nginx BPF objects: %v", err)
}
defer objs.Close()
// Test 1: Tracepoint standard (doit échouer sur Rocky Linux 9)
t.Run("TracepointStandard", func(t *testing.T) {
tp, err := link.Tracepoint("syscalls", "sys_exit_recvfrom",
objs.TpSysExitRecvfrom, nil)
if err != nil {
t.Logf("Expected failure: tracepoint sys_exit_recvfrom failed: %v", err)
// C'est le comportement attendu sur Rocky Linux 9
return
}
tp.Close()
t.Error("Tracepoint sys_exit_recvfrom succeeded unexpectedly (should fail on Rocky 9)")
})
// Test 2: Kretprobe (doit réussir)
t.Run("KretprobeRecvfrom", func(t *testing.T) {
kp, err := link.Kretprobe("__x64_sys_recvfrom", objs.TpSysExitRecvfrom, &link.KprobeOptions{})
if err != nil {
t.Fatalf("kretprobe __x64_sys_recvfrom failed: %v (should succeed)", err)
}
defer kp.Close()
t.Log("kretprobe __x64_sys_recvfrom attached successfully")
})
}
// TestRecvfromEventStructure teste que la structure nginx_http_event
// est correctement définie pour le kretprobe.
func TestRecvfromEventStructure(t *testing.T) {
// Vérifier que la taille de la structure est correcte
const expectedSize = 426 // offset du champ data dans nginx_http_event
if testing.Short() {
t.Skip("Skipping BPF structure test")
}
if err := rlimit.RemoveMemlock(); err != nil {
t.Fatalf("Failed to remove memlock: %v", err)
}
objs := &Ja4NginxObjects{}
if err := LoadJa4NginxObjects(objs, nil); err != nil {
t.Fatalf("Failed to load nginx BPF objects: %v", err)
}
defer objs.Close()
// Vérifier que la map NginxBuf existe et a la bonne taille
nginxBuf := objs.NginxBuf
if nginxBuf == nil {
t.Fatal("NginxBuf map not found in BPF objects")
}
// La map doit être de type PERCPU_ARRAY
info, err := nginxBuf.Info()
if err != nil {
t.Fatalf("Failed to get __nginx_buf map info: %v", err)
}
if info.Type != ebpf.PerCPUArray {
t.Errorf("Expected PERCPU_ARRAY, got %v", info.Type)
}
if info.ValueSize < expectedSize {
t.Errorf("Expected value size >= %d, got %d", expectedSize, info.ValueSize)
}
t.Logf("nginx_http_event structure validated: size=%d bytes", info.ValueSize)
}
// BenchmarkKretprobeAttachment mesure le temps d'attachement du kretprobe
func BenchmarkKretprobeAttachment(b *testing.B) {
if err := rlimit.RemoveMemlock(); err != nil {
b.Fatalf("Failed to remove memlock: %v", err)
}
objs := &Ja4NginxObjects{}
if err := LoadJa4NginxObjects(objs, nil); err != nil {
b.Fatalf("Failed to load nginx BPF objects: %v", err)
}
defer objs.Close()
b.ResetTimer()
for i := 0; i < b.N; i++ {
kp, err := link.Kretprobe("__x64_sys_recvfrom", objs.TpSysExitRecvfrom, &link.KprobeOptions{})
if err != nil {
b.Fatalf("Failed to attach kretprobe: %v", err)
}
kp.Close()
}
}