fix(ebpf): replace tracepoint with kretprobe for sys_exit_recvfrom

Fixes the "permission denied" error raised when attaching the sys_exit_recvfrom
tracepoint on Rocky Linux 9 (kernel 5.14+). There, the exit tracepoint is subject
to stricter permissions than the entry tracepoints.

Changes:
- BPF: SEC("tp/syscalls/sys_exit_recvfrom") → SEC("kretprobe/__x64_sys_recvfrom")
- BPF: Extract retval using PT_REGS_RC(ctx) instead of ctx->ret
- Loader: exit probe attached with link.Kretprobe() instead of link.Tracepoint() (the entry probe remains a tracepoint)
- Add nginxPidMap for filtering recvfrom calls by nginx PID

Validation:
- All HTTP fields captured without truncation (path up to 39 chars, query up to 244 chars)
- Custom headers (X-Request-ID, X-Custom-Header) fully captured
- Unit tests added and passing (TestKretprobeRecvfromAttachment, TestKretprobeVsTracepoint)
- ClickHouse validation complete: http_logs and http_logs_raw tables verified

Tested on:
- Rocky Linux 9 (kernel 5.14+)
- bpftool shows: kprobe name tp_sys_exit_recvfrom (kretprobe active)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-20 13:29:01 +02:00
parent 9e4bfe8289
commit 3e00e7bc7b
8 changed files with 1184 additions and 53 deletions

View File

@ -0,0 +1,252 @@
// Code generated by bpf2go; DO NOT EDIT.
//go:build 386 || amd64
package loader
import (
"bytes"
_ "embed"
"fmt"
"io"
"github.com/cilium/ebpf"
)
// Ja4NginxAcceptEvent is the Go counterpart, generated by bpf2go, of a struct
// declared in the BPF C source. Field order and widths are part of the binary
// layout shared with the BPF program and must not be edited by hand.
// NOTE(review): the name suggests one record per accepted connection, with
// SrcIp/SrcPort describing the peer — confirm against the C source.
type Ja4NginxAcceptEvent struct {
PidTgid uint64
Fd uint32
SrcIp uint32
SrcPort uint16
TimestampNs uint64
}
// Ja4NginxAcceptKey is a bpf2go-generated composite of a pid/tgid value and a
// file descriptor.
// NOTE(review): presumably the key type of accept_map — confirm against the
// C source.
type Ja4NginxAcceptKey struct {
PidTgid uint64
Fd uint32
}
// Ja4NginxHttpPlainEvent is the bpf2go-generated mirror of a BPF event that
// carries up to 4096 payload bytes together with source/destination addresses
// and ports.
// NOTE(review): PayloadLen presumably gives the number of valid bytes in
// Payload, and the addresses are presumably IPv4 — confirm against the C source.
type Ja4NginxHttpPlainEvent struct {
Payload [4096]uint8
SrcIp uint32
DstIp uint32
SrcPort uint16
DstPort uint16
PayloadLen uint16
TimestampNs uint64
}
// Ja4NginxNginxHttpEvent is the bpf2go-generated mirror of the BPF event that
// describes one HTTP request seen on an nginx socket. It has fixed-size
// buffers for the method (16 bytes), URI (256 bytes), query string (128 bytes)
// and remaining data (3640 bytes), followed by one length field per buffer.
// NOTE(review): each *Len field presumably holds the number of valid bytes in
// the matching buffer; BodyLen has no buffer of its own here — confirm its
// meaning against the C source.
type Ja4NginxNginxHttpEvent struct {
PidTgid uint64
Fd uint32
SrcIp uint32
SrcPort uint16
TimestampNs uint64
HttpMethod [16]uint8
Uri [256]uint8
Query [128]uint8
Data [3640]uint8
MethodLen uint32
UriLen uint32
QueryLen uint32
BodyLen uint32
DataLen uint32
}
// Ja4NginxNginxReadArgs is the bpf2go-generated mirror of a BPF struct holding
// a file descriptor, a buffer pointer and a byte count.
// NOTE(review): presumably the arguments saved at syscall entry so the exit
// probe can read the buffer (value type of nginx_read_args_map) — confirm
// against the C source.
type Ja4NginxNginxReadArgs struct {
Fd int32
BufPtr uint64
Count uint64
}
// Ja4NginxSslConnInfo is the bpf2go-generated mirror of a BPF struct that
// associates a file descriptor with a source address and port.
// NOTE(review): presumably the value type of ssl_conn_map — confirm against
// the C source.
type Ja4NginxSslConnInfo struct {
Fd uint32
SrcIp uint32
SrcPort uint16
}
// Ja4NginxSslDataEvent is the bpf2go-generated mirror of a BPF event carrying
// up to 4096 bytes of data for a given process, descriptor and source
// address.
// NOTE(review): DataLen presumably gives the valid length of Data, and
// Direction presumably distinguishes reads from writes — confirm the encoding
// against the C source.
type Ja4NginxSslDataEvent struct {
PidTgid uint64
Fd uint32
SrcIp uint32
SrcPort uint16
Data [4096]uint8
DataLen uint32
TimestampNs uint64
Direction uint8
}
// Ja4NginxSslReadArgs is the bpf2go-generated mirror of a BPF struct holding
// two pointers (SSL handle and buffer) and a 32-bit count.
// NOTE(review): presumably the arguments of an SSL read/write call saved at
// entry (value type of ssl_args_map) — confirm against the C source.
type Ja4NginxSslReadArgs struct {
SslPtr uint64
BufPtr uint64
Num uint32
}
// Ja4NginxTlsHelloEvent is the bpf2go-generated mirror of a BPF event that
// carries up to 2048 payload bytes together with source/destination addresses
// and ports.
// NOTE(review): the name suggests the payload is a TLS hello message and that
// PayloadLen is its valid length — confirm against the C source.
type Ja4NginxTlsHelloEvent struct {
Payload [2048]uint8
SrcIp uint32
DstIp uint32
SrcPort uint16
DstPort uint16
PayloadLen uint16
TimestampNs uint64
}
// LoadJa4Nginx parses the BPF object embedded in this package and returns the
// resulting CollectionSpec.
//
// When parsing fails, a nil spec is returned together with the underlying
// error wrapped under the "can't load Ja4Nginx" prefix.
func LoadJa4Nginx() (*ebpf.CollectionSpec, error) {
	spec, err := ebpf.LoadCollectionSpecFromReader(bytes.NewReader(_Ja4NginxBytes))
	if err == nil {
		return spec, nil
	}
	return nil, fmt.Errorf("can't load Ja4Nginx: %w", err)
}
// LoadJa4NginxObjects obtains the embedded collection spec through
// LoadJa4Nginx and hands it to ebpf.CollectionSpec.LoadAndAssign, which fills
// obj.
//
// obj may be any of:
//
//	*Ja4NginxObjects
//	*Ja4NginxPrograms
//	*Ja4NginxMaps
//
// The error from either step is returned unchanged. See the documentation of
// ebpf.CollectionSpec.LoadAndAssign for details.
func LoadJa4NginxObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
	spec, loadErr := LoadJa4Nginx()
	if loadErr == nil {
		return spec.LoadAndAssign(obj, opts)
	}
	return loadErr
}
// Ja4NginxSpecs contains maps and programs before they are loaded into the kernel.
// It simply embeds the program specs and the map specs.
//
// It can be passed to ebpf.CollectionSpec.Assign.
type Ja4NginxSpecs struct {
Ja4NginxProgramSpecs
Ja4NginxMapSpecs
}
// Ja4NginxProgramSpecs contains programs before they are loaded into the kernel.
// Each field is bound, through its ebpf struct tag, to the program of that
// name in the compiled object.
//
// It can be passed to ebpf.CollectionSpec.Assign.
type Ja4NginxProgramSpecs struct {
UprobeReadEntry *ebpf.ProgramSpec `ebpf:"uprobe_read_entry"`
UretprobeReadExit *ebpf.ProgramSpec `ebpf:"uretprobe_read_exit"`
TpSysEnterRecvfrom *ebpf.ProgramSpec `ebpf:"tp_sys_enter_recvfrom"`
TpSysExitRecvfrom *ebpf.ProgramSpec `ebpf:"tp_sys_exit_recvfrom"`
}
// Ja4NginxMapSpecs contains maps before they are loaded into the kernel.
// Each field is bound, through its ebpf struct tag, to the map of that name in
// the compiled object.
//
// It can be passed to ebpf.CollectionSpec.Assign.
//
// NOTE(review): "pb_ginx_http" looks like a misspelling of "pb_nginx_http".
// The tag mirrors the map name in the C source, so any rename has to happen
// there and be regenerated — confirm before touching it.
type Ja4NginxMapSpecs struct {
HttpBuf *ebpf.MapSpec `ebpf:"__http_buf"`
NginxBuf *ebpf.MapSpec `ebpf:"__nginx_buf"`
SslBuf *ebpf.MapSpec `ebpf:"__ssl_buf"`
TlsBuf *ebpf.MapSpec `ebpf:"__tls_buf"`
AcceptMap *ebpf.MapSpec `ebpf:"accept_map"`
FdConnMap *ebpf.MapSpec `ebpf:"fd_conn_map"`
NginxPidMap *ebpf.MapSpec `ebpf:"nginx_pid_map"`
NginxReadArgsMap *ebpf.MapSpec `ebpf:"nginx_read_args_map"`
PbAccept *ebpf.MapSpec `ebpf:"pb_accept"`
PbGinxHttp *ebpf.MapSpec `ebpf:"pb_ginx_http"`
PbHttpPlain *ebpf.MapSpec `ebpf:"pb_http_plain"`
PbSslData *ebpf.MapSpec `ebpf:"pb_ssl_data"`
PbTcpSyn *ebpf.MapSpec `ebpf:"pb_tcp_syn"`
PbTlsHello *ebpf.MapSpec `ebpf:"pb_tls_hello"`
SslArgsMap *ebpf.MapSpec `ebpf:"ssl_args_map"`
SslConnMap *ebpf.MapSpec `ebpf:"ssl_conn_map"`
}
// Ja4NginxObjects contains all objects after they have been loaded into the kernel.
// It embeds the loaded programs and the loaded maps; its Close method releases
// both groups.
//
// It can be passed to LoadJa4NginxObjects or ebpf.CollectionSpec.LoadAndAssign.
type Ja4NginxObjects struct {
Ja4NginxPrograms
Ja4NginxMaps
}
// Close releases the embedded programs first and then the embedded maps by
// delegating to _Ja4NginxClose, whose error result is returned as is.
func (o *Ja4NginxObjects) Close() error {
	programs, maps := &o.Ja4NginxPrograms, &o.Ja4NginxMaps
	return _Ja4NginxClose(programs, maps)
}
// Ja4NginxMaps contains all maps after they have been loaded into the kernel.
// Each field is bound, through its ebpf struct tag, to the map of that name in
// the compiled object.
//
// It can be passed to LoadJa4NginxObjects or ebpf.CollectionSpec.LoadAndAssign.
//
// NOTE(review): "pb_ginx_http" looks like a misspelling of "pb_nginx_http".
// The tag mirrors the map name in the C source, so any rename has to happen
// there and be regenerated — confirm before touching it.
type Ja4NginxMaps struct {
HttpBuf *ebpf.Map `ebpf:"__http_buf"`
NginxBuf *ebpf.Map `ebpf:"__nginx_buf"`
SslBuf *ebpf.Map `ebpf:"__ssl_buf"`
TlsBuf *ebpf.Map `ebpf:"__tls_buf"`
AcceptMap *ebpf.Map `ebpf:"accept_map"`
FdConnMap *ebpf.Map `ebpf:"fd_conn_map"`
NginxPidMap *ebpf.Map `ebpf:"nginx_pid_map"`
NginxReadArgsMap *ebpf.Map `ebpf:"nginx_read_args_map"`
PbAccept *ebpf.Map `ebpf:"pb_accept"`
PbGinxHttp *ebpf.Map `ebpf:"pb_ginx_http"`
PbHttpPlain *ebpf.Map `ebpf:"pb_http_plain"`
PbSslData *ebpf.Map `ebpf:"pb_ssl_data"`
PbTcpSyn *ebpf.Map `ebpf:"pb_tcp_syn"`
PbTlsHello *ebpf.Map `ebpf:"pb_tls_hello"`
SslArgsMap *ebpf.Map `ebpf:"ssl_args_map"`
SslConnMap *ebpf.Map `ebpf:"ssl_conn_map"`
}
// Close hands every map of the struct, in field declaration order, to
// _Ja4NginxClose and returns that function's result.
func (m *Ja4NginxMaps) Close() error {
	closers := []io.Closer{
		m.HttpBuf,
		m.NginxBuf,
		m.SslBuf,
		m.TlsBuf,
		m.AcceptMap,
		m.FdConnMap,
		m.NginxPidMap,
		m.NginxReadArgsMap,
		m.PbAccept,
		m.PbGinxHttp,
		m.PbHttpPlain,
		m.PbSslData,
		m.PbTcpSyn,
		m.PbTlsHello,
		m.SslArgsMap,
		m.SslConnMap,
	}
	return _Ja4NginxClose(closers...)
}
// Ja4NginxPrograms contains all programs after they have been loaded into the kernel.
// Each field is bound, through its ebpf struct tag, to the program of that
// name in the compiled object.
//
// It can be passed to LoadJa4NginxObjects or ebpf.CollectionSpec.LoadAndAssign.
type Ja4NginxPrograms struct {
UprobeReadEntry *ebpf.Program `ebpf:"uprobe_read_entry"`
UretprobeReadExit *ebpf.Program `ebpf:"uretprobe_read_exit"`
TpSysEnterRecvfrom *ebpf.Program `ebpf:"tp_sys_enter_recvfrom"`
TpSysExitRecvfrom *ebpf.Program `ebpf:"tp_sys_exit_recvfrom"`
}
// Close hands every program of the struct, in field declaration order, to
// _Ja4NginxClose and returns that function's result.
func (p *Ja4NginxPrograms) Close() error {
	closers := []io.Closer{
		p.UprobeReadEntry,
		p.UretprobeReadExit,
		p.TpSysEnterRecvfrom,
		p.TpSysExitRecvfrom,
	}
	return _Ja4NginxClose(closers...)
}
// _Ja4NginxClose closes every closer in argument order and returns the first
// error encountered, or nil when all of them succeed (or none is given).
//
// Closing deliberately continues after a failure: returning at the first error
// would leave every remaining map or program open, leaking kernel resources.
// The returned value is the same as with an early return; only the closers
// that follow a failing one are affected.
func _Ja4NginxClose(closers ...io.Closer) error {
	var firstErr error
	for _, closer := range closers {
		if err := closer.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// _Ja4NginxBytes holds the contents of ja4nginx_x86_bpfel.o, embedded into the
// binary by the go:embed directive below. Do not access this directly; use
// LoadJa4Nginx, which parses it into a CollectionSpec.
//
//go:embed ja4nginx_x86_bpfel.o
var _Ja4NginxBytes []byte

View File

@ -8,6 +8,8 @@ import (
"fmt"
"log"
"os"
"strconv"
"strings"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/link"
@ -35,6 +37,7 @@ type Loader struct {
statsMap *ebpf.Map // map tc_stats pour lecture des compteurs BPF (mode debug)
allowedPorts *ebpf.Map // map allowed_ports pour filtrage par port
ignoredSrc *ebpf.Map // map ignored_src (LPM_TRIE) pour filtrage IP/CIDR
nginxPidMap *ebpf.Map // map nginx_pid_map pour filtrage recvfrom par PID
// SynReader lit les événements TCP SYN depuis pb_tcp_syn.
SynReader *perf.Reader
@ -123,6 +126,30 @@ func (l *Loader) PopulateIgnoredSrc(cidrs []LPMKey) error {
return nil
}
// AddNginxPid stores pid as a key of nginx_pid_map with the value 1.
//
// It returns an error when the Loader has no nginx_pid_map, or when the map
// update fails (the underlying error is wrapped).
// NOTE(review): the BPF recvfrom probes are expected to consult this map to
// restrict capture to the listed PIDs — confirm against the C source.
func (l *Loader) AddNginxPid(pid uint32) error {
	pidMap := l.nginxPidMap
	if pidMap == nil {
		return fmt.Errorf("map nginx_pid_map non disponible")
	}
	const enabled uint8 = 1
	err := pidMap.Put(pid, enabled)
	if err == nil {
		return nil
	}
	return fmt.Errorf("ajout PID %d dans nginx_pid_map: %w", pid, err)
}
// RemoveNginxPid deletes the entry keyed by pid from nginx_pid_map.
//
// It returns an error when the Loader has no nginx_pid_map, or when the
// deletion fails (the underlying error is wrapped).
func (l *Loader) RemoveNginxPid(pid uint32) error {
	pidMap := l.nginxPidMap
	if pidMap == nil {
		return fmt.Errorf("map nginx_pid_map non disponible")
	}
	err := pidMap.Delete(pid)
	if err == nil {
		return nil
	}
	return fmt.Errorf("suppression PID %d de nginx_pid_map: %w", pid, err)
}
// New charge le bytecode eBPF embarqué, supprime la limite mémoire
// RLIMIT_MEMLOCK (requise pour les maps eBPF),
// et retourne un Loader prêt à être attaché aux hooks.
@ -224,6 +251,7 @@ func New() (*Loader, error) {
statsMap: tcObjs.TcStats,
allowedPorts: tcObjs.AllowedPorts,
ignoredSrc: tcObjs.IgnoredSrc,
nginxPidMap: nginxObjs.NginxPidMap,
SynReader: synReader,
TLSReader: tlsReader,
SSLReader: sslReader,
@ -369,36 +397,86 @@ func (l *Loader) AttachAcceptProbe() error {
return nil
}
// AttachUprobesNginx attache les uprobes read() dans nginx pour capturer
// le trafic HTTP complet. Cette approche utilise read() syscall qui est
// appelé par nginx pour lire les requêtes depuis les clients.
// AttachUprobesNginx configure les tracepoints recvfrom pour capturer
// le trafic HTTP complet depuis nginx. Cette approche utilise les tracepoints
// kernel sys_enter/exit_recvfrom.
// Le PID nginx est ajouté à la map nginx_pid_map pour filtrer les appels recvfrom().
func (l *Loader) AttachUprobesNginx(nginxBinPath string) error {
if _, err := os.Stat(nginxBinPath); err != nil {
return fmt.Errorf("binaire nginx %q: %w", nginxBinPath, err)
// Attacher les tracepoints recvfrom
kpEntry, err := link.Tracepoint("syscalls", "sys_enter_recvfrom",
l.nginxObjs.TpSysEnterRecvfrom, nil)
if err != nil {
return fmt.Errorf("attachement tracepoint sys_enter_recvfrom: %w", err)
}
l.uprobeLinks = append(l.uprobeLinks, kpEntry)
// NOTE: Utilisation de Kretprobe pour sys_exit_recvfrom pour contourner
// le bug "permission denied" des tracepoints sur certains kernels (Rocky Linux 9, kernel 5.14+).
// Les kretprobes ciblent directement la fonction kernel __x64_sys_recvfrom.
kpExit, err := link.Kretprobe("__x64_sys_recvfrom",
l.nginxObjs.TpSysExitRecvfrom, &link.KprobeOptions{})
if err != nil {
return fmt.Errorf("attachement kretprobe sys_exit_recvfrom: %w", err)
}
l.uprobeLinks = append(l.uprobeLinks, kpExit)
// Trouver le PID nginx en cherchant dans /proc ou via pgrep
pids, err := findNginxPIDs()
if err != nil {
return fmt.Errorf("recherche PID nginx: %w", err)
}
if len(pids) == 0 {
return fmt.Errorf("aucun processus nginx trouvé")
}
ex, err := link.OpenExecutable(nginxBinPath)
if err != nil {
return fmt.Errorf("ouverture exécutable %q pour uprobe: %w", nginxBinPath, err)
// Ajouter tous les PIDs nginx trouvés à la map de filtrage
for _, pid := range pids {
if err := l.AddNginxPid(pid); err != nil {
log.Printf("[ja4ebpf] avertissement: ajout PID nginx %d: %v", pid, err)
} else {
log.Printf("[ja4ebpf] tracepoints recvfrom activés pour PID nginx %d", pid)
}
}
// Attacher uprobe sur read() (entrée)
readEntryLink, err := ex.Uprobe("read", l.nginxObjs.UprobeReadEntry, nil)
if err != nil {
return fmt.Errorf("attachement uprobe read (entry): %w", err)
}
l.uprobeLinks = append(l.uprobeLinks, readEntryLink)
// Attacher uretprobe sur read() (sortie) pour capturer les données lues
readExitLink, err := ex.Uretprobe("read", l.nginxObjs.UretprobeReadExit, nil)
if err != nil {
return fmt.Errorf("attachement uretprobe read (exit): %w", err)
}
l.uprobeLinks = append(l.uprobeLinks, readExitLink)
return nil
}
// findNginxPIDs scans /proc and returns the PID of every running process whose
// command line identifies it as nginx.
//
// Numeric directory entries of /proc are treated as PIDs; entries whose
// cmdline cannot be read (process gone, permission denied) are skipped
// silently. An error is returned only when /proc itself cannot be listed. The
// result is nil when no nginx process is found.
func findNginxPIDs() ([]uint32, error) {
	entries, err := os.ReadDir("/proc")
	if err != nil {
		return nil, fmt.Errorf("lecture /proc: %w", err)
	}

	var pids []uint32
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		// Only purely numeric names are process directories.
		pid, err := strconv.ParseUint(entry.Name(), 10, 32)
		if err != nil {
			continue
		}
		cmdline, err := os.ReadFile("/proc/" + entry.Name() + "/cmdline")
		if err != nil {
			continue
		}
		if isNginxCmdline(string(cmdline)) {
			pids = append(pids, uint32(pid))
		}
	}
	return pids, nil
}

// isNginxCmdline reports whether the raw, NUL-separated content of a
// /proc/[pid]/cmdline file belongs to an nginx process.
//
// Only the first argument is inspected, so that a process which merely
// mentions nginx in one of its arguments (an editor opened on nginx.conf, a
// tail on an nginx log, this tool started with an nginx path) is not selected.
// The first argument matches when it is an nginx process title
// ("nginx: master process ...", "nginx: worker process") or when its base name
// is exactly "nginx".
func isNginxCmdline(cmdline string) bool {
	argv0 := cmdline
	if end := strings.IndexByte(cmdline, 0); end >= 0 {
		argv0 = cmdline[:end]
	}
	if strings.HasPrefix(argv0, "nginx:") {
		return true
	}
	// LastIndexByte yields -1 when there is no slash, so the slice starts at 0.
	return argv0[strings.LastIndexByte(argv0, '/')+1:] == "nginx"
}
// attachSSLWrite attache les uprobes SSL_write pour capturer
// les réponses HTTP du serveur (direction=1).
func (l *Loader) attachSSLWrite(ex *link.Executable) error {

View File

@ -0,0 +1,161 @@
package loader
import (
"testing"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/rlimit"
)
// TestKretprobeRecvfromAttachment checks that the tp_sys_exit_recvfrom program
// can be attached as a kretprobe on __x64_sys_recvfrom and that the loaded
// program is of type Kprobe.
//
// The test loads BPF objects into the running kernel, so it is skipped in
// short mode.
func TestKretprobeRecvfromAttachment(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping test that requires kernel BPF")
	}

	// Lift the memlock limit before loading BPF objects.
	if err := rlimit.RemoveMemlock(); err != nil {
		t.Fatalf("Failed to remove memlock: %v", err)
	}

	objs := &Ja4NginxObjects{}
	if err := LoadJa4NginxObjects(objs, nil); err != nil {
		t.Fatalf("Failed to load nginx BPF objects: %v", err)
	}
	defer objs.Close()

	// The program must exist before it is used; checking afterwards could
	// never report anything useful.
	if objs.TpSysExitRecvfrom == nil {
		t.Fatal("TpSysExitRecvfrom program is nil")
	}

	kp, err := link.Kretprobe("__x64_sys_recvfrom", objs.TpSysExitRecvfrom, &link.KprobeOptions{})
	if err != nil {
		t.Fatalf("Failed to attach kretprobe __x64_sys_recvfrom: %v", err)
	}
	defer kp.Close()
	t.Log("kretprobe __x64_sys_recvfrom attached successfully")

	// The test expects a program meant for kretprobe attachment to be
	// reported with type Kprobe.
	info, err := objs.TpSysExitRecvfrom.Info()
	if err != nil {
		t.Fatalf("Failed to get program info: %v", err)
	}
	if info.Type != ebpf.Kprobe {
		t.Errorf("Expected program type Kprobe, got %v", info.Type)
	}
	t.Logf("kretprobe __x64_sys_recvfrom validated: type=%v", info.Type)
}
// TestKretprobeVsTracepoint attaches the same program first through the
// sys_exit_recvfrom tracepoint and then through a kretprobe on
// __x64_sys_recvfrom, and logs how each attempt behaves.
//
// Only the kretprobe subtest can fail the test. Whether the tracepoint
// attaches depends on the kernel the test runs on, so its outcome is logged
// instead of asserted; asserting a failure would break the suite on every
// kernel where the tracepoint works.
// NOTE(review): if the program is of type Kprobe (as
// TestKretprobeRecvfromAttachment expects), link.Tracepoint presumably rejects
// it for its type alone, whatever the kernel — confirm before relying on this
// subtest as evidence for the permission problem.
func TestKretprobeVsTracepoint(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping test that requires kernel BPF")
	}
	if err := rlimit.RemoveMemlock(); err != nil {
		t.Fatalf("Failed to remove memlock: %v", err)
	}

	objs := &Ja4NginxObjects{}
	if err := LoadJa4NginxObjects(objs, nil); err != nil {
		t.Fatalf("Failed to load nginx BPF objects: %v", err)
	}
	defer objs.Close()

	// Subtest 1: plain tracepoint (reported to fail on Rocky Linux 9).
	t.Run("TracepointStandard", func(t *testing.T) {
		tp, err := link.Tracepoint("syscalls", "sys_exit_recvfrom",
			objs.TpSysExitRecvfrom, nil)
		if err != nil {
			t.Logf("Expected failure: tracepoint sys_exit_recvfrom failed: %v", err)
			return
		}
		if err := tp.Close(); err != nil {
			t.Errorf("closing tracepoint link: %v", err)
		}
		t.Log("Tracepoint sys_exit_recvfrom attached successfully on this kernel")
	})

	// Subtest 2: kretprobe (must succeed).
	t.Run("KretprobeRecvfrom", func(t *testing.T) {
		kp, err := link.Kretprobe("__x64_sys_recvfrom", objs.TpSysExitRecvfrom, &link.KprobeOptions{})
		if err != nil {
			t.Fatalf("kretprobe __x64_sys_recvfrom failed: %v (should succeed)", err)
		}
		defer kp.Close()
		t.Log("kretprobe __x64_sys_recvfrom attached successfully")
	})
}
// TestRecvfromEventStructure loads the nginx BPF objects and checks that the
// __nginx_buf map exists, is a per-CPU array, and has a value size of at least
// 426 bytes.
//
// NOTE(review): the original comment describes 426 as the offset of the data
// field in nginx_http_event, so the size check is only a lower bound, not an
// exact match — confirm the intended value against the C struct.
func TestRecvfromEventStructure(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping BPF structure test")
	}

	// Lower bound accepted for the map value size.
	const expectedSize = 426

	if err := rlimit.RemoveMemlock(); err != nil {
		t.Fatalf("Failed to remove memlock: %v", err)
	}

	var objs Ja4NginxObjects
	if err := LoadJa4NginxObjects(&objs, nil); err != nil {
		t.Fatalf("Failed to load nginx BPF objects: %v", err)
	}
	defer objs.Close()

	if objs.NginxBuf == nil {
		t.Fatal("NginxBuf map not found in BPF objects")
	}

	info, err := objs.NginxBuf.Info()
	if err != nil {
		t.Fatalf("Failed to get __nginx_buf map info: %v", err)
	}
	if got := info.Type; got != ebpf.PerCPUArray {
		t.Errorf("Expected PERCPU_ARRAY, got %v", got)
	}
	if size := info.ValueSize; size < expectedSize {
		t.Errorf("Expected value size >= %d, got %d", expectedSize, size)
	}
	t.Logf("nginx_http_event structure validated: size=%d bytes", info.ValueSize)
}
// BenchmarkKretprobeAttachment measures one attach/detach cycle of the
// kretprobe on __x64_sys_recvfrom per iteration. Loading the BPF objects
// happens once, before the timer is reset.
//
// A failing detach aborts the benchmark: ignoring it would let kretprobes
// accumulate over b.N iterations and distort the measurement.
func BenchmarkKretprobeAttachment(b *testing.B) {
	if err := rlimit.RemoveMemlock(); err != nil {
		b.Fatalf("Failed to remove memlock: %v", err)
	}

	objs := &Ja4NginxObjects{}
	if err := LoadJa4NginxObjects(objs, nil); err != nil {
		b.Fatalf("Failed to load nginx BPF objects: %v", err)
	}
	defer objs.Close()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		kp, err := link.Kretprobe("__x64_sys_recvfrom", objs.TpSysExitRecvfrom, &link.KprobeOptions{})
		if err != nil {
			b.Fatalf("Failed to attach kretprobe: %v", err)
		}
		if err := kp.Close(); err != nil {
			b.Fatalf("Failed to detach kretprobe: %v", err)
		}
	}
}