ja4-platform/services/ja4ebpf/internal/procutil/proc_lookup.go

// Package procutil fournit des utilitaires pour résoudre les informations de
// connexion réseau depuis le système de fichiers /proc.
// Utilisé comme fallback quand la sonde accept4 n'est pas disponible (ex: Docker).
package procutil

import (
	"bufio"
	"encoding/binary"
	"fmt"
	"net"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"
)

// cacheEntry is one cached result of an fd→peer-address resolution.
//
// IP and Port hold the peer address that was resolved for the socket.
// expiresAt is the instant from which Lookup stops serving the entry and
// after which purgeLoop deletes it.
type cacheEntry struct {
	IP        net.IP
	Port      uint16
	expiresAt time.Time
}

// FDCache resolves a socket file descriptor to the client's IP address and
// port by querying /proc. Results are cached to limit I/O.
//
// mu guards every access to cache. cache maps a (tgid, fd) pair to the most
// recently resolved peer address. ttl is the lifetime given to each entry
// when it is stored; it is set once by NewFDCache.
type FDCache struct {
	mu    sync.Mutex
	cache map[fdKey]*cacheEntry
	ttl   time.Duration
}

// fdKey is the cache key: the TGID (thread-group ID, i.e. the process PID)
// together with the file descriptor number inside that process.
type fdKey struct {
	tgid uint32
	fd   uint32
}

// NewFDCache builds an empty FDCache whose entries remain valid for ttl after
// they have been resolved.
//
// It also starts purgeLoop in a background goroutine to drop expired entries
// periodically. purgeLoop has no stop signal, so that goroutine lives for as
// long as the process does.
func NewFDCache(ttl time.Duration) *FDCache {
	fc := new(FDCache)
	fc.ttl = ttl
	fc.cache = map[fdKey]*cacheEntry{}

	// Background sweep of expired entries.
	go fc.purgeLoop()

	return fc
}

// Lookup returns the client IP and port for the socket identified by
// (tgid, fd).
//
// A cached, unexpired entry is returned directly. Otherwise the address is
// resolved through lookupFDPeer and stored with a lifetime of c.ttl. Failed
// resolutions are returned to the caller and are not cached. The mutex is
// released while /proc is being read.
func (c *FDCache) Lookup(tgid, fd uint32) (net.IP, uint16, error) {
	k := fdKey{tgid: tgid, fd: fd}

	var (
		peerIP   net.IP
		peerPort uint16
		hit      bool
	)

	// Fast path: copy the cached values out while holding the lock.
	c.mu.Lock()
	if entry, found := c.cache[k]; found && time.Now().Before(entry.expiresAt) {
		peerIP, peerPort, hit = entry.IP, entry.Port, true
	}
	c.mu.Unlock()
	if hit {
		return peerIP, peerPort, nil
	}

	// Slow path: resolve from /proc without holding the lock.
	peerIP, peerPort, err := lookupFDPeer(tgid, fd)
	if err != nil {
		return nil, 0, err
	}

	fresh := &cacheEntry{IP: peerIP, Port: peerPort}
	c.mu.Lock()
	fresh.expiresAt = time.Now().Add(c.ttl)
	c.cache[k] = fresh
	c.mu.Unlock()

	return peerIP, peerPort, nil
}

// lookupFDPeer resolves the peer (client) address of socket fd in process
// tgid using /proc.
//
// It reads the /proc/<tgid>/fd/<fd> symlink, which for a socket has the form
// "socket:[inode]", and then looks that inode up in the TCP tables, in order:
// /proc/<tgid>/net/tcp, /proc/<tgid>/net/tcp6, and finally /proc/net/tcp and
// /proc/net/tcp6 as a last resort. The first table containing the inode wins.
//
// An error is returned when the link cannot be read, when fd is not a socket,
// when the inode cannot be parsed, or when none of the tables contains it.
func lookupFDPeer(tgid, fd uint32) (net.IP, uint16, error) {
	linkPath := fmt.Sprintf("/proc/%d/fd/%d", tgid, fd)
	dest, err := os.Readlink(linkPath)
	if err != nil {
		return nil, 0, fmt.Errorf("readlink %s: %w", linkPath, err)
	}

	const prefix, suffix = "socket:[", "]"
	if !strings.HasPrefix(dest, prefix) || !strings.HasSuffix(dest, suffix) {
		return nil, 0, fmt.Errorf("fd %d n'est pas un socket: %s", fd, dest)
	}

	inodeStr := dest[len(prefix) : len(dest)-len(suffix)]
	inode, err := strconv.ParseUint(inodeStr, 10, 64)
	if err != nil {
		return nil, 0, fmt.Errorf("inode invalide '%s': %w", inodeStr, err)
	}

	// Candidate tables, most specific first. The tcp6 tables also list
	// IPv4-mapped peers of dual-stack sockets, so the last-resort fallback
	// has to consult /proc/net/tcp6 as well as /proc/net/tcp.
	tables := []struct {
		path   string
		isIPv6 bool
	}{
		{fmt.Sprintf("/proc/%d/net/tcp", tgid), false},
		{fmt.Sprintf("/proc/%d/net/tcp6", tgid), true},
		{"/proc/net/tcp", false},
		{"/proc/net/tcp6", true},
	}
	for _, tbl := range tables {
		// A failure on one table (unreadable file, inode absent) simply
		// moves on to the next candidate.
		if ip, port, err := searchTCPTable(tbl.path, inode, tbl.isIPv6); err == nil {
			return ip, port, nil
		}
	}

	return nil, 0, fmt.Errorf("inode %d introuvable dans les tables TCP", inode)
}

// searchTCPTable scans a /proc/.../net/tcp or tcp6 table for the row whose
// socket inode equals inode and returns that row's remote (peer = client)
// address and port.
//
// path is the table file to read. isIPv6 selects the address decoder:
// parseHexIPv6 when true, parseHexIPv4 otherwise. Rows that are too short or
// whose fields cannot be parsed are skipped.
//
// An error is returned when the file cannot be opened, when reading it fails
// part-way through, or when no row matches.
func searchTCPTable(path string, inode uint64, isIPv6 bool) (net.IP, uint16, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	scanner.Scan() // skip the header line

	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) < 10 {
			continue
		}

		// Field 9 holds the socket inode.
		lineInode, err := strconv.ParseUint(fields[9], 10, 64)
		if err != nil || lineInode != inode {
			continue
		}

		// Field 2 is the remote address, formatted "HEXADDR:HEXPORT".
		remAddr := fields[2]
		colonIdx := strings.Index(remAddr, ":")
		if colonIdx < 0 {
			continue
		}

		hexIP := remAddr[:colonIdx]
		hexPort := remAddr[colonIdx+1:]

		var ip net.IP
		if isIPv6 {
			ip, err = parseHexIPv6(hexIP)
		} else {
			ip, err = parseHexIPv4(hexIP)
		}
		if err != nil {
			continue
		}

		portVal, err := strconv.ParseUint(hexPort, 16, 16)
		if err != nil {
			continue
		}

		return ip, uint16(portVal), nil
	}

	// Scan returns false on a read error just as it does at EOF; surface the
	// error instead of misreporting it as a missing inode.
	if err := scanner.Err(); err != nil {
		return nil, 0, fmt.Errorf("lecture de %s: %w", path, err)
	}

	return nil, 0, fmt.Errorf("inode %d non trouvé dans %s", inode, path)
}

// parseHexIPv4 decodes the 8-character hexadecimal IPv4 address found in
// /proc/net/tcp into a 4-byte net.IP.
//
// The hex string is read as a 32-bit number and its bytes are laid out in
// little-endian order, which matches what the kernel prints on little-endian
// hosts such as x86. Example: "0201010A" decodes to 10.1.1.2.
//
// An error is returned when hexStr is not exactly 8 characters long or is not
// valid hexadecimal.
func parseHexIPv4(hexStr string) (net.IP, error) {
	const hexDigits = 8
	if len(hexStr) != hexDigits {
		return nil, fmt.Errorf("adresse IPv4 hex invalide: %s", hexStr)
	}

	word, err := strconv.ParseUint(hexStr, 16, 32)
	if err != nil {
		return nil, err
	}

	// Writing the word back little-endian restores the address octets in
	// their on-the-wire order.
	var octets [4]byte
	binary.LittleEndian.PutUint32(octets[:], uint32(word))
	return net.IP(octets[:]), nil
}

// parseHexIPv6 decodes the 32-character hexadecimal IPv6 address found in
// /proc/net/tcp6.
//
// The string is treated as four 8-digit groups; each group is read as a
// 32-bit number and written out little-endian into the corresponding 4 bytes
// of the address. When the result is an IPv4-mapped address
// (::ffff:a.b.c.d), only the trailing 4 bytes are returned as an IPv4
// net.IP; otherwise the full 16-byte address is returned.
//
// An error is returned when hexStr is not exactly 32 characters long or when
// a group is not valid hexadecimal.
func parseHexIPv6(hexStr string) (net.IP, error) {
	const (
		groupDigits = 8
		groupCount  = 4
	)
	if len(hexStr) != groupDigits*groupCount {
		return nil, fmt.Errorf("adresse IPv6 hex invalide: %s", hexStr)
	}

	addr := make(net.IP, net.IPv6len)
	for off := 0; off < len(hexStr); off += groupDigits {
		word, err := strconv.ParseUint(hexStr[off:off+groupDigits], 16, 32)
		if err != nil {
			return nil, err
		}
		// Two hex digits per byte, so the byte offset is half the hex offset.
		binary.LittleEndian.PutUint32(addr[off/2:], uint32(word))
	}

	if !isIPv4MappedIPv6(addr) {
		return addr, nil
	}
	// IPv4-mapped: hand back just the embedded IPv4 address.
	return addr[12:].To4(), nil
}

// isIPv4MappedIPv6 reports whether ip is a 16-byte address of the
// IPv4-mapped form ::ffff:a.b.c.d, i.e. ten zero bytes followed by 0xff 0xff
// and the four IPv4 octets. Addresses of any other length yield false.
func isIPv4MappedIPv6(ip net.IP) bool {
	if len(ip) != net.IPv6len {
		return false
	}
	// For a 16-byte address, To4 is non-nil exactly when the first twelve
	// bytes are the IPv4-mapped prefix.
	return ip.To4() != nil
}

// purgeLoop removes expired entries from the cache every 30 seconds.
//
// It loops on the ticker for as long as the ticker delivers ticks and is
// meant to run in its own goroutine, as NewFDCache starts it. Each sweep
// holds c.mu for its whole duration.
func (c *FDCache) purgeLoop() {
	const sweepInterval = 30 * time.Second

	tick := time.NewTicker(sweepInterval)
	defer tick.Stop()

	for range tick.C {
		c.mu.Lock()
		cutoff := time.Now()
		for key, entry := range c.cache {
			if entry.expiresAt.Before(cutoff) {
				delete(c.cache, key)
			}
		}
		c.mu.Unlock()
	}
}