- Add multi-interface TC attachment (default "any" = all UP interfaces) - Add BPF LPM_TRIE map ignored_src for kernel-side CIDR filtering - Add userspace ignore_src filtering for SSL/accept4 path via net.IPNet.Contains() - Add AcceptCache for fd→SessionKey correlation with TTL and Close() - Add 5 test files covering writer, procutil, dispatcher, accept_cache, and cmd - Fix formatTCPOptions infinite loop on EOL (case 0 break→return) - Fix pseudoOrderToShort panic on empty slice (negative cap) - Fix AcceptCache goroutine leak (add done channel + Close()) - Update config.yml.example with interfaces, listen_ports, ignore_src - Rewrite docs/services/ja4ebpf.md (was massively stale: XDP, RingBuffer, etc.) - Fix stale XDP/RingBuffer references in docs/architecture.md, thesis, tls.go Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
374 lines
13 KiB
C
374 lines
13 KiB
C
/* ============================================================================
 * tc_capture.c — TC ingress program: captures TCP SYNs, TLS ClientHellos
 *                and plaintext HTTP
 *
 * TC ingress hook (clsact qdisc), compatible with kernel 4.1+.
 * Emits events via bpf_perf_event_output() (kernel 4.4+) for maximum
 * compatibility.
 *
 * IMPORTANT: this program performs NO direct packet access (data/data_end).
 * Every read goes through bpf_skb_load_bytes() (kernel 4.5+) with constant
 * sizes, for compatibility with the kernel 4.18 verifier, which rejects
 * "math between pkt pointer and register with unbounded min value".
 *
 * Payload copies call bpf_skb_load_bytes() with constant sizes tried in
 * descending order, so the copy size is always bounded for the verifier.
 * Structs larger than 512 bytes live in a temporary PERCPU_ARRAY
 * (eBPF stack limit).
 * ============================================================================ */
|
|
|
|
#include "vmlinux.h"
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_endian.h>
|
|
#include <bpf/bpf_core_read.h>
|
|
#include "bpf_types.h"
|
|
|
|
/* Protocol and sizing constants.
 *
 * Kept as #define (not enum) on purpose: NOTE(review) vmlinux.h presumably
 * already declares some of these names (e.g. IPPROTO_TCP) as enumerators, and
 * re-declaring them in another enum would not compile — confirm before
 * converting. */

/* Ethernet */
#define ETH_P_IP              0x0800  /* EtherType for IPv4 (host order) */
#define ETH_HLEN              14      /* Ethernet header length in bytes */

/* IPv4 */
#define IPPROTO_TCP           6       /* iphdr.protocol value for TCP */
#define IP_DF                 0x4000  /* "Don't Fragment" bit of frag_off (host order) */

/* TCP flag bits as stored in the events' tcp_flags byte */
#define TH_SYN                0x02
#define TH_ACK                0x10
#define TH_FIN                0x01
#define TH_RST                0x04

/* Well-known ports. Not referenced by the code visible in this file: port
 * filtering goes through the allowed_ports map instead. */
#define HTTPS_PORT            443
#define HTTP_PORT             80
#define HTTP_ALT_PORT         8080

/* TLS record / handshake identifiers */
#define TLS_CONTENT_HANDSHAKE 0x16    /* record content type: handshake */
#define TLS_MSG_CLIENT_HELLO  0x01    /* handshake message type: ClientHello */

/* Size caps, in bytes */
#define MAX_TLS_PAYLOAD       2048
#define MAX_HTTP_PAYLOAD      1024
#define MAX_TCP_OPTIONS       40      /* TCP header max (60) minus fixed part (20) */
|
|
|
|
/* Counter map for debug */
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
|
__uint(max_entries, 7);
|
|
__type(key, __u32);
|
|
__type(value, __u64);
|
|
} tc_stats SEC(".maps");
|
|
|
|
/* Map de ports autorisés — peuplée depuis Go au démarrage.
|
|
* key = port (uint16), value = 1 (autorisé).
|
|
* Ports non présents dans la map sont ignorés. */
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(max_entries, 64);
|
|
__type(key, __u16);
|
|
__type(value, __u8);
|
|
} allowed_ports SEC(".maps");
|
|
|
|
/* Map LPM_TRIE des CIDR/IP sources à ignorer — peuplée depuis Go.
|
|
* key = {prefixlen, ip[4]} (8 octets), value = 1 (ignorer).
|
|
* Un lookup réussi = IP source à ignorer → return TC_ACT_OK.
|
|
* data est en network byte order (big-endian) pour correspondre à iph.saddr. */
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_LPM_TRIE);
|
|
__uint(max_entries, 256);
|
|
__type(key, struct { __u32 prefixlen; __u8 data[4]; });
|
|
__type(value, __u8);
|
|
__uint(map_flags, BPF_F_NO_PREALLOC);
|
|
} ignored_src SEC(".maps");
|
|
|
|
/* Slot indices into the tc_stats counter array (7 slots, 0..6). */
#define STAT_TOTAL       0  /* every packet seen by the program */
#define STAT_IPV4        1  /* EtherType matched IPv4 */
#define STAT_TCP         2  /* IPv4 packet long enough to hold a TCP header */
#define STAT_SYN         3  /* SYN without ACK on an allowed port */
#define STAT_SYN_SUBMIT  4  /* tcp_syn event handed to the perf buffer */
#define STAT_TLS_SUBMIT  5  /* tls_hello event handed to the perf buffer */
#define STAT_HTTP_SUBMIT 6  /* http_plain event handed to the perf buffer */
|
|
|
|
/* ---------------------------------------------------------------------------
|
|
* capture_tc — Point d'entrée TC ingress (clsact)
|
|
*
|
|
* AUCUN accès direct au paquet. Tout via bpf_skb_load_bytes() + tailles constantes.
|
|
* Compatible vérificateur kernel 4.18.
|
|
* ---------------------------------------------------------------------------*/
|
|
SEC("tc")
|
|
int capture_tc(struct __sk_buff *ctx)
|
|
{
|
|
__u32 key;
|
|
__u64 *cnt;
|
|
__u32 pkt_len = ctx->len;
|
|
|
|
key = STAT_TOTAL;
|
|
cnt = bpf_map_lookup_elem(&tc_stats, &key);
|
|
if (cnt) (*cnt)++;
|
|
|
|
/* --- Ethernet : vérifier type IPv4 --- */
|
|
if (pkt_len < ETH_HLEN + 20 + 20)
|
|
return TC_ACT_OK;
|
|
|
|
__be16 h_proto;
|
|
bpf_skb_load_bytes(ctx, 12, &h_proto, 2);
|
|
if (h_proto != bpf_htons(ETH_P_IP))
|
|
return TC_ACT_OK;
|
|
|
|
/* --- IPv4 : lire le header (20 octets min) --- */
|
|
key = STAT_IPV4;
|
|
cnt = bpf_map_lookup_elem(&tc_stats, &key);
|
|
if (cnt) (*cnt)++;
|
|
|
|
struct iphdr iph;
|
|
bpf_skb_load_bytes(ctx, ETH_HLEN, &iph, sizeof(iph));
|
|
|
|
if (iph.protocol != IPPROTO_TCP)
|
|
return TC_ACT_OK;
|
|
|
|
__u32 ihl = iph.ihl & 0x0F;
|
|
if (ihl < 5 || ihl > 15)
|
|
return TC_ACT_OK;
|
|
__u32 ip_hlen = ihl << 2;
|
|
if (ip_hlen < 20 || ip_hlen > 60)
|
|
return TC_ACT_OK;
|
|
|
|
__u32 src_ip = iph.saddr;
|
|
__u32 dst_ip = iph.daddr;
|
|
__u8 ttl = iph.ttl;
|
|
__u16 ip_id = bpf_ntohs(iph.id);
|
|
__u16 frag_off = bpf_ntohs(iph.frag_off);
|
|
__u8 df_bit = (frag_off & IP_DF) ? 1 : 0;
|
|
|
|
/* --- TCP : lire le header (20 octets) --- */
|
|
__u32 tcp_off = ETH_HLEN + ip_hlen;
|
|
if (pkt_len < tcp_off + 20)
|
|
return TC_ACT_OK;
|
|
|
|
key = STAT_TCP;
|
|
cnt = bpf_map_lookup_elem(&tc_stats, &key);
|
|
if (cnt) (*cnt)++;
|
|
|
|
struct tcphdr tcph;
|
|
bpf_skb_load_bytes(ctx, tcp_off, &tcph, sizeof(tcph));
|
|
|
|
__u16 src_port = bpf_ntohs(tcph.source);
|
|
__u16 dst_port = bpf_ntohs(tcph.dest);
|
|
__u16 window = bpf_ntohs(tcph.window);
|
|
|
|
__u8 tcp_flags = 0;
|
|
if (tcph.syn) tcp_flags |= TH_SYN;
|
|
if (tcph.ack) tcp_flags |= TH_ACK;
|
|
if (tcph.fin) tcp_flags |= TH_FIN;
|
|
if (tcph.rst) tcp_flags |= TH_RST;
|
|
|
|
__u32 doff = tcph.doff;
|
|
if (doff < 5 || doff > 15)
|
|
return TC_ACT_OK;
|
|
__u32 tcp_hlen = doff << 2;
|
|
if (tcp_hlen < 20 || tcp_hlen > 60)
|
|
return TC_ACT_OK;
|
|
|
|
__u32 payload_off = ETH_HLEN + ip_hlen + tcp_hlen;
|
|
|
|
__u32 avail = 0;
|
|
__u32 zero = 0;
|
|
|
|
/* Vérification globale : port autorisé ? (SYN, TLS, HTTP)
|
|
* On autorise si dst_port OU src_port est dans allowed_ports.
|
|
* En ingress TC, les réponses ont src_port=80/443 (serveur distant)
|
|
* et dst_port=ephemeral (client local). */
|
|
__u8 *port_allowed = bpf_map_lookup_elem(&allowed_ports, &dst_port);
|
|
if (!port_allowed) {
|
|
port_allowed = bpf_map_lookup_elem(&allowed_ports, &src_port);
|
|
if (!port_allowed)
|
|
return TC_ACT_OK;
|
|
}
|
|
|
|
/* Vérification : IP source ignorée ? (LPM_TRIE lookup /32) */
|
|
struct { __u32 prefixlen; __u8 data[4]; } lpm_key = {};
|
|
lpm_key.prefixlen = 32;
|
|
/* Copier src_ip (network byte order) dans data[4] byte par byte.
|
|
* src_ip est en network byte order (big-endian) depuis iph.saddr.
|
|
* Sur x86 little-endian, il faut extraire du MSB vers le LSB
|
|
* pour que data[] soit en network byte order comme les clés Go. */
|
|
__u32 src_ip_h = bpf_ntohl(src_ip);
|
|
lpm_key.data[0] = (__u8)((src_ip_h >> 24) & 0xFF);
|
|
lpm_key.data[1] = (__u8)((src_ip_h >> 16) & 0xFF);
|
|
lpm_key.data[2] = (__u8)((src_ip_h >> 8) & 0xFF);
|
|
lpm_key.data[3] = (__u8)(src_ip_h & 0xFF);
|
|
__u8 *src_ignored = bpf_map_lookup_elem(&ignored_src, &lpm_key);
|
|
if (src_ignored)
|
|
return TC_ACT_OK;
|
|
|
|
/* ===================================================================
|
|
* TCP SYN
|
|
* ===================================================================*/
|
|
if ((tcp_flags & TH_SYN) && !(tcp_flags & TH_ACK)) {
|
|
key = STAT_SYN;
|
|
cnt = bpf_map_lookup_elem(&tc_stats, &key);
|
|
if (cnt) (*cnt)++;
|
|
|
|
struct tcp_syn_event evt = {};
|
|
evt.src_ip = bpf_ntohl(src_ip);
|
|
evt.dst_ip = bpf_ntohl(dst_ip);
|
|
evt.src_port = src_port;
|
|
evt.dst_port = dst_port;
|
|
evt.ttl = ttl;
|
|
evt.df_bit = df_bit;
|
|
evt.ip_id = ip_id;
|
|
evt.ip_total_length = bpf_ntohs(iph.tot_len);
|
|
evt.window_size = window;
|
|
evt.window_scale = 0xFF;
|
|
evt.mss = 0;
|
|
evt.timestamp_ns = bpf_ktime_get_ns();
|
|
evt.tcp_options_len = 0;
|
|
|
|
/* Copie des options TCP via bpf_skb_load_bytes avec cascade de tailles.
|
|
* Le vérificateur BPF exige une taille constante pour bpf_skb_load_bytes.
|
|
* On essaie 40, puis 20, puis 10 octets — le premier appel qui réussit
|
|
* donne les options disponibles (même partielles). */
|
|
__u32 opts_off = tcp_off + 20;
|
|
__u32 opts_len = tcp_hlen - 20;
|
|
if (opts_len > 0 && opts_len <= MAX_TCP_OPTIONS) {
|
|
if (opts_off + 40 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, opts_off, evt.tcp_options_raw, 40);
|
|
evt.tcp_options_len = (__u8)opts_len;
|
|
} else if (opts_off + 20 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, opts_off, evt.tcp_options_raw, 20);
|
|
evt.tcp_options_len = (__u8)(opts_len > 20 ? 20 : opts_len);
|
|
} else if (opts_off + 10 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, opts_off, evt.tcp_options_raw, 10);
|
|
evt.tcp_options_len = (__u8)(opts_len > 10 ? 10 : opts_len);
|
|
}
|
|
}
|
|
|
|
bpf_perf_event_output(ctx, &pb_tcp_syn, BPF_F_CURRENT_CPU,
|
|
&evt, sizeof(evt));
|
|
|
|
key = STAT_SYN_SUBMIT;
|
|
cnt = bpf_map_lookup_elem(&tc_stats, &key);
|
|
if (cnt) (*cnt)++;
|
|
}
|
|
|
|
/* ===================================================================
|
|
* TLS ClientHello
|
|
* ===================================================================*/
|
|
/* Lire les 6 premiers octets du payload pour vérifier le type TLS */
|
|
if (payload_off + 6 > pkt_len)
|
|
goto try_http;
|
|
|
|
__u8 tls_hdr[6];
|
|
bpf_skb_load_bytes(ctx, payload_off, tls_hdr, 6);
|
|
|
|
if (tls_hdr[0] != TLS_CONTENT_HANDSHAKE || tls_hdr[5] != TLS_MSG_CLIENT_HELLO)
|
|
goto try_http;
|
|
|
|
/* Avail via pkt_len (scalaire pur) */
|
|
avail = 0;
|
|
if (pkt_len > payload_off) {
|
|
avail = pkt_len - payload_off;
|
|
if (avail > MAX_TLS_PAYLOAD)
|
|
avail = MAX_TLS_PAYLOAD;
|
|
}
|
|
if (avail == 0)
|
|
return TC_ACT_OK;
|
|
|
|
struct tls_hello_event *tls_evt = bpf_map_lookup_elem(&__tls_buf, &zero);
|
|
if (!tls_evt)
|
|
return TC_ACT_OK;
|
|
|
|
tls_evt->src_ip = 0;
|
|
tls_evt->dst_ip = 0;
|
|
tls_evt->src_port = 0;
|
|
tls_evt->dst_port = 0;
|
|
tls_evt->payload_len = 0;
|
|
tls_evt->timestamp_ns = 0;
|
|
|
|
tls_evt->src_ip = bpf_ntohl(src_ip);
|
|
tls_evt->dst_ip = bpf_ntohl(dst_ip);
|
|
tls_evt->src_port = src_port;
|
|
tls_evt->dst_port = dst_port;
|
|
tls_evt->timestamp_ns = bpf_ktime_get_ns();
|
|
|
|
/* Copie via bpf_skb_load_bytes avec tailles constantes en cascade.
|
|
* Kernel 4.18 ne supporte pas les tailles variables vers map values.
|
|
* On essaie 1024 puis 512 puis 256 pour capturer SNI et extensions.
|
|
* La taille réellement copiée est stockée dans payload_len. */
|
|
if (payload_off + 1024 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, payload_off, tls_evt, 1024);
|
|
tls_evt->payload_len = 1024;
|
|
} else if (payload_off + 512 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, payload_off, tls_evt, 512);
|
|
tls_evt->payload_len = 512;
|
|
} else if (payload_off + 256 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, payload_off, tls_evt, 256);
|
|
tls_evt->payload_len = 256;
|
|
} else {
|
|
return TC_ACT_OK;
|
|
}
|
|
|
|
bpf_perf_event_output(ctx, &pb_tls_hello, BPF_F_CURRENT_CPU,
|
|
tls_evt, sizeof(*tls_evt));
|
|
|
|
key = STAT_TLS_SUBMIT;
|
|
cnt = bpf_map_lookup_elem(&tc_stats, &key);
|
|
if (cnt) (*cnt)++;
|
|
|
|
return TC_ACT_OK;
|
|
|
|
try_http:
|
|
|
|
/* ===================================================================
|
|
* HTTP en clair (ports autorisés, non-TLS)
|
|
* ===================================================================*/
|
|
if (tcp_flags & (TH_SYN | TH_FIN | TH_RST))
|
|
return TC_ACT_OK;
|
|
if (payload_off >= pkt_len)
|
|
return TC_ACT_OK;
|
|
|
|
/* Avail via pkt_len (scalaire pur) */
|
|
avail = 0;
|
|
if (pkt_len > payload_off) {
|
|
avail = pkt_len - payload_off;
|
|
if (avail > MAX_HTTP_PAYLOAD)
|
|
avail = MAX_HTTP_PAYLOAD;
|
|
}
|
|
if (avail == 0)
|
|
return TC_ACT_OK;
|
|
|
|
struct http_plain_event *h_evt = bpf_map_lookup_elem(&__http_buf, &zero);
|
|
if (!h_evt)
|
|
return TC_ACT_OK;
|
|
|
|
h_evt->src_ip = 0;
|
|
h_evt->dst_ip = 0;
|
|
h_evt->src_port = 0;
|
|
h_evt->dst_port = 0;
|
|
h_evt->payload_len = 0;
|
|
h_evt->timestamp_ns = 0;
|
|
|
|
h_evt->src_ip = bpf_ntohl(src_ip);
|
|
h_evt->dst_ip = bpf_ntohl(dst_ip);
|
|
h_evt->src_port = src_port;
|
|
h_evt->dst_port = dst_port;
|
|
h_evt->timestamp_ns = bpf_ktime_get_ns();
|
|
|
|
/* Copie via bpf_skb_load_bytes avec tailles constantes en cascade.
|
|
* Les requêtes HTTP sont souvent < 512 octets, on descend à 256, 128, 64. */
|
|
if (payload_off + 512 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, payload_off, h_evt, 512);
|
|
h_evt->payload_len = 512;
|
|
} else if (payload_off + 256 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, payload_off, h_evt, 256);
|
|
h_evt->payload_len = 256;
|
|
} else if (payload_off + 128 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, payload_off, h_evt, 128);
|
|
h_evt->payload_len = 128;
|
|
} else if (payload_off + 64 <= pkt_len) {
|
|
bpf_skb_load_bytes(ctx, payload_off, h_evt, 64);
|
|
h_evt->payload_len = 64;
|
|
} else {
|
|
return TC_ACT_OK;
|
|
}
|
|
|
|
bpf_perf_event_output(ctx, &pb_http_plain, BPF_F_CURRENT_CPU,
|
|
h_evt, sizeof(*h_evt));
|
|
|
|
key = STAT_HTTP_SUBMIT;
|
|
cnt = bpf_map_lookup_elem(&tc_stats, &key);
|
|
if (cnt) (*cnt)++;
|
|
|
|
return TC_ACT_OK;
|
|
}
|
|
|
|
/* License string, emitted in the "license" ELF section of the object.
 * NOTE(review): presumably must stay GPL-compatible because the program calls
 * bpf_perf_event_output() — confirm against the kernel's helper documentation
 * before changing it. */
char LICENSE[] SEC("license") = "GPL";
|