feat(mod_reqin_log): fingerprinting HTTP/2 passif (Akamai format)

Ajoute un filtre d'entrée de connexion (AP_FTYPE_CONNECTION, APR_HOOK_LAST)
qui s'insère entre mod_ssl et mod_http2 pour lire de manière non-destructive
le preface HTTP/2 (RFC 9113 §3.4) et en extraire :

- h2_fingerprint    : fingerprint Akamai complet
                      ex. '1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p'
- h2_settings_fp    : entrées SETTINGS brutes  (ex. '1:65536,4:6291456')
- h2_window_update  : incrément WINDOW_UPDATE  (ex. '15663105')
- h2_pseudo_order   : ordre des pseudo-headers (ex. 'm,a,s,p' Chrome,
                                                     'm,p,s,a' Firefox)

Technique : lecture spéculative AP_MODE_SPECULATIVE (non-destructive)
de 512 octets — la donnée reste disponible pour mod_http2. Le filtre
se retire de la chaîne après la première invocation.

Stockage dans c->notes (H2_NOTE_*) puis émission JSON dans log_request().
ClickHouse : 4 nouvelles colonnes dans http_logs + JSONExtract dans mv_http_logs.
Migration pour déploiements existants : 04_http2_fields.sql.
14 tests unitaires (cmocka) couvrent Chrome/Firefox/HTTP1/troncature/HPACK.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-09 23:46:50 +02:00
parent bc11cfa8eb
commit 8ca4a1e849
6 changed files with 882 additions and 2 deletions

View File

@ -0,0 +1,25 @@
-- === 04_http2_fields.sql — Ajout des colonnes HTTP/2 à http_logs ===
--
-- Migration pour les déploiements existants : ajoute les 4 colonnes de
-- fingerprint HTTP/2 passif extraites par mod_reqin_log via son filtre
-- de connexion (APR_HOOK_LAST, AP_FTYPE_CONNECTION).
--
-- Format du fingerprint Akamai (h2_fingerprint) :
-- Chrome : "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p"
-- Firefox : "1:65536,4:131072,5:16384|12517377|0|m,p,s,a"
-- Safari : "1:4096,3:100,4:65535|10485760|0|m,a,s,p"
--
-- Appliquer avec :
-- clickhouse-client --multiquery < 04_http2_fields.sql
-- Column modifiers follow ClickHouse's documented order:
--   name type [DEFAULT expr] [CODEC(...)]
-- (the default expression comes before the compression codec).
-- IF NOT EXISTS keeps the migration safe to re-run.
ALTER TABLE ja4_logs.http_logs
    ADD COLUMN IF NOT EXISTS `h2_fingerprint` String DEFAULT '' CODEC(ZSTD(3));
ALTER TABLE ja4_logs.http_logs
    ADD COLUMN IF NOT EXISTS `h2_settings_fp` String DEFAULT '' CODEC(ZSTD(3));
ALTER TABLE ja4_logs.http_logs
    ADD COLUMN IF NOT EXISTS `h2_window_update` UInt32 DEFAULT 0;
ALTER TABLE ja4_logs.http_logs
    ADD COLUMN IF NOT EXISTS `h2_pseudo_order` LowCardinality(String) DEFAULT '';

View File

@ -29,15 +29,19 @@ target_link_libraries(test_header_handling ${CMOCKA_LIBRARIES} ${APR_LIBRARIES})
add_executable(test_json_serialization tests/unit/test_json_serialization.c) add_executable(test_json_serialization tests/unit/test_json_serialization.c)
target_link_libraries(test_json_serialization ${CMOCKA_LIBRARIES} ${APR_LIBRARIES}) target_link_libraries(test_json_serialization ${CMOCKA_LIBRARIES} ${APR_LIBRARIES})
add_executable(test_h2_parsing tests/unit/test_h2_parsing.c)
target_link_libraries(test_h2_parsing ${CMOCKA_LIBRARIES})
# Enable testing # Enable testing
enable_testing() enable_testing()
add_test(NAME RealModuleTest COMMAND test_module_real) add_test(NAME RealModuleTest COMMAND test_module_real)
add_test(NAME ConfigParsingTest COMMAND test_config_parsing) add_test(NAME ConfigParsingTest COMMAND test_config_parsing)
add_test(NAME HeaderHandlingTest COMMAND test_header_handling) add_test(NAME HeaderHandlingTest COMMAND test_header_handling)
add_test(NAME JsonSerializationTest COMMAND test_json_serialization) add_test(NAME JsonSerializationTest COMMAND test_json_serialization)
add_test(NAME H2ParsingTest COMMAND test_h2_parsing)
# Custom target for running tests # Custom target for running tests
add_custom_target(run_tests add_custom_target(run_tests
COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
DEPENDS test_module_real test_config_parsing test_header_handling test_json_serialization DEPENDS test_module_real test_config_parsing test_header_handling test_json_serialization test_h2_parsing
) )

View File

@ -20,6 +20,8 @@
#include "apr_lib.h" #include "apr_lib.h"
#include "ap_config.h" #include "ap_config.h"
#include "ap_mpm.h" #include "ap_mpm.h"
#include "util_filter.h"
#include "http_connection.h"
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/un.h> #include <sys/un.h>
@ -132,6 +134,12 @@ static void reqin_log_child_init(apr_pool_t *p, server_rec *s);
static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s); static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s);
static void reqin_log_register_hooks(apr_pool_t *p); static void reqin_log_register_hooks(apr_pool_t *p);
/* Forward declarations for the HTTP/2 connection filter */
static apr_status_t reqin_h2_filter(ap_filter_t *f, apr_bucket_brigade *bb,
ap_input_mode_t mode, apr_read_type_e block,
apr_off_t readbytes);
static void reqin_h2_add_filter(conn_rec *c, void *csd);
/* Command table */ /* Command table */
static const command_rec reqin_log_cmds[] = { static const command_rec reqin_log_cmds[] = {
AP_INIT_FLAG("JsonSockLogEnabled", cmd_set_enabled, NULL, RSRC_CONF, AP_INIT_FLAG("JsonSockLogEnabled", cmd_set_enabled, NULL, RSRC_CONF,
@ -1088,6 +1096,26 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
} }
} }
/* Champs HTTP/2 passif depuis les notes de connexion (vides si HTTP/1.x) */
{
const char *h2_fp = apr_table_get(r->connection->notes, H2_NOTE_FINGERPRINT);
const char *h2_set = apr_table_get(r->connection->notes, H2_NOTE_SETTINGS);
const char *h2_wu = apr_table_get(r->connection->notes, H2_NOTE_WUPDATE);
const char *h2_ps = apr_table_get(r->connection->notes, H2_NOTE_PSEUDO_ORDER);
if (h2_set && h2_set[0] != '\0') {
dynbuf_append(&buf, ",\"h2_fingerprint\":\"", (apr_size_t)-1);
append_json_string(&buf, h2_fp ? h2_fp : "");
dynbuf_append(&buf, "\",\"h2_settings_fp\":\"", (apr_size_t)-1);
append_json_string(&buf, h2_set);
dynbuf_append(&buf, "\",\"h2_window_update\":", (apr_size_t)-1);
dynbuf_append(&buf, (h2_wu && h2_wu[0]) ? h2_wu : "0", (apr_size_t)-1);
dynbuf_append(&buf, ",\"h2_pseudo_order\":\"", (apr_size_t)-1);
append_json_string(&buf, h2_ps ? h2_ps : "");
dynbuf_append(&buf, "\"", 1);
}
}
dynbuf_append(&buf, "}\n", 2); dynbuf_append(&buf, "}\n", 2);
if (buf.len > MAX_JSON_SIZE) { if (buf.len > MAX_JSON_SIZE) {
@ -1112,6 +1140,343 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
write_to_socket(buf.data, buf.len, s, cfg, state); write_to_socket(buf.data, buf.len, s, cfg, state);
} }
/* ====== Passive HTTP/2 fingerprinting ====== */
/** @brief Size of the speculative read used to peek at the HTTP/2 preface.
*
* Budget assumed by the author: magic (24) + SETTINGS (<= 108)
* + WINDOW_UPDATE (13) + first HEADERS (<= 350).
* NOTE(review): a first HEADERS frame can exceed that allowance — confirm
* against real client captures.
*/
#define H2_PEEK_SIZE 512
/**
 * @brief Decode an HPACK integer (RFC 7541 section 5.1).
 *
 * The integer starts in the low @p prefix bits of the first octet. When all
 * of those bits are set, the value continues in the following octets: each
 * contributes 7 bits (least-significant group first) and its high bit flags
 * a further continuation.
 *
 * @param buf    HPACK buffer.
 * @param len    Length of @p buf in bytes.
 * @param prefix Number of prefix bits (1-8).
 * @param pos    Current offset into @p buf; advanced past the octets consumed
 *               (also on failure).
 * @param out    Receives the decoded value.
 * @return 1 on success; 0 if @p prefix is out of range, the buffer ends
 *         before the integer is complete, the encoding uses more than five
 *         continuation octets, or the value does not fit in an unsigned int.
 */
static int hpack_int_decode(const unsigned char *buf, apr_size_t len, int prefix,
                            apr_size_t *pos, unsigned int *out)
{
    unsigned int mask;
    unsigned int shift = 0;

    /* A prefix outside 1..8 would make the mask shift meaningless (or UB) */
    if (prefix < 1 || prefix > 8) return 0;
    if (*pos >= len) return 0;

    mask = (1u << prefix) - 1u;
    *out = buf[(*pos)++] & mask;
    if (*out < mask) return 1;

    /* Multi-octet form: every continuation octet adds 7 more bits */
    while (*pos < len) {
        unsigned int b = buf[(*pos)++];
        unsigned int chunk = b & 0x7fu;

        /* Refuse values that would wrap instead of returning a bogus number */
        if (chunk > ((~0u - *out) >> shift)) return 0;
        *out += chunk << shift;
        shift += 7;
        if (!(b & 0x80u)) return 1;
        if (shift > 28) return 0; /* Too many continuation octets */
    }
    return 0;
}
/**
 * @brief Skip over an HPACK string literal (RFC 7541 section 5.2).
 *
 * A string literal starts with a 7-bit-prefix integer holding its length in
 * octets; the top bit of that first octet is the Huffman flag, which does not
 * matter for skipping.
 *
 * @param buf Buffer HPACK.
 * @param len Length of @p buf in bytes.
 * @param pos Current offset; moved past the whole string on success.
 * @return 1 on success, 0 if the length cannot be decoded or the string runs
 *         past the end of the buffer.
 */
static int hpack_skip_string(const unsigned char *buf, apr_size_t len, apr_size_t *pos)
{
    unsigned int str_len = 0;

    if (!hpack_int_decode(buf, len, 7, pos, &str_len)) return 0;

    /* hpack_int_decode never moves *pos past len, so len - *pos cannot
     * underflow; comparing against the remainder avoids the wrap-around that
     * "*pos + str_len" could hit where apr_size_t is 32 bits wide. */
    if (str_len > len - *pos) return 0;

    *pos += str_len;
    return 1;
}
/**
 * @brief Map an HPACK static-table index to a pseudo-header letter.
 *
 * Static table entries 1-7 (RFC 7541 Appendix A) are the request
 * pseudo-headers:
 *   1       :authority            -> 'a'
 *   2, 3    :method GET / POST    -> 'm'
 *   4, 5    :path / , /index.html -> 'p'
 *   6, 7    :scheme http / https  -> 's'
 *
 * @param index Index into the HPACK static table.
 * @return 'a', 'm', 'p' or 's' for a pseudo-header entry, 0 for anything else.
 */
static char h2_hpack_pseudo(unsigned int index)
{
    /* Slot i holds the letter for static-table index i; slot 0 is unused */
    static const char letters[] = { 0, 'a', 'm', 'm', 'p', 'p', 's', 's' };

    if (index >= sizeof(letters) / sizeof(letters[0])) {
        return 0;
    }
    return letters[index];
}
/**
 * @brief Extract the pseudo-header order from an HPACK header block.
 *
 * Walks the start of the HPACK block of the first HEADERS frame and records
 * one letter per pseudo-header (:method, :authority, :scheme, :path) in the
 * order they appear, producing a CSV string such as "m,a,s,p".
 *
 * Both forms in which a pseudo-header can name a static-table entry are
 * recognised:
 *   - indexed field (1xxxxxxx), e.g. ":method GET";
 *   - literal field with an indexed name (01xxxxxx, 0000xxxx, 0001xxxx), the
 *     form needed whenever the value is not in the static table (typically
 *     :authority); its value string is skipped.
 * Dynamic table size updates (001xxxxx) are skipped. The scan stops at the
 * first regular header (name index >= 8), at a literal with a literal name,
 * on malformed or truncated input, or once four entries have been recorded.
 *
 * @param hpack Start of the HPACK block.
 * @param len   Number of bytes available in @p hpack.
 * @param out   Output buffer (at least 8 bytes); always NUL-terminated.
 */
static void h2_extract_pseudo_order(const unsigned char *hpack, apr_size_t len, char *out)
{
    apr_size_t pos = 0;
    int out_pos = 0;

    /* Four letters and three commas use 7 bytes; byte 8 is the terminator */
    while (pos < len && out_pos < 7) {
        unsigned char byte = hpack[pos];
        unsigned int idx = 0;
        int has_value = 0;
        char c;

        if (byte & 0x80u) {
            /* Indexed field: 7-bit prefix index, no value follows */
            if (!hpack_int_decode(hpack, len, 7, &pos, &idx)) break;
        } else if ((byte & 0xe0u) == 0x20u) {
            /* Dynamic table size update (RFC 7541 section 6.3): skip it */
            if (!hpack_int_decode(hpack, len, 5, &pos, &idx)) break;
            continue;
        } else {
            /* Literal field: 6-bit name index with incremental indexing,
             * 4-bit otherwise; a value string follows the name index */
            int prefix = (byte & 0x40u) ? 6 : 4;
            if (!hpack_int_decode(hpack, len, prefix, &pos, &idx)) break;
            has_value = 1;
        }

        /* Index 0 (invalid, or "literal name follows") and indices >= 8
         * (regular headers) both map to 0 and end the scan */
        c = h2_hpack_pseudo(idx);
        if (!c) break;

        if (out_pos > 0) out[out_pos++] = ',';
        out[out_pos++] = c;

        /* The name is already recorded; a truncated value only ends the scan */
        if (has_value && !hpack_skip_string(hpack, len, &pos)) break;
    }
    out[out_pos] = '\0';
}
/**
 * @brief Read an unsigned big-endian integer of 1 to 4 bytes.
 *
 * Accumulates in uint32_t so that a top byte >= 0x80 never shifts into the
 * sign bit of a promoted int.
 */
static uint32_t h2_read_be(const char *p, int nbytes)
{
    uint32_t v = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        v = (v << 8) | (uint32_t)(unsigned char)p[i];
    }
    return v;
}

/**
 * @brief Parse the client HTTP/2 preface and store the fingerprint in c->notes.
 *
 * The preface (RFC 9113 section 3.4) is the 24-byte magic string followed by
 * a SETTINGS frame (type 0x04), optionally a connection-level WINDOW_UPDATE
 * (type 0x08), then the first HEADERS frame (type 0x01).
 *
 * When at least one SETTINGS entry was seen, the following notes are set:
 *   H2_NOTE_SETTINGS     -> e.g. "1:65536,2:0,4:6291456,6:262144"
 *   H2_NOTE_WUPDATE      -> e.g. "15663105" ("0" when absent)
 *   H2_NOTE_PSEUDO_ORDER -> e.g. "m,a,s,p"
 *   H2_NOTE_FINGERPRINT  -> "SETTINGS|WUPDATE|PRIO|PSEUDO"
 *
 * SETTINGS and WINDOW_UPDATE frames are used only when completely present in
 * the buffer. The HEADERS frame is parsed even when the buffer ends before
 * the frame does: the pseudo-headers sit at the start of the header block,
 * and the frame is commonly longer than the peek window.
 *
 * @param c   Apache connection (pool and notes).
 * @param buf Buffer holding the first bytes of the stream (speculative read).
 * @param len Number of valid bytes in @p buf.
 */
static void h2_parse_preface(conn_rec *c, const char *buf, apr_size_t len)
{
    static const char H2_MAGIC[] = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n";
    const apr_size_t MAGIC_LEN = 24u;
    const apr_size_t FRAME_HDR = 9u;
    char settings_buf[256] = "";
    char wupdate_buf[16] = "0";
    char pseudo_buf[16] = "";
    char fp[512];
    int has_priority = 0;
    int settings_pos_out = 0;
    apr_size_t pos;

    /* Not an HTTP/2 connection unless the magic matches exactly */
    if (len < MAGIC_LEN || memcmp(buf, H2_MAGIC, MAGIC_LEN) != 0) return;
    pos = MAGIC_LEN;

    while (pos + FRAME_HDR <= len) {
        apr_size_t frame_len = h2_read_be(buf + pos, 3);
        unsigned char type = (unsigned char)buf[pos + 3];
        unsigned char flags = (unsigned char)buf[pos + 4];
        uint32_t stream_id = h2_read_be(buf + pos + 5, 4) & 0x7fffffffu;
        apr_size_t avail;

        pos += FRAME_HDR;
        avail = len - pos; /* payload bytes actually present in the buffer */

        if (type == 0x01u && stream_id > 0) {
            /* ---- First HEADERS frame: extract the pseudo-header order ---- */
            apr_size_t hpack_start = 0;
            apr_size_t hpack_end = frame_len; /* offsets within the payload */
            int parse_ok = 1;

            if (flags & 0x08u) {
                /* PADDED: one pad-length byte first, padding at the end */
                if (frame_len == 0 || avail == 0) {
                    parse_ok = 0;
                } else {
                    apr_size_t pad_len = (unsigned char)buf[pos];
                    hpack_start = 1;
                    if (frame_len < hpack_start + pad_len)
                        parse_ok = 0;
                    else
                        hpack_end = frame_len - pad_len;
                }
            }
            if ((flags & 0x20u) && parse_ok) {
                /* PRIORITY: 5 bytes of priority data before the header block */
                if (hpack_start + 5u > hpack_end) {
                    parse_ok = 0;
                } else {
                    hpack_start += 5u;
                    has_priority = 1;
                }
            }
            /* Never read past what the speculative read delivered */
            if (hpack_end > avail) hpack_end = avail;
            if (parse_ok && hpack_start < hpack_end) {
                h2_extract_pseudo_order(
                    (const unsigned char *)(buf + pos + hpack_start),
                    hpack_end - hpack_start,
                    pseudo_buf);
            }
            break; /* HEADERS seen: the preface is fully parsed */
        }

        if (frame_len > avail) break; /* Incomplete frame in the buffer */

        if (type == 0x04u && stream_id == 0 && !(flags & 0x01u)) {
            /* ---- SETTINGS frame (not an ACK): 6 bytes per entry ---- */
            apr_size_t sp = 0;
            /* Keep 24 bytes of headroom: the longest entry,
             * ",65535:4294967295", needs 17 plus the terminator */
            while (sp + 6 <= frame_len &&
                   settings_pos_out < (int)sizeof(settings_buf) - 24) {
                unsigned int id = (unsigned int)h2_read_be(buf + pos + sp, 2);
                unsigned int val = (unsigned int)h2_read_be(buf + pos + sp + 2, 4);
                sp += 6;
                if (settings_pos_out > 0)
                    settings_buf[settings_pos_out++] = ',';
                settings_pos_out += snprintf(settings_buf + settings_pos_out,
                                             sizeof(settings_buf) - (size_t)settings_pos_out,
                                             "%u:%u", id, val);
            }
        } else if (type == 0x08u && stream_id == 0) {
            /* ---- Connection-level WINDOW_UPDATE (31-bit increment) ---- */
            if (frame_len >= 4) {
                unsigned int inc =
                    (unsigned int)(h2_read_be(buf + pos, 4) & 0x7fffffffu);
                snprintf(wupdate_buf, sizeof(wupdate_buf), "%u", inc);
            }
        }
        pos += frame_len;
    }

    /* Only publish notes once a SETTINGS frame confirmed this is HTTP/2 */
    if (settings_buf[0] == '\0') return;

    snprintf(fp, sizeof(fp), "%s|%s|%d|%s",
             settings_buf, wupdate_buf, has_priority, pseudo_buf);
    apr_table_setn(c->notes, H2_NOTE_FINGERPRINT, apr_pstrdup(c->pool, fp));
    apr_table_setn(c->notes, H2_NOTE_SETTINGS, apr_pstrdup(c->pool, settings_buf));
    apr_table_setn(c->notes, H2_NOTE_WUPDATE, apr_pstrdup(c->pool, wupdate_buf));
    apr_table_setn(c->notes, H2_NOTE_PSEUDO_ORDER, apr_pstrdup(c->pool, pseudo_buf));
}
/**
 * @brief Connection input filter that passively captures the HTTP/2 preface.
 *
 * On the first data read it performs a speculative (non-consuming) read of up
 * to H2_PEEK_SIZE bytes from the next filter, hands the bytes to
 * h2_parse_preface(), then removes itself from the chain. The caller's own
 * read is always forwarded unchanged.
 *
 * The peek honours the caller's @p block mode so that a non-blocking read is
 * never turned into a blocking one; when a non-blocking peek finds no data
 * the filter stays installed and tries again on the next call. AP_MODE_INIT
 * calls are forwarded without peeking, since that mode only asks the filters
 * below to initialise the connection.
 *
 * NOTE(review): the filter is registered as AP_FTYPE_CONNECTION and is
 * expected to sit above the TLS filter so that it sees plaintext — confirm
 * the resulting chain order on a TLS vhost.
 *
 * @param f         This filter.
 * @param bb        Brigade to fill for the caller's read.
 * @param mode      Read mode requested by the caller (forwarded to f->next).
 * @param block     Blocking mode requested by the caller (forwarded, and also
 *                  used for the peek).
 * @param readbytes Number of bytes requested by the caller.
 * @return APR status of the caller's read.
 */
static apr_status_t reqin_h2_filter(ap_filter_t *f, apr_bucket_brigade *bb,
                                    ap_input_mode_t mode, apr_read_type_e block,
                                    apr_off_t readbytes)
{
    conn_rec *c = f->c;

    /* Initialisation pass: no data is expected, so do not peek yet */
    if (mode == AP_MODE_INIT) {
        return ap_get_brigade(f->next, bb, mode, block, readbytes);
    }

    if (!apr_table_get(c->notes, H2_NOTE_PARSED)) {
        /* Speculative read: the bytes stay available to later readers */
        apr_bucket_brigade *peek = apr_brigade_create(c->pool, c->bucket_alloc);
        apr_status_t rv = ap_get_brigade(f->next, peek,
                                         AP_MODE_SPECULATIVE, block,
                                         H2_PEEK_SIZE);
        int got_data = 0;

        if (rv == APR_SUCCESS) {
            char peek_buf[H2_PEEK_SIZE];
            apr_size_t peek_len = sizeof(peek_buf);
            if (apr_brigade_flatten(peek, peek_buf, &peek_len) == APR_SUCCESS
                && peek_len > 0) {
                got_data = 1;
                h2_parse_preface(c, peek_buf, peek_len);
            }
        }
        /* Destroy rather than clean up: also drops the pool cleanup that
         * apr_brigade_create() registered for this temporary brigade */
        apr_brigade_destroy(peek);

        if (!got_data && block == APR_NONBLOCK_READ
            && (rv == APR_SUCCESS || APR_STATUS_IS_EAGAIN(rv))) {
            /* Nothing has arrived yet: stay in the chain and retry later */
            return ap_get_brigade(f->next, bb, mode, block, readbytes);
        }
        apr_table_setn(c->notes, H2_NOTE_PARSED, "1");
    }

    /* The filter is only needed once per connection */
    ap_remove_input_filter(f);
    return ap_get_brigade(f->next, bb, mode, block, readbytes);
}
/**
 * @brief pre_connection hook: install the HTTP/2 peek filter on a connection.
 *
 * On a primary (client-facing) connection the REQIN_H2_PEEK input filter is
 * added so that the first read can capture the HTTP/2 preface.
 *
 * On a secondary connection (c->master set, as created by mod_http2 for each
 * stream) no filter is added, because the preface only exists on the primary
 * connection. Instead the fingerprint notes already stored on the master are
 * copied into this connection's notes, which is where request-level code
 * reading r->connection->notes will look for them. The copied values point
 * into the master's pool.
 * NOTE(review): this assumes the master connection outlives its secondary
 * connections and that c->master is available (httpd >= 2.4.17) — confirm
 * for the supported build targets.
 *
 * NOTE(review): the pre_connection hook type returns int (OK / DECLINED),
 * but this handler and its forward declaration are void, so the hook runner
 * reads an indeterminate return value. Fixing that means changing the
 * prototype near the top of the file in the same commit.
 *
 * @param c   Apache connection.
 * @param csd Socket descriptor (unused).
 */
static void reqin_h2_add_filter(conn_rec *c, void *csd)
{
    static const char *const inherited[] = {
        H2_NOTE_FINGERPRINT,
        H2_NOTE_SETTINGS,
        H2_NOTE_WUPDATE,
        H2_NOTE_PSEUDO_ORDER
    };

    (void)csd;

    if (c->master != NULL) {
        apr_size_t i;
        for (i = 0; i < sizeof(inherited) / sizeof(inherited[0]); i++) {
            const char *val = apr_table_get(c->master->notes, inherited[i]);
            if (val != NULL) {
                apr_table_setn(c->notes, inherited[i], val);
            }
        }
        return;
    }

    ap_add_input_filter(H2_FILTER_NAME, NULL, NULL, c);
}
/* ====== Hooks Apache ====== */ /* ====== Hooks Apache ====== */
/** /**
@ -1262,6 +1627,10 @@ static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t
static void reqin_log_register_hooks(apr_pool_t *p) static void reqin_log_register_hooks(apr_pool_t *p)
{ {
(void)p; (void)p;
/* Enregistrement du filtre de connexion HTTP/2 (avant les hooks de requête) */
ap_register_input_filter(H2_FILTER_NAME, reqin_h2_filter, NULL, AP_FTYPE_CONNECTION);
ap_hook_pre_connection(reqin_h2_add_filter, NULL, NULL, APR_HOOK_LAST);
ap_hook_post_config(reqin_log_post_config, NULL, NULL, APR_HOOK_MIDDLE); ap_hook_post_config(reqin_log_post_config, NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_post_read_request(reqin_log_post_read_request, NULL, NULL, APR_HOOK_MIDDLE); ap_hook_post_read_request(reqin_log_post_read_request, NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_child_init(reqin_log_child_init, NULL, NULL, APR_HOOK_MIDDLE); ap_hook_child_init(reqin_log_child_init, NULL, NULL, APR_HOOK_MIDDLE);

View File

@ -34,4 +34,16 @@ typedef struct {
/* External module declaration */ /* External module declaration */
extern module AP_MODULE_DECLARE_DATA reqin_log_module; extern module AP_MODULE_DECLARE_DATA reqin_log_module;
/* ====== Passive HTTP/2 fingerprinting ====== */
/* Name of the connection input filter that captures the HTTP/2 preface */
#define H2_FILTER_NAME "REQIN_H2_PEEK"
/* Connection-note keys under which the parsed HTTP/2 fingerprint is stored */
#define H2_NOTE_FINGERPRINT "reqin_h2_fp" /* Full Akamai-style fingerprint */
#define H2_NOTE_SETTINGS "reqin_h2_set" /* Raw SETTINGS entries */
#define H2_NOTE_WUPDATE "reqin_h2_wu" /* WINDOW_UPDATE increment */
#define H2_NOTE_PSEUDO_ORDER "reqin_h2_ps" /* Pseudo-header order */
#define H2_NOTE_PARSED "reqin_h2_done" /* "Already parsed" marker */
#endif /* MOD_REQIN_LOG_H */ #endif /* MOD_REQIN_LOG_H */

View File

@ -0,0 +1,458 @@
/*
* test_h2_parsing.c — Tests unitaires du fingerprinting HTTP/2 passif.
*
* Les fonctions testées (hpack_int_decode, h2_extract_pseudo_order,
* h2_parse_preface_buf) sont réimplémentées localement pour éviter les
* dépendances Apache/APR. La logique est identique à mod_reqin_log.c.
*/
#include <stdarg.h>
#include <stddef.h>
#include <setjmp.h>
#include <cmocka.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
/* ====== Local re-implementation of the H2 helpers ====== */

/*
 * Decode an HPACK integer (RFC 7541 section 5.1) with an N-bit prefix.
 * *pos is advanced past the octets consumed. Returns 1 on success, 0 when
 * the prefix is outside 1..8, the buffer is too short, the encoding is
 * over-long, or the value would not fit in an unsigned int.
 */
static int hpack_int_decode(const unsigned char *buf, size_t len, int prefix,
                            size_t *pos, unsigned int *out)
{
    unsigned int mask;
    unsigned int shift = 0;

    if (prefix < 1 || prefix > 8) return 0;
    if (*pos >= len) return 0;

    mask = (1u << prefix) - 1u;
    *out = buf[(*pos)++] & mask;
    if (*out < mask) return 1;

    while (*pos < len) {
        unsigned int b = buf[(*pos)++];
        unsigned int chunk = b & 0x7fu;

        /* Reject instead of silently wrapping the accumulator */
        if (chunk > ((~0u - *out) >> shift)) return 0;
        *out += chunk << shift;
        shift += 7;
        if (!(b & 0x80u)) return 1;
        if (shift > 28) return 0;
    }
    return 0;
}

/*
 * Skip an HPACK string literal (RFC 7541 section 5.2): a 7-bit-prefix length
 * followed by that many octets. Returns 1 on success, 0 if it does not fit.
 */
static int hpack_skip_string(const unsigned char *buf, size_t len, size_t *pos)
{
    unsigned int str_len = 0;

    if (!hpack_int_decode(buf, len, 7, pos, &str_len)) return 0;
    if (str_len > len - *pos) return 0; /* *pos <= len after a decode */
    *pos += str_len;
    return 1;
}

/* Map HPACK static-table indices 1-7 to 'a'/'m'/'p'/'s'; 0 for the rest. */
static char h2_hpack_pseudo(unsigned int index)
{
    switch (index) {
    case 1: return 'a';
    case 2: case 3: return 'm';
    case 4: case 5: return 'p';
    case 6: case 7: return 's';
    default: return 0;
    }
}

/*
 * Write the pseudo-header order of an HPACK block to out as CSV ("m,a,s,p").
 * Handles indexed fields and literal fields with an indexed name (the form
 * used when the value, e.g. the :authority host, is not in the static
 * table); skips dynamic table size updates; stops at the first regular
 * header, at a literal name, on truncation, or after four entries.
 * out must hold at least 8 bytes.
 */
static void h2_extract_pseudo_order(const unsigned char *hpack, size_t len, char *out)
{
    size_t pos = 0;
    int out_pos = 0;

    while (pos < len && out_pos < 7) {
        unsigned char byte = hpack[pos];
        unsigned int idx = 0;
        int has_value = 0;
        char c;

        if (byte & 0x80u) {
            /* Indexed field */
            if (!hpack_int_decode(hpack, len, 7, &pos, &idx)) break;
        } else if ((byte & 0xe0u) == 0x20u) {
            /* Dynamic table size update */
            if (!hpack_int_decode(hpack, len, 5, &pos, &idx)) break;
            continue;
        } else {
            /* Literal field: 6-bit name index if incrementally indexed,
             * 4-bit otherwise; the value string follows */
            int prefix = (byte & 0x40u) ? 6 : 4;
            if (!hpack_int_decode(hpack, len, prefix, &pos, &idx)) break;
            has_value = 1;
        }

        c = h2_hpack_pseudo(idx); /* 0 for index 0 and for regular headers */
        if (!c) break;

        if (out_pos > 0) out[out_pos++] = ',';
        out[out_pos++] = c;

        if (has_value && !hpack_skip_string(hpack, len, &pos)) break;
    }
    out[out_pos] = '\0';
}

/* Result of h2_parse_preface_buf: APR-free counterpart of the c->notes */
typedef struct {
    char settings[256];
    char wupdate[16];
    char pseudo[16];
    char fingerprint[512];
    int has_priority;
    int is_h2;
} h2_result_t;

/* Read an unsigned big-endian integer of 1 to 4 bytes without sign issues. */
static uint32_t h2_read_be(const char *p, int nbytes)
{
    uint32_t v = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        v = (v << 8) | (uint32_t)(unsigned char)p[i];
    }
    return v;
}

/*
 * Parse a client HTTP/2 preface from buf[0..len) into *res.
 * res->is_h2 is set once at least one SETTINGS entry was read. SETTINGS and
 * WINDOW_UPDATE frames must be complete; the first HEADERS frame is parsed
 * from whatever part of it is present, because the pseudo-headers come first.
 */
static void h2_parse_preface_buf(const char *buf, size_t len, h2_result_t *res)
{
    static const char H2_MAGIC[] = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n";
    const size_t MAGIC_LEN = 24u;
    const size_t FRAME_HDR = 9u;
    int settings_out = 0;
    size_t pos;

    memset(res, 0, sizeof(*res));
    strcpy(res->wupdate, "0");

    if (len < MAGIC_LEN || memcmp(buf, H2_MAGIC, MAGIC_LEN) != 0) return;
    pos = MAGIC_LEN;

    while (pos + FRAME_HDR <= len) {
        size_t frame_len = h2_read_be(buf + pos, 3);
        unsigned char type = (unsigned char)buf[pos + 3];
        unsigned char flags = (unsigned char)buf[pos + 4];
        uint32_t stream_id = h2_read_be(buf + pos + 5, 4) & 0x7fffffffu;
        size_t avail;

        pos += FRAME_HDR;
        avail = len - pos; /* payload bytes actually present */

        if (type == 0x01u && stream_id > 0) {
            /* First HEADERS frame */
            size_t hpack_start = 0;
            size_t hpack_end = frame_len;
            int parse_ok = 1;

            if (flags & 0x08u) {
                /* PADDED */
                if (frame_len == 0 || avail == 0) {
                    parse_ok = 0;
                } else {
                    size_t pad_len = (unsigned char)buf[pos];
                    hpack_start = 1;
                    if (frame_len < hpack_start + pad_len)
                        parse_ok = 0;
                    else
                        hpack_end = frame_len - pad_len;
                }
            }
            if ((flags & 0x20u) && parse_ok) {
                /* PRIORITY */
                if (hpack_start + 5u > hpack_end) {
                    parse_ok = 0;
                } else {
                    hpack_start += 5u;
                    res->has_priority = 1;
                }
            }
            if (hpack_end > avail) hpack_end = avail;
            if (parse_ok && hpack_start < hpack_end) {
                h2_extract_pseudo_order(
                    (const unsigned char *)(buf + pos + hpack_start),
                    hpack_end - hpack_start,
                    res->pseudo);
            }
            break;
        }

        if (frame_len > avail) break; /* incomplete frame */

        if (type == 0x04u && stream_id == 0 && !(flags & 0x01u)) {
            /* SETTINGS (not an ACK): 6 bytes per entry */
            size_t sp = 0;
            while (sp + 6 <= frame_len &&
                   settings_out < (int)sizeof(res->settings) - 24) {
                unsigned int id = (unsigned int)h2_read_be(buf + pos + sp, 2);
                unsigned int val = (unsigned int)h2_read_be(buf + pos + sp + 2, 4);
                sp += 6;
                if (settings_out > 0)
                    res->settings[settings_out++] = ',';
                settings_out += snprintf(res->settings + settings_out,
                                         sizeof(res->settings) - (size_t)settings_out,
                                         "%u:%u", id, val);
            }
        } else if (type == 0x08u && stream_id == 0) {
            /* Connection-level WINDOW_UPDATE */
            if (frame_len >= 4) {
                unsigned int inc =
                    (unsigned int)(h2_read_be(buf + pos, 4) & 0x7fffffffu);
                snprintf(res->wupdate, sizeof(res->wupdate), "%u", inc);
            }
        }
        pos += frame_len;
    }

    if (res->settings[0] != '\0') {
        res->is_h2 = 1;
        snprintf(res->fingerprint, sizeof(res->fingerprint), "%s|%s|%d|%s",
                 res->settings, res->wupdate, res->has_priority, res->pseudo);
    }
}
/* ====== Test data: Chrome 120 style preface ====== */
/*
 * Synthetic HTTP/2 preface modelled on Chrome 120:
 *   Magic (24 bytes)
 *   SETTINGS frame : HEADER_TABLE_SIZE=65536, ENABLE_PUSH=0,
 *                    INITIAL_WINDOW_SIZE=6291456, MAX_HEADER_LIST_SIZE=262144
 *   WINDOW_UPDATE  : increment 15663105
 *   HEADERS stream 1 : :method GET, :authority, :scheme https, :path /
 *     -> indexed HPACK order: 0x82(GET), 0x81(:authority), 0x87(https), 0x84(/)
 *   followed by one regular header (accept: text/html) as a literal field.
 * The HEADERS length field matches the payload that follows (15 bytes).
 */
static const unsigned char CHROME_PREFACE[] = {
    /* Magic */
    'P','R','I',' ','*',' ','H','T','T','P','/','2','.','0','\r','\n',
    '\r','\n','S','M','\r','\n','\r','\n',
    /* SETTINGS frame: length=24, type=0x04, flags=0x00, stream=0 */
    0x00, 0x00, 0x18, /* length = 24 = 4 x 6 */
    0x04, /* type SETTINGS */
    0x00, /* flags = 0 */
    0x00, 0x00, 0x00, 0x00, /* stream 0 */
    /* Entry 1: HEADER_TABLE_SIZE (1) = 65536 = 0x00010000 */
    0x00, 0x01, 0x00, 0x01, 0x00, 0x00,
    /* Entry 2: ENABLE_PUSH (2) = 0 */
    0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
    /* Entry 3: INITIAL_WINDOW_SIZE (4) = 6291456 = 0x00600000 */
    0x00, 0x04, 0x00, 0x60, 0x00, 0x00,
    /* Entry 4: MAX_HEADER_LIST_SIZE (6) = 262144 = 0x00040000 */
    0x00, 0x06, 0x00, 0x04, 0x00, 0x00,
    /* WINDOW_UPDATE frame: length=4, type=0x08, flags=0, stream=0 */
    0x00, 0x00, 0x04,
    0x08,
    0x00,
    0x00, 0x00, 0x00, 0x00,
    /* increment = 15663105 = 0x00EF0001 */
    0x00, 0xEF, 0x00, 0x01,
    /* HEADERS frame: length=15, type=0x01,
     * flags=0x05 (END_STREAM|END_HEADERS), stream=1 */
    0x00, 0x00, 0x0F,
    0x01,
    0x05,
    0x00, 0x00, 0x00, 0x01,
    /* HPACK: :method GET (0x82), :authority (0x81),
     *        :scheme https (0x87), :path / (0x84) -> Chrome order m,a,s,p */
    0x82, 0x81, 0x87, 0x84,
    /* Regular header: literal with incremental indexing, name index 19
     * (accept), value "text/html" (length 9, no Huffman) */
    0x53,
    0x09, 0x74, 0x65, 0x78, 0x74, 0x2F, 0x68, 0x74, 0x6D, 0x6C
};
/* ====== Test data: Firefox 120 style preface ====== */
/*
* HTTP/2 preface modelled on Firefox 120:
* SETTINGS: HEADER_TABLE_SIZE=65536, INITIAL_WINDOW_SIZE=131072, MAX_FRAME_SIZE=16384
* WINDOW_UPDATE: 12517377
* HEADERS: :method GET (0x82), :path / (0x84), :scheme https (0x87), :authority (0x81)
* -> Firefox order: m,p,s,a
*/
static const unsigned char FIREFOX_PREFACE[] = {
/* Magic */
'P','R','I',' ','*',' ','H','T','T','P','/','2','.','0','\r','\n',
'\r','\n','S','M','\r','\n','\r','\n',
/* SETTINGS frame: length=18, type=0x04, flags=0x00, stream=0 */
0x00, 0x00, 0x12,
0x04,
0x00,
0x00, 0x00, 0x00, 0x00,
/* HEADER_TABLE_SIZE (1) = 65536 */
0x00, 0x01, 0x00, 0x01, 0x00, 0x00,
/* INITIAL_WINDOW_SIZE (4) = 131072 = 0x00020000 */
0x00, 0x04, 0x00, 0x02, 0x00, 0x00,
/* MAX_FRAME_SIZE (5) = 16384 = 0x00004000 */
0x00, 0x05, 0x00, 0x00, 0x40, 0x00,
/* WINDOW_UPDATE: increment = 12517377 = 0x00BF0001 */
0x00, 0x00, 0x04,
0x08,
0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0xBF, 0x00, 0x01,
/* HEADERS frame: length=4, type=0x01, flags=0x05, stream=1 */
0x00, 0x00, 0x04,
0x01,
0x05,
0x00, 0x00, 0x00, 0x01,
/* HPACK: :method GET (0x82), :path / (0x84), :scheme https (0x87), :authority (0x81) */
/* -> Firefox order: m,p,s,a */
0x82, 0x84, 0x87, 0x81
};
/* ====== Test data: HTTP/1.1 request (must not be detected as HTTP/2) ====== */
static const char HTTP1_DATA[] =
"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n";
/* ====== Tests ====== */
/* Chrome fixture: SETTINGS entries come out as "id:value" pairs in wire order. */
static void test_chrome_settings_parsed(void **state)
{
    const char *wire = (const char *)CHROME_PREFACE;
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(wire, sizeof(CHROME_PREFACE), &parsed);

    assert_int_equal(parsed.is_h2, 1);
    assert_string_equal(parsed.settings, "1:65536,2:0,4:6291456,6:262144");
}
/* Chrome fixture: the connection-level WINDOW_UPDATE increment is reported. */
static void test_chrome_window_update(void **state)
{
    const char *wire = (const char *)CHROME_PREFACE;
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(wire, sizeof(CHROME_PREFACE), &parsed);

    assert_string_equal(parsed.wupdate, "15663105");
}
/* Chrome fixture: :method, :authority, :scheme, :path -> "m,a,s,p". */
static void test_chrome_pseudo_order(void **state)
{
    const char *wire = (const char *)CHROME_PREFACE;
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(wire, sizeof(CHROME_PREFACE), &parsed);

    assert_string_equal(parsed.pseudo, "m,a,s,p");
}
/* Chrome fixture: full "SETTINGS|WINDOW_UPDATE|PRIORITY|PSEUDO" fingerprint. */
static void test_chrome_fingerprint_akamai(void **state)
{
    const char *wire = (const char *)CHROME_PREFACE;
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(wire, sizeof(CHROME_PREFACE), &parsed);

    assert_string_equal(parsed.fingerprint,
                        "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p");
}
/* Firefox fixture: three SETTINGS entries are detected and rendered. */
static void test_firefox_settings_parsed(void **state)
{
    const char *wire = (const char *)FIREFOX_PREFACE;
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(wire, sizeof(FIREFOX_PREFACE), &parsed);

    assert_int_equal(parsed.is_h2, 1);
    assert_string_equal(parsed.settings, "1:65536,4:131072,5:16384");
}
/* Firefox fixture: :method, :path, :scheme, :authority -> "m,p,s,a". */
static void test_firefox_pseudo_order(void **state)
{
    const char *wire = (const char *)FIREFOX_PREFACE;
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(wire, sizeof(FIREFOX_PREFACE), &parsed);

    assert_string_equal(parsed.pseudo, "m,p,s,a");
}
/* Firefox fixture: full "SETTINGS|WINDOW_UPDATE|PRIORITY|PSEUDO" fingerprint. */
static void test_firefox_fingerprint_akamai(void **state)
{
    const char *wire = (const char *)FIREFOX_PREFACE;
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(wire, sizeof(FIREFOX_PREFACE), &parsed);

    assert_string_equal(parsed.fingerprint,
                        "1:65536,4:131072,5:16384|12517377|0|m,p,s,a");
}
/* An HTTP/1.1 request line must leave every fingerprint field empty. */
static void test_http1_not_detected(void **state)
{
    const size_t request_len = strlen(HTTP1_DATA);
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf(HTTP1_DATA, request_len, &parsed);

    assert_int_equal(parsed.is_h2, 0);
    assert_string_equal(parsed.settings, "");
    assert_string_equal(parsed.fingerprint, "");
}
/* A zero-length buffer is not HTTP/2. */
static void test_empty_buffer_not_detected(void **state)
{
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf("", 0, &parsed);

    assert_int_equal(parsed.is_h2, 0);
}
/* Complete magic followed by a cut-off frame header: no fingerprint, no crash. */
static void test_truncated_preface_no_crash(void **state)
{
    const size_t truncated_len = 30; /* 24-byte magic + 6 bytes of frame header */
    h2_result_t parsed;

    (void)state;
    h2_parse_preface_buf((const char *)CHROME_PREFACE, truncated_len, &parsed);

    /* SETTINGS is incomplete, so nothing is reported */
    assert_int_equal(parsed.is_h2, 0);
}
/* A 7-bit-prefix value below 127 fits in the first octet. */
static void test_hpack_int_single_byte(void **state)
{
    const unsigned char encoded[] = { 0x82 }; /* 0x80 | 2 -> index 2 */
    size_t offset = 0;
    unsigned int value = 0;
    int ok;

    (void)state;
    ok = hpack_int_decode(encoded, 1, 7, &offset, &value);

    assert_int_equal(ok, 1);
    assert_int_equal(value, 2);
    assert_int_equal(offset, 1);
}
/* Static-table indices 1-7 map to pseudo-header letters; others map to 0. */
static void test_hpack_pseudo_table(void **state)
{
    static const struct {
        unsigned int index;
        char letter;
    } expected[] = {
        { 1, 'a' },
        { 2, 'm' }, { 3, 'm' },
        { 4, 'p' }, { 5, 'p' },
        { 6, 's' }, { 7, 's' },
        { 8, 0 },   /* regular header */
        { 62, 0 },
    };
    size_t i;

    (void)state;
    for (i = 0; i < sizeof(expected) / sizeof(expected[0]); i++) {
        assert_int_equal(h2_hpack_pseudo(expected[i].index), expected[i].letter);
    }
}
/* Direct call on an HPACK block: :method, :path, :scheme, :authority. */
static void test_pseudo_order_extraction_direct(void **state)
{
    const unsigned char block[] = { 0x82, 0x84, 0x87, 0x81 };
    char order[16];

    (void)state;
    h2_extract_pseudo_order(block, sizeof(block), order);

    assert_string_equal(order, "m,p,s,a");
}
/* The scan ends at the first regular header: :method, then index 8 (0x88). */
static void test_pseudo_order_stops_at_regular_header(void **state)
{
    const unsigned char block[] = { 0x82, 0x88 };
    char order[16];

    (void)state;
    h2_extract_pseudo_order(block, sizeof(block), order);

    assert_string_equal(order, "m");
}
/* ====== main ====== */
/* Runs every H2 parsing test as one cmocka group; returns the group status. */
int main(void)
{
    /* The array keeps the name "tests": cmocka uses it as the group name */
    const struct CMUnitTest tests[] = {
        /* Chrome fixture */
        cmocka_unit_test(test_chrome_settings_parsed),
        cmocka_unit_test(test_chrome_window_update),
        cmocka_unit_test(test_chrome_pseudo_order),
        cmocka_unit_test(test_chrome_fingerprint_akamai),
        /* Firefox fixture */
        cmocka_unit_test(test_firefox_settings_parsed),
        cmocka_unit_test(test_firefox_pseudo_order),
        cmocka_unit_test(test_firefox_fingerprint_akamai),
        /* Inputs that must not be detected */
        cmocka_unit_test(test_http1_not_detected),
        cmocka_unit_test(test_empty_buffer_not_detected),
        cmocka_unit_test(test_truncated_preface_no_crash),
        /* HPACK helpers */
        cmocka_unit_test(test_hpack_int_single_byte),
        cmocka_unit_test(test_hpack_pseudo_table),
        cmocka_unit_test(test_pseudo_order_extraction_direct),
        cmocka_unit_test(test_pseudo_order_stops_at_regular_header),
    };
    int failed = cmocka_run_group_tests(tests, NULL, NULL);

    return failed;
}

View File

@ -89,6 +89,12 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
`anubis_bot_action` LowCardinality(String) DEFAULT '', `anubis_bot_action` LowCardinality(String) DEFAULT '',
`anubis_bot_category` LowCardinality(String) DEFAULT '', `anubis_bot_category` LowCardinality(String) DEFAULT '',
-- Fingerprint HTTP/2 passif (mod_reqin_log connection filter)
`h2_fingerprint` String CODEC(ZSTD(3)) DEFAULT '',
`h2_settings_fp` String CODEC(ZSTD(3)) DEFAULT '',
`h2_window_update` UInt32 DEFAULT 0,
`h2_pseudo_order` LowCardinality(String) DEFAULT '',
-- Index bloom_filter sur src_ip : les requêtes WHERE src_ip = X sautent -- Index bloom_filter sur src_ip : les requêtes WHERE src_ip = X sautent
-- les granules qui ne contiennent pas cette IP (~90% des granules en pratique). -- les granules qui ne contiennent pas cette IP (~90% des granules en pratique).
-- Taux de faux positifs 1% (0.01) : bon compromis taille / efficacité. -- Taux de faux positifs 1% (0.01) : bon compromis taille / efficacité.
@ -192,6 +198,12 @@ SELECT
nullIf(dictGetOrDefault('ja4_processing.dict_anubis_ip', 'category', _ip, ''), ''), nullIf(dictGetOrDefault('ja4_processing.dict_anubis_ip', 'category', _ip, ''), ''),
nullIf(dictGetOrDefault('ja4_processing.dict_anubis_asn', 'category', _asn, ''), ''), nullIf(dictGetOrDefault('ja4_processing.dict_anubis_asn', 'category', _asn, ''), ''),
'' ''
) AS anubis_bot_category ) AS anubis_bot_category,
-- Fingerprint HTTP/2 passif : champs émis par mod_reqin_log si HTTP/2 détecté
coalesce(JSONExtractString(raw_json, 'h2_fingerprint'), '') AS h2_fingerprint,
coalesce(JSONExtractString(raw_json, 'h2_settings_fp'), '') AS h2_settings_fp,
toUInt32(coalesce(JSONExtractUInt(raw_json, 'h2_window_update'), 0)) AS h2_window_update,
coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order
FROM ja4_logs.http_logs_raw; FROM ja4_logs.http_logs_raw;