diff --git a/services/correlator/sql/migrations/04_http2_fields.sql b/services/correlator/sql/migrations/04_http2_fields.sql
new file mode 100644
index 0000000..53b657f
--- /dev/null
+++ b/services/correlator/sql/migrations/04_http2_fields.sql
@@ -0,0 +1,25 @@
+-- === 04_http2_fields.sql — Add the HTTP/2 columns to http_logs ===
+--
+-- Migration for existing deployments: adds the 4 passive HTTP/2 fingerprint
+-- columns filled by mod_reqin_log's connection input filter.
+-- Column clauses follow ClickHouse's order: type, DEFAULT, then CODEC.
+--
+-- Akamai fingerprint format (h2_fingerprint):
+--   Chrome  : "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p"
+--   Firefox : "1:65536,4:131072,5:16384|12517377|0|m,p,s,a"
+--   Safari  : "1:4096,3:100,4:65535|10485760|0|m,a,s,p"
+--
+-- Apply with:
+--   clickhouse-client --multiquery < 04_http2_fields.sql
+
+ALTER TABLE ja4_logs.http_logs
+    ADD COLUMN IF NOT EXISTS `h2_fingerprint` String DEFAULT '' CODEC(ZSTD(3));
+
+ALTER TABLE ja4_logs.http_logs
+    ADD COLUMN IF NOT EXISTS `h2_settings_fp` String DEFAULT '' CODEC(ZSTD(3));
+
+ALTER TABLE ja4_logs.http_logs
+    ADD COLUMN IF NOT EXISTS `h2_window_update` UInt32 DEFAULT 0;
+
+ALTER TABLE ja4_logs.http_logs
+    ADD COLUMN IF NOT EXISTS `h2_pseudo_order` LowCardinality(String) DEFAULT '';
diff --git a/services/mod-reqin-log/CMakeLists.txt b/services/mod-reqin-log/CMakeLists.txt
index 64bbe0c..369c2c5 100644
--- a/services/mod-reqin-log/CMakeLists.txt
+++ b/services/mod-reqin-log/CMakeLists.txt
@@ -29,15 +29,19 @@ target_link_libraries(test_header_handling ${CMOCKA_LIBRARIES} ${APR_LIBRARIES})
 add_executable(test_json_serialization tests/unit/test_json_serialization.c)
 target_link_libraries(test_json_serialization ${CMOCKA_LIBRARIES} ${APR_LIBRARIES})
 
+add_executable(test_h2_parsing tests/unit/test_h2_parsing.c)
+target_link_libraries(test_h2_parsing ${CMOCKA_LIBRARIES})
+
 # Enable testing
 enable_testing()
 add_test(NAME RealModuleTest COMMAND test_module_real)
 add_test(NAME
ConfigParsingTest COMMAND test_config_parsing)
 add_test(NAME HeaderHandlingTest COMMAND test_header_handling)
 add_test(NAME JsonSerializationTest COMMAND test_json_serialization)
+add_test(NAME H2ParsingTest COMMAND test_h2_parsing)
 
 # Custom target for running tests
 add_custom_target(run_tests
     COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
-    DEPENDS test_module_real test_config_parsing test_header_handling test_json_serialization
+    DEPENDS test_module_real test_config_parsing test_header_handling test_json_serialization test_h2_parsing
 )
diff --git a/services/mod-reqin-log/src/mod_reqin_log.c b/services/mod-reqin-log/src/mod_reqin_log.c
index eb39fde..2a46b73 100644
--- a/services/mod-reqin-log/src/mod_reqin_log.c
+++ b/services/mod-reqin-log/src/mod_reqin_log.c
@@ -20,6 +20,8 @@
 #include "apr_lib.h"
 #include "ap_config.h"
 #include "ap_mpm.h"
+#include "util_filter.h"
+#include "http_connection.h"
 
 #include
 #include
@@ -132,6 +134,12 @@ static void reqin_log_child_init(apr_pool_t *p, server_rec *s);
 static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s);
 static void reqin_log_register_hooks(apr_pool_t *p);
 
+/* Forward declarations for the HTTP/2 filter */
+static apr_status_t reqin_h2_filter(ap_filter_t *f, apr_bucket_brigade *bb,
+                                     ap_input_mode_t mode, apr_read_type_e block,
+                                     apr_off_t readbytes);
+static void reqin_h2_add_filter(conn_rec *c, void *csd);
+
 /* Command table */
 static const command_rec reqin_log_cmds[] = {
     AP_INIT_FLAG("JsonSockLogEnabled", cmd_set_enabled, NULL, RSRC_CONF,
@@ -1088,6 +1096,26 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
         }
     }
 
+    /* Passive HTTP/2 fields read from the connection notes; nothing is emitted unless the SETTINGS note is non-empty */
+    {
+        const char *h2_fp  = apr_table_get(r->connection->notes, H2_NOTE_FINGERPRINT);
+        const char *h2_set = apr_table_get(r->connection->notes, H2_NOTE_SETTINGS);
+        const char *h2_wu  = apr_table_get(r->connection->notes, H2_NOTE_WUPDATE);
+        const char *h2_ps  = apr_table_get(r->connection->notes, H2_NOTE_PSEUDO_ORDER);
+
+        if (h2_set && h2_set[0] != '\0') {
+            dynbuf_append(&buf, ",\"h2_fingerprint\":\"", (apr_size_t)-1);
+            append_json_string(&buf, h2_fp ? h2_fp : "");
+            dynbuf_append(&buf, "\",\"h2_settings_fp\":\"", (apr_size_t)-1);
+            append_json_string(&buf, h2_set);
+            dynbuf_append(&buf, "\",\"h2_window_update\":", (apr_size_t)-1);
+            dynbuf_append(&buf, (h2_wu && h2_wu[0]) ? h2_wu : "0", (apr_size_t)-1); /* written unquoted, i.e. as a JSON number */
+            dynbuf_append(&buf, ",\"h2_pseudo_order\":\"", (apr_size_t)-1);
+            append_json_string(&buf, h2_ps ? h2_ps : "");
+            dynbuf_append(&buf, "\"", 1);
+        }
+    }
+
     dynbuf_append(&buf, "}\n", 2);
 
     if (buf.len > MAX_JSON_SIZE) {
@@ -1112,6 +1140,343 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child
     write_to_socket(buf.data, buf.len, s, cfg, state);
 }
 
+/* ====== Passive HTTP/2 fingerprinting ====== */
+
+/** @brief Size of the speculative read buffer used to peek at the HTTP/2 preface.
+ *
+ * 512 bytes are intended to cover the magic (24) + SETTINGS (≤108) +
+ * WINDOW_UPDATE (13) + the first HEADERS frame (≤350).
+ */
+#define H2_PEEK_SIZE 512
+
+/**
+ * @brief Decodes an HPACK integer (RFC 7541 §5.1).
+ *
+ * An HPACK integer has an N-bit prefix in its first octet. When every prefix
+ * bit is set, the value continues in the following octets, 7 bits per octet,
+ * least significant group first.
+ *
+ * @param buf    HPACK buffer.
+ * @param len    Length of the buffer.
+ * @param prefix Number of prefix bits (1–8).
+ * @param pos    Current position (advanced past the consumed octets).
+ * @param out    Receives the decoded value.
+ * @return 1 on success, 0 if the buffer ends early or the encoding is too long.
+ */ +static int hpack_int_decode(const unsigned char *buf, apr_size_t len, int prefix, + apr_size_t *pos, unsigned int *out) +{ + unsigned int mask = (1u << prefix) - 1u; + unsigned int b, m; + + if (*pos >= len) return 0; + *out = buf[(*pos)++] & mask; + if (*out < mask) return 1; + + /* Encodage multi-octet : chaque octet contribue 7 bits */ + m = 0; + while (*pos < len) { + b = buf[(*pos)++]; + *out += (b & 0x7fu) << m; + m += 7; + if (!(b & 0x80u)) return 1; + if (m > 28) return 0; /* Protection contre les dépassements */ + } + return 0; +} + +/** + * @brief Saute une chaîne HPACK (RFC 7541 §5.2). + * + * Une chaîne HPACK est précédée d'un entier 7-bit indiquant sa longueur + * (bit de poids fort = indicateur de compression Huffman, ignoré ici). + * + * @param buf Buffer HPACK. + * @param len Longueur du buffer. + * @param pos Position courante (modifiée pour sauter la chaîne entière). + * @return 1 si succès, 0 si buffer insuffisant. + */ +static int hpack_skip_string(const unsigned char *buf, apr_size_t len, apr_size_t *pos) +{ + unsigned int str_len = 0; + if (!hpack_int_decode(buf, len, 7, pos, &str_len)) return 0; + if (*pos + str_len > len) return 0; + *pos += str_len; + return 1; +} + +/** + * @brief Convertit un index de la table statique HPACK en caractère de pseudo-header. + * + * Table statique RFC 7541 Annexe A (indices 1–7 = pseudo-headers) : + * 1 = :authority → 'a' + * 2 = :method GET → 'm' + * 3 = :method POST → 'm' + * 4 = :path / → 'p' + * 5 = :path /index → 'p' + * 6 = :scheme http → 's' + * 7 = :scheme https → 's' + * + * @param index Index dans la table statique HPACK. + * @return Caractère 'a', 'm', 'p', 's', ou 0 si header régulier. + */ +static char h2_hpack_pseudo(unsigned int index) +{ + switch (index) { + case 1: return 'a'; + case 2: case 3: return 'm'; + case 4: case 5: return 'p'; + case 6: case 7: return 's'; + default: return 0; + } +} + +/** + * @brief Extrait l'ordre des pseudo-headers depuis un bloc HPACK. 
+ * + * Parcourt le début du bloc HPACK du premier frame HEADERS pour récupérer + * l'ordre des pseudo-headers (:method, :authority, :scheme, :path) avant + * d'atteindre un header régulier (index ≥ 8) ou une représentation non-indexée. + * Résultat : chaîne CSV du type "m,a,s,p". + * + * @param hpack Pointeur vers le début du bloc HPACK. + * @param len Longueur du bloc HPACK. + * @param out Buffer de sortie (capacité minimale : 8 octets). + */ +static void h2_extract_pseudo_order(const unsigned char *hpack, apr_size_t len, char *out) +{ + apr_size_t pos = 0; + int out_pos = 0; + int first = 1; + + while (pos < len && out_pos < 7) { + unsigned char byte = hpack[pos]; + + if (byte & 0x80u) { + /* Représentation indexée (bit 7 = 1) : 0x80 | index */ + unsigned int idx = 0; + if (!hpack_int_decode(hpack, len, 7, &pos, &idx)) break; + if (idx == 0) break; + + char c = h2_hpack_pseudo(idx); + if (!c) break; /* Index ≥ 8 : on a atteint les headers réguliers */ + + if (!first) out[out_pos++] = ','; + out[out_pos++] = c; + first = 0; + + } else if ((byte & 0xe0u) == 0x20u) { + /* Mise à jour de taille de table dynamique (RFC 7541 §6.3) : ignorer */ + unsigned int sz = 0; + if (!hpack_int_decode(hpack, len, 5, &pos, &sz)) break; + + } else { + /* Représentation littérale → header régulier ou pseudo non-indexé */ + break; + } + } + out[out_pos] = '\0'; +} + +/** + * @brief Parse le preface HTTP/2 client et stocke le fingerprint dans c->notes. + * + * Le preface HTTP/2 (RFC 9113 §3.4) commence par 24 octets de magic string, + * suivis d'un frame SETTINGS (type 0x04), optionnellement d'un WINDOW_UPDATE + * (type 0x08), puis d'un frame HEADERS (type 0x01). + * + * Stocke dans c->notes (si la connexion est bien HTTP/2) : + * H2_NOTE_SETTINGS → ex. "1:65536,2:0,4:6291456,6:262144" + * H2_NOTE_WUPDATE → ex. "15663105" (0 si absent) + * H2_NOTE_PSEUDO_ORDER → ex. 
"m,a,s,p" + * H2_NOTE_FINGERPRINT → fingerprint Akamai : "SETTINGS|WUPDATE|PRIO|PSEUDO" + * + * @param c Connexion Apache (pool + notes). + * @param buf Buffer contenant les premiers octets du flux (lecture spéculative). + * @param len Longueur effective du buffer. + */ +static void h2_parse_preface(conn_rec *c, const char *buf, apr_size_t len) +{ + static const char H2_MAGIC[] = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"; + const apr_size_t MAGIC_LEN = 24u; + const apr_size_t FRAME_HDR = 9u; + + char settings_buf[256] = ""; + char wupdate_buf[16] = "0"; + char pseudo_buf[16] = ""; + int has_priority = 0; + int settings_pos_out = 0; + + /* Vérification du magic HTTP/2 */ + if (len < MAGIC_LEN || memcmp(buf, H2_MAGIC, MAGIC_LEN) != 0) return; + + apr_size_t pos = MAGIC_LEN; + + /* Parcours des frames du preface */ + while (pos + FRAME_HDR <= len) { + apr_size_t frame_len = ((unsigned char)buf[pos] << 16) + | ((unsigned char)buf[pos+1] << 8) + | (unsigned char)buf[pos+2]; + unsigned char type = (unsigned char)buf[pos+3]; + unsigned char flags = (unsigned char)buf[pos+4]; + uint32_t stream_id = (((unsigned char)buf[pos+5] & 0x7fu) << 24) + | ((unsigned char)buf[pos+6] << 16) + | ((unsigned char)buf[pos+7] << 8) + | (unsigned char)buf[pos+8]; + + pos += FRAME_HDR; + + if (pos + frame_len > len) break; /* Frame incomplète dans le buffer */ + + if (type == 0x04u && stream_id == 0 && !(flags & 0x01u)) { + /* ---- Frame SETTINGS (hors ACK) ---- */ + apr_size_t sp = 0; + while (sp + 6 <= frame_len && + settings_pos_out < (int)sizeof(settings_buf) - 24) { + uint16_t id = ((unsigned char)buf[pos + sp] << 8) + | (unsigned char)buf[pos + sp + 1]; + uint32_t val = ((unsigned char)buf[pos + sp + 2] << 24) + | ((unsigned char)buf[pos + sp + 3] << 16) + | ((unsigned char)buf[pos + sp + 4] << 8) + | (unsigned char)buf[pos + sp + 5]; + sp += 6; + + if (settings_pos_out > 0) + settings_buf[settings_pos_out++] = ','; + settings_pos_out += snprintf(settings_buf + settings_pos_out, + 
(int)sizeof(settings_buf) - settings_pos_out, + "%u:%u", id, val); + } + + } else if (type == 0x08u && stream_id == 0) { + /* ---- Frame WINDOW_UPDATE sur la connexion ---- */ + if (frame_len >= 4) { + uint32_t inc = (((unsigned char)buf[pos] & 0x7fu) << 24) + | ((unsigned char)buf[pos+1] << 16) + | ((unsigned char)buf[pos+2] << 8) + | (unsigned char)buf[pos+3]; + snprintf(wupdate_buf, sizeof(wupdate_buf), "%u", inc); + } + + } else if (type == 0x01u && stream_id > 0) { + /* ---- Premier frame HEADERS → extraire l'ordre des pseudo-headers ---- */ + apr_size_t hpack_start = 0; + int parse_ok = 1; + + if ((flags & 0x08u) && parse_ok) { + /* Flag PADDED : 1 octet de longueur de padding */ + if (hpack_start >= frame_len) { + parse_ok = 0; + } else { + unsigned char pad_len = (unsigned char)buf[pos + hpack_start++]; + if (frame_len < hpack_start + (apr_size_t)pad_len) + parse_ok = 0; + else + frame_len -= (apr_size_t)pad_len; + } + } + + if ((flags & 0x20u) && parse_ok) { + /* Flag PRIORITY : 5 octets de priorité */ + if (hpack_start + 5u > frame_len) { + parse_ok = 0; + } else { + hpack_start += 5u; + has_priority = 1; + } + } + + if (parse_ok && hpack_start < frame_len) { + h2_extract_pseudo_order( + (const unsigned char *)(buf + pos + hpack_start), + frame_len - hpack_start, + pseudo_buf + ); + } + + pos += frame_len; + break; /* HEADERS frame trouvé : parse terminé */ + } + + pos += frame_len; + } + + /* Stocker dans les notes uniquement si une connexion HTTP/2 confirmée */ + if (settings_buf[0] == '\0') return; + + char fp[512]; + snprintf(fp, sizeof(fp), "%s|%s|%d|%s", + settings_buf, wupdate_buf, has_priority, pseudo_buf); + + apr_table_setn(c->notes, H2_NOTE_FINGERPRINT, apr_pstrdup(c->pool, fp)); + apr_table_setn(c->notes, H2_NOTE_SETTINGS, apr_pstrdup(c->pool, settings_buf)); + apr_table_setn(c->notes, H2_NOTE_WUPDATE, apr_pstrdup(c->pool, wupdate_buf)); + apr_table_setn(c->notes, H2_NOTE_PSEUDO_ORDER, apr_pstrdup(c->pool, pseudo_buf)); +} + +/** + * @brief 
Filtre d'entrée de connexion pour la capture passive du preface HTTP/2. + * + * S'injecte entre le filtre SSL (déchiffrement) et mod_http2 grâce à sa + * priorité AP_FTYPE_CONNECTION et à l'inscription via APR_HOOK_LAST. + * À la première invocation, effectue une lecture spéculative non-destructive + * (AP_MODE_SPECULATIVE) de H2_PEEK_SIZE octets, parse le preface HTTP/2, + * stocke les résultats dans c->notes, puis se retire de la chaîne. + * + * @param f Filtre courant. + * @param bb Brigade cible pour la lecture réelle. + * @param mode Mode de lecture demandé (transmis à f->next). + * @param block Type de blocage (transmis à f->next). + * @param readbytes Nombre d'octets demandés. + * @return Statut APR de la lecture réelle. + */ +static apr_status_t reqin_h2_filter(ap_filter_t *f, apr_bucket_brigade *bb, + ap_input_mode_t mode, apr_read_type_e block, + apr_off_t readbytes) +{ + conn_rec *c = f->c; + + if (!apr_table_get(c->notes, H2_NOTE_PARSED)) { + /* Lecture spéculative : ne consomme pas les données du flux */ + apr_bucket_brigade *peek = apr_brigade_create(c->pool, c->bucket_alloc); + apr_status_t rv = ap_get_brigade(f->next, peek, + AP_MODE_SPECULATIVE, APR_BLOCK_READ, + H2_PEEK_SIZE); + if (rv == APR_SUCCESS) { + char peek_buf[H2_PEEK_SIZE]; + apr_size_t peek_len = sizeof(peek_buf); + if (apr_brigade_flatten(peek, peek_buf, &peek_len) == APR_SUCCESS + && peek_len > 0) { + h2_parse_preface(c, peek_buf, peek_len); + } + } + apr_brigade_cleanup(peek); + apr_table_setn(c->notes, H2_NOTE_PARSED, "1"); + } + + /* Le filtre n'est nécessaire qu'une seule fois par connexion */ + ap_remove_input_filter(f); + + return ap_get_brigade(f->next, bb, mode, block, readbytes); +} + +/** + * @brief Hook pre_connection — enregistre le filtre HTTP/2 sur chaque connexion. + * + * Appelé à l'établissement de chaque connexion. 
Inscrit reqin_h2_filter dans + * la chaîne d'entrée avec APR_HOOK_LAST, ce qui garantit son positionnement + * après le filtre SSL (qui s'inscrit avec APR_HOOK_MIDDLE) et donc son accès + * au flux HTTP/2 en clair. + * + * @param c Connexion Apache. + * @param csd Socket descriptor (non utilisé). + */ +static void reqin_h2_add_filter(conn_rec *c, void *csd) +{ + (void)csd; + ap_add_input_filter(H2_FILTER_NAME, NULL, NULL, c); +} + /* ====== Hooks Apache ====== */ /** @@ -1262,6 +1627,10 @@ static int reqin_log_post_config(apr_pool_t *pconf, apr_pool_t *plog, apr_pool_t static void reqin_log_register_hooks(apr_pool_t *p) { (void)p; + /* Enregistrement du filtre de connexion HTTP/2 (avant les hooks de requête) */ + ap_register_input_filter(H2_FILTER_NAME, reqin_h2_filter, NULL, AP_FTYPE_CONNECTION); + ap_hook_pre_connection(reqin_h2_add_filter, NULL, NULL, APR_HOOK_LAST); + ap_hook_post_config(reqin_log_post_config, NULL, NULL, APR_HOOK_MIDDLE); ap_hook_post_read_request(reqin_log_post_read_request, NULL, NULL, APR_HOOK_MIDDLE); ap_hook_child_init(reqin_log_child_init, NULL, NULL, APR_HOOK_MIDDLE); diff --git a/services/mod-reqin-log/src/mod_reqin_log.h b/services/mod-reqin-log/src/mod_reqin_log.h index c281b72..6a693c9 100644 --- a/services/mod-reqin-log/src/mod_reqin_log.h +++ b/services/mod-reqin-log/src/mod_reqin_log.h @@ -34,4 +34,16 @@ typedef struct { /* External module declaration */ extern module AP_MODULE_DECLARE_DATA reqin_log_module; +/* ====== Fingerprinting HTTP/2 passif ====== */ + +/* Nom du filtre d'entrée de connexion pour la capture du preface HTTP/2 */ +#define H2_FILTER_NAME "REQIN_H2_PEEK" + +/* Clés des notes de connexion stockant le fingerprint HTTP/2 parsé */ +#define H2_NOTE_FINGERPRINT "reqin_h2_fp" /* Fingerprint Akamai complet */ +#define H2_NOTE_SETTINGS "reqin_h2_set" /* Entrées SETTINGS brutes */ +#define H2_NOTE_WUPDATE "reqin_h2_wu" /* Incrément WINDOW_UPDATE */ +#define H2_NOTE_PSEUDO_ORDER "reqin_h2_ps" /* Ordre pseudo-headers */ 
+#define H2_NOTE_PARSED "reqin_h2_done" /* Marqueur "déjà parsé" */ + #endif /* MOD_REQIN_LOG_H */ diff --git a/services/mod-reqin-log/tests/unit/test_h2_parsing.c b/services/mod-reqin-log/tests/unit/test_h2_parsing.c new file mode 100644 index 0000000..67c29f2 --- /dev/null +++ b/services/mod-reqin-log/tests/unit/test_h2_parsing.c @@ -0,0 +1,458 @@ +/* + * test_h2_parsing.c — Tests unitaires du fingerprinting HTTP/2 passif. + * + * Les fonctions testées (hpack_int_decode, h2_extract_pseudo_order, + * h2_parse_preface_buf) sont réimplimentées localement pour éviter les + * dépendances Apache/APR. La logique est identique à mod_reqin_log.c. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* ====== Réimplémentation locale des fonctions H2 ====== */ + +static int hpack_int_decode(const unsigned char *buf, size_t len, int prefix, + size_t *pos, unsigned int *out) +{ + unsigned int mask = (1u << prefix) - 1u; + unsigned int b, m; + + if (*pos >= len) return 0; + *out = buf[(*pos)++] & mask; + if (*out < mask) return 1; + + m = 0; + while (*pos < len) { + b = buf[(*pos)++]; + *out += (b & 0x7fu) << m; + m += 7; + if (!(b & 0x80u)) return 1; + if (m > 28) return 0; + } + return 0; +} + +static char h2_hpack_pseudo(unsigned int index) +{ + switch (index) { + case 1: return 'a'; + case 2: case 3: return 'm'; + case 4: case 5: return 'p'; + case 6: case 7: return 's'; + default: return 0; + } +} + +static void h2_extract_pseudo_order(const unsigned char *hpack, size_t len, char *out) +{ + size_t pos = 0; + int out_pos = 0; + int first = 1; + + while (pos < len && out_pos < 7) { + unsigned char byte = hpack[pos]; + + if (byte & 0x80u) { + unsigned int idx = 0; + if (!hpack_int_decode(hpack, len, 7, &pos, &idx)) break; + if (idx == 0) break; + + char c = h2_hpack_pseudo(idx); + if (!c) break; + + if (!first) out[out_pos++] = ','; + out[out_pos++] = c; + first = 0; + + } else if ((byte & 0xe0u) == 0x20u) { + unsigned int sz = 0; + if 
(!hpack_int_decode(hpack, len, 5, &pos, &sz)) break; + } else { + break; + } + } + out[out_pos] = '\0'; +} + +/* Résultat de h2_parse_preface_buf — version allégée (pas d'APR) */ +typedef struct { + char settings[256]; + char wupdate[16]; + char pseudo[16]; + char fingerprint[512]; + int has_priority; + int is_h2; +} h2_result_t; + +static void h2_parse_preface_buf(const char *buf, size_t len, h2_result_t *res) +{ + static const char H2_MAGIC[] = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"; + const size_t MAGIC_LEN = 24u; + const size_t FRAME_HDR = 9u; + + memset(res, 0, sizeof(*res)); + strcpy(res->wupdate, "0"); + + if (len < MAGIC_LEN || memcmp(buf, H2_MAGIC, MAGIC_LEN) != 0) return; + + int settings_out = 0; + size_t pos = MAGIC_LEN; + + while (pos + FRAME_HDR <= len) { + size_t frame_len = ((unsigned char)buf[pos] << 16) + | ((unsigned char)buf[pos+1] << 8) + | (unsigned char)buf[pos+2]; + unsigned char type = (unsigned char)buf[pos+3]; + unsigned char flags = (unsigned char)buf[pos+4]; + uint32_t stream_id = (((unsigned char)buf[pos+5] & 0x7fu) << 24) + | ((unsigned char)buf[pos+6] << 16) + | ((unsigned char)buf[pos+7] << 8) + | (unsigned char)buf[pos+8]; + + pos += FRAME_HDR; + if (pos + frame_len > len) break; + + if (type == 0x04u && stream_id == 0 && !(flags & 0x01u)) { + size_t sp = 0; + while (sp + 6 <= frame_len && + settings_out < (int)sizeof(res->settings) - 24) { + uint16_t id = ((unsigned char)buf[pos + sp] << 8) + | (unsigned char)buf[pos + sp + 1]; + uint32_t val = ((unsigned char)buf[pos + sp + 2] << 24) + | ((unsigned char)buf[pos + sp + 3] << 16) + | ((unsigned char)buf[pos + sp + 4] << 8) + | (unsigned char)buf[pos + sp + 5]; + sp += 6; + if (settings_out > 0) + res->settings[settings_out++] = ','; + settings_out += snprintf(res->settings + settings_out, + (int)sizeof(res->settings) - settings_out, + "%u:%u", id, val); + } + + } else if (type == 0x08u && stream_id == 0) { + if (frame_len >= 4) { + uint32_t inc = (((unsigned char)buf[pos] & 0x7fu) << 
24) + | ((unsigned char)buf[pos+1] << 16) + | ((unsigned char)buf[pos+2] << 8) + | (unsigned char)buf[pos+3]; + snprintf(res->wupdate, sizeof(res->wupdate), "%u", inc); + } + + } else if (type == 0x01u && stream_id > 0) { + size_t hpack_start = 0; + int parse_ok = 1; + + if ((flags & 0x08u) && parse_ok) { + if (hpack_start >= frame_len) { + parse_ok = 0; + } else { + unsigned char pad_len = (unsigned char)buf[pos + hpack_start++]; + if (frame_len < hpack_start + (size_t)pad_len) + parse_ok = 0; + else + frame_len -= (size_t)pad_len; + } + } + + if ((flags & 0x20u) && parse_ok) { + if (hpack_start + 5u > frame_len) { + parse_ok = 0; + } else { + hpack_start += 5u; + res->has_priority = 1; + } + } + + if (parse_ok && hpack_start < frame_len) { + h2_extract_pseudo_order( + (const unsigned char *)(buf + pos + hpack_start), + frame_len - hpack_start, + res->pseudo + ); + } + pos += frame_len; + break; + } + pos += frame_len; + } + + if (res->settings[0] != '\0') { + res->is_h2 = 1; + snprintf(res->fingerprint, sizeof(res->fingerprint), "%s|%s|%d|%s", + res->settings, res->wupdate, res->has_priority, res->pseudo); + } +} + +/* ====== Données de test : preface Chrome 120 ====== */ + +/* + * Preface HTTP/2 Chrome 120 (capturée) : + * Magic (24 octets) + * SETTINGS frame : HEADER_TABLE_SIZE=65536, ENABLE_PUSH=0, + * INITIAL_WINDOW_SIZE=6291456, MAX_HEADER_LIST_SIZE=262144 + * WINDOW_UPDATE : incrément 15663105 + * HEADERS stream 1 : :method GET, :authority, :scheme https, :path / + * → ordre HPACK indexé : 0x82(GET), 0x81(:auth), 0x87(https), 0x84(/) + */ +static const unsigned char CHROME_PREFACE[] = { + /* Magic */ + 'P','R','I',' ','*',' ','H','T','T','P','/','2','.','0','\r','\n', + '\r','\n','S','M','\r','\n','\r','\n', + /* SETTINGS frame : length=24, type=0x04, flags=0x00, stream=0 */ + 0x00, 0x00, 0x18, /* length = 24 = 4×6 */ + 0x04, /* type SETTINGS */ + 0x00, /* flags = 0 */ + 0x00, 0x00, 0x00, 0x00, /* stream 0 */ + /* Entry 1: HEADER_TABLE_SIZE (1) = 65536 = 
0x00010000 */
+    0x00, 0x01, 0x00, 0x01, 0x00, 0x00,
+    /* Entry 2: ENABLE_PUSH (2) = 0 */
+    0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
+    /* Entry 3: INITIAL_WINDOW_SIZE (4) = 6291456 = 0x00600000 */
+    0x00, 0x04, 0x00, 0x60, 0x00, 0x00,
+    /* Entry 4: MAX_HEADER_LIST_SIZE (6) = 262144 = 0x00040000 */
+    0x00, 0x06, 0x00, 0x04, 0x00, 0x00,
+    /* WINDOW_UPDATE frame : length=4, type=0x08, flags=0, stream=0 */
+    0x00, 0x00, 0x04,
+    0x08,
+    0x00,
+    0x00, 0x00, 0x00, 0x00,
+    /* increment = 15663105 = 0x00EF0001 */
+    0x00, 0xEF, 0x00, 0x01,
+    /* HEADERS frame : length=16, type=0x01, flags=0x05 (END_STREAM|END_HEADERS), stream=1 */
+    0x00, 0x00, 0x10,
+    0x01,
+    0x05,
+    0x00, 0x00, 0x00, 0x01,
+    /* HPACK : :method GET (0x82), :authority (0x81), :scheme https (0x87), :path / (0x84) */
+    /* → Chrome order: m,a,s,p */
+    0x82, 0x81, 0x87, 0x84,
+    /* followed by regular headers, so the frame length (16) matches the payload */
+    0x90, /* accept-encoding: gzip, deflate (static index 16, regular header) */
+    0x53, 0x09, /* literal, name = accept (static index 19), value length 9 */
+    0x74, 0x65, 0x78, 0x74, 0x2F, 0x68, 0x74, 0x6D, 0x6C /* "text/html" */
+};
+
+/* ====== Test data: Firefox 120 preface ====== */
+
+/*
+ * Firefox 120-style HTTP/2 preface:
+ *   SETTINGS: HEADER_TABLE_SIZE=65536, INITIAL_WINDOW_SIZE=131072, MAX_FRAME_SIZE=16384
+ *   WINDOW_UPDATE: 12517377
+ *   HEADERS: :method GET (0x82), :path / (0x84), :scheme https (0x87), :authority (0x81)
+ *     → Firefox order: m,p,s,a
+ */
+static const unsigned char FIREFOX_PREFACE[] = {
+    /* Magic */
+    'P','R','I',' ','*',' ','H','T','T','P','/','2','.','0','\r','\n',
+    '\r','\n','S','M','\r','\n','\r','\n',
+    /* SETTINGS frame : length=18, type=0x04, flags=0x00, stream=0 */
+    0x00, 0x00, 0x12,
+    0x04,
+    0x00,
+    0x00, 0x00, 0x00, 0x00,
+    /* HEADER_TABLE_SIZE (1) = 65536 */
+    0x00, 0x01, 0x00, 0x01, 0x00, 0x00,
+    /* INITIAL_WINDOW_SIZE (4) = 131072 = 0x00020000 */
+    0x00, 0x04, 0x00, 0x02, 0x00, 0x00,
+    /* MAX_FRAME_SIZE (5) = 16384 = 0x00004000 */
+    0x00, 0x05, 0x00, 0x00, 0x40, 0x00,
+
/* WINDOW_UPDATE : increment = 12517377 = 0x00BF0001 */
+    0x00, 0x00, 0x04,
+    0x08,
+    0x00,
+    0x00, 0x00, 0x00, 0x00,
+    0x00, 0xBF, 0x00, 0x01,
+    /* HEADERS frame : length=4, type=0x01, flags=0x05, stream=1 */
+    0x00, 0x00, 0x04,
+    0x01,
+    0x05,
+    0x00, 0x00, 0x00, 0x01,
+    /* HPACK : :method GET (0x82), :path / (0x84), :scheme https (0x87), :authority (0x81) */
+    /* → Firefox order: m,p,s,a */
+    0x82, 0x84, 0x87, 0x81
+};
+
+/* ====== Test data: HTTP/1.1 stream (must not match) ====== */
+static const char HTTP1_DATA[] =
+    "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n";
+
+/* ====== Tests ====== */
+/* The Chrome preface is detected as HTTP/2 and its SETTINGS list is rendered as id:value pairs. */
+static void test_chrome_settings_parsed(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf((const char *)CHROME_PREFACE, sizeof(CHROME_PREFACE), &res);
+
+    assert_int_equal(res.is_h2, 1);
+    /* Expected SETTINGS: 1:65536,2:0,4:6291456,6:262144 */
+    assert_string_equal(res.settings, "1:65536,2:0,4:6291456,6:262144");
+}
+/* The connection-level WINDOW_UPDATE increment of the Chrome preface is reported in decimal. */
+static void test_chrome_window_update(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf((const char *)CHROME_PREFACE, sizeof(CHROME_PREFACE), &res);
+
+    assert_string_equal(res.wupdate, "15663105");
+}
+/* The pseudo-header order of the Chrome HEADERS frame is extracted. */
+static void test_chrome_pseudo_order(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf((const char *)CHROME_PREFACE, sizeof(CHROME_PREFACE), &res);
+
+    /* Chrome: :method(m), :authority(a), :scheme(s), :path(p) */
+    assert_string_equal(res.pseudo, "m,a,s,p");
+}
+/* The full "SETTINGS|WUPDATE|PRIO|PSEUDO" string is assembled for the Chrome preface. */
+static void test_chrome_fingerprint_akamai(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf((const char *)CHROME_PREFACE, sizeof(CHROME_PREFACE), &res);
+
+    assert_string_equal(res.fingerprint,
+        "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p");
+}
+/* The Firefox preface is detected as HTTP/2 and its three SETTINGS entries are rendered. */
+static void test_firefox_settings_parsed(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf((const char *)FIREFOX_PREFACE, sizeof(FIREFOX_PREFACE), &res);
+
+    assert_int_equal(res.is_h2, 1);
+    assert_string_equal(res.settings, "1:65536,4:131072,5:16384");
+}
+/* The pseudo-header order of the Firefox HEADERS frame is extracted. */
+static void test_firefox_pseudo_order(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf((const char *)FIREFOX_PREFACE, sizeof(FIREFOX_PREFACE), &res);
+
+    /* Firefox: :method(m), :path(p), :scheme(s), :authority(a) */
+    assert_string_equal(res.pseudo, "m,p,s,a");
+}
+/* The full fingerprint string is assembled for the Firefox preface. */
+static void test_firefox_fingerprint_akamai(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf((const char *)FIREFOX_PREFACE, sizeof(FIREFOX_PREFACE), &res);
+
+    assert_string_equal(res.fingerprint,
+        "1:65536,4:131072,5:16384|12517377|0|m,p,s,a");
+}
+/* An HTTP/1.1 request lacks the magic: nothing is detected and all fields stay empty. */
+static void test_http1_not_detected(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf(HTTP1_DATA, strlen(HTTP1_DATA), &res);
+
+    assert_int_equal(res.is_h2, 0);
+    assert_string_equal(res.settings, "");
+    assert_string_equal(res.fingerprint, "");
+}
+/* A zero-length buffer is rejected by the length check. */
+static void test_empty_buffer_not_detected(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    h2_parse_preface_buf("", 0, &res);
+
+    assert_int_equal(res.is_h2, 0);
+}
+/* A buffer cut inside the first frame header yields no fingerprint. */
+static void test_truncated_preface_no_crash(void **state)
+{
+    (void)state;
+    h2_result_t res;
+    /* Complete magic, but the frame that follows is truncated */
+    h2_parse_preface_buf((const char *)CHROME_PREFACE, 30, &res);
+
+    assert_int_equal(res.is_h2, 0); /* incomplete SETTINGS → no fingerprint */
+}
+/* A value below the prefix maximum is decoded from a single octet. */
+static void test_hpack_int_single_byte(void **state)
+{
+    (void)state;
+    /* 7-bit integer < 127 → encoded on 1 octet */
+    unsigned char buf[] = { 0x82 }; /* 0x80 | 2 → index=2 */
+    size_t pos = 0;
+    unsigned int out = 0;
+    int ok = hpack_int_decode(buf, 1, 7, &pos, &out);
+
+    assert_int_equal(ok, 1);
+    assert_int_equal(out, 2);
+    assert_int_equal(pos, 1);
+}
+/* Static-table indices 1–7 map to a/m/m/p/p/s/s; other indices map to 0. */
+static void test_hpack_pseudo_table(void **state)
+{
+    (void)state;
+    assert_int_equal(h2_hpack_pseudo(1), 'a');
+    assert_int_equal(h2_hpack_pseudo(2), 'm');
+    assert_int_equal(h2_hpack_pseudo(3), 'm');
+    assert_int_equal(h2_hpack_pseudo(4), 'p');
+    assert_int_equal(h2_hpack_pseudo(5), 'p');
+    assert_int_equal(h2_hpack_pseudo(6), 's');
+    assert_int_equal(h2_hpack_pseudo(7), 's');
+    assert_int_equal(h2_hpack_pseudo(8), 0);   /* regular header */
+    assert_int_equal(h2_hpack_pseudo(62), 0);
+}
+/* Four indexed pseudo-headers are reported in their wire order. */
+static void test_pseudo_order_extraction_direct(void **state)
+{
+    (void)state;
+    /* HPACK block: :method(0x82), :path(0x84), :scheme(0x87), :authority(0x81) */
+    unsigned char hpack[] = { 0x82, 0x84, 0x87, 0x81 };
+    char out[16];
+    h2_extract_pseudo_order(hpack, sizeof(hpack), out);
+
+    assert_string_equal(out, "m,p,s,a");
+}
+/* The scan stops at the first indexed header whose index is not a pseudo-header. */
+static void test_pseudo_order_stops_at_regular_header(void **state)
+{
+    (void)state;
+    /* :method(0x82), then a regular header (0x88 = index 8) */
+    unsigned char hpack[] = { 0x82, 0x88 };
+    char out[16];
+    h2_extract_pseudo_order(hpack, sizeof(hpack), out);
+
+    assert_string_equal(out, "m");
+}
+
+/* ====== main ====== */
+/* Runs every test above as one cmocka group and returns cmocka's result. */
+int main(void)
+{
+    const struct CMUnitTest tests[] = {
+        cmocka_unit_test(test_chrome_settings_parsed),
+        cmocka_unit_test(test_chrome_window_update),
+        cmocka_unit_test(test_chrome_pseudo_order),
+        cmocka_unit_test(test_chrome_fingerprint_akamai),
+        cmocka_unit_test(test_firefox_settings_parsed),
+        cmocka_unit_test(test_firefox_pseudo_order),
+        cmocka_unit_test(test_firefox_fingerprint_akamai),
+        cmocka_unit_test(test_http1_not_detected),
+        cmocka_unit_test(test_empty_buffer_not_detected),
+        cmocka_unit_test(test_truncated_preface_no_crash),
+        cmocka_unit_test(test_hpack_int_single_byte),
+        cmocka_unit_test(test_hpack_pseudo_table),
+        cmocka_unit_test(test_pseudo_order_extraction_direct),
+        cmocka_unit_test(test_pseudo_order_stops_at_regular_header),
+    };
+    return cmocka_run_group_tests(tests, NULL, NULL);
+}
diff --git a/shared/clickhouse/04_mv_http_logs.sql b/shared/clickhouse/04_mv_http_logs.sql
index 47eabfb..6342a72 100644
--- a/shared/clickhouse/04_mv_http_logs.sql
+++ b/shared/clickhouse/04_mv_http_logs.sql
@@ -89,6 +89,12 @@ CREATE TABLE IF NOT EXISTS
ja4_logs.http_logs
     `anubis_bot_action`   LowCardinality(String) DEFAULT '',
     `anubis_bot_category` LowCardinality(String) DEFAULT '',
 
+    -- Passive HTTP/2 fingerprint (mod_reqin_log connection filter); clause order is type, DEFAULT, CODEC
+    `h2_fingerprint`   String DEFAULT '' CODEC(ZSTD(3)),
+    `h2_settings_fp`   String DEFAULT '' CODEC(ZSTD(3)),
+    `h2_window_update` UInt32 DEFAULT 0,
+    `h2_pseudo_order`  LowCardinality(String) DEFAULT '',
+
     -- Index bloom_filter sur src_ip : les requêtes WHERE src_ip = X sautent
     -- les granules qui ne contiennent pas cette IP (~90% des granules en pratique).
     -- Taux de faux positifs 1% (0.01) : bon compromis taille / efficacité.
@@ -192,6 +198,12 @@ SELECT
         nullIf(dictGetOrDefault('ja4_processing.dict_anubis_ip', 'category', _ip, ''), ''),
         nullIf(dictGetOrDefault('ja4_processing.dict_anubis_asn', 'category', _asn, ''), ''),
         ''
-    ) AS anubis_bot_category
+    ) AS anubis_bot_category,
+
+    -- Passive HTTP/2 fingerprint: fields emitted by mod_reqin_log when HTTP/2 was detected
+    coalesce(JSONExtractString(raw_json, 'h2_fingerprint'), '') AS h2_fingerprint,
+    coalesce(JSONExtractString(raw_json, 'h2_settings_fp'), '') AS h2_settings_fp,
+    toUInt32(coalesce(JSONExtractUInt(raw_json, 'h2_window_update'), 0)) AS h2_window_update,
+    coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order
 
 FROM ja4_logs.http_logs_raw;