feat: HTTP/2 passive fingerprinting with individual SETTINGS fields

Complete implementation of HTTP/2 passive fingerprinting per thesis §2.5.3:

mod-reqin-log (C module):
- Replace connection-level filter with ap_hook_process_connection (APR_HOOK_FIRST)
  to capture H2 preface before mod_http2 takes over the connection
- AP_MODE_SPECULATIVE read of 512 bytes from c->input_filters
- Parse SETTINGS, WINDOW_UPDATE, PRIORITY flags, pseudo-header order
- Output individual SETTINGS params as separate JSON fields (IDs 1-6, 8)
- Read H2 notes from c1 (master connection) for mod_http2 secondary conns
- Fix header_order_signature JSON length bug (26→strlen)

ClickHouse schema:
- Add 8 new columns to http_logs: h2_has_priority, h2_header_table_size,
  h2_enable_push, h2_max_concurrent_streams, h2_initial_window_size,
  h2_max_frame_size, h2_max_header_list_size, h2_enable_connect_protocol
- Use Int32/Int64 with DEFAULT -1 for the seven SETTINGS columns to distinguish
  absent vs zero (h2_has_priority is a UInt8 flag with DEFAULT 0)
- Update mv_http_logs to extract individual fields via JSONHas/JSONExtractInt
- Migration 04_http2_fields.sql updated for existing deployments

Correlator:
- Accept both timestamp_ns and timestamp field names (backward compat)

Integration:
- Enable HTTP/2 in Apache: Protocols h2 http/1.1 in httpd-integration.conf

Validated end-to-end via Playwright: H2 curl traffic → mod-reqin-log →
correlator → ClickHouse with all 12 H2 columns populated correctly.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-11 02:33:45 +02:00
parent bd81331411
commit 85d3b95b7b
25 changed files with 649 additions and 160 deletions

View File

@ -46,6 +46,15 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
`b_timestamp` UInt64,
`conn_id` String CODEC(ZSTD(3)),
-- Response metadata (captured at log_transaction phase)
`status_code` UInt16 DEFAULT 0,
`response_size` UInt64 DEFAULT 0,
`duration_ms` UInt64 DEFAULT 0,
-- Header fingerprinting
`headers_raw` String DEFAULT '' CODEC(ZSTD(3)),
`header_order_signature` String DEFAULT '' CODEC(ZSTD(3)),
-- IP metadata
`ip_meta_df` UInt8,
`ip_meta_id` UInt16,
@ -94,6 +103,17 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
`h2_settings_fp` String DEFAULT '' CODEC(ZSTD(3)),
`h2_window_update` UInt32 DEFAULT 0,
`h2_pseudo_order` LowCardinality(String) DEFAULT '',
`h2_has_priority` UInt8 DEFAULT 0,
-- Individual HTTP/2 SETTINGS parameters (RFC 9113 §6.5.2)
-- -1 = absent from the client preface (the client did not send this parameter)
`h2_header_table_size` Int32 DEFAULT -1,
`h2_enable_push` Int32 DEFAULT -1,
`h2_max_concurrent_streams` Int32 DEFAULT -1,
`h2_initial_window_size` Int64 DEFAULT -1,
`h2_max_frame_size` Int32 DEFAULT -1,
`h2_max_header_list_size` Int32 DEFAULT -1,
`h2_enable_connect_protocol` Int32 DEFAULT -1,
-- bloom_filter index on src_ip: queries with WHERE src_ip = X skip
-- the granules that do not contain that IP (~90% of granules in practice).
@ -104,7 +124,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
ENGINE = MergeTree
PARTITION BY log_date
ORDER BY (time, src_ip, dst_ip, ja4)
TTL log_date + INTERVAL 7 DAY
TTL log_date + INTERVAL 30 DAY
SETTINGS
index_granularity = 8192,
ttl_only_drop_parts = 1;
@ -142,14 +162,19 @@ SELECT
coalesce(JSONExtractString(raw_json, 'scheme'), '') AS scheme,
coalesce(JSONExtractString(raw_json, 'host'), '') AS host,
coalesce(JSONExtractString(raw_json, 'path'), '') AS path,
coalesce(JSONExtractString(raw_json, 'query'), '') AS query,
coalesce(JSONExtractString(raw_json, 'query_string'), JSONExtractString(raw_json, 'query'), '') AS query,
coalesce(JSONExtractString(raw_json, 'http_version'), '') AS http_version,
coalesce(JSONExtractString(raw_json, 'orphan_side'), '') AS orphan_side,
toUInt8(coalesce(JSONExtractBool(raw_json, 'correlated'), 0)) AS correlated,
toUInt8(coalesce(JSONExtractUInt(raw_json, 'correlated'), 0)) AS correlated,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'keepalives'), 0)) AS keepalives,
coalesce(JSONExtractUInt(raw_json, 'a_timestamp'), 0) AS a_timestamp,
coalesce(JSONExtractUInt(raw_json, 'b_timestamp'), 0) AS b_timestamp,
coalesce(JSONExtractString(raw_json, 'conn_id'), '') AS conn_id,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'status_code'), 0)) AS status_code,
coalesce(JSONExtractUInt(raw_json, 'response_size'), 0) AS response_size,
coalesce(JSONExtractUInt(raw_json, 'duration_ms'), 0) AS duration_ms,
coalesce(JSONExtractString(raw_json, 'headers_raw'), '') AS headers_raw,
coalesce(JSONExtractString(raw_json, 'header_order_signature'), '') AS header_order_signature,
toUInt8(coalesce(JSONExtractBool(raw_json, 'ip_meta_df'), 0)) AS ip_meta_df,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_id'), 0)) AS ip_meta_id,
toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_total_length'), 0)) AS ip_meta_total_length,
@ -204,6 +229,16 @@ SELECT
coalesce(JSONExtractString(raw_json, 'h2_fingerprint'), '') AS h2_fingerprint,
coalesce(JSONExtractString(raw_json, 'h2_settings_fp'), '') AS h2_settings_fp,
toUInt32(coalesce(JSONExtractUInt(raw_json, 'h2_window_update'), 0)) AS h2_window_update,
coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order
coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order,
toUInt8(coalesce(JSONExtractUInt(raw_json, 'h2_has_priority'), 0)) AS h2_has_priority,
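-- Individual HTTP/2 SETTINGS parameters (-1 = absent from the client preface)
-- NOTE(review): SETTINGS values are unsigned 32-bit on the wire (RFC 9113 §6.5.1);
-- a value above 2^31-1 would not fit in Int32 and 0xFFFFFFFF may collide with the
-- -1 "absent" sentinel — confirm whether these casts/columns should be Int64.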
toInt32(if(JSONHas(raw_json, 'h2_header_table_size'), JSONExtractInt(raw_json, 'h2_header_table_size'), -1)) AS h2_header_table_size,
toInt32(if(JSONHas(raw_json, 'h2_enable_push'), JSONExtractInt(raw_json, 'h2_enable_push'), -1)) AS h2_enable_push,
toInt32(if(JSONHas(raw_json, 'h2_max_concurrent_streams'), JSONExtractInt(raw_json, 'h2_max_concurrent_streams'), -1)) AS h2_max_concurrent_streams,
toInt64(if(JSONHas(raw_json, 'h2_initial_window_size'), JSONExtractInt(raw_json, 'h2_initial_window_size'), -1)) AS h2_initial_window_size,
toInt32(if(JSONHas(raw_json, 'h2_max_frame_size'), JSONExtractInt(raw_json, 'h2_max_frame_size'), -1)) AS h2_max_frame_size,
toInt32(if(JSONHas(raw_json, 'h2_max_header_list_size'), JSONExtractInt(raw_json, 'h2_max_header_list_size'), -1)) AS h2_max_header_list_size,
toInt32(if(JSONHas(raw_json, 'h2_enable_connect_protocol'), JSONExtractInt(raw_json, 'h2_enable_connect_protocol'), -1)) AS h2_enable_connect_protocol
FROM ja4_logs.http_logs_raw;