feat: HTTP/2 passive fingerprinting with individual SETTINGS fields
Complete implementation of HTTP/2 passive fingerprinting per thesis §2.5.3.

mod-reqin-log (C module):
- Replace connection-level filter with ap_hook_process_connection (APR_HOOK_FIRST) to capture the H2 preface before mod_http2 takes over the connection
- AP_MODE_SPECULATIVE read of 512 bytes from c->input_filters
- Parse SETTINGS, WINDOW_UPDATE, PRIORITY flags, pseudo-header order
- Output individual SETTINGS params as separate JSON fields (IDs 1-6, 8)
- Read H2 notes from c1 (master connection) for mod_http2 secondary conns
- Fix header_order_signature JSON length bug (26→strlen)

ClickHouse schema:
- Add 8 new columns to http_logs: h2_has_priority, h2_header_table_size, h2_enable_push, h2_max_concurrent_streams, h2_initial_window_size, h2_max_frame_size, h2_max_header_list_size, h2_enable_connect_protocol
- Use Int32/Int64 with DEFAULT -1 to distinguish absent vs zero
- Update mv_http_logs to extract individual fields via JSONHas/JSONExtractInt
- Migration 04_http2_fields.sql updated for existing deployments

Correlator:
- Accept both timestamp_ns and timestamp field names (backward compat)

Integration:
- Enable HTTP/2 in Apache: Protocols h2 http/1.1 in httpd-integration.conf

Validated end-to-end via Playwright: H2 curl traffic → mod-reqin-log → correlator → ClickHouse with all 12 H2 columns populated correctly.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -10,7 +10,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs_raw
|
||||
ENGINE = MergeTree
|
||||
PARTITION BY toDate(ingest_time)
|
||||
ORDER BY ingest_time
|
||||
TTL ingest_time + INTERVAL 1 DAY
|
||||
TTL ingest_time + INTERVAL 2 HOUR
|
||||
SETTINGS
|
||||
index_granularity = 8192,
|
||||
ttl_only_drop_parts = 1;
|
||||
|
||||
@ -46,6 +46,15 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
|
||||
`b_timestamp` UInt64,
|
||||
`conn_id` String CODEC(ZSTD(3)),
|
||||
|
||||
-- Response metadata (captured at log_transaction phase)
|
||||
`status_code` UInt16 DEFAULT 0,
|
||||
`response_size` UInt64 DEFAULT 0,
|
||||
`duration_ms` UInt64 DEFAULT 0,
|
||||
|
||||
-- Header fingerprinting
|
||||
`headers_raw` String DEFAULT '' CODEC(ZSTD(3)),
|
||||
`header_order_signature` String DEFAULT '' CODEC(ZSTD(3)),
|
||||
|
||||
-- IP metadata
|
||||
`ip_meta_df` UInt8,
|
||||
`ip_meta_id` UInt16,
|
||||
@ -94,6 +103,17 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
|
||||
`h2_settings_fp` String DEFAULT '' CODEC(ZSTD(3)),
|
||||
`h2_window_update` UInt32 DEFAULT 0,
|
||||
`h2_pseudo_order` LowCardinality(String) DEFAULT '',
|
||||
`h2_has_priority` UInt8 DEFAULT 0,
|
||||
|
||||
-- Paramètres SETTINGS HTTP/2 individuels (RFC 9113 §6.5.2)
|
||||
-- -1 = absent du preface client (le client n'a pas envoyé ce paramètre)
|
||||
`h2_header_table_size` Int32 DEFAULT -1,
|
||||
`h2_enable_push` Int32 DEFAULT -1,
|
||||
`h2_max_concurrent_streams` Int32 DEFAULT -1,
|
||||
`h2_initial_window_size` Int64 DEFAULT -1,
|
||||
`h2_max_frame_size` Int32 DEFAULT -1,
|
||||
`h2_max_header_list_size` Int32 DEFAULT -1,
|
||||
`h2_enable_connect_protocol` Int32 DEFAULT -1,
|
||||
|
||||
-- Index bloom_filter sur src_ip : les requêtes WHERE src_ip = X sautent
|
||||
-- les granules qui ne contiennent pas cette IP (~90% des granules en pratique).
|
||||
@ -104,7 +124,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs
|
||||
ENGINE = MergeTree
|
||||
PARTITION BY log_date
|
||||
ORDER BY (time, src_ip, dst_ip, ja4)
|
||||
TTL log_date + INTERVAL 7 DAY
|
||||
TTL log_date + INTERVAL 30 DAY
|
||||
SETTINGS
|
||||
index_granularity = 8192,
|
||||
ttl_only_drop_parts = 1;
|
||||
@ -142,14 +162,19 @@ SELECT
|
||||
coalesce(JSONExtractString(raw_json, 'scheme'), '') AS scheme,
|
||||
coalesce(JSONExtractString(raw_json, 'host'), '') AS host,
|
||||
coalesce(JSONExtractString(raw_json, 'path'), '') AS path,
|
||||
coalesce(JSONExtractString(raw_json, 'query'), '') AS query,
|
||||
coalesce(JSONExtractString(raw_json, 'query_string'), JSONExtractString(raw_json, 'query'), '') AS query,
|
||||
coalesce(JSONExtractString(raw_json, 'http_version'), '') AS http_version,
|
||||
coalesce(JSONExtractString(raw_json, 'orphan_side'), '') AS orphan_side,
|
||||
toUInt8(coalesce(JSONExtractBool(raw_json, 'correlated'), 0)) AS correlated,
|
||||
toUInt8(coalesce(JSONExtractUInt(raw_json, 'correlated'), 0)) AS correlated,
|
||||
toUInt16(coalesce(JSONExtractUInt(raw_json, 'keepalives'), 0)) AS keepalives,
|
||||
coalesce(JSONExtractUInt(raw_json, 'a_timestamp'), 0) AS a_timestamp,
|
||||
coalesce(JSONExtractUInt(raw_json, 'b_timestamp'), 0) AS b_timestamp,
|
||||
coalesce(JSONExtractString(raw_json, 'conn_id'), '') AS conn_id,
|
||||
toUInt16(coalesce(JSONExtractUInt(raw_json, 'status_code'), 0)) AS status_code,
|
||||
coalesce(JSONExtractUInt(raw_json, 'response_size'), 0) AS response_size,
|
||||
coalesce(JSONExtractUInt(raw_json, 'duration_ms'), 0) AS duration_ms,
|
||||
coalesce(JSONExtractString(raw_json, 'headers_raw'), '') AS headers_raw,
|
||||
coalesce(JSONExtractString(raw_json, 'header_order_signature'), '') AS header_order_signature,
|
||||
toUInt8(coalesce(JSONExtractBool(raw_json, 'ip_meta_df'), 0)) AS ip_meta_df,
|
||||
toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_id'), 0)) AS ip_meta_id,
|
||||
toUInt16(coalesce(JSONExtractUInt(raw_json, 'ip_meta_total_length'), 0)) AS ip_meta_total_length,
|
||||
@ -204,6 +229,16 @@ SELECT
|
||||
coalesce(JSONExtractString(raw_json, 'h2_fingerprint'), '') AS h2_fingerprint,
|
||||
coalesce(JSONExtractString(raw_json, 'h2_settings_fp'), '') AS h2_settings_fp,
|
||||
toUInt32(coalesce(JSONExtractUInt(raw_json, 'h2_window_update'), 0)) AS h2_window_update,
|
||||
coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order
|
||||
coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order,
|
||||
toUInt8(coalesce(JSONExtractUInt(raw_json, 'h2_has_priority'), 0)) AS h2_has_priority,
|
||||
|
||||
-- Paramètres SETTINGS HTTP/2 individuels (-1 = absent du preface client)
|
||||
toInt32(if(JSONHas(raw_json, 'h2_header_table_size'), JSONExtractInt(raw_json, 'h2_header_table_size'), -1)) AS h2_header_table_size,
|
||||
toInt32(if(JSONHas(raw_json, 'h2_enable_push'), JSONExtractInt(raw_json, 'h2_enable_push'), -1)) AS h2_enable_push,
|
||||
toInt32(if(JSONHas(raw_json, 'h2_max_concurrent_streams'), JSONExtractInt(raw_json, 'h2_max_concurrent_streams'), -1)) AS h2_max_concurrent_streams,
|
||||
toInt64(if(JSONHas(raw_json, 'h2_initial_window_size'), JSONExtractInt(raw_json, 'h2_initial_window_size'), -1)) AS h2_initial_window_size,
|
||||
toInt32(if(JSONHas(raw_json, 'h2_max_frame_size'), JSONExtractInt(raw_json, 'h2_max_frame_size'), -1)) AS h2_max_frame_size,
|
||||
toInt32(if(JSONHas(raw_json, 'h2_max_header_list_size'), JSONExtractInt(raw_json, 'h2_max_header_list_size'), -1)) AS h2_max_header_list_size,
|
||||
toInt32(if(JSONHas(raw_json, 'h2_enable_connect_protocol'), JSONExtractInt(raw_json, 'h2_enable_connect_protocol'), -1)) AS h2_enable_connect_protocol
|
||||
|
||||
FROM ja4_logs.http_logs_raw;
|
||||
|
||||
@ -144,7 +144,10 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_host_ip_ja4_1h
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
ORDER BY (window_start, src_ip, ja4, host)
|
||||
SETTINGS deduplicate_merge_projection_mode = 'drop';
|
||||
TTL window_start + INTERVAL 7 DAY
|
||||
SETTINGS
|
||||
deduplicate_merge_projection_mode = 'drop',
|
||||
ttl_only_drop_parts = 1;
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
@ -177,7 +180,15 @@ SELECT
|
||||
sum(IF(match(src.path, '(?i)\.(png|jpg|jpeg|gif|css|js|ico|woff2|svg|eot)$'), 1, 0)) AS count_assets,
|
||||
sum(IF(position(src.client_headers, 'Referer') = 0, 1, 0)) AS count_no_referer,
|
||||
uniqState(src.header_user_agent) AS uniq_ua,
|
||||
0 AS max_requests_per_sec, -- TODO(P0): calculer via sous-requête par seconde (impossible dans un seul GROUP BY)
|
||||
toUInt32(if(count() > 0,
|
||||
arrayMax(
|
||||
arrayMap(
|
||||
s -> toUInt64(countEqual(groupArray(toStartOfSecond(src.time)), s)),
|
||||
arrayDistinct(groupArray(toStartOfSecond(src.time)))
|
||||
)
|
||||
),
|
||||
0
|
||||
)) AS max_requests_per_sec,
|
||||
varPopState(toFloat64(length(replaceAll(src.path, '/', '//')) - length(src.path))) AS url_depth_variance,
|
||||
sum(IF(src.ip_meta_total_length < 60 OR src.ip_meta_total_length > 1500, 1, 0)) AS count_anomalous_payload,
|
||||
uniqState(src.ja3) AS uniq_ja3,
|
||||
@ -224,7 +235,9 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_header_fingerprint_1h
|
||||
sec_fetch_dest SimpleAggregateFunction(any, String)
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
ORDER BY (window_start, src_ip);
|
||||
ORDER BY (window_start, src_ip)
|
||||
TTL window_start + INTERVAL 7 DAY
|
||||
SETTINGS ttl_only_drop_parts = 1;
|
||||
|
||||
DROP VIEW IF EXISTS ja4_processing.mv_agg_header_fingerprint_1h;
|
||||
|
||||
@ -249,3 +262,36 @@ SELECT
|
||||
any(src.header_sec_fetch_dest) AS sec_fetch_dest
|
||||
FROM ja4_logs.http_logs AS src
|
||||
GROUP BY window_start, src.src_ip;
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- unknown_h2_fingerprints — review queue for unknown H2 signatures (§3.9.5)
|
||||
--
|
||||
-- Sessions whose H2 fingerprint matches no known family
|
||||
-- (browser_match_max < 0.45) but which exhibit browser-like behaviour
|
||||
-- (browser_confidence ≥ 0.55, Sec-Fetch-* present, TLS 1.3).
|
||||
-- Used to progressively enrich browser_signatures.
|
||||
-- -----------------------------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS ja4_processing.unknown_h2_fingerprints
|
||||
(
|
||||
-- Defaults to insertion time; also serves as the ReplacingMergeTree version
-- column and as the TTL anchor (see ENGINE / TTL clauses below).
observed_at DateTime DEFAULT now(),
|
||||
src_ip IPv6,
|
||||
ja4 String CODEC(ZSTD(3)),
|
||||
h2_fingerprint String CODEC(ZSTD(3)),
|
||||
h2_settings_fp String CODEC(ZSTD(3)),
|
||||
h2_window_update UInt32,
|
||||
h2_pseudo_order LowCardinality(String),
|
||||
h2_has_priority UInt8,
|
||||
browser_confidence_score Float32,
|
||||
header_user_agent String CODEC(ZSTD(3)),
|
||||
tls_version LowCardinality(String),
|
||||
-- Defaults to 1 when the writer does not supply a value.
-- NOTE(review): ReplacingMergeTree keeps one row per sorting key and does not
-- sum this column on merge — confirm how the writer accumulates hit_count.
hit_count UInt64 DEFAULT 1,
|
||||
|
||||
-- minmax data-skipping index on observed_at, one index entry per 4 granules.
INDEX idx_observed_at observed_at TYPE minmax GRANULARITY 4
|
||||
)
|
||||
-- On merge, rows sharing the same (h2_fingerprint, ja4, src_ip) key collapse
-- to the row with the highest observed_at.
ENGINE = ReplacingMergeTree(observed_at)
|
||||
ORDER BY (h2_fingerprint, ja4, src_ip)
|
||||
-- Rows expire 30 days after observed_at.
-- NOTE(review): with ttl_only_drop_parts = 1 a part is removed only once every
-- row in it has expired, and this table declares no PARTITION BY — confirm that
-- merged parts mixing old and recent rows still expire as intended.
TTL observed_at + INTERVAL 30 DAY
|
||||
SETTINGS
|
||||
index_granularity = 8192,
|
||||
ttl_only_drop_parts = 1;
|
||||
|
||||
@ -73,7 +73,7 @@ SETTINGS
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- ml_all_scores — all classifications (no threshold, for observability)
|
||||
--
|
||||
-- PARTITION BY date : TTL de 3 jours → les partitions expirées sont supprimées
|
||||
-- PARTITION BY date : TTL de 7 jours → les partitions expirées sont supprimées
|
||||
-- entièrement sans avoir à lire chaque granule (ttl_only_drop_parts).
|
||||
-- INDEX idx_detected_at : idem ml_detected_anomalies.
|
||||
-- -----------------------------------------------------------------------------
|
||||
@ -115,7 +115,7 @@ CREATE TABLE IF NOT EXISTS ja4_processing.ml_all_scores
|
||||
ENGINE = ReplacingMergeTree(detected_at)
|
||||
PARTITION BY toYYYYMMDD(window_start)
|
||||
ORDER BY (window_start, src_ip, ja4, host, model_name)
|
||||
TTL window_start + INTERVAL 3 DAY
|
||||
TTL window_start + INTERVAL 7 DAY
|
||||
SETTINGS
|
||||
index_granularity = 8192,
|
||||
ttl_only_drop_parts = 1;
|
||||
|
||||
Reference in New Issue
Block a user