refactor: suppression dépendance User-Agent de la détection navigateur
Changements SQL : - modern_browser_score : sec-ch-ua→100, Sec-Fetch→70 (plus de UA fallback) - Ajout has_sec_ch_ua (UInt8) dans agg_header_fingerprint_1h et ml_all_scores - mss_mobile_mismatch utilise has_sec_ch_ua au lieu de modern_browser_score - header_order_confidence : PARTITION BY ja4 au lieu de first_ua - sec_ch_mobile_mismatch : comparaison Client Hints interne (sans UA) - Migration 03_remove_ua_browser_detection.sql Changements Python : - browser.py Axe 3 : Client Hints + Sec-Fetch + is_fake_navigation (PAS de UA) - Pondération axes : ja4_known 0.30, tls_coherence 0.20 (signaux TLS renforcés) - preprocessing.py : has_sec_ch_ua ajouté aux features et binary_features Fichiers modifiés : 8 SQL/Python + 1 migration, 36/36 tests passent. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -202,6 +202,7 @@ CREATE TABLE IF NOT EXISTS ja4_processing.agg_header_fingerprint_1h
|
||||
has_cookie SimpleAggregateFunction(max, UInt8),
|
||||
has_referer SimpleAggregateFunction(max, UInt8),
|
||||
modern_browser_score SimpleAggregateFunction(max, UInt8),
|
||||
has_sec_ch_ua SimpleAggregateFunction(max, UInt8),
|
||||
ua_ch_mismatch SimpleAggregateFunction(max, UInt8),
|
||||
sec_ch_mobile_mismatch SimpleAggregateFunction(max, UInt8),
|
||||
sec_fetch_mode SimpleAggregateFunction(any, String),
|
||||
@ -222,11 +223,12 @@ SELECT
|
||||
max(toUInt8(if(position(src.client_headers, 'Accept-Language') > 0, 1, 0))) AS has_accept_language,
|
||||
max(toUInt8(if(position(src.client_headers, 'Cookie') > 0, 1, 0))) AS has_cookie,
|
||||
max(toUInt8(if(position(src.client_headers, 'Referer') > 0, 1, 0))) AS has_referer,
|
||||
max(toUInt8(if(length(src.header_sec_ch_ua) > 0, 100, if(length(src.header_user_agent) > 0, 50, 0)))) AS modern_browser_score,
|
||||
max(toUInt8(if(length(src.header_sec_ch_ua) > 0, 100, if(length(src.header_sec_fetch_site) > 0, 70, 0)))) AS modern_browser_score,
|
||||
max(toUInt8(if(length(src.header_sec_ch_ua) > 0, 1, 0))) AS has_sec_ch_ua,
|
||||
max(toUInt8(if((position(src.header_user_agent, 'Windows') > 0 AND position(src.header_sec_ch_ua_platform, 'Windows') == 0) OR (position(src.header_user_agent, 'iPhone') > 0 AND position(src.header_sec_ch_ua_platform, 'iOS') == 0), 1, 0))) AS ua_ch_mismatch,
|
||||
max(toUInt8(if(
|
||||
(src.header_sec_ch_ua_mobile = '?1' AND position(src.header_user_agent, 'Mobile') == 0 AND position(src.header_user_agent, 'Android') == 0 AND position(src.header_user_agent, 'iPhone') == 0)
|
||||
OR (src.header_sec_ch_ua_mobile = '?0' AND (position(src.header_user_agent, 'iPhone') > 0 OR position(src.header_user_agent, 'Android') > 0)),
|
||||
(src.header_sec_ch_ua_mobile = '?1' AND position(src.header_sec_ch_ua_platform, 'Windows') > 0)
|
||||
OR (src.header_sec_ch_ua_mobile = '?0' AND position(src.header_sec_ch_ua_platform, 'Android') > 0),
|
||||
1, 0))) AS sec_ch_mobile_mismatch,
|
||||
any(src.header_sec_fetch_mode) AS sec_fetch_mode,
|
||||
any(src.header_sec_fetch_dest) AS sec_fetch_dest
|
||||
|
||||
@ -33,7 +33,8 @@ CREATE TABLE IF NOT EXISTS ja4_processing.ml_detected_anomalies
|
||||
tcp_jitter_variance Float32, tcp_shared_count UInt32, true_window_size UInt64,
|
||||
window_mss_ratio Float32, alpn_http_mismatch UInt8, is_alpn_missing UInt8,
|
||||
sni_host_mismatch UInt8, header_count UInt16, has_accept_language UInt8,
|
||||
has_cookie UInt8, has_referer UInt8, modern_browser_score UInt8, is_headless UInt8,
|
||||
has_cookie UInt8, has_referer UInt8, modern_browser_score UInt8, has_sec_ch_ua UInt8,
|
||||
is_headless UInt8,
|
||||
ua_ch_mismatch UInt8, header_order_shared_count UInt32, ip_id_zero_ratio Float32,
|
||||
request_size_variance Float32, multiplexing_efficiency Float32,
|
||||
mss_mobile_mismatch UInt8, correlated UInt8, reason String,
|
||||
|
||||
@ -44,6 +44,7 @@ WITH base_data AS (
|
||||
h.header_order_hash AS header_order_hash, h.header_count AS header_count,
|
||||
h.has_accept_language AS has_accept_language, h.has_cookie AS has_cookie,
|
||||
h.has_referer AS has_referer, h.modern_browser_score AS modern_browser_score,
|
||||
h.has_sec_ch_ua AS has_sec_ch_ua,
|
||||
h.ua_ch_mismatch AS ua_ch_mismatch,
|
||||
(a.count_post / (a.hits + 1)) AS post_ratio,
|
||||
(a.uniq_query_params / (a.uniq_paths + 1)) AS fuzzing_index,
|
||||
@ -52,7 +53,7 @@ WITH base_data AS (
|
||||
(a.orphan_count / (a.hits + 1)) AS orphan_ratio,
|
||||
(a.ip_id_zero_count / (a.hits + 1)) AS ip_id_zero_ratio,
|
||||
(a.hits / (a.unique_conn_id + 1)) AS multiplexing_efficiency,
|
||||
IF(a.mss_1460_count > (a.hits * 0.8) AND h.modern_browser_score > 70, 1, 0) AS mss_mobile_mismatch,
|
||||
IF(a.mss_1460_count > (a.hits * 0.8) AND h.has_sec_ch_ua > 0, 1, 0) AS mss_mobile_mismatch,
|
||||
a.request_size_variance AS request_size_variance,
|
||||
IF(a.tls_alpn = 'h2' AND a.http_version != '2', 1, 0) AS alpn_http_mismatch,
|
||||
IF(length(a.tls_alpn) = 0 OR a.tls_alpn = '00', 1, 0) AS is_alpn_missing,
|
||||
@ -68,7 +69,7 @@ WITH base_data AS (
|
||||
(sum(a.hits) OVER (PARTITION BY a.ja4, a.src_asn) / (sum(a.hits) OVER (PARTITION BY a.ja4) + 1)) AS ja4_asn_concentration,
|
||||
(sum(a.hits) OVER (PARTITION BY a.ja4, a.src_country_code) / (sum(a.hits) OVER (PARTITION BY a.ja4) + 1)) AS ja4_country_concentration,
|
||||
IF(sum(a.hits) OVER (PARTITION BY a.ja4) < 100, 1, 0) AS is_rare_ja4,
|
||||
(count() OVER (PARTITION BY h.header_order_hash, a.first_ua) / (count() OVER (PARTITION BY a.first_ua) + 1)) AS header_order_confidence,
|
||||
(count() OVER (PARTITION BY h.header_order_hash, a.ja4) / (count() OVER (PARTITION BY a.ja4) + 1)) AS header_order_confidence,
|
||||
uniqExact(h.header_order_hash) OVER (PARTITION BY a.src_ip) AS distinct_header_orders,
|
||||
(a.uniq_paths / (a.hits + 1)) AS path_diversity_ratio,
|
||||
a.url_depth_variance AS url_depth_variance,
|
||||
@ -141,7 +142,8 @@ WITH base_data AS (
|
||||
window_start, src_ip, any(header_order_hash) AS header_order_hash,
|
||||
max(header_count) AS header_count, max(has_accept_language) AS has_accept_language,
|
||||
max(has_cookie) AS has_cookie, max(has_referer) AS has_referer,
|
||||
max(modern_browser_score) AS modern_browser_score, max(ua_ch_mismatch) AS ua_ch_mismatch,
|
||||
max(modern_browser_score) AS modern_browser_score, max(has_sec_ch_ua) AS has_sec_ch_ua,
|
||||
max(ua_ch_mismatch) AS ua_ch_mismatch,
|
||||
max(sec_ch_mobile_mismatch) AS sec_ch_mobile_mismatch,
|
||||
any(sec_fetch_mode) AS sec_fetch_mode, any(sec_fetch_dest) AS sec_fetch_dest
|
||||
FROM ja4_processing.agg_header_fingerprint_1h
|
||||
|
||||
Reference in New Issue
Block a user