fix(bot-detector): neutralize H2 dimensions behind proxy (X-Forwarded-For)
When has_xff=1, the H2 connection is terminated by the reverse proxy/CDN, so the client's H2 fingerprint is lost. Previously only D1 (h2_settings) was neutralized; D2 (window_update), D3 (pseudo_order), and D4 (priority) still penalized proxied traffic: a real Chrome behind Cloudflare scored 0.0 on 3 dimensions (45% of the total weight).

Now all 4 H2 dimensions return 0.5 (neutral) when has_xff>0, and non-browser H2 detection is also disabled behind proxies.

Tests: 10/10 passed, including 3 new XFF-specific cases.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -249,16 +249,61 @@ def test_tls_h2_mismatch_detection():
|
||||
|
||||
|
||||
def test_cdn_proxy_neutralizes_h2_settings():
    """Session behind a CDN (has_xff=1): every H2 dimension is neutralized."""
    df = _chrome_session(
        has_xff=1,  # CDN proxy detected
        h2_dict_family="",  # the proxy terminated the H2 connection: fingerprint lost
        h2_settings_known=0,
        h2_window_update_value=0,  # no H2 data from the client
    )
    result = run_browser_matcher(df)

    # The session must not be flagged NON_BROWSER: sitting behind a proxy does
    # not make the client malicious.
    # `==` rather than `is`: the cell presumably holds a NumPy/pandas boolean,
    # not the `False` singleton (assumption; confirm against run_browser_matcher).
    assert result.loc[0, "bm_non_browser"] == False

    # The score should stay meaningful thanks to the HTTP headers + TLS + JA4
    # dimensions (the 4 H2 dimensions score 0.5 each: neutral, not penalizing).
    score = result.loc[0, "bm_score"]
    assert score > 0.2, f"Score trop bas derrière proxy: {score}"
|
||||
|
||||
|
||||
def test_cdn_proxy_score_higher_than_no_proxy_without_h2():
    """A real Chrome behind a CDN must score higher than curl without XFF.

    Behind a proxy the 4 H2 dimensions are worth 0.5 (neutral) instead of 0.0,
    so the total score is higher than that of a tool with no H2 at all.
    """
    # Chrome behind a CDN: H2 fingerprint lost, but HTTP headers + TLS are correct.
    proxied_overrides = {
        "has_xff": 1,
        "h2_dict_family": "",
        "h2_settings_known": 0,
        "h2_window_update_value": 0,
    }
    chrome_via_cdn = _chrome_session(**proxied_overrides)
    # curl without a proxy: no H2 and no proper headers.
    direct_curl = _curl_session()

    chrome_result = run_browser_matcher(chrome_via_cdn)
    curl_result = run_browser_matcher(direct_curl)

    proxy_score = chrome_result.loc[0, "bm_score"]
    curl_score = curl_result.loc[0, "bm_score"]
    assert proxy_score > curl_score, (
        f"Chrome via CDN ({proxy_score}) devrait scorer > curl ({curl_score})"
    )
|
||||
|
||||
|
||||
def test_cdn_proxy_go_not_flagged_non_browser():
    """go net/http behind a CDN proxy should not be flagged NON_BROWSER.

    The observed H2 WINDOW_UPDATE comes from the proxy, not from the client.
    """
    go_like_overrides = {
        "has_xff": 1,
        # go signature, but it originates from the proxy
        "h2_window_update_value": 1073676289,
        "h2_window_absent": 0,
        "h2_settings_known": 1,
    }
    verdict = run_browser_matcher(_curl_session(**go_like_overrides))
    assert verdict.loc[0, "bm_non_browser"] == False
|
||||
|
||||
|
||||
def test_non_browser_go_net_http():
|
||||
|
||||
Reference in New Issue
Block a user