feat(dashboard): fingerprint discovery page — extract and group JA4/H2/headers from traffic

- GET /api/fingerprint-discovery: queries http_logs, groups by JA4, aggregates
  UA family, header presence rates (Sec-CH-UA, Sec-Fetch, Accept-Language,
  zstd, brotli, gzip, XFF), H2 data, TLS info, dict lookups
- /fingerprints page: KPIs, doughnut chart by family, stacked header bars,
  filterable/sortable profile table, expandable detail panel
- Promote button: push H2 fingerprints to browser_h2_signatures via existing
  POST /api/browser-signatures/entries endpoint
- Nav link: Découverte added after Navigateurs in sidebar

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-10 15:02:53 +02:00
parent fde6864311
commit fb73c60e7d
4 changed files with 538 additions and 0 deletions

View File

@ -1897,3 +1897,134 @@ async def browser_sig_delete(fingerprint: str = Query(...)) -> dict[str, Any]:
except Exception as exc:
logger.exception("browser_h2_signatures delete failed")
raise HTTPException(status_code=500, detail=str(exc))
# ---------------------------------------------------------------------------
# GET /api/fingerprint-discovery — Extract and group fingerprints observed in
# real traffic in order to suggest browser signatures
# ---------------------------------------------------------------------------
@router.get("/fingerprint-discovery")
async def fingerprint_discovery(
    days: int = Query(default=7, ge=1, le=30),
    min_hits: int = Query(default=10, ge=1, le=100000),
    limit: int = Query(default=300, ge=10, le=1000),
) -> dict[str, Any]:
    """Discover fingerprint profiles from ``http_logs``.

    Groups rows by JA4 and aggregates user-agent, HTTP header presence
    rates, H2 data and TLS fields so that browser signatures can be
    proposed from the result.

    Args:
        days: Look-back window in days (``log_date >= today() - days``).
        min_hits: Minimum number of rows a JA4 must have to be returned.
        limit: Maximum number of JA4 profiles returned.

    Returns:
        A dict with three keys: ``profiles`` (the rows returned by
        ``query``, one per JA4), ``groups`` (per-family roll-up sorted by
        descending ``total_hits``) and ``meta`` (counts plus the echoed
        ``days`` / ``min_hits`` arguments).

    Raises:
        HTTPException: Status 500 if the query fails; the original
            exception is logged and chained as the cause.
    """
    try:
        profiles = query(
            _fingerprint_discovery_sql(),
            {"days": days, "min_hits": min_hits, "lim": limit},
        )
    except Exception as exc:
        logger.exception("fingerprint-discovery query failed")
        # Chain the cause so the traceback keeps the underlying failure.
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    groups_sorted = _group_profiles_by_family(profiles)
    return {
        "profiles": profiles,
        "groups": groups_sorted,
        "meta": {
            "total_ja4": len(profiles),
            "total_groups": len(groups_sorted),
            "days": days,
            "min_hits": min_hits,
        },
    }


def _fingerprint_discovery_sql() -> str:
    """Build the SQL text of the per-JA4 fingerprint aggregation."""
    # Only the two fragments that interpolate _DB / _DB_LOGS are f-strings.
    # The `{name:Type}` tokens in the trailing clauses must reach `query`
    # verbatim (presumably server-side query parameters filled from the
    # params dict — confirm against `query`), so those fragments must stay
    # plain strings.
    return (
        "SELECT "
        " ja4, "
        # -- Browser family derived from the User-Agent (majority vote) --
        # NOTE(review): the browser tests come before the bot test, so a
        # crawler whose UA also contains 'Chrome/' and 'Safari/' is
        # labelled 'Chrome' — confirm this ordering is intended.
        " topK(1)("
        " multiIf("
        " position(header_user_agent, 'Edg/') > 0, 'Edge', "
        " position(header_user_agent, 'OPR/') > 0, 'Opera', "
        " position(header_user_agent, 'Chrome/') > 0 AND "
        " position(header_user_agent, 'Safari/') > 0, 'Chrome', "
        " position(header_user_agent, 'Firefox/') > 0, 'Firefox', "
        " position(header_user_agent, 'Safari/') > 0, 'Safari', "
        " position(lower(header_user_agent), 'bot') > 0 OR "
        " position(lower(header_user_agent), 'crawl') > 0 OR "
        " position(lower(header_user_agent), 'spider') > 0, 'Bot', "
        " header_user_agent = '', 'Vide', "
        " 'Autre'"
        " )"
        " )[1] AS ua_family, "
        # -- Volume --
        " count() AS total_hits, "
        " uniqExact(src_ip) AS unique_ips, "
        " uniqExact(header_user_agent) AS distinct_uas, "
        # -- User-Agent samples (top 3) --
        " topK(3)(header_user_agent) AS ua_samples, "
        # -- TLS --
        " any(tls_version) AS tls_version, "
        " any(tls_alpn) AS tls_alpn, "
        # -- H2 --
        " anyIf(h2_fingerprint, h2_fingerprint != '') AS h2_fp, "
        " anyIf(h2_settings_fp, h2_settings_fp != '') AS h2_settings, "
        " max(h2_window_update) AS h2_wu, "
        " anyIf(h2_pseudo_order, h2_pseudo_order != '') AS h2_pseudo, "
        # -- Header presence rates (%) --
        " round(countIf(header_sec_ch_ua != '') * 100.0 / count(), 1) "
        " AS pct_sec_ch_ua, "
        " round(countIf(header_sec_fetch_mode != '') * 100.0 / count(), 1) "
        " AS pct_sec_fetch, "
        " round(countIf(header_accept_language != '') * 100.0 / count(), 1) "
        " AS pct_accept_lang, "
        " round(countIf(position(header_accept_encoding, 'zstd') > 0) "
        " * 100.0 / count(), 1) AS pct_zstd, "
        " round(countIf(position(header_accept_encoding, 'br') > 0) "
        " * 100.0 / count(), 1) AS pct_brotli, "
        " round(countIf(position(header_accept_encoding, 'gzip') > 0) "
        " * 100.0 / count(), 1) AS pct_gzip, "
        " round(countIf(header_x_forwarded_for != '') * 100.0 / count(), 1) "
        " AS pct_xff, "
        # -- Sec-CH-UA details --
        " anyIf(header_sec_ch_ua, header_sec_ch_ua != '') AS sec_ch_ua_sample, "
        " anyIf(header_sec_ch_ua_platform, header_sec_ch_ua_platform != '') "
        " AS platform_sample, "
        " anyIf(header_sec_ch_ua_mobile, header_sec_ch_ua_mobile != '') "
        " AS mobile_sample, "
        # -- Dominant Accept-Encoding --
        " topK(1)(header_accept_encoding)[1] AS accept_enc_main, "
        # -- Dictionary lookup --
        f" dictGetOrDefault('{_DB}.dict_browser_ja4', 'browser_family', "
        " tuple(ja4), '') AS dict_family "
        # -- Source --
        f"FROM {_DB_LOGS}.http_logs "
        "WHERE ja4 != '' AND log_date >= today() - {days:UInt32} "
        "GROUP BY ja4 "
        "HAVING count() >= {min_hits:UInt32} "
        "ORDER BY total_hits DESC "
        "LIMIT {lim:UInt32}"
    )


def _group_profiles_by_family(
    profiles: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Roll JA4 profiles up by browser family, largest ``total_hits`` first."""
    groups: dict[str, dict[str, Any]] = {}
    for profile in profiles:
        # Family priority: dictionary lookup first, then the UA-derived vote.
        family = profile.get("dict_family") or profile.get("ua_family") or "Inconnu"
        group = groups.setdefault(
            family,
            {
                "family": family,
                "ja4_count": 0,
                "total_hits": 0,
                "unique_ips": 0,
                "has_h2": False,
                "has_sec_ch_ua": False,
                "has_sec_fetch": False,
            },
        )
        group["ja4_count"] += 1
        # `or 0` also covers a key that is present with a None value.
        group["total_hits"] += profile.get("total_hits") or 0
        # NOTE: this sums per-JA4 distinct counts, so an IP seen under
        # several JA4s of one family is counted once per JA4.
        group["unique_ips"] += profile.get("unique_ips") or 0
        if profile.get("h2_fp"):
            group["has_h2"] = True
        # A header counts as "present" for the family as soon as one of its
        # JA4 profiles sends it on more than half of its requests.
        if (profile.get("pct_sec_ch_ua") or 0) > 50:
            group["has_sec_ch_ua"] = True
        if (profile.get("pct_sec_fetch") or 0) > 50:
            group["has_sec_fetch"] = True
    return sorted(groups.values(), key=lambda g: g["total_hits"], reverse=True)

View File

@ -96,3 +96,8 @@ async def health_page(request: Request):
@router.get("/browsers")
async def browsers_page(request: Request):
    """Render ``browsers.html`` with the context built for the "browsers" page."""
    context = _ctx(request, "browsers")
    return templates.TemplateResponse("browsers.html", context)
@router.get("/fingerprints")
async def fingerprints_page(request: Request):
    """Render ``fingerprints.html`` with the context built for "fingerprints"."""
    context = _ctx(request, "fingerprints")
    return templates.TemplateResponse("fingerprints.html", context)