feat(dashboard): fingerprint discovery page — extract and group JA4/H2/headers from traffic
- GET /api/fingerprint-discovery: queries http_logs, groups by JA4, and aggregates UA family, header presence rates (Sec-CH-UA, Sec-Fetch, Accept-Language, zstd, brotli, gzip, XFF), H2 data, TLS info, and dictionary lookups
- /fingerprints page: KPIs, doughnut chart by family, stacked header bars, filterable/sortable profile table, expandable detail panel
- Promote button: pushes H2 fingerprints to browser_h2_signatures via the existing POST /api/browser-signatures/entries endpoint
- Nav link: "Découverte" added after "Navigateurs" in the sidebar

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -1897,3 +1897,134 @@ async def browser_sig_delete(fingerprint: str = Query(...)) -> dict[str, Any]:
|
||||
except Exception as exc:
|
||||
logger.exception("browser_h2_signatures delete failed")
|
||||
raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/fingerprint-discovery — Extraction et regroupement des fingerprints
|
||||
# du trafic réel pour proposer des signatures navigateur
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/fingerprint-discovery")
async def fingerprint_discovery(
    days: int = Query(default=7, ge=1, le=30),
    min_hits: int = Query(default=10, ge=1, le=100000),
    limit: int = Query(default=300, ge=10, le=1000),
) -> dict[str, Any]:
    """Discover fingerprint profiles from ``http_logs``.

    Rows are grouped by JA4 and aggregated (user-agent family, HTTP header
    presence rates, H2 data, TLS info) so that browser signatures can be
    proposed from real traffic. The per-JA4 rows are then rolled up by
    browser family in Python.

    Args:
        days: Look-back window in days (1-30), compared against ``log_date``.
        min_hits: Minimum number of requests a JA4 needs to be returned.
        limit: Maximum number of JA4 profiles returned (10-1000).

    Returns:
        A dict with three keys: ``profiles`` (the per-JA4 rows exactly as
        returned by ``query``), ``groups`` (per-family summaries sorted by
        ``total_hits`` descending) and ``meta`` (counts plus the echoed
        ``days`` / ``min_hits``).

    Raises:
        HTTPException: Status 500 when the query raises.
    """
    try:
        profiles = query(
            _fingerprint_discovery_sql(),
            {"days": days, "min_hits": min_hits, "lim": limit},
        )
    except Exception as exc:
        logger.exception("fingerprint-discovery query failed")
        # NOTE(review): ``detail`` exposes the raw exception text to the
        # client; kept as-is because the frontend may display it.
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    groups_sorted = _group_profiles_by_family(profiles)

    return {
        "profiles": profiles,
        "groups": groups_sorted,
        "meta": {
            "total_ja4": len(profiles),
            "total_groups": len(groups_sorted),
            "days": days,
            "min_hits": min_hits,
        },
    }


def _fingerprint_discovery_sql() -> str:
    """Build the per-JA4 aggregation statement for ``fingerprint_discovery``.

    Only ``_DB`` and ``_DB_LOGS`` are interpolated in Python. The
    ``{days:UInt32}``, ``{min_hits:UInt32}`` and ``{lim:UInt32}`` markers sit
    in plain (non f-) strings on purpose so they reach ``query`` untouched
    and are bound from its parameter dict.
    """
    return (
        "SELECT "
        " ja4, "
        # -- Browser family derived from the User-Agent (majority vote) --
        # Branches are tested in order, so a UA carrying both a browser
        # token and 'bot' (e.g. a crawler advertising Chrome/ + Safari/)
        # lands in the browser branch, not in 'Bot'.
        " topK(1)("
        " multiIf("
        " position(header_user_agent, 'Edg/') > 0, 'Edge', "
        " position(header_user_agent, 'OPR/') > 0, 'Opera', "
        " position(header_user_agent, 'Chrome/') > 0 AND "
        " position(header_user_agent, 'Safari/') > 0, 'Chrome', "
        " position(header_user_agent, 'Firefox/') > 0, 'Firefox', "
        " position(header_user_agent, 'Safari/') > 0, 'Safari', "
        " position(lower(header_user_agent), 'bot') > 0 OR "
        " position(lower(header_user_agent), 'crawl') > 0 OR "
        " position(lower(header_user_agent), 'spider') > 0, 'Bot', "
        " header_user_agent = '', 'Vide', "
        " 'Autre'"
        " )"
        " )[1] AS ua_family, "
        # -- Volume --
        " count() AS total_hits, "
        " uniqExact(src_ip) AS unique_ips, "
        " uniqExact(header_user_agent) AS distinct_uas, "
        # -- User-Agent samples (top 3) --
        " topK(3)(header_user_agent) AS ua_samples, "
        # -- TLS --
        " any(tls_version) AS tls_version, "
        " any(tls_alpn) AS tls_alpn, "
        # -- H2 --
        " anyIf(h2_fingerprint, h2_fingerprint != '') AS h2_fp, "
        " anyIf(h2_settings_fp, h2_settings_fp != '') AS h2_settings, "
        " max(h2_window_update) AS h2_wu, "
        " anyIf(h2_pseudo_order, h2_pseudo_order != '') AS h2_pseudo, "
        # -- Header presence rates (%) --
        " round(countIf(header_sec_ch_ua != '') * 100.0 / count(), 1) "
        " AS pct_sec_ch_ua, "
        " round(countIf(header_sec_fetch_mode != '') * 100.0 / count(), 1) "
        " AS pct_sec_fetch, "
        " round(countIf(header_accept_language != '') * 100.0 / count(), 1) "
        " AS pct_accept_lang, "
        " round(countIf(position(header_accept_encoding, 'zstd') > 0) "
        " * 100.0 / count(), 1) AS pct_zstd, "
        " round(countIf(position(header_accept_encoding, 'br') > 0) "
        " * 100.0 / count(), 1) AS pct_brotli, "
        " round(countIf(position(header_accept_encoding, 'gzip') > 0) "
        " * 100.0 / count(), 1) AS pct_gzip, "
        " round(countIf(header_x_forwarded_for != '') * 100.0 / count(), 1) "
        " AS pct_xff, "
        # -- Sec-CH-UA details --
        " anyIf(header_sec_ch_ua, header_sec_ch_ua != '') AS sec_ch_ua_sample, "
        " anyIf(header_sec_ch_ua_platform, header_sec_ch_ua_platform != '') "
        " AS platform_sample, "
        " anyIf(header_sec_ch_ua_mobile, header_sec_ch_ua_mobile != '') "
        " AS mobile_sample, "
        # -- Dominant Accept-Encoding --
        " topK(1)(header_accept_encoding)[1] AS accept_enc_main, "
        # -- Dictionary lookup --
        f" dictGetOrDefault('{_DB}.dict_browser_ja4', 'browser_family', "
        " tuple(ja4), '') AS dict_family "
        # -- Source --
        f"FROM {_DB_LOGS}.http_logs "
        "WHERE ja4 != '' AND log_date >= today() - {days:UInt32} "
        "GROUP BY ja4 "
        "HAVING count() >= {min_hits:UInt32} "
        "ORDER BY total_hits DESC "
        "LIMIT {lim:UInt32}"
    )


def _group_profiles_by_family(
    profiles: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Roll per-JA4 rows up into one summary per browser family.

    The family is taken from ``dict_family`` first, then ``ua_family``, and
    falls back to ``"Inconnu"`` when both are empty or missing. Rows only
    need to support ``.get()``.

    ``unique_ips`` is the sum of the per-JA4 values, so an IP seen under
    several JA4 of the same family is counted once per JA4.

    Returns:
        The family summaries sorted by ``total_hits``, largest first.
    """
    summaries: dict[str, dict[str, Any]] = {}
    for row in profiles:
        family = row.get("dict_family") or row.get("ua_family") or "Inconnu"
        summary = summaries.setdefault(
            family,
            {
                "family": family,
                "ja4_count": 0,
                "total_hits": 0,
                "unique_ips": 0,
                "has_h2": False,
                "has_sec_ch_ua": False,
                "has_sec_fetch": False,
            },
        )
        summary["ja4_count"] += 1
        # ``or 0`` (rather than a .get default) also covers an explicit
        # None, matching how the pct_* values are read below.
        summary["total_hits"] += row.get("total_hits") or 0
        summary["unique_ips"] += row.get("unique_ips") or 0
        if row.get("h2_fp"):
            summary["has_h2"] = True
        # A header counts as present for the family as soon as one of its
        # JA4 sends it on more than half of its requests.
        if (row.get("pct_sec_ch_ua") or 0) > 50:
            summary["has_sec_ch_ua"] = True
        if (row.get("pct_sec_fetch") or 0) > 50:
            summary["has_sec_fetch"] = True

    return sorted(
        summaries.values(),
        key=lambda summary: summary["total_hits"],
        reverse=True,
    )
|
||||
|
||||
Reference in New Issue
Block a user