From fb73c60e7dfa3bec43c0af987c61e2c70c28512d Mon Sep 17 00:00:00 2001 From: toto Date: Fri, 10 Apr 2026 15:02:53 +0200 Subject: [PATCH] =?UTF-8?q?feat(dashboard):=20fingerprint=20discovery=20pa?= =?UTF-8?q?ge=20=E2=80=94=20extract=20and=20group=20JA4/H2/headers=20from?= =?UTF-8?q?=20traffic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - GET /api/fingerprint-discovery: queries http_logs, groups by JA4, aggregates UA family, header presence rates (Sec-CH-UA, Sec-Fetch, Accept-Language, zstd, brotli, gzip, XFF), H2 data, TLS info, dict lookups - /fingerprints page: KPIs, doughnut chart by family, stacked header bars, filterable/sortable profile table, expandable detail panel - Promote button: push H2 fingerprints to browser_h2_signatures via existing POST /api/browser-signatures/entries endpoint - Nav link: Découverte added after Navigateurs in sidebar Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- services/dashboard/backend/routes/api.py | 131 ++++++ services/dashboard/backend/routes/pages.py | 5 + .../dashboard/backend/templates/base.html | 4 + .../backend/templates/fingerprints.html | 398 ++++++++++++++++++ 4 files changed, 538 insertions(+) create mode 100644 services/dashboard/backend/templates/fingerprints.html diff --git a/services/dashboard/backend/routes/api.py b/services/dashboard/backend/routes/api.py index 7bf050e..6857303 100644 --- a/services/dashboard/backend/routes/api.py +++ b/services/dashboard/backend/routes/api.py @@ -1897,3 +1897,134 @@ async def browser_sig_delete(fingerprint: str = Query(...)) -> dict[str, Any]: except Exception as exc: logger.exception("browser_h2_signatures delete failed") raise HTTPException(status_code=500, detail=str(exc)) + + +# --------------------------------------------------------------------------- +# GET /api/fingerprint-discovery — Extraction et regroupement des fingerprints +# du trafic réel pour proposer des signatures navigateur +# 
---------------------------------------------------------------------------
+
+@router.get("/fingerprint-discovery")
+async def fingerprint_discovery(
+    days: int = Query(default=7, ge=1, le=30),  # look-back window: rows with log_date >= today() - days
+    min_hits: int = Query(default=10, ge=1, le=100000),  # HAVING threshold: minimum row count per JA4
+    limit: int = Query(default=300, ge=10, le=1000),  # max JA4 rows returned (highest total_hits first)
+) -> dict[str, Any]:
+    """Discover fingerprint profiles from http_logs.
+
+    Groups rows by JA4 and aggregates user-agent, HTTP header, H2 and TLS data;
+    returns {"profiles": rows, "groups": per-family rollup, "meta": counts + echoed params}.
+    """
+    try:
+        profiles = query(
+            f"SELECT "
+            f" ja4, "
+            # ── Browser family derived from the User-Agent (majority vote) ──
+            f" topK(1)("
+            f" multiIf("  # NOTE(review): first matching branch wins, so the branch order below is significant
+            f" position(header_user_agent, 'Edg/') > 0, 'Edge', "
+            f" position(header_user_agent, 'OPR/') > 0, 'Opera', "
+            f" position(header_user_agent, 'Chrome/') > 0 AND "
+            f" position(header_user_agent, 'Safari/') > 0, 'Chrome', "
+            f" position(header_user_agent, 'Firefox/') > 0, 'Firefox', "
+            f" position(header_user_agent, 'Safari/') > 0, 'Safari', "
+            f" position(lower(header_user_agent), 'bot') > 0 OR "  # NOTE(review): tested after the browser branches, so a UA with both 'Chrome/' and 'bot' is labelled 'Chrome' — confirm intended
+            f" position(lower(header_user_agent), 'crawl') > 0 OR "
+            f" position(lower(header_user_agent), 'spider') > 0, 'Bot', "
+            f" header_user_agent = '', 'Vide', "
+            f" 'Autre'"
+            f" )"
+            f" )[1] AS ua_family, "
+            # ── Volume ──
+            f" count() AS total_hits, "
+            f" uniqExact(src_ip) AS unique_ips, "
+            f" uniqExact(header_user_agent) AS distinct_uas, "
+            # ── UA samples (top 3) ──
+            f" topK(3)(header_user_agent) AS ua_samples, "
+            # ── TLS ──
+            f" any(tls_version) AS tls_version, "  # unlike the anyIf() columns below, no != '' filter is applied here
+            f" any(tls_alpn) AS tls_alpn, "
+            # ── H2 ──
+            f" anyIf(h2_fingerprint, h2_fingerprint != '') AS h2_fp, "
+            f" anyIf(h2_settings_fp, h2_settings_fp != '') AS h2_settings, "
+            f" max(h2_window_update) AS h2_wu, "
+            f" anyIf(h2_pseudo_order, h2_pseudo_order != '') AS h2_pseudo, "
+            # ── Header presence rates (%) ──
+            f" round(countIf(header_sec_ch_ua != '') * 100.0 / count(), 1) "
+            f" AS pct_sec_ch_ua, "
+            f" round(countIf(header_sec_fetch_mode != '') * 100.0 / count(), 1) "
+            f" AS pct_sec_fetch, "
+            f" round(countIf(header_accept_language != '') * 100.0 / count(), 1) "
+            f" AS pct_accept_lang, "
+            f" round(countIf(position(header_accept_encoding, 'zstd') > 0) "
+            f" * 100.0 / count(), 1) AS pct_zstd, "
+            f" round(countIf(position(header_accept_encoding, 'br') > 0) "  # plain substring test on 'br'
+            f" * 100.0 / count(), 1) AS pct_brotli, "
+            f" round(countIf(position(header_accept_encoding, 'gzip') > 0) "
+            f" * 100.0 / count(), 1) AS pct_gzip, "
+            f" round(countIf(header_x_forwarded_for != '') * 100.0 / count(), 1) "
+            f" AS pct_xff, "
+            # ── Sec-CH-UA details ──
+            f" anyIf(header_sec_ch_ua, header_sec_ch_ua != '') AS sec_ch_ua_sample, "
+            f" anyIf(header_sec_ch_ua_platform, header_sec_ch_ua_platform != '') "
+            f" AS platform_sample, "
+            f" anyIf(header_sec_ch_ua_mobile, header_sec_ch_ua_mobile != '') "
+            f" AS mobile_sample, "
+            # ── Dominant Accept-Encoding ──
+            f" topK(1)(header_accept_encoding)[1] AS accept_enc_main, "
+            # ── Dictionary lookup ──
+            f" dictGetOrDefault('{_DB}.dict_browser_ja4', 'browser_family', "
+            f" tuple(ja4), '') AS dict_family "
+            # ── Source ──
+            f"FROM {_DB_LOGS}.http_logs "
+            "WHERE ja4 != '' AND log_date >= today() - {days:UInt32} "  # plain (non-f) literals from here on: the {name:Type} placeholders stay verbatim — presumably bound by query() from the dict below; confirm
+            "GROUP BY ja4 "
+            "HAVING count() >= {min_hits:UInt32} "
+            "ORDER BY total_hits DESC "
+            "LIMIT {lim:UInt32}",
+            {"days": days, "min_hits": min_hits, "lim": limit},
+        )
+    except Exception as exc:
+        logger.exception("fingerprint-discovery query failed")
+        raise HTTPException(status_code=500, detail=str(exc))  # any query failure becomes a 500 whose detail is the exception text
+
+    # ── Group by browser family on the Python side ──
+    groups: dict[str, dict[str, Any]] = {}
+    for p in profiles:
+        # Family precedence: dictionary lookup, then UA-derived family, then "Inconnu"
+        family = p.get("dict_family") or p.get("ua_family") or "Inconnu"
+        if family not in groups:
+            groups[family] = {
+                "family": family,
+                "ja4_count": 0,
+                "total_hits": 0,
+                "unique_ips": 0,
+                "has_h2": False,
+                "has_sec_ch_ua": False,
+                "has_sec_fetch": False,
+            }
+        g = groups[family]
+        g["ja4_count"] += 1
+        g["total_hits"] += p.get("total_hits", 0)
+        g["unique_ips"] += p.get("unique_ips", 0)  # sum of per-JA4 distinct counts: an IP seen under several JA4s is counted once per JA4
+        if p.get("h2_fp"):
+            g["has_h2"] = True
+        if (p.get("pct_sec_ch_ua") or 0) > 50:  # set as soon as one JA4 of the family exceeds 50 %
+            g["has_sec_ch_ua"] = True
+        if (p.get("pct_sec_fetch") or 0) > 50:
+            g["has_sec_fetch"] = True
+
+    groups_sorted = sorted(
+        groups.values(), key=lambda g: g["total_hits"], reverse=True
+    )
+
+    return {
+        "profiles": profiles,
+        "groups": groups_sorted,
+        "meta": {
+            "total_ja4": len(profiles),
+            "total_groups": len(groups_sorted),
+            "days": days,
+            "min_hits": min_hits,
+        },
+    }
diff --git a/services/dashboard/backend/routes/pages.py b/services/dashboard/backend/routes/pages.py
index 557df68..632f370 100644
--- a/services/dashboard/backend/routes/pages.py
+++ b/services/dashboard/backend/routes/pages.py
@@ -96,3 +96,8 @@ async def health_page(request: Request):
 @router.get("/browsers")
 async def browsers_page(request: Request):
     return templates.TemplateResponse("browsers.html", _ctx(request, "browsers"))
+
+
+@router.get("/fingerprints")
+async def fingerprints_page(request: Request):  # renders the fingerprints.html template for GET /fingerprints
+    return templates.TemplateResponse("fingerprints.html", _ctx(request, "fingerprints"))
diff --git a/services/dashboard/backend/templates/base.html b/services/dashboard/backend/templates/base.html
index 853b6b6..6261ef0 100644
--- a/services/dashboard/backend/templates/base.html
+++ b/services/dashboard/backend/templates/base.html
@@ -163,6 +163,10 @@
 Navigateurs
+
+
+ Découverte
+
diff --git a/services/dashboard/backend/templates/fingerprints.html b/services/dashboard/backend/templates/fingerprints.html
new file mode 100644
index 0000000..531f685
--- /dev/null
+++ b/services/dashboard/backend/templates/fingerprints.html
@@ -0,0 +1,398 @@
+{% extends "base.html" %}
+{% block page_title %}Découverte de fingerprints{% endblock %}
+{% block content %}
+
+ + +
+

+ + Découverte de fingerprints +

+
+
+
+
JA4 distincts
+
+
+
+
Familles
+
+
+
+
Avec H2
+
+
+
+
Requêtes
+
+
+
+ + +
+ + + + + + + + +
+ + +
+
+

Répartition par famille

+ +
+
+

Résumé par groupe

+
+ + + + + + + + + + + + + +
FamilleJA4HitsIPsH2Sec-CH-UASec-Fetch
+
+
+
+ + +
+

Présence des headers HTTP par JA4 (top 50)

+ +
+ + +
+
+

Profils JA4 détaillés

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + +
JA4FamilleHitsIPsUAsTLSCH-UAFetchLangBrZstdH2 FPH2 WUPseudoDictUA principal
+
+
+ + + + +
+ + + +{% endblock %}