diff --git a/services/dashboard/backend/routes/api.py b/services/dashboard/backend/routes/api.py index 8f65fdb..7bf050e 100644 --- a/services/dashboard/backend/routes/api.py +++ b/services/dashboard/backend/routes/api.py @@ -1505,6 +1505,7 @@ _REFLIST_SORT = { "bot_ip": {"prefix", "bot_name"}, "bot_ja4": {"ja4", "bot_name"}, "browser_ja4": {"ja4", "browser_family", "tls_library"}, + "browser_h2": {"h2_fingerprint", "browser_family"}, "asn_reputation": {"src_asn", "label"}, "iplocate_asn": {"asn", "country_code", "name", "network"}, "anubis_ip_rules": {"prefix", "bot_name", "action", "category"}, @@ -1515,6 +1516,7 @@ _REFLIST_SEARCH_COLS: dict[str, list[str]] = { "bot_ip": ["prefix", "bot_name"], "bot_ja4": ["ja4", "bot_name"], "browser_ja4": ["ja4", "browser_family", "tls_library", "context"], + "browser_h2": ["h2_fingerprint", "browser_family"], "asn_reputation": ["toString(src_asn)", "label"], "iplocate_asn": ["network", "toString(asn)", "country_code", "name"], "anubis_ip_rules": ["prefix", "bot_name", "action", "category"], @@ -1529,6 +1531,10 @@ _REFLIST_QUERIES: dict[str, str] = { f"SELECT ja4, browser_family, tls_library, context " f"FROM dictionary('{_DB}.dict_browser_ja4')" ), + "browser_h2": ( + f"SELECT h2_fingerprint, browser_family " + f"FROM dictionary('{_DB}.dict_browser_h2') ORDER BY browser_family" + ), "asn_reputation": ( f"SELECT src_asn, label FROM dictionary('{_DB}.dict_asn_reputation')" ), @@ -1786,3 +1792,136 @@ async def browser_signatures() -> dict[str, Any]: return result
+
+# ---------------------------------------------------------------------------
+# GET    /api/browser-signatures/entries — list the managed H2 fingerprints
+# POST   /api/browser-signatures/entries — add an H2 fingerprint
+# DELETE /api/browser-signatures/entries — remove an H2 fingerprint
+# ---------------------------------------------------------------------------
+
+class BrowserH2Entry(BaseModel):
+    """New H2 fingerprint to record in browser_h2_signatures."""
+
+    h2_fingerprint: str
+    browser_family: str  # must be one of _VALID_BROWSER_FAMILIES
+    confidence: float = 1.0  # accepted range on insert: 0.0 to 1.0
+    notes: str = ""
+
+
+_VALID_BROWSER_FAMILIES = {"Chrome", "Firefox", "Safari", "Edge", "Other"}
+
+
+@router.get("/browser-signatures/entries")
+async def browser_sig_entries() -> dict[str, Any]:
+    """Return the rows of browser_h2_signatures, or the dictionary as fallback.
+
+    If the table query fails (e.g. migration 06 not applied yet), the content
+    of dict_browser_h2 is returned instead, with a constant confidence of 1.0,
+    empty notes and a ``readonly`` flag.
+
+    Raises:
+        HTTPException: 500 when the fallback query fails as well.
+    """
+    # Preferred source: the structured table (post-migration 06).
+    try:
+        rows = query(
+            f"SELECT h2_fingerprint, browser_family, confidence, notes "
+            f"FROM {_DB}.browser_h2_signatures "
+            f"ORDER BY browser_family, confidence DESC"
+        )
+        return {"entries": rows, "total": len(rows), "source": "table"}
+    except Exception:
+        # Still fall back, but keep a trace of why the table was unusable
+        # instead of silently discarding the error.
+        logger.warning(
+            "browser_h2_signatures query failed, falling back to dict_browser_h2",
+            exc_info=True,
+        )
+
+    # Fallback: the CSV-backed dictionary (pre-migration 06).
+    try:
+        rows = query(
+            f"SELECT h2_fingerprint, browser_family, "
+            f"toFloat32(1.0) AS confidence, '' AS notes "
+            f"FROM dictionary('{_DB}.dict_browser_h2') "
+            f"ORDER BY browser_family"
+        )
+        return {"entries": rows, "total": len(rows), "source": "dict_csv", "readonly": True}
+    except Exception as exc:
+        logger.exception("browser_h2 entries fallback failed")
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+
+@router.post("/browser-signatures/entries", status_code=201)
+async def browser_sig_add(body: BrowserH2Entry) -> dict[str, Any]:
+    """Insert an H2 fingerprint into browser_h2_signatures, then reload the dictionary.
+
+    Raises:
+        HTTPException: 422 on an empty fingerprint, unknown family or
+            out-of-range confidence; 500 when the insert fails.
+    """
+    fingerprint = body.h2_fingerprint.strip()
+    if not fingerprint:
+        raise HTTPException(status_code=422, detail="h2_fingerprint ne peut pas être vide")
+    if body.browser_family not in _VALID_BROWSER_FAMILIES:
+        # Sorted so the message is stable; set iteration order is not.
+        allowed = ", ".join(sorted(_VALID_BROWSER_FAMILIES))
+        raise HTTPException(
+            status_code=422,
+            detail=f"browser_family doit être l'un de {allowed}",
+        )
+    if not 0.0 <= body.confidence <= 1.0:
+        raise HTTPException(status_code=422, detail="confidence doit être entre 0.0 et 1.0")
+    try:
+        execute(
+            f"INSERT INTO {_DB}.browser_h2_signatures "
+            "(h2_fingerprint, browser_family, confidence, notes) VALUES "
+            "({fp:String}, {fam:String}, {conf:Float32}, {notes:String})",
+            {
+                "fp": fingerprint,
+                "fam": body.browser_family,
+                "conf": body.confidence,
+                "notes": body.notes,
+            },
+        )
+        # Force a dictionary reload; a reload failure is logged, not raised.
+        try:
+            execute(f"SYSTEM RELOAD DICTIONARY {_DB}.dict_browser_h2")
+        except Exception:
+            logger.warning(
+                "dict_browser_h2 reload failed (migration 06 peut-être non appliquée)",
+                exc_info=True,
+            )
+        return {"status": "ok", "h2_fingerprint": fingerprint}
+    except Exception as exc:
+        logger.exception("browser_h2_signatures insert failed")
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+
+
+@router.delete("/browser-signatures/entries")
+async def browser_sig_delete(fingerprint: str = Query(...)) -> dict[str, Any]:
+    """Delete an H2 fingerprint from browser_h2_signatures, then reload the dictionary.
+
+    Raises:
+        HTTPException: 422 on an empty fingerprint; 500 when the delete fails.
+    """
+    target = fingerprint.strip()
+    if not target:
+        raise HTTPException(status_code=422, detail="fingerprint ne peut pas être vide")
+    try:
+        # NOTE(review): ALTER TABLE ... DELETE is a ClickHouse mutation and is
+        # presumably asynchronous here — the reload below may still see the row
+        # unless mutations_sync is enabled. TODO confirm.
+        execute(
+            f"ALTER TABLE {_DB}.browser_h2_signatures DELETE "
+            "WHERE h2_fingerprint = {fp:String}",
+            {"fp": target},
+        )
+        try:
+            execute(f"SYSTEM RELOAD DICTIONARY {_DB}.dict_browser_h2")
+        except Exception:
+            logger.warning("dict_browser_h2 reload failed", exc_info=True)
+        return {"status": "ok", "deleted": target}
+    except Exception as exc:
+        logger.exception("browser_h2_signatures delete failed")
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
diff --git a/services/dashboard/backend/templates/browsers.html b/services/dashboard/backend/templates/browsers.html index e5c83a6..3b8e499 100644 --- a/services/dashboard/backend/templates/browsers.html +++ b/services/dashboard/backend/templates/browsers.html @@ -191,7 +191,6 @@
-
Chrome / Safari — m,a,s,p
@@ -216,9 +215,270 @@
+ +
+
+ + Base de signatures H2 +
+

Table browser_h2_signatures

+

Source des fingerprints HTTP/2 (format Akamai) utilisés par + dict_browser_h2. Le dictionnaire est rechargé automatiquement + après chaque ajout ou suppression.

+

Format : SETTINGS|WINDOW_UPDATE|PRIORITY|PSEUDO_ORDER
+ Exemple Chrome : 1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p

+

Source : ja4_processing.browser_h2_signatures (migration 06)

+
+
+ +
+ + + + +
+ + + + + + + + + + + + + +
Fingerprint H2 (format Akamai)FamilleConfidenceNotesAction
Chargement…
+
+
+ + +
+
+ + Règles de scoring — browser_matcher.py +
+

Dimensions du scoring navigateur

+

Le browser_matcher calcule un score 0–1 par famille en agrégeant 7 dimensions. + Ces règles sont définies dans bot_detector/browser_signatures.py.

+

Modification : éditer le fichier Python et redéployer bot-detector

+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DimensionPoidsChromeFirefoxSafari
H2 SETTINGS exact0.301:65536,2:0,4:6291456,6:2621441:65536,4:131072,5:163841:4096,3:100,4:65535
H2 WINDOW_UPDATE0.1515 663 10512 517 37710 485 760
Pseudo-header order0.15m,a,s,pm,p,s,am,a,s,p
HTTP headers cohérence0.15Sec-CH-UA ✓ · Sec-Fetch ✓Sec-CH-UA ✗ · Sec-Fetch ✓Sec-CH-UA ✗ · Sec-Fetch ✗
H2 PRIORITY frames0.10absentabsentabsent
TLS structure (JA4 famille)0.10Chromium · Chrome · Edge + GREASEFirefox · pas de GREASESafari · pas de GREASE
JA4 dict lookup0.05dict_browser_ja4 — correspondance fingerprint TLS exact
Seuil de bypass ML≥ 0.72≥ 0.68≥ 0.68
+
+

Mode actuel : DUAL_MODE — le matcher journalise les décisions sans modifier le scoring ML. + Activer le bypass : variable d'environnement BROWSER_MATCHER_REPLACE=true dans bot-detector.

+
+
+
{% endblock %} diff --git a/tests/integration/docker-compose.yml b/tests/integration/docker-compose.yml index 971e3c5..5dc2795 100644 --- a/tests/integration/docker-compose.yml +++ b/tests/integration/docker-compose.yml @@ -40,6 +40,8 @@ services: - ../../shared/clickhouse/10_perf_indexes.sql:/initdb-src/10_perf_indexes.sql:ro - ../../shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro - ../../shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro + # Reference CSV files (dictionaries / browser signatures) + - ../../shared/data/browser_h2.csv:/initdb-src/browser_h2.csv:ro # Empty CSV stubs (dictionaries expect these files) - ./platform/csv-stubs:/var/lib/clickhouse/user_files ports: diff --git a/tests/integration/platform/clickhouse-init.sh b/tests/integration/platform/clickhouse-init.sh index 2cc30db..372af02 100755 --- a/tests/integration/platform/clickhouse-init.sh +++ b/tests/integration/platform/clickhouse-init.sh @@ -8,8 +8,19 @@ set -e SRC_DIR="/initdb-src" TMP_DIR="/tmp/initdb-patched" +USER_FILES="/var/lib/clickhouse/user_files" mkdir -p "$TMP_DIR" +# Copier les CSV de référence dans user_files (dictionnaires navigateurs) +for csv in "$SRC_DIR"/*.csv; do + [ -f "$csv" ] || continue + fname=$(basename "$csv") + if [ ! -f "$USER_FILES/$fname" ]; then + cp "$csv" "$USER_FILES/$fname" + echo "[init] CSV copié : $fname" + fi +done + for f in "$SRC_DIR"/*.sql; do [ -f "$f" ] || continue base=$(basename "$f")