feat(dashboard): browser signatures management UI

- Ajoute dict_browser_h2 dans /reflists (lecture seule via dict_browser_h2)
- Nouveaux endpoints API :
    GET  /api/browser-signatures/entries — liste browser_h2_signatures
         (fallback dict CSV si migration 06 non appliquée)
    POST /api/browser-signatures/entries — ajout fingerprint + reload dict
    DELETE /api/browser-signatures/entries — suppression + reload dict
- Page /browsers : 2 nouvelles sections
    'Base de signatures H2' — tableau des 10 fingerprints, form d'ajout,
    mode lecture seule automatique si migration 06 non appliquée
    'Règles de scoring browser_matcher.py' — tableau statique des 7 dimensions
    (poids, valeurs par famille, seuils de bypass)
- Integration : browser_h2.csv copié dans user_files au démarrage ClickHouse

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-10 14:46:07 +02:00
parent da1b579d4f
commit fde6864311
4 changed files with 386 additions and 1 deletions

View File

@ -1505,6 +1505,7 @@ _REFLIST_SORT = {
"bot_ip": {"prefix", "bot_name"},
"bot_ja4": {"ja4", "bot_name"},
"browser_ja4": {"ja4", "browser_family", "tls_library"},
"browser_h2": {"h2_fingerprint", "browser_family"},
"asn_reputation": {"src_asn", "label"},
"iplocate_asn": {"asn", "country_code", "name", "network"},
"anubis_ip_rules": {"prefix", "bot_name", "action", "category"},
@ -1515,6 +1516,7 @@ _REFLIST_SEARCH_COLS: dict[str, list[str]] = {
"bot_ip": ["prefix", "bot_name"],
"bot_ja4": ["ja4", "bot_name"],
"browser_ja4": ["ja4", "browser_family", "tls_library", "context"],
"browser_h2": ["h2_fingerprint", "browser_family"],
"asn_reputation": ["toString(src_asn)", "label"],
"iplocate_asn": ["network", "toString(asn)", "country_code", "name"],
"anubis_ip_rules": ["prefix", "bot_name", "action", "category"],
@ -1529,6 +1531,10 @@ _REFLIST_QUERIES: dict[str, str] = {
f"SELECT ja4, browser_family, tls_library, context "
f"FROM dictionary('{_DB}.dict_browser_ja4')"
),
"browser_h2": (
f"SELECT h2_fingerprint, browser_family "
f"FROM dictionary('{_DB}.dict_browser_h2') ORDER BY browser_family"
),
"asn_reputation": (
f"SELECT src_asn, label FROM dictionary('{_DB}.dict_asn_reputation')"
),
@ -1786,3 +1792,108 @@ async def browser_signatures() -> dict[str, Any]:
return result
# ---------------------------------------------------------------------------
# GET /api/browser-signatures/entries — liste des fingerprints H2 gérés
# POST /api/browser-signatures/entries — ajouter un fingerprint H2
# DELETE /api/browser-signatures/entries — supprimer un fingerprint H2
# ---------------------------------------------------------------------------
class BrowserH2Entry(BaseModel):
    """New H2 fingerprint to register in browser_h2_signatures."""

    # Raw HTTP/2 fingerprint string; the POST handler strips it and rejects blanks.
    h2_fingerprint: str
    # Browser family label; the POST handler checks it against _VALID_BROWSER_FAMILIES.
    browser_family: str
    # Match confidence; the POST handler only accepts values within [0.0, 1.0].
    confidence: float = 1.0
    # Free-form annotation stored next to the fingerprint.
    notes: str = ""
# Browser family labels accepted by the POST /browser-signatures/entries validation.
_VALID_BROWSER_FAMILIES = {"Chrome", "Firefox", "Safari", "Edge", "Other"}
@router.get("/browser-signatures/entries")
async def browser_sig_entries() -> dict[str, Any]:
    """List the managed H2 fingerprints.

    The structured table ``browser_h2_signatures`` is queried first. If that
    query fails (for instance because migration 06 has not been applied yet),
    the CSV-backed dictionary ``dict_browser_h2`` is read instead, with a
    constant confidence of 1.0 and empty notes, and the payload is flagged
    read-only.

    Returns:
        A dict with ``entries`` (the rows), ``total`` (row count) and
        ``source`` (``"table"`` or ``"dict_csv"``). The fallback payload also
        carries ``readonly: True``.

    Raises:
        HTTPException: 500 when the fallback query fails as well.
    """
    # Preferred source: structured table (post-migration 06).
    try:
        rows = query(
            f"SELECT h2_fingerprint, browser_family, confidence, notes "
            f"FROM {_DB}.browser_h2_signatures "
            f"ORDER BY browser_family, confidence DESC"
        )
    except Exception:
        # Best-effort fallback is intentional, but the failure must leave a
        # trace: it may be something other than a missing table.
        logger.warning(
            "browser_h2_signatures query failed, falling back to dict_browser_h2",
            exc_info=True,
        )
    else:
        return {"entries": rows, "total": len(rows), "source": "table"}

    # Fallback: CSV dictionary (pre-migration 06).
    try:
        rows = query(
            f"SELECT h2_fingerprint, browser_family, "
            f"toFloat32(1.0) AS confidence, '' AS notes "
            f"FROM dictionary('{_DB}.dict_browser_h2') "
            f"ORDER BY browser_family"
        )
    except Exception as exc:
        logger.exception("browser_h2 entries fallback failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"entries": rows, "total": len(rows), "source": "dict_csv", "readonly": True}
@router.post("/browser-signatures/entries", status_code=201)
async def browser_sig_add(body: BrowserH2Entry) -> dict[str, Any]:
    """Insert an H2 fingerprint into browser_h2_signatures, then reload the dictionary.

    Args:
        body: Fingerprint payload. The fingerprint is stripped before use.

    Returns:
        ``{"status": "ok", "h2_fingerprint": <stripped fingerprint>}``.

    Raises:
        HTTPException: 422 when the fingerprint is blank, the family is not in
            ``_VALID_BROWSER_FAMILIES`` or the confidence is outside
            [0.0, 1.0]; 500 when the insert fails.
    """
    fingerprint = body.h2_fingerprint.strip()
    if not fingerprint:
        raise HTTPException(status_code=422, detail="h2_fingerprint ne peut pas être vide")
    if body.browser_family not in _VALID_BROWSER_FAMILIES:
        # Sorted so the message is stable: set iteration order for strings
        # changes from one interpreter run to the next.
        allowed = ", ".join(sorted(_VALID_BROWSER_FAMILIES))
        raise HTTPException(
            status_code=422,
            detail=f"browser_family doit être l'un de : {allowed}",
        )
    if not 0.0 <= body.confidence <= 1.0:
        raise HTTPException(status_code=422, detail="confidence doit être entre 0.0 et 1.0")

    try:
        # The {name:Type} markers are query parameters, not f-string fields.
        execute(
            f"INSERT INTO {_DB}.browser_h2_signatures "
            "(h2_fingerprint, browser_family, confidence, notes) VALUES "
            "({fp:String}, {fam:String}, {conf:Float32}, {notes:String})",
            {
                "fp": fingerprint,
                "fam": body.browser_family,
                "conf": body.confidence,
                "notes": body.notes,
            },
        )
    except Exception as exc:
        logger.exception("browser_h2_signatures insert failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    # Force a dictionary reload; a reload failure must not fail the request
    # because the row is already stored.
    try:
        execute(f"SYSTEM RELOAD DICTIONARY {_DB}.dict_browser_h2")
    except Exception:
        logger.warning(
            "dict_browser_h2 reload failed (migration 06 peut-être non appliquée)",
            exc_info=True,
        )
    return {"status": "ok", "h2_fingerprint": fingerprint}
@router.delete("/browser-signatures/entries")
async def browser_sig_delete(fingerprint: str = Query(...)) -> dict[str, Any]:
    """Delete an H2 fingerprint from browser_h2_signatures, then reload the dictionary.

    Args:
        fingerprint: Fingerprint to remove (query parameter). It is stripped
            before use.

    Returns:
        ``{"status": "ok", "deleted": <stripped fingerprint>}``.

    Raises:
        HTTPException: 422 when the fingerprint is blank; 500 when the delete
            statement fails.
    """
    target = fingerprint.strip()
    if not target:
        raise HTTPException(status_code=422, detail="fingerprint ne peut pas être vide")

    try:
        # ALTER ... DELETE is an asynchronous mutation by default. Wait for it
        # (mutations_sync = 1) so the reload below does not pick the row up again.
        execute(
            f"ALTER TABLE {_DB}.browser_h2_signatures DELETE "
            "WHERE h2_fingerprint = {fp:String} "
            "SETTINGS mutations_sync = 1",
            {"fp": target},
        )
    except Exception as exc:
        logger.exception("browser_h2_signatures delete failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    # A reload failure must not fail the request: the delete already went through.
    try:
        execute(f"SYSTEM RELOAD DICTIONARY {_DB}.dict_browser_h2")
    except Exception:
        logger.warning("dict_browser_h2 reload failed", exc_info=True)
    return {"status": "ok", "deleted": target}

View File

@ -191,7 +191,6 @@
</div>
<div class="section-body">
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
<!-- mini KPIs pour les ordres pseudo-headers -->
<div class="bg-gray-900 rounded-lg px-4 py-3 border border-gray-800">
<div class="text-xs text-gray-500 mb-1">Chrome / Safari — <code>m,a,s,p</code></div>
<div class="text-2xl font-bold text-indigo-400" id="pseudo-chromesafari"></div>
@ -216,9 +215,270 @@
</div>
</div>
<!-- ═══ Row 4 : Base de signatures H2 ═══ -->
<div class="section-card" id="section-sig-h2">
<div class="section-header flex items-center justify-between">
<span class="section-title">
Base de signatures H2
<span class="relative inline-block ml-1"><button onclick="docToggle(this)" class="doc-btn"></button><div class="doc-panel">
<h4>Table <code>browser_h2_signatures</code></h4>
<p>Source des fingerprints HTTP/2 (format Akamai) utilisés par
<code>dict_browser_h2</code>. Le dictionnaire est rechargé automatiquement
après chaque ajout ou suppression.</p>
<p>Format : <code>SETTINGS|WINDOW_UPDATE|PRIORITY|PSEUDO_ORDER</code><br>
Exemple Chrome : <code>1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p</code></p>
<p class="doc-source">Source : ja4_processing.browser_h2_signatures (migration 06)</p>
</div></span>
</span>
<button onclick="toggleAddForm()" class="text-xs bg-indigo-700 hover:bg-indigo-600 text-white px-3 py-1 rounded font-medium transition-colors">+ Ajouter</button>
</div>
<!-- Formulaire d'ajout (masqué par défaut) -->
<div id="form-add-sig" class="hidden px-4 py-3 bg-gray-950 border-b border-gray-800">
<div class="grid grid-cols-1 md:grid-cols-4 gap-3 text-xs">
<div class="md:col-span-2">
<label class="text-gray-400 block mb-1">H2 Fingerprint (format Akamai)</label>
<input id="inp-fp" type="text" placeholder="1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p"
class="w-full bg-gray-900 border border-gray-700 rounded px-2 py-1.5 text-gray-200 font-mono text-[11px] focus:outline-none focus:border-indigo-500">
</div>
<div>
<label class="text-gray-400 block mb-1">Famille</label>
<select id="inp-family" class="w-full bg-gray-900 border border-gray-700 rounded px-2 py-1.5 text-gray-200 focus:outline-none focus:border-indigo-500">
<option value="Chrome">Chrome</option>
<option value="Firefox">Firefox</option>
<option value="Safari">Safari</option>
<option value="Edge">Edge</option>
<option value="Other">Other</option>
</select>
</div>
<div>
<label class="text-gray-400 block mb-1">Confidence (0–1)</label>
<input id="inp-conf" type="number" step="0.05" min="0" max="1" value="1.0"
class="w-full bg-gray-900 border border-gray-700 rounded px-2 py-1.5 text-gray-200 focus:outline-none focus:border-indigo-500">
</div>
<div class="md:col-span-3">
<label class="text-gray-400 block mb-1">Notes</label>
<input id="inp-notes" type="text" placeholder="ex: Chrome 143 beta"
class="w-full bg-gray-900 border border-gray-700 rounded px-2 py-1.5 text-gray-200 focus:outline-none focus:border-indigo-500">
</div>
<div class="flex items-end">
<button onclick="submitAdd()" class="w-full bg-emerald-700 hover:bg-emerald-600 text-white rounded px-3 py-1.5 text-xs font-medium transition-colors">Enregistrer</button>
</div>
</div>
<div id="add-status" class="mt-2 text-xs hidden"></div>
</div>
<div class="section-body overflow-x-auto">
<table class="w-full text-xs text-gray-400">
<thead class="border-b border-gray-800 text-left">
<tr>
<th class="px-3 py-2 font-medium">Fingerprint H2 (format Akamai)</th>
<th class="px-3 py-2 font-medium w-24">Famille</th>
<th class="px-3 py-2 font-medium w-24 text-center">Confidence</th>
<th class="px-3 py-2 font-medium">Notes</th>
<th class="px-3 py-2 font-medium w-16 text-center">Action</th>
</tr>
</thead>
<tbody id="tbl-sigs-body" class="divide-y divide-gray-800/50">
<tr><td colspan="5" class="px-3 py-6 text-center text-gray-600">Chargement…</td></tr>
</tbody>
</table>
</div>
</div>
<!-- ═══ Row 5 : Règles de scoring Python (browser_matcher) ═══ -->
<div class="section-card">
<div class="section-header">
<span class="section-title">
Règles de scoring — <code>browser_matcher.py</code>
<span class="relative inline-block ml-1"><button onclick="docToggle(this)" class="doc-btn"></button><div class="doc-panel">
<h4>Dimensions du scoring navigateur</h4>
<p>Le browser_matcher calcule un score 0–1 par famille en agrégeant 7 dimensions.
Ces règles sont définies dans <code>bot_detector/browser_signatures.py</code>.</p>
<p class="doc-source">Modification : éditer le fichier Python et redéployer bot-detector</p>
</div></span>
</span>
</div>
<div class="section-body">
<div class="overflow-x-auto">
<table class="w-full text-xs text-gray-400">
<thead class="border-b border-gray-800 text-left">
<tr>
<th class="px-3 py-2 font-medium">Dimension</th>
<th class="px-3 py-2 font-medium text-right w-16">Poids</th>
<th class="px-3 py-2 font-medium">Chrome</th>
<th class="px-3 py-2 font-medium">Firefox</th>
<th class="px-3 py-2 font-medium">Safari</th>
</tr>
</thead>
<tbody class="divide-y divide-gray-800/50 font-mono text-[11px]">
<tr>
<td class="px-3 py-2 text-gray-300 font-sans text-xs">H2 SETTINGS exact</td>
<td class="px-3 py-2 text-right text-amber-400 font-sans">0.30</td>
<td class="px-3 py-2">1:65536,2:0,4:6291456,6:262144</td>
<td class="px-3 py-2">1:65536,4:131072,5:16384</td>
<td class="px-3 py-2">1:4096,3:100,4:65535</td>
</tr>
<tr>
<td class="px-3 py-2 text-gray-300 font-sans text-xs">H2 WINDOW_UPDATE</td>
<td class="px-3 py-2 text-right text-amber-400 font-sans">0.15</td>
<td class="px-3 py-2 text-indigo-400">15 663 105</td>
<td class="px-3 py-2 text-orange-400">12 517 377</td>
<td class="px-3 py-2 text-cyan-400">10 485 760</td>
</tr>
<tr>
<td class="px-3 py-2 text-gray-300 font-sans text-xs">Pseudo-header order</td>
<td class="px-3 py-2 text-right text-amber-400 font-sans">0.15</td>
<td class="px-3 py-2">m,a,s,p</td>
<td class="px-3 py-2">m,p,s,a</td>
<td class="px-3 py-2">m,a,s,p</td>
</tr>
<tr>
<td class="px-3 py-2 text-gray-300 font-sans text-xs">HTTP headers cohérence</td>
<td class="px-3 py-2 text-right text-amber-400 font-sans">0.15</td>
<td class="px-3 py-2 text-[10px]">Sec-CH-UA ✓ · Sec-Fetch ✓</td>
<td class="px-3 py-2 text-[10px]">Sec-CH-UA ✗ · Sec-Fetch ✓</td>
<td class="px-3 py-2 text-[10px]">Sec-CH-UA ✗ · Sec-Fetch ✗</td>
</tr>
<tr>
<td class="px-3 py-2 text-gray-300 font-sans text-xs">H2 PRIORITY frames</td>
<td class="px-3 py-2 text-right text-amber-400 font-sans">0.10</td>
<td class="px-3 py-2 text-gray-500">absent</td>
<td class="px-3 py-2 text-gray-500">absent</td>
<td class="px-3 py-2 text-gray-500">absent</td>
</tr>
<tr>
<td class="px-3 py-2 text-gray-300 font-sans text-xs">TLS structure (JA4 famille)</td>
<td class="px-3 py-2 text-right text-amber-400 font-sans">0.10</td>
<td class="px-3 py-2 text-[10px]">Chromium · Chrome · Edge + GREASE</td>
<td class="px-3 py-2 text-[10px]">Firefox · pas de GREASE</td>
<td class="px-3 py-2 text-[10px]">Safari · pas de GREASE</td>
</tr>
<tr>
<td class="px-3 py-2 text-gray-300 font-sans text-xs">JA4 dict lookup</td>
<td class="px-3 py-2 text-right text-amber-400 font-sans">0.05</td>
<td colspan="3" class="px-3 py-2 text-gray-500">dict_browser_ja4 — correspondance fingerprint TLS exact</td>
</tr>
<tr class="bg-gray-900/50">
<td class="px-3 py-2 text-gray-300 font-sans font-semibold">Seuil de bypass ML</td>
<td class="px-3 py-2 text-right text-emerald-400 font-sans font-semibold"></td>
<td class="px-3 py-2 text-emerald-400 font-sans">≥ 0.72</td>
<td class="px-3 py-2 text-emerald-400 font-sans">≥ 0.68</td>
<td class="px-3 py-2 text-emerald-400 font-sans">≥ 0.68</td>
</tr>
</tbody>
</table>
</div>
<p class="mt-3 text-[10px] text-gray-600">Mode actuel : <strong class="text-gray-400">DUAL_MODE</strong> — le matcher journalise les décisions sans modifier le scoring ML.
Activer le bypass : variable d'environnement <code>BROWSER_MATCHER_REPLACE=true</code> dans bot-detector.</p>
</div>
</div>
</div>
<script>
// ─── Gestion des signatures H2 ───────────────────────────────────────────
// CSS text-colour class applied to each browser family label in the table.
const FAM_COLORS = {
  Chrome: 'text-indigo-400',
  Firefox: 'text-orange-400',
  Safari: 'text-cyan-400',
  Edge: 'text-purple-400',
  Other: 'text-gray-400',
};
/** Show or hide the "add signature" form by flipping its `hidden` class. */
function toggleAddForm() {
  document.getElementById('form-add-sig').classList.toggle('hidden');
}
/**
 * Fetch the managed H2 fingerprints and render them in the signatures table.
 *
 * When the API reports `readonly: true` (entries served from the CSV
 * dictionary), the "+ Ajouter" button is disabled; otherwise it is enabled
 * again. On any failure the table shows a single "Indisponible" row.
 */
async function loadSignatureEntries() {
  try {
    const r = await fetch('/api/browser-signatures/entries');
    if (!r.ok) throw new Error(r.statusText);
    const data = await r.json();
    const isReadonly = data.readonly === true;
    // Direct-child selector on purpose: a plain descendant selector matches
    // the nested doc-btn help button first and would disable that one instead.
    const addBtn = document.querySelector('#section-sig-h2 .section-header > button');
    if (addBtn) {
      // Set the state both ways so a later non-readonly load re-enables it.
      addBtn.disabled = isReadonly;
      addBtn.classList.toggle('opacity-40', isReadonly);
      addBtn.classList.toggle('cursor-not-allowed', isReadonly);
    }
    renderSigTable(data.entries || [], isReadonly);
  } catch (e) {
    console.error('loadSignatureEntries failed', e);
    document.getElementById('tbl-sigs-body').innerHTML =
      `<tr><td colspan="5" class="px-3 py-4 text-center text-gray-600">Indisponible</td></tr>`;
  }
}
/**
 * Render fingerprint rows into the signatures table body.
 *
 * Every value coming from the API is HTML-escaped before being written to
 * `innerHTML`.
 *
 * @param {Array<Object>} rows - Entries with `h2_fingerprint`,
 *   `browser_family`, `confidence` and `notes`.
 * @param {boolean} [readonly=false] - When true, the action column shows
 *   "lecture seule" instead of a delete button.
 */
function renderSigTable(rows, readonly = false) {
  const tbody = document.getElementById('tbl-sigs-body');
  if (!rows.length) {
    tbody.innerHTML = '<tr><td colspan="5" class="px-3 py-6 text-center text-gray-600">Aucun fingerprint enregistré</td></tr>';
    return;
  }
  tbody.innerHTML = rows.map(r => {
    // Own-property check so a family such as "constructor" cannot resolve to
    // an inherited Object member.
    const fc = Object.prototype.hasOwnProperty.call(FAM_COLORS, r.browser_family)
      ? FAM_COLORS[r.browser_family]
      : 'text-gray-400';
    const conf = typeof r.confidence === 'number' ? r.confidence.toFixed(2) : (r.confidence || '—');
    const score = r.confidence || 0;
    const confColor = score >= 0.95 ? 'text-emerald-400' : score >= 0.8 ? 'text-amber-400' : 'text-red-400';
    // encodeURIComponent leaves "'" untouched, which would close the quoted
    // argument of the inline handler below; percent-encode it as well.
    const fpEsc = encodeURIComponent(r.h2_fingerprint).replace(/'/g, '%27');
    const actionCell = readonly
      ? '<td class="px-3 py-2 text-center text-gray-700 text-[10px]">lecture seule</td>'
      : `<td class="px-3 py-2 text-center">
<button onclick="deleteSig('${fpEsc}')" class="text-red-500 hover:text-red-400 text-[11px] transition-colors" title="Supprimer">✕</button>
</td>`;
    return `<tr class="hover:bg-gray-800/20 transition-colors">
<td class="px-3 py-2 font-mono text-[11px] text-gray-300 break-all">${escHtml(r.h2_fingerprint)}</td>
<td class="px-3 py-2 font-semibold ${fc}">${escHtml(r.browser_family)}</td>
<td class="px-3 py-2 text-center font-semibold ${confColor}">${escHtml(conf)}</td>
<td class="px-3 py-2 text-gray-500">${escHtml(r.notes || '')}</td>
${actionCell}
</tr>`;
  }).join('');
}
/**
 * Read the add form, POST the new fingerprint and refresh the table.
 *
 * The fingerprint and the confidence are checked client-side first. Server
 * errors are shown in the form status line; a structured (non-string)
 * `detail` is serialised instead of being displayed as "[object Object]".
 */
async function submitAdd() {
  const fp = document.getElementById('inp-fp').value.trim();
  const fam = document.getElementById('inp-family').value;
  const conf = parseFloat(document.getElementById('inp-conf').value);
  const notes = document.getElementById('inp-notes').value.trim();
  const status = document.getElementById('add-status');
  if (!fp) { showStatus(status, 'Le fingerprint est requis', 'error'); return; }
  // An empty or invalid number parses to NaN, which JSON.stringify would
  // send as null; reject it here with a readable message.
  if (!Number.isFinite(conf) || conf < 0 || conf > 1) {
    showStatus(status, 'La confidence doit être un nombre entre 0 et 1', 'error');
    return;
  }
  try {
    const r = await fetch('/api/browser-signatures/entries', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ h2_fingerprint: fp, browser_family: fam, confidence: conf, notes }),
    });
    if (!r.ok) {
      // The error body may not be JSON, and `detail` may be a list of
      // validation errors rather than a string.
      const data = await r.json().catch(() => null);
      const detail = data && data.detail;
      const msg = typeof detail === 'string'
        ? detail
        : (detail ? JSON.stringify(detail) : 'Erreur');
      showStatus(status, msg, 'error');
      return;
    }
    showStatus(status, '✓ Enregistré — dictionnaire rechargé', 'ok');
    document.getElementById('inp-fp').value = '';
    document.getElementById('inp-notes').value = '';
    setTimeout(() => document.getElementById('form-add-sig').classList.add('hidden'), 1500);
    await loadSignatureEntries();
  } catch (e) {
    showStatus(status, e.message, 'error');
  }
}
/**
 * Ask for confirmation, then delete a fingerprint through the API and
 * refresh the table.
 *
 * @param {string} fpEncoded - URI-component-encoded fingerprint, as embedded
 *   in the row's inline handler.
 */
async function deleteSig(fpEncoded) {
  const readable = decodeURIComponent(fpEncoded);
  const confirmed = confirm(`Supprimer ce fingerprint ?\n\n${readable}`);
  if (!confirmed) return;
  try {
    const url = `/api/browser-signatures/entries?fingerprint=${fpEncoded}`;
    const resp = await fetch(url, { method: 'DELETE' });
    if (resp.ok) {
      await loadSignatureEntries();
    } else {
      alert('Erreur lors de la suppression');
    }
  } catch (err) {
    alert(err.message);
  }
}
/**
 * Display a status message in the given element.
 *
 * @param {Element} el - Target element; its text and classes are overwritten.
 * @param {string} msg - Message to display.
 * @param {string} type - 'ok' renders in green, anything else in red.
 */
function showStatus(el, msg, type) {
  const tone = type === 'ok' ? 'text-emerald-400' : 'text-red-400';
  el.textContent = msg;
  el.className = `mt-2 text-xs ${tone}`;
  el.classList.remove('hidden');
}
/**
 * Escape a value for safe insertion into HTML text or a quoted attribute.
 *
 * @param {*} s - Value to escape; it is converted with String() first.
 * @returns {string} The string with &, <, >, " and ' replaced by entities.
 */
function escHtml(s) {
  return String(s)
    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
// ─── Chargement des données ───────────────────────────────────────────────
async function loadBrowserData() {
let data;
@ -377,5 +637,6 @@ function setText(id, val) { const el = document.getElementById(id); if (el) el.t
function fmt(n) { return n == null ? '—' : Number(n).toLocaleString('fr-FR'); }
loadBrowserData();
loadSignatureEntries();
</script>
{% endblock %}

View File

@ -40,6 +40,8 @@ services:
- ../../shared/clickhouse/10_perf_indexes.sql:/initdb-src/10_perf_indexes.sql:ro
- ../../shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro
- ../../shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro
# Reference CSV files (dictionaries / browser signatures)
- ../../shared/data/browser_h2.csv:/initdb-src/browser_h2.csv:ro
# Empty CSV stubs (dictionaries expect these files)
- ./platform/csv-stubs:/var/lib/clickhouse/user_files
ports:

View File

@ -8,8 +8,19 @@ set -e
SRC_DIR="/initdb-src"
TMP_DIR="/tmp/initdb-patched"
USER_FILES="/var/lib/clickhouse/user_files"
mkdir -p "$TMP_DIR"
# Seed user_files with the reference CSVs found in $SRC_DIR (browser
# dictionaries). A CSV already present in user_files is left untouched.
for csv in "$SRC_DIR"/*.csv; do
    # An unmatched glob stays literal; skip anything that is not a regular file.
    [ -f "$csv" ] || continue
    fname=$(basename "$csv")
    # Never overwrite a file that already exists at the destination.
    [ -f "$USER_FILES/$fname" ] && continue
    cp "$csv" "$USER_FILES/$fname"
    echo "[init] CSV copié : $fname"
done
for f in "$SRC_DIR"/*.sql; do
[ -f "$f" ] || continue
base=$(basename "$f")