""" Endpoints pour l'analyse des empreintes d'en-têtes HTTP """ from fastapi import APIRouter, HTTPException, Query from ..database import db router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"]) @router.get("/clusters") async def get_header_clusters(limit: int = Query(50, ge=1, le=200)): """Clusters d'empreintes d'en-têtes groupés par header_order_hash.""" try: sql = """ SELECT header_order_hash AS hash, uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips, avg(modern_browser_score) AS avg_browser_score, sum(ua_ch_mismatch) AS ua_ch_mismatch_count, round(sum(ua_ch_mismatch) * 100.0 / count(), 2) AS ua_ch_mismatch_pct, groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes, round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct, round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct FROM mabase_prod.agg_header_fingerprint_1h GROUP BY header_order_hash ORDER BY unique_ips DESC LIMIT %(limit)s """ result = db.query(sql, {"limit": limit}) total_sql = """ SELECT uniq(header_order_hash) FROM mabase_prod.agg_header_fingerprint_1h """ total_clusters = int(db.query(total_sql).result_rows[0][0]) clusters = [] for row in result.result_rows: h = str(row[0]) unique_ips = int(row[1]) avg_browser_score = float(row[2] or 0) ua_ch_mismatch_cnt = int(row[3]) ua_ch_mismatch_pct = float(row[4] or 0) top_modes = list(set(str(m) for m in (row[5] or []))) has_cookie_pct = float(row[6] or 0) has_referer_pct = float(row[7] or 0) if avg_browser_score >= 90 and ua_ch_mismatch_pct < 5: classification = "legitimate" elif ua_ch_mismatch_pct > 50: classification = "bot_suspicious" else: classification = "mixed" clusters.append({ "hash": h, "unique_ips": unique_ips, "avg_browser_score": round(avg_browser_score, 2), "ua_ch_mismatch_count":ua_ch_mismatch_cnt, "ua_ch_mismatch_pct": ua_ch_mismatch_pct, "top_sec_fetch_modes": top_modes, "has_cookie_pct": has_cookie_pct, "has_referer_pct": has_referer_pct, "classification": classification, }) return {"clusters": clusters, "total_clusters": total_clusters} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @router.get("/cluster/{hash}/ips") async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)): """Liste des IPs appartenant à un cluster d'en-têtes donné.""" try: sql = """ SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip, any(modern_browser_score) AS browser_score, any(ua_ch_mismatch) AS ua_ch_mismatch, any(sec_fetch_mode) AS sec_fetch_mode, any(sec_fetch_dest) AS sec_fetch_dest FROM mabase_prod.agg_header_fingerprint_1h WHERE header_order_hash = %(hash)s GROUP BY src_ip ORDER BY browser_score DESC LIMIT %(limit)s """ result = db.query(sql, {"hash": hash, "limit": limit}) items = [] for row in result.result_rows: items.append({ "ip": str(row[0]), "browser_score": int(row[1] or 0), "ua_ch_mismatch": int(row[2] or 0), "sec_fetch_mode": str(row[3] or ""), "sec_fetch_dest": str(row[4] or ""), }) return {"items": items, "total": len(items)} except Exception as e: raise HTTPException(status_code=500, detail=str(e))