fix: correct CampaignsView, analysis.py IPv4 split, entities date filter
- CampaignsView: update ClusterData interface to match real API response
(severity/unique_ips/score instead of threat_level/total_ips/confidence_range)
Fix fetch to use data.items, rewrite ClusterCard and BehavioralTab
Remove unused getClassificationColor and THREAT_ORDER constants
- analysis.py: fix IPv4Address object has no attribute 'split' on line 322
Add str() conversion before calling .split('.')
- entities.py: fix Date vs DateTime comparison — log_date is a Date column,
comparing against now()-INTERVAL HOUR caused yesterday's entries to be excluded
Use toDate(now() - INTERVAL X HOUR) for correct Date-level comparison
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
737
backend/routes/fingerprints.py
Normal file
737
backend/routes/fingerprints.py
Normal file
@ -0,0 +1,737 @@
|
||||
"""
|
||||
Endpoints for analysing JA4 fingerprints and User-Agents.

Goals:
- Detect JA4 spoofing (a TLS fingerprint that claims to be a browser while the
  User-Agents, the HTTP headers or the behavioural metrics betray a bot/script
  origin)
- Build a JA4 × User-Agent matrix to visualise suspicious associations
- Analyse the User-Agent distribution to identify rotators and bots that
  impersonate legitimate browser UAs
|
||||
"""
|
||||
import logging
import re
from typing import Optional

from fastapi import APIRouter, HTTPException, Query

from ..database import db
|
||||
|
||||
# Router collecting every endpoint of this module under the /api/fingerprints prefix.
router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"])
|
||||
|
||||
|
||||
# ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Patterns indiquant clairement un bot/script sans simulation de navigateur
|
||||
_BOT_PATTERNS = re.compile(
|
||||
r"bot|crawler|spider|scraper|python|curl|wget|go-http|java/|axios|"
|
||||
r"libwww|httpclient|okhttp|requests|aiohttp|httpx|playwright|puppeteer|"
|
||||
r"selenium|headless|phantomjs",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Navigateurs légitimes communs — un JA4 de type "browser" devrait venir avec ces UAs
|
||||
_BROWSER_PATTERNS = re.compile(
|
||||
r"mozilla|chrome|safari|firefox|edge|opera|trident",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _classify_ua(ua: str) -> str:
|
||||
"""Retourne 'bot', 'browser', ou 'script'"""
|
||||
if not ua:
|
||||
return "empty"
|
||||
if _BOT_PATTERNS.search(ua):
|
||||
return "bot"
|
||||
if _BROWSER_PATTERNS.search(ua):
|
||||
return "browser"
|
||||
return "script"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 1 — Détection de spoofing JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/spoofing")
async def get_ja4_spoofing(
    hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle"),
    min_detections: int = Query(10, ge=1, description="Nombre minimum de détections"),
    limit: int = Query(50, ge=1, le=200),
):
    """
    Rank JA4 fingerprints by how likely they are to be spoofing a browser.

    Per-JA4 indicators are aggregated from ``ml_detected_anomalies`` over the
    last ``hours`` hours (UA/Client-Hints mismatch, SNI/ALPN mismatch, rare
    JA4, UA rotation, missing ALPN, browser score). The top User-Agents of
    each JA4 are then looked up in ``view_dashboard_user_agents`` on a
    best-effort basis: if that lookup fails, the endpoint still answers,
    with empty ``top_user_agents``.

    Args:
        hours: Look-back window in hours.
        min_detections: Minimum number of detections for a JA4 to be listed.
        limit: Maximum number of JA4 rows fetched from the database.

    Returns:
        A dict with ``items`` (sorted by descending spoofing score, then by
        descending detection count), ``total``, ``period_hours`` and a
        ``summary`` counting items per classification.

    Raises:
        HTTPException: 500 on any unexpected error.
    """
    try:
        # Per-JA4 aggregation carrying every spoofing indicator.
        query = """
        SELECT
            ja4,
            count() AS total_detections,
            uniq(src_ip) AS unique_ips,

            -- Indicateurs de mismatch
            countIf(ua_ch_mismatch = true) AS ua_ch_mismatch_count,
            round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
            countIf(sni_host_mismatch = true) AS sni_mismatch_count,
            round(countIf(sni_host_mismatch = true) * 100.0 / count(), 2) AS sni_mismatch_pct,
            countIf(alpn_http_mismatch = true) AS alpn_mismatch_count,
            round(countIf(alpn_http_mismatch = true) * 100.0 / count(), 2) AS alpn_mismatch_pct,

            -- Indicateurs comportementaux
            avg(modern_browser_score) AS avg_browser_score,
            countIf(is_rare_ja4 = true) AS rare_ja4_count,
            round(countIf(is_rare_ja4 = true) * 100.0 / count(), 2) AS rare_ja4_pct,
            countIf(is_ua_rotating = true) AS ua_rotating_count,
            round(countIf(is_ua_rotating = true) * 100.0 / count(), 2) AS ua_rotating_pct,

            -- Métriques TLS/TCP
            countIf(is_alpn_missing = true) AS alpn_missing_count,
            avg(distinct_ja4_count) AS avg_distinct_ja4_per_ip,

            -- Répartition threat levels
            countIf(threat_level = 'CRITICAL') AS critical_count,
            countIf(threat_level = 'HIGH') AS high_count,

            -- Botnet indicators
            avg(ja4_asn_concentration) AS avg_asn_concentration,
            avg(ja4_country_concentration) AS avg_country_concentration,

            argMax(threat_level, detected_at) AS last_threat_level
        FROM ml_detected_anomalies
        WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
          AND ja4 != '' AND ja4 IS NOT NULL
        GROUP BY ja4
        HAVING total_detections >= %(min_detections)s
        ORDER BY ua_ch_mismatch_pct DESC, total_detections DESC
        LIMIT %(limit)s
        """

        result = db.query(query, {
            "hours": hours,
            "min_detections": min_detections,
            "limit": limit,
        })

        # Top UAs per JA4 from view_dashboard_user_agents (best effort).
        ja4_list = [str(r[0]) for r in result.result_rows if r[0]]
        ua_by_ja4: dict = {}
        if ja4_list:
            # Values are bound as parameters rather than interpolated into the
            # SQL text, and every returned JA4 is looked up (not only the
            # first 100), so rows beyond the 100th also get their UAs.
            ua_q = """
            SELECT ja4, groupArray(5)(ua) AS top_uas
            FROM (
                SELECT ja4, arrayJoin(user_agents) AS ua, sum(requests) AS cnt
                FROM view_dashboard_user_agents
                WHERE ja4 IN %(ja4_list)s
                  AND hour >= now() - INTERVAL %(hours)s HOUR
                  AND ua != ''
                GROUP BY ja4, ua
                ORDER BY ja4, cnt DESC
            )
            GROUP BY ja4
            """
            try:
                ua_res = db.query(ua_q, {"ja4_list": tuple(ja4_list), "hours": hours})
                for ua_row in ua_res.result_rows:
                    if ua_row[1]:
                        ua_by_ja4[str(ua_row[0])] = list(ua_row[1])
            except Exception:
                # Still best-effort, but leave a trace instead of failing silently.
                logging.getLogger(__name__).warning(
                    "Top User-Agent lookup failed for /spoofing; continuing without UAs",
                    exc_info=True,
                )

        items = []
        for row in result.result_rows:
            # Indices follow the SELECT column order of `query` above.
            ja4 = str(row[0])
            ua_ch_mismatch_pct = float(row[4] or 0)
            sni_mismatch_pct = float(row[6] or 0)
            alpn_mismatch_pct = float(row[8] or 0)
            avg_browser_score = float(row[9] or 0)
            rare_ja4_pct = float(row[11] or 0)
            ua_rotating_pct = float(row[13] or 0)
            alpn_missing_count = int(row[14] or 0)
            total = int(row[1] or 1)  # 1 rather than 0 so the 30% threshold below is never 0

            top_uas = ua_by_ja4.get(ja4, [])
            ua_classes = [_classify_ua(u) for u in top_uas]
            has_bot_ua = "bot" in ua_classes
            has_browser_ua = "browser" in ua_classes

            # Spoofing confidence score, capped at 100:
            #   UA/CH mismatch rate                          x 0.40
            #   browser score (only when a bot UA is seen)   x 0.25
            #   SNI mismatch rate                            x 0.10
            #   ALPN mismatch rate                           x 0.05
            #   rare-JA4 rate                                x 0.10
            #   UA rotation rate                             x 0.10
            #   +10 when ALPN is missing on more than 30% of detections
            spoof_score = min(100, round(
                ua_ch_mismatch_pct * 0.40
                + (avg_browser_score * 25 / 100 if has_bot_ua else 0)
                + sni_mismatch_pct * 0.10
                + alpn_mismatch_pct * 0.05
                + rare_ja4_pct * 0.10
                + ua_rotating_pct * 0.10
                + (10 if alpn_missing_count > total * 0.3 else 0)
            ))

            # JA4 classification.
            if spoof_score >= 60:
                classification = "spoofed_browser"
            elif has_bot_ua and avg_browser_score < 30:
                classification = "known_bot"
            elif has_browser_ua and ua_ch_mismatch_pct < 10:
                classification = "legitimate_browser"
            else:
                classification = "suspicious"

            items.append({
                "ja4": ja4,
                "classification": classification,
                "spoofing_score": spoof_score,
                "total_detections": int(row[1] or 0),
                "unique_ips": int(row[2] or 0),
                "indicators": {
                    "ua_ch_mismatch_pct": ua_ch_mismatch_pct,
                    "sni_mismatch_pct": sni_mismatch_pct,
                    "alpn_mismatch_pct": alpn_mismatch_pct,
                    "avg_browser_score": round(avg_browser_score, 1),
                    "rare_ja4_pct": rare_ja4_pct,
                    "ua_rotating_pct": ua_rotating_pct,
                    "alpn_missing_count": alpn_missing_count,
                    "avg_asn_concentration": round(float(row[18] or 0), 3),
                    "avg_country_concentration": round(float(row[19] or 0), 3),
                },
                "top_user_agents": [
                    {"ua": u, "type": ua_class}
                    for u, ua_class in zip(top_uas, ua_classes)
                ],
                "threat_breakdown": {
                    "critical": int(row[16] or 0),
                    "high": int(row[17] or 0),
                    "last_level": str(row[20] or "LOW"),
                },
            })

        # Highest score first (so "spoofed_browser" items, score >= 60, lead),
        # ties broken by detection volume.
        items.sort(key=lambda x: (-x["spoofing_score"], -x["total_detections"]))

        return {
            "items": items,
            "total": len(items),
            "period_hours": hours,
            "summary": {
                "spoofed_browser": sum(1 for i in items if i["classification"] == "spoofed_browser"),
                "known_bot": sum(1 for i in items if i["classification"] == "known_bot"),
                "suspicious": sum(1 for i in items if i["classification"] == "suspicious"),
                "legitimate_browser": sum(1 for i in items if i["classification"] == "legitimate_browser"),
            },
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}") from e
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 2 — Matrice JA4 × User-Agent
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ja4-ua-matrix")
async def get_ja4_ua_matrix(
    hours: int = Query(24, ge=1, le=168),
    min_ips: int = Query(3, ge=1, description="Nombre minimum d'IPs pour inclure un JA4"),
    limit: int = Query(30, ge=1, le=100),
):
    """
    JA4 × User-Agent matrix.

    For every JA4 kept by the statistics query:
    - top associated User-Agents (from ``view_dashboard_user_agents``, at
      most 8 per JA4, best effort — a failed lookup yields empty lists)
    - ``ua_ch_mismatch`` rate
    - UA classification (bot / browser / script)
    - a spoofing flag when the browser score is high while the UAs are bots

    Args:
        hours: Look-back window in hours.
        min_ips: Minimum number of distinct source IPs for a JA4 to be listed.
        limit: Maximum number of JA4 rows.

    Returns:
        A dict with ``items``, ``total`` and ``period_hours``.

    Raises:
        HTTPException: 500 on any unexpected error.
    """
    try:
        # JA4 statistics from ml_detected_anomalies.
        stats_query = """
        SELECT
            ja4,
            uniq(src_ip) AS unique_ips,
            count() AS total_detections,
            round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
            avg(modern_browser_score) AS avg_browser_score,
            countIf(is_rare_ja4 = true) AS rare_count,
            countIf(is_ua_rotating = true) AS rotating_count,
            argMax(threat_level, detected_at) AS last_threat
        FROM ml_detected_anomalies
        WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
          AND ja4 != '' AND ja4 IS NOT NULL
        GROUP BY ja4
        HAVING unique_ips >= %(min_ips)s
        ORDER BY ua_ch_mismatch_pct DESC, unique_ips DESC
        LIMIT %(limit)s
        """

        stats_res = db.query(stats_query, {"hours": hours, "min_ips": min_ips, "limit": limit})
        ja4_list = [str(r[0]) for r in stats_res.result_rows]

        if not ja4_list:
            return {"items": [], "total": 0, "period_hours": hours}

        # UAs per JA4 from view_dashboard_user_agents. Values are bound as
        # parameters instead of being interpolated into the SQL text.
        ua_query = """
        SELECT
            ja4,
            ua,
            sum(requests) AS cnt
        FROM view_dashboard_user_agents
        ARRAY JOIN user_agents AS ua
        WHERE ja4 IN %(ja4_list)s
          AND hour >= now() - INTERVAL %(hours)s HOUR
          AND ua != ''
        GROUP BY ja4, ua
        ORDER BY ja4, cnt DESC
        """

        ua_by_ja4: dict = {}
        try:
            ua_res = db.query(ua_query, {"ja4_list": tuple(ja4_list), "hours": hours})
            for ua_row in ua_res.result_rows:
                bucket = ua_by_ja4.setdefault(str(ua_row[0]), [])
                # Rows arrive ordered by descending count: keep the first 8 per JA4.
                if len(bucket) < 8:
                    bucket.append({"ua": str(ua_row[1]), "count": int(ua_row[2] or 0)})
        except Exception:
            # Still best-effort, but leave a trace instead of failing silently.
            logging.getLogger(__name__).warning(
                "User-Agent lookup failed for /ja4-ua-matrix; continuing without UAs",
                exc_info=True,
            )

        items = []
        for row in stats_res.result_rows:
            # Indices follow the SELECT column order of `stats_query`.
            ja4 = str(row[0])
            unique_ips = int(row[1] or 0)
            ua_ch_mismatch_pct = float(row[3] or 0)
            avg_browser_score = float(row[4] or 0)

            top_uas = ua_by_ja4.get(ja4, [])
            # Percentages are relative to the kept UAs only; `or 1` avoids a
            # division by zero when all counts are 0.
            ua_total = sum(u["count"] for u in top_uas) or 1

            classified_uas = [
                {
                    "ua": u["ua"],
                    "count": u["count"],
                    "pct": round(u["count"] * 100 / ua_total, 1),
                    "type": _classify_ua(u["ua"]),
                }
                for u in top_uas
            ]

            bot_pct = sum(u["pct"] for u in classified_uas if u["type"] == "bot")
            browser_pct = sum(u["pct"] for u in classified_uas if u["type"] == "browser")
            # Remainder of 100%, but only when there is UA data at all: a JA4
            # without any UA must not be reported as "100% script". Clamped at
            # 0 because the rounded percentages can add up to slightly over 100.
            script_pct = max(0.0, 100 - bot_pct - browser_pct) if classified_uas else 0.0

            # Spoofing flag: the JA4 looks like a browser (high browser score)
            # while the UAs are bots and UA/Client-Hints disagree.
            is_spoofing = avg_browser_score > 50 and bot_pct > 30 and ua_ch_mismatch_pct > 20

            items.append({
                "ja4": ja4,
                "unique_ips": unique_ips,
                "total_detections": int(row[2] or 0),
                "ua_ch_mismatch_pct": ua_ch_mismatch_pct,
                "avg_browser_score": round(avg_browser_score, 1),
                "rare_count": int(row[5] or 0),
                "rotating_count": int(row[6] or 0),
                "last_threat": str(row[7] or "LOW"),
                "user_agents": classified_uas,
                "ua_summary": {
                    "bot_pct": round(bot_pct, 1),
                    "browser_pct": round(browser_pct, 1),
                    "script_pct": round(script_pct, 1),
                    "total_distinct": len(top_uas),
                },
                "is_spoofing_suspect": is_spoofing,
            })

        return {
            "items": items,
            "total": len(items),
            "period_hours": hours,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}") from e
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 3 — Analyse globale des User-Agents
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ua-analysis")
async def get_ua_analysis(
    hours: int = Query(24, ge=1, le=168),
    limit: int = Query(50, ge=1, le=200),
):
    """
    Global analysis of the User-Agents seen in the dashboard view.

    Reports, for the most frequent User-Agents of the last ``hours`` hours:
    - their type (bot / browser / script)
    - how many distinct JA4 fingerprints they appear with (a UA seen with
      many JA4s is flagged as suspect)
    - risk flags built by ``_build_ua_risk_flags``
    and, separately, the IPs flagged ``is_ua_rotating`` in
    ``ml_detected_anomalies``.

    The JA4-per-UA lookup and the rotating-IP lookup are best effort: when
    one of them fails the endpoint still answers with empty data for it.

    Args:
        hours: Look-back window in hours.
        limit: Maximum number of User-Agents returned.

    Returns:
        A dict with ``items``, ``total``, ``period_hours``,
        ``ua_rotating_stats`` and ``summary``.

    Raises:
        HTTPException: 500 on any unexpected error.
    """
    try:
        # Top global UAs from view_dashboard_user_agents.
        # NOTE: the column is aliased (and exposed) as `ip_count`, but it is
        # the sum of `requests`.
        ua_global_query = """
        SELECT
            ua,
            sum(requests) AS ip_count
        FROM view_dashboard_user_agents
        ARRAY JOIN user_agents AS ua
        WHERE hour >= now() - INTERVAL %(hours)s HOUR
          AND ua != ''
        GROUP BY ua
        ORDER BY ip_count DESC
        LIMIT %(limit)s
        """

        ua_global_res = db.query(ua_global_query, {"hours": hours, "limit": limit})
        top_uas = [str(r[0]) for r in ua_global_res.result_rows]

        # JA4s seen with each UA. User-Agent strings come from client traffic,
        # so they are bound as a query parameter instead of being quoted by
        # hand, and every returned UA is looked up (not only the first 50).
        ja4_by_ua: dict = {}
        if top_uas:
            ja4_per_ua_query = """
            SELECT
                ua,
                uniq(ja4) AS unique_ja4s,
                groupUniqArray(3)(ja4) AS sample_ja4s
            FROM view_dashboard_user_agents
            ARRAY JOIN user_agents AS ua
            WHERE ua IN %(uas)s
              AND hour >= now() - INTERVAL %(hours)s HOUR
              AND ua != ''
              AND ja4 != ''
            GROUP BY ua
            """
            try:
                ja4_res = db.query(ja4_per_ua_query, {"uas": tuple(top_uas), "hours": hours})
                for r in ja4_res.result_rows:
                    ja4_by_ua[str(r[0])] = {
                        "unique_ja4s": int(r[1] or 0),
                        "sample_ja4s": list(r[2] or []),
                    }
            except Exception:
                # Still best-effort, but leave a trace instead of failing silently.
                logging.getLogger(__name__).warning(
                    "JA4-per-UA lookup failed for /ua-analysis; continuing without JA4 data",
                    exc_info=True,
                )

        # IPs flagged is_ua_rotating in ml_detected_anomalies.
        rotating_query = """
        SELECT
            replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
            avg(ua_ch_mismatch) AS avg_ua_ch_mismatch
        FROM ml_detected_anomalies
        WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
          AND is_ua_rotating = true
        GROUP BY clean_ip
        ORDER BY avg_ua_ch_mismatch DESC
        LIMIT 100
        """
        rotating_ips: list = []
        try:
            rot_res = db.query(rotating_query, {"hours": hours})
            rotating_ips = [str(r[0]) for r in rot_res.result_rows]
        except Exception:
            logging.getLogger(__name__).warning(
                "Rotating-IP lookup failed for /ua-analysis; continuing without it",
                exc_info=True,
            )

        # Build the response.
        items = []
        for row in ua_global_res.result_rows:
            ua = str(row[0])
            ip_count = int(row[1] or 0)
            ua_type = _classify_ua(ua)
            ja4_info = ja4_by_ua.get(ua, {"unique_ja4s": 0, "sample_ja4s": []})

            # A UA seen with more than 3 distinct JA4s is flagged as suspect.
            multi_ja4_flag = ja4_info["unique_ja4s"] > 3

            items.append({
                "user_agent": ua,
                "type": ua_type,
                "ip_count": ip_count,
                "unique_ja4_count": ja4_info["unique_ja4s"],
                "sample_ja4s": ja4_info["sample_ja4s"],
                "is_multi_ja4_suspect": multi_ja4_flag,
                "risk_flags": _build_ua_risk_flags(ua, ua_type, ja4_info["unique_ja4s"], ip_count),
            })

        # IPs rotating their User-Agent.
        ua_rotating_stats = {
            "rotating_ip_count": len(rotating_ips),
            "sample_rotating_ips": rotating_ips[:10],
        }

        return {
            "items": items,
            "total": len(items),
            "period_hours": hours,
            "ua_rotating_stats": ua_rotating_stats,
            "summary": {
                "bot_count": sum(1 for i in items if i["type"] == "bot"),
                "browser_count": sum(1 for i in items if i["type"] == "browser"),
                "script_count": sum(1 for i in items if i["type"] == "script"),
                "multi_ja4_suspect_count": sum(1 for i in items if i["is_multi_ja4_suspect"]),
            },
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}") from e
|
||||
|
||||
|
||||
def _build_ua_risk_flags(ua: str, ua_type: str, unique_ja4s: int, ip_count: int) -> list:
|
||||
flags = []
|
||||
if ua_type == "bot":
|
||||
flags.append("ua_bot_signature")
|
||||
elif ua_type == "script":
|
||||
flags.append("ua_script_library")
|
||||
if unique_ja4s > 5:
|
||||
flags.append("ja4_rotation_suspect")
|
||||
if unique_ja4s > 3 and ua_type == "browser":
|
||||
flags.append("browser_ua_multi_fingerprint")
|
||||
if ip_count > 100:
|
||||
flags.append("high_volume")
|
||||
return flags
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 4 — JA4 d'un IP spécifique: analyse de cohérence UA/JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ip/{ip}/coherence")
async def get_ip_fingerprint_coherence(ip: str):
    """
    Assess whether one IP appears to spoof its fingerprint.

    The 20 most recent rows of ``ml_detected_anomalies`` for the IP are
    combined with its top User-Agents of the last 72 hours (from
    ``view_dashboard_user_agents``) into a spoofing score built from:
    - the ``ua_ch_mismatch`` rate (User-Agent vs Client Hints)
    - the average ``modern_browser_score`` when a bot-type UA is present
    - the ``sni_host_mismatch`` and ``alpn_http_mismatch`` rates
    - the number of distinct JA4 values (rotation)
    - ``is_ua_rotating`` and the ``is_rare_ja4`` rate

    Args:
        ip: Source IP taken from the URL path.

    Returns:
        A dict with the verdict, the score (capped at 100), a list of
        human-readable explanations, indicator rates, fingerprints, the
        User-Agents and details of the latest detection.

    Raises:
        HTTPException: 404 when the IP has no detection row; 500 on any
            other error.
    """
    try:
        # Latest detections for this IP.
        ml_query = """
        SELECT
            ja4,
            ua_ch_mismatch,
            modern_browser_score,
            sni_host_mismatch,
            alpn_http_mismatch,
            is_alpn_missing,
            is_rare_ja4,
            is_ua_rotating,
            distinct_ja4_count,
            header_count,
            has_accept_language,
            has_cookie,
            has_referer,
            header_order_shared_count,
            detected_at,
            threat_level,
            window_mss_ratio,
            tcp_jitter_variance,
            multiplexing_efficiency
        FROM ml_detected_anomalies
        WHERE src_ip = %(ip)s
        ORDER BY detected_at DESC
        LIMIT 20
        """
        detections = db.query(ml_query, {"ip": ip}).result_rows

        if not detections:
            raise HTTPException(status_code=404, detail="IP non trouvée dans les détections")

        # Actual User-Agents of the IP.
        # NOTE(review): this query compares toString(src_ip) while the one
        # above compares src_ip directly — confirm both match the same IP
        # representation.
        ua_query = """
        SELECT ua, sum(requests) AS cnt
        FROM view_dashboard_user_agents
        ARRAY JOIN user_agents AS ua
        WHERE toString(src_ip) = %(ip)s
          AND hour >= now() - INTERVAL 72 HOUR
          AND ua != ''
        GROUP BY ua ORDER BY cnt DESC LIMIT 10
        """
        top_uas = []
        for ua_row in db.query(ua_query, {"ip": ip}).result_rows:
            ua_text = str(ua_row[0])
            top_uas.append({
                "ua": ua_text,
                "count": int(ua_row[1] or 0),
                "type": _classify_ua(ua_text),
            })

        # Aggregate the indicators over the fetched rows; column positions
        # follow the SELECT order of `ml_query`.
        newest = detections[0]
        sample_size = len(detections)

        def hits(column: int) -> int:
            """Number of fetched rows whose value at `column` is truthy."""
            return len([None for det in detections if det[column]])

        ua_ch_hits = hits(1)
        sni_hits = hits(3)
        alpn_hits = hits(4)
        rare_hits = hits(6)
        rotating = any(det[7] for det in detections)
        ja4_values = {str(det[0]) for det in detections if det[0]}
        mean_browser_score = sum(int(det[2] or 0) for det in detections) / sample_size

        ua_ch_rate = ua_ch_hits / sample_size * 100
        sni_rate = sni_hits / sample_size * 100
        alpn_rate = alpn_hits / sample_size * 100

        # UA analysis.
        ua_types = {entry["type"] for entry in top_uas}
        browser_ua_seen = "browser" in ua_types
        bot_ua_seen = "bot" in ua_types

        # Spoofing score, capped at 100.
        ja4_count = len(ja4_values)
        raw_score = (
            ua_ch_rate * 0.40
            + (mean_browser_score * 0.20 if bot_ua_seen else 0)
            + sni_rate * 0.10
            + alpn_rate * 0.05
            + (ja4_count * 5 if ja4_count > 2 else 0)
            + (15 if rotating else 0)
            + (10 if rare_hits > sample_size * 0.5 else 0)
        )
        spoof_score = min(100, round(raw_score))

        # Verdict: the first matching rule wins.
        verdict_rules = (
            (spoof_score >= 70, "high_confidence_spoofing"),
            (spoof_score >= 40, "suspicious_spoofing"),
            (bot_ua_seen and mean_browser_score < 20, "known_bot_no_spoofing"),
            (browser_ua_seen and spoof_score < 20, "legitimate_browser"),
        )
        verdict = next((label for matched, label in verdict_rules if matched), "inconclusive")

        # Human-readable explanation.
        explanation = []
        if ua_ch_hits > sample_size * 0.3:
            explanation.append(f"UA-Client-Hints mismatch sur {round(ua_ch_hits*100/sample_size)}% des requêtes")
        if bot_ua_seen and mean_browser_score > 40:
            explanation.append(f"JA4 ressemble à un navigateur (score {round(mean_browser_score)}/100) mais UA est de type bot")
        if ja4_count > 2:
            explanation.append(f"{ja4_count} JA4 distincts utilisés → rotation de fingerprint")
        if rotating:
            explanation.append("is_ua_rotating détecté → rotation d'User-Agent confirmée")
        if sni_hits > 0:
            explanation.append(f"SNI ≠ Host header sur {sni_hits}/{sample_size} requêtes")
        if not explanation:
            explanation.append("Aucun indicateur de spoofing majeur détecté")

        indicators = {
            "ua_ch_mismatch_rate": round(ua_ch_rate, 1),
            "sni_mismatch_rate": round(sni_rate, 1),
            "alpn_mismatch_rate": round(alpn_rate, 1),
            "avg_browser_score": round(mean_browser_score, 1),
            "distinct_ja4_count": ja4_count,
            "is_ua_rotating": rotating,
            "rare_ja4_rate": round(rare_hits / sample_size * 100, 1),
        }
        latest_detection = {
            "detected_at": newest[14].isoformat() if newest[14] else "",
            "threat_level": str(newest[15] or "LOW"),
            "modern_browser_score": int(newest[2] or 0),
            "header_count": int(newest[9] or 0),
            "has_accept_language": bool(newest[10]),
            "has_cookie": bool(newest[11]),
            "has_referer": bool(newest[12]),
            "header_order_shared_count": int(newest[13] or 0),
        }

        return {
            "ip": ip,
            "verdict": verdict,
            "spoofing_score": spoof_score,
            "explanation": explanation,
            "indicators": indicators,
            "fingerprints": {
                "ja4_list": list(ja4_values),
                "latest_ja4": str(newest[0] or ""),
            },
            "user_agents": top_uas,
            "latest_detection": latest_detection,
        }

    except HTTPException:
        # Let the 404 above through untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 5 — JA4 légitimes (baseline / whitelist)
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/legitimate-ja4")
async def get_legitimate_ja4(
    hours: int = Query(168, ge=24, le=720, description="Fenêtre pour établir la baseline"),
    min_ips: int = Query(50, ge=5, description="Nombre minimum d'IPs pour qualifier un JA4 de légitime"),
):
    """
    Build a baseline of JA4 fingerprints that look legitimate.

    A JA4 is kept by the query when, over the last ``hours`` hours:
    - it is used by at least ``min_ips`` distinct source IPs
    - its ``ua_ch_mismatch`` rate is below 5%
    - its average ``modern_browser_score`` is above 60
    - none of its rows is flagged ``is_rare_ja4``

    At most 100 JA4s are returned, ordered by descending number of IPs.
    The result is meant as a list of whitelist candidates.

    Args:
        hours: Window used to establish the baseline, in hours.
        min_ips: Minimum number of distinct IPs for a JA4 to qualify.

    Returns:
        A dict with ``items``, ``total``, ``period_hours`` and a ``note``.

    Raises:
        HTTPException: 500 on any unexpected error.
    """
    try:
        query = """
        SELECT
            ja4,
            uniq(src_ip) AS unique_ips,
            count() AS total_detections,
            round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
            avg(modern_browser_score) AS avg_browser_score,
            countIf(is_rare_ja4 = true) AS rare_count,
            round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct,
            round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct
        FROM ml_detected_anomalies
        WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
          AND ja4 != '' AND ja4 IS NOT NULL
        GROUP BY ja4
        HAVING unique_ips >= %(min_ips)s
           AND ua_ch_mismatch_pct < 5.0
           AND avg_browser_score > 60
           AND rare_count = 0
        ORDER BY unique_ips DESC
        LIMIT 100
        """

        rows = db.query(query, {"hours": hours, "min_ips": min_ips}).result_rows

        items = []
        for record in rows:
            # Positions follow the SELECT column order of `query`.
            ip_total = int(record[1] or 0)
            mismatch_pct = float(record[3] or 0)
            browser_score = float(record[4] or 0)

            # Confidence = low-mismatch part (max 40) + browser-score part
            # (score x 0.40) + volume part (max 20), capped at 100.
            # NOTE(review): HAVING already enforces unique_ips >= min_ips, so
            # the volume part always evaluates to 20 for returned rows.
            confidence = min(100, round(
                (1 - mismatch_pct / 100) * 40
                + browser_score * 0.40
                + min(ip_total / min_ips, 1) * 20
            ))

            items.append({
                "ja4": str(record[0]),
                "unique_ips": ip_total,
                "total_detections": int(record[2] or 0),
                "ua_ch_mismatch_pct": mismatch_pct,
                "avg_browser_score": round(browser_score, 1),
                "critical_pct": float(record[6] or 0),
                "high_pct": float(record[7] or 0),
                "legitimacy_confidence": confidence,
            })

        response = {
            "items": items,
            "total": len(items),
            "period_hours": hours,
            "note": "Ces JA4 sont candidats à une whitelist. Vérifier manuellement avant de whitelister.",
        }
        return response

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
Reference in New Issue
Block a user