feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized
Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
1
services/dashboard/backend/routes/__init__.py
Normal file
1
services/dashboard/backend/routes/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Routes package
|
||||
686
services/dashboard/backend/routes/analysis.py
Normal file
686
services/dashboard/backend/routes/analysis.py
Normal file
@ -0,0 +1,686 @@
|
||||
"""
|
||||
Endpoints pour l'analyse de corrélations et la classification SOC
|
||||
"""
|
||||
from collections import defaultdict
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional, List
|
||||
import ipaddress
|
||||
import json
|
||||
|
||||
from ..database import db
|
||||
from ..models import (
|
||||
SubnetAnalysis, CountryAnalysis, CountryData, JA4Analysis, JA4SubnetData,
|
||||
UserAgentAnalysis, UserAgentData, CorrelationIndicators,
|
||||
ClassificationRecommendation, ClassificationLabel,
|
||||
ClassificationCreate, Classification, ClassificationsListResponse
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/analysis", tags=["analysis"])
|
||||
|
||||
# Mapping code ISO → nom lisible (utilisé par analyze_ip_country et analyze_country)
|
||||
_COUNTRY_NAMES: dict[str, str] = {
|
||||
"CN": "China", "US": "United States", "DE": "Germany",
|
||||
"FR": "France", "RU": "Russia", "GB": "United Kingdom",
|
||||
"NL": "Netherlands", "IN": "India", "BR": "Brazil",
|
||||
"JP": "Japan", "KR": "South Korea", "IT": "Italy",
|
||||
"ES": "Spain", "CA": "Canada", "AU": "Australia"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE SUBNET / ASN
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/subnet", response_model=SubnetAnalysis)
async def analyze_subnet(ip: str):
    """
    Analyse the IPs observed in the same /24 subnet and the same ASN as ``ip``.

    Args:
        ip: Address taken from the URL path.

    Returns:
        SubnetAnalysis holding the /24 network of ``ip``, the distinct source
        IPs seen in that range over the last 24 hours, the ASN number and
        organisation of the most recent anomaly row for ``ip`` ("0" and
        "Unknown" when there is none), the number of unique IPs seen for that
        ASN over the last 24 hours, and an ``alert`` flag raised when more
        than 10 IPs share the subnet.

    Raises:
        HTTPException: 400 when ``ip`` is not a valid IP address; 500 when a
            query or the response construction fails.
    """
    # Reject malformed input as a client error instead of letting the
    # ValueError be reported as an internal server error further down.
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Adresse IP invalide: {ip}") from None

    try:
        # Compute the enclosing /24 (strict=False tolerates host bits).
        # NOTE(review): the range filter below relies on toIPv4, so IPv6
        # input is presumably unsupported here — confirm.
        subnet = ipaddress.ip_network(f"{ip}/24", strict=False)
        subnet_str = str(subnet)

        # ASN information from the most recent anomaly row for this IP.
        asn_query = """
            SELECT asn_number, asn_org
            FROM ml_detected_anomalies
            WHERE src_ip = %(ip)s
            ORDER BY detected_at DESC
            LIMIT 1
        """
        asn_result = db.query(asn_query, {"ip": ip})

        if not asn_result.result_rows:
            # Fallback: no row for this IP, use placeholder values.
            asn_number = "0"
            asn_org = "Unknown"
        else:
            asn_number = str(asn_result.result_rows[0][0] or "0")
            asn_org = asn_result.result_rows[0][1] or "Unknown"

        # Distinct IPs of the same /24 seen during the last 24 hours.
        subnet_ips_query = """
            SELECT DISTINCT src_ip
            FROM ml_detected_anomalies
            WHERE toIPv4(src_ip) >= toIPv4(%(subnet_start)s)
              AND toIPv4(src_ip) <= toIPv4(%(subnet_end)s)
              AND detected_at >= now() - INTERVAL 24 HOUR
            ORDER BY src_ip
        """

        subnet_result = db.query(subnet_ips_query, {
            "subnet_start": str(subnet.network_address),
            "subnet_end": str(subnet.broadcast_address)
        })

        subnet_ips = [str(row[0]) for row in subnet_result.result_rows]

        # Unique IPs of the same ASN; skipped when the ASN is the "0" placeholder.
        if asn_number != "0":
            asn_total_query = """
                SELECT uniq(src_ip)
                FROM ml_detected_anomalies
                WHERE asn_number = %(asn_number)s
                  AND detected_at >= now() - INTERVAL 24 HOUR
            """

            asn_total_result = db.query(asn_total_query, {"asn_number": asn_number})
            asn_total = asn_total_result.result_rows[0][0] if asn_total_result.result_rows else 0
        else:
            asn_total = 0

        return SubnetAnalysis(
            ip=ip,
            subnet=subnet_str,
            ips_in_subnet=subnet_ips,
            total_in_subnet=len(subnet_ips),
            asn_number=asn_number,
            asn_org=asn_org,
            total_in_asn=asn_total,
            alert=len(subnet_ips) > 10
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{ip}/country", response_model=dict)
async def analyze_ip_country(ip: str):
    """
    Report the country of one IP and the country breakdown of its ASN.

    Args:
        ip: Address taken from the URL path.

    Returns:
        A dict with ``ip_country`` (code and display name taken from the most
        recent anomaly row for ``ip``) and ``asn_countries`` (per-country
        count and percentage for the same ASN over the last 24 hours).
        When no row exists for ``ip`` the result is
        ``{"ip_country": None, "asn_countries": []}``.

    Raises:
        HTTPException: 500 when a query fails.
    """
    try:
        # Most recent country / ASN recorded for this IP.
        latest_row_sql = """
            SELECT country_code, asn_number
            FROM ml_detected_anomalies
            WHERE src_ip = %(ip)s
            ORDER BY detected_at DESC
            LIMIT 1
        """
        latest = db.query(latest_row_sql, {"ip": ip})

        if not latest.result_rows:
            return {"ip_country": None, "asn_countries": []}

        country_code, asn = latest.result_rows[0][0], latest.result_rows[0][1]

        # Country distribution inside the same ASN.
        distribution_sql = """
            SELECT
                country_code,
                count() AS count
            FROM ml_detected_anomalies
            WHERE asn_number = %(asn_number)s
              AND detected_at >= now() - INTERVAL 24 HOUR
            GROUP BY country_code
            ORDER BY count DESC
        """
        distribution = db.query(distribution_sql, {"asn_number": asn})
        rows = distribution.result_rows

        grand_total = sum(hits for _, hits in ((r[0], r[1]) for r in rows))

        breakdown = []
        for r in rows:
            code, hits = r[0], r[1]
            if grand_total > 0:
                pct = round((hits / grand_total * 100), 2)
            else:
                pct = 0.0
            breakdown.append({
                "code": code,
                "name": _COUNTRY_NAMES.get(code, code),
                "count": hits,
                "percentage": pct
            })

        ip_country = {
            "code": country_code,
            "name": _COUNTRY_NAMES.get(country_code, country_code)
        }
        return {"ip_country": ip_country, "asn_countries": breakdown}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE PAYS
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/country", response_model=CountryAnalysis)
async def analyze_country(days: int = Query(1, ge=1, le=30)):
    """
    Analyse how detected anomalies are distributed across countries.

    Args:
        days: Size of the analysed window in days (1 to 30).

    Returns:
        CountryAnalysis with the per-country counts and percentages for the
        window, the per-country percentage over the last 7 days (baseline),
        and ``alert_country``: the first country, in descending count order,
        whose share is above 30% and more than twice its non-zero baseline
        share, or None.

    Raises:
        HTTPException: 500 when a query or the response construction fails.
    """
    def share(part, whole):
        # Percentage rounded to 2 decimals; 0.0 when there is nothing to divide by.
        if whole > 0:
            return round((part / whole * 100), 2)
        return 0.0

    try:
        # Countries for the requested window, most frequent first.
        window_sql = """
            SELECT
                country_code,
                count() AS count
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL %(days)s DAY
              AND country_code != '' AND country_code IS NOT NULL
            GROUP BY country_code
            ORDER BY count DESC
        """
        window_rows = db.query(window_sql, {"days": days}).result_rows

        # Total used as the denominator of the percentages.
        window_total = sum(r[1] for r in window_rows)

        top_countries = []
        for r in window_rows:
            top_countries.append(
                CountryData(
                    code=r[0],
                    name=_COUNTRY_NAMES.get(r[0], r[0]),
                    count=r[1],
                    percentage=share(r[1], window_total)
                )
            )

        # Baseline: same aggregation over the last 7 days.
        reference_sql = """
            SELECT
                country_code,
                count() AS count
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL 7 DAY
              AND country_code != '' AND country_code IS NOT NULL
            GROUP BY country_code
            ORDER BY count DESC
        """
        reference_rows = db.query(reference_sql).result_rows
        reference_total = sum(r[1] for r in reference_rows)

        baseline = {}
        for r in reference_rows:
            baseline[r[0]] = share(r[1], reference_total)

        # First over-represented country, if any.
        alert_country = next(
            (
                entry.code
                for entry in top_countries
                if baseline.get(entry.code, 0) > 0
                and entry.percentage > baseline.get(entry.code, 0) * 2
                and entry.percentage > 30
            ),
            None,
        )

        return CountryAnalysis(
            top_countries=top_countries,
            baseline=baseline,
            alert_country=alert_country
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/ja4", response_model=JA4Analysis)
async def analyze_ja4(ip: str):
    """
    Analyse the JA4 fingerprint associated with an IP.

    Args:
        ip: Address taken from the URL path.

    Returns:
        JA4Analysis with the most recent non-empty JA4 recorded for ``ip``,
        the number of unique IPs sharing it over the last 24 hours, the ten
        /24 subnets with the most rows for it over the same period, and the
        other distinct JA4 values recorded for ``ip``. When ``ip`` has no
        JA4 at all, an empty analysis (``ja4=""``, zero counts, empty lists)
        is returned.

    Raises:
        HTTPException: 500 when a query or the response construction fails.
    """
    try:
        # Latest fingerprint for this IP.
        latest_sql = """
            SELECT ja4
            FROM ml_detected_anomalies
            WHERE src_ip = %(ip)s
              AND ja4 != '' AND ja4 IS NOT NULL
            ORDER BY detected_at DESC
            LIMIT 1
        """
        latest = db.query(latest_sql, {"ip": ip})

        if not latest.result_rows:
            return JA4Analysis(
                ja4="",
                shared_ips_count=0,
                top_subnets=[],
                other_ja4_for_ip=[]
            )

        fingerprint = latest.result_rows[0][0]

        # How many distinct IPs present the same fingerprint.
        sharing_sql = """
            SELECT uniq(src_ip)
            FROM ml_detected_anomalies
            WHERE ja4 = %(ja4)s
              AND detected_at >= now() - INTERVAL 24 HOUR
        """
        sharing = db.query(sharing_sql, {"ja4": fingerprint})
        if sharing.result_rows:
            shared_count = sharing.result_rows[0][0]
        else:
            shared_count = 0

        # Per-IP row counts for the fingerprint (aggregated to /24 below).
        per_ip_sql = """
            SELECT
                src_ip,
                count() AS count
            FROM ml_detected_anomalies
            WHERE ja4 = %(ja4)s
              AND detected_at >= now() - INTERVAL 24 HOUR
            GROUP BY src_ip
            ORDER BY count DESC
        """
        per_ip = db.query(per_ip_sql, {"ja4": fingerprint})

        # Fold the counts into /24 buckets; values that do not split into
        # exactly four dot-separated parts are ignored.
        totals_by_subnet = {}
        for address, hits in ((r[0], r[1]) for r in per_ip.result_rows):
            octets = str(address).split('.')
            if len(octets) != 4:
                continue
            key = f"{octets[0]}.{octets[1]}.{octets[2]}.0/24"
            totals_by_subnet[key] = totals_by_subnet.get(key, 0) + hits

        ranked = sorted(totals_by_subnet.items(), key=lambda pair: pair[1], reverse=True)
        top_subnets = []
        for key, hits in ranked[:10]:
            top_subnets.append(JA4SubnetData(subnet=key, count=hits))

        # Other fingerprints recorded for the same IP.
        others_sql = """
            SELECT DISTINCT ja4
            FROM ml_detected_anomalies
            WHERE src_ip = %(ip)s
              AND ja4 != '' AND ja4 IS NOT NULL
              AND ja4 != %(current_ja4)s
        """
        others = db.query(others_sql, {"ip": ip, "current_ja4": fingerprint})
        other_ja4 = [r[0] for r in others.result_rows]

        return JA4Analysis(
            ja4=fingerprint,
            shared_ips_count=shared_count,
            top_subnets=top_subnets,
            other_ja4_for_ip=other_ja4
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE USER-AGENTS
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/user-agents", response_model=UserAgentAnalysis)
async def analyze_user_agents(ip: str):
    """
    Analyse the User-Agent headers sent by an IP over the last 24 hours.

    Args:
        ip: Address taken from the URL path.

    Returns:
        UserAgentAnalysis with one entry per distinct non-empty User-Agent
        (count, percentage of the IP's requests, and a ``bot`` / ``script`` /
        ``normal`` classification), the share of requests classified as bot
        or script, and an ``alert`` flag raised when that share exceeds 20%.
        ``ja4_user_agents`` currently mirrors ``ip_user_agents``.

    Raises:
        HTTPException: 500 when a query or the response construction fails.
    """
    try:
        # User-Agents for this IP (from http_logs).
        ip_ua_query = """
            SELECT
                header_user_agent AS ua,
                count() AS count
            FROM mabase_prod.http_logs
            WHERE src_ip = %(ip)s
              AND header_user_agent != '' AND header_user_agent IS NOT NULL
              AND time >= now() - INTERVAL 24 HOUR
            GROUP BY ua
            ORDER BY count DESC
        """

        # Real total number of requests, including those without a
        # User-Agent, so percentages are relative to all traffic of the IP.
        ip_total_query = """
            SELECT count()
            FROM mabase_prod.http_logs
            WHERE src_ip = %(ip)s
              AND time >= now() - INTERVAL 24 HOUR
        """

        ip_ua_result = db.query(ip_ua_query, {"ip": ip})
        ip_total_result = db.query(ip_total_query, {"ip": ip})

        def classify_ua(ua: str) -> str:
            """Classify a User-Agent string as 'bot', 'script' or 'normal'."""
            # Check for a missing/blank value first: calling .lower() on None
            # would raise before the guard was ever reached.
            if not ua or ua.strip() == '':
                return 'script'
            ua_lower = ua.lower()
            if any(bot in ua_lower for bot in ['bot', 'crawler', 'spider', 'curl', 'wget', 'python', 'requests', 'scrapy']):
                return 'bot'
            # 'python' is already caught by the bot list above; it is kept
            # here only to leave the marker list unchanged.
            if any(script in ua_lower for script in ['python', 'java', 'php', 'ruby', 'perl', 'node']):
                return 'script'
            return 'normal'

        # Denominator of the percentages; fall back to the sum of the
        # per-User-Agent counts when the total query reports nothing.
        total_count = ip_total_result.result_rows[0][0] if ip_total_result.result_rows else 0
        if total_count == 0:
            total_count = sum(row[1] for row in ip_ua_result.result_rows)

        ip_user_agents = [
            UserAgentData(
                value=row[0],
                count=row[1],
                percentage=round((row[1] / total_count * 100), 2) if total_count > 0 else 0.0,
                classification=classify_ua(row[0])
            )
            for row in ip_ua_result.result_rows
        ]

        # For the JA4 User-Agents, the same list is returned for now.
        ja4_user_agents = ip_user_agents

        # Share of requests whose User-Agent is classified as bot or script.
        bot_count = sum(ua.count for ua in ip_user_agents if ua.classification in ['bot', 'script'])
        bot_percentage = (bot_count / total_count * 100) if total_count > 0 else 0

        return UserAgentAnalysis(
            ip_user_agents=ip_user_agents,
            ja4_user_agents=ja4_user_agents,
            bot_percentage=bot_percentage,
            alert=bot_percentage > 20
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# RECOMMANDATION DE CLASSIFICATION
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/recommendation", response_model=ClassificationRecommendation)
async def get_classification_recommendation(ip: str):
    """
    Build a classification recommendation for an IP from correlation signals.

    The subnet, country, JA4 and User-Agent analyses are run one after the
    other; an analysis that fails is treated as absent. Each matching signal
    adds to a confidence score:

    * more than 10 IPs in the subnet: +0.25, tag ``distributed``
    * JA4 shared by more than 50 IPs: +0.25, tag ``ja4-rotation``
    * more than 20% bot/script User-Agents: +0.25, tag ``bot-ua``
    * an over-represented country reported: +0.15, tag ``country-<code>``
    * ASN organisation containing a hosting keyword: +0.10, tag ``hosting-asn``

    A score of at least 0.7 yields MALICIOUS (plus tag ``campaign``), at
    least 0.4 SUSPICIOUS, otherwise LEGITIMATE.

    Args:
        ip: Address taken from the URL path.

    Returns:
        ClassificationRecommendation with label, confidence (capped at 1.0),
        indicators, suggested tags and a textual reason.

    Raises:
        HTTPException: 500 when building the recommendation fails.
    """
    async def attempt(pending):
        # Await one analysis; any failure simply means "no data".
        try:
            return await pending
        except Exception:
            return None

    try:
        # Gather the individual analyses.
        subnet_analysis = await attempt(analyze_subnet(ip))
        country_analysis = await attempt(analyze_country(1))
        ja4_analysis = await attempt(analyze_ja4(ip))
        ua_analysis = await attempt(analyze_user_agents(ip))

        # Indicators, with zero defaults for missing analyses.
        if subnet_analysis:
            subnet_total = subnet_analysis.total_in_subnet
            asn_total = subnet_analysis.total_in_asn
        else:
            subnet_total = 0
            asn_total = 0

        indicators = CorrelationIndicators(
            subnet_ips_count=subnet_total,
            asn_ips_count=asn_total,
            country_percentage=0.0,
            ja4_shared_ips=ja4_analysis.shared_ips_count if ja4_analysis else 0,
            user_agents_count=len(ua_analysis.ja4_user_agents) if ua_analysis else 0,
            bot_ua_percentage=ua_analysis.bot_percentage if ua_analysis else 0.0
        )

        # Confidence score and its justification.
        score = 0.0
        reasons = []
        tags = []

        # Subnet with more than 10 IPs.
        if subnet_analysis and subnet_analysis.total_in_subnet > 10:
            score += 0.25
            reasons.append(f"{subnet_analysis.total_in_subnet} IPs du même subnet")
            tags.append("distributed")

        # JA4 shared by more than 50 IPs.
        if ja4_analysis and ja4_analysis.shared_ips_count > 50:
            score += 0.25
            reasons.append(f"{ja4_analysis.shared_ips_count} IPs avec même JA4")
            tags.append("ja4-rotation")

        # More than 20% bot/script User-Agents.
        if ua_analysis and ua_analysis.bot_percentage > 20:
            score += 0.25
            reasons.append(f"{ua_analysis.bot_percentage:.0f}% UAs bots/scripts")
            tags.append("bot-ua")

        # Over-represented country.
        if country_analysis and country_analysis.alert_country:
            flagged = country_analysis.alert_country
            score += 0.15
            reasons.append(f"Pays {flagged} surreprésenté")
            tags.append(f"country-{flagged.lower()}")

        # Hosting-provider ASN.
        if subnet_analysis:
            hosting_keywords = ["ovh", "amazon", "aws", "google", "azure", "digitalocean", "linode", "vultr", "china169", "chinamobile"]
            org_lower = (subnet_analysis.asn_org or "").lower()
            for keyword in hosting_keywords:
                if keyword in org_lower:
                    score += 0.10
                    tags.append("hosting-asn")
                    break

        # Map the score to a label.
        if score >= 0.7:
            label = ClassificationLabel.MALICIOUS
            tags.append("campaign")
        elif score >= 0.4:
            label = ClassificationLabel.SUSPICIOUS
        else:
            label = ClassificationLabel.LEGITIMATE

        if reasons:
            reason = " | ".join(reasons)
        else:
            reason = "Aucun indicateur fort"

        return ClassificationRecommendation(
            label=label,
            confidence=min(score, 1.0),
            indicators=indicators,
            suggested_tags=tags,
            reason=reason
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLASSIFICATIONS CRUD
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/classifications", response_model=Classification)
async def create_classification(data: ClassificationCreate):
    """
    Create a classification for an IP or a JA4 fingerprint.

    The row is inserted, then the most recent classification for the same
    entity (the IP when provided, otherwise the JA4) is read back and
    returned.

    Args:
        data: Classification payload; at least one of ``ip`` / ``ja4`` must
            be set.

    Returns:
        The Classification read back after the insert.

    Raises:
        HTTPException: 400 when neither ``ip`` nor ``ja4`` is provided;
            404 when the inserted row cannot be read back; 500 on any other
            failure.
    """
    try:
        # Validation: either ip or ja4 must be provided.
        if not data.ip and not data.ja4:
            raise HTTPException(status_code=400, detail="IP ou JA4 requis")

        query = """
            INSERT INTO mabase_prod.classifications
            (ip, ja4, label, tags, comment, confidence, features, analyst, created_at)
            VALUES
            (%(ip)s, %(ja4)s, %(label)s, %(tags)s, %(comment)s, %(confidence)s, %(features)s, %(analyst)s, now())
        """

        db.query(query, {
            "ip": data.ip or "",
            "ja4": data.ja4 or "",
            "label": data.label.value,
            "tags": data.tags,
            "comment": data.comment,
            "confidence": data.confidence,
            "features": json.dumps(data.features),
            "analyst": data.analyst
        })

        # Read back the classification that was just created. The WHERE
        # fragment is one of two fixed strings; the value itself is bound
        # as a query parameter.
        where_clause = "ip = %(entity)s" if data.ip else "ja4 = %(entity)s"
        select_query = f"""
            SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
            FROM mabase_prod.classifications
            WHERE {where_clause}
            ORDER BY created_at DESC
            LIMIT 1
        """

        result = db.query(select_query, {"entity": data.ip or data.ja4})

        if not result.result_rows:
            raise HTTPException(status_code=404, detail="Classification non trouvée")

        row = result.result_rows[0]
        return Classification(
            ip=row[0] or None,
            ja4=row[1] or None,
            label=ClassificationLabel(row[2]),
            tags=row[3],
            comment=row[4],
            confidence=row[5],
            features=json.loads(row[6]) if row[6] else {},
            analyst=row[7],
            created_at=row[8]
        )

    except HTTPException:
        # Keep the deliberate 400/404 responses instead of rewrapping them
        # as a 500 in the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/classifications", response_model=ClassificationsListResponse)
async def list_classifications(
    ip: Optional[str] = Query(None, description="Filtrer par IP"),
    ja4: Optional[str] = Query(None, description="Filtrer par JA4"),
    label: Optional[str] = Query(None, description="Filtrer par label"),
    limit: int = Query(100, ge=1, le=1000)
):
    """
    List stored classifications, newest first.

    Args:
        ip: Optional exact-match filter on the IP column.
        ja4: Optional exact-match filter on the JA4 column.
        label: Optional exact-match filter on the label column.
        limit: Maximum number of rows returned (1 to 1000).

    Returns:
        ClassificationsListResponse with the matching rows (at most
        ``limit``) and the total number of rows matching the filters.

    Raises:
        HTTPException: 500 when a query or row conversion fails.
    """
    def to_model(record):
        # Empty ip/ja4 become None; features are stored as a JSON string.
        return Classification(
            ip=record[0] or None,
            ja4=record[1] or None,
            label=ClassificationLabel(record[2]),
            tags=record[3],
            comment=record[4],
            confidence=record[5],
            features=json.loads(record[6]) if record[6] else {},
            analyst=record[7],
            created_at=record[8]
        )

    try:
        conditions = ["1=1"]
        params = {"limit": limit}

        # Only column names from this fixed tuple reach the SQL text;
        # the values are bound as parameters.
        for column, wanted in (("ip", ip), ("ja4", ja4), ("label", label)):
            if wanted:
                conditions.append(f"{column} = %({column})s")
                params[column] = wanted

        where_clause = " AND ".join(conditions)

        page_sql = f"""
            SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
            FROM mabase_prod.classifications
            WHERE {where_clause}
            ORDER BY created_at DESC
            LIMIT %(limit)s
        """
        page = db.query(page_sql, params)
        classifications = [to_model(record) for record in page.result_rows]

        # Total number of matching rows, independent of the limit.
        total_sql = f"""
            SELECT count()
            FROM mabase_prod.classifications
            WHERE {where_clause}
        """
        counted = db.query(total_sql, params)
        if counted.result_rows:
            total = counted.result_rows[0][0]
        else:
            total = 0

        return ClassificationsListResponse(
            items=classifications,
            total=total
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/classifications/stats")
async def get_classification_stats():
    """
    Return per-label statistics over all stored classifications.

    Returns:
        ``{"stats": [...]}`` where each entry carries the label, the number
        of rows, the number of unique IPs and the average confidence
        (0.0 when the average is empty or zero), ordered by row count
        descending.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        per_label_sql = """
            SELECT
                label,
                count() AS total,
                uniq(ip) AS unique_ips,
                avg(confidence) AS avg_confidence
            FROM mabase_prod.classifications
            GROUP BY label
            ORDER BY total DESC
        """
        outcome = db.query(per_label_sql)

        stats = []
        for record in outcome.result_rows:
            mean_confidence = float(record[3]) if record[3] else 0.0
            stats.append({
                "label": record[0],
                "total": record[1],
                "unique_ips": record[2],
                "avg_confidence": mean_confidence
            })

        return {"stats": stats}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
92
services/dashboard/backend/routes/attributes.py
Normal file
92
services/dashboard/backend/routes/attributes.py
Normal file
@ -0,0 +1,92 @@
|
||||
"""
|
||||
Endpoints pour la liste des attributs uniques
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from ..database import db
|
||||
from ..models import AttributeListResponse, AttributeListItem
|
||||
|
||||
router = APIRouter(prefix="/api/attributes", tags=["attributes"])
|
||||
|
||||
|
||||
@router.get("/{attr_type}", response_model=AttributeListResponse)
async def get_attributes(
    attr_type: str,
    limit: int = Query(100, ge=1, le=1000, description="Nombre maximum de résultats")
):
    """
    List the distinct values of one attribute seen over the last 24 hours.

    Args:
        attr_type: Attribute name; one of ip, ja4, country, asn, host,
            threat_level, model_name, asn_org.
        limit: Maximum number of values returned (1 to 1000).

    Returns:
        AttributeListResponse with the values and their row counts, most
        frequent first; ``total`` is the number of items returned.

    Raises:
        HTTPException: 400 for an unsupported ``attr_type``; 500 when the
            query fails.
    """
    try:
        # Attribute type -> column name. Only these fixed column names are
        # ever interpolated into the SQL text.
        type_column_map = {
            "ip": "src_ip",
            "ja4": "ja4",
            "country": "country_code",
            "asn": "asn_number",
            "host": "host",
            "threat_level": "threat_level",
            "model_name": "model_name",
            "asn_org": "asn_org"
        }

        column = type_column_map.get(attr_type)
        if column is None:
            raise HTTPException(
                status_code=400,
                detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
            )

        if attr_type != "ip":
            # Common case: aggregate on the column and drop empty/null values.
            base_query = f"""
                SELECT
                    {column} AS value,
                    count() AS count
                FROM ml_detected_anomalies
                WHERE detected_at >= now() - INTERVAL 24 HOUR
            """
            query = f"""
                {base_query}
                AND {column} != '' AND {column} IS NOT NULL
                GROUP BY value
                ORDER BY count DESC
                LIMIT %(limit)s
            """
        else:
            # IP columns cannot be compared to '' directly, so the address
            # is converted to a string in a subquery and filtered afterwards.
            query = f"""
                SELECT value, count FROM (
                    SELECT toString({column}) AS value, count() AS count
                    FROM ml_detected_anomalies
                    WHERE detected_at >= now() - INTERVAL 24 HOUR
                    GROUP BY {column}
                )
                WHERE value != '' AND value IS NOT NULL
                ORDER BY count DESC
                LIMIT %(limit)s
            """

        fetched = db.query(query, {"limit": limit})

        items = []
        for record in fetched.result_rows:
            items.append(AttributeListItem(value=str(record[0]), count=record[1]))

        return AttributeListResponse(
            type=attr_type,
            items=items,
            total=len(items)
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
238
services/dashboard/backend/routes/audit.py
Normal file
238
services/dashboard/backend/routes/audit.py
Normal file
@ -0,0 +1,238 @@
|
||||
"""
|
||||
Routes pour l'audit et les logs d'activité
|
||||
"""
|
||||
import logging
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/audit", tags=["audit"])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.post("/logs")
async def create_audit_log(
    request: Request,
    action: str,
    entity_type: Optional[str] = None,
    entity_id: Optional[str] = None,
    entity_count: Optional[int] = None,
    details: Optional[dict] = None,
    user: Optional[str] = "soc_user"
):
    """
    Record an audit log entry for a user action.

    The insert is best-effort: a failure is logged as a warning and the
    caller still receives a success response.

    Args:
        request: Incoming request, used to read the client address.
        action: Name of the audited action.
        entity_type: Optional type of the entity concerned.
        entity_id: Optional identifier of the entity concerned.
        entity_count: Optional number of entities concerned.
        details: Optional free-form details, stored as ``str(details)``.
        user: Name of the acting user.

    Returns:
        A dict with ``status``, ``message``, the ``action`` and the ISO
        timestamp generated for the entry.

    Raises:
        HTTPException: 500 when anything outside the insert itself fails.
    """
    try:
        # Client address, or "unknown" when the request carries none.
        if request.client:
            client_ip = request.client.host
        else:
            client_ip = "unknown"

        # NOTE(review): str(details) stores the Python repr of the dict,
        # not JSON — confirm this is what readers of the table expect.
        if details:
            serialized_details = str(details)
        else:
            serialized_details = ''

        recorded_at = datetime.now()

        # Row to insert into ClickHouse.
        insert_query = """
            INSERT INTO mabase_prod.audit_logs
            (timestamp, user_name, action, entity_type, entity_id, entity_count, details, client_ip)
            VALUES
            (%(timestamp)s, %(user)s, %(action)s, %(entity_type)s, %(entity_id)s, %(entity_count)s, %(details)s, %(client_ip)s)
        """
        params = {
            "timestamp": recorded_at,
            "user": user,
            "action": action,
            "entity_type": entity_type,
            "entity_id": entity_id,
            "entity_count": entity_count,
            "details": serialized_details,
            "client_ip": client_ip,
        }

        # Note: this requires the audit_logs table to exist
        # (see deploy_audit_logs_table.sql).
        try:
            db.query(insert_query, params)
        except Exception as e:
            # The table may not exist yet: log it but do not block the caller.
            # NOTE(review): the response below still reports success.
            logger.warning(f"Could not insert audit log: {e}")

        return {
            "status": "success",
            "message": "Audit log created",
            "action": action,
            "timestamp": recorded_at.isoformat()
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/logs")
async def get_audit_logs(
    hours: int = Query(24, ge=1, le=720, description="Fenêtre temporelle en heures"),
    user: Optional[str] = Query(None, description="Filtrer par utilisateur"),
    action: Optional[str] = Query(None, description="Filtrer par action"),
    entity_type: Optional[str] = Query(None, description="Filtrer par type d'entité"),
    limit: int = Query(100, ge=1, le=1000, description="Nombre maximum de résultats")
):
    """
    Fetch audit log entries, newest first, with optional filters.

    Args:
        hours: Size of the time window in hours (1 to 720).
        user: Optional exact-match filter on the user name.
        action: Optional exact-match filter on the action.
        entity_type: Optional exact-match filter on the entity type.
        limit: Maximum number of entries returned (1 to 1000).

    Returns:
        A dict with ``items``, ``total`` (number of items returned) and
        ``period_hours``. When the error message indicates that the table
        does not exist, an empty result with a ``warning`` is returned
        instead of an error.

    Raises:
        HTTPException: 500 on any other failure.
    """
    try:
        conditions = ["timestamp >= now() - INTERVAL %(hours)s HOUR"]
        params = {"hours": hours, "limit": limit}

        # (column, parameter name, requested value); values are bound as
        # parameters, only the fixed column names reach the SQL text.
        optional_filters = (
            ("user_name", "user", user),
            ("action", "action", action),
            ("entity_type", "entity_type", entity_type),
        )
        for column, key, wanted in optional_filters:
            if wanted:
                conditions.append(f"{column} = %({key})s")
                params[key] = wanted

        where_clause = " AND ".join(conditions)

        query = f"""
            SELECT
                timestamp,
                user_name,
                action,
                entity_type,
                entity_id,
                entity_count,
                details,
                client_ip
            FROM mabase_prod.audit_logs
            WHERE {where_clause}
            ORDER BY timestamp DESC
            LIMIT %(limit)s
        """
        fetched = db.query(query, params)

        # Null/empty columns are normalised to "" (or 0 for the count).
        logs = [
            {
                "timestamp": record[0].isoformat() if record[0] else "",
                "user_name": record[1] or "",
                "action": record[2] or "",
                "entity_type": record[3] or "",
                "entity_id": record[4] or "",
                "entity_count": record[5] or 0,
                "details": record[6] or "",
                "client_ip": record[7] or ""
            }
            for record in fetched.result_rows
        ]

        return {
            "items": logs,
            "total": len(logs),
            "period_hours": hours
        }

    except Exception as e:
        # If the table doesn't exist, return an empty result.
        message = str(e)
        if "Table" in message and "doesn't exist" in message:
            return {
                "items": [],
                "total": 0,
                "period_hours": hours,
                "warning": "Audit logs table not created yet"
            }
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/stats")
async def get_audit_stats(
    hours: int = Query(24, ge=1, le=720)
):
    """
    Return per-action audit statistics for a time window.

    Args:
        hours: Size of the time window in hours (1 to 720).

    Returns:
        A dict with ``items`` (per action: row count, unique users and the
        sum of ``entity_count``, ordered by row count descending) and
        ``period_hours``. When the error message indicates that the table
        does not exist, an empty result with a ``warning`` is returned.

    Raises:
        HTTPException: 500 on any other failure.
    """
    try:
        per_action_sql = """
            SELECT
                action,
                count() AS count,
                uniq(user_name) AS unique_users,
                sum(entity_count) AS total_entities
            FROM mabase_prod.audit_logs
            WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
            GROUP BY action
            ORDER BY count DESC
        """
        fetched = db.query(per_action_sql, {"hours": hours})

        # Null/empty columns are normalised to "" or 0.
        stats = [
            {
                "action": record[0] or "",
                "count": record[1] or 0,
                "unique_users": record[2] or 0,
                "total_entities": record[3] or 0
            }
            for record in fetched.result_rows
        ]

        return {
            "items": stats,
            "period_hours": hours
        }

    except Exception as e:
        message = str(e)
        if "Table" in message and "doesn't exist" in message:
            return {
                "items": [],
                "period_hours": hours,
                "warning": "Audit logs table not created yet"
            }
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/users/activity")
async def get_user_activity(
    hours: int = Query(24, ge=1, le=720)
):
    """
    Return audit activity grouped by user for the last ``hours`` hours.

    Each item carries the user name, the number of logged actions, the number
    of distinct action types and the ISO-formatted timestamps of the first and
    last action; items are ordered by action count, descending.

    When the query fails with a "Table ... doesn't exist" message, an empty
    payload with a warning is returned instead of an error. Any other failure
    is reported as an HTTP 500.
    """
    sql = """
        SELECT
            user_name,
            count() AS actions,
            uniq(action) AS action_types,
            min(timestamp) AS first_action,
            max(timestamp) AS last_action
        FROM mabase_prod.audit_logs
        WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
        GROUP BY user_name
        ORDER BY actions DESC
    """
    try:
        fetched = db.query(sql, {"hours": hours})
        items = [
            {
                "user_name": record[0] or "",
                "actions": record[1] or 0,
                "action_types": record[2] or 0,
                "first_action": record[3].isoformat() if record[3] else "",
                "last_action": record[4].isoformat() if record[4] else "",
            }
            for record in fetched.result_rows
        ]
        return {"items": items, "period_hours": hours}
    except Exception as e:
        message = str(e)
        # A missing audit table is treated as "nothing logged yet".
        if "Table" in message and "doesn't exist" in message:
            return {
                "items": [],
                "period_hours": hours,
                "warning": "Audit logs table not created yet",
            }
        raise HTTPException(status_code=500, detail=f"Erreur: {message}")
|
||||
105
services/dashboard/backend/routes/botnets.py
Normal file
105
services/dashboard/backend/routes/botnets.py
Normal file
@ -0,0 +1,105 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des botnets via la propagation des fingerprints JA4
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
# Router for the botnet analysis endpoints; every route below is mounted under /api/botnets.
router = APIRouter(prefix="/api/botnets", tags=["botnets"])
|
||||
|
||||
|
||||
def _botnet_class(unique_countries: int) -> str:
|
||||
if unique_countries > 100:
|
||||
return "global_botnet"
|
||||
if unique_countries > 20:
|
||||
return "regional_botnet"
|
||||
return "concentrated"
|
||||
|
||||
|
||||
@router.get("/ja4-spread")
async def get_ja4_spread():
    """Return how each JA4 fingerprint spreads across countries and IPs.

    Reads ``view_host_ja4_anomalies`` ordered by country count (descending)
    and adds two derived fields per row: ``distribution_score`` (countries
    divided by the square root of the IP count, rounded to 2 decimals) and
    ``botnet_class``. Any failure is reported as an HTTP 500.
    """
    sql = """
        SELECT
            ja4,
            unique_ips,
            unique_countries,
            targeted_hosts
        FROM mabase_prod.view_host_ja4_anomalies
        ORDER BY unique_countries DESC
    """
    try:
        items = []
        for record in db.query(sql).result_rows:
            ip_total = int(record[1])
            country_total = int(record[2])
            # The 0.001 floor keeps the division defined when there are no IPs.
            spread = country_total / max(ip_total ** 0.5, 0.001)
            items.append({
                "ja4": str(record[0]),
                "unique_ips": ip_total,
                "unique_countries": country_total,
                "targeted_hosts": int(record[3]),
                "distribution_score": round(spread, 2),
                "botnet_class": _botnet_class(country_total),
            })
        return {"items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/ja4/{ja4}/countries")
async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)):
    """Return the top source countries for one JA4 fingerprint.

    Aggregates ``agg_host_ip_ja4_1h`` by country code, counting distinct
    source IPs (with any ``::ffff:`` prefix stripped) and summing hits, and
    keeps the ``limit`` countries with the most distinct IPs. Any failure is
    reported as an HTTP 500.
    """
    sql = """
        SELECT
            src_country_code AS country_code,
            uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
            sum(hits) AS hits
        FROM mabase_prod.agg_host_ip_ja4_1h
        WHERE ja4 = %(ja4)s
        GROUP BY src_country_code
        ORDER BY unique_ips DESC
        LIMIT %(limit)s
    """
    try:
        fetched = db.query(sql, {"ja4": ja4, "limit": limit})
        items = []
        for record in fetched.result_rows:
            items.append({
                "country_code": str(record[0]),
                "unique_ips": int(record[1]),
                "hits": int(record[2]),
            })
        return {"items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/summary")
async def get_botnets_summary():
    """Return global figures about detected botnets.

    Computed in a single aggregate over ``view_host_ja4_anomalies``: the
    number of fingerprints seen in more than 100 countries, the IP total of
    fingerprints seen in more than 50 countries, and the JA4 with the largest
    country / IP spread. Any failure is reported as an HTTP 500.

    NOTE(review): the 100 / 50 country thresholds used here differ from the
    100 / 20 thresholds of ``_botnet_class`` — confirm this is intended.
    """
    sql = """
        SELECT
            countIf(unique_countries > 100) AS total_global_botnets,
            sumIf(unique_ips, unique_countries > 50) AS total_ips_in_botnets,
            argMax(ja4, unique_countries) AS most_spread_ja4,
            argMax(ja4, unique_ips) AS most_ips_ja4
        FROM mabase_prod.view_host_ja4_anomalies
    """
    try:
        summary = db.query(sql).result_rows[0]
        payload = {
            "total_global_botnets": int(summary[0]),
            "total_ips_in_botnets": int(summary[1]),
            "most_spread_ja4": str(summary[2]),
            "most_ips_ja4": str(summary[3]),
        }
        return payload
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
141
services/dashboard/backend/routes/bruteforce.py
Normal file
141
services/dashboard/backend/routes/bruteforce.py
Normal file
@ -0,0 +1,141 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des attaques par force brute sur les formulaires
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
# Router for the brute-force analysis endpoints; every route below is mounted under /api/bruteforce.
router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"])
|
||||
|
||||
|
||||
@router.get("/targets")
async def get_bruteforce_targets():
    """Return the hosts targeted by brute force, ordered by total hits (descending).

    Aggregates ``view_form_bruteforce_detected`` per host and labels each host
    ``"credential_stuffing"`` when the ratio of query parameters to hits
    exceeds 0.5, ``"enumeration"`` otherwise. Any failure is reported as an
    HTTP 500.

    NOTE(review): ``groupArray(3)(ja4)`` collects up to three JA4 values per
    group, not the most frequent ones — confirm that matches what
    ``top_ja4s`` is meant to show.
    """
    sql = """
        SELECT
            host,
            uniq(src_ip) AS unique_ips,
            sum(hits) AS total_hits,
            sum(query_params_count) AS total_params,
            groupArray(3)(ja4) AS top_ja4s
        FROM mabase_prod.view_form_bruteforce_detected
        GROUP BY host
        ORDER BY total_hits DESC
    """
    try:
        items = []
        for record in db.query(sql).result_rows:
            hit_total = int(record[2])
            param_total = int(record[3])
            # Guard on hit_total first so the ratio is never a division by zero.
            if hit_total > 0 and param_total / hit_total > 0.5:
                attack_type = "credential_stuffing"
            else:
                attack_type = "enumeration"
            items.append({
                "host": str(record[0]),
                "unique_ips": int(record[1]),
                "total_hits": hit_total,
                "total_params": param_total,
                "attack_type": attack_type,
                "top_ja4s": [str(fingerprint) for fingerprint in (record[4] or [])],
            })
        return {"items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/attackers")
async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)):
    """Return the top attacking IPs, ordered by total hits (descending).

    Aggregates ``view_form_bruteforce_detected`` per source IP (``::ffff:``
    prefix stripped for display) and keeps the first ``limit`` rows. The
    reported JA4 is the one attached to the row with the most hits. Any
    failure is reported as an HTTP 500.
    """
    sql = """
        SELECT
            replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
            uniq(host) AS distinct_hosts,
            sum(hits) AS total_hits,
            sum(query_params_count) AS total_params,
            argMax(ja4, hits) AS ja4
        FROM mabase_prod.view_form_bruteforce_detected
        GROUP BY src_ip
        ORDER BY total_hits DESC
        LIMIT %(limit)s
    """
    try:
        fetched = db.query(sql, {"limit": limit})
        items = [
            {
                "ip": str(record[0]),
                "distinct_hosts": int(record[1]),
                "total_hits": int(record[2]),
                "total_params": int(record[3]),
                "ja4": str(record[4]),
            }
            for record in fetched.result_rows
        ]
        return {"items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/timeline")
async def get_bruteforce_timeline():
    """Return hits and distinct source IPs bucketed by ``toHour(window_start)``.

    Reads ``agg_host_ip_ja4_1h`` over the last 72 hours. Any failure is
    reported as an HTTP 500.

    NOTE(review): ``toHour`` yields the hour of day, so rows from different
    days inside the 72 h window appear to share a bucket — confirm that an
    hour-of-day histogram (rather than a 72-point timeline) is intended.
    """
    sql = """
        SELECT
            toHour(window_start) AS hour,
            sum(hits) AS hits,
            uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS ips
        FROM mabase_prod.agg_host_ip_ja4_1h
        WHERE window_start >= now() - INTERVAL 72 HOUR
        GROUP BY hour
        ORDER BY hour ASC
    """
    try:
        buckets = [
            {
                "hour": int(record[0]),
                "hits": int(record[1]),
                "ips": int(record[2]),
            }
            for record in db.query(sql).result_rows
        ]
        return {"hours": buckets}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/host/{host:path}/attackers")
async def get_host_attackers(host: str, limit: int = Query(20, ge=1, le=200)):
    """Return the top IPs attacking one host, with their JA4 and attack type.

    Aggregates ``view_form_bruteforce_detected`` for ``host`` per source IP,
    ordered by total hits (descending) and capped at ``limit``. An IP is
    labelled ``"credential_stuffing"`` when its ratio of query parameters to
    hits exceeds 0.5, ``"enumeration"`` otherwise. Any failure is reported as
    an HTTP 500.

    NOTE(review): the query also selects ``max_hits_per_window`` but the value
    is not part of the response.
    """
    sql = """
        SELECT
            replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
            sum(hits) AS total_hits,
            sum(query_params_count) AS total_params,
            argMax(ja4, hits) AS ja4,
            max(hits) AS max_hits_per_window
        FROM mabase_prod.view_form_bruteforce_detected
        WHERE host = %(host)s
        GROUP BY src_ip
        ORDER BY total_hits DESC
        LIMIT %(limit)s
    """
    try:
        fetched = db.query(sql, {"host": host, "limit": limit})
        items = []
        for record in fetched.result_rows:
            hit_total = int(record[1])
            param_total = int(record[2])
            # Guard on hit_total first so the ratio is never a division by zero.
            if hit_total > 0 and param_total / hit_total > 0.5:
                attack_type = "credential_stuffing"
            else:
                attack_type = "enumeration"
            items.append({
                "ip": str(record[0]),
                "total_hits": hit_total,
                "total_params": param_total,
                "ja4": str(record[3] or ""),
                "attack_type": attack_type,
            })
        return {"host": host, "items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
551
services/dashboard/backend/routes/clustering.py
Normal file
551
services/dashboard/backend/routes/clustering.py
Normal file
@ -0,0 +1,551 @@
|
||||
"""
|
||||
Clustering d'IPs multi-métriques — WebGL / deck.gl backend.
|
||||
|
||||
- Calcul sur la TOTALITÉ des IPs (GROUP BY src_ip, ja4 sans LIMIT)
|
||||
- K-means++ vectorisé (numpy) + PCA-2D + enveloppes convexes (scipy)
|
||||
- Calcul en background thread + cache 30 min
|
||||
- Endpoints : /clusters, /status, /cluster/{id}/points
|
||||
"""
|
||||
import math
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
from collections import Counter
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
from ..services.clustering_engine import (
|
||||
FEATURE_NAMES,
|
||||
build_feature_vector, kmeans_pp, pca_2d, compute_hulls,
|
||||
name_cluster, risk_score_from_centroid, standardize,
|
||||
risk_to_gradient_color,
|
||||
)
|
||||
|
||||
# Module logger, used by the background clustering job.
log = logging.getLogger(__name__)
# Router for the clustering endpoints; every route below is mounted under /api/clustering.
router = APIRouter(prefix="/api/clustering", tags=["clustering"])
|
||||
|
||||
# ─── Global cache ──────────────────────────────────────────────────────────────
# Single module-level cache holding the latest clustering run. The code in this
# module reads and writes it while holding _LOCK.
_CACHE: dict[str, Any] = {
    "status": "idle",       # idle | computing | ready | error
    "error": None,          # message of the last failed run, if any
    "result": None,         # full result dict (nodes, edges, stats, feature names)
    "ts": 0.0,              # timestamp of the last successful update
    "params": {},           # k / hours / sensitivity of the cached result
    "cluster_ips": {},      # cluster_idx → [(ip, ja4, pca_x, pca_y, risk)]
}
_CACHE_TTL = 1800  # 30 minutes
_LOCK = threading.Lock()
# One worker only, so clustering jobs run one at a time.
_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering")
|
||||
|
||||
# ─── Colours ──────────────────────────────────────────────────────────────────
# No fixed palette is defined here: each cluster's colour is derived from its
# risk score through risk_to_gradient_color() when the nodes are built.
|
||||
|
||||
|
||||
# ─── SQL: ALL IPs, no LIMIT ──────────────────────────────────────────────────
# One row per (src_ip, ja4) seen in agg_host_ip_ja4_1h over the last %(hours)s
# hours, enriched with averaged ML features, header flags and header
# fingerprint ratios. The selected columns must stay in the same order as
# _SQL_COLS, which is used to name them positionally.
# Literal '%' characters are doubled ('%%') because the query goes through
# %(name)s parameter substitution.
# NOTE(review): if ml_detected_anomalies can hold several rows per
# (src_ip, ja4) inside the window, the LEFT JOIN repeats each t row and
# sum(t.hits) would be inflated — TODO confirm against the table contents.
_SQL_ALL_IPS = """
SELECT
    replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip,
    t.ja4,
    any(t.tcp_ttl_raw) AS ttl,
    any(t.tcp_win_raw) AS win,
    any(t.tcp_scale_raw) AS scale,
    any(t.tcp_mss_raw) AS mss,
    any(t.first_ua) AS ua,
    sum(t.hits) AS hits,

    avg(abs(ml.anomaly_score)) AS avg_score,
    avg(ml.hit_velocity) AS avg_velocity,
    avg(ml.fuzzing_index) AS avg_fuzzing,
    avg(ml.is_headless) AS pct_headless,
    avg(ml.post_ratio) AS avg_post,
    avg(ml.ip_id_zero_ratio) AS ip_id_zero,
    avg(ml.temporal_entropy) AS entropy,
    avg(ml.modern_browser_score) AS browser_score,
    avg(ml.alpn_http_mismatch) AS alpn_mismatch,
    avg(ml.is_alpn_missing) AS alpn_missing,
    avg(ml.multiplexing_efficiency) AS h2_eff,
    avg(ml.header_order_confidence) AS hdr_conf,
    avg(ml.ua_ch_mismatch) AS ua_ch_mismatch,
    avg(ml.asset_ratio) AS asset_ratio,
    avg(ml.direct_access_ratio) AS direct_ratio,
    avg(ml.distinct_ja4_count) AS ja4_count,
    max(ml.is_ua_rotating) AS ua_rotating,

    max(ml.threat_level) AS threat,
    any(ml.country_code) AS country,
    any(ml.asn_org) AS asn_org,

    -- Features headers HTTP (depuis view_dashboard_entities)
    avg(ml.has_accept_language) AS hdr_accept_lang,
    any(vh.hdr_enc) AS hdr_has_encoding,
    any(vh.hdr_sec_fetch) AS hdr_has_sec_fetch,
    any(vh.hdr_count) AS hdr_count_raw,

    -- Fingerprint HTTP Headers (depuis agg_header_fingerprint_1h + ml_detected_anomalies)
    -- header_order_shared_count : nb d'IPs partageant le même fingerprint
    -- → faible = fingerprint rare = comportement suspect
    avg(ml.header_order_shared_count) AS hfp_shared_count,
    -- distinct_header_orders : nb de fingerprints distincts émis par cette IP
    -- → élevé = rotation de fingerprint = comportement bot
    avg(ml.distinct_header_orders) AS hfp_distinct_orders,
    -- Cookie et Referer issus de la table dédiée aux empreintes
    any(hfp.hfp_cookie) AS hfp_cookie,
    any(hfp.hfp_referer) AS hfp_referer
FROM mabase_prod.agg_host_ip_ja4_1h t
LEFT JOIN mabase_prod.ml_detected_anomalies ml
    ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
    AND ml.detected_at >= now() - INTERVAL %(hours)s HOUR
LEFT JOIN (
    SELECT
        toIPv6(concat('::ffff:', toString(src_ip))) AS src_ip_v6,
        ja4,
        any(arrayExists(x -> x LIKE '%%Accept-Encoding%%', client_headers)) AS hdr_enc,
        any(arrayExists(x -> x LIKE '%%Sec-Fetch%%', client_headers)) AS hdr_sec_fetch,
        any(length(splitByChar(',', client_headers[1]))) AS hdr_count
    FROM mabase_prod.view_dashboard_entities
    WHERE length(client_headers) > 0
        AND log_date >= today() - 2
    GROUP BY src_ip_v6, ja4
) vh ON t.src_ip = vh.src_ip_v6 AND t.ja4 = vh.ja4
LEFT JOIN (
    SELECT
        src_ip,
        avg(has_cookie) AS hfp_cookie,
        avg(has_referer) AS hfp_referer
    FROM mabase_prod.agg_header_fingerprint_1h
    WHERE window_start >= now() - INTERVAL %(hours)s HOUR
    GROUP BY src_ip
) hfp ON t.src_ip = hfp.src_ip
WHERE t.window_start >= now() - INTERVAL %(hours)s HOUR
    AND t.tcp_ttl_raw > 0
GROUP BY t.src_ip, t.ja4
"""
|
||||
|
||||
# Column names for the rows returned by _SQL_ALL_IPS, in SELECT order. The
# clustering job pairs them positionally with each result row, so this list
# must be kept in sync with the query.
_SQL_COLS = [
    "ip", "ja4", "ttl", "win", "scale", "mss", "ua", "hits",
    "avg_score", "avg_velocity", "avg_fuzzing", "pct_headless", "avg_post",
    "ip_id_zero", "entropy", "browser_score", "alpn_mismatch", "alpn_missing",
    "h2_eff", "hdr_conf", "ua_ch_mismatch", "asset_ratio", "direct_ratio",
    "ja4_count", "ua_rotating", "threat", "country", "asn_org",
    "hdr_accept_lang", "hdr_has_encoding", "hdr_has_sec_fetch", "hdr_count_raw",
    "hfp_shared_count", "hfp_distinct_orders", "hfp_cookie", "hfp_referer",
]
|
||||
|
||||
|
||||
# ─── Worker de clustering (thread pool) ──────────────────────────────────────
|
||||
|
||||
def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
    """Run the clustering pipeline in the thread pool and publish it in ``_CACHE``.

    Steps: load every (IP, JA4) row for the window, build the feature matrix,
    standardise it, run k-means++, project the original features with PCA-2D,
    compute one hull per cluster, aggregate per-cluster statistics into nodes,
    link each node to its two nearest centroids, then store everything in the
    cache under ``_LOCK``.

    Args:
        k: Base number of clusters.
        hours: Size of the time window, in hours, passed to the SQL query.
        sensitivity: Multiplier applied to ``k`` (0.5 = very aggregated view,
            1.0 = default, 5.0 = finest granularity). The effective cluster
            count is ``round(k * sensitivity)`` clamped to [4, 300].

    Returns:
        None. On success the cache status becomes ``"ready"``; on any
        exception it becomes ``"error"`` with the exception message, and the
        exception is logged rather than raised.
    """
    k_actual = max(4, min(300, round(k * sensitivity)))
    t0 = time.time()
    with _LOCK:
        _CACHE["status"] = "computing"
        _CACHE["error"] = None

    try:
        log.info(f"[clustering] Démarrage k={k_actual} (base={k}×sens={sensitivity}) hours={hours}")

        # ── 1. Load every IP ──────────────────────────────────────────────
        result = db.query(_SQL_ALL_IPS, {"hours": hours})
        rows: list[dict] = [
            {col: row[i] for i, col in enumerate(_SQL_COLS)}
            for row in result.result_rows
        ]

        n = len(rows)
        log.info(f"[clustering] {n} IPs chargées")
        if n < k_actual:
            raise ValueError(f"Seulement {n} IPs disponibles (k={k_actual} requis)")

        # ── 2. Feature matrix (numpy) ─────────────────────────────────────
        X = np.array([build_feature_vector(r) for r in rows], dtype=np.float32)
        log.info(f"[clustering] Matrice X: {X.shape} — {X.nbytes/1024/1024:.1f} MB")

        # ── 3. Z-score standardisation ────────────────────────────────────
        X64 = X.astype(np.float64)
        X_std, feat_mean, feat_std = standardize(X64)

        # ── 4. K-means++ in the standardised space ────────────────────────
        # A single initialisation is used for large k (> 60) to bound run time.
        n_init = 1 if k_actual > 60 else 3
        km = kmeans_pp(X_std, k=k_actual, max_iter=80, n_init=n_init, seed=42)
        log.info(f"[clustering] K-means: {km.n_iter} iters, inertia={km.inertia:.2f}")

        # Centroids mapped back to the original feature space for the radar
        # display: c_orig = c_std * std + mean, then clipped to [0, 1].
        centroids_orig = np.clip(km.centroids * feat_std + feat_mean, 0.0, 1.0)

        # ── 5. PCA-2D on the ORIGINAL (non-standardised) features ─────────
        coords = pca_2d(X64)

        # ── 5b. One hull per cluster ──────────────────────────────────────
        hulls = compute_hulls(coords, km.labels, k_actual)

        # ── 6. Per-cluster aggregation ────────────────────────────────────
        cluster_rows: list[list[dict]] = [[] for _ in range(k_actual)]
        cluster_coords: list[list[list[float]]] = [[] for _ in range(k_actual)]
        cluster_ips_map: dict[int, list] = {j: [] for j in range(k_actual)}
        # The risk score depends only on the cluster centroid, so it is
        # computed once per non-empty cluster instead of once per point.
        cluster_risk: dict[int, float] = {}

        for i, label in enumerate(km.labels):
            j = int(label)
            if j not in cluster_risk:
                cluster_risk[j] = float(risk_score_from_centroid(centroids_orig[j]))
            cluster_rows[j].append(rows[i])
            cluster_coords[j].append(coords[i].tolist())
            cluster_ips_map[j].append((
                rows[i]["ip"],
                rows[i]["ja4"],
                float(coords[i][0]),
                float(coords[i][1]),
                cluster_risk[j],
            ))

        # ── 7. Node construction ──────────────────────────────────────────
        def topk(lst: list[str], n: int = 5) -> list[str]:
            """Return the ``n`` most common non-empty values of ``lst``."""
            return [v for v, _ in Counter(lst).most_common(n) if v]

        nodes = []
        for j in range(k_actual):
            if not cluster_rows[j]:
                continue

            # ``crows`` is bound as a default so the helper always averages
            # over the cluster of the current iteration.
            def avg_f(key: str, crows: list[dict] = cluster_rows[j]) -> float:
                return float(np.mean([float(r.get(key) or 0) for r in crows]))

            mean_ttl = avg_f("ttl")
            mean_mss = avg_f("mss")
            mean_scale = avg_f("scale")
            mean_win = avg_f("win")

            raw_stats = {"mean_ttl": mean_ttl, "mean_mss": mean_mss, "mean_scale": mean_scale}
            label_name = name_cluster(centroids_orig[j], raw_stats)
            risk = cluster_risk[j]
            color = risk_to_gradient_color(risk)

            # 2D centroid = mean of the cluster's PCA coordinates.
            cxy = np.mean(cluster_coords[j], axis=0).tolist() if cluster_coords[j] else [0.5, 0.5]
            ip_count = len({r["ip"] for r in cluster_rows[j]})
            hit_count = int(sum(float(r.get("hits") or 0) for r in cluster_rows[j]))

            threats = [str(r.get("threat") or "") for r in cluster_rows[j] if r.get("threat")]
            countries = [str(r.get("country") or "") for r in cluster_rows[j] if r.get("country")]
            orgs = [str(r.get("asn_org") or "") for r in cluster_rows[j] if r.get("asn_org")]

            radar = [
                {"feature": name, "value": round(float(centroids_orig[j][i]), 4)}
                for i, name in enumerate(FEATURE_NAMES)
            ]

            # Display radius grows with log(ip_count), clamped to [8, 30].
            radius = max(8, min(30, int(math.log1p(ip_count) * 2.2)))

            sample_rows = sorted(cluster_rows[j], key=lambda r: float(r.get("hits") or 0), reverse=True)[:8]
            sample_ips = [r["ip"] for r in sample_rows]
            sample_ua = str(cluster_rows[j][0].get("ua") or "")

            nodes.append({
                "id": f"c{j}_k{k_actual}",
                "cluster_idx": j,
                "label": label_name,
                "pca_x": round(cxy[0], 6),
                "pca_y": round(cxy[1], 6),
                "radius": radius,
                "color": color,
                "risk_score": round(risk, 4),

                "mean_ttl": round(mean_ttl, 1),
                "mean_mss": round(mean_mss, 0),
                "mean_scale": round(mean_scale, 1),
                "mean_win": round(mean_win, 0),
                "mean_velocity": round(avg_f("avg_velocity"), 3),
                "mean_fuzzing": round(avg_f("avg_fuzzing"), 3),
                "mean_headless": round(avg_f("pct_headless"), 3),
                "mean_post": round(avg_f("avg_post"), 3),
                "mean_asset": round(avg_f("asset_ratio"), 3),
                "mean_direct": round(avg_f("direct_ratio"), 3),
                "mean_alpn_mismatch": round(avg_f("alpn_mismatch"), 3),
                "mean_h2_eff": round(avg_f("h2_eff"), 3),
                "mean_hdr_conf": round(avg_f("hdr_conf"), 3),
                "mean_ua_ch": round(avg_f("ua_ch_mismatch"), 3),
                "mean_entropy": round(avg_f("entropy"), 3),
                "mean_ja4_diversity": round(avg_f("ja4_count"), 3),
                "mean_ip_id_zero": round(avg_f("ip_id_zero"), 3),
                "mean_browser_score": round(avg_f("browser_score"), 1),
                "mean_ua_rotating": round(avg_f("ua_rotating"), 3),

                "ip_count": ip_count,
                "hit_count": hit_count,
                "top_threat": topk(threats, 1)[0] if threats else "",
                "top_countries": topk(countries, 5),
                "top_orgs": topk(orgs, 5),
                "sample_ips": sample_ips,
                "sample_ua": sample_ua,
                "radar": radar,

                # Hull for the deck.gl PolygonLayer
                "hull": hulls.get(j, []),
            })

        # ── 8. k-NN edges between clusters ────────────────────────────────
        # Each node is linked to its two nearest centroids; ``seen`` keeps an
        # undirected pair from being emitted twice.
        edges = []
        seen: set[frozenset] = set()
        for i, ni in enumerate(nodes):
            ci = ni["cluster_idx"]
            dists = sorted(
                (
                    (j, float(np.sum((centroids_orig[ci] - centroids_orig[nj["cluster_idx"]]) ** 2)))
                    for j, nj in enumerate(nodes)
                    if j != i
                ),
                key=lambda pair: pair[1],
            )
            for j_idx, d2 in dists[:2]:
                pair_key = frozenset([ni["id"], nodes[j_idx]["id"]])
                if pair_key in seen:
                    continue
                seen.add(pair_key)
                edges.append({
                    "id": f"e_{ni['id']}_{nodes[j_idx]['id']}",
                    "source": ni["id"],
                    "target": nodes[j_idx]["id"],
                    "similarity": round(1.0 / (1.0 + math.sqrt(d2)), 3),
                })

        # ── 9. Store the result and the per-cluster IP cache ──────────────
        total_ips = sum(n_["ip_count"] for n_ in nodes)
        total_hits = sum(n_["hit_count"] for n_ in nodes)
        elapsed = round(time.time() - t0, 2)

        result_dict = {
            "nodes": nodes,
            "edges": edges,
            "stats": {
                "total_clusters": len(nodes),
                "total_ips": total_ips,
                "total_hits": total_hits,
                "n_samples": n,
                "k": k_actual,
                "k_base": k,
                "sensitivity": sensitivity,
                "elapsed_s": elapsed,
            },
            "feature_names": FEATURE_NAMES,
        }

        with _LOCK:
            _CACHE["result"] = result_dict
            _CACHE["cluster_ips"] = cluster_ips_map
            _CACHE["status"] = "ready"
            _CACHE["ts"] = time.time()
            _CACHE["params"] = {"k": k, "hours": hours, "sensitivity": sensitivity}
            _CACHE["error"] = None

        log.info(f"[clustering] Terminé en {elapsed}s — {total_ips} IPs, {len(nodes)} clusters")

    except Exception as e:
        log.exception("[clustering] Erreur lors du calcul")
        with _LOCK:
            _CACHE["status"] = "error"
            _CACHE["error"] = str(e)
|
||||
|
||||
|
||||
def _maybe_trigger(k: int, hours: int, sensitivity: float) -> None:
    """Submit a clustering job unless one is running or the cache is usable.

    Nothing is submitted when the status is ``"computing"``, or when it is
    ``"ready"`` with a cache younger than ``_CACHE_TTL`` that was computed
    with the same ``k``, ``hours`` and ``sensitivity``.

    NOTE(review): the status only becomes ``"computing"`` once the job itself
    starts, after the lock taken here has been released. Calls arriving in
    between can therefore submit further jobs, and a caller reading the cache
    right after this function may still see the previous status/result.
    """
    with _LOCK:
        status = _CACHE["status"]
        cached_params = _CACHE["params"]
        last_update = _CACHE["ts"]

    if status in ("computing",):
        return  # already running

    requested = {"k": k, "hours": hours, "sensitivity": sensitivity}
    same_params = all(
        cached_params.get(name) == value for name, value in requested.items()
    )
    still_fresh = (time.time() - last_update) <= _CACHE_TTL

    if status == "ready" and still_fresh and same_params:
        return  # fresh cache

    _EXECUTOR.submit(_run_clustering_job, k, hours, sensitivity)
|
||||
|
||||
|
||||
# ─── Endpoints ────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/status")
async def get_status():
    """Report the state of the clustering cache (polled by the frontend).

    Returns the status, last error, last update timestamp, the parameters of
    the cached result and its age in seconds (``None`` when no timestamp has
    been recorded).
    """
    with _LOCK:
        last_update = _CACHE["ts"]
        if last_update:
            age = round(time.time() - last_update, 0)
        else:
            age = None
        snapshot = {
            "status": _CACHE["status"],
            "error": _CACHE["error"],
            "ts": last_update,
            "params": _CACHE["params"],
            "age_s": age,
        }
        return snapshot
|
||||
|
||||
|
||||
@router.get("/clusters")
async def get_clusters(
    k: int = Query(20, ge=4, le=100, description="Nombre de clusters de base"),
    hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle (heures)"),
    sensitivity: float = Query(1.0, ge=0.5, le=5.0, description="Sensibilité : multiplicateur de k (5.0 = granularité maximale)"),
    force: bool = Query(False, description="Forcer le recalcul"),
):
    """
    Multi-metric clustering over ALL IPs.

    The effective cluster count is ``round(k × sensitivity)``; sensitivity
    controls granularity. The endpoint answers immediately from the cache and
    triggers a background computation when needed. With ``force`` the cache
    is reset first.

    Returns the cached result with ``status: "ready"``, or a short status
    message while nothing is available yet. Raises HTTP 500 when the cache is
    in the error state.
    """
    if force:
        # Reset the cache so the trigger below always schedules a new run.
        with _LOCK:
            _CACHE.update({
                "status": "idle",
                "ts": 0.0,
                "result": None,
                "cluster_ips": {},
            })

    _maybe_trigger(k, hours, sensitivity)

    with _LOCK:
        current_status = _CACHE["status"]
        cached_result = _CACHE["result"]
        last_error = _CACHE["error"]

    if current_status == "computing":
        return {"status": "computing", "message": "Calcul en cours, réessayez dans quelques secondes"}

    if current_status == "error":
        raise HTTPException(status_code=500, detail=last_error or "Erreur inconnue")

    if cached_result is None:
        return {"status": "idle", "message": "Calcul démarré, réessayez dans quelques secondes"}

    payload = dict(cached_result)
    payload["status"] = "ready"
    return payload
|
||||
|
||||
|
||||
@router.get("/cluster/{cluster_id}/points")
async def get_cluster_points(
    cluster_id: str,
    limit: int = Query(5000, ge=1, le=20000),
    offset: int = Query(0, ge=0),
):
    """
    PCA coordinates and metadata for the IPs of one cluster, paginated.

    Used by the deck.gl ScatterplotLayer (drill-down or deep zoom). The
    cluster index is read from the part of ``cluster_id`` before the first
    underscore, minus its leading character (ids look like ``c{n}_k{k}``).

    Raises HTTP 404 when no ready cache is available and HTTP 400 when the
    cluster index cannot be parsed. An index with no cached members yields an
    empty page.
    """
    with _LOCK:
        cache_status = _CACHE["status"]
        members_by_cluster = _CACHE["cluster_ips"]

    if cache_status != "ready" or not members_by_cluster:
        raise HTTPException(status_code=404, detail="Cache absent — appelez /clusters d'abord")

    try:
        prefix = cluster_id.split("_")[0]
        cluster_index = int(prefix[1:])
    except (ValueError, IndexError):
        raise HTTPException(status_code=400, detail="cluster_id invalide (format: c{n}_k{k})")

    members = members_by_cluster.get(cluster_index, [])
    points = []
    for ip, ja4, pca_x, pca_y, risk in members[offset: offset + limit]:
        points.append({
            "ip": ip,
            "ja4": ja4,
            "pca_x": round(pca_x, 6),
            "pca_y": round(pca_y, 6),
            "risk": round(risk, 3),
        })
    return {"points": points, "total": len(members), "offset": offset, "limit": limit}
|
||||
|
||||
|
||||
@router.get("/cluster/{cluster_id}/ips")
async def get_cluster_ips(
    cluster_id: str,
    limit: int = Query(100, ge=1, le=500),
    offset: int = Query(0, ge=0),
):
    """Return detailed SQL rows for one page of a cluster's IPs.

    Kept for backward compatibility with the previous UI. The page of members
    is taken from the cached clustering result, then their details are read
    from ClickHouse.

    The IP list and the time window are bound as query parameters rather than
    concatenated into the SQL text. The window is the ``hours`` value the
    cached clustering was computed with (24 when it is not recorded), so the
    details cover the same period as the clusters.

    Raises HTTP 404 when no ready cache is available, HTTP 400 when the
    cluster index cannot be parsed, and HTTP 500 when the query fails.
    """
    with _LOCK:
        status = _CACHE["status"]
        ips_map = _CACHE["cluster_ips"]
        window_hours = int(_CACHE["params"].get("hours") or 24)

    if status != "ready" or not ips_map:
        raise HTTPException(status_code=404, detail="Cache absent — appelez /clusters d'abord")

    try:
        idx = int(cluster_id.split("_")[0][1:])
    except (ValueError, IndexError):
        raise HTTPException(status_code=400, detail="cluster_id invalide")

    members = ips_map.get(idx, [])
    total = len(members)
    page = members[offset: offset + limit]
    if not page:
        return {"ips": [], "total": total, "cluster_id": cluster_id}

    # Only the first 200 members of the page are looked up (existing cap on
    # the size of the IN list). Duplicates are dropped, order is preserved.
    ip_values = tuple(dict.fromkeys(str(m[0]) for m in page[:200]))

    sql = """
        SELECT
            replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS src_ip,
            t.ja4,
            any(t.tcp_ttl_raw) AS ttl,
            any(t.tcp_win_raw) AS win,
            any(t.tcp_scale_raw) AS scale,
            any(t.tcp_mss_raw) AS mss,
            sum(t.hits) AS hits,
            any(t.first_ua) AS ua,
            round(avg(abs(ml.anomaly_score)), 3) AS avg_score,
            max(ml.threat_level) AS threat_level,
            any(ml.country_code) AS country_code,
            any(ml.asn_org) AS asn_org,
            round(avg(ml.fuzzing_index), 2) AS fuzzing,
            round(avg(ml.hit_velocity), 2) AS velocity
        FROM mabase_prod.agg_host_ip_ja4_1h t
        LEFT JOIN mabase_prod.ml_detected_anomalies ml
            ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
            AND ml.detected_at >= now() - INTERVAL %(hours)s HOUR
        WHERE t.window_start >= now() - INTERVAL %(hours)s HOUR
            AND replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') IN %(ips)s
        GROUP BY t.src_ip, t.ja4
        ORDER BY hits DESC
    """
    try:
        result = db.query(sql, {"hours": window_hours, "ips": ip_values})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    ips = [
        {
            "ip": str(row[0] or ""),
            "ja4": str(row[1] or ""),
            "tcp_ttl": int(row[2] or 0),
            "tcp_win": int(row[3] or 0),
            "tcp_scale": int(row[4] or 0),
            "tcp_mss": int(row[5] or 0),
            "hits": int(row[6] or 0),
            "ua": str(row[7] or ""),
            "avg_score": float(row[8] or 0),
            "threat_level": str(row[9] or ""),
            "country_code": str(row[10] or ""),
            "asn_org": str(row[11] or ""),
            "fuzzing": float(row[12] or 0),
            "velocity": float(row[13] or 0),
        }
        for row in result.result_rows
    ]

    return {"ips": ips, "total": total, "cluster_id": cluster_id}
|
||||
450
services/dashboard/backend/routes/detections.py
Normal file
450
services/dashboard/backend/routes/detections.py
Normal file
@ -0,0 +1,450 @@
|
||||
"""
|
||||
Endpoints pour la liste des détections
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional, List
|
||||
from ..database import db
|
||||
from ..models import DetectionsListResponse, Detection
|
||||
|
||||
router = APIRouter(prefix="/api/detections", tags=["detections"])
|
||||
|
||||
# Mapping label ASN → score float (0 = très suspect, 1 = légitime)
|
||||
_ASN_LABEL_SCORES: dict[str, float] = {
|
||||
'human': 0.9, 'bot': 0.05, 'proxy': 0.25, 'vpn': 0.3,
|
||||
'tor': 0.1, 'datacenter': 0.4, 'scanner': 0.05, 'malicious': 0.05,
|
||||
}
|
||||
|
||||
|
||||
def _label_to_score(label: str) -> float | None:
|
||||
"""Convertit un label de réputation ASN en score numérique."""
|
||||
if not label:
|
||||
return None
|
||||
return _ASN_LABEL_SCORES.get(label.lower(), 0.5)
|
||||
|
||||
|
||||
# SQL predicate applied for each supported ``score_type`` value (upper-cased).
_SCORE_TYPE_FILTERS = {
    "BOT": "threat_level = 'KNOWN_BOT'",
    "REGLE": "threat_level = 'ANUBIS_DENY'",
    "BOT_REGLE": "threat_level IN ('KNOWN_BOT', 'ANUBIS_DENY')",
    "SCORE": "threat_level NOT IN ('KNOWN_BOT', 'ANUBIS_DENY')",
}

# Columns a client may sort on; anything else falls back to a default.
# These names are interpolated into ORDER BY, so they must stay allow-listed.
_GROUPED_SORT_COLUMNS = (
    "anomaly_score", "hits", "hit_velocity", "first_seen", "last_seen", "src_ip", "detected_at",
)
_DETECTION_SORT_COLUMNS = (
    "detected_at", "src_ip", "threat_level", "anomaly_score",
    "asn_number", "country_code", "hits", "hit_velocity",
)


def _build_detection_filters(
    threat_level: Optional[str],
    model_name: Optional[str],
    country_code: Optional[str],
    asn_number: Optional[str],
    search: Optional[str],
    score_type: Optional[str],
) -> tuple[str, dict]:
    """Build the WHERE clause and bound parameters shared by all detection queries."""
    where_clauses = ["detected_at >= now() - INTERVAL 24 HOUR"]
    params: dict = {}

    if threat_level:
        where_clauses.append("threat_level = %(threat_level)s")
        params["threat_level"] = threat_level

    if model_name:
        where_clauses.append("model_name = %(model_name)s")
        params["model_name"] = model_name

    if country_code:
        where_clauses.append("country_code = %(country_code)s")
        params["country_code"] = country_code.upper()

    if asn_number:
        where_clauses.append("asn_number = %(asn_number)s")
        params["asn_number"] = asn_number

    if search:
        where_clauses.append(
            "(ilike(toString(src_ip), %(search)s) OR ilike(ja4, %(search)s) OR ilike(host, %(search)s))"
        )
        params["search"] = f"%{search}%"

    if score_type:
        # Unknown score types are ignored rather than rejected.
        predicate = _SCORE_TYPE_FILTERS.get(score_type.upper())
        if predicate:
            where_clauses.append(predicate)

    return " AND ".join(where_clauses), params


def _grouped_row_to_detection(row) -> Detection:
    """Map one row of the per-IP aggregated query onto a ``Detection``."""
    # Column order: src_ip, first_seen, last_seen, detection_count, unique_ja4s,
    # unique_hosts, anomaly_score, threat_level_best, model_name_best,
    # country_code, asn_number, asn_org, hit_velocity, hits, asn_label,
    # asn_rep_label, anubis_bot_name, anubis_bot_action, anubis_bot_category
    ja4s = list(row[4]) if row[4] else []
    hosts = list(row[5]) if row[5] else []
    return Detection(
        detected_at=row[1],
        src_ip=str(row[0]),
        ja4=ja4s[0] if ja4s else "",
        host=hosts[0] if hosts else "",
        bot_name="",
        anomaly_score=float(row[6]) if row[6] else 0.0,
        threat_level=row[7] or "LOW",
        model_name=row[8] or "",
        recurrence=int(row[3] or 0),
        asn_number=str(row[10]) if row[10] else "",
        asn_org=row[11] or "",
        asn_detail="",
        asn_domain="",
        country_code=row[9] or "",
        asn_label=row[14] or "",
        hits=int(row[13] or 0),
        hit_velocity=float(row[12]) if row[12] else 0.0,
        fuzzing_index=0.0,
        post_ratio=0.0,
        reason="",
        asn_rep_label=row[15] or "",
        asn_score=_label_to_score(row[15] or ""),
        first_seen=row[1],
        last_seen=row[2],
        unique_ja4s=ja4s,
        unique_hosts=hosts,
        anubis_bot_name=row[16] or "",
        anubis_bot_action=row[17] or "",
        anubis_bot_category=row[18] or "",
    )


def _row_to_detection(row) -> Detection:
    """Map one row of the per-detection query onto a ``Detection``."""
    return Detection(
        detected_at=row[0],
        src_ip=str(row[1]),
        ja4=row[2] or "",
        host=row[3] or "",
        bot_name=row[4] or "",
        anomaly_score=float(row[5]) if row[5] else 0.0,
        threat_level=row[6] or "LOW",
        model_name=row[7] or "",
        recurrence=row[8] or 0,
        asn_number=str(row[9]) if row[9] else "",
        asn_org=row[10] or "",
        asn_detail=row[11] or "",
        asn_domain=row[12] or "",
        country_code=row[13] or "",
        asn_label=row[14] or "",
        hits=row[15] or 0,
        hit_velocity=float(row[16]) if row[16] else 0.0,
        fuzzing_index=float(row[17]) if row[17] else 0.0,
        post_ratio=float(row[18]) if row[18] else 0.0,
        reason=row[19] or "",
        asn_rep_label=row[20] or "",
        asn_score=_label_to_score(row[20] or ""),
        anubis_bot_name=row[21] or "",
        anubis_bot_action=row[22] or "",
        anubis_bot_category=row[23] or "",
    )


@router.get("", response_model=DetectionsListResponse, summary="Liste paginée des détections")
async def get_detections(
    page: int = Query(1, ge=1, description="Numéro de page"),
    page_size: int = Query(25, ge=1, le=100, description="Nombre de lignes par page"),
    threat_level: Optional[str] = Query(None, description="Filtrer par niveau de menace"),
    model_name: Optional[str] = Query(None, description="Filtrer par modèle"),
    country_code: Optional[str] = Query(None, description="Filtrer par pays"),
    asn_number: Optional[str] = Query(None, description="Filtrer par ASN"),
    search: Optional[str] = Query(None, description="Recherche texte (IP, JA4, Host)"),
    sort_by: str = Query("detected_at", description="Trier par"),
    sort_order: str = Query("DESC", description="Ordre (ASC/DESC)"),
    group_by_ip: bool = Query(False, description="Grouper par IP (first_seen/last_seen agrégés)"),
    score_type: Optional[str] = Query(None, description="Filtrer par type de score: BOT, REGLE, BOT_REGLE, SCORE")
):
    """
    Return a paginated, filtered list of detections from the last 24 hours.

    With ``group_by_ip`` the detections are aggregated per source IP and the
    total counts distinct IPs; otherwise one item is returned per detection
    row. Unknown ``sort_by`` values fall back to a default column, and any
    ``sort_order`` other than ``DESC`` (case-insensitive) is treated as ``ASC``.

    Raises:
        HTTPException: 500 when a query or the row mapping fails.
    """
    try:
        where_clause, params = _build_detection_filters(
            threat_level, model_name, country_code, asn_number, search, score_type
        )

        offset = (page - 1) * page_size
        # Only the two literal keywords ever reach the SQL text.
        sort_order = "DESC" if sort_order.upper() == "DESC" else "ASC"

        # ── Grouped-by-IP mode (first_seen / last_seen computed by the DB) ──
        if group_by_ip:
            grouped_sort = sort_by if sort_by in _GROUPED_SORT_COLUMNS else "last_seen"
            # detected_at does not exist after GROUP BY; last_seen is max(detected_at).
            if grouped_sort == "detected_at":
                grouped_sort = "last_seen"
            # The outer SELECT exposes min_score under the alias anomaly_score.
            outer_sort = "min_score" if grouped_sort == "anomaly_score" else grouped_sort

            # Pagination total is the number of distinct IPs; the row-level
            # count() is not needed in this mode, so it is not executed.
            count_ip_query = f"""
                SELECT uniq(src_ip)
                FROM ml_detected_anomalies
                WHERE {where_clause}
            """
            cr = db.query(count_ip_query, params)
            total = cr.result_rows[0][0] if cr.result_rows else 0

            grouped_query = f"""
                SELECT
                    ip_data.src_ip,
                    ip_data.first_seen,
                    ip_data.last_seen,
                    ip_data.detection_count,
                    ip_data.unique_ja4s,
                    ip_data.unique_hosts,
                    ip_data.min_score AS anomaly_score,
                    ip_data.threat_level_best,
                    ip_data.model_name_best,
                    ip_data.country_code,
                    ip_data.asn_number,
                    ip_data.asn_org,
                    ip_data.hit_velocity,
                    ip_data.hits,
                    ip_data.asn_label,
                    ar.label AS asn_rep_label,
                    ip_data.anubis_bot_name_best,
                    ip_data.anubis_bot_action_best,
                    ip_data.anubis_bot_category_best
                FROM (
                    SELECT
                        src_ip,
                        min(detected_at) AS first_seen,
                        max(detected_at) AS last_seen,
                        count() AS detection_count,
                        groupUniqArray(5)(ja4) AS unique_ja4s,
                        groupUniqArray(5)(host) AS unique_hosts,
                        min(anomaly_score) AS min_score,
                        argMin(threat_level, anomaly_score) AS threat_level_best,
                        argMin(model_name, anomaly_score) AS model_name_best,
                        any(country_code) AS country_code,
                        any(asn_number) AS asn_number,
                        any(asn_org) AS asn_org,
                        max(hit_velocity) AS hit_velocity,
                        sum(hits) AS hits,
                        any(asn_label) AS asn_label,
                        argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
                        argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
                        argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
                    FROM ml_detected_anomalies
                    WHERE {where_clause}
                    GROUP BY src_ip
                ) ip_data
                LEFT JOIN mabase_prod.asn_reputation ar
                    ON ar.src_asn = toUInt32OrZero(ip_data.asn_number)
                ORDER BY {outer_sort} {sort_order}
                LIMIT %(limit)s OFFSET %(offset)s
            """
            params["limit"] = page_size
            params["offset"] = offset
            gresult = db.query(grouped_query, params)

            detections = [_grouped_row_to_detection(row) for row in gresult.result_rows]

            total_pages = (total + page_size - 1) // page_size
            return DetectionsListResponse(
                items=detections, total=total, page=page,
                page_size=page_size, total_pages=total_pages
            )

        # ── Individual mode (one item per detection row) ──
        count_query = f"""
            SELECT count()
            FROM ml_detected_anomalies
            WHERE {where_clause}
        """
        count_result = db.query(count_query, params)
        total = count_result.result_rows[0][0] if count_result.result_rows else 0

        if sort_by not in _DETECTION_SORT_COLUMNS:
            sort_by = "detected_at"

        main_query = f"""
            SELECT
                detected_at,
                src_ip,
                ja4,
                host,
                bot_name,
                anomaly_score,
                threat_level,
                model_name,
                recurrence,
                asn_number,
                asn_org,
                asn_detail,
                asn_domain,
                country_code,
                asn_label,
                hits,
                hit_velocity,
                fuzzing_index,
                post_ratio,
                reason,
                ar.label AS asn_rep_label,
                anubis_bot_name,
                anubis_bot_action,
                anubis_bot_category
            FROM ml_detected_anomalies
            LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
            WHERE {where_clause}
            ORDER BY {sort_by} {sort_order}
            LIMIT %(limit)s OFFSET %(offset)s
        """

        params["limit"] = page_size
        params["offset"] = offset

        result = db.query(main_query, params)

        detections = [_row_to_detection(row) for row in result.result_rows]

        total_pages = (total + page_size - 1) // page_size

        return DetectionsListResponse(
            items=detections,
            total=total,
            page=page,
            page_size=page_size,
            total_pages=total_pages
        )

    except HTTPException:
        # Never rewrap an explicit HTTP error as a 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Erreur lors de la récupération des détections: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
@router.get("/{detection_id}")
async def get_detection_details(detection_id: str):
    """
    Return the most recent detection recorded for one source IP.

    ``detection_id`` is bound to the ``src_ip`` filter of the query; only the
    latest row (by ``detected_at``) is returned, reshaped into nested
    sections (asn, country, metrics, tcp, tls, headers, behavior, advanced).

    Raises:
        HTTPException: 404 when no row matches, 500 on any other failure.
    """

    def _num(value):
        # Falsy values (NULL, 0) collapse to 0.0.
        return float(value) if value else 0.0

    def _flag(value):
        # bool(None) is False, so NULL flags read as False.
        return bool(value)

    try:
        query = """
            SELECT
                detected_at,
                src_ip,
                ja4,
                host,
                bot_name,
                anomaly_score,
                threat_level,
                model_name,
                recurrence,
                asn_number,
                asn_org,
                asn_detail,
                asn_domain,
                country_code,
                asn_label,
                hits,
                hit_velocity,
                fuzzing_index,
                post_ratio,
                port_exhaustion_ratio,
                orphan_ratio,
                tcp_jitter_variance,
                tcp_shared_count,
                true_window_size,
                window_mss_ratio,
                alpn_http_mismatch,
                is_alpn_missing,
                sni_host_mismatch,
                header_count,
                has_accept_language,
                has_cookie,
                has_referer,
                modern_browser_score,
                ua_ch_mismatch,
                header_order_shared_count,
                ip_id_zero_ratio,
                request_size_variance,
                multiplexing_efficiency,
                mss_mobile_mismatch,
                correlated,
                reason,
                asset_ratio,
                direct_access_ratio,
                is_ua_rotating,
                distinct_ja4_count,
                src_port_density,
                ja4_asn_concentration,
                ja4_country_concentration,
                is_rare_ja4
            FROM ml_detected_anomalies
            WHERE src_ip = %(ip)s
            ORDER BY detected_at DESC
            LIMIT 1
        """

        rows = db.query(query, {"ip": detection_id}).result_rows
        if not rows:
            raise HTTPException(status_code=404, detail="Détection non trouvée")

        # Indexes below follow the SELECT column order exactly.
        row = rows[0]

        asn = {
            "number": str(row[9]) if row[9] else "",
            "org": row[10] or "",
            "detail": row[11] or "",
            "domain": row[12] or "",
            "label": row[14] or ""
        }
        metrics = {
            "hits": row[15] or 0,
            "hit_velocity": _num(row[16]),
            "fuzzing_index": _num(row[17]),
            "post_ratio": _num(row[18]),
            "port_exhaustion_ratio": _num(row[19]),
            "orphan_ratio": _num(row[20]),
        }
        tcp = {
            "jitter_variance": _num(row[21]),
            "shared_count": row[22] or 0,
            "true_window_size": row[23] or 0,
            "window_mss_ratio": _num(row[24]),
        }
        tls = {
            "alpn_http_mismatch": _flag(row[25]),
            "is_alpn_missing": _flag(row[26]),
            "sni_host_mismatch": _flag(row[27]),
        }
        headers = {
            "count": row[28] or 0,
            "has_accept_language": _flag(row[29]),
            "has_cookie": _flag(row[30]),
            "has_referer": _flag(row[31]),
            "modern_browser_score": row[32] or 0,
            "ua_ch_mismatch": _flag(row[33]),
            "header_order_shared_count": row[34] or 0,
        }
        behavior = {
            "ip_id_zero_ratio": _num(row[35]),
            "request_size_variance": _num(row[36]),
            "multiplexing_efficiency": _num(row[37]),
            "mss_mobile_mismatch": _flag(row[38]),
            "correlated": _flag(row[39]),
        }
        advanced = {
            "asset_ratio": _num(row[41]),
            "direct_access_ratio": _num(row[42]),
            "is_ua_rotating": _flag(row[43]),
            "distinct_ja4_count": row[44] or 0,
            "src_port_density": _num(row[45]),
            "ja4_asn_concentration": _num(row[46]),
            "ja4_country_concentration": _num(row[47]),
            "is_rare_ja4": _flag(row[48]),
        }

        return {
            "detected_at": row[0],
            "src_ip": str(row[1]),
            "ja4": row[2] or "",
            "host": row[3] or "",
            "bot_name": row[4] or "",
            "anomaly_score": _num(row[5]),
            "threat_level": row[6] or "LOW",
            "model_name": row[7] or "",
            "recurrence": row[8] or 0,
            "asn": asn,
            "country": {
                "code": row[13] or "",
            },
            "metrics": metrics,
            "tcp": tcp,
            "tls": tls,
            "headers": headers,
            "behavior": behavior,
            "advanced": advanced,
            "reason": row[40] or ""
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
509
services/dashboard/backend/routes/entities.py
Normal file
509
services/dashboard/backend/routes/entities.py
Normal file
@ -0,0 +1,509 @@
|
||||
"""
|
||||
Routes pour l'investigation d'entités (IP, JA4, User-Agent, Client-Header, Host, Path, Query-Param)
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional, List
|
||||
|
||||
from ..database import db
|
||||
from ..models import (
|
||||
EntityInvestigation,
|
||||
EntityStats,
|
||||
EntityRelatedAttributes,
|
||||
EntityAttributeValue
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/entities", tags=["Entities"])
|
||||
|
||||
# Entity types accepted by the investigation endpoints.
VALID_ENTITY_TYPES = frozenset((
    "ip",
    "ja4",
    "user_agent",
    "client_header",
    "host",
    "path",
    "query_param",
))
|
||||
|
||||
|
||||
def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Optional[EntityStats]:
    """
    Aggregate request statistics for one entity over the last ``hours`` hours.

    Sums requests and unique IPs and takes the min/max ``log_date`` from
    ``mabase_prod.view_dashboard_entities``. Returns ``None`` when the view
    holds no row for this entity in the window.
    """
    bound_params = {
        'entity_type': entity_type,
        'entity_value': entity_value,
        'hours': hours
    }
    sql = """
        SELECT
            entity_type,
            entity_value,
            sum(requests) as total_requests,
            sum(unique_ips) as unique_ips,
            min(log_date) as first_seen,
            max(log_date) as last_seen
        FROM mabase_prod.view_dashboard_entities
        WHERE entity_type = %(entity_type)s
            AND entity_value = %(entity_value)s
            AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
        GROUP BY entity_type, entity_value
    """

    rows = db.query(sql, bound_params).result_rows
    if not rows:
        return None

    first = rows[0]
    return EntityStats(
        entity_type=first[0],
        entity_value=first[1],
        total_requests=first[2],
        unique_ips=first[3],
        first_seen=first[4],
        last_seen=first[5]
    )
|
||||
|
||||
|
||||
def get_related_attributes(entity_type: str, entity_value: str, hours: int = 24) -> EntityRelatedAttributes:
    """
    Collect the distinct IPs, JA4s, hosts, ASNs and countries seen with an entity.

    Each attribute is gathered by its own scalar subquery over
    ``mabase_prod.view_dashboard_entities`` for the last ``hours`` hours.
    Empty values are dropped; when nothing is found every list is empty.
    """
    # One scalar subquery per attribute, all sharing the same entity/time filter.
    sql = """
        SELECT
            (SELECT groupUniqArray(toString(src_ip)) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips,
            (SELECT groupUniqArray(ja4) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s,
            (SELECT groupUniqArray(host) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts,
            (SELECT groupUniqArrayArray(asns) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns,
            (SELECT groupUniqArrayArray(countries) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries
    """

    rows = db.query(sql, {
        'entity_type': entity_type,
        'entity_value': entity_value,
        'hours': hours
    }).result_rows

    # A missing row is treated like a row of five empty arrays, so a single
    # code path yields empty lists when there is nothing to report.
    raw_ips, raw_ja4s, raw_hosts, raw_asns, raw_countries = (
        (rows[0][i] or []) if rows else [] for i in range(5)
    )

    return EntityRelatedAttributes(
        ips=[str(item) for item in raw_ips if item],
        ja4s=[item for item in raw_ja4s if item],
        hosts=[item for item in raw_hosts if item],
        asns=[item for item in raw_asns if item],
        countries=[item for item in raw_countries if item]
    )
|
||||
|
||||
|
||||
def get_array_values(entity_type: str, entity_value: str, array_field: str, hours: int = 24) -> List[EntityAttributeValue]:
    """
    Explode an Array column of the entities view and rank its values.

    Each distinct value of ``array_field`` (e.g. ``user_agents``,
    ``client_headers``) seen for the entity in the last ``hours`` hours is
    returned with its occurrence count and its share of all occurrences,
    most frequent first.

    Raises:
        ValueError: if ``array_field`` is not a plain identifier. The column
            name cannot be bound as a query parameter and is interpolated
            into the SQL text, so anything else is refused before a query
            is built.
    """
    if not isinstance(array_field, str) or not array_field.isidentifier():
        raise ValueError(f"Invalid array field name: {array_field!r}")

    query = f"""
        SELECT
            value,
            count() as count,
            round(count * 100.0 / sum(count) OVER (), 2) as percentage
        FROM (
            SELECT
                arrayJoin({array_field}) as value
            FROM mabase_prod.view_dashboard_entities
            WHERE entity_type = %(entity_type)s
                AND entity_value = %(entity_value)s
                AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
                AND notEmpty({array_field})
        )
        GROUP BY value
        ORDER BY count DESC
    """

    result = db.query(query, {
        'entity_type': entity_type,
        'entity_value': entity_value,
        'hours': hours
    })

    return [
        EntityAttributeValue(
            value=row[0],
            count=row[1],
            percentage=row[2]
        )
        for row in result.result_rows
    ]
|
||||
|
||||
|
||||
def _subnet_octets(subnet: str) -> tuple[str, str, str]:
    """Return the first three IPv4 octets of *subnet* as canonical decimal strings.

    Any ``/mask`` suffix is ignored. Raises ``ValueError`` when fewer than
    three octets are present or an octet is not an integer in 0..255.
    """
    address = subnet.split('/', 1)[0]
    parts = address.split('.')[:3]
    if len(parts) < 3:
        raise ValueError(f"not enough octets in {subnet!r}")

    octets = []
    for part in parts:
        if not (part.isascii() and part.isdigit()) or int(part) > 255:
            raise ValueError(f"invalid octet {part!r} in {subnet!r}")
        # Canonical form ("010" -> "10") so it compares equal to stored IPs.
        octets.append(str(int(part)))
    return octets[0], octets[1], octets[2]


@router.get("/subnet/{subnet:path}")
async def get_subnet_investigation(
    subnet: str,
    hours: int = Query(default=24, ge=1, le=720)
):
    """
    Return every detected IP of a /24 subnet together with subnet-wide statistics.

    Detections come from ``ml_detected_anomalies``; user-agent counts come
    from ``view_dashboard_entities``. Only the first three octets of
    ``subnet`` are used for matching, whatever mask suffix is supplied.

    Raises:
        HTTPException: 400 when ``subnet`` is not a usable IPv4 prefix,
            404 when no detection exists for it, 500 on any other failure.
    """
    import ipaddress  # local import: only needed to vet IPs before SQL interpolation

    try:
        subnet_prefix, subnet_mask, subnet_third = _subnet_octets(subnet)
    except ValueError:
        # Malformed input is a client error, not a server failure.
        raise HTTPException(status_code=400, detail="Subnet invalide")

    try:
        # Subnet-wide stats: detections from ml_detected_anomalies, user-agents
        # from view_dashboard_entities.
        stats_query = """
            WITH cleaned_ips AS (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
                    detected_at,
                    ja4,
                    host,
                    country_code,
                    asn_number
                FROM ml_detected_anomalies
                WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
            ),
            subnet_filter AS (
                SELECT *
                FROM cleaned_ips
                WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s
                    AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s
                    AND splitByChar('.', clean_ip)[3] = %(subnet_third)s
            ),
            -- Récupérer les user-agents depuis view_dashboard_entities
            ua_data AS (
                SELECT
                    entity_value AS ip,
                    arrayJoin(user_agents) AS user_agent
                FROM view_dashboard_entities
                WHERE entity_type = 'ip'
                    AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
                    AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s
                    AND splitByChar('.', entity_value)[2] = %(subnet_mask)s
                    AND splitByChar('.', entity_value)[3] = %(subnet_third)s
            )
            SELECT
                %(subnet)s AS subnet,
                uniq(clean_ip) AS total_ips,
                count() AS total_detections,
                uniq(ja4) AS unique_ja4,
                (SELECT uniq(user_agent) FROM ua_data) AS unique_ua,
                uniq(host) AS unique_hosts,
                argMax(country_code, detected_at) AS primary_country,
                argMax(asn_number, detected_at) AS primary_asn,
                min(detected_at) AS first_seen,
                max(detected_at) AS last_seen
            FROM subnet_filter
        """

        stats_result = db.query(stats_query, {
            "subnet": subnet,
            "subnet_prefix": subnet_prefix,
            "subnet_mask": subnet_mask,
            "subnet_third": subnet_third,
            "hours": hours
        })

        # The aggregate always yields a row; total_ips == 0 means no match.
        if not stats_result.result_rows or stats_result.result_rows[0][1] == 0:
            raise HTTPException(status_code=404, detail="Subnet non trouvé")

        stats_row = stats_result.result_rows[0]
        stats = {
            "subnet": subnet,
            "total_ips": stats_row[1] or 0,
            "total_detections": stats_row[2] or 0,
            "unique_ja4": stats_row[3] or 0,
            "unique_ua": stats_row[4] or 0,
            "unique_hosts": stats_row[5] or 0,
            "primary_country": stats_row[6] or "XX",
            "primary_asn": str(stats_row[7]) if stats_row[7] else "?",
            "first_seen": stats_row[8].isoformat() if stats_row[8] else "",
            "last_seen": stats_row[9].isoformat() if stats_row[9] else ""
        }

        # Per-IP details: two separate queries, merged in Python.
        ips_query = """
            WITH cleaned_ips AS (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
                    detected_at,
                    ja4,
                    country_code,
                    asn_number,
                    threat_level,
                    anomaly_score
                FROM ml_detected_anomalies
                WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
            ),
            subnet_filter AS (
                SELECT *
                FROM cleaned_ips
                WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s
                    AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s
                    AND splitByChar('.', clean_ip)[3] = %(subnet_third)s
            )
            SELECT
                clean_ip AS ip,
                count() AS total_detections,
                uniq(ja4) AS unique_ja4,
                argMax(country_code, detected_at) AS primary_country,
                argMax(asn_number, detected_at) AS primary_asn,
                argMax(threat_level, detected_at) AS threat_level,
                avg(anomaly_score) AS avg_score,
                min(detected_at) AS first_seen,
                max(detected_at) AS last_seen
            FROM subnet_filter
            GROUP BY ip
            ORDER BY total_detections DESC
        """

        ips_result = db.query(ips_query, {
            "subnet_prefix": subnet_prefix,
            "subnet_mask": subnet_mask,
            "subnet_third": subnet_third,
            "hours": hours
        })

        # IPs are interpolated into the IN (...) list below, so keep only
        # values that parse as IP addresses; anything else is left out of the
        # user-agent lookup and reports unique_ua = 0.
        ip_list = []
        for row in ips_result.result_rows:
            candidate = str(row[0])
            try:
                ipaddress.ip_address(candidate)
            except ValueError:
                continue
            ip_list.append(candidate)

        unique_ua_dict = {}
        if ip_list:
            ip_values = ', '.join(f"'{ip}'" for ip in ip_list)
            # NOTE(review): this lookup uses a fixed 30-day window and ignores
            # `hours` — confirm this is intentional.
            ua_query = f"""
                SELECT
                    entity_value AS ip,
                    uniq(arrayJoin(user_agents)) AS unique_ua
                FROM view_dashboard_entities
                PREWHERE entity_type = 'ip'
                WHERE entity_value IN ({ip_values})
                    AND log_date >= today() - INTERVAL 30 DAY
                GROUP BY entity_value
            """
            ua_result = db.query(ua_query, {})
            unique_ua_dict = {row[0]: row[1] for row in ua_result.result_rows}

        # Merge detection rows with the per-IP user-agent counts.
        ips = [
            {
                "ip": str(row[0]),
                "total_detections": row[1],
                "unique_ja4": row[2],
                "unique_ua": unique_ua_dict.get(row[0], 0),
                "primary_country": row[3] or "XX",
                "primary_asn": str(row[4]) if row[4] else "?",
                "threat_level": row[5] or "LOW",
                "avg_score": abs(row[6] or 0),
                "first_seen": row[7].isoformat() if row[7] else "",
                "last_seen": row[8].isoformat() if row[8] else ""
            }
            for row in ips_result.result_rows
        ]

        return {
            "stats": stats,
            "ips": ips
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/types")
async def get_entity_types():
    """
    List the supported entity types with a short description of each.

    NOTE: this route must stay declared before the ``/{entity_type}/...``
    routes, otherwise they would capture ``/types``.
    """
    descriptions = {
        "ip": "Adresse IP source",
        "ja4": "Fingerprint JA4 TLS",
        "user_agent": "User-Agent HTTP",
        "client_header": "Client Header",
        "host": "Host HTTP",
        "path": "Path URL",
        "query_param": "Query Param"
    }
    supported = sorted(VALID_ENTITY_TYPES)
    return {"entity_types": supported, "descriptions": descriptions}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}", response_model=EntityInvestigation)
|
||||
async def get_entity_investigation(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720, description="Fenêtre temporelle en heures")
|
||||
):
|
||||
"""
|
||||
Investigation complète pour une entité donnée
|
||||
|
||||
- **entity_type**: Type d'entité (ip, ja4, user_agent, client_header, host, path, query_param)
|
||||
- **entity_value**: Valeur de l'entité
|
||||
- **hours**: Fenêtre temporelle (défaut: 24h)
|
||||
|
||||
Retourne:
|
||||
- Stats générales
|
||||
- Attributs associés (IPs, JA4, Hosts, ASNs, Pays)
|
||||
- User-Agents
|
||||
- Client-Headers
|
||||
- Paths
|
||||
- Query-Params
|
||||
"""
|
||||
# Valider le type d'entité
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type d'entité invalide. Types supportés: {', '.join(VALID_ENTITY_TYPES)}"
|
||||
)
|
||||
|
||||
# Stats générales
|
||||
stats = get_entity_stats(entity_type, entity_value, hours)
|
||||
if not stats:
|
||||
raise HTTPException(status_code=404, detail="Entité non trouvée")
|
||||
|
||||
# Attributs associés
|
||||
related = get_related_attributes(entity_type, entity_value, hours)
|
||||
|
||||
# User-Agents
|
||||
user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours)
|
||||
|
||||
# Client-Headers
|
||||
client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours)
|
||||
|
||||
# Paths
|
||||
paths = get_array_values(entity_type, entity_value, 'paths', hours)
|
||||
|
||||
# Query-Params
|
||||
query_params = get_array_values(entity_type, entity_value, 'query_params', hours)
|
||||
|
||||
return EntityInvestigation(
|
||||
stats=stats,
|
||||
related=related,
|
||||
user_agents=user_agents,
|
||||
client_headers=client_headers,
|
||||
paths=paths,
|
||||
query_params=query_params
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/related")
|
||||
async def get_entity_related(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère uniquement les attributs associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type d'entité invalide. Types supportés: {', '.join(VALID_ENTITY_TYPES)}"
|
||||
)
|
||||
|
||||
related = get_related_attributes(entity_type, entity_value, hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"hours": hours,
|
||||
"related": related
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/user_agents")
|
||||
async def get_entity_user_agents(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère les User-Agents associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(status_code=400, detail="Type d'entité invalide")
|
||||
|
||||
user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"user_agents": user_agents,
|
||||
"total": len(user_agents)
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/client_headers")
|
||||
async def get_entity_client_headers(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère les Client-Headers associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(status_code=400, detail="Type d'entité invalide")
|
||||
|
||||
client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"client_headers": client_headers,
|
||||
"total": len(client_headers)
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/paths")
async def get_entity_paths(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720),
):
    """Return the request paths associated with one entity.

    Args:
        entity_type: Kind of entity; must be a member of VALID_ENTITY_TYPES.
        entity_value: Identifier of the entity (the route captures it as a path).
        hours: Look-back window in hours (1-720), forwarded to the lookup helper.

    Returns:
        A dict echoing the entity, the values returned by ``get_array_values``
        for the ``paths`` field, and their count under ``total``.

    Raises:
        HTTPException: 400 when ``entity_type`` is not supported.
    """
    type_is_supported = entity_type in VALID_ENTITY_TYPES
    if not type_is_supported:
        raise HTTPException(status_code=400, detail="Type d'entité invalide")

    values = get_array_values(entity_type, entity_value, 'paths', hours)

    payload = dict(entity_type=entity_type, entity_value=entity_value)
    payload["paths"] = values
    payload["total"] = len(values)
    return payload
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/query_params")
async def get_entity_query_params(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720),
):
    """Return the query-parameter values associated with one entity.

    Args:
        entity_type: Kind of entity; must be a member of VALID_ENTITY_TYPES.
        entity_value: Identifier of the entity (the route captures it as a path).
        hours: Look-back window in hours (1-720), forwarded to the lookup helper.

    Returns:
        A dict echoing the entity, the values returned by ``get_array_values``
        for the ``query_params`` field, and their count under ``total``.

    Raises:
        HTTPException: 400 when ``entity_type`` is not supported.
    """
    type_is_supported = entity_type in VALID_ENTITY_TYPES
    if not type_is_supported:
        raise HTTPException(status_code=400, detail="Type d'entité invalide")

    values = get_array_values(entity_type, entity_value, 'query_params', hours)

    payload = dict(entity_type=entity_type, entity_value=entity_value)
    payload["query_params"] = values
    payload["total"] = len(values)
    return payload
|
||||
827
services/dashboard/backend/routes/fingerprints.py
Normal file
827
services/dashboard/backend/routes/fingerprints.py
Normal file
@ -0,0 +1,827 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des fingerprints JA4 et User-Agents
|
||||
|
||||
Objectifs:
|
||||
- Détecter le spoofing JA4 (fingerprint TLS qui prétend être un navigateur mais
|
||||
dont les User-Agents, les headers HTTP ou les métriques comportementales trahissent
|
||||
une origine bot/script)
|
||||
- Construire une matrice JA4 × User-Agent pour visualiser les associations suspectes
|
||||
- Analyser la distribution des User-Agents pour identifier les rotateurs et les bots
|
||||
qui usurpent des UA de navigateurs légitimes
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
import re
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"])
|
||||
|
||||
|
||||
# ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Patterns indiquant clairement un bot/script sans simulation de navigateur
|
||||
_BOT_PATTERNS = re.compile(
|
||||
r"bot|crawler|spider|scraper|python|curl|wget|go-http|java/|axios|"
|
||||
r"libwww|httpclient|okhttp|requests|aiohttp|httpx|playwright|puppeteer|"
|
||||
r"selenium|headless|phantomjs",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Navigateurs légitimes communs — un JA4 de type "browser" devrait venir avec ces UAs
|
||||
_BROWSER_PATTERNS = re.compile(
|
||||
r"mozilla|chrome|safari|firefox|edge|opera|trident",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _classify_ua(ua: str) -> str:
|
||||
"""Retourne 'bot', 'browser', ou 'script'"""
|
||||
if not ua:
|
||||
return "empty"
|
||||
if _BOT_PATTERNS.search(ua):
|
||||
return "bot"
|
||||
if _BROWSER_PATTERNS.search(ua):
|
||||
return "browser"
|
||||
return "script"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 1 — Détection de spoofing JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/spoofing")
async def get_ja4_spoofing(
    hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle"),
    min_detections: int = Query(10, ge=1, description="Nombre minimum de détections"),
    limit: int = Query(50, ge=1, le=200),
):
    """Rank JA4 fingerprints by how likely they are to be spoofing a browser.

    Aggregates ``ml_detected_anomalies`` per JA4 over the last ``hours`` hours,
    looks up the top User-Agents of each JA4 in ``view_dashboard_user_agents``
    and derives a spoofing score in [0, 100] from:

    - the UA / Client-Hints mismatch rate (weight 0.40),
    - the average browser score, counted only when a bot UA is present (up to 25),
    - SNI (0.10) and ALPN (0.05) mismatch rates,
    - the rare-JA4 rate (0.10) and the UA-rotation rate (0.10),
    - a flat +10 when more than 30% of detections lack ALPN.

    Args:
        hours: Look-back window in hours.
        min_detections: Minimum number of detections for a JA4 to be listed.
        limit: Maximum number of JA4 rows returned.

    Returns:
        A dict with ``items`` (sorted by descending score, then volume),
        ``total``, ``period_hours`` and a per-classification ``summary``.

    Raises:
        HTTPException: 500 when the main aggregation or the scoring fails.
    """
    # Function-scope import so this block does not depend on the module header.
    import logging

    try:
        # Per-JA4 aggregation carrying every spoofing indicator.
        query = """
            SELECT
                ja4,
                count() AS total_detections,
                uniq(src_ip) AS unique_ips,

                -- Indicateurs de mismatch
                countIf(ua_ch_mismatch = true) AS ua_ch_mismatch_count,
                round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
                countIf(sni_host_mismatch = true) AS sni_mismatch_count,
                round(countIf(sni_host_mismatch = true) * 100.0 / count(), 2) AS sni_mismatch_pct,
                countIf(alpn_http_mismatch = true) AS alpn_mismatch_count,
                round(countIf(alpn_http_mismatch = true) * 100.0 / count(), 2) AS alpn_mismatch_pct,

                -- Indicateurs comportementaux
                avg(modern_browser_score) AS avg_browser_score,
                countIf(is_rare_ja4 = true) AS rare_ja4_count,
                round(countIf(is_rare_ja4 = true) * 100.0 / count(), 2) AS rare_ja4_pct,
                countIf(is_ua_rotating = true) AS ua_rotating_count,
                round(countIf(is_ua_rotating = true) * 100.0 / count(), 2) AS ua_rotating_pct,

                -- Métriques TLS/TCP
                countIf(is_alpn_missing = true) AS alpn_missing_count,
                avg(distinct_ja4_count) AS avg_distinct_ja4_per_ip,

                -- Répartition threat levels
                countIf(threat_level = 'CRITICAL') AS critical_count,
                countIf(threat_level = 'HIGH') AS high_count,

                -- Botnet indicators
                avg(ja4_asn_concentration) AS avg_asn_concentration,
                avg(ja4_country_concentration) AS avg_country_concentration,

                argMax(threat_level, detected_at) AS last_threat_level
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
              AND ja4 != '' AND ja4 IS NOT NULL
            GROUP BY ja4
            HAVING total_detections >= %(min_detections)s
            ORDER BY ua_ch_mismatch_pct DESC, total_detections DESC
            LIMIT %(limit)s
        """

        result = db.query(query, {
            "hours": hours,
            "min_detections": min_detections,
            "limit": limit,
        })

        # Top User-Agents per JA4. Every returned JA4 is looked up (at most
        # `limit` <= 200 short strings) so no row is scored without its UAs.
        ja4_list = [str(r[0]) for r in result.result_rows if r[0]]
        ua_by_ja4: dict = {}
        if ja4_list:
            # Values are bound as parameters rather than spliced into the SQL
            # text, so quoting and escaping are left to the database client.
            ua_q = """
                SELECT ja4, groupArray(5)(ua) AS top_uas
                FROM (
                    SELECT ja4, arrayJoin(user_agents) AS ua, sum(requests) AS cnt
                    FROM view_dashboard_user_agents
                    WHERE ja4 IN %(ja4_list)s
                      AND hour >= now() - INTERVAL %(hours)s HOUR
                      AND ua != ''
                    GROUP BY ja4, ua
                    ORDER BY ja4, cnt DESC
                )
                GROUP BY ja4
            """
            try:
                ua_res = db.query(ua_q, {"ja4_list": tuple(ja4_list), "hours": hours})
                for ua_row in ua_res.result_rows:
                    if ua_row[1]:
                        ua_by_ja4[str(ua_row[0])] = list(ua_row[1])
            except Exception:
                # Best effort: the endpoint still answers without UA details,
                # but the failure is recorded instead of being swallowed.
                logging.getLogger(__name__).warning(
                    "User-Agent lookup failed for /spoofing", exc_info=True
                )

        items = []
        for row in result.result_rows:
            ja4 = str(row[0])
            ua_ch_mismatch_pct = float(row[4] or 0)
            sni_mismatch_pct = float(row[6] or 0)
            alpn_mismatch_pct = float(row[8] or 0)
            avg_browser_score = float(row[9] or 0)
            rare_ja4_pct = float(row[11] or 0)
            ua_rotating_pct = float(row[13] or 0)
            alpn_missing_count = int(row[14] or 0)
            total = int(row[1] or 1)

            top_uas = ua_by_ja4.get(ja4, [])
            ua_classes = [_classify_ua(u) for u in top_uas]
            has_bot_ua = "bot" in ua_classes
            has_browser_ua = "browser" in ua_classes

            # Spoofing confidence score in [0, 100]; see the docstring for the
            # weight of each signal.
            spoof_score = min(100, round(
                ua_ch_mismatch_pct * 0.40
                + (avg_browser_score * 25 / 100 if has_bot_ua else 0)
                + sni_mismatch_pct * 0.10
                + alpn_mismatch_pct * 0.05
                + rare_ja4_pct * 0.10
                + ua_rotating_pct * 0.10
                + (10 if alpn_missing_count > total * 0.3 else 0)
            ))

            # JA4 classification.
            if spoof_score >= 60:
                classification = "spoofed_browser"
            elif has_bot_ua and avg_browser_score < 30:
                classification = "known_bot"
            elif has_browser_ua and ua_ch_mismatch_pct < 10:
                classification = "legitimate_browser"
            else:
                classification = "suspicious"

            items.append({
                "ja4": ja4,
                "classification": classification,
                "spoofing_score": spoof_score,
                "total_detections": int(row[1] or 0),
                "unique_ips": int(row[2] or 0),
                "indicators": {
                    "ua_ch_mismatch_pct": ua_ch_mismatch_pct,
                    "sni_mismatch_pct": sni_mismatch_pct,
                    "alpn_mismatch_pct": alpn_mismatch_pct,
                    "avg_browser_score": round(avg_browser_score, 1),
                    "rare_ja4_pct": rare_ja4_pct,
                    "ua_rotating_pct": ua_rotating_pct,
                    "alpn_missing_count": alpn_missing_count,
                    "avg_asn_concentration": round(float(row[18] or 0), 3),
                    "avg_country_concentration": round(float(row[19] or 0), 3),
                },
                "top_user_agents": [
                    {"ua": u, "type": ua_class}
                    for u, ua_class in zip(top_uas, ua_classes)
                ],
                "threat_breakdown": {
                    "critical": int(row[16] or 0),
                    "high": int(row[17] or 0),
                    "last_level": str(row[20] or "LOW"),
                },
            })

        # Highest score first (spoofed_browser entries are the ones >= 60),
        # ties broken by detection volume.
        items.sort(key=lambda x: (-x["spoofing_score"], -x["total_detections"]))

        return {
            "items": items,
            "total": len(items),
            "period_hours": hours,
            "summary": {
                "spoofed_browser": sum(1 for i in items if i["classification"] == "spoofed_browser"),
                "known_bot": sum(1 for i in items if i["classification"] == "known_bot"),
                "suspicious": sum(1 for i in items if i["classification"] == "suspicious"),
                "legitimate_browser": sum(1 for i in items if i["classification"] == "legitimate_browser"),
            },
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 2 — Matrice JA4 × User-Agent
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ja4-ua-matrix")
async def get_ja4_ua_matrix(
    hours: int = Query(24, ge=1, le=168),
    min_ips: int = Query(3, ge=1, description="Nombre minimum d'IPs pour inclure un JA4"),
    limit: int = Query(30, ge=1, le=100),
):
    """Build the JA4 x User-Agent matrix.

    For each JA4 seen in ``ml_detected_anomalies`` during the last ``hours``
    hours with at least ``min_ips`` distinct IPs, returns:

    - up to 8 of its most requested User-Agents (from
      ``view_dashboard_user_agents``), each classified as bot/browser/script,
    - the UA / Client-Hints mismatch rate,
    - a spoofing flag raised when the average browser score is above 50 while
      more than 30% of the UA traffic is bot-typed and the mismatch rate
      exceeds 20%.

    Args:
        hours: Look-back window in hours.
        min_ips: Minimum number of distinct IPs for a JA4 to be included.
        limit: Maximum number of JA4 rows returned.

    Returns:
        A dict with ``items``, ``total`` and ``period_hours``.

    Raises:
        HTTPException: 500 when the statistics query or the assembly fails.
    """
    # Function-scope import so this block does not depend on the module header.
    import logging

    try:
        # Per-JA4 statistics from ml_detected_anomalies.
        stats_query = """
            SELECT
                ja4,
                uniq(src_ip) AS unique_ips,
                count() AS total_detections,
                round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
                avg(modern_browser_score) AS avg_browser_score,
                countIf(is_rare_ja4 = true) AS rare_count,
                countIf(is_ua_rotating = true) AS rotating_count,
                argMax(threat_level, detected_at) AS last_threat
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
              AND ja4 != '' AND ja4 IS NOT NULL
            GROUP BY ja4
            HAVING unique_ips >= %(min_ips)s
            ORDER BY ua_ch_mismatch_pct DESC, unique_ips DESC
            LIMIT %(limit)s
        """

        stats_res = db.query(stats_query, {"hours": hours, "min_ips": min_ips, "limit": limit})
        ja4_list = [str(r[0]) for r in stats_res.result_rows]

        if not ja4_list:
            return {"items": [], "total": 0, "period_hours": hours}

        # User-Agents per JA4. Values are bound as parameters rather than
        # spliced into the SQL text.
        ua_query = """
            SELECT
                ja4,
                ua,
                sum(requests) AS cnt
            FROM view_dashboard_user_agents
            ARRAY JOIN user_agents AS ua
            WHERE ja4 IN %(ja4_list)s
              AND hour >= now() - INTERVAL %(hours)s HOUR
              AND ua != ''
            GROUP BY ja4, ua
            ORDER BY ja4, cnt DESC
        """

        ua_by_ja4: dict = {}
        try:
            ua_res = db.query(ua_query, {"ja4_list": tuple(ja4_list), "hours": hours})
            for row in ua_res.result_rows:
                bucket = ua_by_ja4.setdefault(str(row[0]), [])
                # Rows arrive ordered by descending count: keep the top 8.
                if len(bucket) < 8:
                    bucket.append({"ua": str(row[1]), "count": int(row[2] or 0)})
        except Exception:
            # Best effort: the matrix is still returned without UA details,
            # but the failure is recorded instead of being swallowed.
            logging.getLogger(__name__).warning(
                "User-Agent lookup failed for /ja4-ua-matrix", exc_info=True
            )

        items = []
        for row in stats_res.result_rows:
            ja4 = str(row[0])
            unique_ips = int(row[1] or 0)
            ua_ch_mismatch_pct = float(row[3] or 0)
            avg_browser_score = float(row[4] or 0)

            top_uas = ua_by_ja4.get(ja4, [])
            ua_total = sum(u["count"] for u in top_uas) or 1

            classified_uas = [
                {
                    "ua": u["ua"],
                    "count": u["count"],
                    "pct": round(u["count"] * 100 / ua_total, 1),
                    "type": _classify_ua(u["ua"]),
                }
                for u in top_uas
            ]

            bot_pct = sum(u["pct"] for u in classified_uas if u["type"] == "bot")
            browser_pct = sum(u["pct"] for u in classified_uas if u["type"] == "browser")
            # With no User-Agent at all there is no "script" share either; the
            # remainder is also clamped so rounding can never make it negative.
            if classified_uas:
                script_pct = max(0.0, round(100 - bot_pct - browser_pct, 1))
            else:
                script_pct = 0.0

            # Spoofing flag: the JA4 looks like a browser (high browser score)
            # while its User-Agents are bots/scripts.
            is_spoofing = avg_browser_score > 50 and bot_pct > 30 and ua_ch_mismatch_pct > 20

            items.append({
                "ja4": ja4,
                "unique_ips": unique_ips,
                "total_detections": int(row[2] or 0),
                "ua_ch_mismatch_pct": ua_ch_mismatch_pct,
                "avg_browser_score": round(avg_browser_score, 1),
                "rare_count": int(row[5] or 0),
                "rotating_count": int(row[6] or 0),
                "last_threat": str(row[7] or "LOW"),
                "user_agents": classified_uas,
                "ua_summary": {
                    "bot_pct": round(bot_pct, 1),
                    "browser_pct": round(browser_pct, 1),
                    "script_pct": script_pct,
                    "total_distinct": len(top_uas),
                },
                "is_spoofing_suspect": is_spoofing,
            })

        return {
            "items": items,
            "total": len(items),
            "period_hours": hours,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 3 — Analyse globale des User-Agents
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ua-analysis")
async def get_ua_analysis(
    hours: int = Query(24, ge=1, le=168),
    limit: int = Query(50, ge=1, le=200),
):
    """Analyse the User-Agents seen in the dashboard views.

    Steps:

    1. Fetch the ``limit`` most requested User-Agents over the last ``hours``
       hours from ``view_dashboard_user_agents``.
    2. For the first 50 of them, count the distinct JA4 fingerprints they were
       seen with (more than 3 marks the UA as a multi-JA4 suspect).
    3. List the IPs flagged ``is_ua_rotating`` in ``ml_detected_anomalies``.

    Args:
        hours: Look-back window in hours.
        limit: Maximum number of User-Agents returned.

    Returns:
        A dict with ``items``, ``total``, ``period_hours``,
        ``ua_rotating_stats`` and a ``summary`` of counts per UA type.

    Raises:
        HTTPException: 500 when the main query or the assembly fails.
    """
    # Function-scope import so this block does not depend on the module header.
    import logging

    log = logging.getLogger(__name__)

    try:
        # Global top User-Agents. NOTE: `ip_count` is sum(requests); the name
        # is kept because it is part of the response contract.
        ua_global_query = """
            SELECT
                ua,
                sum(requests) AS ip_count
            FROM view_dashboard_user_agents
            ARRAY JOIN user_agents AS ua
            WHERE hour >= now() - INTERVAL %(hours)s HOUR
              AND ua != ''
            GROUP BY ua
            ORDER BY ip_count DESC
            LIMIT %(limit)s
        """

        ua_global_res = db.query(ua_global_query, {"hours": hours, "limit": limit})
        top_uas = [str(r[0]) for r in ua_global_res.result_rows]

        # JA4 fingerprints per User-Agent. UA strings come straight from
        # client requests, so they are bound as parameters: doubling quotes by
        # hand does not neutralise backslash escapes in ClickHouse literals.
        ja4_by_ua: dict = {}
        if top_uas:
            ja4_per_ua_query = """
                SELECT
                    ua,
                    uniq(ja4) AS unique_ja4s,
                    groupUniqArray(3)(ja4) AS sample_ja4s
                FROM view_dashboard_user_agents
                ARRAY JOIN user_agents AS ua
                WHERE ua IN %(ua_list)s
                  AND hour >= now() - INTERVAL %(hours)s HOUR
                  AND ua != ''
                  AND ja4 != ''
                GROUP BY ua
            """
            try:
                ja4_res = db.query(
                    ja4_per_ua_query,
                    # Capped at 50 values to keep the statement size bounded.
                    {"ua_list": tuple(top_uas[:50]), "hours": hours},
                )
                for r in ja4_res.result_rows:
                    ja4_by_ua[str(r[0])] = {
                        "unique_ja4s": int(r[1] or 0),
                        "sample_ja4s": list(r[2] or []),
                    }
            except Exception:
                # Best effort: items are still returned with zero JA4 info.
                log.warning("JA4-per-UA lookup failed for /ua-analysis", exc_info=True)

        # IPs flagged is_ua_rotating in ml_detected_anomalies.
        rotating_query = """
            SELECT
                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
                avg(ua_ch_mismatch) AS avg_ua_ch_mismatch
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
              AND is_ua_rotating = true
            GROUP BY clean_ip
            ORDER BY avg_ua_ch_mismatch DESC
        """
        rotating_ips: list = []
        try:
            rot_res = db.query(rotating_query, {"hours": hours})
            rotating_ips = [str(r[0]) for r in rot_res.result_rows]
        except Exception:
            # Best effort: rotation stats are reported as empty.
            log.warning("UA-rotation lookup failed for /ua-analysis", exc_info=True)

        # Build the response.
        items = []
        for row in ua_global_res.result_rows:
            ua = str(row[0])
            ip_count = int(row[1] or 0)
            ua_type = _classify_ua(ua)
            ja4_info = ja4_by_ua.get(ua, {"unique_ja4s": 0, "sample_ja4s": []})

            # A UA seen with many JA4s is suspect: a real browser usually
            # shows only one or two.
            multi_ja4_flag = ja4_info["unique_ja4s"] > 3

            items.append({
                "user_agent": ua,
                "type": ua_type,
                "ip_count": ip_count,
                "unique_ja4_count": ja4_info["unique_ja4s"],
                "sample_ja4s": ja4_info["sample_ja4s"],
                "is_multi_ja4_suspect": multi_ja4_flag,
                "risk_flags": _build_ua_risk_flags(ua, ua_type, ja4_info["unique_ja4s"], ip_count),
            })

        # IPs rotating their User-Agent.
        ua_rotating_stats = {
            "rotating_ip_count": len(rotating_ips),
            "sample_rotating_ips": rotating_ips[:10],
        }

        return {
            "items": items,
            "total": len(items),
            "period_hours": hours,
            "ua_rotating_stats": ua_rotating_stats,
            "summary": {
                "bot_count": sum(1 for i in items if i["type"] == "bot"),
                "browser_count": sum(1 for i in items if i["type"] == "browser"),
                "script_count": sum(1 for i in items if i["type"] == "script"),
                "multi_ja4_suspect_count": sum(1 for i in items if i["is_multi_ja4_suspect"]),
            },
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
def _build_ua_risk_flags(ua: str, ua_type: str, unique_ja4s: int, ip_count: int) -> list:
|
||||
flags = []
|
||||
if ua_type == "bot":
|
||||
flags.append("ua_bot_signature")
|
||||
elif ua_type == "script":
|
||||
flags.append("ua_script_library")
|
||||
if unique_ja4s > 5:
|
||||
flags.append("ja4_rotation_suspect")
|
||||
if unique_ja4s > 3 and ua_type == "browser":
|
||||
flags.append("browser_ua_multi_fingerprint")
|
||||
if ip_count > 100:
|
||||
flags.append("high_volume")
|
||||
return flags
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 4 — JA4 d'un IP spécifique: analyse de cohérence UA/JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ip/{ip}/coherence")
async def get_ip_fingerprint_coherence(ip: str):
    """Assess whether one IP appears to be spoofing its fingerprint.

    Reads every ``ml_detected_anomalies`` row for the IP (newest first) and
    the User-Agents it sent over the last 72 hours, then derives a spoofing
    score in [0, 100] from:

    - the UA / Client-Hints mismatch rate (weight 0.40),
    - the average browser score, counted only when a bot UA is present (0.20),
    - SNI (0.10) and ALPN (0.05) mismatch rates,
    - 5 points per distinct JA4 when more than two are used,
    - +15 when any row is flagged ``is_ua_rotating``,
    - +10 when more than half of the rows are flagged ``is_rare_ja4``.

    Args:
        ip: The IP address, compared as-is with ``src_ip``.

    Returns:
        A dict with the ``verdict``, ``spoofing_score``, a human-readable
        ``explanation`` list, aggregated ``indicators``, the JA4
        ``fingerprints`` (sorted for a stable response), the ``user_agents``
        and the ``latest_detection`` details.

    Raises:
        HTTPException: 404 when the IP has no detection, 500 on other errors.
    """
    try:
        # Detection rows for this IP.
        ml_query = """
            SELECT
                ja4,
                ua_ch_mismatch,
                modern_browser_score,
                sni_host_mismatch,
                alpn_http_mismatch,
                is_alpn_missing,
                is_rare_ja4,
                is_ua_rotating,
                distinct_ja4_count,
                header_count,
                has_accept_language,
                has_cookie,
                has_referer,
                header_order_shared_count,
                detected_at,
                threat_level,
                window_mss_ratio,
                tcp_jitter_variance,
                multiplexing_efficiency
            FROM ml_detected_anomalies
            WHERE src_ip = %(ip)s
            ORDER BY detected_at DESC
        """
        ml_res = db.query(ml_query, {"ip": ip})

        if not ml_res.result_rows:
            raise HTTPException(status_code=404, detail="IP non trouvée dans les détections")

        # User-Agents actually sent by this IP.
        ua_query = """
            SELECT ua, sum(requests) AS cnt
            FROM view_dashboard_user_agents
            ARRAY JOIN user_agents AS ua
            WHERE toString(src_ip) = %(ip)s
              AND hour >= now() - INTERVAL 72 HOUR
              AND ua != ''
            GROUP BY ua ORDER BY cnt DESC
        """
        ua_res = db.query(ua_query, {"ip": ip})
        top_uas = [
            {"ua": str(r[0]), "count": int(r[1] or 0), "type": _classify_ua(str(r[0]))}
            for r in ua_res.result_rows
        ]

        # Aggregate the indicators; rows are newest first.
        rows = ml_res.result_rows
        latest = rows[0]
        total_rows = len(rows)

        ua_ch_mismatch_count = sum(1 for r in rows if r[1])
        sni_mismatch_count = sum(1 for r in rows if r[3])
        alpn_mismatch_count = sum(1 for r in rows if r[4])
        is_rare_count = sum(1 for r in rows if r[6])
        is_rotating = any(r[7] for r in rows)
        distinct_ja4s = {str(r[0]) for r in rows if r[0]}
        avg_browser_score = sum(int(r[2] or 0) for r in rows) / total_rows

        # User-Agent analysis.
        ua_types = {u["type"] for u in top_uas}
        has_browser_ua = "browser" in ua_types
        has_bot_ua = "bot" in ua_types

        # Spoofing score; see the docstring for the weight of each signal.
        spoof_score = min(100, round(
            (ua_ch_mismatch_count / total_rows * 100) * 0.40
            + (avg_browser_score * 0.20 if has_bot_ua else 0)
            + (sni_mismatch_count / total_rows * 100) * 0.10
            + (alpn_mismatch_count / total_rows * 100) * 0.05
            + (len(distinct_ja4s) * 5 if len(distinct_ja4s) > 2 else 0)
            + (15 if is_rotating else 0)
            + (10 if is_rare_count > total_rows * 0.5 else 0)
        ))

        # Verdict.
        if spoof_score >= 70:
            verdict = "high_confidence_spoofing"
        elif spoof_score >= 40:
            verdict = "suspicious_spoofing"
        elif has_bot_ua and avg_browser_score < 20:
            verdict = "known_bot_no_spoofing"
        elif has_browser_ua and spoof_score < 20:
            verdict = "legitimate_browser"
        else:
            verdict = "inconclusive"

        # Human-readable explanation.
        explanation = []
        if ua_ch_mismatch_count > total_rows * 0.3:
            explanation.append(f"UA-Client-Hints mismatch sur {round(ua_ch_mismatch_count*100/total_rows)}% des requêtes")
        if has_bot_ua and avg_browser_score > 40:
            explanation.append(f"JA4 ressemble à un navigateur (score {round(avg_browser_score)}/100) mais UA est de type bot")
        if len(distinct_ja4s) > 2:
            explanation.append(f"{len(distinct_ja4s)} JA4 distincts utilisés → rotation de fingerprint")
        if is_rotating:
            explanation.append("is_ua_rotating détecté → rotation d'User-Agent confirmée")
        if sni_mismatch_count > 0:
            explanation.append(f"SNI ≠ Host header sur {sni_mismatch_count}/{total_rows} requêtes")
        if not explanation:
            explanation.append("Aucun indicateur de spoofing majeur détecté")

        return {
            "ip": ip,
            "verdict": verdict,
            "spoofing_score": spoof_score,
            "explanation": explanation,
            "indicators": {
                "ua_ch_mismatch_rate": round(ua_ch_mismatch_count / total_rows * 100, 1),
                "sni_mismatch_rate": round(sni_mismatch_count / total_rows * 100, 1),
                "alpn_mismatch_rate": round(alpn_mismatch_count / total_rows * 100, 1),
                "avg_browser_score": round(avg_browser_score, 1),
                "distinct_ja4_count": len(distinct_ja4s),
                "is_ua_rotating": is_rotating,
                "rare_ja4_rate": round(is_rare_count / total_rows * 100, 1),
            },
            "fingerprints": {
                # Sorted: set iteration order varies between processes, which
                # would make the response order flap from one call to the next.
                "ja4_list": sorted(distinct_ja4s),
                "latest_ja4": str(latest[0] or ""),
            },
            "user_agents": top_uas,
            "latest_detection": {
                "detected_at": latest[14].isoformat() if latest[14] else "",
                "threat_level": str(latest[15] or "LOW"),
                "modern_browser_score": int(latest[2] or 0),
                "header_count": int(latest[9] or 0),
                "has_accept_language": bool(latest[10]),
                "has_cookie": bool(latest[11]),
                "has_referer": bool(latest[12]),
                "header_order_shared_count": int(latest[13] or 0),
            },
        }

    except HTTPException:
        # Let the 404 above through untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 5 — JA4 légitimes (baseline / whitelist)
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/legitimate-ja4")
async def get_legitimate_ja4(
    hours: int = Query(168, ge=24, le=720, description="Fenêtre pour établir la baseline"),
    min_ips: int = Query(50, ge=5, description="Nombre minimum d'IPs pour qualifier un JA4 de légitime"),
):
    """Build a baseline of JA4 fingerprints that look legitimate.

    A JA4 is kept when, over the last ``hours`` hours in
    ``ml_detected_anomalies``:

    - it is used by at least ``min_ips`` distinct IPs,
    - its UA / Client-Hints mismatch rate is below 5%,
    - its average ``modern_browser_score`` is above 60,
    - none of its rows is flagged ``is_rare_ja4``.

    Each entry carries a ``legitimacy_confidence`` in [0, 100] combining the
    mismatch rate (40), the browser score (x0.40) and IP coverage (20).

    Returns:
        A dict with ``items`` (most widespread JA4 first), ``total``,
        ``period_hours`` and a ``note`` reminding that the list is only a
        whitelist candidate set.

    Raises:
        HTTPException: 500 when the query or the assembly fails.
    """
    try:
        query = """
            SELECT
                ja4,
                uniq(src_ip) AS unique_ips,
                count() AS total_detections,
                round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
                avg(modern_browser_score) AS avg_browser_score,
                countIf(is_rare_ja4 = true) AS rare_count,
                round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct,
                round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
              AND ja4 != '' AND ja4 IS NOT NULL
            GROUP BY ja4
            HAVING unique_ips >= %(min_ips)s
               AND ua_ch_mismatch_pct < 5.0
               AND avg_browser_score > 60
               AND rare_count = 0
            ORDER BY unique_ips DESC
        """

        bound_params = {"hours": hours, "min_ips": min_ips}
        result = db.query(query, bound_params)

        items = []
        for record in result.result_rows:
            ip_total = int(record[1] or 0)
            mismatch_pct = float(record[3] or 0)
            browser_score = float(record[4] or 0)

            # Confidence: low mismatch (40) + browser score (x0.40) + IP
            # coverage relative to the requested minimum (20), capped at 100.
            confidence = min(100, round(
                (1 - mismatch_pct / 100) * 40
                + browser_score * 0.40
                + min(ip_total / min_ips, 1) * 20
            ))

            items.append({
                "ja4": str(record[0]),
                "unique_ips": ip_total,
                "total_detections": int(record[2] or 0),
                "ua_ch_mismatch_pct": mismatch_pct,
                "avg_browser_score": round(browser_score, 1),
                "critical_pct": float(record[6] or 0),
                "high_pct": float(record[7] or 0),
                "legitimacy_confidence": confidence,
            })

        response = {"items": items, "total": len(items), "period_hours": hours}
        response["note"] = "Ces JA4 sont candidats à une whitelist. Vérifier manuellement avant de whitelister."
        return response

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT — Corrélation JA4 × ASN / Pays (C5)
|
||||
# Détecte les JA4 fortement concentrés sur un seul ASN ou pays
|
||||
# → signal de botnet ciblé ou d'infrastructure de test/attaque partagée
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/asn-correlation")
async def get_ja4_asn_correlation(
    min_concentration: float = Query(0.7, ge=0.0, le=1.0, description="Seuil min de concentration ASN ou pays"),
    min_ips: int = Query(5, ge=1, description="Nombre minimum d'IPs par JA4"),
    limit: int = Query(50, ge=1, le=200),
):
    """List JA4 fingerprints concentrated on a single ASN or country.

    Works on the last 24 hours of ``mabase_prod.agg_host_ip_ja4_1h`` (the
    window is fixed in the query). A JA4 with ``asn_concentration`` >= 0.7
    means that at least 70% of the counted IPs using it come from the same
    ASN, which suggests shared bot infrastructure or a suspicious datacenter.

    Args:
        min_concentration: Minimum ASN or country concentration to be listed.
        min_ips: Minimum number of IPs for a JA4 to be considered.
        limit: Maximum number of JA4 rows returned.

    Returns:
        A dict with ``items`` and ``total``. Each item carries a
        ``correlation_type`` (``asn_monopoly``, ``asn_dominant``,
        ``geo_targeted`` or ``distributed``) and a ``risk`` level.

    Raises:
        HTTPException: 500 when the query or the assembly fails.
    """
    try:
        # Three passes:
        #   1. count IPs per (ja4, asn, country) combination;
        #   2. add, with window sums, the totals per (ja4, asn) and per
        #      (ja4, country), so an ASN spanning several countries (or a
        #      country spanning several ASNs) is measured as a whole;
        #   3. aggregate per ja4 and compute both concentration ratios.
        # Previously both ratios were derived from the same per-combination
        # count, so country_concentration always equalled asn_concentration.
        sql = """
            SELECT
                ja4,
                sum(ips_per_combo) AS unique_ips,
                uniq(src_asn) AS unique_asns,
                uniq(src_country_code) AS unique_countries,
                toString(argMax(src_asn, asn_ips)) AS top_asn_number,
                argMax(asn_name, asn_ips) AS top_asn_name,
                argMax(src_country_code, country_ips) AS dominant_country,
                sum(total_hits) AS total_hits,
                round(max(asn_ips) / greatest(sum(ips_per_combo), 1), 3) AS asn_concentration,
                round(max(country_ips) / greatest(sum(ips_per_combo), 1), 3) AS country_concentration
            FROM (
                SELECT
                    ja4,
                    src_asn,
                    src_country_code,
                    asn_name,
                    ips_per_combo,
                    total_hits,
                    sum(ips_per_combo) OVER (PARTITION BY ja4, src_asn) AS asn_ips,
                    sum(ips_per_combo) OVER (PARTITION BY ja4, src_country_code) AS country_ips
                FROM (
                    SELECT
                        ja4,
                        src_asn,
                        src_country_code,
                        any(src_as_name) AS asn_name,
                        uniq(src_ip) AS ips_per_combo,
                        sum(hits) AS total_hits
                    FROM mabase_prod.agg_host_ip_ja4_1h
                    WHERE window_start >= now() - INTERVAL 24 HOUR
                      AND ja4 != ''
                    GROUP BY ja4, src_asn, src_country_code
                )
            )
            GROUP BY ja4
            HAVING unique_ips >= %(min_ips)s
               AND (asn_concentration >= %(min_conc)s OR country_concentration >= %(min_conc)s)
            ORDER BY asn_concentration DESC, unique_ips DESC
            LIMIT %(limit)s
        """
        result = db.query(sql, {"min_ips": min_ips, "min_conc": min_concentration, "limit": limit})

        items = []
        for row in result.result_rows:
            asn_concentration = float(row[8] or 0)
            country_concentration = float(row[9] or 0)

            # ASN concentration takes precedence over country concentration.
            if asn_concentration >= 0.85:
                corr_type, risk = "asn_monopoly", "high"
            elif asn_concentration >= min_concentration:
                corr_type, risk = "asn_dominant", "medium"
            elif country_concentration >= min_concentration:
                corr_type, risk = "geo_targeted", "medium"
            else:
                corr_type, risk = "distributed", "low"

            items.append({
                "ja4": str(row[0]),
                "unique_ips": int(row[1]),
                "unique_asns": int(row[2]),
                "unique_countries": int(row[3]),
                "top_asn_name": str(row[5] or ""),
                "top_asn_number": str(row[4] or ""),
                "dominant_country": str(row[6] or ""),
                "total_hits": int(row[7] or 0),
                "asn_concentration": asn_concentration,
                "country_concentration": country_concentration,
                "correlation_type": corr_type,
                "risk": risk,
            })
        return {"items": items, "total": len(items)}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
101
services/dashboard/backend/routes/header_fingerprint.py
Normal file
101
services/dashboard/backend/routes/header_fingerprint.py
Normal file
@ -0,0 +1,101 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des empreintes d'en-têtes HTTP
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"])
|
||||
|
||||
|
||||
@router.get("/clusters")
async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
    """Return HTTP header-fingerprint clusters grouped by ``header_order_hash``.

    Args:
        limit: Maximum number of clusters returned, largest ``unique_ips`` first.

    Returns:
        A dict with ``clusters`` (one entry per hash, each carrying a heuristic
        ``classification``) and ``total_clusters`` (number of distinct hashes in
        the table, independent of ``limit``).

    Raises:
        HTTPException: 500 when a query or a row conversion fails.
    """
    try:
        sql = """
            SELECT
                header_order_hash AS hash,
                uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
                avg(modern_browser_score) AS avg_browser_score,
                sum(ua_ch_mismatch) AS ua_ch_mismatch_count,
                round(sum(ua_ch_mismatch) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
                groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes,
                round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct,
                round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct
            FROM mabase_prod.agg_header_fingerprint_1h
            GROUP BY header_order_hash
            ORDER BY unique_ips DESC
            LIMIT %(limit)s
        """
        result = db.query(sql, {"limit": limit})

        total_sql = """
            SELECT uniq(header_order_hash)
            FROM mabase_prod.agg_header_fingerprint_1h
        """
        total_clusters = int(db.query(total_sql).result_rows[0][0])

        clusters = []
        for row in result.result_rows:
            h = str(row[0])
            unique_ips = int(row[1])
            avg_browser_score = float(row[2] or 0)
            ua_ch_mismatch_cnt = int(row[3])
            ua_ch_mismatch_pct = float(row[4] or 0)
            # De-duplicate while keeping the order returned by the database;
            # a plain set() would make the response order vary between runs.
            top_modes = list(dict.fromkeys(str(m) for m in (row[5] or [])))
            has_cookie_pct = float(row[6] or 0)
            has_referer_pct = float(row[7] or 0)

            if avg_browser_score >= 90 and ua_ch_mismatch_pct < 5:
                classification = "legitimate"
            elif ua_ch_mismatch_pct > 50:
                classification = "bot_suspicious"
            else:
                classification = "mixed"

            clusters.append({
                "hash": h,
                "unique_ips": unique_ips,
                "avg_browser_score": round(avg_browser_score, 2),
                "ua_ch_mismatch_count": ua_ch_mismatch_cnt,
                "ua_ch_mismatch_pct": ua_ch_mismatch_pct,
                "top_sec_fetch_modes": top_modes,
                "has_cookie_pct": has_cookie_pct,
                "has_referer_pct": has_referer_pct,
                "classification": classification,
            })
        return {"clusters": clusters, "total_clusters": total_clusters}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/cluster/{hash}/ips")
async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)):
    """List the IPs that belong to one header-fingerprint cluster.

    Args:
        hash: The ``header_order_hash`` identifying the cluster.
        limit: Maximum number of IPs returned, highest browser score first.

    Returns:
        A dict with ``items`` (one entry per IP) and ``total`` (length of
        ``items``).

    Raises:
        HTTPException: 500 when the query or a row conversion fails.
    """
    try:
        sql = """
            SELECT
                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                any(modern_browser_score) AS browser_score,
                any(ua_ch_mismatch) AS ua_ch_mismatch,
                any(sec_fetch_mode) AS sec_fetch_mode,
                any(sec_fetch_dest) AS sec_fetch_dest
            FROM mabase_prod.agg_header_fingerprint_1h
            WHERE header_order_hash = %(hash)s
            GROUP BY src_ip
            ORDER BY browser_score DESC
            LIMIT %(limit)s
        """
        params = {"hash": hash, "limit": limit}
        rows = db.query(sql, params).result_rows
        items = [
            {
                "ip": str(record[0]),
                "browser_score": int(record[1] or 0),
                "ua_ch_mismatch": int(record[2] or 0),
                "sec_fetch_mode": str(record[3] or ""),
                "sec_fetch_dest": str(record[4] or ""),
            }
            for record in rows
        ]
        return {"items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
144
services/dashboard/backend/routes/heatmap.py
Normal file
144
services/dashboard/backend/routes/heatmap.py
Normal file
@ -0,0 +1,144 @@
|
||||
"""
|
||||
Endpoints pour la heatmap temporelle (hits par heure / hôte)
|
||||
"""
|
||||
from collections import defaultdict
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/heatmap", tags=["heatmap"])
|
||||
|
||||
|
||||
@router.get("/hourly")
async def get_heatmap_hourly():
    """Return hits aggregated by hour of day over the last 72 hours.

    Rows are grouped on ``toHour(window_start)``, so hits from different days
    that fall in the same clock hour are summed into one bucket.

    Returns:
        A dict with ``hours``: a list of ``hour``, ``hits``, ``unique_ips`` and
        ``max_rps`` entries ordered by ascending hour.

    Raises:
        HTTPException: 500 when the query or a row conversion fails.
    """
    try:
        sql = """
            SELECT
                toHour(window_start) AS hour,
                sum(hits) AS hits,
                uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
                max(max_requests_per_sec) AS max_rps
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 72 HOUR
            GROUP BY hour
            ORDER BY hour ASC
        """
        hours = []
        for bucket in db.query(sql).result_rows:
            hour_of_day, hit_count, ip_count, peak_rps = (
                bucket[0], bucket[1], bucket[2], bucket[3]
            )
            hours.append({
                "hour": int(hour_of_day),
                "hits": int(hit_count),
                "unique_ips": int(ip_count),
                "max_rps": int(peak_rps),
            })
        return {"hours": hours}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/top-hosts")
async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
    """Return the most-hit hosts with a 24-slot hour-of-day breakdown.

    Both queries cover the last 72 hours; the breakdown buckets hits by
    ``toHour(window_start)`` (0-23).

    Args:
        limit: Maximum number of hosts returned, highest ``total_hits`` first.

    Returns:
        A dict with ``items``: per-host totals plus ``hourly_hits`` (a list of
        24 integers). ``items`` is empty when no host matched.

    Raises:
        HTTPException: 500 when a query or a row conversion fails.
    """
    try:
        # Overall totals per host; this also fixes the ranking order.
        agg_sql = """
            SELECT
                host,
                sum(hits) AS total_hits,
                uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
                uniq(ja4) AS unique_ja4s
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 72 HOUR
            GROUP BY host
            ORDER BY total_hits DESC
            LIMIT %(limit)s
        """
        agg_res = db.query(agg_sql, {"limit": limit})

        top_hosts = []
        host_stats = {}
        for record in agg_res.result_rows:
            name = str(record[0])
            top_hosts.append(name)
            host_stats[name] = {
                "host": name,
                "total_hits": int(record[1]),
                "unique_ips": int(record[2]),
                "unique_ja4s": int(record[3]),
            }

        if len(top_hosts) == 0:
            return {"items": []}

        # Hour-of-day breakdown restricted to the ranked hosts.
        hourly_sql = """
            SELECT
                host,
                toHour(window_start) AS hour,
                sum(hits) AS hits
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 72 HOUR
              AND host IN %(hosts)s
            GROUP BY host, hour
        """
        hourly_res = db.query(hourly_sql, {"hosts": top_hosts})

        # Hosts without any breakdown row still get a zero-filled list.
        hourly_map: dict = defaultdict(lambda: [0] * 24)
        for record in hourly_res.result_rows:
            hourly_map[str(record[0])][int(record[1])] += int(record[2])

        items = [
            {**host_stats[name], "hourly_hits": hourly_map[name]}
            for name in top_hosts
        ]
        return {"items": items}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/matrix")
async def get_heatmap_matrix():
    """Return a top-15 hosts x 24 hours matrix of summed hits (last 72 hours).

    Returns:
        A dict with ``hosts`` (host names, highest total hits first) and
        ``matrix`` (one list of 24 hour-of-day hit counts per host, in the same
        order as ``hosts``). Both are empty when no host matched.

    Raises:
        HTTPException: 500 when a query or a row conversion fails.
    """
    try:
        # LIMIT 15 enforces the documented "top-15"; without it every host seen
        # in the window was returned and fed into the IN list below.
        top_sql = """
            SELECT host, sum(hits) AS total_hits
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 72 HOUR
            GROUP BY host
            ORDER BY total_hits DESC
            LIMIT 15
        """
        top_res = db.query(top_sql)
        top_hosts = [str(r[0]) for r in top_res.result_rows]

        if not top_hosts:
            return {"hosts": [], "matrix": []}

        cell_sql = """
            SELECT
                host,
                toHour(window_start) AS hour,
                sum(hits) AS hits
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 72 HOUR
              AND host IN %(hosts)s
            GROUP BY host, hour
        """
        cell_res = db.query(cell_sql, {"hosts": top_hosts})

        # Hosts without any cell row still get a zero-filled list of 24 slots.
        matrix_map: dict = defaultdict(lambda: [0] * 24)
        for row in cell_res.result_rows:
            h = str(row[0])
            hour = int(row[1])
            hits = int(row[2])
            matrix_map[h][hour] += hits

        matrix = [matrix_map[h] for h in top_hosts]
        return {"hosts": top_hosts, "matrix": matrix}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
266
services/dashboard/backend/routes/incidents.py
Normal file
266
services/dashboard/backend/routes/incidents.py
Normal file
@ -0,0 +1,266 @@
|
||||
"""
|
||||
Routes pour la gestion des incidents clusterisés
|
||||
"""
|
||||
import hashlib
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/incidents", tags=["incidents"])
|
||||
|
||||
|
||||
@router.get("/clusters")
async def get_incident_clusters(
    hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle en heures"),
    min_severity: str = Query("LOW", description="Niveau de sévérité minimum"),
    limit: int = Query(20, ge=1, le=100, description="Nombre maximum de clusters")
):
    """Return automatically clustered incidents, one cluster per /24 subnet.

    Detections from ``ml_detected_anomalies`` inside the time window are grouped
    by /24 subnet (subnets with fewer than two detections are dropped). Each
    cluster is enriched with the first user agent known for its sample IP and
    with a trend computed against the previous window of the same duration.
    Both enrichments are best-effort: a failure is logged and the cluster is
    still returned.

    Args:
        hours: Size of the time window, in hours.
        min_severity: Lowest severity to return (LOW/MEDIUM/HIGH/CRITICAL,
            case-insensitive). Unknown values are treated as LOW. The filter is
            applied after the SQL ``LIMIT``, so fewer than ``limit`` clusters
            may be returned.
        limit: Maximum number of clusters fetched from the database.

    Returns:
        A dict with ``items`` (the clusters), ``total`` (length of ``items``)
        and ``period_hours``.

    Raises:
        HTTPException: 500 when the main cluster query or row processing fails.
    """
    import logging

    logger = logging.getLogger(__name__)
    severity_rank = {"LOW": 0, "MEDIUM": 1, "HIGH": 2, "CRITICAL": 3}
    # When this coroutine is called directly instead of through FastAPI, an
    # omitted argument is the Query(...) default object rather than a string.
    if isinstance(min_severity, str):
        min_rank = severity_rank.get(min_severity.upper(), 0)
    else:
        min_rank = 0

    try:
        # Cluster by /24 subnet and keep one sample IP per cluster.
        # The '::ffff:' prefix of IPv4-mapped addresses is stripped first so
        # the dotted octets can be split.
        cluster_query = """
            WITH cleaned_ips AS (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
                    detected_at,
                    ja4,
                    country_code,
                    asn_number,
                    threat_level,
                    anomaly_score
                FROM ml_detected_anomalies
                WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
            ),
            subnet_groups AS (
                SELECT
                    concat(
                        splitByChar('.', clean_ip)[1], '.',
                        splitByChar('.', clean_ip)[2], '.',
                        splitByChar('.', clean_ip)[3], '.0/24'
                    ) AS subnet,
                    count() AS total_detections,
                    uniq(clean_ip) AS unique_ips,
                    min(detected_at) AS first_seen,
                    max(detected_at) AS last_seen,
                    argMax(ja4, detected_at) AS ja4,
                    argMax(country_code, detected_at) AS country_code,
                    argMax(asn_number, detected_at) AS asn_number,
                    argMax(threat_level, detected_at) AS threat_level,
                    avg(anomaly_score) AS avg_score,
                    argMax(clean_ip, detected_at) AS sample_ip
                FROM cleaned_ips
                GROUP BY subnet
                HAVING total_detections >= 2
            )
            SELECT
                subnet,
                total_detections,
                unique_ips,
                first_seen,
                last_seen,
                ja4,
                country_code,
                asn_number,
                threat_level,
                avg_score,
                sample_ip
            FROM subnet_groups
            ORDER BY avg_score ASC, total_detections DESC
            LIMIT %(limit)s
        """

        result = db.query(cluster_query, {"hours": hours, "limit": limit})

        # Collect sample IPs so user agents can be fetched in one query.
        sample_ips = [str(row[10]) for row in result.result_rows if row[10]]

        # First user agent per sample IP from view_dashboard_entities.
        ua_by_ip: dict = {}
        if sample_ips:
            # The IP list is bound as a parameter instead of being spliced into
            # the SQL text.
            ua_query = """
                SELECT entity_value, arrayElement(user_agents, 1) AS top_ua
                FROM view_dashboard_entities
                WHERE entity_type = 'ip'
                  AND entity_value IN %(ips)s
                  AND notEmpty(user_agents)
                GROUP BY entity_value, top_ua
                ORDER BY entity_value
            """
            try:
                ua_result = db.query(ua_query, {"ips": sample_ips[:50]})
                for ua_row in ua_result.result_rows:
                    ip_key = str(ua_row[0])
                    # Keep only the first non-empty UA seen for each IP.
                    if ip_key not in ua_by_ip and ua_row[1]:
                        ua_by_ip[ip_key] = str(ua_row[1])
            except Exception:
                # UA enrichment is best-effort: log and carry on without it.
                logger.warning("Incident UA enrichment failed", exc_info=True)

        # Trend: compare the current window with the previous window of the
        # same duration.
        trend_query = """
            WITH cleaned AS (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
                    detected_at,
                    concat(
                        splitByChar('.', clean_ip)[1], '.',
                        splitByChar('.', clean_ip)[2], '.',
                        splitByChar('.', clean_ip)[3], '.0/24'
                    ) AS subnet
                FROM ml_detected_anomalies
            ),
            current_window AS (
                SELECT subnet, count() AS cnt
                FROM cleaned
                WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
                GROUP BY subnet
            ),
            prev_window AS (
                SELECT subnet, count() AS cnt
                FROM cleaned
                WHERE detected_at >= now() - INTERVAL %(hours2)s HOUR
                  AND detected_at < now() - INTERVAL %(hours)s HOUR
                GROUP BY subnet
            )
            SELECT c.subnet, c.cnt AS current_cnt, p.cnt AS prev_cnt
            FROM current_window c
            LEFT JOIN prev_window p ON c.subnet = p.subnet
        """
        trend_by_subnet: dict = {}
        try:
            trend_result = db.query(trend_query, {"hours": hours, "hours2": hours * 2})
            for tr in trend_result.result_rows:
                subnet_key = tr[0]
                curr = tr[1] or 0
                prev = tr[2] or 0
                if prev == 0:
                    # Nothing in the previous window: reported as a new cluster.
                    trend_by_subnet[subnet_key] = ("new", 100)
                else:
                    pct = round(((curr - prev) / prev) * 100)
                    trend_by_subnet[subnet_key] = ("up" if pct >= 0 else "down", abs(pct))
        except Exception:
            # Trend enrichment is best-effort: clusters fall back to "stable".
            logger.warning("Incident trend computation failed", exc_info=True)

        clusters = []
        for row in result.result_rows:
            subnet = row[0]
            threat_level = row[8] or 'LOW'
            unique_ips = row[2] or 1
            avg_score = abs(row[9] or 0)
            sample_ip = row[10] if row[10] else subnet.split('/')[0]

            critical_count = 1 if threat_level == 'CRITICAL' else 0
            high_count = 1 if threat_level == 'HIGH' else 0

            risk_score = min(100, round(
                (critical_count * 30) +
                (high_count * 20) +
                (unique_ips * 5) +
                (avg_score * 100)
            ))

            if critical_count > 0 or risk_score >= 80:
                severity = "CRITICAL"
            elif high_count > (row[1] or 1) * 0.3 or risk_score >= 60:
                severity = "HIGH"
            elif high_count > 0 or risk_score >= 40:
                severity = "MEDIUM"
            else:
                severity = "LOW"

            # Honour the requested minimum severity.
            if severity_rank[severity] < min_rank:
                continue

            trend_dir, trend_pct = trend_by_subnet.get(subnet, ("stable", 0))
            primary_ua = ua_by_ip.get(str(sample_ip), "")

            clusters.append({
                "id": f"INC-{hashlib.md5(subnet.encode()).hexdigest()[:8].upper()}",
                "score": risk_score,
                "severity": severity,
                "total_detections": row[1],
                "unique_ips": row[2],
                "subnet": subnet,
                "sample_ip": sample_ip,
                "ja4": row[5] or "",
                "primary_ua": primary_ua,
                "primary_target": row[3].strftime('%H:%M') if row[3] else "Unknown",
                "countries": [{"code": row[6] or "XX", "percentage": 100}],
                "asn": str(row[7]) if row[7] else "",
                "first_seen": row[3].isoformat() if row[3] else "",
                "last_seen": row[4].isoformat() if row[4] else "",
                "trend": trend_dir,
                "trend_percentage": trend_pct,
            })

        return {
            "items": clusters,
            "total": len(clusters),
            "period_hours": hours
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{cluster_id}")
async def get_incident_details(cluster_id: str):
    """Placeholder for the per-incident detail endpoint.

    Not implemented yet: every call is answered with HTTP 501 and a message
    pointing at ``/api/incidents/clusters``.

    Args:
        cluster_id: Identifier of the requested incident (currently unused).

    Raises:
        HTTPException: Always, with status code 501.
    """
    not_implemented_msg = (
        "Détails par incident non encore implémentés. "
        "Utilisez /api/incidents/clusters pour la liste."
    )
    raise HTTPException(status_code=501, detail=not_implemented_msg)
|
||||
|
||||
|
||||
@router.post("/{cluster_id}/classify")
async def classify_incident(
    cluster_id: str,
    label: str,
    tags: Optional[List[str]] = None,
    comment: str = ""
):
    """Placeholder for quick incident classification.

    Not implemented yet: every call is answered with HTTP 501 and a message
    pointing at ``/api/analysis/{ip}/classify``.

    Args:
        cluster_id: Identifier of the incident (currently unused).
        label: Classification label (currently unused).
        tags: Optional list of tags (currently unused).
        comment: Free-text comment (currently unused).

    Raises:
        HTTPException: Always, with status code 501.
    """
    raise HTTPException(
        status_code=501,
        detail="Classification par incident non encore implémentée. Utilisez /api/analysis/{ip}/classify."
    )
|
||||
|
||||
|
||||
@router.get("")
async def list_incidents(
    status: str = Query("active", description="Statut des incidents"),
    severity: Optional[str] = Query(None, description="Filtrer par sévérité (LOW/MEDIUM/HIGH/CRITICAL)"),
    hours: int = Query(24, ge=1, le=168)
):
    """List incidents, optionally filtered by exact severity.

    Delegates to ``get_incident_clusters`` (up to 100 clusters) and applies the
    ``severity`` filter on the returned items.

    Args:
        status: Incident status (currently unused).
        severity: When given, only clusters with this severity are kept
            (case-insensitive).
        hours: Size of the time window, in hours.

    Returns:
        A dict with ``items``, ``total`` (length of ``items``) and
        ``period_hours``.

    Raises:
        HTTPException: Propagated unchanged from ``get_incident_clusters``, or
            500 for any other failure.
    """
    try:
        # Every argument is passed explicitly: on a direct call an omitted
        # parameter would be the Query(...) default object, not its value.
        result = await get_incident_clusters(hours=hours, min_severity="LOW", limit=100)
        items = result["items"]

        if severity:
            sev_upper = severity.upper()
            items = [c for c in items if c.get("severity") == sev_upper]

        return {
            "items": items,
            "total": len(items),
            "period_hours": hours,
        }

    except HTTPException:
        # Keep the delegate's status code and detail instead of re-wrapping.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
185
services/dashboard/backend/routes/investigation_summary.py
Normal file
185
services/dashboard/backend/routes/investigation_summary.py
Normal file
@ -0,0 +1,185 @@
|
||||
"""
|
||||
Endpoint d'investigation enrichie pour une IP donnée.
|
||||
Agrège en une seule requête les données provenant de toutes les sources :
|
||||
ml_detected_anomalies, view_form_bruteforce_detected, view_tcp_spoofing_detected,
|
||||
agg_host_ip_ja4_1h (rotation JA4), view_ip_recurrence, view_ai_features_1h.
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from ..database import db
|
||||
from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua
|
||||
|
||||
router = APIRouter(prefix="/api/investigation", tags=["investigation"])
|
||||
|
||||
|
||||
@router.get(
    "/{ip}/summary",
    summary="Synthèse complète d'une IP",
    response_description="Score de risque 0-100, détections ML, brute-force, spoofing TCP, rotation JA4, persistance et timeline 24h",
)
async def get_ip_full_summary(ip: str):
    """Return a full investigation summary for one IP in a single call.

    The IP is normalised by removing any ``::ffff:`` prefix, then six queries
    are run (ML detections, form brute force, TCP fingerprint, JA4 rotation,
    recurrence, 24h timeline) and combined into a heuristic 0-100 risk score.

    Args:
        ip: The address to investigate, as ``x.x.x.x`` or ``::ffff:x.x.x.x``.

    Returns:
        A dict with ``ip``, ``risk_score``, ``ml``, ``bruteforce``,
        ``tcp_spoofing``, ``ja4_rotation``, ``persistence`` and
        ``timeline_24h``.

    Raises:
        HTTPException: 500 when any query or conversion fails.
    """
    clean_ip = ip.replace("::ffff:", "").strip()
    try:
        # ── 1. ML score / features ────────────────────────────────────────────
        ml_sql = """
            SELECT
                max(abs(anomaly_score)) AS max_score,
                any(threat_level) AS threat_level,
                any(bot_name) AS bot_name,
                count() AS total_detections,
                uniq(host) AS distinct_hosts,
                uniq(ja4) AS distinct_ja4
            FROM mabase_prod.ml_detected_anomalies
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
        """
        ml_res = db.query(ml_sql, {"ip": clean_ip})
        ml_row = ml_res.result_rows[0] if ml_res.result_rows else None
        ml_data = {
            "max_score": round(float(ml_row[0] or 0), 2) if ml_row else 0,
            "threat_level": str(ml_row[1] or "") if ml_row else "",
            # Populated from the bot_name column of the query above.
            "attack_type": str(ml_row[2] or "") if ml_row else "",
            "total_detections": int(ml_row[3] or 0) if ml_row else 0,
            "distinct_hosts": int(ml_row[4] or 0) if ml_row else 0,
            "distinct_ja4": int(ml_row[5] or 0) if ml_row else 0,
        }

        # ── 2. Brute force ────────────────────────────────────────────────────
        bf_sql = """
            SELECT
                uniq(host) AS hosts_attacked,
                sum(hits) AS total_hits,
                sum(query_params_count) AS total_params,
                groupArray(3)(host) AS top_hosts
            FROM mabase_prod.view_form_bruteforce_detected
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
        """
        bf_res = db.query(bf_sql, {"ip": clean_ip})
        bf_row = bf_res.result_rows[0] if bf_res.result_rows else None
        bf_data = {
            "active": bool(bf_row and int(bf_row[1] or 0) > 0),
            "hosts_attacked": int(bf_row[0] or 0) if bf_row else 0,
            "total_hits": int(bf_row[1] or 0) if bf_row else 0,
            "total_params": int(bf_row[2] or 0) if bf_row else 0,
            "top_hosts": [str(h) for h in (bf_row[3] or [])] if bf_row else [],
        }

        # ── 3. TCP spoofing — multi-signal fingerprinting ─────────────────────
        tcp_sql = """
            SELECT
                any(tcp_ttl_raw) AS ttl,
                any(tcp_win_raw) AS win,
                any(tcp_scale_raw) AS scale,
                any(tcp_mss_raw) AS mss,
                any(first_ua) AS ua
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
              AND window_start >= now() - INTERVAL 24 HOUR
              AND tcp_ttl_raw > 0
            LIMIT 1
        """
        tcp_res = db.query(tcp_sql, {"ip": clean_ip})
        tcp_data = {"detected": False, "tcp_ttl": None, "suspected_os": None}
        tcp_row = tcp_res.result_rows[0] if tcp_res.result_rows else None
        ttl = int(tcp_row[0] or 0) if tcp_row else 0
        # An aggregate query without GROUP BY normally still returns one row of
        # default values when nothing matched. Because the filter requires
        # tcp_ttl_raw > 0, a TTL of 0 means "no data": fingerprinting an
        # all-zero stack could report a spoof for an IP that was never seen.
        if ttl > 0:
            win = int(tcp_row[1] or 0)
            scale = int(tcp_row[2] or 0)
            mss = int(tcp_row[3] or 0)
            ua = str(tcp_row[4] or "")
            fp = fingerprint_os(ttl, win, scale, mss)
            dec_os = declared_os_from_ua(ua)
            spoof_res = detect_spoof(fp, dec_os)
            tcp_data = {
                "detected": spoof_res.is_spoof,
                "tcp_ttl": ttl,
                "tcp_mss": mss,
                "tcp_win_scale": scale,
                "initial_ttl": fp.initial_ttl,
                "hop_count": fp.hop_count,
                "suspected_os": fp.os_name,
                "declared_os": dec_os,
                "confidence": fp.confidence,
                "network_path": fp.network_path,
                "is_bot_tool": fp.is_bot_tool,
                "spoof_reason": spoof_res.reason,
            }

        # ── 4. JA4 rotation ───────────────────────────────────────────────────
        rot_sql = """
            SELECT distinct_ja4_count, total_hits
            FROM mabase_prod.view_host_ip_ja4_rotation
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
            LIMIT 1
        """
        rot_res = db.query(rot_sql, {"ip": clean_ip})
        rot_data = {"rotating": False, "distinct_ja4_count": 0}
        if rot_res.result_rows:
            row = rot_res.result_rows[0]
            cnt = int(row[0] or 0)
            rot_data = {"rotating": cnt > 1, "distinct_ja4_count": cnt, "total_hits": int(row[1] or 0)}

        # ── 5. Persistence ────────────────────────────────────────────────────
        pers_sql = """
            SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen
            FROM mabase_prod.view_ip_recurrence
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
            LIMIT 1
        """
        pers_res = db.query(pers_sql, {"ip": clean_ip})
        pers_data = {"persistent": False, "recurrence": 0}
        if pers_res.result_rows:
            row = pers_res.result_rows[0]
            pers_data = {
                "persistent": True,
                "recurrence": int(row[0] or 0),
                "worst_score": round(float(row[1] or 0), 2),
                "worst_threat_level": str(row[2] or ""),
                "first_seen": str(row[3]),
                "last_seen": str(row[4]),
            }

        # ── 6. 24h timeline ───────────────────────────────────────────────────
        tl_sql = """
            SELECT
                toHour(window_start) AS hour,
                sum(hits) AS hits,
                groupUniqArray(3)(ja4) AS ja4s
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
              AND window_start >= now() - INTERVAL 24 HOUR
            GROUP BY hour
            ORDER BY hour ASC
        """
        tl_res = db.query(tl_sql, {"ip": clean_ip})
        timeline = [
            {"hour": int(r[0]), "hits": int(r[1]), "ja4s": [str(j) for j in (r[2] or [])]}
            for r in tl_res.result_rows
        ]

        # ── Global risk score (heuristic) ─────────────────────────────────────
        risk = 0
        risk += min(50, ml_data["max_score"] * 50)
        if bf_data["active"]:
            risk += 20
        if tcp_data["detected"]:
            # Known scanning tool weighs more than a plain OS mismatch.
            risk += 30 if tcp_data.get("is_bot_tool") else 15
        if rot_data["rotating"]:
            risk += min(15, rot_data["distinct_ja4_count"] * 3)
        if pers_data["persistent"]:
            risk += min(10, pers_data["recurrence"] * 2)
        risk = min(100, round(risk))

        return {
            "ip": clean_ip,
            "risk_score": risk,
            "ml": ml_data,
            "bruteforce": bf_data,
            "tcp_spoofing": tcp_data,
            "ja4_rotation": rot_data,
            "persistence": pers_data,
            "timeline_24h": timeline,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
175
services/dashboard/backend/routes/metrics.py
Normal file
175
services/dashboard/backend/routes/metrics.py
Normal file
@ -0,0 +1,175 @@
|
||||
"""
|
||||
Endpoints pour les métriques du dashboard
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from ..database import db
|
||||
from ..models import MetricsResponse, MetricsSummary, TimeSeriesPoint
|
||||
|
||||
router = APIRouter(prefix="/api/metrics", tags=["metrics"])
|
||||
|
||||
|
||||
@router.get("", response_model=MetricsResponse, summary="Métriques globales du dashboard")
async def get_metrics():
    """Return the global dashboard metrics for the last 24 hours.

    Returns:
        A ``MetricsResponse`` holding the summary counters, the hourly time
        series and the per-threat-level distribution.

    Raises:
        HTTPException: 404 when the summary query returns no row, 500 for any
            other failure.
    """
    try:
        # Summary counters
        summary_query = """
            SELECT
                count() AS total_detections,
                countIf(threat_level = 'CRITICAL') AS critical_count,
                countIf(threat_level = 'HIGH') AS high_count,
                countIf(threat_level = 'MEDIUM') AS medium_count,
                countIf(threat_level = 'LOW') AS low_count,
                countIf(bot_name != '') AS known_bots_count,
                countIf(bot_name = '') AS anomalies_count,
                uniq(src_ip) AS unique_ips
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL 24 HOUR
        """

        summary_result = db.query(summary_query)
        summary_row = summary_result.result_rows[0] if summary_result.result_rows else None

        if not summary_row:
            raise HTTPException(status_code=404, detail="Aucune donnée disponible")

        summary = MetricsSummary(
            total_detections=summary_row[0],
            critical_count=summary_row[1],
            high_count=summary_row[2],
            medium_count=summary_row[3],
            low_count=summary_row[4],
            known_bots_count=summary_row[5],
            anomalies_count=summary_row[6],
            unique_ips=summary_row[7]
        )

        # Hourly time series
        timeseries_query = """
            SELECT
                toStartOfHour(detected_at) AS hour,
                count() AS total,
                countIf(threat_level = 'CRITICAL') AS critical,
                countIf(threat_level = 'HIGH') AS high,
                countIf(threat_level = 'MEDIUM') AS medium,
                countIf(threat_level = 'LOW') AS low
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL 24 HOUR
            GROUP BY hour
            ORDER BY hour
        """

        timeseries_result = db.query(timeseries_query)
        timeseries = [
            TimeSeriesPoint(
                hour=row[0],
                total=row[1],
                critical=row[2],
                high=row[3],
                medium=row[4],
                low=row[5]
            )
            for row in timeseries_result.result_rows
        ]

        # Distribution per threat level, taken from the summary counters
        threat_distribution = {
            "CRITICAL": summary.critical_count,
            "HIGH": summary.high_count,
            "MEDIUM": summary.medium_count,
            "LOW": summary.low_count
        }

        return MetricsResponse(
            summary=summary,
            timeseries=timeseries,
            threat_distribution=threat_distribution
        )

    except HTTPException:
        # Let the deliberate 404 above through; the generic handler below
        # would otherwise turn it into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur lors de la récupération des métriques: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/threats")
async def get_threat_distribution():
    """Return the breakdown of detections by threat level (last 24 hours).

    Returns:
        A dict with ``items``: one ``threat_level`` / ``count`` / ``percentage``
        entry per level, highest count first.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        query = """
            SELECT
                threat_level,
                count() AS count,
                round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL 24 HOUR
            GROUP BY threat_level
            ORDER BY count DESC
        """

        breakdown = []
        for level, occurrences, share in db.query(query).result_rows:
            breakdown.append(
                {"threat_level": level, "count": occurrences, "percentage": share}
            )

        return {"items": breakdown}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/baseline")
async def get_metrics_baseline():
    """Compare the last 24 hours with the 24 hours before them.

    Returns:
        A dict with ``total_detections``, ``unique_ips`` and
        ``critical_alerts``; each holds ``today``, ``yesterday`` and
        ``pct_change``. All counters are 0 when the query returns no row.

    Raises:
        HTTPException: 500 when the query or a conversion fails.
    """
    try:
        query = """
            SELECT
                countIf(detected_at >= now() - INTERVAL 24 HOUR) AS today_total,
                countIf(detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_total,
                uniqIf(src_ip, detected_at >= now() - INTERVAL 24 HOUR) AS today_ips,
                uniqIf(src_ip, detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_ips,
                countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 24 HOUR) AS today_critical,
                countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_critical
            FROM ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL 48 HOUR
        """
        rows = db.query(query).result_rows
        first = rows[0] if rows else None

        def variation(current: int, previous: int) -> float:
            # With an empty baseline, any activity counts as +100%.
            if previous == 0:
                return 100.0 if current > 0 else 0.0
            return round((current - previous) / previous * 100, 1)

        if first:
            counters = [int(first[idx] or 0) for idx in range(6)]
        else:
            counters = [0] * 6
        (
            today_total,
            yesterday_total,
            today_ips,
            yesterday_ips,
            today_crit,
            yesterday_crit,
        ) = counters

        def section(current: int, previous: int) -> dict:
            return {
                "today": current,
                "yesterday": previous,
                "pct_change": variation(current, previous),
            }

        return {
            "total_detections": section(today_total, yesterday_total),
            "unique_ips": section(today_ips, yesterday_ips),
            "critical_alerts": section(today_crit, yesterday_crit),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur baseline: {str(e)}")
|
||||
425
services/dashboard/backend/routes/ml_features.py
Normal file
425
services/dashboard/backend/routes/ml_features.py
Normal file
@ -0,0 +1,425 @@
|
||||
"""
|
||||
Endpoints pour les features ML / IA (scores d'anomalies, radar, scatter)
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/ml", tags=["ml_features"])
|
||||
|
||||
|
||||
def _attack_type(fuzzing_index: float, hit_velocity: float,
                 is_fake_nav: int, ua_ch_mismatch: int) -> str:
    """Map feature values to an attack-type label.

    The checks are evaluated in order and the first match wins:
    ``fuzzing_index > 50`` -> "brute_force", ``hit_velocity > 1.0`` -> "flood",
    truthy ``is_fake_nav`` -> "scraper", truthy ``ua_ch_mismatch`` ->
    "spoofing"; otherwise "scanner".
    """
    # Chained conditional expression: later conditions are only evaluated
    # when the earlier ones are false.
    return (
        "brute_force" if fuzzing_index > 50
        else "flood" if hit_velocity > 1.0
        else "scraper" if is_fake_nav
        else "spoofing" if ua_ch_mismatch
        else "scanner"
    )
|
||||
|
||||
|
||||
@router.get("/top-anomalies")
async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
    """Return the most anomalous source IPs of the last 24 hours.

    The query reads ``agg_host_ip_ja4_1h`` directly, LEFT JOINed with
    ``agg_header_fingerprint_1h``, instead of going through
    ``view_ai_features_1h`` (bypassed to avoid its window functions).
    Rows are ordered by fuzzing index, highest first.

    Args:
        limit: Maximum number of IPs to return (1-500).

    Returns:
        ``{"items": [...]}`` with one dict of metrics per IP, including an
        ``attack_type`` label derived from the fuzzing index, hit velocity
        and UA/client-hint mismatch flag.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """

    def _to_item(rec) -> dict:
        # These three values feed both the payload and the attack label.
        fuzz = float(rec[4] or 0)
        speed = float(rec[5] or 0)
        mismatch = int(rec[12] or 0)
        return {
            "ip": str(rec[0]),
            "ja4": str(rec[1]),
            "host": str(rec[2]),
            "hits": int(rec[3] or 0),
            "fuzzing_index": fuzz,
            "hit_velocity": speed,
            "head_ratio": float(rec[6] or 0),
            "sec_fetch_absence": float(rec[7] or 0),
            "tls12_ratio": float(rec[8] or 0),
            "generic_accept_ratio": float(rec[9] or 0),
            "country": str(rec[10] or ""),
            "asn_name": str(rec[11] or ""),
            "ua_ch_mismatch": mismatch,
            "browser_score": int(rec[13] or 0),
            "asn_label": str(rec[14] or ""),
            "bot_name": str(rec[15] or ""),
            # The fake-navigation signal is not selected here, so it is 0.
            "attack_type": _attack_type(fuzz, speed, 0, mismatch),
        }

    try:
        query = """
            SELECT
                replaceRegexpAll(toString(a.src_ip), '^::ffff:', '') AS ip,
                any(a.ja4) AS ja4,
                any(a.host) AS host,
                sum(a.hits) AS hits,
                round(uniqMerge(a.uniq_query_params)
                    / greatest(uniqMerge(a.uniq_paths), 1), 4) AS fuzzing_index,
                round(sum(a.hits)
                    / greatest(dateDiff('second', min(a.first_seen), max(a.last_seen)), 1), 2) AS hit_velocity,
                round(sum(a.count_head) / greatest(sum(a.hits), 1), 4) AS head_ratio,
                round(sum(a.count_no_sec_fetch) / greatest(sum(a.hits), 1), 4) AS sec_fetch_absence,
                round(sum(a.tls12_count) / greatest(sum(a.hits), 1), 4) AS tls12_ratio,
                round(sum(a.count_generic_accept) / greatest(sum(a.hits), 1), 4) AS generic_accept_ratio,
                any(a.src_country_code) AS country,
                any(a.src_as_name) AS asn_name,
                max(h.ua_ch_mismatch) AS ua_ch_mismatch,
                max(h.modern_browser_score) AS browser_score,
                dictGetOrDefault('mabase_prod.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label,
                coalesce(
                    nullIf(dictGetOrDefault('mabase_prod.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''),
                    ''
                ) AS bot_name
            FROM mabase_prod.agg_host_ip_ja4_1h a
            LEFT JOIN mabase_prod.agg_header_fingerprint_1h h
                ON a.src_ip = h.src_ip AND a.window_start = h.window_start
            WHERE a.window_start >= now() - INTERVAL 24 HOUR
            GROUP BY a.src_ip
            ORDER BY fuzzing_index DESC
            LIMIT %(limit)s
        """
        fetched = db.query(query, {"limit": limit})
        return {"items": [_to_item(rec) for rec in fetched.result_rows]}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.get("/ip/{ip}/radar")
async def get_ip_radar(ip: str):
    """Return the eight radar-chart anomaly scores for one IP (last 24 hours).

    Each score is derived from the average of one feature in
    ``view_ai_features_1h`` and scaled to a 0-100 range where the code
    clamps it.

    Args:
        ip: Source IP as text; an IPv4-mapped ``::ffff:`` prefix is stripped
            on the database side before comparison.

    Returns:
        Dict with ``ip`` plus eight ``*_score`` floats.

    Raises:
        HTTPException: 404 when the IP has no data in the window, 500 with
            the error text on any other failure.
    """
    import math  # local import keeps this fix self-contained

    def _is_missing(v) -> bool:
        """True for NULL or NaN aggregates (no rows matched)."""
        return v is None or math.isnan(float(v))

    def _f(v) -> float:
        """Coerce an aggregate to float, mapping NULL/NaN to 0.0."""
        return 0.0 if _is_missing(v) else float(v)

    try:
        sql = """
            SELECT
                avg(fuzzing_index) AS fuzzing_index,
                avg(hit_velocity) AS hit_velocity,
                avg(is_fake_navigation) AS is_fake_navigation,
                avg(ua_ch_mismatch) AS ua_ch_mismatch,
                avg(sni_host_mismatch) AS sni_host_mismatch,
                avg(orphan_ratio) AS orphan_ratio,
                avg(path_diversity_ratio) AS path_diversity_ratio,
                avg(anomalous_payload_ratio) AS anomalous_payload_ratio
            FROM mabase_prod.view_ai_features_1h
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
              AND window_start >= now() - INTERVAL 24 HOUR
        """
        result = db.query(sql, {"ip": ip})
        if not result.result_rows:
            raise HTTPException(status_code=404, detail="IP not found")
        row = result.result_rows[0]

        # An aggregate query without GROUP BY returns one row even when no
        # record matched (ClickHouse default); every avg() is then NaN/NULL.
        # Treat that as "unknown IP" instead of emitting NaN scores, which
        # are not valid JSON.
        if all(_is_missing(v) for v in row):
            raise HTTPException(status_code=404, detail="IP not found")

        return {
            "ip": ip,
            "fuzzing_score": min(100.0, _f(row[0])),
            "velocity_score": min(100.0, _f(row[1]) * 100),
            "fake_nav_score": _f(row[2]) * 100,
            "ua_mismatch_score": _f(row[3]) * 100,
            "sni_mismatch_score": _f(row[4]) * 100,
            "orphan_score": min(100.0, _f(row[5]) * 100),
            # Low path diversity means high repetition, hence the inversion.
            "path_repetition_score": max(0.0, 100 - _f(row[6]) * 100),
            "payload_anomaly_score": min(100.0, _f(row[7]) * 100),
        }
    except HTTPException:
        # Let the deliberate 404 through untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/score-distribution")
async def get_score_distribution():
    """Return the distribution of all ML scores from ``ml_all_scores`` (3 days).

    A single query groups by threat level and model and uses conditional
    counts, so the table is scanned once.

    Returns:
        Dict with ``by_model`` (model name -> list of per-threat-level
        stats) and ``totals`` (counts and percentages for normal, anomaly
        and known-bot records, plus ``grand_total``).

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    try:
        # One scan: each (threat_level, model_name) group carries its own
        # conditional counts; global totals are summed in Python below.
        sql = """
            SELECT
                threat_level,
                model_name,
                count() AS total,
                round(avg(anomaly_score), 4) AS avg_score,
                round(min(anomaly_score), 4) AS min_score,
                countIf(threat_level = 'NORMAL') AS normal_count,
                countIf(threat_level NOT IN ('NORMAL','KNOWN_BOT')) AS anomaly_count,
                countIf(threat_level = 'KNOWN_BOT') AS bot_count
            FROM mabase_prod.ml_all_scores
            WHERE detected_at >= now() - INTERVAL 3 DAY
            GROUP BY threat_level, model_name
            ORDER BY model_name, total DESC
        """
        result = db.query(sql)
        by_model: dict = {}
        grand_total = 0
        total_normal = total_anomaly = total_bot = 0
        for row in result.result_rows:
            level = str(row[0])
            model = str(row[1])
            total = int(row[2])
            grand_total += total
            total_normal += int(row[5] or 0)
            total_anomaly += int(row[6] or 0)
            total_bot += int(row[7] or 0)
            by_model.setdefault(model, []).append({
                "threat_level": level,
                "total": total,
                "avg_score": float(row[3] or 0),
                "min_score": float(row[4] or 0),
            })

        # Guard the percentage maths against an empty table without
        # distorting the reported total (it used to come back as 1).
        denominator = max(grand_total, 1)
        return {
            "by_model": by_model,
            "totals": {
                "normal": total_normal,
                "anomaly": total_anomaly,
                "known_bot": total_bot,
                "grand_total": grand_total,
                "normal_pct": round(total_normal / denominator * 100, 1),
                "anomaly_pct": round(total_anomaly / denominator * 100, 1),
                "bot_pct": round(total_bot / denominator * 100, 1),
            }
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/score-trends")
async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
    """Return the hourly evolution of ML scores from ``ml_all_scores``.

    Args:
        hours: Size of the look-back window in hours (1-168).

    Returns:
        ``{"points": [...], "hours": hours}`` where each point holds the
        hour, the model name, the normal / anomaly / known-bot counts and
        the average anomaly score of the anomalous records.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    import math  # local import keeps this fix self-contained

    def _score(v) -> float:
        # avgIf() yields NaN for a bucket without any anomalous record;
        # NaN is truthy, so ``v or 0`` would let it through and NaN is not
        # JSON-compliant.  Report such buckets as 0.0 instead.
        if v is None:
            return 0.0
        value = float(v)
        return 0.0 if math.isnan(value) else value

    try:
        sql = """
            SELECT
                toStartOfHour(window_start) AS hour,
                model_name,
                countIf(threat_level = 'NORMAL') AS normal_count,
                countIf(threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')) AS anomaly_count,
                countIf(threat_level = 'KNOWN_BOT') AS bot_count,
                round(avgIf(anomaly_score, threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')), 4) AS avg_anomaly_score
            FROM mabase_prod.ml_all_scores
            WHERE window_start >= now() - INTERVAL %(hours)s HOUR
            GROUP BY hour, model_name
            ORDER BY hour ASC, model_name
        """
        result = db.query(sql, {"hours": hours})
        points = [
            {
                "hour": str(row[0]),
                "model": str(row[1]),
                "normal_count": int(row[2] or 0),
                "anomaly_count": int(row[3] or 0),
                "bot_count": int(row[4] or 0),
                "avg_anomaly_score": _score(row[5]),
            }
            for row in result.result_rows
        ]
        return {"points": points, "hours": hours}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/b-features")
async def get_b_features(limit: int = Query(50, ge=1, le=200)):
    """Return aggregated B-features (pure HTTP signals) for suspicious IPs.

    Source table: ``agg_host_ip_ja4_1h``, last 24 hours, one row per source
    IP.  Only IPs exceeding at least one ratio threshold in the outer WHERE
    clause are kept, ordered by the sum of four of the ratios.

    Exposed ratios: ``head_ratio``, ``sec_fetch_absence``, ``tls12_ratio``,
    ``generic_accept_ratio``, ``http10_ratio``, ``missing_accept_enc_ratio``
    and ``http_scheme_ratio``.  (Original author's note: these features are
    computed in ``view_ai_features_1h`` but were not shown in the dashboard.)

    Args:
        limit: Maximum number of IPs to return (1-200).

    Returns:
        ``{"items": [...], "total": <number of items>}``.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    # Ratio columns, in SELECT order, start at result index 5.
    ratio_keys = (
        "head_ratio",
        "sec_fetch_absence",
        "tls12_ratio",
        "generic_accept_ratio",
        "http10_ratio",
        "missing_accept_enc_ratio",
        "http_scheme_ratio",
    )
    try:
        query = """
            SELECT ip, ja4, country, asn_name, total_hits AS hits,
                   head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
                   missing_accept_enc_ratio, http_scheme_ratio
            FROM (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                    any(ja4) AS ja4,
                    any(src_country_code) AS country,
                    any(src_as_name) AS asn_name,
                    sum(hits) AS total_hits,
                    round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
                    round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
                    round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
                    round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
                    round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
                    round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
                    round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
                FROM mabase_prod.agg_host_ip_ja4_1h
                WHERE window_start >= now() - INTERVAL 24 HOUR
                GROUP BY src_ip
            )
            WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3
               OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR missing_accept_enc_ratio > 0.3
            ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio + missing_accept_enc_ratio) DESC
            LIMIT %(limit)s
        """
        fetched = db.query(query, {"limit": limit})
        items = []
        for rec in fetched.result_rows:
            entry = {
                "ip": str(rec[0]),
                "ja4": str(rec[1] or ""),
                "country": str(rec[2] or ""),
                "asn_name": str(rec[3] or ""),
                "hits": int(rec[4] or 0),
            }
            for offset, key in enumerate(ratio_keys, start=5):
                entry[key] = float(rec[offset] or 0)
            items.append(entry)
        return {"items": items, "total": len(items)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.get("/campaigns")
async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Query(20, ge=1, le=100)):
    """Return groups of anomalies: DBSCAN campaigns, else subnet clusters.

    First looks for rows with ``campaign_id >= 0`` in
    ``ml_detected_anomalies``.  When none exist in the window, falls back to
    grouping MEDIUM/HIGH/CRITICAL detections by /24 subnet, keeping subnets
    with at least three distinct IPs.

    Args:
        hours: Look-back window in hours (1-168).
        limit: Maximum number of groups returned (1-100).

    Returns:
        Dict with ``campaigns``, ``total``, ``dbscan_active`` (True when at
        least one returned entry is a real DBSCAN campaign) and ``hours``.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    try:
        # Step 1: real campaigns labelled by DBSCAN.
        dbscan_query = """
            SELECT
                campaign_id,
                count() AS total_detections,
                uniq(src_ip) AS unique_ips,
                any(threat_level) AS dominant_threat,
                groupUniqArray(3)(threat_level) AS threat_levels,
                groupUniqArray(3)(bot_name) AS bot_names,
                min(detected_at) AS first_seen,
                max(detected_at) AS last_seen
            FROM mabase_prod.ml_detected_anomalies
            WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
              AND campaign_id >= 0
            GROUP BY campaign_id
            ORDER BY total_detections DESC
            LIMIT %(limit)s
        """
        dbscan_rows = db.query(dbscan_query, {"hours": hours, "limit": limit}).result_rows
        campaigns = [
            {
                "id": f"C{rec[0]}",
                "campaign_id": int(rec[0]),
                "total_detections": int(rec[1]),
                "unique_ips": int(rec[2]),
                "dominant_threat": str(rec[3] or ""),
                "threat_levels": list(rec[4] or []),
                "bot_names": list(rec[5] or []),
                "first_seen": str(rec[6]),
                "last_seen": str(rec[7]),
                "source": "dbscan",
            }
            for rec in dbscan_rows
        ]

        # Step 2: no DBSCAN campaign -> approximate campaigns by /24 subnet.
        if not campaigns:
            fallback_query = """
                SELECT
                    IPv4CIDRToRange(toIPv4(replaceRegexpAll(toString(src_ip),'^::ffff:','')), 24).1 AS subnet,
                    count() AS total_detections,
                    uniq(src_ip) AS unique_ips,
                    groupArray(3)(threat_level) AS threat_levels,
                    any(bot_name) AS bot_name,
                    any(ja4) AS sample_ja4,
                    min(detected_at) AS first_seen,
                    max(detected_at) AS last_seen
                FROM mabase_prod.ml_detected_anomalies
                WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
                  AND threat_level IN ('HIGH','CRITICAL','MEDIUM')
                GROUP BY subnet
                HAVING unique_ips >= 3
                ORDER BY total_detections DESC
                LIMIT %(limit)s
            """
            subnet_rows = db.query(fallback_query, {"hours": hours, "limit": limit}).result_rows
            for rank, rec in enumerate(subnet_rows, start=1):
                levels = rec[3]
                campaigns.append({
                    "id": f"S{rank:03d}",
                    # -1 marks a synthetic (non-DBSCAN) group.
                    "campaign_id": -1,
                    "subnet": str(rec[0]) + "/24",
                    "total_detections": int(rec[1]),
                    "unique_ips": int(rec[2]),
                    # First sampled threat level stands in for the dominant one.
                    "dominant_threat": str((levels or [""])[0]),
                    "threat_levels": list(levels or []),
                    "bot_names": [str(rec[4] or "")],
                    "sample_ja4": str(rec[5] or ""),
                    "first_seen": str(rec[6]),
                    "last_seen": str(rec[7]),
                    "source": "subnet_cluster",
                })

        return {
            "campaigns": campaigns,
            "total": len(campaigns),
            "dbscan_active": any(entry["campaign_id"] >= 0 for entry in campaigns),
            "hours": hours,
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.get("/scatter")
async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
    """Return scatter-plot points (fuzzing index x hit velocity) per IP.

    Reads ``agg_host_ip_ja4_1h`` for the last 24 hours directly rather than
    going through ``view_ai_features_1h``.  Points are ordered by fuzzing
    index, highest first.

    Args:
        limit: Maximum number of points (1-1000).

    Returns:
        ``{"points": [...]}``; each point has ``ip``, ``ja4``,
        ``fuzzing_index``, ``hit_velocity``, ``hits`` and ``attack_type``.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """

    def _to_point(rec) -> dict:
        fuzz = float(rec[2] or 0)
        speed = float(rec[3] or 0)
        return {
            "ip": str(rec[0]),
            "ja4": str(rec[1]),
            "fuzzing_index": fuzz,
            "hit_velocity": speed,
            "hits": int(rec[4] or 0),
            # Only the two plotted signals are passed to the classifier.
            "attack_type": _attack_type(fuzz, speed, 0, 0),
        }

    try:
        query = """
            SELECT
                ip,
                ja4,
                round(fuzzing_index, 4) AS fuzzing_index,
                round(total_hits / greatest(dateDiff('second', min_first, max_last), 1), 2) AS hit_velocity,
                total_hits AS hits,
                round(total_count_head / greatest(total_hits, 1), 4) AS head_ratio,
                correlated
            FROM (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                    any(ja4) AS ja4,
                    uniqMerge(uniq_query_params) / greatest(uniqMerge(uniq_paths), 1) AS fuzzing_index,
                    sum(hits) AS total_hits,
                    min(first_seen) AS min_first,
                    max(last_seen) AS max_last,
                    sum(count_head) AS total_count_head,
                    max(correlated_raw) AS correlated
                FROM mabase_prod.agg_host_ip_ja4_1h
                WHERE window_start >= now() - INTERVAL 24 HOUR
                GROUP BY src_ip
            )
            ORDER BY fuzzing_index DESC
            LIMIT %(limit)s
        """
        fetched = db.query(query, {"limit": limit})
        return {"points": [_to_point(rec) for rec in fetched.result_rows]}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
125
services/dashboard/backend/routes/reputation.py
Normal file
125
services/dashboard/backend/routes/reputation.py
Normal file
@ -0,0 +1,125 @@
|
||||
"""
|
||||
Routes pour la réputation IP (bases de données publiques)
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Path
|
||||
from typing import Dict, Any
|
||||
import re
|
||||
|
||||
from ..services.reputation_ip import get_reputation_service
|
||||
|
||||
router = APIRouter(prefix="/api/reputation", tags=["Reputation"])
|
||||
|
||||
# Dotted-quad IPv4 shape check.  ``\Z`` anchors at the true end of the string
# (``$`` would also accept a trailing newline) and ``re.ASCII`` restricts
# ``\d`` to 0-9 so non-ASCII digits are rejected.
IP_PATTERN = re.compile(r'^(\d{1,3}\.){3}\d{1,3}\Z', re.ASCII)


def is_valid_ipv4(ip: str) -> bool:
    """Return True when *ip* is a dotted-quad IPv4 address.

    The string must consist of exactly four groups of one to three ASCII
    digits separated by dots, with every group in the range 0-255.  Leading
    zeros are accepted (``"001.002.003.004"``); surrounding whitespace,
    trailing newlines and non-ASCII digits are not.
    """
    if not IP_PATTERN.match(ip):
        return False

    # The pattern guarantees each part is 1-3 ASCII digits, so int() cannot
    # fail and the value cannot be negative; only the upper bound remains.
    return all(int(octet) <= 255 for octet in ip.split('.'))
|
||||
|
||||
|
||||
@router.get("/ip/{ip_address}", summary="Réputation complète d'une IP")
async def get_ip_reputation(
    ip_address: str = Path(..., description="Adresse IP à vérifier")
) -> Dict[str, Any]:
    """
    Fetch the reputation of an IP address from public databases.

    Sources used (no API key required), per the original documentation:
    - IP-API.com: geolocation + proxy/hosting detection
    - IPinfo.io: ASN + organisation

    Returns:
        The dict produced by the reputation service.  As documented by the
        original author it contains:
        - ip: the address that was checked
        - timestamp: time of the check
        - sources: per-source details
        - aggregated: merged results
            - is_proxy / is_hosting / is_vpn / is_tor: bool
            - threat_score: 0-100
            - threat_level: clean/low/medium/high/critical
            - country, asn, asn_org, org
            - warnings: list of alerts

    Raises:
        HTTPException: 400 when ``ip_address`` is not a valid IPv4 address,
            500 when the reputation lookup fails.
    """
    # Reject anything that is not a dotted-quad IPv4 before calling out.
    address_ok = is_valid_ipv4(ip_address)
    if not address_ok:
        raise HTTPException(
            status_code=400,
            detail=f"Adresse IP invalide: {ip_address}. Format attendu: x.x.x.x"
        )

    try:
        service = get_reputation_service()
        return await service.get_reputation(ip_address)
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Erreur lors de la vérification de réputation: {exc!s}"
        )
|
||||
|
||||
|
||||
@router.get("/ip/{ip_address}/summary", summary="Réputation simplifiée d'une IP")
async def get_ip_reputation_summary(
    ip_address: str = Path(..., description="Adresse IP à vérifier")
) -> Dict[str, Any]:
    """
    Return a condensed IP reputation (essential fields only).

    Intended for quick display in tables: runs the full reputation lookup
    and keeps a subset of its ``aggregated`` section.

    Raises:
        HTTPException: 400 when ``ip_address`` is not a valid IPv4 address,
            500 when the reputation lookup fails.
    """
    if not is_valid_ipv4(ip_address):
        raise HTTPException(
            status_code=400,
            detail=f"Adresse IP invalide: {ip_address}"
        )

    try:
        service = get_reputation_service()
        report = await service.get_reputation(ip_address)

        # Everything the summary needs lives under "aggregated".
        merged = report.get('aggregated', {})
        warnings = merged.get('warnings', [])

        summary = {'ip': ip_address}
        summary['threat_level'] = merged.get('threat_level', 'unknown')
        summary['threat_score'] = merged.get('threat_score', 0)
        summary['is_proxy'] = merged.get('is_proxy', False)
        summary['is_hosting'] = merged.get('is_hosting', False)
        for field in ('country', 'country_code', 'asn', 'org'):
            summary[field] = merged.get(field)
        summary['warnings_count'] = len(warnings)
        return summary

    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Erreur: {exc!s}"
        )
|
||||
217
services/dashboard/backend/routes/rotation.py
Normal file
217
services/dashboard/backend/routes/rotation.py
Normal file
@ -0,0 +1,217 @@
|
||||
"""
|
||||
Endpoints pour la détection de la rotation de fingerprints JA4 et des menaces persistantes
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/rotation", tags=["rotation"])
|
||||
|
||||
|
||||
@router.get("/ja4-rotators")
async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)):
    """Return the IPs that rotate through the most JA4 fingerprints.

    Args:
        limit: Maximum number of IPs (1-500).

    Returns:
        ``{"items": [...], "total": <count>}``; each item carries the IP,
        its distinct JA4 count, total hits and an ``evasion_score`` equal to
        15 points per distinct JA4, capped at 100.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """

    def _to_item(rec) -> dict:
        ja4_count = int(rec[1])
        return {
            "ip": str(rec[0]),
            "distinct_ja4_count": ja4_count,
            "total_hits": int(rec[2]),
            "evasion_score": min(100, ja4_count * 15),
        }

    try:
        query = """
            SELECT
                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                distinct_ja4_count,
                total_hits
            FROM mabase_prod.view_host_ip_ja4_rotation
            ORDER BY distinct_ja4_count DESC
            LIMIT %(limit)s
        """
        fetched = db.query(query, {"limit": limit})
        items = [_to_item(rec) for rec in fetched.result_rows]
        return {"items": items, "total": len(items)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.get("/persistent-threats")
async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
    """Return persistent threats ordered by persistence score.

    The persistence score is ``recurrence * 20 + worst_score * 50`` capped
    at 100; the same expression orders the rows in SQL and is recomputed
    here for the payload.

    Args:
        limit: Maximum number of IPs (1-1000).

    Returns:
        ``{"items": [...], "total": <count>}``.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    try:
        query = """
            SELECT
                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                recurrence,
                worst_score,
                worst_threat_level,
                first_seen,
                last_seen
            FROM mabase_prod.view_ip_recurrence
            ORDER BY (least(100, recurrence * 20 + worst_score * 50)) DESC
            LIMIT %(limit)s
        """
        fetched = db.query(query, {"limit": limit})
        items = []
        for rec in fetched.result_rows:
            times_seen = int(rec[1])
            peak_score = float(rec[2] or 0)
            raw_persistence = times_seen * 20 + peak_score * 50
            items.append({
                "ip": str(rec[0]),
                "recurrence": times_seen,
                "worst_score": peak_score,
                "worst_threat_level": str(rec[3] or ""),
                "first_seen": str(rec[4]),
                "last_seen": str(rec[5]),
                "persistence_score": min(100, raw_persistence),
            })
        return {"items": items, "total": len(items)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.get("/ip/{ip}/ja4-history")
async def get_ip_ja4_history(ip: str):
    """Return the history of JA4 fingerprints used by one IP.

    Args:
        ip: Source IP as text; an IPv4-mapped ``::ffff:`` prefix is stripped
            on the database side before comparison.

    Returns:
        ``{"ip": ip, "ja4_history": [...], "total": <count>}`` with one
        entry per JA4 (hits, first and last window seen), most hits first.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    try:
        query = """
            SELECT
                ja4,
                sum(hits) AS hits,
                min(window_start) AS first_seen,
                max(window_start) AS last_seen
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
            GROUP BY ja4
            ORDER BY hits DESC
        """
        fetched = db.query(query, {"ip": ip})
        history = []
        for rec in fetched.result_rows:
            history.append({
                "ja4": str(rec[0]),
                "hits": int(rec[1]),
                "first_seen": str(rec[2]),
                "last_seen": str(rec[3]),
            })
        return {"ip": ip, "ja4_history": history, "total": len(history)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.get("/sophistication")
async def get_sophistication(limit: int = Query(50, ge=1, le=500)):
    """Return an adversary-sophistication score per IP.

    The score combines JA4 rotation, recurrence and brute-force hits and is
    computed entirely in one SQL query with JOINs, so no per-row scoring
    happens in Python.  Each score is then mapped to a tier:
    above 80 ``APT-like``, above 50 ``Advanced``, above 20 ``Automated``,
    otherwise ``Basic``.

    Args:
        limit: Maximum number of IPs (1-500).

    Returns:
        ``{"items": [...], "total": <count>}``.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    # (exclusive lower bound, tier name), checked from highest to lowest.
    tier_floors = ((80, "APT-like"), (50, "Advanced"), (20, "Automated"))

    try:
        query = """
            SELECT
                r.ip,
                r.distinct_ja4_count,
                coalesce(rec.recurrence, 0) AS recurrence,
                coalesce(bf.bruteforce_hits, 0) AS bruteforce_hits,
                round(least(100.0,
                    r.distinct_ja4_count * 10
                    + coalesce(rec.recurrence, 0) * 20
                    + least(30.0, log(coalesce(bf.bruteforce_hits, 0) + 1) * 5)
                ), 1) AS sophistication_score
            FROM (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                    distinct_ja4_count
                FROM mabase_prod.view_host_ip_ja4_rotation
            ) r
            LEFT JOIN (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                    count() AS recurrence
                FROM mabase_prod.ml_detected_anomalies FINAL
                GROUP BY ip
            ) rec ON r.ip = rec.ip
            LEFT JOIN (
                SELECT
                    replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                    sum(hits) AS bruteforce_hits
                FROM mabase_prod.view_form_bruteforce_detected
                GROUP BY ip
            ) bf ON r.ip = bf.ip
            ORDER BY sophistication_score DESC
            LIMIT %(limit)s
        """
        fetched = db.query(query, {"limit": limit})
        items = []
        for rec in fetched.result_rows:
            rating = float(rec[4] or 0)
            tier_name = next(
                (name for floor, name in tier_floors if rating > floor),
                "Basic",
            )
            items.append({
                "ip": str(rec[0]),
                "ja4_rotation_count": int(rec[1] or 0),
                "recurrence": int(rec[2] or 0),
                "bruteforce_hits": int(rec[3] or 0),
                "sophistication_score": rating,
                "tier": tier_name,
            })
        return {"items": items, "total": len(items)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@router.get("/proactive-hunt")
async def get_proactive_hunt(
    min_recurrence: int = Query(2, ge=1, description="Récurrence minimale"),
    min_days: int = Query(2, ge=0, description="Jours d'activité minimum"),
    limit: int = Query(50, ge=1, le=500),
):
    """Return IPs flying under the radar: recurrent but weakly scored.

    Selects IPs from ``view_ip_recurrence`` whose recurrence is at least
    ``min_recurrence``, whose worst score has a magnitude below 0.5 and
    which were active over at least ``min_days`` days.

    Args:
        min_recurrence: Minimum number of recurrences.
        min_days: Minimum span in days between first and last sighting.
        limit: Maximum number of IPs (1-500).

    Returns:
        ``{"items": [...], "total": <count>}``; each item includes a
        ``risk_assessment`` label derived from recurrence relative to the
        score magnitude.

    Raises:
        HTTPException: 500 carrying the error text when anything fails.
    """
    try:
        sql = """
            SELECT
                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
                recurrence,
                worst_score,
                worst_threat_level,
                first_seen,
                last_seen,
                dateDiff('day', first_seen, last_seen) AS days_active
            FROM mabase_prod.view_ip_recurrence
            WHERE recurrence >= %(min_recurrence)s
              AND abs(worst_score) < 0.5
              AND dateDiff('day', first_seen, last_seen) >= %(min_days)s
            ORDER BY recurrence DESC, worst_score ASC
            LIMIT %(limit)s
        """
        result = db.query(sql, {
            "min_recurrence": min_recurrence,
            "min_days": min_days,
            "limit": limit,
        })
        items = []
        for row in result.result_rows:
            recurrence = int(row[1])
            worst_score = float(row[2] or 0)
            days_active = int(row[6] or 0)
            # The SQL filter works on abs(worst_score), so the score can be
            # negative.  Use its magnitude here too: the raw value made the
            # denominator zero at -0.1 (ZeroDivisionError) and negative
            # below that, which silently flipped the assessment.
            ratio = recurrence / (abs(worst_score) + 0.1)
            risk = "Évadeur potentiel" if ratio > 10 else "Persistant modéré"
            items.append({
                "ip": str(row[0]),
                "recurrence": recurrence,
                "worst_score": round(worst_score, 4),
                "worst_threat_level": str(row[3] or ""),
                "first_seen": str(row[4]),
                "last_seen": str(row[5]),
                "days_active": days_active,
                "risk_assessment": risk,
            })
        return {"items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
125
services/dashboard/backend/routes/search.py
Normal file
125
services/dashboard/backend/routes/search.py
Normal file
@ -0,0 +1,125 @@
|
||||
"""
|
||||
Endpoint de recherche globale rapide — utilisé par la barre Cmd+K
|
||||
"""
|
||||
from fastapi import APIRouter, Query
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/search", tags=["search"])
|
||||
|
||||
IP_RE = r"^(\d{1,3}\.){0,3}\d{1,3}$"
|
||||
|
||||
|
||||
@router.get("/quick")
async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
    """
    Unified search over IPs, JA4 fingerprints, hosts and ASNs.

    Each category is searched with a case-insensitive substring match over
    the last 24 hours of ``ml_detected_anomalies`` and returns at most five
    results, ordered by number of detections.

    Args:
        q: Search text (1-100 characters); surrounding whitespace is ignored.

    Returns:
        ``{"query": <stripped q>, "results": [...]}`` where every result has
        ``type``, ``value``, ``label``, ``meta`` and ``url`` (IP results also
        carry ``investigation_url``).
    """
    max_per_category = 5  # documented cap; the queries previously had no LIMIT

    q = q.strip()
    if not q:
        # A whitespace-only query would turn into the pattern "%%" and match
        # every row; there is nothing meaningful to search for.
        return {"query": q, "results": []}

    # NOTE(review): LIKE wildcards typed by the user (% and _) are passed
    # through unescaped, as before — confirm whether that is intended.
    pattern = f"%{q}%"
    params = {"p": pattern, "limit": max_per_category}
    results = []

    # ── IPs ──────────────────────────────────────────────────────────────────
    ip_rows = db.query(
        """
        SELECT
            replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
            count() AS hits,
            max(detected_at) AS last_seen,
            any(threat_level) AS threat_level
        FROM ml_detected_anomalies
        WHERE ilike(toString(src_ip), %(p)s)
          AND detected_at >= now() - INTERVAL 24 HOUR
        GROUP BY clean_ip
        ORDER BY hits DESC
        LIMIT %(limit)s
        """,
        dict(params),
    )
    for r in ip_rows.result_rows:
        ip = str(r[0])
        results.append({
            "type": "ip",
            "value": ip,
            "label": ip,
            "meta": f"{r[1]} détections · {r[3]}",
            "url": f"/detections/ip/{ip}",
            "investigation_url": f"/investigation/{ip}",
        })

    # ── JA4 fingerprints ─────────────────────────────────────────────────────
    ja4_rows = db.query(
        """
        SELECT
            ja4,
            count() AS hits,
            uniq(src_ip) AS unique_ips
        FROM ml_detected_anomalies
        WHERE ilike(ja4, %(p)s)
          AND ja4 != ''
          AND detected_at >= now() - INTERVAL 24 HOUR
        GROUP BY ja4
        ORDER BY hits DESC
        LIMIT %(limit)s
        """,
        dict(params),
    )
    for r in ja4_rows.result_rows:
        results.append({
            "type": "ja4",
            "value": str(r[0]),
            "label": str(r[0]),
            "meta": f"{r[1]} détections · {r[2]} IPs",
            "url": f"/investigation/ja4/{r[0]}",
        })

    # ── Hosts ─────────────────────────────────────────────────────────────────
    host_rows = db.query(
        """
        SELECT
            host,
            count() AS hits,
            uniq(src_ip) AS unique_ips
        FROM ml_detected_anomalies
        WHERE ilike(host, %(p)s)
          AND host != ''
          AND detected_at >= now() - INTERVAL 24 HOUR
        GROUP BY host
        ORDER BY hits DESC
        LIMIT %(limit)s
        """,
        dict(params),
    )
    for r in host_rows.result_rows:
        results.append({
            "type": "host",
            "value": str(r[0]),
            "label": str(r[0]),
            "meta": f"{r[1]} hits · {r[2]} IPs",
            "url": f"/detections?search={r[0]}",
        })

    # ── ASN ───────────────────────────────────────────────────────────────────
    asn_rows = db.query(
        """
        SELECT
            asn_org,
            asn_number,
            count() AS hits,
            uniq(src_ip) AS unique_ips
        FROM ml_detected_anomalies
        WHERE (ilike(asn_org, %(p)s) OR ilike(asn_number, %(p)s))
          AND asn_org != '' AND asn_number != ''
          AND detected_at >= now() - INTERVAL 24 HOUR
        GROUP BY asn_org, asn_number
        ORDER BY hits DESC
        LIMIT %(limit)s
        """,
        dict(params),
    )
    for r in asn_rows.result_rows:
        results.append({
            "type": "asn",
            "value": str(r[1]),
            "label": f"AS{r[1]} — {r[0]}",
            "meta": f"{r[2]} hits · {r[3]} IPs",
            "url": f"/detections?asn={r[1]}",
        })

    return {"query": q, "results": results}
|
||||
223
services/dashboard/backend/routes/tcp_spoofing.py
Normal file
223
services/dashboard/backend/routes/tcp_spoofing.py
Normal file
@ -0,0 +1,223 @@
|
||||
"""
|
||||
Endpoints pour la détection du TCP spoofing / fingerprinting OS
|
||||
|
||||
Approche multi-signal (p0f-style) :
|
||||
- TTL initial estimé → famille OS (Linux/Mac=64, Windows=128, Cisco/BSD=255)
|
||||
- MSS → type de réseau (Ethernet=1460, PPPoE=1452, VPN=1380-1420)
|
||||
- Taille de fenêtre → signature OS précise
|
||||
- Facteur d'échelle → affine la version kernel/stack TCP
|
||||
|
||||
Détection bots : signatures connues (Masscan/ZMap/Mirai) identifiées par combinaison
|
||||
win+scale+mss indépendamment de l'UA.
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
from ..services.tcp_fingerprint import (
|
||||
fingerprint_os,
|
||||
detect_spoof,
|
||||
declared_os_from_ua,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"])
|
||||
|
||||
|
||||
@router.get("/overview")
async def get_tcp_spoofing_overview():
    """Return global TCP fingerprinting statistics for the last 24 hours.

    Runs one aggregate query (entry/IP totals, TTL-bucket counters and a fixed
    window/MSS/scale signature counter) followed by three distribution queries
    (TTL, MSS, window size) against ``mabase_prod.agg_host_ip_ja4_1h``.

    Returns:
        A dict with eight integer counters plus ``ttl_distribution``,
        ``mss_distribution`` and ``window_size_distribution`` lists.

    Raises:
        HTTPException: status 500 carrying the error text when any query or
            conversion fails.
    """
    # Response keys, in the same order as the columns of the summary query.
    summary_fields = (
        "total_entries",
        "unique_ips",
        "no_tcp_data",
        "with_tcp_data",
        "linux_mac_fingerprint",
        "windows_fingerprint",
        "cisco_bsd_fingerprint",
        "bot_scanner_fingerprint",
    )
    try:
        summary_sql = """
            SELECT
                count() AS total_entries,
                uniq(src_ip) AS unique_ips,
                countIf(tcp_ttl_raw = 0) AS no_tcp_data,
                countIf(tcp_ttl_raw > 0) AS with_tcp_data,
                countIf(tcp_ttl_raw > 0 AND tcp_ttl_raw <= 64) AS linux_mac_fp,
                countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp,
                countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp,
                countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 24 HOUR
        """
        summary_row = db.query(summary_sql).result_rows[0]

        # NOTE(review): the previous comments announced "top 15" / "top 12" /
        # "top 10" for the three distributions below, but none of the queries
        # has a LIMIT, so the complete distribution is returned. Confirm which
        # behaviour the frontend expects before adding one.

        # TTL distribution, most frequent value first.
        ttl_sql = """
            SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
            GROUP BY ttl ORDER BY cnt DESC
        """
        ttl_rows = db.query(ttl_sql).result_rows

        # MSS distribution, most frequent value first.
        mss_sql = """
            SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0
            GROUP BY mss ORDER BY cnt DESC
        """
        mss_rows = db.query(mss_sql).result_rows

        # Window-size distribution; rows are kept when a TTL was captured.
        win_sql = """
            SELECT tcp_win_raw AS win, count() AS cnt
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
            GROUP BY win ORDER BY cnt DESC
        """
        win_rows = db.query(win_sql).result_rows

        payload = {
            field: int(cell) for field, cell in zip(summary_fields, summary_row)
        }
        payload["ttl_distribution"] = [
            {"ttl": int(rec[0]), "count": int(rec[1]), "ips": int(rec[2])}
            for rec in ttl_rows
        ]
        payload["mss_distribution"] = [
            {"mss": int(rec[0]), "count": int(rec[1]), "ips": int(rec[2])}
            for rec in mss_rows
        ]
        payload["window_size_distribution"] = [
            {"window_size": int(rec[0]), "count": int(rec[1])}
            for rec in win_rows
        ]
        return payload
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _build_tcp_spoofing_item(row) -> dict:
    """Turn one aggregated (src_ip, ja4, ttl, win, scale, mss, ua, hits) row into an API item.

    NULL/empty cells fall back to 0 (numeric columns) or "" (text columns)
    before the fingerprinting helpers are called.
    """
    ip = str(row[0])
    ja4 = str(row[1] or "")
    ttl = int(row[2] or 0)
    win = int(row[3] or 0)
    scale = int(row[4] or 0)
    mss = int(row[5] or 0)
    ua = str(row[6] or "")
    hits = int(row[7] or 0)

    fp = fingerprint_os(ttl, win, scale, mss)
    dec_os = declared_os_from_ua(ua)
    spoof_res = detect_spoof(fp, dec_os)

    return {
        "ip": ip,
        "ja4": ja4,
        "tcp_ttl": ttl,
        "tcp_window_size": win,
        "tcp_win_scale": scale,
        "tcp_mss": mss,
        "hits": hits,
        "first_ua": ua,
        "suspected_os": fp.os_name,
        "initial_ttl": fp.initial_ttl,
        "hop_count": fp.hop_count,
        "confidence": fp.confidence,
        "network_path": fp.network_path,
        "is_bot_tool": fp.is_bot_tool,
        "declared_os": dec_os,
        "spoof_flag": spoof_res.is_spoof,
        "spoof_reason": spoof_res.reason,
    }


@router.get("/list")
async def get_tcp_spoofing_list(
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    spoof_only: bool = Query(False, description="Retourner uniquement les spoofs/bots confirmés"),
):
    """List (src_ip, ja4) pairs of the last 24 hours with their TCP fingerprint verdict.

    Each item carries the raw TCP values (TTL, window, scale, MSS), the hit
    count, the first User-Agent and the derived fields: suspected_os,
    initial_ttl, hop_count, confidence, network_path, is_bot_tool,
    declared_os, spoof_flag and spoof_reason.

    Pagination:
        * ``spoof_only=False``: LIMIT/OFFSET are applied in SQL and ``total``
          is the number of (src_ip, ja4) pairs.
        * ``spoof_only=True``: the spoof verdict is computed in Python, so it
          cannot be filtered in SQL. Previously the filter ran *after* the SQL
          LIMIT/OFFSET, which produced short or empty pages and a ``total``
          that ignored the filter. Now every pair is classified first, then
          the flagged ones are counted and sliced.

    Raises:
        HTTPException: status 500 carrying the error text on any failure.
    """
    try:
        grouped_sql = """
            SELECT
                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip,
                ja4,
                any(tcp_ttl_raw) AS tcp_ttl,
                any(tcp_win_raw) AS tcp_window_size,
                any(tcp_scale_raw) AS tcp_win_scale,
                any(tcp_mss_raw) AS tcp_mss,
                any(first_ua) AS first_ua,
                sum(hits) AS hits
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
            GROUP BY src_ip, ja4
            ORDER BY hits DESC
        """

        if spoof_only:
            # Filter first, paginate second, so pages are full and `total`
            # matches what the client can actually page through.
            every_item = (
                _build_tcp_spoofing_item(row)
                for row in db.query(grouped_sql).result_rows
            )
            flagged = [item for item in every_item if item["spoof_flag"]]
            return {
                "items": flagged[offset:offset + limit],
                "total": len(flagged),
            }

        count_sql = """
            SELECT count() FROM (
                SELECT src_ip, ja4
                FROM mabase_prod.agg_host_ip_ja4_1h
                WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
                GROUP BY src_ip, ja4
            )
        """
        total = int(db.query(count_sql).result_rows[0][0])

        paged_sql = grouped_sql + "\nLIMIT %(limit)s OFFSET %(offset)s\n"
        result = db.query(paged_sql, {"limit": limit, "offset": offset})
        items = [_build_tcp_spoofing_item(row) for row in result.result_rows]
        return {"items": items, "total": total}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/matrix")
async def get_tcp_spoofing_matrix():
    """Return the suspected-OS x declared-OS matrix for the last 24 hours.

    Every (src_ip, ja4) pair is fingerprinted from its TCP values and compared
    with the OS declared by its User-Agent. Pairs are then tallied per
    (suspected_os, declared_os, is_spoof, is_bot_tool) combination, each pair
    weighing the row count returned by the query for that pair.

    Returns:
        ``{"matrix": [...]}`` sorted by descending count.

    Raises:
        HTTPException: status 500 carrying the error text on any failure.
    """
    try:
        pairs_sql = """
            SELECT
                any(tcp_ttl_raw) AS ttl,
                any(tcp_win_raw) AS win,
                any(tcp_scale_raw) AS scale,
                any(tcp_mss_raw) AS mss,
                any(first_ua) AS ua,
                count() AS cnt
            FROM mabase_prod.agg_host_ip_ja4_1h
            WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
            GROUP BY src_ip, ja4
        """
        tally: dict = {}
        for rec in db.query(pairs_sql).result_rows:
            # NULL/empty cells fall back to neutral values before fingerprinting.
            ttl_value = int(rec[0] or 0)
            window_value = int(rec[1] or 0)
            scale_value = int(rec[2] or 0)
            mss_value = int(rec[3] or 0)
            agent = str(rec[4] or "")
            weight = int(rec[5] or 1)

            fingerprint = fingerprint_os(ttl_value, window_value, scale_value, mss_value)
            declared = declared_os_from_ua(agent)
            verdict = detect_spoof(fingerprint, declared)

            bucket = (fingerprint.os_name, declared, verdict.is_spoof, fingerprint.is_bot_tool)
            if bucket in tally:
                tally[bucket] += weight
            else:
                tally[bucket] = weight

        cells = []
        for (suspected, declared, spoofed, bot_tool), total in tally.items():
            cells.append({
                "suspected_os": suspected,
                "declared_os": declared,
                "count": total,
                "is_spoof": spoofed,
                "is_bot_tool": bot_tool,
            })
        return {"matrix": sorted(cells, key=lambda cell: cell["count"], reverse=True)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
706
services/dashboard/backend/routes/variability.py
Normal file
706
services/dashboard/backend/routes/variability.py
Normal file
@ -0,0 +1,706 @@
|
||||
"""
|
||||
Endpoints pour la variabilité des attributs
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional
|
||||
from ..database import db
|
||||
from ..models import (
|
||||
VariabilityResponse, VariabilityAttributes, AttributeValue, Insight,
|
||||
UserAgentsResponse, UserAgentValue
|
||||
)
|
||||
|
||||
# Router collecting the attribute-variability endpoints; every route below is served under /api/variability.
router = APIRouter(prefix="/api/variability", tags=["variability"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ROUTES SPÉCIFIQUES (doivent être avant les routes génériques)
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{attr_type}/{value:path}/ips", response_model=dict)
async def get_associated_ips(
    attr_type: str,
    value: str,
    limit: int = Query(100, ge=1, le=1000, description="Nombre maximum d'IPs")
):
    """List the source IPs seen with a given attribute value over the last 24 hours.

    Args:
        attr_type: one of ``ip``, ``ja4``, ``country``, ``asn``, ``host``.
        value: attribute value to match in ``ml_detected_anomalies``.
        limit: maximum number of IPs returned.

    Returns:
        A dict with the requested ``type``/``value``, the ``ips`` list (each
        with its hit count and its share of the hits of the returned rows),
        the ``total`` number of distinct IPs and ``showing``.

    Raises:
        HTTPException: 400 for an unsupported ``attr_type``; 500 on any other error.
    """
    try:
        # Whitelist of attribute types -> column names; only these names are
        # ever interpolated into the SQL text.
        type_column_map = {
            "ip": "src_ip",
            "ja4": "ja4",
            "country": "country_code",
            "asn": "asn_number",
            "host": "host",
        }

        column = type_column_map.get(attr_type)
        if column is None:
            raise HTTPException(
                status_code=400,
                detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
            )

        ip_sql = f"""
            SELECT src_ip, count() AS hit_count
            FROM ml_detected_anomalies
            WHERE {column} = %(value)s
                AND detected_at >= now() - INTERVAL 24 HOUR
            GROUP BY src_ip
            ORDER BY hit_count DESC
            LIMIT %(limit)s
        """
        ip_rows = db.query(ip_sql, {"value": value, "limit": limit}).result_rows

        # Percentages are relative to the returned rows; `or 1` avoids a
        # division by zero when nothing matched.
        hit_sum = sum(rec[1] for rec in ip_rows) or 1
        ips = []
        for rec in ip_rows:
            share = round(rec[1] * 100.0 / hit_sum, 2)
            ips.append({"ip": str(rec[0]), "count": rec[1], "percentage": share})

        # Number of distinct IPs, regardless of `limit`.
        distinct_sql = f"""
            SELECT uniq(src_ip) AS total
            FROM ml_detected_anomalies
            WHERE {column} = %(value)s
                AND detected_at >= now() - INTERVAL 24 HOUR
        """
        distinct_rows = db.query(distinct_sql, {"value": value}).result_rows
        total = distinct_rows[0][0] if distinct_rows else 0

        return {
            "type": attr_type,
            "value": value,
            "ips": ips,
            "total": total,
            "showing": len(ips)
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{attr_type}/{value:path}/attributes", response_model=dict)
async def get_associated_attributes(
    attr_type: str,
    value: str,
    target_attr: str = Query(..., description="Type d'attribut à récupérer (user_agents, ja4, countries, asns, hosts)"),
    limit: int = Query(50, ge=1, le=500, description="Nombre maximum de résultats")
):
    """List the values of one attribute associated with another attribute value.

    Example: the User-Agents observed for a country, over the last 24 hours.

    Args:
        attr_type: source attribute (``ip``, ``ja4``, ``country``, ``asn``, ``host``).
        value: value of the source attribute.
        target_attr: attribute to list (``user_agents``, ``ja4``, ``countries``,
            ``asns``, ``hosts``).
        limit: maximum number of returned values.

    Returns:
        A dict with ``type``, ``value``, ``target``, ``items`` (value, count,
        percentage), ``total`` and ``showing``. For ``user_agents`` the
        ``total`` equals the number of returned items.

    Raises:
        HTTPException: 400 for an unsupported ``attr_type`` or ``target_attr``;
            500 on any other error.
    """
    try:
        # Whitelisted source attribute -> column of ml_detected_anomalies.
        type_column_map = {
            "ip": "src_ip",
            "ja4": "ja4",
            "country": "country_code",
            "asn": "asn_number",
            "host": "host",
        }

        # Whitelisted target attribute -> column. None marks user_agents,
        # which is served from view_dashboard_user_agents instead.
        target_column_map = {
            "user_agents": None,
            "ja4": "ja4",
            "countries": "country_code",
            "asns": "asn_number",
            "hosts": "host",
        }

        if attr_type not in type_column_map:
            raise HTTPException(status_code=400, detail=f"Type '{attr_type}' invalide")

        if target_attr not in target_column_map:
            raise HTTPException(
                status_code=400,
                detail=f"Attribut cible invalide. Supportés: {', '.join(target_column_map.keys())}"
            )

        column = type_column_map[attr_type]
        target_column = target_column_map[target_attr]

        if target_column is None:
            # User-Agents live in view_dashboard_user_agents
            # (columns: src_ip, ja4, hour, log_date, user_agents, requests).
            direct_filters = {
                "ip": "toString(src_ip) = %(value)s",
                "ja4": "ja4 = %(value)s",
            }
            if attr_type in direct_filters:
                ua_where = direct_filters[attr_type]
            else:
                # country / asn / host: resolve the matching IPs through
                # ml_detected_anomalies, then filter the view on those IPs.
                ua_where = f"""toString(src_ip) IN (
                    SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
                    FROM ml_detected_anomalies
                    WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
                )"""

            ua_q = f"""
                SELECT ua AS value, sum(requests) AS count,
                    round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage
                FROM view_dashboard_user_agents
                ARRAY JOIN user_agents AS ua
                WHERE {ua_where}
                    AND hour >= now() - INTERVAL 24 HOUR AND ua != ''
                GROUP BY value ORDER BY count DESC LIMIT %(limit)s
            """
            ua_rows = db.query(ua_q, {"value": value, "limit": limit}).result_rows
            ua_items = []
            for rec in ua_rows:
                share = round(float(rec[2]), 2) if rec[2] else 0.0
                ua_items.append({"value": str(rec[0]), "count": rec[1], "percentage": share})
            return {
                "type": attr_type,
                "value": value,
                "target": target_attr,
                "items": ua_items,
                "total": len(ua_items),
                "showing": len(ua_items),
            }

        values_sql = f"""
            SELECT
                {target_column} AS value,
                count() AS count,
                round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
            FROM ml_detected_anomalies
            WHERE {column} = %(value)s
                AND {target_column} != '' AND {target_column} IS NOT NULL
                AND detected_at >= now() - INTERVAL 24 HOUR
            GROUP BY value
            ORDER BY count DESC
            LIMIT %(limit)s
        """
        value_rows = db.query(values_sql, {"value": value, "limit": limit}).result_rows

        items = []
        for rec in value_rows:
            share = round(float(rec[2]), 2) if rec[2] else 0.0
            items.append({"value": str(rec[0]), "count": rec[1], "percentage": share})

        # Number of distinct target values, regardless of `limit`.
        distinct_sql = f"""
            SELECT uniq({target_column}) AS total
            FROM ml_detected_anomalies
            WHERE {column} = %(value)s
                AND {target_column} != '' AND {target_column} IS NOT NULL
                AND detected_at >= now() - INTERVAL 24 HOUR
        """
        distinct_rows = db.query(distinct_sql, {"value": value}).result_rows
        total = distinct_rows[0][0] if distinct_rows else 0

        return {
            "type": attr_type,
            "value": value,
            "target": target_attr,
            "items": items,
            "total": total,
            "showing": len(items)
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{attr_type}/{value:path}/user_agents", response_model=UserAgentsResponse)
async def get_user_agents(
    attr_type: str,
    value: str,
    limit: int = Query(100, ge=1, le=500, description="Nombre maximum de user-agents")
):
    """List the User-Agents associated with an attribute value over the last 24 hours.

    Data is read from ``view_dashboard_user_agents``
    (columns: src_ip, ja4, hour, log_date, user_agents, requests).

    Args:
        attr_type: one of ``ip``, ``ja4``, ``country``, ``asn``, ``host``.
        value: attribute value to investigate.
        limit: maximum number of User-Agents returned.

    Returns:
        A dict with ``type``, ``value``, ``user_agents`` (UserAgentValue
        entries), ``total`` (distinct User-Agents) and ``showing``.

    Raises:
        HTTPException: 400 for an unsupported ``attr_type``; 500 on any other error.
    """
    try:
        # Whitelisted attribute type -> column of ml_detected_anomalies.
        type_column_map = {
            "ip": "src_ip",
            "ja4": "ja4",
            "country": "country_code",
            "asn": "asn_number",
            "host": "host",
        }

        if attr_type not in type_column_map:
            raise HTTPException(
                status_code=400,
                detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
            )

        column = type_column_map[attr_type]

        # ip and ja4 are columns of the view itself; the other types are
        # resolved to IPs through ml_detected_anomalies first.
        if attr_type == "ip":
            where = "toString(src_ip) = %(value)s"
        elif attr_type == "ja4":
            where = "ja4 = %(value)s"
        else:
            where = f"""toString(src_ip) IN (
                SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
                FROM ml_detected_anomalies
                WHERE {column} = %(value)s
                    AND detected_at >= now() - INTERVAL 24 HOUR
            )"""

        # The same bind parameters serve both queries below.
        params: dict = {"value": value, "limit": limit}

        listing_sql = f"""
            SELECT
                ua AS user_agent,
                sum(requests) AS count,
                round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
                min(log_date) AS first_seen,
                max(log_date) AS last_seen
            FROM view_dashboard_user_agents
            ARRAY JOIN user_agents AS ua
            WHERE {where}
                AND hour >= now() - INTERVAL 24 HOUR
                AND ua != ''
            GROUP BY user_agent
            ORDER BY count DESC
            LIMIT %(limit)s
        """
        listing = db.query(listing_sql, params)

        distinct_sql = f"""
            SELECT uniqExact(ua) AS total
            FROM view_dashboard_user_agents
            ARRAY JOIN user_agents AS ua
            WHERE {where}
                AND hour >= now() - INTERVAL 24 HOUR
                AND ua != ''
        """
        distinct = db.query(distinct_sql, params)

        user_agents = []
        for rec in listing.result_rows:
            # Optional date columns: tolerate short rows and empty cells.
            seen_first = rec[3] if len(rec) > 3 and rec[3] else None
            seen_last = rec[4] if len(rec) > 4 and rec[4] else None
            user_agents.append(
                UserAgentValue(
                    value=str(rec[0]),
                    count=rec[1] or 0,
                    percentage=round(float(rec[2]), 2) if rec[2] else 0.0,
                    first_seen=seen_first,
                    last_seen=seen_last,
                )
            )

        total = distinct.result_rows[0][0] if distinct.result_rows else 0

        return {
            "type": attr_type,
            "value": value,
            "user_agents": user_agents,
            "total": total,
            "showing": len(user_agents)
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ROUTE GÉNÉRIQUE (doit être en dernier)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def get_attribute_value(row, count_idx: int, percentage_idx: int,
                        first_seen_idx: Optional[int] = None,
                        last_seen_idx: Optional[int] = None,
                        threat_idx: Optional[int] = None,
                        unique_ips_idx: Optional[int] = None) -> AttributeValue:
    """Build an AttributeValue from one ClickHouse result row.

    ``row[0]`` is always the attribute value; every other field is located by
    the index passed for it. Optional indexes that are ``None`` or beyond the
    end of the row yield ``None`` for the corresponding field.

    Args:
        row: result row (indexable sequence).
        count_idx: index of the count column (falsy cell -> 0).
        percentage_idx: index of the percentage column (falsy cell -> 0.0).
        first_seen_idx: optional index of the first-seen column.
        last_seen_idx: optional index of the last-seen column.
        threat_idx: optional index of a threat summary string; when present
            and non-empty it feeds both ``threat_levels`` and ``primary_threat``.
        unique_ips_idx: optional index of the unique-IP count column.
    """
    def _optional_cell(idx):
        # Cell at `idx`, or None when the index is unset or out of range.
        if idx is None or len(row) <= idx:
            return None
        return row[idx]

    share_cell = row[percentage_idx]
    threat_cell = _optional_cell(threat_idx)

    if threat_cell:
        parsed_levels = _parse_threat_levels(threat_cell)
        main_level = _get_primary_threat(threat_cell)
    else:
        parsed_levels = None
        main_level = None

    return AttributeValue(
        value=str(row[0]),
        count=row[count_idx] or 0,
        percentage=round(float(share_cell), 2) if share_cell else 0.0,
        first_seen=_optional_cell(first_seen_idx),
        last_seen=_optional_cell(last_seen_idx),
        threat_levels=parsed_levels,
        unique_ips=_optional_cell(unique_ips_idx),
        primary_threat=main_level
    )
|
||||
|
||||
|
||||
def _parse_threat_levels(threat_str: str) -> dict:
|
||||
"""Parse une chaîne de type 'CRITICAL:5,HIGH:10' en dict"""
|
||||
if not threat_str:
|
||||
return {}
|
||||
result = {}
|
||||
for part in str(threat_str).split(','):
|
||||
if ':' in part:
|
||||
level, count = part.strip().split(':')
|
||||
result[level.strip()] = int(count.strip())
|
||||
return result
|
||||
|
||||
|
||||
def _get_primary_threat(threat_str: str) -> str:
|
||||
"""Retourne le niveau de menace principal"""
|
||||
if not threat_str:
|
||||
return ""
|
||||
levels_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW"]
|
||||
for level in levels_order:
|
||||
if level in str(threat_str):
|
||||
return level
|
||||
return ""
|
||||
|
||||
|
||||
def _generate_insights(attr_type: str, value: str, attributes: VariabilityAttributes,
                       total_detections: int, unique_ips: int) -> list:
    """Derive human-readable insights from the variability data.

    Rules, in output order:
        * more than one User-Agent -> warning (possible rotation/obfuscation);
        * more than one JA4 -> warning (possible fingerprint rotation);
        * non-IP selection with more than 10 unique IPs -> info;
        * exactly one ASN whose label contains a hosting/cloud keyword -> warning;
        * more than 5 countries -> info;
        * CRITICAL detections above 30% of the total -> warning, otherwise
          HIGH detections above 50% of the total -> info.

    Args:
        attr_type: type of the investigated attribute.
        value: investigated value (kept for interface compatibility; not used
            by any rule).
        attributes: per-attribute breakdowns.
        total_detections: total number of detections; the threat-level rule is
            skipped when this is not positive, which avoids a division by zero.
        unique_ips: number of distinct source IPs.

    Returns:
        List of Insight objects.
    """
    insights = []

    # User-Agent insights
    if len(attributes.user_agents) > 1:
        insights.append(Insight(
            type="warning",
            message=f"{len(attributes.user_agents)} User-Agents différents → Possible rotation/obfuscation"
        ))

    # JA4 insights
    if len(attributes.ja4) > 1:
        insights.append(Insight(
            type="warning",
            message=f"{len(attributes.ja4)} JA4 fingerprints différents → Possible rotation de fingerprint"
        ))

    # IP insights (only meaningful when the selection itself is not an IP)
    if attr_type != "ip" and unique_ips > 10:
        insights.append(Insight(
            type="info",
            message=f"{unique_ips} IPs différentes associées → Possible infrastructure distribuée"
        ))

    # ASN insights: flag a single ASN whose label looks like a hosting/cloud provider
    if len(attributes.asns) == 1 and attributes.asns[0].value:
        hosting_keywords = ["ovh", "amazon", "aws", "google", "azure", "digitalocean", "linode", "vultr"]
        asn_label = (attributes.asns[0].value or "").lower()
        if any(kw in asn_label for kw in hosting_keywords):
            insights.append(Insight(
                type="warning",
                message="ASN de type hosting/cloud → Souvent utilisé pour des bots"
            ))

    # Country insights
    if len(attributes.countries) > 5:
        insights.append(Insight(
            type="info",
            message=f"Présent dans {len(attributes.countries)} pays → Distribution géographique large"
        ))

    # Threat level insights
    critical_count = 0
    high_count = 0
    for tl in attributes.threat_levels:
        if tl.value == "CRITICAL":
            critical_count = tl.count
        elif tl.value == "HIGH":
            high_count = tl.count

    # The ratios below divide by total_detections; skip them when it is not
    # positive instead of raising ZeroDivisionError.
    if total_detections > 0:
        if critical_count > total_detections * 0.3:
            insights.append(Insight(
                type="warning",
                message=f"{round(critical_count * 100 / total_detections)}% de détections CRITICAL → Menace sévère"
            ))
        elif high_count > total_detections * 0.5:
            insights.append(Insight(
                type="info",
                message=f"{round(high_count * 100 / total_detections)}% de détections HIGH → Activité suspecte"
            ))

    return insights
|
||||
|
||||
|
||||
@router.get("/{attr_type}/{value:path}", response_model=VariabilityResponse)
async def get_variability(attr_type: str, value: str):
    """Return how the attributes associated with a value vary over the last 24 hours.

    Args:
        attr_type: ``ip``, ``ja4``, ``country``, ``asn``, ``host`` or ``user_agent``.
        value: the value to investigate.

    Returns:
        A VariabilityResponse with global stats (detections, unique IPs, date
        range), per-attribute breakdowns (User-Agents, JA4, countries, ASNs,
        hosts, threat levels, model names) and generated insights.

    Raises:
        HTTPException: 400 for an unsupported ``attr_type``, 404 when no
            detection matches, 500 on any other error.
    """
    try:
        # Whitelisted attribute type -> ClickHouse column; only these names
        # are ever interpolated into the SQL text.
        type_column_map = {
            "ip": "src_ip",
            "ja4": "ja4",
            "country": "country_code",
            "asn": "asn_number",
            "host": "host",
            "user_agent": "header_user_agent"
        }

        if attr_type not in type_column_map:
            raise HTTPException(
                status_code=400,
                detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
            )

        column = type_column_map[attr_type]

        # Base sub-query: every detection matching the value. user_agent is
        # selected as an empty literal, so the projection itself does not
        # read a header_user_agent column.
        base_query = f"""
            SELECT *
            FROM (
                SELECT
                    detected_at,
                    src_ip,
                    ja4,
                    host,
                    '' AS user_agent,
                    country_code,
                    asn_number,
                    asn_org,
                    threat_level,
                    model_name,
                    anomaly_score
                FROM ml_detected_anomalies
                WHERE {column} = %(value)s
                    AND detected_at >= now() - INTERVAL 24 HOUR
            )
        """

        # Global stats
        stats_query = f"""
            SELECT
                count() AS total_detections,
                uniq(src_ip) AS unique_ips,
                min(detected_at) AS first_seen,
                max(detected_at) AS last_seen
            FROM ml_detected_anomalies
            WHERE {column} = %(value)s
                AND detected_at >= now() - INTERVAL 24 HOUR
        """
        stats_rows = db.query(stats_query, {"value": value}).result_rows

        if not stats_rows or stats_rows[0][0] == 0:
            raise HTTPException(status_code=404, detail="Aucune donnée trouvée")

        total_detections, unique_ips, first_seen, last_seen = stats_rows[0][:4]

        # User-Agents. For ip/ja4 they are counted per request in http_logs;
        # per the original author's note, view_dashboard_user_agents
        # deduplicates per hour and would under-count hits.
        http_log_filters = {
            "ip": "src_ip = toIPv4(%(value)s)",
            "ja4": "ja4 = %(value)s",
        }
        if attr_type in http_log_filters:
            log_filter = http_log_filters[attr_type]
            ua_sql = f"""
                SELECT
                    header_user_agent AS user_agent,
                    count() AS count,
                    round(count() * 100.0 / (
                        SELECT count() FROM mabase_prod.http_logs
                        WHERE {log_filter} AND time >= now() - INTERVAL 24 HOUR
                    ), 2) AS percentage,
                    min(time) AS first_seen,
                    max(time) AS last_seen
                FROM mabase_prod.http_logs
                WHERE {log_filter}
                    AND time >= now() - INTERVAL 24 HOUR
                    AND header_user_agent != '' AND header_user_agent IS NOT NULL
                GROUP BY user_agent
                ORDER BY count DESC
            """
        else:
            # Other types: resolve the matching IPs through
            # ml_detected_anomalies, then read the User-Agent view.
            ip_pivot = f"""toString(src_ip) IN (
                SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
                FROM ml_detected_anomalies
                WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
            )"""
            ua_sql = f"""
                SELECT
                    ua AS user_agent,
                    sum(requests) AS count,
                    round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
                    min(log_date) AS first_seen,
                    max(log_date) AS last_seen
                FROM view_dashboard_user_agents
                ARRAY JOIN user_agents AS ua
                WHERE {ip_pivot}
                    AND hour >= now() - INTERVAL 24 HOUR
                    AND ua != ''
                GROUP BY user_agent
                ORDER BY count DESC
            """
        ua_rows = db.query(ua_sql, {"value": value}).result_rows
        user_agents = [get_attribute_value(rec, 1, 2, 3, 4) for rec in ua_rows]

        # JA4 fingerprints
        ja4_query = f"""
            SELECT
                ja4,
                count() AS count,
                round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage,
                min(detected_at) AS first_seen,
                max(detected_at) AS last_seen
            FROM ({base_query})
            WHERE ja4 != '' AND ja4 IS NOT NULL
            GROUP BY ja4
            ORDER BY count DESC
        """
        ja4_rows = db.query(ja4_query, {"value": value}).result_rows
        ja4s = [get_attribute_value(rec, 1, 2, 3, 4) for rec in ja4_rows]

        # Countries
        country_query = f"""
            SELECT
                country_code,
                count() AS count,
                round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
            FROM ({base_query})
            WHERE country_code != '' AND country_code IS NOT NULL
            GROUP BY country_code
            ORDER BY count DESC
        """
        country_rows = db.query(country_query, {"value": value}).result_rows
        countries = [get_attribute_value(rec, 1, 2) for rec in country_rows]

        # ASNs: column 0 is the display label, column 1 the raw number,
        # so count and percentage sit at indexes 2 and 3.
        asn_query = f"""
            SELECT
                concat('AS', toString(asn_number), ' - ', asn_org) AS asn_display,
                asn_number,
                count() AS count,
                round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
            FROM ({base_query})
            WHERE asn_number != '' AND asn_number IS NOT NULL AND asn_number != '0'
            GROUP BY asn_display, asn_number
            ORDER BY count DESC
        """
        asns = []
        for rec in db.query(asn_query, {"value": value}).result_rows:
            asns.append(
                AttributeValue(
                    value=str(rec[0]),
                    count=rec[2] or 0,
                    percentage=round(float(rec[3]), 2) if rec[3] else 0.0
                )
            )

        # Hosts
        host_query = f"""
            SELECT
                host,
                count() AS count,
                round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
            FROM ({base_query})
            WHERE host != '' AND host IS NOT NULL
            GROUP BY host
            ORDER BY count DESC
        """
        host_rows = db.query(host_query, {"value": value}).result_rows
        hosts = [get_attribute_value(rec, 1, 2) for rec in host_rows]

        # Threat levels, ordered by severity rather than by count
        threat_query = f"""
            SELECT
                threat_level,
                count() AS count,
                round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
            FROM ({base_query})
            WHERE threat_level != '' AND threat_level IS NOT NULL
            GROUP BY threat_level
            ORDER BY
                CASE threat_level
                    WHEN 'CRITICAL' THEN 1
                    WHEN 'HIGH' THEN 2
                    WHEN 'MEDIUM' THEN 3
                    WHEN 'LOW' THEN 4
                    ELSE 5
                END
        """
        threat_rows = db.query(threat_query, {"value": value}).result_rows
        threat_levels = [get_attribute_value(rec, 1, 2) for rec in threat_rows]

        # Model names
        model_query = f"""
            SELECT
                model_name,
                count() AS count,
                round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
            FROM ({base_query})
            WHERE model_name != '' AND model_name IS NOT NULL
            GROUP BY model_name
            ORDER BY count DESC
        """
        model_rows = db.query(model_query, {"value": value}).result_rows
        model_names = [get_attribute_value(rec, 1, 2) for rec in model_rows]

        # Assemble the response
        attributes = VariabilityAttributes(
            user_agents=user_agents,
            ja4=ja4s,
            countries=countries,
            asns=asns,
            hosts=hosts,
            threat_levels=threat_levels,
            model_names=model_names
        )
        insights = _generate_insights(attr_type, value, attributes, total_detections, unique_ips)

        return VariabilityResponse(
            type=attr_type,
            value=value,
            total_detections=total_detections,
            unique_ips=unique_ips,
            date_range={
                "first_seen": first_seen,
                "last_seen": last_seen
            },
            attributes=attributes,
            insights=insights
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
Reference in New Issue
Block a user