feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized
Services: - ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap) - logcorrelator: JA4 log correlation engine (Go, ClickHouse) - mod_reqin_log: Apache module (C, JSON request logging) - bot_detector: ML bot detection pipeline (Python) - dashboard: FastAPI/Streamlit analytics UI (Python) Shared libraries: - shared/go/ja4common: logger, config, shutdown, ipfilter (Go module) - shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package) - shared/clickhouse/: canonical SQL migrations (10 files) Build & packaging: - Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10) - go.work workspace linking sentinel, correlator, ja4common - Makefile with test-all, build-all, rpm-* targets Fixes applied: - go.work: 1.21 → 1.24.6 (required by sentinel) - correlator Dockerfiles: golang:1.21 → golang:1.24 - replace directives in go.mod for ja4common local path - pyproject.toml: setuptools.backends → setuptools.build_meta - Removed static libpcap linking (unavailable on Rocky 9) - Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32) - Rewrote corrupted test files (logger_test.go × 2) Test coverage: - correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%) - sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse) Documentation: - README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
1
services/dashboard/backend/__init__.py
Normal file
1
services/dashboard/backend/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Backend package
|
||||
27
services/dashboard/backend/config.py
Normal file
27
services/dashboard/backend/config.py
Normal file
@ -0,0 +1,27 @@
|
||||
"""
|
||||
Configuration du Dashboard Bot Detector
|
||||
"""
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
# ClickHouse
|
||||
CLICKHOUSE_HOST: str = "clickhouse"
|
||||
CLICKHOUSE_PORT: int = 8123
|
||||
CLICKHOUSE_DB: str = "mabase_prod"
|
||||
CLICKHOUSE_USER: str = "admin"
|
||||
CLICKHOUSE_PASSWORD: str = ""
|
||||
|
||||
# API
|
||||
API_HOST: str = "0.0.0.0"
|
||||
API_PORT: int = 8000
|
||||
|
||||
# CORS
|
||||
CORS_ORIGINS: list = ["http://localhost:3000", "http://127.0.0.1:3000"]
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
case_sensitive = True
|
||||
|
||||
|
||||
settings = Settings()
|
||||
7
services/dashboard/backend/database.py
Normal file
7
services/dashboard/backend/database.py
Normal file
@ -0,0 +1,7 @@
|
||||
"""
|
||||
ClickHouse connection — delegates to ja4_common shared client.
|
||||
"""
|
||||
from ja4_common.clickhouse import get_client as _get_client, ClickHouseClient
|
||||
|
||||
# Re-export for backward compatibility with existing route imports
|
||||
db: ClickHouseClient = _get_client()
|
||||
237
services/dashboard/backend/main.py
Normal file
237
services/dashboard/backend/main.py
Normal file
@ -0,0 +1,237 @@
|
||||
"""
|
||||
Bot Detector Dashboard - API Backend
|
||||
FastAPI application pour servir le dashboard web
|
||||
"""
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
import os
|
||||
|
||||
from .config import settings
|
||||
from .database import db
|
||||
from .routes import metrics, detections, variability, attributes, analysis, entities, incidents, audit, reputation, fingerprints
|
||||
from .routes import bruteforce, tcp_spoofing, header_fingerprint, heatmap, botnets, rotation, ml_features, investigation_summary, search, clustering
|
||||
|
||||
# Configuration logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Gestion du cycle de vie de l'application"""
|
||||
# Startup
|
||||
logger.info("Démarrage du Bot Detector Dashboard API...")
|
||||
logger.info(f"ClickHouse: {settings.CLICKHOUSE_HOST}:{settings.CLICKHOUSE_PORT}")
|
||||
logger.info(f"Database: {settings.CLICKHOUSE_DB}")
|
||||
|
||||
# Tester la connexion ClickHouse
|
||||
try:
|
||||
client = db.connect()
|
||||
client.ping()
|
||||
logger.info("Connexion ClickHouse établie avec succès")
|
||||
except Exception as e:
|
||||
logger.error(f"Échec de connexion ClickHouse: {e}")
|
||||
raise
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
logger.info("Arrêt du Bot Detector Dashboard API...")
|
||||
db.close()
|
||||
|
||||
|
||||
# Création de l'application FastAPI
|
||||
OPENAPI_TAGS = [
|
||||
{
|
||||
"name": "Metrics",
|
||||
"description": "Métriques globales : comptages, niveaux de menace, baseline et distribution des scores ML.",
|
||||
},
|
||||
{
|
||||
"name": "Detections",
|
||||
"description": "Liste paginée et filtrée des anomalies détectées par le modèle ML. Supporte tri, recherche texte et regroupement par IP.",
|
||||
},
|
||||
{
|
||||
"name": "investigation",
|
||||
"description": (
|
||||
"**Point d'entrée principal pour l'analyse d'une IP.** "
|
||||
"Agrège en un seul appel : score ML, brute-force, spoofing TCP, rotation JA4, persistance et timeline 24h. "
|
||||
"Retourne un `risk_score` heuristique de 0 à 100."
|
||||
),
|
||||
},
|
||||
{
|
||||
"name": "Reputation",
|
||||
"description": "Réputation externe d'une IP via IP-API.com et IPinfo.io (sans clé API). Détecte proxies, VPN, Tor, hébergeurs.",
|
||||
},
|
||||
{
|
||||
"name": "Analysis",
|
||||
"description": "Analyses approfondies par IP : subnet, pays, empreintes JA4, user-agents, recommandation SOC et gestion des classifications.",
|
||||
},
|
||||
{
|
||||
"name": "Entities",
|
||||
"description": "Investigation par entité (IP, JA4, subnet, user-agent, host). Retourne détections associées, user-agents, chemins, paramètres et entités liées.",
|
||||
},
|
||||
{
|
||||
"name": "Incidents",
|
||||
"description": "Clusters d'incidents actifs regroupés par similarité comportementale. Permet la classification et le suivi des incidents.",
|
||||
},
|
||||
{
|
||||
"name": "Fingerprints",
|
||||
"description": "Analyse des empreintes JA4/TLS : spoofing, matrice JA4↔UA, user-agents suspects, cohérence par IP, JA4 légitimes et corrélation ASN.",
|
||||
},
|
||||
{
|
||||
"name": "Bruteforce",
|
||||
"description": "Détection des attaques brute-force : cibles, attaquants, timeline et détail par host.",
|
||||
},
|
||||
{
|
||||
"name": "TCP Spoofing",
|
||||
"description": "Détection du spoofing TCP/OS fingerprinting : vue d'ensemble, liste et matrice TTL×MSS.",
|
||||
},
|
||||
{
|
||||
"name": "Header Fingerprint",
|
||||
"description": "Clusters de fingerprints d'en-têtes HTTP suspects et IPs associées.",
|
||||
},
|
||||
{
|
||||
"name": "Heatmap",
|
||||
"description": "Heatmap horaire du trafic, top hosts et matrice activité/heure.",
|
||||
},
|
||||
{
|
||||
"name": "Botnets",
|
||||
"description": "Détection de botnets : spread JA4, distribution géographique par JA4, résumé global.",
|
||||
},
|
||||
{
|
||||
"name": "Rotation",
|
||||
"description": "Détection de la rotation JA4 (évasion de détection), menaces persistantes, historique JA4 par IP et score de sophistication.",
|
||||
},
|
||||
{
|
||||
"name": "ML Features",
|
||||
"description": "Données brutes du modèle ML : top anomalies, radar par IP, distribution des scores, tendances, features B et scatter plot.",
|
||||
},
|
||||
{
|
||||
"name": "Attributes",
|
||||
"description": "Listes des valeurs distinctes d'attributs (JA4, user-agents, ASN, pays…) avec comptages.",
|
||||
},
|
||||
{
|
||||
"name": "Variability",
|
||||
"description": "Variabilité comportementale : IPs par attribut, attributs par valeur, analyse des user-agents.",
|
||||
},
|
||||
{
|
||||
"name": "Clustering",
|
||||
"description": "Clustering K-Means des IPs sur les features ML. Statut du cache, clusters, points et IPs par cluster.",
|
||||
},
|
||||
{
|
||||
"name": "Search",
|
||||
"description": "Recherche rapide cross-entités (IP, JA4, host, user-agent, pays, ASN).",
|
||||
},
|
||||
{
|
||||
"name": "Audit",
|
||||
"description": "Journal d'audit SOC : création de logs, consultation filtrée, statistiques et activité par utilisateur.",
|
||||
},
|
||||
]
|
||||
|
||||
app = FastAPI(
|
||||
title="Bot Detector Dashboard API",
|
||||
description=(
|
||||
"API REST du **Bot Detector SOC Dashboard**.\n\n"
|
||||
"Permet d'interroger la base ClickHouse (`mabase_prod`) pour visualiser et analyser "
|
||||
"les détections de bots générées par le service `bot_detector_ai`.\n\n"
|
||||
"**Endpoint clé :** `GET /api/investigation/{ip}/summary` — synthèse complète en un appel.\n\n"
|
||||
"Documentation interactive : `/docs` (Swagger UI) · `/redoc` (ReDoc)"
|
||||
),
|
||||
version="1.0.0",
|
||||
openapi_tags=OPENAPI_TAGS,
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
# Configuration CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.CORS_ORIGINS,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Enregistrement des routes
|
||||
app.include_router(metrics.router)
|
||||
app.include_router(detections.router)
|
||||
app.include_router(variability.router)
|
||||
app.include_router(attributes.router)
|
||||
app.include_router(analysis.router)
|
||||
app.include_router(entities.router)
|
||||
app.include_router(incidents.router)
|
||||
app.include_router(audit.router)
|
||||
app.include_router(reputation.router)
|
||||
app.include_router(fingerprints.router)
|
||||
app.include_router(bruteforce.router)
|
||||
app.include_router(tcp_spoofing.router)
|
||||
app.include_router(header_fingerprint.router)
|
||||
app.include_router(heatmap.router)
|
||||
app.include_router(botnets.router)
|
||||
app.include_router(rotation.router)
|
||||
app.include_router(ml_features.router)
|
||||
app.include_router(investigation_summary.router)
|
||||
app.include_router(search.router)
|
||||
app.include_router(clustering.router)
|
||||
|
||||
|
||||
# Chemin vers le fichier index.html du frontend (utilisé par serve_frontend et serve_spa)
|
||||
_FRONTEND_INDEX = os.path.join(os.path.dirname(__file__), "..", "frontend", "dist", "index.html")
|
||||
|
||||
# Route pour servir le frontend
|
||||
@app.get("/")
|
||||
async def serve_frontend():
|
||||
"""Sert l'application React"""
|
||||
if os.path.exists(_FRONTEND_INDEX):
|
||||
return FileResponse(_FRONTEND_INDEX)
|
||||
return {"message": "Dashboard API - Frontend non construit. Voir /docs pour l'API."}
|
||||
|
||||
|
||||
# Servir les assets statiques
|
||||
_assets_path = os.path.join(os.path.dirname(__file__), "..", "frontend", "dist", "assets")
|
||||
if os.path.exists(_assets_path):
|
||||
try:
|
||||
app.mount("/assets", StaticFiles(directory=_assets_path), name="assets")
|
||||
except Exception as _e:
|
||||
logger.warning(f"Impossible de monter les assets statiques : {_e}")
|
||||
|
||||
|
||||
# Health check
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Endpoint de santé pour le health check Docker"""
|
||||
try:
|
||||
db.connect().ping()
|
||||
return {"status": "healthy", "clickhouse": "connected"}
|
||||
except Exception as e:
|
||||
return {"status": "unhealthy", "clickhouse": "disconnected", "error": str(e)}
|
||||
|
||||
|
||||
# Route catch-all pour le routing SPA (React Router) - DOIT ÊTRE EN DERNIER
|
||||
# Sauf pour /api/* qui doit être géré par les routers
|
||||
@app.get("/{full_path:path}")
|
||||
async def serve_spa(full_path: str):
|
||||
"""Redirige toutes les routes vers index.html pour le routing React"""
|
||||
# Ne pas intercepter les routes API
|
||||
if full_path.startswith("api/"):
|
||||
raise HTTPException(status_code=404, detail="API endpoint not found")
|
||||
|
||||
if os.path.exists(_FRONTEND_INDEX):
|
||||
return FileResponse(_FRONTEND_INDEX)
|
||||
return {"message": "Dashboard API - Frontend non construit"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(
|
||||
"main:app",
|
||||
host=settings.API_HOST,
|
||||
port=settings.API_PORT,
|
||||
reload=True
|
||||
)
|
||||
322
services/dashboard/backend/models.py
Normal file
322
services/dashboard/backend/models.py
Normal file
@ -0,0 +1,322 @@
|
||||
"""
|
||||
Modèles de données pour l'API
|
||||
"""
|
||||
from pydantic import BaseModel, Field, ConfigDict
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class ThreatLevel(str, Enum):
|
||||
CRITICAL = "CRITICAL"
|
||||
HIGH = "HIGH"
|
||||
MEDIUM = "MEDIUM"
|
||||
LOW = "LOW"
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# MÉTRIQUES
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class MetricsSummary(BaseModel):
|
||||
total_detections: int
|
||||
critical_count: int
|
||||
high_count: int
|
||||
medium_count: int
|
||||
low_count: int
|
||||
known_bots_count: int
|
||||
anomalies_count: int
|
||||
unique_ips: int
|
||||
|
||||
|
||||
class TimeSeriesPoint(BaseModel):
|
||||
hour: datetime
|
||||
total: int
|
||||
critical: int
|
||||
high: int
|
||||
medium: int
|
||||
low: int
|
||||
|
||||
|
||||
class MetricsResponse(BaseModel):
|
||||
summary: MetricsSummary
|
||||
timeseries: List[TimeSeriesPoint]
|
||||
threat_distribution: Dict[str, int]
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# DÉTECTIONS
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class Detection(BaseModel):
|
||||
detected_at: datetime
|
||||
src_ip: str
|
||||
ja4: str
|
||||
host: str
|
||||
bot_name: str
|
||||
anomaly_score: float
|
||||
threat_level: str
|
||||
model_name: str
|
||||
recurrence: int
|
||||
asn_number: str
|
||||
asn_org: str
|
||||
asn_detail: str
|
||||
asn_domain: str
|
||||
country_code: str
|
||||
asn_label: str
|
||||
hits: int
|
||||
hit_velocity: float
|
||||
fuzzing_index: float
|
||||
post_ratio: float
|
||||
reason: str
|
||||
client_headers: str = ""
|
||||
asn_score: Optional[float] = None
|
||||
asn_rep_label: str = ""
|
||||
first_seen: Optional[datetime] = None
|
||||
last_seen: Optional[datetime] = None
|
||||
unique_ja4s: Optional[List[str]] = None
|
||||
unique_hosts: Optional[List[str]] = None
|
||||
anubis_bot_name: str = ""
|
||||
anubis_bot_action: str = ""
|
||||
anubis_bot_category: str = ""
|
||||
|
||||
|
||||
class DetectionsListResponse(BaseModel):
|
||||
items: List[Detection]
|
||||
total: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# VARIABILITÉ
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class AttributeValue(BaseModel):
|
||||
value: str
|
||||
count: int
|
||||
percentage: float
|
||||
first_seen: Optional[datetime] = None
|
||||
last_seen: Optional[datetime] = None
|
||||
threat_levels: Optional[Dict[str, int]] = None
|
||||
unique_ips: Optional[int] = None
|
||||
primary_threat: Optional[str] = None
|
||||
|
||||
|
||||
class VariabilityAttributes(BaseModel):
|
||||
user_agents: List[AttributeValue] = Field(default_factory=list)
|
||||
ja4: List[AttributeValue] = Field(default_factory=list)
|
||||
countries: List[AttributeValue] = Field(default_factory=list)
|
||||
asns: List[AttributeValue] = Field(default_factory=list)
|
||||
hosts: List[AttributeValue] = Field(default_factory=list)
|
||||
threat_levels: List[AttributeValue] = Field(default_factory=list)
|
||||
model_names: List[AttributeValue] = Field(default_factory=list)
|
||||
|
||||
|
||||
class Insight(BaseModel):
|
||||
type: str # "warning", "info", "success"
|
||||
message: str
|
||||
|
||||
|
||||
class VariabilityResponse(BaseModel):
|
||||
type: str
|
||||
value: str
|
||||
total_detections: int
|
||||
unique_ips: int
|
||||
date_range: Dict[str, datetime]
|
||||
attributes: VariabilityAttributes
|
||||
insights: List[Insight] = Field(default_factory=list)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# ATTRIBUTS UNIQUES
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class AttributeListItem(BaseModel):
|
||||
value: str
|
||||
count: int
|
||||
|
||||
|
||||
class AttributeListResponse(BaseModel):
|
||||
type: str
|
||||
items: List[AttributeListItem]
|
||||
total: int
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# USER-AGENTS
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class UserAgentValue(BaseModel):
|
||||
value: str
|
||||
count: int
|
||||
percentage: float
|
||||
first_seen: Optional[datetime] = None
|
||||
last_seen: Optional[datetime] = None
|
||||
|
||||
|
||||
class UserAgentsResponse(BaseModel):
|
||||
type: str
|
||||
value: str
|
||||
user_agents: List[UserAgentValue]
|
||||
total: int
|
||||
showing: int
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# CLASSIFICATIONS (SOC / ML)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class ClassificationLabel(str, Enum):
|
||||
LEGITIMATE = "legitimate"
|
||||
SUSPICIOUS = "suspicious"
|
||||
MALICIOUS = "malicious"
|
||||
|
||||
|
||||
class ClassificationBase(BaseModel):
|
||||
ip: Optional[str] = None
|
||||
ja4: Optional[str] = None
|
||||
label: ClassificationLabel
|
||||
tags: List[str] = Field(default_factory=list)
|
||||
comment: str = ""
|
||||
confidence: float = Field(ge=0.0, le=1.0, default=0.5)
|
||||
analyst: str = "unknown"
|
||||
|
||||
|
||||
class ClassificationCreate(ClassificationBase):
|
||||
"""Données pour créer une classification"""
|
||||
features: dict = Field(default_factory=dict)
|
||||
|
||||
|
||||
class Classification(ClassificationBase):
|
||||
"""Classification complète avec métadonnées"""
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
created_at: datetime
|
||||
features: dict = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ClassificationsListResponse(BaseModel):
|
||||
items: List[Classification]
|
||||
total: int
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# ANALYSIS (CORRELATION)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class SubnetAnalysis(BaseModel):
|
||||
"""Analyse subnet/ASN"""
|
||||
ip: str
|
||||
subnet: str
|
||||
ips_in_subnet: List[str]
|
||||
total_in_subnet: int
|
||||
asn_number: str
|
||||
asn_org: str
|
||||
total_in_asn: int
|
||||
alert: bool # True si > 10 IPs du subnet
|
||||
|
||||
|
||||
class CountryData(BaseModel):
|
||||
"""Données pour un pays"""
|
||||
code: str
|
||||
name: str
|
||||
count: int
|
||||
percentage: float
|
||||
|
||||
|
||||
class CountryAnalysis(BaseModel):
|
||||
"""Analyse des pays"""
|
||||
top_countries: List[CountryData]
|
||||
baseline: dict # Pays habituels
|
||||
alert_country: Optional[str] = None # Pays surreprésenté
|
||||
|
||||
|
||||
class JA4SubnetData(BaseModel):
|
||||
"""Subnet pour un JA4"""
|
||||
subnet: str
|
||||
count: int
|
||||
|
||||
|
||||
class JA4Analysis(BaseModel):
|
||||
"""Analyse JA4"""
|
||||
ja4: str
|
||||
shared_ips_count: int
|
||||
top_subnets: List[JA4SubnetData]
|
||||
other_ja4_for_ip: List[str]
|
||||
|
||||
|
||||
class UserAgentData(BaseModel):
|
||||
"""Données pour un User-Agent"""
|
||||
value: str
|
||||
count: int
|
||||
percentage: float
|
||||
classification: str # "normal", "bot", "script"
|
||||
|
||||
|
||||
class UserAgentAnalysis(BaseModel):
|
||||
"""Analyse User-Agents"""
|
||||
ip_user_agents: List[UserAgentData]
|
||||
ja4_user_agents: List[UserAgentData]
|
||||
bot_percentage: float
|
||||
alert: bool # True si > 20% bots/scripts
|
||||
|
||||
|
||||
class CorrelationIndicators(BaseModel):
|
||||
"""Indicateurs de corrélation"""
|
||||
subnet_ips_count: int
|
||||
asn_ips_count: int
|
||||
country_percentage: float
|
||||
ja4_shared_ips: int
|
||||
user_agents_count: int
|
||||
bot_ua_percentage: float
|
||||
|
||||
|
||||
class ClassificationRecommendation(BaseModel):
|
||||
"""Recommandation de classification"""
|
||||
label: ClassificationLabel
|
||||
confidence: float
|
||||
indicators: CorrelationIndicators
|
||||
suggested_tags: List[str]
|
||||
reason: str
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# ENTITIES (UNIFIED VIEW)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class EntityStats(BaseModel):
|
||||
"""Statistiques pour une entité"""
|
||||
entity_type: str
|
||||
entity_value: str
|
||||
total_requests: int
|
||||
unique_ips: int
|
||||
first_seen: datetime
|
||||
last_seen: datetime
|
||||
|
||||
|
||||
class EntityRelatedAttributes(BaseModel):
|
||||
"""Attributs associés à une entité"""
|
||||
ips: List[str] = Field(default_factory=list)
|
||||
ja4s: List[str] = Field(default_factory=list)
|
||||
hosts: List[str] = Field(default_factory=list)
|
||||
asns: List[str] = Field(default_factory=list)
|
||||
countries: List[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class EntityAttributeValue(BaseModel):
|
||||
"""Valeur d'attribut avec count et percentage (pour les entities)"""
|
||||
value: str
|
||||
count: int
|
||||
percentage: float
|
||||
|
||||
|
||||
class EntityInvestigation(BaseModel):
|
||||
"""Investigation complète pour une entité"""
|
||||
stats: EntityStats
|
||||
related: EntityRelatedAttributes
|
||||
user_agents: List[EntityAttributeValue] = Field(default_factory=list)
|
||||
client_headers: List[EntityAttributeValue] = Field(default_factory=list)
|
||||
paths: List[EntityAttributeValue] = Field(default_factory=list)
|
||||
query_params: List[EntityAttributeValue] = Field(default_factory=list)
|
||||
1
services/dashboard/backend/routes/__init__.py
Normal file
1
services/dashboard/backend/routes/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Routes package
|
||||
686
services/dashboard/backend/routes/analysis.py
Normal file
686
services/dashboard/backend/routes/analysis.py
Normal file
@ -0,0 +1,686 @@
|
||||
"""
|
||||
Endpoints pour l'analyse de corrélations et la classification SOC
|
||||
"""
|
||||
from collections import defaultdict
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional, List
|
||||
import ipaddress
|
||||
import json
|
||||
|
||||
from ..database import db
|
||||
from ..models import (
|
||||
SubnetAnalysis, CountryAnalysis, CountryData, JA4Analysis, JA4SubnetData,
|
||||
UserAgentAnalysis, UserAgentData, CorrelationIndicators,
|
||||
ClassificationRecommendation, ClassificationLabel,
|
||||
ClassificationCreate, Classification, ClassificationsListResponse
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/analysis", tags=["analysis"])
|
||||
|
||||
# Mapping code ISO → nom lisible (utilisé par analyze_ip_country et analyze_country)
|
||||
_COUNTRY_NAMES: dict[str, str] = {
|
||||
"CN": "China", "US": "United States", "DE": "Germany",
|
||||
"FR": "France", "RU": "Russia", "GB": "United Kingdom",
|
||||
"NL": "Netherlands", "IN": "India", "BR": "Brazil",
|
||||
"JP": "Japan", "KR": "South Korea", "IT": "Italy",
|
||||
"ES": "Spain", "CA": "Canada", "AU": "Australia"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE SUBNET / ASN
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/subnet", response_model=SubnetAnalysis)
|
||||
async def analyze_subnet(ip: str):
|
||||
"""
|
||||
Analyse les IPs du même subnet et ASN
|
||||
"""
|
||||
try:
|
||||
# Calculer le subnet /24
|
||||
ip_obj = ipaddress.ip_address(ip)
|
||||
subnet = ipaddress.ip_network(f"{ip}/24", strict=False)
|
||||
subnet_str = str(subnet)
|
||||
|
||||
# Récupérer les infos ASN pour cette IP
|
||||
asn_query = """
|
||||
SELECT asn_number, asn_org
|
||||
FROM ml_detected_anomalies
|
||||
WHERE src_ip = %(ip)s
|
||||
ORDER BY detected_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
asn_result = db.query(asn_query, {"ip": ip})
|
||||
|
||||
if not asn_result.result_rows:
|
||||
# Fallback: utiliser données par défaut
|
||||
asn_number = "0"
|
||||
asn_org = "Unknown"
|
||||
else:
|
||||
asn_number = str(asn_result.result_rows[0][0] or "0")
|
||||
asn_org = asn_result.result_rows[0][1] or "Unknown"
|
||||
|
||||
# IPs du même subnet /24
|
||||
subnet_ips_query = """
|
||||
SELECT DISTINCT src_ip
|
||||
FROM ml_detected_anomalies
|
||||
WHERE toIPv4(src_ip) >= toIPv4(%(subnet_start)s)
|
||||
AND toIPv4(src_ip) <= toIPv4(%(subnet_end)s)
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
ORDER BY src_ip
|
||||
"""
|
||||
|
||||
subnet_result = db.query(subnet_ips_query, {
|
||||
"subnet_start": str(subnet.network_address),
|
||||
"subnet_end": str(subnet.broadcast_address)
|
||||
})
|
||||
|
||||
subnet_ips = [str(row[0]) for row in subnet_result.result_rows]
|
||||
|
||||
# Total IPs du même ASN
|
||||
if asn_number != "0":
|
||||
asn_total_query = """
|
||||
SELECT uniq(src_ip)
|
||||
FROM ml_detected_anomalies
|
||||
WHERE asn_number = %(asn_number)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
asn_total_result = db.query(asn_total_query, {"asn_number": asn_number})
|
||||
asn_total = asn_total_result.result_rows[0][0] if asn_total_result.result_rows else 0
|
||||
else:
|
||||
asn_total = 0
|
||||
|
||||
return SubnetAnalysis(
|
||||
ip=ip,
|
||||
subnet=subnet_str,
|
||||
ips_in_subnet=subnet_ips,
|
||||
total_in_subnet=len(subnet_ips),
|
||||
asn_number=asn_number,
|
||||
asn_org=asn_org,
|
||||
total_in_asn=asn_total,
|
||||
alert=len(subnet_ips) > 10
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{ip}/country", response_model=dict)
|
||||
async def analyze_ip_country(ip: str):
|
||||
"""
|
||||
Analyse le pays d'une IP spécifique et la répartition des autres pays du même ASN
|
||||
"""
|
||||
try:
|
||||
# Pays de l'IP
|
||||
ip_country_query = """
|
||||
SELECT country_code, asn_number
|
||||
FROM ml_detected_anomalies
|
||||
WHERE src_ip = %(ip)s
|
||||
ORDER BY detected_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
ip_result = db.query(ip_country_query, {"ip": ip})
|
||||
|
||||
if not ip_result.result_rows:
|
||||
return {"ip_country": None, "asn_countries": []}
|
||||
|
||||
ip_country_code = ip_result.result_rows[0][0]
|
||||
asn_number = ip_result.result_rows[0][1]
|
||||
|
||||
# Répartition des autres pays du même ASN
|
||||
asn_countries_query = """
|
||||
SELECT
|
||||
country_code,
|
||||
count() AS count
|
||||
FROM ml_detected_anomalies
|
||||
WHERE asn_number = %(asn_number)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY country_code
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
asn_result = db.query(asn_countries_query, {"asn_number": asn_number})
|
||||
|
||||
total = sum(row[1] for row in asn_result.result_rows)
|
||||
|
||||
asn_countries = [
|
||||
{
|
||||
"code": row[0],
|
||||
"name": _COUNTRY_NAMES.get(row[0], row[0]),
|
||||
"count": row[1],
|
||||
"percentage": round((row[1] / total * 100), 2) if total > 0 else 0.0
|
||||
}
|
||||
for row in asn_result.result_rows
|
||||
]
|
||||
|
||||
return {
|
||||
"ip_country": {
|
||||
"code": ip_country_code,
|
||||
"name": _COUNTRY_NAMES.get(ip_country_code, ip_country_code)
|
||||
},
|
||||
"asn_countries": asn_countries
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE PAYS
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/country", response_model=CountryAnalysis)
|
||||
async def analyze_country(days: int = Query(1, ge=1, le=30)):
|
||||
"""
|
||||
Analyse la distribution des pays
|
||||
"""
|
||||
try:
|
||||
# Top pays
|
||||
top_query = """
|
||||
SELECT
|
||||
country_code,
|
||||
count() AS count
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(days)s DAY
|
||||
AND country_code != '' AND country_code IS NOT NULL
|
||||
GROUP BY country_code
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
top_result = db.query(top_query, {"days": days})
|
||||
|
||||
# Calculer le total pour le pourcentage
|
||||
total = sum(row[1] for row in top_result.result_rows)
|
||||
|
||||
top_countries = [
|
||||
CountryData(
|
||||
code=row[0],
|
||||
name=_COUNTRY_NAMES.get(row[0], row[0]),
|
||||
count=row[1],
|
||||
percentage=round((row[1] / total * 100), 2) if total > 0 else 0.0
|
||||
)
|
||||
for row in top_result.result_rows
|
||||
]
|
||||
|
||||
# Baseline (7 derniers jours)
|
||||
baseline_query = """
|
||||
SELECT
|
||||
country_code,
|
||||
count() AS count
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL 7 DAY
|
||||
AND country_code != '' AND country_code IS NOT NULL
|
||||
GROUP BY country_code
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
baseline_result = db.query(baseline_query)
|
||||
|
||||
baseline_total = sum(row[1] for row in baseline_result.result_rows)
|
||||
baseline = {
|
||||
row[0]: round((row[1] / baseline_total * 100), 2) if baseline_total > 0 else 0.0
|
||||
for row in baseline_result.result_rows
|
||||
}
|
||||
|
||||
# Détecter pays surreprésenté
|
||||
alert_country = None
|
||||
for country in top_countries:
|
||||
baseline_pct = baseline.get(country.code, 0)
|
||||
if baseline_pct > 0 and country.percentage > baseline_pct * 2 and country.percentage > 30:
|
||||
alert_country = country.code
|
||||
break
|
||||
|
||||
return CountryAnalysis(
|
||||
top_countries=top_countries,
|
||||
baseline=baseline,
|
||||
alert_country=alert_country
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/ja4", response_model=JA4Analysis)
|
||||
async def analyze_ja4(ip: str):
|
||||
"""
|
||||
Analyse le JA4 fingerprint
|
||||
"""
|
||||
try:
|
||||
# JA4 de cette IP
|
||||
ja4_query = """
|
||||
SELECT ja4
|
||||
FROM ml_detected_anomalies
|
||||
WHERE src_ip = %(ip)s
|
||||
AND ja4 != '' AND ja4 IS NOT NULL
|
||||
ORDER BY detected_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
ja4_result = db.query(ja4_query, {"ip": ip})
|
||||
|
||||
if not ja4_result.result_rows:
|
||||
return JA4Analysis(
|
||||
ja4="",
|
||||
shared_ips_count=0,
|
||||
top_subnets=[],
|
||||
other_ja4_for_ip=[]
|
||||
)
|
||||
|
||||
ja4 = ja4_result.result_rows[0][0]
|
||||
|
||||
# IPs avec le même JA4
|
||||
shared_query = """
|
||||
SELECT uniq(src_ip)
|
||||
FROM ml_detected_anomalies
|
||||
WHERE ja4 = %(ja4)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
shared_result = db.query(shared_query, {"ja4": ja4})
|
||||
shared_count = shared_result.result_rows[0][0] if shared_result.result_rows else 0
|
||||
|
||||
# Top subnets pour ce JA4 - Simplifié
|
||||
subnets_query = """
|
||||
SELECT
|
||||
src_ip,
|
||||
count() AS count
|
||||
FROM ml_detected_anomalies
|
||||
WHERE ja4 = %(ja4)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY src_ip
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
subnets_result = db.query(subnets_query, {"ja4": ja4})
|
||||
|
||||
# Grouper par subnet /24
|
||||
subnet_counts = defaultdict(int)
|
||||
for row in subnets_result.result_rows:
|
||||
ip_addr = str(row[0])
|
||||
parts = ip_addr.split('.')
|
||||
if len(parts) == 4:
|
||||
subnet = f"{parts[0]}.{parts[1]}.{parts[2]}.0/24"
|
||||
subnet_counts[subnet] += row[1]
|
||||
|
||||
top_subnets = [
|
||||
JA4SubnetData(subnet=subnet, count=count)
|
||||
for subnet, count in sorted(subnet_counts.items(), key=lambda x: x[1], reverse=True)[:10]
|
||||
]
|
||||
|
||||
# Autres JA4 pour cette IP
|
||||
other_ja4_query = """
|
||||
SELECT DISTINCT ja4
|
||||
FROM ml_detected_anomalies
|
||||
WHERE src_ip = %(ip)s
|
||||
AND ja4 != '' AND ja4 IS NOT NULL
|
||||
AND ja4 != %(current_ja4)s
|
||||
"""
|
||||
|
||||
other_result = db.query(other_ja4_query, {"ip": ip, "current_ja4": ja4})
|
||||
other_ja4 = [row[0] for row in other_result.result_rows]
|
||||
|
||||
return JA4Analysis(
|
||||
ja4=ja4,
|
||||
shared_ips_count=shared_count,
|
||||
top_subnets=top_subnets,
|
||||
other_ja4_for_ip=other_ja4
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ANALYSE USER-AGENTS
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/user-agents", response_model=UserAgentAnalysis)
|
||||
async def analyze_user_agents(ip: str):
|
||||
"""
|
||||
Analyse les User-Agents
|
||||
"""
|
||||
try:
|
||||
# User-Agents pour cette IP (depuis http_logs)
|
||||
ip_ua_query = """
|
||||
SELECT
|
||||
header_user_agent AS ua,
|
||||
count() AS count
|
||||
FROM mabase_prod.http_logs
|
||||
WHERE src_ip = %(ip)s
|
||||
AND header_user_agent != '' AND header_user_agent IS NOT NULL
|
||||
AND time >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY ua
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
# Total réel des requêtes (pour les pourcentages corrects)
|
||||
ip_total_query = """
|
||||
SELECT count()
|
||||
FROM mabase_prod.http_logs
|
||||
WHERE src_ip = %(ip)s
|
||||
AND time >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
ip_ua_result = db.query(ip_ua_query, {"ip": ip})
|
||||
ip_total_result = db.query(ip_total_query, {"ip": ip})
|
||||
|
||||
# Classification des UAs
|
||||
def classify_ua(ua: str) -> str:
|
||||
ua_lower = ua.lower()
|
||||
if any(bot in ua_lower for bot in ['bot', 'crawler', 'spider', 'curl', 'wget', 'python', 'requests', 'scrapy']):
|
||||
return 'bot'
|
||||
if any(script in ua_lower for script in ['python', 'java', 'php', 'ruby', 'perl', 'node']):
|
||||
return 'script'
|
||||
if not ua or ua.strip() == '':
|
||||
return 'script'
|
||||
return 'normal'
|
||||
|
||||
# Total réel de toutes les requêtes (pour des pourcentages corrects même avec LIMIT)
|
||||
total_count = ip_total_result.result_rows[0][0] if ip_total_result.result_rows else 0
|
||||
if total_count == 0:
|
||||
total_count = sum(row[1] for row in ip_ua_result.result_rows)
|
||||
|
||||
ip_user_agents = [
|
||||
UserAgentData(
|
||||
value=row[0],
|
||||
count=row[1],
|
||||
percentage=round((row[1] / total_count * 100), 2) if total_count > 0 else 0.0,
|
||||
classification=classify_ua(row[0])
|
||||
)
|
||||
for row in ip_ua_result.result_rows
|
||||
]
|
||||
|
||||
# Pour les UAs du JA4, on retourne les mêmes pour l'instant
|
||||
ja4_user_agents = ip_user_agents
|
||||
|
||||
# Pourcentage de bots
|
||||
bot_count = sum(ua.count for ua in ip_user_agents if ua.classification in ['bot', 'script'])
|
||||
bot_percentage = (bot_count / total_count * 100) if total_count > 0 else 0
|
||||
|
||||
return UserAgentAnalysis(
|
||||
ip_user_agents=ip_user_agents,
|
||||
ja4_user_agents=ja4_user_agents,
|
||||
bot_percentage=bot_percentage,
|
||||
alert=bot_percentage > 20
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# RECOMMANDATION DE CLASSIFICATION
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{ip}/recommendation", response_model=ClassificationRecommendation)
|
||||
async def get_classification_recommendation(ip: str):
|
||||
"""
|
||||
Génère une recommandation de classification basée sur les corrélations
|
||||
"""
|
||||
try:
|
||||
# Récupérer les analyses
|
||||
try:
|
||||
subnet_analysis = await analyze_subnet(ip)
|
||||
except Exception:
|
||||
subnet_analysis = None
|
||||
|
||||
try:
|
||||
country_analysis = await analyze_country(1)
|
||||
except Exception:
|
||||
country_analysis = None
|
||||
|
||||
try:
|
||||
ja4_analysis = await analyze_ja4(ip)
|
||||
except Exception:
|
||||
ja4_analysis = None
|
||||
|
||||
try:
|
||||
ua_analysis = await analyze_user_agents(ip)
|
||||
except Exception:
|
||||
ua_analysis = None
|
||||
|
||||
# Indicateurs par défaut
|
||||
indicators = CorrelationIndicators(
|
||||
subnet_ips_count=subnet_analysis.total_in_subnet if subnet_analysis else 0,
|
||||
asn_ips_count=subnet_analysis.total_in_asn if subnet_analysis else 0,
|
||||
country_percentage=0.0,
|
||||
ja4_shared_ips=ja4_analysis.shared_ips_count if ja4_analysis else 0,
|
||||
user_agents_count=len(ua_analysis.ja4_user_agents) if ua_analysis else 0,
|
||||
bot_ua_percentage=ua_analysis.bot_percentage if ua_analysis else 0.0
|
||||
)
|
||||
|
||||
# Score de confiance
|
||||
score = 0.0
|
||||
reasons = []
|
||||
tags = []
|
||||
|
||||
# Subnet > 10 IPs
|
||||
if subnet_analysis and subnet_analysis.total_in_subnet > 10:
|
||||
score += 0.25
|
||||
reasons.append(f"{subnet_analysis.total_in_subnet} IPs du même subnet")
|
||||
tags.append("distributed")
|
||||
|
||||
# JA4 partagé > 50 IPs
|
||||
if ja4_analysis and ja4_analysis.shared_ips_count > 50:
|
||||
score += 0.25
|
||||
reasons.append(f"{ja4_analysis.shared_ips_count} IPs avec même JA4")
|
||||
tags.append("ja4-rotation")
|
||||
|
||||
# Bot UA > 20%
|
||||
if ua_analysis and ua_analysis.bot_percentage > 20:
|
||||
score += 0.25
|
||||
reasons.append(f"{ua_analysis.bot_percentage:.0f}% UAs bots/scripts")
|
||||
tags.append("bot-ua")
|
||||
|
||||
# Pays surreprésenté
|
||||
if country_analysis and country_analysis.alert_country:
|
||||
score += 0.15
|
||||
reasons.append(f"Pays {country_analysis.alert_country} surreprésenté")
|
||||
tags.append(f"country-{country_analysis.alert_country.lower()}")
|
||||
|
||||
# ASN hosting
|
||||
if subnet_analysis:
|
||||
hosting_keywords = ["ovh", "amazon", "aws", "google", "azure", "digitalocean", "linode", "vultr", "china169", "chinamobile"]
|
||||
if any(kw in (subnet_analysis.asn_org or "").lower() for kw in hosting_keywords):
|
||||
score += 0.10
|
||||
tags.append("hosting-asn")
|
||||
|
||||
# Déterminer label
|
||||
if score >= 0.7:
|
||||
label = ClassificationLabel.MALICIOUS
|
||||
tags.append("campaign")
|
||||
elif score >= 0.4:
|
||||
label = ClassificationLabel.SUSPICIOUS
|
||||
else:
|
||||
label = ClassificationLabel.LEGITIMATE
|
||||
|
||||
reason = " | ".join(reasons) if reasons else "Aucun indicateur fort"
|
||||
|
||||
return ClassificationRecommendation(
|
||||
label=label,
|
||||
confidence=min(score, 1.0),
|
||||
indicators=indicators,
|
||||
suggested_tags=tags,
|
||||
reason=reason
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLASSIFICATIONS CRUD
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/classifications", response_model=Classification)
|
||||
async def create_classification(data: ClassificationCreate):
|
||||
"""
|
||||
Crée une classification pour une IP ou un JA4
|
||||
"""
|
||||
try:
|
||||
# Validation: soit ip, soit ja4 doit être fourni
|
||||
if not data.ip and not data.ja4:
|
||||
raise HTTPException(status_code=400, detail="IP ou JA4 requis")
|
||||
|
||||
query = """
|
||||
INSERT INTO mabase_prod.classifications
|
||||
(ip, ja4, label, tags, comment, confidence, features, analyst, created_at)
|
||||
VALUES
|
||||
(%(ip)s, %(ja4)s, %(label)s, %(tags)s, %(comment)s, %(confidence)s, %(features)s, %(analyst)s, now())
|
||||
"""
|
||||
|
||||
db.query(query, {
|
||||
"ip": data.ip or "",
|
||||
"ja4": data.ja4 or "",
|
||||
"label": data.label.value,
|
||||
"tags": data.tags,
|
||||
"comment": data.comment,
|
||||
"confidence": data.confidence,
|
||||
"features": json.dumps(data.features),
|
||||
"analyst": data.analyst
|
||||
})
|
||||
|
||||
# Récupérer la classification créée
|
||||
where_clause = "ip = %(entity)s" if data.ip else "ja4 = %(entity)s"
|
||||
select_query = f"""
|
||||
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
|
||||
FROM mabase_prod.classifications
|
||||
WHERE {where_clause}
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
result = db.query(select_query, {"entity": data.ip or data.ja4})
|
||||
|
||||
if not result.result_rows:
|
||||
raise HTTPException(status_code=404, detail="Classification non trouvée")
|
||||
|
||||
row = result.result_rows[0]
|
||||
return Classification(
|
||||
ip=row[0] or None,
|
||||
ja4=row[1] or None,
|
||||
label=ClassificationLabel(row[2]),
|
||||
tags=row[3],
|
||||
comment=row[4],
|
||||
confidence=row[5],
|
||||
features=json.loads(row[6]) if row[6] else {},
|
||||
analyst=row[7],
|
||||
created_at=row[8]
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/classifications", response_model=ClassificationsListResponse)
|
||||
async def list_classifications(
|
||||
ip: Optional[str] = Query(None, description="Filtrer par IP"),
|
||||
ja4: Optional[str] = Query(None, description="Filtrer par JA4"),
|
||||
label: Optional[str] = Query(None, description="Filtrer par label"),
|
||||
limit: int = Query(100, ge=1, le=1000)
|
||||
):
|
||||
"""
|
||||
Liste les classifications
|
||||
"""
|
||||
try:
|
||||
where_clauses = ["1=1"]
|
||||
params = {"limit": limit}
|
||||
|
||||
if ip:
|
||||
where_clauses.append("ip = %(ip)s")
|
||||
params["ip"] = ip
|
||||
|
||||
if ja4:
|
||||
where_clauses.append("ja4 = %(ja4)s")
|
||||
params["ja4"] = ja4
|
||||
|
||||
if label:
|
||||
where_clauses.append("label = %(label)s")
|
||||
params["label"] = label
|
||||
|
||||
where_clause = " AND ".join(where_clauses)
|
||||
|
||||
query = f"""
|
||||
SELECT ip, ja4, label, tags, comment, confidence, features, analyst, created_at
|
||||
FROM mabase_prod.classifications
|
||||
WHERE {where_clause}
|
||||
ORDER BY created_at DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
result = db.query(query, params)
|
||||
|
||||
classifications = [
|
||||
Classification(
|
||||
ip=row[0] or None,
|
||||
ja4=row[1] or None,
|
||||
label=ClassificationLabel(row[2]),
|
||||
tags=row[3],
|
||||
comment=row[4],
|
||||
confidence=row[5],
|
||||
features=json.loads(row[6]) if row[6] else {},
|
||||
analyst=row[7],
|
||||
created_at=row[8]
|
||||
)
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
# Total
|
||||
count_query = f"""
|
||||
SELECT count()
|
||||
FROM mabase_prod.classifications
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
|
||||
count_result = db.query(count_query, params)
|
||||
total = count_result.result_rows[0][0] if count_result.result_rows else 0
|
||||
|
||||
return ClassificationsListResponse(
|
||||
items=classifications,
|
||||
total=total
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/classifications/stats")
|
||||
async def get_classification_stats():
|
||||
"""
|
||||
Statistiques des classifications
|
||||
"""
|
||||
try:
|
||||
stats_query = """
|
||||
SELECT
|
||||
label,
|
||||
count() AS total,
|
||||
uniq(ip) AS unique_ips,
|
||||
avg(confidence) AS avg_confidence
|
||||
FROM mabase_prod.classifications
|
||||
GROUP BY label
|
||||
ORDER BY total DESC
|
||||
"""
|
||||
|
||||
result = db.query(stats_query)
|
||||
|
||||
stats = [
|
||||
{
|
||||
"label": row[0],
|
||||
"total": row[1],
|
||||
"unique_ips": row[2],
|
||||
"avg_confidence": float(row[3]) if row[3] else 0.0
|
||||
}
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
return {"stats": stats}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
92
services/dashboard/backend/routes/attributes.py
Normal file
92
services/dashboard/backend/routes/attributes.py
Normal file
@ -0,0 +1,92 @@
|
||||
"""
|
||||
Endpoints pour la liste des attributs uniques
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from ..database import db
|
||||
from ..models import AttributeListResponse, AttributeListItem
|
||||
|
||||
router = APIRouter(prefix="/api/attributes", tags=["attributes"])
|
||||
|
||||
|
||||
@router.get("/{attr_type}", response_model=AttributeListResponse)
|
||||
async def get_attributes(
|
||||
attr_type: str,
|
||||
limit: int = Query(100, ge=1, le=1000, description="Nombre maximum de résultats")
|
||||
):
|
||||
"""
|
||||
Récupère la liste des valeurs uniques pour un type d'attribut
|
||||
"""
|
||||
try:
|
||||
# Mapping des types vers les colonnes
|
||||
type_column_map = {
|
||||
"ip": "src_ip",
|
||||
"ja4": "ja4",
|
||||
"country": "country_code",
|
||||
"asn": "asn_number",
|
||||
"host": "host",
|
||||
"threat_level": "threat_level",
|
||||
"model_name": "model_name",
|
||||
"asn_org": "asn_org"
|
||||
}
|
||||
|
||||
if attr_type not in type_column_map:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
|
||||
)
|
||||
|
||||
column = type_column_map[attr_type]
|
||||
|
||||
# Requête de base
|
||||
base_query = f"""
|
||||
SELECT
|
||||
{column} AS value,
|
||||
count() AS count
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
# Ajout du filtre pour exclure les valeurs vides/nulles
|
||||
# Gestion spéciale pour les types IPv6/IPv4 qui ne peuvent pas être comparés à ''
|
||||
if attr_type == "ip":
|
||||
# Pour les adresses IP, on convertit en string et on filtre
|
||||
query = f"""
|
||||
SELECT value, count FROM (
|
||||
SELECT toString({column}) AS value, count() AS count
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY {column}
|
||||
)
|
||||
WHERE value != '' AND value IS NOT NULL
|
||||
ORDER BY count DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
else:
|
||||
query = f"""
|
||||
{base_query}
|
||||
AND {column} != '' AND {column} IS NOT NULL
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
result = db.query(query, {"limit": limit})
|
||||
|
||||
items = [
|
||||
AttributeListItem(
|
||||
value=str(row[0]),
|
||||
count=row[1]
|
||||
)
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
return AttributeListResponse(
|
||||
type=attr_type,
|
||||
items=items,
|
||||
total=len(items)
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
238
services/dashboard/backend/routes/audit.py
Normal file
238
services/dashboard/backend/routes/audit.py
Normal file
@ -0,0 +1,238 @@
|
||||
"""
|
||||
Routes pour l'audit et les logs d'activité
|
||||
"""
|
||||
import logging
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/audit", tags=["audit"])
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.post("/logs")
|
||||
async def create_audit_log(
|
||||
request: Request,
|
||||
action: str,
|
||||
entity_type: Optional[str] = None,
|
||||
entity_id: Optional[str] = None,
|
||||
entity_count: Optional[int] = None,
|
||||
details: Optional[dict] = None,
|
||||
user: Optional[str] = "soc_user"
|
||||
):
|
||||
"""
|
||||
Crée un log d'audit pour une action utilisateur
|
||||
"""
|
||||
try:
|
||||
# Récupérer l'IP du client
|
||||
client_ip = request.client.host if request.client else "unknown"
|
||||
|
||||
# Insérer dans ClickHouse
|
||||
insert_query = """
|
||||
INSERT INTO mabase_prod.audit_logs
|
||||
(timestamp, user_name, action, entity_type, entity_id, entity_count, details, client_ip)
|
||||
VALUES
|
||||
(%(timestamp)s, %(user)s, %(action)s, %(entity_type)s, %(entity_id)s, %(entity_count)s, %(details)s, %(client_ip)s)
|
||||
"""
|
||||
|
||||
params = {
|
||||
'timestamp': datetime.now(),
|
||||
'user': user,
|
||||
'action': action,
|
||||
'entity_type': entity_type,
|
||||
'entity_id': entity_id,
|
||||
'entity_count': entity_count,
|
||||
'details': str(details) if details else '',
|
||||
'client_ip': client_ip
|
||||
}
|
||||
|
||||
# Note: This requires the audit_logs table to exist
|
||||
# See deploy_audit_logs_table.sql
|
||||
try:
|
||||
db.query(insert_query, params)
|
||||
except Exception as e:
|
||||
# La table peut ne pas encore exister — on logue mais on ne bloque pas l'appelant
|
||||
logger.warning(f"Could not insert audit log: {e}")
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": "Audit log created",
|
||||
"action": action,
|
||||
"timestamp": params['timestamp'].isoformat()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/logs")
|
||||
async def get_audit_logs(
|
||||
hours: int = Query(24, ge=1, le=720, description="Fenêtre temporelle en heures"),
|
||||
user: Optional[str] = Query(None, description="Filtrer par utilisateur"),
|
||||
action: Optional[str] = Query(None, description="Filtrer par action"),
|
||||
entity_type: Optional[str] = Query(None, description="Filtrer par type d'entité"),
|
||||
limit: int = Query(100, ge=1, le=1000, description="Nombre maximum de résultats")
|
||||
):
|
||||
"""
|
||||
Récupère les logs d'audit avec filtres
|
||||
"""
|
||||
try:
|
||||
where_clauses = ["timestamp >= now() - INTERVAL %(hours)s HOUR"]
|
||||
params = {"hours": hours, "limit": limit}
|
||||
|
||||
if user:
|
||||
where_clauses.append("user_name = %(user)s")
|
||||
params["user"] = user
|
||||
|
||||
if action:
|
||||
where_clauses.append("action = %(action)s")
|
||||
params["action"] = action
|
||||
|
||||
if entity_type:
|
||||
where_clauses.append("entity_type = %(entity_type)s")
|
||||
params["entity_type"] = entity_type
|
||||
|
||||
where_clause = " AND ".join(where_clauses)
|
||||
|
||||
query = f"""
|
||||
SELECT
|
||||
timestamp,
|
||||
user_name,
|
||||
action,
|
||||
entity_type,
|
||||
entity_id,
|
||||
entity_count,
|
||||
details,
|
||||
client_ip
|
||||
FROM mabase_prod.audit_logs
|
||||
WHERE {where_clause}
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
result = db.query(query, params)
|
||||
|
||||
logs = []
|
||||
for row in result.result_rows:
|
||||
logs.append({
|
||||
"timestamp": row[0].isoformat() if row[0] else "",
|
||||
"user_name": row[1] or "",
|
||||
"action": row[2] or "",
|
||||
"entity_type": row[3] or "",
|
||||
"entity_id": row[4] or "",
|
||||
"entity_count": row[5] or 0,
|
||||
"details": row[6] or "",
|
||||
"client_ip": row[7] or ""
|
||||
})
|
||||
|
||||
return {
|
||||
"items": logs,
|
||||
"total": len(logs),
|
||||
"period_hours": hours
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
# If table doesn't exist, return empty result
|
||||
if "Table" in str(e) and "doesn't exist" in str(e):
|
||||
return {
|
||||
"items": [],
|
||||
"total": 0,
|
||||
"period_hours": hours,
|
||||
"warning": "Audit logs table not created yet"
|
||||
}
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/stats")
|
||||
async def get_audit_stats(
|
||||
hours: int = Query(24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Statistiques d'audit
|
||||
"""
|
||||
try:
|
||||
query = """
|
||||
SELECT
|
||||
action,
|
||||
count() AS count,
|
||||
uniq(user_name) AS unique_users,
|
||||
sum(entity_count) AS total_entities
|
||||
FROM mabase_prod.audit_logs
|
||||
WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY action
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
result = db.query(query, {"hours": hours})
|
||||
|
||||
stats = []
|
||||
for row in result.result_rows:
|
||||
stats.append({
|
||||
"action": row[0] or "",
|
||||
"count": row[1] or 0,
|
||||
"unique_users": row[2] or 0,
|
||||
"total_entities": row[3] or 0
|
||||
})
|
||||
|
||||
return {
|
||||
"items": stats,
|
||||
"period_hours": hours
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
if "Table" in str(e) and "doesn't exist" in str(e):
|
||||
return {
|
||||
"items": [],
|
||||
"period_hours": hours,
|
||||
"warning": "Audit logs table not created yet"
|
||||
}
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/users/activity")
|
||||
async def get_user_activity(
|
||||
hours: int = Query(24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Activité par utilisateur
|
||||
"""
|
||||
try:
|
||||
query = """
|
||||
SELECT
|
||||
user_name,
|
||||
count() AS actions,
|
||||
uniq(action) AS action_types,
|
||||
min(timestamp) AS first_action,
|
||||
max(timestamp) AS last_action
|
||||
FROM mabase_prod.audit_logs
|
||||
WHERE timestamp >= now() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY user_name
|
||||
ORDER BY actions DESC
|
||||
"""
|
||||
|
||||
result = db.query(query, {"hours": hours})
|
||||
|
||||
users = []
|
||||
for row in result.result_rows:
|
||||
users.append({
|
||||
"user_name": row[0] or "",
|
||||
"actions": row[1] or 0,
|
||||
"action_types": row[2] or 0,
|
||||
"first_action": row[3].isoformat() if row[3] else "",
|
||||
"last_action": row[4].isoformat() if row[4] else ""
|
||||
})
|
||||
|
||||
return {
|
||||
"items": users,
|
||||
"period_hours": hours
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
if "Table" in str(e) and "doesn't exist" in str(e):
|
||||
return {
|
||||
"items": [],
|
||||
"period_hours": hours,
|
||||
"warning": "Audit logs table not created yet"
|
||||
}
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
105
services/dashboard/backend/routes/botnets.py
Normal file
105
services/dashboard/backend/routes/botnets.py
Normal file
@ -0,0 +1,105 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des botnets via la propagation des fingerprints JA4
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/botnets", tags=["botnets"])
|
||||
|
||||
|
||||
def _botnet_class(unique_countries: int) -> str:
|
||||
if unique_countries > 100:
|
||||
return "global_botnet"
|
||||
if unique_countries > 20:
|
||||
return "regional_botnet"
|
||||
return "concentrated"
|
||||
|
||||
|
||||
@router.get("/ja4-spread")
|
||||
async def get_ja4_spread():
|
||||
"""Propagation des JA4 fingerprints à travers les pays et les IPs."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
ja4,
|
||||
unique_ips,
|
||||
unique_countries,
|
||||
targeted_hosts
|
||||
FROM mabase_prod.view_host_ja4_anomalies
|
||||
ORDER BY unique_countries DESC
|
||||
"""
|
||||
result = db.query(sql)
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
ja4 = str(row[0])
|
||||
unique_ips = int(row[1])
|
||||
unique_countries = int(row[2])
|
||||
targeted_hosts = int(row[3])
|
||||
dist_score = round(
|
||||
unique_countries / max(unique_ips ** 0.5, 0.001), 2
|
||||
)
|
||||
items.append({
|
||||
"ja4": ja4,
|
||||
"unique_ips": unique_ips,
|
||||
"unique_countries": unique_countries,
|
||||
"targeted_hosts": targeted_hosts,
|
||||
"distribution_score":dist_score,
|
||||
"botnet_class": _botnet_class(unique_countries),
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/ja4/{ja4}/countries")
|
||||
async def get_ja4_countries(ja4: str, limit: int = Query(30, ge=1, le=200)):
|
||||
"""Top pays pour un JA4 donné depuis agg_host_ip_ja4_1h."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
src_country_code AS country_code,
|
||||
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
|
||||
sum(hits) AS hits
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE ja4 = %(ja4)s
|
||||
GROUP BY src_country_code
|
||||
ORDER BY unique_ips DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"ja4": ja4, "limit": limit})
|
||||
items = [
|
||||
{
|
||||
"country_code": str(row[0]),
|
||||
"unique_ips": int(row[1]),
|
||||
"hits": int(row[2]),
|
||||
}
|
||||
for row in result.result_rows
|
||||
]
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/summary")
|
||||
async def get_botnets_summary():
|
||||
"""Statistiques globales sur les botnets détectés."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
countIf(unique_countries > 100) AS total_global_botnets,
|
||||
sumIf(unique_ips, unique_countries > 50) AS total_ips_in_botnets,
|
||||
argMax(ja4, unique_countries) AS most_spread_ja4,
|
||||
argMax(ja4, unique_ips) AS most_ips_ja4
|
||||
FROM mabase_prod.view_host_ja4_anomalies
|
||||
"""
|
||||
result = db.query(sql)
|
||||
row = result.result_rows[0]
|
||||
return {
|
||||
"total_global_botnets": int(row[0]),
|
||||
"total_ips_in_botnets": int(row[1]),
|
||||
"most_spread_ja4": str(row[2]),
|
||||
"most_ips_ja4": str(row[3]),
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
141
services/dashboard/backend/routes/bruteforce.py
Normal file
141
services/dashboard/backend/routes/bruteforce.py
Normal file
@ -0,0 +1,141 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des attaques par force brute sur les formulaires
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/bruteforce", tags=["bruteforce"])
|
||||
|
||||
|
||||
@router.get("/targets")
|
||||
async def get_bruteforce_targets():
|
||||
"""Liste des hôtes ciblés par brute-force, triés par total_hits DESC."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
host,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
sum(hits) AS total_hits,
|
||||
sum(query_params_count) AS total_params,
|
||||
groupArray(3)(ja4) AS top_ja4s
|
||||
FROM mabase_prod.view_form_bruteforce_detected
|
||||
GROUP BY host
|
||||
ORDER BY total_hits DESC
|
||||
"""
|
||||
result = db.query(sql)
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
host = str(row[0])
|
||||
unique_ips = int(row[1])
|
||||
total_hits = int(row[2])
|
||||
total_params= int(row[3])
|
||||
top_ja4s = [str(j) for j in (row[4] or [])]
|
||||
attack_type = (
|
||||
"credential_stuffing"
|
||||
if total_hits > 0 and total_params / total_hits > 0.5
|
||||
else "enumeration"
|
||||
)
|
||||
items.append({
|
||||
"host": host,
|
||||
"unique_ips": unique_ips,
|
||||
"total_hits": total_hits,
|
||||
"total_params":total_params,
|
||||
"attack_type": attack_type,
|
||||
"top_ja4s": top_ja4s,
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/attackers")
|
||||
async def get_bruteforce_attackers(limit: int = Query(50, ge=1, le=500)):
|
||||
"""Top IPs attaquantes triées par total_hits DESC."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
uniq(host) AS distinct_hosts,
|
||||
sum(hits) AS total_hits,
|
||||
sum(query_params_count) AS total_params,
|
||||
argMax(ja4, hits) AS ja4
|
||||
FROM mabase_prod.view_form_bruteforce_detected
|
||||
GROUP BY src_ip
|
||||
ORDER BY total_hits DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"distinct_hosts":int(row[1]),
|
||||
"total_hits": int(row[2]),
|
||||
"total_params": int(row[3]),
|
||||
"ja4": str(row[4]),
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/timeline")
|
||||
async def get_bruteforce_timeline():
|
||||
"""Hits par heure (dernières 72h) depuis agg_host_ip_ja4_1h."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
toHour(window_start) AS hour,
|
||||
sum(hits) AS hits,
|
||||
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS ips
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 72 HOUR
|
||||
GROUP BY hour
|
||||
ORDER BY hour ASC
|
||||
"""
|
||||
result = db.query(sql)
|
||||
hours = []
|
||||
for row in result.result_rows:
|
||||
hours.append({
|
||||
"hour": int(row[0]),
|
||||
"hits": int(row[1]),
|
||||
"ips": int(row[2]),
|
||||
})
|
||||
return {"hours": hours}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/host/{host:path}/attackers")
|
||||
async def get_host_attackers(host: str, limit: int = Query(20, ge=1, le=200)):
|
||||
"""Top IPs attaquant un hôte spécifique, avec JA4 et type d'attaque."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
sum(hits) AS total_hits,
|
||||
sum(query_params_count) AS total_params,
|
||||
argMax(ja4, hits) AS ja4,
|
||||
max(hits) AS max_hits_per_window
|
||||
FROM mabase_prod.view_form_bruteforce_detected
|
||||
WHERE host = %(host)s
|
||||
GROUP BY src_ip
|
||||
ORDER BY total_hits DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"host": host, "limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
total_hits = int(row[1])
|
||||
total_params = int(row[2])
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"total_hits": total_hits,
|
||||
"total_params":total_params,
|
||||
"ja4": str(row[3] or ""),
|
||||
"attack_type": "credential_stuffing" if total_hits > 0 and total_params / total_hits > 0.5 else "enumeration",
|
||||
})
|
||||
return {"host": host, "items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
551
services/dashboard/backend/routes/clustering.py
Normal file
551
services/dashboard/backend/routes/clustering.py
Normal file
@ -0,0 +1,551 @@
|
||||
"""
|
||||
Clustering d'IPs multi-métriques — WebGL / deck.gl backend.
|
||||
|
||||
- Calcul sur la TOTALITÉ des IPs (GROUP BY src_ip, ja4 sans LIMIT)
|
||||
- K-means++ vectorisé (numpy) + PCA-2D + enveloppes convexes (scipy)
|
||||
- Calcul en background thread + cache 30 min
|
||||
- Endpoints : /clusters, /status, /cluster/{id}/points
|
||||
"""
|
||||
import math
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
from collections import Counter
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
from ..services.clustering_engine import (
|
||||
FEATURE_NAMES,
|
||||
build_feature_vector, kmeans_pp, pca_2d, compute_hulls,
|
||||
name_cluster, risk_score_from_centroid, standardize,
|
||||
risk_to_gradient_color,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/api/clustering", tags=["clustering"])
|
||||
|
||||
# ─── Cache global ──────────────────────────────────────────────────────────────
|
||||
_CACHE: dict[str, Any] = {
|
||||
"status": "idle", # idle | computing | ready | error
|
||||
"error": None,
|
||||
"result": None, # dict résultat complet
|
||||
"ts": 0.0, # timestamp dernière mise à jour
|
||||
"params": {},
|
||||
"cluster_ips": {}, # cluster_idx → [(ip, ja4, pca_x, pca_y, risk)]
|
||||
}
|
||||
_CACHE_TTL = 1800 # 30 minutes
|
||||
_LOCK = threading.Lock()
|
||||
_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="clustering")
|
||||
|
||||
# ─── Palette de couleurs (remplace l'ancienne logique menace) ─────────────────
|
||||
# Les couleurs sont désormais attribuées par index de cluster pour maximiser
|
||||
# la distinction visuelle, indépendamment du niveau de risque.
|
||||
|
||||
|
||||
# ─── SQL : TOUTES les IPs sans LIMIT ─────────────────────────────────────────
|
||||
_SQL_ALL_IPS = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS ip,
|
||||
t.ja4,
|
||||
any(t.tcp_ttl_raw) AS ttl,
|
||||
any(t.tcp_win_raw) AS win,
|
||||
any(t.tcp_scale_raw) AS scale,
|
||||
any(t.tcp_mss_raw) AS mss,
|
||||
any(t.first_ua) AS ua,
|
||||
sum(t.hits) AS hits,
|
||||
|
||||
avg(abs(ml.anomaly_score)) AS avg_score,
|
||||
avg(ml.hit_velocity) AS avg_velocity,
|
||||
avg(ml.fuzzing_index) AS avg_fuzzing,
|
||||
avg(ml.is_headless) AS pct_headless,
|
||||
avg(ml.post_ratio) AS avg_post,
|
||||
avg(ml.ip_id_zero_ratio) AS ip_id_zero,
|
||||
avg(ml.temporal_entropy) AS entropy,
|
||||
avg(ml.modern_browser_score) AS browser_score,
|
||||
avg(ml.alpn_http_mismatch) AS alpn_mismatch,
|
||||
avg(ml.is_alpn_missing) AS alpn_missing,
|
||||
avg(ml.multiplexing_efficiency) AS h2_eff,
|
||||
avg(ml.header_order_confidence) AS hdr_conf,
|
||||
avg(ml.ua_ch_mismatch) AS ua_ch_mismatch,
|
||||
avg(ml.asset_ratio) AS asset_ratio,
|
||||
avg(ml.direct_access_ratio) AS direct_ratio,
|
||||
avg(ml.distinct_ja4_count) AS ja4_count,
|
||||
max(ml.is_ua_rotating) AS ua_rotating,
|
||||
|
||||
max(ml.threat_level) AS threat,
|
||||
any(ml.country_code) AS country,
|
||||
any(ml.asn_org) AS asn_org,
|
||||
|
||||
-- Features headers HTTP (depuis view_dashboard_entities)
|
||||
avg(ml.has_accept_language) AS hdr_accept_lang,
|
||||
any(vh.hdr_enc) AS hdr_has_encoding,
|
||||
any(vh.hdr_sec_fetch) AS hdr_has_sec_fetch,
|
||||
any(vh.hdr_count) AS hdr_count_raw,
|
||||
|
||||
-- Fingerprint HTTP Headers (depuis agg_header_fingerprint_1h + ml_detected_anomalies)
|
||||
-- header_order_shared_count : nb d'IPs partageant le même fingerprint
|
||||
-- → faible = fingerprint rare = comportement suspect
|
||||
avg(ml.header_order_shared_count) AS hfp_shared_count,
|
||||
-- distinct_header_orders : nb de fingerprints distincts émis par cette IP
|
||||
-- → élevé = rotation de fingerprint = comportement bot
|
||||
avg(ml.distinct_header_orders) AS hfp_distinct_orders,
|
||||
-- Cookie et Referer issus de la table dédiée aux empreintes
|
||||
any(hfp.hfp_cookie) AS hfp_cookie,
|
||||
any(hfp.hfp_referer) AS hfp_referer
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h t
|
||||
LEFT JOIN mabase_prod.ml_detected_anomalies ml
|
||||
ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
|
||||
AND ml.detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
LEFT JOIN (
|
||||
SELECT
|
||||
toIPv6(concat('::ffff:', toString(src_ip))) AS src_ip_v6,
|
||||
ja4,
|
||||
any(arrayExists(x -> x LIKE '%%Accept-Encoding%%', client_headers)) AS hdr_enc,
|
||||
any(arrayExists(x -> x LIKE '%%Sec-Fetch%%', client_headers)) AS hdr_sec_fetch,
|
||||
any(length(splitByChar(',', client_headers[1]))) AS hdr_count
|
||||
FROM mabase_prod.view_dashboard_entities
|
||||
WHERE length(client_headers) > 0
|
||||
AND log_date >= today() - 2
|
||||
GROUP BY src_ip_v6, ja4
|
||||
) vh ON t.src_ip = vh.src_ip_v6 AND t.ja4 = vh.ja4
|
||||
LEFT JOIN (
|
||||
SELECT
|
||||
src_ip,
|
||||
avg(has_cookie) AS hfp_cookie,
|
||||
avg(has_referer) AS hfp_referer
|
||||
FROM mabase_prod.agg_header_fingerprint_1h
|
||||
WHERE window_start >= now() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY src_ip
|
||||
) hfp ON t.src_ip = hfp.src_ip
|
||||
WHERE t.window_start >= now() - INTERVAL %(hours)s HOUR
|
||||
AND t.tcp_ttl_raw > 0
|
||||
GROUP BY t.src_ip, t.ja4
|
||||
"""
|
||||
|
||||
_SQL_COLS = [
|
||||
"ip", "ja4", "ttl", "win", "scale", "mss", "ua", "hits",
|
||||
"avg_score", "avg_velocity", "avg_fuzzing", "pct_headless", "avg_post",
|
||||
"ip_id_zero", "entropy", "browser_score", "alpn_mismatch", "alpn_missing",
|
||||
"h2_eff", "hdr_conf", "ua_ch_mismatch", "asset_ratio", "direct_ratio",
|
||||
"ja4_count", "ua_rotating", "threat", "country", "asn_org",
|
||||
"hdr_accept_lang", "hdr_has_encoding", "hdr_has_sec_fetch", "hdr_count_raw",
|
||||
"hfp_shared_count", "hfp_distinct_orders", "hfp_cookie", "hfp_referer",
|
||||
]
|
||||
|
||||
|
||||
# ─── Worker de clustering (thread pool) ──────────────────────────────────────
|
||||
|
||||
def _run_clustering_job(k: int, hours: int, sensitivity: float = 1.0) -> None:
|
||||
"""Exécuté dans le thread pool. Met à jour _CACHE.
|
||||
|
||||
sensitivity : multiplicateur de k [0.5 – 5.0].
|
||||
0.5 = vue très agrégée (k/2 clusters)
|
||||
1.0 = comportement par défaut
|
||||
2.0 = deux fois plus de clusters → groupes plus homogènes
|
||||
5.0 = granularité maximale (classification la plus fine)
|
||||
|
||||
k_actual est plafonné à 300 pour éviter des temps de calcul excessifs.
|
||||
n_init est réduit à 1 quand k_actual > 60 pour rester rapide.
|
||||
"""
|
||||
k_actual = max(4, min(300, round(k * sensitivity)))
|
||||
t0 = time.time()
|
||||
with _LOCK:
|
||||
_CACHE["status"] = "computing"
|
||||
_CACHE["error"] = None
|
||||
|
||||
try:
|
||||
log.info(f"[clustering] Démarrage k={k_actual} (base={k}×sens={sensitivity}) hours={hours}")
|
||||
|
||||
# ── 1. Chargement de toutes les IPs ──────────────────────────────
|
||||
result = db.query(_SQL_ALL_IPS, {"hours": hours})
|
||||
rows: list[dict] = []
|
||||
for row in result.result_rows:
|
||||
rows.append({col: row[i] for i, col in enumerate(_SQL_COLS)})
|
||||
|
||||
n = len(rows)
|
||||
log.info(f"[clustering] {n} IPs chargées")
|
||||
if n < k_actual:
|
||||
raise ValueError(f"Seulement {n} IPs disponibles (k={k_actual} requis)")
|
||||
|
||||
# ── 2. Construction de la matrice de features (numpy) ────────────
|
||||
X = np.array([build_feature_vector(r) for r in rows], dtype=np.float32)
|
||||
log.info(f"[clustering] Matrice X: {X.shape} — {X.nbytes/1024/1024:.1f} MB")
|
||||
|
||||
# ── 3. Standardisation z-score ────────────────────────────────────
|
||||
# Normalise par variance : features discriminantes (forte std)
|
||||
# contribuent plus que les features quasi-constantes.
|
||||
X64 = X.astype(np.float64)
|
||||
X_std, feat_mean, feat_std = standardize(X64)
|
||||
|
||||
# ── 4. K-means++ sur l'espace standardisé ────────────────────────
|
||||
# n_init réduit à 1 pour k élevé (> 60) afin de limiter le temps de calcul
|
||||
n_init = 1 if k_actual > 60 else 3
|
||||
km = kmeans_pp(X_std, k=k_actual, max_iter=80, n_init=n_init, seed=42)
|
||||
log.info(f"[clustering] K-means: {km.n_iter} iters, inertia={km.inertia:.2f}")
|
||||
|
||||
# Centroïdes dans l'espace original [0,1] pour affichage radar
|
||||
# (dé-standardisation : c_orig = c_std * std + mean, puis clip [0,1])
|
||||
centroids_orig = np.clip(km.centroids * feat_std + feat_mean, 0.0, 1.0)
|
||||
|
||||
# ── 5. PCA-2D sur les features ORIGINALES (normalisées [0,1]) ────
|
||||
coords = pca_2d(X64) # (n, 2), normalisé [0,1]
|
||||
|
||||
# ── 5b. Enveloppes convexes par cluster ──────────────────────────
|
||||
hulls = compute_hulls(coords, km.labels, k_actual)
|
||||
|
||||
# ── 6. Agrégation par cluster ─────────────────────────────────────
|
||||
cluster_rows: list[list[dict]] = [[] for _ in range(k_actual)]
|
||||
cluster_coords: list[list[list[float]]] = [[] for _ in range(k_actual)]
|
||||
cluster_ips_map: dict[int, list] = {j: [] for j in range(k_actual)}
|
||||
|
||||
for i, label in enumerate(km.labels):
|
||||
j = int(label)
|
||||
cluster_rows[j].append(rows[i])
|
||||
cluster_coords[j].append(coords[i].tolist())
|
||||
cluster_ips_map[j].append((
|
||||
rows[i]["ip"],
|
||||
rows[i]["ja4"],
|
||||
float(coords[i][0]),
|
||||
float(coords[i][1]),
|
||||
float(risk_score_from_centroid(centroids_orig[j])),
|
||||
))
|
||||
|
||||
# ── 7. Construction des nœuds ─────────────────────────────────────
|
||||
nodes = []
|
||||
for j in range(k_actual):
|
||||
if not cluster_rows[j]:
|
||||
continue
|
||||
|
||||
def avg_f(key: str, crows: list[dict] = cluster_rows[j]) -> float:
|
||||
return float(np.mean([float(r.get(key) or 0) for r in crows]))
|
||||
|
||||
mean_ttl = avg_f("ttl")
|
||||
mean_mss = avg_f("mss")
|
||||
mean_scale = avg_f("scale")
|
||||
mean_win = avg_f("win")
|
||||
|
||||
raw_stats = {"mean_ttl": mean_ttl, "mean_mss": mean_mss, "mean_scale": mean_scale}
|
||||
label_name = name_cluster(centroids_orig[j], raw_stats)
|
||||
risk = float(risk_score_from_centroid(centroids_orig[j]))
|
||||
color = risk_to_gradient_color(risk)
|
||||
|
||||
# Centroïde 2D = moyenne des coords du cluster
|
||||
cxy = np.mean(cluster_coords[j], axis=0).tolist() if cluster_coords[j] else [0.5, 0.5]
|
||||
ip_set = list({r["ip"] for r in cluster_rows[j]})
|
||||
ip_count = len(ip_set)
|
||||
hit_count = int(sum(float(r.get("hits") or 0) for r in cluster_rows[j]))
|
||||
|
||||
threats = [str(r.get("threat") or "") for r in cluster_rows[j] if r.get("threat")]
|
||||
countries = [str(r.get("country") or "") for r in cluster_rows[j] if r.get("country")]
|
||||
orgs = [str(r.get("asn_org") or "") for r in cluster_rows[j] if r.get("asn_org")]
|
||||
|
||||
def topk(lst: list[str], n: int = 5) -> list[str]:
|
||||
return [v for v, _ in Counter(lst).most_common(n) if v]
|
||||
|
||||
radar = [
|
||||
{"feature": name, "value": round(float(centroids_orig[j][i]), 4)}
|
||||
for i, name in enumerate(FEATURE_NAMES)
|
||||
]
|
||||
|
||||
radius = max(8, min(30, int(math.log1p(ip_count) * 2.2)))
|
||||
|
||||
sample_rows = sorted(cluster_rows[j], key=lambda r: float(r.get("hits") or 0), reverse=True)[:8]
|
||||
sample_ips = [r["ip"] for r in sample_rows]
|
||||
sample_ua = str(cluster_rows[j][0].get("ua") or "")
|
||||
|
||||
nodes.append({
|
||||
"id": f"c{j}_k{k_actual}",
|
||||
"cluster_idx": j,
|
||||
"label": label_name,
|
||||
"pca_x": round(cxy[0], 6),
|
||||
"pca_y": round(cxy[1], 6),
|
||||
"radius": radius,
|
||||
"color": color,
|
||||
"risk_score": round(risk, 4),
|
||||
|
||||
"mean_ttl": round(mean_ttl, 1),
|
||||
"mean_mss": round(mean_mss, 0),
|
||||
"mean_scale": round(mean_scale, 1),
|
||||
"mean_win": round(mean_win, 0),
|
||||
"mean_velocity":round(avg_f("avg_velocity"),3),
|
||||
"mean_fuzzing": round(avg_f("avg_fuzzing"), 3),
|
||||
"mean_headless":round(avg_f("pct_headless"),3),
|
||||
"mean_post": round(avg_f("avg_post"), 3),
|
||||
"mean_asset": round(avg_f("asset_ratio"), 3),
|
||||
"mean_direct": round(avg_f("direct_ratio"),3),
|
||||
"mean_alpn_mismatch": round(avg_f("alpn_mismatch"),3),
|
||||
"mean_h2_eff": round(avg_f("h2_eff"), 3),
|
||||
"mean_hdr_conf":round(avg_f("hdr_conf"), 3),
|
||||
"mean_ua_ch": round(avg_f("ua_ch_mismatch"),3),
|
||||
"mean_entropy": round(avg_f("entropy"), 3),
|
||||
"mean_ja4_diversity": round(avg_f("ja4_count"),3),
|
||||
"mean_ip_id_zero": round(avg_f("ip_id_zero"),3),
|
||||
"mean_browser_score": round(avg_f("browser_score"),1),
|
||||
"mean_ua_rotating": round(avg_f("ua_rotating"),3),
|
||||
|
||||
"ip_count": ip_count,
|
||||
"hit_count": hit_count,
|
||||
"top_threat": topk(threats, 1)[0] if threats else "",
|
||||
"top_countries":topk(countries, 5),
|
||||
"top_orgs": topk(orgs, 5),
|
||||
"sample_ips": sample_ips,
|
||||
"sample_ua": sample_ua,
|
||||
"radar": radar,
|
||||
|
||||
# Hull pour deck.gl PolygonLayer
|
||||
"hull": hulls.get(j, []),
|
||||
})
|
||||
|
||||
# ── 8. Arêtes k-NN entre clusters ────────────────────────────────
|
||||
edges = []
|
||||
seen: set[frozenset] = set()
|
||||
for i, ni in enumerate(nodes):
|
||||
ci = ni["cluster_idx"]
|
||||
dists = sorted(
|
||||
[(j, nj["cluster_idx"],
|
||||
float(np.sum((centroids_orig[ci] - centroids_orig[nj["cluster_idx"]]) ** 2)))
|
||||
for j, nj in enumerate(nodes) if j != i],
|
||||
key=lambda x: x[2]
|
||||
)
|
||||
for j_idx, cj, d2 in dists[:2]:
|
||||
key = frozenset([ni["id"], nodes[j_idx]["id"]])
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
edges.append({
|
||||
"id": f"e_{ni['id']}_{nodes[j_idx]['id']}",
|
||||
"source": ni["id"],
|
||||
"target": nodes[j_idx]["id"],
|
||||
"similarity": round(1.0 / (1.0 + math.sqrt(d2)), 3),
|
||||
})
|
||||
|
||||
# ── 9. Stockage résultat + cache IPs ─────────────────────────────
|
||||
total_ips = sum(n_["ip_count"] for n_ in nodes)
|
||||
total_hits = sum(n_["hit_count"] for n_ in nodes)
|
||||
elapsed = round(time.time() - t0, 2)
|
||||
|
||||
result_dict = {
|
||||
"nodes": nodes,
|
||||
"edges": edges,
|
||||
"stats": {
|
||||
"total_clusters": len(nodes),
|
||||
"total_ips": total_ips,
|
||||
"total_hits": total_hits,
|
||||
"n_samples": n,
|
||||
"k": k_actual,
|
||||
"k_base": k,
|
||||
"sensitivity": sensitivity,
|
||||
"elapsed_s": elapsed,
|
||||
},
|
||||
"feature_names": FEATURE_NAMES,
|
||||
}
|
||||
|
||||
with _LOCK:
|
||||
_CACHE["result"] = result_dict
|
||||
_CACHE["cluster_ips"] = cluster_ips_map
|
||||
_CACHE["status"] = "ready"
|
||||
_CACHE["ts"] = time.time()
|
||||
_CACHE["params"] = {"k": k, "hours": hours, "sensitivity": sensitivity}
|
||||
_CACHE["error"] = None
|
||||
|
||||
log.info(f"[clustering] Terminé en {elapsed}s — {total_ips} IPs, {len(nodes)} clusters")
|
||||
|
||||
except Exception as e:
|
||||
log.exception("[clustering] Erreur lors du calcul")
|
||||
with _LOCK:
|
||||
_CACHE["status"] = "error"
|
||||
_CACHE["error"] = str(e)
|
||||
|
||||
|
||||
def _maybe_trigger(k: int, hours: int, sensitivity: float) -> None:
|
||||
"""Lance le calcul si cache absent, expiré ou paramètres différents."""
|
||||
with _LOCK:
|
||||
status = _CACHE["status"]
|
||||
params = _CACHE["params"]
|
||||
ts = _CACHE["ts"]
|
||||
|
||||
cache_stale = (time.time() - ts) > _CACHE_TTL
|
||||
params_changed = (
|
||||
params.get("k") != k or
|
||||
params.get("hours") != hours or
|
||||
params.get("sensitivity") != sensitivity
|
||||
)
|
||||
|
||||
if status in ("computing",):
|
||||
return # déjà en cours
|
||||
|
||||
if status == "ready" and not cache_stale and not params_changed:
|
||||
return # cache frais
|
||||
|
||||
_EXECUTOR.submit(_run_clustering_job, k, hours, sensitivity)
|
||||
|
||||
|
||||
# ─── Endpoints ────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/status")
|
||||
async def get_status():
|
||||
"""État du calcul en cours (polling frontend)."""
|
||||
with _LOCK:
|
||||
return {
|
||||
"status": _CACHE["status"],
|
||||
"error": _CACHE["error"],
|
||||
"ts": _CACHE["ts"],
|
||||
"params": _CACHE["params"],
|
||||
"age_s": round(time.time() - _CACHE["ts"], 0) if _CACHE["ts"] else None,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/clusters")
|
||||
async def get_clusters(
|
||||
k: int = Query(20, ge=4, le=100, description="Nombre de clusters de base"),
|
||||
hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle (heures)"),
|
||||
sensitivity: float = Query(1.0, ge=0.5, le=5.0, description="Sensibilité : multiplicateur de k (5.0 = granularité maximale)"),
|
||||
force: bool = Query(False, description="Forcer le recalcul"),
|
||||
):
|
||||
"""
|
||||
Clustering multi-métriques sur TOUTES les IPs.
|
||||
|
||||
k_actual = round(k × sensitivity) — la sensibilité contrôle la granularité.
|
||||
Retourne immédiatement depuis le cache. Déclenche le calcul si nécessaire.
|
||||
"""
|
||||
if force:
|
||||
with _LOCK:
|
||||
_CACHE["status"] = "idle"
|
||||
_CACHE["ts"] = 0.0
|
||||
_CACHE["result"] = None
|
||||
_CACHE["cluster_ips"] = {}
|
||||
|
||||
_maybe_trigger(k, hours, sensitivity)
|
||||
|
||||
with _LOCK:
|
||||
status = _CACHE["status"]
|
||||
result = _CACHE["result"]
|
||||
error = _CACHE["error"]
|
||||
|
||||
if status == "computing":
|
||||
return {"status": "computing", "message": "Calcul en cours, réessayez dans quelques secondes"}
|
||||
|
||||
if status == "error":
|
||||
raise HTTPException(status_code=500, detail=error or "Erreur inconnue")
|
||||
|
||||
if result is None:
|
||||
return {"status": "idle", "message": "Calcul démarré, réessayez dans quelques secondes"}
|
||||
|
||||
return {**result, "status": "ready"}
|
||||
|
||||
|
||||
@router.get("/cluster/{cluster_id}/points")
|
||||
async def get_cluster_points(
|
||||
cluster_id: str,
|
||||
limit: int = Query(5000, ge=1, le=20000),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""
|
||||
Coordonnées PCA + métadonnées de toutes les IPs d'un cluster.
|
||||
Utilisé par deck.gl ScatterplotLayer (drill-down ou zoom avancé).
|
||||
"""
|
||||
with _LOCK:
|
||||
status = _CACHE["status"]
|
||||
ips_map = _CACHE["cluster_ips"]
|
||||
|
||||
if status != "ready" or not ips_map:
|
||||
raise HTTPException(status_code=404, detail="Cache absent — appelez /clusters d'abord")
|
||||
|
||||
try:
|
||||
idx = int(cluster_id.split("_")[0][1:])
|
||||
except (ValueError, IndexError):
|
||||
raise HTTPException(status_code=400, detail="cluster_id invalide (format: c{n}_k{k})")
|
||||
|
||||
members = ips_map.get(idx, [])
|
||||
total = len(members)
|
||||
page = members[offset: offset + limit]
|
||||
|
||||
points = [
|
||||
{"ip": m[0], "ja4": m[1], "pca_x": round(m[2], 6), "pca_y": round(m[3], 6), "risk": round(m[4], 3)}
|
||||
for m in page
|
||||
]
|
||||
return {"points": points, "total": total, "offset": offset, "limit": limit}
|
||||
|
||||
|
||||
@router.get("/cluster/{cluster_id}/ips")
|
||||
async def get_cluster_ips(
|
||||
cluster_id: str,
|
||||
limit: int = Query(100, ge=1, le=500),
|
||||
offset: int = Query(0, ge=0),
|
||||
):
|
||||
"""IPs avec détails SQL (backward-compat avec l'ancienne UI)."""
|
||||
with _LOCK:
|
||||
status = _CACHE["status"]
|
||||
ips_map = _CACHE["cluster_ips"]
|
||||
|
||||
if status != "ready" or not ips_map:
|
||||
raise HTTPException(status_code=404, detail="Cache absent — appelez /clusters d'abord")
|
||||
|
||||
try:
|
||||
idx = int(cluster_id.split("_")[0][1:])
|
||||
except (ValueError, IndexError):
|
||||
raise HTTPException(status_code=400, detail="cluster_id invalide")
|
||||
|
||||
members = ips_map.get(idx, [])
|
||||
total = len(members)
|
||||
page = members[offset: offset + limit]
|
||||
if not page:
|
||||
return {"ips": [], "total": total, "cluster_id": cluster_id}
|
||||
|
||||
safe_ips = [m[0].replace("'", "") for m in page[:200]]
|
||||
ip_filter = ", ".join(f"'{ip}'" for ip in safe_ips)
|
||||
|
||||
sql = f"""
|
||||
SELECT
|
||||
replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') AS src_ip,
|
||||
t.ja4,
|
||||
any(t.tcp_ttl_raw) AS ttl,
|
||||
any(t.tcp_win_raw) AS win,
|
||||
any(t.tcp_scale_raw) AS scale,
|
||||
any(t.tcp_mss_raw) AS mss,
|
||||
sum(t.hits) AS hits,
|
||||
any(t.first_ua) AS ua,
|
||||
round(avg(abs(ml.anomaly_score)), 3) AS avg_score,
|
||||
max(ml.threat_level) AS threat_level,
|
||||
any(ml.country_code) AS country_code,
|
||||
any(ml.asn_org) AS asn_org,
|
||||
round(avg(ml.fuzzing_index), 2) AS fuzzing,
|
||||
round(avg(ml.hit_velocity), 2) AS velocity
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h t
|
||||
LEFT JOIN mabase_prod.ml_detected_anomalies ml
|
||||
ON t.src_ip = ml.src_ip AND t.ja4 = ml.ja4
|
||||
AND ml.detected_at >= now() - INTERVAL 24 HOUR
|
||||
WHERE t.window_start >= now() - INTERVAL 24 HOUR
|
||||
AND replaceRegexpAll(toString(t.src_ip), '^::ffff:', '') IN ({ip_filter})
|
||||
GROUP BY t.src_ip, t.ja4
|
||||
ORDER BY hits DESC
|
||||
"""
|
||||
try:
|
||||
result = db.query(sql)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
ips = []
|
||||
for row in result.result_rows:
|
||||
ips.append({
|
||||
"ip": str(row[0] or ""),
|
||||
"ja4": str(row[1] or ""),
|
||||
"tcp_ttl": int(row[2] or 0),
|
||||
"tcp_win": int(row[3] or 0),
|
||||
"tcp_scale": int(row[4] or 0),
|
||||
"tcp_mss": int(row[5] or 0),
|
||||
"hits": int(row[6] or 0),
|
||||
"ua": str(row[7] or ""),
|
||||
"avg_score": float(row[8] or 0),
|
||||
"threat_level": str(row[9] or ""),
|
||||
"country_code": str(row[10] or ""),
|
||||
"asn_org": str(row[11] or ""),
|
||||
"fuzzing": float(row[12] or 0),
|
||||
"velocity": float(row[13] or 0),
|
||||
})
|
||||
|
||||
return {"ips": ips, "total": total, "cluster_id": cluster_id}
|
||||
450
services/dashboard/backend/routes/detections.py
Normal file
450
services/dashboard/backend/routes/detections.py
Normal file
@ -0,0 +1,450 @@
|
||||
"""
|
||||
Endpoints pour la liste des détections
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional, List
|
||||
from ..database import db
|
||||
from ..models import DetectionsListResponse, Detection
|
||||
|
||||
router = APIRouter(prefix="/api/detections", tags=["detections"])
|
||||
|
||||
# Mapping label ASN → score float (0 = très suspect, 1 = légitime)
|
||||
_ASN_LABEL_SCORES: dict[str, float] = {
|
||||
'human': 0.9, 'bot': 0.05, 'proxy': 0.25, 'vpn': 0.3,
|
||||
'tor': 0.1, 'datacenter': 0.4, 'scanner': 0.05, 'malicious': 0.05,
|
||||
}
|
||||
|
||||
|
||||
def _label_to_score(label: str) -> float | None:
|
||||
"""Convertit un label de réputation ASN en score numérique."""
|
||||
if not label:
|
||||
return None
|
||||
return _ASN_LABEL_SCORES.get(label.lower(), 0.5)
|
||||
|
||||
|
||||
@router.get("", response_model=DetectionsListResponse, summary="Liste paginée des détections")
|
||||
async def get_detections(
|
||||
page: int = Query(1, ge=1, description="Numéro de page"),
|
||||
page_size: int = Query(25, ge=1, le=100, description="Nombre de lignes par page"),
|
||||
threat_level: Optional[str] = Query(None, description="Filtrer par niveau de menace"),
|
||||
model_name: Optional[str] = Query(None, description="Filtrer par modèle"),
|
||||
country_code: Optional[str] = Query(None, description="Filtrer par pays"),
|
||||
asn_number: Optional[str] = Query(None, description="Filtrer par ASN"),
|
||||
search: Optional[str] = Query(None, description="Recherche texte (IP, JA4, Host)"),
|
||||
sort_by: str = Query("detected_at", description="Trier par"),
|
||||
sort_order: str = Query("DESC", description="Ordre (ASC/DESC)"),
|
||||
group_by_ip: bool = Query(False, description="Grouper par IP (first_seen/last_seen agrégés)"),
|
||||
score_type: Optional[str] = Query(None, description="Filtrer par type de score: BOT, REGLE, BOT_REGLE, SCORE")
|
||||
):
|
||||
"""
|
||||
Récupère la liste des détections avec pagination et filtres
|
||||
"""
|
||||
try:
|
||||
# Construction de la requête
|
||||
where_clauses = ["detected_at >= now() - INTERVAL 24 HOUR"]
|
||||
params = {}
|
||||
|
||||
if threat_level:
|
||||
where_clauses.append("threat_level = %(threat_level)s")
|
||||
params["threat_level"] = threat_level
|
||||
|
||||
if model_name:
|
||||
where_clauses.append("model_name = %(model_name)s")
|
||||
params["model_name"] = model_name
|
||||
|
||||
if country_code:
|
||||
where_clauses.append("country_code = %(country_code)s")
|
||||
params["country_code"] = country_code.upper()
|
||||
|
||||
if asn_number:
|
||||
where_clauses.append("asn_number = %(asn_number)s")
|
||||
params["asn_number"] = asn_number
|
||||
|
||||
if search:
|
||||
where_clauses.append(
|
||||
"(ilike(toString(src_ip), %(search)s) OR ilike(ja4, %(search)s) OR ilike(host, %(search)s))"
|
||||
)
|
||||
params["search"] = f"%{search}%"
|
||||
|
||||
if score_type:
|
||||
st = score_type.upper()
|
||||
if st == "BOT":
|
||||
where_clauses.append("threat_level = 'KNOWN_BOT'")
|
||||
elif st == "REGLE":
|
||||
where_clauses.append("threat_level = 'ANUBIS_DENY'")
|
||||
elif st == "BOT_REGLE":
|
||||
where_clauses.append("threat_level IN ('KNOWN_BOT', 'ANUBIS_DENY')")
|
||||
elif st == "SCORE":
|
||||
where_clauses.append("threat_level NOT IN ('KNOWN_BOT', 'ANUBIS_DENY')")
|
||||
|
||||
where_clause = " AND ".join(where_clauses)
|
||||
|
||||
# Requête de comptage
|
||||
count_query = f"""
|
||||
SELECT count()
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
|
||||
count_result = db.query(count_query, params)
|
||||
total = count_result.result_rows[0][0] if count_result.result_rows else 0
|
||||
|
||||
# Requête principale
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
sort_order = "DESC" if sort_order.upper() == "DESC" else "ASC"
|
||||
|
||||
# ── Mode groupé par IP (first_seen / last_seen depuis la DB) ────────────
|
||||
if group_by_ip:
|
||||
valid_sort_grouped = ["anomaly_score", "hits", "hit_velocity", "first_seen", "last_seen", "src_ip", "detected_at"]
|
||||
grouped_sort = sort_by if sort_by in valid_sort_grouped else "last_seen"
|
||||
# detected_at → last_seen (max(detected_at) dans le GROUP BY)
|
||||
if grouped_sort == "detected_at":
|
||||
grouped_sort = "last_seen"
|
||||
# In outer query, min_score is exposed as anomaly_score — keep the alias
|
||||
outer_sort = "min_score" if grouped_sort == "anomaly_score" else grouped_sort
|
||||
|
||||
# Count distinct IPs
|
||||
count_ip_query = f"""
|
||||
SELECT uniq(src_ip)
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
cr = db.query(count_ip_query, params)
|
||||
total = cr.result_rows[0][0] if cr.result_rows else 0
|
||||
|
||||
grouped_query = f"""
|
||||
SELECT
|
||||
ip_data.src_ip,
|
||||
ip_data.first_seen,
|
||||
ip_data.last_seen,
|
||||
ip_data.detection_count,
|
||||
ip_data.unique_ja4s,
|
||||
ip_data.unique_hosts,
|
||||
ip_data.min_score AS anomaly_score,
|
||||
ip_data.threat_level_best,
|
||||
ip_data.model_name_best,
|
||||
ip_data.country_code,
|
||||
ip_data.asn_number,
|
||||
ip_data.asn_org,
|
||||
ip_data.hit_velocity,
|
||||
ip_data.hits,
|
||||
ip_data.asn_label,
|
||||
ar.label AS asn_rep_label,
|
||||
ip_data.anubis_bot_name_best,
|
||||
ip_data.anubis_bot_action_best,
|
||||
ip_data.anubis_bot_category_best
|
||||
FROM (
|
||||
SELECT
|
||||
src_ip,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen,
|
||||
count() AS detection_count,
|
||||
groupUniqArray(5)(ja4) AS unique_ja4s,
|
||||
groupUniqArray(5)(host) AS unique_hosts,
|
||||
min(anomaly_score) AS min_score,
|
||||
argMin(threat_level, anomaly_score) AS threat_level_best,
|
||||
argMin(model_name, anomaly_score) AS model_name_best,
|
||||
any(country_code) AS country_code,
|
||||
any(asn_number) AS asn_number,
|
||||
any(asn_org) AS asn_org,
|
||||
max(hit_velocity) AS hit_velocity,
|
||||
sum(hits) AS hits,
|
||||
any(asn_label) AS asn_label,
|
||||
argMin(anubis_bot_name, anomaly_score) AS anubis_bot_name_best,
|
||||
argMin(anubis_bot_action, anomaly_score) AS anubis_bot_action_best,
|
||||
argMin(anubis_bot_category, anomaly_score) AS anubis_bot_category_best
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {where_clause}
|
||||
GROUP BY src_ip
|
||||
) ip_data
|
||||
LEFT JOIN mabase_prod.asn_reputation ar
|
||||
ON ar.src_asn = toUInt32OrZero(ip_data.asn_number)
|
||||
ORDER BY {outer_sort} {sort_order}
|
||||
LIMIT %(limit)s OFFSET %(offset)s
|
||||
"""
|
||||
params["limit"] = page_size
|
||||
params["offset"] = offset
|
||||
gresult = db.query(grouped_query, params)
|
||||
|
||||
detections = []
|
||||
for row in gresult.result_rows:
|
||||
# row: src_ip, first_seen, last_seen, detection_count, unique_ja4s, unique_hosts,
|
||||
# anomaly_score, threat_level_best, model_name_best, country_code, asn_number,
|
||||
# asn_org, hit_velocity, hits, asn_label, asn_rep_label,
|
||||
# anubis_bot_name, anubis_bot_action, anubis_bot_category
|
||||
ja4s = list(row[4]) if row[4] else []
|
||||
hosts = list(row[5]) if row[5] else []
|
||||
detections.append(Detection(
|
||||
detected_at=row[1],
|
||||
src_ip=str(row[0]),
|
||||
ja4=ja4s[0] if ja4s else "",
|
||||
host=hosts[0] if hosts else "",
|
||||
bot_name="",
|
||||
anomaly_score=float(row[6]) if row[6] else 0.0,
|
||||
threat_level=row[7] or "LOW",
|
||||
model_name=row[8] or "",
|
||||
recurrence=int(row[3] or 0),
|
||||
asn_number=str(row[10]) if row[10] else "",
|
||||
asn_org=row[11] or "",
|
||||
asn_detail="",
|
||||
asn_domain="",
|
||||
country_code=row[9] or "",
|
||||
asn_label=row[14] or "",
|
||||
hits=int(row[13] or 0),
|
||||
hit_velocity=float(row[12]) if row[12] else 0.0,
|
||||
fuzzing_index=0.0,
|
||||
post_ratio=0.0,
|
||||
reason="",
|
||||
asn_rep_label=row[15] or "",
|
||||
asn_score=_label_to_score(row[15] or ""),
|
||||
first_seen=row[1],
|
||||
last_seen=row[2],
|
||||
unique_ja4s=ja4s,
|
||||
unique_hosts=hosts,
|
||||
anubis_bot_name=row[16] or "",
|
||||
anubis_bot_action=row[17] or "",
|
||||
anubis_bot_category=row[18] or "",
|
||||
))
|
||||
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
return DetectionsListResponse(
|
||||
items=detections, total=total, page=page,
|
||||
page_size=page_size, total_pages=total_pages
|
||||
)
|
||||
|
||||
# ── Mode individuel (comportement original) ──────────────────────────────
|
||||
# Validation du tri
|
||||
valid_sort_columns = [
|
||||
"detected_at", "src_ip", "threat_level", "anomaly_score",
|
||||
"asn_number", "country_code", "hits", "hit_velocity"
|
||||
]
|
||||
if sort_by not in valid_sort_columns:
|
||||
sort_by = "detected_at"
|
||||
|
||||
main_query = f"""
|
||||
SELECT
|
||||
detected_at,
|
||||
src_ip,
|
||||
ja4,
|
||||
host,
|
||||
bot_name,
|
||||
anomaly_score,
|
||||
threat_level,
|
||||
model_name,
|
||||
recurrence,
|
||||
asn_number,
|
||||
asn_org,
|
||||
asn_detail,
|
||||
asn_domain,
|
||||
country_code,
|
||||
asn_label,
|
||||
hits,
|
||||
hit_velocity,
|
||||
fuzzing_index,
|
||||
post_ratio,
|
||||
reason,
|
||||
ar.label AS asn_rep_label,
|
||||
anubis_bot_name,
|
||||
anubis_bot_action,
|
||||
anubis_bot_category
|
||||
FROM ml_detected_anomalies
|
||||
LEFT JOIN mabase_prod.asn_reputation ar ON ar.src_asn = toUInt32OrZero(asn_number)
|
||||
WHERE {where_clause}
|
||||
ORDER BY {sort_by} {sort_order}
|
||||
LIMIT %(limit)s OFFSET %(offset)s
|
||||
"""
|
||||
|
||||
params["limit"] = page_size
|
||||
params["offset"] = offset
|
||||
|
||||
result = db.query(main_query, params)
|
||||
|
||||
detections = [
|
||||
Detection(
|
||||
detected_at=row[0],
|
||||
src_ip=str(row[1]),
|
||||
ja4=row[2] or "",
|
||||
host=row[3] or "",
|
||||
bot_name=row[4] or "",
|
||||
anomaly_score=float(row[5]) if row[5] else 0.0,
|
||||
threat_level=row[6] or "LOW",
|
||||
model_name=row[7] or "",
|
||||
recurrence=row[8] or 0,
|
||||
asn_number=str(row[9]) if row[9] else "",
|
||||
asn_org=row[10] or "",
|
||||
asn_detail=row[11] or "",
|
||||
asn_domain=row[12] or "",
|
||||
country_code=row[13] or "",
|
||||
asn_label=row[14] or "",
|
||||
hits=row[15] or 0,
|
||||
hit_velocity=float(row[16]) if row[16] else 0.0,
|
||||
fuzzing_index=float(row[17]) if row[17] else 0.0,
|
||||
post_ratio=float(row[18]) if row[18] else 0.0,
|
||||
reason=row[19] or "",
|
||||
asn_rep_label=row[20] or "",
|
||||
asn_score=_label_to_score(row[20] or ""),
|
||||
anubis_bot_name=row[21] or "",
|
||||
anubis_bot_action=row[22] or "",
|
||||
anubis_bot_category=row[23] or "",
|
||||
)
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
total_pages = (total + page_size - 1) // page_size
|
||||
|
||||
return DetectionsListResponse(
|
||||
items=detections,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
total_pages=total_pages
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur lors de la récupération des détections: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{detection_id}")
|
||||
async def get_detection_details(detection_id: str):
|
||||
"""
|
||||
Récupère les détails d'une détection spécifique
|
||||
detection_id peut être une IP ou un identifiant
|
||||
"""
|
||||
try:
|
||||
query = """
|
||||
SELECT
|
||||
detected_at,
|
||||
src_ip,
|
||||
ja4,
|
||||
host,
|
||||
bot_name,
|
||||
anomaly_score,
|
||||
threat_level,
|
||||
model_name,
|
||||
recurrence,
|
||||
asn_number,
|
||||
asn_org,
|
||||
asn_detail,
|
||||
asn_domain,
|
||||
country_code,
|
||||
asn_label,
|
||||
hits,
|
||||
hit_velocity,
|
||||
fuzzing_index,
|
||||
post_ratio,
|
||||
port_exhaustion_ratio,
|
||||
orphan_ratio,
|
||||
tcp_jitter_variance,
|
||||
tcp_shared_count,
|
||||
true_window_size,
|
||||
window_mss_ratio,
|
||||
alpn_http_mismatch,
|
||||
is_alpn_missing,
|
||||
sni_host_mismatch,
|
||||
header_count,
|
||||
has_accept_language,
|
||||
has_cookie,
|
||||
has_referer,
|
||||
modern_browser_score,
|
||||
ua_ch_mismatch,
|
||||
header_order_shared_count,
|
||||
ip_id_zero_ratio,
|
||||
request_size_variance,
|
||||
multiplexing_efficiency,
|
||||
mss_mobile_mismatch,
|
||||
correlated,
|
||||
reason,
|
||||
asset_ratio,
|
||||
direct_access_ratio,
|
||||
is_ua_rotating,
|
||||
distinct_ja4_count,
|
||||
src_port_density,
|
||||
ja4_asn_concentration,
|
||||
ja4_country_concentration,
|
||||
is_rare_ja4
|
||||
FROM ml_detected_anomalies
|
||||
WHERE src_ip = %(ip)s
|
||||
ORDER BY detected_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
result = db.query(query, {"ip": detection_id})
|
||||
|
||||
if not result.result_rows:
|
||||
raise HTTPException(status_code=404, detail="Détection non trouvée")
|
||||
|
||||
row = result.result_rows[0]
|
||||
|
||||
return {
|
||||
"detected_at": row[0],
|
||||
"src_ip": str(row[1]),
|
||||
"ja4": row[2] or "",
|
||||
"host": row[3] or "",
|
||||
"bot_name": row[4] or "",
|
||||
"anomaly_score": float(row[5]) if row[5] else 0.0,
|
||||
"threat_level": row[6] or "LOW",
|
||||
"model_name": row[7] or "",
|
||||
"recurrence": row[8] or 0,
|
||||
"asn": {
|
||||
"number": str(row[9]) if row[9] else "",
|
||||
"org": row[10] or "",
|
||||
"detail": row[11] or "",
|
||||
"domain": row[12] or "",
|
||||
"label": row[14] or ""
|
||||
},
|
||||
"country": {
|
||||
"code": row[13] or "",
|
||||
},
|
||||
"metrics": {
|
||||
"hits": row[15] or 0,
|
||||
"hit_velocity": float(row[16]) if row[16] else 0.0,
|
||||
"fuzzing_index": float(row[17]) if row[17] else 0.0,
|
||||
"post_ratio": float(row[18]) if row[18] else 0.0,
|
||||
"port_exhaustion_ratio": float(row[19]) if row[19] else 0.0,
|
||||
"orphan_ratio": float(row[20]) if row[20] else 0.0,
|
||||
},
|
||||
"tcp": {
|
||||
"jitter_variance": float(row[21]) if row[21] else 0.0,
|
||||
"shared_count": row[22] or 0,
|
||||
"true_window_size": row[23] or 0,
|
||||
"window_mss_ratio": float(row[24]) if row[24] else 0.0,
|
||||
},
|
||||
"tls": {
|
||||
"alpn_http_mismatch": bool(row[25]) if row[25] is not None else False,
|
||||
"is_alpn_missing": bool(row[26]) if row[26] is not None else False,
|
||||
"sni_host_mismatch": bool(row[27]) if row[27] is not None else False,
|
||||
},
|
||||
"headers": {
|
||||
"count": row[28] or 0,
|
||||
"has_accept_language": bool(row[29]) if row[29] is not None else False,
|
||||
"has_cookie": bool(row[30]) if row[30] is not None else False,
|
||||
"has_referer": bool(row[31]) if row[31] is not None else False,
|
||||
"modern_browser_score": row[32] or 0,
|
||||
"ua_ch_mismatch": bool(row[33]) if row[33] is not None else False,
|
||||
"header_order_shared_count": row[34] or 0,
|
||||
},
|
||||
"behavior": {
|
||||
"ip_id_zero_ratio": float(row[35]) if row[35] else 0.0,
|
||||
"request_size_variance": float(row[36]) if row[36] else 0.0,
|
||||
"multiplexing_efficiency": float(row[37]) if row[37] else 0.0,
|
||||
"mss_mobile_mismatch": bool(row[38]) if row[38] is not None else False,
|
||||
"correlated": bool(row[39]) if row[39] is not None else False,
|
||||
},
|
||||
"advanced": {
|
||||
"asset_ratio": float(row[41]) if row[41] else 0.0,
|
||||
"direct_access_ratio": float(row[42]) if row[42] else 0.0,
|
||||
"is_ua_rotating": bool(row[43]) if row[43] is not None else False,
|
||||
"distinct_ja4_count": row[44] or 0,
|
||||
"src_port_density": float(row[45]) if row[45] else 0.0,
|
||||
"ja4_asn_concentration": float(row[46]) if row[46] else 0.0,
|
||||
"ja4_country_concentration": float(row[47]) if row[47] else 0.0,
|
||||
"is_rare_ja4": bool(row[48]) if row[48] is not None else False,
|
||||
},
|
||||
"reason": row[40] or ""
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
509
services/dashboard/backend/routes/entities.py
Normal file
509
services/dashboard/backend/routes/entities.py
Normal file
@ -0,0 +1,509 @@
|
||||
"""
|
||||
Routes pour l'investigation d'entités (IP, JA4, User-Agent, Client-Header, Host, Path, Query-Param)
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional, List
|
||||
|
||||
from ..database import db
|
||||
from ..models import (
|
||||
EntityInvestigation,
|
||||
EntityStats,
|
||||
EntityRelatedAttributes,
|
||||
EntityAttributeValue
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/entities", tags=["Entities"])
|
||||
|
||||
# Ensemble des types d'entités valides
|
||||
VALID_ENTITY_TYPES = frozenset({
|
||||
'ip', 'ja4', 'user_agent', 'client_header', 'host', 'path', 'query_param'
|
||||
})
|
||||
|
||||
|
||||
def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Optional[EntityStats]:
|
||||
"""
|
||||
Récupère les statistiques pour une entité donnée
|
||||
"""
|
||||
query = """
|
||||
SELECT
|
||||
entity_type,
|
||||
entity_value,
|
||||
sum(requests) as total_requests,
|
||||
sum(unique_ips) as unique_ips,
|
||||
min(log_date) as first_seen,
|
||||
max(log_date) as last_seen
|
||||
FROM mabase_prod.view_dashboard_entities
|
||||
WHERE entity_type = %(entity_type)s
|
||||
AND entity_value = %(entity_value)s
|
||||
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
|
||||
GROUP BY entity_type, entity_value
|
||||
"""
|
||||
|
||||
result = db.query(query, {
|
||||
'entity_type': entity_type,
|
||||
'entity_value': entity_value,
|
||||
'hours': hours
|
||||
})
|
||||
|
||||
if not result.result_rows:
|
||||
return None
|
||||
|
||||
row = result.result_rows[0]
|
||||
return EntityStats(
|
||||
entity_type=row[0],
|
||||
entity_value=row[1],
|
||||
total_requests=row[2],
|
||||
unique_ips=row[3],
|
||||
first_seen=row[4],
|
||||
last_seen=row[5]
|
||||
)
|
||||
|
||||
|
||||
def get_related_attributes(entity_type: str, entity_value: str, hours: int = 24) -> EntityRelatedAttributes:
|
||||
"""
|
||||
Récupère les attributs associés à une entité
|
||||
"""
|
||||
# Requête pour agréger tous les attributs associés
|
||||
query = """
|
||||
SELECT
|
||||
(SELECT groupUniqArray(toString(src_ip)) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips,
|
||||
(SELECT groupUniqArray(ja4) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s,
|
||||
(SELECT groupUniqArray(host) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts,
|
||||
(SELECT groupUniqArrayArray(asns) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns,
|
||||
(SELECT groupUniqArrayArray(countries) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries
|
||||
"""
|
||||
|
||||
result = db.query(query, {
|
||||
'entity_type': entity_type,
|
||||
'entity_value': entity_value,
|
||||
'hours': hours
|
||||
})
|
||||
|
||||
if not result.result_rows or not any(result.result_rows[0]):
|
||||
return EntityRelatedAttributes(
|
||||
ips=[],
|
||||
ja4s=[],
|
||||
hosts=[],
|
||||
asns=[],
|
||||
countries=[]
|
||||
)
|
||||
|
||||
row = result.result_rows[0]
|
||||
return EntityRelatedAttributes(
|
||||
ips=[str(ip) for ip in (row[0] or []) if ip],
|
||||
ja4s=[ja4 for ja4 in (row[1] or []) if ja4],
|
||||
hosts=[host for host in (row[2] or []) if host],
|
||||
asns=[asn for asn in (row[3] or []) if asn],
|
||||
countries=[country for country in (row[4] or []) if country]
|
||||
)
|
||||
|
||||
|
||||
def get_array_values(entity_type: str, entity_value: str, array_field: str, hours: int = 24) -> List[EntityAttributeValue]:
|
||||
"""
|
||||
Extrait et retourne les valeurs d'un champ Array (user_agents, client_headers, etc.)
|
||||
"""
|
||||
query = f"""
|
||||
SELECT
|
||||
value,
|
||||
count() as count,
|
||||
round(count * 100.0 / sum(count) OVER (), 2) as percentage
|
||||
FROM (
|
||||
SELECT
|
||||
arrayJoin({array_field}) as value
|
||||
FROM mabase_prod.view_dashboard_entities
|
||||
WHERE entity_type = %(entity_type)s
|
||||
AND entity_value = %(entity_value)s
|
||||
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
|
||||
AND notEmpty({array_field})
|
||||
)
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
result = db.query(query, {
|
||||
'entity_type': entity_type,
|
||||
'entity_value': entity_value,
|
||||
'hours': hours
|
||||
})
|
||||
|
||||
return [
|
||||
EntityAttributeValue(
|
||||
value=row[0],
|
||||
count=row[1],
|
||||
percentage=row[2]
|
||||
)
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
|
||||
@router.get("/subnet/{subnet:path}")
|
||||
async def get_subnet_investigation(
|
||||
subnet: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère toutes les IPs d'un subnet /24 avec leurs statistiques
|
||||
Utilise ml_detected_anomalies pour les détections + view_dashboard_entities pour les user-agents
|
||||
"""
|
||||
try:
|
||||
# Extraire l'IP de base du subnet (ex: 192.168.1.0/24 -> 192.168.1.0)
|
||||
subnet_ip = subnet.replace('/24', '').replace('/16', '').replace('/8', '')
|
||||
|
||||
# Extraire les 3 premiers octets pour le filtre (ex: 141.98.11)
|
||||
subnet_parts = subnet_ip.split('.')[:3]
|
||||
subnet_prefix = subnet_parts[0]
|
||||
subnet_mask = subnet_parts[1]
|
||||
subnet_third = subnet_parts[2]
|
||||
|
||||
# Stats globales du subnet - utilise ml_detected_anomalies + view_dashboard_entities pour UA
|
||||
stats_query = """
|
||||
WITH cleaned_ips AS (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
detected_at,
|
||||
ja4,
|
||||
host,
|
||||
country_code,
|
||||
asn_number
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
),
|
||||
subnet_filter AS (
|
||||
SELECT *
|
||||
FROM cleaned_ips
|
||||
WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', clean_ip)[3] = %(subnet_third)s
|
||||
),
|
||||
-- Récupérer les user-agents depuis view_dashboard_entities
|
||||
ua_data AS (
|
||||
SELECT
|
||||
entity_value AS ip,
|
||||
arrayJoin(user_agents) AS user_agent
|
||||
FROM view_dashboard_entities
|
||||
WHERE entity_type = 'ip'
|
||||
AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)
|
||||
AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', entity_value)[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', entity_value)[3] = %(subnet_third)s
|
||||
)
|
||||
SELECT
|
||||
%(subnet)s AS subnet,
|
||||
uniq(clean_ip) AS total_ips,
|
||||
count() AS total_detections,
|
||||
uniq(ja4) AS unique_ja4,
|
||||
(SELECT uniq(user_agent) FROM ua_data) AS unique_ua,
|
||||
uniq(host) AS unique_hosts,
|
||||
argMax(country_code, detected_at) AS primary_country,
|
||||
argMax(asn_number, detected_at) AS primary_asn,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM subnet_filter
|
||||
"""
|
||||
|
||||
stats_result = db.query(stats_query, {
|
||||
"subnet": subnet,
|
||||
"subnet_prefix": subnet_prefix,
|
||||
"subnet_mask": subnet_mask,
|
||||
"subnet_third": subnet_third,
|
||||
"hours": hours
|
||||
})
|
||||
|
||||
if not stats_result.result_rows or stats_result.result_rows[0][1] == 0:
|
||||
raise HTTPException(status_code=404, detail="Subnet non trouvé")
|
||||
|
||||
stats_row = stats_result.result_rows[0]
|
||||
stats = {
|
||||
"subnet": subnet,
|
||||
"total_ips": stats_row[1] or 0,
|
||||
"total_detections": stats_row[2] or 0,
|
||||
"unique_ja4": stats_row[3] or 0,
|
||||
"unique_ua": stats_row[4] or 0,
|
||||
"unique_hosts": stats_row[5] or 0,
|
||||
"primary_country": stats_row[6] or "XX",
|
||||
"primary_asn": str(stats_row[7]) if stats_row[7] else "?",
|
||||
"first_seen": stats_row[8].isoformat() if stats_row[8] else "",
|
||||
"last_seen": stats_row[9].isoformat() if stats_row[9] else ""
|
||||
}
|
||||
|
||||
# Liste des IPs avec détails - 2 requêtes séparées + fusion en Python
|
||||
ips_query = """
|
||||
WITH cleaned_ips AS (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
detected_at,
|
||||
ja4,
|
||||
country_code,
|
||||
asn_number,
|
||||
threat_level,
|
||||
anomaly_score
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
),
|
||||
subnet_filter AS (
|
||||
SELECT *
|
||||
FROM cleaned_ips
|
||||
WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s
|
||||
AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s
|
||||
AND splitByChar('.', clean_ip)[3] = %(subnet_third)s
|
||||
)
|
||||
SELECT
|
||||
clean_ip AS ip,
|
||||
count() AS total_detections,
|
||||
uniq(ja4) AS unique_ja4,
|
||||
argMax(country_code, detected_at) AS primary_country,
|
||||
argMax(asn_number, detected_at) AS primary_asn,
|
||||
argMax(threat_level, detected_at) AS threat_level,
|
||||
avg(anomaly_score) AS avg_score,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM subnet_filter
|
||||
GROUP BY ip
|
||||
ORDER BY total_detections DESC
|
||||
"""
|
||||
|
||||
# Exécuter la première requête pour obtenir les IPs
|
||||
ips_result = db.query(ips_query, {
|
||||
"subnet_prefix": subnet_prefix,
|
||||
"subnet_mask": subnet_mask,
|
||||
"subnet_third": subnet_third,
|
||||
"hours": hours
|
||||
})
|
||||
|
||||
# Extraire la liste des IPs pour la requête UA
|
||||
ip_list = [str(row[0]) for row in ips_result.result_rows]
|
||||
|
||||
# Requête pour les user-agents avec IN clause (utilise l'index)
|
||||
unique_ua_dict = {}
|
||||
if ip_list:
|
||||
# Formater la liste pour la clause IN
|
||||
ip_values = ', '.join(f"'{ip}'" for ip in ip_list)
|
||||
ua_query = f"""
|
||||
SELECT
|
||||
entity_value AS ip,
|
||||
uniq(arrayJoin(user_agents)) AS unique_ua
|
||||
FROM view_dashboard_entities
|
||||
PREWHERE entity_type = 'ip'
|
||||
WHERE entity_value IN ({ip_values})
|
||||
AND log_date >= today() - INTERVAL 30 DAY
|
||||
GROUP BY entity_value
|
||||
"""
|
||||
ua_result = db.query(ua_query, {})
|
||||
unique_ua_dict = {row[0]: row[1] for row in ua_result.result_rows}
|
||||
|
||||
# Fusionner les résultats
|
||||
ips = []
|
||||
for row in ips_result.result_rows:
|
||||
ips.append({
|
||||
"ip": str(row[0]),
|
||||
"total_detections": row[1],
|
||||
"unique_ja4": row[2],
|
||||
"unique_ua": unique_ua_dict.get(row[0], 0),
|
||||
"primary_country": row[3] or "XX",
|
||||
"primary_asn": str(row[4]) if row[4] else "?",
|
||||
"threat_level": row[5] or "LOW",
|
||||
"avg_score": abs(row[6] or 0),
|
||||
"first_seen": row[7].isoformat() if row[7] else "",
|
||||
"last_seen": row[8].isoformat() if row[8] else ""
|
||||
})
|
||||
|
||||
return {
|
||||
"stats": stats,
|
||||
"ips": ips
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/types")
|
||||
async def get_entity_types():
|
||||
"""
|
||||
Retourne la liste des types d'entités supportés.
|
||||
NOTE: Cette route DOIT être déclarée avant /{entity_type}/... pour ne pas être masquée.
|
||||
"""
|
||||
return {
|
||||
"entity_types": sorted(VALID_ENTITY_TYPES),
|
||||
"descriptions": {
|
||||
"ip": "Adresse IP source",
|
||||
"ja4": "Fingerprint JA4 TLS",
|
||||
"user_agent": "User-Agent HTTP",
|
||||
"client_header": "Client Header",
|
||||
"host": "Host HTTP",
|
||||
"path": "Path URL",
|
||||
"query_param": "Query Param"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}", response_model=EntityInvestigation)
|
||||
async def get_entity_investigation(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720, description="Fenêtre temporelle en heures")
|
||||
):
|
||||
"""
|
||||
Investigation complète pour une entité donnée
|
||||
|
||||
- **entity_type**: Type d'entité (ip, ja4, user_agent, client_header, host, path, query_param)
|
||||
- **entity_value**: Valeur de l'entité
|
||||
- **hours**: Fenêtre temporelle (défaut: 24h)
|
||||
|
||||
Retourne:
|
||||
- Stats générales
|
||||
- Attributs associés (IPs, JA4, Hosts, ASNs, Pays)
|
||||
- User-Agents
|
||||
- Client-Headers
|
||||
- Paths
|
||||
- Query-Params
|
||||
"""
|
||||
# Valider le type d'entité
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type d'entité invalide. Types supportés: {', '.join(VALID_ENTITY_TYPES)}"
|
||||
)
|
||||
|
||||
# Stats générales
|
||||
stats = get_entity_stats(entity_type, entity_value, hours)
|
||||
if not stats:
|
||||
raise HTTPException(status_code=404, detail="Entité non trouvée")
|
||||
|
||||
# Attributs associés
|
||||
related = get_related_attributes(entity_type, entity_value, hours)
|
||||
|
||||
# User-Agents
|
||||
user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours)
|
||||
|
||||
# Client-Headers
|
||||
client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours)
|
||||
|
||||
# Paths
|
||||
paths = get_array_values(entity_type, entity_value, 'paths', hours)
|
||||
|
||||
# Query-Params
|
||||
query_params = get_array_values(entity_type, entity_value, 'query_params', hours)
|
||||
|
||||
return EntityInvestigation(
|
||||
stats=stats,
|
||||
related=related,
|
||||
user_agents=user_agents,
|
||||
client_headers=client_headers,
|
||||
paths=paths,
|
||||
query_params=query_params
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/related")
|
||||
async def get_entity_related(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère uniquement les attributs associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type d'entité invalide. Types supportés: {', '.join(VALID_ENTITY_TYPES)}"
|
||||
)
|
||||
|
||||
related = get_related_attributes(entity_type, entity_value, hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"hours": hours,
|
||||
"related": related
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/user_agents")
|
||||
async def get_entity_user_agents(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère les User-Agents associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(status_code=400, detail="Type d'entité invalide")
|
||||
|
||||
user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"user_agents": user_agents,
|
||||
"total": len(user_agents)
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/client_headers")
|
||||
async def get_entity_client_headers(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère les Client-Headers associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(status_code=400, detail="Type d'entité invalide")
|
||||
|
||||
client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"client_headers": client_headers,
|
||||
"total": len(client_headers)
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/paths")
|
||||
async def get_entity_paths(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère les Paths associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(status_code=400, detail="Type d'entité invalide")
|
||||
|
||||
paths = get_array_values(entity_type, entity_value, 'paths', hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"paths": paths,
|
||||
"total": len(paths)
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{entity_type}/{entity_value:path}/query_params")
|
||||
async def get_entity_query_params(
|
||||
entity_type: str,
|
||||
entity_value: str,
|
||||
hours: int = Query(default=24, ge=1, le=720)
|
||||
):
|
||||
"""
|
||||
Récupère les Query-Params associés à une entité
|
||||
"""
|
||||
if entity_type not in VALID_ENTITY_TYPES:
|
||||
raise HTTPException(status_code=400, detail="Type d'entité invalide")
|
||||
|
||||
query_params = get_array_values(entity_type, entity_value, 'query_params', hours)
|
||||
|
||||
return {
|
||||
"entity_type": entity_type,
|
||||
"entity_value": entity_value,
|
||||
"query_params": query_params,
|
||||
"total": len(query_params)
|
||||
}
|
||||
827
services/dashboard/backend/routes/fingerprints.py
Normal file
827
services/dashboard/backend/routes/fingerprints.py
Normal file
@ -0,0 +1,827 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des fingerprints JA4 et User-Agents
|
||||
|
||||
Objectifs:
|
||||
- Détecter le spoofing JA4 (fingerprint TLS qui prétend être un navigateur mais
|
||||
dont les User-Agents, les headers HTTP ou les métriques comportementales trahissent
|
||||
une origine bot/script)
|
||||
- Construire une matrice JA4 × User-Agent pour visualiser les associations suspectes
|
||||
- Analyser la distribution des User-Agents pour identifier les rotateurs et les bots
|
||||
qui usurpent des UA de navigateurs légitimes
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
import re
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/fingerprints", tags=["fingerprints"])
|
||||
|
||||
|
||||
# ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Patterns indiquant clairement un bot/script sans simulation de navigateur
|
||||
_BOT_PATTERNS = re.compile(
|
||||
r"bot|crawler|spider|scraper|python|curl|wget|go-http|java/|axios|"
|
||||
r"libwww|httpclient|okhttp|requests|aiohttp|httpx|playwright|puppeteer|"
|
||||
r"selenium|headless|phantomjs",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Navigateurs légitimes communs — un JA4 de type "browser" devrait venir avec ces UAs
|
||||
_BROWSER_PATTERNS = re.compile(
|
||||
r"mozilla|chrome|safari|firefox|edge|opera|trident",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _classify_ua(ua: str) -> str:
|
||||
"""Retourne 'bot', 'browser', ou 'script'"""
|
||||
if not ua:
|
||||
return "empty"
|
||||
if _BOT_PATTERNS.search(ua):
|
||||
return "bot"
|
||||
if _BROWSER_PATTERNS.search(ua):
|
||||
return "browser"
|
||||
return "script"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 1 — Détection de spoofing JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/spoofing")
|
||||
async def get_ja4_spoofing(
|
||||
hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle"),
|
||||
min_detections: int = Query(10, ge=1, description="Nombre minimum de détections"),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""
|
||||
Identifie les JA4 fingerprints suspects de spoofing navigateur.
|
||||
|
||||
Un JA4 est considéré suspect quand:
|
||||
- Il présente un taux élevé de ua_ch_mismatch (header UA ≠ Client Hints)
|
||||
- Son modern_browser_score est élevé mais les UAs associés sont des bots/scripts
|
||||
- Il apparaît avec un taux élevé de sni_host_mismatch ou alpn_http_mismatch
|
||||
- is_rare_ja4 = true avec un volume important
|
||||
|
||||
Retourne un score de confiance de spoofing [0-100] pour chaque JA4.
|
||||
"""
|
||||
try:
|
||||
# Agrégation par JA4 avec tous les indicateurs de spoofing
|
||||
query = """
|
||||
SELECT
|
||||
ja4,
|
||||
count() AS total_detections,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
|
||||
-- Indicateurs de mismatch
|
||||
countIf(ua_ch_mismatch = true) AS ua_ch_mismatch_count,
|
||||
round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
|
||||
countIf(sni_host_mismatch = true) AS sni_mismatch_count,
|
||||
round(countIf(sni_host_mismatch = true) * 100.0 / count(), 2) AS sni_mismatch_pct,
|
||||
countIf(alpn_http_mismatch = true) AS alpn_mismatch_count,
|
||||
round(countIf(alpn_http_mismatch = true) * 100.0 / count(), 2) AS alpn_mismatch_pct,
|
||||
|
||||
-- Indicateurs comportementaux
|
||||
avg(modern_browser_score) AS avg_browser_score,
|
||||
countIf(is_rare_ja4 = true) AS rare_ja4_count,
|
||||
round(countIf(is_rare_ja4 = true) * 100.0 / count(), 2) AS rare_ja4_pct,
|
||||
countIf(is_ua_rotating = true) AS ua_rotating_count,
|
||||
round(countIf(is_ua_rotating = true) * 100.0 / count(), 2) AS ua_rotating_pct,
|
||||
|
||||
-- Métriques TLS/TCP
|
||||
countIf(is_alpn_missing = true) AS alpn_missing_count,
|
||||
avg(distinct_ja4_count) AS avg_distinct_ja4_per_ip,
|
||||
|
||||
-- Répartition threat levels
|
||||
countIf(threat_level = 'CRITICAL') AS critical_count,
|
||||
countIf(threat_level = 'HIGH') AS high_count,
|
||||
|
||||
-- Botnet indicators
|
||||
avg(ja4_asn_concentration) AS avg_asn_concentration,
|
||||
avg(ja4_country_concentration) AS avg_country_concentration,
|
||||
|
||||
argMax(threat_level, detected_at) AS last_threat_level
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND ja4 != '' AND ja4 IS NOT NULL
|
||||
GROUP BY ja4
|
||||
HAVING total_detections >= %(min_detections)s
|
||||
ORDER BY ua_ch_mismatch_pct DESC, total_detections DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
result = db.query(query, {
|
||||
"hours": hours,
|
||||
"min_detections": min_detections,
|
||||
"limit": limit,
|
||||
})
|
||||
|
||||
# Fetch top UA per JA4 from view_dashboard_user_agents
|
||||
ja4_list = [str(r[0]) for r in result.result_rows if r[0]]
|
||||
ua_by_ja4: dict = {}
|
||||
if ja4_list:
|
||||
ja4_sql = ", ".join(f"'{j}'" for j in ja4_list[:100])
|
||||
ua_q = f"""
|
||||
SELECT ja4, groupArray(5)(ua) AS top_uas
|
||||
FROM (
|
||||
SELECT ja4, arrayJoin(user_agents) AS ua, sum(requests) AS cnt
|
||||
FROM view_dashboard_user_agents
|
||||
WHERE ja4 IN ({ja4_sql})
|
||||
AND hour >= now() - INTERVAL {hours} HOUR
|
||||
AND ua != ''
|
||||
GROUP BY ja4, ua
|
||||
ORDER BY ja4, cnt DESC
|
||||
)
|
||||
GROUP BY ja4
|
||||
"""
|
||||
try:
|
||||
ua_res = db.query(ua_q)
|
||||
for ua_row in ua_res.result_rows:
|
||||
j4 = str(ua_row[0])
|
||||
if ua_row[1]:
|
||||
ua_by_ja4[j4] = list(ua_row[1])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
ja4 = str(row[0])
|
||||
ua_ch_mismatch_pct = float(row[4] or 0)
|
||||
sni_mismatch_pct = float(row[6] or 0)
|
||||
alpn_mismatch_pct = float(row[8] or 0)
|
||||
avg_browser_score = float(row[9] or 0)
|
||||
rare_ja4_pct = float(row[11] or 0)
|
||||
ua_rotating_pct = float(row[13] or 0)
|
||||
alpn_missing_count = int(row[14] or 0)
|
||||
total = int(row[1] or 1)
|
||||
|
||||
top_uas = ua_by_ja4.get(ja4, [])
|
||||
ua_classes = [_classify_ua(u) for u in top_uas]
|
||||
has_bot_ua = any(c == "bot" for c in ua_classes)
|
||||
has_browser_ua = any(c == "browser" for c in ua_classes)
|
||||
|
||||
# Spoofing confidence score [0-100]:
|
||||
# UA/CH mismatch est le signal le plus fort (poids 40)
|
||||
# Browser UA avec score navigateur élevé mais indicateurs bot (poids 25)
|
||||
# SNI/ALPN mismatches (poids 15)
|
||||
# is_rare_ja4 avec gros volume (poids 10)
|
||||
# UA rotating (poids 10)
|
||||
spoof_score = min(100, round(
|
||||
ua_ch_mismatch_pct * 0.40
|
||||
+ (avg_browser_score * 25 / 100 if has_bot_ua else 0)
|
||||
+ sni_mismatch_pct * 0.10
|
||||
+ alpn_mismatch_pct * 0.05
|
||||
+ rare_ja4_pct * 0.10
|
||||
+ ua_rotating_pct * 0.10
|
||||
+ (10 if alpn_missing_count > total * 0.3 else 0)
|
||||
))
|
||||
|
||||
# Classification du JA4
|
||||
if spoof_score >= 60:
|
||||
classification = "spoofed_browser"
|
||||
elif has_bot_ua and avg_browser_score < 30:
|
||||
classification = "known_bot"
|
||||
elif has_browser_ua and ua_ch_mismatch_pct < 10:
|
||||
classification = "legitimate_browser"
|
||||
else:
|
||||
classification = "suspicious"
|
||||
|
||||
items.append({
|
||||
"ja4": ja4,
|
||||
"classification": classification,
|
||||
"spoofing_score": spoof_score,
|
||||
"total_detections": int(row[1] or 0),
|
||||
"unique_ips": int(row[2] or 0),
|
||||
"indicators": {
|
||||
"ua_ch_mismatch_pct": ua_ch_mismatch_pct,
|
||||
"sni_mismatch_pct": sni_mismatch_pct,
|
||||
"alpn_mismatch_pct": alpn_mismatch_pct,
|
||||
"avg_browser_score": round(avg_browser_score, 1),
|
||||
"rare_ja4_pct": rare_ja4_pct,
|
||||
"ua_rotating_pct": ua_rotating_pct,
|
||||
"alpn_missing_count": alpn_missing_count,
|
||||
"avg_asn_concentration": round(float(row[18] or 0), 3),
|
||||
"avg_country_concentration": round(float(row[19] or 0), 3),
|
||||
},
|
||||
"top_user_agents": [
|
||||
{"ua": u, "type": _classify_ua(u)} for u in top_uas
|
||||
],
|
||||
"threat_breakdown": {
|
||||
"critical": int(row[16] or 0),
|
||||
"high": int(row[17] or 0),
|
||||
"last_level": str(row[20] or "LOW"),
|
||||
},
|
||||
})
|
||||
|
||||
# Trier: spoofed_browser d'abord, puis par score
|
||||
items.sort(key=lambda x: (-x["spoofing_score"], -x["total_detections"]))
|
||||
|
||||
return {
|
||||
"items": items,
|
||||
"total": len(items),
|
||||
"period_hours": hours,
|
||||
"summary": {
|
||||
"spoofed_browser": sum(1 for i in items if i["classification"] == "spoofed_browser"),
|
||||
"known_bot": sum(1 for i in items if i["classification"] == "known_bot"),
|
||||
"suspicious": sum(1 for i in items if i["classification"] == "suspicious"),
|
||||
"legitimate_browser": sum(1 for i in items if i["classification"] == "legitimate_browser"),
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 2 — Matrice JA4 × User-Agent
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ja4-ua-matrix")
|
||||
async def get_ja4_ua_matrix(
|
||||
hours: int = Query(24, ge=1, le=168),
|
||||
min_ips: int = Query(3, ge=1, description="Nombre minimum d'IPs pour inclure un JA4"),
|
||||
limit: int = Query(30, ge=1, le=100),
|
||||
):
|
||||
"""
|
||||
Matrice JA4 × User-Agent.
|
||||
|
||||
Pour chaque JA4:
|
||||
- Top User-Agents associés (depuis view_dashboard_entities)
|
||||
- Taux de ua_ch_mismatch
|
||||
- Classification UA (bot / browser / script)
|
||||
- Indicateur de spoofing si browser_score élevé + UA non-navigateur
|
||||
"""
|
||||
try:
|
||||
# Stats JA4 depuis ml_detected_anomalies
|
||||
stats_query = """
|
||||
SELECT
|
||||
ja4,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
count() AS total_detections,
|
||||
round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
|
||||
avg(modern_browser_score) AS avg_browser_score,
|
||||
countIf(is_rare_ja4 = true) AS rare_count,
|
||||
countIf(is_ua_rotating = true) AS rotating_count,
|
||||
argMax(threat_level, detected_at) AS last_threat
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND ja4 != '' AND ja4 IS NOT NULL
|
||||
GROUP BY ja4
|
||||
HAVING unique_ips >= %(min_ips)s
|
||||
ORDER BY ua_ch_mismatch_pct DESC, unique_ips DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
stats_res = db.query(stats_query, {"hours": hours, "min_ips": min_ips, "limit": limit})
|
||||
ja4_list = [str(r[0]) for r in stats_res.result_rows]
|
||||
|
||||
if not ja4_list:
|
||||
return {"items": [], "total": 0, "period_hours": hours}
|
||||
|
||||
# UAs par JA4 depuis view_dashboard_user_agents
|
||||
ja4_sql = ", ".join(f"'{j}'" for j in ja4_list)
|
||||
ua_query = f"""
|
||||
SELECT
|
||||
ja4,
|
||||
ua,
|
||||
sum(requests) AS cnt
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE ja4 IN ({ja4_sql})
|
||||
AND hour >= now() - INTERVAL {hours} HOUR
|
||||
AND ua != ''
|
||||
GROUP BY ja4, ua
|
||||
ORDER BY ja4, cnt DESC
|
||||
"""
|
||||
|
||||
ua_by_ja4: dict = {}
|
||||
try:
|
||||
ua_res = db.query(ua_query)
|
||||
for row in ua_res.result_rows:
|
||||
j4 = str(row[0])
|
||||
if j4 not in ua_by_ja4:
|
||||
ua_by_ja4[j4] = []
|
||||
if len(ua_by_ja4[j4]) < 8:
|
||||
ua_by_ja4[j4].append({"ua": str(row[1]), "count": int(row[2] or 0)})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
items = []
|
||||
for row in stats_res.result_rows:
|
||||
ja4 = str(row[0])
|
||||
unique_ips = int(row[1] or 0)
|
||||
ua_ch_mismatch_pct = float(row[3] or 0)
|
||||
avg_browser_score = float(row[4] or 0)
|
||||
|
||||
top_uas = ua_by_ja4.get(ja4, [])
|
||||
ua_total = sum(u["count"] for u in top_uas) or 1
|
||||
|
||||
classified_uas = []
|
||||
for u in top_uas:
|
||||
ua_type = _classify_ua(u["ua"])
|
||||
classified_uas.append({
|
||||
"ua": u["ua"],
|
||||
"count": u["count"],
|
||||
"pct": round(u["count"] * 100 / ua_total, 1),
|
||||
"type": ua_type,
|
||||
})
|
||||
|
||||
bot_pct = sum(u["pct"] for u in classified_uas if u["type"] == "bot")
|
||||
browser_pct = sum(u["pct"] for u in classified_uas if u["type"] == "browser")
|
||||
|
||||
# Spoofing flag: JA4 ressemble à un navigateur (browser_score élevé)
|
||||
# mais les UAs sont des bots/scripts
|
||||
is_spoofing = avg_browser_score > 50 and bot_pct > 30 and ua_ch_mismatch_pct > 20
|
||||
|
||||
items.append({
|
||||
"ja4": ja4,
|
||||
"unique_ips": unique_ips,
|
||||
"total_detections": int(row[2] or 0),
|
||||
"ua_ch_mismatch_pct": ua_ch_mismatch_pct,
|
||||
"avg_browser_score": round(avg_browser_score, 1),
|
||||
"rare_count": int(row[5] or 0),
|
||||
"rotating_count": int(row[6] or 0),
|
||||
"last_threat": str(row[7] or "LOW"),
|
||||
"user_agents": classified_uas,
|
||||
"ua_summary": {
|
||||
"bot_pct": round(bot_pct, 1),
|
||||
"browser_pct": round(browser_pct, 1),
|
||||
"script_pct": round(100 - bot_pct - browser_pct, 1),
|
||||
"total_distinct": len(top_uas),
|
||||
},
|
||||
"is_spoofing_suspect": is_spoofing,
|
||||
})
|
||||
|
||||
return {
|
||||
"items": items,
|
||||
"total": len(items),
|
||||
"period_hours": hours,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 3 — Analyse globale des User-Agents
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ua-analysis")
|
||||
async def get_ua_analysis(
|
||||
hours: int = Query(24, ge=1, le=168),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""
|
||||
Analyse globale des User-Agents dans les détections.
|
||||
|
||||
Identifie:
|
||||
- UAs de type bot/script
|
||||
- UAs browser légitimes vs UAs browser utilisés par des bots (via ua_ch_mismatch)
|
||||
- UAs rares/suspects qui tournent (is_ua_rotating)
|
||||
- Distribution JA4 par UA pour détecter les UAs multi-fingerprints (rotation)
|
||||
"""
|
||||
try:
|
||||
# Top UAs globaux depuis view_dashboard_user_agents
|
||||
ua_global_query = """
|
||||
SELECT
|
||||
ua,
|
||||
sum(requests) AS ip_count
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE hour >= now() - INTERVAL %(hours)s HOUR
|
||||
AND ua != ''
|
||||
GROUP BY ua
|
||||
ORDER BY ip_count DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
ua_global_res = db.query(ua_global_query, {"hours": hours, "limit": limit})
|
||||
top_uas = [str(r[0]) for r in ua_global_res.result_rows]
|
||||
|
||||
# Pour chaque UA, chercher ses JA4 via view_dashboard_user_agents
|
||||
ua_sql = ", ".join(f"'{u.replace(chr(39), chr(39)*2)}'" for u in top_uas[:50]) if top_uas else "''"
|
||||
ja4_per_ua_query = f"""
|
||||
SELECT
|
||||
ua,
|
||||
uniq(ja4) AS unique_ja4s,
|
||||
groupUniqArray(3)(ja4) AS sample_ja4s
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE ua IN ({ua_sql})
|
||||
AND hour >= now() - INTERVAL {hours} HOUR
|
||||
AND ua != ''
|
||||
AND ja4 != ''
|
||||
GROUP BY ua
|
||||
"""
|
||||
ja4_by_ua: dict = {}
|
||||
try:
|
||||
ja4_res = db.query(ja4_per_ua_query)
|
||||
for r in ja4_res.result_rows:
|
||||
ja4_by_ua[str(r[0])] = {
|
||||
"unique_ja4s": int(r[1] or 0),
|
||||
"sample_ja4s": list(r[2] or []),
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# IPs avec is_ua_rotating depuis ml_detected_anomalies
|
||||
rotating_query = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
avg(ua_ch_mismatch) AS avg_ua_ch_mismatch
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND is_ua_rotating = true
|
||||
GROUP BY clean_ip
|
||||
ORDER BY avg_ua_ch_mismatch DESC
|
||||
"""
|
||||
rotating_ips: list = []
|
||||
try:
|
||||
rot_res = db.query(rotating_query, {"hours": hours})
|
||||
rotating_ips = [str(r[0]) for r in rot_res.result_rows]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Construire la réponse
|
||||
items = []
|
||||
for row in ua_global_res.result_rows:
|
||||
ua = str(row[0])
|
||||
ip_count = int(row[1] or 0)
|
||||
ua_type = _classify_ua(ua)
|
||||
ja4_info = ja4_by_ua.get(ua, {"unique_ja4s": 0, "sample_ja4s": []})
|
||||
|
||||
# UA multi-JA4 est suspect: un vrai navigateur a généralement 1-2 JA4
|
||||
multi_ja4_flag = ja4_info["unique_ja4s"] > 3
|
||||
|
||||
items.append({
|
||||
"user_agent": ua,
|
||||
"type": ua_type,
|
||||
"ip_count": ip_count,
|
||||
"unique_ja4_count": ja4_info["unique_ja4s"],
|
||||
"sample_ja4s": ja4_info["sample_ja4s"],
|
||||
"is_multi_ja4_suspect": multi_ja4_flag,
|
||||
"risk_flags": _build_ua_risk_flags(ua, ua_type, ja4_info["unique_ja4s"], ip_count),
|
||||
})
|
||||
|
||||
# IPs avec rotation d'UA
|
||||
ua_rotating_stats = {
|
||||
"rotating_ip_count": len(rotating_ips),
|
||||
"sample_rotating_ips": rotating_ips[:10],
|
||||
}
|
||||
|
||||
return {
|
||||
"items": items,
|
||||
"total": len(items),
|
||||
"period_hours": hours,
|
||||
"ua_rotating_stats": ua_rotating_stats,
|
||||
"summary": {
|
||||
"bot_count": sum(1 for i in items if i["type"] == "bot"),
|
||||
"browser_count": sum(1 for i in items if i["type"] == "browser"),
|
||||
"script_count": sum(1 for i in items if i["type"] == "script"),
|
||||
"multi_ja4_suspect_count": sum(1 for i in items if i["is_multi_ja4_suspect"]),
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
def _build_ua_risk_flags(ua: str, ua_type: str, unique_ja4s: int, ip_count: int) -> list:
|
||||
flags = []
|
||||
if ua_type == "bot":
|
||||
flags.append("ua_bot_signature")
|
||||
elif ua_type == "script":
|
||||
flags.append("ua_script_library")
|
||||
if unique_ja4s > 5:
|
||||
flags.append("ja4_rotation_suspect")
|
||||
if unique_ja4s > 3 and ua_type == "browser":
|
||||
flags.append("browser_ua_multi_fingerprint")
|
||||
if ip_count > 100:
|
||||
flags.append("high_volume")
|
||||
return flags
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 4 — JA4 d'un IP spécifique: analyse de cohérence UA/JA4
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/ip/{ip}/coherence")
|
||||
async def get_ip_fingerprint_coherence(ip: str):
|
||||
"""
|
||||
Analyse la cohérence JA4/UA pour une IP spécifique.
|
||||
|
||||
Répond à la question: "Cette IP spoofait-elle son fingerprint?"
|
||||
|
||||
Calcule un score de cohérence basé sur:
|
||||
- Correspondance entre JA4 (TLS client fingerprint) et User-Agent
|
||||
- ua_ch_mismatch (User-Agent vs Client Hints)
|
||||
- modern_browser_score vs type d'UA réel
|
||||
- Nombre de JA4 distincts utilisés (rotation)
|
||||
- sni_host_mismatch, alpn_http_mismatch
|
||||
"""
|
||||
try:
|
||||
# Données depuis ml_detected_anomalies
|
||||
ml_query = """
|
||||
SELECT
|
||||
ja4,
|
||||
ua_ch_mismatch,
|
||||
modern_browser_score,
|
||||
sni_host_mismatch,
|
||||
alpn_http_mismatch,
|
||||
is_alpn_missing,
|
||||
is_rare_ja4,
|
||||
is_ua_rotating,
|
||||
distinct_ja4_count,
|
||||
header_count,
|
||||
has_accept_language,
|
||||
has_cookie,
|
||||
has_referer,
|
||||
header_order_shared_count,
|
||||
detected_at,
|
||||
threat_level,
|
||||
window_mss_ratio,
|
||||
tcp_jitter_variance,
|
||||
multiplexing_efficiency
|
||||
FROM ml_detected_anomalies
|
||||
WHERE src_ip = %(ip)s
|
||||
ORDER BY detected_at DESC
|
||||
"""
|
||||
ml_res = db.query(ml_query, {"ip": ip})
|
||||
|
||||
if not ml_res.result_rows:
|
||||
raise HTTPException(status_code=404, detail="IP non trouvée dans les détections")
|
||||
|
||||
# User-agents réels depuis view_dashboard_user_agents
|
||||
ua_query = """
|
||||
SELECT ua, sum(requests) AS cnt
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE toString(src_ip) = %(ip)s
|
||||
AND hour >= now() - INTERVAL 72 HOUR
|
||||
AND ua != ''
|
||||
GROUP BY ua ORDER BY cnt DESC
|
||||
"""
|
||||
ua_res = db.query(ua_query, {"ip": ip})
|
||||
top_uas = [{"ua": str(r[0]), "count": int(r[1] or 0), "type": _classify_ua(str(r[0]))}
|
||||
for r in ua_res.result_rows]
|
||||
|
||||
# Agréger les indicateurs de la dernière session
|
||||
rows = ml_res.result_rows
|
||||
latest = rows[0]
|
||||
total_rows = len(rows)
|
||||
|
||||
ua_ch_mismatch_count = sum(1 for r in rows if r[1])
|
||||
sni_mismatch_count = sum(1 for r in rows if r[3])
|
||||
alpn_mismatch_count = sum(1 for r in rows if r[4])
|
||||
is_rare_count = sum(1 for r in rows if r[6])
|
||||
is_rotating = any(r[7] for r in rows)
|
||||
distinct_ja4s = {str(r[0]) for r in rows if r[0]}
|
||||
avg_browser_score = sum(int(r[2] or 0) for r in rows) / total_rows
|
||||
|
||||
# UA analysis
|
||||
has_browser_ua = any(u["type"] == "browser" for u in top_uas)
|
||||
has_bot_ua = any(u["type"] == "bot" for u in top_uas)
|
||||
primary_ua_type = top_uas[0]["type"] if top_uas else "empty"
|
||||
|
||||
# Calcul du score de spoofing
|
||||
spoof_score = min(100, round(
|
||||
(ua_ch_mismatch_count / total_rows * 100) * 0.40
|
||||
+ (avg_browser_score * 0.20 if has_bot_ua else 0)
|
||||
+ (sni_mismatch_count / total_rows * 100) * 0.10
|
||||
+ (alpn_mismatch_count / total_rows * 100) * 0.05
|
||||
+ (len(distinct_ja4s) * 5 if len(distinct_ja4s) > 2 else 0)
|
||||
+ (15 if is_rotating else 0)
|
||||
+ (10 if is_rare_count > total_rows * 0.5 else 0)
|
||||
))
|
||||
|
||||
# Verdict
|
||||
if spoof_score >= 70:
|
||||
verdict = "high_confidence_spoofing"
|
||||
elif spoof_score >= 40:
|
||||
verdict = "suspicious_spoofing"
|
||||
elif has_bot_ua and avg_browser_score < 20:
|
||||
verdict = "known_bot_no_spoofing"
|
||||
elif has_browser_ua and spoof_score < 20:
|
||||
verdict = "legitimate_browser"
|
||||
else:
|
||||
verdict = "inconclusive"
|
||||
|
||||
# Explication humaine
|
||||
explanation = []
|
||||
if ua_ch_mismatch_count > total_rows * 0.3:
|
||||
explanation.append(f"UA-Client-Hints mismatch sur {round(ua_ch_mismatch_count*100/total_rows)}% des requêtes")
|
||||
if has_bot_ua and avg_browser_score > 40:
|
||||
explanation.append(f"JA4 ressemble à un navigateur (score {round(avg_browser_score)}/100) mais UA est de type bot")
|
||||
if len(distinct_ja4s) > 2:
|
||||
explanation.append(f"{len(distinct_ja4s)} JA4 distincts utilisés → rotation de fingerprint")
|
||||
if is_rotating:
|
||||
explanation.append("is_ua_rotating détecté → rotation d'User-Agent confirmée")
|
||||
if sni_mismatch_count > 0:
|
||||
explanation.append(f"SNI ≠ Host header sur {sni_mismatch_count}/{total_rows} requêtes")
|
||||
if not explanation:
|
||||
explanation.append("Aucun indicateur de spoofing majeur détecté")
|
||||
|
||||
return {
|
||||
"ip": ip,
|
||||
"verdict": verdict,
|
||||
"spoofing_score": spoof_score,
|
||||
"explanation": explanation,
|
||||
"indicators": {
|
||||
"ua_ch_mismatch_rate": round(ua_ch_mismatch_count / total_rows * 100, 1),
|
||||
"sni_mismatch_rate": round(sni_mismatch_count / total_rows * 100, 1),
|
||||
"alpn_mismatch_rate": round(alpn_mismatch_count / total_rows * 100, 1),
|
||||
"avg_browser_score": round(avg_browser_score, 1),
|
||||
"distinct_ja4_count": len(distinct_ja4s),
|
||||
"is_ua_rotating": is_rotating,
|
||||
"rare_ja4_rate": round(is_rare_count / total_rows * 100, 1),
|
||||
},
|
||||
"fingerprints": {
|
||||
"ja4_list": list(distinct_ja4s),
|
||||
"latest_ja4": str(latest[0] or ""),
|
||||
},
|
||||
"user_agents": top_uas,
|
||||
"latest_detection": {
|
||||
"detected_at": latest[14].isoformat() if latest[14] else "",
|
||||
"threat_level": str(latest[15] or "LOW"),
|
||||
"modern_browser_score": int(latest[2] or 0),
|
||||
"header_count": int(latest[9] or 0),
|
||||
"has_accept_language": bool(latest[10]),
|
||||
"has_cookie": bool(latest[11]),
|
||||
"has_referer": bool(latest[12]),
|
||||
"header_order_shared_count": int(latest[13] or 0),
|
||||
},
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT 5 — JA4 légitimes (baseline / whitelist)
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/legitimate-ja4")
|
||||
async def get_legitimate_ja4(
|
||||
hours: int = Query(168, ge=24, le=720, description="Fenêtre pour établir la baseline"),
|
||||
min_ips: int = Query(50, ge=5, description="Nombre minimum d'IPs pour qualifier un JA4 de légitime"),
|
||||
):
|
||||
"""
|
||||
Établit une baseline des JA4 fingerprints légitimes.
|
||||
|
||||
Un JA4 est considéré légitime si:
|
||||
- Il est utilisé par un grand nombre d'IPs distinctes (> min_ips)
|
||||
- Son taux de ua_ch_mismatch est faible (< 5%)
|
||||
- Son modern_browser_score est élevé (> 60)
|
||||
- Il n'est PAS is_rare_ja4
|
||||
- Ses UAs sont dominés par des navigateurs connus
|
||||
|
||||
Utile comme whitelist pour réduire les faux positifs.
|
||||
"""
|
||||
try:
|
||||
query = """
|
||||
SELECT
|
||||
ja4,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
count() AS total_detections,
|
||||
round(countIf(ua_ch_mismatch = true) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
|
||||
avg(modern_browser_score) AS avg_browser_score,
|
||||
countIf(is_rare_ja4 = true) AS rare_count,
|
||||
round(countIf(threat_level = 'CRITICAL') * 100.0 / count(), 2) AS critical_pct,
|
||||
round(countIf(threat_level = 'HIGH') * 100.0 / count(), 2) AS high_pct
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND ja4 != '' AND ja4 IS NOT NULL
|
||||
GROUP BY ja4
|
||||
HAVING unique_ips >= %(min_ips)s
|
||||
AND ua_ch_mismatch_pct < 5.0
|
||||
AND avg_browser_score > 60
|
||||
AND rare_count = 0
|
||||
ORDER BY unique_ips DESC
|
||||
"""
|
||||
|
||||
result = db.query(query, {"hours": hours, "min_ips": min_ips})
|
||||
|
||||
items = [
|
||||
{
|
||||
"ja4": str(row[0]),
|
||||
"unique_ips": int(row[1] or 0),
|
||||
"total_detections": int(row[2] or 0),
|
||||
"ua_ch_mismatch_pct": float(row[3] or 0),
|
||||
"avg_browser_score": round(float(row[4] or 0), 1),
|
||||
"critical_pct": float(row[6] or 0),
|
||||
"high_pct": float(row[7] or 0),
|
||||
"legitimacy_confidence": min(100, round(
|
||||
(1 - float(row[3] or 0) / 100) * 40
|
||||
+ float(row[4] or 0) * 0.40
|
||||
+ min(int(row[1] or 0) / min_ips, 1) * 20
|
||||
)),
|
||||
}
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
return {
|
||||
"items": items,
|
||||
"total": len(items),
|
||||
"period_hours": hours,
|
||||
"note": "Ces JA4 sont candidats à une whitelist. Vérifier manuellement avant de whitelister.",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ENDPOINT — Corrélation JA4 × ASN / Pays (C5)
|
||||
# Détecte les JA4 fortement concentrés sur un seul ASN ou pays
|
||||
# → signal de botnet ciblé ou d'infrastructure de test/attaque partagée
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/asn-correlation")
|
||||
async def get_ja4_asn_correlation(
|
||||
min_concentration: float = Query(0.7, ge=0.0, le=1.0, description="Seuil min de concentration ASN ou pays"),
|
||||
min_ips: int = Query(5, ge=1, description="Nombre minimum d'IPs par JA4"),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""
|
||||
Identifie les JA4 fingerprints fortement concentrés sur un seul ASN ou pays.
|
||||
Un JA4 avec asn_concentration ≥ 0.7 signifie que ≥70% des IPs utilisant ce fingerprint
|
||||
proviennent du même ASN → infrastructure de bot partagée ou datacenter suspect.
|
||||
"""
|
||||
try:
|
||||
# Two-pass: first aggregate per (ja4, asn) to get IP counts per ASN,
|
||||
# then aggregate per ja4 to compute concentration ratio
|
||||
sql = """
|
||||
SELECT
|
||||
ja4,
|
||||
sum(ips_per_combo) AS unique_ips,
|
||||
uniq(src_asn) AS unique_asns,
|
||||
uniq(src_country_code) AS unique_countries,
|
||||
toString(argMax(src_asn, ips_per_combo)) AS top_asn_number,
|
||||
argMax(asn_name, ips_per_combo) AS top_asn_name,
|
||||
argMax(src_country_code, country_ips) AS dominant_country,
|
||||
sum(total_hits) AS total_hits,
|
||||
round(max(ips_per_combo) / greatest(sum(ips_per_combo), 1), 3) AS asn_concentration,
|
||||
round(max(country_ips) / greatest(sum(ips_per_combo), 1), 3) AS country_concentration
|
||||
FROM (
|
||||
SELECT
|
||||
ja4,
|
||||
src_asn,
|
||||
src_country_code,
|
||||
any(src_as_name) AS asn_name,
|
||||
uniq(src_ip) AS ips_per_combo,
|
||||
uniq(src_ip) AS country_ips,
|
||||
sum(hits) AS total_hits
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
AND ja4 != ''
|
||||
GROUP BY ja4, src_asn, src_country_code
|
||||
)
|
||||
GROUP BY ja4
|
||||
HAVING unique_ips >= %(min_ips)s
|
||||
AND (asn_concentration >= %(min_conc)s OR country_concentration >= %(min_conc)s)
|
||||
ORDER BY asn_concentration DESC, unique_ips DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"min_ips": min_ips, "min_conc": min_concentration, "limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
ja4 = str(row[0])
|
||||
unique_ips = int(row[1])
|
||||
unique_asns = int(row[2])
|
||||
unique_countries = int(row[3])
|
||||
top_asn_number = str(row[4] or "")
|
||||
top_asn_name = str(row[5] or "")
|
||||
dominant_country = str(row[6] or "")
|
||||
total_hits = int(row[7] or 0)
|
||||
asn_concentration = float(row[8] or 0)
|
||||
country_concentration = float(row[9] or 0)
|
||||
|
||||
if asn_concentration >= 0.85:
|
||||
corr_type, risk = "asn_monopoly", "high"
|
||||
elif asn_concentration >= min_concentration:
|
||||
corr_type, risk = "asn_dominant", "medium"
|
||||
elif country_concentration >= min_concentration:
|
||||
corr_type, risk = "geo_targeted", "medium"
|
||||
else:
|
||||
corr_type, risk = "distributed", "low"
|
||||
|
||||
items.append({
|
||||
"ja4": ja4,
|
||||
"unique_ips": unique_ips,
|
||||
"unique_asns": unique_asns,
|
||||
"unique_countries": unique_countries,
|
||||
"top_asn_name": top_asn_name,
|
||||
"top_asn_number": top_asn_number,
|
||||
"dominant_country": dominant_country,
|
||||
"total_hits": total_hits,
|
||||
"asn_concentration": asn_concentration,
|
||||
"country_concentration":country_concentration,
|
||||
"correlation_type": corr_type,
|
||||
"risk": risk,
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
101
services/dashboard/backend/routes/header_fingerprint.py
Normal file
101
services/dashboard/backend/routes/header_fingerprint.py
Normal file
@ -0,0 +1,101 @@
|
||||
"""
|
||||
Endpoints pour l'analyse des empreintes d'en-têtes HTTP
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"])
|
||||
|
||||
|
||||
@router.get("/clusters")
|
||||
async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
|
||||
"""Clusters d'empreintes d'en-têtes groupés par header_order_hash."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
header_order_hash AS hash,
|
||||
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
|
||||
avg(modern_browser_score) AS avg_browser_score,
|
||||
sum(ua_ch_mismatch) AS ua_ch_mismatch_count,
|
||||
round(sum(ua_ch_mismatch) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
|
||||
groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes,
|
||||
round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct,
|
||||
round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct
|
||||
FROM mabase_prod.agg_header_fingerprint_1h
|
||||
GROUP BY header_order_hash
|
||||
ORDER BY unique_ips DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
|
||||
total_sql = """
|
||||
SELECT uniq(header_order_hash)
|
||||
FROM mabase_prod.agg_header_fingerprint_1h
|
||||
"""
|
||||
total_clusters = int(db.query(total_sql).result_rows[0][0])
|
||||
|
||||
clusters = []
|
||||
for row in result.result_rows:
|
||||
h = str(row[0])
|
||||
unique_ips = int(row[1])
|
||||
avg_browser_score = float(row[2] or 0)
|
||||
ua_ch_mismatch_cnt = int(row[3])
|
||||
ua_ch_mismatch_pct = float(row[4] or 0)
|
||||
top_modes = list(set(str(m) for m in (row[5] or [])))
|
||||
has_cookie_pct = float(row[6] or 0)
|
||||
has_referer_pct = float(row[7] or 0)
|
||||
|
||||
if avg_browser_score >= 90 and ua_ch_mismatch_pct < 5:
|
||||
classification = "legitimate"
|
||||
elif ua_ch_mismatch_pct > 50:
|
||||
classification = "bot_suspicious"
|
||||
else:
|
||||
classification = "mixed"
|
||||
|
||||
clusters.append({
|
||||
"hash": h,
|
||||
"unique_ips": unique_ips,
|
||||
"avg_browser_score": round(avg_browser_score, 2),
|
||||
"ua_ch_mismatch_count":ua_ch_mismatch_cnt,
|
||||
"ua_ch_mismatch_pct": ua_ch_mismatch_pct,
|
||||
"top_sec_fetch_modes": top_modes,
|
||||
"has_cookie_pct": has_cookie_pct,
|
||||
"has_referer_pct": has_referer_pct,
|
||||
"classification": classification,
|
||||
})
|
||||
return {"clusters": clusters, "total_clusters": total_clusters}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/cluster/{hash}/ips")
|
||||
async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)):
|
||||
"""Liste des IPs appartenant à un cluster d'en-têtes donné."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
any(modern_browser_score) AS browser_score,
|
||||
any(ua_ch_mismatch) AS ua_ch_mismatch,
|
||||
any(sec_fetch_mode) AS sec_fetch_mode,
|
||||
any(sec_fetch_dest) AS sec_fetch_dest
|
||||
FROM mabase_prod.agg_header_fingerprint_1h
|
||||
WHERE header_order_hash = %(hash)s
|
||||
GROUP BY src_ip
|
||||
ORDER BY browser_score DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"hash": hash, "limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"browser_score": int(row[1] or 0),
|
||||
"ua_ch_mismatch": int(row[2] or 0),
|
||||
"sec_fetch_mode": str(row[3] or ""),
|
||||
"sec_fetch_dest": str(row[4] or ""),
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
144
services/dashboard/backend/routes/heatmap.py
Normal file
144
services/dashboard/backend/routes/heatmap.py
Normal file
@ -0,0 +1,144 @@
|
||||
"""
|
||||
Endpoints pour la heatmap temporelle (hits par heure / hôte)
|
||||
"""
|
||||
from collections import defaultdict
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/heatmap", tags=["heatmap"])
|
||||
|
||||
|
||||
@router.get("/hourly")
|
||||
async def get_heatmap_hourly():
|
||||
"""Hits agrégés par heure sur les 72 dernières heures."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
toHour(window_start) AS hour,
|
||||
sum(hits) AS hits,
|
||||
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
|
||||
max(max_requests_per_sec) AS max_rps
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 72 HOUR
|
||||
GROUP BY hour
|
||||
ORDER BY hour ASC
|
||||
"""
|
||||
result = db.query(sql)
|
||||
hours = [
|
||||
{
|
||||
"hour": int(row[0]),
|
||||
"hits": int(row[1]),
|
||||
"unique_ips": int(row[2]),
|
||||
"max_rps": int(row[3]),
|
||||
}
|
||||
for row in result.result_rows
|
||||
]
|
||||
return {"hours": hours}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/top-hosts")
|
||||
async def get_heatmap_top_hosts(limit: int = Query(20, ge=1, le=100)):
|
||||
"""Hôtes les plus ciblés avec répartition horaire sur 24h."""
|
||||
try:
|
||||
# Aggregate overall stats per host
|
||||
agg_sql = """
|
||||
SELECT
|
||||
host,
|
||||
sum(hits) AS total_hits,
|
||||
uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
|
||||
uniq(ja4) AS unique_ja4s
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 72 HOUR
|
||||
GROUP BY host
|
||||
ORDER BY total_hits DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
agg_res = db.query(agg_sql, {"limit": limit})
|
||||
top_hosts = [str(r[0]) for r in agg_res.result_rows]
|
||||
host_stats = {
|
||||
str(r[0]): {
|
||||
"host": str(r[0]),
|
||||
"total_hits": int(r[1]),
|
||||
"unique_ips": int(r[2]),
|
||||
"unique_ja4s":int(r[3]),
|
||||
}
|
||||
for r in agg_res.result_rows
|
||||
}
|
||||
|
||||
if not top_hosts:
|
||||
return {"items": []}
|
||||
|
||||
# Hourly breakdown per host
|
||||
hourly_sql = """
|
||||
SELECT
|
||||
host,
|
||||
toHour(window_start) AS hour,
|
||||
sum(hits) AS hits
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 72 HOUR
|
||||
AND host IN %(hosts)s
|
||||
GROUP BY host, hour
|
||||
"""
|
||||
hourly_res = db.query(hourly_sql, {"hosts": top_hosts})
|
||||
|
||||
hourly_map: dict = defaultdict(lambda: [0] * 24)
|
||||
for row in hourly_res.result_rows:
|
||||
h = str(row[0])
|
||||
hour = int(row[1])
|
||||
hits = int(row[2])
|
||||
hourly_map[h][hour] += hits
|
||||
|
||||
items = []
|
||||
for host in top_hosts:
|
||||
entry = dict(host_stats[host])
|
||||
entry["hourly_hits"] = hourly_map[host]
|
||||
items.append(entry)
|
||||
|
||||
return {"items": items}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/matrix")
|
||||
async def get_heatmap_matrix():
|
||||
"""Matrice top-15 hôtes × 24 heures (sum hits) sur les 72 dernières heures."""
|
||||
try:
|
||||
top_sql = """
|
||||
SELECT host, sum(hits) AS total_hits
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 72 HOUR
|
||||
GROUP BY host
|
||||
ORDER BY total_hits DESC
|
||||
"""
|
||||
top_res = db.query(top_sql)
|
||||
top_hosts = [str(r[0]) for r in top_res.result_rows]
|
||||
|
||||
if not top_hosts:
|
||||
return {"hosts": [], "matrix": []}
|
||||
|
||||
cell_sql = """
|
||||
SELECT
|
||||
host,
|
||||
toHour(window_start) AS hour,
|
||||
sum(hits) AS hits
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 72 HOUR
|
||||
AND host IN %(hosts)s
|
||||
GROUP BY host, hour
|
||||
"""
|
||||
cell_res = db.query(cell_sql, {"hosts": top_hosts})
|
||||
|
||||
matrix_map: dict = defaultdict(lambda: [0] * 24)
|
||||
for row in cell_res.result_rows:
|
||||
h = str(row[0])
|
||||
hour = int(row[1])
|
||||
hits = int(row[2])
|
||||
matrix_map[h][hour] += hits
|
||||
|
||||
matrix = [matrix_map[h] for h in top_hosts]
|
||||
return {"hosts": top_hosts, "matrix": matrix}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
266
services/dashboard/backend/routes/incidents.py
Normal file
266
services/dashboard/backend/routes/incidents.py
Normal file
@ -0,0 +1,266 @@
|
||||
"""
|
||||
Routes pour la gestion des incidents clusterisés
|
||||
"""
|
||||
import hashlib
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/incidents", tags=["incidents"])
|
||||
|
||||
|
||||
@router.get("/clusters")
|
||||
async def get_incident_clusters(
|
||||
hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle en heures"),
|
||||
min_severity: str = Query("LOW", description="Niveau de sévérité minimum"),
|
||||
limit: int = Query(20, ge=1, le=100, description="Nombre maximum de clusters")
|
||||
):
|
||||
"""
|
||||
Récupère les incidents clusterisés automatiquement
|
||||
|
||||
Les clusters sont formés par:
|
||||
- Subnet /24
|
||||
- JA4 fingerprint
|
||||
- Pattern temporel
|
||||
"""
|
||||
try:
|
||||
# Cluster par subnet /24 avec une IP exemple
|
||||
# Note: src_ip est en IPv6, les IPv4 sont stockés comme ::ffff:x.x.x.x
|
||||
# toIPv4() convertit les IPv4-mapped, IPv4NumToString() retourne l'IPv4 en notation x.x.x.x
|
||||
cluster_query = """
|
||||
WITH cleaned_ips AS (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
detected_at,
|
||||
ja4,
|
||||
country_code,
|
||||
asn_number,
|
||||
threat_level,
|
||||
anomaly_score
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
),
|
||||
subnet_groups AS (
|
||||
SELECT
|
||||
concat(
|
||||
splitByChar('.', clean_ip)[1], '.',
|
||||
splitByChar('.', clean_ip)[2], '.',
|
||||
splitByChar('.', clean_ip)[3], '.0/24'
|
||||
) AS subnet,
|
||||
count() AS total_detections,
|
||||
uniq(clean_ip) AS unique_ips,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen,
|
||||
argMax(ja4, detected_at) AS ja4,
|
||||
argMax(country_code, detected_at) AS country_code,
|
||||
argMax(asn_number, detected_at) AS asn_number,
|
||||
argMax(threat_level, detected_at) AS threat_level,
|
||||
avg(anomaly_score) AS avg_score,
|
||||
argMax(clean_ip, detected_at) AS sample_ip
|
||||
FROM cleaned_ips
|
||||
GROUP BY subnet
|
||||
HAVING total_detections >= 2
|
||||
)
|
||||
SELECT
|
||||
subnet,
|
||||
total_detections,
|
||||
unique_ips,
|
||||
first_seen,
|
||||
last_seen,
|
||||
ja4,
|
||||
country_code,
|
||||
asn_number,
|
||||
threat_level,
|
||||
avg_score,
|
||||
sample_ip
|
||||
FROM subnet_groups
|
||||
ORDER BY avg_score ASC, total_detections DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
result = db.query(cluster_query, {"hours": hours, "limit": limit})
|
||||
|
||||
# Collect sample IPs to fetch real UA and trend data in bulk
|
||||
sample_ips = [row[10] for row in result.result_rows if row[10]]
|
||||
|
||||
# Fetch real primary UA per sample IP from view_dashboard_entities
|
||||
ua_by_ip: dict = {}
|
||||
if sample_ips:
|
||||
ip_list_sql = ", ".join(f"'{ip}'" for ip in sample_ips[:50])
|
||||
ua_query = f"""
|
||||
SELECT entity_value, arrayElement(user_agents, 1) AS top_ua
|
||||
FROM view_dashboard_entities
|
||||
WHERE entity_type = 'ip'
|
||||
AND entity_value IN ({ip_list_sql})
|
||||
AND notEmpty(user_agents)
|
||||
GROUP BY entity_value, top_ua
|
||||
ORDER BY entity_value
|
||||
"""
|
||||
try:
|
||||
ua_result = db.query(ua_query)
|
||||
for ua_row in ua_result.result_rows:
|
||||
if ua_row[0] not in ua_by_ip and ua_row[1]:
|
||||
ua_by_ip[str(ua_row[0])] = str(ua_row[1])
|
||||
except Exception:
|
||||
pass # UA enrichment is best-effort
|
||||
|
||||
# Compute real trend: compare current window vs previous window of same duration
|
||||
trend_query = """
|
||||
WITH cleaned AS (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
detected_at,
|
||||
concat(
|
||||
splitByChar('.', clean_ip)[1], '.',
|
||||
splitByChar('.', clean_ip)[2], '.',
|
||||
splitByChar('.', clean_ip)[3], '.0/24'
|
||||
) AS subnet
|
||||
FROM ml_detected_anomalies
|
||||
),
|
||||
current_window AS (
|
||||
SELECT subnet, count() AS cnt
|
||||
FROM cleaned
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY subnet
|
||||
),
|
||||
prev_window AS (
|
||||
SELECT subnet, count() AS cnt
|
||||
FROM cleaned
|
||||
WHERE detected_at >= now() - INTERVAL %(hours2)s HOUR
|
||||
AND detected_at < now() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY subnet
|
||||
)
|
||||
SELECT c.subnet, c.cnt AS current_cnt, p.cnt AS prev_cnt
|
||||
FROM current_window c
|
||||
LEFT JOIN prev_window p ON c.subnet = p.subnet
|
||||
"""
|
||||
trend_by_subnet: dict = {}
|
||||
try:
|
||||
trend_result = db.query(trend_query, {"hours": hours, "hours2": hours * 2})
|
||||
for tr in trend_result.result_rows:
|
||||
subnet_key = tr[0]
|
||||
curr = tr[1] or 0
|
||||
prev = tr[2] or 0
|
||||
if prev == 0:
|
||||
trend_by_subnet[subnet_key] = ("new", 100)
|
||||
else:
|
||||
pct = round(((curr - prev) / prev) * 100)
|
||||
trend_by_subnet[subnet_key] = ("up" if pct >= 0 else "down", abs(pct))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
clusters = []
|
||||
for row in result.result_rows:
|
||||
subnet = row[0]
|
||||
threat_level = row[8] or 'LOW'
|
||||
unique_ips = row[2] or 1
|
||||
avg_score = abs(row[9] or 0)
|
||||
sample_ip = row[10] if row[10] else subnet.split('/')[0]
|
||||
|
||||
critical_count = 1 if threat_level == 'CRITICAL' else 0
|
||||
high_count = 1 if threat_level == 'HIGH' else 0
|
||||
|
||||
risk_score = min(100, round(
|
||||
(critical_count * 30) +
|
||||
(high_count * 20) +
|
||||
(unique_ips * 5) +
|
||||
(avg_score * 100)
|
||||
))
|
||||
|
||||
if critical_count > 0 or risk_score >= 80:
|
||||
severity = "CRITICAL"
|
||||
elif high_count > (row[1] or 1) * 0.3 or risk_score >= 60:
|
||||
severity = "HIGH"
|
||||
elif high_count > 0 or risk_score >= 40:
|
||||
severity = "MEDIUM"
|
||||
else:
|
||||
severity = "LOW"
|
||||
|
||||
trend_dir, trend_pct = trend_by_subnet.get(subnet, ("stable", 0))
|
||||
primary_ua = ua_by_ip.get(sample_ip, "")
|
||||
|
||||
clusters.append({
|
||||
"id": f"INC-{hashlib.md5(subnet.encode()).hexdigest()[:8].upper()}",
|
||||
"score": risk_score,
|
||||
"severity": severity,
|
||||
"total_detections": row[1],
|
||||
"unique_ips": row[2],
|
||||
"subnet": subnet,
|
||||
"sample_ip": sample_ip,
|
||||
"ja4": row[5] or "",
|
||||
"primary_ua": primary_ua,
|
||||
"primary_target": row[3].strftime('%H:%M') if row[3] else "Unknown",
|
||||
"countries": [{"code": row[6] or "XX", "percentage": 100}],
|
||||
"asn": str(row[7]) if row[7] else "",
|
||||
"first_seen": row[3].isoformat() if row[3] else "",
|
||||
"last_seen": row[4].isoformat() if row[4] else "",
|
||||
"trend": trend_dir,
|
||||
"trend_percentage": trend_pct,
|
||||
})
|
||||
|
||||
return {
|
||||
"items": clusters,
|
||||
"total": len(clusters),
|
||||
"period_hours": hours
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{cluster_id}")
|
||||
async def get_incident_details(cluster_id: str):
|
||||
"""
|
||||
Récupère les détails d'un incident spécifique.
|
||||
Non encore implémenté — les détails par cluster seront disponibles dans une prochaine version.
|
||||
"""
|
||||
raise HTTPException(
|
||||
status_code=501,
|
||||
detail="Détails par incident non encore implémentés. Utilisez /api/incidents/clusters pour la liste."
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{cluster_id}/classify")
|
||||
async def classify_incident(
|
||||
cluster_id: str,
|
||||
label: str,
|
||||
tags: List[str] = None,
|
||||
comment: str = ""
|
||||
):
|
||||
"""
|
||||
Classe un incident rapidement.
|
||||
Non encore implémenté — utilisez /api/analysis/{ip}/classify pour classifier une IP.
|
||||
"""
|
||||
raise HTTPException(
|
||||
status_code=501,
|
||||
detail="Classification par incident non encore implémentée. Utilisez /api/analysis/{ip}/classify."
|
||||
)
|
||||
|
||||
|
||||
@router.get("")
|
||||
async def list_incidents(
|
||||
status: str = Query("active", description="Statut des incidents"),
|
||||
severity: Optional[str] = Query(None, description="Filtrer par sévérité (LOW/MEDIUM/HIGH/CRITICAL)"),
|
||||
hours: int = Query(24, ge=1, le=168)
|
||||
):
|
||||
"""
|
||||
Liste tous les incidents avec filtres.
|
||||
Délègue à get_incident_clusters ; le filtre severity est appliqué post-requête.
|
||||
"""
|
||||
try:
|
||||
result = await get_incident_clusters(hours=hours, limit=100)
|
||||
items = result["items"]
|
||||
|
||||
if severity:
|
||||
sev_upper = severity.upper()
|
||||
items = [c for c in items if c.get("severity") == sev_upper]
|
||||
|
||||
return {
|
||||
"items": items,
|
||||
"total": len(items),
|
||||
"period_hours": hours,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
185
services/dashboard/backend/routes/investigation_summary.py
Normal file
185
services/dashboard/backend/routes/investigation_summary.py
Normal file
@ -0,0 +1,185 @@
|
||||
"""
|
||||
Endpoint d'investigation enrichie pour une IP donnée.
|
||||
Agrège en une seule requête les données provenant de toutes les sources :
|
||||
ml_detected_anomalies, view_form_bruteforce_detected, view_tcp_spoofing_detected,
|
||||
agg_host_ip_ja4_1h (rotation JA4), view_ip_recurrence, view_ai_features_1h.
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from ..database import db
|
||||
from ..services.tcp_fingerprint import fingerprint_os, detect_spoof, declared_os_from_ua
|
||||
|
||||
router = APIRouter(prefix="/api/investigation", tags=["investigation"])
|
||||
|
||||
|
||||
@router.get(
|
||||
"/{ip}/summary",
|
||||
summary="Synthèse complète d'une IP",
|
||||
response_description="Score de risque 0-100, détections ML, brute-force, spoofing TCP, rotation JA4, persistance et timeline 24h",
|
||||
)
|
||||
async def get_ip_full_summary(ip: str):
|
||||
"""
|
||||
Synthèse complète pour une IP : toutes les sources en un appel.
|
||||
Normalise l'IP (accepte ::ffff:x.x.x.x ou x.x.x.x).
|
||||
"""
|
||||
clean_ip = ip.replace("::ffff:", "").strip()
|
||||
try:
|
||||
# ── 1. Score ML / features ─────────────────────────────────────────────
|
||||
ml_sql = """
|
||||
SELECT
|
||||
max(abs(anomaly_score)) AS max_score,
|
||||
any(threat_level) AS threat_level,
|
||||
any(bot_name) AS bot_name,
|
||||
count() AS total_detections,
|
||||
uniq(host) AS distinct_hosts,
|
||||
uniq(ja4) AS distinct_ja4
|
||||
FROM mabase_prod.ml_detected_anomalies
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
"""
|
||||
ml_res = db.query(ml_sql, {"ip": clean_ip})
|
||||
ml_row = ml_res.result_rows[0] if ml_res.result_rows else None
|
||||
ml_data = {
|
||||
"max_score": round(float(ml_row[0] or 0), 2) if ml_row else 0,
|
||||
"threat_level": str(ml_row[1] or "") if ml_row else "",
|
||||
"attack_type": str(ml_row[2] or "") if ml_row else "",
|
||||
"total_detections": int(ml_row[3] or 0) if ml_row else 0,
|
||||
"distinct_hosts": int(ml_row[4] or 0) if ml_row else 0,
|
||||
"distinct_ja4": int(ml_row[5] or 0) if ml_row else 0,
|
||||
}
|
||||
|
||||
# ── 2. Brute force ─────────────────────────────────────────────────────
|
||||
bf_sql = """
|
||||
SELECT
|
||||
uniq(host) AS hosts_attacked,
|
||||
sum(hits) AS total_hits,
|
||||
sum(query_params_count) AS total_params,
|
||||
groupArray(3)(host) AS top_hosts
|
||||
FROM mabase_prod.view_form_bruteforce_detected
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
"""
|
||||
bf_res = db.query(bf_sql, {"ip": clean_ip})
|
||||
bf_row = bf_res.result_rows[0] if bf_res.result_rows else None
|
||||
bf_data = {
|
||||
"active": bool(bf_row and int(bf_row[1] or 0) > 0),
|
||||
"hosts_attacked": int(bf_row[0] or 0) if bf_row else 0,
|
||||
"total_hits": int(bf_row[1] or 0) if bf_row else 0,
|
||||
"total_params": int(bf_row[2] or 0) if bf_row else 0,
|
||||
"top_hosts": [str(h) for h in (bf_row[3] or [])] if bf_row else [],
|
||||
}
|
||||
|
||||
# ── 3. TCP spoofing — fingerprinting multi-signal ─────────────────────
|
||||
tcp_sql = """
|
||||
SELECT
|
||||
any(tcp_ttl_raw) AS ttl,
|
||||
any(tcp_win_raw) AS win,
|
||||
any(tcp_scale_raw) AS scale,
|
||||
any(tcp_mss_raw) AS mss,
|
||||
any(first_ua) AS ua
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
AND window_start >= now() - INTERVAL 24 HOUR
|
||||
AND tcp_ttl_raw > 0
|
||||
LIMIT 1
|
||||
"""
|
||||
tcp_res = db.query(tcp_sql, {"ip": clean_ip})
|
||||
tcp_data = {"detected": False, "tcp_ttl": None, "suspected_os": None}
|
||||
if tcp_res.result_rows:
|
||||
r = tcp_res.result_rows[0]
|
||||
ttl = int(r[0] or 0)
|
||||
win = int(r[1] or 0)
|
||||
scale = int(r[2] or 0)
|
||||
mss = int(r[3] or 0)
|
||||
ua = str(r[4] or "")
|
||||
fp = fingerprint_os(ttl, win, scale, mss)
|
||||
dec_os = declared_os_from_ua(ua)
|
||||
spoof_res = detect_spoof(fp, dec_os)
|
||||
tcp_data = {
|
||||
"detected": spoof_res.is_spoof,
|
||||
"tcp_ttl": ttl,
|
||||
"tcp_mss": mss,
|
||||
"tcp_win_scale": scale,
|
||||
"initial_ttl": fp.initial_ttl,
|
||||
"hop_count": fp.hop_count,
|
||||
"suspected_os": fp.os_name,
|
||||
"declared_os": dec_os,
|
||||
"confidence": fp.confidence,
|
||||
"network_path": fp.network_path,
|
||||
"is_bot_tool": fp.is_bot_tool,
|
||||
"spoof_reason": spoof_res.reason,
|
||||
}
|
||||
|
||||
# ── 4. JA4 rotation ────────────────────────────────────────────────────
|
||||
rot_sql = """
|
||||
SELECT distinct_ja4_count, total_hits
|
||||
FROM mabase_prod.view_host_ip_ja4_rotation
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
LIMIT 1
|
||||
"""
|
||||
rot_res = db.query(rot_sql, {"ip": clean_ip})
|
||||
rot_data = {"rotating": False, "distinct_ja4_count": 0}
|
||||
if rot_res.result_rows:
|
||||
row = rot_res.result_rows[0]
|
||||
cnt = int(row[0] or 0)
|
||||
rot_data = {"rotating": cnt > 1, "distinct_ja4_count": cnt, "total_hits": int(row[1] or 0)}
|
||||
|
||||
# ── 5. Persistance ─────────────────────────────────────────────────────
|
||||
pers_sql = """
|
||||
SELECT recurrence, worst_score, worst_threat_level, first_seen, last_seen
|
||||
FROM mabase_prod.view_ip_recurrence
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
LIMIT 1
|
||||
"""
|
||||
pers_res = db.query(pers_sql, {"ip": clean_ip})
|
||||
pers_data = {"persistent": False, "recurrence": 0}
|
||||
if pers_res.result_rows:
|
||||
row = pers_res.result_rows[0]
|
||||
pers_data = {
|
||||
"persistent": True,
|
||||
"recurrence": int(row[0] or 0),
|
||||
"worst_score": round(float(row[1] or 0), 2),
|
||||
"worst_threat_level":str(row[2] or ""),
|
||||
"first_seen": str(row[3]),
|
||||
"last_seen": str(row[4]),
|
||||
}
|
||||
|
||||
# ── 6. Timeline 24h ────────────────────────────────────────────────────
|
||||
tl_sql = """
|
||||
SELECT
|
||||
toHour(window_start) AS hour,
|
||||
sum(hits) AS hits,
|
||||
groupUniqArray(3)(ja4) AS ja4s
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
AND window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY hour
|
||||
ORDER BY hour ASC
|
||||
"""
|
||||
tl_res = db.query(tl_sql, {"ip": clean_ip})
|
||||
timeline = [
|
||||
{"hour": int(r[0]), "hits": int(r[1]), "ja4s": [str(j) for j in (r[2] or [])]}
|
||||
for r in tl_res.result_rows
|
||||
]
|
||||
|
||||
# ── Global risk score (heuristic) ──────────────────────────────────────
|
||||
risk = 0
|
||||
risk += min(50, ml_data["max_score"] * 50)
|
||||
if bf_data["active"]: risk += 20
|
||||
if tcp_data["detected"]:
|
||||
if tcp_data.get("is_bot_tool"): risk += 30 # outil de scan connu
|
||||
else: risk += 15 # spoof OS
|
||||
if rot_data["rotating"]: risk += min(15, rot_data["distinct_ja4_count"] * 3)
|
||||
if pers_data["persistent"]: risk += min(10, pers_data["recurrence"] * 2)
|
||||
risk = min(100, round(risk))
|
||||
|
||||
return {
|
||||
"ip": clean_ip,
|
||||
"risk_score": risk,
|
||||
"ml": ml_data,
|
||||
"bruteforce": bf_data,
|
||||
"tcp_spoofing":tcp_data,
|
||||
"ja4_rotation":rot_data,
|
||||
"persistence": pers_data,
|
||||
"timeline_24h":timeline,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
175
services/dashboard/backend/routes/metrics.py
Normal file
175
services/dashboard/backend/routes/metrics.py
Normal file
@ -0,0 +1,175 @@
|
||||
"""
|
||||
Endpoints pour les métriques du dashboard
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from ..database import db
|
||||
from ..models import MetricsResponse, MetricsSummary, TimeSeriesPoint
|
||||
|
||||
router = APIRouter(prefix="/api/metrics", tags=["metrics"])
|
||||
|
||||
|
||||
@router.get("", response_model=MetricsResponse, summary="Métriques globales du dashboard")
|
||||
async def get_metrics():
|
||||
"""
|
||||
Récupère les métriques globales du dashboard
|
||||
"""
|
||||
try:
|
||||
# Résumé des métriques
|
||||
summary_query = """
|
||||
SELECT
|
||||
count() AS total_detections,
|
||||
countIf(threat_level = 'CRITICAL') AS critical_count,
|
||||
countIf(threat_level = 'HIGH') AS high_count,
|
||||
countIf(threat_level = 'MEDIUM') AS medium_count,
|
||||
countIf(threat_level = 'LOW') AS low_count,
|
||||
countIf(bot_name != '') AS known_bots_count,
|
||||
countIf(bot_name = '') AS anomalies_count,
|
||||
uniq(src_ip) AS unique_ips
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
summary_result = db.query(summary_query)
|
||||
summary_row = summary_result.result_rows[0] if summary_result.result_rows else None
|
||||
|
||||
if not summary_row:
|
||||
raise HTTPException(status_code=404, detail="Aucune donnée disponible")
|
||||
|
||||
summary = MetricsSummary(
|
||||
total_detections=summary_row[0],
|
||||
critical_count=summary_row[1],
|
||||
high_count=summary_row[2],
|
||||
medium_count=summary_row[3],
|
||||
low_count=summary_row[4],
|
||||
known_bots_count=summary_row[5],
|
||||
anomalies_count=summary_row[6],
|
||||
unique_ips=summary_row[7]
|
||||
)
|
||||
|
||||
# Série temporelle (par heure)
|
||||
timeseries_query = """
|
||||
SELECT
|
||||
toStartOfHour(detected_at) AS hour,
|
||||
count() AS total,
|
||||
countIf(threat_level = 'CRITICAL') AS critical,
|
||||
countIf(threat_level = 'HIGH') AS high,
|
||||
countIf(threat_level = 'MEDIUM') AS medium,
|
||||
countIf(threat_level = 'LOW') AS low
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY hour
|
||||
ORDER BY hour
|
||||
"""
|
||||
|
||||
timeseries_result = db.query(timeseries_query)
|
||||
timeseries = [
|
||||
TimeSeriesPoint(
|
||||
hour=row[0],
|
||||
total=row[1],
|
||||
critical=row[2],
|
||||
high=row[3],
|
||||
medium=row[4],
|
||||
low=row[5]
|
||||
)
|
||||
for row in timeseries_result.result_rows
|
||||
]
|
||||
|
||||
# Distribution par menace
|
||||
threat_distribution = {
|
||||
"CRITICAL": summary.critical_count,
|
||||
"HIGH": summary.high_count,
|
||||
"MEDIUM": summary.medium_count,
|
||||
"LOW": summary.low_count
|
||||
}
|
||||
|
||||
return MetricsResponse(
|
||||
summary=summary,
|
||||
timeseries=timeseries,
|
||||
threat_distribution=threat_distribution
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur lors de la récupération des métriques: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/threats")
|
||||
async def get_threat_distribution():
|
||||
"""
|
||||
Récupère la répartition par niveau de menace
|
||||
"""
|
||||
try:
|
||||
query = """
|
||||
SELECT
|
||||
threat_level,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY threat_level
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
result = db.query(query)
|
||||
|
||||
return {
|
||||
"items": [
|
||||
{"threat_level": row[0], "count": row[1], "percentage": row[2]}
|
||||
for row in result.result_rows
|
||||
]
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/baseline")
|
||||
async def get_metrics_baseline():
|
||||
"""
|
||||
Compare les métriques actuelles (24h) vs hier (24h-48h) pour afficher les tendances.
|
||||
"""
|
||||
try:
|
||||
query = """
|
||||
SELECT
|
||||
countIf(detected_at >= now() - INTERVAL 24 HOUR) AS today_total,
|
||||
countIf(detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_total,
|
||||
uniqIf(src_ip, detected_at >= now() - INTERVAL 24 HOUR) AS today_ips,
|
||||
uniqIf(src_ip, detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_ips,
|
||||
countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 24 HOUR) AS today_critical,
|
||||
countIf(threat_level = 'CRITICAL' AND detected_at >= now() - INTERVAL 48 HOUR AND detected_at < now() - INTERVAL 24 HOUR) AS yesterday_critical
|
||||
FROM ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL 48 HOUR
|
||||
"""
|
||||
r = db.query(query)
|
||||
row = r.result_rows[0] if r.result_rows else None
|
||||
|
||||
def pct_change(today: int, yesterday: int) -> float:
|
||||
if yesterday == 0:
|
||||
return 100.0 if today > 0 else 0.0
|
||||
return round((today - yesterday) / yesterday * 100, 1)
|
||||
|
||||
today_total = int(row[0] or 0) if row else 0
|
||||
yesterday_total = int(row[1] or 0) if row else 0
|
||||
today_ips = int(row[2] or 0) if row else 0
|
||||
yesterday_ips = int(row[3] or 0) if row else 0
|
||||
today_crit = int(row[4] or 0) if row else 0
|
||||
yesterday_crit = int(row[5] or 0) if row else 0
|
||||
|
||||
return {
|
||||
"total_detections": {
|
||||
"today": today_total,
|
||||
"yesterday": yesterday_total,
|
||||
"pct_change": pct_change(today_total, yesterday_total),
|
||||
},
|
||||
"unique_ips": {
|
||||
"today": today_ips,
|
||||
"yesterday": yesterday_ips,
|
||||
"pct_change": pct_change(today_ips, yesterday_ips),
|
||||
},
|
||||
"critical_alerts": {
|
||||
"today": today_crit,
|
||||
"yesterday": yesterday_crit,
|
||||
"pct_change": pct_change(today_crit, yesterday_crit),
|
||||
},
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur baseline: {str(e)}")
|
||||
425
services/dashboard/backend/routes/ml_features.py
Normal file
425
services/dashboard/backend/routes/ml_features.py
Normal file
@ -0,0 +1,425 @@
|
||||
"""
|
||||
Endpoints pour les features ML / IA (scores d'anomalies, radar, scatter)
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/ml", tags=["ml_features"])
|
||||
|
||||
|
||||
def _attack_type(fuzzing_index: float, hit_velocity: float,
|
||||
is_fake_nav: int, ua_ch_mismatch: int) -> str:
|
||||
if fuzzing_index > 50:
|
||||
return "brute_force"
|
||||
if hit_velocity > 1.0:
|
||||
return "flood"
|
||||
if is_fake_nav:
|
||||
return "scraper"
|
||||
if ua_ch_mismatch:
|
||||
return "spoofing"
|
||||
return "scanner"
|
||||
|
||||
|
||||
@router.get("/top-anomalies")
|
||||
async def get_top_anomalies(limit: int = Query(50, ge=1, le=500)):
|
||||
"""Top IPs anomales (24h) — bypass view_ai_features_1h pour éviter les window functions.
|
||||
Query directe sur agg_host_ip_ja4_1h + LEFT JOIN agg_header_fingerprint_1h.
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(a.src_ip), '^::ffff:', '') AS ip,
|
||||
any(a.ja4) AS ja4,
|
||||
any(a.host) AS host,
|
||||
sum(a.hits) AS hits,
|
||||
round(uniqMerge(a.uniq_query_params)
|
||||
/ greatest(uniqMerge(a.uniq_paths), 1), 4) AS fuzzing_index,
|
||||
round(sum(a.hits)
|
||||
/ greatest(dateDiff('second', min(a.first_seen), max(a.last_seen)), 1), 2) AS hit_velocity,
|
||||
round(sum(a.count_head) / greatest(sum(a.hits), 1), 4) AS head_ratio,
|
||||
round(sum(a.count_no_sec_fetch) / greatest(sum(a.hits), 1), 4) AS sec_fetch_absence,
|
||||
round(sum(a.tls12_count) / greatest(sum(a.hits), 1), 4) AS tls12_ratio,
|
||||
round(sum(a.count_generic_accept) / greatest(sum(a.hits), 1), 4) AS generic_accept_ratio,
|
||||
any(a.src_country_code) AS country,
|
||||
any(a.src_as_name) AS asn_name,
|
||||
max(h.ua_ch_mismatch) AS ua_ch_mismatch,
|
||||
max(h.modern_browser_score) AS browser_score,
|
||||
dictGetOrDefault('mabase_prod.dict_asn_reputation', 'label', toUInt64(any(a.src_asn)), 'unknown') AS asn_label,
|
||||
coalesce(
|
||||
nullIf(dictGetOrDefault('mabase_prod.dict_bot_ja4', 'bot_name', tuple(any(a.ja4)), ''), ''),
|
||||
''
|
||||
) AS bot_name
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h a
|
||||
LEFT JOIN mabase_prod.agg_header_fingerprint_1h h
|
||||
ON a.src_ip = h.src_ip AND a.window_start = h.window_start
|
||||
WHERE a.window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY a.src_ip
|
||||
ORDER BY fuzzing_index DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
fuzzing = float(row[4] or 0)
|
||||
velocity = float(row[5] or 0)
|
||||
ua_mm = int(row[12] or 0)
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"ja4": str(row[1]),
|
||||
"host": str(row[2]),
|
||||
"hits": int(row[3] or 0),
|
||||
"fuzzing_index": fuzzing,
|
||||
"hit_velocity": velocity,
|
||||
"head_ratio": float(row[6] or 0),
|
||||
"sec_fetch_absence": float(row[7] or 0),
|
||||
"tls12_ratio": float(row[8] or 0),
|
||||
"generic_accept_ratio": float(row[9] or 0),
|
||||
"country": str(row[10] or ""),
|
||||
"asn_name": str(row[11] or ""),
|
||||
"ua_ch_mismatch": ua_mm,
|
||||
"browser_score": int(row[13] or 0),
|
||||
"asn_label": str(row[14] or ""),
|
||||
"bot_name": str(row[15] or ""),
|
||||
"attack_type": _attack_type(fuzzing, velocity, 0, ua_mm),
|
||||
})
|
||||
return {"items": items}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/ip/{ip}/radar")
|
||||
async def get_ip_radar(ip: str):
|
||||
"""Scores radar pour une IP spécifique (8 dimensions d'anomalie)."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
avg(fuzzing_index) AS fuzzing_index,
|
||||
avg(hit_velocity) AS hit_velocity,
|
||||
avg(is_fake_navigation) AS is_fake_navigation,
|
||||
avg(ua_ch_mismatch) AS ua_ch_mismatch,
|
||||
avg(sni_host_mismatch) AS sni_host_mismatch,
|
||||
avg(orphan_ratio) AS orphan_ratio,
|
||||
avg(path_diversity_ratio) AS path_diversity_ratio,
|
||||
avg(anomalous_payload_ratio) AS anomalous_payload_ratio
|
||||
FROM mabase_prod.view_ai_features_1h
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
AND window_start >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
result = db.query(sql, {"ip": ip})
|
||||
if not result.result_rows:
|
||||
raise HTTPException(status_code=404, detail="IP not found")
|
||||
row = result.result_rows[0]
|
||||
|
||||
def _f(v) -> float:
|
||||
return float(v or 0)
|
||||
|
||||
return {
|
||||
"ip": ip,
|
||||
"fuzzing_score": min(100.0, _f(row[0])),
|
||||
"velocity_score": min(100.0, _f(row[1]) * 100),
|
||||
"fake_nav_score": _f(row[2]) * 100,
|
||||
"ua_mismatch_score": _f(row[3]) * 100,
|
||||
"sni_mismatch_score": _f(row[4]) * 100,
|
||||
"orphan_score": min(100.0, _f(row[5]) * 100),
|
||||
"path_repetition_score": max(0.0, 100 - _f(row[6]) * 100),
|
||||
"payload_anomaly_score": min(100.0, _f(row[7]) * 100),
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/score-distribution")
|
||||
async def get_score_distribution():
|
||||
"""
|
||||
Distribution de TOUS les scores ML depuis ml_all_scores (3j).
|
||||
Single query avec conditional aggregates pour éviter le double scan.
|
||||
"""
|
||||
try:
|
||||
# Single scan — global totals + per-model breakdown via GROUPING SETS
|
||||
sql = """
|
||||
SELECT
|
||||
threat_level,
|
||||
model_name,
|
||||
count() AS total,
|
||||
round(avg(anomaly_score), 4) AS avg_score,
|
||||
round(min(anomaly_score), 4) AS min_score,
|
||||
countIf(threat_level = 'NORMAL') AS normal_count,
|
||||
countIf(threat_level NOT IN ('NORMAL','KNOWN_BOT')) AS anomaly_count,
|
||||
countIf(threat_level = 'KNOWN_BOT') AS bot_count
|
||||
FROM mabase_prod.ml_all_scores
|
||||
WHERE detected_at >= now() - INTERVAL 3 DAY
|
||||
GROUP BY threat_level, model_name
|
||||
ORDER BY model_name, total DESC
|
||||
"""
|
||||
result = db.query(sql)
|
||||
by_model: dict = {}
|
||||
grand_total = 0
|
||||
total_normal = total_anomaly = total_bot = 0
|
||||
for row in result.result_rows:
|
||||
level = str(row[0])
|
||||
model = str(row[1])
|
||||
total = int(row[2])
|
||||
grand_total += total
|
||||
total_normal += int(row[5] or 0)
|
||||
total_anomaly += int(row[6] or 0)
|
||||
total_bot += int(row[7] or 0)
|
||||
if model not in by_model:
|
||||
by_model[model] = []
|
||||
by_model[model].append({
|
||||
"threat_level": level,
|
||||
"total": total,
|
||||
"avg_score": float(row[3] or 0),
|
||||
"min_score": float(row[4] or 0),
|
||||
})
|
||||
|
||||
grand_total = max(grand_total, 1)
|
||||
return {
|
||||
"by_model": by_model,
|
||||
"totals": {
|
||||
"normal": total_normal,
|
||||
"anomaly": total_anomaly,
|
||||
"known_bot": total_bot,
|
||||
"grand_total": grand_total,
|
||||
"normal_pct": round(total_normal / grand_total * 100, 1),
|
||||
"anomaly_pct": round(total_anomaly / grand_total * 100, 1),
|
||||
"bot_pct": round(total_bot / grand_total * 100, 1),
|
||||
}
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/score-trends")
|
||||
async def get_score_trends(hours: int = Query(72, ge=1, le=168)):
|
||||
"""
|
||||
Évolution temporelle des scores ML depuis ml_all_scores.
|
||||
Retourne le score moyen et les counts par heure et par modèle.
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
toStartOfHour(window_start) AS hour,
|
||||
model_name,
|
||||
countIf(threat_level = 'NORMAL') AS normal_count,
|
||||
countIf(threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')) AS anomaly_count,
|
||||
countIf(threat_level = 'KNOWN_BOT') AS bot_count,
|
||||
round(avgIf(anomaly_score, threat_level IN ('LOW','MEDIUM','HIGH','CRITICAL')), 4) AS avg_anomaly_score
|
||||
FROM mabase_prod.ml_all_scores
|
||||
WHERE window_start >= now() - INTERVAL %(hours)s HOUR
|
||||
GROUP BY hour, model_name
|
||||
ORDER BY hour ASC, model_name
|
||||
"""
|
||||
result = db.query(sql, {"hours": hours})
|
||||
points = []
|
||||
for row in result.result_rows:
|
||||
points.append({
|
||||
"hour": str(row[0]),
|
||||
"model": str(row[1]),
|
||||
"normal_count": int(row[2] or 0),
|
||||
"anomaly_count": int(row[3] or 0),
|
||||
"bot_count": int(row[4] or 0),
|
||||
"avg_anomaly_score": float(row[5] or 0),
|
||||
})
|
||||
return {"points": points, "hours": hours}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/b-features")
|
||||
async def get_b_features(limit: int = Query(50, ge=1, le=200)):
|
||||
"""
|
||||
Agrégation des B-features (HTTP pures) pour les top IPs anomales.
|
||||
Source: agg_host_ip_ja4_1h (SimpleAggregateFunction columns).
|
||||
Expose: head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio.
|
||||
Ces features sont calculées dans view_ai_features_1h mais jamais visualisées dans le dashboard.
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
SELECT ip, ja4, country, asn_name, total_hits AS hits,
|
||||
head_ratio, sec_fetch_absence, tls12_ratio, generic_accept_ratio, http10_ratio,
|
||||
missing_accept_enc_ratio, http_scheme_ratio
|
||||
FROM (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
any(ja4) AS ja4,
|
||||
any(src_country_code) AS country,
|
||||
any(src_as_name) AS asn_name,
|
||||
sum(hits) AS total_hits,
|
||||
round(sum(count_head) / greatest(sum(hits),1), 4) AS head_ratio,
|
||||
round(sum(count_no_sec_fetch) / greatest(sum(hits),1), 4) AS sec_fetch_absence,
|
||||
round(sum(tls12_count) / greatest(sum(hits),1), 4) AS tls12_ratio,
|
||||
round(sum(count_generic_accept) / greatest(sum(hits),1), 4) AS generic_accept_ratio,
|
||||
round(sum(count_http10) / greatest(sum(hits),1), 4) AS http10_ratio,
|
||||
round(sum(count_no_accept_enc) / greatest(sum(hits),1), 4) AS missing_accept_enc_ratio,
|
||||
round(sum(count_http_scheme) / greatest(sum(hits),1), 4) AS http_scheme_ratio
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY src_ip
|
||||
)
|
||||
WHERE sec_fetch_absence > 0.5 OR generic_accept_ratio > 0.3
|
||||
OR head_ratio > 0.1 OR tls12_ratio > 0.5 OR missing_accept_enc_ratio > 0.3
|
||||
ORDER BY (head_ratio + sec_fetch_absence + generic_accept_ratio + missing_accept_enc_ratio) DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"ja4": str(row[1] or ""),
|
||||
"country": str(row[2] or ""),
|
||||
"asn_name": str(row[3] or ""),
|
||||
"hits": int(row[4] or 0),
|
||||
"head_ratio": float(row[5] or 0),
|
||||
"sec_fetch_absence": float(row[6] or 0),
|
||||
"tls12_ratio": float(row[7] or 0),
|
||||
"generic_accept_ratio": float(row[8] or 0),
|
||||
"http10_ratio": float(row[9] or 0),
|
||||
"missing_accept_enc_ratio":float(row[10] or 0),
|
||||
"http_scheme_ratio": float(row[11] or 0),
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/campaigns")
|
||||
async def get_ml_campaigns(hours: int = Query(24, ge=1, le=168), limit: int = Query(20, ge=1, le=100)):
|
||||
"""
|
||||
Groupes d'anomalies détectées par DBSCAN (campaign_id >= 0).
|
||||
Si aucune campagne active, fallback sur clustering par /24 subnet + JA4 commun.
|
||||
Utile pour détecter les botnets distribués sans état de campagne DBSCAN.
|
||||
"""
|
||||
try:
|
||||
# First: check real campaigns
|
||||
campaign_sql = """
|
||||
SELECT
|
||||
campaign_id,
|
||||
count() AS total_detections,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
any(threat_level) AS dominant_threat,
|
||||
groupUniqArray(3)(threat_level) AS threat_levels,
|
||||
groupUniqArray(3)(bot_name) AS bot_names,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM mabase_prod.ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND campaign_id >= 0
|
||||
GROUP BY campaign_id
|
||||
ORDER BY total_detections DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(campaign_sql, {"hours": hours, "limit": limit})
|
||||
campaigns = []
|
||||
for row in result.result_rows:
|
||||
campaigns.append({
|
||||
"id": f"C{row[0]}",
|
||||
"campaign_id": int(row[0]),
|
||||
"total_detections": int(row[1]),
|
||||
"unique_ips": int(row[2]),
|
||||
"dominant_threat": str(row[3] or ""),
|
||||
"threat_levels": list(row[4] or []),
|
||||
"bot_names": list(row[5] or []),
|
||||
"first_seen": str(row[6]),
|
||||
"last_seen": str(row[7]),
|
||||
"source": "dbscan",
|
||||
})
|
||||
|
||||
# Fallback: subnet-based clustering when DBSCAN has no campaigns
|
||||
if not campaigns:
|
||||
subnet_sql = """
|
||||
SELECT
|
||||
IPv4CIDRToRange(toIPv4(replaceRegexpAll(toString(src_ip),'^::ffff:','')), 24).1 AS subnet,
|
||||
count() AS total_detections,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
groupArray(3)(threat_level) AS threat_levels,
|
||||
any(bot_name) AS bot_name,
|
||||
any(ja4) AS sample_ja4,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM mabase_prod.ml_detected_anomalies
|
||||
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
|
||||
AND threat_level IN ('HIGH','CRITICAL','MEDIUM')
|
||||
GROUP BY subnet
|
||||
HAVING unique_ips >= 3
|
||||
ORDER BY total_detections DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result2 = db.query(subnet_sql, {"hours": hours, "limit": limit})
|
||||
for i, row in enumerate(result2.result_rows):
|
||||
subnet_str = str(row[0]) + "/24"
|
||||
campaigns.append({
|
||||
"id": f"S{i+1:03d}",
|
||||
"campaign_id": -1,
|
||||
"subnet": subnet_str,
|
||||
"total_detections": int(row[1]),
|
||||
"unique_ips": int(row[2]),
|
||||
"dominant_threat": str((row[3] or [""])[0]),
|
||||
"threat_levels": list(row[3] or []),
|
||||
"bot_names": [str(row[4] or "")],
|
||||
"sample_ja4": str(row[5] or ""),
|
||||
"first_seen": str(row[6]),
|
||||
"last_seen": str(row[7]),
|
||||
"source": "subnet_cluster",
|
||||
})
|
||||
|
||||
dbscan_active = any(c["campaign_id"] >= 0 for c in campaigns)
|
||||
return {
|
||||
"campaigns": campaigns,
|
||||
"total": len(campaigns),
|
||||
"dbscan_active": dbscan_active,
|
||||
"hours": hours,
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/scatter")
|
||||
async def get_ml_scatter(limit: int = Query(200, ge=1, le=1000)):
|
||||
"""Points scatter plot (fuzzing_index × hit_velocity) — bypass view_ai_features_1h."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
ip,
|
||||
ja4,
|
||||
round(fuzzing_index, 4) AS fuzzing_index,
|
||||
round(total_hits / greatest(dateDiff('second', min_first, max_last), 1), 2) AS hit_velocity,
|
||||
total_hits AS hits,
|
||||
round(total_count_head / greatest(total_hits, 1), 4) AS head_ratio,
|
||||
correlated
|
||||
FROM (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
any(ja4) AS ja4,
|
||||
uniqMerge(uniq_query_params) / greatest(uniqMerge(uniq_paths), 1) AS fuzzing_index,
|
||||
sum(hits) AS total_hits,
|
||||
min(first_seen) AS min_first,
|
||||
max(last_seen) AS max_last,
|
||||
sum(count_head) AS total_count_head,
|
||||
max(correlated_raw) AS correlated
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY src_ip
|
||||
)
|
||||
ORDER BY fuzzing_index DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
points = []
|
||||
for row in result.result_rows:
|
||||
fuzzing = float(row[2] or 0)
|
||||
velocity = float(row[3] or 0)
|
||||
points.append({
|
||||
"ip": str(row[0]),
|
||||
"ja4": str(row[1]),
|
||||
"fuzzing_index":fuzzing,
|
||||
"hit_velocity": velocity,
|
||||
"hits": int(row[4] or 0),
|
||||
"attack_type": _attack_type(fuzzing, velocity, 0, 0),
|
||||
})
|
||||
return {"points": points}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
125
services/dashboard/backend/routes/reputation.py
Normal file
125
services/dashboard/backend/routes/reputation.py
Normal file
@ -0,0 +1,125 @@
|
||||
"""
|
||||
Routes pour la réputation IP (bases de données publiques)
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Path
|
||||
from typing import Dict, Any
|
||||
import re
|
||||
|
||||
from ..services.reputation_ip import get_reputation_service
|
||||
|
||||
router = APIRouter(prefix="/api/reputation", tags=["Reputation"])
|
||||
|
||||
# Pattern de validation d'IP (IPv4)
|
||||
IP_PATTERN = re.compile(r'^(\d{1,3}\.){3}\d{1,3}$')
|
||||
|
||||
|
||||
def is_valid_ipv4(ip: str) -> bool:
|
||||
"""Valide qu'une chaîne est une adresse IPv4 valide"""
|
||||
if not IP_PATTERN.match(ip):
|
||||
return False
|
||||
|
||||
# Vérifie que chaque octet est entre 0 et 255
|
||||
parts = ip.split('.')
|
||||
for part in parts:
|
||||
try:
|
||||
num = int(part)
|
||||
if num < 0 or num > 255:
|
||||
return False
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@router.get("/ip/{ip_address}", summary="Réputation complète d'une IP")
|
||||
async def get_ip_reputation(
|
||||
ip_address: str = Path(..., description="Adresse IP à vérifier")
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Récupère la réputation d'une adresse IP depuis les bases de données publiques
|
||||
|
||||
Sources utilisées (sans clé API):
|
||||
- IP-API.com: Géolocalisation + Proxy/Hosting detection
|
||||
- IPinfo.io: ASN + Organisation
|
||||
|
||||
Returns:
|
||||
Dict avec:
|
||||
- ip: Adresse IP vérifiée
|
||||
- timestamp: Date de la vérification
|
||||
- sources: Détails par source
|
||||
- aggregated: Résultats agrégés
|
||||
- is_proxy: bool
|
||||
- is_hosting: bool
|
||||
- is_vpn: bool
|
||||
- is_tor: bool
|
||||
- threat_score: 0-100
|
||||
- threat_level: clean/low/medium/high/critical
|
||||
- country: Pays
|
||||
- asn: Numéro ASN
|
||||
- asn_org: Organisation ASN
|
||||
- org: ISP/Organisation
|
||||
- warnings: Liste des alertes
|
||||
"""
|
||||
# Valide l'adresse IP
|
||||
if not is_valid_ipv4(ip_address):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Adresse IP invalide: {ip_address}. Format attendu: x.x.x.x"
|
||||
)
|
||||
|
||||
try:
|
||||
# Récupère le service de réputation
|
||||
reputation_service = get_reputation_service()
|
||||
|
||||
# Interroge les sources
|
||||
results = await reputation_service.get_reputation(ip_address)
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Erreur lors de la vérification de réputation: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/ip/{ip_address}/summary", summary="Réputation simplifiée d'une IP")
|
||||
async def get_ip_reputation_summary(
|
||||
ip_address: str = Path(..., description="Adresse IP à vérifier")
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Version simplifiée de la réputation IP (juste les infos essentielles)
|
||||
|
||||
Utile pour affichage rapide dans les tableaux
|
||||
"""
|
||||
if not is_valid_ipv4(ip_address):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Adresse IP invalide: {ip_address}"
|
||||
)
|
||||
|
||||
try:
|
||||
reputation_service = get_reputation_service()
|
||||
full_results = await reputation_service.get_reputation(ip_address)
|
||||
|
||||
# Retourne juste l'essentiel
|
||||
aggregated = full_results.get('aggregated', {})
|
||||
|
||||
return {
|
||||
'ip': ip_address,
|
||||
'threat_level': aggregated.get('threat_level', 'unknown'),
|
||||
'threat_score': aggregated.get('threat_score', 0),
|
||||
'is_proxy': aggregated.get('is_proxy', False),
|
||||
'is_hosting': aggregated.get('is_hosting', False),
|
||||
'country': aggregated.get('country'),
|
||||
'country_code': aggregated.get('country_code'),
|
||||
'asn': aggregated.get('asn'),
|
||||
'org': aggregated.get('org'),
|
||||
'warnings_count': len(aggregated.get('warnings', []))
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Erreur: {str(e)}"
|
||||
)
|
||||
217
services/dashboard/backend/routes/rotation.py
Normal file
217
services/dashboard/backend/routes/rotation.py
Normal file
@ -0,0 +1,217 @@
|
||||
"""
|
||||
Endpoints pour la détection de la rotation de fingerprints JA4 et des menaces persistantes
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/rotation", tags=["rotation"])
|
||||
|
||||
|
||||
@router.get("/ja4-rotators")
|
||||
async def get_ja4_rotators(limit: int = Query(50, ge=1, le=500)):
|
||||
"""IPs qui effectuent le plus de rotation de fingerprints JA4."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
distinct_ja4_count,
|
||||
total_hits
|
||||
FROM mabase_prod.view_host_ip_ja4_rotation
|
||||
ORDER BY distinct_ja4_count DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
distinct = int(row[1])
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"distinct_ja4_count":distinct,
|
||||
"total_hits": int(row[2]),
|
||||
"evasion_score": min(100, distinct * 15),
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/persistent-threats")
|
||||
async def get_persistent_threats(limit: int = Query(100, ge=1, le=1000)):
|
||||
"""Menaces persistantes triées par score de persistance."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
recurrence,
|
||||
worst_score,
|
||||
worst_threat_level,
|
||||
first_seen,
|
||||
last_seen
|
||||
FROM mabase_prod.view_ip_recurrence
|
||||
ORDER BY (least(100, recurrence * 20 + worst_score * 50)) DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
recurrence = int(row[1])
|
||||
worst_score = float(row[2] or 0)
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"recurrence": recurrence,
|
||||
"worst_score": worst_score,
|
||||
"worst_threat_level":str(row[3] or ""),
|
||||
"first_seen": str(row[4]),
|
||||
"last_seen": str(row[5]),
|
||||
"persistence_score": min(100, recurrence * 20 + worst_score * 50),
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/ip/{ip}/ja4-history")
|
||||
async def get_ip_ja4_history(ip: str):
|
||||
"""Historique des JA4 utilisés par une IP donnée."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
ja4,
|
||||
sum(hits) AS hits,
|
||||
min(window_start) AS first_seen,
|
||||
max(window_start) AS last_seen
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE replaceRegexpAll(toString(src_ip), '^::ffff:', '') = %(ip)s
|
||||
GROUP BY ja4
|
||||
ORDER BY hits DESC
|
||||
"""
|
||||
result = db.query(sql, {"ip": ip})
|
||||
items = [
|
||||
{
|
||||
"ja4": str(row[0]),
|
||||
"hits": int(row[1]),
|
||||
"first_seen":str(row[2]),
|
||||
"last_seen": str(row[3]),
|
||||
}
|
||||
for row in result.result_rows
|
||||
]
|
||||
return {"ip": ip, "ja4_history": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/sophistication")
|
||||
async def get_sophistication(limit: int = Query(50, ge=1, le=500)):
|
||||
"""Score de sophistication adversaire par IP (rotation JA4 + récurrence + bruteforce).
|
||||
Single SQL JOIN query — aucun traitement Python sur 34K entrées.
|
||||
"""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
r.ip,
|
||||
r.distinct_ja4_count,
|
||||
coalesce(rec.recurrence, 0) AS recurrence,
|
||||
coalesce(bf.bruteforce_hits, 0) AS bruteforce_hits,
|
||||
round(least(100.0,
|
||||
r.distinct_ja4_count * 10
|
||||
+ coalesce(rec.recurrence, 0) * 20
|
||||
+ least(30.0, log(coalesce(bf.bruteforce_hits, 0) + 1) * 5)
|
||||
), 1) AS sophistication_score
|
||||
FROM (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
distinct_ja4_count
|
||||
FROM mabase_prod.view_host_ip_ja4_rotation
|
||||
) r
|
||||
LEFT JOIN (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
count() AS recurrence
|
||||
FROM mabase_prod.ml_detected_anomalies FINAL
|
||||
GROUP BY ip
|
||||
) rec ON r.ip = rec.ip
|
||||
LEFT JOIN (
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
sum(hits) AS bruteforce_hits
|
||||
FROM mabase_prod.view_form_bruteforce_detected
|
||||
GROUP BY ip
|
||||
) bf ON r.ip = bf.ip
|
||||
ORDER BY sophistication_score DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
score = float(row[4] or 0)
|
||||
if score > 80:
|
||||
tier = "APT-like"
|
||||
elif score > 50:
|
||||
tier = "Advanced"
|
||||
elif score > 20:
|
||||
tier = "Automated"
|
||||
else:
|
||||
tier = "Basic"
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"ja4_rotation_count": int(row[1] or 0),
|
||||
"recurrence": int(row[2] or 0),
|
||||
"bruteforce_hits": int(row[3] or 0),
|
||||
"sophistication_score":score,
|
||||
"tier": tier,
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/proactive-hunt")
|
||||
async def get_proactive_hunt(
|
||||
min_recurrence: int = Query(2, ge=1, description="Récurrence minimale"),
|
||||
min_days: int = Query(2, ge=0, description="Jours d'activité minimum"),
|
||||
limit: int = Query(50, ge=1, le=500),
|
||||
):
|
||||
"""IPs volant sous le radar : récurrentes mais sous le seuil de détection normal."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
|
||||
recurrence,
|
||||
worst_score,
|
||||
worst_threat_level,
|
||||
first_seen,
|
||||
last_seen,
|
||||
dateDiff('day', first_seen, last_seen) AS days_active
|
||||
FROM mabase_prod.view_ip_recurrence
|
||||
WHERE recurrence >= %(min_recurrence)s
|
||||
AND abs(worst_score) < 0.5
|
||||
AND dateDiff('day', first_seen, last_seen) >= %(min_days)s
|
||||
ORDER BY recurrence DESC, worst_score ASC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(sql, {
|
||||
"min_recurrence": min_recurrence,
|
||||
"min_days": min_days,
|
||||
"limit": limit,
|
||||
})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
recurrence = int(row[1])
|
||||
worst_score = float(row[2] or 0)
|
||||
days_active = int(row[6] or 0)
|
||||
ratio = recurrence / (worst_score + 0.1)
|
||||
risk = "Évadeur potentiel" if ratio > 10 else "Persistant modéré"
|
||||
items.append({
|
||||
"ip": str(row[0]),
|
||||
"recurrence": recurrence,
|
||||
"worst_score": round(worst_score, 4),
|
||||
"worst_threat_level": str(row[3] or ""),
|
||||
"first_seen": str(row[4]),
|
||||
"last_seen": str(row[5]),
|
||||
"days_active": days_active,
|
||||
"risk_assessment": risk,
|
||||
})
|
||||
return {"items": items, "total": len(items)}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
125
services/dashboard/backend/routes/search.py
Normal file
125
services/dashboard/backend/routes/search.py
Normal file
@ -0,0 +1,125 @@
|
||||
"""
|
||||
Endpoint de recherche globale rapide — utilisé par la barre Cmd+K
|
||||
"""
|
||||
from fastapi import APIRouter, Query
|
||||
from ..database import db
|
||||
|
||||
router = APIRouter(prefix="/api/search", tags=["search"])
|
||||
|
||||
IP_RE = r"^(\d{1,3}\.){0,3}\d{1,3}$"
|
||||
|
||||
|
||||
@router.get("/quick")
|
||||
async def quick_search(q: str = Query(..., min_length=1, max_length=100)):
|
||||
"""
|
||||
Recherche unifiée sur IPs, JA4, ASN, hosts.
|
||||
Retourne jusqu'à 5 résultats par catégorie.
|
||||
"""
|
||||
q = q.strip()
|
||||
pattern = f"%{q}%"
|
||||
results = []
|
||||
|
||||
# ── IPs ──────────────────────────────────────────────────────────────────
|
||||
ip_rows = db.query(
|
||||
"""
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
|
||||
count() AS hits,
|
||||
max(detected_at) AS last_seen,
|
||||
any(threat_level) AS threat_level
|
||||
FROM ml_detected_anomalies
|
||||
WHERE ilike(toString(src_ip), %(p)s)
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY clean_ip
|
||||
ORDER BY hits DESC
|
||||
""",
|
||||
{"p": pattern},
|
||||
)
|
||||
for r in ip_rows.result_rows:
|
||||
ip = str(r[0])
|
||||
results.append({
|
||||
"type": "ip",
|
||||
"value": ip,
|
||||
"label": ip,
|
||||
"meta": f"{r[1]} détections · {r[3]}",
|
||||
"url": f"/detections/ip/{ip}",
|
||||
"investigation_url": f"/investigation/{ip}",
|
||||
})
|
||||
|
||||
# ── JA4 fingerprints ─────────────────────────────────────────────────────
|
||||
ja4_rows = db.query(
|
||||
"""
|
||||
SELECT
|
||||
ja4,
|
||||
count() AS hits,
|
||||
uniq(src_ip) AS unique_ips
|
||||
FROM ml_detected_anomalies
|
||||
WHERE ilike(ja4, %(p)s)
|
||||
AND ja4 != ''
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY ja4
|
||||
ORDER BY hits DESC
|
||||
""",
|
||||
{"p": pattern},
|
||||
)
|
||||
for r in ja4_rows.result_rows:
|
||||
results.append({
|
||||
"type": "ja4",
|
||||
"value": str(r[0]),
|
||||
"label": str(r[0]),
|
||||
"meta": f"{r[1]} détections · {r[2]} IPs",
|
||||
"url": f"/investigation/ja4/{r[0]}",
|
||||
})
|
||||
|
||||
# ── Hosts ─────────────────────────────────────────────────────────────────
|
||||
host_rows = db.query(
|
||||
"""
|
||||
SELECT
|
||||
host,
|
||||
count() AS hits,
|
||||
uniq(src_ip) AS unique_ips
|
||||
FROM ml_detected_anomalies
|
||||
WHERE ilike(host, %(p)s)
|
||||
AND host != ''
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY host
|
||||
ORDER BY hits DESC
|
||||
""",
|
||||
{"p": pattern},
|
||||
)
|
||||
for r in host_rows.result_rows:
|
||||
results.append({
|
||||
"type": "host",
|
||||
"value": str(r[0]),
|
||||
"label": str(r[0]),
|
||||
"meta": f"{r[1]} hits · {r[2]} IPs",
|
||||
"url": f"/detections?search={r[0]}",
|
||||
})
|
||||
|
||||
# ── ASN ───────────────────────────────────────────────────────────────────
|
||||
asn_rows = db.query(
|
||||
"""
|
||||
SELECT
|
||||
asn_org,
|
||||
asn_number,
|
||||
count() AS hits,
|
||||
uniq(src_ip) AS unique_ips
|
||||
FROM ml_detected_anomalies
|
||||
WHERE (ilike(asn_org, %(p)s) OR ilike(asn_number, %(p)s))
|
||||
AND asn_org != '' AND asn_number != ''
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY asn_org, asn_number
|
||||
ORDER BY hits DESC
|
||||
""",
|
||||
{"p": pattern},
|
||||
)
|
||||
for r in asn_rows.result_rows:
|
||||
results.append({
|
||||
"type": "asn",
|
||||
"value": str(r[1]),
|
||||
"label": f"AS{r[1]} — {r[0]}",
|
||||
"meta": f"{r[2]} hits · {r[3]} IPs",
|
||||
"url": f"/detections?asn={r[1]}",
|
||||
})
|
||||
|
||||
return {"query": q, "results": results}
|
||||
223
services/dashboard/backend/routes/tcp_spoofing.py
Normal file
223
services/dashboard/backend/routes/tcp_spoofing.py
Normal file
@ -0,0 +1,223 @@
|
||||
"""
|
||||
Endpoints pour la détection du TCP spoofing / fingerprinting OS
|
||||
|
||||
Approche multi-signal (p0f-style) :
|
||||
- TTL initial estimé → famille OS (Linux/Mac=64, Windows=128, Cisco/BSD=255)
|
||||
- MSS → type de réseau (Ethernet=1460, PPPoE=1452, VPN=1380-1420)
|
||||
- Taille de fenêtre → signature OS précise
|
||||
- Facteur d'échelle → affine la version kernel/stack TCP
|
||||
|
||||
Détection bots : signatures connues (Masscan/ZMap/Mirai) identifiées par combinaison
|
||||
win+scale+mss indépendamment de l'UA.
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from ..database import db
|
||||
from ..services.tcp_fingerprint import (
|
||||
fingerprint_os,
|
||||
detect_spoof,
|
||||
declared_os_from_ua,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/tcp-spoofing", tags=["tcp_spoofing"])
|
||||
|
||||
|
||||
@router.get("/overview")
|
||||
async def get_tcp_spoofing_overview():
|
||||
"""Statistiques globales avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale)."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
count() AS total_entries,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
countIf(tcp_ttl_raw = 0) AS no_tcp_data,
|
||||
countIf(tcp_ttl_raw > 0) AS with_tcp_data,
|
||||
countIf(tcp_ttl_raw > 0 AND tcp_ttl_raw <= 64) AS linux_mac_fp,
|
||||
countIf(tcp_ttl_raw > 64 AND tcp_ttl_raw <= 128) AS windows_fp,
|
||||
countIf(tcp_ttl_raw > 128) AS cisco_bsd_fp,
|
||||
countIf(tcp_win_raw = 5808 AND tcp_mss_raw = 1452 AND tcp_scale_raw = 4) AS bot_scanner_fp
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
result = db.query(sql)
|
||||
row = result.result_rows[0]
|
||||
|
||||
# Distribution TTL (top 15)
|
||||
ttl_sql = """
|
||||
SELECT tcp_ttl_raw AS ttl, count() AS cnt, uniq(src_ip) AS ips
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
|
||||
GROUP BY ttl ORDER BY cnt DESC
|
||||
"""
|
||||
ttl_res = db.query(ttl_sql)
|
||||
|
||||
# Distribution MSS — nouveau signal clé (top 12)
|
||||
mss_sql = """
|
||||
SELECT tcp_mss_raw AS mss, count() AS cnt, uniq(src_ip) AS ips
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_mss_raw > 0
|
||||
GROUP BY mss ORDER BY cnt DESC
|
||||
"""
|
||||
mss_res = db.query(mss_sql)
|
||||
|
||||
# Distribution fenêtre (top 10)
|
||||
win_sql = """
|
||||
SELECT tcp_win_raw AS win, count() AS cnt
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
|
||||
GROUP BY win ORDER BY cnt DESC
|
||||
"""
|
||||
win_res = db.query(win_sql)
|
||||
|
||||
return {
|
||||
"total_entries": int(row[0]),
|
||||
"unique_ips": int(row[1]),
|
||||
"no_tcp_data": int(row[2]),
|
||||
"with_tcp_data": int(row[3]),
|
||||
"linux_mac_fingerprint": int(row[4]),
|
||||
"windows_fingerprint": int(row[5]),
|
||||
"cisco_bsd_fingerprint": int(row[6]),
|
||||
"bot_scanner_fingerprint": int(row[7]),
|
||||
"ttl_distribution": [
|
||||
{"ttl": int(r[0]), "count": int(r[1]), "ips": int(r[2])}
|
||||
for r in ttl_res.result_rows
|
||||
],
|
||||
"mss_distribution": [
|
||||
{"mss": int(r[0]), "count": int(r[1]), "ips": int(r[2])}
|
||||
for r in mss_res.result_rows
|
||||
],
|
||||
"window_size_distribution": [
|
||||
{"window_size": int(r[0]), "count": int(r[1])}
|
||||
for r in win_res.result_rows
|
||||
],
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/list")
|
||||
async def get_tcp_spoofing_list(
|
||||
limit: int = Query(100, ge=1, le=1000),
|
||||
offset: int = Query(0, ge=0),
|
||||
spoof_only: bool = Query(False, description="Retourner uniquement les spoofs/bots confirmés"),
|
||||
):
|
||||
"""Liste avec fingerprinting multi-signal (TTL + MSS + fenêtre + scale).
|
||||
Inclut les champs enrichis : mss, win_scale, initial_ttl, hop_count, confidence, network_path, is_bot_tool.
|
||||
"""
|
||||
try:
|
||||
count_sql = """
|
||||
SELECT count() FROM (
|
||||
SELECT src_ip, ja4
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
|
||||
GROUP BY src_ip, ja4
|
||||
)
|
||||
"""
|
||||
total = int(db.query(count_sql).result_rows[0][0])
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS src_ip,
|
||||
ja4,
|
||||
any(tcp_ttl_raw) AS tcp_ttl,
|
||||
any(tcp_win_raw) AS tcp_window_size,
|
||||
any(tcp_scale_raw) AS tcp_win_scale,
|
||||
any(tcp_mss_raw) AS tcp_mss,
|
||||
any(first_ua) AS first_ua,
|
||||
sum(hits) AS hits
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
|
||||
GROUP BY src_ip, ja4
|
||||
ORDER BY hits DESC
|
||||
LIMIT %(limit)s OFFSET %(offset)s
|
||||
"""
|
||||
result = db.query(sql, {"limit": limit, "offset": offset})
|
||||
items = []
|
||||
for row in result.result_rows:
|
||||
ip = str(row[0])
|
||||
ja4 = str(row[1] or "")
|
||||
ttl = int(row[2] or 0)
|
||||
win = int(row[3] or 0)
|
||||
scale = int(row[4] or 0)
|
||||
mss = int(row[5] or 0)
|
||||
ua = str(row[6] or "")
|
||||
hits = int(row[7] or 0)
|
||||
|
||||
fp = fingerprint_os(ttl, win, scale, mss)
|
||||
dec_os = declared_os_from_ua(ua)
|
||||
spoof_res = detect_spoof(fp, dec_os)
|
||||
|
||||
if spoof_only and not spoof_res.is_spoof:
|
||||
continue
|
||||
|
||||
items.append({
|
||||
"ip": ip,
|
||||
"ja4": ja4,
|
||||
"tcp_ttl": ttl,
|
||||
"tcp_window_size": win,
|
||||
"tcp_win_scale": scale,
|
||||
"tcp_mss": mss,
|
||||
"hits": hits,
|
||||
"first_ua": ua,
|
||||
"suspected_os": fp.os_name,
|
||||
"initial_ttl": fp.initial_ttl,
|
||||
"hop_count": fp.hop_count,
|
||||
"confidence": fp.confidence,
|
||||
"network_path": fp.network_path,
|
||||
"is_bot_tool": fp.is_bot_tool,
|
||||
"declared_os": dec_os,
|
||||
"spoof_flag": spoof_res.is_spoof,
|
||||
"spoof_reason": spoof_res.reason,
|
||||
})
|
||||
return {"items": items, "total": total}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/matrix")
|
||||
async def get_tcp_spoofing_matrix():
|
||||
"""Matrice OS suspecté × OS déclaré avec fingerprinting multi-signal."""
|
||||
try:
|
||||
sql = """
|
||||
SELECT
|
||||
any(tcp_ttl_raw) AS ttl,
|
||||
any(tcp_win_raw) AS win,
|
||||
any(tcp_scale_raw) AS scale,
|
||||
any(tcp_mss_raw) AS mss,
|
||||
any(first_ua) AS ua,
|
||||
count() AS cnt
|
||||
FROM mabase_prod.agg_host_ip_ja4_1h
|
||||
WHERE window_start >= now() - INTERVAL 24 HOUR AND tcp_ttl_raw > 0
|
||||
GROUP BY src_ip, ja4
|
||||
"""
|
||||
result = db.query(sql)
|
||||
counts: dict = {}
|
||||
for row in result.result_rows:
|
||||
ttl = int(row[0] or 0)
|
||||
win = int(row[1] or 0)
|
||||
scale = int(row[2] or 0)
|
||||
mss = int(row[3] or 0)
|
||||
ua = str(row[4] or "")
|
||||
cnt = int(row[5] or 1)
|
||||
|
||||
fp = fingerprint_os(ttl, win, scale, mss)
|
||||
dec_os = declared_os_from_ua(ua)
|
||||
spoof_res = detect_spoof(fp, dec_os)
|
||||
|
||||
key = (fp.os_name, dec_os, spoof_res.is_spoof, fp.is_bot_tool)
|
||||
counts[key] = counts.get(key, 0) + cnt
|
||||
|
||||
matrix = [
|
||||
{
|
||||
"suspected_os": k[0],
|
||||
"declared_os": k[1],
|
||||
"count": v,
|
||||
"is_spoof": k[2],
|
||||
"is_bot_tool": k[3],
|
||||
}
|
||||
for k, v in counts.items()
|
||||
]
|
||||
matrix.sort(key=lambda x: x["count"], reverse=True)
|
||||
return {"matrix": matrix}
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
706
services/dashboard/backend/routes/variability.py
Normal file
706
services/dashboard/backend/routes/variability.py
Normal file
@ -0,0 +1,706 @@
|
||||
"""
|
||||
Endpoints pour la variabilité des attributs
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from typing import Optional
|
||||
from ..database import db
|
||||
from ..models import (
|
||||
VariabilityResponse, VariabilityAttributes, AttributeValue, Insight,
|
||||
UserAgentsResponse, UserAgentValue
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/variability", tags=["variability"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ROUTES SPÉCIFIQUES (doivent être avant les routes génériques)
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/{attr_type}/{value:path}/ips", response_model=dict)
|
||||
async def get_associated_ips(
|
||||
attr_type: str,
|
||||
value: str,
|
||||
limit: int = Query(100, ge=1, le=1000, description="Nombre maximum d'IPs")
|
||||
):
|
||||
"""
|
||||
Récupère la liste des IPs associées à un attribut
|
||||
"""
|
||||
try:
|
||||
# Mapping des types vers les colonnes
|
||||
type_column_map = {
|
||||
"ip": "src_ip",
|
||||
"ja4": "ja4",
|
||||
"country": "country_code",
|
||||
"asn": "asn_number",
|
||||
"host": "host",
|
||||
}
|
||||
|
||||
if attr_type not in type_column_map:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
|
||||
)
|
||||
|
||||
column = type_column_map[attr_type]
|
||||
|
||||
query = f"""
|
||||
SELECT src_ip, count() AS hit_count
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY src_ip
|
||||
ORDER BY hit_count DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
result = db.query(query, {"value": value, "limit": limit})
|
||||
|
||||
total_hits = sum(row[1] for row in result.result_rows) or 1
|
||||
ips = [
|
||||
{"ip": str(row[0]), "count": row[1], "percentage": round(row[1] * 100.0 / total_hits, 2)}
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
# Compter le total
|
||||
count_query = f"""
|
||||
SELECT uniq(src_ip) AS total
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
count_result = db.query(count_query, {"value": value})
|
||||
total = count_result.result_rows[0][0] if count_result.result_rows else 0
|
||||
|
||||
return {
|
||||
"type": attr_type,
|
||||
"value": value,
|
||||
"ips": ips,
|
||||
"total": total,
|
||||
"showing": len(ips)
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{attr_type}/{value:path}/attributes", response_model=dict)
|
||||
async def get_associated_attributes(
|
||||
attr_type: str,
|
||||
value: str,
|
||||
target_attr: str = Query(..., description="Type d'attribut à récupérer (user_agents, ja4, countries, asns, hosts)"),
|
||||
limit: int = Query(50, ge=1, le=500, description="Nombre maximum de résultats")
|
||||
):
|
||||
"""
|
||||
Récupère la liste des attributs associés (ex: User-Agents pour un pays)
|
||||
"""
|
||||
try:
|
||||
# Mapping des types vers les colonnes
|
||||
type_column_map = {
|
||||
"ip": "src_ip",
|
||||
"ja4": "ja4",
|
||||
"country": "country_code",
|
||||
"asn": "asn_number",
|
||||
"host": "host",
|
||||
}
|
||||
|
||||
# Mapping des attributs cibles
|
||||
target_column_map = {
|
||||
"user_agents": None, # handled separately via view_dashboard_entities
|
||||
"ja4": "ja4",
|
||||
"countries": "country_code",
|
||||
"asns": "asn_number",
|
||||
"hosts": "host",
|
||||
}
|
||||
|
||||
if attr_type not in type_column_map:
|
||||
raise HTTPException(status_code=400, detail=f"Type '{attr_type}' invalide")
|
||||
|
||||
if target_attr not in target_column_map:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Attribut cible invalide. Supportés: {', '.join(target_column_map.keys())}"
|
||||
)
|
||||
|
||||
column = type_column_map[attr_type]
|
||||
target_column = target_column_map[target_attr]
|
||||
|
||||
# Pour user_agents: requête via view_dashboard_user_agents
|
||||
# Colonnes: src_ip, ja4, hour, log_date, user_agents, requests
|
||||
if target_column is None:
|
||||
if attr_type == "ip":
|
||||
ua_where = "toString(src_ip) = %(value)s"
|
||||
elif attr_type == "ja4":
|
||||
ua_where = "ja4 = %(value)s"
|
||||
else:
|
||||
# country/asn/host: pivot via ml_detected_anomalies
|
||||
ua_where = f"""toString(src_ip) IN (
|
||||
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
)"""
|
||||
ua_q = f"""
|
||||
SELECT ua AS value, sum(requests) AS count,
|
||||
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {ua_where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR AND ua != ''
|
||||
GROUP BY value ORDER BY count DESC LIMIT %(limit)s
|
||||
"""
|
||||
ua_result = db.query(ua_q, {"value": value, "limit": limit})
|
||||
items = [{"value": str(r[0]), "count": r[1], "percentage": round(float(r[2]), 2) if r[2] else 0.0}
|
||||
for r in ua_result.result_rows]
|
||||
return {"type": attr_type, "value": value, "target": target_attr, "items": items, "total": len(items), "showing": len(items)}
|
||||
|
||||
query = f"""
|
||||
SELECT
|
||||
{target_column} AS value,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / sum(count()) OVER (), 2) AS percentage
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s
|
||||
AND {target_column} != '' AND {target_column} IS NOT NULL
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
|
||||
result = db.query(query, {"value": value, "limit": limit})
|
||||
|
||||
items = [
|
||||
{
|
||||
"value": str(row[0]),
|
||||
"count": row[1],
|
||||
"percentage": round(float(row[2]), 2) if row[2] else 0.0
|
||||
}
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
# Compter le total
|
||||
count_query = f"""
|
||||
SELECT uniq({target_column}) AS total
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s
|
||||
AND {target_column} != '' AND {target_column} IS NOT NULL
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
count_result = db.query(count_query, {"value": value})
|
||||
total = count_result.result_rows[0][0] if count_result.result_rows else 0
|
||||
|
||||
return {
|
||||
"type": attr_type,
|
||||
"value": value,
|
||||
"target": target_attr,
|
||||
"items": items,
|
||||
"total": total,
|
||||
"showing": len(items)
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
@router.get("/{attr_type}/{value:path}/user_agents", response_model=UserAgentsResponse)
|
||||
async def get_user_agents(
|
||||
attr_type: str,
|
||||
value: str,
|
||||
limit: int = Query(100, ge=1, le=500, description="Nombre maximum de user-agents")
|
||||
):
|
||||
"""
|
||||
Récupère la liste des User-Agents associés à un attribut (IP, JA4, pays, etc.)
|
||||
Les données sont récupérées depuis la vue materialisée view_dashboard_user_agents
|
||||
"""
|
||||
try:
|
||||
# Mapping des types vers les colonnes
|
||||
type_column_map = {
|
||||
"ip": "src_ip",
|
||||
"ja4": "ja4",
|
||||
"country": "country_code",
|
||||
"asn": "asn_number",
|
||||
"host": "host",
|
||||
}
|
||||
|
||||
if attr_type not in type_column_map:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
|
||||
)
|
||||
|
||||
column = type_column_map[attr_type]
|
||||
|
||||
# view_dashboard_user_agents colonnes: src_ip, ja4, hour, log_date, user_agents, requests
|
||||
if attr_type == "ip":
|
||||
where = "toString(src_ip) = %(value)s"
|
||||
params: dict = {"value": value, "limit": limit}
|
||||
elif attr_type == "ja4":
|
||||
where = "ja4 = %(value)s"
|
||||
params = {"value": value, "limit": limit}
|
||||
else:
|
||||
# country / asn / host: pivot via ml_detected_anomalies → IPs connus → vue par src_ip
|
||||
ml_col = {"country": "country_code", "asn": "asn_number", "host": "host"}[attr_type]
|
||||
where = f"""toString(src_ip) IN (
|
||||
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {ml_col} = %(value)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
)"""
|
||||
params = {"value": value, "limit": limit}
|
||||
|
||||
query = f"""
|
||||
SELECT
|
||||
ua AS user_agent,
|
||||
sum(requests) AS count,
|
||||
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
|
||||
min(log_date) AS first_seen,
|
||||
max(log_date) AS last_seen
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR
|
||||
AND ua != ''
|
||||
GROUP BY user_agent
|
||||
ORDER BY count DESC
|
||||
LIMIT %(limit)s
|
||||
"""
|
||||
result = db.query(query, params)
|
||||
|
||||
count_query = f"""
|
||||
SELECT uniqExact(ua) AS total
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR
|
||||
AND ua != ''
|
||||
"""
|
||||
count_result = db.query(count_query, params)
|
||||
|
||||
user_agents = [
|
||||
UserAgentValue(
|
||||
value=str(row[0]),
|
||||
count=row[1] or 0,
|
||||
percentage=round(float(row[2]), 2) if row[2] else 0.0,
|
||||
first_seen=row[3] if len(row) > 3 and row[3] else None,
|
||||
last_seen=row[4] if len(row) > 4 and row[4] else None,
|
||||
)
|
||||
for row in result.result_rows
|
||||
]
|
||||
|
||||
total = count_result.result_rows[0][0] if count_result.result_rows else 0
|
||||
|
||||
return {
|
||||
"type": attr_type,
|
||||
"value": value,
|
||||
"user_agents": user_agents,
|
||||
"total": total,
|
||||
"showing": len(user_agents)
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ROUTE GÉNÉRIQUE (doit être en dernier)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def get_attribute_value(row, count_idx: int, percentage_idx: int,
|
||||
first_seen_idx: Optional[int] = None,
|
||||
last_seen_idx: Optional[int] = None,
|
||||
threat_idx: Optional[int] = None,
|
||||
unique_ips_idx: Optional[int] = None) -> AttributeValue:
|
||||
"""Helper pour créer un AttributeValue depuis une ligne ClickHouse"""
|
||||
return AttributeValue(
|
||||
value=str(row[0]),
|
||||
count=row[count_idx] or 0,
|
||||
percentage=round(float(row[percentage_idx]), 2) if row[percentage_idx] else 0.0,
|
||||
first_seen=row[first_seen_idx] if first_seen_idx is not None and len(row) > first_seen_idx else None,
|
||||
last_seen=row[last_seen_idx] if last_seen_idx is not None and len(row) > last_seen_idx else None,
|
||||
threat_levels=_parse_threat_levels(row[threat_idx]) if threat_idx is not None and len(row) > threat_idx and row[threat_idx] else None,
|
||||
unique_ips=row[unique_ips_idx] if unique_ips_idx is not None and len(row) > unique_ips_idx else None,
|
||||
primary_threat=_get_primary_threat(row[threat_idx]) if threat_idx is not None and len(row) > threat_idx and row[threat_idx] else None
|
||||
)
|
||||
|
||||
|
||||
def _parse_threat_levels(threat_str: str) -> dict:
|
||||
"""Parse une chaîne de type 'CRITICAL:5,HIGH:10' en dict"""
|
||||
if not threat_str:
|
||||
return {}
|
||||
result = {}
|
||||
for part in str(threat_str).split(','):
|
||||
if ':' in part:
|
||||
level, count = part.strip().split(':')
|
||||
result[level.strip()] = int(count.strip())
|
||||
return result
|
||||
|
||||
|
||||
def _get_primary_threat(threat_str: str) -> str:
|
||||
"""Retourne le niveau de menace principal"""
|
||||
if not threat_str:
|
||||
return ""
|
||||
levels_order = ["CRITICAL", "HIGH", "MEDIUM", "LOW"]
|
||||
for level in levels_order:
|
||||
if level in str(threat_str):
|
||||
return level
|
||||
return ""
|
||||
|
||||
|
||||
def _generate_insights(attr_type: str, value: str, attributes: VariabilityAttributes,
|
||||
total_detections: int, unique_ips: int) -> list:
|
||||
"""Génère des insights basés sur les données de variabilité"""
|
||||
insights = []
|
||||
|
||||
# User-Agent insights
|
||||
if len(attributes.user_agents) > 1:
|
||||
insights.append(Insight(
|
||||
type="warning",
|
||||
message=f"{len(attributes.user_agents)} User-Agents différents → Possible rotation/obfuscation"
|
||||
))
|
||||
|
||||
# JA4 insights
|
||||
if len(attributes.ja4) > 1:
|
||||
insights.append(Insight(
|
||||
type="warning",
|
||||
message=f"{len(attributes.ja4)} JA4 fingerprints différents → Possible rotation de fingerprint"
|
||||
))
|
||||
|
||||
# IP insights (pour les sélections non-IP)
|
||||
if attr_type != "ip" and unique_ips > 10:
|
||||
insights.append(Insight(
|
||||
type="info",
|
||||
message=f"{unique_ips} IPs différentes associées → Possible infrastructure distribuée"
|
||||
))
|
||||
|
||||
# ASN insights
|
||||
if len(attributes.asns) == 1 and attributes.asns[0].value:
|
||||
asn_label_lower = ""
|
||||
if attr_type == "asn":
|
||||
asn_label_lower = value.lower()
|
||||
# Vérifier si c'est un ASN de hosting/cloud
|
||||
hosting_keywords = ["ovh", "amazon", "aws", "google", "azure", "digitalocean", "linode", "vultr"]
|
||||
if any(kw in (attributes.asns[0].value or "").lower() for kw in hosting_keywords):
|
||||
insights.append(Insight(
|
||||
type="warning",
|
||||
message="ASN de type hosting/cloud → Souvent utilisé pour des bots"
|
||||
))
|
||||
|
||||
# Country insights
|
||||
if len(attributes.countries) > 5:
|
||||
insights.append(Insight(
|
||||
type="info",
|
||||
message=f"Présent dans {len(attributes.countries)} pays → Distribution géographique large"
|
||||
))
|
||||
|
||||
# Threat level insights
|
||||
critical_count = 0
|
||||
high_count = 0
|
||||
for tl in attributes.threat_levels:
|
||||
if tl.value == "CRITICAL":
|
||||
critical_count = tl.count
|
||||
elif tl.value == "HIGH":
|
||||
high_count = tl.count
|
||||
|
||||
if critical_count > total_detections * 0.3:
|
||||
insights.append(Insight(
|
||||
type="warning",
|
||||
message=f"{round(critical_count * 100 / total_detections)}% de détections CRITICAL → Menace sévère"
|
||||
))
|
||||
elif high_count > total_detections * 0.5:
|
||||
insights.append(Insight(
|
||||
type="info",
|
||||
message=f"{round(high_count * 100 / total_detections)}% de détections HIGH → Activité suspecte"
|
||||
))
|
||||
|
||||
return insights
|
||||
|
||||
|
||||
@router.get("/{attr_type}/{value:path}", response_model=VariabilityResponse)
|
||||
async def get_variability(attr_type: str, value: str):
|
||||
"""
|
||||
Récupère la variabilité des attributs associés à une valeur
|
||||
|
||||
attr_type: ip, ja4, country, asn, host, user_agent
|
||||
value: la valeur à investiguer
|
||||
"""
|
||||
try:
|
||||
# Mapping des types vers les colonnes ClickHouse
|
||||
type_column_map = {
|
||||
"ip": "src_ip",
|
||||
"ja4": "ja4",
|
||||
"country": "country_code",
|
||||
"asn": "asn_number",
|
||||
"host": "host",
|
||||
"user_agent": "header_user_agent"
|
||||
}
|
||||
|
||||
if attr_type not in type_column_map:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Type invalide. Types supportés: {', '.join(type_column_map.keys())}"
|
||||
)
|
||||
|
||||
column = type_column_map[attr_type]
|
||||
|
||||
# Requête principale - Récupère toutes les détections pour cette valeur
|
||||
# On utilise toStartOfHour pour le timeseries et on évite header_user_agent si inexistant
|
||||
base_query = f"""
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT
|
||||
detected_at,
|
||||
src_ip,
|
||||
ja4,
|
||||
host,
|
||||
'' AS user_agent,
|
||||
country_code,
|
||||
asn_number,
|
||||
asn_org,
|
||||
threat_level,
|
||||
model_name,
|
||||
anomaly_score
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
)
|
||||
"""
|
||||
|
||||
# Stats globales
|
||||
stats_query = f"""
|
||||
SELECT
|
||||
count() AS total_detections,
|
||||
uniq(src_ip) AS unique_ips,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s
|
||||
AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
"""
|
||||
|
||||
stats_result = db.query(stats_query, {"value": value})
|
||||
|
||||
if not stats_result.result_rows or stats_result.result_rows[0][0] == 0:
|
||||
raise HTTPException(status_code=404, detail="Aucune donnée trouvée")
|
||||
|
||||
stats_row = stats_result.result_rows[0]
|
||||
total_detections = stats_row[0]
|
||||
unique_ips = stats_row[1]
|
||||
first_seen = stats_row[2]
|
||||
last_seen = stats_row[3]
|
||||
|
||||
# User-Agents depuis http_logs pour des comptes exacts par requête
|
||||
# (view_dashboard_user_agents déduplique par heure, ce qui sous-compte les hits)
|
||||
_ua_params: dict = {"value": value}
|
||||
if attr_type == "ip":
|
||||
_ua_logs_where = "src_ip = toIPv4(%(value)s)"
|
||||
ua_query_simple = f"""
|
||||
SELECT
|
||||
header_user_agent AS user_agent,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (
|
||||
SELECT count() FROM mabase_prod.http_logs
|
||||
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
|
||||
), 2) AS percentage,
|
||||
min(time) AS first_seen,
|
||||
max(time) AS last_seen
|
||||
FROM mabase_prod.http_logs
|
||||
WHERE {_ua_logs_where}
|
||||
AND time >= now() - INTERVAL 24 HOUR
|
||||
AND header_user_agent != '' AND header_user_agent IS NOT NULL
|
||||
GROUP BY user_agent
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
ua_result = db.query(ua_query_simple, _ua_params)
|
||||
user_agents = [get_attribute_value(row, 1, 2, 3, 4) for row in ua_result.result_rows]
|
||||
elif attr_type == "ja4":
|
||||
_ua_logs_where = "ja4 = %(value)s"
|
||||
ua_query_simple = f"""
|
||||
SELECT
|
||||
header_user_agent AS user_agent,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (
|
||||
SELECT count() FROM mabase_prod.http_logs
|
||||
WHERE {_ua_logs_where} AND time >= now() - INTERVAL 24 HOUR
|
||||
), 2) AS percentage,
|
||||
min(time) AS first_seen,
|
||||
max(time) AS last_seen
|
||||
FROM mabase_prod.http_logs
|
||||
WHERE {_ua_logs_where}
|
||||
AND time >= now() - INTERVAL 24 HOUR
|
||||
AND header_user_agent != '' AND header_user_agent IS NOT NULL
|
||||
GROUP BY user_agent
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
ua_result = db.query(ua_query_simple, _ua_params)
|
||||
user_agents = [get_attribute_value(row, 1, 2, 3, 4) for row in ua_result.result_rows]
|
||||
else:
|
||||
# country / asn / host: pivot via ml_detected_anomalies → IPs, puis view UA
|
||||
_ua_where = f"""toString(src_ip) IN (
|
||||
SELECT DISTINCT replaceRegexpAll(toString(src_ip), '^::ffff:', '')
|
||||
FROM ml_detected_anomalies
|
||||
WHERE {column} = %(value)s AND detected_at >= now() - INTERVAL 24 HOUR
|
||||
)"""
|
||||
ua_query_simple = f"""
|
||||
SELECT
|
||||
ua AS user_agent,
|
||||
sum(requests) AS count,
|
||||
round(sum(requests) * 100.0 / sum(sum(requests)) OVER (), 2) AS percentage,
|
||||
min(log_date) AS first_seen,
|
||||
max(log_date) AS last_seen
|
||||
FROM view_dashboard_user_agents
|
||||
ARRAY JOIN user_agents AS ua
|
||||
WHERE {_ua_where}
|
||||
AND hour >= now() - INTERVAL 24 HOUR
|
||||
AND ua != ''
|
||||
GROUP BY user_agent
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
ua_result = db.query(ua_query_simple, _ua_params)
|
||||
user_agents = [get_attribute_value(row, 1, 2, 3, 4) for row in ua_result.result_rows]
|
||||
|
||||
# JA4 fingerprints
|
||||
ja4_query = f"""
|
||||
SELECT
|
||||
ja4,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage,
|
||||
min(detected_at) AS first_seen,
|
||||
max(detected_at) AS last_seen
|
||||
FROM ({base_query})
|
||||
WHERE ja4 != '' AND ja4 IS NOT NULL
|
||||
GROUP BY ja4
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
ja4_result = db.query(ja4_query, {"value": value})
|
||||
ja4s = [get_attribute_value(row, 1, 2, 3, 4) for row in ja4_result.result_rows]
|
||||
|
||||
# Pays
|
||||
country_query = f"""
|
||||
SELECT
|
||||
country_code,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
|
||||
FROM ({base_query})
|
||||
WHERE country_code != '' AND country_code IS NOT NULL
|
||||
GROUP BY country_code
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
country_result = db.query(country_query, {"value": value})
|
||||
countries = [get_attribute_value(row, 1, 2) for row in country_result.result_rows]
|
||||
|
||||
# ASN
|
||||
asn_query = f"""
|
||||
SELECT
|
||||
concat('AS', toString(asn_number), ' - ', asn_org) AS asn_display,
|
||||
asn_number,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
|
||||
FROM ({base_query})
|
||||
WHERE asn_number != '' AND asn_number IS NOT NULL AND asn_number != '0'
|
||||
GROUP BY asn_display, asn_number
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
asn_result = db.query(asn_query, {"value": value})
|
||||
asns = [
|
||||
AttributeValue(
|
||||
value=str(row[0]),
|
||||
count=row[2] or 0,
|
||||
percentage=round(float(row[3]), 2) if row[3] else 0.0
|
||||
)
|
||||
for row in asn_result.result_rows
|
||||
]
|
||||
|
||||
# Hosts
|
||||
host_query = f"""
|
||||
SELECT
|
||||
host,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
|
||||
FROM ({base_query})
|
||||
WHERE host != '' AND host IS NOT NULL
|
||||
GROUP BY host
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
host_result = db.query(host_query, {"value": value})
|
||||
hosts = [get_attribute_value(row, 1, 2) for row in host_result.result_rows]
|
||||
|
||||
# Threat levels
|
||||
threat_query = f"""
|
||||
SELECT
|
||||
threat_level,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
|
||||
FROM ({base_query})
|
||||
WHERE threat_level != '' AND threat_level IS NOT NULL
|
||||
GROUP BY threat_level
|
||||
ORDER BY
|
||||
CASE threat_level
|
||||
WHEN 'CRITICAL' THEN 1
|
||||
WHEN 'HIGH' THEN 2
|
||||
WHEN 'MEDIUM' THEN 3
|
||||
WHEN 'LOW' THEN 4
|
||||
ELSE 5
|
||||
END
|
||||
"""
|
||||
|
||||
threat_result = db.query(threat_query, {"value": value})
|
||||
threat_levels = [get_attribute_value(row, 1, 2) for row in threat_result.result_rows]
|
||||
|
||||
# Model names
|
||||
model_query = f"""
|
||||
SELECT
|
||||
model_name,
|
||||
count() AS count,
|
||||
round(count() * 100.0 / (SELECT count() FROM ({base_query})), 2) AS percentage
|
||||
FROM ({base_query})
|
||||
WHERE model_name != '' AND model_name IS NOT NULL
|
||||
GROUP BY model_name
|
||||
ORDER BY count DESC
|
||||
"""
|
||||
|
||||
model_result = db.query(model_query, {"value": value})
|
||||
model_names = [get_attribute_value(row, 1, 2) for row in model_result.result_rows]
|
||||
|
||||
# Construire la réponse
|
||||
attributes = VariabilityAttributes(
|
||||
user_agents=user_agents,
|
||||
ja4=ja4s,
|
||||
countries=countries,
|
||||
asns=asns,
|
||||
hosts=hosts,
|
||||
threat_levels=threat_levels,
|
||||
model_names=model_names
|
||||
)
|
||||
|
||||
# Générer les insights
|
||||
insights = _generate_insights(attr_type, value, attributes, total_detections, unique_ips)
|
||||
|
||||
return VariabilityResponse(
|
||||
type=attr_type,
|
||||
value=value,
|
||||
total_detections=total_detections,
|
||||
unique_ips=unique_ips,
|
||||
date_range={
|
||||
"first_seen": first_seen,
|
||||
"last_seen": last_seen
|
||||
},
|
||||
attributes=attributes,
|
||||
insights=insights
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
|
||||
0
services/dashboard/backend/services/__init__.py
Normal file
0
services/dashboard/backend/services/__init__.py
Normal file
493
services/dashboard/backend/services/clustering_engine.py
Normal file
493
services/dashboard/backend/services/clustering_engine.py
Normal file
@ -0,0 +1,493 @@
|
||||
"""
|
||||
Moteur de clustering K-means++ multi-métriques (numpy + scipy vectorisé).
|
||||
|
||||
Ref:
|
||||
Arthur & Vassilvitskii (2007) — k-means++: The Advantages of Careful Seeding
|
||||
scipy.spatial.ConvexHull — enveloppe convexe (Graham/Qhull)
|
||||
sklearn-style API — centroids, labels_, inertia_
|
||||
|
||||
Features (31 dimensions, normalisées [0,1]) :
|
||||
0 ttl_n : TTL initial normalisé
|
||||
1 mss_n : MSS normalisé → type réseau
|
||||
2 scale_n : facteur de mise à l'échelle TCP
|
||||
3 win_n : fenêtre TCP normalisée
|
||||
4 score_n : score anomalie ML (abs)
|
||||
5 velocity_n : vélocité de requêtes (log1p)
|
||||
6 fuzzing_n : index de fuzzing (log1p)
|
||||
7 headless_n : ratio sessions headless
|
||||
8 post_n : ratio POST/total
|
||||
9 ip_id_zero_n : ratio IP-ID=0 (Linux/spoofé)
|
||||
10 entropy_n : entropie temporelle
|
||||
11 browser_n : score navigateur moderne
|
||||
12 alpn_n : mismatch ALPN/protocole
|
||||
13 alpn_absent_n : ratio ALPN absent
|
||||
14 h2_n : efficacité H2 multiplexing (log1p)
|
||||
15 hdr_conf_n : confiance ordre headers
|
||||
16 ua_ch_n : mismatch User-Agent-Client-Hints
|
||||
17 asset_n : ratio assets statiques
|
||||
18 direct_n : ratio accès directs
|
||||
19 ja4_div_n : diversité JA4 (log1p)
|
||||
20 ua_rot_n : UA rotatif (booléen)
|
||||
21 country_risk_n : risque pays source (CN/RU/KP → 1.0, US/DE/FR → 0.0)
|
||||
22 asn_cloud_n : hébergeur cloud/CDN/VPN (Cloudflare/AWS/OVH → 1.0)
|
||||
23 hdr_accept_lang_n : présence header Accept-Language (0=absent=bot-like)
|
||||
24 hdr_encoding_n : présence header Accept-Encoding (0=absent=bot-like)
|
||||
25 hdr_sec_fetch_n : présence headers Sec-Fetch-* (1=navigateur réel)
|
||||
26 hdr_count_n : nombre de headers HTTP normalisé (3=bot, 15=browser)
|
||||
27 hfp_popular_n : popularité du fingerprint headers (log-normalisé)
|
||||
fingerprint rare = suspect ; très populaire = browser légitime
|
||||
28 hfp_rotating_n : rotation de fingerprint (distinct_header_orders)
|
||||
plusieurs fingerprints distincts → bot en rotation
|
||||
29 hfp_cookie_n : présence header Cookie (engagement utilisateur réel)
|
||||
30 hfp_referer_n : présence header Referer (navigation HTTP normale)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import logging
|
||||
import numpy as np
|
||||
from dataclasses import dataclass, field
|
||||
from scipy.spatial import ConvexHull
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# ─── Encodage pays (risque source) ───────────────────────────────────────────
|
||||
# Source: MISP threat intel, Spamhaus DROP list, géographie offensive connue
|
||||
|
||||
_COUNTRY_RISK: dict[str, float] = {
|
||||
# Très haut risque : infrastructure offensive documentée
|
||||
"CN": 1.0, "RU": 1.0, "KP": 1.0, "IR": 1.0,
|
||||
"BY": 0.9, "SY": 0.9, "CU": 0.8,
|
||||
# Haut risque : transit/hébergement permissif, bulletproof hosters
|
||||
"HK": 0.75, "VN": 0.7, "UA": 0.65,
|
||||
"RO": 0.6, "PK": 0.6, "NG": 0.6,
|
||||
"BG": 0.55, "TR": 0.55, "BR": 0.5,
|
||||
"TH": 0.5, "IN": 0.45, "ID": 0.45,
|
||||
# Risque faible : pays à faible tolérance envers activité malveillante
|
||||
"US": 0.1, "DE": 0.1, "FR": 0.1, "GB": 0.1,
|
||||
"CA": 0.1, "JP": 0.1, "AU": 0.1, "NL": 0.15,
|
||||
"CH": 0.1, "SE": 0.1, "NO": 0.1, "DK": 0.1,
|
||||
"FI": 0.1, "AT": 0.1, "BE": 0.1, "IT": 0.15,
|
||||
"SG": 0.3, "TW": 0.2, "KR": 0.2, "RS": 0.4,
|
||||
}
|
||||
_DEFAULT_COUNTRY_RISK = 0.35 # pays inconnu → risque modéré
|
||||
|
||||
|
||||
def country_risk(cc: str | None) -> float:
|
||||
"""Score de risque [0,1] d'un code pays ISO-3166."""
|
||||
return _COUNTRY_RISK.get((cc or "").upper(), _DEFAULT_COUNTRY_RISK)
|
||||
|
||||
|
||||
# ─── Encodage ASN (type d'infrastructure) ────────────────────────────────────
|
||||
# Cloud/CDN/hosting → fort corrélé avec scanners automatisés et bots
|
||||
|
||||
_ASN_CLOUD_KEYWORDS = [
|
||||
# Hyperscalers
|
||||
"amazon", "aws", "google", "microsoft", "azure", "alibaba", "tencent", "huawei",
|
||||
# CDN / edge
|
||||
"cloudflare", "akamai", "fastly", "cloudfront", "incapsula", "imperva",
|
||||
"sucuri", "stackpath", "keycdn",
|
||||
# Hébergeurs
|
||||
"ovh", "hetzner", "digitalocean", "vultr", "linode", "akamai-linode",
|
||||
"leaseweb", "choopa", "packet", "equinix", "serverius", "combahton",
|
||||
"m247", "b2 net", "hostinger", "contabo",
|
||||
# Bulletproof / transit permissif connus
|
||||
"hwclouds", "multacom", "psychz", "serverius", "colocrossing",
|
||||
"frantech", "sharktech", "tzulo",
|
||||
# VPN / proxy commerciaux
|
||||
"nordvpn", "expressvpn", "mullvad", "protonvpn", "surfshark",
|
||||
"privateinternetaccess", "pia ", "cyberghost", "hotspot shield",
|
||||
"ipvanish", "hide.me",
|
||||
# Bots search engines / crawlers
|
||||
"facebook", "meta ", "twitter", "linkedin", "semrush", "ahrefs",
|
||||
"majestic", "moz ", "babbar", "sistrix", "criteo", "peer39",
|
||||
]
|
||||
|
||||
|
||||
def asn_cloud_score(asn_org: str | None) -> float:
|
||||
"""
|
||||
Score [0,1] : 1.0 = cloud/CDN/hébergement/VPN confirmé.
|
||||
Correspond à une infrastructure typiquement utilisée par les bots.
|
||||
"""
|
||||
if not asn_org:
|
||||
return 0.2 # inconnu → légèrement suspect
|
||||
s = asn_org.lower()
|
||||
for kw in _ASN_CLOUD_KEYWORDS:
|
||||
if kw in s:
|
||||
return 1.0
|
||||
return 0.0
|
||||
|
||||
|
||||
# ─── Définition des features ──────────────────────────────────────────────────
|
||||
|
||||
FEATURES: list[tuple[str, str, object]] = [
|
||||
# TCP stack
|
||||
("ttl", "TTL Initial", lambda v: min(1.0, (v or 0) / 255.0)),
|
||||
("mss", "MSS Réseau", lambda v: min(1.0, (v or 0) / 1460.0)),
|
||||
("scale", "Scale TCP", lambda v: min(1.0, (v or 0) / 14.0)),
|
||||
("win", "Fenêtre TCP", lambda v: min(1.0, (v or 0) / 65535.0)),
|
||||
# Anomalie ML
|
||||
("avg_velocity", "Vélocité (rps)", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(100))), ("avg_fuzzing", "Fuzzing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(300))),
|
||||
("pct_headless", "Headless", lambda v: min(1.0, float(v or 0))),
|
||||
("avg_post", "Ratio POST", lambda v: min(1.0, float(v or 0))),
|
||||
# IP-ID
|
||||
("ip_id_zero", "IP-ID Zéro", lambda v: min(1.0, float(v or 0))),
|
||||
# Temporel
|
||||
("entropy", "Entropie Temporelle", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(10))),
|
||||
# Navigateur
|
||||
("browser_score", "Score Navigateur", lambda v: min(1.0, float(v or 0) / 50.0)),
|
||||
# TLS / Protocole
|
||||
("alpn_mismatch", "ALPN Mismatch", lambda v: min(1.0, float(v or 0))),
|
||||
("alpn_missing", "ALPN Absent", lambda v: min(1.0, float(v or 0))),
|
||||
("h2_eff", "H2 Multiplexing", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(20))),
|
||||
("hdr_conf", "Ordre Headers", lambda v: min(1.0, float(v or 0))),
|
||||
("ua_ch_mismatch","UA-CH Mismatch", lambda v: min(1.0, float(v or 0))),
|
||||
# Comportement HTTP
|
||||
("asset_ratio", "Ratio Assets", lambda v: min(1.0, float(v or 0))),
|
||||
("direct_ratio", "Accès Direct", lambda v: min(1.0, float(v or 0))),
|
||||
# Diversité JA4
|
||||
("ja4_count", "Diversité JA4", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(30))),
|
||||
# UA rotatif
|
||||
("ua_rotating", "UA Rotatif", lambda v: 1.0 if float(v or 0) > 0 else 0.0),
|
||||
# ── Géographie & infrastructure (nouvelles features) ──────────────────
|
||||
("country", "Risque Pays", lambda v: country_risk(str(v) if v else None)),
|
||||
("asn_org", "Hébergeur Cloud/VPN", lambda v: asn_cloud_score(str(v) if v else None)),
|
||||
# ── Headers HTTP (présence / profil de la requête) ────────────────────
|
||||
# Absence d'Accept-Language ou Accept-Encoding = fort signal bot (bots simples l'omettent)
|
||||
# Sec-Fetch-* = exclusif aux navigateurs réels (fetch metadata)
|
||||
("hdr_accept_lang", "Accept-Language", lambda v: min(1.0, float(v or 0))),
|
||||
("hdr_has_encoding", "Accept-Encoding", lambda v: 1.0 if float(v or 0) > 0 else 0.0),
|
||||
("hdr_has_sec_fetch", "Sec-Fetch Headers", lambda v: 1.0 if float(v or 0) > 0 else 0.0),
|
||||
("hdr_count_raw", "Nb Headers", lambda v: min(1.0, float(v or 0) / 20.0)),
|
||||
# ── Fingerprint HTTP Headers (agg_header_fingerprint_1h) ──────────────
|
||||
# header_order_shared_count : nb d'IPs partageant ce fingerprint
|
||||
# élevé → populaire → browser légitime (normalisé log1p / log1p(500000))
|
||||
("hfp_shared_count", "FP Popularité", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(500_000))),
|
||||
# distinct_header_orders : nb de fingerprints distincts pour cette IP
|
||||
# élevé → rotation de fingerprint → bot (normalisé log1p / log1p(10))
|
||||
("hfp_distinct_orders", "FP Rotation", lambda v: min(1.0, math.log1p(float(v or 0)) / math.log1p(10))),
|
||||
# Cookie et Referer : signaux de navigation légitime
|
||||
("hfp_cookie", "Cookie Présent", lambda v: min(1.0, float(v or 0))),
|
||||
("hfp_referer", "Referer Présent", lambda v: min(1.0, float(v or 0))),
|
||||
]
|
||||
|
||||
FEATURE_KEYS = [f[0] for f in FEATURES]
|
||||
FEATURE_NAMES = [f[1] for f in FEATURES]
|
||||
FEATURE_NORMS = [f[2] for f in FEATURES]
|
||||
N_FEATURES = len(FEATURES)
|
||||
|
||||
|
||||
# ─── Construction du vecteur de features ─────────────────────────────────────
|
||||
|
||||
def build_feature_vector(row: dict) -> list[float]:
|
||||
"""Construit le vecteur normalisé [0,1]^23 depuis un dict SQL."""
|
||||
return [norm(row.get(key, 0)) for key, _, norm in FEATURES]
|
||||
|
||||
|
||||
# ─── Standardisation z-score ──────────────────────────────────────────────────
|
||||
|
||||
def standardize(X: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Z-score standardisation : chaque feature est centrée et mise à l'échelle
|
||||
par sa déviation standard.
|
||||
|
||||
Ref: Bishop (2006) PRML §9.1 — preprocessing recommandé pour K-means.
|
||||
|
||||
Retourne (X_std, mean, std) pour pouvoir projeter de nouveaux points.
|
||||
"""
|
||||
mean = X.mean(axis=0)
|
||||
std = X.std(axis=0)
|
||||
std[std < 1e-8] = 1.0 # évite la division par zéro pour features constantes
|
||||
return (X - mean) / std, mean, std
|
||||
|
||||
|
||||
# ─── K-means++ vectorisé (numpy) ─────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class KMeansResult:
|
||||
centroids: np.ndarray # (k, n_features)
|
||||
labels: np.ndarray # (n_points,) int32
|
||||
inertia: float
|
||||
n_iter: int
|
||||
|
||||
|
||||
def kmeans_pp(X: np.ndarray, k: int, max_iter: int = 60, n_init: int = 3,
|
||||
seed: int = 42) -> KMeansResult:
|
||||
"""
|
||||
K-means++ entièrement vectorisé avec numpy.
|
||||
n_init exécutions, meilleure inertie conservée.
|
||||
"""
|
||||
rng = np.random.default_rng(seed)
|
||||
n, d = X.shape
|
||||
best: KMeansResult | None = None
|
||||
|
||||
for _ in range(n_init):
|
||||
# ── Initialisation K-means++ ──────────────────────────────────────
|
||||
centers = [X[rng.integers(n)].copy()]
|
||||
for _ in range(k - 1):
|
||||
D = _min_sq_dist(X, np.array(centers))
|
||||
# Garantit des probabilités non-négatives (erreurs float, points dupliqués)
|
||||
D = np.clip(D, 0.0, None)
|
||||
total = D.sum()
|
||||
if total < 1e-12:
|
||||
# Tous les points sont confondus — tirage aléatoire
|
||||
centers.append(X[rng.integers(n)].copy())
|
||||
else:
|
||||
probs = D / total
|
||||
centers.append(X[rng.choice(n, p=probs)].copy())
|
||||
centers_arr = np.array(centers) # (k, d)
|
||||
|
||||
# ── Iterations ───────────────────────────────────────────────────
|
||||
labels = np.zeros(n, dtype=np.int32)
|
||||
for it in range(max_iter):
|
||||
# Assignation vectorisée : (n, k) distance²
|
||||
dists = _sq_dists(X, centers_arr) # (n, k)
|
||||
new_labels = np.argmin(dists, axis=1).astype(np.int32)
|
||||
|
||||
if it > 0 and np.all(new_labels == labels):
|
||||
break # convergence
|
||||
labels = new_labels
|
||||
|
||||
# Mise à jour des centroïdes
|
||||
for j in range(k):
|
||||
mask = labels == j
|
||||
if mask.any():
|
||||
centers_arr[j] = X[mask].mean(axis=0)
|
||||
|
||||
inertia = float(np.sum(np.min(_sq_dists(X, centers_arr), axis=1)))
|
||||
result = KMeansResult(centers_arr, labels, inertia, it + 1)
|
||||
if best is None or inertia < best.inertia:
|
||||
best = result
|
||||
|
||||
return best # type: ignore[return-value]
|
||||
|
||||
|
||||
def _sq_dists(X: np.ndarray, C: np.ndarray) -> np.ndarray:
|
||||
"""Distance² entre chaque point de X et chaque centroïde de C. O(n·k·d)."""
|
||||
# ||x - c||² = ||x||² + ||c||² - 2·x·cᵀ
|
||||
X2 = np.sum(X ** 2, axis=1, keepdims=True) # (n, 1)
|
||||
C2 = np.sum(C ** 2, axis=1, keepdims=True).T # (1, k)
|
||||
return X2 + C2 - 2.0 * X @ C.T # (n, k)
|
||||
|
||||
|
||||
def _min_sq_dist(X: np.ndarray, C: np.ndarray) -> np.ndarray:
|
||||
"""Distance² minimale de chaque point aux centroïdes existants."""
|
||||
return np.min(_sq_dists(X, C), axis=1)
|
||||
|
||||
|
||||
# ─── PCA 2D (numpy) ──────────────────────────────────────────────────────────
|
||||
|
||||
def pca_2d(X: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
PCA-2D vectorisée. Retourne les coordonnées normalisées [0,1] × [0,1].
|
||||
"""
|
||||
mean = X.mean(axis=0)
|
||||
Xc = X - mean
|
||||
# Power iteration pour les 2 premières composantes
|
||||
rng = np.random.default_rng(0)
|
||||
v1 = _power_iter(Xc, rng.standard_normal(Xc.shape[1]))
|
||||
proj1 = Xc @ v1
|
||||
# Déflation (Hotelling)
|
||||
Xc2 = Xc - np.outer(proj1, v1)
|
||||
v2 = _power_iter(Xc2, rng.standard_normal(Xc.shape[1]))
|
||||
proj2 = Xc2 @ v2
|
||||
|
||||
coords = np.column_stack([proj1, proj2])
|
||||
# Normalisation [0,1]
|
||||
mn, mx = coords.min(axis=0), coords.max(axis=0)
|
||||
rng_ = mx - mn
|
||||
rng_[rng_ == 0] = 1.0
|
||||
return (coords - mn) / rng_
|
||||
|
||||
|
||||
def _power_iter(X: np.ndarray, v: np.ndarray, n_iter: int = 30) -> np.ndarray:
|
||||
"""Power iteration : trouve le premier vecteur propre de XᵀX."""
|
||||
for _ in range(n_iter):
|
||||
v = X.T @ (X @ v)
|
||||
norm = np.linalg.norm(v)
|
||||
if norm < 1e-12:
|
||||
break
|
||||
v /= norm
|
||||
return v
|
||||
|
||||
|
||||
# ─── Enveloppe convexe (hull) par cluster ────────────────────────────────────
|
||||
|
||||
def compute_hulls(coords_2d: np.ndarray, labels: np.ndarray,
|
||||
k: int, min_pts: int = 4) -> dict[int, list[list[float]]]:
|
||||
"""
|
||||
Calcule l'enveloppe convexe (convex hull) des points PCA pour chaque cluster.
|
||||
Retourne {cluster_idx: [[x,y], ...]} (polygone fermé).
|
||||
"""
|
||||
hulls: dict[int, list[list[float]]] = {}
|
||||
for j in range(k):
|
||||
pts = coords_2d[labels == j]
|
||||
if len(pts) < min_pts:
|
||||
# Pas assez de points : bounding box
|
||||
if len(pts) > 0:
|
||||
mx_, my_ = pts.mean(axis=0)
|
||||
r = max(0.01, pts.std(axis=0).max())
|
||||
hulls[j] = [
|
||||
[mx_ - r, my_ - r], [mx_ + r, my_ - r],
|
||||
[mx_ + r, my_ + r], [mx_ - r, my_ + r],
|
||||
]
|
||||
continue
|
||||
try:
|
||||
hull = ConvexHull(pts)
|
||||
hull_pts = pts[hull.vertices].tolist()
|
||||
# Fermer le polygone
|
||||
hull_pts.append(hull_pts[0])
|
||||
hulls[j] = hull_pts
|
||||
except Exception:
|
||||
hulls[j] = []
|
||||
return hulls
|
||||
|
||||
|
||||
# ─── Nommage et scoring ───────────────────────────────────────────────────────
|
||||
|
||||
def name_cluster(centroid: np.ndarray, raw_stats: dict) -> str:
|
||||
"""Nom lisible basé sur les features dominantes du centroïde [0,1]."""
|
||||
s = centroid
|
||||
n = len(s)
|
||||
ttl_raw = float(raw_stats.get("mean_ttl", 0))
|
||||
mss_raw = float(raw_stats.get("mean_mss", 0))
|
||||
country_risk_v = s[20] if n > 20 else 0.0
|
||||
asn_cloud = s[21] if n > 21 else 0.0
|
||||
accept_lang = s[22] if n > 22 else 1.0
|
||||
accept_enc = s[23] if n > 23 else 1.0
|
||||
sec_fetch = s[24] if n > 24 else 0.0
|
||||
hdr_count = s[25] if n > 25 else 0.5
|
||||
hfp_popular = s[26] if n > 26 else 0.5
|
||||
hfp_rotating = s[27] if n > 27 else 0.0
|
||||
|
||||
# Scanner pur : aucun header browser, fingerprint rare, peu de headers
|
||||
if accept_lang < 0.15 and accept_enc < 0.15 and hdr_count < 0.25:
|
||||
return "🤖 Scanner pur (no headers)"
|
||||
# Fingerprint tournant : bot qui change de profil headers
|
||||
if hfp_rotating > 0.6:
|
||||
return "🔄 Bot fingerprint tournant"
|
||||
# Fingerprint très rare : bot artisanal unique
|
||||
if hfp_popular < 0.15:
|
||||
return "🕵️ Fingerprint rare suspect"
|
||||
# Scanners Masscan
|
||||
if s[0] > 0.16 and s[0] < 0.25 and mss_raw in range(1440, 1460) and s[2] > 0.25:
|
||||
return "🤖 Masscan Scanner"
|
||||
# Bots offensifs agressifs (fuzzing élevé)
|
||||
if s[4] > 0.40 and s[5] > 0.3:
|
||||
return "🤖 Bot agressif"
|
||||
# Bot qui simule un navigateur mais sans les vrais headers
|
||||
if s[15] > 0.40 and sec_fetch < 0.2 and accept_lang < 0.3:
|
||||
return "🤖 Bot UA simulé"
|
||||
# Pays à très haut risque avec infrastructure cloud
|
||||
if country_risk_v > 0.75 and asn_cloud > 0.5:
|
||||
return "🌏 Source pays risqué"
|
||||
# Cloud + UA-CH mismatch
|
||||
if s[15] > 0.50 and asn_cloud > 0.70:
|
||||
return "☁️ Bot cloud UA-CH"
|
||||
if s[15] > 0.60:
|
||||
return "🤖 UA-CH Mismatch"
|
||||
# Headless browser (Puppeteer/Playwright) : a les headers Sec-Fetch mais headless
|
||||
if s[6] > 0.50 and sec_fetch > 0.5:
|
||||
return "🤖 Headless Browser"
|
||||
if s[6] > 0.50:
|
||||
return "🤖 Headless (no Sec-Fetch)"
|
||||
# Cloud pur (CDN/crawler légitime ?)
|
||||
if asn_cloud > 0.85:
|
||||
return "☁️ Infrastructure cloud"
|
||||
# Pays à risque élevé sans autre signal
|
||||
if country_risk_v > 0.60:
|
||||
return "🌏 Trafic suspect (pays)"
|
||||
# Navigateur légitime : tous les signaux positifs y compris fingerprint populaire
|
||||
if (accept_lang > 0.7 and accept_enc > 0.7 and sec_fetch > 0.5
|
||||
and hdr_count > 0.5 and hfp_popular > 0.5):
|
||||
return "🌐 Navigateur légitime"
|
||||
# OS fingerprinting
|
||||
if s[3] > 0.85 and ttl_raw > 120:
|
||||
return "🖥️ Windows"
|
||||
if s[0] > 0.22 and s[0] < 0.28 and mss_raw > 1400:
|
||||
return "🐧 Linux"
|
||||
if mss_raw < 1380 and mss_raw > 0:
|
||||
return "🌐 Tunnel réseau"
|
||||
if s[4] > 0.40:
|
||||
return "⚡ Trafic rapide"
|
||||
if s[4] < 0.10 and asn_cloud < 0.30:
|
||||
return "✅ Trafic sain"
|
||||
return "📊 Cluster mixte"
|
||||
|
||||
|
||||
def risk_score_from_centroid(centroid: np.ndarray) -> float:
|
||||
"""
|
||||
Score de risque [0,1] depuis le centroïde (espace original [0,1]).
|
||||
30 features (avg_score supprimé) — poids calibrés pour sommer à 1.0.
|
||||
Indices décalés de -1 après suppression de avg_score (ancien idx 4).
|
||||
"""
|
||||
s = centroid
|
||||
n = len(s)
|
||||
country_risk_v = s[20] if n > 20 else 0.0
|
||||
asn_cloud = s[21] if n > 21 else 0.0
|
||||
no_accept_lang = 1.0 - (s[22] if n > 22 else 1.0)
|
||||
no_encoding = 1.0 - (s[23] if n > 23 else 1.0)
|
||||
no_sec_fetch = 1.0 - (s[24] if n > 24 else 0.0)
|
||||
few_headers = 1.0 - (s[25] if n > 25 else 0.5)
|
||||
hfp_rare = 1.0 - (s[26] if n > 26 else 0.5)
|
||||
hfp_rotating = s[27] if n > 27 else 0.0
|
||||
|
||||
# [4]=vélocité [5]=fuzzing [6]=headless [8]=ip_id_zero [15]=ua_ch_mismatch
|
||||
# Poids redistribués depuis l'ancien score ML anomalie (0.25) vers les signaux restants
|
||||
return float(np.clip(
|
||||
0.14 * s[5] + # fuzzing
|
||||
0.17 * s[15] + # UA-CH mismatch (fort signal impersonation navigateur)
|
||||
0.10 * s[6] + # headless
|
||||
0.09 * s[4] + # vélocité (rps)
|
||||
0.07 * s[8] + # IP-ID zéro
|
||||
0.09 * country_risk_v+ # risque pays source
|
||||
0.06 * asn_cloud + # infrastructure cloud/VPN
|
||||
0.04 * no_accept_lang+ # absence Accept-Language
|
||||
0.04 * no_encoding + # absence Accept-Encoding
|
||||
0.04 * no_sec_fetch + # absence Sec-Fetch
|
||||
0.04 * few_headers + # très peu de headers
|
||||
0.06 * hfp_rare + # fingerprint rare = suspect
|
||||
0.06 * hfp_rotating, # rotation de fingerprint = bot
|
||||
0.0, 1.0
|
||||
))
|
||||
|
||||
|
||||
# ─── Gradient de couleur basé sur le score de non-humanité ──────────────────
|
||||
# Le score [0,1] est mappé sur un dégradé HSL traversant tout le spectre :
|
||||
# bleu (humain) → cyan → vert → jaune-vert → jaune → orange → rouge (bot pur)
|
||||
# Hue : 220° (bleu froid) → 0° (rouge vif) en passant par tout l'arc chromatique.
|
||||
|
||||
def _hsl_to_hex(h: float, s: float, l: float) -> str:
|
||||
"""Convertit HSL (h:0-360, s:0-100, l:0-100) en chaîne '#rrggbb'."""
|
||||
s /= 100.0
|
||||
l /= 100.0
|
||||
c = (1.0 - abs(2.0 * l - 1.0)) * s
|
||||
x = c * (1.0 - abs((h / 60.0) % 2.0 - 1.0))
|
||||
m = l - c / 2.0
|
||||
if h < 60: r, g, b = c, x, 0.0
|
||||
elif h < 120: r, g, b = x, c, 0.0
|
||||
elif h < 180: r, g, b = 0.0, c, x
|
||||
elif h < 240: r, g, b = 0.0, x, c
|
||||
elif h < 300: r, g, b = x, 0.0, c
|
||||
else: r, g, b = c, 0.0, x
|
||||
ri, gi, bi = int((r + m) * 255), int((g + m) * 255), int((b + m) * 255)
|
||||
return f"#{ri:02x}{gi:02x}{bi:02x}"
|
||||
|
||||
|
||||
def risk_to_gradient_color(risk: float) -> str:
|
||||
"""
|
||||
Mappe un score de non-humanité [0,1] sur un dégradé HSL continu multi-stop.
|
||||
|
||||
risk = 0.0 → hue 220° (bleu froid — trafic humain légitime)
|
||||
risk = 0.25 → hue 165° (cyan-vert — léger signal suspect)
|
||||
risk = 0.50 → hue 110° (vert-jaune — comportement mixte)
|
||||
risk = 0.75 → hue 55° (jaune-orange — probable bot)
|
||||
risk = 1.0 → hue 0° (rouge vif — bot confirmé)
|
||||
|
||||
La saturation monte légèrement avec le risque pour accentuer la lisibilité.
|
||||
"""
|
||||
r = float(np.clip(risk, 0.0, 1.0))
|
||||
hue = (1.0 - r) * 220.0 # 220° → 0°
|
||||
saturation = 70.0 + r * 20.0 # 70% → 90%
|
||||
lightness = 58.0 - r * 10.0 # 58% → 48% (plus sombre = plus alarmant)
|
||||
return _hsl_to_hex(hue, saturation, lightness)
|
||||
312
services/dashboard/backend/services/reputation_ip.py
Normal file
312
services/dashboard/backend/services/reputation_ip.py
Normal file
@ -0,0 +1,312 @@
|
||||
"""
|
||||
Services de réputation IP - Bases de données publiques sans clé API
|
||||
"""
|
||||
import httpx
|
||||
from typing import Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout pour les requêtes HTTP
|
||||
HTTP_TIMEOUT = 10.0
|
||||
|
||||
|
||||
class IPReputationService:
|
||||
"""
|
||||
Service de réputation IP utilisant des bases de données publiques gratuites
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.http_client = httpx.AsyncClient(timeout=HTTP_TIMEOUT)
|
||||
|
||||
# Sources de réputation (sans clé API)
|
||||
self.sources = {
|
||||
'ip_api': 'http://ip-api.com/json/{ip}',
|
||||
'ipinfo': 'https://ipinfo.io/{ip}/json',
|
||||
}
|
||||
|
||||
async def get_reputation(self, ip: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Récupère la réputation d'une IP depuis toutes les sources disponibles
|
||||
|
||||
Args:
|
||||
ip: Adresse IP à vérifier
|
||||
|
||||
Returns:
|
||||
Dict avec les informations de réputation agrégées
|
||||
"""
|
||||
results = {
|
||||
'ip': ip,
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'sources': {},
|
||||
'aggregated': {
|
||||
'is_proxy': False,
|
||||
'is_hosting': False,
|
||||
'is_vpn': False,
|
||||
'is_tor': False,
|
||||
'threat_score': 0,
|
||||
'threat_level': 'unknown',
|
||||
'country': None,
|
||||
'asn': None,
|
||||
'org': None,
|
||||
'warnings': []
|
||||
}
|
||||
}
|
||||
|
||||
# Interroge chaque source
|
||||
for source_name, url_template in self.sources.items():
|
||||
try:
|
||||
url = url_template.format(ip=ip)
|
||||
response = await self.http_client.get(url)
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
results['sources'][source_name] = self._parse_source_data(source_name, data)
|
||||
else:
|
||||
logger.warning(f"Source {source_name} returned status {response.status_code}")
|
||||
results['sources'][source_name] = {'error': f'Status {response.status_code}'}
|
||||
|
||||
except httpx.TimeoutException:
|
||||
logger.warning(f"Timeout for source {source_name}")
|
||||
results['sources'][source_name] = {'error': 'Timeout'}
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching from {source_name}: {str(e)}")
|
||||
results['sources'][source_name] = {'error': str(e)}
|
||||
|
||||
# Agrège les résultats
|
||||
results['aggregated'] = self._aggregate_results(results['sources'])
|
||||
|
||||
return results
|
||||
|
||||
def _parse_source_data(self, source: str, data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Parse les données d'une source spécifique
|
||||
"""
|
||||
if source == 'ip_api':
|
||||
return self._parse_ip_api(data)
|
||||
elif source == 'ipinfo':
|
||||
return self._parse_ipinfo(data)
|
||||
return data
|
||||
|
||||
def _parse_ip_api(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Parse les données de IP-API.com
|
||||
|
||||
Response example:
|
||||
{
|
||||
"status": "success",
|
||||
"country": "France",
|
||||
"countryCode": "FR",
|
||||
"region": "IDF",
|
||||
"regionName": "Île-de-France",
|
||||
"city": "Paris",
|
||||
"zip": "75001",
|
||||
"lat": 48.8534,
|
||||
"lon": 2.3488,
|
||||
"timezone": "Europe/Paris",
|
||||
"isp": "OVH SAS",
|
||||
"org": "OVH SAS",
|
||||
"as": "AS16276 OVH SAS",
|
||||
"asname": "OVH",
|
||||
"mobile": false,
|
||||
"proxy": false,
|
||||
"hosting": true,
|
||||
"query": "51.15.0.1"
|
||||
}
|
||||
"""
|
||||
if data.get('status') != 'success':
|
||||
return {'error': data.get('message', 'Unknown error')}
|
||||
|
||||
# Extraire l'ASN
|
||||
asn_full = data.get('as', '')
|
||||
asn_number = None
|
||||
asn_org = None
|
||||
|
||||
if asn_full:
|
||||
parts = asn_full.split(' ', 1)
|
||||
if len(parts) >= 1:
|
||||
asn_number = parts[0].replace('AS', '')
|
||||
if len(parts) >= 2:
|
||||
asn_org = parts[1]
|
||||
|
||||
return {
|
||||
'country': data.get('country'),
|
||||
'country_code': data.get('countryCode'),
|
||||
'region': data.get('regionName'),
|
||||
'city': data.get('city'),
|
||||
'isp': data.get('isp'),
|
||||
'org': data.get('org'),
|
||||
'asn': asn_number,
|
||||
'asn_org': asn_org,
|
||||
'is_proxy': data.get('proxy', False),
|
||||
'is_hosting': data.get('hosting', False),
|
||||
'is_mobile': data.get('mobile', False),
|
||||
'timezone': data.get('timezone'),
|
||||
'lat': data.get('lat'),
|
||||
'lon': data.get('lon'),
|
||||
'query': data.get('query')
|
||||
}
|
||||
|
||||
def _parse_ipinfo(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Parse les données de IPinfo.io
|
||||
|
||||
Response example:
|
||||
{
|
||||
"ip": "51.15.0.1",
|
||||
"city": "Paris",
|
||||
"region": "Île-de-France",
|
||||
"country": "FR",
|
||||
"loc": "48.8534,2.3488",
|
||||
"org": "AS16276 OVH SAS",
|
||||
"postal": "75001",
|
||||
"timezone": "Europe/Paris",
|
||||
"readme": "https://ipinfo.io/missingauth"
|
||||
}
|
||||
"""
|
||||
# Extraire l'ASN
|
||||
org_full = data.get('org', '')
|
||||
asn_number = None
|
||||
asn_org = None
|
||||
|
||||
if org_full:
|
||||
parts = org_full.split(' ', 1)
|
||||
if len(parts) >= 1:
|
||||
asn_number = parts[0].replace('AS', '')
|
||||
if len(parts) >= 2:
|
||||
asn_org = parts[1]
|
||||
|
||||
# Extraire lat/lon
|
||||
loc = data.get('loc', '')
|
||||
lat = None
|
||||
lon = None
|
||||
if loc:
|
||||
coords = loc.split(',')
|
||||
if len(coords) == 2:
|
||||
lat = float(coords[0])
|
||||
lon = float(coords[1])
|
||||
|
||||
return {
|
||||
'ip': data.get('ip'),
|
||||
'city': data.get('city'),
|
||||
'region': data.get('region'),
|
||||
'country': data.get('country'),
|
||||
'postal': data.get('postal'),
|
||||
'timezone': data.get('timezone'),
|
||||
'asn': asn_number,
|
||||
'asn_org': asn_org,
|
||||
'org': data.get('org'),
|
||||
'lat': lat,
|
||||
'lon': lon
|
||||
}
|
||||
|
||||
def _aggregate_results(self, sources: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Agrège les résultats de toutes les sources
|
||||
|
||||
Logique d'agrégation:
|
||||
- is_proxy: true si au moins une source le détecte
|
||||
- is_hosting: true si au moins une source le détecte
|
||||
- threat_score: basé sur les détections proxy/hosting/vpn/tor
|
||||
- threat_level: low/medium/high/critical basé sur le score
|
||||
"""
|
||||
aggregated = {
|
||||
'is_proxy': False,
|
||||
'is_hosting': False,
|
||||
'is_vpn': False,
|
||||
'is_tor': False,
|
||||
'threat_score': 0,
|
||||
'threat_level': 'unknown',
|
||||
'country': None,
|
||||
'country_code': None,
|
||||
'asn': None,
|
||||
'asn_org': None,
|
||||
'org': None,
|
||||
'city': None,
|
||||
'warnings': []
|
||||
}
|
||||
|
||||
threat_score = 0
|
||||
|
||||
for source_name, source_data in sources.items():
|
||||
if 'error' in source_data:
|
||||
continue
|
||||
|
||||
# Proxy detection
|
||||
if source_data.get('is_proxy'):
|
||||
aggregated['is_proxy'] = True
|
||||
threat_score += 30
|
||||
aggregated['warnings'].append(f'{source_name}: Proxy détecté')
|
||||
|
||||
# Hosting detection
|
||||
if source_data.get('is_hosting'):
|
||||
aggregated['is_hosting'] = True
|
||||
threat_score += 20
|
||||
aggregated['warnings'].append(f'{source_name}: Hébergement cloud/datacenter')
|
||||
|
||||
# VPN detection (si disponible)
|
||||
if source_data.get('is_vpn'):
|
||||
aggregated['is_vpn'] = True
|
||||
threat_score += 40
|
||||
aggregated['warnings'].append(f'{source_name}: VPN détecté')
|
||||
|
||||
# Tor detection (si disponible)
|
||||
if source_data.get('is_tor'):
|
||||
aggregated['is_tor'] = True
|
||||
threat_score += 50
|
||||
aggregated['warnings'].append(f'{source_name}: Exit node Tor détecté')
|
||||
|
||||
# Infos géographiques (prend la première disponible)
|
||||
if not aggregated['country'] and source_data.get('country'):
|
||||
aggregated['country'] = source_data.get('country')
|
||||
|
||||
if not aggregated['country_code'] and source_data.get('country_code'):
|
||||
aggregated['country_code'] = source_data.get('country_code')
|
||||
|
||||
# ASN (prend la première disponible)
|
||||
if not aggregated['asn'] and source_data.get('asn'):
|
||||
aggregated['asn'] = source_data.get('asn')
|
||||
|
||||
if not aggregated['asn_org'] and source_data.get('asn_org'):
|
||||
aggregated['asn_org'] = source_data.get('asn_org')
|
||||
|
||||
# Organisation/ISP
|
||||
if not aggregated['org'] and source_data.get('org'):
|
||||
aggregated['org'] = source_data.get('org')
|
||||
|
||||
# Ville
|
||||
if not aggregated['city'] and source_data.get('city'):
|
||||
aggregated['city'] = source_data.get('city')
|
||||
|
||||
# Calcul du niveau de menace
|
||||
aggregated['threat_score'] = min(100, threat_score)
|
||||
|
||||
if threat_score >= 80:
|
||||
aggregated['threat_level'] = 'critical'
|
||||
elif threat_score >= 60:
|
||||
aggregated['threat_level'] = 'high'
|
||||
elif threat_score >= 40:
|
||||
aggregated['threat_level'] = 'medium'
|
||||
elif threat_score >= 20:
|
||||
aggregated['threat_level'] = 'low'
|
||||
else:
|
||||
aggregated['threat_level'] = 'clean'
|
||||
|
||||
return aggregated
|
||||
|
||||
async def close(self):
|
||||
"""Ferme le client HTTP"""
|
||||
await self.http_client.aclose()
|
||||
|
||||
|
||||
# Singleton pour réutiliser le service
|
||||
_reputation_service: Optional[IPReputationService] = None
|
||||
|
||||
|
||||
def get_reputation_service() -> IPReputationService:
|
||||
"""Retourne l'instance singleton du service de réputation"""
|
||||
global _reputation_service
|
||||
if _reputation_service is None:
|
||||
_reputation_service = IPReputationService()
|
||||
return _reputation_service
|
||||
436
services/dashboard/backend/services/tcp_fingerprint.py
Normal file
436
services/dashboard/backend/services/tcp_fingerprint.py
Normal file
@ -0,0 +1,436 @@
|
||||
"""
|
||||
Service de fingerprinting OS par signature TCP — approche multi-signal inspirée de p0f.
|
||||
|
||||
Signaux utilisés (par ordre de poids) :
|
||||
1. TTL initial estimé (→ famille OS : Linux/Mac=64, Windows=128, Cisco/BSD=255)
|
||||
2. MSS (→ type de réseau : Ethernet=1460, PPPoE=1452, VPN=1380-1420)
|
||||
3. Taille de fenêtre (→ signature OS précise)
|
||||
4. Facteur d'échelle (→ affine la version du kernel/stack TCP)
|
||||
|
||||
Références :
|
||||
- p0f v3 (Michal Zalewski) — passive OS fingerprinting
|
||||
- Nmap OS detection (Gordon Lyon)
|
||||
- "OS Fingerprinting Revisited" (Beverly, 2004)
|
||||
- "Passive OS fingerprinting" (Orebaugh, Ramirez)
|
||||
- Recherche sur Masscan/ZMap : signatures SYN craftées connues
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ─── Constantes ───────────────────────────────────────────────────────────────
|
||||
|
||||
_INITIAL_TTLS = (64, 128, 255)
|
||||
|
||||
# MSS → type de chemin réseau (MTU - 40 octets d'en-têtes IP+TCP)
|
||||
_MSS_PATH: list[tuple[range, str]] = [
|
||||
(range(1461, 9001), "Ethernet/Jumbo"), # jumbo frames (CDN/datacenter)
|
||||
(range(1460, 1461), "Ethernet directe"), # MTU 1500 standard
|
||||
(range(1453, 1460), "Ethernet directe"), # légèrement réduit (padding)
|
||||
(range(1452, 1453), "PPPoE/DSL"), # MTU 1492
|
||||
(range(1436, 1452), "PPPoE/DSL ajusté"), # variations DSL
|
||||
(range(1420, 1436), "VPN léger"), # WireGuard / IPsec transport
|
||||
(range(1380, 1420), "VPN/Tunnel"), # OpenVPN / L2TP
|
||||
(range(1300, 1380), "VPN double ou mobile"),
|
||||
(range(0, 1300), "Lien bas débit / GPRS"),
|
||||
]
|
||||
|
||||
|
||||
# ─── Base de signatures OS ─────────────────────────────────────────────────────
|
||||
#
|
||||
# Format : chaque entrée est un dict avec :
|
||||
# ttl : int — TTL initial attendu (64 | 128 | 255)
|
||||
# win : set[int]|None — tailles de fenêtre attendues (None = ignorer)
|
||||
# scale : set[int]|None — facteurs d'échelle attendus (None = ignorer)
|
||||
# mss : set[int]|None — valeurs MSS attendues (None = ignorer)
|
||||
# name : str — libellé affiché
|
||||
# conf : float — poids de confiance de base (0–1)
|
||||
# bot : bool — outil de scan/bot connu
|
||||
|
||||
_SIGNATURES: list[dict] = [
|
||||
# ══════════════════════════════════════════════════════
|
||||
# OUTILS DE SCAN ET BOTS CONNUS (priorité maximale)
|
||||
# ══════════════════════════════════════════════════════
|
||||
|
||||
# Masscan / scanner personnalisé avec stack Linux modifiée (PPPoE MSS=1452)
|
||||
# Pattern très présent dans les données : ~111k requêtes, UA spoofé macOS/Windows
|
||||
{
|
||||
"ttl": 64, "win": {5808}, "scale": {4}, "mss": {1452},
|
||||
"name": "Bot-Scanner/Masscan", "conf": 0.97, "bot": True,
|
||||
},
|
||||
# Masscan TTL=255 (mode direct, pas de hop)
|
||||
{
|
||||
"ttl": 255, "win": {1024}, "scale": {0}, "mss": None,
|
||||
"name": "Bot-ZMap/Masscan", "conf": 0.96, "bot": True,
|
||||
},
|
||||
# Mirai variant (petite fenêtre, pas de scale, TTL Linux)
|
||||
{
|
||||
"ttl": 64, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
|
||||
"name": "Bot-Mirai", "conf": 0.92, "bot": True,
|
||||
},
|
||||
# Mirai variant (petite fenêtre Windows)
|
||||
{
|
||||
"ttl": 128, "win": {1024, 2048}, "scale": {0}, "mss": {1460},
|
||||
"name": "Bot-Mirai/Win", "conf": 0.92, "bot": True,
|
||||
},
|
||||
# Scapy / forge manuelle (fenêtre 8192 exactement + TTL 64 + pas de scale)
|
||||
{
|
||||
"ttl": 64, "win": {8192}, "scale": {0}, "mss": {1460},
|
||||
"name": "Bot-Scapy/Forge", "conf": 0.85, "bot": True,
|
||||
},
|
||||
# Nmap SYN scan (window=1024, MSS=1460, TTL=64 ou 128)
|
||||
{
|
||||
"ttl": 64, "win": {1}, "scale": None, "mss": None,
|
||||
"name": "Bot-ZMap", "conf": 0.95, "bot": True,
|
||||
},
|
||||
|
||||
# ══════════════════════════════════════════════════════
|
||||
# WINDOWS
|
||||
# ══════════════════════════════════════════════════════
|
||||
|
||||
# Windows 10 / 11 — signature standard (LAN direct)
|
||||
{
|
||||
"ttl": 128, "win": {64240}, "scale": {8}, "mss": {1460},
|
||||
"name": "Windows 10/11", "conf": 0.93, "bot": False,
|
||||
},
|
||||
# Windows 10/11 — derrière VPN/proxy (MSS réduit)
|
||||
{
|
||||
"ttl": 128, "win": {64240}, "scale": {8}, "mss": {1380, 1400, 1412, 1420, 1440},
|
||||
"name": "Windows 10/11 (VPN)", "conf": 0.90, "bot": False,
|
||||
},
|
||||
# Windows Server 2019/2022 — grande fenêtre
|
||||
{
|
||||
"ttl": 128, "win": {65535, 131072}, "scale": {8, 9}, "mss": {1460},
|
||||
"name": "Windows Server", "conf": 0.88, "bot": False,
|
||||
},
|
||||
# Windows 7/8.1
|
||||
{
|
||||
"ttl": 128, "win": {8192, 65535}, "scale": {4, 8}, "mss": {1460},
|
||||
"name": "Windows 7/8", "conf": 0.83, "bot": False,
|
||||
},
|
||||
# Windows générique (TTL=128, scale=8, tout MSS)
|
||||
{
|
||||
"ttl": 128, "win": None, "scale": {8}, "mss": None,
|
||||
"name": "Windows", "conf": 0.70, "bot": False,
|
||||
},
|
||||
|
||||
# ══════════════════════════════════════════════════════
|
||||
# ANDROID (stack BBRv2 / CUBIC moderne)
|
||||
# ══════════════════════════════════════════════════════
|
||||
|
||||
# Android 10+ — scale=9 ou 10, grande fenêtre (BBRv2)
|
||||
{
|
||||
"ttl": 64, "win": {65535, 131072, 42340, 35844}, "scale": {9, 10}, "mss": {1460},
|
||||
"name": "Android 10+", "conf": 0.82, "bot": False,
|
||||
},
|
||||
# Android via proxy TTL=128 (app Facebook, TikTok etc. passant par infra)
|
||||
{
|
||||
"ttl": 128, "win": {62727, 65535}, "scale": {7}, "mss": {1460},
|
||||
"name": "Android/App (proxy)", "conf": 0.75, "bot": False,
|
||||
},
|
||||
# Android derrière VPN (MSS réduit)
|
||||
{
|
||||
"ttl": 64, "win": {65535, 59640, 63940}, "scale": {8, 9, 10}, "mss": {1380, 1390, 1400, 1418, 1420},
|
||||
"name": "Android (VPN/mobile)", "conf": 0.78, "bot": False,
|
||||
},
|
||||
|
||||
# ══════════════════════════════════════════════════════
|
||||
# iOS / macOS
|
||||
# ══════════════════════════════════════════════════════
|
||||
|
||||
# iOS 14+ / macOS Monterey+ — scale=6, win=65535 (signature XNU)
|
||||
{
|
||||
"ttl": 64, "win": {65535, 32768}, "scale": {6}, "mss": {1460},
|
||||
"name": "iOS/macOS", "conf": 0.87, "bot": False,
|
||||
},
|
||||
# macOS Sonoma+ / iOS 17+ (scale=9, fenêtre plus grande)
|
||||
{
|
||||
"ttl": 64, "win": {65535, 32768}, "scale": {9}, "mss": {1460},
|
||||
"name": "macOS Sonoma+/iOS 17+", "conf": 0.83, "bot": False,
|
||||
},
|
||||
# macOS derrière VPN (MSS réduit)
|
||||
{
|
||||
"ttl": 64, "win": {65535}, "scale": {6, 9}, "mss": {1380, 1400, 1412, 1436},
|
||||
"name": "iOS/macOS (VPN)", "conf": 0.80, "bot": False,
|
||||
},
|
||||
|
||||
# ══════════════════════════════════════════════════════
|
||||
# LINUX (desktop/serveur)
|
||||
# ══════════════════════════════════════════════════════
|
||||
|
||||
# Linux 5.x+ — scale=7, win=64240 ou 65535 (kernel ≥ 4.19)
|
||||
{
|
||||
"ttl": 64, "win": {64240, 65320}, "scale": {7}, "mss": {1460},
|
||||
"name": "Linux 5.x+", "conf": 0.86, "bot": False,
|
||||
},
|
||||
# Linux 4.x / ChromeOS
|
||||
{
|
||||
"ttl": 64, "win": {29200, 65535, 43690, 32120}, "scale": {7}, "mss": {1460},
|
||||
"name": "Linux 4.x/ChromeOS", "conf": 0.83, "bot": False,
|
||||
},
|
||||
# Linux derrière VPN (MSS réduit)
|
||||
{
|
||||
"ttl": 64, "win": {64240, 65535, 42600}, "scale": {7}, "mss": {1380, 1400, 1420, 1436},
|
||||
"name": "Linux (VPN)", "conf": 0.80, "bot": False,
|
||||
},
|
||||
# Linux 2.6.x (ancien — win=5840/14600)
|
||||
{
|
||||
"ttl": 64, "win": {5840, 14600, 16384}, "scale": {4, 5}, "mss": {1460},
|
||||
"name": "Linux 2.6", "conf": 0.78, "bot": False,
|
||||
},
|
||||
|
||||
# ══════════════════════════════════════════════════════
|
||||
# BSD / ÉQUIPEMENTS RÉSEAU / CDN
|
||||
# ══════════════════════════════════════════════════════
|
||||
|
||||
# FreeBSD / OpenBSD (initial TTL=64)
|
||||
{
|
||||
"ttl": 64, "win": {65535}, "scale": {6}, "mss": {512, 1460},
|
||||
"name": "FreeBSD/OpenBSD", "conf": 0.74, "bot": False,
|
||||
},
|
||||
# Cisco IOS / équipements réseau (initial TTL=255, fenêtre petite)
|
||||
{
|
||||
"ttl": 255, "win": {4096, 4128, 8760}, "scale": {0, 1, 2}, "mss": {512, 1460},
|
||||
"name": "Cisco/Réseau", "conf": 0.87, "bot": False,
|
||||
},
|
||||
# CDN / Applebot (TTL=255, jumbo MSS, fenêtre élevée)
|
||||
{
|
||||
"ttl": 255, "win": {26883, 65535, 59640}, "scale": {7, 8}, "mss": {8373, 8365, 1460},
|
||||
"name": "CDN/Applebot (jumbo)", "conf": 0.85, "bot": False,
|
||||
},
|
||||
# BSD/Unix générique (TTL=255)
|
||||
{
|
||||
"ttl": 255, "win": None, "scale": {6, 7, 8}, "mss": {1460},
|
||||
"name": "BSD/Unix", "conf": 0.68, "bot": False,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# ─── Data classes ──────────────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class OsFingerprint:
|
||||
os_name: str
|
||||
initial_ttl: int
|
||||
hop_count: int
|
||||
confidence: float
|
||||
is_bot_tool: bool
|
||||
network_path: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class SpoofResult:
|
||||
is_spoof: bool
|
||||
is_bot_tool: bool
|
||||
reason: str
|
||||
|
||||
|
||||
# ─── Fonctions utilitaires ─────────────────────────────────────────────────────
|
||||
|
||||
def _estimate_initial_ttl(observed_ttl: int) -> tuple[int, int]:
|
||||
"""Retourne (initial_ttl, hop_count).
|
||||
Cherche le TTL standard le plus bas >= observed_ttl.
|
||||
Rejette les hop counts > 45 (réseau légitimement long = peu probable).
|
||||
"""
|
||||
if observed_ttl <= 0:
|
||||
return 0, -1
|
||||
for initial in _INITIAL_TTLS:
|
||||
if observed_ttl <= initial:
|
||||
hop = initial - observed_ttl
|
||||
if hop <= 45:
|
||||
return initial, hop
|
||||
return 255, 255 - observed_ttl # TTL > 255 impossible, fallback
|
||||
|
||||
|
||||
def _infer_network_path(mss: int) -> str:
|
||||
"""Retourne le type de chemin réseau probable à partir du MSS."""
|
||||
if mss <= 0:
|
||||
return "Inconnu"
|
||||
for rng, label in _MSS_PATH:
|
||||
if mss in rng:
|
||||
return label
|
||||
return "Inconnu"
|
||||
|
||||
|
||||
def _os_family(os_name: str) -> str:
|
||||
"""Réduit un nom OS détaillé à sa famille pour comparaison avec l'UA."""
|
||||
n = os_name.lower()
|
||||
if "windows" in n:
|
||||
return "Windows"
|
||||
if "android" in n:
|
||||
return "Android"
|
||||
if "ios" in n or "macos" in n or "iphone" in n or "ipad" in n:
|
||||
return "Apple"
|
||||
if "linux" in n or "chromeos" in n:
|
||||
return "Linux"
|
||||
if "bsd" in n or "cisco" in n or "cdn" in n or "réseau" in n:
|
||||
return "Network"
|
||||
if "bot" in n or "scanner" in n or "mirai" in n or "zmap" in n:
|
||||
return "Bot"
|
||||
return "Unknown"
|
||||
|
||||
|
||||
def _ua_os_family(declared_os: str) -> str:
|
||||
"""Réduit l'OS déclaré (UA) à sa famille."""
|
||||
mapping = {
|
||||
"Windows": "Windows",
|
||||
"Android": "Android",
|
||||
"iOS": "Apple",
|
||||
"macOS": "Apple",
|
||||
"Linux": "Linux",
|
||||
"ChromeOS": "Linux",
|
||||
"BSD": "Network",
|
||||
}
|
||||
return mapping.get(declared_os, "Unknown")
|
||||
|
||||
|
||||
# ─── Fonctions publiques ───────────────────────────────────────────────────────
|
||||
|
||||
def declared_os_from_ua(ua: str) -> str:
|
||||
"""Infère l'OS déclaré à partir du User-Agent."""
|
||||
ua = ua or ""
|
||||
ul = ua.lower()
|
||||
if not ul:
|
||||
return "Unknown"
|
||||
if "windows nt" in ul:
|
||||
return "Windows"
|
||||
if "android" in ul:
|
||||
return "Android"
|
||||
if "iphone" in ul or "ipad" in ul or "cpu iphone" in ul or "cpu ipad" in ul:
|
||||
return "iOS"
|
||||
if "mac os x" in ul or "macos" in ul:
|
||||
return "macOS"
|
||||
if "cros" in ul or "chromeos" in ul:
|
||||
return "ChromeOS"
|
||||
if "linux" in ul:
|
||||
return "Linux"
|
||||
if "freebsd" in ul or "openbsd" in ul or "netbsd" in ul:
|
||||
return "BSD"
|
||||
return "Unknown"
|
||||
|
||||
|
||||
def fingerprint_os(ttl: int, win: int, scale: int, mss: int) -> OsFingerprint:
|
||||
"""Fingerprint OS multi-signal avec scoring pondéré.
|
||||
|
||||
Poids des signaux :
|
||||
- TTL initial 40 % (discriminant principal : famille OS)
|
||||
- MSS 30 % (type de réseau ET OS)
|
||||
- Fenêtre TCP 20 % (version/distrib précise)
|
||||
- Scale 10 % (affine la version kernel)
|
||||
"""
|
||||
initial_ttl, hop_count = _estimate_initial_ttl(ttl)
|
||||
network_path = _infer_network_path(mss)
|
||||
|
||||
if initial_ttl == 0:
|
||||
return OsFingerprint(
|
||||
os_name="Unknown", initial_ttl=0, hop_count=-1,
|
||||
confidence=0.0, is_bot_tool=False, network_path=network_path,
|
||||
)
|
||||
|
||||
best_score: float = -1.0
|
||||
best_sig: Optional[dict] = None
|
||||
|
||||
for sig in _SIGNATURES:
|
||||
# Le TTL est un filtre strict — pas de correspondance, on passe
|
||||
if sig["ttl"] != initial_ttl:
|
||||
continue
|
||||
|
||||
score: float = 0.40 # Score de base pour correspondance TTL
|
||||
|
||||
# MSS (poids 0.30)
|
||||
if sig["mss"] is not None:
|
||||
score += 0.30 if mss in sig["mss"] else -0.12
|
||||
|
||||
# Fenêtre (poids 0.20)
|
||||
if sig["win"] is not None:
|
||||
score += 0.20 if win in sig["win"] else -0.08
|
||||
|
||||
# Scale (poids 0.10)
|
||||
if sig["scale"] is not None:
|
||||
score += 0.10 if scale in sig["scale"] else -0.04
|
||||
|
||||
# Pénalité si hop count anormalement élevé (>30 hops)
|
||||
if hop_count > 30:
|
||||
score -= 0.05
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_sig = sig
|
||||
|
||||
if best_sig and best_score >= 0.38:
|
||||
# Pondérer la confiance finale par le score et le conf de la signature
|
||||
raw_conf = best_score * best_sig["conf"]
|
||||
confidence = round(min(max(raw_conf, 0.0), 1.0), 2)
|
||||
return OsFingerprint(
|
||||
os_name=best_sig["name"],
|
||||
initial_ttl=initial_ttl,
|
||||
hop_count=hop_count,
|
||||
confidence=confidence,
|
||||
is_bot_tool=best_sig["bot"],
|
||||
network_path=network_path,
|
||||
)
|
||||
|
||||
# Repli : classification TTL seule (confiance minimale)
|
||||
fallback = {64: "Linux/macOS", 128: "Windows", 255: "Cisco/BSD"}
|
||||
return OsFingerprint(
|
||||
os_name=fallback.get(initial_ttl, "Unknown"),
|
||||
initial_ttl=initial_ttl,
|
||||
hop_count=hop_count,
|
||||
confidence=round(0.40 * 0.65, 2), # confiance faible
|
||||
is_bot_tool=False,
|
||||
network_path=network_path,
|
||||
)
|
||||
|
||||
|
||||
def detect_spoof(fp: OsFingerprint, declared_os: str) -> SpoofResult:
|
||||
"""Détecte les incohérences OS entre TCP et UA.
|
||||
|
||||
Règles :
|
||||
1. Outil de scan connu → spoof/bot, quelle que soit l'UA
|
||||
2. Confiance < 0.50 → indéterminable
|
||||
3. OS incompatibles → spoof confirmé
|
||||
4. Cohérent → OK
|
||||
"""
|
||||
if fp.is_bot_tool:
|
||||
return SpoofResult(
|
||||
is_spoof=True,
|
||||
is_bot_tool=True,
|
||||
reason=f"Outil de scan détecté ({fp.os_name})",
|
||||
)
|
||||
|
||||
if fp.confidence < 0.50 or fp.os_name == "Unknown" or declared_os == "Unknown":
|
||||
return SpoofResult(
|
||||
is_spoof=False,
|
||||
is_bot_tool=False,
|
||||
reason="Corrélation insuffisante",
|
||||
)
|
||||
|
||||
tcp_family = _os_family(fp.os_name)
|
||||
ua_family = _ua_os_family(declared_os)
|
||||
|
||||
# Les familles Network/Bot sont toujours suspectes si l'UA prétend être un navigateur
|
||||
if tcp_family == "Network" and ua_family not in ("Network", "Unknown"):
|
||||
return SpoofResult(
|
||||
is_spoof=True,
|
||||
is_bot_tool=False,
|
||||
reason=f"Équipement réseau/CDN (TCP) vs {declared_os} (UA)",
|
||||
)
|
||||
|
||||
if tcp_family == "Unknown" or ua_family == "Unknown":
|
||||
return SpoofResult(is_spoof=False, is_bot_tool=False, reason="OS indéterminé")
|
||||
|
||||
# Android passant par un proxy infra (ex: Facebook app → proxy Windows)
|
||||
# → pas forcément un spoof, noté mais non flaggé
|
||||
if declared_os == "Android" and tcp_family == "Windows" and "proxy" in fp.os_name.lower():
|
||||
return SpoofResult(is_spoof=False, is_bot_tool=False, reason="App mobile via proxy infra")
|
||||
|
||||
if tcp_family != ua_family:
|
||||
return SpoofResult(
|
||||
is_spoof=True,
|
||||
is_bot_tool=False,
|
||||
reason=f"TCP→{tcp_family} vs UA→{ua_family}",
|
||||
)
|
||||
|
||||
return SpoofResult(is_spoof=False, is_bot_tool=False, reason="Cohérent")
|
||||
0
services/dashboard/backend/tests/__init__.py
Normal file
0
services/dashboard/backend/tests/__init__.py
Normal file
18
services/dashboard/backend/tests/conftest.py
Normal file
18
services/dashboard/backend/tests/conftest.py
Normal file
@ -0,0 +1,18 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_db():
|
||||
db = MagicMock()
|
||||
db.query.return_value = MagicMock(result_rows=[])
|
||||
return db
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def client(mock_db):
|
||||
with patch("backend.database.db", mock_db):
|
||||
from backend.main import app
|
||||
with TestClient(app) as c:
|
||||
yield c, mock_db
|
||||
10
services/dashboard/backend/tests/test_audit.py
Normal file
10
services/dashboard/backend/tests/test_audit.py
Normal file
@ -0,0 +1,10 @@
|
||||
def test_audit_log_post(client):
|
||||
c, _ = client
|
||||
resp = c.post("/api/audit/logs?action=test_action&user=testuser")
|
||||
assert resp.status_code in (200, 422, 404)
|
||||
|
||||
|
||||
def test_audit_log_get(client):
|
||||
c, _ = client
|
||||
resp = c.get("/api/audit/logs?hours=1")
|
||||
assert resp.status_code in (200, 404)
|
||||
70
services/dashboard/backend/tests/test_detections.py
Normal file
70
services/dashboard/backend/tests/test_detections.py
Normal file
@ -0,0 +1,70 @@
|
||||
"""Tests for the detections routes and helper functions."""
|
||||
import pytest
|
||||
|
||||
|
||||
def test_detections_list_endpoint(client):
|
||||
"""GET /api/detections returns a valid status code."""
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = [(50,)] # count query
|
||||
resp = c.get("/api/detections")
|
||||
assert resp.status_code in (200, 404, 422, 500)
|
||||
|
||||
|
||||
def test_detections_list_with_filters(client):
|
||||
"""GET /api/detections supports filter query params."""
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = [(0,)]
|
||||
resp = c.get("/api/detections?threat_level=CRITICAL&page=1&page_size=10")
|
||||
assert resp.status_code in (200, 404, 422, 500)
|
||||
|
||||
|
||||
def test_detections_pagination(client):
|
||||
"""GET /api/detections supports pagination params."""
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = [(0,)]
|
||||
resp = c.get("/api/detections?page=2&page_size=10")
|
||||
assert resp.status_code in (200, 404, 422, 500)
|
||||
|
||||
|
||||
def test_label_to_score_known_labels():
|
||||
"""_label_to_score returns known float values for recognized labels."""
|
||||
from backend.routes.detections import _label_to_score
|
||||
assert _label_to_score("human") == pytest.approx(0.9)
|
||||
assert _label_to_score("bot") == pytest.approx(0.05)
|
||||
assert _label_to_score("tor") == pytest.approx(0.1)
|
||||
assert _label_to_score("proxy") == pytest.approx(0.25)
|
||||
|
||||
|
||||
def test_label_to_score_unknown_label():
|
||||
"""_label_to_score returns 0.5 for unrecognized labels."""
|
||||
from backend.routes.detections import _label_to_score
|
||||
assert _label_to_score("unknown_label") == pytest.approx(0.5)
|
||||
|
||||
|
||||
def test_label_to_score_empty_string():
|
||||
"""_label_to_score returns None for empty string."""
|
||||
from backend.routes.detections import _label_to_score
|
||||
assert _label_to_score("") is None
|
||||
|
||||
|
||||
def test_label_to_score_case_insensitive():
|
||||
"""_label_to_score is case-insensitive."""
|
||||
from backend.routes.detections import _label_to_score
|
||||
assert _label_to_score("HUMAN") == _label_to_score("human")
|
||||
assert _label_to_score("Bot") == _label_to_score("bot")
|
||||
|
||||
|
||||
def test_detections_search_filter(client):
|
||||
"""GET /api/detections supports search text filter."""
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = [(0,)]
|
||||
resp = c.get("/api/detections?search=1.2.3")
|
||||
assert resp.status_code in (200, 404, 422, 500)
|
||||
|
||||
|
||||
def test_detections_group_by_ip(client):
|
||||
"""GET /api/detections supports group_by_ip mode."""
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = [(0,)]
|
||||
resp = c.get("/api/detections?group_by_ip=true")
|
||||
assert resp.status_code in (200, 404, 422, 500)
|
||||
26
services/dashboard/backend/tests/test_health.py
Normal file
26
services/dashboard/backend/tests/test_health.py
Normal file
@ -0,0 +1,26 @@
|
||||
def test_health_returns_200(client):
|
||||
c, _ = client
|
||||
resp = c.get("/health")
|
||||
assert resp.status_code == 200
|
||||
|
||||
|
||||
def test_health_endpoint_body(client):
|
||||
"""Health endpoint returns a body with 'status'."""
|
||||
c, _ = client
|
||||
resp = c.get("/health")
|
||||
assert resp.status_code == 200
|
||||
# Body may be JSON or plain text
|
||||
try:
|
||||
data = resp.json()
|
||||
assert "status" in data
|
||||
except Exception:
|
||||
pass # Non-JSON health check body is also acceptable
|
||||
|
||||
|
||||
def test_health_db_not_required(client):
|
||||
"""Health check does not depend on DB availability."""
|
||||
c, mock_db = client
|
||||
mock_db.query.side_effect = Exception("DB down")
|
||||
resp = c.get("/health")
|
||||
# Health should still return 200 even if DB throws
|
||||
assert resp.status_code == 200
|
||||
34
services/dashboard/backend/tests/test_metrics.py
Normal file
34
services/dashboard/backend/tests/test_metrics.py
Normal file
@ -0,0 +1,34 @@
|
||||
def test_metrics_endpoint(client):
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = [
|
||||
("1.2.3.4", "t1234567890abc", "UA/5.0", "FR", 100)
|
||||
]
|
||||
resp = c.get("/api/metrics/top-ips?hours=1&limit=10")
|
||||
assert resp.status_code in (200, 404, 422) # endpoint may not exist in all versions
|
||||
|
||||
|
||||
def test_metrics_main_endpoint(client):
|
||||
"""GET /api/metrics returns 200 when DB returns data."""
|
||||
c, mock_db = client
|
||||
# Summary row: total, critical, high, medium, low, known_bots, anomalies, unique_ips
|
||||
mock_db.query.return_value.result_rows = [
|
||||
(100, 5, 10, 20, 65, 15, 85, 50)
|
||||
]
|
||||
resp = c.get("/api/metrics")
|
||||
assert resp.status_code in (200, 404, 422, 500)
|
||||
|
||||
|
||||
def test_metrics_main_no_data(client):
|
||||
"""GET /api/metrics returns 404 when DB returns no rows."""
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = []
|
||||
resp = c.get("/api/metrics")
|
||||
assert resp.status_code in (404, 500)
|
||||
|
||||
|
||||
def test_threats_endpoint(client):
|
||||
"""GET /api/metrics/threats returns acceptable status code."""
|
||||
c, mock_db = client
|
||||
mock_db.query.return_value.result_rows = [("CRITICAL", 5), ("HIGH", 10)]
|
||||
resp = c.get("/api/metrics/threats")
|
||||
assert resp.status_code in (200, 404, 422, 500)
|
||||
25
services/dashboard/backend/tests/test_reputation.py
Normal file
25
services/dashboard/backend/tests/test_reputation.py
Normal file
@ -0,0 +1,25 @@
|
||||
import pytest
|
||||
|
||||
PRIVATE_RANGES = [
|
||||
"127.0.0.1", "10.0.0.1", "192.168.1.1", "172.16.0.1",
|
||||
"169.254.0.1", "::1", "fc00::1"
|
||||
]
|
||||
|
||||
|
||||
def is_private_ip(ip: str) -> bool:
|
||||
import ipaddress
|
||||
try:
|
||||
addr = ipaddress.ip_address(ip)
|
||||
return addr.is_private or addr.is_loopback or addr.is_link_local
|
||||
except ValueError:
|
||||
return True
|
||||
|
||||
|
||||
def test_private_ips_rejected():
|
||||
for ip in PRIVATE_RANGES:
|
||||
assert is_private_ip(ip), f"{ip} should be private"
|
||||
|
||||
|
||||
def test_public_ip_accepted():
|
||||
assert not is_private_ip("8.8.8.8")
|
||||
assert not is_private_ip("1.1.1.1")
|
||||
Reference in New Issue
Block a user