"""
Routes for investigating entities (IP, JA4, User-Agent, Client-Header, Host,
Path, Query-Param).
"""
from fastapi import APIRouter, HTTPException, Query
from typing import Optional, List, Dict, Any
from datetime import datetime
import json

from ..database import db
from ..models import (
    EntityInvestigation,
    EntityStats,
    EntityRelatedAttributes,
    EntityAttributeValue
)

router = APIRouter(prefix="/api/entities", tags=["Entities"])

# Supported entity types (accepted values of the `entity_type` path segment).
ENTITY_TYPES = {
    'ip': 'ip',
    'ja4': 'ja4',
    'user_agent': 'user_agent',
    'client_header': 'client_header',
    'host': 'host',
    'path': 'path',
    'query_param': 'query_param'
}


def _ensure_supported_entity_type(entity_type: str, *, list_supported: bool = False) -> None:
    """
    Raise an HTTP 400 error when `entity_type` is not a key of ENTITY_TYPES.

    Args:
        entity_type: Entity type taken from the request path.
        list_supported: When True, the error detail also lists the supported
            types; otherwise the short message is used.

    Raises:
        HTTPException: status 400 if the type is not supported.
    """
    if entity_type in ENTITY_TYPES:
        return
    if list_supported:
        detail = f"Type d'entité invalide. Types supportés: {', '.join(ENTITY_TYPES)}"
    else:
        detail = "Type d'entité invalide"
    raise HTTPException(status_code=400, detail=detail)


def _bind_params(entity_type: str, entity_value: str, hours: int) -> Dict[str, Any]:
    """Build the bind-parameter mapping shared by every query in this module."""
    return {
        'entity_type': entity_type,
        'entity_value': entity_value,
        'hours': hours
    }


def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Optional[EntityStats]:
    """
    Fetch aggregated statistics for one entity.

    Args:
        entity_type: Entity type to filter on.
        entity_value: Entity value to filter on.
        hours: Size of the look-back window, in hours.

    Returns:
        An EntityStats built from the first result row, or None when the
        query returns no row.
    """
    query = """
        SELECT
            entity_type,
            entity_value,
            sum(requests) as total_requests,
            sum(unique_ips) as unique_ips,
            min(log_date) as first_seen,
            max(log_date) as last_seen
        FROM mabase_prod.view_dashboard_entities
        WHERE entity_type = %(entity_type)s
          AND entity_value = %(entity_value)s
          AND log_date >= now() - INTERVAL %(hours)s HOUR
        GROUP BY entity_type, entity_value
    """

    result = db.connect().query(query, _bind_params(entity_type, entity_value, hours))

    if not result.result_rows:
        return None

    row = result.result_rows[0]
    return EntityStats(
        entity_type=row[0],
        entity_value=row[1],
        total_requests=row[2],
        unique_ips=row[3],
        first_seen=row[4],
        last_seen=row[5]
    )


def get_related_attributes(entity_type: str, entity_value: str, hours: int = 24) -> EntityRelatedAttributes:
    """
    Fetch the attributes seen together with one entity.

    Args:
        entity_type: Entity type to filter on.
        entity_value: Entity value to filter on.
        hours: Size of the look-back window, in hours.

    Returns:
        An EntityRelatedAttributes holding the distinct IPs, JA4s, hosts,
        ASNs and countries; every list is empty when nothing matched.
    """
    # One scalar sub-query per attribute; each returns an array of distinct values.
    query = """
        SELECT
            (SELECT groupUniqArray(toString(src_ip))
             FROM mabase_prod.view_dashboard_entities
             WHERE entity_type = %(entity_type)s
               AND entity_value = %(entity_value)s
               AND log_date >= now() - INTERVAL %(hours)s HOUR) as ips,
            (SELECT groupUniqArray(ja4)
             FROM mabase_prod.view_dashboard_entities
             WHERE entity_type = %(entity_type)s
               AND entity_value = %(entity_value)s
               AND log_date >= now() - INTERVAL %(hours)s HOUR
               AND ja4 != '') as ja4s,
            (SELECT groupUniqArray(host)
             FROM mabase_prod.view_dashboard_entities
             WHERE entity_type = %(entity_type)s
               AND entity_value = %(entity_value)s
               AND log_date >= now() - INTERVAL %(hours)s HOUR
               AND host != '') as hosts,
            (SELECT groupUniqArrayArray(asns)
             FROM mabase_prod.view_dashboard_entities
             WHERE entity_type = %(entity_type)s
               AND entity_value = %(entity_value)s
               AND log_date >= now() - INTERVAL %(hours)s HOUR
               AND notEmpty(asns)) as asns,
            (SELECT groupUniqArrayArray(countries)
             FROM mabase_prod.view_dashboard_entities
             WHERE entity_type = %(entity_type)s
               AND entity_value = %(entity_value)s
               AND log_date >= now() - INTERVAL %(hours)s HOUR
               AND notEmpty(countries)) as countries
    """

    result = db.connect().query(query, _bind_params(entity_type, entity_value, hours))

    # No row at all, or a row whose five arrays are all empty.
    if not result.result_rows or not any(result.result_rows[0]):
        return EntityRelatedAttributes(
            ips=[],
            ja4s=[],
            hosts=[],
            asns=[],
            countries=[]
        )

    row = result.result_rows[0]
    # Falsy entries (empty strings, None, 0) are dropped from every list.
    return EntityRelatedAttributes(
        ips=[str(ip) for ip in (row[0] or []) if ip],
        ja4s=[ja4 for ja4 in (row[1] or []) if ja4],
        hosts=[host for host in (row[2] or []) if host],
        asns=[asn for asn in (row[3] or []) if asn],
        countries=[country for country in (row[4] or []) if country]
    )


def get_array_values(entity_type: str, entity_value: str, array_field: str, hours: int = 24) -> List[EntityAttributeValue]:
    """
    Unfold an Array column (user_agents, client_headers, etc.) and return its
    values with their occurrence count and percentage, capped at 100 values.

    Args:
        entity_type: Entity type to filter on.
        entity_value: Entity value to filter on.
        array_field: Name of the Array column to unfold. It is interpolated
            into the SQL text, so it must be a plain ASCII identifier.
        hours: Size of the look-back window, in hours.

    Returns:
        One EntityAttributeValue per result row, in descending count order.

    Raises:
        ValueError: if `array_field` is not a plain ASCII identifier.
    """
    # A column name cannot be sent as a bind parameter, so it is spliced into
    # the statement; refuse anything that could carry SQL syntax.
    if not (array_field.isascii() and array_field.isidentifier()):
        raise ValueError(f"Invalid array field name: {array_field!r}")

    query = f"""
        SELECT
            value,
            count() as count,
            round(count * 100.0 / sum(count) OVER (), 2) as percentage
        FROM (
            SELECT arrayJoin({array_field}) as value
            FROM mabase_prod.view_dashboard_entities
            WHERE entity_type = %(entity_type)s
              AND entity_value = %(entity_value)s
              AND log_date >= now() - INTERVAL %(hours)s HOUR
              AND notEmpty({array_field})
        )
        GROUP BY value
        ORDER BY count DESC
        LIMIT 100
    """

    result = db.connect().query(query, _bind_params(entity_type, entity_value, hours))

    return [
        EntityAttributeValue(
            value=row[0],
            count=row[1],
            percentage=row[2]
        )
        for row in result.result_rows
    ]


# ---------------------------------------------------------------------------
# Routes
#
# Registration order matters: routes are tried in declaration order and the
# `path` convertor matches any remainder, slashes included. The fixed route
# and the sub-resource routes are therefore declared BEFORE the catch-all
# investigation route, which would otherwise swallow every request to them.
#
# NOTE(review): a `path` entity whose value itself ends in "/related",
# "/user_agents", "/client_headers", "/paths" or "/query_params" is routed to
# the matching sub-resource endpoint rather than to the full investigation.
#
# NOTE(review): the handlers are `async def` but call the query helpers
# synchronously; if `db.connect().query` blocks, it blocks the event loop for
# the duration of the query - confirm whether that is acceptable.
# ---------------------------------------------------------------------------

@router.get("/types")
async def get_entity_types():
    """
    Return the list of supported entity types with a description of each.
    """
    return {
        "entity_types": list(ENTITY_TYPES.values()),
        "descriptions": {
            "ip": "Adresse IP source",
            "ja4": "Fingerprint JA4 TLS",
            "user_agent": "User-Agent HTTP",
            "client_header": "Client Header HTTP",
            "host": "Host HTTP",
            "path": "Path URL",
            "query_param": "Paramètres de query (noms concaténés)"
        }
    }


@router.get("/{entity_type}/{entity_value:path}/related")
async def get_entity_related(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720)
):
    """
    Return only the attributes related to an entity.
    """
    _ensure_supported_entity_type(entity_type, list_supported=True)

    related = get_related_attributes(entity_type, entity_value, hours)

    return {
        "entity_type": entity_type,
        "entity_value": entity_value,
        "hours": hours,
        "related": related
    }


@router.get("/{entity_type}/{entity_value:path}/user_agents")
async def get_entity_user_agents(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720)
):
    """
    Return the User-Agents associated with an entity.
    """
    _ensure_supported_entity_type(entity_type)

    user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours)

    return {
        "entity_type": entity_type,
        "entity_value": entity_value,
        "user_agents": user_agents,
        "total": len(user_agents)
    }


@router.get("/{entity_type}/{entity_value:path}/client_headers")
async def get_entity_client_headers(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720)
):
    """
    Return the Client-Headers associated with an entity.
    """
    _ensure_supported_entity_type(entity_type)

    client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours)

    return {
        "entity_type": entity_type,
        "entity_value": entity_value,
        "client_headers": client_headers,
        "total": len(client_headers)
    }


@router.get("/{entity_type}/{entity_value:path}/paths")
async def get_entity_paths(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720)
):
    """
    Return the Paths associated with an entity.
    """
    _ensure_supported_entity_type(entity_type)

    paths = get_array_values(entity_type, entity_value, 'paths', hours)

    return {
        "entity_type": entity_type,
        "entity_value": entity_value,
        "paths": paths,
        "total": len(paths)
    }


@router.get("/{entity_type}/{entity_value:path}/query_params")
async def get_entity_query_params(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720)
):
    """
    Return the Query-Params associated with an entity.
    """
    _ensure_supported_entity_type(entity_type)

    query_params = get_array_values(entity_type, entity_value, 'query_params', hours)

    return {
        "entity_type": entity_type,
        "entity_value": entity_value,
        "query_params": query_params,
        "total": len(query_params)
    }


# Catch-all route: must stay last among the "/{entity_type}/..." routes.
@router.get("/{entity_type}/{entity_value:path}", response_model=EntityInvestigation)
async def get_entity_investigation(
    entity_type: str,
    entity_value: str,
    hours: int = Query(default=24, ge=1, le=720, description="Fenêtre temporelle en heures")
):
    """
    Full investigation for a given entity.

    - **entity_type**: Entity type (ip, ja4, user_agent, client_header, host, path, query_param)
    - **entity_value**: Entity value
    - **hours**: Time window in hours (default: 24h)

    Returns:
    - General stats
    - Related attributes (IPs, JA4, Hosts, ASNs, Countries)
    - User-Agents
    - Client-Headers
    - Paths
    - Query-Params
    """
    _ensure_supported_entity_type(entity_type, list_supported=True)

    # General stats; their absence means the entity was not seen in the window.
    stats = get_entity_stats(entity_type, entity_value, hours)
    if not stats:
        raise HTTPException(status_code=404, detail="Entité non trouvée")

    related = get_related_attributes(entity_type, entity_value, hours)
    user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours)
    client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours)
    paths = get_array_values(entity_type, entity_value, 'paths', hours)
    query_params = get_array_values(entity_type, entity_value, 'query_params', hours)

    return EntityInvestigation(
        stats=stats,
        related=related,
        user_agents=user_agents,
        client_headers=client_headers,
        paths=paths,
        query_params=query_params
    )