""" Routes pour l'investigation d'entités (IP, JA4, User-Agent, Client-Header, Host, Path, Query-Param) """ from fastapi import APIRouter, HTTPException, Query from typing import Optional, List from ..database import db from ..models import ( EntityInvestigation, EntityStats, EntityRelatedAttributes, EntityAttributeValue ) router = APIRouter(prefix="/api/entities", tags=["Entities"]) # Ensemble des types d'entités valides VALID_ENTITY_TYPES = frozenset({ 'ip', 'ja4', 'user_agent', 'client_header', 'host', 'path', 'query_param' }) def get_entity_stats(entity_type: str, entity_value: str, hours: int = 24) -> Optional[EntityStats]: """ Récupère les statistiques pour une entité donnée """ query = """ SELECT entity_type, entity_value, sum(requests) as total_requests, sum(unique_ips) as unique_ips, min(log_date) as first_seen, max(log_date) as last_seen FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) GROUP BY entity_type, entity_value """ result = db.query(query, { 'entity_type': entity_type, 'entity_value': entity_value, 'hours': hours }) if not result.result_rows: return None row = result.result_rows[0] return EntityStats( entity_type=row[0], entity_value=row[1], total_requests=row[2], unique_ips=row[3], first_seen=row[4], last_seen=row[5] ) def get_related_attributes(entity_type: str, entity_value: str, hours: int = 24) -> EntityRelatedAttributes: """ Récupère les attributs associés à une entité """ # Requête pour agréger tous les attributs associés query = """ SELECT (SELECT groupUniqArray(toString(src_ip)) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR)) as ips, (SELECT groupUniqArray(ja4) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND ja4 != '') as ja4s, (SELECT groupUniqArray(host) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND host != '') as hosts, (SELECT groupUniqArrayArray(asns) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(asns)) as asns, (SELECT groupUniqArrayArray(countries) FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty(countries)) as countries """ result = db.query(query, { 'entity_type': entity_type, 'entity_value': entity_value, 'hours': hours }) if not result.result_rows or not any(result.result_rows[0]): return EntityRelatedAttributes( ips=[], ja4s=[], hosts=[], asns=[], countries=[] ) row = result.result_rows[0] return EntityRelatedAttributes( ips=[str(ip) for ip in (row[0] or []) if ip], ja4s=[ja4 for ja4 in (row[1] or []) if ja4], hosts=[host for host in (row[2] or []) if host], asns=[asn for asn in (row[3] or []) if asn], countries=[country for country in (row[4] or []) if country] ) def get_array_values(entity_type: str, entity_value: str, array_field: str, hours: int = 24) -> List[EntityAttributeValue]: """ Extrait et retourne les valeurs d'un champ Array (user_agents, client_headers, etc.) """ query = f""" SELECT value, count() as count, round(count * 100.0 / sum(count) OVER (), 2) as percentage FROM ( SELECT arrayJoin({array_field}) as value FROM mabase_prod.view_dashboard_entities WHERE entity_type = %(entity_type)s AND entity_value = %(entity_value)s AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND notEmpty({array_field}) ) GROUP BY value ORDER BY count DESC """ result = db.query(query, { 'entity_type': entity_type, 'entity_value': entity_value, 'hours': hours }) return [ EntityAttributeValue( value=row[0], count=row[1], percentage=row[2] ) for row in result.result_rows ] @router.get("/subnet/{subnet:path}") async def get_subnet_investigation( subnet: str, hours: int = Query(default=24, ge=1, le=720) ): """ Récupère toutes les IPs d'un subnet /24 avec leurs statistiques Utilise ml_detected_anomalies pour les détections + view_dashboard_entities pour les user-agents """ try: # Extraire l'IP de base du subnet (ex: 192.168.1.0/24 -> 192.168.1.0) subnet_ip = subnet.replace('/24', '').replace('/16', '').replace('/8', '') # Extraire les 3 premiers octets pour le filtre (ex: 141.98.11) subnet_parts = subnet_ip.split('.')[:3] subnet_prefix = subnet_parts[0] subnet_mask = subnet_parts[1] subnet_third = subnet_parts[2] # Stats globales du subnet - utilise ml_detected_anomalies + view_dashboard_entities pour UA stats_query = """ WITH cleaned_ips AS ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, detected_at, ja4, host, country_code, asn_number FROM ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR ), subnet_filter AS ( SELECT * FROM cleaned_ips WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s AND splitByChar('.', clean_ip)[3] = %(subnet_third)s ), -- Récupérer les user-agents depuis view_dashboard_entities ua_data AS ( SELECT entity_value AS ip, arrayJoin(user_agents) AS user_agent FROM view_dashboard_entities WHERE entity_type = 'ip' AND log_date >= toDate(now() - INTERVAL %(hours)s HOUR) AND splitByChar('.', entity_value)[1] = %(subnet_prefix)s AND splitByChar('.', entity_value)[2] = %(subnet_mask)s AND splitByChar('.', entity_value)[3] = %(subnet_third)s ) SELECT %(subnet)s AS subnet, uniq(clean_ip) AS total_ips, count() AS total_detections, uniq(ja4) AS unique_ja4, (SELECT uniq(user_agent) FROM ua_data) AS unique_ua, uniq(host) AS unique_hosts, argMax(country_code, detected_at) AS primary_country, argMax(asn_number, detected_at) AS primary_asn, min(detected_at) AS first_seen, max(detected_at) AS last_seen FROM subnet_filter """ stats_result = db.query(stats_query, { "subnet": subnet, "subnet_prefix": subnet_prefix, "subnet_mask": subnet_mask, "subnet_third": subnet_third, "hours": hours }) if not stats_result.result_rows or stats_result.result_rows[0][1] == 0: raise HTTPException(status_code=404, detail="Subnet non trouvé") stats_row = stats_result.result_rows[0] stats = { "subnet": subnet, "total_ips": stats_row[1] or 0, "total_detections": stats_row[2] or 0, "unique_ja4": stats_row[3] or 0, "unique_ua": stats_row[4] or 0, "unique_hosts": stats_row[5] or 0, "primary_country": stats_row[6] or "XX", "primary_asn": str(stats_row[7]) if stats_row[7] else "?", "first_seen": stats_row[8].isoformat() if stats_row[8] else "", "last_seen": stats_row[9].isoformat() if stats_row[9] else "" } # Liste des IPs avec détails - 2 requêtes séparées + fusion en Python ips_query = """ WITH cleaned_ips AS ( SELECT replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip, detected_at, ja4, country_code, asn_number, threat_level, anomaly_score FROM ml_detected_anomalies WHERE detected_at >= now() - INTERVAL %(hours)s HOUR ), subnet_filter AS ( SELECT * FROM cleaned_ips WHERE splitByChar('.', clean_ip)[1] = %(subnet_prefix)s AND splitByChar('.', clean_ip)[2] = %(subnet_mask)s AND splitByChar('.', clean_ip)[3] = %(subnet_third)s ) SELECT clean_ip AS ip, count() AS total_detections, uniq(ja4) AS unique_ja4, argMax(country_code, detected_at) AS primary_country, argMax(asn_number, detected_at) AS primary_asn, argMax(threat_level, detected_at) AS threat_level, avg(anomaly_score) AS avg_score, min(detected_at) AS first_seen, max(detected_at) AS last_seen FROM subnet_filter GROUP BY ip ORDER BY total_detections DESC """ # Exécuter la première requête pour obtenir les IPs ips_result = db.query(ips_query, { "subnet_prefix": subnet_prefix, "subnet_mask": subnet_mask, "subnet_third": subnet_third, "hours": hours }) # Extraire la liste des IPs pour la requête UA ip_list = [str(row[0]) for row in ips_result.result_rows] # Requête pour les user-agents avec IN clause (utilise l'index) unique_ua_dict = {} if ip_list: # Formater la liste pour la clause IN ip_values = ', '.join(f"'{ip}'" for ip in ip_list) ua_query = f""" SELECT entity_value AS ip, uniq(arrayJoin(user_agents)) AS unique_ua FROM view_dashboard_entities PREWHERE entity_type = 'ip' WHERE entity_value IN ({ip_values}) AND log_date >= today() - INTERVAL 30 DAY GROUP BY entity_value """ ua_result = db.query(ua_query, {}) unique_ua_dict = {row[0]: row[1] for row in ua_result.result_rows} # Fusionner les résultats ips = [] for row in ips_result.result_rows: ips.append({ "ip": str(row[0]), "total_detections": row[1], "unique_ja4": row[2], "unique_ua": unique_ua_dict.get(row[0], 0), "primary_country": row[3] or "XX", "primary_asn": str(row[4]) if row[4] else "?", "threat_level": row[5] or "LOW", "avg_score": abs(row[6] or 0), "first_seen": row[7].isoformat() if row[7] else "", "last_seen": row[8].isoformat() if row[8] else "" }) return { "stats": stats, "ips": ips } except HTTPException: raise except Exception as e: raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}") @router.get("/types") async def get_entity_types(): """ Retourne la liste des types d'entités supportés. NOTE: Cette route DOIT être déclarée avant /{entity_type}/... pour ne pas être masquée. """ return { "entity_types": sorted(VALID_ENTITY_TYPES), "descriptions": { "ip": "Adresse IP source", "ja4": "Fingerprint JA4 TLS", "user_agent": "User-Agent HTTP", "client_header": "Client Header", "host": "Host HTTP", "path": "Path URL", "query_param": "Query Param" } } @router.get("/{entity_type}/{entity_value:path}", response_model=EntityInvestigation) async def get_entity_investigation( entity_type: str, entity_value: str, hours: int = Query(default=24, ge=1, le=720, description="Fenêtre temporelle en heures") ): """ Investigation complète pour une entité donnée - **entity_type**: Type d'entité (ip, ja4, user_agent, client_header, host, path, query_param) - **entity_value**: Valeur de l'entité - **hours**: Fenêtre temporelle (défaut: 24h) Retourne: - Stats générales - Attributs associés (IPs, JA4, Hosts, ASNs, Pays) - User-Agents - Client-Headers - Paths - Query-Params """ # Valider le type d'entité if entity_type not in VALID_ENTITY_TYPES: raise HTTPException( status_code=400, detail=f"Type d'entité invalide. Types supportés: {', '.join(VALID_ENTITY_TYPES)}" ) # Stats générales stats = get_entity_stats(entity_type, entity_value, hours) if not stats: raise HTTPException(status_code=404, detail="Entité non trouvée") # Attributs associés related = get_related_attributes(entity_type, entity_value, hours) # User-Agents user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours) # Client-Headers client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours) # Paths paths = get_array_values(entity_type, entity_value, 'paths', hours) # Query-Params query_params = get_array_values(entity_type, entity_value, 'query_params', hours) return EntityInvestigation( stats=stats, related=related, user_agents=user_agents, client_headers=client_headers, paths=paths, query_params=query_params ) @router.get("/{entity_type}/{entity_value:path}/related") async def get_entity_related( entity_type: str, entity_value: str, hours: int = Query(default=24, ge=1, le=720) ): """ Récupère uniquement les attributs associés à une entité """ if entity_type not in VALID_ENTITY_TYPES: raise HTTPException( status_code=400, detail=f"Type d'entité invalide. Types supportés: {', '.join(VALID_ENTITY_TYPES)}" ) related = get_related_attributes(entity_type, entity_value, hours) return { "entity_type": entity_type, "entity_value": entity_value, "hours": hours, "related": related } @router.get("/{entity_type}/{entity_value:path}/user_agents") async def get_entity_user_agents( entity_type: str, entity_value: str, hours: int = Query(default=24, ge=1, le=720) ): """ Récupère les User-Agents associés à une entité """ if entity_type not in VALID_ENTITY_TYPES: raise HTTPException(status_code=400, detail="Type d'entité invalide") user_agents = get_array_values(entity_type, entity_value, 'user_agents', hours) return { "entity_type": entity_type, "entity_value": entity_value, "user_agents": user_agents, "total": len(user_agents) } @router.get("/{entity_type}/{entity_value:path}/client_headers") async def get_entity_client_headers( entity_type: str, entity_value: str, hours: int = Query(default=24, ge=1, le=720) ): """ Récupère les Client-Headers associés à une entité """ if entity_type not in VALID_ENTITY_TYPES: raise HTTPException(status_code=400, detail="Type d'entité invalide") client_headers = get_array_values(entity_type, entity_value, 'client_headers', hours) return { "entity_type": entity_type, "entity_value": entity_value, "client_headers": client_headers, "total": len(client_headers) } @router.get("/{entity_type}/{entity_value:path}/paths") async def get_entity_paths( entity_type: str, entity_value: str, hours: int = Query(default=24, ge=1, le=720) ): """ Récupère les Paths associés à une entité """ if entity_type not in VALID_ENTITY_TYPES: raise HTTPException(status_code=400, detail="Type d'entité invalide") paths = get_array_values(entity_type, entity_value, 'paths', hours) return { "entity_type": entity_type, "entity_value": entity_value, "paths": paths, "total": len(paths) } @router.get("/{entity_type}/{entity_value:path}/query_params") async def get_entity_query_params( entity_type: str, entity_value: str, hours: int = Query(default=24, ge=1, le=720) ): """ Récupère les Query-Params associés à une entité """ if entity_type not in VALID_ENTITY_TYPES: raise HTTPException(status_code=400, detail="Type d'entité invalide") query_params = get_array_values(entity_type, entity_value, 'query_params', hours) return { "entity_type": entity_type, "entity_value": entity_value, "query_params": query_params, "total": len(query_params) }