feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized

Services: - ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap) - logcorrelator: JA4 log correlation engine (Go, ClickHouse) - mod_reqin_log: Apache module (C, JSON request logging) - bot_detector: ML bot detection pipeline (Python) - dashboard: FastAPI/Streamlit analytics UI (Python) Shared libraries: - shared/go/ja4common: logger, config, shutdown, ipfilter (Go module) - shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package) - shared/clickhouse/: canonical SQL migrations (10 files) Build & packaging: - Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10) - go.work workspace linking sentinel, correlator, ja4common - Makefile with test-all, build-all, rpm-* targets Fixes applied: - go.work: 1.21 → 1.24.6 (required by sentinel) - correlator Dockerfiles: golang:1.21 → golang:1.24 - replace directives in go.mod for ja4common local path - pyproject.toml: setuptools.backends → setuptools.build_meta - Removed static libpcap linking (unavailable on Rocky 9) - Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32) - Rewrote corrupted test files (logger_test.go × 2) Test coverage: - correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%) - sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse) Documentation: - README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-07 16:42:59 +02:00
commit d469e39da7
278 changed files with 1621301 additions and 0 deletions
--- a/services/dashboard/backend/routes/incidents.py
+++ b/services/dashboard/backend/routes/incidents.py
@ -0,0 +1,266 @@
+"""
+Routes pour la gestion des incidents clusterisés
+"""
+import hashlib
+from fastapi import APIRouter, HTTPException, Query
+from typing import List, Optional
+from datetime import datetime
+from ..database import db
+
+router = APIRouter(prefix="/api/incidents", tags=["incidents"])
+
+
+@router.get("/clusters")
+async def get_incident_clusters(
+    hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle en heures"),
+    min_severity: str = Query("LOW", description="Niveau de sévérité minimum"),
+    limit: int = Query(20, ge=1, le=100, description="Nombre maximum de clusters")
+):
+    """
+    Récupère les incidents clusterisés automatiquement
+    
+    Les clusters sont formés par:
+    - Subnet /24
+    - JA4 fingerprint
+    - Pattern temporel
+    """
+    try:
+        # Cluster par subnet /24 avec une IP exemple
+        # Note: src_ip est en IPv6, les IPv4 sont stockés comme ::ffff:x.x.x.x
+        # toIPv4() convertit les IPv4-mapped, IPv4NumToString() retourne l'IPv4 en notation x.x.x.x
+        cluster_query = """
+        WITH cleaned_ips AS (
+            SELECT
+                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
+                detected_at,
+                ja4,
+                country_code,
+                asn_number,
+                threat_level,
+                anomaly_score
+            FROM ml_detected_anomalies
+            WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
+        ),
+        subnet_groups AS (
+            SELECT
+                concat(
+                    splitByChar('.', clean_ip)[1], '.',
+                    splitByChar('.', clean_ip)[2], '.',
+                    splitByChar('.', clean_ip)[3], '.0/24'
+                ) AS subnet,
+                count() AS total_detections,
+                uniq(clean_ip) AS unique_ips,
+                min(detected_at) AS first_seen,
+                max(detected_at) AS last_seen,
+                argMax(ja4, detected_at) AS ja4,
+                argMax(country_code, detected_at) AS country_code,
+                argMax(asn_number, detected_at) AS asn_number,
+                argMax(threat_level, detected_at) AS threat_level,
+                avg(anomaly_score) AS avg_score,
+                argMax(clean_ip, detected_at) AS sample_ip
+            FROM cleaned_ips
+            GROUP BY subnet
+            HAVING total_detections >= 2
+        )
+        SELECT
+            subnet,
+            total_detections,
+            unique_ips,
+            first_seen,
+            last_seen,
+            ja4,
+            country_code,
+            asn_number,
+            threat_level,
+            avg_score,
+            sample_ip
+        FROM subnet_groups
+        ORDER BY avg_score ASC, total_detections DESC
+        LIMIT %(limit)s
+        """
+
+        result = db.query(cluster_query, {"hours": hours, "limit": limit})
+        
+        # Collect sample IPs to fetch real UA and trend data in bulk
+        sample_ips = [row[10] for row in result.result_rows if row[10]]
+
+        # Fetch real primary UA per sample IP from view_dashboard_entities
+        ua_by_ip: dict = {}
+        if sample_ips:
+            ip_list_sql = ", ".join(f"'{ip}'" for ip in sample_ips[:50])
+            ua_query = f"""
+            SELECT entity_value, arrayElement(user_agents, 1) AS top_ua
+            FROM view_dashboard_entities
+            WHERE entity_type = 'ip'
+              AND entity_value IN ({ip_list_sql})
+              AND notEmpty(user_agents)
+            GROUP BY entity_value, top_ua
+            ORDER BY entity_value
+            """
+            try:
+                ua_result = db.query(ua_query)
+                for ua_row in ua_result.result_rows:
+                    if ua_row[0] not in ua_by_ip and ua_row[1]:
+                        ua_by_ip[str(ua_row[0])] = str(ua_row[1])
+            except Exception:
+                pass  # UA enrichment is best-effort
+
+        # Compute real trend: compare current window vs previous window of same duration
+        trend_query = """
+        WITH cleaned AS (
+            SELECT
+                replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
+                detected_at,
+                concat(
+                    splitByChar('.', clean_ip)[1], '.',
+                    splitByChar('.', clean_ip)[2], '.',
+                    splitByChar('.', clean_ip)[3], '.0/24'
+                ) AS subnet
+            FROM ml_detected_anomalies
+        ),
+        current_window AS (
+            SELECT subnet, count() AS cnt
+            FROM cleaned
+            WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
+            GROUP BY subnet
+        ),
+        prev_window AS (
+            SELECT subnet, count() AS cnt
+            FROM cleaned
+            WHERE detected_at >= now() - INTERVAL %(hours2)s HOUR
+              AND detected_at < now() - INTERVAL %(hours)s HOUR
+            GROUP BY subnet
+        )
+        SELECT c.subnet, c.cnt AS current_cnt, p.cnt AS prev_cnt
+        FROM current_window c
+        LEFT JOIN prev_window p ON c.subnet = p.subnet
+        """
+        trend_by_subnet: dict = {}
+        try:
+            trend_result = db.query(trend_query, {"hours": hours, "hours2": hours * 2})
+            for tr in trend_result.result_rows:
+                subnet_key = tr[0]
+                curr = tr[1] or 0
+                prev = tr[2] or 0
+                if prev == 0:
+                    trend_by_subnet[subnet_key] = ("new", 100)
+                else:
+                    pct = round(((curr - prev) / prev) * 100)
+                    trend_by_subnet[subnet_key] = ("up" if pct >= 0 else "down", abs(pct))
+        except Exception:
+            pass
+
+        clusters = []
+        for row in result.result_rows:
+            subnet = row[0]
+            threat_level = row[8] or 'LOW'
+            unique_ips = row[2] or 1
+            avg_score = abs(row[9] or 0)
+            sample_ip = row[10] if row[10] else subnet.split('/')[0]
+
+            critical_count = 1 if threat_level == 'CRITICAL' else 0
+            high_count = 1 if threat_level == 'HIGH' else 0
+
+            risk_score = min(100, round(
+                (critical_count * 30) +
+                (high_count * 20) +
+                (unique_ips * 5) +
+                (avg_score * 100)
+            ))
+
+            if critical_count > 0 or risk_score >= 80:
+                severity = "CRITICAL"
+            elif high_count > (row[1] or 1) * 0.3 or risk_score >= 60:
+                severity = "HIGH"
+            elif high_count > 0 or risk_score >= 40:
+                severity = "MEDIUM"
+            else:
+                severity = "LOW"
+
+            trend_dir, trend_pct = trend_by_subnet.get(subnet, ("stable", 0))
+            primary_ua = ua_by_ip.get(sample_ip, "")
+
+            clusters.append({
+                "id": f"INC-{hashlib.md5(subnet.encode()).hexdigest()[:8].upper()}",
+                "score": risk_score,
+                "severity": severity,
+                "total_detections": row[1],
+                "unique_ips": row[2],
+                "subnet": subnet,
+                "sample_ip": sample_ip,
+                "ja4": row[5] or "",
+                "primary_ua": primary_ua,
+                "primary_target": row[3].strftime('%H:%M') if row[3] else "Unknown",
+                "countries": [{"code": row[6] or "XX", "percentage": 100}],
+                "asn": str(row[7]) if row[7] else "",
+                "first_seen": row[3].isoformat() if row[3] else "",
+                "last_seen": row[4].isoformat() if row[4] else "",
+                "trend": trend_dir,
+                "trend_percentage": trend_pct,
+            })
+        
+        return {
+            "items": clusters,
+            "total": len(clusters),
+            "period_hours": hours
+        }
+        
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
+
+
+@router.get("/{cluster_id}")
+async def get_incident_details(cluster_id: str):
+    """
+    Récupère les détails d'un incident spécifique.
+    Non encore implémenté — les détails par cluster seront disponibles dans une prochaine version.
+    """
+    raise HTTPException(
+        status_code=501,
+        detail="Détails par incident non encore implémentés. Utilisez /api/incidents/clusters pour la liste."
+    )
+
+
+@router.post("/{cluster_id}/classify")
+async def classify_incident(
+    cluster_id: str,
+    label: str,
+    tags: List[str] = None,
+    comment: str = ""
+):
+    """
+    Classe un incident rapidement.
+    Non encore implémenté — utilisez /api/analysis/{ip}/classify pour classifier une IP.
+    """
+    raise HTTPException(
+        status_code=501,
+        detail="Classification par incident non encore implémentée. Utilisez /api/analysis/{ip}/classify."
+    )
+
+
+@router.get("")
+async def list_incidents(
+    status: str = Query("active", description="Statut des incidents"),
+    severity: Optional[str] = Query(None, description="Filtrer par sévérité (LOW/MEDIUM/HIGH/CRITICAL)"),
+    hours: int = Query(24, ge=1, le=168)
+):
+    """
+    Liste tous les incidents avec filtres.
+    Délègue à get_incident_clusters ; le filtre severity est appliqué post-requête.
+    """
+    try:
+        result = await get_incident_clusters(hours=hours, limit=100)
+        items = result["items"]
+
+        if severity:
+            sev_upper = severity.upper()
+            items = [c for c in items if c.get("severity") == sev_upper]
+
+        return {
+            "items": items,
+            "total": len(items),
+            "period_hours": hours,
+        }
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")