feat: ja4-platform monorepo — 5 services unified, tests & RPM builds standardized

Services:
- ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap)
- logcorrelator: JA4 log correlation engine (Go, ClickHouse)
- mod_reqin_log: Apache module (C, JSON request logging)
- bot_detector: ML bot detection pipeline (Python)
- dashboard: FastAPI/Streamlit analytics UI (Python)

Shared libraries:
- shared/go/ja4common: logger, config, shutdown, ipfilter (Go module)
- shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package)
- shared/clickhouse/: canonical SQL migrations (10 files)

Build & packaging:
- Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10)
- go.work workspace linking sentinel, correlator, ja4common
- Makefile with test-all, build-all, rpm-* targets

Fixes applied:
- go.work: 1.21 → 1.24.6 (required by sentinel)
- correlator Dockerfiles: golang:1.21 → golang:1.24
- replace directives in go.mod for ja4common local path
- pyproject.toml: setuptools.backends → setuptools.build_meta
- Removed static libpcap linking (unavailable on Rocky 9)
- Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32)
- Rewrote corrupted test files (logger_test.go × 2)

Test coverage:
- correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%)
- sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse)

Documentation:
- README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-07 16:42:59 +02:00
commit d469e39da7
278 changed files with 1621301 additions and 0 deletions

View File

@ -0,0 +1,266 @@
"""
Routes pour la gestion des incidents clusterisés
"""
import hashlib
from fastapi import APIRouter, HTTPException, Query
from typing import List, Optional
from datetime import datetime
from ..database import db
router = APIRouter(prefix="/api/incidents", tags=["incidents"])
@router.get("/clusters")
async def get_incident_clusters(
hours: int = Query(24, ge=1, le=168, description="Fenêtre temporelle en heures"),
min_severity: str = Query("LOW", description="Niveau de sévérité minimum"),
limit: int = Query(20, ge=1, le=100, description="Nombre maximum de clusters")
):
"""
Récupère les incidents clusterisés automatiquement
Les clusters sont formés par:
- Subnet /24
- JA4 fingerprint
- Pattern temporel
"""
try:
# Cluster par subnet /24 avec une IP exemple
# Note: src_ip est en IPv6, les IPv4 sont stockés comme ::ffff:x.x.x.x
# toIPv4() convertit les IPv4-mapped, IPv4NumToString() retourne l'IPv4 en notation x.x.x.x
cluster_query = """
WITH cleaned_ips AS (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
detected_at,
ja4,
country_code,
asn_number,
threat_level,
anomaly_score
FROM ml_detected_anomalies
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
),
subnet_groups AS (
SELECT
concat(
splitByChar('.', clean_ip)[1], '.',
splitByChar('.', clean_ip)[2], '.',
splitByChar('.', clean_ip)[3], '.0/24'
) AS subnet,
count() AS total_detections,
uniq(clean_ip) AS unique_ips,
min(detected_at) AS first_seen,
max(detected_at) AS last_seen,
argMax(ja4, detected_at) AS ja4,
argMax(country_code, detected_at) AS country_code,
argMax(asn_number, detected_at) AS asn_number,
argMax(threat_level, detected_at) AS threat_level,
avg(anomaly_score) AS avg_score,
argMax(clean_ip, detected_at) AS sample_ip
FROM cleaned_ips
GROUP BY subnet
HAVING total_detections >= 2
)
SELECT
subnet,
total_detections,
unique_ips,
first_seen,
last_seen,
ja4,
country_code,
asn_number,
threat_level,
avg_score,
sample_ip
FROM subnet_groups
ORDER BY avg_score ASC, total_detections DESC
LIMIT %(limit)s
"""
result = db.query(cluster_query, {"hours": hours, "limit": limit})
# Collect sample IPs to fetch real UA and trend data in bulk
sample_ips = [row[10] for row in result.result_rows if row[10]]
# Fetch real primary UA per sample IP from view_dashboard_entities
ua_by_ip: dict = {}
if sample_ips:
ip_list_sql = ", ".join(f"'{ip}'" for ip in sample_ips[:50])
ua_query = f"""
SELECT entity_value, arrayElement(user_agents, 1) AS top_ua
FROM view_dashboard_entities
WHERE entity_type = 'ip'
AND entity_value IN ({ip_list_sql})
AND notEmpty(user_agents)
GROUP BY entity_value, top_ua
ORDER BY entity_value
"""
try:
ua_result = db.query(ua_query)
for ua_row in ua_result.result_rows:
if ua_row[0] not in ua_by_ip and ua_row[1]:
ua_by_ip[str(ua_row[0])] = str(ua_row[1])
except Exception:
pass # UA enrichment is best-effort
# Compute real trend: compare current window vs previous window of same duration
trend_query = """
WITH cleaned AS (
SELECT
replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS clean_ip,
detected_at,
concat(
splitByChar('.', clean_ip)[1], '.',
splitByChar('.', clean_ip)[2], '.',
splitByChar('.', clean_ip)[3], '.0/24'
) AS subnet
FROM ml_detected_anomalies
),
current_window AS (
SELECT subnet, count() AS cnt
FROM cleaned
WHERE detected_at >= now() - INTERVAL %(hours)s HOUR
GROUP BY subnet
),
prev_window AS (
SELECT subnet, count() AS cnt
FROM cleaned
WHERE detected_at >= now() - INTERVAL %(hours2)s HOUR
AND detected_at < now() - INTERVAL %(hours)s HOUR
GROUP BY subnet
)
SELECT c.subnet, c.cnt AS current_cnt, p.cnt AS prev_cnt
FROM current_window c
LEFT JOIN prev_window p ON c.subnet = p.subnet
"""
trend_by_subnet: dict = {}
try:
trend_result = db.query(trend_query, {"hours": hours, "hours2": hours * 2})
for tr in trend_result.result_rows:
subnet_key = tr[0]
curr = tr[1] or 0
prev = tr[2] or 0
if prev == 0:
trend_by_subnet[subnet_key] = ("new", 100)
else:
pct = round(((curr - prev) / prev) * 100)
trend_by_subnet[subnet_key] = ("up" if pct >= 0 else "down", abs(pct))
except Exception:
pass
clusters = []
for row in result.result_rows:
subnet = row[0]
threat_level = row[8] or 'LOW'
unique_ips = row[2] or 1
avg_score = abs(row[9] or 0)
sample_ip = row[10] if row[10] else subnet.split('/')[0]
critical_count = 1 if threat_level == 'CRITICAL' else 0
high_count = 1 if threat_level == 'HIGH' else 0
risk_score = min(100, round(
(critical_count * 30) +
(high_count * 20) +
(unique_ips * 5) +
(avg_score * 100)
))
if critical_count > 0 or risk_score >= 80:
severity = "CRITICAL"
elif high_count > (row[1] or 1) * 0.3 or risk_score >= 60:
severity = "HIGH"
elif high_count > 0 or risk_score >= 40:
severity = "MEDIUM"
else:
severity = "LOW"
trend_dir, trend_pct = trend_by_subnet.get(subnet, ("stable", 0))
primary_ua = ua_by_ip.get(sample_ip, "")
clusters.append({
"id": f"INC-{hashlib.md5(subnet.encode()).hexdigest()[:8].upper()}",
"score": risk_score,
"severity": severity,
"total_detections": row[1],
"unique_ips": row[2],
"subnet": subnet,
"sample_ip": sample_ip,
"ja4": row[5] or "",
"primary_ua": primary_ua,
"primary_target": row[3].strftime('%H:%M') if row[3] else "Unknown",
"countries": [{"code": row[6] or "XX", "percentage": 100}],
"asn": str(row[7]) if row[7] else "",
"first_seen": row[3].isoformat() if row[3] else "",
"last_seen": row[4].isoformat() if row[4] else "",
"trend": trend_dir,
"trend_percentage": trend_pct,
})
return {
"items": clusters,
"total": len(clusters),
"period_hours": hours
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")
@router.get("/{cluster_id}")
async def get_incident_details(cluster_id: str):
"""
Récupère les détails d'un incident spécifique.
Non encore implémenté — les détails par cluster seront disponibles dans une prochaine version.
"""
raise HTTPException(
status_code=501,
detail="Détails par incident non encore implémentés. Utilisez /api/incidents/clusters pour la liste."
)
@router.post("/{cluster_id}/classify")
async def classify_incident(
cluster_id: str,
label: str,
tags: List[str] = None,
comment: str = ""
):
"""
Classe un incident rapidement.
Non encore implémenté — utilisez /api/analysis/{ip}/classify pour classifier une IP.
"""
raise HTTPException(
status_code=501,
detail="Classification par incident non encore implémentée. Utilisez /api/analysis/{ip}/classify."
)
@router.get("")
async def list_incidents(
status: str = Query("active", description="Statut des incidents"),
severity: Optional[str] = Query(None, description="Filtrer par sévérité (LOW/MEDIUM/HIGH/CRITICAL)"),
hours: int = Query(24, ge=1, le=168)
):
"""
Liste tous les incidents avec filtres.
Délègue à get_incident_clusters ; le filtre severity est appliqué post-requête.
"""
try:
result = await get_incident_clusters(hours=hours, limit=100)
items = result["items"]
if severity:
sev_upper = severity.upper()
items = [c for c in items if c.get("severity") == sev_upper]
return {
"items": items,
"total": len(items),
"period_hours": hours,
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Erreur: {str(e)}")