Services: - ja4sentinel: TLS/JA4 fingerprint capture daemon (Go, libpcap) - logcorrelator: JA4 log correlation engine (Go, ClickHouse) - mod_reqin_log: Apache module (C, JSON request logging) - bot_detector: ML bot detection pipeline (Python) - dashboard: FastAPI/Streamlit analytics UI (Python) Shared libraries: - shared/go/ja4common: logger, config, shutdown, ipfilter (Go module) - shared/python/ja4_common: ClickHouseClient, ClickHouseSettings (Python package) - shared/clickhouse/: canonical SQL migrations (10 files) Build & packaging: - Unified 3-stage Dockerfile.package for Go RPMs (el8/el9/el10) - go.work workspace linking sentinel, correlator, ja4common - Makefile with test-all, build-all, rpm-* targets Fixes applied: - go.work: 1.21 → 1.24.6 (required by sentinel) - correlator Dockerfiles: golang:1.21 → golang:1.24 - replace directives in go.mod for ja4common local path - pyproject.toml: setuptools.backends → setuptools.build_meta - Removed static libpcap linking (unavailable on Rocky 9) - Fixed data races in output/writers_test.go (sync.Mutex + atomic.Int32) - Rewrote corrupted test files (logger_test.go × 2) Test coverage: - correlator: 67.1% total (unixsocket 80.5%, config 91.7%, app 83.3%, multi 87.7%, stdout 100%) - sentinel: all 10 packages pass (api, capture, config, fingerprint, ipfilter, logging, output, tlsparse) Documentation: - README.md + docs/ (architecture, development, 5 services, shared libs, DB schema & migrations) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
102 lines
4.4 KiB
Python
102 lines
4.4 KiB
Python
"""
|
|
Endpoints pour l'analyse des empreintes d'en-têtes HTTP
|
|
"""
|
|
from fastapi import APIRouter, HTTPException, Query
|
|
|
|
from ..database import db
|
|
|
|
# Router collecting the header-fingerprint endpoints; every route declared on
# it is served under the "/api/headers" prefix and tagged "header_fingerprint".
router = APIRouter(prefix="/api/headers", tags=["header_fingerprint"])
|
|
|
|
|
|
def _classify_cluster(avg_browser_score: float, ua_ch_mismatch_pct: float) -> str:
    """Label a cluster from its average browser score and ua_ch_mismatch percentage.

    Returns ``"legitimate"`` when the score is at least 90 and the mismatch
    percentage is below 5, ``"bot_suspicious"`` when the mismatch percentage
    exceeds 50, and ``"mixed"`` in every other case.
    """
    if avg_browser_score >= 90 and ua_ch_mismatch_pct < 5:
        return "legitimate"
    if ua_ch_mismatch_pct > 50:
        return "bot_suspicious"
    return "mixed"


@router.get("/clusters")
async def get_header_clusters(limit: int = Query(50, ge=1, le=200)):
    """Return header-fingerprint clusters grouped by ``header_order_hash``.

    Args:
        limit: Maximum number of clusters to return (1-200, default 50).
            Clusters are ordered by their number of unique source IPs,
            largest first.

    Returns:
        A dict with ``clusters`` (one entry per hash with its aggregated
        statistics and a ``classification`` label) and ``total_clusters``
        (the count of distinct hashes in the table, regardless of ``limit``).

    Raises:
        HTTPException: With status 500 when a query or the row conversion
            fails; the original exception is attached as the cause.
    """
    # NOTE(review): db.query is called without ``await`` inside an async
    # handler; if it performs blocking I/O it will stall the event loop —
    # confirm against the database client.
    try:
        sql = """
            SELECT
                header_order_hash AS hash,
                uniq(replaceRegexpAll(toString(src_ip), '^::ffff:', '')) AS unique_ips,
                avg(modern_browser_score) AS avg_browser_score,
                sum(ua_ch_mismatch) AS ua_ch_mismatch_count,
                round(sum(ua_ch_mismatch) * 100.0 / count(), 2) AS ua_ch_mismatch_pct,
                groupArray(5)(sec_fetch_mode) AS top_sec_fetch_modes,
                round(sum(has_cookie) * 100.0 / count(), 2) AS has_cookie_pct,
                round(sum(has_referer) * 100.0 / count(), 2) AS has_referer_pct
            FROM mabase_prod.agg_header_fingerprint_1h
            GROUP BY header_order_hash
            ORDER BY unique_ips DESC
            LIMIT %(limit)s
        """
        result = db.query(sql, {"limit": limit})

        total_sql = """
            SELECT uniq(header_order_hash)
            FROM mabase_prod.agg_header_fingerprint_1h
        """
        total_clusters = int(db.query(total_sql).result_rows[0][0])

        clusters = []
        for row in result.result_rows:
            avg_browser_score = float(row[2] or 0)
            ua_ch_mismatch_pct = float(row[4] or 0)

            # groupArray(5) returns up to five collected values, possibly with
            # repeats. dict.fromkeys drops duplicates while keeping first-seen
            # order, so the response is stable across processes (a set's
            # iteration order for strings depends on hash randomization).
            top_modes = list(dict.fromkeys(str(m) for m in (row[5] or [])))

            clusters.append({
                "hash": str(row[0]),
                "unique_ips": int(row[1]),
                "avg_browser_score": round(avg_browser_score, 2),
                # Guard against a NULL aggregate like the other columns do.
                "ua_ch_mismatch_count": int(row[3] or 0),
                "ua_ch_mismatch_pct": ua_ch_mismatch_pct,
                "top_sec_fetch_modes": top_modes,
                "has_cookie_pct": float(row[6] or 0),
                "has_referer_pct": float(row[7] or 0),
                # Classification uses the unrounded average score.
                "classification": _classify_cluster(avg_browser_score, ua_ch_mismatch_pct),
            })
        return {"clusters": clusters, "total_clusters": total_clusters}
    except Exception as e:
        # NOTE(review): the raw exception text is returned to the client;
        # kept for compatibility, but it may expose internal details.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
|
@router.get("/cluster/{hash}/ips")
async def get_cluster_ips(hash: str, limit: int = Query(50, ge=1, le=500)):
    """List the IPs that belong to a given header cluster.

    Args:
        hash: The ``header_order_hash`` value identifying the cluster
            (taken from the URL path).
        limit: Maximum number of IPs to return (1-500, default 50).

    Returns:
        A dict with ``items`` (one entry per source IP, ordered by browser
        score descending, with the ``::ffff:`` prefix stripped from the
        address) and ``total`` (the number of entries in ``items``).

    Raises:
        HTTPException: With status 500 when the query or the row conversion
            fails.
    """
    # Per-IP attributes are picked with any(), i.e. one value per group as
    # chosen by ClickHouse.
    ips_query = """
        SELECT
            replaceRegexpAll(toString(src_ip), '^::ffff:', '') AS ip,
            any(modern_browser_score) AS browser_score,
            any(ua_ch_mismatch) AS ua_ch_mismatch,
            any(sec_fetch_mode) AS sec_fetch_mode,
            any(sec_fetch_dest) AS sec_fetch_dest
        FROM mabase_prod.agg_header_fingerprint_1h
        WHERE header_order_hash = %(hash)s
        GROUP BY src_ip
        ORDER BY browser_score DESC
        LIMIT %(limit)s
    """
    try:
        rows = db.query(ips_query, {"hash": hash, "limit": limit}).result_rows
        items = [
            {
                "ip": str(ip),
                "browser_score": int(score or 0),
                "ua_ch_mismatch": int(mismatch or 0),
                "sec_fetch_mode": str(mode or ""),
                "sec_fetch_dest": str(dest or ""),
            }
            for ip, score, mismatch, mode, dest in rows
        ]
        return {"items": items, "total": len(items)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|