refactor(bot-detector): extract monolith into modular package
Split bot_detector.py (~1982 lines) into 10 focused modules:

- config.py: all configuration constants and optional imports
- log.py: logging utilities (log_info, log_decision, append_training_history)
- infra.py: ClickHouse client, health check HTTP server, shutdown
- browser.py: multifactorial browser identification (5 axes)
- scoring.py: drift detection, feature validation, SHAP, clustering
- models.py: EIF, Autoencoder, XGBoost model management
- preprocessing.py: data preprocessing and feature list definitions
- pipeline.py: core semi-supervised scoring loop
- cycle.py: main analysis cycle orchestration
- __main__.py: entry point with startup banner

Update the Dockerfile to copy the package directory and to run the service with `python -m bot_detector`.
All 36 existing tests pass unchanged.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
89
services/bot-detector/bot_detector/infra.py
Normal file
89
services/bot-detector/bot_detector/infra.py
Normal file
@ -0,0 +1,89 @@
|
||||
"""Infrastructure : client ClickHouse, health check HTTP, arrêt propre.
|
||||
|
||||
Exécute le serveur de santé en thread daemon dès l'import.
|
||||
"""
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
|
||||
from ja4_common.clickhouse import get_client as _ja4_get_client
|
||||
|
||||
from .config import HEALTH_PORT
|
||||
from .log import log_info, log_decision
|
||||
|
||||
# ─── Arrêt propre ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _shutdown(sig, frame):
    """Handle SIGTERM/SIGINT: log the shutdown, then exit with status 0.

    Args:
        sig: Number of the signal that was received.
        frame: Interrupted stack frame passed in by the ``signal`` machinery;
            unused.

    Raises:
        SystemExit: Always, with exit code 0.
    """
    try:
        log_info(f"Signal {sig} reçu — arrêt propre.")
        log_decision('SERVICE_STOP', 'shutdown', '', {'signal': sig})
    finally:
        # Exit even if logging fails: otherwise the logging error would
        # propagate into whatever code the signal interrupted, where it could
        # be caught and leave the service running despite the stop request.
        sys.exit(0)
|
||||
|
||||
|
||||
# Route both termination signals to the same handler (SIGTERM first, then
# SIGINT). This runs at import time.
for _signum in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_signum, _shutdown)
|
||||
|
||||
# ─── Health check ───────────────────────────────────────────────────────────
|
||||
|
||||
# Lock guarding every read and write of ``_service_healthy``.
_health_lock = threading.Lock()

# Current health flag; the service starts out healthy.
_service_healthy: bool = True
|
||||
|
||||
|
||||
def set_healthy(healthy: bool):
    """Store a new health state while holding the module lock.

    Args:
        healthy: The value to record in the module-level health flag.
    """
    global _service_healthy
    _health_lock.acquire()
    try:
        _service_healthy = healthy
    finally:
        _health_lock.release()
|
||||
|
||||
|
||||
def is_healthy() -> bool:
    """Return the module-level health flag, read while holding the module lock."""
    with _health_lock:
        current = _service_healthy
    return current
|
||||
|
||||
|
||||
class _HealthHandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler exposing the service health state.

    Every GET request, whatever its path, is answered with ``200`` / ``OK``
    when ``is_healthy()`` returns true and with ``503`` / ``DEGRADED``
    otherwise.
    """

    def do_GET(self):
        """Reply to a GET request with the current health status.

        The body is sent with explicit ``Content-Type`` and ``Content-Length``
        headers so that clients can tell where the body ends without relying
        on the connection being closed.
        """
        healthy = is_healthy()
        body = b'OK' if healthy else b'DEGRADED'
        self.send_response(200 if healthy else 503)
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, *args):
        """Discard the per-request log lines the base class would emit."""
        pass
|
||||
|
||||
|
||||
def _serve_health():
    """Create the health HTTP server on ``HEALTH_PORT`` and serve requests forever."""
    server = HTTPServer(('', HEALTH_PORT), _HealthHandler)
    server.serve_forever()


# Started at import time; as a daemon thread it does not keep the process alive.
_health_thread = threading.Thread(target=_serve_health, daemon=True)
_health_thread.start()
|
||||
|
||||
# ─── Client ClickHouse ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def get_client():
    """Return the result of ``connect()`` on the ja4_common ClickHouse client.

    The client object is obtained from ``ja4_common.clickhouse.get_client``.
    NOTE(review): the previous docstring described the client as shared and
    ``connect()`` as reconnecting on ping failure — confirm in ja4_common.
    """
    client = _ja4_get_client()
    return client.connect()
|
||||
|
||||
|
||||
def score_to_threat_level(score: float) -> str:
    """Map a raw anomaly score to a textual threat level.

    The score is described by the original documentation as a raw
    IsolationForest anomaly score: lower values are more severe.

    Thresholds (upper bounds are exclusive):
        CRITICAL < -0.30 | HIGH < -0.15 | MEDIUM < -0.05 | LOW < 0 | NORMAL >= 0.

    Args:
        score: The anomaly score to classify.

    Returns:
        One of ``'CRITICAL'``, ``'HIGH'``, ``'MEDIUM'``, ``'LOW'`` or
        ``'NORMAL'``. A score that is below none of the bounds (including NaN)
        yields ``'NORMAL'``.
    """
    # Ordered from most to least severe so the first matching bound wins.
    thresholds = (
        (-0.30, 'CRITICAL'),
        (-0.15, 'HIGH'),
        (-0.05, 'MEDIUM'),
        (0, 'LOW'),
    )
    for upper_bound, level in thresholds:
        if score < upper_bound:
            return level
    return 'NORMAL'
|
||||
Reference in New Issue
Block a user