Background réaliste CsI(Tl) + hybridation mesuré/synthétique + dashboard continuum

- Remplace le continuum exponentiel par un modèle réaliste CsI(Tl) dans l'entraînement (bosse asymétrique ~110 keV + queue Compton)
- Ajoute l'injection de background mesuré (70% mesuré / 30% synthétique) via --measured_background et MEASURED_BACKGROUND_PATH
- Ajoute l'endpoint /api/background/continuum et le toggle "Continuum CsI" sur le dashboard background
- Exclut le canal 1023 (overflow bin) de l'affichage web (NUM_CHANNELS=1023)
- Corrige le lissage gaussien du background (normalisation locale aux bords)
- Met à jour README.md, CLAUDE.md, TUTORIEL.md, TOTO.md, vega_ml/README.md

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 18:14:00 +02:00
parent 1e0c1a5ea5
commit 75d271c696
17 changed files with 917 additions and 224 deletions
--- a/web/app/config.py
+++ b/web/app/config.py
@ -10,7 +10,7 @@ ISOTOPE_INDEX_PATH = Path(os.environ.get("ISOTOPE_INDEX_PATH", "/models/vega_iso

 ENERGY_OFFSET = float(os.environ.get("ENERGY_CALIBRATION_OFFSET", "0.33"))
 ENERGY_SLOPE = float(os.environ.get("ENERGY_CALIBRATION_SLOPE", "2.97"))
-NUM_CHANNELS = 1024
+NUM_CHANNELS = 1023  # Last channel (1023) is overflow bin, excluded from display


 def energy_axis():
--- a/web/app/routers/background.py
+++ b/web/app/routers/background.py
@ -1,24 +1,41 @@
 import json
 from fastapi import APIRouter, HTTPException
 from app.config import BACKGROUND_SNAPSHOT_PATH, BACKGROUND_PATH, energy_axis, NUM_CHANNELS
+from app.theoretical_bg import generate_theoretical_bg, generate_continuum_only
 import numpy as np

 router = APIRouter()


-@router.get("")
-async def get_background_info():
-    """Background metadata: elapsed time, CPS, top peaks."""
+def _load_snapshot():
+    """Load the live snapshot file, or raise 404."""
    if not BACKGROUND_SNAPSHOT_PATH.exists():
        raise HTTPException(status_code=404, detail="Background capture not available yet")
-
    try:
        with open(BACKGROUND_SNAPSHOT_PATH) as f:
-            snapshot = json.load(f)
+            return json.load(f)
    except (json.JSONDecodeError, OSError):
        raise HTTPException(status_code=500, detail="Background snapshot file corrupt")

-    # Check if full background is available
+
+def _load_reference():
+    """Load the 24h reference background, or return None.
+
+    Reads the NumPy file at BACKGROUND_PATH and extracts its "counts" and
+    "duration" entries.
+
+    Returns:
+        A dict with "counts" (the first NUM_CHANNELS values, each rounded to
+        one decimal) and "live_time_s" (the stored duration rounded to one
+        decimal), or None when the file is missing or cannot be read/parsed.
+    """
+    import logging
+
+    if not BACKGROUND_PATH.exists():
+        return None
+    try:
+        # SECURITY: allow_pickle=True unpickles whatever the file contains,
+        # which can execute arbitrary code. Only acceptable while the file at
+        # BACKGROUND_PATH is produced by trusted code.
+        bg_data = np.load(str(BACKGROUND_PATH), allow_pickle=True).item()
+        return {
+            "counts": [round(float(c), 1) for c in bg_data["counts"][:NUM_CHANNELS]],
+            "live_time_s": round(float(bg_data["duration"]), 1),
+        }
+    except Exception:
+        # Stay best-effort (callers treat None as "no reference"), but leave a
+        # trace so a corrupt or malformed file is not silently ignored.
+        logging.getLogger(__name__).warning(
+            "Could not load reference background from %s",
+            BACKGROUND_PATH,
+            exc_info=True,
+        )
+        return None
+
+
+@router.get("")
+async def get_background_info():
+    """Background metadata: elapsed time, CPS, top peaks."""
+    snapshot = _load_snapshot()
    full_available = BACKGROUND_PATH.exists()

    return {
@ -33,34 +50,46 @@ async def get_background_info():

@router.get("/spectrum")
 async def get_background_spectrum():
-    """Full background spectrum with energy axis."""
-    if not BACKGROUND_SNAPSHOT_PATH.exists():
-        raise HTTPException(status_code=404, detail="Background capture not available yet")
-
-    try:
-        with open(BACKGROUND_SNAPSHOT_PATH) as f:
-            snapshot = json.load(f)
-    except (json.JSONDecodeError, OSError):
-        raise HTTPException(status_code=500, detail="Background snapshot file corrupt")
-
-    counts = snapshot.get("spectrum", [0] * NUM_CHANNELS)
-
-    # If full background file exists, use it for better data
-    if BACKGROUND_PATH.exists():
-        try:
-            bg_data = np.load(str(BACKGROUND_PATH), allow_pickle=True).item()
-            counts = [round(float(c), 1) for c in bg_data["counts"]]
-            live_time = float(bg_data["duration"])
-        except Exception:
-            live_time = snapshot.get("live_time_s", 0)
-    else:
-        live_time = snapshot.get("live_time_s", 0)
+    """Live background spectrum (from snapshot) with energy axis."""
+    snapshot = _load_snapshot()
+    live_time = snapshot.get("live_time_s", 0)

    return {
        "channels": list(range(NUM_CHANNELS)),
        "energy_kev": energy_axis(),
-        "counts": counts,
+        "counts": snapshot.get("spectrum", [0] * 1024)[:NUM_CHANNELS],
        "live_time_s": live_time,
        "cps": snapshot.get("cps", 0),
        "top_peaks": snapshot.get("top_peaks", []),
-    }
+        "reference_available": BACKGROUND_PATH.exists(),
+    }
+
+
+@router.get("/reference")
+async def get_background_reference():
+    """Serve the stored reference background for overlay on the live spectrum.
+
+    Responds with the channel indices, the energy axis, the reference counts
+    and its live time. Raises a 404 when no reference could be loaded.
+    """
+    reference = _load_reference()
+    if reference is not None:
+        return {
+            "channels": list(range(NUM_CHANNELS)),
+            "energy_kev": energy_axis(),
+            "counts": reference["counts"],
+            "live_time_s": reference["live_time_s"],
+        }
+
+    raise HTTPException(status_code=404, detail="No 24h reference background available")
+
+
+@router.get("/theoretical")
+async def get_theoretical_bg(cps: float = 6.0, live_time_s: float = 3600.0):
+    """Theoretical natural background spectrum (K-40, U-238 chain, Th-232 chain).
+
+    Both query parameters are forwarded unchanged to generate_theoretical_bg,
+    whose result is returned as the response body.
+
+    Args:
+        cps: Target count rate in counts per second (default 6.0).
+        live_time_s: Acquisition time in seconds (default 3600.0).
+    """
+    return generate_theoretical_bg(cps=cps, live_time_s=live_time_s)
+
+
+@router.get("/continuum")
+async def get_continuum(cps: float = 6.0, live_time_s: float = 3600.0):
+    """CsI(Tl) continuum shape only (hump + Compton tail, no photopeaks, no noise).
+
+    Matches the model used in training (generate_realistic_continuum).
+
+    Both query parameters are forwarded unchanged to generate_continuum_only,
+    whose result is returned as the response body.
+
+    Args:
+        cps: Target count rate in counts per second (default 6.0).
+        live_time_s: Acquisition time in seconds (default 3600.0).
+    """
+    return generate_continuum_only(cps=cps, live_time_s=live_time_s)
--- a/web/app/routers/spectrum.py
+++ b/web/app/routers/spectrum.py
@ -29,7 +29,7 @@ async def get_current_spectrum():
        "isotopes_detected": state.get("isotopes_detected", []),
        "channels": list(range(NUM_CHANNELS)),
        "energy_kev": energy_axis(),
-        "counts": state.get("counts", [0] * NUM_CHANNELS),
+        "counts": state.get("counts", [0] * 1024)[:NUM_CHANNELS],
    }


@ -45,7 +45,7 @@ async def get_difference_spectrum():
    except (json.JSONDecodeError, OSError):
        raise HTTPException(status_code=503, detail="Monitor state file corrupt")

-    counts = np.array(state.get("counts", [0] * NUM_CHANNELS), dtype=np.float64)
+    counts = np.array(state.get("counts", [0] * 1024), dtype=np.float64)[:NUM_CHANNELS]
    live_time = state.get("cumulated_live_time_s", 0)

    if live_time <= 0:
@ -55,7 +55,7 @@ async def get_difference_spectrum():

    if BACKGROUND_PATH.exists():
        bg_data = np.load(str(BACKGROUND_PATH), allow_pickle=True).item()
-        bg_counts = bg_data["counts"].astype(np.float64)
+        bg_counts = bg_data["counts"].astype(np.float64)[:NUM_CHANNELS]
        bg_live_time = float(bg_data["duration"])
        bg_rate = bg_counts / bg_live_time
        net_rate = np.clip(rate - bg_rate, 0, None)
@ -72,5 +72,5 @@ async def get_difference_spectrum():
        "channels": list(range(NUM_CHANNELS)),
        "energy_kev": energy_axis(),
        "counts": [round(float(c), 1) for c in net_counts],
-        "raw_counts": state.get("counts", []),
+        "raw_counts": state.get("counts", [])[:NUM_CHANNELS],
    }
--- a/web/app/theoretical_bg.py
+++ b/web/app/theoretical_bg.py
@ -0,0 +1,139 @@
+"""
+Theoretical natural background spectrum for CsI(Tl) detectors (Radiacode 103).
+
+Shape calibrated against real Radiacode 103 background measurements.
+The CsI(Tl) crystal (1 cm³, 8.4% FWHM) produces a spectrum with:
+- A dominant low-energy hump peaking around 100-120 keV
+- Exponential decay at higher energies
+- Subtle photopeaks from natural isotopes
+"""
+
+import numpy as np
+from app.config import ENERGY_OFFSET, ENERGY_SLOPE, NUM_CHANNELS
+
+
+# Photopeak lines: (energy_keV, relative_weight)
+# Weights tuned so peaks are visible above local continuum at typical CPS.
+# Only the ratios between weights matter: generate_theoretical_bg divides each
+# weight by the sum of all weights before sizing the corresponding peak.
+# Every line listed here is drawn; the 968.97 keV line is the only one that is
+# not also returned as a labelled marker by generate_theoretical_bg.
+NATURAL_BG_LINES = [
+    (295.22, 0.10),   # Pb-214
+    (351.93, 0.18),   # Pb-214
+    (609.31, 0.15),   # Bi-214
+    (911.20, 0.08),   # Ac-228
+    (968.97, 0.05),   # Ac-228
+    (1120.29, 0.06),  # Bi-214
+    (1460.83, 0.12),  # K-40
+    (1764.49, 0.08),  # Bi-214
+    (2614.51, 0.18),  # Tl-208
+]
+
+
+def _gaussian(x, center, sigma, amplitude):
+    """Evaluate an unnormalised Gaussian of height `amplitude` at `x`.
+
+    `x` may be a scalar or a NumPy array; the result has the same shape.
+    """
+    z = (x - center) / sigma
+    return amplitude * np.exp(-0.5 * z ** 2)
+
+
+def generate_theoretical_bg(cps: float = 6.0, live_time_s: float = 3600.0):
+    """Build a synthetic natural-background spectrum for a CsI(Tl) detector.
+
+    The shape is a continuum (asymmetric hump centred on 110 keV, a
+    two-component exponential tail and a flat floor) plus one Gaussian
+    photopeak per entry of NATURAL_BG_LINES. It is scaled to sum to
+    ``cps * live_time_s`` counts, jittered with fixed-seed Gaussian noise
+    (so repeated calls return the same spectrum) and clipped to a 0.9 floor
+    so it can be drawn on a log axis.
+
+    A zero ``cps`` or ``live_time_s`` yields a finite, near-floor spectrum
+    rather than NaN values.
+
+    Args:
+        cps: Target count rate in counts per second.
+        live_time_s: Acquisition time in seconds.
+
+    Returns:
+        A dict with ``energy_kev`` and ``counts`` (one entry per channel),
+        the rounded ``cps`` and ``live_time_s``, and ``lines`` (labelled
+        photopeak energies for plot annotations).
+    """
+    channels = np.arange(NUM_CHANNELS, dtype=np.float64)
+    energy_axis = ENERGY_OFFSET + ENERGY_SLOPE * channels
+    total_counts = cps * live_time_s
+
+    # ── 1. Main hump: asymmetric peak at 110 keV ──
+    # Real data: rises from ~60 at 10keV to ~280 at 100-120keV, then falls
+    # Wider below the centre (sigma 55 keV) than above it (sigma 50 keV).
+    hump_center = 110.0
+    hump = np.zeros(NUM_CHANNELS, dtype=np.float64)
+    low_mask = energy_axis <= hump_center
+    hump[low_mask] = _gaussian(energy_axis[low_mask], hump_center, 55.0, 1.0)
+    hump[~low_mask] = _gaussian(energy_axis[~low_mask], hump_center, 50.0, 1.0)
+
+    # ── 2. Compton continuum tail ──
+    # Real data: ~136@200, ~80@250, ~44@295, ~14@400, ~5@600
+    tail = 0.45 * np.exp(-energy_axis / 240) + 0.04 * np.exp(-energy_axis / 700)
+
+    # ── 3. Low-energy noise floor ──
+    noise_floor = 0.008
+
+    # ── 4. Combine continuum (relative units, hump height = 1) ──
+    continuum = hump + tail + noise_floor
+
+    # ── 5. Photopeaks ──
+    # CsI(Tl) 8.4% FWHM at 662 keV, scaling as sqrt(E)
+    # sigma(E) = FWHM(E) / 2.355 = 0.084 * sqrt(E * 662) / 2.355
+    # Simplified: sigma = 23.6 * sqrt(E/662) keV, never narrower than 12 keV
+    def sigma_keV(E):
+        return max(12.0, 23.6 * np.sqrt(max(E, 1.0) / 662.0))
+
+    # Relative peak strength. The combined shape is renormalised in step 6,
+    # so this is a tuning knob rather than the exact final share of counts.
+    peak_frac = 0.08
+    total_weight = sum(w for _, w in NATURAL_BG_LINES)
+
+    # Peaks are built directly in the continuum's relative units. This is
+    # algebraically the same as sizing them in counts and dividing by
+    # total_counts, but it stays finite when total_counts is 0.
+    peaks = np.zeros(NUM_CHANNELS, dtype=np.float64)
+    for line_energy, weight in NATURAL_BG_LINES:
+        sig = sigma_keV(line_energy)
+        relative_area = peak_frac * (weight / total_weight)
+        amplitude = relative_area / (sig * np.sqrt(2 * np.pi))
+        peaks += _gaussian(energy_axis, line_energy, sig, amplitude)
+
+    # ── 6. Combine and normalize ──
+    # raw.sum() is strictly positive: every channel carries at least the
+    # noise floor.
+    raw = continuum + peaks
+    raw *= total_counts / raw.sum()
+
+    # ── 7. Poisson-like noise ──
+    # Gaussian jitter at a quarter of sqrt(N); fixed seed keeps it repeatable.
+    rng = np.random.default_rng(42)
+    noise = rng.normal(0, 1, NUM_CHANNELS) * np.sqrt(np.maximum(raw, 1.0)) * 0.25
+    raw += noise
+
+    # Floor at 0.9 for log scale
+    spectrum = np.clip(raw, 0.9, None)
+
+    key_lines = [
+        (295.22, "Pb-214"), (351.93, "Pb-214"),
+        (609.31, "Bi-214"), (911.20, "Ac-228"),
+        (1120.29, "Bi-214"), (1460.83, "K-40"),
+        (1764.49, "Bi-214"), (2614.51, "Tl-208"),
+    ]
+
+    return {
+        "energy_kev": [round(float(E), 2) for E in energy_axis],
+        "counts": [round(float(c), 1) for c in spectrum],
+        "cps": round(cps, 2),
+        "live_time_s": round(live_time_s, 1),
+        "lines": [
+            {"energy_keV": E, "name": name} for E, name in key_lines
+        ],
+    }
+
+
+def generate_continuum_only(cps: float = 6.0, live_time_s: float = 3600.0):
+    """Return the bare CsI(Tl) continuum: hump, tail and floor only.
+
+    No photopeaks and no noise are added, so the curve can be compared
+    directly with measured backgrounds and with the training-side model
+    (generate_realistic_continuum in spectrum_physics.py).
+
+    The curve is scaled to sum to ``cps * live_time_s`` counts; when that
+    target is not positive the unscaled shape is returned as-is.
+    """
+    energy_axis = ENERGY_OFFSET + ENERGY_SLOPE * np.arange(NUM_CHANNELS, dtype=np.float64)
+    total_counts = cps * live_time_s
+
+    # Asymmetric hump at ~110 keV: sigma 55 keV up to the centre, 50 keV above
+    hump_center = 110.0
+    hump_sigma = np.where(energy_axis <= hump_center, 55.0, 50.0)
+    hump = np.exp(-0.5 * ((energy_axis - hump_center) / hump_sigma) ** 2)
+
+    # Compton continuum tail: fast and slow exponential components
+    fast_tail = 0.45 * np.exp(-energy_axis / 240.0)
+    slow_tail = 0.04 * np.exp(-energy_axis / 700.0)
+
+    # Flat noise floor added to every channel
+    noise_floor = 0.008
+
+    continuum = hump + (fast_tail + slow_tail) + noise_floor
+
+    # Normalize to target total counts
+    shape_sum = continuum.sum()
+    if shape_sum > 0 and total_counts > 0:
+        continuum *= total_counts / shape_sum
+
+    energies = [round(float(E), 2) for E in energy_axis]
+    counts = [round(float(c), 1) for c in continuum]
+    return {
+        "energy_kev": energies,
+        "counts": counts,
+        "cps": round(cps, 2),
+        "live_time_s": round(live_time_s, 1),
+    }