diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..8347a5a --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,78 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Radiacode 103 is a gamma-ray spectrometer isotope identification pipeline. It captures spectra from a Radiacode 103 USB detector, subtracts background radiation, and identifies isotopes using a CNN-FCNN multi-task PyTorch model (VegaModel, 34.5M params, 82 isotopes). The project runs as Docker containers orchestrated by docker-compose. + +## Architecture + +Three Docker containers, each with its own Dockerfile: + +- **train/** — Generates 50k synthetic spectra and trains VegaModel on GPU. Entrypoint runs generation then training sequentially. Code lives in `train/vega_ml/` (synthetic_spectra, training/vega). +- **detect/** — Production monitor. Connects to Radiacode 103 via USB, samples every 60s, accumulates spectrum, subtracts background, runs inference, writes JSON state and daily reports. Two scripts: `radiacode_monitor.py` (main loop) and `capture_background.py` (24h background capture). +- **web/** — FastAPI dashboard on port 8080. Serves a single-page HTML/JS frontend with tabs for spectrum, background, CPS timeline, and history. Reads monitor state from JSON files written by the detect container. + +Data flow: `detect` writes `monitor_state.json` + `cps_log.jsonl` + daily reports to `/data/` and `/logs/` → `web` reads them (read-only volume mounts). The `train` container reads/writes `/data/synthetic/` and `/models/`. + +### Web API Routes + +- `/api/status` — monitor status (connected, CPS, staleness) +- `/api/spectrum/current` — accumulated spectrum (1023 channels, overflow channel excluded) +- `/api/spectrum/difference` — background-subtracted spectrum +- `/api/background`, `/api/background/spectrum`, `/api/background/reference`, `/api/background/theoretical` — background data (live, 24h reference, theoretical CsI(Tl) model) +- `/api/cps/timeline` — CPS time series +- `/api/history`, `/api/history/{date}` — daily detection reports + +### Key Physics Constants + +Energy calibration: `E(keV) = 0.33 + 2.97 * channel_index` (env vars `ENERGY_CALIBRATION_OFFSET` and `ENERGY_CALIBRATION_SLOPE`). The detector has 1024 raw channels but channel 1023 is an overflow bin — only the first 1023 channels (20–3036 keV) are used for display and inference. CsI(Tl) crystal with 8.4% FWHM at 662 keV. + +## Commands + +```bash +# Build all images +docker compose build + +# Train model (GPU required, ~45 min on RTX 5060 Ti) +docker compose run --rm train + +# Capture 24h background (leave running, no radioactive source nearby) +docker compose run --rm -d --name radiacode-bg detect python capture_background.py + +# Start continuous detection monitor +docker compose up detect + +# Start web dashboard +docker compose up web + +# Run both detect and web +docker compose up detect web +``` + +No test suite exists in this project. No linter is configured. + +## VegaModel + +Defined in `train/vega_ml/training/vega/model.py`. Input: 1D spectrum (1023 channels, normalized to max). Output: classification logits (82 isotopes, apply sigmoid for probabilities) + activity predictions (Bq, scaled by max_activity_bq=1000). Loss: `VegaLoss = BCE(logits) + 0.1 * Huber(activities * mask)` — regression only penalizes present isotopes. + +The model checkpoint (`models/vega_best.pt`) stores `model_config` and `model_state_dict`. At inference, the detect container dynamically imports `VegaModel` and `IsotopeIndex` from the mounted `vega_ml` volume. + +## Synthetic Background Model + +The training background uses a realistic CsI(Tl) continuum shape (not a simple exponential): + +- **Continuum**: Asymmetric hump at ~110 keV (sigma_left=55, sigma_right=50 keV) + Compton tail (`0.45*exp(-E/240) + 0.04*exp(-E/700)`) + noise floor. Calibrated against real Radiacode 103 measurements. Implemented in `spectrum_physics.py::generate_realistic_continuum()`. +- **Isotope peaks**: K-40 (1460 keV), Pb-214 (295, 352 keV), Bi-214 (609, 1120, 1764 keV), Ac-228 (911 keV), Pb-212 (239 keV), Tl-208 (583, 2614 keV) — with stochastic activity variation per sample. +- **Hybrid training**: If `MEASURED_BACKGROUND_PATH` points to a valid `.npy` file, 70% measured + 30% synthetic continuum is used. This is controlled by `SpectrumConfig.measured_background_path` and the `--measured_background` CLI argument. + +## Configuration + +All config is via environment variables in `docker-compose.yml`. Key variables: +- `MODEL_PATH`, `ISOTOPE_INDEX_PATH`, `BACKGROUND_PATH` — file paths (container-mounted volumes) +- `VEGA_DEVICE` — `cpu` or `cuda` +- `THRESHOLD` — detection probability threshold (default 0.5) +- `SAMPLE_INTERVAL` — seconds between samples (default 60) +- `ENERGY_CALIBRATION_OFFSET/SLOPE` — energy calibration constants +- `MEASURED_BACKGROUND_PATH` — path to measured background `.npy` for hybrid training (default: `/data/background_24h.npy`) \ No newline at end of file diff --git a/README.md b/README.md index 237a404..b86bccc 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,14 @@ Radiacode 103 (USB) │ └── Rapport quotidien a 00h00 │ │ │ │ Modele: vega_best.pt (entraite sur RTX 5060 Ti) │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ Conteneur web (FastAPI + Chart.js) │ +│ │ +│ Dashboard :8080 — Spectre en temps reel, │ +│ background, CPS, historique, isotopes detectes │ └─────────────────────────────────────────────────────────┘ ``` @@ -35,25 +43,48 @@ docker compose run --rm train # 3. Capturer le bruit de fond (24h, sans source radioactive) docker compose run --rm -d --name radiacode-bg detect python capture_background.py -# 4. Lancer la detection continue -docker compose up detect +# 4. Lancer la detection continue + dashboard +docker compose up detect web ``` ## Configuration Variables d'environnement (dans `docker-compose.yml`) : +### Entrainement (service `train`) + +| Variable | Defaut | Description | +|----------|--------|-------------| +| `NUM_SAMPLES` | `50000` | Nombre de spectres synthetiques | +| `EPOCHS` | `100` | Epochs max d'entrainement | +| `BATCH_SIZE` | `64` | Taille de batch | +| `LEARNING_RATE` | `0.001` | Taux d'apprentissage initial | +| `DETECTOR` | `radiacode_103` | Config du detecteur | +| `MIN_DURATION` | `43200` | Duree min des spectres (12h en s) | +| `MAX_DURATION` | `86400` | Duree max des spectres (24h en s) | +| `SEED` | `42` | Graine aleatoire | +| `MEASURED_BACKGROUND_PATH` | `/data/background_24h.npy` | Background mesure pour entrainement hybride | + +### Detection (service `detect`) + | Variable | Defaut | Description | |----------|--------|-------------| | `MODEL_PATH` | `/models/vega_best.pt` | Chemin du modele PyTorch | | `ISOTOPE_INDEX_PATH` | `/models/vega_isotope_index.txt` | Index des 82 isotopes | | `BACKGROUND_PATH` | `/data/background_24h.npy` | Fichier background de reference | | `THRESHOLD` | `0.5` | Seuil de probabilite pour la detection | -| `SAMPLE_INTERVAL` | `60` | Intervalle d'echantillonnage (secondes) | +| `SAMPLE_INTERVAL` | `60` | Intervalle d'echantillonnage (s) | | `REPORT_HOUR` | `0` | Heure du rapport quotidien | -| `MIN_LIVE_TIME` | `3600` | Live time minimum pour un rapport (secondes) | +| `MIN_LIVE_TIME` | `3600` | Live time minimum pour un rapport (s) | | `VEGA_DEVICE` | `cpu` | Device PyTorch (`cpu` ou `cuda`) | +### Dashboard (service `web`) + +| Variable | Defaut | Description | +|----------|--------|-------------| +| `ENERGY_CALIBRATION_OFFSET` | `0.33` | Calibration energetique (offset keV) | +| `ENERGY_CALIBRATION_SLOPE` | `2.97` | Calibration energetique (pente keV/canal) | + ## Bruit Poissonnien et modele ### Physique du bruit @@ -76,12 +107,19 @@ Le VegaModel est un CNN-FCNN multi-tache inspire de l'architecture Vega d'Open-R - **Classification** : 82 neurones avec sigmoid (presence/absence de chaque isotope) - **Regression** : 82 neurones (activite estimee en Bq, normalisee a max_activity_bq=1000) - **Architecture** : - - 3 blocs CNN (64, 128, 256 canaux) avec BatchNorm + ReLU + MaxPool + - 3 blocs CNN (64, 128, 256 canaux) avec BatchNorm + LeakyReLU + MaxPool - 2 couches FC (512, 256) avec Dropout(0.3) - **34 493 156 parametres** au total -- **Fonction de perte** : VegaLoss = classification_weight * BCE + regression_weight * MSE (ponderee pour ne penaliser l'activite que sur les isotopes presents) +- **Fonction de perte** : VegaLoss = classification_weight * BCE + regression_weight * Huber (ponderee pour ne penaliser l'activite que sur les isotopes presents) - **Entrainement** : 50 000 spectres synthetiques, 100 epochs, AMP (mixed precision), early stopping (patience=10) -- **Background dans les donnees synthetiques** : K-40, radon (Pb-214, Bi-214), thorium (Ac-228, Pb-212, Tl-208) simules avec des activites aleatoires realistes + +### Background d'entrainement + +Le background synthetique utilise un modele realiste calibre sur les mesures du Radiacode 103 : + +- **Continuum CsI(Tl)** : Bosse asymetrique a ~110 keV (sigma_gauche=55 keV, sigma_droite=50 keV) + queue Compton exponentielle (0.45*exp(-E/240) + 0.04*exp(-E/700)) + plancher de bruit. Ce modele remplace l'ancien continuum exponentiel qui ne reproduisait pas la forme reelle du spectre CsI(Tl). +- **Pics environnementaux** : K-40 (1460 keV), Pb-214 (295, 352 keV), Bi-214 (609, 1120, 1764 keV), Ac-228 (911 keV), Pb-212 (239 keV), Tl-208 (583, 2614 keV), avec activites aleatoires realistes +- **Entrainement hybride** : Si le fichier `background_24h.npy` est disponible, 70% du background est issu de la mesure reelle (mélange 70% mesuré / 30% synthétique) et 30% du modele synthetique, avec variation stochastique des pics par-dessus. Ceci ameliore la robustesse du modele face aux variations locales du background. ### Spectres synthetiques @@ -89,8 +127,8 @@ Les donnees d'entrainement simulent la physique complete : 1. **Pics photoelectriques** : Gaussiennes avec FWHM dependant de l'energie (8.4% a 662 keV pour le CsI(Tl)) 2. **Continuum Compton** : distribution de Klein-Nishina simplifiee sous chaque pic 3. **Bruit Poissonnien** : echantillonnage Poisson(N) pour chaque canal, simulant les fluctuations de comptage reelles -4. **Background environnemental** : continuum exponentiel + pics de K-40, radon, thorium avec activites aleatoires -5. **Efficacite du detecteur** : modele phenomenologique qui decroit avec l'energie (absorption basse energie + penetration haute energie) +4. **Background environnemental** : Continuum CsI(Tl) realiste + pics de K-40, radon, thorium avec activites aleatoires +5. **Efficacite du detecteur** : modele phenomenologique qui decroit avec l'energie 6. **Durees de 12-24h** : suffisamment longues pour que le rapport signal/bruit soit comparable aux mesures reelles ### Soustraction du background a l'inference @@ -105,8 +143,8 @@ normalized = net_rate / net_rate.max() # normalisation pour le mod ``` Cette approche hybride est optimale : -- Le modele apprend a ignorer les pics du background (K-40, radon, thorium) pendant l'entrainement -- La soustraction reelle elimine les variations locales du background (emplacement, altitude, materiaux) +- Le modele apprend a ignorer les pics du background pendant l'entrainement +- La soustraction reelle elimine les variations locales du background - Resultat : meilleure sensibilite et moins de faux positifs Le conteneur `train` execute deux phases : @@ -134,6 +172,20 @@ Le rapport contient : - CPS moyen - Isotopes detectes avec probabilite et activite estimee (Bq) +## Dashboard web + +Le conteneur `web` expose un dashboard sur le port 8080 avec : + +- **Onglet Spectre** : Spectre cumule en temps reel (lineaire ou log), soustraction du background, lignes d'energie des isotopes detectes, overlay du background de reference +- **Onglet Background** : Spectre du bruit de fond (live et 24h), modele theorique CsI(Tl), pics detectes, statistiques (duree, CPS, comptages) +- **Onglet CPS** : Evolution du comptage par seconde dans le temps, de 1h a 30 jours +- **Onglet Historique** : Liste des rapports quotidiens avec isotopes detectes + +### Points techniques du dashboard + +- Le canal 1024 (bin de debordement) est exclu de l'affichage — seuls les 1023 premiers canaux sont utilises (20-3036 keV) +- Le lissage du spectre (Gaussienne sigma=8 canaux) utilise une normalisation locale aux bords pour eviter les artefacts + ## Capture du bruit de fond Avant la detection, capturer le background pendant 24h sans source radioactive a proximite : @@ -155,30 +207,54 @@ cat data/background_snapshot.json ``` radiacode_103/ ├── docker-compose.yml # Orchestration des conteneurs +├── CLAUDE.md # Guide pour Claude Code +├── TUTORIEL.md # Tutoriel detaille ├── TOTO.md # Suivi des taches ├── README.md ├── train/ │ ├── Dockerfile # PyTorch 2.7.0 + CUDA 12.8 (Blackwell) -│ ├── requirements.txt │ ├── entrypoint.sh # Generation + entrainement -│ └── vega_ml/ # Code VegaModel (copie d'Open-RadiaCode-Android) +│ ├── requirements.txt +│ └── vega_ml/ # Code VegaModel │ ├── synthetic_spectra/ # Generateur de spectres synthetiques +│ │ ├── config.py # Configurations detecteur (Radiacode 101-110) +│ │ ├── generator.py # Generateur principal (SpectrumConfig) +│ │ ├── physics/ +│ │ │ └── spectrum_physics.py # Physique + background realiste CsI(Tl) +│ │ └── ground_truth/ # Base de donnees 82 isotopes │ ├── training/vega/ # Modele, dataset, trainer -│ └── inference/ +│ └── inference/ # Inference ├── detect/ │ ├── Dockerfile # Python 3.11-slim + radiacode + torch CPU │ ├── requirements.txt -│ ├── radiacode_monitor.py # Moniteur principal -│ └── capture_background.py # Capture du bruit de fond 24h +│ ├── radiacode_monitor.py # Moniteur principal +│ └── capture_background.py # Capture du bruit de fond 24h +├── web/ +│ ├── Dockerfile # Python 3.11-slim + FastAPI +│ ├── requirements.txt +│ ├── app/ +│ │ ├── main.py # FastAPI app + routes +│ │ ├── config.py # Config (canaux, calibration energetique) +│ │ ├── routers/ +│ │ │ ├── status.py # /api/status +│ │ │ ├── spectrum.py # /api/spectrum/current, /api/spectrum/difference +│ │ │ ├── background.py # /api/background/*, background theorique +│ │ │ ├── cps.py # /api/cps/timeline +│ │ │ └── history.py # /api/history, /api/history/{date} +│ │ └── theoretical_bg.py # Modele theorique CsI(Tl) pour le dashboard +│ └── static/ +│ ├── index.html # Dashboard SPA +│ ├── css/style.css +│ └── js/ # app.js, spectrum.js, background.js, cps.js, history.js ├── data/ -│ ├── synthetic/spectra/ # 50 000 spectres synthetiques (~4.2 Go) +│ ├── synthetic/spectra/ # 50 000 spectres synthetiques (~4.2 Go) │ └── background_snapshot.json ├── models/ -│ ├── vega_best.pt # Meilleur modele (395 Mo) -│ ├── vega_final.pt # Modele final -│ ├── vega_history.json # Metriques d'entrainement -│ └── vega_isotope_index.txt # 82 isotopes -└── logs/ # Rapports quotidiens JSON +│ ├── vega_best.pt # Meilleur modele (395 Mo) +│ ├── vega_final.pt # Modele final +│ ├── vega_history.json # Metriques d'entrainement +│ └── vega_isotope_index.txt # 82 isotopes +└── logs/ # Rapports quotidiens JSON ``` ## Materiel diff --git a/TOTO.md b/TOTO.md index 4df5853..3b36ea4 100644 --- a/TOTO.md +++ b/TOTO.md @@ -4,21 +4,23 @@ | Etape | Statut | Detail | |-------|--------|--------| -| Build Docker | Fait | train + detect | +| Build Docker | Fait | train + detect + web | | Generation spectres synthetiques | Fait | 50 000 echantillons (1D, 4.2 Go) | | Entrainement VegaModel | Fait | 100 epochs, val loss 0.0051, val acc 99.89% | | Modele sauvegarde | Fait | `models/vega_best.pt` (395 Mo), 82 isotopes | -| Capture background 24h | En cours | 0.2h/24h, 6.5 CPS | -| Detection continue | Pas encore | Apres background 24h | -| Test avec source | Pas encore | Apres detection continue | +| Capture background 24h | Fait | Background mesure disponible | +| Detection continue | Fait | Moniteur avec soustraction du background | +| Dashboard web | Fait | FastAPI + Chart.js, 4 onglets (spectre, background, CPS, historique) | +| Background realiste (entrainement) | Fait | Continuum CsI(Tl) + hybride mesuré/synthétique | +| Canal de debordement exclu | Fait | 1023 canaux (ch 1023 overflow exclu) | ## Prochaines etapes -- [ ] Attendre fin de la capture background 24h (conteneur `radiacode-bg` en cours) -- [ ] Lancer le moniteur : `docker compose up detect` +- [ ] Re-entrainer le modele avec le background realiste CsI(Tl) + hybridation du background mesure - [ ] Tester avec une source radioactive connue (Cs-137) -- [] Nettoyer les checkpoints d'epochs dans `models/` (garder seulement `vega_best.pt`, `vega_final.pt`, `vega_history.json`, `vega_isotope_index.txt`) +- [ ] Nettoyer les checkpoints d'epochs dans `models/` (garder seulement `vega_best.pt`, `vega_isotope_index.txt`) - [ ] Transfer vers Pi 4 pour la production +- [ ] Ajouter la courbe de continuum CsI(Tl) sur l'interface web background ## Bugs corriges @@ -27,4 +29,8 @@ - PyTorch 2.4 ne supporte pas sm_120 (Blackwell/RTX 5060 Ti) -> PyTorch 2.7.0 + CUDA 12.8 - DataParallel incompatible entre GPU d'architectures differentes (4060 Ti Ada + 5060 Ti Blackwell) -> mono-GPU - `radiacode` depend de `bluepy` (BLE) qui ne compile pas dans `python:3.11-slim` -> ajoute `build-essential libglib2.0-dev` -- Volume `./data` monte en read-only dans detect -> passe en read-write pour le snapshot JSON \ No newline at end of file +- Volume `./data` monte en read-only dans detect -> passe en read-write pour le snapshot JSON +- Canal 1023 (overflow bin) affiche comme un pic a 3039 keV -> exclus de l'affichage (NUM_CHANNELS=1023) +- Lissage Gaussien du background creait un artefact aux bords -> normalisation locale du noyau au lieu de reinjecter data[i] +- Background d'entrainement exponentiel ne ressemblait pas au spectre CsI(Tl) reel -> remplace par modele realiste (bosse asymetrique a 110 keV + queue Compton) +- Ajout de l'entrainement hybride : 70% background mesure + 30% synthetique quand `background_24h.npy` est disponible \ No newline at end of file diff --git a/TUTORIEL.md b/TUTORIEL.md index bd41aac..da28a4e 100644 --- a/TUTORIEL.md +++ b/TUTORIEL.md @@ -80,29 +80,40 @@ Les spectres synthetiques simulent des acquisitions de 12 a 24 heures (43200-864 - **Pics photoelectriques** : Gaussiennes dont la largeur (FWHM) depend de l'energie. Pour le CsI(Tl) du Radiacode 103, FWHM = 8.4% a 662 keV, avec la relation FWHM(E) = FWHM_662 * sqrt(E/662). - **Continuum Compton** : Distribution de Klein-Nishina simplifiee sous chaque pic, simulant la diffusion des photons gamma. - **Bruit Poissonnien** : Chaque canal est echantillonne depuis une loi Poisson(lambda), ce qui reproduit exactement les fluctuations statistiques reelles. -- **Background environnemental** : Continuum exponentiel + pics de K-40 (1460 keV), Pb-214 (295, 352 keV), Bi-214 (609, 1120, 1764 keV), Ac-228 (911 keV), Pb-212 (239 keV), Tl-208 (583, 2614 keV), avec des activites aleatoires realistes. +- **Background environnemental** : Continuum CsI(Tl) realiste (bosse asymetrique a ~110 keV + queue Compton exponentielle) + pics de K-40 (1460 keV), Pb-214 (295, 352 keV), Bi-214 (609, 1120, 1764 keV), Ac-228 (911 keV), Pb-212 (239 keV), Tl-208 (583, 2614 keV), avec des activites aleatoires realistes. - **Efficacite du detecteur** : Modele phenomenologique tenant compte de l'absorption basse energie et de la penetration haute energie du scintillateur CsI(Tl) de 1 cm3. +**Entrainement hybride (optionnel mais recommande) :** + +Si vous avez capture un fichier `background_24h.npy`, le generateur peut l'utiliser pour rendre les spectres synthetiques plus realistes. Le background mesure est melange a 70% avec le modele synthetique (30%), et les pics isotopiques sont ajoutes avec variation aleatoire par-dessus. + +```bash +# Avec background mesure (recommande si disponible) +docker compose run --rm train +# Le fichier /data/background_24h.npy est automatiquement utilise +# grace a MEASURED_BACKGROUND_PATH dans docker-compose.yml +``` + ### Phase 2 : Entrainement du VegaModel (~35 min sur RTX 5060 Ti) Le modele VegaModel est un CNN-FCNN multi-tache : ``` Entree : spectre 1D (1023 canaux, 20-3000 keV) - │ - ├── Bloc CNN 1 : Conv1d(1, 64, 7) → BN → ReLU → MaxPool - ├── Bloc CNN 2 : Conv1d(64, 128, 5) → BN → ReLU → MaxPool - ├── Bloc CNN 3 : Conv1d(128, 256, 3) → BN → ReLU → MaxPool - │ - ├── Tete classification : FC(256→512→256→82) → Sigmoid - │ 82 isotopes, probabilite de presence [0, 1] - │ - └── Tete regression : FC(256→512→256→82) + | + |-- Bloc CNN 1 : Conv1d(1, 64, 7) -> BN -> LeakyReLU -> MaxPool + |-- Bloc CNN 2 : Conv1d(64, 128, 5) -> BN -> LeakyReLU -> MaxPool + |-- Bloc CNN 3 : Conv1d(128, 256, 3) -> BN -> LeakyReLU -> MaxPool + | + |-- Tete classification : FC(256->512->256->82) -> Sigmoid + | 82 isotopes, probabilite de presence [0, 1] + | + +-- Tete regression : FC(256->512->256->82) Activite estimee en Bq pour chaque isotope ``` - **34 493 156 parametres** au total -- **Fonction de perte** : BCE (classification) + MSE ponderee (regression sur isotopes presents uniquement) +- **Fonction de perte** : BCE (classification) + Huber ponderee (regression sur isotopes presents uniquement) - **Mixed precision (AMP)** : Acceleration sur GPU via float16 - **Early stopping** : Patience de 10 epochs sans amelioration @@ -196,61 +207,55 @@ Le fichier `data/background_24h.npy` est genere avec : --- -## 5. Lancer la detection continue +## 5. Lancer la detection continue + dashboard ```bash -docker compose up detect +docker compose up detect web ``` +Le dashboard est accessible sur `http://localhost:8080` avec quatre onglets : +- **Spectre** : Spectre cumule en temps reel, soustraction du background, lignes d'energie des isotopes detectes +- **Background** : Spectre du bruit de fond (live et 24h), modele theorique CsI(Tl), pics detectes +- **CPS** : Evolution du comptage par seconde dans le temps (1h a 30 jours) +- **Historique** : Liste des rapports quotidiens avec isotopes detectes + ### Comment ca marche Toutes les 60 secondes, le moniteur : ``` -┌──────────────────────────────────────────────────────────────┐ -│ 1. Echantillonnage │ -│ Radiacode.spectrum() → 1024 canaux + duree │ -│ cumulated_counts += counts │ -│ cumulated_live_time += duration.total_seconds() │ -│ Radiacode.spectrum_reset() │ -│ │ -│ 2. A 00h00 chaque jour : Rapport │ -│ Si live_time > 1h : │ -│ rate = cumulated_counts / cumulated_live_time │ -│ bg_rate = bg_counts / bg_live_time │ -│ net_rate = clip(rate - bg_rate, 0, None) │ -│ normalized = net_rate / net_rate.max() │ -│ logits, activities = model(normalized) │ -│ probs = sigmoid(logits) │ -│ Pour chaque isotope avec prob > 0.5 : │ -│ rapport[name, prob%, activite_bq] │ -│ Sauvegarder dans logs/report_YYYY-MM-DD.json │ -│ Reset cumulateurs │ -│ │ -│ 3. Si detecteur debranche : │ -│ Attendre 60s, retenter la connexion │ -│ Les donnees cumulees sont conservees │ -└──────────────────────────────────────────────────────────────┘ + 1. Echantillonnage + Radiacode.spectrum() -> 1024 canaux + duree + cumulated_counts += counts + cumulated_live_time += duration.total_seconds() + Radiacode.spectrum_reset() + + 2. A 00h00 chaque jour : Rapport + Si live_time > 1h : + rate = cumulated_counts / cumulated_live_time + bg_rate = bg_counts / bg_live_time + net_rate = clip(rate - bg_rate, 0, None) + normalized = net_rate / net_rate.max() + logits, activities = model(normalized) + probs = sigmoid(logits) + Pour chaque isotope avec prob > 0.5 : + rapport[name, prob%, activite_bq] + Sauvegarder dans logs/report_YYYY-MM-DD.json + Reset cumulateurs + + 3. Si detecteur debranche : + Attendre 60s, retenter la connexion + Les donnees cumulees sont conservees ``` ### Pourquoi soustraire le background ? -Le modele est entraite avec du background synthetique, mais le background reel varie selon l'emplacement. La soustraction reelle ameliore la detection : +Le modele est entraite avec du background synthetique realiste (continuum CsI(Tl) + pics environnementaux), mais le background reel varie selon l'emplacement. La soustraction reelle ameliore la detection : - **Sans soustraction** : Le modele voit K-40 a 1460 keV et peut le signaler comme isotope detecte, meme si c'est juste du background - **Avec soustraction** : Le pic de K-40 du background est elimine, seul un signal supplementaire est analyse - **Resultat** : Moins de faux positifs, meilleure sensibilite pour les isotopes faibles -### Debranchement et rebranchement - -Le moniteur gere les deconnexions USB proprement : -- Si le Radiacode est debranche, `try_connect()` echoue et retourne `None` -- Le moniteur attend 60 secondes et retente -- Les compteurs cumules ne sont pas reinitialises -- Quand le detecteur est rebranche, l'accumulation reprend - -Cela permet de prendre le detecteur avec soi pendant la journee sans perdre les donnees de la nuit. - --- ## 6. Interpreter les rapports @@ -393,6 +398,7 @@ L'inference CPU sur Pi 4 prend environ 0.5-1 seconde par spectre, ce qui est suf | `MAX_DURATION` | `86400` | Duree max des spectres (24h en secondes) | | `NVIDIA_VISIBLE_DEVICES` | `1` | GPU a utiliser (0 ou 1) | | `CUDA_VISIBLE_DEVICES` | `1` | GPU visible par CUDA | +| `MEASURED_BACKGROUND_PATH` | `/data/background_24h.npy` | Background mesure pour entrainement hybride | ### Detection (`docker-compose.yml` - service `detect`) @@ -407,6 +413,13 @@ L'inference CPU sur Pi 4 prend environ 0.5-1 seconde par spectre, ce qui est suf | `REPORT_HOUR` | `0` | Heure du rapport quotidien | | `MIN_LIVE_TIME` | `3600` | Live time min pour rapport (s) | +### Dashboard (`docker-compose.yml` - service `web`) + +| Variable | Defaut | Description | +|----------|--------|-------------| +| `ENERGY_CALIBRATION_OFFSET` | `0.33` | Calibration energetique offset (keV) | +| `ENERGY_CALIBRATION_SLOPE` | `2.97` | Calibration energetique pente (keV/canal) | + ### Capture de background | Variable | Defaut | Description | diff --git a/docker-compose.yml b/docker-compose.yml index 4fc1e06..eed5281 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,6 +26,8 @@ services: - MIN_DURATION=43200 - MAX_DURATION=86400 - SEED=42 + - MEASURED_BACKGROUND_PATH=/data/background_24h.npy + restart: "no" detect: build: @@ -51,9 +53,6 @@ services: - REPORT_HOUR=0 - MIN_LIVE_TIME=3600 - THRESHOLD=0.5 - depends_on: - train: - condition: service_completed_successfully restart: unless-stopped web: diff --git a/train/entrypoint.sh b/train/entrypoint.sh index 15ebfa2..7b2c3e4 100755 --- a/train/entrypoint.sh +++ b/train/entrypoint.sh @@ -11,6 +11,7 @@ DETECTOR="${DETECTOR:-radiacode_103}" MIN_DURATION="${MIN_DURATION:-43200}" MAX_DURATION="${MAX_DURATION:-86400}" SEED="${SEED:-42}" +MEASURED_BACKGROUND_PATH="${MEASURED_BACKGROUND_PATH:-}" echo "============================================" echo " Radiacode 103 — Pipeline d'entraînement" @@ -25,6 +26,12 @@ echo " Batch size : $BATCH_SIZE" echo " Learning rate: $LEARNING_RATE" echo "============================================" +MEASURED_BG_ARG="" +if [ -n "$MEASURED_BACKGROUND_PATH" ] && [ -f "$MEASURED_BACKGROUND_PATH" ]; then + MEASURED_BG_ARG="--measured_background $MEASURED_BACKGROUND_PATH" + echo "Using measured background: $MEASURED_BACKGROUND_PATH" +fi + echo "" echo "=== Phase 1 : Génération des spectres synthétiques ===" python -m vega_ml.synthetic_spectra.generate_spectra \ @@ -33,7 +40,8 @@ python -m vega_ml.synthetic_spectra.generate_spectra \ --detector "$DETECTOR" \ --min_duration "$MIN_DURATION" \ --max_duration "$MAX_DURATION" \ - --seed "$SEED" + --seed "$SEED" \ + $MEASURED_BG_ARG echo "" echo "=== Phase 2 : Entraînement du VegaModel ===" diff --git a/train/vega_ml/README.md b/train/vega_ml/README.md index b51cb98..903ea6f 100644 --- a/train/vega_ml/README.md +++ b/train/vega_ml/README.md @@ -8,22 +8,25 @@ A machine learning system for identifying radioactive isotopes from gamma-ray sp ✅ **Completed:** Vega ML model architecture (CNN-FCNN hybrid) ✅ **Completed:** Training pipeline with GPU support ✅ **Completed:** Inference engine -🔲 **Next:** Generate large training dataset (10,000-100,000 samples) -🔲 **Future:** Real-time inference on Radiacode devices +✅ **Completed:** Realistic CsI(Tl) background model +✅ **Completed:** Hybrid training (measured + synthetic background) +✅ **Completed:** Web dashboard (FastAPI + Chart.js) +🔲 **Next:** Retrain model with realistic background +🔲 **Future:** Real-time inference on Radiacode devices --- ## Overview -This project aims to build a neural network that can identify radioactive isotopes from gamma spectra. Since collecting real gamma spectra requires radioactive sources and is expensive/regulated, we generate **synthetic training data** based on realistic physics models. +This project builds a neural network that identifies radioactive isotopes from gamma spectra. Since collecting real spectra requires radioactive sources and is expensive/regulated, we generate **synthetic training data** based on realistic physics models. ### Target Hardware -- **Training:** NVIDIA RTX 5090 GPU (requires PyTorch nightly with CUDA 12.8) +- **Training:** NVIDIA RTX 5060 Ti GPU (Blackwell, requires PyTorch 2.7+ with CUDA 12.8) - **Inference:** Radiacode 101, 102, 103, 103G, 110 scintillation detectors ### Data Format -- **Input:** 2D spectrograms (time intervals × 1023 energy channels) -- **Output:** Multi-label isotope classification with activity estimation +- **Input:** 1D spectrum (1023 energy channels, 20-3000 keV, normalized to max) +- **Output:** Multi-label isotope classification (82 isotopes) with activity estimation (Bq) --- @@ -34,8 +37,7 @@ This project aims to build a neural network that can identify radioactive isotop ```bash # Create virtual environment python -m venv .venv -.venv\Scripts\activate # Windows -# or: source .venv/bin/activate # Linux/Mac +source .venv/bin/activate # Linux/Mac # Install dependencies pip install numpy scipy pillow @@ -47,25 +49,34 @@ pip install --pre torch torchvision --index-url https://download.pytorch.org/whl ### Generate Synthetic Data ```bash -# Generate 10 test samples -python -m synthetic_spectra.generate_spectra +# Generate 10 test samples (default) +python -m vega_ml.synthetic_spectra.generate_spectra --num_samples 10 --output_dir data/synthetic + +# With measured background for hybrid training (recommended) +python -m vega_ml.synthetic_spectra.generate_spectra \ + --num_samples 50000 \ + --output_dir data/synthetic \ + --measured_background /path/to/background_24h.npy ``` ### Train the Model ```bash # Quick test run (5 epochs, small dataset) -python training/vega/run_training.py --test +python -m vega_ml.training.vega.run_training --test # Full training -python training/vega/run_training.py --epochs 100 --batch-size 32 +python -m vega_ml.training.vega.run_training \ + --data-dir data/synthetic \ + --model-dir models \ + --epochs 100 --batch-size 64 ``` ### Run Inference ```bash # Run inference on synthetic data -python inference/run_inference.py --model models/vega_best.pt --data data/synthetic +python -m vega_ml.inference.run_inference --model models/vega_best.pt --data data/synthetic ``` --- @@ -95,56 +106,74 @@ python inference/run_inference.py --model models/vega_best.pt --data data/synthe ## Synthetic Spectra Generation +### Realistic Background Model + +The background continuum uses a realistic CsI(Tl) shape calibrated against real Radiacode 103 measurements, not a simple exponential: + +- **Asymmetric hump** at ~110 keV (sigma_left=55 keV, sigma_right=50 keV) — the dominant low-energy scatter peak characteristic of CsI(Tl) detectors +- **Compton tail**: 0.45*exp(-E/240) + 0.04*exp(-E/700) — realistic high-energy falloff +- **Noise floor** at 0.8% of peak — prevents zero-count channels + +This replaces the previous simple exponential `A*exp(-0.002*E)` which failed to reproduce the characteristic CsI(Tl) response. + +### Hybrid Training with Measured Background + +When a measured background file (`background_24h.npy`) is available, the generator blends it with the synthetic model: +- **70% measured** background shape (scaled to target CPS) +- **30% synthetic** continuum (for robustness against measurement artifacts) +- Stochastic isotope peaks (K-40, radon, thorium) are still added on top with random activity levels + +This is controlled by the `--measured_background` CLI argument or the `MEASURED_BACKGROUND_PATH` environment variable. + ### Features - **82 isotopes** with accurate gamma emission lines -- **Realistic physics:** Gaussian peaks, Poisson noise, Compton continuum, environmental background +- **Realistic physics:** Gaussian peaks, Poisson noise, Compton continuum, CsI(Tl) background shape - **Multiple detector models:** Radiacode 101, 102, 103, 103G, 110 with correct FWHM and energy ranges - **Configurable variation:** Activity levels, measurement durations, isotope combinations +- **Decay chains:** Uranium-238, Thorium-232 chains with secular equilibrium -### Sample Distribution +### Sample Distribution (v3) | Type | Proportion | Description | |------|------------|-------------| -| Single isotope | 40% | One source + background | -| Dual isotope | 30% | Two sources blended | -| Multi isotope | 20% | 3-5 sources combined | -| Background only | 10% | Environmental only | - -### Scaling Up -Edit `synthetic_spectra/generate_spectra.py` to generate larger datasets: -```python -generate_training_batch( - n_samples=100000, # Generate 100k samples - output_dir=Path("data/synthetic/spectra"), - detector_type="radiacode_103" -) -``` +| Background only | 15% | Environmental background only | +| Single calibration | 20% | One check source + background | +| Single medical | 8% | Medical isotope + background | +| Single industrial | 5% | Industrial source + background | +| Uranium chain | 10% | U-238 + daughters in equilibrium | +| Thorium chain | 10% | Th-232 + daughters in equilibrium | +| NORM | 7% | Naturally occurring radioactive material | +| Fallout | 5% | Cs-137 + Cs-134 signature | +| Mixed | 10% | Random 2-3 isotope mixes | +| Complex mix | 5% | 4-6 isotopes from various categories | +| Weak source | 5% | Near-detection-limit sources | --- ## Project Structure ``` -ml-for-isotope-identification/ +train/vega_ml/ ├── README.md # This file ├── agents.md # AI agent context documentation ├── .gitignore # Git ignore rules │ ├── synthetic_spectra/ # Spectrum generation package │ ├── __init__.py -│ ├── config.py # Detector configurations -│ ├── generator.py # Main generation logic -│ ├── generate_spectra.py # CLI batch generation +│ ├── config.py # Detector configurations (Radiacode 101-110) +│ ├── generator.py # Main generation logic (SpectrumConfig) +│ ├── generate_spectra.py # CLI batch generation (v1) +│ ├── generate_spectra_v3.py # CLI batch generation (v3, parallel) │ ├── ground_truth/ │ │ ├── isotope_data.py # 82 isotopes database │ │ └── decay_chains.py # Decay chain definitions │ └── physics/ -│ └── spectrum_physics.py # Physics calculations +│ └── spectrum_physics.py # Physics calculations + realistic CsI(Tl) background │ ├── training/ # Training infrastructure │ └── vega/ # Vega model package │ ├── __init__.py │ ├── isotope_index.py # Isotope ↔ index mapping -│ ├── model.py # VegaModel architecture +│ ├── model.py # VegaModel architecture + VegaLoss │ ├── dataset.py # PyTorch Dataset/DataLoader │ ├── train.py # Training loop & utilities │ └── run_training.py # CLI training script @@ -176,11 +205,14 @@ ml-for-isotope-identification/ | Radiacode 103G | GAGG(Ce) | 7.4% | 20-3000 keV | 1024 | | Radiacode 110 | CsI(Tl) | 8.4% | 20-3000 keV | 1024 | +Note: Only the first 1023 channels are used (channel 1023 is an overflow bin). + ### Physics Model -- **Peak shape:** Gaussian with FWHM scaling as √(E/662) -- **Expected counts:** λ = A × t × I × ε × T +- **Peak shape:** Gaussian with FWHM scaling as sqrt(E/662) for scintillators +- **Expected counts:** lambda = A * t * I * epsilon * T - **Noise:** Poisson counting statistics -- **Background:** Exponential continuum + environmental isotopes (K-40, Pb-214, Bi-214, etc.) +- **Background:** Realistic CsI(Tl) continuum (asymmetric hump + Compton tail) + environmental isotope peaks (K-40, radon daughters, thorium daughters) +- **Hybrid mode:** Measured background can be blended with synthetic (70/30 ratio) for maximum realism ### Isotope Categories - Natural background (K-40, Ra-226, Rn-222) @@ -199,21 +231,18 @@ ml-for-isotope-identification/ numpy>=1.24.0 scipy>=1.10.0 pillow>=9.0.0 -torch>=2.11.0 (nightly with CUDA 12.8 for RTX 5090) +scikit-learn>=1.3.0 +torch>=2.0.0 ``` ### GPU Support -The RTX 5090 (Blackwell architecture, sm_120) requires PyTorch nightly builds with CUDA 12.8: +For Blackwell GPUs (RTX 50-series, sm_120), use PyTorch 2.7+ with CUDA 12.8: ```bash pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128 ``` ### For AI Agents -See [agents.md](agents.md) for comprehensive documentation on: -- System architecture and design decisions -- Physics model implementation details -- Vega model architecture and training -- Configuration options and variation strategies +See [agents.md](agents.md) for comprehensive documentation on system architecture, physics model details, and configuration options. --- @@ -224,12 +253,11 @@ See [agents.md](agents.md) for comprehensive documentation on: - [x] ~~Implement CNN-FCNN model architecture (Vega)~~ - [x] ~~Create training script with logging~~ - [x] ~~Implement inference module~~ -- [ ] Generate large training dataset (100k samples) -- [ ] Train model to convergence -- [ ] Add data augmentation pipeline +- [x] ~~Realistic CsI(Tl) background model~~ +- [x] ~~Hybrid training with measured background~~ +- [ ] Retrain model with realistic background - [ ] Add model evaluation metrics & confusion matrix -- [ ] Implement real-time inference module -- [ ] Create Radiacode device integration +- [ ] Implement real-time inference on Radiacode devices --- diff --git a/train/vega_ml/synthetic_spectra/generate_spectra.py b/train/vega_ml/synthetic_spectra/generate_spectra.py index 28a6ef5..8df1215 100644 --- a/train/vega_ml/synthetic_spectra/generate_spectra.py +++ b/train/vega_ml/synthetic_spectra/generate_spectra.py @@ -136,6 +136,7 @@ def generate_training_batch( background_only_fraction: float = 0.1, save_png: bool = False, random_seed: int = None, + measured_background_path: str = None, ) -> list: """ Generate a batch of training samples with various configurations. @@ -210,6 +211,7 @@ def generate_training_batch( duration, detector_name=detector_name, include_background=True, + measured_background_path=measured_background_path, ) # Save spectrum (don't accumulate in memory) @@ -240,6 +242,7 @@ def generate_training_batch( duration, detector_name=detector_name, include_background=True, + measured_background_path=measured_background_path, ) save_spectrum( @@ -270,6 +273,7 @@ def generate_training_batch( duration, detector_name=detector_name, include_background=True, + measured_background_path=measured_background_path, ) save_spectrum( @@ -295,6 +299,7 @@ def generate_training_batch( sources=[], # No additional sources include_background=True, detector_name=detector_name, + measured_background_path=measured_background_path, ) spectrum = generator.generate_spectrum(config) @@ -367,6 +372,13 @@ def main(): default=100.0, help="Maximum source activity in Bq (default: 100.0)" ) + + parser.add_argument( + "--measured_background", + type=str, + default=None, + help="Path to measured background .npy file for hybrid training" + ) parser.add_argument( "--save_png", @@ -402,6 +414,7 @@ def main(): activity_range=(args.min_activity, args.max_activity), save_png=args.save_png, random_seed=args.seed, + measured_background_path=args.measured_background, ) print("\n" + "=" * 60) diff --git a/train/vega_ml/synthetic_spectra/generate_spectra_v3.py b/train/vega_ml/synthetic_spectra/generate_spectra_v3.py index 73126a9..537c5e8 100644 --- a/train/vega_ml/synthetic_spectra/generate_spectra_v3.py +++ b/train/vega_ml/synthetic_spectra/generate_spectra_v3.py @@ -405,6 +405,7 @@ def generate_single_sample(args: Tuple[int, dict]) -> Optional[str]: include_radon=bg_params['include_radon'], include_thorium=bg_params['include_thorium'], detector_name=config['detector_name'], + measured_background_path=config.get('measured_background_path'), ) # Generate spectrum @@ -437,6 +438,7 @@ def generate_training_data_v3( scenarios: Optional[List[SampleScenario]] = None, num_workers: int = None, random_seed: int = None, + measured_background_path: Optional[str] = None, ) -> int: """ Generate training samples in parallel. @@ -498,6 +500,7 @@ def generate_training_data_v3( 'bg_intensity_max': bg_intensity_range[1], 'base_seed': random_seed, 'scenarios': scenarios, + 'measured_background_path': measured_background_path, } # Create work items @@ -560,9 +563,11 @@ def main(): help='Minimum activity in Bq') parser.add_argument('--activity_max', type=float, default=100.0, help='Maximum activity in Bq') - + parser.add_argument('--measured_background', type=str, default=None, + help='Path to measured background .npy file for hybrid training') + args = parser.parse_args() - + generate_training_data_v3( num_samples=args.num_samples, output_dir=Path(args.output_dir), @@ -570,6 +575,7 @@ def main(): activity_range=(args.activity_min, args.activity_max), num_workers=args.workers, random_seed=args.seed, + measured_background_path=args.measured_background, ) diff --git a/train/vega_ml/synthetic_spectra/generator.py b/train/vega_ml/synthetic_spectra/generator.py index 2ecb26b..62170e0 100644 --- a/train/vega_ml/synthetic_spectra/generator.py +++ b/train/vega_ml/synthetic_spectra/generator.py @@ -63,6 +63,7 @@ class SpectrumConfig: include_k40: bool = True include_radon: bool = True include_thorium: bool = True + measured_background_path: Optional[str] = None # Detector configuration detector_name: str = "radiacode_103" @@ -166,7 +167,8 @@ class SpectrumGenerator: include_k40=background_config.get('include_k40', True), include_radon=background_config.get('include_radon', True), include_thorium=background_config.get('include_thorium', True), - detector_config=self.detector_config + detector_config=self.detector_config, + measured_background_path=background_config.get('measured_background_path') ) spectrum += bg_spectrum background_isotopes = bg_isotopes @@ -264,6 +266,7 @@ class SpectrumGenerator: 'include_k40': config.include_k40, 'include_radon': config.include_radon, 'include_thorium': config.include_thorium, + 'measured_background_path': config.measured_background_path, } ) all_source_isotopes.extend(src_iso) diff --git a/train/vega_ml/synthetic_spectra/physics/spectrum_physics.py b/train/vega_ml/synthetic_spectra/physics/spectrum_physics.py index 6512a0f..8131020 100644 --- a/train/vega_ml/synthetic_spectra/physics/spectrum_physics.py +++ b/train/vega_ml/synthetic_spectra/physics/spectrum_physics.py @@ -9,6 +9,7 @@ Implements the physics of gamma spectrum generation including: """ import numpy as np +from pathlib import Path from scipy import special from typing import Optional, Tuple, List from dataclasses import dataclass @@ -274,14 +275,14 @@ def generate_exponential_background( ) -> np.ndarray: """ Generate exponential background continuum. - + B(E) = A * exp(-b * E) - + Args: energy_bins: Array of energy bin centers (keV) amplitude: Background amplitude at E=0 decay_constant: Exponential decay constant (1/keV) - + Returns: Array of background counts """ @@ -294,26 +295,123 @@ def generate_polynomial_background( ) -> np.ndarray: """ Generate polynomial background. - + B(E) = Σ c_m * E^m - + Args: energy_bins: Array of energy bin centers (keV) coefficients: Polynomial coefficients [c0, c1, c2, ...] - + Returns: Array of background counts """ if coefficients is None: coefficients = [10.0, -0.005, 1e-6] # Default quadratic - + background = np.zeros_like(energy_bins) for m, c in enumerate(coefficients): background += c * (energy_bins ** m) - + return np.maximum(0, background) +def generate_realistic_continuum( + energy_bins: np.ndarray, + total_counts: float, + detector_config: Optional[DetectorConfig] = None +) -> np.ndarray: + """ + Generate realistic CsI(Tl) background continuum shape. + + Calibrated against real Radiacode 103 background measurements. + Produces the characteristic asymmetric hump at ~110 keV and + Compton-like tail that simple exponentials miss. + + Shape components: + - Asymmetric hump centered at ~110 keV (sigma_left=55, sigma_right=50 keV) + - Compton continuum: 0.45*exp(-E/240) + 0.04*exp(-E/700) + - Noise floor at 0.8% of peak + + Args: + energy_bins: Array of energy bin centers (keV) + total_counts: Target total counts in the continuum + detector_config: Detector configuration (unused, kept for API consistency) + + Returns: + Array of background counts matching real CsI(Tl) continuum shape + """ + E = energy_bins + + # Asymmetric hump at ~110 keV (low-energy scatter peak in CsI(Tl)) + hump_center = 110.0 + sigma_left = 55.0 # Broader on the low-energy side + sigma_right = 50.0 # Narrower on the high-energy side + hump = np.where( + E <= hump_center, + np.exp(-0.5 * ((E - hump_center) / sigma_left) ** 2), + np.exp(-0.5 * ((E - hump_center) / sigma_right) ** 2), + ) + + # Compton continuum tail + tail = 0.45 * np.exp(-E / 240.0) + 0.04 * np.exp(-E / 700.0) + + # Noise floor (low-level baseline) + noise_floor = 0.008 + + # Combine shape components + continuum = hump + tail + noise_floor + + # Normalize to target total counts + if continuum.sum() > 0 and total_counts > 0: + continuum *= total_counts / continuum.sum() + + return continuum + + +def load_measured_background( + path: str, + energy_bins: np.ndarray, + duration_seconds: float +) -> Optional[np.ndarray]: + """ + Load a measured background spectrum from a .npy file and rescale it + to match the target duration. + + The .npy file should contain a dict with keys 'counts' and 'duration'. + + Args: + path: Path to the .npy background file + energy_bins: Array of energy bin centers (keV) for alignment + duration_seconds: Target duration to scale the spectrum to + + Returns: + Background spectrum scaled to target duration, or None if file not found + """ + bg_path = Path(path) + if not bg_path.exists(): + return None + + try: + bg_data = np.load(str(bg_path), allow_pickle=True).item() + bg_counts = bg_data["counts"].astype(np.float64) + bg_duration = float(bg_data["duration"]) + + # Truncate or pad to match energy_bins length + num_channels = len(energy_bins) + if len(bg_counts) > num_channels: + bg_counts = bg_counts[:num_channels] + elif len(bg_counts) < num_channels: + bg_counts = np.pad(bg_counts, (0, num_channels - len(bg_counts))) + + # Scale to target duration (cps * target_duration) + if bg_duration > 0: + scale = duration_seconds / bg_duration + return bg_counts * scale + return None + except Exception: + return None + + def generate_environmental_background( energy_bins: np.ndarray, duration_seconds: float, @@ -321,17 +419,19 @@ def generate_environmental_background( include_k40: bool = True, include_radon: bool = True, include_thorium: bool = True, - detector_config: Optional[DetectorConfig] = None + detector_config: Optional[DetectorConfig] = None, + measured_background_path: Optional[str] = None ) -> Tuple[np.ndarray, List[str]]: """ Generate realistic environmental background spectrum. - + Includes: - - Exponential continuum (cosmic rays, scattered gammas) + - Realistic CsI(Tl) continuum shape (asymmetric hump + Compton tail) + - Or measured background if path provided and file exists - K-40 peak (1460 keV) - ubiquitous in environment - Radon daughters (Pb-214, Bi-214) - indoor air - Thorium daughters (Pb-212, Tl-208) - building materials - + Args: energy_bins: Array of energy bin centers (keV) duration_seconds: Acquisition time @@ -340,27 +440,47 @@ def generate_environmental_background( include_radon: Include radon daughter peaks include_thorium: Include thorium daughter peaks detector_config: Detector configuration - + measured_background_path: Path to .npy file with measured background. + If provided and file exists, used as the continuum base instead + of the synthetic continuum. Isotope peaks are still added on top + with stochastic variation for training diversity. + Returns: Tuple of (background_spectrum, list_of_background_isotopes) """ if detector_config is None: detector_config = get_default_config() - + background_isotopes = [] - - # Start with exponential continuum + + # Use measured background if available, otherwise synthetic continuum total_continuum_counts = background_cps * duration_seconds * 0.7 - background = generate_exponential_background( - energy_bins, - amplitude=total_continuum_counts / 500, - decay_constant=0.002 - ) - - # Normalize continuum to target count rate - if background.sum() > 0: - background *= (total_continuum_counts / background.sum()) - + + measured = None + if measured_background_path: + measured = load_measured_background( + measured_background_path, energy_bins, duration_seconds + ) + + if measured is not None: + # Scale measured background to match target CPS + measured_total = measured.sum() + if measured_total > 0 and total_continuum_counts > 0: + # Blend: 70% measured shape, 30% synthetic for robustness + synthetic = generate_realistic_continuum( + energy_bins, total_counts=total_continuum_counts * 0.3, + detector_config=detector_config + ) + measured_scaled = measured * (total_continuum_counts * 0.7 / measured_total) + background = measured_scaled + synthetic + else: + background = measured + else: + background = generate_realistic_continuum( + energy_bins, total_counts=total_continuum_counts, + detector_config=detector_config + ) + # Add K-40 peak (very common) if include_k40: k40_activity = np.random.uniform(0.5, 5.0) # Bq @@ -376,11 +496,11 @@ def generate_environmental_background( ) background += peak background_isotopes.append("K-40") - + # Add radon daughters if include_radon: radon_activity = np.random.uniform(0.1, 2.0) # Bq - + # Pb-214 lines for energy, intensity in [(295.22, 0.1842), (351.93, 0.356)]: peak = generate_peak_spectrum( @@ -394,7 +514,7 @@ def generate_environmental_background( detector_config ) background += peak - + # Bi-214 lines for energy, intensity in [(609.31, 0.4549), (1120.29, 0.1492), (1764.49, 0.1531)]: peak = generate_peak_spectrum( @@ -408,13 +528,13 @@ def generate_environmental_background( detector_config ) background += peak - + background_isotopes.extend(["Pb-214", "Bi-214"]) - + # Add thorium daughters if include_thorium: thorium_activity = np.random.uniform(0.05, 1.0) # Bq - + # Ac-228 line peak = generate_peak_spectrum( energy_bins, @@ -427,7 +547,7 @@ def generate_environmental_background( detector_config ) background += peak - + # Pb-212 line peak = generate_peak_spectrum( energy_bins, @@ -440,7 +560,7 @@ def generate_environmental_background( detector_config ) background += peak - + # Tl-208 lines for energy, intensity in [(583.19, 0.845 * 0.36), (2614.51, 0.998 * 0.36)]: # Branching ratio of 36% for Tl-208 path @@ -455,9 +575,9 @@ def generate_environmental_background( detector_config ) background += peak - + background_isotopes.extend(["Ac-228", "Pb-212", "Tl-208"]) - + return background, background_isotopes diff --git a/web/app/config.py b/web/app/config.py index 45396be..30eb039 100644 --- a/web/app/config.py +++ b/web/app/config.py @@ -10,7 +10,7 @@ ISOTOPE_INDEX_PATH = Path(os.environ.get("ISOTOPE_INDEX_PATH", "/models/vega_iso ENERGY_OFFSET = float(os.environ.get("ENERGY_CALIBRATION_OFFSET", "0.33")) ENERGY_SLOPE = float(os.environ.get("ENERGY_CALIBRATION_SLOPE", "2.97")) -NUM_CHANNELS = 1024 +NUM_CHANNELS = 1023 # Last channel (1023) is overflow bin, excluded from display def energy_axis(): diff --git a/web/app/routers/background.py b/web/app/routers/background.py index 9442a24..b76f0dc 100644 --- a/web/app/routers/background.py +++ b/web/app/routers/background.py @@ -1,24 +1,41 @@ import json from fastapi import APIRouter, HTTPException from app.config import BACKGROUND_SNAPSHOT_PATH, BACKGROUND_PATH, energy_axis, NUM_CHANNELS +from app.theoretical_bg import generate_theoretical_bg, generate_continuum_only import numpy as np router = APIRouter() -@router.get("") -async def get_background_info(): - """Background metadata: elapsed time, CPS, top peaks.""" +def _load_snapshot(): + """Load the live snapshot file, or raise 404.""" if not BACKGROUND_SNAPSHOT_PATH.exists(): raise HTTPException(status_code=404, detail="Background capture not available yet") - try: with open(BACKGROUND_SNAPSHOT_PATH) as f: - snapshot = json.load(f) + return json.load(f) except (json.JSONDecodeError, OSError): raise HTTPException(status_code=500, detail="Background snapshot file corrupt") - # Check if full background is available + +def _load_reference(): + """Load the 24h reference background, or return None.""" + if not BACKGROUND_PATH.exists(): + return None + try: + bg_data = np.load(str(BACKGROUND_PATH), allow_pickle=True).item() + return { + "counts": [round(float(c), 1) for c in bg_data["counts"][:NUM_CHANNELS]], + "live_time_s": round(float(bg_data["duration"]), 1), + } + except Exception: + return None + + +@router.get("") +async def get_background_info(): + """Background metadata: elapsed time, CPS, top peaks.""" + snapshot = _load_snapshot() full_available = BACKGROUND_PATH.exists() return { @@ -33,34 +50,46 @@ async def get_background_info(): @router.get("/spectrum") async def get_background_spectrum(): - """Full background spectrum with energy axis.""" - if not BACKGROUND_SNAPSHOT_PATH.exists(): - raise HTTPException(status_code=404, detail="Background capture not available yet") - - try: - with open(BACKGROUND_SNAPSHOT_PATH) as f: - snapshot = json.load(f) - except (json.JSONDecodeError, OSError): - raise HTTPException(status_code=500, detail="Background snapshot file corrupt") - - counts = snapshot.get("spectrum", [0] * NUM_CHANNELS) - - # If full background file exists, use it for better data - if BACKGROUND_PATH.exists(): - try: - bg_data = np.load(str(BACKGROUND_PATH), allow_pickle=True).item() - counts = [round(float(c), 1) for c in bg_data["counts"]] - live_time = float(bg_data["duration"]) - except Exception: - live_time = snapshot.get("live_time_s", 0) - else: - live_time = snapshot.get("live_time_s", 0) + """Live background spectrum (from snapshot) with energy axis.""" + snapshot = _load_snapshot() + live_time = snapshot.get("live_time_s", 0) return { "channels": list(range(NUM_CHANNELS)), "energy_kev": energy_axis(), - "counts": counts, + "counts": snapshot.get("spectrum", [0] * 1024)[:NUM_CHANNELS], "live_time_s": live_time, "cps": snapshot.get("cps", 0), "top_peaks": snapshot.get("top_peaks", []), - } \ No newline at end of file + "reference_available": BACKGROUND_PATH.exists(), + } + + +@router.get("/reference") +async def get_background_reference(): + """24h reference background spectrum for overlay comparison.""" + ref = _load_reference() + if ref is None: + raise HTTPException(status_code=404, detail="No 24h reference background available") + + return { + "channels": list(range(NUM_CHANNELS)), + "energy_kev": energy_axis(), + "counts": ref["counts"], + "live_time_s": ref["live_time_s"], + } + + +@router.get("/theoretical") +async def get_theoretical_bg(cps: float = 6.0, live_time_s: float = 3600.0): + """Theoretical natural background spectrum (K-40, U-238 chain, Th-232 chain).""" + return generate_theoretical_bg(cps=cps, live_time_s=live_time_s) + + +@router.get("/continuum") +async def get_continuum(cps: float = 6.0, live_time_s: float = 3600.0): + """CsI(Tl) continuum shape only (hump + Compton tail, no photopeaks, no noise). + + Matches the model used in training (generate_realistic_continuum). + """ + return generate_continuum_only(cps=cps, live_time_s=live_time_s) \ No newline at end of file diff --git a/web/app/routers/spectrum.py b/web/app/routers/spectrum.py index 666039b..1026d30 100644 --- a/web/app/routers/spectrum.py +++ b/web/app/routers/spectrum.py @@ -29,7 +29,7 @@ async def get_current_spectrum(): "isotopes_detected": state.get("isotopes_detected", []), "channels": list(range(NUM_CHANNELS)), "energy_kev": energy_axis(), - "counts": state.get("counts", [0] * NUM_CHANNELS), + "counts": state.get("counts", [0] * 1024)[:NUM_CHANNELS], } @@ -45,7 +45,7 @@ async def get_difference_spectrum(): except (json.JSONDecodeError, OSError): raise HTTPException(status_code=503, detail="Monitor state file corrupt") - counts = np.array(state.get("counts", [0] * NUM_CHANNELS), dtype=np.float64) + counts = np.array(state.get("counts", [0] * 1024), dtype=np.float64)[:NUM_CHANNELS] live_time = state.get("cumulated_live_time_s", 0) if live_time <= 0: @@ -55,7 +55,7 @@ async def get_difference_spectrum(): if BACKGROUND_PATH.exists(): bg_data = np.load(str(BACKGROUND_PATH), allow_pickle=True).item() - bg_counts = bg_data["counts"].astype(np.float64) + bg_counts = bg_data["counts"].astype(np.float64)[:NUM_CHANNELS] bg_live_time = float(bg_data["duration"]) bg_rate = bg_counts / bg_live_time net_rate = np.clip(rate - bg_rate, 0, None) @@ -72,5 +72,5 @@ async def get_difference_spectrum(): "channels": list(range(NUM_CHANNELS)), "energy_kev": energy_axis(), "counts": [round(float(c), 1) for c in net_counts], - "raw_counts": state.get("counts", []), + "raw_counts": state.get("counts", [])[:NUM_CHANNELS], } \ No newline at end of file diff --git a/web/app/theoretical_bg.py b/web/app/theoretical_bg.py new file mode 100644 index 0000000..4cb8b7a --- /dev/null +++ b/web/app/theoretical_bg.py @@ -0,0 +1,139 @@ +""" +Theoretical natural background spectrum for CsI(Tl) detectors (Radiacode 103). + +Shape calibrated against real Radiacode 103 background measurements. +The CsI(Tl) crystal (1 cm³, 8.4% FWHM) produces a spectrum with: +- A dominant low-energy hump peaking around 100-120 keV +- Exponential decay at higher energies +- Subtle photopeaks from natural isotopes +""" + +import numpy as np +from app.config import ENERGY_OFFSET, ENERGY_SLOPE, NUM_CHANNELS + + +# Photopeak lines: (energy_keV, relative_weight) +# Weights tuned so peaks are visible above local continuum at typical CPS +NATURAL_BG_LINES = [ + (295.22, 0.10), # Pb-214 + (351.93, 0.18), # Pb-214 + (609.31, 0.15), # Bi-214 + (911.20, 0.08), # Ac-228 + (968.97, 0.05), # Ac-228 + (1120.29, 0.06), # Bi-214 + (1460.83, 0.12), # K-40 + (1764.49, 0.08), # Bi-214 + (2614.51, 0.18), # Tl-208 +] + + +def _gaussian(x, center, sigma, amplitude): + return amplitude * np.exp(-0.5 * ((x - center) / sigma) ** 2) + + +def generate_theoretical_bg(cps: float = 6.0, live_time_s: float = 3600.0): + channels = np.arange(NUM_CHANNELS, dtype=np.float64) + energy_axis = ENERGY_OFFSET + ENERGY_SLOPE * channels + total_counts = cps * live_time_s + + # ── 1. Main hump: asymmetric peak at ~105 keV ── + # Real data: rises from ~60 at 10keV to ~280 at 100-120keV, then falls + hump_center = 110.0 + hump = np.zeros(NUM_CHANNELS, dtype=np.float64) + low_mask = energy_axis <= hump_center + hump[low_mask] = _gaussian(energy_axis[low_mask], hump_center, 55.0, 1.0) + hump[~low_mask] = _gaussian(energy_axis[~low_mask], hump_center, 50.0, 1.0) + + # ── 2. Compton continuum tail ── + # Real data: ~136@200, ~80@250, ~44@295, ~14@400, ~5@600 + tail = 0.45 * np.exp(-energy_axis / 240) + 0.04 * np.exp(-energy_axis / 700) + + # ── 3. Low-energy noise floor ── + noise_floor = 0.008 + + # ── 4. Combine continuum ── + continuum = hump + tail + noise_floor + + # ── 5. Photopeaks ── + # CsI(Tl) 8.4% FWHM at 662 keV, scaling as sqrt(E) + # sigma(E) = FWHM(E) / 2.355 = 0.084 * sqrt(E * 662) / 662 / 2.355 + # Simplified: sigma = 23.6 * sqrt(E/662) keV + def sigma_keV(E): + return max(12.0, 23.6 * np.sqrt(max(E, 1.0) / 662.0)) + + peak_frac = 0.08 # 8% of total counts in resolved photopeaks + total_weight = sum(w for _, w in NATURAL_BG_LINES) + + peaks = np.zeros(NUM_CHANNELS, dtype=np.float64) + for line_energy, weight in NATURAL_BG_LINES: + sig = sigma_keV(line_energy) + peak_counts = total_counts * peak_frac * (weight / total_weight) + amplitude = peak_counts / (sig * np.sqrt(2 * np.pi)) + peaks += _gaussian(energy_axis, line_energy, sig, amplitude) + + # ── 6. Combine and normalize ── + raw = continuum + peaks / total_counts # peaks normalized later + raw *= total_counts / raw.sum() + + # ── 7. Poisson-like noise ── + rng = np.random.default_rng(42) + noise = rng.normal(0, 1, NUM_CHANNELS) * np.sqrt(np.maximum(raw, 1.0)) * 0.25 + raw += noise + + # Floor at 0.9 for log scale + spectrum = np.clip(raw, 0.9, None) + + key_lines = [ + (295.22, "Pb-214"), (351.93, "Pb-214"), + (609.31, "Bi-214"), (911.20, "Ac-228"), + (1120.29, "Bi-214"), (1460.83, "K-40"), + (1764.49, "Bi-214"), (2614.51, "Tl-208"), + ] + + return { + "energy_kev": [round(float(E), 2) for E in energy_axis], + "counts": [round(float(c), 1) for c in spectrum], + "cps": round(cps, 2), + "live_time_s": round(live_time_s, 1), + "lines": [ + {"energy_keV": E, "name": name} for E, name in key_lines + ], + } + + +def generate_continuum_only(cps: float = 6.0, live_time_s: float = 3600.0): + """Generate only the CsI(Tl) continuum shape (no photopeaks, no noise). + + This matches the model used in training (generate_realistic_continuum in + spectrum_physics.py) for direct comparison with measured backgrounds. + """ + channels = np.arange(NUM_CHANNELS, dtype=np.float64) + energy_axis = ENERGY_OFFSET + ENERGY_SLOPE * channels + total_counts = cps * live_time_s + + # Asymmetric hump at ~110 keV + hump_center = 110.0 + hump = np.where( + energy_axis <= hump_center, + np.exp(-0.5 * ((energy_axis - hump_center) / 55.0) ** 2), + np.exp(-0.5 * ((energy_axis - hump_center) / 50.0) ** 2), + ) + + # Compton continuum tail + tail = 0.45 * np.exp(-energy_axis / 240.0) + 0.04 * np.exp(-energy_axis / 700.0) + + # Noise floor + noise_floor = 0.008 + + continuum = hump + tail + noise_floor + + # Normalize to target total counts + if continuum.sum() > 0 and total_counts > 0: + continuum *= total_counts / continuum.sum() + + return { + "energy_kev": [round(float(E), 2) for E in energy_axis], + "counts": [round(float(c), 1) for c in continuum], + "cps": round(cps, 2), + "live_time_s": round(live_time_s, 1), + } \ No newline at end of file diff --git a/web/static/index.html b/web/static/index.html index ba1b376..0b93957 100644 --- a/web/static/index.html +++ b/web/static/index.html @@ -4,8 +4,9 @@