Files
radiacode/train/vega_ml/synthetic_spectra/generate_spectra.py
Jacquin Antoine 0847a3fc80 Fix: CsI(Tl) non-linear response correction + detector calibration overhaul
Root cause of Am-241 misidentification: the Radiacode 103's CsI(Tl) crystal
shifts low-energy peaks upward (59.5 keV → 71.6 keV for Am-241) due to
non-proportional scintillation response. The model was trained on theoretical
peak positions and couldn't match the shifted real peaks.

Changes:
- Add inverse CsI(Tl) non-linear correction to inference pipeline
  (radiacode_monitor.py, web/config.py, test_detection.py)
  E_apparent = E_true * (1 + 0.37 * exp(-E_true/100))
  Corrects channel mapping so peaks appear at theoretical energies
- Fix energy calibration: DetectorConfig now uses E = 0.33 + 2.97*ch
  with 1023 channels, matching the real detector (was energy_min=20,
  skip_first_channel=True, different channel width)
- Add K-escape peaks for CsI(Tl) iodine X-ray escape (E - 28.5 keV)
- Add asymmetric peak shapes for low-energy tails (< 200 keV)
- Add log1p normalization in dataset and inference (replaces max-norm)
- Add background-subtracted training mode (subtract_background flag)
- Add low-signal augmentation (0.01-5 Bq activities, 30-300s durations)
- Update docker-compose.yml: batch_size=32, duration=30-300s,
  CSI_NONLINEAR_ALPHA/BETA env vars for detect and web
- Web dashboard: apply CsI correction to displayed spectra
- Various UI fixes (Chart.js width, zoom/pan, isotope lines)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 17:35:22 +02:00

511 lines
15 KiB
Python

"""
Synthetic Spectra Generation Script
This script generates synthetic gamma spectra for training isotope identification models.
Usage:
python generate_spectra.py --num_samples 10 --output_dir ./data/synthetic
Output:
- data/synthetic/spectra/*.npy - Spectrum arrays (time x 1023 channels)
- data/synthetic/spectra/*.png - Visual representations (optional)
- data/synthetic/labels.json - Annotations for all samples
"""
import argparse
import sys
from pathlib import Path
import json
from datetime import datetime
import numpy as np
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from synthetic_spectra.generator import (
SpectrumGenerator,
SpectrumConfig,
IsotopeSource,
GeneratedSpectrum,
save_spectrum,
generate_labels_json,
)
from synthetic_spectra.config import RADIACODE_CONFIGS
from synthetic_spectra.ground_truth import (
get_all_isotopes,
get_isotopes_by_category,
IsotopeCategory,
DECAY_CHAINS,
)
def get_common_isotope_pool() -> list:
"""Get a pool of commonly encountered isotopes for realistic training data."""
common_isotopes = [
# Calibration sources (very common in spectra)
"Cs-137", "Co-60", "Am-241", "Ba-133", "Eu-152", "Na-22", "Co-57",
# Medical isotopes (occasionally encountered)
"Tc-99m", "I-131", "I-123", "F-18", "Ga-67", "In-111", "Lu-177",
# Natural background (always present to some degree)
"K-40", "Pb-214", "Bi-214", "Pb-212", "Bi-212", "Tl-208", "Ac-228",
# Industrial sources
"Ir-192", "Se-75", "Mn-54", "Zn-65",
# Uranium/Thorium (NORM)
"U-235", "Ra-226", "Th-232",
# Reactor/Fallout
"Cs-134", "Sb-125", "Ce-144", "Co-58",
]
# Filter to only isotopes in our database with gamma lines
from synthetic_spectra.ground_truth import get_isotope
valid_isotopes = []
for name in common_isotopes:
iso = get_isotope(name)
if iso and len(iso.gamma_lines) > 0:
valid_isotopes.append(name)
return valid_isotopes
def generate_single_isotope_sample(
generator: SpectrumGenerator,
isotope_name: str,
activity_bq: float,
duration_seconds: float,
**kwargs
) -> GeneratedSpectrum:
"""Generate a clean sample with a single isotope."""
config = SpectrumConfig(
duration_seconds=duration_seconds,
sources=[
IsotopeSource(
isotope_name=isotope_name,
activity_bq=activity_bq,
include_daughters=True
)
],
**kwargs
)
return generator.generate_spectrum(config)
def generate_mixed_isotope_sample(
generator: SpectrumGenerator,
isotope_names: list,
activities_bq: list,
duration_seconds: float,
**kwargs
) -> GeneratedSpectrum:
"""Generate a sample with multiple blended isotopes."""
sources = [
IsotopeSource(
isotope_name=name,
activity_bq=activity,
include_daughters=True
)
for name, activity in zip(isotope_names, activities_bq)
]
config = SpectrumConfig(
duration_seconds=duration_seconds,
sources=sources,
**kwargs
)
return generator.generate_spectrum(config)
def generate_training_batch(
num_samples: int,
output_dir: Path,
detector_name: str = "radiacode_103",
duration_range: tuple = (30, 300),
activity_range: tuple = (1.0, 100.0),
single_isotope_fraction: float = 0.3,
dual_isotope_fraction: float = 0.2,
multi_isotope_fraction: float = 0.15,
background_only_fraction: float = 0.1,
low_signal_fraction: float = 0.15,
subtracted_fraction: float = 0.1,
save_png: bool = False,
random_seed: int = None,
measured_background_path: str = None,
) -> list:
"""
Generate a batch of training samples with various configurations.
Args:
num_samples: Total number of samples to generate
output_dir: Output directory for spectra and labels
detector_name: Radiacode device to simulate
duration_range: (min, max) duration in seconds
activity_range: (min, max) source activity in Bq
single_isotope_fraction: Fraction of single-isotope samples
dual_isotope_fraction: Fraction of two-isotope samples
multi_isotope_fraction: Fraction of 3+ isotope samples
background_only_fraction: Fraction of background-only samples
low_signal_fraction: Fraction of low-activity samples (0.01-5 Bq)
subtracted_fraction: Fraction of background-subtracted samples
save_png: Whether to also save PNG images
random_seed: Random seed for reproducibility
Returns:
List of generated spectra
"""
if random_seed is not None:
np.random.seed(random_seed)
# Create output directories
output_dir = Path(output_dir)
spectra_dir = output_dir / "spectra"
spectra_dir.mkdir(parents=True, exist_ok=True)
# Initialize generator
generator = SpectrumGenerator(
detector_config=RADIACODE_CONFIGS.get(detector_name),
random_seed=random_seed
)
# Get isotope pool
isotope_pool = get_common_isotope_pool()
print(f"Using isotope pool with {len(isotope_pool)} isotopes")
# Calculate sample counts for each category
n_single = int(num_samples * single_isotope_fraction)
n_dual = int(num_samples * dual_isotope_fraction)
n_multi = int(num_samples * multi_isotope_fraction)
n_background = int(num_samples * background_only_fraction)
n_low_signal = int(num_samples * low_signal_fraction)
n_subtracted = int(num_samples * subtracted_fraction)
# Adjust to ensure we hit exactly num_samples
remaining = num_samples - (n_single + n_dual + n_multi + n_background + n_low_signal + n_subtracted)
n_single += remaining
total_generated = 0
print(f"\nGenerating {num_samples} synthetic spectra:")
print(f" - Single isotope: {n_single}")
print(f" - Dual isotope: {n_dual}")
print(f" - Multi isotope (3+): {n_multi}")
print(f" - Background only: {n_background}")
print(f" - Low signal (0.01-5 Bq): {n_low_signal}")
print(f" - Background-subtracted: {n_subtracted}")
print()
sample_num = 0
# Generate single isotope samples
print("Generating single-isotope samples...")
for i in range(n_single):
isotope = np.random.choice(isotope_pool)
activity = np.random.uniform(*activity_range)
duration = np.random.uniform(*duration_range)
spectrum = generate_single_isotope_sample(
generator,
isotope,
activity,
duration,
detector_name=detector_name,
include_background=True,
measured_background_path=measured_background_path,
)
# Save spectrum (don't accumulate in memory)
save_spectrum(
spectrum,
spectra_dir,
save_image=True,
image_format='npy'
)
del spectrum # Free memory immediately
sample_num += 1
if sample_num % 100 == 0:
print(f" Generated {sample_num}/{num_samples} samples...")
# Generate dual isotope samples
print("Generating dual-isotope samples...")
for i in range(n_dual):
isotopes = np.random.choice(isotope_pool, size=2, replace=False)
activities = [np.random.uniform(*activity_range) for _ in range(2)]
duration = np.random.uniform(*duration_range)
spectrum = generate_mixed_isotope_sample(
generator,
list(isotopes),
activities,
duration,
detector_name=detector_name,
include_background=True,
measured_background_path=measured_background_path,
)
save_spectrum(
spectrum,
spectra_dir,
save_image=True,
image_format='npy'
)
del spectrum
sample_num += 1
if sample_num % 100 == 0:
print(f" Generated {sample_num}/{num_samples} samples...")
# Generate multi-isotope samples
print("Generating multi-isotope samples...")
for i in range(n_multi):
num_isotopes = np.random.randint(3, min(6, len(isotope_pool)))
isotopes = np.random.choice(isotope_pool, size=num_isotopes, replace=False)
activities = [np.random.uniform(*activity_range) for _ in range(num_isotopes)]
duration = np.random.uniform(*duration_range)
spectrum = generate_mixed_isotope_sample(
generator,
list(isotopes),
activities,
duration,
detector_name=detector_name,
include_background=True,
measured_background_path=measured_background_path,
)
save_spectrum(
spectrum,
spectra_dir,
save_image=True,
image_format='npy'
)
del spectrum
sample_num += 1
if sample_num % 100 == 0:
print(f" Generated {sample_num}/{num_samples} samples...")
# Generate background-only samples
print("Generating background-only samples...")
for i in range(n_background):
duration = np.random.uniform(*duration_range)
config = SpectrumConfig(
duration_seconds=duration,
sources=[], # No additional sources
include_background=True,
detector_name=detector_name,
measured_background_path=measured_background_path,
)
spectrum = generator.generate_spectrum(config)
save_spectrum(
spectrum,
spectra_dir,
save_image=True,
image_format='npy'
)
del spectrum
sample_num += 1
# Generate low-signal samples (weak sources, 0.01-5 Bq)
print("Generating low-signal samples...")
for i in range(n_low_signal):
isotope = np.random.choice(isotope_pool)
activity = np.random.uniform(0.01, 5.0)
duration = np.random.uniform(*duration_range)
spectrum = generate_single_isotope_sample(
generator,
isotope,
activity,
duration,
detector_name=detector_name,
include_background=True,
measured_background_path=measured_background_path,
)
save_spectrum(
spectrum,
spectra_dir,
save_image=True,
image_format='npy'
)
del spectrum
sample_num += 1
if sample_num % 100 == 0:
print(f" Generated {sample_num}/{num_samples} samples...")
# Generate background-subtracted samples (simulates inference pipeline)
print("Generating background-subtracted samples...")
for i in range(n_subtracted):
num_iso = np.random.choice([1, 2, 3], p=[0.5, 0.3, 0.2])
isotopes = np.random.choice(isotope_pool, size=num_iso, replace=False)
activities = [np.random.uniform(0.1, 50.0) for _ in range(num_iso)]
duration = np.random.uniform(*duration_range)
sources = [
IsotopeSource(
isotope_name=name,
activity_bq=activity,
include_daughters=True
)
for name, activity in zip(isotopes, activities)
]
config = SpectrumConfig(
duration_seconds=duration,
sources=sources,
include_background=True,
subtract_background=True,
detector_name=detector_name,
measured_background_path=measured_background_path,
)
spectrum = generator.generate_spectrum(config)
save_spectrum(
spectrum,
spectra_dir,
save_image=True,
image_format='npy'
)
del spectrum
sample_num += 1
if sample_num % 100 == 0:
print(f" Generated {sample_num}/{num_samples} samples...")
total_generated = sample_num
print(f"\nGenerated {total_generated} samples total")
def main():
parser = argparse.ArgumentParser(
description="Generate synthetic gamma spectra for ML training"
)
parser.add_argument(
"--num_samples",
type=int,
default=10,
help="Number of samples to generate (default: 10)"
)
parser.add_argument(
"--output_dir",
type=str,
default="O:/master_data_collection/isotopev2",
help="Output directory (default: O:/master_data_collection/isotopev2)"
)
parser.add_argument(
"--detector",
type=str,
default="radiacode_103",
choices=list(RADIACODE_CONFIGS.keys()),
help="Detector to simulate (default: radiacode_103)"
)
parser.add_argument(
"--min_duration",
type=float,
default=60,
help="Minimum spectrum duration in seconds (default: 60)"
)
parser.add_argument(
"--max_duration",
type=float,
default=300,
help="Maximum spectrum duration in seconds (default: 300)"
)
parser.add_argument(
"--min_activity",
type=float,
default=1.0,
help="Minimum source activity in Bq (default: 1.0)"
)
parser.add_argument(
"--max_activity",
type=float,
default=100.0,
help="Maximum source activity in Bq (default: 100.0)"
)
parser.add_argument(
"--measured_background",
type=str,
default=None,
help="Path to measured background .npy file for hybrid training"
)
parser.add_argument(
"--save_png",
action="store_true",
help="Also save PNG images of spectra"
)
parser.add_argument(
"--seed",
type=int,
default=None,
help="Random seed for reproducibility"
)
args = parser.parse_args()
print("=" * 60)
print("Synthetic Gamma Spectra Generator")
print("=" * 60)
print(f"Samples to generate: {args.num_samples}")
print(f"Output directory: {args.output_dir}")
print(f"Detector: {args.detector}")
print(f"Duration range: {args.min_duration}-{args.max_duration} seconds")
print(f"Activity range: {args.min_activity}-{args.max_activity} Bq")
print(f"Random seed: {args.seed}")
print("=" * 60)
generate_training_batch(
num_samples=args.num_samples,
output_dir=Path(args.output_dir),
detector_name=args.detector,
duration_range=(args.min_duration, args.max_duration),
activity_range=(args.min_activity, args.max_activity),
save_png=args.save_png,
random_seed=args.seed,
measured_background_path=args.measured_background,
)
print("\n" + "=" * 60)
print("Generation complete!")
print("=" * 60)
# Count generated files
spectra_dir = Path(args.output_dir) / "spectra"
npy_files = list(spectra_dir.glob("spectrum_*.npy"))
print(f"\nTotal samples generated: {len(npy_files)}")
if __name__ == "__main__":
main()