Fix: CsI(Tl) non-linear response correction + detector calibration overhaul
Root cause of Am-241 misidentification: the Radiacode 103's CsI(Tl) crystal shifts low-energy peaks upward (59.5 keV → 71.6 keV for Am-241) due to its non-proportional scintillation response. The model was trained on theoretical peak positions and could not match the shifted real peaks.

Changes:
- Add inverse CsI(Tl) non-linear correction to the inference pipeline (radiacode_monitor.py, web/config.py, test_detection.py):
  E_apparent = E_true * (1 + 0.37 * exp(-E_true/100))
  This corrects the channel mapping so peaks appear at their theoretical energies.
- Fix energy calibration: DetectorConfig now uses E = 0.33 + 2.97*ch with 1023 channels, matching the real detector (was energy_min=20, skip_first_channel=True, different channel width).
- Add K-escape peaks for CsI(Tl) iodine X-ray escape (E - 28.5 keV).
- Add asymmetric peak shapes for low-energy tails (< 200 keV).
- Add log1p normalization in dataset and inference (replaces max-norm).
- Add background-subtracted training mode (subtract_background flag).
- Add low-signal augmentation (0.01-5 Bq activities, 30-300 s durations).
- Update docker-compose.yml: batch_size=32, duration=30-300 s, CSI_NONLINEAR_ALPHA/BETA env vars for detect and web.
- Web dashboard: apply CsI correction to displayed spectra.
- Various UI fixes (Chart.js width, zoom/pan, isotope lines).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@ -5,11 +5,11 @@ DATA_DIR="${DATA_DIR:-/data/synthetic}"
|
||||
MODEL_DIR="${MODEL_DIR:-/models}"
|
||||
NUM_SAMPLES="${NUM_SAMPLES:-50000}"
|
||||
EPOCHS="${EPOCHS:-100}"
|
||||
BATCH_SIZE="${BATCH_SIZE:-64}"
|
||||
BATCH_SIZE="${BATCH_SIZE:-32}"
|
||||
LEARNING_RATE="${LEARNING_RATE:-0.001}"
|
||||
DETECTOR="${DETECTOR:-radiacode_103}"
|
||||
MIN_DURATION="${MIN_DURATION:-43200}"
|
||||
MAX_DURATION="${MAX_DURATION:-86400}"
|
||||
MIN_DURATION="${MIN_DURATION:-30}"
|
||||
MAX_DURATION="${MAX_DURATION:-300}"
|
||||
SEED="${SEED:-42}"
|
||||
MEASURED_BACKGROUND_PATH="${MEASURED_BACKGROUND_PATH:-}"
|
||||
|
||||
@ -20,7 +20,7 @@ echo " Data dir : $DATA_DIR"
|
||||
echo " Model dir : $MODEL_DIR"
|
||||
echo " Samples : $NUM_SAMPLES"
|
||||
echo " Detector : $DETECTOR"
|
||||
echo " Duration : $MIN_DURATION-$MAX_DURATION s"
|
||||
echo " Duration : $MIN_DURATION-$MAX_DURATION s"
|
||||
echo " Epochs : $EPOCHS"
|
||||
echo " Batch size : $BATCH_SIZE"
|
||||
echo " Learning rate: $LEARNING_RATE"
|
||||
|
||||
@ -3,99 +3,77 @@ Detector Configuration Module
|
||||
|
||||
Contains configuration parameters for Radiacode gamma spectrometers
|
||||
and other detector settings.
|
||||
|
||||
Energy calibration matches the real Radiacode 103:
|
||||
E(keV) = 0.33 + 2.97 * channel_index
|
||||
Uses 1023 channels (channel 1023 is overflow, excluded).
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectorConfig:
|
||||
"""Configuration for a gamma spectrometer detector."""
|
||||
|
||||
name: str
|
||||
# Energy range in keV
|
||||
energy_min_kev: float = 20.0
|
||||
energy_max_kev: float = 3000.0
|
||||
|
||||
# Number of channels
|
||||
num_channels: int = 1024
|
||||
|
||||
# Some devices/software workflows treat channel 0 as unreliable/noisy.
|
||||
# This project models "usable" channels by skipping the first raw channel.
|
||||
skip_first_channel: bool = True
|
||||
|
||||
name: str
|
||||
# Energy calibration: E = calibration_offset + calibration_slope * channel
|
||||
# Must match the real detector calibration used in inference.
|
||||
calibration_offset_kev: float = 0.33
|
||||
calibration_slope_kev: float = 2.97
|
||||
|
||||
# Number of usable channels (1023 for Radiacode, channel 1023 is overflow)
|
||||
num_channels: int = 1023
|
||||
|
||||
# FWHM at 662 keV (Cs-137 reference) as fraction
|
||||
fwhm_at_662: float = 0.084 # 8.4%
|
||||
fwhm_uncertainty: float = 0.003 # ±0.3%
|
||||
|
||||
|
||||
# Detector crystal type
|
||||
crystal_type: str = "CsI(Tl)"
|
||||
|
||||
|
||||
# Sensitivity: counts per second at 1 μSv/h for Cs-137
|
||||
sensitivity_cps_per_usvh: float = 30.0
|
||||
|
||||
|
||||
# Detector volume in cm³
|
||||
detector_volume_cm3: float = 1.0
|
||||
|
||||
def get_channel_width_kev(self) -> float:
|
||||
"""Get the width of each channel in keV."""
|
||||
return (self.energy_max_kev - self.energy_min_kev) / self.num_channels
|
||||
|
||||
def get_energy_bins(self) -> np.ndarray:
|
||||
"""Get array of energy bin centers (keV) for the modeled usable channels."""
|
||||
channel_width = self.get_channel_width_kev()
|
||||
|
||||
# Raw device channels are assumed to be 0..num_channels-1 with centers:
|
||||
# E_center(k) = E_min + (k + 0.5) * channel_width
|
||||
# If we skip the first raw channel (k=0), we model usable channels k=1..num_channels-1.
|
||||
start_raw_channel = 1 if self.skip_first_channel else 0
|
||||
raw_channels = np.arange(start_raw_channel, self.num_channels, dtype=np.float64)
|
||||
return self.energy_min_kev + (raw_channels + 0.5) * channel_width
|
||||
|
||||
def get_energy_bins(self) -> np.ndarray:
|
||||
"""Get array of energy bin centers (keV) matching the real detector calibration."""
|
||||
channels = np.arange(self.num_channels, dtype=np.float64)
|
||||
return self.calibration_offset_kev + self.calibration_slope_kev * channels
|
||||
|
||||
def get_fwhm_at_energy(self, energy_kev: float) -> float:
|
||||
"""
|
||||
Calculate FWHM at a given energy.
|
||||
|
||||
|
||||
For scintillators, FWHM scales approximately as sqrt(E).
|
||||
FWHM(E) = FWHM_662 * sqrt(662/E) * E / 662 = FWHM_662 * sqrt(E/662)
|
||||
FWHM(E) = FWHM_662 * sqrt(E/662)
|
||||
"""
|
||||
return self.fwhm_at_662 * np.sqrt(662.0 / energy_kev) * energy_kev
|
||||
|
||||
return self.fwhm_at_662 * np.sqrt(energy_kev / 662.0) * 662.0
|
||||
|
||||
def get_sigma_at_energy(self, energy_kev: float) -> float:
|
||||
"""
|
||||
Get Gaussian sigma at a given energy.
|
||||
sigma = FWHM / (2 * sqrt(2 * ln(2))) ≈ FWHM / 2.355
|
||||
"""
|
||||
"""Get Gaussian sigma at a given energy."""
|
||||
fwhm = self.get_fwhm_at_energy(energy_kev)
|
||||
return fwhm / 2.355
|
||||
|
||||
|
||||
def energy_to_channel(self, energy_kev: float) -> int:
|
||||
"""Convert energy in keV to modeled usable channel index."""
|
||||
channel_width = self.get_channel_width_kev()
|
||||
raw_channel = int((energy_kev - self.energy_min_kev) / channel_width)
|
||||
if self.skip_first_channel:
|
||||
channel = raw_channel - 1
|
||||
max_channel = self.num_channels - 2
|
||||
else:
|
||||
channel = raw_channel
|
||||
max_channel = self.num_channels - 1
|
||||
return max(0, min(max_channel, channel))
|
||||
"""Convert energy in keV to channel index."""
|
||||
channel = int((energy_kev - self.calibration_offset_kev) / self.calibration_slope_kev)
|
||||
return max(0, min(self.num_channels - 1, channel))
|
||||
|
||||
def channel_to_energy(self, channel: int) -> float:
|
||||
"""Convert modeled usable channel index to energy bin center (keV)."""
|
||||
channel_width = self.get_channel_width_kev()
|
||||
raw_channel = channel + (1 if self.skip_first_channel else 0)
|
||||
raw_channel = max(0, min(self.num_channels - 1, int(raw_channel)))
|
||||
return self.energy_min_kev + (raw_channel + 0.5) * channel_width
|
||||
"""Convert channel index to energy in keV."""
|
||||
return self.calibration_offset_kev + self.calibration_slope_kev * channel
|
||||
|
||||
|
||||
# Pre-defined configurations for Radiacode devices
|
||||
RADIACODE_CONFIGS: Dict[str, DetectorConfig] = {
|
||||
"radiacode_101": DetectorConfig(
|
||||
name="Radiacode 101",
|
||||
fwhm_at_662=0.095, # 9.5% (original model, similar to 102)
|
||||
fwhm_at_662=0.095, # 9.5%
|
||||
fwhm_uncertainty=0.004,
|
||||
crystal_type="CsI(Tl)",
|
||||
sensitivity_cps_per_usvh=30.0,
|
||||
@ -119,8 +97,7 @@ RADIACODE_CONFIGS: Dict[str, DetectorConfig] = {
|
||||
),
|
||||
"radiacode_103g": DetectorConfig(
|
||||
name="Radiacode 103G",
|
||||
energy_min_kev=25.0, # Tech spec lists 0.025…3 MeV
|
||||
fwhm_at_662=0.074, # 7.4% (GAGG crystal - better resolution)
|
||||
fwhm_at_662=0.074, # 7.4% (GAGG crystal)
|
||||
fwhm_uncertainty=0.003,
|
||||
crystal_type="GAGG(Ce)",
|
||||
sensitivity_cps_per_usvh=40.0,
|
||||
@ -131,12 +108,12 @@ RADIACODE_CONFIGS: Dict[str, DetectorConfig] = {
|
||||
fwhm_at_662=0.084, # 8.4%
|
||||
fwhm_uncertainty=0.003,
|
||||
crystal_type="CsI(Tl)",
|
||||
sensitivity_cps_per_usvh=77.0, # Higher sensitivity
|
||||
detector_volume_cm3=2.5, # Larger crystal
|
||||
sensitivity_cps_per_usvh=77.0,
|
||||
detector_volume_cm3=2.5,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def get_default_config() -> DetectorConfig:
|
||||
"""Get the default detector configuration (Radiacode 103)."""
|
||||
return RADIACODE_CONFIGS["radiacode_103"]
|
||||
return RADIACODE_CONFIGS["radiacode_103"]
|
||||
@ -128,19 +128,21 @@ def generate_training_batch(
|
||||
num_samples: int,
|
||||
output_dir: Path,
|
||||
detector_name: str = "radiacode_103",
|
||||
duration_range: tuple = (60, 300),
|
||||
duration_range: tuple = (30, 300),
|
||||
activity_range: tuple = (1.0, 100.0),
|
||||
single_isotope_fraction: float = 0.4,
|
||||
dual_isotope_fraction: float = 0.3,
|
||||
multi_isotope_fraction: float = 0.2,
|
||||
single_isotope_fraction: float = 0.3,
|
||||
dual_isotope_fraction: float = 0.2,
|
||||
multi_isotope_fraction: float = 0.15,
|
||||
background_only_fraction: float = 0.1,
|
||||
low_signal_fraction: float = 0.15,
|
||||
subtracted_fraction: float = 0.1,
|
||||
save_png: bool = False,
|
||||
random_seed: int = None,
|
||||
measured_background_path: str = None,
|
||||
) -> list:
|
||||
"""
|
||||
Generate a batch of training samples with various configurations.
|
||||
|
||||
|
||||
Args:
|
||||
num_samples: Total number of samples to generate
|
||||
output_dir: Output directory for spectra and labels
|
||||
@ -151,9 +153,11 @@ def generate_training_batch(
|
||||
dual_isotope_fraction: Fraction of two-isotope samples
|
||||
multi_isotope_fraction: Fraction of 3+ isotope samples
|
||||
background_only_fraction: Fraction of background-only samples
|
||||
low_signal_fraction: Fraction of low-activity samples (0.01-5 Bq)
|
||||
subtracted_fraction: Fraction of background-subtracted samples
|
||||
save_png: Whether to also save PNG images
|
||||
random_seed: Random seed for reproducibility
|
||||
|
||||
|
||||
Returns:
|
||||
List of generated spectra
|
||||
"""
|
||||
@ -181,11 +185,13 @@ def generate_training_batch(
|
||||
n_dual = int(num_samples * dual_isotope_fraction)
|
||||
n_multi = int(num_samples * multi_isotope_fraction)
|
||||
n_background = int(num_samples * background_only_fraction)
|
||||
|
||||
n_low_signal = int(num_samples * low_signal_fraction)
|
||||
n_subtracted = int(num_samples * subtracted_fraction)
|
||||
|
||||
# Adjust to ensure we hit exactly num_samples
|
||||
remaining = num_samples - (n_single + n_dual + n_multi + n_background)
|
||||
remaining = num_samples - (n_single + n_dual + n_multi + n_background + n_low_signal + n_subtracted)
|
||||
n_single += remaining
|
||||
|
||||
|
||||
total_generated = 0
|
||||
|
||||
print(f"\nGenerating {num_samples} synthetic spectra:")
|
||||
@ -193,6 +199,8 @@ def generate_training_batch(
|
||||
print(f" - Dual isotope: {n_dual}")
|
||||
print(f" - Multi isotope (3+): {n_multi}")
|
||||
print(f" - Background only: {n_background}")
|
||||
print(f" - Low signal (0.01-5 Bq): {n_low_signal}")
|
||||
print(f" - Background-subtracted: {n_subtracted}")
|
||||
print()
|
||||
|
||||
sample_num = 0
|
||||
@ -314,6 +322,77 @@ def generate_training_batch(
|
||||
|
||||
sample_num += 1
|
||||
|
||||
# Generate low-signal samples (weak sources, 0.01-5 Bq)
|
||||
print("Generating low-signal samples...")
|
||||
for i in range(n_low_signal):
|
||||
isotope = np.random.choice(isotope_pool)
|
||||
activity = np.random.uniform(0.01, 5.0)
|
||||
duration = np.random.uniform(*duration_range)
|
||||
|
||||
spectrum = generate_single_isotope_sample(
|
||||
generator,
|
||||
isotope,
|
||||
activity,
|
||||
duration,
|
||||
detector_name=detector_name,
|
||||
include_background=True,
|
||||
measured_background_path=measured_background_path,
|
||||
)
|
||||
|
||||
save_spectrum(
|
||||
spectrum,
|
||||
spectra_dir,
|
||||
save_image=True,
|
||||
image_format='npy'
|
||||
)
|
||||
del spectrum
|
||||
|
||||
sample_num += 1
|
||||
|
||||
if sample_num % 100 == 0:
|
||||
print(f" Generated {sample_num}/{num_samples} samples...")
|
||||
|
||||
# Generate background-subtracted samples (simulates inference pipeline)
|
||||
print("Generating background-subtracted samples...")
|
||||
for i in range(n_subtracted):
|
||||
num_iso = np.random.choice([1, 2, 3], p=[0.5, 0.3, 0.2])
|
||||
isotopes = np.random.choice(isotope_pool, size=num_iso, replace=False)
|
||||
activities = [np.random.uniform(0.1, 50.0) for _ in range(num_iso)]
|
||||
duration = np.random.uniform(*duration_range)
|
||||
|
||||
sources = [
|
||||
IsotopeSource(
|
||||
isotope_name=name,
|
||||
activity_bq=activity,
|
||||
include_daughters=True
|
||||
)
|
||||
for name, activity in zip(isotopes, activities)
|
||||
]
|
||||
|
||||
config = SpectrumConfig(
|
||||
duration_seconds=duration,
|
||||
sources=sources,
|
||||
include_background=True,
|
||||
subtract_background=True,
|
||||
detector_name=detector_name,
|
||||
measured_background_path=measured_background_path,
|
||||
)
|
||||
|
||||
spectrum = generator.generate_spectrum(config)
|
||||
|
||||
save_spectrum(
|
||||
spectrum,
|
||||
spectra_dir,
|
||||
save_image=True,
|
||||
image_format='npy'
|
||||
)
|
||||
del spectrum
|
||||
|
||||
sample_num += 1
|
||||
|
||||
if sample_num % 100 == 0:
|
||||
print(f" Generated {sample_num}/{num_samples} samples...")
|
||||
|
||||
total_generated = sample_num
|
||||
print(f"\nGenerated {total_generated} samples total")
|
||||
|
||||
|
||||
@ -49,14 +49,14 @@ class IsotopeSource:
|
||||
@dataclass
|
||||
class SpectrumConfig:
|
||||
"""Configuration for a single spectrum generation."""
|
||||
|
||||
|
||||
# Time parameters
|
||||
duration_seconds: float = 60.0
|
||||
time_interval_seconds: float = 1.0 # Each row in the spectrogram
|
||||
|
||||
|
||||
# Sources to include
|
||||
sources: List[IsotopeSource] = field(default_factory=list)
|
||||
|
||||
|
||||
# Background options
|
||||
include_background: bool = True
|
||||
background_cps: float = 5.0
|
||||
@ -64,18 +64,25 @@ class SpectrumConfig:
|
||||
include_radon: bool = True
|
||||
include_thorium: bool = True
|
||||
measured_background_path: Optional[str] = None
|
||||
|
||||
|
||||
# Background subtraction simulation
|
||||
# When True, generates a second independent background realization
|
||||
# and subtracts it from the spectrum, then clips negatives to 0.
|
||||
# This simulates what happens at inference time (measured bg subtraction).
|
||||
subtract_background: bool = False
|
||||
|
||||
# Detector configuration
|
||||
detector_name: str = "radiacode_103"
|
||||
|
||||
|
||||
# Noise options
|
||||
apply_poisson: bool = True
|
||||
apply_electronic: bool = False
|
||||
electronic_noise_sigma: float = 0.5
|
||||
|
||||
# Normalization
|
||||
|
||||
# Normalization — "log1p" preserves relative signal levels,
|
||||
# works well after background subtraction where many channels are ~0.
|
||||
normalize: bool = True
|
||||
normalization_method: str = "max" # max, sum, log, sqrt
|
||||
normalization_method: str = "log1p" # max, sum, log, sqrt, log1p
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -272,7 +279,7 @@ class SpectrumGenerator:
|
||||
all_source_isotopes.extend(src_iso)
|
||||
all_background_isotopes.extend(bg_iso)
|
||||
|
||||
# Apply noise
|
||||
# Apply noise before any subtraction (Poisson noise on raw counts)
|
||||
if config.apply_poisson:
|
||||
spectrum = apply_poisson_noise(spectrum)
|
||||
|
||||
@ -282,6 +289,24 @@ class SpectrumGenerator:
|
||||
config.electronic_noise_sigma
|
||||
)
|
||||
|
||||
# Simulate background subtraction (matches inference pipeline)
|
||||
if config.subtract_background and config.include_background:
|
||||
# Generate an independent background realization
|
||||
bg_spectrum2, _ = generate_environmental_background(
|
||||
self.energy_bins,
|
||||
config.duration_seconds,
|
||||
background_cps=config.background_cps,
|
||||
include_k40=config.include_k40,
|
||||
include_radon=config.include_radon,
|
||||
include_thorium=config.include_thorium,
|
||||
detector_config=self.detector_config,
|
||||
measured_background_path=config.measured_background_path,
|
||||
)
|
||||
if config.apply_poisson:
|
||||
bg_spectrum2 = apply_poisson_noise(bg_spectrum2)
|
||||
# Subtract and clip — same as inference: net = clip(rate - bg_rate, 0, inf)
|
||||
spectrum = np.maximum(spectrum - bg_spectrum2, 0)
|
||||
|
||||
# Normalize if requested
|
||||
if config.normalize:
|
||||
spectrum = normalize_spectrum(spectrum, config.normalization_method)
|
||||
|
||||
@ -184,38 +184,148 @@ def calculate_expected_counts(
|
||||
return expected
|
||||
|
||||
|
||||
def _k_escape_fraction(energy_kev: float, detector_config: Optional[DetectorConfig] = None) -> float:
    """
    Calculate K-escape peak fraction for CsI(Tl) detector.

    For iodine K-shell (binding energy ~33.2 keV), when a gamma photon
    interacts with the K-shell, there's a chance the K X-ray escapes the
    crystal, producing a peak at E - E_Ka (~28.5 keV for I K-alpha).

    The escape fraction is largest just above the K-edge and decreases with
    energy. It is modelled as a power law anchored at 60 keV:

        fraction(E) = 0.35 * (60 / E) ** 0.92

    which gives ~35% at 60 keV, ~15% at 150 keV and ~4% at 662 keV, and is
    capped at 45% close to the K-edge.

    Args:
        energy_kev: Gamma energy in keV
        detector_config: Detector configuration (not read by this function;
            kept so the signature matches the other response helpers)

    Returns:
        Fraction of photopeak counts that appear in the K-escape peak,
        in the range [0, 0.45]. Returns 0.0 at or below the iodine K-edge.
    """
    # K-shell binding energy for iodine
    k_binding = 33.2  # keV

    # Below the K-edge the K-shell cannot be ionised, so no K X-ray exists
    if energy_kev <= k_binding:
        return 0.0

    # Reference point and slope of the power-law fit
    reference_energy = 60.0  # keV
    reference_fraction = 0.35
    exponent = 0.92

    # Decreasing with energy: the previous saturating-exponential form grew
    # towards 40% at high energy, which is the opposite of the intended trend.
    fraction = reference_fraction * (reference_energy / energy_kev) ** exponent

    return float(np.clip(fraction, 0.0, 0.45))
|
||||
|
||||
|
||||
def _asymmetric_peak(
    energy_bins: np.ndarray,
    peak_energy: float,
    sigma: float,
    amplitude: float,
    tail_fraction: float = 0.0,
    tail_sigma_ratio: float = 3.0
) -> np.ndarray:
    """
    Build a peak with an optional low-energy tail as a sum of two Gaussians.

    The core Gaussian sits at ``peak_energy`` with width ``sigma`` and carries
    ``amplitude * (1 - tail_fraction)``. When ``tail_fraction`` is positive, a
    second, broader Gaussian is added 2 sigma below the peak with width
    ``sigma * tail_sigma_ratio`` carrying ``amplitude * tail_fraction``.

    Args:
        energy_bins: Array of energy bin centers (keV)
        peak_energy: Center energy of the core component (keV)
        sigma: Gaussian sigma of the core component (keV)
        amplitude: Amplitude shared between core and tail; passed through to
            ``gaussian_peak`` (presumably a peak area in counts - confirm
            against ``gaussian_peak``)
        tail_fraction: Share of ``amplitude`` assigned to the tail; values
            <= 0 disable the tail
        tail_sigma_ratio: Tail width as a multiple of ``sigma``

    Returns:
        Array with the core (and, if enabled, tail) contribution per bin
    """
    core_amplitude = amplitude * (1 - tail_fraction)
    core = gaussian_peak(energy_bins, peak_energy, sigma, core_amplitude)

    # Symmetric case: nothing to add
    if tail_fraction <= 0:
        return core

    # Tail component: shifted 2 sigma towards lower energy and broadened
    tail_center = peak_energy - 2.0 * sigma
    tail_width = sigma * tail_sigma_ratio
    tail_amplitude = amplitude * tail_fraction
    tail = gaussian_peak(energy_bins, tail_center, tail_width, tail_amplitude)

    return core + tail
|
||||
|
||||
|
||||
def generate_peak_spectrum(
|
||||
energy_bins: np.ndarray,
|
||||
peak_params: PeakParameters,
|
||||
detector_config: Optional[DetectorConfig] = None
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Generate a single gamma peak with detector response.
|
||||
|
||||
Generate a single gamma peak with realistic CsI(Tl) detector response.
|
||||
|
||||
Includes:
|
||||
- Asymmetric peak shape (low-energy tail from incomplete charge collection)
|
||||
- K-escape peak (Iodine K-shell X-ray escape at E - 28.5 keV)
|
||||
- Energy-dependent resolution
|
||||
|
||||
Note: Peaks are placed at theoretical gamma energies. The non-linear
|
||||
CsI(Tl) response correction is applied in the inference pipeline
|
||||
(radiacode_monitor.py), not here, to keep training data detector-independent.
|
||||
|
||||
Args:
|
||||
energy_bins: Array of energy bin centers (keV)
|
||||
energy_bins: Array of energy bin centers (keV) matching detector calibration
|
||||
peak_params: Peak parameters
|
||||
detector_config: Detector configuration
|
||||
|
||||
|
||||
Returns:
|
||||
Array of expected counts in each bin (not yet Poisson sampled)
|
||||
"""
|
||||
if detector_config is None:
|
||||
detector_config = get_default_config()
|
||||
|
||||
|
||||
# Calculate expected counts
|
||||
amplitude = calculate_expected_counts(peak_params, detector_config)
|
||||
|
||||
if amplitude <= 0:
|
||||
total_amplitude = calculate_expected_counts(peak_params, detector_config)
|
||||
|
||||
if total_amplitude <= 0:
|
||||
return np.zeros_like(energy_bins)
|
||||
|
||||
|
||||
# Calculate peak width
|
||||
fwhm_kev = calculate_fwhm(peak_params.energy_kev, detector_config.fwhm_at_662)
|
||||
sigma = fwhm_to_sigma(fwhm_kev)
|
||||
|
||||
# Generate Gaussian peak
|
||||
peak = gaussian_peak(energy_bins, peak_params.energy_kev, sigma, amplitude)
|
||||
|
||||
|
||||
# Low-energy tail fraction: increases at lower energies due to
|
||||
# incomplete charge collection in CsI(Tl)
|
||||
if peak_params.energy_kev < 200:
|
||||
tail_frac = 0.15 * (1.0 - peak_params.energy_kev / 200.0)
|
||||
else:
|
||||
tail_frac = 0.0
|
||||
|
||||
# Generate main peak (asymmetric)
|
||||
peak = _asymmetric_peak(
|
||||
energy_bins, peak_params.energy_kev, sigma,
|
||||
total_amplitude, tail_fraction=tail_frac
|
||||
)
|
||||
|
||||
# K-escape peak for CsI(Tl)
|
||||
escape_frac = _k_escape_fraction(peak_params.energy_kev, detector_config)
|
||||
if escape_frac > 0:
|
||||
escape_energy = peak_params.energy_kev - 28.5 # I K-alpha at 28.5 keV
|
||||
if escape_energy > 20: # Only if above detection threshold
|
||||
escape_amplitude = total_amplitude * escape_frac
|
||||
# Reduce main peak amplitude
|
||||
peak = peak * (1 - escape_frac)
|
||||
|
||||
# Escape peak has slightly broader resolution
|
||||
escape_fwhm = calculate_fwhm(escape_energy, detector_config.fwhm_at_662)
|
||||
escape_sigma = fwhm_to_sigma(escape_fwhm) * 1.3
|
||||
|
||||
escape_peak = _asymmetric_peak(
|
||||
energy_bins, escape_energy, escape_sigma,
|
||||
escape_amplitude, tail_fraction=0.25
|
||||
)
|
||||
peak = peak + escape_peak
|
||||
|
||||
return peak
|
||||
|
||||
|
||||
@ -636,11 +746,11 @@ def apply_electronic_noise(
|
||||
|
||||
def normalize_spectrum(
|
||||
spectrum: np.ndarray,
|
||||
method: str = "max"
|
||||
method: str = "log1p"
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Normalize a spectrum for ML training.
|
||||
|
||||
|
||||
Args:
|
||||
spectrum: Raw count spectrum
|
||||
method: Normalization method
|
||||
@ -648,7 +758,8 @@ def normalize_spectrum(
|
||||
- "sum": Divide by total counts (probability distribution)
|
||||
- "log": Log transform then max normalize
|
||||
- "sqrt": Square root transform then max normalize
|
||||
|
||||
- "log1p": log(1+x) then max normalize (best for bg-subtracted spectra)
|
||||
|
||||
Returns:
|
||||
Normalized spectrum
|
||||
"""
|
||||
@ -657,7 +768,7 @@ def normalize_spectrum(
|
||||
if max_val > 0:
|
||||
return spectrum / max_val
|
||||
return spectrum
|
||||
|
||||
|
||||
elif method == "sum":
|
||||
total = spectrum.sum()
|
||||
if total > 0:
|
||||
@ -678,6 +789,13 @@ def normalize_spectrum(
|
||||
if max_val > 0:
|
||||
return sqrt_spec / max_val
|
||||
return sqrt_spec
|
||||
|
||||
|
||||
elif method == "log1p":
|
||||
log_spec = np.log1p(np.maximum(spectrum, 0))
|
||||
max_val = log_spec.max()
|
||||
if max_val > 0:
|
||||
return log_spec / max_val
|
||||
return log_spec
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown normalization method: {method}")
|
||||
|
||||
@ -14,7 +14,12 @@ Features:
|
||||
|
||||
from .model import VegaModel, VegaConfig
|
||||
from .dataset import SpectrumDataset, create_data_loaders
|
||||
from .train import train_vega, VegaTrainer
|
||||
|
||||
def __getattr__(name):
    """Resolve ``train_vega`` and ``VegaTrainer`` lazily on attribute access.

    The ``.train`` submodule is imported only when one of these two names is
    requested from the package; any other missing name raises AttributeError.
    """
    if name not in ('train_vega', 'VegaTrainer'):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Deferred import: executed on demand rather than at package import time
    from .train import train_vega, VegaTrainer
    lazily_exported = {'train_vega': train_vega, 'VegaTrainer': VegaTrainer}
    return lazily_exported[name]
|
||||
|
||||
__all__ = [
|
||||
'VegaModel',
|
||||
|
||||
@ -31,24 +31,38 @@ class SpectrumSample:
|
||||
detector: str
|
||||
|
||||
|
||||
def normalize_log1p(spectrum: np.ndarray) -> np.ndarray:
    """Log1p normalization: log(1 + x) / max(log(1 + x)).

    Preserves relative signal levels across channels, works well when
    many channels are zero (e.g. after background subtraction).

    Negative inputs are clipped to 0 before the transform. If the transformed
    spectrum has no positive value (all-zero input) it is returned without
    scaling, and an empty spectrum is returned as an empty array.

    Args:
        spectrum: Count (or count-rate) values per channel

    Returns:
        Transformed spectrum, scaled so its maximum is 1 when any value is
        positive
    """
    log_spec = np.log1p(np.maximum(spectrum, 0))

    # A zero-size array has no maximum; .max() would raise ValueError
    if log_spec.size == 0:
        return log_spec

    max_val = log_spec.max()
    if max_val > 0:
        return log_spec / max_val
    return log_spec
|
||||
|
||||
|
||||
class SpectrumDataset(Dataset):
|
||||
"""
|
||||
PyTorch Dataset for synthetic gamma spectra.
|
||||
|
||||
|
||||
Loads spectra from numpy files and their labels from JSON files.
|
||||
Supports both individual JSON files per sample (efficient for large datasets)
|
||||
and combined labels.json (legacy format).
|
||||
|
||||
|
||||
Converts to tensors suitable for the Vega model.
|
||||
"""
|
||||
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
data_dir: Path,
|
||||
isotope_index: Optional[IsotopeIndex] = None,
|
||||
max_activity_bq: float = 1000.0,
|
||||
collapse_time: bool = True,
|
||||
transform=None
|
||||
transform=None,
|
||||
normalization: str = "log1p"
|
||||
):
|
||||
"""
|
||||
Initialize the dataset.
|
||||
@ -66,6 +80,7 @@ class SpectrumDataset(Dataset):
|
||||
self.max_activity_bq = max_activity_bq
|
||||
self.collapse_time = collapse_time
|
||||
self.transform = transform
|
||||
self.normalization = normalization
|
||||
|
||||
# Detect label format and load sample list
|
||||
self.use_individual_labels = self._detect_label_format()
|
||||
@ -156,7 +171,15 @@ class SpectrumDataset(Dataset):
|
||||
if self.collapse_time and spectrum.ndim == 2:
|
||||
# Average across time intervals to get single spectrum
|
||||
spectrum = spectrum.mean(axis=0)
|
||||
|
||||
|
||||
# Normalize spectrum
|
||||
if self.normalization == "log1p":
|
||||
spectrum = normalize_log1p(spectrum)
|
||||
elif self.normalization == "max":
|
||||
max_val = spectrum.max()
|
||||
if max_val > 0:
|
||||
spectrum = spectrum / max_val
|
||||
|
||||
# Convert to tensor
|
||||
spectrum_tensor = torch.tensor(spectrum, dtype=torch.float32)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user