- VegaModel CNN-FCNN 34.5M params, 82 isotopes, val acc 99.89% - Generation 50k spectres synthetiques 1D (12-24h durees) - Entrainement 100 epochs sur RTX 5060 Ti (CUDA 12.8, Blackwell) - Detection continue avec soustraction du background - Capture background 24h avec gestion deconnexion - Docker Compose : conteneur train (GPU) + detect (CPU/USB) - Modele entraine inclus (vega_best.pt, 395 Mo) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
527 lines
18 KiB
Python
527 lines
18 KiB
Python
"""
Synthetic Spectra Generation Script v2

Improvements over v1:
- Parallel generation using multiprocessing for 10x+ speedup
- Class-balanced isotope sampling to ensure all isotopes are represented
- More variable background noise (intensity, composition)
- Memory efficient - doesn't accumulate spectra in memory
- Progress reporting with ETA

Usage:
    python -m synthetic_spectra.generate_spectra_v2 --num_samples 100000 --workers 8
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
import json
|
|
from datetime import datetime
|
|
import numpy as np
|
|
from multiprocessing import Pool, cpu_count
|
|
from functools import partial
|
|
import time
|
|
from typing import List, Tuple, Dict, Optional
|
|
import os
|
|
|
|
# Add parent directory to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from synthetic_spectra.generator import (
|
|
SpectrumGenerator,
|
|
SpectrumConfig,
|
|
IsotopeSource,
|
|
GeneratedSpectrum,
|
|
save_spectrum,
|
|
)
|
|
from synthetic_spectra.config import RADIACODE_CONFIGS
|
|
from synthetic_spectra.ground_truth import get_isotope
|
|
|
|
|
|
# =============================================================================
|
|
# ISOTOPE POOL WITH CATEGORIES FOR BALANCED SAMPLING
|
|
# =============================================================================
|
|
|
|
# Candidate isotope names grouped by origin / typical use.
# A name may legitimately appear in more than one group (e.g. "I-131" is both
# a medical and a reactor/fallout isotope). Key order is insertion order.
ISOTOPE_CATEGORIES = {
    "calibration": [
        "Cs-137",
        "Co-60",
        "Am-241",
        "Ba-133",
        "Eu-152",
        "Na-22",
        "Co-57",
        "Mn-54",
    ],
    "medical": [
        "Tc-99m",
        "I-131",
        "I-123",
        "F-18",
        "Ga-67",
        "Ga-68",
        "In-111",
        "Lu-177",
        "Tl-201",
    ],
    "industrial": [
        "Ir-192",
        "Se-75",
        "Zn-65",
        "Co-58",
        "Cd-109",
    ],
    "natural_background": [
        "K-40",
        "Ra-226",
        "U-235",
        "U-238",
        "Th-232",
    ],
    "decay_chain_u238": [
        "Pb-214",
        "Bi-214",
        "Pb-210",
    ],
    "decay_chain_th232": [
        "Pb-212",
        "Bi-212",
        "Tl-208",
        "Ac-228",
        "Ra-224",
    ],
    "reactor_fallout": [
        "Cs-134",
        "I-131",
        "Sr-90",
        "Zr-95",
        "Nb-95",
        "Ru-103",
        "Ce-141",
        "Ce-144",
        "Sb-125",
    ],
}
|
|
|
|
|
|
def get_valid_isotope_pool() -> Tuple[List[str], Dict[str, List[str]]]:
    """
    Collect the usable isotopes from ISOTOPE_CATEGORIES, grouped by category.

    A name is usable when ``get_isotope(name)`` returns a truthy object whose
    ``gamma_lines`` has at least one entry.

    Returns:
        Tuple of (flat_list, category_dict):
        - flat_list: every usable name exactly once, in first-seen order.
        - category_dict: category -> usable names of that category (a
          category with no usable names maps to an empty list).
    """
    flat: List[str] = []
    by_category: Dict[str, List[str]] = {}

    for category, names in ISOTOPE_CATEGORIES.items():
        kept = []
        for name in names:
            record = get_isotope(name)
            # Skip unknown isotopes and those without any gamma line.
            if not record or len(record.gamma_lines) == 0:
                continue
            kept.append(name)
            # The same isotope can sit in several categories; list it once.
            if name not in flat:
                flat.append(name)
        by_category[category] = kept

    return flat, by_category
|
|
|
|
|
|
# =============================================================================
|
|
# BACKGROUND VARIATION
|
|
# =============================================================================
|
|
|
|
class BackgroundConfig:
    """Configuration for varied background generation.

    The background count rate is drawn as ``base_cps`` times a multiplier
    sampled uniformly from ``[intensity_min, intensity_max]``; each natural
    component is switched on independently with its own probability.

    Args:
        intensity_min: Lower bound of the background intensity multiplier.
        intensity_max: Upper bound of the background intensity multiplier.
        k40_prob: Probability that ``include_k40`` is True.
        radon_prob: Probability that ``include_radon`` is True.
        thorium_prob: Probability that ``include_thorium`` is True.
        base_cps: Count rate that a multiplier of 1.0 corresponds to.
            Defaults to 5.0, the value previously hard-coded in ``sample``.
    """

    def __init__(
        self,
        intensity_min: float = 0.3,
        intensity_max: float = 3.0,
        k40_prob: float = 0.95,  # Almost always present
        radon_prob: float = 0.8,  # Usually present indoors
        thorium_prob: float = 0.6,  # Sometimes present
        base_cps: float = 5.0,
    ):
        self.intensity_min = intensity_min
        self.intensity_max = intensity_max
        self.k40_prob = k40_prob
        self.radon_prob = radon_prob
        self.thorium_prob = thorium_prob
        self.base_cps = base_cps

    def sample(self, rng: np.random.Generator) -> dict:
        """Sample a random background configuration.

        Consumes exactly four draws from ``rng``, in this order: the intensity
        multiplier, then the K-40, radon and thorium switches.

        Args:
            rng: Random generator to draw from.

        Returns:
            Dict with keys 'background_cps', 'include_k40', 'include_radon'
            and 'include_thorium'.
        """
        multiplier = rng.uniform(self.intensity_min, self.intensity_max)
        return {
            'background_cps': multiplier * self.base_cps,
            'include_k40': rng.random() < self.k40_prob,
            'include_radon': rng.random() < self.radon_prob,
            'include_thorium': rng.random() < self.thorium_prob,
        }
|
|
|
|
|
|
# =============================================================================
|
|
# SINGLE SAMPLE GENERATION (for parallel workers)
|
|
# =============================================================================
|
|
|
|
def generate_single_sample(
    args: Tuple[int, dict]
) -> Optional[str]:
    """
    Generate a single sample and save it. Designed to be called by worker
    processes, hence the single tuple argument.

    Args:
        args: Tuple of (sample_index, config_dict). config_dict must provide
            'base_seed', 'detector_name', 'sample_types', 'isotope_pool',
            'category_pools', 'duration_range', 'activity_range' and
            'output_dir'; 'bg_intensity_min' and 'bg_intensity_max' are
            optional (defaults 0.3 and 3.0).

    Returns:
        Sample ID if successful, None if failed. Any exception is printed
        and swallowed so that one bad sample does not abort the whole batch.
    """
    sample_idx, config = args

    try:
        # The seed depends only on the sample index, so the parameters drawn
        # here do not depend on which worker picks the sample up.
        rng = np.random.default_rng(config['base_seed'] + sample_idx)

        # A fresh generator is built for every sample.
        detector_config = RADIACODE_CONFIGS.get(config['detector_name'])
        generator = SpectrumGenerator(detector_config=detector_config)

        # Determine sample type based on distribution
        sample_type = config['sample_types'][sample_idx % len(config['sample_types'])]

        isotope_pool = config['isotope_pool']
        category_pools = config['category_pools']

        # NOTE: the order of rng draws below (background, duration, sources)
        # is part of the reproducibility contract for a given seed.
        bg_config = BackgroundConfig(
            intensity_min=config.get('bg_intensity_min', 0.3),
            intensity_max=config.get('bg_intensity_max', 3.0),
        )
        bg_params = bg_config.sample(rng)

        # Random duration
        duration = rng.uniform(*config['duration_range'])

        sources = []

        def add_source(isotope_name):
            """Append a source for isotope_name with a freshly drawn activity."""
            activity = rng.uniform(*config['activity_range'])
            sources.append(IsotopeSource(
                isotope_name=isotope_name,
                activity_bq=activity,
                include_daughters=True
            ))

        if sample_type == 'single':
            # For class balance, cycle through isotopes
            add_source(isotope_pool[sample_idx % len(isotope_pool)])

        elif sample_type == 'dual':
            # Pick from (possibly) different categories for variety; an empty
            # category falls back to the flat pool.
            categories = list(category_pools.keys())
            cat1, cat2 = rng.choice(categories, size=2, replace=True)
            iso1 = rng.choice(category_pools[cat1]) if category_pools[cat1] else rng.choice(isotope_pool)
            iso2 = rng.choice(category_pools[cat2]) if category_pools[cat2] else rng.choice(isotope_pool)

            # Without this guard the resampling loop below would never end
            # when the pool offers nothing but iso1.
            if iso2 == iso1 and all(name == iso1 for name in isotope_pool):
                raise ValueError(
                    "dual sample needs at least two distinct isotopes in the pool"
                )

            # Ensure different isotopes
            while iso2 == iso1:
                iso2 = rng.choice(isotope_pool)

            for iso in (iso1, iso2):
                add_source(iso)

        elif sample_type == 'multi':
            # Aim for 3-5 isotopes from various categories; fewer are produced
            # when a duplicate cannot be replaced within 10 attempts.
            num_isotopes = rng.integers(3, 6)
            categories = list(category_pools.keys())
            selected = set()

            for _ in range(num_isotopes):
                cat = rng.choice(categories)
                pool = category_pools[cat] if category_pools[cat] else isotope_pool
                iso = rng.choice(pool)

                # Avoid duplicates
                attempts = 0
                while iso in selected and attempts < 10:
                    iso = rng.choice(isotope_pool)
                    attempts += 1

                if iso not in selected:
                    selected.add(iso)
                    add_source(iso)

        # Any other sample type (e.g. 'background'): sources stays empty

        # Create spectrum config
        spec_config = SpectrumConfig(
            duration_seconds=duration,
            sources=sources,
            include_background=True,
            background_cps=bg_params['background_cps'],
            include_k40=bg_params['include_k40'],
            include_radon=bg_params['include_radon'],
            include_thorium=bg_params['include_thorium'],
            detector_name=config['detector_name'],
        )

        # Generate spectrum
        spectrum = generator.generate_spectrum(spec_config)

        # Save spectrum
        output_dir = Path(config['output_dir']) / "spectra"
        save_spectrum(
            spectrum,
            output_dir,
            save_image=True,
            image_format='npy'  # Skip PNG for speed
        )

        return spectrum.sample_id

    except Exception as e:
        # Deliberate best-effort boundary: report and let the batch continue.
        print(f"Error generating sample {sample_idx}: {e}")
        return None
|
|
|
|
|
|
# =============================================================================
|
|
# MAIN BATCH GENERATION
|
|
# =============================================================================
|
|
|
|
def generate_training_batch_parallel(
    num_samples: int,
    output_dir: Path,
    detector_name: str = "radiacode_103",
    duration_range: Tuple[float, float] = (60, 300),
    activity_range: Tuple[float, float] = (1.0, 100.0),
    single_isotope_fraction: float = 0.40,
    dual_isotope_fraction: float = 0.30,
    multi_isotope_fraction: float = 0.20,
    background_only_fraction: float = 0.10,
    bg_intensity_range: Tuple[float, float] = (0.3, 3.0),
    num_workers: Optional[int] = None,
    random_seed: Optional[int] = None,
    chunk_size: int = 100,
) -> int:
    """
    Generate training samples in parallel.

    Samples are written under ``output_dir / "spectra"`` by the worker
    function; progress is printed after every chunk.

    Args:
        num_samples: Total number of samples to generate
        output_dir: Output directory
        detector_name: Detector to simulate
        duration_range: (min, max) duration in seconds
        activity_range: (min, max) activity in Bq
        single_isotope_fraction: Fraction of single-isotope samples
        dual_isotope_fraction: Fraction of dual-isotope samples
        multi_isotope_fraction: Fraction of multi-isotope samples
        background_only_fraction: Fraction of background-only samples
        bg_intensity_range: (min, max) background intensity multiplier
        num_workers: Number of parallel workers (default: CPU count - 1,
            but at least 1)
        random_seed: Base random seed (default: current Unix time)
        chunk_size: Number of samples dispatched per progress update

    Returns:
        Number of successfully generated samples

    Raises:
        ValueError: If chunk_size is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    if num_workers is None:
        num_workers = max(1, cpu_count() - 1)

    if random_seed is None:
        random_seed = int(time.time())

    # Create output directory
    output_dir = Path(output_dir)
    spectra_dir = output_dir / "spectra"
    spectra_dir.mkdir(parents=True, exist_ok=True)

    # Get isotope pools
    isotope_pool, category_pools = get_valid_isotope_pool()

    print(f"Isotope pool: {len(isotope_pool)} isotopes across {len(category_pools)} categories")

    # Calculate sample counts
    n_single = int(num_samples * single_isotope_fraction)
    n_dual = int(num_samples * dual_isotope_fraction)
    n_multi = int(num_samples * multi_isotope_fraction)
    n_background = int(num_samples * background_only_fraction)

    # Rounding leftovers go to the single-isotope bucket to hit the exact count
    remaining = num_samples - (n_single + n_dual + n_multi + n_background)
    n_single += remaining

    # Create sample type list (shuffled for variety in batches)
    sample_types = (
        ['single'] * n_single +
        ['dual'] * n_dual +
        ['multi'] * n_multi +
        ['background'] * n_background
    )
    # NumPy's global RNG is kept here so that a given seed keeps producing
    # the same type ordering.
    np.random.seed(random_seed)
    np.random.shuffle(sample_types)

    print(f"\nGenerating {num_samples} samples with {num_workers} workers:")
    print(f" - Single isotope: {n_single} ({single_isotope_fraction*100:.0f}%)")
    print(f" - Dual isotope: {n_dual} ({dual_isotope_fraction*100:.0f}%)")
    print(f" - Multi isotope: {n_multi} ({multi_isotope_fraction*100:.0f}%)")
    print(f" - Background only: {n_background} ({background_only_fraction*100:.0f}%)")
    print(f" - Background intensity: {bg_intensity_range[0]:.1f}x - {bg_intensity_range[1]:.1f}x")
    print()

    # Settings shared by every task. 'sample_types' is added per task below.
    shared_config = {
        'detector_name': detector_name,
        'output_dir': str(output_dir),
        'duration_range': duration_range,
        'activity_range': activity_range,
        'bg_intensity_min': bg_intensity_range[0],
        'bg_intensity_max': bg_intensity_range[1],
        'base_seed': random_seed,
        'isotope_pool': isotope_pool,
        'category_pools': category_pools,
    }

    # Generate samples in parallel
    start_time = time.time()
    successful = 0
    n_types = len(sample_types)

    with Pool(processes=num_workers) as pool:
        # Process in chunks and report progress
        for chunk_start in range(0, num_samples, chunk_size):
            chunk_end = min(chunk_start + chunk_size, num_samples)

            # Each task carries only its own sample type, wrapped in a
            # one-element list because the worker looks the type up with
            # ``sample_types[idx % len(sample_types)]``. Shipping the full
            # num_samples-long list with every task would re-serialize it
            # for each dispatched batch.
            chunk_args = [
                (idx, {**shared_config, 'sample_types': [sample_types[idx % n_types]]})
                for idx in range(chunk_start, chunk_end)
            ]

            results = pool.map(generate_single_sample, chunk_args)

            chunk_success = sum(1 for r in results if r is not None)
            successful += chunk_success

            # Progress report
            elapsed = time.time() - start_time
            rate = successful / elapsed if elapsed > 0 else 0
            eta = (num_samples - successful) / rate if rate > 0 else 0

            print(f" Progress: {successful}/{num_samples} ({100*successful/num_samples:.1f}%) | "
                  f"Rate: {rate:.1f} samples/s | ETA: {eta/60:.1f} min")

    total_time = time.time() - start_time
    # Guard against a zero elapsed time (e.g. nothing to generate on a
    # coarse clock), mirroring the in-loop rate computation.
    average_rate = successful / total_time if total_time > 0 else 0

    print(f"\n{'='*60}")
    print("Generation complete!")
    print(f" Total samples: {successful}/{num_samples}")
    print(f" Total time: {total_time/60:.1f} minutes")
    print(f" Average rate: {average_rate:.1f} samples/second")
    print(f"{'='*60}")

    return successful
|
|
|
|
|
|
def main():
    """Command-line entry point: parse options, print a summary, run the batch."""
    parser = argparse.ArgumentParser(
        description="Generate synthetic gamma spectra (v2 - parallel, balanced)"
    )

    parser.add_argument(
        "--num_samples", "-n",
        type=int,
        default=100000,
        help="Number of samples to generate (default: 100000)"
    )

    parser.add_argument(
        "--output_dir", "-o",
        type=str,
        default="O:/master_data_collection/isotopev2",
        help="Output directory (default: O:/master_data_collection/isotopev2)"
    )

    parser.add_argument(
        "--detector",
        type=str,
        default="radiacode_103",
        choices=list(RADIACODE_CONFIGS.keys()),
        help="Detector to simulate (default: radiacode_103)"
    )

    parser.add_argument(
        "--workers", "-w",
        type=int,
        default=None,
        help="Number of parallel workers (default: CPU count - 1)"
    )

    parser.add_argument(
        "--min_duration",
        type=float,
        default=60,
        help="Minimum duration in seconds (default: 60)"
    )

    parser.add_argument(
        "--max_duration",
        type=float,
        default=300,
        help="Maximum duration in seconds (default: 300)"
    )

    parser.add_argument(
        "--min_activity",
        type=float,
        default=1.0,
        help="Minimum activity in Bq (default: 1.0)"
    )

    parser.add_argument(
        "--max_activity",
        type=float,
        default=100.0,
        help="Maximum activity in Bq (default: 100.0)"
    )

    parser.add_argument(
        "--bg_min",
        type=float,
        default=0.3,
        help="Minimum background intensity multiplier (default: 0.3)"
    )

    parser.add_argument(
        "--bg_max",
        type=float,
        default=3.0,
        help="Maximum background intensity multiplier (default: 3.0)"
    )

    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Random seed for reproducibility"
    )

    parser.add_argument(
        "--chunk_size",
        type=int,
        default=100,
        help="Samples per progress update (default: 100)"
    )

    # Sample type fractions
    parser.add_argument("--single_frac", type=float, default=0.40,
                        help="Fraction of single-isotope samples (default: 0.40)")
    parser.add_argument("--dual_frac", type=float, default=0.30,
                        help="Fraction of dual-isotope samples (default: 0.30)")
    parser.add_argument("--multi_frac", type=float, default=0.20,
                        help="Fraction of multi-isotope samples (default: 0.20)")
    parser.add_argument("--bg_frac", type=float, default=0.10,
                        help="Fraction of background-only samples (default: 0.10)")

    args = parser.parse_args()

    # Show the worker count the batch function will actually use when
    # --workers is omitted: CPU count - 1, but never less than 1.
    if args.workers is not None:
        shown_workers = args.workers
    else:
        shown_workers = max(1, cpu_count() - 1)

    print("=" * 60)
    print("Synthetic Gamma Spectra Generator v2")
    print(" - Parallel processing")
    print(" - Class-balanced sampling")
    print(" - Variable background")
    print("=" * 60)
    print(f"Samples: {args.num_samples:,}")
    print(f"Workers: {shown_workers}")
    print(f"Output: {args.output_dir}")
    print(f"Detector: {args.detector}")
    print(f"Duration: {args.min_duration}-{args.max_duration}s")
    print(f"Activity: {args.min_activity}-{args.max_activity} Bq")
    print(f"Background: {args.bg_min}x-{args.bg_max}x")
    print("=" * 60)

    generate_training_batch_parallel(
        num_samples=args.num_samples,
        output_dir=Path(args.output_dir),
        detector_name=args.detector,
        duration_range=(args.min_duration, args.max_duration),
        activity_range=(args.min_activity, args.max_activity),
        single_isotope_fraction=args.single_frac,
        dual_isotope_fraction=args.dual_frac,
        multi_isotope_fraction=args.multi_frac,
        background_only_fraction=args.bg_frac,
        bg_intensity_range=(args.bg_min, args.bg_max),
        num_workers=args.workers,
        random_seed=args.seed,
        chunk_size=args.chunk_size,
    )


if __name__ == "__main__":
    main()
|