Pipeline complet Radiacode 103 - identification automatique d'isotopes
- VegaModel CNN-FCNN 34.5M params, 82 isotopes, val acc 99.89% - Generation 50k spectres synthetiques 1D (12-24h durees) - Entrainement 100 epochs sur RTX 5060 Ti (CUDA 12.8, Blackwell) - Detection continue avec soustraction du background - Capture background 24h avec gestion deconnexion - Docker Compose : conteneur train (GPU) + detect (CPU/USB) - Modele entraite inclus (vega_best.pt, 395 Mo) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
526
train/vega_ml/synthetic_spectra/generate_spectra_v2.py
Normal file
526
train/vega_ml/synthetic_spectra/generate_spectra_v2.py
Normal file
@ -0,0 +1,526 @@
|
||||
"""
|
||||
Synthetic Spectra Generation Script v2
|
||||
|
||||
Improvements over v1:
|
||||
- Parallel generation using multiprocessing for 10x+ speedup
|
||||
- Class-balanced isotope sampling to ensure all isotopes are represented
|
||||
- More variable background noise (intensity, composition)
|
||||
- Memory efficient - doesn't accumulate spectra in memory
|
||||
- Progress bar with ETA
|
||||
|
||||
Usage:
|
||||
python -m synthetic_spectra.generate_spectra_v2 --num_samples 100000 --workers 8
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import json
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from functools import partial
|
||||
import time
|
||||
from typing import List, Tuple, Dict, Optional
|
||||
import os
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from synthetic_spectra.generator import (
|
||||
SpectrumGenerator,
|
||||
SpectrumConfig,
|
||||
IsotopeSource,
|
||||
GeneratedSpectrum,
|
||||
save_spectrum,
|
||||
)
|
||||
from synthetic_spectra.config import RADIACODE_CONFIGS
|
||||
from synthetic_spectra.ground_truth import get_isotope
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ISOTOPE POOL WITH CATEGORIES FOR BALANCED SAMPLING
|
||||
# =============================================================================
|
||||
|
||||
ISOTOPE_CATEGORIES = {
|
||||
"calibration": [
|
||||
"Cs-137", "Co-60", "Am-241", "Ba-133", "Eu-152", "Na-22", "Co-57", "Mn-54"
|
||||
],
|
||||
"medical": [
|
||||
"Tc-99m", "I-131", "I-123", "F-18", "Ga-67", "Ga-68", "In-111", "Lu-177", "Tl-201"
|
||||
],
|
||||
"industrial": [
|
||||
"Ir-192", "Se-75", "Zn-65", "Co-58", "Cd-109"
|
||||
],
|
||||
"natural_background": [
|
||||
"K-40", "Ra-226", "U-235", "U-238", "Th-232"
|
||||
],
|
||||
"decay_chain_u238": [
|
||||
"Pb-214", "Bi-214", "Pb-210"
|
||||
],
|
||||
"decay_chain_th232": [
|
||||
"Pb-212", "Bi-212", "Tl-208", "Ac-228", "Ra-224"
|
||||
],
|
||||
"reactor_fallout": [
|
||||
"Cs-134", "I-131", "Sr-90", "Zr-95", "Nb-95", "Ru-103", "Ce-141", "Ce-144", "Sb-125"
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def get_valid_isotope_pool() -> Tuple[List[str], Dict[str, List[str]]]:
|
||||
"""
|
||||
Get all valid isotopes (with gamma lines) organized by category.
|
||||
|
||||
Returns:
|
||||
Tuple of (flat_list, category_dict)
|
||||
"""
|
||||
valid_categories = {}
|
||||
all_isotopes = []
|
||||
|
||||
for category, isotopes in ISOTOPE_CATEGORIES.items():
|
||||
valid = []
|
||||
for name in isotopes:
|
||||
iso = get_isotope(name)
|
||||
if iso and len(iso.gamma_lines) > 0:
|
||||
valid.append(name)
|
||||
if name not in all_isotopes:
|
||||
all_isotopes.append(name)
|
||||
valid_categories[category] = valid
|
||||
|
||||
return all_isotopes, valid_categories
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# BACKGROUND VARIATION
|
||||
# =============================================================================
|
||||
|
||||
class BackgroundConfig:
|
||||
"""Configuration for varied background generation."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
intensity_min: float = 0.3,
|
||||
intensity_max: float = 3.0,
|
||||
k40_prob: float = 0.95, # Almost always present
|
||||
radon_prob: float = 0.8, # Usually present indoors
|
||||
thorium_prob: float = 0.6, # Sometimes present
|
||||
):
|
||||
self.intensity_min = intensity_min
|
||||
self.intensity_max = intensity_max
|
||||
self.k40_prob = k40_prob
|
||||
self.radon_prob = radon_prob
|
||||
self.thorium_prob = thorium_prob
|
||||
|
||||
def sample(self, rng: np.random.Generator) -> dict:
|
||||
"""Sample a random background configuration."""
|
||||
return {
|
||||
'background_cps': rng.uniform(self.intensity_min, self.intensity_max) * 5.0,
|
||||
'include_k40': rng.random() < self.k40_prob,
|
||||
'include_radon': rng.random() < self.radon_prob,
|
||||
'include_thorium': rng.random() < self.thorium_prob,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SINGLE SAMPLE GENERATION (for parallel workers)
|
||||
# =============================================================================
|
||||
|
||||
def generate_single_sample(
|
||||
args: Tuple[int, dict]
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Generate a single sample. Designed to be called by worker processes.
|
||||
|
||||
Args:
|
||||
args: Tuple of (sample_index, config_dict)
|
||||
|
||||
Returns:
|
||||
Sample ID if successful, None if failed
|
||||
"""
|
||||
sample_idx, config = args
|
||||
|
||||
try:
|
||||
# Create RNG with unique seed per sample
|
||||
rng = np.random.default_rng(config['base_seed'] + sample_idx)
|
||||
|
||||
# Initialize generator (each worker creates its own)
|
||||
detector_config = RADIACODE_CONFIGS.get(config['detector_name'])
|
||||
generator = SpectrumGenerator(detector_config=detector_config)
|
||||
|
||||
# Determine sample type based on distribution
|
||||
sample_type = config['sample_types'][sample_idx % len(config['sample_types'])]
|
||||
|
||||
# Get isotopes for this sample
|
||||
isotope_pool = config['isotope_pool']
|
||||
category_pools = config['category_pools']
|
||||
|
||||
# Sample background configuration
|
||||
bg_config = BackgroundConfig(
|
||||
intensity_min=config.get('bg_intensity_min', 0.3),
|
||||
intensity_max=config.get('bg_intensity_max', 3.0),
|
||||
)
|
||||
bg_params = bg_config.sample(rng)
|
||||
|
||||
# Random duration
|
||||
duration = rng.uniform(*config['duration_range'])
|
||||
|
||||
# Build sources based on sample type
|
||||
sources = []
|
||||
|
||||
if sample_type == 'single':
|
||||
# For class balance, cycle through isotopes
|
||||
isotope_idx = sample_idx % len(isotope_pool)
|
||||
isotope = isotope_pool[isotope_idx]
|
||||
activity = rng.uniform(*config['activity_range'])
|
||||
sources.append(IsotopeSource(
|
||||
isotope_name=isotope,
|
||||
activity_bq=activity,
|
||||
include_daughters=True
|
||||
))
|
||||
|
||||
elif sample_type == 'dual':
|
||||
# Pick from different categories for variety
|
||||
categories = list(category_pools.keys())
|
||||
cat1, cat2 = rng.choice(categories, size=2, replace=True)
|
||||
iso1 = rng.choice(category_pools[cat1]) if category_pools[cat1] else rng.choice(isotope_pool)
|
||||
iso2 = rng.choice(category_pools[cat2]) if category_pools[cat2] else rng.choice(isotope_pool)
|
||||
|
||||
# Ensure different isotopes
|
||||
while iso2 == iso1:
|
||||
iso2 = rng.choice(isotope_pool)
|
||||
|
||||
for iso in [iso1, iso2]:
|
||||
activity = rng.uniform(*config['activity_range'])
|
||||
sources.append(IsotopeSource(
|
||||
isotope_name=iso,
|
||||
activity_bq=activity,
|
||||
include_daughters=True
|
||||
))
|
||||
|
||||
elif sample_type == 'multi':
|
||||
# 3-5 isotopes from various categories
|
||||
num_isotopes = rng.integers(3, 6)
|
||||
selected = set()
|
||||
|
||||
for _ in range(num_isotopes):
|
||||
cat = rng.choice(list(category_pools.keys()))
|
||||
pool = category_pools[cat] if category_pools[cat] else isotope_pool
|
||||
iso = rng.choice(pool)
|
||||
|
||||
# Avoid duplicates
|
||||
attempts = 0
|
||||
while iso in selected and attempts < 10:
|
||||
iso = rng.choice(isotope_pool)
|
||||
attempts += 1
|
||||
|
||||
if iso not in selected:
|
||||
selected.add(iso)
|
||||
activity = rng.uniform(*config['activity_range'])
|
||||
sources.append(IsotopeSource(
|
||||
isotope_name=iso,
|
||||
activity_bq=activity,
|
||||
include_daughters=True
|
||||
))
|
||||
|
||||
# elif sample_type == 'background': sources stays empty
|
||||
|
||||
# Create spectrum config
|
||||
spec_config = SpectrumConfig(
|
||||
duration_seconds=duration,
|
||||
sources=sources,
|
||||
include_background=True,
|
||||
background_cps=bg_params['background_cps'],
|
||||
include_k40=bg_params['include_k40'],
|
||||
include_radon=bg_params['include_radon'],
|
||||
include_thorium=bg_params['include_thorium'],
|
||||
detector_name=config['detector_name'],
|
||||
)
|
||||
|
||||
# Generate spectrum
|
||||
spectrum = generator.generate_spectrum(spec_config)
|
||||
|
||||
# Save spectrum
|
||||
output_dir = Path(config['output_dir']) / "spectra"
|
||||
save_spectrum(
|
||||
spectrum,
|
||||
output_dir,
|
||||
save_image=True,
|
||||
image_format='npy' # Skip PNG for speed
|
||||
)
|
||||
|
||||
return spectrum.sample_id
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error generating sample {sample_idx}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MAIN BATCH GENERATION
|
||||
# =============================================================================
|
||||
|
||||
def generate_training_batch_parallel(
|
||||
num_samples: int,
|
||||
output_dir: Path,
|
||||
detector_name: str = "radiacode_103",
|
||||
duration_range: Tuple[float, float] = (60, 300),
|
||||
activity_range: Tuple[float, float] = (1.0, 100.0),
|
||||
single_isotope_fraction: float = 0.40,
|
||||
dual_isotope_fraction: float = 0.30,
|
||||
multi_isotope_fraction: float = 0.20,
|
||||
background_only_fraction: float = 0.10,
|
||||
bg_intensity_range: Tuple[float, float] = (0.3, 3.0),
|
||||
num_workers: int = None,
|
||||
random_seed: int = None,
|
||||
chunk_size: int = 100,
|
||||
) -> int:
|
||||
"""
|
||||
Generate training samples in parallel.
|
||||
|
||||
Args:
|
||||
num_samples: Total number of samples to generate
|
||||
output_dir: Output directory
|
||||
detector_name: Detector to simulate
|
||||
duration_range: (min, max) duration in seconds
|
||||
activity_range: (min, max) activity in Bq
|
||||
single_isotope_fraction: Fraction of single-isotope samples
|
||||
dual_isotope_fraction: Fraction of dual-isotope samples
|
||||
multi_isotope_fraction: Fraction of multi-isotope samples
|
||||
background_only_fraction: Fraction of background-only samples
|
||||
bg_intensity_range: (min, max) background intensity multiplier
|
||||
num_workers: Number of parallel workers (default: CPU count - 1)
|
||||
random_seed: Base random seed
|
||||
chunk_size: Number of samples per worker batch
|
||||
|
||||
Returns:
|
||||
Number of successfully generated samples
|
||||
"""
|
||||
if num_workers is None:
|
||||
num_workers = max(1, cpu_count() - 1)
|
||||
|
||||
if random_seed is None:
|
||||
random_seed = int(time.time())
|
||||
|
||||
# Create output directory
|
||||
output_dir = Path(output_dir)
|
||||
spectra_dir = output_dir / "spectra"
|
||||
spectra_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Get isotope pools
|
||||
isotope_pool, category_pools = get_valid_isotope_pool()
|
||||
|
||||
print(f"Isotope pool: {len(isotope_pool)} isotopes across {len(category_pools)} categories")
|
||||
|
||||
# Calculate sample counts
|
||||
n_single = int(num_samples * single_isotope_fraction)
|
||||
n_dual = int(num_samples * dual_isotope_fraction)
|
||||
n_multi = int(num_samples * multi_isotope_fraction)
|
||||
n_background = int(num_samples * background_only_fraction)
|
||||
|
||||
# Adjust to hit exact count
|
||||
remaining = num_samples - (n_single + n_dual + n_multi + n_background)
|
||||
n_single += remaining
|
||||
|
||||
# Create sample type list (shuffled for variety in batches)
|
||||
sample_types = (
|
||||
['single'] * n_single +
|
||||
['dual'] * n_dual +
|
||||
['multi'] * n_multi +
|
||||
['background'] * n_background
|
||||
)
|
||||
np.random.seed(random_seed)
|
||||
np.random.shuffle(sample_types)
|
||||
|
||||
print(f"\nGenerating {num_samples} samples with {num_workers} workers:")
|
||||
print(f" - Single isotope: {n_single} ({single_isotope_fraction*100:.0f}%)")
|
||||
print(f" - Dual isotope: {n_dual} ({dual_isotope_fraction*100:.0f}%)")
|
||||
print(f" - Multi isotope: {n_multi} ({multi_isotope_fraction*100:.0f}%)")
|
||||
print(f" - Background only: {n_background} ({background_only_fraction*100:.0f}%)")
|
||||
print(f" - Background intensity: {bg_intensity_range[0]:.1f}x - {bg_intensity_range[1]:.1f}x")
|
||||
print()
|
||||
|
||||
# Shared config for all workers
|
||||
shared_config = {
|
||||
'detector_name': detector_name,
|
||||
'output_dir': str(output_dir),
|
||||
'duration_range': duration_range,
|
||||
'activity_range': activity_range,
|
||||
'bg_intensity_min': bg_intensity_range[0],
|
||||
'bg_intensity_max': bg_intensity_range[1],
|
||||
'base_seed': random_seed,
|
||||
'isotope_pool': isotope_pool,
|
||||
'category_pools': category_pools,
|
||||
'sample_types': sample_types,
|
||||
}
|
||||
|
||||
# Generate samples in parallel
|
||||
start_time = time.time()
|
||||
successful = 0
|
||||
|
||||
# Create argument list
|
||||
args_list = [(i, shared_config) for i in range(num_samples)]
|
||||
|
||||
# Use multiprocessing pool
|
||||
with Pool(processes=num_workers) as pool:
|
||||
# Process in chunks and report progress
|
||||
for i in range(0, num_samples, chunk_size):
|
||||
chunk_end = min(i + chunk_size, num_samples)
|
||||
chunk_args = args_list[i:chunk_end]
|
||||
|
||||
results = pool.map(generate_single_sample, chunk_args)
|
||||
|
||||
chunk_success = sum(1 for r in results if r is not None)
|
||||
successful += chunk_success
|
||||
|
||||
# Progress report
|
||||
elapsed = time.time() - start_time
|
||||
rate = successful / elapsed if elapsed > 0 else 0
|
||||
eta = (num_samples - successful) / rate if rate > 0 else 0
|
||||
|
||||
print(f" Progress: {successful}/{num_samples} ({100*successful/num_samples:.1f}%) | "
|
||||
f"Rate: {rate:.1f} samples/s | ETA: {eta/60:.1f} min")
|
||||
|
||||
total_time = time.time() - start_time
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Generation complete!")
|
||||
print(f" Total samples: {successful}/{num_samples}")
|
||||
print(f" Total time: {total_time/60:.1f} minutes")
|
||||
print(f" Average rate: {successful/total_time:.1f} samples/second")
|
||||
print(f"{'='*60}")
|
||||
|
||||
return successful
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate synthetic gamma spectra (v2 - parallel, balanced)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--num_samples", "-n",
|
||||
type=int,
|
||||
default=100000,
|
||||
help="Number of samples to generate (default: 100000)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--output_dir", "-o",
|
||||
type=str,
|
||||
default="O:/master_data_collection/isotopev2",
|
||||
help="Output directory (default: O:/master_data_collection/isotopev2)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--detector",
|
||||
type=str,
|
||||
default="radiacode_103",
|
||||
choices=list(RADIACODE_CONFIGS.keys()),
|
||||
help="Detector to simulate (default: radiacode_103)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--workers", "-w",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Number of parallel workers (default: CPU count - 1)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--min_duration",
|
||||
type=float,
|
||||
default=60,
|
||||
help="Minimum duration in seconds (default: 60)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--max_duration",
|
||||
type=float,
|
||||
default=300,
|
||||
help="Maximum duration in seconds (default: 300)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--min_activity",
|
||||
type=float,
|
||||
default=1.0,
|
||||
help="Minimum activity in Bq (default: 1.0)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--max_activity",
|
||||
type=float,
|
||||
default=100.0,
|
||||
help="Maximum activity in Bq (default: 100.0)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--bg_min",
|
||||
type=float,
|
||||
default=0.3,
|
||||
help="Minimum background intensity multiplier (default: 0.3)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--bg_max",
|
||||
type=float,
|
||||
default=3.0,
|
||||
help="Maximum background intensity multiplier (default: 3.0)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--seed",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Random seed for reproducibility"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--chunk_size",
|
||||
type=int,
|
||||
default=100,
|
||||
help="Samples per progress update (default: 100)"
|
||||
)
|
||||
|
||||
# Sample type fractions
|
||||
parser.add_argument("--single_frac", type=float, default=0.40)
|
||||
parser.add_argument("--dual_frac", type=float, default=0.30)
|
||||
parser.add_argument("--multi_frac", type=float, default=0.20)
|
||||
parser.add_argument("--bg_frac", type=float, default=0.10)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("=" * 60)
|
||||
print("Synthetic Gamma Spectra Generator v2")
|
||||
print(" - Parallel processing")
|
||||
print(" - Class-balanced sampling")
|
||||
print(" - Variable background")
|
||||
print("=" * 60)
|
||||
print(f"Samples: {args.num_samples:,}")
|
||||
print(f"Workers: {args.workers or (cpu_count() - 1)}")
|
||||
print(f"Output: {args.output_dir}")
|
||||
print(f"Detector: {args.detector}")
|
||||
print(f"Duration: {args.min_duration}-{args.max_duration}s")
|
||||
print(f"Activity: {args.min_activity}-{args.max_activity} Bq")
|
||||
print(f"Background: {args.bg_min}x-{args.bg_max}x")
|
||||
print("=" * 60)
|
||||
|
||||
generate_training_batch_parallel(
|
||||
num_samples=args.num_samples,
|
||||
output_dir=Path(args.output_dir),
|
||||
detector_name=args.detector,
|
||||
duration_range=(args.min_duration, args.max_duration),
|
||||
activity_range=(args.min_activity, args.max_activity),
|
||||
single_isotope_fraction=args.single_frac,
|
||||
dual_isotope_fraction=args.dual_frac,
|
||||
multi_isotope_fraction=args.multi_frac,
|
||||
background_only_fraction=args.bg_frac,
|
||||
bg_intensity_range=(args.bg_min, args.bg_max),
|
||||
num_workers=args.workers,
|
||||
random_seed=args.seed,
|
||||
chunk_size=args.chunk_size,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user