From c891c6b23af1fc86decf30d6363ba4ed2db3cf26 Mon Sep 17 00:00:00 2001 From: Jacquin Antoine Date: Sun, 10 May 2026 01:04:02 +0200 Subject: [PATCH] Fix CUDA fork: spawn multiprocessing + graceful GPU fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - multiprocessing.set_start_method('spawn') pour éviter la corruption du contexte CUDA dans les processus forkés - to_gpu() et xp_*_filter() attrapent les erreurs CUDA et tombent sur CPU au lieu de crasher - _gpu_available() vérifie que le GPU est utilisable avant chaque opération - gpu_cleanup() attrape les exceptions au cas où le GPU serait indisponible Co-Authored-By: Claude Opus 4.6 --- lidar_pipeline/gpu.py | 75 +++++++++++++++++++++++++++++--------- lidar_pipeline/pipeline.py | 7 ++++ 2 files changed, 64 insertions(+), 18 deletions(-) diff --git a/lidar_pipeline/gpu.py b/lidar_pipeline/gpu.py index 5709bf7..151bc77 100644 --- a/lidar_pipeline/gpu.py +++ b/lidar_pipeline/gpu.py @@ -3,6 +3,9 @@ Provides CuPy/numpy abstraction layer. If CuPy is available and a CUDA GPU is detected, array operations are accelerated on the GPU. Otherwise, all operations fall back to numpy/scipy on CPU. + +GPU errors (e.g. in forked subprocesses) are caught gracefully and +cause an automatic fallback to CPU for the current operation. """ import logging @@ -16,64 +19,100 @@ HAS_GPU = False _gpu_name = None _gpu_mem_gb = 0 _xp = np # Default: CPU +_cp = None # cupy module (or None) +_cp_ndimage = None # cupyx.scipy.ndimage (or None) try: - import cupy as cp - import cupyx.scipy.ndimage as cp_ndimage + import cupy as _cupy + import cupyx.scipy.ndimage as _cupy_ndimage - _gpu_info = cp.cuda.runtime.getDeviceProperties(0) + _gpu_info = _cupy.cuda.runtime.getDeviceProperties(0) _gpu_name = _gpu_info['name'].decode() if isinstance(_gpu_info['name'], bytes) else str(_gpu_info['name']) _gpu_mem_gb = _gpu_info['totalGlobalMem'] // (1024 ** 3) HAS_GPU = True - _xp = cp + _xp = _cupy + _cp = _cupy + _cp_ndimage = _cupy_ndimage except (ImportError, Exception): pass +def _gpu_available(): + """Check if GPU is usable right now (may fail in forked subprocesses).""" + if not HAS_GPU: + return False + try: + _cp.cuda.runtime.getDevice() + return True + except Exception: + return False + + def log_gpu_status(): """Log GPU detection result. Called after logging is configured.""" - if HAS_GPU: + if _gpu_available(): logger.info(f"GPU détectée: {_gpu_name} ({_gpu_mem_gb} Go VRAM)") else: logger.info("Pas de GPU — mode CPU uniquement") def to_gpu(arr): - """Send array to GPU if available, otherwise return as float64 numpy.""" - if HAS_GPU: - return cp.asarray(arr.astype(np.float64)) + """Send array to GPU if available, otherwise return as float64 numpy. + + Falls back to CPU if GPU is unavailable (e.g. in forked subprocess). + """ + if _gpu_available(): + try: + return _cp.asarray(arr.astype(np.float64)) + except Exception: + pass # Fall back to CPU return arr.astype(np.float64) def to_cpu(arr): """Bring array back to CPU (numpy). No-op if already on CPU.""" - if HAS_GPU and isinstance(arr, cp.ndarray): - return cp.asnumpy(arr) + if _cp is not None and isinstance(arr, _cp.ndarray): + try: + return _cp.asnumpy(arr) + except Exception: + pass # Already on CPU or GPU error return arr def xp_gaussian_filter(arr, sigma): """Gaussian filter — uses GPU if array is on GPU, CPU otherwise.""" - if HAS_GPU and isinstance(arr, cp.ndarray): - return cp_ndimage.gaussian_filter(arr, sigma) + if _cp is not None and isinstance(arr, _cp.ndarray): + try: + return _cp_ndimage.gaussian_filter(arr, sigma) + except Exception: + arr = to_cpu(arr) return ndimage.gaussian_filter(arr, sigma) def xp_uniform_filter(arr, size): """Uniform filter — uses GPU if array is on GPU, CPU otherwise.""" - if HAS_GPU and isinstance(arr, cp.ndarray): - return cp_ndimage.uniform_filter(arr, size) + if _cp is not None and isinstance(arr, _cp.ndarray): + try: + return _cp_ndimage.uniform_filter(arr, size) + except Exception: + arr = to_cpu(arr) return ndimage.uniform_filter(arr, size) def xp_minimum_filter(arr, footprint=None, size=None): """Minimum filter — uses GPU if array is on GPU, CPU otherwise.""" - if HAS_GPU and isinstance(arr, cp.ndarray): - return cp_ndimage.minimum_filter(arr, footprint=footprint, size=size) + if _cp is not None and isinstance(arr, _cp.ndarray): + try: + return _cp_ndimage.minimum_filter(arr, footprint=footprint, size=size) + except Exception: + arr = to_cpu(arr) return ndimage.minimum_filter(arr, footprint=footprint, size=size) def gpu_cleanup(): """Free GPU memory. Call between visualizations to prevent OOM.""" - if HAS_GPU: - cp.get_default_memory_pool().free_all_blocks() \ No newline at end of file + if _cp is not None: + try: + _cp.get_default_memory_pool().free_all_blocks() + except Exception: + pass \ No newline at end of file diff --git a/lidar_pipeline/pipeline.py b/lidar_pipeline/pipeline.py index 237751f..cf0525e 100644 --- a/lidar_pipeline/pipeline.py +++ b/lidar_pipeline/pipeline.py @@ -8,12 +8,19 @@ LidarArchaeoPipeline coordinates the full processing chain: """ import logging +import multiprocessing import shutil import time from concurrent.futures import ProcessPoolExecutor, as_completed from pathlib import Path import subprocess +# Use 'spawn' to avoid CUDA context corruption in forked subprocesses +try: + multiprocessing.set_start_method('spawn') +except RuntimeError: + pass # Already set (e.g. in tests or when called multiple times) + from .dtm import classify_ground, create_dtm_fast from .visualizations import ( generate_hillshade, generate_slope, generate_aspect, generate_curvature,