Fix CUDA fork: spawn multiprocessing + graceful GPU fallback

- multiprocessing.set_start_method('spawn') pour éviter la corruption du contexte CUDA dans les processus forkés
- to_gpu() et xp_*_filter() attrapent les erreurs CUDA et retombent sur le CPU au lieu de crasher
- _gpu_available() vérifie que le GPU est utilisable avant chaque opération
- gpu_cleanup() attrape les exceptions au cas où le GPU serait indisponible

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-10 01:04:02 +02:00
parent ad762e682d
commit c891c6b23a
2 changed files with 64 additions and 18 deletions
--- a/lidar_pipeline/gpu.py
+++ b/lidar_pipeline/gpu.py
@@ -3,6 +3,9 @@
 Provides CuPy/numpy abstraction layer. If CuPy is available and a CUDA GPU
 is detected, array operations are accelerated on the GPU. Otherwise, all
 operations fall back to numpy/scipy on CPU.
+
+GPU errors (e.g. in forked subprocesses) are caught gracefully and
+cause an automatic fallback to CPU for the current operation.
 """

 import logging
@@ -16,64 +19,100 @@ HAS_GPU = False
 _gpu_name = None
 _gpu_mem_gb = 0
 _xp = np  # Default: CPU
+_cp = None     # cupy module (or None)
+_cp_ndimage = None  # cupyx.scipy.ndimage (or None)

 try:
-    import cupy as cp
-    import cupyx.scipy.ndimage as cp_ndimage
+    import cupy as _cupy
+    import cupyx.scipy.ndimage as _cupy_ndimage

-    _gpu_info = cp.cuda.runtime.getDeviceProperties(0)
+    _gpu_info = _cupy.cuda.runtime.getDeviceProperties(0)
    _gpu_name = _gpu_info['name'].decode() if isinstance(_gpu_info['name'], bytes) else str(_gpu_info['name'])
    _gpu_mem_gb = _gpu_info['totalGlobalMem'] // (1024 ** 3)
    HAS_GPU = True
-    _xp = cp
+    _xp = _cupy
+    _cp = _cupy
+    _cp_ndimage = _cupy_ndimage
 except (ImportError, Exception):
    pass


+def _gpu_available():
+    """Check if GPU is usable right now (may fail in forked subprocesses)."""
+    if not HAS_GPU:
+        return False
+    try:
+        _cp.cuda.runtime.getDevice()
+        return True
+    except Exception:
+        return False
+
+
 def log_gpu_status():
    """Log GPU detection result. Called after logging is configured."""
-    if HAS_GPU:
+    if _gpu_available():
        logger.info(f"GPU détectée: {_gpu_name} ({_gpu_mem_gb} Go VRAM)")
    else:
        logger.info("Pas de GPU — mode CPU uniquement")


 def to_gpu(arr):
-    """Send array to GPU if available, otherwise return as float64 numpy."""
-    if HAS_GPU:
-        return cp.asarray(arr.astype(np.float64))
+    """Send array to GPU if available, otherwise return as float64 numpy.
+
+    Falls back to CPU if GPU is unavailable (e.g. in forked subprocess).
+    """
+    if _gpu_available():
+        try:
+            return _cp.asarray(arr.astype(np.float64))
+        except Exception:
+            pass  # Fall back to CPU
    return arr.astype(np.float64)


 def to_cpu(arr):
    """Bring array back to CPU (numpy). No-op if already on CPU."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp.asnumpy(arr)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp.asnumpy(arr)
+        except Exception:
+            pass  # Already on CPU or GPU error
    return arr


 def xp_gaussian_filter(arr, sigma):
    """Gaussian filter — uses GPU if array is on GPU, CPU otherwise."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp_ndimage.gaussian_filter(arr, sigma)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp_ndimage.gaussian_filter(arr, sigma)
+        except Exception:
+            arr = to_cpu(arr)
    return ndimage.gaussian_filter(arr, sigma)


 def xp_uniform_filter(arr, size):
    """Uniform filter — uses GPU if array is on GPU, CPU otherwise."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp_ndimage.uniform_filter(arr, size)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp_ndimage.uniform_filter(arr, size)
+        except Exception:
+            arr = to_cpu(arr)
    return ndimage.uniform_filter(arr, size)


 def xp_minimum_filter(arr, footprint=None, size=None):
    """Minimum filter — uses GPU if array is on GPU, CPU otherwise."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
+        except Exception:
+            arr = to_cpu(arr)
    return ndimage.minimum_filter(arr, footprint=footprint, size=size)


 def gpu_cleanup():
    """Free GPU memory. Call between visualizations to prevent OOM."""
-    if HAS_GPU:
-        cp.get_default_memory_pool().free_all_blocks()
+    if _cp is not None:
+        try:
+            _cp.get_default_memory_pool().free_all_blocks()
+        except Exception:
+            pass
--- a/lidar_pipeline/pipeline.py
+++ b/lidar_pipeline/pipeline.py
@@ -8,12 +8,19 @@ LidarArchaeoPipeline coordinates the full processing chain:
 """

 import logging
+import multiprocessing
 import shutil
 import time
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from pathlib import Path
 import subprocess

+# Use 'spawn' to avoid CUDA context corruption in forked subprocesses
+try:
+    multiprocessing.set_start_method('spawn')
+except RuntimeError:
+    pass  # Already set (e.g. in tests or when called multiple times)
+
 from .dtm import classify_ground, create_dtm_fast
 from .visualizations import (
    generate_hillshade, generate_slope, generate_aspect, generate_curvature,