From c891c6b23af1fc86decf30d6363ba4ed2db3cf26 Mon Sep 17 00:00:00 2001
From: Jacquin Antoine <antoine@arkel.fr>
Date: Sun, 10 May 2026 01:04:02 +0200
Subject: [PATCH] Fix CUDA fork: spawn multiprocessing + graceful GPU fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- multiprocessing.set_start_method('spawn') pour éviter la corruption
  du contexte CUDA dans les processus forkés
- to_gpu() et xp_*_filter() attrapent les erreurs CUDA et tombent
  sur CPU au lieu de crasher
- _gpu_available() vérifie que le GPU est utilisable avant chaque transfert
  vers le GPU (to_gpu) et dans log_gpu_status()
- gpu_cleanup() attrape les exceptions au cas où le GPU serait indisponible

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 lidar_pipeline/gpu.py      | 75 +++++++++++++++++++++++++++++---------
 lidar_pipeline/pipeline.py |  7 ++++
 2 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/lidar_pipeline/gpu.py b/lidar_pipeline/gpu.py
index 5709bf7..151bc77 100644
--- a/lidar_pipeline/gpu.py
+++ b/lidar_pipeline/gpu.py
@@ -3,6 +3,9 @@
 Provides CuPy/numpy abstraction layer. If CuPy is available and a CUDA GPU
 is detected, array operations are accelerated on the GPU. Otherwise, all
 operations fall back to numpy/scipy on CPU.
+
+GPU errors (e.g. in forked subprocesses) are caught gracefully and
+cause an automatic fallback to CPU for the current operation.
 """
 
 import logging
@@ -16,64 +19,100 @@ HAS_GPU = False
 _gpu_name = None
 _gpu_mem_gb = 0
 _xp = np  # Default: CPU
+_cp = None     # cupy module; stays None unless import and device query succeed
+_cp_ndimage = None  # cupyx.scipy.ndimage; bound together with _cp, else None
 
 try:
-    import cupy as cp
-    import cupyx.scipy.ndimage as cp_ndimage
+    import cupy as _cupy
+    import cupyx.scipy.ndimage as _cupy_ndimage
 
-    _gpu_info = cp.cuda.runtime.getDeviceProperties(0)
+    _gpu_info = _cupy.cuda.runtime.getDeviceProperties(0)
     _gpu_name = _gpu_info['name'].decode() if isinstance(_gpu_info['name'], bytes) else str(_gpu_info['name'])
     _gpu_mem_gb = _gpu_info['totalGlobalMem'] // (1024 ** 3)
     HAS_GPU = True
-    _xp = cp
+    _xp = _cupy
+    _cp = _cupy  # published only after the device query above succeeded
+    _cp_ndimage = _cupy_ndimage
 except (ImportError, Exception):
     pass
 
 
+def _gpu_available():
+    """Return True if a GPU was detected and the CUDA runtime still responds."""
+    usable = HAS_GPU
+    if usable:
+        try:
+            _cp.cuda.runtime.getDevice()
+        except Exception:
+            usable = False  # e.g. probing from a forked subprocess
+    return usable
+
+
 def log_gpu_status():
     """Log GPU detection result. Called after logging is configured."""
-    if HAS_GPU:
+    if _gpu_available():  # live runtime probe, not just the import-time flag
         logger.info(f"GPU détectée: {_gpu_name} ({_gpu_mem_gb} Go VRAM)")
     else:
         logger.info("Pas de GPU — mode CPU uniquement")
 
 
 def to_gpu(arr):
-    """Send array to GPU if available, otherwise return as float64 numpy."""
-    if HAS_GPU:
-        return cp.asarray(arr.astype(np.float64))
+    """Return ``arr`` as float64, on the GPU when one is usable.
+
+    A failed upload (e.g. in a forked subprocess) is logged, then CPU is used.
+    """
+    if _gpu_available():
+        try:
+            return _cp.asarray(arr.astype(np.float64))
+        except Exception as exc:
+            logger.warning("Transfert vers le GPU impossible, repli CPU: %s", exc)
     return arr.astype(np.float64)
 
 
 def to_cpu(arr):
     """Bring array back to CPU (numpy). No-op if already on CPU."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp.asnumpy(arr)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp.asnumpy(arr)
+        except Exception as exc:  # arr stays a CuPy array: report it loudly
+            logger.error("Transfert GPU vers CPU impossible: %s", exc)
     return arr
 
 
 def xp_gaussian_filter(arr, sigma):
     """Gaussian filter — uses GPU if array is on GPU, CPU otherwise."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp_ndimage.gaussian_filter(arr, sigma)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp_ndimage.gaussian_filter(arr, sigma)
+        except Exception:  # GPU filter failed: retry below on CPU via ndimage
+            arr = to_cpu(arr)  # NOTE: still a CuPy array if this copy fails too
     return ndimage.gaussian_filter(arr, sigma)
 
 
 def xp_uniform_filter(arr, size):
     """Uniform filter — uses GPU if array is on GPU, CPU otherwise."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp_ndimage.uniform_filter(arr, size)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp_ndimage.uniform_filter(arr, size)
+        except Exception:  # GPU filter failed: retry below on CPU via ndimage
+            arr = to_cpu(arr)  # NOTE: still a CuPy array if this copy fails too
     return ndimage.uniform_filter(arr, size)
 
 
 def xp_minimum_filter(arr, footprint=None, size=None):
     """Minimum filter — uses GPU if array is on GPU, CPU otherwise."""
-    if HAS_GPU and isinstance(arr, cp.ndarray):
-        return cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
+    if _cp is not None and isinstance(arr, _cp.ndarray):
+        try:
+            return _cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
+        except Exception:  # GPU filter failed: retry below on CPU via ndimage
+            arr = to_cpu(arr)  # NOTE: still a CuPy array if this copy fails too
     return ndimage.minimum_filter(arr, footprint=footprint, size=size)
 
 
 def gpu_cleanup():
     """Free GPU memory. Call between visualizations to prevent OOM."""
-    if HAS_GPU:
-        cp.get_default_memory_pool().free_all_blocks()
\ No newline at end of file
+    if _cp is not None:
+        try:
+            _cp.get_default_memory_pool().free_all_blocks()
+        except Exception as exc:  # best-effort cleanup: never fail the caller
+            logger.debug("Libération de la mémoire GPU impossible: %s", exc)
diff --git a/lidar_pipeline/pipeline.py b/lidar_pipeline/pipeline.py
index 237751f..cf0525e 100644
--- a/lidar_pipeline/pipeline.py
+++ b/lidar_pipeline/pipeline.py
@@ -8,12 +8,19 @@ LidarArchaeoPipeline coordinates the full processing chain:
 """
 
 import logging
+import multiprocessing
 import shutil
 import time
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from pathlib import Path
 import subprocess
 
+try:  # 'spawn' avoids CUDA context corruption in forked subprocesses
+    multiprocessing.set_start_method('spawn')
+except RuntimeError:  # already set elsewhere: warn if that method is unsafe
+    if multiprocessing.get_start_method() != 'spawn':
+        logging.getLogger(__name__).warning("Start method != 'spawn': risque CUDA")
+
 from .dtm import classify_ground, create_dtm_fast
 from .visualizations import (
     generate_hillshade, generate_slope, generate_aspect, generate_curvature,