Fix CUDA fork: spawn multiprocessing + graceful GPU fallback

- multiprocessing.set_start_method('spawn') pour éviter la corruption
  du contexte CUDA dans les processus forkés
- to_gpu() et xp_*_filter() attrapent les erreurs CUDA et tombent
  sur CPU au lieu de crasher
- _gpu_available() vérifie que le GPU est utilisable avant chaque opération
- gpu_cleanup() attrape les exceptions au cas où le GPU serait indisponible

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-05-10 01:04:02 +02:00
parent ad762e682d
commit c891c6b23a
2 changed files with 64 additions and 18 deletions

View File

@ -3,6 +3,9 @@
Provides CuPy/numpy abstraction layer. If CuPy is available and a CUDA GPU
is detected, array operations are accelerated on the GPU. Otherwise, all
operations fall back to numpy/scipy on CPU.
GPU errors (e.g. in forked subprocesses) are caught gracefully and
cause an automatic fallback to CPU for the current operation.
"""
import logging
@ -16,64 +19,100 @@ HAS_GPU = False
_gpu_name = None
_gpu_mem_gb = 0
_xp = np # Default: CPU
_cp = None # cupy module (or None)
_cp_ndimage = None # cupyx.scipy.ndimage (or None)
try:
import cupy as cp
import cupyx.scipy.ndimage as cp_ndimage
import cupy as _cupy
import cupyx.scipy.ndimage as _cupy_ndimage
_gpu_info = cp.cuda.runtime.getDeviceProperties(0)
_gpu_info = _cupy.cuda.runtime.getDeviceProperties(0)
_gpu_name = _gpu_info['name'].decode() if isinstance(_gpu_info['name'], bytes) else str(_gpu_info['name'])
_gpu_mem_gb = _gpu_info['totalGlobalMem'] // (1024 ** 3)
HAS_GPU = True
_xp = cp
_xp = _cupy
_cp = _cupy
_cp_ndimage = _cupy_ndimage
except (ImportError, Exception):
pass
def _gpu_available():
"""Check if GPU is usable right now (may fail in forked subprocesses)."""
if not HAS_GPU:
return False
try:
_cp.cuda.runtime.getDevice()
return True
except Exception:
return False
def log_gpu_status():
"""Log GPU detection result. Called after logging is configured."""
if HAS_GPU:
if _gpu_available():
logger.info(f"GPU détectée: {_gpu_name} ({_gpu_mem_gb} Go VRAM)")
else:
logger.info("Pas de GPU — mode CPU uniquement")
def to_gpu(arr):
"""Send array to GPU if available, otherwise return as float64 numpy."""
if HAS_GPU:
return cp.asarray(arr.astype(np.float64))
"""Send array to GPU if available, otherwise return as float64 numpy.
Falls back to CPU if GPU is unavailable (e.g. in forked subprocess).
"""
if _gpu_available():
try:
return _cp.asarray(arr.astype(np.float64))
except Exception:
pass # Fall back to CPU
return arr.astype(np.float64)
def to_cpu(arr):
"""Bring array back to CPU (numpy). No-op if already on CPU."""
if HAS_GPU and isinstance(arr, cp.ndarray):
return cp.asnumpy(arr)
if _cp is not None and isinstance(arr, _cp.ndarray):
try:
return _cp.asnumpy(arr)
except Exception:
pass # Already on CPU or GPU error
return arr
def xp_gaussian_filter(arr, sigma):
"""Gaussian filter — uses GPU if array is on GPU, CPU otherwise."""
if HAS_GPU and isinstance(arr, cp.ndarray):
return cp_ndimage.gaussian_filter(arr, sigma)
if _cp is not None and isinstance(arr, _cp.ndarray):
try:
return _cp_ndimage.gaussian_filter(arr, sigma)
except Exception:
arr = to_cpu(arr)
return ndimage.gaussian_filter(arr, sigma)
def xp_uniform_filter(arr, size):
"""Uniform filter — uses GPU if array is on GPU, CPU otherwise."""
if HAS_GPU and isinstance(arr, cp.ndarray):
return cp_ndimage.uniform_filter(arr, size)
if _cp is not None and isinstance(arr, _cp.ndarray):
try:
return _cp_ndimage.uniform_filter(arr, size)
except Exception:
arr = to_cpu(arr)
return ndimage.uniform_filter(arr, size)
def xp_minimum_filter(arr, footprint=None, size=None):
"""Minimum filter — uses GPU if array is on GPU, CPU otherwise."""
if HAS_GPU and isinstance(arr, cp.ndarray):
return cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
if _cp is not None and isinstance(arr, _cp.ndarray):
try:
return _cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
except Exception:
arr = to_cpu(arr)
return ndimage.minimum_filter(arr, footprint=footprint, size=size)
def gpu_cleanup():
"""Free GPU memory. Call between visualizations to prevent OOM."""
if HAS_GPU:
cp.get_default_memory_pool().free_all_blocks()
if _cp is not None:
try:
_cp.get_default_memory_pool().free_all_blocks()
except Exception:
pass

View File

@ -8,12 +8,19 @@ LidarArchaeoPipeline coordinates the full processing chain:
"""
import logging
import multiprocessing
import shutil
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
import subprocess
# Use 'spawn' to avoid CUDA context corruption in forked subprocesses
try:
multiprocessing.set_start_method('spawn')
except RuntimeError:
pass # Already set (e.g. in tests or when called multiple times)
from .dtm import classify_ground, create_dtm_fast
from .visualizations import (
generate_hillshade, generate_slope, generate_aspect, generate_curvature,