Fix CUDA fork: spawn multiprocessing + graceful GPU fallback
- multiprocessing.set_start_method('spawn') pour éviter la corruption
du contexte CUDA dans les processus forkés
- to_gpu() et xp_*_filter() attrapent les erreurs CUDA et tombent
sur CPU au lieu de crasher
- _gpu_available() vérifie que le GPU est utilisable avant chaque opération
- gpu_cleanup() attrape les exceptions au cas où le GPU serait indisponible
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -3,6 +3,9 @@
|
||||
Provides CuPy/numpy abstraction layer. If CuPy is available and a CUDA GPU
|
||||
is detected, array operations are accelerated on the GPU. Otherwise, all
|
||||
operations fall back to numpy/scipy on CPU.
|
||||
|
||||
GPU errors (e.g. in forked subprocesses) are caught gracefully and
|
||||
cause an automatic fallback to CPU for the current operation.
|
||||
"""
|
||||
|
||||
import logging
|
||||
@ -16,64 +19,100 @@ HAS_GPU = False
|
||||
_gpu_name = None
|
||||
_gpu_mem_gb = 0
|
||||
_xp = np # Default: CPU
|
||||
_cp = None # cupy module (or None)
|
||||
_cp_ndimage = None # cupyx.scipy.ndimage (or None)
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
import cupyx.scipy.ndimage as cp_ndimage
|
||||
import cupy as _cupy
|
||||
import cupyx.scipy.ndimage as _cupy_ndimage
|
||||
|
||||
_gpu_info = cp.cuda.runtime.getDeviceProperties(0)
|
||||
_gpu_info = _cupy.cuda.runtime.getDeviceProperties(0)
|
||||
_gpu_name = _gpu_info['name'].decode() if isinstance(_gpu_info['name'], bytes) else str(_gpu_info['name'])
|
||||
_gpu_mem_gb = _gpu_info['totalGlobalMem'] // (1024 ** 3)
|
||||
HAS_GPU = True
|
||||
_xp = cp
|
||||
_xp = _cupy
|
||||
_cp = _cupy
|
||||
_cp_ndimage = _cupy_ndimage
|
||||
except (ImportError, Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _gpu_available():
|
||||
"""Check if GPU is usable right now (may fail in forked subprocesses)."""
|
||||
if not HAS_GPU:
|
||||
return False
|
||||
try:
|
||||
_cp.cuda.runtime.getDevice()
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def log_gpu_status():
|
||||
"""Log GPU detection result. Called after logging is configured."""
|
||||
if HAS_GPU:
|
||||
if _gpu_available():
|
||||
logger.info(f"GPU détectée: {_gpu_name} ({_gpu_mem_gb} Go VRAM)")
|
||||
else:
|
||||
logger.info("Pas de GPU — mode CPU uniquement")
|
||||
|
||||
|
||||
def to_gpu(arr):
|
||||
"""Send array to GPU if available, otherwise return as float64 numpy."""
|
||||
if HAS_GPU:
|
||||
return cp.asarray(arr.astype(np.float64))
|
||||
"""Send array to GPU if available, otherwise return as float64 numpy.
|
||||
|
||||
Falls back to CPU if GPU is unavailable (e.g. in forked subprocess).
|
||||
"""
|
||||
if _gpu_available():
|
||||
try:
|
||||
return _cp.asarray(arr.astype(np.float64))
|
||||
except Exception:
|
||||
pass # Fall back to CPU
|
||||
return arr.astype(np.float64)
|
||||
|
||||
|
||||
def to_cpu(arr):
|
||||
"""Bring array back to CPU (numpy). No-op if already on CPU."""
|
||||
if HAS_GPU and isinstance(arr, cp.ndarray):
|
||||
return cp.asnumpy(arr)
|
||||
if _cp is not None and isinstance(arr, _cp.ndarray):
|
||||
try:
|
||||
return _cp.asnumpy(arr)
|
||||
except Exception:
|
||||
pass # Already on CPU or GPU error
|
||||
return arr
|
||||
|
||||
|
||||
def xp_gaussian_filter(arr, sigma):
|
||||
"""Gaussian filter — uses GPU if array is on GPU, CPU otherwise."""
|
||||
if HAS_GPU and isinstance(arr, cp.ndarray):
|
||||
return cp_ndimage.gaussian_filter(arr, sigma)
|
||||
if _cp is not None and isinstance(arr, _cp.ndarray):
|
||||
try:
|
||||
return _cp_ndimage.gaussian_filter(arr, sigma)
|
||||
except Exception:
|
||||
arr = to_cpu(arr)
|
||||
return ndimage.gaussian_filter(arr, sigma)
|
||||
|
||||
|
||||
def xp_uniform_filter(arr, size):
|
||||
"""Uniform filter — uses GPU if array is on GPU, CPU otherwise."""
|
||||
if HAS_GPU and isinstance(arr, cp.ndarray):
|
||||
return cp_ndimage.uniform_filter(arr, size)
|
||||
if _cp is not None and isinstance(arr, _cp.ndarray):
|
||||
try:
|
||||
return _cp_ndimage.uniform_filter(arr, size)
|
||||
except Exception:
|
||||
arr = to_cpu(arr)
|
||||
return ndimage.uniform_filter(arr, size)
|
||||
|
||||
|
||||
def xp_minimum_filter(arr, footprint=None, size=None):
|
||||
"""Minimum filter — uses GPU if array is on GPU, CPU otherwise."""
|
||||
if HAS_GPU and isinstance(arr, cp.ndarray):
|
||||
return cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
|
||||
if _cp is not None and isinstance(arr, _cp.ndarray):
|
||||
try:
|
||||
return _cp_ndimage.minimum_filter(arr, footprint=footprint, size=size)
|
||||
except Exception:
|
||||
arr = to_cpu(arr)
|
||||
return ndimage.minimum_filter(arr, footprint=footprint, size=size)
|
||||
|
||||
|
||||
def gpu_cleanup():
|
||||
"""Free GPU memory. Call between visualizations to prevent OOM."""
|
||||
if HAS_GPU:
|
||||
cp.get_default_memory_pool().free_all_blocks()
|
||||
if _cp is not None:
|
||||
try:
|
||||
_cp.get_default_memory_pool().free_all_blocks()
|
||||
except Exception:
|
||||
pass
|
||||
@ -8,12 +8,19 @@ LidarArchaeoPipeline coordinates the full processing chain:
|
||||
"""
|
||||
|
||||
import logging
|
||||
import multiprocessing
|
||||
import shutil
|
||||
import time
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
|
||||
# Use 'spawn' to avoid CUDA context corruption in forked subprocesses
|
||||
try:
|
||||
multiprocessing.set_start_method('spawn')
|
||||
except RuntimeError:
|
||||
pass # Already set (e.g. in tests or when called multiple times)
|
||||
|
||||
from .dtm import classify_ground, create_dtm_fast
|
||||
from .visualizations import (
|
||||
generate_hillshade, generate_slope, generate_aspect, generate_curvature,
|
||||
|
||||
Reference in New Issue
Block a user