Skip SharedDEM computation when all visualizations already exist
Two optimizations avoid ~2 min of wasted work per file on re-runs:

1. pipeline.py: Check which visualizations need regeneration before computing SharedDEM. If all WebP outputs already exist, SharedDEM is skipped entirely. If only the IGN overlays (ortho, topo) need updating, SharedDEM is skipped as well.
2. visualizations.py: Make the SharedDEM attributes lazy (filled, gradient, lrm_15) so that only the data actually needed is computed. For example, if only the hillshade is regenerated, the LRM at 15 m is never calculated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@ -162,11 +162,24 @@ class LidarArchaeoPipeline:
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@staticmethod
def _expected_webp_path(name, basename, file_vis_dir):
    """Build the path of the WebP file expected for one visualization step.

    Args:
        name: Visualization step identifier.
        basename: File-name prefix shared by the outputs of one DTM file.
        file_vis_dir: Directory object joined with the file name using ``/``.

    Returns:
        ``file_vis_dir / "<basename>_<suffix>.webp"``. The suffix is a longer
        output name for ``pos_open``, ``neg_open`` and ``hillshade``, and the
        step name itself for every other step.
    """
    # Steps whose output file name differs from the step identifier.
    renamed_outputs = {
        'pos_open': 'positive_openness',
        'neg_open': 'negative_openness',
        'hillshade': 'hillshade_multi',
    }
    suffix = renamed_outputs.get(name, name)
    return file_vis_dir / f"{basename}_{suffix}.webp"
|
||||||
|
|
||||||
def generate_all_visualizations(self, dtm_file, basename, resolution=None):
|
def generate_all_visualizations(self, dtm_file, basename, resolution=None):
|
||||||
"""Generate all archaeological visualizations for one DTM file.
|
"""Generate all archaeological visualizations for one DTM file.
|
||||||
|
|
||||||
Args:
|
Optimisation: SharedDEM is only computed if at least one visualization
|
||||||
resolution: Actual resolution from DTM geotransform. If None, uses self.resolution.
|
needs to be generated. When all WebP outputs exist, SharedDEM is
|
||||||
|
skipped entirely (saves ~2min per file on re-runs).
|
||||||
"""
|
"""
|
||||||
if resolution is None:
|
if resolution is None:
|
||||||
resolution = self.resolution
|
resolution = self.resolution
|
||||||
@ -175,22 +188,49 @@ class LidarArchaeoPipeline:
|
|||||||
# Create per-file subdirectory
|
# Create per-file subdirectory
|
||||||
file_vis_dir = self.vis_dir / basename
|
file_vis_dir = self.vis_dir / basename
|
||||||
file_vis_dir.mkdir(exist_ok=True)
|
file_vis_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
# Pre-compute shared DEM data (gradient, NaN mask, LRM) once for all visualizations
|
|
||||||
logger.info(" Pré-calcul données partagées (gradient, LRM)...")
|
|
||||||
t_shared = time.time()
|
|
||||||
shared = SharedDEM(dtm_file, resolution)
|
|
||||||
logger.info(f" ✓ Données partagées prêtes ({time.time()-t_shared:.1f}s)")
|
|
||||||
|
|
||||||
vis_results = {}
|
|
||||||
total = len(VIZ_STEPS)
|
total = len(VIZ_STEPS)
|
||||||
|
|
||||||
|
# Phase 1: determine which visualizations need generation
|
||||||
|
needs_generation = {} # name -> True/False
|
||||||
|
for name, func in VIZ_STEPS:
|
||||||
|
if self.force:
|
||||||
|
needs_generation[name] = True
|
||||||
|
else:
|
||||||
|
expected_webp = self._expected_webp_path(name, basename, file_vis_dir)
|
||||||
|
needs_generation[name] = not expected_webp.exists()
|
||||||
|
|
||||||
|
to_generate = [n for n, needed in needs_generation.items() if needed]
|
||||||
|
ign_only = all(name in ('ortho', 'topo') for name in to_generate)
|
||||||
|
needs_shared = any(name not in ('ortho', 'topo') for name in to_generate)
|
||||||
|
|
||||||
|
if not to_generate:
|
||||||
|
logger.info(" Toutes les visualisations déjà existantes — ignorées")
|
||||||
|
# Still need to return results dict for PDF check
|
||||||
|
vis_results = {}
|
||||||
|
for name, func in VIZ_STEPS:
|
||||||
|
vis_results[name] = self._expected_webp_path(name, basename, file_vis_dir)
|
||||||
|
return vis_results
|
||||||
|
|
||||||
|
# Phase 2: compute SharedDEM only if needed
|
||||||
|
shared = None
|
||||||
|
if needs_shared:
|
||||||
|
logger.info(" Pré-calcul données partagées (gradient, LRM)...")
|
||||||
|
t_shared = time.time()
|
||||||
|
shared = SharedDEM(dtm_file, resolution)
|
||||||
|
logger.info(f" ✓ Données partagées prêtes ({time.time()-t_shared:.1f}s)")
|
||||||
|
|
||||||
|
# Phase 3: generate visualizations
|
||||||
|
vis_results = {}
|
||||||
for idx, (name, func) in enumerate(VIZ_STEPS, 1):
|
for idx, (name, func) in enumerate(VIZ_STEPS, 1):
|
||||||
|
if not needs_generation[name]:
|
||||||
|
logger.info(f" [{idx}/{total}] {name}: déjà existant, ignoré")
|
||||||
|
vis_results[name] = self._expected_webp_path(name, basename, file_vis_dir)
|
||||||
|
continue
|
||||||
|
|
||||||
# When --force, delete existing TIF to ensure clean regeneration
|
# When --force, delete existing TIF to ensure clean regeneration
|
||||||
if self.force:
|
if self.force:
|
||||||
for tif in file_vis_dir.glob(f"{basename}_{name}.tif"):
|
for tif in file_vis_dir.glob(f"{basename}_{name}.tif"):
|
||||||
tif.unlink(missing_ok=True)
|
tif.unlink(missing_ok=True)
|
||||||
# Special cases for differently-named TIFs
|
|
||||||
if name == 'pos_open':
|
if name == 'pos_open':
|
||||||
for tif in file_vis_dir.glob(f"{basename}_positive_openness.tif"):
|
for tif in file_vis_dir.glob(f"{basename}_positive_openness.tif"):
|
||||||
tif.unlink(missing_ok=True)
|
tif.unlink(missing_ok=True)
|
||||||
@ -201,26 +241,6 @@ class LidarArchaeoPipeline:
|
|||||||
for tif in file_vis_dir.glob(f"{basename}_hillshade_multi.tif"):
|
for tif in file_vis_dir.glob(f"{basename}_hillshade_multi.tif"):
|
||||||
tif.unlink(missing_ok=True)
|
tif.unlink(missing_ok=True)
|
||||||
|
|
||||||
# Check if output WebP already exists (skip unless --force)
|
|
||||||
if not self.force:
|
|
||||||
# Determine expected WebP filename from the viz name
|
|
||||||
# Special cases for openness and IGN overlays
|
|
||||||
if name == 'pos_open':
|
|
||||||
expected_webp = file_vis_dir / f"{basename}_positive_openness.webp"
|
|
||||||
elif name == 'neg_open':
|
|
||||||
expected_webp = file_vis_dir / f"{basename}_negative_openness.webp"
|
|
||||||
elif name == 'hillshade':
|
|
||||||
expected_webp = file_vis_dir / f"{basename}_hillshade_multi.webp"
|
|
||||||
elif name in ('ortho', 'topo'):
|
|
||||||
expected_webp = file_vis_dir / f"{basename}_{name}.webp"
|
|
||||||
else:
|
|
||||||
expected_webp = file_vis_dir / f"{basename}_{name}.webp"
|
|
||||||
|
|
||||||
if expected_webp.exists():
|
|
||||||
logger.info(f" [{idx}/{total}] {name}: déjà existant, ignoré")
|
|
||||||
vis_results[name] = expected_webp # Track as existing file
|
|
||||||
continue
|
|
||||||
|
|
||||||
logger.info(f" [{idx}/{total}] {name}...")
|
logger.info(f" [{idx}/{total}] {name}...")
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -33,10 +33,13 @@ else:
|
|||||||
class SharedDEM:
|
class SharedDEM:
|
||||||
"""Pre-computed DEM data shared across all visualizations.
|
"""Pre-computed DEM data shared across all visualizations.
|
||||||
|
|
||||||
Reads the DEM once and pre-computes:
|
Reads the DEM once and lazily computes on first access:
|
||||||
- NaN mask and filled DEM (avoids 20+ calls to _fill_nans)
|
- NaN mask and filled DEM (avoids 20+ calls to _fill_nans)
|
||||||
- Gradient components (shared by hillshade, slope, aspect, curvature)
|
- Gradient components (shared by hillshade, slope, aspect, curvature)
|
||||||
- LRM at 15m kernel (shared by lrm + anomalies)
|
- LRM at 15m kernel (shared by lrm + anomalies)
|
||||||
|
|
||||||
|
Attributes are computed lazily on first access to avoid computing
|
||||||
|
data that is never used (e.g. LRM when only hillshade needs generation).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, dem_file, resolution):
|
def __init__(self, dem_file, resolution):
|
||||||
@ -48,25 +51,69 @@ class SharedDEM:
|
|||||||
self.nan_mask = np.isnan(dem_np)
|
self.nan_mask = np.isnan(dem_np)
|
||||||
self.dem_np = dem_np.astype(np.float32)
|
self.dem_np = dem_np.astype(np.float32)
|
||||||
|
|
||||||
# Pre-fill NaNs once (saves ~20 calls to NearestNDInterpolator)
|
# Lazy caches — computed on first access
|
||||||
self.filled, _ = _fill_nans(self.dem_np)
|
self._filled = None
|
||||||
|
self._gradient = None # (dy, dx, slope_rad, slope_deg, aspect)
|
||||||
|
self._lrm_15 = None
|
||||||
|
|
||||||
# Initialize GPU lazy caches before any filter calls
|
# GPU lazy caches
|
||||||
self._filled_gpu = None
|
self._filled_gpu = None
|
||||||
self._dem_gpu = None
|
self._dem_gpu = None
|
||||||
|
|
||||||
# Pre-compute gradient (shared by hillshade, slope, aspect, curvature)
|
@property
|
||||||
self.dy = np.gradient(self.filled, resolution, axis=0)
|
def filled(self):
|
||||||
self.dx = np.gradient(self.filled, resolution, axis=1)
|
"""Filled DEM (NaN interpolated) — computed lazily."""
|
||||||
self.slope_rad = np.arctan(np.sqrt(self.dx**2 + self.dy**2))
|
if self._filled is None:
|
||||||
self.slope_deg = np.degrees(self.slope_rad)
|
logger.debug(" → Calcul filled DEM (interpolation NaN)...")
|
||||||
self.aspect = np.mod(np.degrees(np.arctan2(self.dy, self.dx)), 360)
|
self._filled, _ = _fill_nans(self.dem_np)
|
||||||
|
return self._filled
|
||||||
|
|
||||||
# Pre-compute LRM at 15m (shared by lrm + anomalies)
|
@property
|
||||||
sigma_15 = 15.0 / resolution
|
def dy(self):
|
||||||
local_mean_15 = _filter_nanaware_from_filled(self, xp_gaussian_filter, sigma=sigma_15)
|
self._ensure_gradient()
|
||||||
self.lrm_15 = self.dem_np - local_mean_15
|
return self._gradient[0]
|
||||||
self.lrm_15[self.nan_mask] = np.nan
|
|
||||||
|
@property
def dx(self):
    """Gradient of the filled DEM along axis 1, taken from the lazy cache.

    ``_ensure_gradient()`` runs first so that ``self._gradient`` is populated
    before it is read.
    """
    self._ensure_gradient()
    # Cache layout: (dy, dx, slope_rad, slope_deg, aspect)
    _dy, dx_values, _slope_rad, _slope_deg, _aspect = self._gradient
    return dx_values
|
||||||
|
|
||||||
|
@property
def slope_rad(self):
    """Slope in radians (arctan of the gradient magnitude), from the lazy cache.

    ``_ensure_gradient()`` runs first so that ``self._gradient`` is populated
    before it is read.
    """
    self._ensure_gradient()
    # Cache layout: (dy, dx, slope_rad, slope_deg, aspect)
    _dy, _dx, slope_radians, _slope_deg, _aspect = self._gradient
    return slope_radians
|
||||||
|
|
||||||
|
@property
def slope_deg(self):
    """Slope converted to degrees, taken from the lazy gradient cache.

    ``_ensure_gradient()`` runs first so that ``self._gradient`` is populated
    before it is read.
    """
    self._ensure_gradient()
    # Cache layout: (dy, dx, slope_rad, slope_deg, aspect)
    _dy, _dx, _slope_rad, slope_degrees, _aspect = self._gradient
    return slope_degrees
|
||||||
|
|
||||||
|
@property
def aspect(self):
    """Aspect in degrees, wrapped into [0, 360), from the lazy gradient cache.

    ``_ensure_gradient()`` runs first so that ``self._gradient`` is populated
    before it is read.
    """
    self._ensure_gradient()
    # Cache layout: (dy, dx, slope_rad, slope_deg, aspect)
    _dy, _dx, _slope_rad, _slope_deg, aspect_degrees = self._gradient
    return aspect_degrees
|
||||||
|
|
||||||
|
@property
def lrm_15(self):
    """LRM at the 15 m kernel, computed on first access and then cached.

    The local mean is obtained from ``_filter_nanaware_from_filled`` with
    ``xp_gaussian_filter`` and ``sigma = 15.0 / self.resolution``. It is
    subtracted from ``self.dem_np``, and cells flagged in ``self.nan_mask``
    are set to NaN.

    Returns:
        The cached LRM array; the same object is returned on later accesses.
    """
    if self._lrm_15 is None:
        logger.debug(" → Calcul LRM 15m...")
        sigma_15 = 15.0 / self.resolution
        local_mean_15 = _filter_nanaware_from_filled(self, xp_gaussian_filter, sigma=sigma_15)
        lrm = self.dem_np - local_mean_15
        lrm[self.nan_mask] = np.nan
        # Publish only the finished array: if masking fails, the cache stays
        # empty instead of holding an unmasked result for later accesses.
        self._lrm_15 = lrm
    return self._lrm_15
|
||||||
|
|
||||||
|
def _ensure_gradient(self):
    """Populate ``self._gradient`` on the first call; later calls do nothing.

    The cache is the tuple ``(dy, dx, slope_rad, slope_deg, aspect)``, derived
    from ``self.filled`` with a sample spacing of ``self.resolution``:

    - ``dy`` / ``dx``: gradients along axis 0 / axis 1
    - ``slope_rad``: arctan of the gradient magnitude
    - ``slope_deg``: ``slope_rad`` converted to degrees
    - ``aspect``: ``arctan2(dy, dx)`` in degrees, wrapped into [0, 360)
    """
    if self._gradient is not None:
        return

    logger.debug(" → Calcul gradient...")
    surface = self.filled
    # One call over both axes returns the per-axis gradients in axis order.
    grad_rows, grad_cols = np.gradient(surface, self.resolution, axis=(0, 1))
    steepness_rad = np.arctan(np.sqrt(grad_cols**2 + grad_rows**2))
    steepness_deg = np.degrees(steepness_rad)
    facing_deg = np.mod(np.degrees(np.arctan2(grad_rows, grad_cols)), 360)
    self._gradient = (grad_rows, grad_cols, steepness_rad, steepness_deg, facing_deg)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def filled_gpu(self):
|
def filled_gpu(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user