diff --git a/lidar_pipeline/pipeline.py b/lidar_pipeline/pipeline.py index 5b078a6..8d595a9 100644 --- a/lidar_pipeline/pipeline.py +++ b/lidar_pipeline/pipeline.py @@ -162,11 +162,24 @@ class LidarArchaeoPipeline: return False return True + @staticmethod + def _expected_webp_path(name, basename, file_vis_dir): + """Return the expected WebP output path for a visualization step.""" + if name == 'pos_open': + return file_vis_dir / f"{basename}_positive_openness.webp" + elif name == 'neg_open': + return file_vis_dir / f"{basename}_negative_openness.webp" + elif name == 'hillshade': + return file_vis_dir / f"{basename}_hillshade_multi.webp" + else: + return file_vis_dir / f"{basename}_{name}.webp" + def generate_all_visualizations(self, dtm_file, basename, resolution=None): """Generate all archaeological visualizations for one DTM file. - Args: - resolution: Actual resolution from DTM geotransform. If None, uses self.resolution. + Optimisation: SharedDEM is only computed if at least one visualization + other than 'ortho'/'topo' needs to be generated (missing WebP or --force). + When all WebP outputs exist and --force is off, SharedDEM is skipped entirely (saves ~2min per file on re-runs). 
""" if resolution is None: resolution = self.resolution @@ -175,22 +188,49 @@ class LidarArchaeoPipeline: # Create per-file subdirectory file_vis_dir = self.vis_dir / basename file_vis_dir.mkdir(exist_ok=True) - - # Pre-compute shared DEM data (gradient, NaN mask, LRM) once for all visualizations - logger.info(" Pré-calcul données partagées (gradient, LRM)...") - t_shared = time.time() - shared = SharedDEM(dtm_file, resolution) - logger.info(f" ✓ Données partagées prêtes ({time.time()-t_shared:.1f}s)") - - vis_results = {} total = len(VIZ_STEPS) + # Phase 1: determine which visualizations need generation + needs_generation = {} # name -> True/False + for name, func in VIZ_STEPS: + if self.force: + needs_generation[name] = True + else: + expected_webp = self._expected_webp_path(name, basename, file_vis_dir) + needs_generation[name] = not expected_webp.exists() + + to_generate = [n for n, needed in needs_generation.items() if needed] + ign_only = all(name in ('ortho', 'topo') for name in to_generate) + needs_shared = any(name not in ('ortho', 'topo') for name in to_generate) + + if not to_generate: + logger.info(" Toutes les visualisations déjà existantes — ignorées") + # Still need to return results dict for PDF check + vis_results = {} + for name, func in VIZ_STEPS: + vis_results[name] = self._expected_webp_path(name, basename, file_vis_dir) + return vis_results + + # Phase 2: compute SharedDEM only if needed + shared = None + if needs_shared: + logger.info(" Pré-calcul données partagées (gradient, LRM)...") + t_shared = time.time() + shared = SharedDEM(dtm_file, resolution) + logger.info(f" ✓ Données partagées prêtes ({time.time()-t_shared:.1f}s)") + + # Phase 3: generate visualizations + vis_results = {} for idx, (name, func) in enumerate(VIZ_STEPS, 1): + if not needs_generation[name]: + logger.info(f" [{idx}/{total}] {name}: déjà existant, ignoré") + vis_results[name] = self._expected_webp_path(name, basename, file_vis_dir) + continue + # When --force, delete 
existing TIF to ensure clean regeneration if self.force: for tif in file_vis_dir.glob(f"{basename}_{name}.tif"): tif.unlink(missing_ok=True) - # Special cases for differently-named TIFs if name == 'pos_open': for tif in file_vis_dir.glob(f"{basename}_positive_openness.tif"): tif.unlink(missing_ok=True) @@ -201,26 +241,6 @@ class LidarArchaeoPipeline: for tif in file_vis_dir.glob(f"{basename}_hillshade_multi.tif"): tif.unlink(missing_ok=True) - # Check if output WebP already exists (skip unless --force) - if not self.force: - # Determine expected WebP filename from the viz name - # Special cases for openness and IGN overlays - if name == 'pos_open': - expected_webp = file_vis_dir / f"{basename}_positive_openness.webp" - elif name == 'neg_open': - expected_webp = file_vis_dir / f"{basename}_negative_openness.webp" - elif name == 'hillshade': - expected_webp = file_vis_dir / f"{basename}_hillshade_multi.webp" - elif name in ('ortho', 'topo'): - expected_webp = file_vis_dir / f"{basename}_{name}.webp" - else: - expected_webp = file_vis_dir / f"{basename}_{name}.webp" - - if expected_webp.exists(): - logger.info(f" [{idx}/{total}] {name}: déjà existant, ignoré") - vis_results[name] = expected_webp # Track as existing file - continue - logger.info(f" [{idx}/{total}] {name}...") t0 = time.time() try: diff --git a/lidar_pipeline/visualizations.py b/lidar_pipeline/visualizations.py index 5943a16..b6088a6 100644 --- a/lidar_pipeline/visualizations.py +++ b/lidar_pipeline/visualizations.py @@ -33,10 +33,13 @@ else: class SharedDEM: """Pre-computed DEM data shared across all visualizations. - Reads the DEM once and pre-computes: + Reads the DEM once and shares the following (lazy on first access, except the NaN mask, which is built in __init__): - NaN mask and filled DEM (avoids 20+ calls to _fill_nans) - Gradient components (shared by hillshade, slope, aspect, curvature) - LRM at 15m kernel (shared by lrm + anomalies) + + Attributes are computed lazily on first access to avoid computing + data that is never used (e.g. 
LRM when only hillshade needs generation). """ def __init__(self, dem_file, resolution): @@ -48,25 +51,69 @@ class SharedDEM: self.nan_mask = np.isnan(dem_np) self.dem_np = dem_np.astype(np.float32) - # Pre-fill NaNs once (saves ~20 calls to NearestNDInterpolator) - self.filled, _ = _fill_nans(self.dem_np) + # Lazy caches — computed on first access + self._filled = None + self._gradient = None # (dy, dx, slope_rad, slope_deg, aspect) + self._lrm_15 = None - # Initialize GPU lazy caches before any filter calls + # GPU lazy caches self._filled_gpu = None self._dem_gpu = None - # Pre-compute gradient (shared by hillshade, slope, aspect, curvature) - self.dy = np.gradient(self.filled, resolution, axis=0) - self.dx = np.gradient(self.filled, resolution, axis=1) - self.slope_rad = np.arctan(np.sqrt(self.dx**2 + self.dy**2)) - self.slope_deg = np.degrees(self.slope_rad) - self.aspect = np.mod(np.degrees(np.arctan2(self.dy, self.dx)), 360) + @property + def filled(self): + """Filled DEM (NaN interpolated) — computed lazily.""" + if self._filled is None: + logger.debug(" → Calcul filled DEM (interpolation NaN)...") + self._filled, _ = _fill_nans(self.dem_np) + return self._filled - # Pre-compute LRM at 15m (shared by lrm + anomalies) - sigma_15 = 15.0 / resolution - local_mean_15 = _filter_nanaware_from_filled(self, xp_gaussian_filter, sigma=sigma_15) - self.lrm_15 = self.dem_np - local_mean_15 - self.lrm_15[self.nan_mask] = np.nan + @property + def dy(self): + self._ensure_gradient() + return self._gradient[0] + + @property + def dx(self): + self._ensure_gradient() + return self._gradient[1] + + @property + def slope_rad(self): + self._ensure_gradient() + return self._gradient[2] + + @property + def slope_deg(self): + self._ensure_gradient() + return self._gradient[3] + + @property + def aspect(self): + self._ensure_gradient() + return self._gradient[4] + + @property + def lrm_15(self): + """LRM at 15m kernel — computed lazily.""" + if self._lrm_15 is None: + 
logger.debug(" → Calcul LRM 15m...") + sigma_15 = 15.0 / self.resolution + local_mean_15 = _filter_nanaware_from_filled(self, xp_gaussian_filter, sigma=sigma_15) + self._lrm_15 = self.dem_np - local_mean_15 + self._lrm_15[self.nan_mask] = np.nan + return self._lrm_15 + + def _ensure_gradient(self): + """Compute gradient components lazily on first access.""" + if self._gradient is None: + logger.debug(" → Calcul gradient...") + dy = np.gradient(self.filled, self.resolution, axis=0) + dx = np.gradient(self.filled, self.resolution, axis=1) + slope_rad = np.arctan(np.sqrt(dx**2 + dy**2)) + slope_deg = np.degrees(slope_rad) + aspect = np.mod(np.degrees(np.arctan2(dy, dx)), 360) + self._gradient = (dy, dx, slope_rad, slope_deg, aspect) @property def filled_gpu(self):