Skip ground classification when DTM already exists

If the DTM .tif exists and --force is not set, skip both ground classification and DTM generation entirely. Previously, the pipeline would spend 3+ minutes reclassifying ground even when the DTM was already present and would be reused anyway. Note that the DTM .tif is deleted at the end of each run unless --keep-tif is set, so this skip only takes effect when a previous run kept its DTM.

Also includes: SharedDEM cache, enhanced WebP cartouche (compass rose, adaptive scale bar, enriched info bar), removal of the COG/web-viewer steps, UTF-8 output fix for parallel workers, and skip logic for an existing DTM and an existing PDF report.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-13 23:41:21 +02:00
parent f01683819c
commit 5b74322077
9 changed files with 564 additions and 942 deletions
--- a/lidar_pipeline/pipeline.py
+++ b/lidar_pipeline/pipeline.py
@ -12,6 +12,7 @@ import multiprocessing
 import shutil
 import time
 from concurrent.futures import ProcessPoolExecutor, as_completed
+from datetime import datetime
 from pathlib import Path
 import subprocess

@ -55,6 +56,7 @@ _file_filter = FilePrefixFilter()

 from .dtm import classify_ground, create_dtm_fast
 from .visualizations import (
+    SharedDEM,
    generate_hillshade, generate_slope, generate_aspect, generate_curvature,
    generate_lrm, generate_svf, generate_openness,
    generate_mslrm, generate_tpi, generate_sailore,
@ -63,8 +65,7 @@ from .visualizations import (
 )
 from .gpu import gpu_cleanup
 from .ign import generate_ign_overlay
-from .rendering import tif_to_png, generate_pdf_report, convert_to_cog, generate_cog_metadata
-from .viewer import generate_viewer
+from .rendering import tif_to_png, generate_pdf_report


 # Ordered list of visualization steps.
@ -106,7 +107,7 @@ VIZ_STEPS = [
 class LidarArchaeoPipeline:
    """Orchestrates the LiDAR archaeological analysis pipeline."""

-    def __init__(self, input_dir, output_dir, resolution=0.5, workers=1, force=False, ground_method='auto', force_classify=False, keep_tif=False, no_viewer=False):
+    def __init__(self, input_dir, output_dir, resolution=0.5, workers=1, force=False, ground_method='auto', force_classify=False, keep_tif=False):
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        self.resolution = resolution
@ -115,7 +116,6 @@ class LidarArchaeoPipeline:
        self.ground_method = ground_method
        self.force_classify = force_classify
        self.keep_tif = keep_tif
-        self.no_viewer = no_viewer
        self.temp_dir = self.output_dir / "temp"

        if not self.input_dir.exists():
@ -140,7 +140,6 @@ class LidarArchaeoPipeline:
        logger.info(f"  Classification sol : {self.ground_method}")
        logger.info(f"  Force classif.: {'OUI' if self.force_classify else 'non'}")
        logger.info(f"  Keep TIFF    : {'OUI' if self.keep_tif else 'non'}")
-        logger.info(f"  Viewer web   : {'non' if self.no_viewer else 'OUI'}")

    def find_laz_files(self):
        """Find all LAZ/LAS files in input directory."""
@ -173,6 +172,12 @@ class LidarArchaeoPipeline:
        file_vis_dir = self.vis_dir / basename
        file_vis_dir.mkdir(exist_ok=True)

+        # Pre-compute shared DEM data (gradient, NaN mask, LRM) once for all visualizations
+        logger.info("    Pré-calcul données partagées (gradient, LRM)...")
+        t_shared = time.time()
+        shared = SharedDEM(dtm_file, self.resolution)
+        logger.info(f"    ✓ Données partagées prêtes ({time.time()-t_shared:.1f}s)")
+
        vis_results = {}
        total = len(VIZ_STEPS)
        elapsed_times = []
@ -216,7 +221,11 @@ class LidarArchaeoPipeline:
            logger.info(f"    [{idx}/{total}] {name}...")
            t0 = time.time()
            try:
-                result = func(dtm_file, basename, file_vis_dir, self.resolution)
+                # IGN overlays don't use SharedDEM (they download external data)
+                if name in ('ortho', 'topo'):
+                    result = func(dtm_file, basename, file_vis_dir, self.resolution)
+                else:
+                    result = func(dtm_file, basename, file_vis_dir, self.resolution, shared=shared)
                vis_results[name] = result
                elapsed = time.time() - t0
                elapsed_times.append(elapsed)
@ -237,20 +246,23 @@ class LidarArchaeoPipeline:
            gpu_cleanup()

        # Convert to WebP (only newly generated TIFs, not skipped ones)
-        # Also generate COGs for web viewer if enabled
        logger.info("  Conversion images WebP:")
-        cog_dir = file_vis_dir / "cog"
-        if not self.no_viewer:
-            cog_dir.mkdir(exist_ok=True)
+        source_info = {
+            'method': self.ground_method,
+            'date': datetime.now().strftime('%Y-%m-%d'),
+            'basename': basename,
+        }
        for name, tif_file in vis_results.items():
            if tif_file and isinstance(tif_file, Path) and tif_file.suffix == '.tif' and tif_file.exists():
-                # Generate COG before WebP conversion (which may delete the TIF)
-                if not self.no_viewer:
-                    convert_to_cog(tif_file, cog_dir)
-                webp_file = tif_to_png(tif_file, file_vis_dir, self.resolution, keep_tif=self.keep_tif or not self.no_viewer)
+                webp_file = tif_to_png(tif_file, file_vis_dir, self.resolution, keep_tif=self.keep_tif, source_info=source_info)
                if webp_file:
                    logger.info(f"    ✓ {webp_file.name}")

+        # Clean up remaining TIF files unless --keep-tif
+        if not self.keep_tif:
+            for tif in file_vis_dir.glob("*.tif"):
+                tif.unlink(missing_ok=True)
+
        return vis_results

    def process_file(self, laz_file):
@ -263,60 +275,55 @@ class LidarArchaeoPipeline:
        logger.info(f"FICHIER : {basename}")
        logger.info("=" * 60)

-        # Step 1: Ground classification
-        logger.info("[1/6] Classification du sol...")
-        t1 = time.time()
-        las_file = classify_ground(laz_file, self.temp_dir, method=self.ground_method, force=self.force_classify)
-        t_classif = time.time() - t1
-        if not las_file:
-            logger.error(f"  ✗ Échec classification ({t_classif:.1f}s)")
-            return False
-        logger.info(f"  ✓ Classification terminée ({t_classif:.1f}s)")
+        # Skip ground classification + DTM if DTM already exists (unless --force)
+        dtm_path = self.dtm_dir / f"{basename}_dtm.tif"
+        if dtm_path.exists() and not self.force:
+            logger.info("[1/5] Classification du sol — sautée (DTM existant)")
+            logger.info("[2/5] Génération DTM — sautée (DTM existant)")
+            dtm_file = dtm_path
+            t_classif = 0
+            t_dtm = 0
+        else:
+            # Step 1: Ground classification
+            logger.info("[1/5] Classification du sol...")
+            t1 = time.time()
+            las_file = classify_ground(laz_file, self.temp_dir, method=self.ground_method, force=self.force_classify)
+            t_classif = time.time() - t1
+            if not las_file:
+                logger.error(f"  ✗ Échec classification ({t_classif:.1f}s)")
+                return False
+            logger.info(f"  ✓ Classification terminée ({t_classif:.1f}s)")

-        # Step 2: Generate DTM
-        logger.info("[2/6] Génération DTM...")
-        t2 = time.time()
-        dtm_file = create_dtm_fast(las_file, basename, self.dtm_dir, self.resolution)
-        t_dtm = time.time() - t2
-        if not dtm_file:
-            logger.error(f"  ✗ Échec DTM ({t_dtm:.1f}s)")
-            return False
-        logger.info(f"  ✓ DTM terminé ({t_dtm:.1f}s)")
+            # Step 2: Generate DTM
+            logger.info("[2/5] Génération DTM...")
+            t2 = time.time()
+            dtm_file = create_dtm_fast(las_file, basename, self.dtm_dir, self.resolution, force=self.force)
+            t_dtm = time.time() - t2
+            if not dtm_file:
+                logger.error(f"  ✗ Échec DTM ({t_dtm:.1f}s)")
+                return False
+            logger.info(f"  ✓ DTM terminé ({t_dtm:.1f}s)")

        # Step 3: Visualizations
-        logger.info("[3/6] Visualisations archéologiques...")
+        logger.info("[3/5] Visualisations archéologiques...")
        self.generate_all_visualizations(dtm_file, basename)

        # Step 4: PDF report
        file_vis_dir = self.vis_dir / basename
-        logger.info("[4/6] Rapport PDF A3...")
-        t4 = time.time()
-        generate_pdf_report(basename, file_vis_dir, self.pdf_dir, self.resolution)
-        t_pdf = time.time() - t4
-        logger.info(f"  ✓ Rapport PDF terminé ({t_pdf:.1f}s)")
-
-        # Step 5: COGs for web viewer
-        logger.info("[5/6] Génération métadonnées viewer web...")
-        t5 = time.time()
-        if not self.no_viewer:
-            # Convert DTM to COG as well
-            dtm_cog_dir = self.dtm_dir / "cog"
-            dtm_cog_dir.mkdir(exist_ok=True)
-            for dtm_file in sorted(self.dtm_dir.glob(f"{basename}_dtm.tif")):
-                convert_to_cog(dtm_file, dtm_cog_dir)
-            generate_cog_metadata(self.vis_dir, basename)
-        t_cog = time.time() - t5
-        logger.info(f"  ✓ Métadonnées viewer web terminées ({t_cog:.1f}s)")
-
-        # Step 6: Web viewer
-        if not self.no_viewer:
-            logger.info("[6/6] Génération viewer web...")
-            t6 = time.time()
-            generate_viewer(basename, file_vis_dir, self.vis_dir)
-            t_viewer = time.time() - t6
-            logger.info(f"  ✓ Viewer web terminé ({t_viewer:.1f}s)")
+        pdf_file = self.pdf_dir / f"{basename}_rapport.pdf"
+        if pdf_file.exists() and not self.force:
+            logger.info(f"[4/5] Rapport PDF déjà existant — ignoré: {pdf_file.name}")
        else:
-            logger.info("[6/6] Viewer web: ignoré (--no-viewer)")
+            logger.info("[4/5] Rapport PDF A3...")
+            t4 = time.time()
+            generate_pdf_report(basename, file_vis_dir, self.pdf_dir, self.resolution)
+            t_pdf = time.time() - t4
+            logger.info(f"  ✓ Rapport PDF terminé ({t_pdf:.1f}s)")
+
+        # Step 5: Clean up DTM TIF unless --keep-tif
+        if not self.keep_tif:
+            for dtm_file in sorted(self.dtm_dir.glob(f"{basename}_dtm.tif")):
+                dtm_file.unlink(missing_ok=True)

        t_total = time.time() - t_start
        logger.info(f"✓ {basename} terminé en {t_total:.1f}s")
@ -349,7 +356,7 @@ class LidarArchaeoPipeline:
            logger.info(f"Fichiers: {len(files)}")
            with ProcessPoolExecutor(max_workers=self.workers) as executor:
                future_to_file = {
-                    executor.submit(_process_file_standalone, str(laz_file), str(self.input_dir), str(self.output_dir), self.resolution, self.force, self.ground_method, self.force_classify, self.keep_tif, self.no_viewer): laz_file
+                    executor.submit(_process_file_standalone, str(laz_file), str(self.input_dir), str(self.output_dir), self.resolution, self.force, self.ground_method, self.force_classify, self.keep_tif): laz_file
                    for laz_file in files
                }
                done = 0
@ -411,7 +418,7 @@ class LidarArchaeoPipeline:
            logger.warning(f"  Note: Impossible de supprimer les fichiers temporaires: {e}")


-def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, force=False, ground_method='auto', force_classify=False, keep_tif=False, no_viewer=False):
+def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, force=False, ground_method='auto', force_classify=False, keep_tif=False):
    """Standalone function for multiprocessing — creates its own pipeline instance.

    Each worker gets its own temp directory to avoid file conflicts.
@ -419,6 +426,11 @@ def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, fo
    # Configure logging in worker process (spawn doesn't inherit parent config)
    import logging
    import sys
+    # Ensure UTF-8 output — spawn workers may default to ASCII
+    if hasattr(sys.stdout, 'reconfigure'):
+        sys.stdout.reconfigure(encoding='utf-8', errors='replace')
+    if hasattr(sys.stderr, 'reconfigure'):
+        sys.stderr.reconfigure(encoding='utf-8', errors='replace')
    worker_logger = logging.getLogger("lidar")
    if not worker_logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
@ -427,7 +439,7 @@ def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, fo
        worker_logger.addHandler(handler)
    worker_logger.addFilter(_file_filter)

-    pipeline = LidarArchaeoPipeline(input_dir, output_dir, resolution=resolution, workers=1, force=force, ground_method=ground_method, force_classify=force_classify, keep_tif=keep_tif, no_viewer=no_viewer)
+    pipeline = LidarArchaeoPipeline(input_dir, output_dir, resolution=resolution, workers=1, force=force, ground_method=ground_method, force_classify=force_classify, keep_tif=keep_tif)
    basename = _file_basename(laz_file_str)
    pipeline.temp_dir = pipeline.output_dir / "temp" / basename
    pipeline.temp_dir.mkdir(exist_ok=True)