"""Pipeline orchestration for LiDAR archaeological analysis. LidarArchaeoPipeline coordinates the full processing chain: 1. Ground classification (PDAL/SMRF) 2. DTM generation 3. Visualization generation (17 products) 4. Rendering (WebP + PDF report) """ import logging import multiprocessing import shutil import time from concurrent.futures import ProcessPoolExecutor, as_completed from datetime import datetime from pathlib import Path import subprocess # Use 'spawn' to avoid CUDA context corruption in forked subprocesses try: multiprocessing.set_start_method('spawn') except RuntimeError: pass # Already set (e.g. in tests or when called multiple times) logger = logging.getLogger("lidar") def _file_basename(path): """Extract base name from a LAZ/LAS file, removing all known extensions. Handles double extensions like .copc.laz correctly: 'file.copc.laz' -> 'file', not 'file.copc' """ name = Path(path).name # Remove known LiDAR extensions (order matters: longest first) for ext in ['.copc.laz', '.copc.las', '.laz', '.las']: if name.lower().endswith(ext): return name[:-len(ext)] return Path(path).stem class FilePrefixFilter(logging.Filter): """Adds a file prefix to log messages when processing a specific file.""" def __init__(self): super().__init__() self.basename = None def filter(self, record): if self.basename: record.msg = f"[{self.basename}] {record.msg}" return True # Module-level filter instance so process_file can set it _file_filter = FilePrefixFilter() from .dtm import classify_ground, create_dtm_fast from .visualizations import ( SharedDEM, generate_hillshade, generate_slope, generate_aspect, generate_curvature, generate_lrm, generate_openness, generate_mslrm, generate_tpi, generate_sailore, generate_roughness, generate_anomalies, generate_wavelet, generate_flow, ) from .gpu import gpu_cleanup from .ign import generate_ign_overlay from .rendering import tif_to_png # Ordered list of visualization steps. 
# Each entry: (name, function_or_lambda)
# Adding a new visualization = add a generate_* function + register here.
VIZ_STEPS = [
    ('hillshade', generate_hillshade),
    ('slope', generate_slope),
    ('aspect', generate_aspect),
    ('curvature', generate_curvature),
    ('lrm', generate_lrm),
    ('pos_open', lambda d, b, v, r, shared=None: generate_openness(d, b, v, r, positive=True, shared=shared)),
    ('neg_open', lambda d, b, v, r, shared=None: generate_openness(d, b, v, r, positive=False, shared=shared)),
    ('mslrm', generate_mslrm),
    ('tpi', generate_tpi),
    ('sailore', generate_sailore),
    ('roughness', generate_roughness),
    ('anomalies', generate_anomalies),
    ('wavelet', generate_wavelet),
    ('flow', generate_flow),
    ('ortho', lambda d, b, v, r: generate_ign_overlay(
        d, b, v, r,
        layer='ORTHOIMAGERY.ORTHOPHOTOS',
        title='Photographie Aérienne IGN',
        legend_label='Orthophotographie\nImage aérienne',
        description='Photographie aérienne IGN (Orthophoto)',
        out_suffix='ortho')),
    ('topo', lambda d, b, v, r: generate_ign_overlay(
        d, b, v, r,
        layer='GEOGRAPHICALGRIDSYSTEMS.PLANIGNV2',
        title='Carte Topographique IGN',
        legend_label='Carte IGN\nPlan topographique',
        description='Carte topographique IGN (Plan IGN)',
        out_suffix='topo')),
]


class LidarArchaeoPipeline:
    """Orchestrates the LiDAR archaeological analysis pipeline.

    For every LAZ/LAS file of ``input_dir`` the pipeline classifies ground
    points, builds one DTM per requested resolution, runs the selected
    visualization steps and converts their TIFF outputs to the configured
    image format. Outputs go to ``output_dir/DTM`` and
    ``output_dir/visualisations``; scratch data goes to ``output_dir/temp``.
    """

    # Steps that are called without the ``shared`` SharedDEM argument.
    _IGN_STEPS = ('ortho', 'topo')

    # Output file stems that differ from the step name.
    _OUTPUT_STEMS = {
        'pos_open': 'positive_openness',
        'neg_open': 'negative_openness',
        'hillshade': 'hillshade_multi',
    }

    def __init__(self, input_dir, output_dir, resolution=0.5, workers=1, force=False,
                 ground_method='auto', force_classify=False, keep_tif=False, quality=98,
                 only_viz=None, skip_viz=None, output_format='avif'):
        """Configure the pipeline and create the output directory tree.

        Args:
            input_dir: Directory containing the LAZ/LAS files.
            output_dir: Directory receiving DTMs, visualizations and temp data.
            resolution: A number, a comma-separated string ("0.5,0.2") or a
                list/tuple of numbers. The first value is the primary one.
            workers: Number of worker processes used by ``process_all``.
            force: Regenerate visualizations even when outputs exist.
            ground_method: Forwarded to ``classify_ground`` as ``method``.
            force_classify: Forwarded to ``classify_ground`` as ``force``.
            keep_tif: Keep intermediate TIFF files after conversion.
            quality: Forwarded to ``tif_to_png``; logged as 'lossless' when
                it is 100 or more.
            only_viz: Optional collection of step names to run exclusively.
            skip_viz: Optional collection of step names to skip (ignored when
                ``only_viz`` is given).
            output_format: 'avif' selects the .avif extension for expected
                outputs; any other value selects .webp.

        Raises:
            ValueError: If no resolution is given, ``input_dir`` does not
                exist, or an unknown visualization name is requested.
        """
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)

        # Accept single float or comma-separated string for multi-resolution
        if isinstance(resolution, str):
            self.resolutions = [float(r.strip()) for r in resolution.split(',')]
        elif isinstance(resolution, (list, tuple)):
            self.resolutions = [float(r) for r in resolution]
        else:
            self.resolutions = [float(resolution)]
        if not self.resolutions:
            # An empty list/tuple would otherwise fail with a bare IndexError.
            raise ValueError("Aucune résolution fournie")
        self.resolution = self.resolutions[0]  # Primary resolution (backward compat)

        self.workers = workers
        self.force = force
        self.ground_method = ground_method
        self.force_classify = force_classify
        self.keep_tif = keep_tif
        self.quality = quality
        self.only_viz = only_viz
        self.skip_viz = skip_viz
        self.output_format = output_format
        self.temp_dir = self.output_dir / "temp"

        if not self.input_dir.exists():
            raise ValueError(f"Répertoire introuvable: {self.input_dir}")

        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.temp_dir.mkdir(exist_ok=True)

        self.dtm_dir = self.output_dir / "DTM"
        self.vis_dir = self.output_dir / "visualisations"
        for d in [self.dtm_dir, self.vis_dir]:
            d.mkdir(exist_ok=True)

        # Filter visualizations based on --only / --skip
        all_viz_names = [name for name, _ in VIZ_STEPS]
        if only_viz:
            self._check_viz_names(only_viz, all_viz_names)
            self.viz_steps = [(n, f) for n, f in VIZ_STEPS if n in only_viz]
        elif skip_viz:
            self._check_viz_names(skip_viz, all_viz_names)
            self.viz_steps = [(n, f) for n, f in VIZ_STEPS if n not in skip_viz]
        else:
            self.viz_steps = VIZ_STEPS

        logger.info("Pipeline initialisé")
        logger.info(f" Entrée : {self.input_dir}")
        logger.info(f" Sortie : {self.output_dir}")
        if len(self.resolutions) > 1:
            logger.info(f" Résolutions : {', '.join(f'{r}m/px' for r in self.resolutions)}")
        else:
            logger.info(f" Résolution : {self.resolution}m/px")
        logger.info(f" Workers : {workers}")
        logger.info(f" Force : {'OUI' if self.force else 'non (skip existing)'}")
        logger.info(f" Classification sol : {self.ground_method}")
        logger.info(f" Force classif.: {'OUI' if self.force_classify else 'non'}")
        logger.info(f" Keep TIFF : {'OUI' if self.keep_tif else 'non'}")
        logger.info(f" Qualité {self.output_format.upper()}: {self.quality if self.quality < 100 else 'lossless'}")
        if only_viz:
            logger.info(f" Visualisations: uniquement {', '.join(only_viz)}")
        elif skip_viz:
            logger.info(f" Visualisations: tout sauf {', '.join(skip_viz)}")
        logger.info(f" Visualisations: {len(self.viz_steps)}/{len(VIZ_STEPS)}")

    @staticmethod
    def _check_viz_names(requested, known):
        """Raise ValueError if ``requested`` contains a name not in ``known``."""
        invalid = set(requested) - set(known)
        if invalid:
            # Sorted so the message does not depend on set iteration order.
            raise ValueError(
                f"Visualisations inconnues: {', '.join(sorted(invalid))}. "
                f"Disponibles: {', '.join(known)}")

    def find_laz_files(self):
        """Return the sorted LAZ/LAS files found directly in the input directory.

        The extension is matched case-insensitively, consistent with how
        ``_file_basename`` strips extensions, so '.LAZ' files are not skipped
        on case-sensitive filesystems.
        """
        files = sorted(
            entry for entry in self.input_dir.iterdir()
            if entry.name.lower().endswith(('.laz', '.las'))
        )
        logger.info(f"{len(files)} fichier(s) LiDAR trouvé(s)")
        for f in files:
            logger.debug(f" {f.name}")
        return files

    def check_tools(self):
        """Check that the required external tools can be executed.

        Runs ``pdal --version`` and ``gdalinfo --version`` and logs the first
        line of each output.

        Returns:
            True if both commands succeed; False as soon as one is missing or
            exits with a non-zero status.
        """
        tools = (
            ('pdal', ['pdal', '--version']),
            ('gdal', ['gdalinfo', '--version']),
        )
        for name, argv in tools:
            try:
                result = subprocess.run(argv, capture_output=True, check=True, text=True)
            except (subprocess.CalledProcessError, FileNotFoundError):
                logger.error(f" ✗ {name} non disponible")
                return False
            version = result.stdout.strip().split('\n')[0]
            logger.info(f" ✓ {name}: {version}")
        return True

    @staticmethod
    def _expected_output_path(name, basename, file_vis_dir, output_format='avif'):
        """Return the expected output image path for a visualization step.

        A few steps use a file stem that differs from the step name (see
        ``_OUTPUT_STEMS``). The extension is 'avif' when ``output_format`` is
        'avif' and 'webp' otherwise.
        """
        ext = 'avif' if output_format == 'avif' else 'webp'
        stem = LidarArchaeoPipeline._OUTPUT_STEMS.get(name, name)
        return file_vis_dir / f"{basename}_{stem}.{ext}"

    def generate_all_visualizations(self, dtm_file, basename, resolution=None, vis_dir=None):
        """Generate all selected visualizations for one DTM file.

        SharedDEM is only computed when at least one non-IGN step has to run;
        when every expected output already exists (and ``force`` is off) the
        method returns early without computing anything.

        Args:
            dtm_file: Path of the DTM raster to visualize.
            basename: Base name used to build output file names.
            resolution: Resolution passed to each step; defaults to the
                primary resolution.
            vis_dir: Output directory; defaults to ``vis_dir/basename``.

        Returns:
            Dict mapping each step name to the expected output path (skipped
            steps), the value returned by the step function, or None when the
            step raised.
        """
        if resolution is None:
            resolution = self.resolution

        logger.info(" Génération visualisations:")

        # Use provided vis_dir (for multi-resolution subdirectories) or default
        file_vis_dir = vis_dir if vis_dir else (self.vis_dir / basename)
        file_vis_dir.mkdir(parents=True, exist_ok=True)

        total = len(self.viz_steps)

        # Phase 1: determine which visualizations need generation
        expected = {
            name: self._expected_output_path(name, basename, file_vis_dir, self.output_format)
            for name, _ in self.viz_steps
        }
        needs_generation = {
            name: bool(self.force) or not path.exists()
            for name, path in expected.items()
        }
        to_generate = [name for name, needed in needs_generation.items() if needed]

        if not to_generate:
            logger.info(" Toutes les visualisations déjà existantes — ignorées")
            # Still return the expected paths so callers get a full results dict
            return dict(expected)

        # Phase 2: compute SharedDEM only if a step that uses it will run
        shared = None
        if any(name not in self._IGN_STEPS for name in to_generate):
            logger.info(" Pré-calcul données partagées (gradient, LRM)...")
            t_shared = time.time()
            shared = SharedDEM(dtm_file, resolution)
            logger.info(f" ✓ Données partagées prêtes ({time.time()-t_shared:.1f}s)")

        # Phase 3: generate visualizations
        vis_results = {}
        for idx, (name, func) in enumerate(self.viz_steps, 1):
            if not needs_generation[name]:
                logger.info(f" [{idx}/{total}] {name}: déjà existant, ignoré")
                vis_results[name] = expected[name]
                continue

            # When --force, delete existing TIF to ensure clean regeneration.
            # Exact paths are used (not glob patterns) so that special
            # characters in the basename cannot change what gets deleted.
            if self.force:
                stem = self._OUTPUT_STEMS.get(name, name)
                for tif_name in {f"{basename}_{name}.tif", f"{basename}_{stem}.tif"}:
                    (file_vis_dir / tif_name).unlink(missing_ok=True)

            logger.info(f" [{idx}/{total}] {name}...")
            t0 = time.time()
            try:
                # IGN overlays don't use SharedDEM (they download external data)
                if name in self._IGN_STEPS:
                    result = func(dtm_file, basename, file_vis_dir, resolution)
                else:
                    result = func(dtm_file, basename, file_vis_dir, resolution, shared=shared)
                vis_results[name] = result
                elapsed = time.time() - t0
                if result:
                    logger.info(f" [{idx}/{total}] ✓ {name} ({elapsed:.1f}s)")
                else:
                    logger.warning(f" [{idx}/{total}] ✗ {name} — no output ({elapsed:.1f}s)")
            except Exception as e:
                # One failing step must not abort the remaining ones.
                vis_results[name] = None
                logger.error(f" [{idx}/{total}] ✗ {name}: {e}", exc_info=True)

            # Free GPU memory between visualizations to prevent OOM
            gpu_cleanup()

        # Convert to output format (only newly generated TIFs, not skipped ones)
        fmt_label = self.output_format.upper()
        logger.info(f" Conversion images {fmt_label}:")
        source_info = {
            'method': self.ground_method,
            'date': datetime.now().strftime('%Y-%m-%d'),
            'basename': basename,
        }
        for name, tif_file in vis_results.items():
            if tif_file and isinstance(tif_file, Path) and tif_file.suffix == '.tif' and tif_file.exists():
                img_file = tif_to_png(tif_file, file_vis_dir, resolution,
                                      keep_tif=self.keep_tif, source_info=source_info,
                                      quality=self.quality, output_format=self.output_format)
                if img_file:
                    logger.info(f" ✓ {img_file.name}")

        # Clean up remaining TIF files unless --keep-tif
        if not self.keep_tif:
            for tif in file_vis_dir.glob("*.tif"):
                tif.unlink(missing_ok=True)

        return vis_results

    @staticmethod
    def _res_suffix(resolution):
        """Return the file/directory suffix for a resolution.

        The default resolution 0.5 gets no suffix; any other value gets
        ``_r`` followed by the number with '.' replaced by 'p'
        (e.g. 0.2 -> '_r0p2'). The check is against the literal 0.5, not
        against the pipeline's primary resolution.
        """
        if resolution == 0.5:
            return ""  # Default resolution — no suffix
        res_str = f"{resolution}".replace('.', 'p')
        return f"_r{res_str}"

    def process_file(self, laz_file):
        """Process a single LAZ file through the full pipeline.

        If self.resolutions has multiple entries, each resolution is processed:
        - Resolution 0.5 uses the unsuffixed naming
        - Other resolutions use a suffix such as _r0p2 in directories/filenames
        - Ground classification is done once and shared across resolutions

        Returns:
            True when the file was processed, False when validation, ground
            classification or the first resolution's DTM failed.
        """
        basename = _file_basename(laz_file)
        _file_filter.basename = basename
        try:
            return self._process_file_steps(laz_file, basename)
        finally:
            # Always drop the log prefix, including on early returns and
            # exceptions, so later messages are not tagged with a stale name.
            _file_filter.basename = None

    def _process_file_steps(self, laz_file, basename):
        """Run validation, DTM generation and visualizations for one file."""
        t_start = time.time()

        logger.info("=" * 60)
        logger.info(f"FICHIER : {basename}")
        logger.info("=" * 60)

        # Validate file integrity before any processing
        from .dtm import validate_laz
        if not validate_laz(laz_file):
            return False

        if not self._ensure_dtms(laz_file, basename):
            return False

        # Process each resolution: visualizations
        for res in self.resolutions:
            res_suffix = self._res_suffix(res)
            dtm_path = self.dtm_dir / f"{basename}_dtm{res_suffix}.tif"

            if not dtm_path.exists():
                logger.warning(f" DTM {res}m/px manquant — visualisations ignorées")
                continue

            import rasterio
            with rasterio.open(dtm_path) as src:
                actual_res = abs(src.transform.a)

            if len(self.resolutions) > 1:
                logger.info(f" --- Résolution {res}m/px ---")

            # For additional resolutions, use suffixed subdirectory
            if res_suffix:
                vis_dir = self.vis_dir / f"{basename}{res_suffix}"
            else:
                vis_dir = self.vis_dir / basename
            vis_dir.mkdir(exist_ok=True)

            self.generate_all_visualizations(dtm_path, basename, actual_res, vis_dir=vis_dir)

        t_total = time.time() - t_start
        logger.info(f"✓ {basename} terminé en {t_total:.1f}s")
        return True

    def _ensure_dtms(self, laz_file, basename):
        """Make sure a DTM exists for every requested resolution.

        Existing DTMs whose pixel size is within 0.01 of the requested
        resolution are reused. Ground classification runs at most once, the
        first time a DTM actually has to be generated.

        Returns:
            False if classification fails or the first resolution's DTM cannot
            be created; True otherwise (failures for later resolutions are
            logged and skipped).
        """
        las_file = None

        for i, res in enumerate(self.resolutions):
            res_suffix = self._res_suffix(res)
            dtm_path = self.dtm_dir / f"{basename}_dtm{res_suffix}.tif"

            if dtm_path.exists():
                import rasterio
                try:
                    with rasterio.open(dtm_path) as src:
                        existing_res = abs(src.transform.a)
                except Exception:
                    logger.warning(f"Impossible de lire le DTM existant — régénération")
                    existing_res = None

                if existing_res is not None and not abs(existing_res - res) > 0.01:
                    if i == 0:
                        logger.info(f"[1/5] Classification du sol — sautée (DTM existant)")
                        logger.info(f"[2/5] Génération DTM {res}m/px — sautée (DTM existant)")
                    else:
                        logger.info(f" DTM {res}m/px déjà existant — ignoré")
                    continue

                if existing_res is not None:
                    logger.info(f" DTM{res_suffix} existant à {existing_res}m/px — résolution demandée {res}m/px → régénération")
                # Delete only after the dataset has been closed: removing a
                # file that is still open can fail on some platforms.
                dtm_path.unlink(missing_ok=True)

            # Need to classify/generate DTM for this resolution
            if las_file is None:
                # First time: do ground classification
                logger.info("[1/5] Classification du sol...")
                t1 = time.time()
                las_file = classify_ground(laz_file, self.temp_dir,
                                           method=self.ground_method,
                                           force=self.force_classify)
                t_classif = time.time() - t1
                if not las_file:
                    logger.error(f" ✗ Échec classification ({t_classif:.1f}s)")
                    return False
                logger.info(f" ✓ Classification terminée ({t_classif:.1f}s)")

            # Generate DTM at this resolution
            logger.info(f"{'[2/5]' if i == 0 else ' '} Génération DTM {res}m/px...")
            t2 = time.time()
            dtm_file = create_dtm_fast(las_file, basename, self.dtm_dir, res,
                                       force=self.force, output_suffix=res_suffix)
            t_dtm = time.time() - t2
            if not dtm_file:
                logger.error(f" ✗ Échec DTM {res}m/px ({t_dtm:.1f}s)")
                if i == 0:
                    return False  # Primary resolution failure is fatal
                continue  # Additional resolution failure is non-fatal
            logger.info(f" ✓ DTM {res}m/px terminé ({t_dtm:.1f}s)")

        return True

    def process_all(self):
        """Process all LAZ files in input directory.

        Files are processed in parallel when ``workers > 1`` and more than one
        file is found, sequentially otherwise. A summary is logged and the
        temp directory removed at the end. On KeyboardInterrupt the method
        returns immediately, without summary or cleanup.
        """
        files = self.find_laz_files()
        if not files:
            logger.error("Aucun fichier LAZ/LAS trouvé !")
            return

        logger.info("=" * 60)
        logger.info("PIPELINE ARCHÉOLOGIQUE LiDAR")
        logger.info("=" * 60)

        logger.info("Vérification des outils...")
        if not self.check_tools():
            logger.error("Outils manquants — abandon")
            return

        t_pipeline_start = time.time()

        if self.workers > 1 and len(files) > 1:
            results = self._run_parallel(files)
        else:
            results = self._run_sequential(files)
        if results is None:
            return  # Interrupted by the user

        self._log_summary(results, time.time() - t_pipeline_start)
        self._cleanup_temp()

    def _run_parallel(self, files):
        """Process files in worker processes; return results or None if interrupted."""
        logger.info(f"Traitement parallèle avec {self.workers} workers...")
        logger.info(f"Fichiers: {len(files)}")

        results = {}
        # Pass resolutions as comma-separated string for multiprocessing serialization
        resolutions_str = ','.join(str(r) for r in self.resolutions)

        with ProcessPoolExecutor(max_workers=self.workers) as executor:
            future_to_file = {
                executor.submit(_process_file_standalone,
                                str(laz_file), str(self.input_dir), str(self.output_dir),
                                resolutions_str, self.force,
                                self.ground_method, self.force_classify,
                                self.keep_tif, self.quality,
                                self.only_viz, self.skip_viz,
                                self.output_format): laz_file
                for laz_file in files
            }
            done = 0
            try:
                for future in as_completed(future_to_file):
                    laz_file = future_to_file[future]
                    done += 1
                    try:
                        success = future.result()
                        results[laz_file.name] = success
                        status = "✓" if success else "✗"
                        logger.info(f" [{done}/{len(files)}] {status} {laz_file.name}")
                    except Exception as e:
                        logger.error(f" [{done}/{len(files)}] ✗ {laz_file.name}: {e}")
                        logger.debug(" Traceback:", exc_info=True)
                        results[laz_file.name] = False
            except KeyboardInterrupt:
                logger.info("Interruption — annulation des travaux en cours...")
                for f in future_to_file:
                    f.cancel()
                executor.shutdown(wait=False, cancel_futures=True)
                logger.info("Travaux annulés.")
                return None

        return results

    def _run_sequential(self, files):
        """Process files one by one; return results or None if interrupted."""
        results = {}
        total = len(files)
        for idx, laz_file in enumerate(files, 1):
            logger.info(f"--- Fichier {idx}/{total} ---")
            try:
                results[laz_file.name] = self.process_file(laz_file)
            except KeyboardInterrupt:
                logger.info("Interruption — arrêt immédiat.")
                return None
            except Exception as e:
                logger.error(f"✗ Erreur traitement {laz_file.name}: {e}")
                logger.debug("Traceback:", exc_info=True)
                results[laz_file.name] = False
        return results

    def _log_summary(self, results, t_pipeline_total):
        """Log the per-file status, success/failure counts and total duration."""
        success_count = sum(1 for v in results.values() if v)
        fail_count = len(results) - success_count

        logger.info("=" * 60)
        logger.info("RÉSUMÉ")
        logger.info("=" * 60)
        for name, ok in results.items():
            status = "✓" if ok else "✗"
            logger.info(f" {status} {name}")
        logger.info("-" * 60)
        logger.info(f" Succès : {success_count}/{len(results)}")
        if fail_count:
            logger.info(f" Échecs : {fail_count}/{len(results)}")
        logger.info(f" Durée totale : {t_pipeline_total:.1f}s ({t_pipeline_total/60:.1f}min)")
        logger.info(f"\nRésultats dans: {self.output_dir}")
        logger.info(f" • DTM : {self.dtm_dir}")
        logger.info(f" • Visualisations: {self.vis_dir}")

    def _cleanup_temp(self):
        """Remove the temp directory (best effort; failures are only logged)."""
        logger.info("Nettoyage des fichiers temporaires...")
        try:
            # temp_dir normally IS output_dir/temp; dict.fromkeys drops the
            # duplicate while still covering a reassigned temp_dir.
            for path in dict.fromkeys((self.temp_dir, self.output_dir / "temp")):
                if path.exists():
                    shutil.rmtree(path)
            logger.info(" ✓ Fichiers temporaires supprimés")
        except OSError as e:
            logger.warning(f" Note: Impossible de supprimer les fichiers temporaires: {e}")


def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, force=False,
                             ground_method='auto', force_classify=False, keep_tif=False,
                             quality=98, only_viz=None, skip_viz=None, output_format='avif'):
    """Standalone function for multiprocessing — creates its own pipeline instance.

    Each worker gets its own temp directory (``output_dir/temp/<basename>``)
    to avoid file conflicts; it is removed when processing ends, whether it
    succeeded or raised.

    Returns:
        The value returned by ``LidarArchaeoPipeline.process_file``.
    """
    # Configure logging in worker process (spawn doesn't inherit parent config)
    import sys

    # Ensure UTF-8 output — spawn workers may default to ASCII
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, 'reconfigure'):
            stream.reconfigure(encoding='utf-8', errors='replace')

    worker_logger = logging.getLogger("lidar")
    if not worker_logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter("%(message)s"))
        worker_logger.setLevel(logging.INFO)
        worker_logger.addHandler(handler)
        worker_logger.addFilter(_file_filter)

    pipeline = LidarArchaeoPipeline(input_dir, output_dir, resolution=resolution, workers=1,
                                    force=force, ground_method=ground_method,
                                    force_classify=force_classify, keep_tif=keep_tif,
                                    quality=quality, only_viz=only_viz, skip_viz=skip_viz,
                                    output_format=output_format)

    basename = _file_basename(laz_file_str)
    pipeline.temp_dir = pipeline.output_dir / "temp" / basename
    pipeline.temp_dir.mkdir(parents=True, exist_ok=True)

    try:
        return pipeline.process_file(Path(laz_file_str))
    finally:
        # Clean up per-file temp directory (best effort, also on failure)
        shutil.rmtree(pipeline.temp_dir, ignore_errors=True)