"""DTM generation from classified LiDAR point clouds. Handles ground classification via PDAL (SMRF or CSF) and DTM rasterisation using scipy binned_statistic_2d. Zones without LiDAR data remain as NaN. """ import json import logging import subprocess from pathlib import Path import numpy as np import rasterio from rasterio.transform import from_bounds from scipy.stats import binned_statistic_2d logger = logging.getLogger("lidar") def _create_ground_pipeline(input_laz, output_las, method): """Create a PDAL pipeline JSON for ground classification. All methods include a ReturnNumber/NumberOfReturns >= 1 filter to handle LiDAR HD files that may contain points with invalid return numbers. Pre-processing steps (PDAL recommended workflow): 1. Reset Classification to 0 2. ELM (Extended Local Minimum) — mark low outliers as noise (Classification=7) 3. Statistical outlier removal 4. Ground classification (SMRF or CSF) 5. Extract ground points (Classification=2) Args: input_laz: Path to input LAZ/LAS file. output_las: Path to output classified LAS file. method: Ground classification method ('smrf' or 'csf'). Returns: JSON string of the PDAL pipeline. """ # Common ReturnNumber filter for LiDAR HD compatibility return_filter = { "type": "filters.range", "limits": "ReturnNumber[1:],NumberOfReturns[1:]" } # Reset Classification to 0 before preprocessing reset_classification = { "type": "filters.assign", "assignment": "Classification[:]=0" } # ELM (Extended Local Minimum) — mark low outliers as noise # Parameters tuned for rocky limestone terrain with low vegetation: # - cell=5.0m: fine resolution to capture rocky relief # - threshold=2.0m: high threshold to avoid marking rock outcrops as noise elm_filter = { "type": "filters.elm", "cell": 5.0, "threshold": 2.0 } # Statistical outlier removal outlier_filter = { "type": "filters.outlier", "method": "statistical", "mean_k": 8, "multiplier": 3.0 } # Classification filter (ground points only) ground_filter = { "type": "filters.range", "limits": "Classification[2:2]" } # Method-specific ground classification filter if method == 'smrf': ground_step = { "type": "filters.smrf", "ignore": "Classification[7:7]", "slope": 1.0, "window": 16.0, "threshold": 0.5, "scalar": 1.25 } elif method == 'csf': ground_step = { "type": "filters.csf", "resolution": 0.5, "rigidness": 3, "smooth": True, "threshold": 0.5 } else: raise ValueError(f"Méthode de classification inconnue: {method}") pipeline = { "pipeline": [ str(input_laz), return_filter, reset_classification, elm_filter, outlier_filter, ground_step, ground_filter, { "type": "writers.las", "filename": str(output_las), "extra_dims": "all" } ] } return json.dumps(pipeline) def create_smrf_pipeline(input_laz, output_las): """Create a PDAL pipeline JSON for SMRF ground classification.""" return _create_ground_pipeline(input_laz, output_las, 'smrf') def create_csf_pipeline(input_laz, output_las): """Create a PDAL pipeline JSON for CSF ground classification.""" return _create_ground_pipeline(input_laz, output_las, 'csf') def validate_laz(laz_file): """Quick integrity check for a LAZ/LAS file. Tries laspy first (fast header read), then PDAL as fallback for COPC files that laspy cannot read. Also checks that the file contains points. Returns: True if file is readable and contains points, False otherwise. """ # Try laspy first (fast) import laspy try: with laspy.open(str(laz_file)) as f: header = f.header point_count = header.point_count if point_count == 0: logger.error(f" ✗ Fichier vide (0 points): {laz_file.name}") logger.error(f" → Re-télécharger depuis https://ign.fr/lidar-hd") return False return True except Exception: pass # Fallback: try PDAL (handles COPC v1.1 that laspy can't read) import subprocess try: result = subprocess.run( ["pdal", "info", str(laz_file), "--summary"], capture_output=True, text=True, timeout=30 ) if result.returncode == 0: # Check point count from PDAL info output import json as _json try: info = _json.loads(result.stdout) count = info.get('summary', {}).get('num_points', 0) if count == 0: logger.error(f" ✗ Fichier vide (0 points PDAL): {laz_file.name}") logger.error(f" → Re-télécharger depuis https://ign.fr/lidar-hd") return False except Exception: pass # Can't parse — assume valid return True logger.error(f" ✗ Fichier illisible: {laz_file.name}") logger.error(f" PDAL: {result.stderr.strip()[:200]}") except (subprocess.TimeoutExpired, FileNotFoundError): logger.error(f" ✗ Impossible de vérifier le fichier: {laz_file.name}") logger.error(f" → Re-télécharger depuis https://ign.fr/lidar-hd") return False def _read_with_pdal(laz_file): """Read a LAZ/LAS file via PDAL when laspy fails (e.g. COPC v1.1). Returns a laspy.LasData object, or None on failure. """ import subprocess import tempfile import os try: # Convert COPC to LAS via PDAL, then read with laspy with tempfile.NamedTemporaryFile(suffix='.las', delete=False) as tmp: tmp_path = tmp.name pipeline = json.dumps({ "pipeline": [ str(laz_file), {"type": "writers.las", "filename": tmp_path} ] }) result = subprocess.run( ["pdal", "pipeline", "--stdin"], input=pipeline, capture_output=True, text=True, timeout=300 ) if result.returncode != 0: logger.warning(f" PDAL conversion échouée: {result.stderr[:200]}") try: os.unlink(tmp_path) except Exception: pass return None import laspy las = laspy.read(tmp_path) try: os.unlink(tmp_path) except Exception: pass if len(las.points) == 0: logger.warning(f" PDAL: conversion réussie mais 0 points") return None return las except Exception as e: logger.warning(f" PDAL fallback échoué: {e}") return None def detect_ground_method(laz_file): """Detect the best ground classification method based on point cloud statistics. Auto-selects between SMRF and CSF: - SMRF: fast, robust for most natural terrain (PDAL recommended default) - CSF: cloth simulation, better for complex/urban terrain Falls back to SMRF if the file cannot be read or attributes are missing. Args: laz_file: Path to input LAZ/LAS file. Returns: String: 'smrf' or 'csf' """ import laspy # Try laspy first, then PDAL for COPC files las = None try: las = laspy.read(str(laz_file)) except Exception as e: logger.warning(f" laspy: {e}") logger.info(f" → Lecture via PDAL pour auto-détection...") las = _read_with_pdal(laz_file) if las is None: logger.info(f" → Méthode: SMRF (défaut — lecture impossible)") return 'smrf' total_points = len(las.points) if total_points == 0: logger.warning(f" Nuage vide (0 points) — méthode par défaut: SMRF") return 'smrf' z = np.array(las.z) # Height variance (always available) z_std = float(np.std(z)) z_range = float(np.max(z) - np.min(z)) # Try to get NumberOfReturns (may not exist in all point formats) single_return_ratio = 0.0 try: num_returns = np.array(las.NumberOfReturns) single_return_count = int(np.sum(num_returns == 1)) single_return_ratio = single_return_count / total_points if total_points > 0 else 0 except AttributeError: logger.debug(" NumberOfReturns non disponible — utilisation de la variance Z uniquement") logger.info(f" Analyse du nuage: {total_points:,} points, " f"ratio_retours_uniques={single_return_ratio:.2f}, " f"écart_Z={z_std:.1f}m, amplitude_Z={z_range:.1f}m") # Decision logic: # - High single-return ratio (>0.6) → urban (buildings, roads) → CSF (cloth simulation) # - High elevation variance (>30m) → complex/mountainous terrain → CSF # - Default → SMRF (fast, robust for most natural terrain) if single_return_ratio > 0.6: method = 'csf' reason = f"ratio retours uniques={single_return_ratio:.2f} > 0.6 → milieu urbain" elif z_std > 30: method = 'csf' reason = f"écart_Z={z_std:.1f}m > 30m → terrain complexe" else: method = 'smrf' reason = f"terrain naturel standard" logger.info(f" → Méthode: {method.upper()} ({reason})") return method def classify_ground(laz_file, temp_dir, method='auto', force=False): """Classify ground points using PDAL ground classification filter. Args: laz_file: Path to input LAZ/LAS file. temp_dir: Directory for temporary files (pipeline.json, ground.las). method: Ground classification method ('auto', 'smrf', or 'csf'). force: If True, reclassify even if output file already exists. Returns: Path to classified ground LAS file, or None on failure. """ import laspy # noqa: ensure available # Auto-detect method if requested if method == 'auto': method = detect_ground_method(laz_file) logger.info(f" Classification sol: {method.upper()} (auto)") else: logger.info(f" Classification sol: {method.upper()} (forcé)") # Use shared basename extraction function from .pipeline import _file_basename laz_base = _file_basename(laz_file) output_las = temp_dir / f"{laz_base}_ground_{method}.las" if output_las.exists() and not force: logger.info(f" Classification {method.upper()} déjà effectuée — fichier existant réutilisé") return output_las if force and output_las.exists(): logger.info(f" Reclassification forcée — suppression de {output_las.name}") output_las.unlink() pipeline_json = _create_ground_pipeline(laz_file, output_las, method) pipeline_file = temp_dir / f"pipeline_{method}.json" with open(pipeline_file, 'w') as f: f.write(pipeline_json) try: subprocess.run( ["pdal", "pipeline", str(pipeline_file)], capture_output=True, check=True ) # Verify that ground file has points if output_las.exists() and output_las.stat().st_size < 100: logger.error(f" ✗ Fichier ground vide (taille < 100 octets)") output_las.unlink(missing_ok=True) return None logger.info(f" ✓ Classification sol {method.upper()} terminée") return output_las except subprocess.CalledProcessError as e: error_msg = e.stderr.decode() if e.stderr else str(e) logger.warning(f" ✗ Erreur classification PDAL ({method.upper()}): {error_msg}") # Try repairing file with laspy if PDAL fails on EVLR/VLR if 'VLR' in error_msg or 'Invalid' in error_msg: logger.info(f" → Tentative de réparation du fichier avec laspy...") repaired_las = temp_dir / f"{laz_base}_repaired.las" if _repair_laz_with_laspy(laz_file, repaired_las): # Retry PDAL pipeline with repaired file pipeline_json = _create_ground_pipeline(repaired_las, output_las, method) with open(pipeline_file, 'w') as f: f.write(pipeline_json) try: subprocess.run( ["pdal", "pipeline", str(pipeline_file)], capture_output=True, check=True ) logger.info(f" ✓ Classification sol {method.upper()} terminée (fichier réparé)") return output_las except subprocess.CalledProcessError as e2: error_msg2 = e2.stderr.decode() if e2.stderr else str(e2) logger.error(f" ✗ Échec classification même après réparation: {error_msg2}") else: logger.error(f" ✗ Impossible de réparer le fichier") return None def _repair_laz_with_laspy(input_laz, output_las): """Try to repair a corrupt LAZ file by re-reading with laspy and saving as LAS. Works around PDAL errors like 'Invalid Extended VLR size' by stripping problematic VLR/EVLR metadata during re-save. Args: input_laz: Path to corrupt LAZ/LAS file. output_las: Path for repaired LAS output. Returns: True if repair succeeded, False otherwise. """ import laspy try: las = laspy.read(str(input_laz)) las.write(str(output_las)) logger.info(f" ✓ Fichier réparé via laspy ({len(las.points):,} points)") return True except Exception as e: logger.warning(f" ✗ Réparation laspy échouée: {e}") return False def create_dtm_fast(las_file, basename, dtm_dir, resolution, force=False, output_suffix=""): """Create DTM using fast binning method with gap filling. Args: las_file: Path to classified ground LAS file. basename: Base name for output file. dtm_dir: Directory for output DTM GeoTIFF. resolution: Grid resolution in meters per pixel. force: If True, regenerate even if DTM already exists. output_suffix: Suffix for output filename (e.g. '_r0p2' for additional resolutions). Returns: Path to output DTM GeoTIFF, or None on failure. """ output_tif = dtm_dir / f"{basename}_dtm{output_suffix}.tif" if output_tif.exists() and not force: logger.info(f" DTM déjà existant — fichier réutilisé: {output_tif.name}") return output_tif import laspy logger.info(" → Génération DTM...") try: las = laspy.read(str(las_file)) except Exception as e: # laspy can't read COPC v1.1 — try PDAL conversion logger.warning(f" laspy: {e}") logger.info(f" → Conversion via PDAL pour lecture COPC...") las = _read_with_pdal(las_file) if las is None: logger.error(f" ✗ Impossible de lire {las_file.name}") return None if len(las.points) == 0: logger.error(f" ✗ Fichier vide (0 points): {las_file.name}") return None try: min_x, max_x = float(las.header.min[0]), float(las.header.max[0]) min_y, max_y = float(las.header.min[1]), float(las.header.max[1]) width = int(np.ceil((max_x - min_x) / resolution)) height = int(np.ceil((max_y - min_y) / resolution)) logger.debug(f" Bounds: X[{min_x:.1f}, {max_x:.1f}] Y[{min_y:.1f}, {max_y:.1f}]") logger.debug(f" Grid: {width}x{height} pixels ({len(las.points):,} points)") logger.info(f" Rasterisation {width}x{height} ({len(las.points):,} points)...") stat = binned_statistic_2d( las.x, las.y, las.z, statistic='mean', bins=[width, height], range=[[min_x, max_x], [min_y, max_y]] ) dtm = stat.statistic.T dtm = dtm[::-1, :] # Flip Y so north is at top # Fill small gaps (< 1m from existing data) while keeping large gaps as NaN nan_count = np.count_nonzero(np.isnan(dtm)) if nan_count > 0: total = dtm.size nan_pct = 100.0 * nan_count / total logger.info(f" {nan_count:,} pixels sans données ({nan_pct:.1f}%)") max_gap_pixels = max(1, int(1.0 / resolution)) from rasterio.fill import fillnodata valid_mask = ~np.isnan(dtm) dtm_filled = fillnodata(dtm, mask=valid_mask, max_search_distance=max_gap_pixels) small_gap_mask = np.isnan(dtm) & ~np.isnan(dtm_filled) filled_count = np.count_nonzero(small_gap_mask) if filled_count > 0: dtm = np.where(small_gap_mask, dtm_filled, dtm) logger.info(f" {filled_count:,} petits trous comblés (< {max_gap_pixels}px)") remaining = np.count_nonzero(np.isnan(dtm)) logger.info(f" {remaining:,} pixels restent sans données (grands écarts)") # Save as GeoTIFF output_tif = dtm_dir / f"{basename}_dtm.tif" transform = from_bounds(min_x, min_y, max_x, max_y, width, height) with rasterio.open( output_tif, 'w', driver='GTiff', height=height, width=width, count=1, dtype='float32', crs='EPSG:2154', transform=transform, nodata=float('nan'), compress='lzw' ) as dst: dst.write(dtm.astype('float32'), 1) logger.info(f" ✓ DTM créé: {output_tif.name}") return output_tif except Exception as e: logger.error(f" ✗ Erreur DTM: {e}", exc_info=True) return None