From 7ac08f75dcb9f6a3ff5fbda1b017db32c9fb2839 Mon Sep 17 00:00:00 2001
From: Jacquin Antoine
Date: Thu, 14 May 2026 17:59:32 +0200
Subject: [PATCH] Add LAZ integrity check to skip corrupted files early

Validate file readability before PDAL classification. Corrupted/truncated
files are detected instantly via laspy header read and skipped with a clear
error message pointing to re-download, instead of wasting time on PDAL and
repair attempts that will fail anyway.

Co-Authored-By: Claude Opus 4.6
---
Note: the dtm.py hunk below was revised after export (string paths are
now accepted by validate_laz); the blob id on its "index" line predates
that revision.

 lidar_pipeline/dtm.py      | 32 ++++++++++++++++++++++++++++++++
 lidar_pipeline/pipeline.py |  5 +++++
 2 files changed, 37 insertions(+)

diff --git a/lidar_pipeline/dtm.py b/lidar_pipeline/dtm.py
index 7dd0ddd..140866f 100644
--- a/lidar_pipeline/dtm.py
+++ b/lidar_pipeline/dtm.py
@@ -124,6 +124,38 @@ def create_csf_pipeline(input_laz, output_las):
     return _create_ground_pipeline(input_laz, output_las, 'csf')
 
 
+def validate_laz(laz_file):
+    """Quick integrity check for a LAZ/LAS file.
+
+    Opens the file with laspy and reads its header so that unreadable
+    files are rejected before launching expensive PDAL processing.
+    Only the header is accessed here; point records are not read.
+
+    Args:
+        laz_file: Path to the LAZ/LAS file, as ``str`` or ``pathlib.Path``.
+
+    Returns:
+        True if the file could be opened and its header read, False
+        otherwise (the failure is logged with a re-download hint).
+    """
+    from pathlib import Path
+
+    import laspy
+
+    try:
+        with laspy.open(str(laz_file)) as f:
+            # Only a header field is read; f.read() is never called.
+            _ = f.header.point_count
+    except Exception as e:
+        # Broad catch is deliberate: any read failure means "skip this file".
+        # Path() keeps .name working when a plain string path is passed in.
+        logger.error(f" ✗ Fichier corrompu ou incomplet: {Path(laz_file).name}")
+        logger.error(f" {e}")
+        logger.error(" → Re-télécharger depuis https://ign.fr/lidar-hd")
+        return False
+    return True
+
+
 def detect_ground_method(laz_file):
     """Detect the best ground classification method based on point cloud statistics.
 
diff --git a/lidar_pipeline/pipeline.py b/lidar_pipeline/pipeline.py index bd21759..5b078a6 100644 --- a/lidar_pipeline/pipeline.py +++ b/lidar_pipeline/pipeline.py @@ -272,6 +272,11 @@ class LidarArchaeoPipeline: logger.info(f"FICHIER : {basename}") logger.info("=" * 60) + # Validate file integrity before any processing + from .dtm import validate_laz + if not validate_laz(laz_file): + return False + # Skip ground classification + DTM if DTM already exists with matching resolution # --force only affects visualizations/PDF, not classification/DTM # Use --force-classification to force reclassification