diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6cf3bef --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,71 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +LiDAR archaeological processing pipeline that generates 19 terrain visualizations from LAZ/LAS point clouds. Runs in Docker with optional NVIDIA GPU acceleration (CuPy). Designed for French LiDAR HD data in Lambert 93 (EPSG:2154). + +## Commands + +All commands run inside Docker. Use `./run.sh` as the primary interface. + +```bash +./run.sh -g # Standard run with GPU +./run.sh -g -w 4 # GPU + 4 parallel workers +./run.sh -g -r 0.2 # High resolution (0.2m/px) +./run.sh --test # Run unit tests +./run.sh -g --file LHD_FXX_1000_6882_PTS_LAMB93_IGN69.copc # Single file +./run.sh --ground-classification pmf # Force PMF ground classification +./run.sh # Print help (no args) +``` + +Direct Docker: +```bash +docker build -t lidar-lidar . +docker run --rm --gpus all -v $(pwd)/input:/data/input:ro -v $(pwd)/output:/data/output lidar-lidar +``` + +## Architecture + +### Module responsibilities + +- **`cli.py`** — argparse + logging setup. Entry point via `python -m lidar_pipeline`. +- **`pipeline.py`** — `LidarArchaeoPipeline` orchestrator. `VIZ_STEPS` registry maps names to generate functions. `FilePrefixFilter` for parallel logging. +- **`dtm.py`** — PDAL ground classification (SMRF/PMF/CSF + auto-detection) and DTM generation via scipy `binned_statistic_2d`. +- **`visualizations.py`** — 17 `generate_*` functions + 2 IGN overlay lambdas. All take `(dem_file, basename, vis_dir, resolution)` and return a TIF path or None. +- **`gpu.py`** — CuPy/numpy abstraction: `HAS_GPU`, `to_gpu()`, `to_cpu()`, `xp_gaussian_filter()`, `xp_uniform_filter()`, `xp_minimum_filter()`, `gpu_cleanup()`. Falls back to CPU gracefully. +- **`ign.py`** — IGN WMTS tile download + overlay generation for orthophoto and topographic maps. +- **`rendering.py`** — `COLORMAPS` dict maps filename keywords to (cmap, title, legend, description). `tif_to_png()` converts TIF→WebP with legend/scale/north arrow. `generate_pdf_report()` creates A3 PDF. + +### Adding a visualization + +Three places must be updated: +1. `visualizations.py` — add `generate_X(dem_file, basename, vis_dir, resolution)` function +2. `pipeline.py` `VIZ_STEPS` — add `('name', generate_X)` entry +3. `rendering.py` `COLORMAPS` — add entry keyed by the output filename keyword + +### Ground classification + +Auto-detection in `dtm.py` `detect_ground_method()`: +- Single-return ratio > 0.6 → PMF (urban terrain) +- Height std > 30m → CSF (complex/mountainous terrain) +- Default → SMRF (natural terrain) + +Override with `--ground-classification {auto,smrf,pmf,csf}`. + +### NaN handling + +DTM zones without LiDAR data are kept as NaN (no interpolation). Visualization functions use `_fill_nans()` and `_filter_nanaware()` to avoid NaN propagation through filters. + +### Parallel processing + +Uses `ProcessPoolExecutor` with `'spawn'` start method (required for CUDA). Each worker gets its own temp directory (`temp_{basename}`). `_process_file_standalone()` configures its own logger with `_file_filter` for per-file log prefixes. + +## Key conventions + +- **Language**: UI messages and comments in French. Code identifiers in English. +- **Logging**: Use `logger = logging.getLogger("lidar")`. Prefix per-file logs via `_file_filter.basename`. +- **GPU pattern**: `arr_gpu = to_gpu(arr)` → compute → `result = to_cpu(arr_gpu)` → `gpu_cleanup()` between visualizations. +- **Output format**: Visualizations saved as WebP (not PNG). PDF reports use `PILImage.open().convert('RGB')`. +- **Tests**: Run only inside Docker via `./run.sh --test`. Synthetic DEM fixture in `tests/conftest.py`. \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 2438db3..7270bbf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ ENV TZ=Europe/Paris # Install PDAL and system packages RUN apt-get update && apt-get install -y --no-install-recommends \ pdal \ + liblaszip8 \ gdal-bin \ python3-gdal \ python3-pip \ @@ -23,7 +24,8 @@ RUN pip3 install --no-cache-dir \ matplotlib \ whitebox \ rasterio \ - 'laspy[laspy]' \ + 'laspy[lazrs]' \ + lazrs \ scikit-image \ scikit-learn \ scipy \ diff --git a/lidar_pipeline/cli.py b/lidar_pipeline/cli.py index 34960d2..672ec85 100644 --- a/lidar_pipeline/cli.py +++ b/lidar_pipeline/cli.py @@ -35,6 +35,7 @@ def setup_logging(verbose=False, debug=False): logger.setLevel(level) logger.handlers.clear() logger.addHandler(handler) + logger.propagate = False # Prevent double logging via root logger # Also configure the root logger so worker processes log properly root_logger = logging.getLogger() @@ -102,6 +103,17 @@ Exemples: action="store_true", help="Régénérer tous les fichiers même si les WebP existent déjà" ) + parser.add_argument( + "--force-classification", + action="store_true", + help="Reclassifier le sol même si le fichier .las existe déjà" + ) + parser.add_argument( + "--ground-classification", + choices=["auto", "smrf", "pmf", "csf"], + default="auto", + help="Méthode de classification du sol : auto (détection), smrf, pmf, csf (défaut: auto)" + ) parser.add_argument( "--file", nargs="+", @@ -141,7 +153,9 @@ Exemples: output_dir=args.output, resolution=args.resolution, workers=args.workers, - force=args.force + force=args.force, + ground_method=args.ground_classification, + force_classify=args.force_classification ) # If --file is specified, process only matching files diff --git a/lidar_pipeline/dtm.py b/lidar_pipeline/dtm.py index 18d3b4d..82b5e98 100644 --- a/lidar_pipeline/dtm.py +++ b/lidar_pipeline/dtm.py @@ -17,38 +17,102 @@ from scipy.stats import binned_statistic_2d logger = logging.getLogger("lidar") -def create_smrf_pipeline(input_laz, output_las): - """Create a PDAL pipeline JSON for SMRF ground classification. +def _create_ground_pipeline(input_laz, output_las, method): + """Create a PDAL pipeline JSON for ground classification. - Includes a filter for ReturnNumber/NumberOfReturns >= 1 to handle + All methods include a ReturnNumber/NumberOfReturns >= 1 filter to handle LiDAR HD files that may contain points with invalid return numbers. + Pre-processing steps (PDAL recommended workflow): + 1. Reset Classification to 0 + 2. ELM (Extended Local Minimum) — mark low outliers as noise (Classification=7) + 3. Statistical outlier removal + 4. Ground classification (SMRF/PMF/CSF) + 5. Extract ground points (Classification=2) + Args: input_laz: Path to input LAZ/LAS file. output_las: Path to output classified LAS file. + method: Ground classification method ('smrf', 'pmf', or 'csf'). Returns: JSON string of the PDAL pipeline. """ + # Common ReturnNumber filter for LiDAR HD compatibility + return_filter = { + "type": "filters.range", + "limits": "ReturnNumber[1:],NumberOfReturns[1:]" + } + + # Reset Classification to 0 before preprocessing + reset_classification = { + "type": "filters.assign", + "assignment": "Classification[:]=0" + } + + # ELM (Extended Local Minimum) — mark low outliers as noise + # Parameters tuned for rocky limestone terrain with low vegetation: + # - cell=5.0m: fine resolution to capture rocky relief + # - threshold=2.0m: high threshold to avoid marking rock outcrops as noise + elm_filter = { + "type": "filters.elm", + "cell": 5.0, + "threshold": 2.0 + } + + # Statistical outlier removal + outlier_filter = { + "type": "filters.outlier", + "method": "statistical", + "mean_k": 8, + "multiplier": 3.0 + } + + # Classification filter (ground points only) + ground_filter = { + "type": "filters.range", + "limits": "Classification[2:2]" + } + + # Method-specific ground classification filter + if method == 'smrf': + ground_step = { + "type": "filters.smrf", + "ignore": "Classification[7:7]", + "slope": 1.0, + "window": 16.0, + "threshold": 0.5, + "scalar": 1.25 + } + elif method == 'pmf': + ground_step = { + "type": "filters.pmf", + "max_window": 33, + "slope": 0.15, + "max_distance": 2.5, + "initial_distance": 0.15, + "cell_size": 1.0 + } + elif method == 'csf': + ground_step = { + "type": "filters.csf", + "resolution": 0.5, + "rigidness": 3, + "smooth": True, + "threshold": 0.5 + } + else: + raise ValueError(f"Méthode de classification inconnue: {method}") + pipeline = { "pipeline": [ str(input_laz), - { - "type": "filters.range", - "limits": "ReturnNumber[1:],NumberOfReturns[1:]" - }, - { - "type": "filters.smrf", - "ignore": "Classification[7:7]", - "slope": 1.0, - "window": 16.0, - "threshold": 0.5, - "scalar": 1.25 - }, - { - "type": "filters.range", - "limits": "Classification[2:2]" - }, + return_filter, + reset_classification, + elm_filter, + outlier_filter, + ground_step, + ground_filter, { "type": "writers.las", "filename": str(output_las), @@ -59,26 +123,113 @@ def create_smrf_pipeline(input_laz, output_las): return json.dumps(pipeline) -def classify_ground(laz_file, temp_dir): - """Classify ground points using PDAL SMRF filter. +def create_smrf_pipeline(input_laz, output_las): + """Create a PDAL pipeline JSON for SMRF ground classification.""" + return _create_ground_pipeline(input_laz, output_las, 'smrf') + + +def create_pmf_pipeline(input_laz, output_las): + """Create a PDAL pipeline JSON for PMF ground classification.""" + return _create_ground_pipeline(input_laz, output_las, 'pmf') + + +def create_csf_pipeline(input_laz, output_las): + """Create a PDAL pipeline JSON for CSF ground classification.""" + return _create_ground_pipeline(input_laz, output_las, 'csf') + + +def detect_ground_method(laz_file): + """Detect the best ground classification method based on point cloud statistics. + + Auto-selects between SMRF (natural terrain) and PMF (urban) only. + CSF is available only via --ground-classification csf (slow but robust + on complex terrain). + + Falls back to SMRF if the file cannot be read or attributes are missing. + + Args: + laz_file: Path to input LAZ/LAS file. + + Returns: + String: 'smrf', 'pmf', or 'csf' + """ + import laspy + + try: + las = laspy.read(str(laz_file)) + except Exception as e: + logger.warning(f" Impossible de lire le nuage pour auto-détection: {e}") + logger.info(f" → Méthode: SMRF (défaut — lecture impossible)") + return 'smrf' + + total_points = len(las.points) + z = np.array(las.z) + + # Height variance (always available) + z_std = float(np.std(z)) + z_range = float(np.max(z) - np.min(z)) + + # Try to get NumberOfReturns (may not exist in all point formats) + single_return_ratio = 0.0 + try: + num_returns = np.array(las.NumberOfReturns) + single_return_count = int(np.sum(num_returns == 1)) + single_return_ratio = single_return_count / total_points if total_points > 0 else 0 + except AttributeError: + logger.debug(" NumberOfReturns non disponible — utilisation de la variance Z uniquement") + + logger.info(f" Analyse du nuage: {total_points:,} points, " + f"ratio_retours_uniques={single_return_ratio:.2f}, " + f"écart_Z={z_std:.1f}m, amplitude_Z={z_range:.1f}m") + + # Decision logic (auto selects between SMRF and PMF only): + # - High single-return ratio (>0.6) → urban (buildings, roads) → PMF + # - Default → SMRF (fast, robust for most natural terrain) + # Note: CSF is available only via --ground-classification csf (slow but robust on complex terrain) + if single_return_ratio > 0.6: + method = 'pmf' + reason = f"ratio retours uniques={single_return_ratio:.2f} > 0.6 → milieu urbain" + else: + method = 'smrf' + reason = f"terrain naturel standard" + + logger.info(f" → Méthode: {method.upper()} ({reason})") + return method + + +def classify_ground(laz_file, temp_dir, method='auto', force=False): + """Classify ground points using PDAL ground classification filter. Args: laz_file: Path to input LAZ/LAS file. temp_dir: Directory for temporary files (pipeline.json, ground.las). + method: Ground classification method ('auto', 'smrf', 'pmf', or 'csf'). + force: If True, reclassify even if output file already exists. Returns: Path to classified ground LAS file, or None on failure. """ import laspy # noqa: ensure available - output_las = temp_dir / f"{laz_file.stem}_ground.las" + # Auto-detect method if requested + if method == 'auto': + method = detect_ground_method(laz_file) + logger.info(f" Classification sol: {method.upper()} (auto)") + else: + logger.info(f" Classification sol: {method.upper()} (forcé)") - if output_las.exists(): - logger.info(" Classification déjà effectuée — fichier existant réutilisé") + output_las = temp_dir / f"{laz_file.stem}_ground_{method}.las" + + if output_las.exists() and not force: + logger.info(f" Classification {method.upper()} déjà effectuée — fichier existant réutilisé") return output_las - pipeline_json = create_smrf_pipeline(laz_file, output_las) - pipeline_file = temp_dir / "pipeline.json" + if force and output_las.exists(): + logger.info(f" Reclassification forcée — suppression de {output_las.name}") + output_las.unlink() + + pipeline_json = _create_ground_pipeline(laz_file, output_las, method) + pipeline_file = temp_dir / f"pipeline_{method}.json" with open(pipeline_file, 'w') as f: f.write(pipeline_json) @@ -88,10 +239,10 @@ def classify_ground(laz_file, temp_dir): ["pdal", "pipeline", str(pipeline_file)], capture_output=True, check=True ) - logger.info(" ✓ Classification sol terminée") + logger.info(f" ✓ Classification sol {method.upper()} terminée") return output_las except subprocess.CalledProcessError as e: - logger.error(f" ✗ Erreur classification PDAL: {e.stderr.decode()}") + logger.error(f" ✗ Erreur classification PDAL ({method.upper()}): {e.stderr.decode()}") return None diff --git a/lidar_pipeline/pipeline.py b/lidar_pipeline/pipeline.py index c8e6a25..c685ab7 100644 --- a/lidar_pipeline/pipeline.py +++ b/lidar_pipeline/pipeline.py @@ -93,12 +93,14 @@ VIZ_STEPS = [ class LidarArchaeoPipeline: """Orchestrates the LiDAR archaeological analysis pipeline.""" - def __init__(self, input_dir, output_dir, resolution=0.5, workers=1, force=False): + def __init__(self, input_dir, output_dir, resolution=0.5, workers=1, force=False, ground_method='auto', force_classify=False): self.input_dir = Path(input_dir) self.output_dir = Path(output_dir) self.resolution = resolution self.workers = workers self.force = force + self.ground_method = ground_method + self.force_classify = force_classify self.temp_dir = self.output_dir / "temp" if not self.input_dir.exists(): @@ -120,6 +122,8 @@ class LidarArchaeoPipeline: logger.info(f" Résolution : {resolution}m/px") logger.info(f" Workers : {workers}") logger.info(f" Force : {'OUI' if self.force else 'non (skip existing)'}") + logger.info(f" Classification sol : {self.ground_method}") + logger.info(f" Force classif.: {'OUI' if self.force_classify else 'non'}") def find_laz_files(self): """Find all LAZ/LAS files in input directory.""" @@ -216,7 +220,7 @@ class LidarArchaeoPipeline: # Step 1: Ground classification logger.info("[1/4] Classification du sol...") t1 = time.time() - las_file = classify_ground(laz_file, self.temp_dir) + las_file = classify_ground(laz_file, self.temp_dir, method=self.ground_method, force=self.force_classify) t_classif = time.time() - t1 if not las_file: logger.error(f" ✗ Échec classification ({t_classif:.1f}s)") @@ -276,7 +280,7 @@ class LidarArchaeoPipeline: logger.info(f"Fichiers: {len(files)}") with ProcessPoolExecutor(max_workers=self.workers) as executor: future_to_file = { - executor.submit(_process_file_standalone, str(laz_file), str(self.input_dir), str(self.output_dir), self.resolution, self.force): laz_file + executor.submit(_process_file_standalone, str(laz_file), str(self.input_dir), str(self.output_dir), self.resolution, self.force, self.ground_method, self.force_classify): laz_file for laz_file in files } done = 0 @@ -334,7 +338,7 @@ class LidarArchaeoPipeline: logger.warning(f" Note: Impossible de supprimer les fichiers temporaires: {e}") -def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, force=False): +def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, force=False, ground_method='auto', force_classify=False): """Standalone function for multiprocessing — creates its own pipeline instance. Each worker gets its own temp directory to avoid file conflicts. @@ -350,7 +354,7 @@ def _process_file_standalone(laz_file_str, input_dir, output_dir, resolution, fo worker_logger.addHandler(handler) worker_logger.addFilter(_file_filter) - pipeline = LidarArchaeoPipeline(input_dir, output_dir, resolution=resolution, workers=1, force=force) + pipeline = LidarArchaeoPipeline(input_dir, output_dir, resolution=resolution, workers=1, force=force, ground_method=ground_method, force_classify=force_classify) basename = Path(laz_file_str).stem pipeline.temp_dir = pipeline.output_dir / f"temp_{basename}" pipeline.temp_dir.mkdir(exist_ok=True) diff --git a/lidar_pipeline/rendering.py b/lidar_pipeline/rendering.py index 01ebf12..c66de4c 100644 --- a/lidar_pipeline/rendering.py +++ b/lidar_pipeline/rendering.py @@ -204,7 +204,7 @@ def _apply_colormap(data, tif_file): # Find matching colormap for key, info in COLORMAPS.items(): if key in name: - valid_data = data[~np.isnan(data)] if hasattr(data, 'compressed') else data.flatten() + valid_data = np.asarray(data[~np.isnan(data)] if hasattr(data, 'compressed') else data.flatten()) valid_data = valid_data[~np.isnan(valid_data)] vmin = vmax = None @@ -239,7 +239,7 @@ def _apply_colormap(data, tif_file): return data, info['cmap'], info['title'], legend, info['description'], False # Default: terrain colormap with percentile stretch - valid_data = data[~np.isnan(data)] if hasattr(data, 'compressed') else data.flatten() + valid_data = np.asarray(data[~np.isnan(data)] if hasattr(data, 'compressed') else data.flatten()) valid_data = valid_data[~np.isnan(valid_data)] p2, p98 = np.percentile(valid_data, (2, 98)) data = np.clip((data - p2) / (p98 - p2), 0, 1) @@ -321,7 +321,7 @@ def tif_to_png(tif_file, vis_dir, resolution): data = np.ma.masked_where((data == nodata) | np.isnan(data), data) if not is_rgb: - valid_data = data.compressed() if hasattr(data, 'compressed') else data.flatten() + valid_data = np.asarray(data.compressed() if hasattr(data, 'compressed') else data.flatten()) valid_data = valid_data[~np.isnan(valid_data)] # Apply colormap diff --git a/lidar_pipeline/tests/test_dtm.py b/lidar_pipeline/tests/test_dtm.py index b97a237..8e1ab57 100644 --- a/lidar_pipeline/tests/test_dtm.py +++ b/lidar_pipeline/tests/test_dtm.py @@ -1,8 +1,10 @@ """Tests for DTM module.""" import json +import numpy as np import pytest from pathlib import Path +from unittest.mock import patch, MagicMock class TestSMRFPipeline: @@ -15,13 +17,18 @@ class TestSMRFPipeline: assert "pipeline" in pipeline stages = pipeline["pipeline"] - # Should have: reader, range filter (ReturnNumber), SMRF, range filter (Classification), writer + # Should have: reader, range filter (ReturnNumber), assign, ELM, outlier, SMRF, range filter (Classification), writer stage_types = [s.get("type") if isinstance(s, dict) else None for s in stages] # First stage is the filename string (reader) assert isinstance(stages[0], str) assert "test.laz" in stages[0] + # Must contain preprocessing steps + assert "filters.assign" in stage_types + assert "filters.elm" in stage_types + assert "filters.outlier" in stage_types + # Must contain SMRF filter assert "filters.smrf" in stage_types @@ -31,6 +38,27 @@ class TestSMRFPipeline: # At least one should filter ReturnNumber assert any("ReturnNumber" in str(s.get("limits", "")) for s in range_stages) + def test_pipeline_elm_parameters(self): + """ELM filter has terrain-adapted parameters.""" + from lidar_pipeline.dtm import create_smrf_pipeline + result = create_smrf_pipeline("/input/a.laz", "/output/a_ground.las") + pipeline = json.loads(result) + + elm_stage = [s for s in pipeline["pipeline"] if isinstance(s, dict) and s.get("type") == "filters.elm"][0] + assert elm_stage["cell"] == 5.0 + assert elm_stage["threshold"] == 2.0 + + def test_pipeline_outlier_parameters(self): + """Outlier filter uses statistical method.""" + from lidar_pipeline.dtm import create_smrf_pipeline + result = create_smrf_pipeline("/input/a.laz", "/output/a_ground.las") + pipeline = json.loads(result) + + outlier_stage = [s for s in pipeline["pipeline"] if isinstance(s, dict) and s.get("type") == "filters.outlier"][0] + assert outlier_stage["method"] == "statistical" + assert outlier_stage["mean_k"] == 8 + assert outlier_stage["multiplier"] == 3.0 + def test_pipeline_output_path(self): """Pipeline output path is set correctly.""" from lidar_pipeline.dtm import create_smrf_pipeline @@ -38,4 +66,178 @@ class TestSMRFPipeline: pipeline = json.loads(result) # Last stage should be writer with correct output path writer = [s for s in pipeline["pipeline"] if isinstance(s, dict) and s.get("type") == "writers.las"][0] - assert writer["filename"] == "/output/a_ground.las" \ No newline at end of file + assert writer["filename"] == "/output/a_ground.las" + + +class TestPMFPipeline: + def test_pipeline_json_valid(self): + """create_pmf_pipeline produces valid JSON with PMF filter.""" + from lidar_pipeline.dtm import create_pmf_pipeline + result = create_pmf_pipeline("/data/input/test.laz", "/data/output/test_ground.las") + pipeline = json.loads(result) + + assert "pipeline" in pipeline + stages = pipeline["pipeline"] + stage_types = [s.get("type") if isinstance(s, dict) else None for s in stages] + + # Must contain PMF filter + assert "filters.pmf" in stage_types + + # Must contain ReturnNumber filter + range_stages = [s for s in stages if isinstance(s, dict) and s.get("type") == "filters.range"] + assert any("ReturnNumber" in str(s.get("limits", "")) for s in range_stages) + + def test_pmf_parameters(self): + """PMF pipeline has expected parameters.""" + from lidar_pipeline.dtm import create_pmf_pipeline + result = create_pmf_pipeline("/input/a.laz", "/output/a_ground.las") + pipeline = json.loads(result) + + pmf_stage = [s for s in pipeline["pipeline"] if isinstance(s, dict) and s.get("type") == "filters.pmf"][0] + assert pmf_stage["max_window"] == 33 + assert pmf_stage["slope"] == 0.15 + + +class TestCSFPipeline: + def test_pipeline_json_valid(self): + """create_csf_pipeline produces valid JSON with CSF filter.""" + from lidar_pipeline.dtm import create_csf_pipeline + result = create_csf_pipeline("/data/input/test.laz", "/data/output/test_ground.las") + pipeline = json.loads(result) + + assert "pipeline" in pipeline + stages = pipeline["pipeline"] + stage_types = [s.get("type") if isinstance(s, dict) else None for s in stages] + + # Must contain CSF filter + assert "filters.csf" in stage_types + + # Must contain ReturnNumber filter + range_stages = [s for s in stages if isinstance(s, dict) and s.get("type") == "filters.range"] + assert any("ReturnNumber" in str(s.get("limits", "")) for s in range_stages) + + def test_csf_parameters(self): + """CSF pipeline has expected parameters.""" + from lidar_pipeline.dtm import create_csf_pipeline + result = create_csf_pipeline("/input/a.laz", "/output/a_ground.las") + pipeline = json.loads(result) + + csf_stage = [s for s in pipeline["pipeline"] if isinstance(s, dict) and s.get("type") == "filters.csf"][0] + assert csf_stage["resolution"] == 0.5 + assert csf_stage["rigidness"] == 3 + assert csf_stage["smooth"] is True + assert "hdiff" not in csf_stage # hdiff is not a valid PDAL CSF parameter + + +class TestDetectGroundMethod: + def _make_mock_las(self, num_returns, z_values): + """Create a mock laspy object with specified NumberOfReturns and z.""" + mock_las = MagicMock() + mock_las.NumberOfReturns = np.array(num_returns) + mock_las.z = np.array(z_values) + mock_las.points = MagicMock() + mock_las.points.__len__ = lambda self: len(num_returns) + return mock_las + + @patch('lidar_pipeline.dtm.laspy') + def test_urban_terrain_returns_pmf(self, mock_laspy): + """High single-return ratio (>0.6) should select PMF.""" + from lidar_pipeline.dtm import detect_ground_method + + # 70% single returns = urban + n = 10000 + num_returns = np.ones(n, dtype=int) + num_returns[:int(n * 0.3)] = 2 # 30% multi-return + z_values = np.random.normal(100, 5, n) # Low variance = flat terrain + + mock_laspy.read.return_value = self._make_mock_las(num_returns, z_values) + + result = detect_ground_method(Path("/data/input/test.laz")) + assert result == 'pmf' + + @patch('lidar_pipeline.dtm.laspy') + def test_natural_terrain_returns_smrf(self, mock_laspy): + """Low single-return ratio and moderate variance should select SMRF.""" + from lidar_pipeline.dtm import detect_ground_method + + # 40% single returns, moderate variance + n = 10000 + num_returns = np.ones(n, dtype=int) + num_returns[:int(n * 0.6)] = 2 # 60% multi-return (forest) + z_values = np.random.normal(100, 15, n) # Moderate variance + + mock_laspy.read.return_value = self._make_mock_las(num_returns, z_values) + + result = detect_ground_method(Path("/data/input/test.laz")) + assert result == 'smrf' + + @patch('lidar_pipeline.dtm.laspy') + def test_mountainous_terrain_still_defaults_to_smrf(self, mock_laspy): + """High variance terrain defaults to SMRF in auto mode (CSF only via --ground-classification csf).""" + from lidar_pipeline.dtm import detect_ground_method + + # Moderate single-return ratio but very high height variance + n = 10000 + num_returns = np.ones(n, dtype=int) + num_returns[:int(n * 0.5)] = 2 + z_values = np.random.normal(100, 50, n) # Very high variance = mountainous + + mock_laspy.read.return_value = self._make_mock_las(num_returns, z_values) + + result = detect_ground_method(Path("/data/input/test.laz")) + assert result == 'smrf' + + +class TestClassifyGroundMethod: + @patch('lidar_pipeline.dtm.subprocess') + @patch('lidar_pipeline.dtm.laspy') + def test_classify_ground_auto_calls_detect(self, mock_laspy, mock_subprocess): + """classify_ground with method='auto' should call detect_ground_method.""" + from lidar_pipeline.dtm import classify_ground + + # Mock detect_ground_method to return 'pmf' + with patch('lidar_pipeline.dtm.detect_ground_method', return_value='pmf') as mock_detect: + mock_subprocess.run.return_value = MagicMock(returncode=0) + result = classify_ground(Path("/data/input/test.laz"), Path("/tmp"), method='auto') + + mock_detect.assert_called_once() + + @patch('lidar_pipeline.dtm.subprocess') + @patch('lidar_pipeline.dtm.laspy') + def test_classify_ground_smrf_uses_smrf_pipeline(self, mock_laspy, mock_subprocess): + """classify_ground with method='smrf' should create SMRF pipeline.""" + from lidar_pipeline.dtm import classify_ground, _create_ground_pipeline + + mock_subprocess.run.return_value = MagicMock(returncode=0) + + with patch('lidar_pipeline.dtm.detect_ground_method'): + # Create temp dir + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + result = classify_ground(Path("/data/input/test.laz"), Path(tmpdir), method='smrf') + + # Check the pipeline JSON was written with SMRF + pipeline_file = Path(tmpdir) / "pipeline_smrf.json" + if pipeline_file.exists(): + pipeline = json.loads(pipeline_file.read_text()) + stage_types = [s.get("type") if isinstance(s, dict) else None for s in pipeline["pipeline"]] + assert "filters.smrf" in stage_types + + @patch('lidar_pipeline.dtm.subprocess') + @patch('lidar_pipeline.dtm.laspy') + def test_classify_ground_pmf_uses_pmf_pipeline(self, mock_laspy, mock_subprocess): + """classify_ground with method='pmf' should create PMF pipeline.""" + from lidar_pipeline.dtm import classify_ground + + mock_subprocess.run.return_value = MagicMock(returncode=0) + + with patch('lidar_pipeline.dtm.detect_ground_method'): + import tempfile + with tempfile.TemporaryDirectory() as tmpdir: + result = classify_ground(Path("/data/input/test.laz"), Path(tmpdir), method='pmf') + + pipeline_file = Path(tmpdir) / "pipeline_pmf.json" + if pipeline_file.exists(): + pipeline = json.loads(pipeline_file.read_text()) + stage_types = [s.get("type") if isinstance(s, dict) else None for s in pipeline["pipeline"]] + assert "filters.pmf" in stage_types \ No newline at end of file diff --git a/lidar_pipeline/visualizations.py b/lidar_pipeline/visualizations.py index 6eb5bd2..45b73a8 100644 --- a/lidar_pipeline/visualizations.py +++ b/lidar_pipeline/visualizations.py @@ -696,7 +696,7 @@ def generate_texture(dem_file, basename, vis_dir, resolution): np.cos(az_rad - aspect)) hillshade = np.clip(shading, 0, 1) - valid = hillshade[~np.isnan(hillshade)] + valid = np.asarray(hillshade[~np.isnan(hillshade)]) if len(valid) == 0: raise ValueError("No valid data for texture analysis") lo, hi = np.percentile(valid, (1, 99)) diff --git a/requirements.txt b/requirements.txt index 0a00f3b..d9b122d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ numpy>=1.24 matplotlib>=3.7 whitebox>=2.0 rasterio>=1.3 -laspy[laspy]>=2.5 +laspy[lazrs]>=2.5 scikit-image>=0.21 tqdm>=4.65 scikit-learn>=1.3 diff --git a/run.sh b/run.sh index 2cccf75..cd0c13c 100755 --- a/run.sh +++ b/run.sh @@ -9,6 +9,10 @@ # -v Mode verbeux (timestamps + niveaux) # --debug Mode debug (détails internes fichier:ligne) # -f / --force Régénérer tous les fichiers même si existants +# --force-classification +# Reclassifier le sol même si le fichier .las existe déjà +# --ground-classification {auto,smrf,pmf,csf} +# Méthode de classification du sol (défaut: auto) # --file NOM... Traiter un ou plusieurs fichiers LAZ spécifiques # --test Exécuter les tests unitaires # -h Afficher l'aide complète @@ -27,18 +31,24 @@ if [ $# -eq 0 ]; then echo " -v Mode verbeux (timestamps + niveaux)" echo " --debug Mode debug (détails internes fichier:ligne)" echo " -f / --force Régénérer tous les fichiers même si les WebP existent" + echo " --force-classification" + echo " Reclassifier le sol même si le fichier .las existe" + echo " --ground-classification {auto,smrf,pmf,csf}" + echo " Méthode de classification du sol (défaut: auto)" echo " --file NOM... Traiter un ou plusieurs fichiers LAZ (nom complet sans .laz/.las)" echo " --test Exécuter les tests unitaires" echo " -h Afficher cette aide" echo "" echo "Exemples:" - echo " $0 -g # Avec accélération GPU" - echo " $0 -g -w 4 # GPU + 4 workers parallèles" - echo " $0 -g -v # GPU + mode verbeux" - echo " $0 -r 0.2 -g --debug # Haute résolution + GPU + debug" - echo " $0 -f # Forcer la régénération de tous les fichiers" - echo " $0 --file LHD_FXX_1000_6882_PTS_LAMB93_IGN69.copc # Un fichier" - echo " $0 --file LHD_...6881.copc LHD_...6882.copc # Plusieurs fichiers" + echo " $0 -g # GPU, auto" + echo " $0 -g -w 4 # GPU + 4 workers" + echo " $0 -g -v # GPU + verbeux" + echo " $0 -g -r 0.2 # Haute résolution" + echo " $0 -g --force # Tout régénérer (WebP + classification)" + echo " $0 -g --force-classification # Reclassifier le sol seulement" + echo " $0 -g --ground-classification pmf # Forcer PMF" + echo " $0 -g --file LHD_...IGN69.copc # Un fichier" + echo " $0 -g --file LHD_...6881.copc LHD_...6882.copc" exit 0 fi @@ -52,23 +62,25 @@ GPU_FLAG="" VERBOSE_FLAG="" FORCE_FLAG="" FILE_ARGS="" +GROUND_METHOD="" +FORCE_CLASSIFY_FLAG="" -while getopts "r:w:gvf-:" opt; do - case $opt in - r) RESOLUTION="$OPTARG" ;; - w) WORKERS="$OPTARG" ;; - g) GPU_FLAG="--gpus all" ;; - v) VERBOSE_FLAG="-v" ;; - f) FORCE_FLAG="--force" ;; - -) - case "${OPTARG}" in - debug) VERBOSE_FLAG="--debug" ;; - force) FORCE_FLAG="--force" ;; - file) ;; - *) echo "Option invalide: --${OPTARG}" >&2; exit 1 ;; - esac - ;; - h) +# Parse arguments manually (more robust than getopts for mixed short/long options) +while [ $# -gt 0 ]; do + case "$1" in + -r) RESOLUTION="$2"; shift 2 ;; + -w) WORKERS="$2"; shift 2 ;; + -g) GPU_FLAG="--gpus all"; shift ;; + -v) VERBOSE_FLAG="-v"; shift ;; + -f) FORCE_FLAG="--force"; shift ;; + --debug) VERBOSE_FLAG="--debug"; shift ;; + --force) FORCE_FLAG="--force"; shift ;; + --force-classification) FORCE_CLASSIFY_FLAG="--force-classification"; shift ;; + --ground-classification) GROUND_METHOD="$2"; shift 2 ;; + --ground-classification=*) GROUND_METHOD="${1#--ground-classification=}"; shift ;; + --file) shift; while [ $# -gt 0 ] && [[ ! "$1" =~ ^- ]]; do FILE_ARGS="$FILE_ARGS $1"; shift; done ;; + --test) ;; # Handled below + -h|--help|-help) echo "Pipeline LiDAR Archéologique" echo "" echo "Usage: $0 [options]" @@ -79,23 +91,33 @@ while getopts "r:w:gvf-:" opt; do echo " -v Mode verbeux (timestamps + niveaux)" echo " --debug Mode debug (détails internes fichier:ligne)" echo " -f / --force Régénérer tous les fichiers même si les WebP existent" + echo " --force-classification" + echo " Reclassifier le sol même si le fichier .las existe" + echo " --ground-classification {auto,smrf,pmf,csf}" + echo " Méthode de classification du sol (défaut: auto)" echo " --file NOM... Traiter un ou plusieurs fichiers LAZ (nom complet sans .laz/.las)" + echo " --test Exécuter les tests unitaires" echo " -h Afficher cette aide" echo "" echo "Exemples:" - echo " $0 -g # Avec accélération GPU" - echo " $0 -g -w 4 # GPU + 4 workers parallèles" - echo " $0 -g -v # GPU + mode verbeux" - echo " $0 -r 0.2 -g --debug # Haute résolution + GPU + debug" - echo " $0 -f # Forcer la régénération de tous les fichiers" - echo " $0 --file 6881 # Traiter un seul fichier" - echo " $0 --file 6881 6882 # Traiter deux fichiers spécifiques" + echo " $0 -g # GPU, auto" + echo " $0 -g -w 4 # GPU + 4 workers" + echo " $0 -g -v # GPU + verbeux" + echo " $0 -g -r 0.2 # Haute résolution" + echo " $0 -g --force # Tout régénérer (WebP + classification)" + echo " $0 -g --force-classification # Reclassifier le sol seulement" + echo " $0 -g --ground-classification pmf # Forcer PMF" + echo " $0 -g --file LHD_...IGN69.copc # Un fichier" + echo " $0 -g --file LHD_...6881.copc LHD_...6882.copc" exit 0 ;; - *) echo "Option invalide. Utilisez -h pour l'aide." >&2; exit 1 ;; + *) echo "Option invalide: $1" >&2; exit 1 ;; esac done +# Trim FILE_ARGS whitespace +FILE_ARGS=$(echo "$FILE_ARGS" | xargs) + # Check for --test flag first if [[ " $* " == *" --test "* ]]; then # Build l'image si elle n'existe pas @@ -112,27 +134,6 @@ if [[ " $* " == *" --test "* ]]; then exit $? fi -# Collect --file arguments (everything after --file until next option) -if [[ "$*" == *" --file "* ]] || [[ "$*" == *" --file"* && "$*" == *"--file "* ]]; then - # Extract all arguments after --file - PAST_FILE=false - for arg in "$@"; do - if [ "$arg" = "--file" ]; then - PAST_FILE=true - continue - fi - if $PAST_FILE; then - # Stop if we hit another option - if [[ "$arg" == -* ]]; then - PAST_FILE=false - else - FILE_ARGS="$FILE_ARGS $arg" - fi - fi - done - FILE_ARGS=$(echo "$FILE_ARGS" | xargs) -fi - # Build l'image si elle n'existe pas if ! docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then echo "Build de l'image Docker..." @@ -151,12 +152,17 @@ echo " Workers : ${WORKERS}" echo " GPU : $([ -n "$GPU_FLAG" ] && echo 'OUI' || echo 'non')" echo " Verbeux : $([ -n "$VERBOSE_FLAG" ] && echo 'OUI' || echo 'non')" echo " Force : $([ -n "$FORCE_FLAG" ] && echo 'OUI' || echo 'non')" +echo " Force classif.: $([ -n "$FORCE_CLASSIFY_FLAG" ] && echo 'OUI' || echo 'non')" +echo " Classification sol : $([ -n "$GROUND_METHOD" ] && echo "$GROUND_METHOD" || echo 'auto')" if [ -n "$FILE_ARGS" ]; then echo " Fichiers :${FILE_ARGS}" fi echo "============================================" -CMD_ARGS="-o /data/output -r $RESOLUTION -w $WORKERS $VERBOSE_FLAG $FORCE_FLAG" +CMD_ARGS="-o /data/output -r $RESOLUTION -w $WORKERS $VERBOSE_FLAG $FORCE_FLAG $FORCE_CLASSIFY_FLAG" +if [ -n "$GROUND_METHOD" ]; then + CMD_ARGS="$CMD_ARGS --ground-classification $GROUND_METHOD" +fi if [ -n "$FILE_ARGS" ]; then CMD_ARGS="$CMD_ARGS --file $FILE_ARGS" fi