# lidar_rendu/semantic_classifier.py

#!/usr/bin/env python3
"""
Module de Classification Sémantique Simplifié pour LiDAR Archéologique
Approche robuste avec K-Means pour classification automatique
"""

import numpy as np
import rasterio
from rasterio.transform import from_bounds
from sklearn.cluster import KMeans
from scipy import ndimage
import json
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors


class ArchaeoSemanticClassifier:
    """Classification sémantique automatique robuste"""

    def __init__(self, dtm_file, output_dir):
        """Load the DTM raster and make sure the output directory exists.

        Args:
            dtm_file: Path to the raster opened with ``rasterio.open``.
            output_dir: Directory for results; created (with parents) when
                it does not exist yet.

        Attributes set: ``dtm_file``, ``output_dir``, ``dem`` (band 1 as
        read), ``transform``, ``crs``, ``height`` and ``width``.
        """
        source_path = Path(dtm_file)
        target_dir = Path(output_dir)
        self.dtm_file = source_path
        self.output_dir = target_dir
        target_dir.mkdir(parents=True, exist_ok=True)

        # Only the first band is read; georeferencing is kept so results can
        # be written back on the same grid.
        # NOTE(review): no nodata masking happens here — confirm whether the
        # input rasters carry a nodata value that should be excluded.
        with rasterio.open(source_path) as dataset:
            band = dataset.read(1)
            self.transform, self.crs = dataset.transform, dataset.crs

        self.dem = band
        self.height, self.width = band.shape

        print(f"✓ Classifieur initialisé (DTM: {self.width}x{self.height})")

    def extract_features_simple(self):
        """Extraction simplifiée des caractéristiques"""
        print("  → Extraction caractéristiques...")

        # Calculate gradients
        dy, dx = np.gradient(self.dem)

        # Slope
        slope = np.arctan(np.sqrt(dx**2 + dy**2)) * 180 / np.pi

        # Aspect
        aspect = np.arctan2(-dy, dx) * 180 / np.pi
        aspect = np.mod(aspect, 360)

        # Curvature
        dz_dx = np.gradient(dx, axis=1)
        dz_dy = np.gradient(dy, axis=0)
        curvature = (dz_dx + dz_dy) / 2

        # Local Relief
        from scipy.ndimage import uniform_filter
        local_mean = uniform_filter(self.dem, size=int(15/0.5))
        local_relief = self.dem - local_mean

        return {
            'elevation': self.dem,
            'slope': slope,
            'aspect': aspect,
            'curvature': curvature,
            'local_relief': local_relief
        }

    def classify_kmeans(self, n_clusters=6):
        """Classification K-Means robuste"""
        print("  → Classification K-Means...")

        features = self.extract_features_simple()

        # Normalize each feature to 0-1
        normalized_features = {}
        for name, data in features.items():
            min_val = np.percentile(data, 2)
            max_val = np.percentile(data, 98)
            normalized = np.clip((data - min_val) / (max_val - min_val + 1e-6), 0, 1)
            normalized_features[name] = normalized

        # Stack features for clustering
        feature_stack = np.stack([
            normalized_features['elevation'].flatten(),
            normalized_features['slope'].flatten(),
            normalized_features['curvature'].flatten(),
            normalized_features['local_relief'].flatten()
        ], axis=1)

        # Remove NaN values
        valid_mask = ~np.isnan(feature_stack).any(axis=1)
        feature_stack = feature_stack[valid_mask]

        # K-Means clustering with random_state for reproducibility
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10, max_iter=300)
        labels_flat = kmeans.fit_predict(feature_stack)

        # Create full resolution labels
        full_labels = np.zeros(self.height * self.width, dtype=int)
        full_indices = np.where(valid_mask)[0]
        full_labels[full_indices] = labels_flat + 1  # +1 to shift from 0-based
        full_labels = full_labels.reshape(self.height, self.width)

        # Interpret clusters
        self._interpret_clusters(kmeans.cluster_centers_, features)

        return full_labels

    def _interpret_clusters(self, centers, features):
        """Interprète les clusters selon les centroïdes"""
        interpretations = {}

        # Centers are in normalized feature space
        # Features order: elevation, slope, curvature, local_relief
        for i, center in enumerate(centers):
            elev = center[0]
            slope = center[1]
            curve = center[2]
            relief = center[3]

            # Interpret based on feature values
            if relief > 0.7:
                category = "ÉLEVÉE (Tumulus possible)"
            elif relief < -0.3:
                category = "ENFONCÉ (Fossé, cavité)"
            elif abs(curve) > 0.5:
                if curve > 0:
                    category = "CONVEX (Bosse, monticule)"
                else:
                    category = "CONCAVE (Creux, dépression)"
            elif slope > 0.6:
                category = "PENTE FORTE (Talus, mur)"
            elif slope < 0.2 and elev < 0.3:
                category = "PLAT (Zone plane)"
            else:
                category = "TOPOGRAPHIE MIXTE"

            interpretations[i + 1] = category  # +1 because labels are 1-indexed

        print(f"    Interprétation des {len(centers)} clusters :")
        for i, interp in interpretations.items():
            print(f"      Classe {i}: {interp}")

    def generate_semantic_map(self, labels):
        """Génère une carte sémantique colorée"""
        print("  → Génération carte sémantique...")

        # Create color map for semantic classes
        # 0: Background, 1-6: Different semantic classes
        colors = {
            0: [200, 200, 200],      # Gray: Background/Unknown
            1: [139, 69, 19],        # Brown: Linear/Walls
            2: [128, 0, 128],        # Purple: Circular/Mounds
            3: [255, 140, 0],        # Orange: Elevated
            4: [0, 200, 255],          # Cyan: Depressed
            5: [220, 220, 0],        # Yellow: Slope
            6: [0, 128, 0]             # Green: Vegetation/Natural
        }

        # Create RGB image
        rgb = np.zeros((self.height, self.width, 3), dtype=np.uint8)

        for class_id, color in colors.items():
            mask = labels == class_id
            for c in range(3):
                rgb[:, :, c][mask] = color[c]

        return rgb

    def process(self, basename):
        """Run the full pipeline: classify, write raster, preview and stats.

        Args:
            basename: Prefix for the output file names inside
                ``self.output_dir``.

        Returns:
            dict with ``'labels'`` (label array), ``'tif'`` and ``'jpg'``
            (output paths) and ``'stats'`` (per-class statistics).

        NOTE(review): the legend names are fixed per class id, whereas
        K-Means numbers its clusters arbitrarily; the names may not match the
        interpretation printed during clustering — confirm before relying on
        the legend.
        """
        print(f"\n{'='*60}")
        print(f" CLASSIFICATION SÉMANTIQUE - {basename}")
        print(f"{'='*60}")

        # Run K-Means classification
        labels = self.classify_kmeans(n_clusters=6)

        # Generate semantic map
        rgb = self.generate_semantic_map(labels)

        # Save the label raster on the same grid as the input DTM.
        output_tif = self.output_dir / f"{basename}_semantic.tif"
        with rasterio.open(
            output_tif,
            'w',
            driver='GTiff',
            height=self.height,
            width=self.width,
            count=1,
            dtype='uint8',
            crs=self.crs,
            transform=self.transform,
            compress='lzw'
        ) as dst:
            # The band is declared uint8; convert explicitly rather than
            # relying on an implicit narrowing of the platform int array.
            dst.write(labels.astype(np.uint8), 1)

        # Save visualization
        output_jpg = self.output_dir / f"{basename}_semantic.jpg"
        legend_entries = [
            ("Inconnu/Fond", [200, 200, 200]),
            ("Linéaire (murs)", [139, 69, 19]),
            ("Circulaire (tumulus)", [128, 0, 128]),
            ("Élevé (monticules)", [255, 140, 0]),
            ("Enfoncé (fossés)", [0, 200, 255]),
            ("Pente forte (talus)", [220, 220, 0]),
            ("Naturel", [0, 128, 0])
        ]

        fig = plt.figure(figsize=(16, 12), facecolor='white')
        try:
            plt.imshow(rgb)

            # Legend patches use the same colours, scaled to 0-1.
            legend_elements = [
                plt.Rectangle((0, 0), 1, 1, facecolor=np.array(c)/255,
                              edgecolor='black', label=label)
                for label, c in legend_entries
            ]

            plt.legend(handles=legend_elements, loc='upper right', fontsize=11)
            plt.title(f"Classification Sémantique LiDAR - {basename}\n",
                     fontsize=14, fontweight='bold', pad=15)
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(output_jpg, dpi=150, bbox_inches='tight', format='jpg')
        finally:
            # Always release the figure, even when rendering or saving fails.
            plt.close(fig)

        # Generate statistics
        stats = self._generate_stats(labels, basename)

        print(f"\n✓ Classification terminée !")
        print(f"  • Carte sémantique: {output_tif.name}")
        print(f"  • Visualisation: {output_jpg.name}")
        print(f"  • Statistiques: {self.output_dir / f'{basename}_statistics.json'}")

        return {
            'labels': labels,
            'tif': output_tif,
            'jpg': output_jpg,
            'stats': stats
        }

    def _generate_stats(self, labels, basename):
        """Génère les statistiques de classification"""
        print("  → Génération statistiques...")

        total_pixels = labels.size
        stats = {}
        class_names = {
            0: "Inconnu/Fond",
            1: "Linéaire",
            2: "Circulaire",
            3: "Élevée",
            4: "Enfoncée",
            5: "Pente forte",
            6: "Naturel"
        }

        for class_id in range(7):
            count = np.sum(labels == class_id)
            percentage = (count / total_pixels) * 100
            if count > 0:
                stats[class_id] = {
                    'name': class_names[class_id],
                    'count': int(count),
                    'percentage': float(percentage)
                }

        # Save as JSON
        stats_file = self.output_dir / f"{basename}_statistics.json"
        with open(stats_file, 'w') as f:
            json.dump(stats, f, indent=2, default=lambda x: float(x) if isinstance(x, (np.floating, np.integer)) else x)

        # Print summary
        print(f"\n    📊 Statistiques :")
        for class_id, info in stats.items():
            print(f"      {info['name']}: {info['count']:.0f} px ({info['percentage']:.1f}%)")

        return stats


def main():
    """Command-line entry point.

    Expects the DTM path as first argument and an optional output directory
    as second (default ``semantic_output``). Prints a usage line and exits
    with status 1 when no argument is given.
    """
    import sys

    arguments = sys.argv[1:]
    if not arguments:
        print("Usage: python semantic_classifier.py <dtm_file.tif> [output_dir]")
        sys.exit(1)

    dtm_file = arguments[0]
    output_dir = arguments[1] if len(arguments) > 1 else "semantic_output"

    # Outputs are named after the input file without its extension.
    ArchaeoSemanticClassifier(dtm_file, output_dir).process(Path(dtm_file).stem)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()