#!/bin/bash
# download_lidar.sh — Download LiDAR HD tiles published by IGN.
#
# Usage:
#   ./download_lidar.sh LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
#   ./download_lidar.sh 1049_6895
#   ./download_lidar.sh 1049_6895 1029_6884
#   ./download_lidar.sh --list-zones
#   ./download_lidar.sh --search 1049
#   ./download_lidar.sh --zone RE --output input/
#   ./download_lidar.sh --zone RE --dry-run
#
# The IGN API exposes the files grouped by zone (RE, SE, AE, ...).
# The script looks up automatically which zone holds each requested tile.
#
# Environment:
#   OUTPUT_DIR  Default destination directory (overridden by -o/--output).
#
# External tools used: curl, python3, head, od, tr, grep, sort, stat or wc.

set -euo pipefail

BASE_URL="https://data.geopf.fr/telechargement/download/LiDARHD-NUALID"
API_URL="https://data.geopf.fr/telechargement/resource/LiDARHD-NUALID"
OUTPUT_DIR="${OUTPUT_DIR:-.}"

# Hex dump of the first four bytes of a LAS/LAZ file (ASCII "LASF").
readonly LAS_MAGIC="4c415346"
# A file at or below this size (bytes) is never treated as a complete tile.
readonly MIN_VALID_SIZE=1000000
# Upper bounds on the number of API pages walked when no page count is used.
readonly MAX_RELEASE_PAGES=25
readonly MAX_FILE_PAGES=500

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers. Informational output goes to stdout; warnings and errors
# go to stderr so they never mix with data printed by the commands.
log_info() { printf '%b[INFO]%b %s\n' "$CYAN" "$NC" "$*"; }
log_ok()   { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }
log_err()  { printf '%b[ERR]%b %s\n' "$RED" "$NC" "$*" >&2; }

#######################################
# Normalize a tile identifier to a full file name.
# Accepted forms:
#   1049_6895
#   LHD_FXX_1049_6895_PTS_LAMB93_IGN69
#   LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
# Arguments: $1 - tile identifier
# Outputs:   the full file name on stdout
#######################################
normalize_name() {
  local input="$1"
  case "$input" in
    LHD_FXX_*_PTS_LAMB93_IGN69.copc.laz)
      # Already a full file name.
      printf '%s\n' "$input"
      ;;
    LHD_FXX_*_PTS_LAMB93_IGN69)
      printf '%s\n' "${input}.copc.laz"
      ;;
    *)
      # Bare coordinates such as "1049_6895".
      printf '%s\n' "LHD_FXX_${input}_PTS_LAMB93_IGN69.copc.laz"
      ;;
  esac
}

#######################################
# Fetch one page (50 entries) of an API listing.
# Arguments: $1 - resource URL, $2 - page number
# Outputs:   the response body on stdout; nothing when the request fails,
#            which callers treat as "no more pages"
#######################################
fetch_page() {
  local resource_url="$1"
  local page="$2"
  # --fail keeps HTTP error bodies from being mistaken for a feed.
  curl -fs "${resource_url}?page=${page}&limit=50" || true
}

#######################################
# Print the <atom:title> text of every entry of the Atom feed read on stdin,
# one per line. Unparseable input yields no output (best effort).
#######################################
feed_titles() {
  python3 -c '
import sys
import xml.etree.ElementTree as ET

ns = {"atom": "http://www.w3.org/2005/Atom"}
try:
    root = ET.parse(sys.stdin).getroot()
except ET.ParseError:
    sys.exit(0)
for entry in root.findall("atom:entry", ns):
    title = entry.find("atom:title", ns)
    if title is not None and title.text:
        print(title.text)
' 2>/dev/null || true
}

#######################################
# Print the "pagecount" attribute of the feed read on stdin.
# Falls back to 1 when the attribute is missing, not a number, or the feed
# cannot be parsed.
#######################################
feed_page_count() {
  local count
  count=$(python3 -c '
import sys
import xml.etree.ElementTree as ET

attr = "{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}pagecount"
try:
    print(ET.parse(sys.stdin).getroot().get(attr, "1"))
except ET.ParseError:
    print("1")
' 2>/dev/null) || count=1
  [[ "$count" =~ ^[0-9]+$ ]] || count=1
  printf '%s\n' "$count"
}

#######################################
# Print the title of every release listed by the API, one per line, walking
# the pages up to the page count announced by the first page.
#######################################
list_releases() {
  local page=1
  local total_pages=""
  local content
  while true; do
    content=$(fetch_page "$API_URL" "$page")
    [[ -n "$content" ]] || break
    feed_titles <<<"$content"
    if [[ -z "$total_pages" ]]; then
      total_pages=$(feed_page_count <<<"$content")
    fi
    # 10# forces base 10 so a value like "08" is not read as octal.
    (( page < 10#${total_pages} )) || break
    page=$((page + 1))
  done
}

#######################################
# Print the zone code of a release title, i.e. its second-to-last
# "_"-separated field (NUALHD_1-0__LAZ_LAMB93_{ZONE}_{DATE}).
# Arguments: $1 - release title
#            $2 - value printed when the title has no "_" (default: empty)
#######################################
release_zone_code() {
  local title="$1"
  local fallback="${2:-}"
  local head
  if [[ "$title" != *_* ]]; then
    printf '%s\n' "$fallback"
    return 0
  fi
  head=${title%_*}
  printf '%s\n' "${head##*_}"
}

#######################################
# Print the HTTP status code of a HEAD request (redirects followed).
# Arguments: $1 - URL
# Outputs:   the status code, or "000" when curl itself fails
#######################################
http_status() {
  local code
  code=$(curl -sI -L -o /dev/null -w '%{http_code}' "$1" 2>/dev/null) || code="000"
  printf '%s\n' "$code"
}

#######################################
# Print the first four bytes of a file as lowercase hex (empty on error).
# Arguments: $1 - file path
#######################################
file_magic() {
  head -c 4 -- "$1" 2>/dev/null | od -An -tx1 | tr -d ' \n' || true
}

#######################################
# Print the size of a file in bytes ("0" when it cannot be read).
# Tries GNU stat first, then falls back to wc for other platforms.
# Arguments: $1 - file path
#######################################
file_size() {
  local size
  size=$(stat -c%s -- "$1" 2>/dev/null) \
    || size=$(wc -c 2>/dev/null <"$1") \
    || size=0
  size=${size//[[:space:]]/}
  printf '%s\n' "${size:-0}"
}

#######################################
# List all available zone releases, sorted.
# Outputs: one "zone<TAB>date<TAB>release title" line per release
#######################################
list_zones() {
  log_info "Récupération de la liste des zones..."
  local title
  list_releases | while IFS= read -r title; do
    printf '%s\t%s\t%s\n' \
      "$(release_zone_code "$title" '?')" "${title##*_}" "$title"
  done | sort
}

#######################################
# Report every release that contains a given tile.
# Arguments: $1 - tile identifier (any form accepted by normalize_name)
# Outputs:   one "found" message plus URL per matching release, or an error
#            message when no release has the file
# Returns:   0 in all cases
#######################################
search_tile() {
  local tile_name="$1"
  local filename release url
  local found=0
  filename=$(normalize_name "$tile_name")

  log_info "Recherche de ${filename} dans toutes les zones..."

  # Read the releases on fd 3 via process substitution: the loop runs in the
  # current shell, so the "found" flag survives it.
  while IFS= read -r release <&3; do
    url="${BASE_URL}/${release}/${filename}"
    if [[ "$(http_status "$url")" == "200" ]]; then
      log_ok "Trouvé: ${filename} dans la zone ${release}"
      echo " URL: ${url}"
      found=1
    fi
  done 3< <(list_releases)

  if (( found == 0 )); then
    log_err "${filename} non trouvé dans aucune zone"
  fi
  return 0
}

#######################################
# Download a single file, resuming partial transfers and retrying.
# Arguments: $1 - source URL
#            $2 - destination path
# Returns:   0 when the destination starts with the LAS signature,
#            1 otherwise (HTTP error, curl error, or not a LAS/LAZ file)
#######################################
download_file() {
  local url="$1"
  local output_path="$2"
  local filename="${output_path##*/}"
  local max_retries=15
  local retry=0
  local result exit_code magic size

  while (( retry < max_retries )); do
    retry=$((retry + 1))

    # Capture the status with "||" so a curl failure does not trip set -e.
    # --fail: HTTP errors become exit code 22 and their body is never
    # written (or appended, with -C -) to the destination file.
    exit_code=0
    result=$(curl -fL --http1.1 -C - -sS -o "$output_path" "$url" 2>&1) \
      || exit_code=$?

    if (( exit_code == 0 )); then
      # Verify it is a LAS/LAZ file.
      magic=$(file_magic "$output_path")
      if [[ "$magic" == "$LAS_MAGIC" ]]; then
        size=$(file_size "$output_path")
        log_ok "${filename} téléchargé (${size} octets)"
        return 0
      fi
      log_err "${filename} n'est pas un fichier LAZ valide (magic: ${magic})"
      rm -f -- "$output_path"
      return 1
    fi

    # 18 = transfer ended early, 56 = failure receiving data: keep what was
    # received and resume.
    if (( exit_code == 18 || exit_code == 56 )); then
      size=$(file_size "$output_path")
      log_warn "Téléchargement partiel (${size} octets), tentative ${retry}/${max_retries}..."
      sleep 2
      continue
    fi

    # 22 = HTTP error reported by --fail; curl's message carries the status.
    if (( exit_code == 22 )) && [[ "$result" == *404* ]]; then
      log_err "Fichier non trouvé (404): ${url}"
      rm -f -- "$output_path"
      return 1
    fi

    log_err "Erreur curl (${exit_code}): ${result}"
    return 1
  done

  # Retries exhausted: accept the partial file if it at least carries the
  # LAS signature. NOTE(review): such a file may still be truncated.
  magic=$(file_magic "$output_path")
  if [[ "$magic" == "$LAS_MAGIC" ]]; then
    size=$(file_size "$output_path")
    log_warn "Fichier partiel mais valide: ${filename} (${size} octets)"
    return 0
  fi

  log_err "Échec après ${max_retries} tentatives"
  rm -f -- "$output_path"
  return 1
}

#######################################
# Find which release contains a tile and download it.
# Arguments: $1 - tile identifier (any form accepted by normalize_name)
#            $2 - destination directory
# Returns:   0 when the tile is present or downloaded, 1 when it was not
#            found or the download failed
#######################################
find_and_download() {
  local tile_name="$1"
  local output_dir="$2"
  local filename output_path release url size
  filename=$(normalize_name "$tile_name")
  output_path="${output_dir}/${filename}"

  # Skip files that are already present and look complete.
  if [[ -f "$output_path" ]]; then
    if [[ "$(file_magic "$output_path")" == "$LAS_MAGIC" ]]; then
      size=$(file_size "$output_path")
      if (( size > MIN_VALID_SIZE )); then
        log_info "${filename} déjà présent (${size} octets) — ignoré"
        return 0
      fi
    else
      # Not a LAS file at all: resuming would append to garbage, so restart
      # from scratch.
      rm -f -- "$output_path"
    fi
    log_warn "${filename} existe mais semble incomplet, re-téléchargement..."
  fi

  log_info "Recherche de ${filename}..."

  # Try every release; fd 3 keeps the loop in the current shell so "return"
  # leaves the function rather than a pipeline subshell.
  while IFS= read -r release <&3; do
    url="${BASE_URL}/${release}/${filename}"
    if [[ "$(http_status "$url")" == "200" ]]; then
      log_ok "Trouvé dans la zone ${release}"
      if download_file "$url" "$output_path"; then
        return 0
      fi
      return 1
    fi
  done 3< <(list_releases)

  log_err "${filename} non trouvé dans aucune zone disponible"
  return 1
}

#######################################
# Print the title of the first release whose zone code equals $1.
# Walks at most MAX_RELEASE_PAGES pages of the release listing.
# Arguments: $1 - zone code
# Returns:   0 when a release was printed, 1 otherwise
#######################################
find_zone_release() {
  local zone="$1"
  local page content title
  [[ -n "$zone" ]] || return 1
  for ((page = 1; page <= MAX_RELEASE_PAGES; page++)); do
    content=$(fetch_page "$API_URL" "$page")
    [[ -n "$content" ]] || break
    while IFS= read -r title; do
      # The comparison is done in the shell; the zone is never interpolated
      # into interpreter source code.
      if [[ "$(release_zone_code "$title")" == "$zone" ]]; then
        printf '%s\n' "$title"
        return 0
      fi
    done < <(feed_titles <<<"$content")
  done
  return 1
}

#######################################
# Download (or, in dry-run mode, list) every file of a zone.
# Arguments: $1 - zone code
#            $2 - destination directory
#            $3 - "true" to list files without downloading (default: false)
# Returns:   1 when the zone is unknown or at least one download failed,
#            0 otherwise
#######################################
download_zone() {
  local zone="$1"
  local output_dir="$2"
  local dry_run="${3:-false}"
  local release page content files count fname
  local total_files=0
  local failed=0

  # Find the release name for this zone.
  if ! release=$(find_zone_release "$zone"); then
    log_err "Zone '${zone}' non trouvée"
    return 1
  fi

  log_info "Zone ${zone}: ${release}"

  # Walk the file listing of the release; MAX_FILE_PAGES is a safety limit.
  for ((page = 1; page <= MAX_FILE_PAGES; page++)); do
    content=$(fetch_page "${API_URL}/${release}" "$page")
    [[ -n "$content" ]] || break

    files=$(feed_titles <<<"$content")
    [[ -n "$files" ]] || break

    count=$(grep -c '' <<<"$files")
    total_files=$((total_files + count))

    if [[ "$dry_run" == "true" ]]; then
      head -10 <<<"$files"
      echo "... (${count} fichiers sur cette page)"
    else
      while IFS= read -r fname <&3; do
        # ${fname##*/}: never let a listed name escape the output directory.
        # A failed file is counted instead of aborting the whole zone.
        download_file "${BASE_URL}/${release}/${fname}" \
          "${output_dir}/${fname##*/}" || failed=$((failed + 1))
      done 3<<<"$files"
    fi
  done

  log_info "Total: ${total_files} fichiers dans la zone ${zone}"
  if (( failed > 0 )); then
    log_warn "${failed} fichier(s) en échec dans la zone ${zone}"
    return 1
  fi
  return 0
}

# Print the help text on stdout.
usage() {
  cat <<'EOF'
download_lidar.sh — Téléchargement de fichiers LiDAR HD depuis l'IGN

Usage:
  ./download_lidar.sh <tuile> [tuile2...]        Télécharger une ou plusieurs tuiles
  ./download_lidar.sh --search <tuile>           Chercher une tuile dans toutes les zones
  ./download_lidar.sh --list-zones               Lister les zones disponibles
  ./download_lidar.sh --zone <zone> [--dry-run]  Télécharger une zone entière
  ./download_lidar.sh --check [dossier/]         Vérifier l'intégrité des fichiers LAZ

Formats de tuile acceptés:
  1049_6895                                      Coordonnées seules
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69             Nom complet sans extension
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz    Nom complet avec extension

Options:
  -o, --output DIR    Répertoire de destination (défaut: .)
  -n, --dry-run       Afficher sans télécharger (avec --zone)
  --check [DIR]       Vérifier l'intégrité des fichiers LAZ (défaut: ./)
  --list-zones        Lister toutes les zones disponibles
  --search MOTIF      Chercher un motif dans les noms de tuile
  --zone ZONE         Télécharger tous les fichiers d'une zone
  -h, --help          Afficher cette aide

Exemples:
  ./download_lidar.sh 1049_6895
  ./download_lidar.sh LHD_FXX_0713_6347_PTS_LAMB93_IGN69.copc.laz -o input/
  ./download_lidar.sh --search 1049
  ./download_lidar.sh --list-zones
  ./download_lidar.sh --zone RE -o input/
  ./download_lidar.sh --check input/
EOF
}

#######################################
# Check the *.laz files of a directory (signature and size) and print one
# line per file plus a summary.
# Arguments: $1 - directory (default: .)
# Returns:   0 in all cases
#######################################
check_files() {
  local dir="${1:-.}"
  local ok=0
  local fail=0
  local f name magic size

  log_info "Vérification de l'intégrité des fichiers LAZ dans ${dir}/"

  # "*.laz" already matches "*.copc.laz"; a single glob avoids checking and
  # counting those files twice.
  for f in "${dir}"/*.laz; do
    [[ -f "$f" ]] || continue
    name="${f##*/}"
    magic=$(file_magic "$f")
    size=$(file_size "$f")

    if [[ "$magic" == "$LAS_MAGIC" ]] && (( size > MIN_VALID_SIZE )); then
      log_ok "${name} (${size} octets)"
      ok=$((ok + 1))
    elif [[ "$magic" == "7b227469" ]]; then
      # 7b227469 is the hex of the four bytes '{"ti', i.e. the start of a
      # text payload saved in place of the tile.
      log_err "${name} est une page HTML (404), pas un fichier LAZ"
      fail=$((fail + 1))
    elif (( size < 1000 )); then
      log_err "${name} trop petit (${size} octets) — probablement corrompu"
      fail=$((fail + 1))
    else
      log_warn "${name} magic=${magic}, taille=${size} — vérification nécessaire"
      fail=$((fail + 1))
    fi
  done

  log_info "Résultat: ${ok} OK, ${fail} problème(s)"
  return 0
}

#######################################
# Abort with the usage text when an option is missing its value.
# Arguments: $1 - option name
#            $2 - number of remaining arguments, the option included
#######################################
require_value() {
  if (( $2 < 2 )); then
    log_err "L'option $1 requiert un argument"
    usage
    exit 1
  fi
}

#######################################
# Parse the command line and dispatch to the selected command.
# Globals:   OUTPUT_DIR (read and written)
# Arguments: the script's command-line arguments
#######################################
main() {
  local cmd=""
  local zone=""
  local dry_run=false
  local failures=0
  local tile
  local -a tiles=()

  while (( $# > 0 )); do
    case "$1" in
      -h|--help)
        usage
        exit 0
        ;;
      --list-zones)
        cmd="list-zones"
        shift
        ;;
      --search)
        require_value "$1" "$#"
        cmd="search"
        tiles+=("$2")
        shift 2
        ;;
      --zone)
        require_value "$1" "$#"
        cmd="zone"
        zone="$2"
        shift 2
        ;;
      --check)
        cmd="check"
        shift
        # The directory is optional: consume the next word only when it is
        # not another option.
        if (( $# > 0 )) && [[ "$1" != -* ]]; then
          OUTPUT_DIR="$1"
          shift
        fi
        ;;
      -o|--output)
        require_value "$1" "$#"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      -n|--dry-run)
        dry_run=true
        shift
        ;;
      -*)
        log_err "Option inconnue: $1"
        usage
        exit 1
        ;;
      *)
        tiles+=("$1")
        shift
        ;;
    esac
  done

  case "$cmd" in
    list-zones)
      list_zones
      ;;
    search)
      for tile in "${tiles[@]}"; do
        search_tile "$tile"
      done
      ;;
    zone)
      if [[ -z "$zone" ]]; then
        log_err "Zone non spécifiée. Utilisez --zone <zone>"
        exit 1
      fi
      mkdir -p -- "$OUTPUT_DIR"
      download_zone "$zone" "$OUTPUT_DIR" "$dry_run"
      ;;
    check)
      check_files "$OUTPUT_DIR"
      ;;
    *)
      # Default: download the requested tiles.
      if (( ${#tiles[@]} == 0 )); then
        usage
        exit 1
      fi
      mkdir -p -- "$OUTPUT_DIR"
      # Attempt every tile; a failure is reported through the exit status
      # once all of them have been tried.
      for tile in "${tiles[@]}"; do
        find_and_download "$tile" "$OUTPUT_DIR" || failures=$((failures + 1))
      done
      if (( failures > 0 )); then
        exit 1
      fi
      ;;
  esac
}

main "$@"