#!/bin/bash
# download_lidar.sh — Download LiDAR HD files from the IGN download service.
#
# Usage:
#   ./download_lidar.sh LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
#   ./download_lidar.sh 1049_6895
#   ./download_lidar.sh 1049_6895 1029_6884
#   ./download_lidar.sh --list-zones
#   ./download_lidar.sh --search 1049
#   ./download_lidar.sh --zone RE --output input/
#   ./download_lidar.sh --zone RE --dry-run
#
# The IGN API exposes the files grouped by zone (RE, SE, AE, etc.).
# The script automatically looks up which zone holds each tile.
#
# Environment:
#   OUTPUT_DIR  Default destination directory (overridden by -o/--output).
#
# Requires: bash, curl, python3, od, sort.

set -euo pipefail

readonly BASE_URL="https://data.geopf.fr/telechargement/download/LiDARHD-NUALID"
readonly API_URL="https://data.geopf.fr/telechargement/resource/LiDARHD-NUALID"
OUTPUT_DIR="${OUTPUT_DIR:-.}"

# Hex form of the 4-byte "LASF" signature found at the start of LAS/LAZ files.
readonly LAS_MAGIC="4c415346"
# Files at or below this size (bytes) are treated as truncated downloads.
readonly MIN_VALID_SIZE=1000000
# Entries requested per API page.
readonly PAGE_LIMIT=50
# Safety caps on the number of pages walked for each kind of listing.
readonly MAX_RELEASE_PAGES=25
readonly MAX_FILE_PAGES=500

# Colors
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# Logging helpers. Informational output goes to stdout; warnings and errors
# go to stderr so they are never mixed into piped results.
log_info() { printf '%b[INFO]%b %s\n' "$CYAN" "$NC" "$*"; }
log_ok()   { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }
log_err()  { printf '%b[ERR]%b %s\n' "$RED" "$NC" "$*" >&2; }

#######################################
# Print the first four bytes of a file as lowercase hex (e.g. "4c415346").
# Prints nothing when the file is missing or unreadable.
# Arguments: $1 - file path
#######################################
file_magic() {
  [[ -r "$1" ]] || return 0
  od -An -tx1 -N4 < "$1" 2>/dev/null | tr -d ' \n' || true
}

#######################################
# Print the size of a file in bytes, or 0 when it is not a regular file.
# Uses wc rather than stat, whose flags differ between GNU and BSD.
# Arguments: $1 - file path
#######################################
file_size() {
  if [[ -f "$1" ]]; then
    wc -c < "$1" | tr -d ' '
  else
    printf '0\n'
  fi
}

#######################################
# Succeed when the file starts with the LAS/LAZ signature.
# Arguments: $1 - file path
#######################################
is_las_file() {
  [[ "$(file_magic "$1")" == "$LAS_MAGIC" ]]
}

#######################################
# Normalize a tile identifier to a full file name.
# Supports "1049_6895", "LHD_FXX_1049_6895_PTS_LAMB93_IGN69" and
# "LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz".
# Arguments: $1 - tile identifier
# Outputs:   the full file name on stdout
#######################################
normalize_name() {
  local input="$1"
  case "$input" in
    LHD_FXX_*_PTS_LAMB93_IGN69.copc.laz)
      # Already a full file name.
      printf '%s\n' "$input"
      ;;
    LHD_FXX_*_PTS_LAMB93_IGN69)
      printf '%s\n' "${input}.copc.laz"
      ;;
    *)
      # Bare coordinates like "1049_6895".
      printf '%s\n' "LHD_FXX_${input}_PTS_LAMB93_IGN69.copc.laz"
      ;;
  esac
}

#######################################
# Fetch one page of an API listing. Best effort: prints nothing on any
# network or HTTP error so callers can treat "empty" as "stop".
# Arguments: $1 - resource URL without query string
#            $2 - page number (1-based)
# Outputs:   the raw response body on stdout
#######################################
fetch_page() {
  curl -fs "${1}?page=${2}&limit=${PAGE_LIMIT}" 2>/dev/null || true
}

#######################################
# Read an Atom feed on stdin and print each entry title, one per line.
# Prints nothing when the input cannot be parsed.
#######################################
parse_titles() {
  python3 -c '
import sys
import xml.etree.ElementTree as ET

ns = {"atom": "http://www.w3.org/2005/Atom"}
try:
    root = ET.parse(sys.stdin).getroot()
except Exception:
    sys.exit(0)
for entry in root.findall("atom:entry", ns):
    title = entry.find("atom:title", ns)
    if title is not None and title.text:
        print(title.text)
' 2>/dev/null || true
}

#######################################
# Read an Atom feed on stdin and print the value of its "pagecount" root
# attribute. Prints nothing when the attribute is absent or not a number.
#######################################
parse_page_count() {
  local count
  count=$(python3 -c '
import sys
import xml.etree.ElementTree as ET

try:
    root = ET.parse(sys.stdin).getroot()
    print(root.get("{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}pagecount", ""))
except Exception:
    pass
' 2>/dev/null) || count=""
  if [[ "$count" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$count"
  fi
}

#######################################
# Print every entry title of a paginated API listing, one per line.
# Stops at the advertised page count, on an empty or repeated page, or at
# the page cap, whichever comes first.
# Arguments: $1 - resource URL without query string
#            $2 - maximum number of pages (default: MAX_RELEASE_PAGES)
#######################################
list_all_titles() {
  local resource_url="$1"
  local max_pages="${2:-$MAX_RELEASE_PAGES}"
  local page=1 total_pages="" content titles previous_titles=""

  while (( page <= max_pages )); do
    content=$(fetch_page "$resource_url" "$page")
    [[ -n "$content" ]] || break

    titles=$(parse_titles <<<"$content")
    # An empty or repeated page means we ran past the end of the listing.
    [[ -n "$titles" && "$titles" != "$previous_titles" ]] || break
    printf '%s\n' "$titles"
    previous_titles="$titles"

    if [[ -z "$total_pages" ]]; then
      total_pages=$(parse_page_count <<<"$content")
    fi
    if [[ -n "$total_pages" ]] && (( page >= total_pages )); then
      break
    fi
    page=$((page + 1))
  done
}

#######################################
# Print the HTTP status code of a HEAD request, following redirects.
# Prints "000" when the request could not be performed.
# Arguments: $1 - URL
#######################################
http_status() {
  local code
  code=$(curl -sI -L -o /dev/null -w '%{http_code}' "$1" 2>/dev/null) || true
  printf '%s\n' "${code:-000}"
}

#######################################
# List all available zone releases as "ZONE<TAB>DATE<TAB>RELEASE", sorted.
#######################################
list_zones() {
  log_info "Récupération de la liste des zones..."

  local title zone date rest
  while IFS= read -r title; do
    # Release titles look like NUALHD_1-0__LAZ_LAMB93_{ZONE}_{DATE}.
    date="${title##*_}"
    if [[ "$title" == *_* ]]; then
      rest="${title%_*}"
      zone="${rest##*_}"
    else
      zone="?"
    fi
    printf '%s\t%s\t%s\n' "$zone" "$date" "$title"
  done < <(list_all_titles "$API_URL") | sort
}

#######################################
# Search for a tile across all zone releases and print every URL where it
# exists. Always returns 0, whether or not the tile is found.
# Arguments: $1 - tile identifier (any form accepted by normalize_name)
#######################################
search_tile() {
  local tile_name="$1"
  local filename
  filename=$(normalize_name "$tile_name")

  log_info "Recherche de ${filename} dans toutes les zones..."

  local found=0 release url
  # Read from a process substitution (not a pipeline) so that "found" is
  # updated in this shell; fd 3 keeps stdin free for the commands inside.
  while IFS= read -r -u 3 release; do
    url="${BASE_URL}/${release}/${filename}"
    if [[ "$(http_status "$url")" == "200" ]]; then
      log_ok "Trouvé: ${filename} dans la zone ${release}"
      echo "  URL: ${url}"
      found=1
    fi
  done 3< <(list_all_titles "$API_URL")

  if (( found == 0 )); then
    log_err "${filename} non trouvé dans aucune zone"
  fi
}

#######################################
# Download a single file, resuming partial transfers.
# Arguments: $1 - source URL
#            $2 - destination path
# Returns:   0 when the destination starts with the LAS signature,
#            1 otherwise (invalid files are removed)
#######################################
download_file() {
  local url="$1"
  local output_path="$2"
  local filename="${output_path##*/}"
  local -r max_retries=15
  local attempt result exit_code magic size

  for (( attempt = 1; attempt <= max_retries; attempt++ )); do
    # Capture the status with "||" so a curl failure does not abort the
    # script under "set -e" before the retry logic can run.
    # --fail turns HTTP errors (e.g. 404) into exit code 22 instead of
    # saving the error page as if it were the file.
    exit_code=0
    result=$(curl -fsS -L --http1.1 -C - -o "$output_path" "$url" 2>&1) \
      || exit_code=$?

    case "$exit_code" in
      0)
        # Verify it is a valid LAS/LAZ file.
        magic=$(file_magic "$output_path")
        if [[ "$magic" == "$LAS_MAGIC" ]]; then
          size=$(file_size "$output_path")
          log_ok "${filename} téléchargé (${size} octets)"
          return 0
        fi
        log_err "${filename} n'est pas un fichier LAZ valide (magic: ${magic})"
        rm -f -- "$output_path"
        return 1
        ;;
      18)
        # curl exit code 18 = partial transfer: resume on the next attempt.
        size=$(file_size "$output_path")
        log_warn "Téléchargement partiel (${size} octets), tentative ${attempt}/${max_retries}..."
        sleep 2
        ;;
      *)
        if (( exit_code == 22 )) && [[ "$result" == *404* ]]; then
          log_err "Fichier non trouvé (404): ${url}"
          rm -f -- "$output_path"
        else
          log_err "Erreur curl (${exit_code}): ${result}"
        fi
        return 1
        ;;
    esac
  done

  # Retries exhausted: keep a partial file that at least has a valid header.
  if is_las_file "$output_path"; then
    size=$(file_size "$output_path")
    log_warn "Fichier partiel mais valide: ${filename} (${size} octets)"
    return 0
  fi

  log_err "Échec après ${max_retries} tentatives"
  rm -f -- "$output_path"
  return 1
}

#######################################
# Find which zone release contains a tile and download it.
# Arguments: $1 - tile identifier (any form accepted by normalize_name)
#            $2 - destination directory
# Returns:   0 when the file is present or was downloaded, 1 otherwise
#######################################
find_and_download() {
  local tile_name="$1"
  local output_dir="$2"
  local filename output_path size release url
  filename=$(normalize_name "$tile_name")
  output_path="${output_dir}/${filename}"

  # Skip the download when a plausible file is already there.
  if [[ -f "$output_path" ]]; then
    if is_las_file "$output_path"; then
      size=$(file_size "$output_path")
      if (( size > MIN_VALID_SIZE )); then
        log_info "${filename} déjà présent (${size} octets) — ignoré"
        return 0
      fi
      # Valid header but small: probably truncated, so resume it.
      log_warn "${filename} existe mais semble incomplet, re-téléchargement..."
    else
      # Not a LAS file at all (e.g. a saved error page): resuming would
      # append real data to garbage, so start from scratch.
      log_warn "${filename} existe mais semble incomplet, re-téléchargement..."
      rm -f -- "$output_path"
    fi
  fi

  log_info "Recherche de ${filename}..."

  # Try every zone release. The loop reads from a process substitution so
  # that "return" leaves this function and not just a pipeline subshell.
  while IFS= read -r -u 3 release; do
    url="${BASE_URL}/${release}/${filename}"
    if [[ "$(http_status "$url")" == "200" ]]; then
      log_ok "Trouvé dans la zone ${release}"
      download_file "$url" "$output_path" || return 1
      return 0
    fi
  done 3< <(list_all_titles "$API_URL")

  log_err "${filename} non trouvé dans aucune zone disponible"
  return 1
}

#######################################
# Download (or, in dry-run mode, list) every file of a zone.
# Arguments: $1 - zone code (e.g. RE)
#            $2 - destination directory
#            $3 - "true" to list without downloading (default: false)
# Returns:   0 on success, 1 when the zone is unknown or a download failed
#######################################
download_zone() {
  local zone="$1"
  local output_dir="$2"
  local dry_run="${3:-false}"
  local release="" title rest

  # Find the release whose title carries this zone code, e.g.
  # NUALHD_1-0__LAZ_LAMB93_RE_2025-02-17. The comparison is done in the
  # shell so the zone is never interpolated into Python source.
  while IFS= read -r -u 3 title; do
    [[ "$title" == *_* ]] || continue
    rest="${title%_*}"
    if [[ "${rest##*_}" == "$zone" ]]; then
      release="$title"
      break
    fi
  done 3< <(list_all_titles "$API_URL" "$MAX_RELEASE_PAGES")

  if [[ -z "$release" ]]; then
    log_err "Zone '${zone}' non trouvée"
    return 1
  fi

  log_info "Zone ${zone}: ${release}"

  # Walk the file listing of this release page by page.
  local total_files=0 failed=0 page=1 total_pages=""
  local content files previous_files="" count fname

  while (( page <= MAX_FILE_PAGES )); do
    content=$(fetch_page "${API_URL}/${release}" "$page")
    [[ -n "$content" ]] || break

    files=$(parse_titles <<<"$content")
    # An empty or repeated page means we ran past the end of the listing.
    [[ -n "$files" && "$files" != "$previous_files" ]] || break
    previous_files="$files"

    count=$(grep -c '' <<<"$files")
    total_files=$((total_files + count))

    if [[ "$dry_run" == "true" ]]; then
      head -n 10 <<<"$files"
      echo "... (${count} fichiers sur cette page)"
    else
      while IFS= read -r -u 3 fname; do
        # Names come from the remote API: refuse anything that could
        # escape the destination directory.
        if [[ -z "$fname" || "$fname" == */* ]]; then
          log_warn "Nom de fichier suspect ignoré: ${fname}"
          continue
        fi
        # Keep going after a failed file; report the total at the end.
        download_file "${BASE_URL}/${release}/${fname}" "${output_dir}/${fname}" \
          || failed=$((failed + 1))
      done 3<<<"$files"
    fi

    if [[ -z "$total_pages" ]]; then
      total_pages=$(parse_page_count <<<"$content")
    fi
    if [[ -n "$total_pages" ]] && (( page >= total_pages )); then
      break
    fi
    page=$((page + 1))
  done

  log_info "Total: ${total_files} fichiers dans la zone ${zone}"

  if (( failed > 0 )); then
    log_err "${failed} téléchargement(s) en échec dans la zone ${zone}"
    return 1
  fi
}

#######################################
# Print the usage text on stdout.
#######################################
usage() {
  cat <<'EOF'
download_lidar.sh — Téléchargement de fichiers LiDAR HD depuis l'IGN

Usage:
  ./download_lidar.sh <tuile1> [tuile2...]       Télécharger une ou plusieurs tuiles
  ./download_lidar.sh --search <tuile>           Chercher une tuile dans toutes les zones
  ./download_lidar.sh --list-zones               Lister les zones disponibles
  ./download_lidar.sh --zone <ZONE> [--dry-run]  Télécharger une zone entière
  ./download_lidar.sh --check [dossier/]         Vérifier l'intégrité des fichiers LAZ

Formats de tuile acceptés:
  1049_6895                                     Coordonnées seules
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69            Nom complet sans extension
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz   Nom complet avec extension

Options:
  -o, --output DIR   Répertoire de destination (défaut: .)
  -n, --dry-run      Afficher sans télécharger (avec --zone)
  --check [DIR]      Vérifier l'intégrité des fichiers LAZ (défaut: ./)
  --list-zones       Lister toutes les zones disponibles
  --search MOTIF     Chercher un motif dans les noms de tuile
  --zone ZONE        Télécharger tous les fichiers d'une zone
  -h, --help         Afficher cette aide

Exemples:
  ./download_lidar.sh 1049_6895
  ./download_lidar.sh LHD_FXX_0713_6347_PTS_LAMB93_IGN69.copc.laz -o input/
  ./download_lidar.sh --search 1049
  ./download_lidar.sh --list-zones
  ./download_lidar.sh --zone RE -o input/
  ./download_lidar.sh --check input/
EOF
}

#######################################
# Report the integrity of every *.laz file in a directory, based on the
# file signature and size. Always returns 0; problems are only reported.
# Arguments: $1 - directory to scan (default: .)
#######################################
check_files() {
  local dir="${1:-.}"
  local ok=0 fail=0
  local f name magic size

  log_info "Vérification de l'intégrité des fichiers LAZ dans ${dir}/"

  # "*.laz" already matches "*.copc.laz"; listing both patterns would check
  # and count every COPC file twice.
  for f in "${dir}"/*.laz; do
    [[ -f "$f" ]] || continue
    name="${f##*/}"
    magic=$(file_magic "$f")
    size=$(file_size "$f")

    if [[ "$magic" == "$LAS_MAGIC" ]] && (( size > MIN_VALID_SIZE )); then
      log_ok "${name} (${size} octets)"
      ok=$((ok + 1))
    elif [[ "$magic" == "7b227469" ]]; then
      # 7b227469 is the byte sequence '{"ti', i.e. the start of a JSON
      # document, presumably an error body saved in place of the file.
      log_err "${name} est une page HTML (404), pas un fichier LAZ"
      fail=$((fail + 1))
    elif (( size < 1000 )); then
      log_err "${name} trop petit (${size} octets) — probablement corrompu"
      fail=$((fail + 1))
    else
      log_warn "${name} magic=${magic}, taille=${size} — vérification nécessaire"
      fail=$((fail + 1))
    fi
  done

  log_info "Résultat: ${ok} OK, ${fail} problème(s)"
  return 0
}

#######################################
# Abort with the usage text when an option is missing its value.
# Arguments: $1 - option name as typed
#            $2 - number of arguments left, including the option itself
#######################################
need_value() {
  local option="$1"
  local remaining="$2"
  if (( remaining < 2 )); then
    log_err "L'option ${option} requiert une valeur"
    usage
    exit 1
  fi
}

#######################################
# Parse the command line and run the requested command.
# Globals:   OUTPUT_DIR (read, written by -o/--output)
# Arguments: the script's command-line arguments
#######################################
main() {
  local cmd="" zone="" dry_run=false check_dir=""
  local -a tiles=()
  local tile failures=0

  while (( $# > 0 )); do
    case "$1" in
      -h|--help)
        usage
        exit 0
        ;;
      --list-zones)
        cmd="list-zones"
        shift
        ;;
      --search)
        need_value "$1" "$#"
        cmd="search"
        tiles+=("$2")
        shift 2
        ;;
      --zone)
        need_value "$1" "$#"
        cmd="zone"
        zone="$2"
        shift 2
        ;;
      --check)
        cmd="check"
        shift
        # The directory is optional: take the next word unless it is
        # another option.
        if (( $# > 0 )) && [[ "$1" != -* ]]; then
          check_dir="$1"
          shift
        fi
        ;;
      -o|--output)
        need_value "$1" "$#"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      -n|--dry-run)
        dry_run=true
        shift
        ;;
      -*)
        log_err "Option inconnue: $1"
        usage
        exit 1
        ;;
      *)
        tiles+=("$1")
        shift
        ;;
    esac
  done

  case "$cmd" in
    list-zones)
      list_zones
      ;;
    search)
      for tile in "${tiles[@]}"; do
        search_tile "$tile"
      done
      ;;
    zone)
      if [[ -z "$zone" ]]; then
        log_err "Zone non spécifiée. Utilisez --zone <ZONE>"
        exit 1
      fi
      mkdir -p -- "$OUTPUT_DIR"
      download_zone "$zone" "$OUTPUT_DIR" "$dry_run"
      ;;
    check)
      check_files "${check_dir:-$OUTPUT_DIR}"
      ;;
    *)
      # Default: download tiles.
      if (( ${#tiles[@]} == 0 )); then
        usage
        exit 1
      fi
      mkdir -p -- "$OUTPUT_DIR"
      # Try every tile even when one fails; report failure at the end.
      for tile in "${tiles[@]}"; do
        find_and_download "$tile" "$OUTPUT_DIR" || failures=$((failures + 1))
      done
      if (( failures > 0 )); then
        exit 1
      fi
      ;;
  esac
}

main "$@"