Add download script for IGN LiDAR HD LAZ files

Supports tile download by coordinates, zone-based bulk download,
integrity checking, and auto-search across zones with resume support.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-05-14 20:34:50 +02:00
parent 02218b2cfc
commit cf3e680b02

584
download_lidar.sh Executable file
View File

@@ -0,0 +1,584 @@
#!/bin/bash
# download_lidar.sh — Download LiDAR HD files from IGN
#
# Usage:
#   ./download_lidar.sh LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
#   ./download_lidar.sh 1049_6895
#   ./download_lidar.sh 1049_6895 1029_6884
#   ./download_lidar.sh --list-zones
#   ./download_lidar.sh --search 1049
#   ./download_lidar.sh --zone RE --output input/
#   ./download_lidar.sh --zone RE --dry-run
#
# The IGN API exposes the files grouped by zone (RE, SE, AE, etc.).
# The script automatically looks up which zone holds each tile.

set -euo pipefail

# Endpoints: BASE_URL serves the files, API_URL serves the Atom listings.
readonly BASE_URL="https://data.geopf.fr/telechargement/download/LiDARHD-NUALID"
readonly API_URL="https://data.geopf.fr/telechargement/resource/LiDARHD-NUALID"

# Destination directory; taken from the environment when set, "." otherwise.
# Not readonly: the argument parser reassigns it.
OUTPUT_DIR="${OUTPUT_DIR:-.}"

# ANSI colour sequences, stored escaped and expanded by printf's %b.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# Logging helpers: print a coloured tag followed by all arguments on stdout.
# The message goes through %s so that backslashes it contains (paths, URLs,
# captured curl output) are printed verbatim instead of being interpreted as
# escape sequences.
log_info() { printf '%b[INFO]%b %s\n' "$CYAN" "$NC" "$*"; }
log_ok() { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
log_err() { printf '%b[ERR]%b %s\n' "$RED" "$NC" "$*"; }
# Turn a tile identifier into the full file name published by IGN.
#
# Arguments: $1 - one of
#              "1049_6895"                                   (bare coordinates)
#              "LHD_FXX_1049_6895_PTS_LAMB93_IGN69"          (name, no extension)
#              "LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz" (full file name)
# Outputs:   the full file name on stdout
normalize_name() {
  local tile="$1"

  case "$tile" in
    LHD_FXX_*_PTS_LAMB93_IGN69.copc.laz)
      # Already complete: pass through untouched.
      printf '%s\n' "$tile"
      ;;
    LHD_FXX_*_PTS_LAMB93_IGN69)
      # Only the extension is missing.
      printf '%s\n' "${tile}.copc.laz"
      ;;
    *)
      # Anything else is treated as bare coordinates.
      printf '%s\n' "LHD_FXX_${tile}_PTS_LAMB93_IGN69.copc.laz"
      ;;
  esac
}
# Print every zone release found in the API listing, one per line, as
# "<zone>\t<date>\t<release title>", sorted.
#
# Globals: API_URL (read)
# Outputs: an info line, then the sorted listing, on stdout
list_zones() {
  log_info "Récupération de la liste des zones..."

  local page_no=1
  local page_count=""
  local feed

  while :; do
    # A failed request yields an empty feed, which ends the walk.
    feed=$(curl -s "${API_URL}?page=${page_no}&limit=50") || true
    [ -n "$feed" ] || break

    # One "zone<TAB>date<TAB>title" line per Atom entry of this page.
    # Parse errors are deliberately ignored (best effort).
    echo "$feed" | python3 -c "
import sys, xml.etree.ElementTree as ET
ATOM = {'atom': 'http://www.w3.org/2005/Atom'}
try:
    feed_root = ET.parse(sys.stdin).getroot()
    for entry in feed_root.findall('atom:entry', ATOM):
        title = entry.find('atom:title', ATOM)
        if title is None or not title.text:
            continue
        # Titles look like NUALHD_1-0__LAZ_LAMB93_{ZONE}_{DATE}
        fields = title.text.split('_')
        zone = fields[-2] if len(fields) >= 2 else '?'
        date = fields[-1]
        print(f'{zone}\t{date}\t{title.text}')
except Exception:
    pass
" 2>/dev/null || true

    # The page count is read once, from the root element of the first page.
    if [ -z "$page_count" ]; then
      page_count=$(echo "$feed" | python3 -c "
import sys, xml.etree.ElementTree as ET
GPF_DL = 'https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd'
try:
    print(ET.parse(sys.stdin).getroot().get('{' + GPF_DL + '}pagecount', '1'))
except:
    print('1')
" 2>/dev/null)
    fi

    if [ "$page_no" -ge "${page_count:-1}" ]; then
      break
    fi
    page_no=$((page_no + 1))
  done | sort
}
# Probe every zone release for a tile and report each release that has it.
#
# Globals:   API_URL, BASE_URL (read)
# Arguments: $1 - tile identifier in any form accepted by normalize_name
# Outputs:   an "[OK]" line plus the URL for each release holding the file,
#            or an error line when no release holds it
# Returns:   0 in both cases
#
# NOTE(review): the usage text describes --search as a pattern search, but
# only the exact normalized file name is probed here.
search_tile() {
  local tile_name="$1"
  local filename
  filename=$(normalize_name "$tile_name")
  log_info "Recherche de ${filename} dans toutes les zones..."

  local page=1
  local found=0
  local total_pages=""
  local content releases release url code

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [ -z "$content" ] && break

    # Release titles of this page, one per line (empty on parse failure).
    releases=$(printf '%s\n' "$content" | python3 -c "
import sys, xml.etree.ElementTree as ET
ns = {'atom': 'http://www.w3.org/2005/Atom'}
try:
    tree = ET.parse(sys.stdin)
    root = tree.getroot()
    for entry in root.findall('atom:entry', ns):
        title = entry.find('atom:title', ns)
        if title is not None and title.text:
            print(title.text)
except:
    pass
" 2>/dev/null) || releases=""

    # The loop reads from a here-string rather than a pipeline so that it runs
    # in the current shell: assignments to `found` must survive the loop.
    while read -r release; do
      [ -n "$release" ] || continue
      url="${BASE_URL}/${release}/${filename}"
      # HEAD request; a transport failure just means "not here".
      code=$(curl -sI -L -o /dev/null -w "%{http_code}" "$url" 2>/dev/null) || true
      if [ "$code" = "200" ]; then
        log_ok "Trouvé: ${filename} dans la zone ${release}"
        echo " URL: ${url}"
        found=1
      fi
    done <<< "$releases"

    # The page count is read once, from the root element of the first page.
    if [ -z "$total_pages" ]; then
      total_pages=$(printf '%s\n' "$content" | python3 -c "
import sys, xml.etree.ElementTree as ET
try:
    tree = ET.parse(sys.stdin)
    root = tree.getroot()
    pc = root.get('{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}pagecount', '1')
    print(pc)
except:
    print('1')
" 2>/dev/null) || total_pages=1
      # Anything that is not a plain number would break the -ge test below.
      case "$total_pages" in
        '' | *[!0-9]*) total_pages=1 ;;
      esac
    fi

    if [ "$page" -ge "$total_pages" ]; then
      break
    fi
    page=$((page + 1))
  done

  if [ "$found" -eq 0 ]; then
    log_err "${filename} non trouvé dans aucune zone"
  fi
  return 0
}
# Print the first four bytes of file $1 as lowercase hex ("" if unreadable).
# Uses od (POSIX) so the check does not depend on xxd being installed.
_download_magic_hex() {
  od -An -tx1 -N4 "$1" 2>/dev/null | tr -d ' \n' || true
}

# Print the size of file $1 in bytes (0 if it cannot be read).
_download_file_size() {
  local bytes
  bytes=$({ wc -c < "$1"; } 2>/dev/null | tr -d '[:space:]') || bytes=""
  printf '%s\n' "${bytes:-0}"
}

# Download one file, resuming an existing partial file and retrying
# interrupted transfers.
#
# Arguments: $1 - source URL
#            $2 - destination path
# Returns:   0 when the destination starts with the LAS signature "LASF"
#            (hex 4c415346); 1 on HTTP 404, on any non-retryable curl error,
#            or when the result is not a LAS/LAZ file. The destination is
#            removed on 404 and on signature mismatch.
download_file() {
  local url="$1"
  local output_path="$2"
  local filename
  filename=$(basename "$output_path")

  local max_retries=15
  local retry=0
  local result exit_code http_code magic size

  while [ "$retry" -lt "$max_retries" ]; do
    retry=$((retry + 1))

    # The status is captured with `|| exit_code=$?` so that a failing curl
    # does not abort the script under `set -e` before the retry logic runs.
    # -sS keeps the progress meter out of the captured text; -w appends a
    # marker line carrying the HTTP status.
    exit_code=0
    result=$(curl -sS -L --http1.1 -C - -o "$output_path" \
      -w 'HTTP_STATUS:%{http_code}\n' "$url" 2>&1) || exit_code=$?

    http_code=""
    if [[ "$result" =~ HTTP_STATUS:([0-9]{3}) ]]; then
      http_code="${BASH_REMATCH[1]}"
      # Strip the marker so only curl's own messages remain for logging.
      result="${result//HTTP_STATUS:${http_code}/}"
    fi

    # HTTP 404, decided from the real status code rather than by searching
    # curl's output for the digits "404".
    if [ "$http_code" = "404" ]; then
      log_err "Fichier non trouvé (404): ${url}"
      rm -f -- "$output_path"
      return 1
    fi

    if [ "$exit_code" -eq 0 ]; then
      # Verify it's a valid LAS/LAZ file
      magic=$(_download_magic_hex "$output_path")
      if [ "$magic" = "4c415346" ]; then
        size=$(_download_file_size "$output_path")
        log_ok "${filename} téléchargé (${size} octets)"
        return 0
      fi
      log_err "${filename} n'est pas un fichier LAZ valide (magic: ${magic})"
      rm -f -- "$output_path"
      return 1
    fi

    # curl exit 18 = partial file, 56 = failure receiving network data:
    # both leave a partial file that the next attempt resumes with -C -.
    if [ "$exit_code" -eq 18 ] || [ "$exit_code" -eq 56 ]; then
      size=$(_download_file_size "$output_path")
      log_warn "Téléchargement partiel (${size} octets), tentative ${retry}/${max_retries}..."
      sleep 2
      continue
    fi

    log_err "Erreur curl (${exit_code}): ${result}"
    return 1
  done

  # Retries exhausted: a file that at least starts with the LAS signature is
  # kept and reported as success, as before.
  # NOTE(review): such a file is still truncated — confirm that returning 0
  # here is what callers want.
  magic=$(_download_magic_hex "$output_path")
  if [ "$magic" = "4c415346" ]; then
    size=$(_download_file_size "$output_path")
    log_warn "Fichier partiel mais valide: ${filename} (${size} octets)"
    return 0
  fi

  log_err "Échec après ${max_retries} tentatives"
  rm -f -- "$output_path"
  return 1
}
# Find which zone release contains a tile and download it from there.
#
# Globals:   API_URL, BASE_URL (read)
# Arguments: $1 - tile identifier in any form accepted by normalize_name
#            $2 - destination directory
# Returns:   0 when the file is already present or was downloaded;
#            1 when no release holds it or the download failed
find_and_download() {
  local tile_name="$1"
  local output_dir="$2"
  local filename
  filename=$(normalize_name "$tile_name")
  local output_path="${output_dir}/${filename}"
  local magic size

  # Skip a file that is already there, starts with the LAS signature "LASF"
  # and is larger than 1 MB.
  # NOTE(review): a truncated download above 1 MB also passes this test.
  if [ -f "$output_path" ]; then
    magic=$(od -An -tx1 -N4 "$output_path" 2>/dev/null | tr -d ' \n') || magic=""
    if [ "$magic" = "4c415346" ]; then
      size=$(wc -c 2>/dev/null < "$output_path" | tr -d '[:space:]') || size=0
      if [ "${size:-0}" -gt 1000000 ]; then
        log_info "${filename} déjà présent (${size} octets) — ignoré"
        return 0
      fi
      # Valid start but too small: download_file will resume it.
      log_warn "${filename} existe mais semble incomplet, re-téléchargement..."
    else
      log_warn "${filename} existe mais semble incomplet, re-téléchargement..."
      # Not a LAS/LAZ file at all: remove it so the resuming download does
      # not append real data to garbage.
      rm -f -- "$output_path"
    fi
  fi

  log_info "Recherche de ${filename}..."

  # Try all zone releases, page by page.
  local page=1
  local total_pages=""
  local content releases release url code

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [ -z "$content" ] && break

    # Release titles of this page, one per line (empty on parse failure).
    releases=$(printf '%s\n' "$content" | python3 -c "
import sys, xml.etree.ElementTree as ET
ns = {'atom': 'http://www.w3.org/2005/Atom'}
try:
    tree = ET.parse(sys.stdin)
    root = tree.getroot()
    for entry in root.findall('atom:entry', ns):
        title = entry.find('atom:title', ns)
        if title is not None and title.text:
            print(title.text)
except:
    pass
" 2>/dev/null) || releases=""

    # The loop reads from a here-string rather than a pipeline so that it runs
    # in the current shell: `return` must leave the function, not a subshell.
    while read -r release; do
      [ -n "$release" ] || continue
      url="${BASE_URL}/${release}/${filename}"
      # HEAD request; a transport failure just means "not here".
      code=$(curl -sI -L -o /dev/null -w "%{http_code}" "$url" 2>/dev/null) || true
      if [ "$code" = "200" ]; then
        log_ok "Trouvé dans la zone ${release}"
        # Propagate the download outcome instead of always reporting success.
        download_file "$url" "$output_path" || return 1
        return 0
      fi
    done <<< "$releases"

    # The page count is read once, from the root element of the first page.
    if [ -z "$total_pages" ]; then
      total_pages=$(printf '%s\n' "$content" | python3 -c "
import sys, xml.etree.ElementTree as ET
try:
    tree = ET.parse(sys.stdin)
    root = tree.getroot()
    pc = root.get('{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}pagecount', '1')
    print(pc)
except:
    print('1')
" 2>/dev/null) || total_pages=1
      # Anything that is not a plain number would break the -ge test below.
      case "$total_pages" in
        '' | *[!0-9]*) total_pages=1 ;;
      esac
    fi

    if [ "$page" -ge "$total_pages" ]; then
      break
    fi
    page=$((page + 1))
  done

  log_err "${filename} non trouvé dans aucune zone disponible"
  return 1
}
# Download (or, in dry-run mode, list) every file of one zone.
#
# Globals:   API_URL, BASE_URL (read)
# Arguments: $1 - zone code, matched against the second-to-last "_" field of
#                 each release title (e.g. NUALHD_1-0__LAZ_LAMB93_RE_2025-02-17)
#            $2 - destination directory
#            $3 - "true" to list files without downloading (default: false)
# Returns:   1 when the zone is unknown or at least one download failed,
#            0 otherwise
download_zone() {
  local zone="$1"
  local output_dir="$2"
  local dry_run="${3:-false}"

  local release=""
  local page=1
  local content files fname count
  local total_files=0
  local failed=0

  # Find the release name for this zone.
  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [ -z "$content" ] && break

    # The zone code is handed to Python as an argument, never spliced into
    # the program text, so quotes in it cannot alter the script.
    release=$(printf '%s\n' "$content" | python3 -c "
import sys, xml.etree.ElementTree as ET
ns = {'atom': 'http://www.w3.org/2005/Atom'}
wanted = sys.argv[1]
try:
    tree = ET.parse(sys.stdin)
    root = tree.getroot()
    for entry in root.findall('atom:entry', ns):
        title = entry.find('atom:title', ns)
        if title is not None and title.text:
            parts = title.text.split('_')
            z = parts[-2] if len(parts) >= 2 else ''
            if z == wanted:
                print(title.text)
                break
except:
    pass
" "$zone" 2>/dev/null) || release=""

    if [ -n "$release" ]; then
      break
    fi

    page=$((page + 1))
    # Safety limit
    if [ "$page" -gt 25 ]; then
      break
    fi
  done

  if [ -z "$release" ]; then
    log_err "Zone '${zone}' non trouvée"
    return 1
  fi
  log_info "Zone ${zone}: ${release}"

  # Walk the file listing of this release, page by page.
  page=1
  while true; do
    content=$(curl -s "${API_URL}/${release}?page=${page}&limit=50") || true
    [ -z "$content" ] && break

    files=$(printf '%s\n' "$content" | python3 -c "
import sys, xml.etree.ElementTree as ET
ns = {'atom': 'http://www.w3.org/2005/Atom'}
try:
    tree = ET.parse(sys.stdin)
    root = tree.getroot()
    for entry in root.findall('atom:entry', ns):
        title = entry.find('atom:title', ns)
        if title is not None and title.text:
            print(title.text)
except:
    pass
" 2>/dev/null) || files=""

    # A page without entries marks the end of the listing.
    if [ -z "$files" ]; then
      break
    fi

    count=$(grep -c '' <<< "$files")
    total_files=$((total_files + count))

    if [ "$dry_run" = "true" ]; then
      head -n 10 <<< "$files"
      echo "... (${count} fichiers sur cette page)"
    else
      # One failed file must not abort the rest of the zone: count failures
      # and report them once everything has been attempted.
      while read -r fname; do
        [ -n "$fname" ] || continue
        if ! download_file "${BASE_URL}/${release}/${fname}" "${output_dir}/${fname}"; then
          failed=$((failed + 1))
        fi
      done <<< "$files"
    fi

    page=$((page + 1))
    # Safety limit
    if [ "$page" -gt 500 ]; then
      break
    fi
  done

  log_info "Total: ${total_files} fichiers dans la zone ${zone}"
  if [ "$failed" -gt 0 ]; then
    log_err "${failed} fichier(s) en échec dans la zone ${zone}"
    return 1
  fi
  return 0
}
# Print the help text (synopsis, accepted tile formats, options, examples)
# on stdout.
usage() {
  local help_text

  # Slurp the whole here-document into a variable. read returns non-zero at
  # end of input when no NUL delimiter is found, hence the `|| true`.
  IFS= read -r -d '' help_text <<'EOF' || true
download_lidar.sh — Téléchargement de fichiers LiDAR HD depuis l'IGN
Usage:
./download_lidar.sh <tuile> [tuile2...] Télécharger une ou plusieurs tuiles
./download_lidar.sh --search <motif> Chercher une tuile dans toutes les zones
./download_lidar.sh --list-zones Lister les zones disponibles
./download_lidar.sh --zone <ZONE> [--dry-run] Télécharger une zone entière
./download_lidar.sh --check [dossier/] Vérifier l'intégrité des fichiers LAZ
Formats de tuile acceptés:
1049_6895 Coordonnées seules
LHD_FXX_1049_6895_PTS_LAMB93_IGN69 Nom complet sans extension
LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz Nom complet avec extension
Options:
-o, --output DIR Répertoire de destination (défaut: .)
-n, --dry-run Afficher sans télécharger (avec --zone)
--check [DIR] Vérifier l'intégrité des fichiers LAZ (défaut: ./)
--list-zones Lister toutes les zones disponibles
--search MOTIF Chercher un motif dans les noms de tuile
--zone ZONE Télécharger tous les fichiers d'une zone
-h, --help Afficher cette aide
Exemples:
./download_lidar.sh 1049_6895
./download_lidar.sh LHD_FXX_0713_6347_PTS_LAMB93_IGN69.copc.laz -o input/
./download_lidar.sh --search 1049
./download_lidar.sh --list-zones
./download_lidar.sh --zone RE -o input/
./download_lidar.sh --check input/
EOF

  printf '%s' "$help_text"
}
# Check the integrity of the LAZ files of a directory (non-recursive).
#
# A file is OK when it starts with the LAS signature "LASF" (hex 4c415346)
# and is larger than 1,000,000 bytes; everything else is counted as a problem.
#
# Arguments: $1 - directory to scan (default: .)
# Outputs:   one line per file, then a summary line
# Returns:   0 always; problems are only reported
check_files() {
  local dir="${1:-.}"
  local ok=0
  local fail=0
  local f name magic size

  log_info "Vérification de l'intégrité des fichiers LAZ dans ${dir}/"

  # "*.laz" already matches "*.copc.laz"; globbing both patterns visited and
  # counted every COPC file twice.
  for f in "${dir}"/*.laz; do
    [ -f "$f" ] || continue
    name="${f##*/}"

    # od and wc are used instead of xxd and GNU-only `stat -c`, so the check
    # does not silently fail where those are unavailable.
    magic=$(od -An -tx1 -N4 "$f" 2>/dev/null | tr -d ' \n') || magic=""
    size=$(wc -c 2>/dev/null < "$f" | tr -d '[:space:]') || size=0
    size="${size:-0}"

    if [ "$magic" = "4c415346" ] && [ "$size" -gt 1000000 ]; then
      log_ok "${name} (${size} octets)"
      ok=$((ok + 1))
    elif [ "$magic" = "7b227469" ]; then
      # 7b227469 is the ASCII text {"ti — the start of a saved error
      # response rather than point-cloud data.
      log_err "${name} est une page HTML (404), pas un fichier LAZ"
      fail=$((fail + 1))
    elif [ "$size" -lt 1000 ]; then
      log_err "${name} trop petit (${size} octets) — probablement corrompu"
      fail=$((fail + 1))
    else
      log_warn "${name} magic=${magic}, taille=${size} — vérification nécessaire"
      fail=$((fail + 1))
    fi
  done

  log_info "Résultat: ${ok} OK, ${fail} problème(s)"
  return 0
}
# Main: parse the command line, then dispatch on the selected command.
CMD=""
TILES=()
ZONE=""
DRY_RUN=false

# Abort with the usage text when the option in $1 is not followed by a value.
# Call as `require_value "$@"` while the option is still the first argument.
# Without this check a trailing option died with an "unbound variable" error
# under `set -u`.
require_value() {
  if [ $# -lt 2 ]; then
    log_err "L'option $1 requiert une valeur"
    usage
    exit 1
  fi
}

while [ $# -gt 0 ]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --list-zones)
      CMD="list-zones"
      shift
      ;;
    --search)
      require_value "$@"
      CMD="search"
      TILES+=("$2")
      shift 2
      ;;
    --zone)
      require_value "$@"
      CMD="zone"
      ZONE="$2"
      shift 2
      ;;
    --check)
      CMD="check"
      shift
      # The directory operand is optional. Take the next argument only when
      # it does not look like an option; otherwise keep the current
      # OUTPUT_DIR, so that an earlier -o is not overwritten.
      if [ $# -gt 0 ] && [ "${1#-}" = "$1" ]; then
        OUTPUT_DIR="$1"
        shift
      fi
      ;;
    -o|--output)
      require_value "$@"
      OUTPUT_DIR="$2"
      shift 2
      ;;
    -n|--dry-run)
      DRY_RUN=true
      shift
      ;;
    -*)
      log_err "Option inconnue: $1"
      usage
      exit 1
      ;;
    *)
      TILES+=("$1")
      shift
      ;;
  esac
done

case "${CMD}" in
  list-zones)
    list_zones
    ;;
  search)
    for tile in "${TILES[@]}"; do
      search_tile "$tile"
    done
    ;;
  zone)
    if [ -z "$ZONE" ]; then
      log_err "Zone non spécifiée. Utilisez --zone <CODE>"
      exit 1
    fi
    mkdir -p "$OUTPUT_DIR"
    download_zone "$ZONE" "$OUTPUT_DIR" "$DRY_RUN"
    ;;
  check)
    if [ ! -d "$OUTPUT_DIR" ]; then
      log_err "Répertoire introuvable: ${OUTPUT_DIR}"
      exit 1
    fi
    check_files "$OUTPUT_DIR"
    ;;
  *)
    # Default: download tiles
    if [ ${#TILES[@]} -eq 0 ]; then
      usage
      exit 1
    fi
    mkdir -p "$OUTPUT_DIR"
    # Attempt every tile even when one fails; the exit status still reports
    # the failure once all of them have been tried.
    failures=0
    for tile in "${TILES[@]}"; do
      find_and_download "$tile" "$OUTPUT_DIR" || failures=$((failures + 1))
    done
    if [ "$failures" -gt 0 ]; then
      log_err "${failures} tuile(s) en échec sur ${#TILES[@]}"
      exit 1
    fi
    ;;
esac