Add download script for IGN LiDAR HD LAZ files
Supports tile download by coordinates, zone-based bulk download, integrity checking, and auto-search across zones with resume support. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
584
download_lidar.sh
Executable file
584
download_lidar.sh
Executable file
@ -0,0 +1,584 @@
|
||||
#!/bin/bash
# download_lidar.sh — Download LiDAR HD point-cloud tiles (LAZ) from IGN
#
# Usage:
#   ./download_lidar.sh LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
#   ./download_lidar.sh 1049_6895
#   ./download_lidar.sh 1049_6895 1029_6884
#   ./download_lidar.sh --list-zones
#   ./download_lidar.sh --search 1049_6895
#   ./download_lidar.sh --zone RE --output input/
#   ./download_lidar.sh --zone RE --dry-run
#
# The IGN API exposes the files grouped by zone (RE, SE, AE, etc.).
# The script automatically looks up which zone holds each requested tile.

set -euo pipefail

# File download endpoint; files live at ${BASE_URL}/<release>/<filename>.
readonly BASE_URL="https://data.geopf.fr/telechargement/download/LiDARHD-NUALID"
# Listing endpoint, queried with ?page=<n>&limit=50 and parsed as an Atom feed.
readonly API_URL="https://data.geopf.fr/telechargement/resource/LiDARHD-NUALID"
# Destination directory. May be preset through the environment; the option
# parser reassigns it, so it is deliberately NOT readonly.
OUTPUT_DIR="${OUTPUT_DIR:-.}"

# Colors (backslash escapes, expanded by the log_* helpers at print time)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# Logging helpers: print a coloured severity tag followed by the message on
# stdout. Only the colour variables are passed through %b (they hold backslash
# escapes); the message itself is printed verbatim with %s so that backslashes
# in paths, URLs or captured command output are never reinterpreted.
log_info() { printf '%b[INFO]%b %s\n' "${CYAN}" "${NC}" "$*"; }
log_ok() { printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*"; }
log_warn() { printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*"; }
log_err() { printf '%b[ERR]%b %s\n' "${RED}" "${NC}" "$*"; }

#######################################
# Normalize a tile identifier to a full tile filename.
# Accepted forms:
#   1049_6895
#   LHD_FXX_1049_6895_PTS_LAMB93_IGN69
#   LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
# Any of them may additionally carry a directory prefix and/or a
# ".copc.laz" / ".laz" extension; both are stripped before normalizing.
# Arguments: $1 - tile identifier
# Outputs:   the normalized filename on stdout
#######################################
normalize_name() {
  local input="$1"
  local stem

  # A tile name never contains a slash: keep only the last path component.
  stem="${input##*/}"
  # Drop the extension so every accepted form reduces to a bare stem.
  stem="${stem%.copc.laz}"
  stem="${stem%.laz}"

  if [[ "$stem" == LHD_FXX_*_PTS_LAMB93_IGN69 ]]; then
    # Already a full tile name.
    printf '%s\n' "${stem}.copc.laz"
  else
    # Bare coordinates like "1049_6895".
    printf '%s\n' "LHD_FXX_${stem}_PTS_LAMB93_IGN69.copc.laz"
  fi
}

#######################################
# List all available zone releases.
# Walks the paginated listing at ${API_URL} and prints one line per release:
#   <zone> TAB <date> TAB <release title>
# sorted with sort(1). Release titles are expected to look like
# NUALHD_1-0__LAZ_LAMB93_{ZONE}_{DATE}; zone and date are the last two
# "_"-separated fields of the title.
# Globals:   API_URL (read)
# Outputs:   an info line, then the sorted listing, on stdout
#######################################
list_zones() {
  # Prints "zone<TAB>date<TAB>title" for every Atom entry read on stdin.
  # Unparseable input produces no output (best effort).
  local py_rows='
import sys
import xml.etree.ElementTree as ET

ns = {"atom": "http://www.w3.org/2005/Atom"}
try:
    root = ET.parse(sys.stdin.buffer).getroot()
except ET.ParseError:
    sys.exit(0)
for entry in root.findall("atom:entry", ns):
    title = entry.find("atom:title", ns)
    if title is not None and title.text:
        parts = title.text.split("_")
        zone = parts[-2] if len(parts) >= 2 else "?"
        date = parts[-1]
        print(f"{zone}\t{date}\t{title.text}")
'
  # Prints the page count attribute of the feed root, "1" when unavailable.
  local py_pagecount='
import sys
import xml.etree.ElementTree as ET

try:
    root = ET.parse(sys.stdin.buffer).getroot()
    print(root.get("{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}pagecount", "1"))
except ET.ParseError:
    print("1")
'

  log_info "Récupération de la liste des zones..."

  local page=1
  local total_pages=""
  local content

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [[ -n "$content" ]] || break

    # stderr is left visible so a missing python3 is reported instead of
    # silently producing an empty listing.
    printf '%s\n' "$content" | python3 -c "$py_rows" || true

    if [[ -z "$total_pages" ]]; then
      total_pages=$(printf '%s\n' "$content" | python3 -c "$py_pagecount") \
        || total_pages=1
      # A non-numeric count would make the comparison below error out on
      # every iteration and the loop would never stop.
      [[ "$total_pages" =~ ^[0-9]+$ ]] || total_pages=1
    fi

    [ "$page" -lt "$total_pages" ] || break
    page=$((page + 1))
  done | sort
}

#######################################
# Search for a tile across all zone releases.
# The tile identifier is normalized to its full filename, then every release
# listed by ${API_URL} is probed with an HTTP HEAD request for that exact
# file. Every release answering 200 is reported together with its URL.
# Globals:   API_URL, BASE_URL (read)
# Arguments: $1 - tile identifier (any form accepted by normalize_name)
# Outputs:   progress and results on stdout
# Returns:   0 (also when the tile is not found; an error line is logged)
#######################################
search_tile() {
  local tile_name="$1"
  local filename
  filename=$(normalize_name "$tile_name")

  # Prints the title of every Atom entry read on stdin (one per line).
  local py_titles='
import sys
import xml.etree.ElementTree as ET

ns = {"atom": "http://www.w3.org/2005/Atom"}
try:
    root = ET.parse(sys.stdin.buffer).getroot()
except ET.ParseError:
    sys.exit(0)
for entry in root.findall("atom:entry", ns):
    title = entry.find("atom:title", ns)
    if title is not None and title.text:
        print(title.text)
'
  # Prints the page count attribute of the feed root, "1" when unavailable.
  local py_pagecount='
import sys
import xml.etree.ElementTree as ET

try:
    root = ET.parse(sys.stdin.buffer).getroot()
    print(root.get("{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}pagecount", "1"))
except ET.ParseError:
    print("1")
'

  log_info "Recherche de ${filename} dans toutes les zones..."

  local page=1
  local found=0
  local total_pages=""
  local content release url code

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [[ -n "$content" ]] || break

    # The loop is fed through process substitution rather than a pipe so it
    # runs in the current shell and the update of `found` is not lost.
    while IFS= read -r release; do
      [[ -n "$release" ]] || continue
      url="${BASE_URL}/${release}/${filename}"
      # A transport error must not abort the search: treat it as "not here".
      code=$(curl -sI -L -o /dev/null -w "%{http_code}" "$url" 2>/dev/null) \
        || code="000"
      if [[ "$code" == "200" ]]; then
        log_ok "Trouvé: ${filename} dans la zone ${release}"
        echo " URL: ${url}"
        found=1
      fi
    done < <(printf '%s\n' "$content" | python3 -c "$py_titles" || true)

    if [[ -z "$total_pages" ]]; then
      total_pages=$(printf '%s\n' "$content" | python3 -c "$py_pagecount") \
        || total_pages=1
      # Guard against a non-numeric count, which would never end the loop.
      [[ "$total_pages" =~ ^[0-9]+$ ]] || total_pages=1
    fi

    [ "$page" -lt "$total_pages" ] || break
    page=$((page + 1))
  done

  if [ "$found" -eq 0 ]; then
    log_err "${filename} non trouvé dans aucune zone"
  fi
  return 0
}

#######################################
# Download a single file with resume support.
# curl is run with "-C -" so an existing partial file is continued. Only a
# partial transfer (curl exit code 18) is retried, up to 15 times; any other
# curl error ends the download immediately. A completed download is accepted
# only if it starts with the LAS/LAZ signature "LASF".
# Arguments: $1 - source URL
#            $2 - destination file path
# Outputs:   progress and errors on stdout via the log_* helpers
# Returns:   0 on success (or when retries are exhausted but the partial
#            file carries a valid signature), 1 otherwise
#######################################
download_file() {
  local url="$1"
  local output_path="$2"
  local filename
  filename=$(basename "$output_path")

  # Resume download with retries
  local -r max_retries=15
  local retry=0
  local result exit_code magic size

  while [ "$retry" -lt "$max_retries" ]; do
    retry=$((retry + 1))

    # Capture the status with `||`: under `set -e` a bare failing assignment
    # would abort the script before the exit code could be inspected.
    # --fail turns HTTP errors into exit code 22 instead of saving the error
    # body as if it were the file.
    exit_code=0
    result=$(curl -L --http1.1 --fail -C - -o "$output_path" "$url" 2>&1) \
      || exit_code=$?

    if [ "$exit_code" -eq 0 ]; then
      # Verify it's a valid LAS/LAZ file: first four bytes must be "LASF".
      # od is used (rather than xxd) because it is part of every POSIX system.
      magic=$(head -c 4 "$output_path" 2>/dev/null | od -An -tx1 | tr -d ' \n') \
        || magic=""
      if [ "$magic" = "4c415346" ]; then
        size=$({ wc -c < "$output_path"; } 2>/dev/null | tr -d ' ') || size=0
        log_ok "${filename} téléchargé (${size:-0} octets)"
        return 0
      fi
      log_err "${filename} n'est pas un fichier LAZ valide (magic: ${magic})"
      rm -f "$output_path"
      return 1
    fi

    # curl exit code 18 = partial download, resume
    if [ "$exit_code" -eq 18 ]; then
      size=$({ wc -c < "$output_path"; } 2>/dev/null | tr -d ' ') || size=0
      log_warn "Téléchargement partiel (${size:-0} octets), tentative ${retry}/${max_retries}..."
      sleep 2
      continue
    fi

    # HTTP 404 (matched on curl's own error text, not on any "404" that
    # might appear among the progress-meter numbers)
    if [[ "$result" == *"error: 404"* ]]; then
      log_err "Fichier non trouvé (404): ${url}"
      rm -f "$output_path"
      return 1
    fi

    log_err "Erreur curl (${exit_code}): ${result}"
    return 1
  done

  # If we exhausted retries, check if partial file is usable
  magic=$(head -c 4 "$output_path" 2>/dev/null | od -An -tx1 | tr -d ' \n') \
    || magic=""
  if [ "$magic" = "4c415346" ]; then
    size=$({ wc -c < "$output_path"; } 2>/dev/null | tr -d ' ') || size=0
    log_warn "Fichier partiel mais valide: ${filename} (${size:-0} octets)"
    return 0
  fi

  log_err "Échec après ${max_retries} tentatives"
  rm -f "$output_path"
  return 1
}

#######################################
# Find which zone release contains a given tile and download it.
# An existing destination file with a valid "LASF" signature and a size
# above 1,000,000 bytes is kept as-is. Otherwise every release listed by
# ${API_URL} is probed with an HTTP HEAD request and the tile is downloaded
# from the first release answering 200.
# Globals:   API_URL, BASE_URL (read)
# Arguments: $1 - tile identifier (any form accepted by normalize_name)
#            $2 - output directory
# Outputs:   progress and errors on stdout via the log_* helpers
# Returns:   0 if the tile is already present or was downloaded,
#            1 if the download failed or no release holds the tile
#######################################
find_and_download() {
  local tile_name="$1"
  local output_dir="$2"
  local filename
  filename=$(normalize_name "$tile_name")
  local output_path="${output_dir}/${filename}"

  # Prints the title of every Atom entry read on stdin (one per line).
  local py_titles='
import sys
import xml.etree.ElementTree as ET

ns = {"atom": "http://www.w3.org/2005/Atom"}
try:
    root = ET.parse(sys.stdin.buffer).getroot()
except ET.ParseError:
    sys.exit(0)
for entry in root.findall("atom:entry", ns):
    title = entry.find("atom:title", ns)
    if title is not None and title.text:
        print(title.text)
'
  # Prints the page count attribute of the feed root, "1" when unavailable.
  local py_pagecount='
import sys
import xml.etree.ElementTree as ET

try:
    root = ET.parse(sys.stdin.buffer).getroot()
    print(root.get("{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}pagecount", "1"))
except ET.ParseError:
    print("1")
'

  # If file already exists and is valid, skip
  if [[ -f "$output_path" ]]; then
    local magic size
    magic=$(head -c 4 "$output_path" 2>/dev/null | od -An -tx1 | tr -d ' \n') \
      || magic=""
    if [[ "$magic" == "4c415346" ]]; then
      size=$({ wc -c < "$output_path"; } 2>/dev/null | tr -d ' ') || size=0
      if [ "${size:-0}" -gt 1000000 ]; then
        log_info "${filename} déjà présent (${size} octets) — ignoré"
        return 0
      fi
    else
      # Not a LAZ file at all: a resumed transfer would append to garbage,
      # so start over from an empty file.
      rm -f -- "$output_path"
    fi
    # File exists but seems invalid/truncated, re-download
    log_warn "${filename} existe mais semble incomplet, re-téléchargement..."
  fi

  log_info "Recherche de ${filename}..."

  # Try all zone releases
  local page=1
  local total_pages=""
  local content release url code

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [[ -n "$content" ]] || break

    # Process substitution keeps the loop in the current shell, so `return`
    # really leaves the function (inside a pipeline it would only leave the
    # subshell and the search would go on to report a failure).
    while IFS= read -r release; do
      [[ -n "$release" ]] || continue
      url="${BASE_URL}/${release}/${filename}"
      code=$(curl -sI -L -o /dev/null -w "%{http_code}" "$url" 2>/dev/null) \
        || code="000"
      if [[ "$code" == "200" ]]; then
        log_ok "Trouvé dans la zone ${release}"
        if download_file "$url" "$output_path"; then
          return 0
        fi
        return 1
      fi
    done < <(printf '%s\n' "$content" | python3 -c "$py_titles" || true)

    if [[ -z "$total_pages" ]]; then
      total_pages=$(printf '%s\n' "$content" | python3 -c "$py_pagecount") \
        || total_pages=1
      # Guard against a non-numeric count, which would never end the loop.
      [[ "$total_pages" =~ ^[0-9]+$ ]] || total_pages=1
    fi

    [ "$page" -lt "$total_pages" ] || break
    page=$((page + 1))
  done

  log_err "${filename} non trouvé dans aucune zone disponible"
  return 1
}

#######################################
# Download all tiles from a specific zone.
# First looks for the release whose title has the zone code as its
# second-to-last "_"-separated field (e.g. NUALHD_1-0__LAZ_LAMB93_RE_2025-02-17
# for zone RE), scanning at most 25 listing pages. Then walks the file
# listing of that release (at most 500 pages) and downloads every file, or
# only prints the first 10 names of each page in dry-run mode.
# Globals:   API_URL, BASE_URL (read)
# Arguments: $1 - zone code
#            $2 - output directory
#            $3 - "true" for a dry run (default: false)
# Outputs:   progress and errors on stdout
# Returns:   0 on success; 1 if the zone is unknown or a download failed
#######################################
download_zone() {
  local zone="$1"
  local output_dir="$2"
  local dry_run="${3:-false}"

  # Prints the first entry title whose zone field equals sys.argv[1].
  # The zone is passed as an argument, never spliced into the Python source.
  local py_release='
import sys
import xml.etree.ElementTree as ET

ns = {"atom": "http://www.w3.org/2005/Atom"}
wanted = sys.argv[1]
try:
    root = ET.parse(sys.stdin.buffer).getroot()
except ET.ParseError:
    sys.exit(0)
for entry in root.findall("atom:entry", ns):
    title = entry.find("atom:title", ns)
    if title is not None and title.text:
        parts = title.text.split("_")
        if len(parts) >= 2 and parts[-2] == wanted:
            print(title.text)
            break
'
  # Prints the title of every Atom entry read on stdin (one per line).
  local py_titles='
import sys
import xml.etree.ElementTree as ET

ns = {"atom": "http://www.w3.org/2005/Atom"}
try:
    root = ET.parse(sys.stdin.buffer).getroot()
except ET.ParseError:
    sys.exit(0)
for entry in root.findall("atom:entry", ns):
    title = entry.find("atom:title", ns)
    if title is not None and title.text:
        print(title.text)
'

  # Find the release name for this zone
  local release=""
  local page=1
  local content

  while [ "$page" -le 25 ]; do # safety limit on the number of pages
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [[ -n "$content" ]] || break

    release=$(printf '%s\n' "$content" | python3 -c "$py_release" "$zone") \
      || release=""
    [[ -z "$release" ]] || break
    page=$((page + 1))
  done

  if [[ -z "$release" ]]; then
    log_err "Zone '${zone}' non trouvée"
    return 1
  fi

  log_info "Zone ${zone}: ${release}"

  # Get list of files in this release
  local total_files=0
  local failed=0
  local files count fname

  page=1
  while [ "$page" -le 500 ]; do # safety limit on the number of pages
    content=$(curl -s "${API_URL}/${release}?page=${page}&limit=50") || true
    [[ -n "$content" ]] || break

    files=$(printf '%s\n' "$content" | python3 -c "$py_titles") || files=""
    [[ -n "$files" ]] || break

    count=$(($(wc -l <<< "$files")))
    total_files=$((total_files + count))

    if [[ "$dry_run" == "true" ]]; then
      head -n 10 <<< "$files"
      echo "... (${count} fichiers sur cette page)"
    else
      # Keep going after a failed file: one bad tile must not abort the
      # whole zone. Failures are counted and reported at the end.
      while IFS= read -r fname; do
        [[ -n "$fname" ]] || continue
        if ! download_file "${BASE_URL}/${release}/${fname}" "${output_dir}/${fname}"; then
          failed=$((failed + 1))
        fi
      done <<< "$files"
    fi

    page=$((page + 1))
  done

  log_info "Total: ${total_files} fichiers dans la zone ${zone}"

  if [ "$failed" -gt 0 ]; then
    log_err "${failed} téléchargement(s) en échec dans la zone ${zone}"
    return 1
  fi
  return 0
}

#######################################
# Show usage.
# Outputs: the help text on stdout (the text is user-facing and in French).
#######################################
usage() {
  # Quoted delimiter: the text is printed literally, nothing is expanded.
  cat <<'EOF'
download_lidar.sh — Téléchargement de fichiers LiDAR HD depuis l'IGN

Usage:
  ./download_lidar.sh <tuile> [tuile2...]         Télécharger une ou plusieurs tuiles
  ./download_lidar.sh --search <tuile>            Chercher une tuile dans toutes les zones
  ./download_lidar.sh --list-zones                Lister les zones disponibles
  ./download_lidar.sh --zone <ZONE> [--dry-run]   Télécharger une zone entière
  ./download_lidar.sh --check [dossier/]          Vérifier l'intégrité des fichiers LAZ

Formats de tuile acceptés:
  1049_6895                                       Coordonnées seules
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69              Nom complet sans extension
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz     Nom complet avec extension

Options:
  -o, --output DIR    Répertoire de destination (défaut: $OUTPUT_DIR ou .)
  -n, --dry-run       Afficher sans télécharger (avec --zone)
  --check [DIR]       Vérifier l'intégrité des fichiers LAZ (défaut: ./)
  --list-zones        Lister toutes les zones disponibles
  --search TUILE      Chercher dans quelles zones une tuile est disponible
  --zone ZONE         Télécharger tous les fichiers d'une zone
  -h, --help          Afficher cette aide

Exemples:
  ./download_lidar.sh 1049_6895
  ./download_lidar.sh LHD_FXX_0713_6347_PTS_LAMB93_IGN69.copc.laz -o input/
  ./download_lidar.sh --search 1049_6895
  ./download_lidar.sh --list-zones
  ./download_lidar.sh --zone RE -o input/
  ./download_lidar.sh --check input/
EOF
}

#######################################
# Check integrity of LAZ files.
# Every *.laz file of the directory (which includes *.copc.laz) is classified
# from its first four bytes and its size:
#   - "LASF" signature and more than 1,000,000 bytes: OK
#   - starts with {"ti (a JSON document saved in place of the tile): error
#   - smaller than 1000 bytes: error
#   - anything else: warning, counted as a problem
# Arguments: $1 - directory to inspect (default: .)
# Outputs:   one line per file and a summary, on stdout
# Returns:   0 always
#######################################
check_files() {
  local dir="${1:-.}"
  local ok=0
  local fail=0
  local f name magic size

  log_info "Vérification de l'intégrité des fichiers LAZ dans ${dir}/"

  # A single glob: "*.laz" already matches "*.copc.laz", listing both would
  # check and count every COPC file twice.
  for f in "${dir}"/*.laz; do
    [[ -f "$f" ]] || continue
    name="${f##*/}"
    # od and wc are used (rather than xxd and GNU stat) for portability.
    magic=$(head -c 4 "$f" 2>/dev/null | od -An -tx1 | tr -d ' \n') || magic=""
    size=$({ wc -c < "$f"; } 2>/dev/null | tr -d ' ') || size=0
    size="${size:-0}"

    if [[ "$magic" == "4c415346" ]] && [ "$size" -gt 1000000 ]; then
      log_ok "${name} (${size} octets)"
      ok=$((ok + 1))
    elif [[ "$magic" == "7b227469" ]]; then
      # 7b 22 74 69 is the ASCII text {"ti — a JSON body, not HTML.
      log_err "${name} est une réponse d'erreur JSON (404), pas un fichier LAZ"
      fail=$((fail + 1))
    elif [ "$size" -lt 1000 ]; then
      log_err "${name} trop petit (${size} octets) — probablement corrompu"
      fail=$((fail + 1))
    else
      log_warn "${name} magic=${magic}, taille=${size} — vérification nécessaire"
      fail=$((fail + 1))
    fi
  done

  log_info "Résultat: ${ok} OK, ${fail} problème(s)"
  return 0
}

# Main: parse the command line, then dispatch to the selected command.
# Without a command option, the positional arguments are tiles to download.
CMD=""
TILES=()
ZONE=""
DRY_RUN=false

while [ $# -gt 0 ]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --list-zones)
      CMD="list-zones"
      shift
      ;;
    --search|--zone|-o|--output)
      # These options take a value; reading "$2" blindly would crash with an
      # "unbound variable" error under `set -u` when it is missing.
      if [ $# -lt 2 ]; then
        log_err "L'option $1 nécessite un argument"
        usage
        exit 1
      fi
      case "$1" in
        --search)
          CMD="search"
          TILES+=("$2")
          ;;
        --zone)
          CMD="zone"
          ZONE="$2"
          ;;
        *)
          OUTPUT_DIR="$2"
          ;;
      esac
      shift 2
      ;;
    --check)
      CMD="check"
      shift
      # The directory is optional: take the next word unless it is an option.
      if [ $# -gt 0 ] && [[ "$1" != -* ]]; then
        OUTPUT_DIR="$1"
        shift
      else
        OUTPUT_DIR="."
      fi
      ;;
    -n|--dry-run)
      DRY_RUN=true
      shift
      ;;
    -*)
      log_err "Option inconnue: $1"
      usage
      exit 1
      ;;
    *)
      TILES+=("$1")
      shift
      ;;
  esac
done

case "${CMD}" in
  list-zones)
    list_zones
    ;;
  search)
    for tile in "${TILES[@]}"; do
      search_tile "$tile"
    done
    ;;
  zone)
    if [ -z "$ZONE" ]; then
      log_err "Zone non spécifiée. Utilisez --zone <CODE>"
      exit 1
    fi
    mkdir -p "$OUTPUT_DIR"
    download_zone "$ZONE" "$OUTPUT_DIR" "$DRY_RUN"
    ;;
  check)
    check_files "$OUTPUT_DIR"
    ;;
  *)
    # Default: download tiles
    if [ ${#TILES[@]} -eq 0 ]; then
      usage
      exit 1
    fi
    mkdir -p "$OUTPUT_DIR"
    # Try every tile even if one fails (a bare failing call would abort the
    # script under `set -e`), then report the failures through the exit code.
    failed=0
    for tile in "${TILES[@]}"; do
      find_and_download "$tile" "$OUTPUT_DIR" || failed=$((failed + 1))
    done
    if [ "$failed" -gt 0 ]; then
      log_err "${failed} tuile(s) en échec sur ${#TILES[@]}"
      exit 1
    fi
    ;;
esac
Reference in New Issue
Block a user