Supports tile download by coordinates, zone-based bulk download, integrity checking, and auto-search across zones with resume support. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
584 lines
17 KiB
Bash
Executable File
584 lines
17 KiB
Bash
Executable File
#!/bin/bash
|
|
# download_lidar.sh — Téléchargement de fichiers LiDAR HD depuis l'IGN
|
|
#
|
|
# Usage:
|
|
# ./download_lidar.sh LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
|
|
# ./download_lidar.sh 1049_6895
|
|
# ./download_lidar.sh 1049_6895 1029_6884
|
|
# ./download_lidar.sh --list-zones
|
|
#   ./download_lidar.sh --search 1049_6895
|
|
# ./download_lidar.sh --zone RE --output input/
|
|
# ./download_lidar.sh --zone RE --dry-run
|
|
#
|
|
# L'API IGN expose les fichiers organisés par zones (RE, SE, AE, etc.)
|
|
# Le script cherche automatiquement dans quelle zone se trouve chaque tuile.
|
|
|
|
# Strict mode: stop on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# IGN Géoplateforme endpoints. BASE_URL serves the files themselves
# (<BASE_URL>/<release>/<file>); API_URL serves the paginated Atom listings.
# Both are constants, so they are locked against accidental reassignment.
readonly BASE_URL="https://data.geopf.fr/telechargement/download/LiDARHD-NUALID"
readonly API_URL="https://data.geopf.fr/telechargement/resource/LiDARHD-NUALID"

# Destination directory. Taken from the environment when set, "." otherwise;
# the command-line parser may overwrite it (-o/--output, --check).
OUTPUT_DIR="${OUTPUT_DIR:-.}"
|
|
|
|
# Colors: ANSI escape sequences, stored in backslash form and expanded by the
# log helpers when they print.
#
# setup_colors leaves every color empty when NO_COLOR is set or when stdout is
# not a terminal, so redirected output and log files stay free of escape codes.
# Globals written: RED, GREEN, YELLOW, CYAN, NC
setup_colors() {
  if [ -n "${NO_COLOR:-}" ] || [ ! -t 1 ]; then
    RED=''
    GREEN=''
    YELLOW=''
    CYAN=''
    NC=''
    return 0
  fi
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  CYAN='\033[0;36m'
  NC='\033[0m'
}
setup_colors
|
|
|
|
# Logging helpers: print a colored severity tag followed by the message on
# stdout.
#
# Only the color variables go through %b (they hold backslash escape
# sequences); the message itself is printed with %s so that backslashes in
# file names, URLs or captured curl output are shown verbatim instead of being
# interpreted as escapes.
log_info() { printf '%b[INFO]%b %s\n' "${CYAN:-}" "${NC:-}" "$*"; }
log_ok() { printf '%b[OK]%b %s\n' "${GREEN:-}" "${NC:-}" "$*"; }
log_warn() { printf '%b[WARN]%b %s\n' "${YELLOW:-}" "${NC:-}" "$*"; }
log_err() { printf '%b[ERR]%b %s\n' "${RED:-}" "${NC:-}" "$*"; }
|
|
|
|
# Normalize a tile identifier to the canonical file name
# "LHD_FXX_<coords>_PTS_LAMB93_IGN69.copc.laz" (printed on stdout).
#
# Accepted forms for $1:
#   1049_6895
#   LHD_FXX_1049_6895_PTS_LAMB93_IGN69
#   LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz
# A leading directory ("input/...") and a trailing ".copc.laz" / ".laz" are
# tolerated on any of these forms, so "1049_6895.copc.laz" no longer yields a
# name with the extension embedded in the middle.
normalize_name() {
  local input="${1##*/}" # drop any directory part

  # Strip the extension first so every form goes through the same two cases.
  input="${input%.copc.laz}"
  input="${input%.laz}"

  if [[ "$input" == LHD_FXX_*_PTS_LAMB93_IGN69 ]]; then
    printf '%s.copc.laz\n' "$input"
  else
    # Bare coordinates like "1049_6895"
    printf 'LHD_FXX_%s_PTS_LAMB93_IGN69.copc.laz\n' "$input"
  fi
}
|
|
|
|
# List all available zone releases.
#
# Walks every page of the Atom listing at API_URL and prints one line per
# release on stdout, sorted:  <zone> TAB <date> TAB <release title>
# Zone and date are the last two "_"-separated fields of the title
# (e.g. NUALHD_1-0__LAZ_LAMB93_RE_2025-02-17 -> RE, 2025-02-17).
# Globals read: API_URL
list_zones() {
  log_info "Récupération de la liste des zones..."

  # One parser pass per page: first output line is the feed's page count,
  # the following lines are the entry titles. The program is single-quoted so
  # the shell never touches it.
  local py_feed='
import sys, xml.etree.ElementTree as ET
ATOM = "{http://www.w3.org/2005/Atom}"
GPF = "{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}"
try:
    root = ET.parse(sys.stdin).getroot()
except Exception:
    print("1")
    sys.exit(0)
print(root.get(GPF + "pagecount", "1"))
for entry in root.findall(ATOM + "entry"):
    title = entry.find(ATOM + "title")
    if title is not None and title.text:
        print(title.text)
'
  local page=1
  local total_pages=""
  local content parsed title rest zone date

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [ -n "$content" ] || break

    parsed=$(printf '%s\n' "$content" | python3 -c "$py_feed" 2>/dev/null) || parsed=""

    # The page count is taken from the first page only. Anything that is not
    # a plain number is treated as "1" so the loop is guaranteed to end.
    if [ -z "$total_pages" ]; then
      IFS= read -r total_pages <<< "$parsed" || true
      [[ "$total_pages" =~ ^[0-9]+$ ]] || total_pages=1
    fi

    while IFS= read -r title; do
      [ -n "$title" ] || continue
      date="${title##*_}"
      if [[ "$title" == *_* ]]; then
        rest="${title%_*}"
        zone="${rest##*_}"
      else
        zone='?'
      fi
      printf '%s\t%s\t%s\n' "$zone" "$date" "$title"
    done < <(printf '%s\n' "$parsed" | tail -n +2)

    [ "$page" -lt "$total_pages" ] || break
    page=$((page + 1))
  done | sort
}
|
|
|
|
# Search for a tile across all zone releases.
#
# Arguments:
#   $1 - tile identifier (any form accepted by normalize_name)
# Outputs:
#   for every release that serves the tile, a log line plus "  URL: <url>";
#   an error line when no release serves it
# Globals read: API_URL, BASE_URL
# Returns: 0 in all cases (the outcome is reported through the log only)
search_tile() {
  local tile_name="$1"
  local filename
  filename=$(normalize_name "$tile_name")

  log_info "Recherche de ${filename} dans toutes les zones..."

  # One parser pass per page: first output line is the feed's page count,
  # the following lines are the release titles.
  local py_feed='
import sys, xml.etree.ElementTree as ET
ATOM = "{http://www.w3.org/2005/Atom}"
GPF = "{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}"
try:
    root = ET.parse(sys.stdin).getroot()
except Exception:
    print("1")
    sys.exit(0)
print(root.get(GPF + "pagecount", "1"))
for entry in root.findall(ATOM + "entry"):
    title = entry.find(ATOM + "title")
    if title is not None and title.text:
        print(title.text)
'
  local page=1
  local found=0
  local total_pages=""
  local content parsed release url code

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [ -n "$content" ] || break

    parsed=$(printf '%s\n' "$content" | python3 -c "$py_feed" 2>/dev/null) || parsed=""

    if [ -z "$total_pages" ]; then
      IFS= read -r total_pages <<< "$parsed" || true
      [[ "$total_pages" =~ ^[0-9]+$ ]] || total_pages=1
    fi

    # The loop is fed through a redirection, not a pipe: a piped `while` runs
    # in a subshell and the `found` flag set below would be lost.
    # fd 3 keeps the list away from the stdin of the commands in the body.
    while IFS= read -r release <&3; do
      [ -n "$release" ] || continue
      url="${BASE_URL}/${release}/${filename}"
      # HEAD probe; a transport failure counts as "not here", not as fatal.
      code=$(curl -sI -L -o /dev/null -w "%{http_code}" "$url" 2>/dev/null) || code="000"
      if [ "$code" = "200" ]; then
        log_ok "Trouvé: ${filename} dans la zone ${release}"
        echo "  URL: ${url}"
        found=1
      fi
    done 3< <(printf '%s\n' "$parsed" | tail -n +2)

    [ "$page" -lt "$total_pages" ] || break
    page=$((page + 1))
  done

  if [ "$found" -eq 0 ]; then
    log_err "${filename} non trouvé dans aucune zone"
  fi
  return 0
}
|
|
|
|
# Download a single file with resume support, then validate it.
#
# Arguments:
#   $1 - source URL
#   $2 - destination path; an existing partial file is resumed (curl -C -)
# Returns:
#   0 - file downloaded and starting with the LAS signature "LASF", or retries
#       exhausted while the partial file still starts with that signature
#   1 - HTTP/curl error, or payload that is not a LAS/LAZ file (file removed)
download_file() {
  local url="$1"
  local output_path="$2"
  local filename="${output_path##*/}"
  local max_retries=15
  local las_magic="4c415346" # hex for the ASCII signature "LASF"
  local attempt=0
  local exit_code result magic size

  while [ "$attempt" -lt "$max_retries" ]; do
    attempt=$((attempt + 1))

    # The status is captured with `||`: under `set -e` a bare failing
    # assignment would terminate the script before any retry could happen.
    # --fail makes HTTP errors a non-zero exit instead of saving the error
    # page as the tile; -sS drops the progress meter (its digits could be
    # mistaken for a status code) but keeps real error messages.
    exit_code=0
    result=$(curl -sS -L --fail --http1.1 -C - -o "$output_path" "$url" 2>&1) || exit_code=$?

    if [ "$exit_code" -eq 0 ]; then
      # Verify it's a LAS/LAZ file. od and wc are used instead of xxd and
      # `stat -c`, which are missing on some systems and used to fail silently.
      magic=$(head -c 4 "$output_path" 2>/dev/null | od -An -tx1 | tr -d ' \n') || magic=""
      if [ "$magic" = "$las_magic" ]; then
        size=$({ wc -c < "$output_path"; } 2>/dev/null) || size=0
        size=$((${size:-0}))
        log_ok "${filename} téléchargé (${size} octets)"
        return 0
      fi
      log_err "${filename} n'est pas un fichier LAZ valide (magic: ${magic})"
      rm -f "$output_path"
      return 1
    fi

    # curl exit code 18 = partial transfer: keep the file and resume.
    if [ "$exit_code" -eq 18 ]; then
      size=$({ wc -c < "$output_path"; } 2>/dev/null) || size=0
      size=$((${size:-0}))
      log_warn "Téléchargement partiel (${size} octets), tentative ${attempt}/${max_retries}..."
      sleep 2
      continue
    fi

    # curl exit code 22 = HTTP error reported by --fail.
    if [ "$exit_code" -eq 22 ] && [[ "$result" == *"error: 404"* ]]; then
      log_err "Fichier non trouvé (404): ${url}"
      rm -f "$output_path"
      return 1
    fi

    log_err "Erreur curl (${exit_code}): ${result}"
    return 1
  done

  # Retries exhausted: a partial file with a valid signature is kept.
  # NOTE(review): this reports success for a file known to be truncated;
  # kept as-is because callers currently rely on the 0 status.
  magic=$(head -c 4 "$output_path" 2>/dev/null | od -An -tx1 | tr -d ' \n') || magic=""
  if [ "$magic" = "$las_magic" ]; then
    size=$({ wc -c < "$output_path"; } 2>/dev/null) || size=0
    size=$((${size:-0}))
    log_warn "Fichier partiel mais valide: ${filename} (${size} octets)"
    return 0
  fi

  log_err "Échec après ${max_retries} tentatives"
  rm -f "$output_path"
  return 1
}
|
|
|
|
# Find which zone release contains a given tile and download it.
#
# Arguments:
#   $1 - tile identifier (any form accepted by normalize_name)
#   $2 - destination directory
# Globals read: API_URL, BASE_URL
# Returns:
#   0 - tile already present (LAS signature and more than 1,000,000 bytes), or
#       found in a release and downloaded successfully
#   1 - download failed, or no release serves the tile
find_and_download() {
  local tile_name="$1"
  local output_dir="$2"
  local filename
  filename=$(normalize_name "$tile_name")
  local output_path="${output_dir}/${filename}"
  local magic size

  # If the file already exists and looks complete, skip it.
  if [ -f "$output_path" ]; then
    # od/wc instead of xxd/`stat -c`: the latter are absent on some systems,
    # which made every existing file look truncated.
    magic=$(head -c 4 "$output_path" 2>/dev/null | od -An -tx1 | tr -d ' \n') || magic=""
    size=$({ wc -c < "$output_path"; } 2>/dev/null) || size=0
    size=$((${size:-0}))
    if [ "$magic" = "4c415346" ] && [ "$size" -gt 1000000 ]; then
      log_info "${filename} déjà présent (${size} octets) — ignoré"
      return 0
    fi
    # File exists but seems invalid/truncated: download again.
    log_warn "${filename} existe mais semble incomplet, re-téléchargement..."
  fi

  log_info "Recherche de ${filename}..."

  # One parser pass per page: first output line is the feed's page count,
  # the following lines are the release titles.
  local py_feed='
import sys, xml.etree.ElementTree as ET
ATOM = "{http://www.w3.org/2005/Atom}"
GPF = "{https://data.geopf.fr/annexes/ressources/xsd/gpf_dl.xsd}"
try:
    root = ET.parse(sys.stdin).getroot()
except Exception:
    print("1")
    sys.exit(0)
print(root.get(GPF + "pagecount", "1"))
for entry in root.findall(ATOM + "entry"):
    title = entry.find(ATOM + "title")
    if title is not None and title.text:
        print(title.text)
'
  local page=1
  local total_pages=""
  local content parsed release url code

  while true; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [ -n "$content" ] || break

    parsed=$(printf '%s\n' "$content" | python3 -c "$py_feed" 2>/dev/null) || parsed=""

    if [ -z "$total_pages" ]; then
      IFS= read -r total_pages <<< "$parsed" || true
      [[ "$total_pages" =~ ^[0-9]+$ ]] || total_pages=1
    fi

    # The loop is fed through a redirection, not a pipe: inside a piped
    # `while` a `return` only leaves the pipeline's subshell, so the function
    # used to keep probing and always ended with "not found".
    # fd 3 keeps the list away from the stdin of the commands in the body.
    while IFS= read -r release <&3; do
      [ -n "$release" ] || continue
      url="${BASE_URL}/${release}/${filename}"
      code=$(curl -sI -L -o /dev/null -w "%{http_code}" "$url" 2>/dev/null) || code="000"
      if [ "$code" = "200" ]; then
        log_ok "Trouvé dans la zone ${release}"
        download_file "$url" "$output_path" || return 1
        return 0
      fi
    done 3< <(printf '%s\n' "$parsed" | tail -n +2)

    [ "$page" -lt "$total_pages" ] || break
    page=$((page + 1))
  done

  log_err "${filename} non trouvé dans aucune zone disponible"
  return 1
}
|
|
|
|
# Download all tiles from a specific zone.
#
# Arguments:
#   $1 - zone code (e.g. RE): the next-to-last "_"-separated field of a
#        release title such as NUALHD_1-0__LAZ_LAMB93_RE_2025-02-17
#   $2 - destination directory
#   $3 - "true" to only list the files (first 10 per page), default "false"
# Globals read: API_URL, BASE_URL
# Returns:
#   0 - zone processed and every download succeeded (or dry run)
#   1 - zone not found, or at least one file failed to download
download_zone() {
  local zone="$1"
  local output_dir="$2"
  local dry_run="${3:-false}"

  # Prints the entry titles of an Atom feed read on stdin, one per line.
  local py_titles='
import sys, xml.etree.ElementTree as ET
ATOM = "{http://www.w3.org/2005/Atom}"
try:
    root = ET.parse(sys.stdin).getroot()
except Exception:
    sys.exit(0)
for entry in root.findall(ATOM + "entry"):
    title = entry.find(ATOM + "title")
    if title is not None and title.text:
        print(title.text)
'
  local release=""
  local page=1
  local content titles title rest

  # Find the release name for this zone (safety limit: 25 pages).
  # The zone code is compared in the shell; it is never pasted into the
  # Python source, so quotes in the argument cannot alter the program.
  while [ "$page" -le 25 ]; do
    content=$(curl -s "${API_URL}?page=${page}&limit=50") || true
    [ -n "$content" ] || break

    titles=$(printf '%s\n' "$content" | python3 -c "$py_titles" 2>/dev/null) || titles=""
    # A page without entries means the listing is exhausted.
    [ -n "$titles" ] || break

    while IFS= read -r title; do
      rest="${title%_*}"
      if [[ "$title" == *_* ]] && [ "${rest##*_}" = "$zone" ]; then
        release="$title"
        break
      fi
    done <<< "$titles"

    [ -z "$release" ] || break
    page=$((page + 1))
  done

  if [ -z "$release" ]; then
    log_err "Zone '${zone}' non trouvée"
    return 1
  fi

  log_info "Zone ${zone}: ${release}"

  # Walk the file listing of this release (safety limit: 500 pages).
  local total_files=0
  local failed=0
  local files count fname
  page=1

  while [ "$page" -le 500 ]; do
    content=$(curl -s "${API_URL}/${release}?page=${page}&limit=50") || true
    [ -n "$content" ] || break

    files=$(printf '%s\n' "$content" | python3 -c "$py_titles" 2>/dev/null) || files=""
    [ -n "$files" ] || break

    count=$(($(printf '%s\n' "$files" | wc -l)))
    total_files=$((total_files + count))

    if [ "$dry_run" = "true" ]; then
      head -10 <<< "$files"
      echo "... (${count} fichiers sur cette page)"
    else
      # A failed file is counted and the loop goes on; previously the first
      # failure ended the whole script through `set -e` and `pipefail`.
      # fd 3 keeps the list away from the stdin of download_file.
      while IFS= read -r fname <&3; do
        download_file "${BASE_URL}/${release}/${fname}" "${output_dir}/${fname}" ||
          failed=$((failed + 1))
      done 3<<< "$files"
    fi

    page=$((page + 1))
  done

  log_info "Total: ${total_files} fichiers dans la zone ${zone}"

  if [ "$failed" -gt 0 ]; then
    log_err "${failed} fichier(s) en échec dans la zone ${zone}"
    return 1
  fi
  return 0
}
|
|
|
|
# Show usage on stdout.
#
# The --search entries describe what the command really does: it locates one
# tile by its full identifier. It is not a pattern search, so a partial value
# such as "1049" can never match.
usage() {
  cat <<'EOF'
download_lidar.sh — Téléchargement de fichiers LiDAR HD depuis l'IGN

Usage:
  ./download_lidar.sh <tuile> [tuile2...]         Télécharger une ou plusieurs tuiles
  ./download_lidar.sh --search <tuile>            Chercher une tuile dans toutes les zones
  ./download_lidar.sh --list-zones                Lister les zones disponibles
  ./download_lidar.sh --zone <ZONE> [--dry-run]   Télécharger une zone entière
  ./download_lidar.sh --check [dossier/]          Vérifier l'intégrité des fichiers LAZ

Formats de tuile acceptés:
  1049_6895                                       Coordonnées seules
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69              Nom complet sans extension
  LHD_FXX_1049_6895_PTS_LAMB93_IGN69.copc.laz     Nom complet avec extension

Options:
  -o, --output DIR    Répertoire de destination (défaut: $OUTPUT_DIR, sinon .)
  -n, --dry-run       Afficher sans télécharger (avec --zone)
  --check [DIR]       Vérifier l'intégrité des fichiers LAZ (défaut: ./)
  --list-zones        Lister toutes les zones disponibles
  --search TUILE      Chercher une tuile (identifiant complet, ex. 1049_6895)
  --zone ZONE         Télécharger tous les fichiers d'une zone
  -h, --help          Afficher cette aide

Exemples:
  ./download_lidar.sh 1049_6895
  ./download_lidar.sh LHD_FXX_0713_6347_PTS_LAMB93_IGN69.copc.laz -o input/
  ./download_lidar.sh --search 1049_6895
  ./download_lidar.sh --list-zones
  ./download_lidar.sh --zone RE -o input/
  ./download_lidar.sh --check input/
EOF
}
|
|
|
|
# Check integrity of the LAZ files of a directory.
#
# Arguments:
#   $1 - directory to scan (default: .); only its direct *.laz entries
# Outputs:
#   one log line per file, then a summary "<n> OK, <m> problème(s)"
# Classification:
#   OK      - LAS signature "LASF" (hex 4c415346) and more than 1,000,000 bytes
#   error   - file starts with hex 7b227469 ('{"ti', as in a JSON error body)
#   error   - file smaller than 1000 bytes
#   warning - anything else (counted as a problem)
# Returns: 0 in all cases; the result is conveyed by the log only.
check_files() {
  local dir="${1:-.}"
  local ok=0
  local fail=0
  local f name magic size

  log_info "Vérification de l'intégrité des fichiers LAZ dans ${dir}/"

  # "*.laz" already matches "*.copc.laz". Listing both patterns visited every
  # COPC tile twice and doubled the counters.
  for f in "${dir}"/*.laz; do
    [ -f "$f" ] || continue
    name="${f##*/}"

    # od/wc instead of xxd/`stat -c`: the latter are absent on some systems,
    # which made every file look empty or unsigned.
    magic=$(head -c 4 "$f" 2>/dev/null | od -An -tx1 | tr -d ' \n') || magic=""
    size=$({ wc -c < "$f"; } 2>/dev/null) || size=0
    size=$((${size:-0}))

    if [ "$magic" = "4c415346" ] && [ "$size" -gt 1000000 ]; then
      log_ok "${name} (${size} octets)"
      ok=$((ok + 1))
    elif [ "$magic" = "7b227469" ]; then
      log_err "${name} est une page HTML (404), pas un fichier LAZ"
      fail=$((fail + 1))
    elif [ "$size" -lt 1000 ]; then
      log_err "${name} trop petit (${size} octets) — probablement corrompu"
      fail=$((fail + 1))
    else
      log_warn "${name} magic=${magic}, taille=${size} — vérification nécessaire"
      fail=$((fail + 1))
    fi
  done

  log_info "Résultat: ${ok} OK, ${fail} problème(s)"
  return 0
}
|
|
|
|
# Main: parse the command line, then dispatch on the selected command.
#
# Globals written: CMD, TILES, ZONE, DRY_RUN, OUTPUT_DIR
CMD=""
TILES=()
ZONE=""
DRY_RUN=false

# Abort with a readable message when an option lacks its value; without this
# check `set -u` stops the script on a bare "$1: unbound variable".
#   $1 - option name
#   $2 - number of arguments remaining after the option
require_value() {
  if [ "$2" -lt 1 ]; then
    log_err "L'option $1 attend une valeur"
    usage
    exit 1
  fi
}

while [ $# -gt 0 ]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --list-zones)
      CMD="list-zones"
      shift
      ;;
    --search)
      require_value "$1" $(($# - 1))
      CMD="search"
      TILES+=("$2")
      shift 2
      ;;
    --zone)
      require_value "$1" $(($# - 1))
      CMD="zone"
      ZONE="$2"
      shift 2
      ;;
    --check)
      CMD="check"
      shift
      # Optional directory operand. Without one, the directory already
      # selected (-o/--output or $OUTPUT_DIR, "." by default) is kept instead
      # of being reset to ".".
      if [ $# -gt 0 ] && [[ "$1" != -* ]]; then
        if [ ! -d "$1" ]; then
          log_err "Répertoire introuvable: $1"
          exit 1
        fi
        OUTPUT_DIR="$1"
        shift
      fi
      ;;
    -o|--output)
      require_value "$1" $(($# - 1))
      OUTPUT_DIR="$2"
      shift 2
      ;;
    -n|--dry-run)
      DRY_RUN=true
      shift
      ;;
    -*)
      log_err "Option inconnue: $1"
      usage
      exit 1
      ;;
    *)
      TILES+=("$1")
      shift
      ;;
  esac
done

case "${CMD}" in
  list-zones)
    list_zones
    ;;
  search)
    for tile in "${TILES[@]}"; do
      search_tile "$tile"
    done
    ;;
  zone)
    if [ -z "$ZONE" ]; then
      log_err "Zone non spécifiée. Utilisez --zone <CODE>"
      exit 1
    fi
    mkdir -p "$OUTPUT_DIR"
    download_zone "$ZONE" "$OUTPUT_DIR" "$DRY_RUN"
    ;;
  check)
    check_files "$OUTPUT_DIR"
    ;;
  *)
    # Default: download tiles
    if [ ${#TILES[@]} -eq 0 ]; then
      usage
      exit 1
    fi
    mkdir -p "$OUTPUT_DIR"
    # Every requested tile is attempted; failures are counted and reflected in
    # the exit status instead of stopping at the first one.
    failed_tiles=0
    for tile in "${TILES[@]}"; do
      find_and_download "$tile" "$OUTPUT_DIR" || failed_tiles=$((failed_tiles + 1))
    done
    if [ "$failed_tiles" -gt 0 ]; then
      log_err "${failed_tiles} tuile(s) non téléchargée(s)"
      exit 1
    fi
    ;;
esac