#!/usr/bin/env bash
# update-csv-data.sh — Download and generate all CSV reference data for JA4 platform.
#
# Outputs (written to the data/ directory next to this script):
#   data/bot_ip.csv               — Known bot/scanner IPs + Tor exit nodes
#   data/bot_ja4.csv              — Known bot JA4 TLS fingerprints
#   data/asn_reputation.csv       — ASN→label mapping (human/datacenter/hosting)
#   data/iplocate-ip-to-asn.csv   — CIDR→ASN for dictionary lookup
#
# Usage:
#   ./scripts/update-csv-data.sh                    # generate all
#   ./scripts/update-csv-data.sh --install-stubs    # also copy to test csv-stubs
#
# Requirements: curl, python3 (stdlib only)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DATA_DIR="${SCRIPT_DIR}/data"
STUBS_DIR="${SCRIPT_DIR}/../tests/integration/platform/csv-stubs"

# Scratch file for the downloaded Tor exit list; removed on every exit path.
TOR_LIST_PATH="${DATA_DIR}/tor_exit_nodes.txt"

# Remove the scratch download. Registered on EXIT so the file is also cleaned
# up when a generator step fails and `set -e` aborts the script early.
cleanup() {
  rm -f -- "$TOR_LIST_PATH"
}
trap cleanup EXIT

mkdir -p "$DATA_DIR"

echo "=== [1/4] Downloading Tor exit node list ==="
TOR_URL="https://check.torproject.org/torbulkexitlist"
TOR_TMP="$TOR_LIST_PATH"

# Best-effort download: a failure is not fatal, the generator is simply run
# without --tor-file. curl's own error message (-S) is left visible on stderr
# so the reason for falling back is not hidden.
if curl -fsSL --connect-timeout 10 --max-time 30 "$TOR_URL" -o "$TOR_TMP"; then
  # grep -c already prints "0" when nothing matches but exits 1; `|| true`
  # only neutralises that status for `set -e` without appending a second "0".
  TOR_COUNT=$(grep -cE '^[0-9]' "$TOR_TMP" || true)
  echo " Downloaded ${TOR_COUNT:-0} Tor exit node IPs"
else
  echo " WARNING: Could not download Tor exit list (offline?), using fallback" >&2
  TOR_TMP=""
fi

echo "=== [2/4] Generating bot_ip.csv ==="
# ${TOR_TMP:+...} expands to nothing when the download failed, otherwise to
# the two words `--tor-file <path>`; the inner quotes keep the path one word.
python3 "${SCRIPT_DIR}/generate_bot_ip.py" \
  --output "${DATA_DIR}/bot_ip.csv" \
  ${TOR_TMP:+--tor-file "$TOR_TMP"}
echo " $(wc -l < "${DATA_DIR}/bot_ip.csv") entries"

echo "=== [3/4] Generating bot_ja4.csv ==="
python3 "${SCRIPT_DIR}/generate_bot_ja4.py" \
  --output "${DATA_DIR}/bot_ja4.csv"
echo " $(wc -l < "${DATA_DIR}/bot_ja4.csv") entries"

echo "=== [4/4] Generating ASN + IP-to-ASN CSVs ==="
python3 "${SCRIPT_DIR}/generate_asn_data.py" \
  --output-asn "${DATA_DIR}/asn_reputation.csv" \
  --output-ipasn "${DATA_DIR}/iplocate-ip-to-asn.csv"
echo " ASN reputation: $(wc -l < "${DATA_DIR}/asn_reputation.csv") entries"
echo " IP-to-ASN: $(wc -l < "${DATA_DIR}/iplocate-ip-to-asn.csv") entries"

# Optionally install into test stubs
INSTALL_STUBS=0
if [[ "${1:-}" == "--install-stubs" ]]; then
  INSTALL_STUBS=1
  echo ""
  echo "=== Installing to test csv-stubs ==="
  cp -v "${DATA_DIR}/bot_ip.csv" "$STUBS_DIR/"
  cp -v "${DATA_DIR}/bot_ja4.csv" "$STUBS_DIR/"
  cp -v "${DATA_DIR}/asn_reputation.csv" "$STUBS_DIR/"
  cp -v "${DATA_DIR}/iplocate-ip-to-asn.csv" "$STUBS_DIR/"
  echo "Done."
fi

echo ""
echo "All CSV data generated in ${DATA_DIR}/"
# Only advertise the flag when it was not already used.
if (( INSTALL_STUBS == 0 )); then
  echo "Run with --install-stubs to copy to test fixtures."
fi