- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries)
- scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families
- scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes
- scripts/update-csv-data.sh: master orchestrator with --install-stubs
- api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores
- pages.py: add /network route
- csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
68 lines
2.5 KiB
Bash
Executable File
68 lines
2.5 KiB
Bash
Executable File
#!/usr/bin/env bash
# update-csv-data.sh — Download and generate all CSV reference data for JA4 platform.
#
# Outputs (written to the data/ directory located next to this script):
#   data/bot_ip.csv              — Known bot/scanner IPs + Tor exit nodes
#   data/bot_ja4.csv             — Known bot JA4 TLS fingerprints
#   data/asn_reputation.csv      — ASN→label mapping (human/datacenter/hosting)
#   data/iplocate-ip-to-asn.csv  — CIDR→ASN for dictionary lookup
#
# Usage:
#   ./scripts/update-csv-data.sh                  # generate all
#   ./scripts/update-csv-data.sh --install-stubs  # also copy to test csv-stubs
#
# Requirements: curl, python3 (stdlib only)
#
# Exit status:
#   0 on success. Non-zero when python3 is missing, a generator script fails,
#   or --install-stubs is given but the csv-stubs directory does not exist.
#   A failed or empty Tor download is NOT fatal: bot_ip.csv is then generated
#   without --tor-file (the generator's fallback).

set -euo pipefail

# Resolve the directory holding this script. CDPATH is cleared so `cd` cannot
# jump elsewhere or print the directory it switched to.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly DATA_DIR="${SCRIPT_DIR}/data"
readonly STUBS_DIR="${SCRIPT_DIR}/../tests/integration/platform/csv-stubs"
readonly TOR_URL="https://check.torproject.org/torbulkexitlist"

# Path of the scratch file holding the raw Tor download (empty until created).
TOR_SCRATCH=""
# Path handed to generate_bot_ip.py via --tor-file; empty means "use fallback".
TOR_FILE=""

# Remove the Tor scratch file. Installed as an EXIT trap so it runs on every
# exit path, including failures triggered by `set -e`.
cleanup() {
  if [[ -n "$TOR_SCRATCH" ]]; then
    rm -f -- "$TOR_SCRATCH"
  fi
}
trap cleanup EXIT

# Print an error message to stderr and abort with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the number of lines in file $1 without surrounding whitespace
# (some `wc` implementations pad the number with leading spaces).
line_count() {
  local n
  n=$(wc -l < "$1")
  printf '%s' "$(( n ))"
}

# Download the Tor exit node list into a scratch file inside DATA_DIR.
# Globals: TOR_SCRATCH (written), TOR_FILE (written; left empty on failure).
# Never fails the script because of a network problem: it only warns.
fetch_tor_exit_list() {
  local count

  TOR_SCRATCH=$(mktemp "${DATA_DIR}/tor_exit_nodes.XXXXXX") \
    || die "could not create a temporary file in ${DATA_DIR}"

  # curl's own error text (-S) is left visible on stderr to aid diagnosis.
  if curl -fsSL --connect-timeout 10 --max-time 30 -o "$TOR_SCRATCH" "$TOR_URL"; then
    # `grep -c` prints "0" AND exits 1 when nothing matches, so a
    # "|| echo 0" fallback would produce "0<newline>0". `|| true` keeps the
    # single count that grep already printed.
    count=$(grep -cE '^[0-9]' "$TOR_SCRATCH" || true)
    count=${count:-0}
    if (( count > 0 )); then
      echo " Downloaded ${count} Tor exit node IPs"
      TOR_FILE=$TOR_SCRATCH
      return 0
    fi
    echo " WARNING: Tor exit list contained no IPs, using fallback" >&2
  else
    echo " WARNING: Could not download Tor exit list (offline?), using fallback" >&2
  fi
}

# Generate data/bot_ip.csv, passing the Tor list only when one was downloaded.
generate_bot_ip() {
  # An array keeps the optional flag and its path as separate, intact words.
  local -a args=(--output "${DATA_DIR}/bot_ip.csv")
  if [[ -n "$TOR_FILE" ]]; then
    args+=(--tor-file "$TOR_FILE")
  fi
  python3 "${SCRIPT_DIR}/generate_bot_ip.py" "${args[@]}"
  echo " $(line_count "${DATA_DIR}/bot_ip.csv") entries"
}

# Generate data/bot_ja4.csv.
generate_bot_ja4() {
  python3 "${SCRIPT_DIR}/generate_bot_ja4.py" \
    --output "${DATA_DIR}/bot_ja4.csv"
  echo " $(line_count "${DATA_DIR}/bot_ja4.csv") entries"
}

# Generate data/asn_reputation.csv and data/iplocate-ip-to-asn.csv.
generate_asn_data() {
  python3 "${SCRIPT_DIR}/generate_asn_data.py" \
    --output-asn "${DATA_DIR}/asn_reputation.csv" \
    --output-ipasn "${DATA_DIR}/iplocate-ip-to-asn.csv"
  echo " ASN reputation: $(line_count "${DATA_DIR}/asn_reputation.csv") entries"
  echo " IP-to-ASN: $(line_count "${DATA_DIR}/iplocate-ip-to-asn.csv") entries"
}

# Copy the four generated CSV files into the test csv-stubs directory.
install_stubs() {
  local name
  echo ""
  echo "=== Installing to test csv-stubs ==="
  for name in bot_ip.csv bot_ja4.csv asn_reputation.csv iplocate-ip-to-asn.csv; do
    cp -v -- "${DATA_DIR}/${name}" "${STUBS_DIR}/"
  done
  echo "Done."
}

# Entry point. $1 may be --install-stubs; any other argument is ignored,
# as before.
main() {
  local install=0

  if [[ "${1:-}" == "--install-stubs" ]]; then
    install=1
    # Fail before doing any work rather than after all generators have run.
    [[ -d "$STUBS_DIR" ]] || die "csv-stubs directory not found: ${STUBS_DIR}"
  fi

  command -v python3 >/dev/null \
    || die "python3 is required but was not found in PATH"

  mkdir -p -- "$DATA_DIR"

  echo "=== [1/4] Downloading Tor exit node list ==="
  fetch_tor_exit_list

  echo "=== [2/4] Generating bot_ip.csv ==="
  generate_bot_ip

  echo "=== [3/4] Generating bot_ja4.csv ==="
  generate_bot_ja4

  echo "=== [4/4] Generating ASN + IP-to-ASN CSVs ==="
  generate_asn_data

  # Optionally install into test stubs
  if (( install )); then
    install_stubs
  fi

  echo ""
  echo "All CSV data generated in ${DATA_DIR}/"
  # The hint is only useful when the stubs were not just installed.
  if (( ! install )); then
    echo "Run with --install-stubs to copy to test fixtures."
  fi
}

main "$@"