feat: CSV generation scripts, API filter params, enriched CSV stubs
- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries)
- scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families
- scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes
- scripts/update-csv-data.sh: master orchestrator with --install-stubs
- api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores
- pages.py: add /network route
- csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
67
scripts/update-csv-data.sh
Executable file
67
scripts/update-csv-data.sh
Executable file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env bash
|
||||
# update-csv-data.sh — Download and generate all CSV reference data for JA4 platform.
|
||||
#
|
||||
# Outputs (paths are relative to the directory containing this script):
|
||||
# data/bot_ip.csv — Known bot/scanner IPs + Tor exit nodes
|
||||
# data/bot_ja4.csv — Known bot JA4 TLS fingerprints
|
||||
# data/asn_reputation.csv — ASN→label mapping (human/datacenter/hosting)
|
||||
# data/iplocate-ip-to-asn.csv — CIDR→ASN for dictionary lookup
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/update-csv-data.sh # generate all
|
||||
# ./scripts/update-csv-data.sh --install-stubs # also copy to test csv-stubs
|
||||
#
|
||||
# Requirements: curl, python3 (stdlib only)
|
||||
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute directory containing this script. Every other path is derived from
# it, so the script does not depend on the caller's working directory.
script_parent="$(dirname "$0")"
SCRIPT_DIR="$(cd "$script_parent" && pwd)"

# Directory the generated CSV files are written to.
DATA_DIR="${SCRIPT_DIR}/data"

# Test fixture directory that receives copies when --install-stubs is given.
STUBS_DIR="${SCRIPT_DIR}/../tests/integration/platform/csv-stubs"

mkdir -p "$DATA_DIR"
|
||||
|
||||
# Step 1: fetch the public Tor bulk exit list into a scratch file under
# DATA_DIR. On success TOR_TMP holds the path of the downloaded file; on
# failure TOR_TMP is set to the empty string so later steps can tell that no
# download is available.
echo "=== [1/4] Downloading Tor exit node list ==="
TOR_URL="https://check.torproject.org/torbulkexitlist"
TOR_TMP="${DATA_DIR}/tor_exit_nodes.txt"

# TOR_TMP is blanked on failure, so remember the scratch path separately and
# remove it on every exit path (including aborts triggered by `set -e`).
TOR_DOWNLOAD_PATH="$TOR_TMP"
trap 'rm -f -- "$TOR_DOWNLOAD_PATH"' EXIT

# curl's stderr is discarded on purpose: a failed download is not fatal and is
# reported by the WARNING below instead.
if curl -fsSL --connect-timeout 10 --max-time 30 "$TOR_URL" -o "$TOR_TMP" 2>/dev/null; then
  # `grep -c` always prints the count but exits 1 when the count is 0.
  # `|| true` keeps `set -e` from aborting without appending a second "0"
  # to the captured output (which `|| echo 0` would do).
  TOR_COUNT=$(grep -cE '^[0-9]' "$TOR_TMP" || true)
  # grep prints nothing if it could not read the file at all; report 0 then.
  TOR_COUNT=${TOR_COUNT:-0}
  echo " Downloaded ${TOR_COUNT} Tor exit node IPs"
else
  echo " WARNING: Could not download Tor exit list (offline?), using fallback"
  # Drop any partial download right away so it cannot be mistaken for data.
  rm -f -- "$TOR_TMP"
  TOR_TMP=""
fi
|
||||
|
||||
# Step 2: build bot_ip.csv. The downloaded Tor list is handed to the generator
# only when TOR_TMP is non-empty (it is blank when the download failed).
printf '%s\n' "=== [2/4] Generating bot_ip.csv ==="
bot_ip_csv="${DATA_DIR}/bot_ip.csv"
bot_ip_cmd=(python3 "${SCRIPT_DIR}/generate_bot_ip.py" --output "$bot_ip_csv")
if [[ -n "$TOR_TMP" ]]; then
  bot_ip_cmd+=(--tor-file "$TOR_TMP")
fi
"${bot_ip_cmd[@]}"
# Report the line count of the generated file.
printf ' %s entries\n' "$(wc -l < "$bot_ip_csv")"
|
||||
|
||||
# Step 3: build bot_ja4.csv with the JA4 generator script.
printf '%s\n' "=== [3/4] Generating bot_ja4.csv ==="
bot_ja4_csv="${DATA_DIR}/bot_ja4.csv"
python3 "${SCRIPT_DIR}/generate_bot_ja4.py" --output "$bot_ja4_csv"
# Report the line count of the generated file.
printf ' %s entries\n' "$(wc -l < "$bot_ja4_csv")"
|
||||
|
||||
# Step 4: one generator run produces both the ASN reputation CSV and the
# IP-to-ASN CSV.
printf '%s\n' "=== [4/4] Generating ASN + IP-to-ASN CSVs ==="
asn_csv="${DATA_DIR}/asn_reputation.csv"
ipasn_csv="${DATA_DIR}/iplocate-ip-to-asn.csv"
python3 "${SCRIPT_DIR}/generate_asn_data.py" \
  --output-asn "$asn_csv" \
  --output-ipasn "$ipasn_csv"
# Report the line count of each generated file.
printf ' ASN reputation: %s entries\n' "$(wc -l < "$asn_csv")"
printf ' IP-to-ASN: %s entries\n' "$(wc -l < "$ipasn_csv")"
|
||||
|
||||
# Optional last step: when the first argument is exactly --install-stubs, copy
# each generated CSV from DATA_DIR into the test csv-stubs directory. Any
# other (or missing) first argument skips this step.
case "${1:-}" in
  --install-stubs)
    echo ""
    echo "=== Installing to test csv-stubs ==="
    for csv_name in bot_ip.csv bot_ja4.csv asn_reputation.csv iplocate-ip-to-asn.csv; do
      cp -v "${DATA_DIR}/${csv_name}" "$STUBS_DIR/"
    done
    echo "Done."
    ;;
esac
|
||||
|
||||
# Cleanup: the raw Tor download is only an intermediate input and is not one
# of the script's outputs. `--` guards against a path starting with a dash.
rm -f -- "${DATA_DIR}/tor_exit_nodes.txt"

echo ""
echo "All CSV data generated in ${DATA_DIR}/"
# Only advertise --install-stubs when it was not already used for this run;
# printing the hint right after installing the stubs is misleading.
if [[ "${1:-}" != "--install-stubs" ]]; then
  echo "Run with --install-stubs to copy to test fixtures."
fi
|
||||
Reference in New Issue
Block a user