Files
ja4-platform/scripts/generate_bot_ip.py
toto b6184e6529 feat: CSV generation scripts, API filter params, enriched CSV stubs
- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries)
- scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families
- scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes
- scripts/update-csv-data.sh: master orchestrator with --install-stubs
- api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores
- pages.py: add /network route
- csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-08 15:05:43 +02:00

173 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
generate_bot_ip.py — Generate bot_ip.csv from known scanner networks + Tor exit nodes.
Sources:
• Tor exit nodes: downloaded list or hardcoded fallback
• Shodan: known scanner ranges (census.shodan.io, 2024)
• Censys: known scanner ranges (censys.io, 2024)
• Binaryedge, SecurityTrails, ZoomEye, Stretchoid: known ranges
• GreyNoise: top mass-scanner IPs (manually curated)
Output format (no header):
<ip_or_cidr>,<bot_name>
"""
import argparse
import ipaddress
import sys
# --- Known scanner networks (public, well-documented) ---
# Maps a bot label to the list of networks attributed to it. Every network is
# a CIDR string; single hosts are spelled as /32. main() writes each entry as
# "<cidr>,<label>" in the order listed here and de-duplicates on the exact
# string, so if the same CIDR appears under two labels the first label wins.
KNOWN_SCANNERS = {
    # Shodan — https://wiki.ipfire.org/dns/public-servers (census.shodan.io)
    # NOTE(review): the URL above is an IPFire wiki page about DNS servers,
    # not a Shodan publication — confirm the real provenance of these ranges.
    "Shodan_Scanner": [
        "66.240.192.0/24", "66.240.205.0/24", "66.240.236.0/24",
        "71.6.135.0/24", "71.6.146.0/24", "71.6.158.0/24", "71.6.165.0/24",
        "80.82.77.0/24", "80.82.78.0/24",
        "82.221.105.0/24", "82.221.106.0/24",
        "85.25.43.0/24", "85.25.103.0/24",
        "93.120.27.0/24",
        "94.102.49.0/24",
        "188.138.9.0/24",
        "198.20.69.0/24", "198.20.70.0/24", "198.20.87.0/24", "198.20.99.0/24",
        "209.126.110.0/24",
    ],
    # Censys — https://support.censys.io/hc/en-us/articles/360043177092
    "Censys_Scanner": [
        "162.142.125.0/24", "167.248.133.0/24", "167.94.138.0/24",
        "167.94.145.0/24", "167.94.146.0/24",
        "192.35.168.0/23",
    ],
    # BinaryEdge — https://docs.binaryedge.io/
    "BinaryEdge_Scanner": [
        "154.89.5.0/24",
        "45.143.200.0/22",
    ],
    # Stretchoid — persistent scanner botnet
    "Stretchoid_Scanner": [
        "198.235.24.0/24",
        "205.210.31.0/24",
    ],
    # SecurityTrails (Recorded Future) crawlers
    # NOTE(review): a /16 labels 65,536 addresses — confirm the whole range
    # really belongs to this crawler before relying on it.
    "SecurityTrails_Crawler": [
        "52.250.0.0/16",
    ],
    # ZoomEye (Knownsec)
    # NOTE(review): same concern as above — this is a full /16.
    "ZoomEye_Scanner": [
        "106.75.0.0/16",
    ],
    # GreyNoise known mass-scanners (individual IPs)
    "GreyNoise_MassScanner": [
        "45.155.205.233/32", "45.155.205.220/32", "45.155.205.205/32",
        "45.155.205.190/32", "45.155.205.175/32", "45.155.205.160/32",
        "45.155.205.146/32", "45.155.205.131/32",
        "193.32.162.10/32", "193.32.162.11/32", "193.32.162.25/32",
        "193.32.162.30/32", "193.32.162.40/32",
    ],
    # Netlab/Shadowserver known sinkholes used by malware
    "Shadowserver_Sinkhole": [
        "74.82.47.0/24",
        "184.105.139.0/24", "184.105.247.0/24",
    ],
}
# Built-in Tor exit node addresses, used when no downloaded list is supplied.
# The addresses fall into a few contiguous last-octet runs, so each run is
# spelled as a range (end value exclusive) instead of one literal per host.
# The resulting list order is: 185.220.101.x, 185.220.100.x, 178.20.55.x,
# 23.129.64.x, 104.244.76.x, 199.249.230.x.
FALLBACK_TOR_IPS = [
    *(f"185.220.101.{host}" for host in range(34, 98)),
    *(f"185.220.100.{host}" for host in range(240, 256)),
    "178.20.55.16", "178.20.55.18", "178.20.55.182",
    *(f"23.129.64.{host}" for host in range(130, 154)),
    *(f"104.244.76.{host}" for host in range(13, 21)),
    *(f"199.249.230.{host}" for host in range(64, 90)),
]
def load_tor_ips(tor_file):
    """Load Tor exit node IPs from a downloaded list file.

    The file is read one entry per line. Blank lines, lines starting with
    "#", and lines that ``ipaddress.ip_address`` rejects are skipped.

    Args:
        tor_file: Path of the exit node list to read.

    Returns:
        A set of the accepted address strings, stripped of surrounding
        whitespace. The set is empty when the file does not exist; in that
        case a warning is printed to stderr.
    """
    ips = set()
    try:
        # Addresses are plain ASCII, so any byte that fails to decode can only
        # sit on a line that is rejected below; replacing it avoids aborting
        # the whole load on a stray byte.
        with open(tor_file, encoding="utf-8", errors="replace") as f:
            for raw_line in f:
                candidate = raw_line.strip()
                if not candidate or candidate.startswith("#"):
                    continue
                try:
                    ipaddress.ip_address(candidate)
                except ValueError:
                    # Not a bare IP address; ignore the line.
                    continue
                ips.add(candidate)
    except FileNotFoundError:
        # Remain best-effort (return what we have), but report it: an empty
        # result is otherwise indistinguishable from an empty download.
        print(f"[bot_ip] WARNING: Tor exit node file not found: {tor_file}",
              file=sys.stderr)
    return ips
def main():
    """Build the bot IP list and write it to the output CSV.

    Command-line options:
        --output    Destination file (default: ``bot_ip.csv``).
        --tor-file  File of Tor exit node IPs, one per line. When the option
                    is omitted, or the file yields no valid address,
                    FALLBACK_TOR_IPS is used instead.

    Each output row is ``<cidr>,<bot_name>`` with no header. Scanner networks
    come first, in KNOWN_SCANNERS order, followed by Tor exit nodes in
    ascending address order (IPv4 before IPv6). An entry is written only the
    first time its exact CIDR string is seen.
    """
    parser = argparse.ArgumentParser(description="Generate bot_ip.csv")
    parser.add_argument("--output", default="bot_ip.csv")
    parser.add_argument("--tor-file", help="Path to downloaded Tor exit node list")
    args = parser.parse_args()

    entries = []
    seen = set()

    # Add known scanner networks
    for bot_name, networks in KNOWN_SCANNERS.items():
        for net in networks:
            if net not in seen:
                seen.add(net)
                entries.append((net, bot_name))
    scanner_count = len(entries)

    # Add Tor exit nodes. A missing or empty download must not silently drop
    # every Tor entry, so fall back to the built-in list in that case too.
    tor_ips = load_tor_ips(args.tor_file) if args.tor_file else set()
    if not tor_ips:
        if args.tor_file:
            print(f"[bot_ip] WARNING: no valid Tor IPs in {args.tor_file}; "
                  f"using built-in fallback list", file=sys.stderr)
        tor_ips = set(FALLBACK_TOR_IPS)

    tor_addrs = {ipaddress.ip_address(ip) for ip in tor_ips}
    # IPv4 and IPv6 address objects cannot be compared with each other, so
    # sort on (version, integer value) rather than on the objects themselves.
    for addr in sorted(tor_addrs, key=lambda a: (a.version, int(a))):
        # Host prefix length depends on the family: /32 for IPv4, /128 for IPv6.
        key = f"{addr}/{addr.max_prefixlen}"
        if key not in seen:
            seen.add(key)
            entries.append((key, "Tor_Exit_Node"))
    # Count what was actually appended; de-duplication can make this smaller
    # than the number of loaded addresses.
    tor_count = len(entries) - scanner_count

    with open(args.output, "w", encoding="utf-8") as f:
        f.writelines(f"{net},{name}\n" for net, name in entries)

    print(f"[bot_ip] Generated {len(entries)} entries "
          f"({tor_count} Tor nodes, "
          f"{scanner_count} scanner ranges)")
# Run the generator only when this file is executed as a script, so that
# importing the module (e.g. to reuse KNOWN_SCANNERS) has no side effects.
if __name__ == "__main__":
    main()