- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries)
- scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families
- scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes
- scripts/update-csv-data.sh: master orchestrator with --install-stubs
- api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores
- pages.py: add /network route
- csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
156 lines
6.9 KiB
Python
156 lines
6.9 KiB
Python
#!/usr/bin/env python3
"""
generate_asn_data.py — Generate ASN reputation + IP-to-ASN lookup CSVs.

Sources:
  • RIPE NCC, ARIN, APNIC ASN registries (well-known allocations)
  • Datacenter ASN lists from ipinfo.io and bgp.he.net
  • Manual curation of hosting/cloud/residential ISP ASNs

Outputs:
  asn_reputation.csv: src_asn,label
  iplocate-ip-to-asn.csv: network,asn,country_code,name,org,domain
"""

import argparse
import csv
import sys

# --- ASN Classifications ---
# Each entry: (asn, label, country, name, org, domain, networks[])
#   asn       autonomous system number (int)
#   label     reputation class written to asn_reputation.csv
#             ("human", "datacenter" or "hosting")
#   country   two-letter country code written to the country_code column
#   name      value written to the name column
#   org       value written to the org column
#   domain    value written to the domain column
#   networks  list of IPv4 CIDR prefixes; one ip-to-asn row is emitted per prefix
#
# NOTE(review): several prefixes below overlap across different ASNs (each is
# flagged inline). Rows are emitted in table order, so a consumer that does
# first-match instead of longest-prefix-match will misattribute those ranges.
ASN_DATABASE = [
    # ========================= RESIDENTIAL ISPs (human) =========================
    # France
    # NOTE(review): OVH is generally known as a hosting provider rather than a
    # residential ISP; confirm that the "human" label is intentional.
    (16276, "human", "FR", "OVH SAS", "OVH", "ovh.com",
     ["91.121.0.0/16", "151.80.0.0/16", "137.74.0.0/16", "5.196.0.0/16", "54.36.0.0/16"]),
    (15557, "human", "FR", "SFR SA", "SFR", "sfr.com",
     ["78.41.0.0/16", "90.28.0.0/14", "109.0.0.0/14"]),
    # NOTE(review): 90.0.0.0/8 also contains SFR's 90.28.0.0/14 and Sky's 90.192.0.0/11.
    (3215, "human", "FR", "Orange SA", "Orange", "orange.fr",
     ["90.0.0.0/8", "86.192.0.0/11", "81.48.0.0/14"]),
    (12322, "human", "FR", "Free SAS", "Free", "free.fr",
     ["82.64.0.0/14", "78.220.0.0/14", "88.120.0.0/13"]),

    # Germany
    # NOTE(review): 212.0.0.0/8 also contains IONOS's 212.227.0.0/16 (datacenter).
    (5432, "human", "DE", "Deutsche Telekom AG", "Telekom", "telekom.de",
     ["212.0.0.0/8", "91.64.0.0/14", "2.200.0.0/14"]),
    (3320, "human", "DE", "Deutsche Telekom DTAG", "DTAG", "telekom.de",
     ["80.128.0.0/11"]),
    (6805, "human", "DE", "Telefonica Germany", "O2", "o2online.de",
     ["176.0.0.0/12"]),

    # Netherlands
    (1136, "human", "NL", "KPN Internet BV", "KPN", "kpn.com",
     ["84.116.0.0/16", "145.90.0.0/16"]),
    (1103, "human", "NL", "SURF", "SURFnet", "surf.nl",
     ["145.0.0.0/16"]),

    # UK
    (2856, "human", "GB", "BT Group plc", "BT", "bt.com",
     ["77.108.0.0/16", "81.128.0.0/11", "86.128.0.0/11"]),
    (8913, "human", "GB", "Virgin Media", "Virgin Media", "virginmedia.com",
     ["82.45.0.0/16", "86.0.0.0/11"]),
    (5607, "human", "GB", "Sky UK Limited", "Sky", "sky.com",
     ["90.192.0.0/11", "151.224.0.0/13"]),

    # Spain
    (3352, "human", "ES", "Telefonica Spain", "Telefonica", "telefonica.es",
     ["62.98.0.0/16", "80.24.0.0/14", "83.32.0.0/11"]),

    # Italy
    (3269, "human", "IT", "Telecom Italia", "TIM", "telecomitalia.it",
     ["79.0.0.0/12", "82.48.0.0/12"]),

    # US residential
    (7922, "human", "US", "Comcast Cable", "Comcast", "comcast.net",
     ["50.128.0.0/9", "73.0.0.0/8", "75.64.0.0/13"]),
    (7018, "human", "US", "AT&T Services", "AT&T", "att.com",
     ["12.0.0.0/8", "32.0.0.0/11"]),
    (701, "human", "US", "Verizon Business", "Verizon", "verizon.com",
     ["71.160.0.0/11", "74.64.0.0/11"]),
    (20115, "human", "US", "Charter Communications", "Spectrum", "charter.com",
     ["24.16.0.0/13", "65.32.0.0/11"]),

    # Japan
    (2516, "human", "JP", "KDDI Corporation", "KDDI", "kddi.com",
     ["106.128.0.0/10", "111.86.0.0/15"]),
    (4713, "human", "JP", "NTT Communications", "OCN", "ntt.com",
     ["114.144.0.0/14", "118.238.0.0/15"]),

    # ================= SEARCH ENGINES / SOCIAL PLATFORMS (human) =================
    (15169, "human", "US", "Google LLC", "Google", "google.com",
     ["66.249.64.0/19", "64.233.160.0/19", "72.14.192.0/18"]),
    (8075, "human", "US", "Microsoft Corporation", "Bing", "microsoft.com",
     ["157.55.0.0/16", "207.46.0.0/16", "40.76.0.0/14"]),
    (32934, "human", "US", "Facebook Inc", "Meta", "facebook.com",
     ["69.63.176.0/20", "66.220.144.0/20", "31.13.24.0/21"]),
    (13414, "human", "US", "Twitter Inc", "Twitter", "twitter.com",
     ["199.59.148.0/22", "199.16.156.0/22"]),

    # ========================= DATACENTER / SCANNER =========================
    # NOTE(review): 185.220.100.0/22 already covers the two /24 prefixes that follow it.
    (210644, "datacenter", "NL", "Accelerated-IT Services", "Tor Project", "tor-project.org",
     ["185.220.100.0/22", "185.220.101.0/24", "185.220.102.0/24"]),
    # NOTE(review): "Contabo GmbH" is also listed under ASN 51167 in the HOSTING
    # section with a different label; confirm which ASN/label pair is correct.
    (209083, "datacenter", "DE", "Contabo GmbH", "Contabo", "contabo.de",
     ["45.155.205.0/24", "62.171.128.0/17", "5.161.0.0/16"]),
    (14061, "datacenter", "US", "DigitalOcean LLC", "DigitalOcean", "digitalocean.com",
     ["64.225.0.0/16", "104.131.0.0/16", "138.197.0.0/16", "159.65.0.0/16"]),
    # NOTE(review): 54.0.0.0/8 also contains OVH's 54.36.0.0/16 (labelled "human").
    (16509, "datacenter", "US", "Amazon.com ARIN", "AWS", "amazonaws.com",
     ["3.0.0.0/8", "18.0.0.0/8", "52.0.0.0/8", "54.0.0.0/8"]),
    (396982, "datacenter", "US", "Google Cloud", "GCP", "cloud.google.com",
     ["34.0.0.0/8", "35.184.0.0/13"]),
    (8560, "datacenter", "DE", "IONOS SE", "IONOS", "ionos.com",
     ["74.208.0.0/16", "212.227.0.0/16"]),
    (24940, "datacenter", "DE", "Hetzner Online GmbH", "Hetzner", "hetzner.com",
     ["136.243.0.0/16", "138.201.0.0/16", "144.76.0.0/16", "178.63.0.0/16"]),
    (20473, "datacenter", "US", "The Constant Company", "Vultr", "vultr.com",
     ["45.32.0.0/16", "64.237.32.0/19", "108.61.0.0/16"]),
    (63949, "datacenter", "US", "Linode LLC", "Linode", "linode.com",
     ["45.33.0.0/17", "45.56.0.0/16", "50.116.0.0/18"]),
    (13335, "datacenter", "US", "Cloudflare Inc", "Cloudflare", "cloudflare.com",
     ["104.16.0.0/12", "172.64.0.0/13", "162.158.0.0/15"]),

    # ========================= HOSTING =========================
    (197695, "hosting", "RU", "Reg.ru Hosting", "Reg.ru", "reg.ru",
     ["193.32.162.0/24", "194.58.92.0/22"]),
    (51167, "hosting", "DE", "Contabo GmbH", "Contabo Hosting", "contabo.de",
     ["78.46.0.0/15"]),
    (46606, "hosting", "US", "Unified Layer", "Bluehost", "bluehost.com",
     ["162.241.0.0/16", "198.57.128.0/17"]),
    (26496, "hosting", "US", "GoDaddy.com", "GoDaddy", "godaddy.com",
     ["184.168.0.0/16", "198.71.128.0/17"]),
]


def main(argv=None, database=None):
    """Write the ASN reputation CSV and the IP-to-ASN CSV.

    Args:
        argv: Command-line arguments to parse (without the program name).
            ``None`` makes argparse read ``sys.argv[1:]``, which is the
            behaviour of a plain ``main()`` call.
        database: Iterable of
            ``(asn, label, country, name, org, domain, networks)`` tuples.
            ``None`` selects the module-level ``ASN_DATABASE``.

    Output files (paths come from ``--output-asn`` / ``--output-ipasn``):
        * ASN reputation: header ``src_asn,label`` then one row per unique
          ASN; when an ASN appears more than once its first label is kept.
        * IP-to-ASN: header ``network,asn,country_code,name,org,domain`` then
          one row per network prefix of every entry.

    A two-line summary is printed to stdout.
    """
    parser = argparse.ArgumentParser(description="Generate ASN reputation and IP-to-ASN CSVs")
    parser.add_argument("--output-asn", default="asn_reputation.csv")
    parser.add_argument("--output-ipasn", default="iplocate-ip-to-asn.csv")
    args = parser.parse_args(argv)

    entries = ASN_DATABASE if database is None else list(database)

    # De-duplicate on ASN while keeping table order; the first label wins.
    asn_labels = {}
    for asn, label, *_ in entries:
        asn_labels.setdefault(asn, label)

    # csv.writer quotes fields containing commas/quotes, which hand-built
    # f-string rows would silently corrupt. newline="" is what the csv module
    # requires of the file object; lineterminator="\n" keeps plain LF rows.
    with open(args.output_asn, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["src_asn", "label"])
        writer.writerows(asn_labels.items())

    total_nets = 0
    with open(args.output_ipasn, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["network", "asn", "country_code", "name", "org", "domain"])
        for asn, _label, country, name, org, domain, networks in entries:
            writer.writerows([net, asn, country, name, org, domain] for net in networks)
            total_nets += len(networks)

    # Count labels over the de-duplicated ASNs so the breakdown always adds up
    # to the "unique ASNs" figure printed next to it.
    labels = list(asn_labels.values())
    human_count = labels.count("human")
    dc_count = labels.count("datacenter")
    host_count = labels.count("hosting")

    print(f"[asn] {len(asn_labels)} unique ASNs: "
          f"{human_count} human, {dc_count} datacenter, {host_count} hosting")
    print(f"[ipasn] {total_nets} network prefixes mapped")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()