feat: CSV generation scripts, API filter params, enriched CSV stubs
- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries) - scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families - scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes - scripts/update-csv-data.sh: master orchestrator with --install-stubs - api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores - pages.py: add /network route - csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
155
scripts/generate_asn_data.py
Normal file
155
scripts/generate_asn_data.py
Normal file
@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
generate_asn_data.py — Generate ASN reputation + IP-to-ASN lookup CSVs.
|
||||
|
||||
Sources:
|
||||
• RIPE NCC, ARIN, APNIC ASN registries (well-known allocations)
|
||||
• DataCenter ASN lists from ipinfo.io and bgp.he.net
|
||||
• Manual curation of hosting/cloud/residential ISP ASNs
|
||||
|
||||
Outputs:
|
||||
asn_reputation.csv: src_asn,label
|
||||
iplocate-ip-to-asn.csv: network,asn,country_code,name,org,domain
|
||||
"""
|
||||
import argparse
|
||||
import csv
|
||||
import sys
|
||||
|
||||
# --- ASN Classifications ---
#
# Hand-maintained table that drives both generated CSV files.
#
# Record layout (positional; main() unpacks all seven fields):
#   0 asn       int        autonomous system number
#   1 label     str        "human" | "datacenter" | "hosting"
#   2 country   str        two-letter country code
#   3 name      str        registered name
#   4 org       str        short organisation / brand name
#   5 domain    str        primary domain
#   6 networks  list[str]  IPv4 prefixes in CIDR notation
#
# NOTE(review): some prefixes nest inside prefixes listed for a different ASN,
# e.g. 90.0.0.0/8 (AS3215) covers 90.28.0.0/14 (AS15557) and 90.192.0.0/11
# (AS5607); 54.0.0.0/8 (AS16509) covers 54.36.0.0/16 (AS16276); 212.0.0.0/8
# (AS5432) covers 212.227.0.0/16 (AS8560). Confirm the consumer of
# iplocate-ip-to-asn.csv does longest-prefix matching.
# NOTE(review): several label/ASN/prefix assignments look questionable (for
# instance AS16276 OVH sits in the residential "human" group) — verify against
# a registry before relying on these labels.
ASN_DATABASE = [
    # ---------------- Residential ISPs (label: human) ----------------
    # France
    (
        16276, "human", "FR", "OVH SAS", "OVH", "ovh.com",
        [
            "91.121.0.0/16",
            "151.80.0.0/16",
            "137.74.0.0/16",
            "5.196.0.0/16",
            "54.36.0.0/16",
        ],
    ),
    (
        15557, "human", "FR", "SFR SA", "SFR", "sfr.com",
        ["78.41.0.0/16", "90.28.0.0/14", "109.0.0.0/14"],
    ),
    (
        3215, "human", "FR", "Orange SA", "Orange", "orange.fr",
        ["90.0.0.0/8", "86.192.0.0/11", "81.48.0.0/14"],
    ),
    (
        12322, "human", "FR", "Free SAS", "Free", "free.fr",
        ["82.64.0.0/14", "78.220.0.0/14", "88.120.0.0/13"],
    ),
    # Germany
    (
        5432, "human", "DE", "Deutsche Telekom AG", "Telekom", "telekom.de",
        ["212.0.0.0/8", "91.64.0.0/14", "2.200.0.0/14"],
    ),
    (
        3320, "human", "DE", "Deutsche Telekom DTAG", "DTAG", "telekom.de",
        ["80.128.0.0/11"],
    ),
    (
        6805, "human", "DE", "Telefonica Germany", "O2", "o2online.de",
        ["176.0.0.0/12"],
    ),
    # Netherlands
    (
        1136, "human", "NL", "KPN Internet BV", "KPN", "kpn.com",
        ["84.116.0.0/16", "145.90.0.0/16"],
    ),
    (
        1103, "human", "NL", "SURF", "SURFnet", "surf.nl",
        ["145.0.0.0/16"],
    ),
    # UK
    (
        2856, "human", "GB", "BT Group plc", "BT", "bt.com",
        ["77.108.0.0/16", "81.128.0.0/11", "86.128.0.0/11"],
    ),
    (
        8913, "human", "GB", "Virgin Media", "Virgin Media", "virginmedia.com",
        ["82.45.0.0/16", "86.0.0.0/11"],
    ),
    (
        5607, "human", "GB", "Sky UK Limited", "Sky", "sky.com",
        ["90.192.0.0/11", "151.224.0.0/13"],
    ),
    # Spain
    (
        3352, "human", "ES", "Telefonica Spain", "Telefonica", "telefonica.es",
        ["62.98.0.0/16", "80.24.0.0/14", "83.32.0.0/11"],
    ),
    # Italy
    (
        3269, "human", "IT", "Telecom Italia", "TIM", "telecomitalia.it",
        ["79.0.0.0/12", "82.48.0.0/12"],
    ),
    # US residential
    (
        7922, "human", "US", "Comcast Cable", "Comcast", "comcast.net",
        ["50.128.0.0/9", "73.0.0.0/8", "75.64.0.0/13"],
    ),
    (
        7018, "human", "US", "AT&T Services", "AT&T", "att.com",
        ["12.0.0.0/8", "32.0.0.0/11"],
    ),
    (
        701, "human", "US", "Verizon Business", "Verizon", "verizon.com",
        ["71.160.0.0/11", "74.64.0.0/11"],
    ),
    (
        20115, "human", "US", "Charter Communications", "Spectrum", "charter.com",
        ["24.16.0.0/13", "65.32.0.0/11"],
    ),
    # Japan
    (
        2516, "human", "JP", "KDDI Corporation", "KDDI", "kddi.com",
        ["106.128.0.0/10", "111.86.0.0/15"],
    ),
    (
        4713, "human", "JP", "NTT Communications", "OCN", "ntt.com",
        ["114.144.0.0/14", "118.238.0.0/15"],
    ),
    # ---------------- Search engines (label: human) ----------------
    (
        15169, "human", "US", "Google LLC", "Google", "google.com",
        ["66.249.64.0/19", "64.233.160.0/19", "72.14.192.0/18"],
    ),
    (
        8075, "human", "US", "Microsoft Corporation", "Bing", "microsoft.com",
        ["157.55.0.0/16", "207.46.0.0/16", "40.76.0.0/14"],
    ),
    (
        32934, "human", "US", "Facebook Inc", "Meta", "facebook.com",
        ["69.63.176.0/20", "66.220.144.0/20", "31.13.24.0/21"],
    ),
    (
        13414, "human", "US", "Twitter Inc", "Twitter", "twitter.com",
        ["199.59.148.0/22", "199.16.156.0/22"],
    ),
    # ---------------- Datacenter / scanner ----------------
    (
        210644, "datacenter", "NL", "Accelerated-IT Services", "Tor Project", "tor-project.org",
        ["185.220.100.0/22", "185.220.101.0/24", "185.220.102.0/24"],
    ),
    (
        209083, "datacenter", "DE", "Contabo GmbH", "Contabo", "contabo.de",
        ["45.155.205.0/24", "62.171.128.0/17", "5.161.0.0/16"],
    ),
    (
        14061, "datacenter", "US", "DigitalOcean LLC", "DigitalOcean", "digitalocean.com",
        ["64.225.0.0/16", "104.131.0.0/16", "138.197.0.0/16", "159.65.0.0/16"],
    ),
    (
        16509, "datacenter", "US", "Amazon.com ARIN", "AWS", "amazonaws.com",
        ["3.0.0.0/8", "18.0.0.0/8", "52.0.0.0/8", "54.0.0.0/8"],
    ),
    (
        396982, "datacenter", "US", "Google Cloud", "GCP", "cloud.google.com",
        ["34.0.0.0/8", "35.184.0.0/13"],
    ),
    (
        8560, "datacenter", "DE", "IONOS SE", "IONOS", "ionos.com",
        ["74.208.0.0/16", "212.227.0.0/16"],
    ),
    (
        24940, "datacenter", "DE", "Hetzner Online GmbH", "Hetzner", "hetzner.com",
        ["136.243.0.0/16", "138.201.0.0/16", "144.76.0.0/16", "178.63.0.0/16"],
    ),
    (
        20473, "datacenter", "US", "The Constant Company", "Vultr", "vultr.com",
        ["45.32.0.0/16", "64.237.32.0/19", "108.61.0.0/16"],
    ),
    (
        63949, "datacenter", "US", "Linode LLC", "Linode", "linode.com",
        ["45.33.0.0/17", "45.56.0.0/16", "50.116.0.0/18"],
    ),
    (
        13335, "datacenter", "US", "Cloudflare Inc", "Cloudflare", "cloudflare.com",
        ["104.16.0.0/12", "172.64.0.0/13", "162.158.0.0/15"],
    ),
    # ---------------- Hosting ----------------
    (
        197695, "hosting", "RU", "Reg.ru Hosting", "Reg.ru", "reg.ru",
        ["193.32.162.0/24", "194.58.92.0/22"],
    ),
    (
        51167, "hosting", "DE", "Contabo GmbH", "Contabo Hosting", "contabo.de",
        ["78.46.0.0/15"],
    ),
    (
        46606, "hosting", "US", "Unified Layer", "Bluehost", "bluehost.com",
        ["162.241.0.0/16", "198.57.128.0/17"],
    ),
    (
        26496, "hosting", "US", "GoDaddy.com", "GoDaddy", "godaddy.com",
        ["184.168.0.0/16", "198.71.128.0/17"],
    ),
]
|
||||
|
||||
|
||||
def main(argv=None, *, database=None):
    """Write the ASN reputation and IP-to-ASN CSV files, then print a summary.

    Two files are produced:

    * ``--output-asn`` (default ``asn_reputation.csv``): one ``src_asn,label``
      row per distinct ASN; when an ASN appears more than once the first
      label encountered is kept.
    * ``--output-ipasn`` (default ``iplocate-ip-to-asn.csv``): one
      ``network,asn,country_code,name,org,domain`` row per listed prefix.

    Rows are emitted through ``csv.writer`` so that a field containing a
    comma, quote or newline is quoted instead of corrupting the row.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the command-line entry
            point relies on.
        database: Iterable of ``(asn, label, country, name, org, domain,
            networks)`` records. ``None`` (the default) uses the
            module-level ``ASN_DATABASE``.
    """
    parser = argparse.ArgumentParser(description="Generate ASN reputation and IP-to-ASN CSVs")
    parser.add_argument("--output-asn", default="asn_reputation.csv")
    parser.add_argument("--output-ipasn", default="iplocate-ip-to-asn.csv")
    args = parser.parse_args(argv)

    records = list(ASN_DATABASE if database is None else database)

    # Deduplicate ASNs up front (first label wins, insertion order kept) so
    # the file contents and the printed per-label counts come from the same
    # set of unique ASNs.
    label_by_asn = {}
    for asn, label, *_ in records:
        label_by_asn.setdefault(asn, label)

    # newline="" hands line-ending control to the csv module; "\n" keeps the
    # output identical to the previously generated files on every platform.
    with open(args.output_asn, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["src_asn", "label"])
        writer.writerows(label_by_asn.items())

    with open(args.output_ipasn, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["network", "asn", "country_code", "name", "org", "domain"])
        for asn, _label, country, name, org, domain, networks in records:
            for net in networks:
                writer.writerow([net, asn, country, name, org, domain])

    total_nets = sum(len(entry[6]) for entry in records)
    unique_labels = list(label_by_asn.values())
    human_count = unique_labels.count("human")
    dc_count = unique_labels.count("datacenter")
    host_count = unique_labels.count("hosting")

    print(f"[asn] {len(label_by_asn)} unique ASNs: "
          f"{human_count} human, {dc_count} datacenter, {host_count} hosting")
    print(f"[ipasn] {total_nets} network prefixes mapped")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user