feat: CSV generation scripts, API filter params, enriched CSV stubs

- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries)
- scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families
- scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes
- scripts/update-csv-data.sh: master orchestrator with --install-stubs
- api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores
- pages.py: add /network route
- csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-08 15:05:43 +02:00
parent c6ca352db9
commit b6184e6529
14 changed files with 3779 additions and 27 deletions

View File

@ -0,0 +1,155 @@
#!/usr/bin/env python3
"""
generate_asn_data.py — Generate ASN reputation + IP-to-ASN lookup CSVs.

The rows are written from the static, hand-curated ASN_DATABASE table
embedded in this script.

Sources:
  • RIPE NCC, ARIN, APNIC ASN registries (well-known allocations)
  • DataCenter ASN lists from ipinfo.io and bgp.he.net
  • Manual curation of hosting/cloud/residential ISP ASNs

Outputs:
  asn_reputation.csv (--output-asn): src_asn,label
  iplocate-ip-to-asn.csv (--output-ipasn): network,asn,country_code,name,org,domain
"""
import argparse
import csv
import sys
# --- ASN Classifications ---
# Static table consumed by main() to produce both CSV outputs.
# Each entry: (asn, label, country, name, org, domain, networks[])
#   asn      — AS number as an int
#   label    — one of "human", "datacenter", "hosting" (the only labels
#              main() counts in its summary line)
#   country  — two-letter country code string
#   name/org/domain — free-text descriptive fields copied verbatim to the CSV
#   networks — list of IPv4 CIDR strings attributed to the ASN
#
# NOTE(review): several prefixes in this table overlap across entries:
#   90.0.0.0/8 (AS3215) contains 90.28.0.0/14 (AS15557) and 90.192.0.0/11 (AS5607);
#   54.0.0.0/8 (AS16509) contains 54.36.0.0/16 (AS16276);
#   212.0.0.0/8 (AS5432) contains 212.227.0.0/16 (AS8560);
#   185.220.100.0/22 (AS210644) contains its own two /24 entries.
# Presumably the consumer resolves these by longest-prefix match — confirm,
# otherwise lookups in the overlapping ranges are ambiguous.
ASN_DATABASE = [
    # ========================= RESIDENTIAL ISPs (human) =========================
    # France
    # NOTE(review): OVH is generally known as a hosting provider — confirm that
    # the "human" label under the residential section is intended.
    (16276, "human", "FR", "OVH SAS", "OVH", "ovh.com",
    ["91.121.0.0/16", "151.80.0.0/16", "137.74.0.0/16", "5.196.0.0/16", "54.36.0.0/16"]),
    (15557, "human", "FR", "SFR SA", "SFR", "sfr.com",
    ["78.41.0.0/16", "90.28.0.0/14", "109.0.0.0/14"]),
    (3215, "human", "FR", "Orange SA", "Orange", "orange.fr",
    ["90.0.0.0/8", "86.192.0.0/11", "81.48.0.0/14"]),
    (12322, "human", "FR", "Free SAS", "Free", "free.fr",
    ["82.64.0.0/14", "78.220.0.0/14", "88.120.0.0/13"]),
    # Germany
    # NOTE(review): two entries below are both named Deutsche Telekom
    # (AS5432 and AS3320) — verify the AS5432 attribution against a registry.
    (5432, "human", "DE", "Deutsche Telekom AG", "Telekom", "telekom.de",
    ["212.0.0.0/8", "91.64.0.0/14", "2.200.0.0/14"]),
    (3320, "human", "DE", "Deutsche Telekom DTAG", "DTAG", "telekom.de",
    ["80.128.0.0/11"]),
    (6805, "human", "DE", "Telefonica Germany", "O2", "o2online.de",
    ["176.0.0.0/12"]),
    # Netherlands
    (1136, "human", "NL", "KPN Internet BV", "KPN", "kpn.com",
    ["84.116.0.0/16", "145.90.0.0/16"]),
    (1103, "human", "NL", "SURF", "SURFnet", "surf.nl",
    ["145.0.0.0/16"]),
    # UK
    (2856, "human", "GB", "BT Group plc", "BT", "bt.com",
    ["77.108.0.0/16", "81.128.0.0/11", "86.128.0.0/11"]),
    (8913, "human", "GB", "Virgin Media", "Virgin Media", "virginmedia.com",
    ["82.45.0.0/16", "86.0.0.0/11"]),
    (5607, "human", "GB", "Sky UK Limited", "Sky", "sky.com",
    ["90.192.0.0/11", "151.224.0.0/13"]),
    # Spain
    (3352, "human", "ES", "Telefonica Spain", "Telefonica", "telefonica.es",
    ["62.98.0.0/16", "80.24.0.0/14", "83.32.0.0/11"]),
    # Italy
    (3269, "human", "IT", "Telecom Italia", "TIM", "telecomitalia.it",
    ["79.0.0.0/12", "82.48.0.0/12"]),
    # US residential
    (7922, "human", "US", "Comcast Cable", "Comcast", "comcast.net",
    ["50.128.0.0/9", "73.0.0.0/8", "75.64.0.0/13"]),
    (7018, "human", "US", "AT&T Services", "AT&T", "att.com",
    ["12.0.0.0/8", "32.0.0.0/11"]),
    (701, "human", "US", "Verizon Business", "Verizon", "verizon.com",
    ["71.160.0.0/11", "74.64.0.0/11"]),
    (20115, "human", "US", "Charter Communications", "Spectrum", "charter.com",
    ["24.16.0.0/13", "65.32.0.0/11"]),
    # Japan
    (2516, "human", "JP", "KDDI Corporation", "KDDI", "kddi.com",
    ["106.128.0.0/10", "111.86.0.0/15"]),
    (4713, "human", "JP", "NTT Communications", "OCN", "ntt.com",
    ["114.144.0.0/14", "118.238.0.0/15"]),
    # ========================= SEARCH ENGINES (human) =========================
    # These entries carry the same "human" label as the residential ISPs above.
    (15169, "human", "US", "Google LLC", "Google", "google.com",
    ["66.249.64.0/19", "64.233.160.0/19", "72.14.192.0/18"]),
    (8075, "human", "US", "Microsoft Corporation", "Bing", "microsoft.com",
    ["157.55.0.0/16", "207.46.0.0/16", "40.76.0.0/14"]),
    (32934, "human", "US", "Facebook Inc", "Meta", "facebook.com",
    ["69.63.176.0/20", "66.220.144.0/20", "31.13.24.0/21"]),
    (13414, "human", "US", "Twitter Inc", "Twitter", "twitter.com",
    ["199.59.148.0/22", "199.16.156.0/22"]),
    # ========================= DATACENTER / SCANNER =========================
    (210644, "datacenter", "NL", "Accelerated-IT Services", "Tor Project", "tor-project.org",
    ["185.220.100.0/22", "185.220.101.0/24", "185.220.102.0/24"]),
    (209083, "datacenter", "DE", "Contabo GmbH", "Contabo", "contabo.de",
    ["45.155.205.0/24", "62.171.128.0/17", "5.161.0.0/16"]),
    (14061, "datacenter", "US", "DigitalOcean LLC", "DigitalOcean", "digitalocean.com",
    ["64.225.0.0/16", "104.131.0.0/16", "138.197.0.0/16", "159.65.0.0/16"]),
    (16509, "datacenter", "US", "Amazon.com ARIN", "AWS", "amazonaws.com",
    ["3.0.0.0/8", "18.0.0.0/8", "52.0.0.0/8", "54.0.0.0/8"]),
    (396982, "datacenter", "US", "Google Cloud", "GCP", "cloud.google.com",
    ["34.0.0.0/8", "35.184.0.0/13"]),
    (8560, "datacenter", "DE", "IONOS SE", "IONOS", "ionos.com",
    ["74.208.0.0/16", "212.227.0.0/16"]),
    (24940, "datacenter", "DE", "Hetzner Online GmbH", "Hetzner", "hetzner.com",
    ["136.243.0.0/16", "138.201.0.0/16", "144.76.0.0/16", "178.63.0.0/16"]),
    (20473, "datacenter", "US", "The Constant Company", "Vultr", "vultr.com",
    ["45.32.0.0/16", "64.237.32.0/19", "108.61.0.0/16"]),
    (63949, "datacenter", "US", "Linode LLC", "Linode", "linode.com",
    ["45.33.0.0/17", "45.56.0.0/16", "50.116.0.0/18"]),
    (13335, "datacenter", "US", "Cloudflare Inc", "Cloudflare", "cloudflare.com",
    ["104.16.0.0/12", "172.64.0.0/13", "162.158.0.0/15"]),
    # ========================= HOSTING =========================
    (197695, "hosting", "RU", "Reg.ru Hosting", "Reg.ru", "reg.ru",
    ["193.32.162.0/24", "194.58.92.0/22"]),
    # NOTE(review): "Contabo GmbH" also appears above as AS209083 with the
    # "datacenter" label — confirm both ASNs and the differing labels.
    (51167, "hosting", "DE", "Contabo GmbH", "Contabo Hosting", "contabo.de",
    ["78.46.0.0/15"]),
    (46606, "hosting", "US", "Unified Layer", "Bluehost", "bluehost.com",
    ["162.241.0.0/16", "198.57.128.0/17"]),
    (26496, "hosting", "US", "GoDaddy.com", "GoDaddy", "godaddy.com",
    ["184.168.0.0/16", "198.71.128.0/17"]),
]
def main(argv=None):
    """Write the ASN reputation and IP-to-ASN CSV files from ASN_DATABASE.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, argparse falls back to ``sys.argv[1:]``, so
            calling ``main()`` with no arguments behaves as before.

    Command-line options:
        --output-asn    Path of the ``src_asn,label`` file
                        (default: ``asn_reputation.csv``).
        --output-ipasn  Path of the ``network,asn,country_code,name,org,domain``
                        file (default: ``iplocate-ip-to-asn.csv``).

    Side effects:
        Overwrites both output files and prints a two-line summary to stdout.

    Raises:
        OSError: If either output file cannot be opened for writing.
    """
    parser = argparse.ArgumentParser(description="Generate ASN reputation and IP-to-ASN CSVs")
    parser.add_argument("--output-asn", default="asn_reputation.csv")
    parser.add_argument("--output-ipasn", default="iplocate-ip-to-asn.csv")
    args = parser.parse_args(argv)

    # One label per ASN: if an ASN appears more than once, the first
    # occurrence wins (dicts keep insertion order, so file order is stable).
    asn_labels = {}
    for asn, label, *_ in ASN_DATABASE:
        asn_labels.setdefault(asn, label)

    # newline="" + lineterminator="\n" gives identical LF-terminated output on
    # every platform; csv.writer quotes any field that contains a delimiter.
    with open(args.output_asn, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["src_asn", "label"])
        writer.writerows(asn_labels.items())

    # One row per (network, ASN) pair; duplicates of an ASN still contribute
    # their networks here, exactly as listed in the table.
    total_nets = 0
    with open(args.output_ipasn, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["network", "asn", "country_code", "name", "org", "domain"])
        for asn, _label, country, name, org, domain, networks in ASN_DATABASE:
            for net in networks:
                writer.writerow([net, asn, country, name, org, domain])
            total_nets += len(networks)

    # Count labels over the de-duplicated ASNs so the per-label figures always
    # add up consistently with the "unique ASNs" total printed beside them.
    labels = list(asn_labels.values())
    human_count = labels.count("human")
    dc_count = labels.count("datacenter")
    host_count = labels.count("hosting")
    print(f"[asn] {len(asn_labels)} unique ASNs: "
          f"{human_count} human, {dc_count} datacenter, {host_count} hosting")
    print(f"[ipasn] {total_nets} network prefixes mapped")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()