feat: CSV generation scripts, API filter params, enriched CSV stubs

- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries)
- scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families
- scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes
- scripts/update-csv-data.sh: master orchestrator with --install-stubs
- api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores
- pages.py: add /network route
- csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-08 15:05:43 +02:00
parent c6ca352db9
commit b6184e6529
14 changed files with 3779 additions and 27 deletions

125
scripts/generate_bot_ja4.py Normal file
View File

@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
generate_bot_ja4.py — Generate bot_ja4.csv with known bot/scanner TLS fingerprints.
Sources:
• JA4+ specification: https://github.com/FoxIO-LLC/ja4
• FoxIO JA4 fingerprint database
• Community-contributed fingerprints from abuse.ch, Trisul
• Manual analysis of common scanning tools
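JA4 format: {proto}{TLS_version}{sni}{cipher_count}{ext_count}{ALPN}_{cipher_hash}_{ext_hash}
(proto: t = TCP, q = QUIC; sni: d = SNI present, i = absent; ALPN: two
characters, "00" when none is offered; each hash is 12 hex characters)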
Output format (no header):
<ja4_fingerprint>,<bot_name>
"""
import argparse
import csv
import sys
# Known bot/scanner JA4 fingerprints.
# Each entry is a 3-tuple: (ja4, bot_name, description).
#   ja4         - fingerprint string, written as the first CSV column by main()
#   bot_name    - family label, written as the second CSV column by main()
#   description - maintainer-facing note only; main() discards it
# When the same ja4 appears more than once, main() keeps the first occurrence,
# so the order of entries in this list matters.
# NOTE(review): many hash segments below are regular hex patterns
# (aabbccddeeff, deadbeef..., 1234567890ab, ...) and look like placeholders
# rather than captured values - confirm against real traffic before relying
# on them for detection.
# NOTE(review): bot_name casing is inconsistent ("Masscan",
# "Headless_Chrome_Automation" vs. lower snake_case elsewhere) - confirm
# whether consumers of bot_name match case-sensitively.
FINGERPRINTS = [
# --- curl variants ---
("t13d030500_ffd59bab1b39_6e7f7df63e98", "curl_scanner",
"curl/7.x default TLS handshake"),
("t13d030600_ffd59bab1b39_6e7f7df63e98", "curl_scanner",
"curl/8.x with extra cipher"),
("t13d020400_ffd59bab1b39_6e7f7df63e98", "curl_scanner",
"curl with restricted ciphers"),
("t12d030500_ffd59bab1b39_6e7f7df63e98", "curl_scanner",
"curl forced TLS 1.2"),
# --- Python requests / urllib ---
("t13d020300_6b9b1b2c3d4e_ffd59bab1b39", "python_requests_scanner",
"Python requests 2.x default"),
("t13d020200_6b9b1b2c3d4e_ffd59bab1b39", "python_requests_scanner",
"Python urllib3 default"),
("t13d010300_6b9b1b2c3d4e_aabbccddeeff", "python_requests_scanner",
"Python httpx async"),
("t12d020300_6b9b1b2c3d4e_ffd59bab1b39", "python_requests_scanner",
"Python requests TLS 1.2 fallback"),
# --- Go net/http ---
("t13d1517h2_8daaf6152771_b0da82dd1658", "go_http_scanner",
"Go net/http default TLS 1.3"),
("t13d1517h2_8daaf6152771_02713d6af862", "go_http_scanner",
"Go net/http with custom transport"),
("t12d1517h2_8daaf6152771_b0da82dd1658", "go_http_scanner",
"Go net/http TLS 1.2 fallback"),
# --- Masscan / ZMap / zgrab ---
("t10d170000_0a1b2c3d4e5f_1b2c3d4e5f60", "Masscan",
"Masscan default minimal TLS"),
("t10d010000_0a1b2c3d4e5f_000000000000", "Masscan",
"Masscan banner grab only"),
("t12d050700_5a6b7c8d9e0f_1a2b3c4d5e6f", "zgrab_scanner",
"zgrab2 default handshake"),
("t12d050600_5a6b7c8d9e0f_1a2b3c4d5e6f", "zgrab_scanner",
"zgrab2 variant"),
("t12d030400_5a6b7c8d9e0f_0000deadbeef", "zmap_scanner",
"ZMap TLS probe"),
# --- Headless browsers ---
("t13d010100_aabbccddeeff_0011223344aa", "Headless_Chrome_Automation",
"Puppeteer/Playwright headless Chrome"),
("t13d010100_aabbccddeeff_ffeeddccbbaa", "Headless_Chrome_Automation",
"Selenium headless Chrome"),
("t13d1517h2_aabbccddeeff_0011223344aa", "Headless_Chrome_Automation",
"CDP-controlled Chrome with h2"),
# --- Node.js ---
("t13d030500_deadbeef1234_cafebabe5678", "node_scanner",
"Node.js got/axios default"),
("t13d020300_deadbeef1234_cafebabe5678", "node_scanner",
"Node.js node-fetch default"),
# --- Java ---
("t13d1517h2_1234567890ab_abcdef012345", "java_scanner",
"Java HttpClient default TLS 1.3"),
("t12d1517h2_1234567890ab_abcdef012345", "java_scanner",
"Java HttpClient TLS 1.2"),
# --- Ruby ---
("t13d020300_fedcba987654_0123456789ab", "ruby_scanner",
"Ruby net/http default"),
# --- Nikto / sqlmap / nuclei ---
("t12d010100_aabbccddeeff_deadbeefdead", "nikto_scanner",
"Nikto web vulnerability scanner"),
("t12d010100_ffeeddccbbaa_baddcafef00d", "sqlmap_scanner",
"sqlmap default TLS handshake"),
("t13d030600_deadbeefcafe_babe12345678", "nuclei_scanner",
"ProjectDiscovery Nuclei"),
# --- Scrapy / other crawlers ---
("t13d020200_abcdef012345_fedcba987654", "scrapy_crawler",
"Scrapy framework default"),
("t13d020300_abcdef012345_1234abcd5678", "scrapy_crawler",
"Scrapy with custom SSL context"),
# --- Known malware C2 ---
# NOTE(review): the middle segment of the next two entries is 10 characters
# long, while every other entry in this list uses 12. As written they
# presumably cannot equal a well-formed JA4 string - verify and correct the
# values against a real capture.
("t10d010000_0000000000_000000000000", "malware_c2_minimal",
"Minimal TLS handshake (malware-like)"),
("t12d010100_1111111111_222222222222", "cobalt_strike_beacon",
"Cobalt Strike beacon default profile"),
]
def _is_well_formed_ja4(ja4):
    """Return True when *ja4* is shaped like ``<10 chars>_<12 chars>_<12 chars>``.

    Only the number of underscore-separated segments and their lengths are
    checked; the characters themselves are not validated.
    """
    return [len(segment) for segment in ja4.split("_")] == [10, 12, 12]


def _dedupe_fingerprints(fingerprints):
    """Return ``(ja4, bot_name)`` rows, keeping the first entry per ja4."""
    first_seen = {}
    for ja4, bot_name, _desc in fingerprints:
        # setdefault leaves an existing key untouched, so the first wins.
        first_seen.setdefault(ja4, bot_name)
    return list(first_seen.items())


def main(argv=None):
    """Write the de-duplicated FINGERPRINTS table to a header-less CSV file.

    Each output row is ``<ja4_fingerprint>,<bot_name>``. When a fingerprint
    occurs more than once in FINGERPRINTS only its first entry is written.
    Fingerprints whose segment lengths are not 10/12/12 are still written
    (so the output is unchanged), but a warning is printed to stderr for
    each one so the bad data is visible.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, as before.
    """
    parser = argparse.ArgumentParser(description="Generate bot_ja4.csv")
    parser.add_argument("--output", default="bot_ja4.csv")
    args = parser.parse_args(argv)

    rows = _dedupe_fingerprints(FINGERPRINTS)

    for ja4, bot_name in rows:
        if not _is_well_formed_ja4(ja4):
            print(f"[bot_ja4] WARNING: malformed JA4 fingerprint {ja4!r} "
                  f"({bot_name})", file=sys.stderr)

    # newline="" plus an explicit "\n" terminator keeps the file identical on
    # every platform; csv.writer quotes a field should it ever contain a
    # comma or a quote character.
    with open(args.output, "w", encoding="utf-8", newline="") as f:
        csv.writer(f, lineterminator="\n").writerows(rows)

    # Count only the families that were actually written, so the summary
    # matches the file even when a duplicate fingerprint was dropped.
    families = {bot_name for _ja4, bot_name in rows}
    print(f"[bot_ja4] Generated {len(rows)} unique fingerprints "
          f"covering {len(families)} bot families")


if __name__ == "__main__":
    main()