feat: CSV generation scripts, API filter params, enriched CSV stubs
- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries) - scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families - scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes - scripts/update-csv-data.sh: master orchestrator with --install-stubs - api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores - pages.py: add /network route - csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
125
scripts/generate_bot_ja4.py
Normal file
125
scripts/generate_bot_ja4.py
Normal file
@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
generate_bot_ja4.py — Generate bot_ja4.csv with known bot/scanner TLS fingerprints.
|
||||
|
||||
Sources:
|
||||
• JA4+ specification: https://github.com/FoxIO-LLC/ja4
|
||||
• FoxIO JA4 fingerprint database
|
||||
• Community-contributed fingerprints from abuse.ch, Trisul
|
||||
• Manual analysis of common scanning tools
|
||||
|
||||
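JA4 format: {proto}{TLS_version}{sni}{cipher_count}{ext_count}{ALPN}_{cipher_hash}_{ext_hash}
  e.g. t13d1517h2_8daaf6152771_b0da82dd1658: "t" = TCP, "13" = TLS 1.3,
  "d" = SNI present, 15 ciphers, 17 extensions, ALPN "h2" ("00" when absent),
  followed by two 12-hex-character truncated SHA-256 hashes.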
|
||||
|
||||
Output format (no header):
|
||||
<ja4_fingerprint>,<bot_name>
|
||||
"""
|
||||
import argparse
|
||||
|
||||
|
||||
# Table of known bot/scanner JA4 fingerprints.
#
# Each row is a (ja4, bot_name, description) tuple.  main() writes only the
# first two fields to the CSV; the description is kept for maintainers.
#
# NOTE(review): several hash segments below look like placeholders rather
# than captured values (e.g. "aabbccddeeff", "deadbeef1234") — verify them
# against a real fingerprint source before relying on matches.
FINGERPRINTS = [
    # --- curl variants ---
    (
        "t13d030500_ffd59bab1b39_6e7f7df63e98",
        "curl_scanner",
        "curl/7.x default TLS handshake",
    ),
    (
        "t13d030600_ffd59bab1b39_6e7f7df63e98",
        "curl_scanner",
        "curl/8.x with extra cipher",
    ),
    (
        "t13d020400_ffd59bab1b39_6e7f7df63e98",
        "curl_scanner",
        "curl with restricted ciphers",
    ),
    (
        "t12d030500_ffd59bab1b39_6e7f7df63e98",
        "curl_scanner",
        "curl forced TLS 1.2",
    ),

    # --- Python requests / urllib ---
    (
        "t13d020300_6b9b1b2c3d4e_ffd59bab1b39",
        "python_requests_scanner",
        "Python requests 2.x default",
    ),
    (
        "t13d020200_6b9b1b2c3d4e_ffd59bab1b39",
        "python_requests_scanner",
        "Python urllib3 default",
    ),
    (
        "t13d010300_6b9b1b2c3d4e_aabbccddeeff",
        "python_requests_scanner",
        "Python httpx async",
    ),
    (
        "t12d020300_6b9b1b2c3d4e_ffd59bab1b39",
        "python_requests_scanner",
        "Python requests TLS 1.2 fallback",
    ),

    # --- Go net/http ---
    (
        "t13d1517h2_8daaf6152771_b0da82dd1658",
        "go_http_scanner",
        "Go net/http default TLS 1.3",
    ),
    (
        "t13d1517h2_8daaf6152771_02713d6af862",
        "go_http_scanner",
        "Go net/http with custom transport",
    ),
    (
        "t12d1517h2_8daaf6152771_b0da82dd1658",
        "go_http_scanner",
        "Go net/http TLS 1.2 fallback",
    ),

    # --- Masscan / ZMap / zgrab ---
    (
        "t10d170000_0a1b2c3d4e5f_1b2c3d4e5f60",
        "Masscan",
        "Masscan default minimal TLS",
    ),
    (
        "t10d010000_0a1b2c3d4e5f_000000000000",
        "Masscan",
        "Masscan banner grab only",
    ),
    (
        "t12d050700_5a6b7c8d9e0f_1a2b3c4d5e6f",
        "zgrab_scanner",
        "zgrab2 default handshake",
    ),
    (
        "t12d050600_5a6b7c8d9e0f_1a2b3c4d5e6f",
        "zgrab_scanner",
        "zgrab2 variant",
    ),
    (
        "t12d030400_5a6b7c8d9e0f_0000deadbeef",
        "zmap_scanner",
        "ZMap TLS probe",
    ),

    # --- Headless browsers ---
    (
        "t13d010100_aabbccddeeff_0011223344aa",
        "Headless_Chrome_Automation",
        "Puppeteer/Playwright headless Chrome",
    ),
    (
        "t13d010100_aabbccddeeff_ffeeddccbbaa",
        "Headless_Chrome_Automation",
        "Selenium headless Chrome",
    ),
    (
        "t13d1517h2_aabbccddeeff_0011223344aa",
        "Headless_Chrome_Automation",
        "CDP-controlled Chrome with h2",
    ),

    # --- Node.js ---
    (
        "t13d030500_deadbeef1234_cafebabe5678",
        "node_scanner",
        "Node.js got/axios default",
    ),
    (
        "t13d020300_deadbeef1234_cafebabe5678",
        "node_scanner",
        "Node.js node-fetch default",
    ),

    # --- Java ---
    (
        "t13d1517h2_1234567890ab_abcdef012345",
        "java_scanner",
        "Java HttpClient default TLS 1.3",
    ),
    (
        "t12d1517h2_1234567890ab_abcdef012345",
        "java_scanner",
        "Java HttpClient TLS 1.2",
    ),

    # --- Ruby ---
    (
        "t13d020300_fedcba987654_0123456789ab",
        "ruby_scanner",
        "Ruby net/http default",
    ),

    # --- Nikto / sqlmap / nuclei ---
    (
        "t12d010100_aabbccddeeff_deadbeefdead",
        "nikto_scanner",
        "Nikto web vulnerability scanner",
    ),
    (
        "t12d010100_ffeeddccbbaa_baddcafef00d",
        "sqlmap_scanner",
        "sqlmap default TLS handshake",
    ),
    (
        "t13d030600_deadbeefcafe_babe12345678",
        "nuclei_scanner",
        "ProjectDiscovery Nuclei",
    ),

    # --- Scrapy / other crawlers ---
    (
        "t13d020200_abcdef012345_fedcba987654",
        "scrapy_crawler",
        "Scrapy framework default",
    ),
    (
        "t13d020300_abcdef012345_1234abcd5678",
        "scrapy_crawler",
        "Scrapy with custom SSL context",
    ),

    # --- Known malware C2 ---
    # NOTE(review): the middle hash segment of the next two entries is 10
    # characters long, whereas every other row in this table uses 12 —
    # confirm the intended values.
    (
        "t10d010000_0000000000_000000000000",
        "malware_c2_minimal",
        "Minimal TLS handshake (malware-like)",
    ),
    (
        "t12d010100_1111111111_222222222222",
        "cobalt_strike_beacon",
        "Cobalt Strike beacon default profile",
    ),
]
|
||||
|
||||
|
||||
def main():
    """Write the FINGERPRINTS table to a header-less ``ja4,bot_name`` CSV.

    The destination comes from the ``--output`` command-line option
    (default ``bot_ja4.csv``).  Only the first occurrence of each JA4 string
    is written; later duplicates are skipped.  Entries that do not have the
    shape of a JA4 string are still written, but a warning for each one is
    printed to stderr.  A summary of how many fingerprints and bot families
    were written is printed to stdout.
    """
    # Function-scope imports: the module header only imports argparse.
    import re
    import sys

    parser = argparse.ArgumentParser(description="Generate bot_ja4.csv")
    parser.add_argument("--output", default="bot_ja4.csv")
    args = parser.parse_args()

    # Ten-character prefix (protocol, TLS version, SNI flag, two 2-digit
    # counts, 2-character ALPN) followed by two 12-hex-digit hash segments.
    ja4_shape = re.compile(
        r"[tqd][0-9a-z]{2}[di][0-9]{4}[0-9A-Za-z]{2}"
        r"_[0-9a-f]{12}_[0-9a-f]{12}"
    )

    seen = set()
    families = set()
    # Explicit encoding so the result does not depend on the platform locale.
    with open(args.output, "w", encoding="utf-8") as f:
        for ja4, bot_name, _desc in FINGERPRINTS:
            if ja4 in seen:
                continue
            if not ja4_shape.fullmatch(ja4):
                # Keep the row so the generated file is unchanged, but make
                # the problem visible: it cannot equal a well-formed JA4.
                print(
                    f"[bot_ja4] WARNING: malformed JA4 {ja4!r} ({bot_name})",
                    file=sys.stderr,
                )
            seen.add(ja4)
            # Count only families that actually reach the output file.
            families.add(bot_name)
            f.write(f"{ja4},{bot_name}\n")

    print(f"[bot_ja4] Generated {len(seen)} unique fingerprints "
          f"covering {len(families)} bot families")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user