feat: browser JA4 detection, Anubis bot rules, worldwide ASN data
- Add generate_browser_ja4.py: 1,186 browser JA4 fingerprints from FoxIO + ja4db.com covering 11 families (Chromium, Firefox, Safari, Edge, Tor, Opera, Vivaldi...) - Rewrite generate_bot_ip.py: Anubis YAML rules (Google, Bing, Apple, DuckDuck, OpenAI, Perplexity bots) + Tor exit nodes + cloud scanner IPs (3,555 entries) - Rewrite generate_asn_data.py: worldwide iptoasn.com data (78,049 ASNs, 714K CIDRs) - Add dict_browser_ja4 ClickHouse dictionary + browser_family in AI features views - Add /api/browsers dashboard endpoint - Fix CSV quoting for fields containing commas (User-Agent strings) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
251
scripts/generate_browser_ja4.py
Normal file
251
scripts/generate_browser_ja4.py
Normal file
@ -0,0 +1,251 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
generate_browser_ja4.py — Generate browser_ja4.csv with known browser TLS fingerprints.
|
||||
|
||||
Sources:
|
||||
• FoxIO official ja4plus-mapping.csv:
|
||||
https://github.com/FoxIO-LLC/ja4/blob/main/ja4plus-mapping.csv
|
||||
• ja4db.com public API: https://ja4db.com/api/read/
|
||||
• JA4+ specification: https://github.com/FoxIO-LLC/ja4
|
||||
|
||||
JA4 fingerprints identify TLS libraries, not individual browser versions.
|
||||
All Chromium-based browsers (Chrome, Edge, Brave, Opera, Vivaldi, Samsung Internet)
|
||||
share BoringSSL and produce identical JA4 fingerprints.
|
||||
Firefox uses NSS, Safari uses Apple SecureTransport/Network.framework.
|
||||
|
||||
Output format (no header):
|
||||
<ja4_fingerprint>,<browser_family>,<tls_library>,<context>
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
|
||||
# =============================================================================
|
||||
# Curated browser JA4 fingerprints from FoxIO official mapping
|
||||
# Format: (ja4, browser_family, tls_library, context)
|
||||
# =============================================================================
|
||||
BROWSER_FINGERPRINTS = [
    # Each row is (ja4, browser_family, tls_library, context).
    # main() loads these rows first and keeps only the FIRST row seen for a
    # given JA4 string, so row order decides which label wins on duplicates.

    # --- Chromium (Chrome, Edge, Brave, Opera, Vivaldi, Samsung Internet) ---
    # BoringSSL — all Chromium-based browsers produce identical JA4
    ("t13d1516h2_8daaf6152771_02713d6af862", "Chromium",
     "BoringSSL", "TCP fresh connection"),
    ("t13d1517h2_8daaf6152771_b0da82dd1658", "Chromium",
     "BoringSSL", "TCP with PSK extension"),
    ("t13d1517h2_8daaf6152771_b1ff8ab2d16f", "Chromium",
     "BoringSSL", "TCP alt extension set"),
    ("t13i1515h2_8daaf6152771_02713d6af862", "Chromium",
     "BoringSSL", "TCP to IP (no SNI)"),
    ("t13i1516h2_8daaf6152771_b0da82dd1658", "Chromium",
     "BoringSSL", "TCP to IP with PSK"),
    ("t13i1516h2_8daaf6152771_b1ff8ab2d16f", "Chromium",
     "BoringSSL", "TCP to IP alt ext"),
    ("q13d0312h3_55b375c5d22e_06cda9e17597", "Chromium",
     "BoringSSL", "QUIC HTTP/3 to domain"),
    ("q13i0311h3_55b375c5d22e_06cda9e17597", "Chromium",
     "BoringSSL", "QUIC HTTP/3 to IP"),

    # --- Mozilla Firefox ---
    # NSS library
    ("t13d1715h2_5b57614c22b0_7121afd63204", "Firefox",
     "NSS", "TCP to domain"),
    ("t13i1714h2_5b57614c22b0_7121afd63204", "Firefox",
     "NSS", "TCP to IP (no SNI)"),
    # Firefox with different extension counts (minor NSS updates)
    ("t13d1715h2_5b57614c22b0_3c5d18be5765", "Firefox",
     "NSS", "TCP domain (NSS variant)"),
    ("t13d1716h2_5b57614c22b0_7121afd63204", "Firefox",
     "NSS", "TCP domain (16 ext)"),

    # --- Apple Safari (macOS + iOS) ---
    # Apple SecureTransport / Network.framework
    ("t13d2014h2_a09f3c656075_14788d8d241b", "Safari",
     "SecureTransport", "TCP to domain"),
    ("t13i2013h2_a09f3c656075_14788d8d241b", "Safari",
     "SecureTransport", "TCP to IP (no SNI)"),
    # Safari variants with different extension hashes
    ("t13d1714h2_5b57614c22b0_14788d8d241b", "Safari",
     "SecureTransport", "Safari/iOS via WebKit (hybrid)"),

    # --- Chrome on iOS ---
    # Uses Apple's TLS stack, not BoringSSL (Apple enforces this on iOS)
    # NOTE: this JA4 string is identical to the Safari "hybrid" row directly
    # above. main() keeps the first row per JA4, so this Chrome_iOS row is
    # shadowed and never reaches the generated CSV.
    ("t13d1714h2_5b57614c22b0_14788d8d241b", "Chrome_iOS",
     "SecureTransport", "Chrome on iOS (Apple TLS)"),

    # --- Tor Browser ---
    # Based on Firefox ESR but with a hardened NSS configuration
    ("t13d1614h2_5b57614c22b0_7121afd63204", "Tor_Browser",
     "NSS", "TCP (hardened Firefox ESR)"),

    # --- Common mobile browser patterns ---
    # Android WebView uses BoringSSL (same as Chromium)
    # NOTE: this JA4 string is identical to the first Chromium row in this
    # list, so main() keeps the "Chromium" label and drops this row.
    ("t13d1516h2_8daaf6152771_02713d6af862", "Android_WebView",
     "BoringSSL", "Android WebView (same as Chromium)"),

    # --- Older browser versions (TLS 1.2 fallback) ---
    ("t12d1516h2_8daaf6152771_02713d6af862", "Chromium_Legacy",
     "BoringSSL", "Chromium TLS 1.2 only (old/restricted)"),
    ("t12d1715h2_5b57614c22b0_7121afd63204", "Firefox_Legacy",
     "NSS", "Firefox TLS 1.2 only (old)"),
    ("t12d2014h2_a09f3c656075_14788d8d241b", "Safari_Legacy",
     "SecureTransport", "Safari TLS 1.2 only (old)"),
]
|
||||
|
||||
# Unique cipher hashes that identify TLS libraries
# Keyed by the second underscore-separated segment of a JA4 string; looked up
# by _lib_from_cipher_hash() to fill the tls_library column for fingerprints
# that come from the remote sources rather than the curated list.
# NOTE(review): these labels ("Chromium/BoringSSL") use a different format from
# the curated rows' tls_library values ("BoringSSL"), so the output column
# mixes both spellings — confirm whether downstream consumers care.
CIPHER_HASH_FAMILIES = {
    "8daaf6152771": "Chromium/BoringSSL",
    "55b375c5d22e": "Chromium/BoringSSL (QUIC)",
    "5b57614c22b0": "Firefox/NSS",
    "a09f3c656075": "Safari/SecureTransport",
}
|
||||
|
||||
# ja4db.com read endpoint; download_ja4db() fetches it and parses the body as JSON.
JA4DB_URL = "https://ja4db.com/api/read/"
# Raw-content URL of FoxIO's ja4plus-mapping.csv; download_foxio_mapping()
# fetches it and reads the application, library and ja4 columns.
FOXIO_URL = ("https://raw.githubusercontent.com/FoxIO-LLC/ja4/"
             "main/ja4plus-mapping.csv")
|
||||
|
||||
# Valid JA4 fingerprint: protocol prefix + hex hashes separated by underscores
|
||||
# Example: t13d1516h2_8daaf6152771_02713d6af862
|
||||
import re
|
||||
_JA4_RE = re.compile(r'^[tdq]\d{2}[di]\d{4}[a-z0-9]{2}_[0-9a-f]{12}_[0-9a-f]{12}$')
|
||||
|
||||
|
||||
def _is_valid_ja4(s):
|
||||
"""Validate that a string is a well-formed JA4 fingerprint (ASCII hex only)."""
|
||||
return bool(_JA4_RE.match(s))
|
||||
|
||||
|
||||
def download_ja4db():
    """Fetch the ja4db.com dataset and keep the records that look like browsers.

    A record is kept when its ``application`` field contains "browser"
    (case-insensitive), or otherwise when its ``user_agent_string`` can be
    classified by ``_classify_ua``. Records whose ``ja4_fingerprint`` is
    missing are skipped; records whose fingerprint is present but malformed
    are skipped and counted as rejected.

    Returns:
        list[tuple]: ``(ja4, family, source)`` tuples. On any failure a
        warning is printed to stderr and whatever was collected so far is
        returned (best effort).
    """
    entries = []
    rejected = 0
    try:
        request = urllib.request.Request(
            JA4DB_URL, headers={"User-Agent": "ja4-platform/1.0"})
        with urllib.request.urlopen(request, timeout=30) as response:
            records = json.loads(response.read().decode())

        for record in records:
            ja4 = record.get("ja4_fingerprint")
            if not ja4:
                # Nothing to validate; not counted as malformed.
                continue
            if not _is_valid_ja4(ja4):
                rejected += 1
                continue

            app = (record.get("application") or "").strip()
            ua = (record.get("user_agent_string") or "").strip()

            if app and "browser" in app.lower():
                source = "ja4db.com"
                if record.get("verified"):
                    source = "ja4db.com (verified)"
                entries.append((ja4, app, source))
                continue

            if not ua:
                continue
            family = _classify_ua(ua)
            if family:
                # Only the first 60 characters of the UA are kept as context.
                entries.append((ja4, family, f"ja4db.com UA: {ua[:60]}"))

        print(f"  [ja4db.com] Downloaded {len(entries)} browser fingerprints "
              f"(rejected {rejected} malformed)")
    except Exception as e:
        print(f"  [ja4db.com] Warning: download failed ({e})", file=sys.stderr)
    return entries
|
||||
|
||||
|
||||
def download_foxio_mapping():
    """Download FoxIO's official ja4plus-mapping.csv and extract browser rows.

    The file is parsed with the ``csv`` module so that quoted fields which
    contain commas do not shift the column positions. Column 0 is read as the
    application name, column 1 as the library and column 4 as the JA4 string.
    A row is kept when its JA4 is well-formed and the application name either
    contains "browser" (case-insensitive) or is exactly "Mozilla Firefox" or
    "Safari".

    Returns:
        list[tuple]: ``(ja4, application, source)`` tuples. On any failure a
        warning is printed to stderr and whatever was collected so far is
        returned (best effort).
    """
    # Local imports keep this function self-contained; the module's top-level
    # import block does not provide csv/io.
    import csv
    import io

    entries = []
    try:
        req = urllib.request.Request(FOXIO_URL, headers={"User-Agent": "ja4-platform/1.0"})
        with urllib.request.urlopen(req, timeout=15) as r:
            text = r.read().decode()

        # newline="" lets the csv module handle line endings itself, as the
        # csv documentation recommends for its input streams.
        rows = csv.reader(io.StringIO(text.strip(), newline=""))
        next(rows, None)  # Skip header
        for parts in rows:
            if len(parts) < 5:
                continue
            app = parts[0].strip()
            lib = parts[1].strip()
            ja4 = parts[4].strip()
            if not ja4 or not _is_valid_ja4(ja4):
                continue
            if "browser" in app.lower() or app in ("Mozilla Firefox", "Safari"):
                entries.append((ja4, app, f"FoxIO official (lib: {lib})"))
        print(f"  [FoxIO] Downloaded {len(entries)} browser fingerprints")
    except Exception as e:
        print(f"  [FoxIO] Warning: download failed ({e})", file=sys.stderr)
    return entries
|
||||
|
||||
|
||||
def _classify_ua(ua):
|
||||
"""Classify a user-agent string into a browser family."""
|
||||
ua_lower = ua.lower()
|
||||
# Order matters — check more specific strings first
|
||||
if "crios/" in ua_lower:
|
||||
return "Chrome_iOS"
|
||||
if "fxios/" in ua_lower:
|
||||
return "Firefox_iOS"
|
||||
if "edg/" in ua_lower or "edge/" in ua_lower:
|
||||
return "Edge"
|
||||
if "opr/" in ua_lower or "opera" in ua_lower:
|
||||
return "Opera"
|
||||
if "brave" in ua_lower:
|
||||
return "Brave"
|
||||
if "vivaldi" in ua_lower:
|
||||
return "Vivaldi"
|
||||
if "samsungbrowser" in ua_lower:
|
||||
return "Samsung_Internet"
|
||||
if "firefox/" in ua_lower:
|
||||
return "Firefox"
|
||||
if "safari/" in ua_lower and "chrome/" not in ua_lower:
|
||||
return "Safari"
|
||||
if "chrome/" in ua_lower:
|
||||
return "Chromium"
|
||||
return None
|
||||
|
||||
|
||||
def main():
    """Build the browser JA4 CSV from the curated list and the remote sources.

    Command-line options:
        --output PATH   Output CSV file path (default: browser_ja4.csv).
        --no-download   Skip downloading from remote sources.

    Sources are merged with first-wins priority per JA4 string: the curated
    BROWSER_FINGERPRINTS list, then the FoxIO mapping, then ja4db.com. Rows
    are written without a header, sorted by JA4, as
    ``ja4, browser_family, tls_library, context``.
    """
    import csv

    parser = argparse.ArgumentParser(description="Generate browser_ja4.csv")
    parser.add_argument("--output", default="browser_ja4.csv",
                        help="Output CSV file path")
    parser.add_argument("--no-download", action="store_true",
                        help="Skip downloading from remote sources")
    args = parser.parse_args()

    # Collect all fingerprints: (ja4, browser_family, tls_library, context)
    seen = {}  # ja4 -> (browser_family, tls_library, context)

    # 1. Start with curated list (highest priority)
    for ja4, family, lib, ctx in BROWSER_FINGERPRINTS:
        seen.setdefault(ja4, (family, lib, ctx))

    # 2. Download from remote sources; FoxIO outranks ja4db.com, so it is
    #    merged first and later duplicates are ignored.
    if not args.no_download:
        print("[browser_ja4] Downloading from remote sources...")
        for download in (download_foxio_mapping, download_ja4db):
            for ja4, family, source in download():
                if ja4 not in seen:
                    # Remote rows carry no library column; infer it.
                    seen[ja4] = (family, _lib_from_cipher_hash(ja4), source)

    # 3. Write output (use csv module to properly handle commas in context field)
    #    An explicit UTF-8 encoding keeps non-ASCII application names or
    #    User-Agent text from failing on platforms whose default encoding
    #    cannot represent them.
    with open(args.output, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows([ja4, *seen[ja4]] for ja4 in sorted(seen))

    families = {family for family, _, _ in seen.values()}
    print(f"[browser_ja4] Generated {len(seen)} unique fingerprints "
          f"covering {len(families)} browser families: {', '.join(sorted(families))}")
|
||||
|
||||
|
||||
def _lib_from_cipher_hash(ja4):
    """Look up the TLS library label for a JA4 string via its cipher hash.

    The cipher hash is the second underscore-separated segment of *ja4*.

    Returns:
        The label from CIPHER_HASH_FAMILIES, or "unknown" when the string has
        fewer than two segments or the hash is not in the table.
    """
    segments = ja4.split("_")
    if len(segments) < 2:
        return "unknown"
    return CIPHER_HASH_FAMILIES.get(segments[1], "unknown")
|
||||
|
||||
|
||||
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user