- Add generate_browser_ja4.py: 1,186 browser JA4 fingerprints from FoxIO + ja4db.com covering 11 families (Chromium, Firefox, Safari, Edge, Tor, Opera, Vivaldi...) - Rewrite generate_bot_ip.py: Anubis YAML rules (Google, Bing, Apple, DuckDuck, OpenAI, Perplexity bots) + Tor exit nodes + cloud scanner IPs (3,555 entries) - Rewrite generate_asn_data.py: worldwide iptoasn.com data (78,049 ASNs, 714K CIDRs) - Add dict_browser_ja4 ClickHouse dictionary + browser_family in AI features views - Add /api/browsers dashboard endpoint - Fix CSV quoting for fields containing commas (User-Agent strings) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
252 lines
9.7 KiB
Python
252 lines
9.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
generate_browser_ja4.py — Generate browser_ja4.csv with known browser TLS fingerprints.
|
|
|
|
Sources:
|
|
• FoxIO official ja4plus-mapping.csv:
|
|
https://github.com/FoxIO-LLC/ja4/blob/main/ja4plus-mapping.csv
|
|
• ja4db.com public API: https://ja4db.com/api/read/
|
|
• JA4+ specification: https://github.com/FoxIO-LLC/ja4
|
|
|
|
JA4 fingerprints identify TLS libraries, not individual browser versions.
|
|
All Chromium-based browsers (Chrome, Edge, Brave, Opera, Vivaldi, Samsung Internet)
|
|
share BoringSSL and produce identical JA4 fingerprints.
|
|
Firefox uses NSS, Safari uses Apple SecureTransport/Network.framework.
|
|
|
|
Output format (no header):
|
|
<ja4_fingerprint>,<browser_family>,<tls_library>,<context>
|
|
"""
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import urllib.request
|
|
|
|
# =============================================================================
|
|
# Curated browser JA4 fingerprints from FoxIO official mapping
|
|
# Format: (ja4, browser_family, tls_library, context)
|
|
# =============================================================================
|
|
# Curated seed table. Each row is a 4-tuple:
#   (ja4, browser_family, tls_library, context)
# main() loads these rows first and keeps only the FIRST row seen for any
# given JA4 string, so row order decides which label wins for a duplicate.
BROWSER_FINGERPRINTS = [
    # --- Chromium (Chrome, Edge, Brave, Opera, Vivaldi, Samsung Internet) ---
    # BoringSSL — all Chromium-based browsers produce identical JA4
    ("t13d1516h2_8daaf6152771_02713d6af862", "Chromium",
     "BoringSSL", "TCP fresh connection"),
    ("t13d1517h2_8daaf6152771_b0da82dd1658", "Chromium",
     "BoringSSL", "TCP with PSK extension"),
    ("t13d1517h2_8daaf6152771_b1ff8ab2d16f", "Chromium",
     "BoringSSL", "TCP alt extension set"),
    ("t13i1515h2_8daaf6152771_02713d6af862", "Chromium",
     "BoringSSL", "TCP to IP (no SNI)"),
    ("t13i1516h2_8daaf6152771_b0da82dd1658", "Chromium",
     "BoringSSL", "TCP to IP with PSK"),
    ("t13i1516h2_8daaf6152771_b1ff8ab2d16f", "Chromium",
     "BoringSSL", "TCP to IP alt ext"),
    ("q13d0312h3_55b375c5d22e_06cda9e17597", "Chromium",
     "BoringSSL", "QUIC HTTP/3 to domain"),
    ("q13i0311h3_55b375c5d22e_06cda9e17597", "Chromium",
     "BoringSSL", "QUIC HTTP/3 to IP"),

    # --- Mozilla Firefox ---
    # NSS library
    ("t13d1715h2_5b57614c22b0_7121afd63204", "Firefox",
     "NSS", "TCP to domain"),
    ("t13i1714h2_5b57614c22b0_7121afd63204", "Firefox",
     "NSS", "TCP to IP (no SNI)"),
    # Firefox with different extension counts (minor NSS updates)
    ("t13d1715h2_5b57614c22b0_3c5d18be5765", "Firefox",
     "NSS", "TCP domain (NSS variant)"),
    ("t13d1716h2_5b57614c22b0_7121afd63204", "Firefox",
     "NSS", "TCP domain (16 ext)"),

    # --- Apple Safari (macOS + iOS) ---
    # Apple SecureTransport / Network.framework
    ("t13d2014h2_a09f3c656075_14788d8d241b", "Safari",
     "SecureTransport", "TCP to domain"),
    ("t13i2013h2_a09f3c656075_14788d8d241b", "Safari",
     "SecureTransport", "TCP to IP (no SNI)"),
    # Safari variants with different extension hashes
    # NOTE(review): the middle segment of this row (5b57614c22b0) is the one
    # CIPHER_HASH_FAMILIES labels "Firefox/NSS" — confirm the fingerprint.
    ("t13d1714h2_5b57614c22b0_14788d8d241b", "Safari",
     "SecureTransport", "Safari/iOS via WebKit (hybrid)"),

    # --- Chrome on iOS ---
    # Uses Apple's TLS stack, not BoringSSL (Apple enforces this on iOS)
    # NOTE(review): this JA4 string is identical to the Safari "hybrid" row
    # above. Because main() keeps the first row per fingerprint, this row is
    # shadowed and never reaches the output file.
    ("t13d1714h2_5b57614c22b0_14788d8d241b", "Chrome_iOS",
     "SecureTransport", "Chrome on iOS (Apple TLS)"),

    # --- Tor Browser ---
    # Based on Firefox ESR but with a hardened NSS configuration
    ("t13d1614h2_5b57614c22b0_7121afd63204", "Tor_Browser",
     "NSS", "TCP (hardened Firefox ESR)"),

    # --- Common mobile browser patterns ---
    # Android WebView uses BoringSSL (same as Chromium)
    # NOTE(review): this JA4 string is identical to the first Chromium row,
    # so it is shadowed by that row in main() and never reaches the output.
    ("t13d1516h2_8daaf6152771_02713d6af862", "Android_WebView",
     "BoringSSL", "Android WebView (same as Chromium)"),

    # --- Older browser versions (TLS 1.2 fallback) ---
    ("t12d1516h2_8daaf6152771_02713d6af862", "Chromium_Legacy",
     "BoringSSL", "Chromium TLS 1.2 only (old/restricted)"),
    ("t12d1715h2_5b57614c22b0_7121afd63204", "Firefox_Legacy",
     "NSS", "Firefox TLS 1.2 only (old)"),
    ("t12d2014h2_a09f3c656075_14788d8d241b", "Safari_Legacy",
     "SecureTransport", "Safari TLS 1.2 only (old)"),
]
|
|
|
|
# Unique cipher hashes that identify TLS libraries
|
|
# Lookup used by _lib_from_cipher_hash(): the key is the second
# underscore-separated segment of a JA4 string, the value is the label stored
# in the tls_library column for fingerprints that come from remote sources.
# NOTE(review): these values are "Family/Library" labels, whereas the curated
# rows in BROWSER_FINGERPRINTS use bare library names ("BoringSSL", "NSS",
# "SecureTransport"), so the tls_library column mixes two vocabularies.
CIPHER_HASH_FAMILIES = {
    "8daaf6152771": "Chromium/BoringSSL",
    "55b375c5d22e": "Chromium/BoringSSL (QUIC)",
    "5b57614c22b0": "Firefox/NSS",
    "a09f3c656075": "Safari/SecureTransport",
}
|
|
|
|
# Remote endpoints fetched by download_ja4db() and download_foxio_mapping().
# JA4DB_URL's response is decoded as JSON; FOXIO_URL's response is read as
# comma-separated text with a header row.
JA4DB_URL = "https://ja4db.com/api/read/"
FOXIO_URL = ("https://raw.githubusercontent.com/FoxIO-LLC/ja4/"
             "main/ja4plus-mapping.csv")
|
|
|
|
# Valid JA4 fingerprint: protocol prefix + hex hashes separated by underscores
|
|
# Example: t13d1516h2_8daaf6152771_02713d6af862
|
|
import re
|
|
# Character classes are spelled out as [0-9] on purpose: in a str pattern
# ``\d`` also matches non-ASCII Unicode digits. ``\Z`` (unlike ``$``) does not
# tolerate a trailing newline, so "fingerprint\n" is rejected as well.
_JA4_RE = re.compile(
    r'^[tdq][0-9]{2}[di][0-9]{4}[a-z0-9]{2}_[0-9a-f]{12}_[0-9a-f]{12}\Z'
)


def _is_valid_ja4(s):
    """Return True if *s* is a well-formed JA4 fingerprint (ASCII only).

    The accepted shape is three underscore-separated segments, e.g.
    ``t13d1516h2_8daaf6152771_02713d6af862``: a 10-character prefix
    (``t``/``d``/``q``, two digits, ``d``/``i``, four digits, two lowercase
    alphanumerics) followed by two 12-character lowercase hex strings.

    Anything that is not a ``str`` (``None``, numbers, ...) is reported as
    invalid rather than raising, so callers can feed it raw JSON values.
    """
    if not isinstance(s, str):
        return False
    return _JA4_RE.fullmatch(s) is not None
|
|
|
|
|
|
def download_ja4db():
    """Download the ja4db.com dataset and extract browser fingerprints.

    A record is kept when its ``application`` field contains the word
    "browser", or otherwise when its ``user_agent_string`` can be mapped to a
    browser family by ``_classify_ua``. Records whose ``ja4_fingerprint`` is
    missing are skipped silently; records that are malformed (not a JSON
    object, or carrying an invalid fingerprint) are skipped and counted.

    The download is best-effort: a network, decoding or payload-shape error is
    reported on stderr and whatever was collected so far is returned.

    Returns:
        list of ``(ja4, browser_family, source)`` tuples.
    """
    entries = []
    rejected = 0
    try:
        req = urllib.request.Request(JA4DB_URL, headers={"User-Agent": "ja4-platform/1.0"})
        with urllib.request.urlopen(req, timeout=30) as r:
            data = json.loads(r.read().decode())
        if not isinstance(data, list):
            # e.g. an error object; iterating it would yield keys, not records
            raise ValueError(f"expected a JSON array, got {type(data).__name__}")
        for item in data:
            # One bad record must not abort the rest of the dataset.
            if not isinstance(item, dict):
                rejected += 1
                continue
            ja4 = item.get("ja4_fingerprint")
            if not ja4:
                continue
            if not isinstance(ja4, str) or not _is_valid_ja4(ja4):
                rejected += 1
                continue
            # Non-string values (null, numbers) are treated as absent.
            app = item.get("application")
            app = app.strip() if isinstance(app, str) else ""
            ua = item.get("user_agent_string")
            ua = ua.strip() if isinstance(ua, str) else ""
            if app and "browser" in app.lower():
                entries.append((ja4, app, "ja4db.com (verified)" if item.get("verified") else "ja4db.com"))
            elif ua:
                family = _classify_ua(ua)
                if family:
                    # Keep only a short UA excerpt as provenance.
                    entries.append((ja4, family, f"ja4db.com UA: {ua[:60]}"))
        print(f"  [ja4db.com] Downloaded {len(entries)} browser fingerprints "
              f"(rejected {rejected} malformed)")
    except Exception as e:
        # Deliberate catch-all: this source is optional for the generator.
        print(f"  [ja4db.com] Warning: download failed ({e})", file=sys.stderr)
    return entries
|
|
|
|
|
|
def download_foxio_mapping():
    """Download FoxIO's official ja4plus-mapping.csv and extract browser rows.

    The first row is treated as a header and skipped. For every other row,
    column 0 is read as the application name, column 1 as the library and
    column 4 as the JA4 fingerprint. A row is kept when its fingerprint is
    valid and the application name contains "browser" or is exactly
    "Mozilla Firefox" or "Safari".

    The file is parsed with the ``csv`` module so that quoted fields
    containing commas do not shift the column positions.

    The download is best-effort: any error is reported on stderr and whatever
    was collected so far is returned.

    Returns:
        list of ``(ja4, application, source)`` tuples.
    """
    # Local imports, in the same style main() uses for csv.
    import csv
    import io

    entries = []
    try:
        req = urllib.request.Request(FOXIO_URL, headers={"User-Agent": "ja4-platform/1.0"})
        with urllib.request.urlopen(req, timeout=15) as r:
            text = r.read().decode().strip()
        # newline="" lets the csv module handle line endings itself.
        reader = csv.reader(io.StringIO(text, newline=""))
        next(reader, None)  # Skip header
        for parts in reader:
            if len(parts) < 5:
                continue
            app = parts[0].strip()
            lib = parts[1].strip()
            ja4 = parts[4].strip()
            if not ja4 or not _is_valid_ja4(ja4):
                continue
            if "browser" in app.lower() or app in ("Mozilla Firefox", "Safari"):
                entries.append((ja4, app, f"FoxIO official (lib: {lib})"))
        print(f"  [FoxIO] Downloaded {len(entries)} browser fingerprints")
    except Exception as e:
        # Deliberate catch-all: this source is optional for the generator.
        print(f"  [FoxIO] Warning: download failed ({e})", file=sys.stderr)
    return entries
|
|
|
|
|
|
def _classify_ua(ua):
    """Map a User-Agent string to a browser-family label.

    Matching is case-insensitive substring search. Returns the family name,
    or ``None`` when no known marker is present.
    """
    text = ua.lower()

    # (markers, family) pairs, evaluated top to bottom: the more specific
    # products must win before the generic engine tokens are consulted.
    ordered_rules = (
        (("crios/",), "Chrome_iOS"),
        (("fxios/",), "Firefox_iOS"),
        (("edg/", "edge/"), "Edge"),
        (("opr/", "opera"), "Opera"),
        (("brave",), "Brave"),
        (("vivaldi",), "Vivaldi"),
        (("samsungbrowser",), "Samsung_Internet"),
        (("firefox/",), "Firefox"),
    )
    for markers, family in ordered_rules:
        if any(marker in text for marker in markers):
            return family

    # A "safari/" token only means Safari when no "chrome/" token is present.
    if "chrome/" in text:
        return "Chromium"
    if "safari/" in text:
        return "Safari"
    return None
|
|
|
|
|
|
def main():
    """Build the browser JA4 CSV from curated and (optionally) remote data.

    Command-line options:
        --output PATH   destination CSV (default: ``browser_ja4.csv``)
        --no-download   use only the curated BROWSER_FINGERPRINTS table

    Sources are merged first-wins per JA4 string, in this order: curated
    table, FoxIO mapping, ja4db.com. The result is written without a header,
    sorted by fingerprint, one ``ja4,browser_family,tls_library,context`` row
    per line, and a summary is printed to stdout.
    """
    import csv

    parser = argparse.ArgumentParser(description="Generate browser_ja4.csv")
    parser.add_argument("--output", default="browser_ja4.csv",
                        help="Output CSV file path")
    parser.add_argument("--no-download", action="store_true",
                        help="Skip downloading from remote sources")
    args = parser.parse_args()

    # ja4 -> (browser_family, tls_library, context)
    seen = {}

    # 1. Start with curated list (highest priority)
    for ja4, family, lib, ctx in BROWSER_FINGERPRINTS:
        if ja4 not in seen:
            seen[ja4] = (family, lib, ctx)

    # 2. Download from remote sources; earlier sources take precedence.
    if not args.no_download:
        print("[browser_ja4] Downloading from remote sources...")
        for fetch in (download_foxio_mapping, download_ja4db):
            for ja4, family, source in fetch():
                if ja4 not in seen:
                    # Remote rows carry no library column; infer it.
                    seen[ja4] = (family, _lib_from_cipher_hash(ja4), source)

    # 3. Write output. The csv module quotes fields containing commas, and the
    #    explicit UTF-8 encoding keeps the file independent of the host locale
    #    (context values may hold non-ASCII User-Agent text).
    with open(args.output, "w", newline="", encoding="utf-8") as out:
        writer = csv.writer(out)
        for ja4 in sorted(seen):
            family, lib, ctx = seen[ja4]
            writer.writerow([ja4, family, lib, ctx])

    families = {family for family, _, _ in seen.values()}
    print(f"[browser_ja4] Generated {len(seen)} unique fingerprints "
          f"covering {len(families)} browser families: {', '.join(sorted(families))}")
|
|
|
|
|
|
def _lib_from_cipher_hash(ja4):
    """Look up the TLS-library label for a JA4 string.

    The second underscore-separated segment of *ja4* is used as the key into
    CIPHER_HASH_FAMILIES. Returns ``"unknown"`` when the string has no second
    segment or the segment is not in the table.
    """
    segments = ja4.split("_")
    try:
        middle = segments[1]
    except IndexError:
        # No underscore at all, so there is nothing to look up.
        return "unknown"
    return CIPHER_HASH_FAMILIES.get(middle, "unknown")
|
|
|
|
|
|
# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|