#!/usr/bin/env python3
"""
generate_traffic.py — Realistic HTTP/HTTPS traffic generator for integration tests

Simulates varied web traffic including:
  - Multiple browser User-Agents (Chrome, Firefox, Safari, Edge)
  - Bot / crawler traffic (Googlebot, Bingbot, curl, wget, python-requests)
  - Multiple HTTP methods (GET, POST, PUT, DELETE, HEAD, OPTIONS, PATCH)
  - Varied paths, query strings, form data, JSON payloads
  - Both HTTP (port 80) and HTTPS (port 443)
  - HTTP/1.0, HTTP/1.1, HTTP/2.0 (via httpx[http2])
  - Different Accept/Language/Encoding headers
  - X-Forwarded-For always set — ensures src_ip diversity;
    Cookie / Referer are set on a random subset of requests only
  - Multiple SSL contexts to vary TLS ClientHello parameters

Usage:
  python generate_traffic.py [--host platform] [--http-port 80] [--https-port 443]
                              [--requests 500] [--workers 10]
"""

import argparse
import concurrent.futures
import http.client
import json
import random
import ssl
import time
import urllib.error
import urllib.request
from dataclasses import dataclass, field
from typing import Optional

# ---------------------------------------------------------------------------
# Realistic data pools
# ---------------------------------------------------------------------------

# User-Agent strings presented by the "browser" scenarios.
BROWSERS = [
    # Chrome 120 Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    # Chrome 118 Linux
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    # Firefox 121 Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
    # Firefox 120 Linux
    "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
    # Safari 17 macOS
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
    # Edge 120 Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
    # Chrome Android
    "Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.115 Mobile Safari/537.36",
    # Safari iPhone
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1",
]

# User-Agent strings presented by the "bot" scenarios (crawlers, CLI tools,
# HTTP libraries and scanners).
BOTS = [
    "Googlebot/2.1 (+http://www.google.com/bot.html)",
    "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
    "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
    "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
    "Twitterbot/1.0",
    "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
    "curl/7.88.1",
    "python-requests/2.31.0",
    "wget/1.21.3",
    "Wget/1.21 (linux-gnu)",
    "Go-http-client/1.1",
    "Java/11.0.18",
    "masscan/1.3 (https://github.com/robertdavidgraham/masscan)",
    "zgrab/0.x",
    "libwww-perl/6.72",
]

# Request targets. Some entries already carry a query string, and several
# imitate common probing requests (/.env, /.git/HEAD, path traversal, ...).
PATHS = [
    "/",
    "/health",
    "/index.html",
    "/index.php",
    "/login",
    "/api/v1/users",
    "/api/v1/status",
    "/api/v2/metrics",
    "/admin",
    "/admin/login",
    "/.env",
    "/.git/HEAD",
    "/wp-login.php",
    "/wp-admin/",
    "/phpmyadmin/",
    "/xmlrpc.php",
    "/robots.txt",
    "/sitemap.xml",
    "/favicon.ico",
    "/static/js/app.js",
    "/static/css/main.css",
    "/images/logo.png",
    "/api/search?q=test&limit=10",
    "/api/search?q=",
    "/api/users?page=1&per_page=20&sort=created_at",
    "/download?file=../../../etc/passwd",
    "/cgi-bin/test.cgi",
]

# Query-string suffixes appended verbatim after a path; the empty string
# means "no extra query string".
QUERY_PARAMS = [
    "",
    "?id=1",
    "?id=1+OR+1%3D1",
    "?debug=true",
    "?lang=fr",
    "?ref=google",
    "?utm_source=newsletter&utm_medium=email&utm_campaign=spring2024",
    "?token=eyJhbGciOiJIUzI1NiJ9.dGVzdA.abc",
    "?callback=jsonp_callback",
    "?page=1&limit=100&sort=-created_at",
]

# Candidate Accept-Language header values.
ACCEPT_LANGS = [
    "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
    "en-US,en;q=0.9",
    "de-DE,de;q=0.9,en;q=0.8",
    "ja-JP,ja;q=0.9,en-US;q=0.8",
    "zh-CN,zh;q=0.9",
    "es-ES,es;q=0.9,en;q=0.8",
    "*",
]

# Candidate Referer header values; the empty string means the header is
# omitted for that request.
REFERERS = [
    "",
    "https://www.google.com/search?q=test",
    "https://www.bing.com/search?q=example",
    "https://t.co/abc123",
    "https://www.facebook.com/",
    "https://example.com/page",
]
# Candidate values for the Sec-Fetch-* request headers.
SEC_FETCH_MODES = ["navigate", "cors", "no-cors", "same-origin", "websocket"]
SEC_FETCH_DESTS = ["document", "script", "style", "image", "fetch", "empty"]
SEC_FETCH_SITES = ["none", "same-origin", "same-site", "cross-site"]

# Pre-serialized JSON request bodies.
JSON_BODIES = [
    '{"username":"admin","password":"password123"}',
    '{"query":"SELECT * FROM users","limit":100}',
    '{"email":"test@example.com","action":"subscribe"}',
    '{"data":{"key":"value","nested":{"array":[1,2,3]}}}',
]

# Pre-encoded application/x-www-form-urlencoded request bodies.
FORM_BODIES = [
    "username=admin&password=admin",
    "email=test%40example.com&message=Hello+World",
    "q=test+query&submit=Search",
]

# ---------------------------------------------------------------------------
# IP pools for X-Forwarded-For (mod_remoteip uses this as src_ip in logs)
# Ranges must match iplocate-ip-to-asn.csv entries so ASN lookup succeeds.
#
# HUMAN — residential ISP ranges → asn_label='human' → feeds ML baseline
# The pool is assembled range by range, then shuffled in place.

# OVH FR (ASN 16276) — 91.121.0.0/16
HUMAN_IPS = [
    f"91.121.{third}.{last}"
    for third in range(12)
    for last in range(1, 60)
]
# SFR FR (ASN 15557) — 78.41.0.0/16
HUMAN_IPS += [
    f"78.41.{third}.{last}"
    for third in range(4)
    for last in range(1, 40)
]
# Orange FR (ASN 3215) — 90.x.x.x
HUMAN_IPS += [
    f"90.{second}.{third}.{last}"
    for second in range(10, 14)
    for third in range(4)
    for last in range(1, 20)
]
random.shuffle(HUMAN_IPS)

# DATACENTER/BOT — scanner/Tor ranges → asn_label='datacenter' → ML scores these

# Tor exits / Accelerated-IT (ASN 210644) — 185.220.101.x
BOT_IPS = [f"185.220.101.{last}" for last in range(1, 101)]
# Contabo scanner (ASN 209083) — 45.155.205.x
BOT_IPS += [f"45.155.205.{last}" for last in range(1, 51)]
# Reg.ru (ASN 197695) — 193.32.162.x
BOT_IPS += [f"193.32.162.{last}" for last in range(1, 31)]

# Legacy alias (kept for existing code): 20 human addresses followed by
# 10 bot addresses.
XFF_IPS = [*HUMAN_IPS[:20], *BOT_IPS[:10]]

# ---------------------------------------------------------------------------
# SSL context variants — different cipher/protocol settings produce different
# TLS ClientHello messages (and thus different JA4/JA3 fingerprints).
# ---------------------------------------------------------------------------

def make_ssl_contexts():
    """Build the pool of TLS client contexts used for HTTPS scenarios.

    Every context has hostname checking and certificate verification
    disabled. The "default" context is always present; the restricted
    variants (TLS 1.2 only, TLS 1.3 only, reduced cipher list) are added
    on a best-effort basis and skipped when the local ssl module rejects
    the restriction.

    Returns:
        A list of ``(label, ssl.SSLContext)`` tuples.
    """
    # Errors expected when a restriction is not supported locally:
    # ssl.SSLError is an OSError subclass (e.g. no usable cipher),
    # ValueError is raised for unsupported protocol versions.
    unsupported = (OSError, ValueError, AttributeError)

    contexts = []

    # Default context (OS defaults)
    ctx = ssl.create_default_context()
    ctx.check_hostname = False  # must be cleared before verify_mode
    ctx.verify_mode = ssl.CERT_NONE
    contexts.append(("default", ctx))

    # TLS 1.2 only
    try:
        ctx12 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx12.check_hostname = False
        ctx12.verify_mode = ssl.CERT_NONE
        ctx12.maximum_version = ssl.TLSVersion.TLSv1_2
        ctx12.minimum_version = ssl.TLSVersion.TLSv1_2
        contexts.append(("tls12", ctx12))
    except unsupported:
        pass  # variant unavailable here; continue with the others

    # TLS 1.3 only
    try:
        ctx13 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ctx13.check_hostname = False
        ctx13.verify_mode = ssl.CERT_NONE
        ctx13.minimum_version = ssl.TLSVersion.TLSv1_3
        contexts.append(("tls13", ctx13))
    except unsupported:
        pass

    # Reduced cipher set
    try:
        ctx_few = ssl.create_default_context()
        ctx_few.check_hostname = False
        ctx_few.verify_mode = ssl.CERT_NONE
        ctx_few.set_ciphers("AES128-GCM-SHA256:AES256-GCM-SHA384")
        contexts.append(("few_ciphers", ctx_few))
    except unsupported:
        pass

    return contexts


# Built once at import time and shared by every scenario.
SSL_CONTEXTS = make_ssl_contexts()


# ---------------------------------------------------------------------------
# Request builder
# ---------------------------------------------------------------------------

@dataclass
class RequestScenario:
    """One request to send, plus the label used for dispatch and reporting."""

    method: str                               # HTTP method
    url: str                                  # absolute URL (scheme, host, port, path)
    headers: dict                             # request headers
    body: Optional[bytes] = None              # request body, if any
    ssl_ctx: Optional[ssl.SSLContext] = None  # TLS context for HTTPS requests
    label: str = ""                           # scenario name; also selects the sender


def _random_headers(ua: str, is_bot: bool = False, xff_ip: Optional[str] = None) -> dict:
    """Return a randomized header set for one request.

    Args:
        ua: Value of the User-Agent header.
        is_bot: When true, Sec-Fetch-* headers are never added and the
            fallback X-Forwarded-For address is drawn from BOT_IPS.
        xff_ip: X-Forwarded-For value; when falsy an address is drawn from
            BOT_IPS or HUMAN_IPS depending on ``is_bot``.

    Returns:
        A new dict of header names to values.
    """
    headers = {
        "User-Agent": ua,
        "Accept": random.choice([
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "application/json, text/plain, */*",
            "*/*",
            "text/html,application/xhtml+xml,*/*;q=0.8",
        ]),
        "Accept-Encoding": random.choice([
            "gzip, deflate, br",
            "gzip, deflate",
            "identity",
            "br;q=1.0, gzip;q=0.8",
        ]),
        "Accept-Language": random.choice(ACCEPT_LANGS),
        "Connection": random.choice(["keep-alive", "close"]),
        # X-Forwarded-For: always set so mod_remoteip gives each request a
        # distinct src_ip in the ClickHouse pipeline (r->useragent_ip).
        "X-Forwarded-For": xff_ip or (
            random.choice(BOT_IPS) if is_bot else random.choice(HUMAN_IPS)
        ),
    }

    # Sec-Fetch headers (browsers only)
    if not is_bot and random.random() < 0.7:
        headers["Sec-Fetch-Mode"] = random.choice(SEC_FETCH_MODES)
        headers["Sec-Fetch-Dest"] = random.choice(SEC_FETCH_DESTS)
        headers["Sec-Fetch-Site"] = random.choice(SEC_FETCH_SITES)

    # Referer sometimes (the empty pool entry means "no Referer")
    ref = random.choice(REFERERS)
    if ref:
        headers["Referer"] = ref

    # Cache headers
    if random.random() < 0.4:
        headers["Cache-Control"] = random.choice(["no-cache", "max-age=0", "no-store"])

    # Cookie sometimes
    if random.random() < 0.2:
        session_id = "%032x" % random.getrandbits(128)
        headers["Cookie"] = f"session={session_id}; lang={random.choice(['fr','en','de'])}"

    return headers


def build_scenarios(host: str, http_port: int, https_port: int, count: int) -> list:
    """Build a shuffled list of varied request scenarios.

    Each traffic family gets a fixed share of ``count``; the HTTP/1.0 and
    HTTP/2 families additionally have a minimum size. The combined list is
    shuffled and truncated to ``count`` entries.

    Args:
        host: Target host name or address.
        http_port: Port used for plain-HTTP URLs.
        https_port: Port used for HTTPS URLs.
        count: Number of scenarios to return.

    Returns:
        A list of RequestScenario objects in random order.
    """
    scenarios = []
    base_http = f"http://{host}:{http_port}"
    base_https = f"https://{host}:{https_port}"

    # Combined pools, built once instead of on every loop iteration.
    any_agent = BROWSERS + BOTS
    any_ip = HUMAN_IPS + BOT_IPS

    # --- Browser-like HTTPS GET requests (most common) ---
    for _ in range(int(count * 0.30)):
        ua = random.choice(BROWSERS)
        path = random.choice(PATHS)
        qs = random.choice(QUERY_PARAMS)
        ssl_name, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="GET",
            url=f"{base_https}{path}{qs}",
            headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
            ssl_ctx=ssl_ctx,
            label=f"browser-https-{ssl_name}",
        ))

    # --- Browser-like HTTP GET requests ---
    for _ in range(int(count * 0.10)):
        ua = random.choice(BROWSERS)
        path = random.choice(PATHS)
        qs = random.choice(QUERY_PARAMS)
        scenarios.append(RequestScenario(
            method="GET",
            url=f"{base_http}{path}{qs}",
            headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
            label="browser-http",
        ))

    # --- Bot / crawler HTTPS requests ---
    for _ in range(int(count * 0.15)):
        ua = random.choice(BOTS)
        path = random.choice(PATHS)
        ssl_name, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="GET",
            url=f"{base_https}{path}",
            headers=_random_headers(ua, is_bot=True, xff_ip=random.choice(BOT_IPS)),
            ssl_ctx=ssl_ctx,
            label=f"bot-https-{ssl_name}",
        ))

    # --- Bot HTTP requests ---
    for _ in range(int(count * 0.05)):
        ua = random.choice(BOTS)
        path = random.choice(PATHS)
        scenarios.append(RequestScenario(
            method="GET",
            url=f"{base_http}{path}",
            headers=_random_headers(ua, is_bot=True, xff_ip=random.choice(BOT_IPS)),
            label="bot-http",
        ))

    # --- POST HTTPS with JSON body ---
    for _ in range(int(count * 0.15)):
        ua = random.choice(BROWSERS)
        body_str = random.choice(JSON_BODIES)
        body = body_str.encode()
        hdrs = _random_headers(ua, xff_ip=random.choice(HUMAN_IPS))
        hdrs["Content-Type"] = "application/json"
        hdrs["Content-Length"] = str(len(body))
        _, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="POST",
            url=f"{base_https}{random.choice(['/login','/api/v1/users','/api/v2/metrics','/health'])}",
            headers=hdrs,
            body=body,
            ssl_ctx=ssl_ctx,
            label="post-json-https",
        ))

    # --- POST HTTP with form data ---
    # NOTE(review): the User-Agent may be a browser while the source address
    # always comes from BOT_IPS and is_bot stays False — confirm this mix is
    # intended.
    for _ in range(int(count * 0.05)):
        ua = random.choice(any_agent)
        body_str = random.choice(FORM_BODIES)
        body = body_str.encode()
        hdrs = _random_headers(ua, xff_ip=random.choice(BOT_IPS))
        hdrs["Content-Type"] = "application/x-www-form-urlencoded"
        hdrs["Content-Length"] = str(len(body))
        scenarios.append(RequestScenario(
            method="POST",
            url=f"{base_http}/login",
            headers=hdrs,
            body=body,
            label="post-form-http",
        ))

    # --- HEAD requests ---
    for _ in range(int(count * 0.05)):
        ua = random.choice(any_agent)
        _, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="HEAD",
            url=f"{base_https}{random.choice(PATHS)}",
            headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
            ssl_ctx=ssl_ctx,
            label="head-https",
        ))

    # --- PUT / PATCH ---
    for _ in range(int(count * 0.05)):
        ua = random.choice(BROWSERS)
        body = json.dumps({"id": random.randint(1, 999), "value": "updated"}).encode()
        hdrs = _random_headers(ua, xff_ip=random.choice(HUMAN_IPS))
        hdrs["Content-Type"] = "application/json"
        hdrs["Content-Length"] = str(len(body))
        _, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method=random.choice(["PUT", "PATCH"]),
            url=f"{base_https}/api/v1/users/{random.randint(1,999)}",
            headers=hdrs,
            body=body,
            ssl_ctx=ssl_ctx,
            label="put-patch-https",
        ))

    # --- DELETE ---
    for _ in range(int(count * 0.02)):
        ua = random.choice(BROWSERS)
        _, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="DELETE",
            url=f"{base_https}/api/v1/users/{random.randint(1,999)}",
            headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
            ssl_ctx=ssl_ctx,
            label="delete-https",
        ))

    # --- OPTIONS (CORS preflight) ---
    for _ in range(int(count * 0.03)):
        ua = random.choice(BROWSERS)
        hdrs = _random_headers(ua, xff_ip=random.choice(HUMAN_IPS))
        hdrs["Origin"] = random.choice(["https://app.example.com", "http://localhost:3000"])
        hdrs["Access-Control-Request-Method"] = random.choice(["POST", "PUT", "DELETE"])
        _, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="OPTIONS",
            url=f"{base_https}{random.choice(['/api/v1/users','/api/v2/metrics'])}",
            headers=hdrs,
            ssl_ctx=ssl_ctx,
            label="options-cors",
        ))

    # --- Explicit HTTP/1.0 over plain HTTP (port 80) ---
    # http.client lets us force the HTTP/1.0 protocol via _http_vsn; the
    # "http10" label prefix routes these scenarios to that sender.
    h10_count = max(10, int(count * 0.05))
    for _ in range(h10_count):
        ua = random.choice(any_agent)
        path = random.choice(["/", "/health", "/index.html", "/robots.txt"])
        scenarios.append(RequestScenario(
            method="GET",
            url=f"{base_http}{path}",
            headers=_random_headers(ua, xff_ip=random.choice(any_ip)),
            label="http10-plain",
        ))

    # --- Explicit HTTP/1.0 over HTTPS ---
    for _ in range(max(5, int(count * 0.03))):
        ua = random.choice(any_agent)
        _, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="GET",
            url=f"{base_https}/health",
            headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
            ssl_ctx=ssl_ctx,
            label="http10-tls",
        ))

    # --- Explicit HTTP/2 (httpx[http2]) ---
    # NOTE(review): the body is chosen independently of the method, so some
    # GETs carry a body and most POSTs do not — confirm this is intended.
    h2_count = max(20, int(count * 0.10))
    for _ in range(h2_count):
        ua = random.choice(BROWSERS)
        path = random.choice(PATHS)
        qs = random.choice(QUERY_PARAMS)
        scenarios.append(RequestScenario(
            method=random.choice(["GET", "GET", "GET", "POST"]),
            url=f"{base_https}{path}{qs}",
            headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
            body=json.dumps({"h2": True}).encode() if random.random() < 0.2 else None,
            label="http2-explicit",
        ))

    # Fill remaining with browser HTTPS GETs.
    # NOTE(review): with the current shares and minimum bucket sizes the list
    # already appears to exceed `count` here, so this is only a safety net.
    while len(scenarios) < count:
        ua = random.choice(BROWSERS)
        _, ssl_ctx = random.choice(SSL_CONTEXTS)
        scenarios.append(RequestScenario(
            method="GET",
            url=f"{base_https}/health?filler={random.randint(1,9999)}",
            headers=_random_headers(ua, xff_ip=random.choice(HUMAN_IPS)),
            ssl_ctx=ssl_ctx,
            label="filler-https",
        ))

    random.shuffle(scenarios)
    return scenarios[:count]


# ---------------------------------------------------------------------------
# Executor
# ---------------------------------------------------------------------------

# NOTE(review): not read or updated by any function in this file; run()
# keeps its own counters. Kept in case external code refers to it.
stats = {"ok": 0, "err": 0, "by_label": {}}


def _send_http10(scenario: RequestScenario) -> dict:
    """Send one request as plain HTTP/1.0 via http.client.

    Args:
        scenario: Request to send; ``ssl_ctx`` is used for https URLs, with a
            fresh default context as fallback.

    Returns:
        A result dict with ``ok``, ``label`` and ``ms``, plus ``status`` on
        success or ``error`` (truncated to 80 characters) on failure.
    """
    from urllib.parse import urlparse

    t0 = time.monotonic()
    # Bound before the try so the cleanup below is safe even when the failure
    # happens before a connection object exists (e.g. an unparsable URL).
    conn = None
    try:
        parsed = urlparse(scenario.url)
        host = parsed.hostname
        port = parsed.port or (443 if parsed.scheme == "https" else 80)
        path = parsed.path or "/"
        if parsed.query:
            path += "?" + parsed.query

        if parsed.scheme == "https":
            ctx = scenario.ssl_ctx or ssl.create_default_context()
            # Disable verification (hostname check first, as ssl requires).
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            conn = http.client.HTTPSConnection(host, port, timeout=5, context=ctx)
        else:
            conn = http.client.HTTPConnection(host, port, timeout=5)

        # Force HTTP/1.0 on the request line
        conn._http_vsn = 10
        conn._http_vsn_str = "HTTP/1.0"

        # The scenario's Connection header is not forwarded on this path.
        hdrs = {k: v for k, v in scenario.headers.items() if k.lower() != "connection"}
        conn.request(scenario.method, path, body=scenario.body, headers=hdrs)
        resp = conn.getresponse()
        resp.read(4096)
        return {"ok": True, "status": resp.status, "label": scenario.label,
                "ms": int((time.monotonic() - t0) * 1000)}
    except Exception as e:
        return {"ok": False, "error": str(e)[:80], "label": scenario.label,
                "ms": int((time.monotonic() - t0) * 1000)}
    finally:
        if conn is not None:
            try:
                conn.close()
            except OSError:
                pass  # best-effort cleanup; the result is already decided


def _send_http2(scenario: RequestScenario) -> dict:
    """Send one request through httpx with HTTP/2 enabled (ALPN h2 negotiation).

    httpx is imported lazily inside the try block, so a missing dependency is
    reported as a failed request rather than raising.

    Args:
        scenario: Request to send; its ``ssl_ctx`` is not used on this path.

    Returns:
        A result dict with ``ok``, ``label`` and ``ms``, plus ``status`` and
        ``http_version`` on success or ``error`` on failure.
    """
    t0 = time.monotonic()
    try:
        import httpx
        with httpx.Client(http2=True, verify=False, timeout=5.0) as client:
            # Connection and Content-Length are dropped before handing the
            # headers to httpx.
            hdrs = {k: v for k, v in scenario.headers.items()
                    if k.lower() not in ("connection", "content-length")}
            resp = client.request(
                method=scenario.method,
                url=scenario.url,
                headers=hdrs,
                content=scenario.body,
            )
            return {"ok": True, "status": resp.status_code, "label": scenario.label,
                    "ms": int((time.monotonic() - t0) * 1000),
                    "http_version": resp.http_version}
    except Exception as e:
        return {"ok": False, "error": str(e)[:80], "label": scenario.label,
                "ms": int((time.monotonic() - t0) * 1000)}


def send_request(scenario: RequestScenario) -> dict:
    """Dispatch a scenario to the HTTP/1.0, HTTP/2 or default urllib sender.

    The sender is selected from the label: a ``http10`` prefix selects
    HTTP/1.0, ``http2-explicit`` selects HTTP/2, anything else goes through
    urllib. An HTTP error status still counts as a successful exchange.

    Args:
        scenario: Request to send.

    Returns:
        A result dict with ``ok``, ``label`` and ``ms``, plus ``status`` on
        success or ``error`` on failure.
    """
    if scenario.label.startswith("http10"):
        return _send_http10(scenario)
    if scenario.label == "http2-explicit":
        return _send_http2(scenario)

    # HTTP/1.1 via urllib (existing path)
    t0 = time.monotonic()
    try:
        req = urllib.request.Request(
            url=scenario.url,
            data=scenario.body,
            method=scenario.method,
            headers=scenario.headers,
        )
        ctx = scenario.ssl_ctx
        with urllib.request.urlopen(req, context=ctx, timeout=5) as resp:
            _ = resp.read(4096)
            return {"ok": True, "status": resp.status, "label": scenario.label,
                    "ms": int((time.monotonic() - t0) * 1000)}
    except urllib.error.HTTPError as e:
        status = e.code
        # The exception wraps the server response; close it so the socket is
        # released now rather than whenever the object is collected.
        e.close()
        return {"ok": True, "status": status, "label": scenario.label,
                "ms": int((time.monotonic() - t0) * 1000)}
    except Exception as e:
        return {"ok": False, "error": str(e)[:80], "label": scenario.label,
                "ms": int((time.monotonic() - t0) * 1000)}


def run(host: str, http_port: int, https_port: int, total: int, workers: int):
    """Generate the scenarios, send them concurrently and print a summary.

    Args:
        host: Target host name or address.
        http_port: Port for plain-HTTP requests.
        https_port: Port for HTTPS requests.
        total: Number of requests to send.
        workers: Size of the thread pool.

    Returns:
        True when no request failed or when more than 80% succeeded.
    """
    scenarios = build_scenarios(host, http_port, https_port, total)
    print(f"[traffic-gen] Sending {len(scenarios)} requests to {host} "
          f"(http:{http_port} https:{https_port}) with {workers} workers")

    label_counts: dict = {}
    ok = err = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(send_request, s) for s in scenarios]
        for fut in concurrent.futures.as_completed(futures):
            res = fut.result()
            lbl = res.get("label", "?")
            label_counts[lbl] = label_counts.get(lbl, 0) + 1
            if res["ok"]:
                ok += 1
            else:
                err += 1
                print(f"[traffic-gen] WARN {lbl}: {res.get('error','?')}")

    print(f"[traffic-gen] Done: {ok} OK, {err} errors")
    print("[traffic-gen] Breakdown by scenario:")
    for lbl, cnt in sorted(label_counts.items()):
        print(f"  {lbl:35s} {cnt:4d}")

    # The ratio is only evaluated when err > 0, so it never divides by zero.
    return err == 0 or (ok / (ok + err)) > 0.8


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Realistic traffic generator")
    parser.add_argument("--host", default="platform")
    parser.add_argument("--http-port", type=int, default=80)
    parser.add_argument("--https-port", type=int, default=443)
    parser.add_argument("--requests", type=int, default=500)
    parser.add_argument("--workers", type=int, default=10)
    args = parser.parse_args()

    success = run(args.host, args.http_port, args.https_port, args.requests, args.workers)
    # Exit status 0 on success, 1 otherwise.
    raise SystemExit(0 if success else 1)