From 36b5065a0aa6daf46e19b068fc04d9eafec2be9c Mon Sep 17 00:00:00 2001 From: Jacquin Antoine Date: Thu, 16 Apr 2026 14:25:24 +0200 Subject: [PATCH] feat(e2e): add multi-IP endpoint architecture with dedicated traffic VM Replace single-service-per-endpoint with all-ips mode running nginx, apache, and hitch+varnish simultaneously on 3 dedicated IPs per VM (eth1 alias IPs). Add a dedicated traffic VM with curl-impersonate for realistic TLS fingerprints, parallelized traffic generation, and paired SNI_HOSTS/TARGET_IPS lists for per-VM per-service hostname identification (e.g. rocky9-nginx-platform.test). Key changes: - run-tests-vm.sh: add setup_all_ips(), IP-specific Listen/bind directives with reset-before-apply pattern, graceful service availability checks - run-e2e-test.sh: traffic VM architecture, all-ips mode, eth1 network, paired IP/SNI lists, updated cleanup for alias IPs - generate-traffic.sh: parallel background jobs, curl-impersonate detection, auto source interface detection via ip route get, Host header in HTTP traffic - Vagrantfile: add traffic VM with provision-traffic.sh - provision-traffic.sh: install curl-impersonate and httpx for traffic gen - test-rpm.sh: multi-interface TC check, updated ja4ebpf config - clickhouse-init.sh: load CSV stubs for Anubis/bot-networks dictionaries - Remove obsolete correlator/sentinel/mod-reqin-log docs - Add h2_settings_ack column to http_logs schema - Upgrade Go toolchain to 1.25.0 Co-Authored-By: Claude Opus 4.6 --- docs/services/correlator.md | 220 ----------- docs/services/mod-reqin-log.md | 200 ---------- docs/services/sentinel.md | 247 ------------- go.work | 2 +- go.work.sum | 2 + services/ja4ebpf/go.mod | 8 +- services/ja4ebpf/go.sum | 14 +- shared/clickhouse/04_mv_http_logs.sql | 2 + tests/integration/platform/clickhouse-init.sh | 30 ++ tests/vm/.vagrant/bundler/global.sol | 2 +- tests/vm/Vagrantfile | 20 + tests/vm/analysis/docker-compose.yml | 2 +- tests/vm/generate-traffic.sh | 191 +++++----- tests/vm/provision-traffic.sh | 52 +++ tests/vm/run-e2e-test.sh | 341 ++++++++++++------ tests/vm/run-tests-vm.sh | 243 +++++++++++-- tests/vm/test-rpm.sh | 22 +- 17 files changed, 674 insertions(+), 924 deletions(-) delete mode 100644 docs/services/correlator.md delete mode 100644 docs/services/mod-reqin-log.md delete mode 100644 docs/services/sentinel.md create mode 100755 tests/vm/provision-traffic.sh diff --git a/docs/services/correlator.md b/docs/services/correlator.md deleted file mode 100644 index a36723f..0000000 --- a/docs/services/correlator.md +++ /dev/null @@ -1,220 +0,0 @@ -# Correlator - -The correlator (`logcorrelator`) is a Go daemon that joins HTTP events from [mod-reqin-log](mod-reqin-log.md) (source A) with TLS/network events from [sentinel](sentinel.md) (source B) into unified correlated log entries. It uses a `src_ip:src_port` key with a configurable time window to match events, supports HTTP Keep-Alive connections, and writes results to ClickHouse, file, and/or stdout. - -## Correlation Algorithm - -### Key Matching - -Events are correlated by their **correlation key**: `src_ip:src_port`. Since a client's ephemeral source port uniquely identifies a TCP connection, matching on this pair reliably joins the HTTP request (seen by Apache) with the TLS handshake (seen by sentinel) from the same connection. - -### Time Window - -Events must arrive within the configured time window (default: **10 seconds**) to be matched. This accounts for: -- Processing latency between Apache and sentinel -- Packet capture buffering -- UNIX socket delivery ordering - -### Keep-Alive Support - -In `one_to_many` mode (default), a single TLS handshake event (source B) can match **multiple** HTTP requests (source A) on the same TCP connection: - -1. Source B event arrives → buffered with TTL (default: 120 s) -2. Source A event arrives with same key → correlation match, B event TTL resets -3. Next A event on same connection → matches same B event (TTL resets again) -4. Connection closes → B event expires after TTL - -Each A event within a Keep-Alive session gets an incrementing `keepalives` counter. - -### Orphan Handling - -- **Source A orphans** (HTTP without TLS match): Emitted after `apache_emit_delay_ms` (default: 500 ms) with `correlated=false`, `orphan_side=A` -- **Source B orphans** (TLS without HTTP match): Not emitted by default (`network_emit: false`) -- **Buffer overflow**: Oldest events are rotated out and emitted as orphans - -### Field Merging - -When two events are correlated: -- HTTP fields (method, path, headers, etc.) come from source A -- TLS/network fields (JA4, JA3, IP/TCP metadata) come from source B -- On field collision with different values: both are kept with `a_` and `b_` prefixes - -## Configuration Reference - -Configuration is loaded from a YAML file (default: `/etc/logcorrelator/logcorrelator.yml`). - -### Log Settings - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `log.level` | string | `INFO` | Log level: `DEBUG`, `INFO`, `WARN`, `ERROR` | - -### Input Settings - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `inputs.unix_sockets[].name` | string | — | Human-readable source name (e.g., `http`, `network`) | -| `inputs.unix_sockets[].path` | string | — | UNIX socket path to listen on | -| `inputs.unix_sockets[].format` | string | `json` | Input format | -| `inputs.unix_sockets[].source_type` | string | — | Event source: `A` (HTTP), `B` (Network) | -| `inputs.unix_sockets[].socket_permissions` | string | `0666` | Socket file permissions (octal) | - -### Output Settings - -#### File Output - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `outputs.file.enabled` | bool | `true` | Enable file output | -| `outputs.file.path` | string | `/var/log/logcorrelator/correlated.log` | Output file path | - -#### ClickHouse Output - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `outputs.clickhouse.enabled` | bool | `false` | Enable ClickHouse output | -| `outputs.clickhouse.dsn` | string | — | ClickHouse DSN (e.g., `clickhouse://user:pass@host:9000/db`) | -| `outputs.clickhouse.table` | string | — | Target table name | -| `outputs.clickhouse.batch_size` | int | `500` | Records per batch insert | -| `outputs.clickhouse.flush_interval_ms` | int | `200` | Flush interval in milliseconds | -| `outputs.clickhouse.max_buffer_size` | int | `5000` | Maximum in-memory buffer size | -| `outputs.clickhouse.drop_on_overflow` | bool | `true` | Drop records when buffer is full | -| `outputs.clickhouse.async_insert` | bool | `true` | Use ClickHouse async inserts | -| `outputs.clickhouse.timeout_ms` | int | `1000` | Operation timeout in milliseconds | - -#### Stdout Output - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `outputs.stdout.enabled` | bool | `false` | Enable stdout output | -| `outputs.stdout.level` | string | — | Output verbosity filter | - -### Correlation Settings - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `correlation.time_window.value` | int | `10` | Time window value | -| `correlation.time_window.unit` | string | `s` | Time window unit (`s`, `ms`) | -| `correlation.orphan_policy.apache_always_emit` | bool | `true` | Always emit A events even without B match | -| `correlation.orphan_policy.apache_emit_delay_ms` | int | `500` | Delay before emitting orphan A (ms) | -| `correlation.orphan_policy.network_emit` | bool | `false` | Emit B events without A match | -| `correlation.matching.mode` | string | `one_to_many` | Matching mode: `one_to_one` or `one_to_many` | -| `correlation.buffers.max_http_items` | int | `10000` | Max buffered HTTP (source A) events | -| `correlation.buffers.max_network_items` | int | `20000` | Max buffered network (source B) events | -| `correlation.ttl.network_ttl_s` | int | `120` | TTL for source B events (seconds) | -| `correlation.exclude_source_ips` | []string | `[]` | IPs or CIDRs to exclude from correlation | -| `correlation.include_dest_ports` | []int | `[]` | If non-empty, only correlate events on these ports | - -### Metrics Settings - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `metrics.enabled` | bool | `false` | Enable metrics HTTP server | -| `metrics.addr` | string | `:8080` | Metrics server listen address | - -## Input Events - -### Source A (HTTP — from mod-reqin-log) - -JSON fields: `time`, `src_ip`, `src_port`, `dst_ip`, `dst_port`, `method`, `scheme`, `host`, `path`, `query`, `http_version`, `client_headers`, `header_*` - -### Source B (Network — from sentinel) - -JSON fields: `src_ip`, `src_port`, `dst_ip`, `dst_port`, `ip_meta_*`, `tcp_meta_*`, `tls_version`, `tls_sni`, `tls_alpn`, `ja4`, `ja3`, `ja3_hash`, `conn_id`, `syn_to_clienthello_ms`, `timestamp` - -## Output CorrelatedLog JSON Schema - -```json -{ - "timestamp": "2026-03-09T14:30:00Z", - "src_ip": "203.0.113.42", - "src_port": 52341, - "dst_ip": "192.168.1.10", - "dst_port": 443, - "correlated": true, - "method": "GET", - "host": "example.com", - "path": "/api/v1/users", - "ja4": "t13d1516h2_8daaf6152771_b0da82dd1658", - "ja3_hash": "e7d705a3286e19ea42f587b344ee6865", - "ip_meta_ttl": 64, - "tcp_meta_window_size": 65535, - "tls_version": "1.3", - "tls_sni": "example.com", - "tls_alpn": "h2", - "header_User-Agent": "Mozilla/5.0 ...", - "keepalives": 3 -} -``` - -Core fields are always present; additional fields are merged from A and B event raw data. - -## ClickHouse Sink - -- **Protocol**: ClickHouse native TCP (port 9000) via `clickhouse-go/v2` -- **Target table**: `http_logs_raw` (raw JSON stored, then parsed by materialized views) -- **Batch inserts**: Buffered up to `batch_size` records (default 500) -- **Flush interval**: Default 200 ms timer triggers flush if batch not full -- **Retry behavior**: Up to 3 retries with exponential backoff (100 ms base) -- **Connection ping**: 5-second timeout on startup -- **Buffer overflow**: Records dropped when buffer exceeds `max_buffer_size` (configurable) - -## Metrics HTTP Server - -When `metrics.enabled: true`, exposes: - -| Endpoint | Description | -|----------|-------------| -| `GET /metrics` | Correlation metrics as JSON (events received, correlated, orphans, buffer sizes) | -| `GET /health` | Health check endpoint | - -## systemd Service - -```ini -[Unit] -Description=logcorrelator service -After=network.target - -[Service] -Type=simple -User=logcorrelator -Group=logcorrelator -ExecStart=/usr/bin/logcorrelator -config /etc/logcorrelator/logcorrelator.yml -ExecReload=/bin/kill -HUP $MAINPID -Restart=on-failure -RestartSec=5 -RuntimeDirectory=logcorrelator -RuntimeDirectoryMode=0755 - -# Security hardening -NoNewPrivileges=true -ProtectSystem=strict -ProtectHome=true -ReadWritePaths=/var/log/logcorrelator /etc/logcorrelator - -# Resource limits -LimitNOFILE=65536 -TimeoutStartSec=10 -TimeoutStopSec=30 - -[Install] -WantedBy=multi-user.target -``` - -### Security Hardening - -- Runs as dedicated `logcorrelator` user/group -- `NoNewPrivileges=true` — prevents privilege escalation -- `ProtectSystem=strict` — read-only filesystem except `ReadWritePaths` -- `ProtectHome=true` — no access to home directories -- `RuntimeDirectory=logcorrelator` — systemd creates socket directory with correct ownership - -## RPM Package Contents - -| Path | Description | -|------|-------------| -| `/usr/bin/logcorrelator` | Binary | -| `/etc/logcorrelator/logcorrelator.yml` | Configuration file | -| `/usr/lib/systemd/system/logcorrelator.service` | systemd unit | -| `/var/log/logcorrelator/` | Log directory | -| `/var/run/logcorrelator/` | Socket directory (RuntimeDirectory) | diff --git a/docs/services/mod-reqin-log.md b/docs/services/mod-reqin-log.md deleted file mode 100644 index 6f348b0..0000000 --- a/docs/services/mod-reqin-log.md +++ /dev/null @@ -1,200 +0,0 @@ -# mod-reqin-log - -`mod_reqin_log` is an Apache HTTPD module (C shared object) that captures HTTP request metadata and sends it as JSON to a UNIX datagram socket. It serves as the HTTP-layer ingestion point for the ja4-platform pipeline, feeding request data to the [correlator](correlator.md) for joining with TLS fingerprint data from [sentinel](sentinel.md). - -## Purpose - -Apache processes HTTP requests after TLS termination, so it has access to the decoded HTTP method, path, headers, and client IP/port. mod-reqin-log hooks into the `post_read_request` phase to serialize this data immediately, before any rewrite or auth module modifies the request. - -## Apache Directives Reference - -All directives are server-level (`RSRC_CONF`): - -| Directive | Type | Default | Description | -|-----------|------|---------|-------------| -| `JsonSockLogEnabled` | Flag (On/Off) | Off | Enable or disable the module | -| `JsonSockLogSocket` | String | — | UNIX domain socket path for JSON output | -| `JsonSockLogHeaders` | String list | — | HTTP header names to log (repeatable) | -| `JsonSockLogMaxHeaders` | Integer | `25` | Maximum number of headers to log | -| `JsonSockLogMaxHeaderValueLen` | Integer | `256` | Maximum length of each header value (truncated beyond) | -| `JsonSockLogReconnectInterval` | Integer (seconds) | `10` | Minimum seconds between reconnection attempts | -| `JsonSockLogErrorReportInterval` | Integer (seconds) | `10` | Minimum seconds between error log entries (throttling) | -| `JsonSockLogLevel` | String | `WARNING` | Module log level: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `EMERG` | - -### Example httpd.conf - -```apache -LoadModule reqin_log_module modules/mod_reqin_log.so - -JsonSockLogEnabled On -JsonSockLogSocket /var/run/logcorrelator/http.socket -JsonSockLogHeaders User-Agent Accept Accept-Encoding Accept-Language -JsonSockLogHeaders Content-Type X-Request-Id X-Trace-Id X-Forwarded-For -JsonSockLogHeaders Sec-CH-UA Sec-CH-UA-Mobile Sec-CH-UA-Platform -JsonSockLogHeaders Sec-Fetch-Dest Sec-Fetch-Mode Sec-Fetch-Site -JsonSockLogMaxHeaders 25 -JsonSockLogMaxHeaderValueLen 256 -JsonSockLogReconnectInterval 10 -JsonSockLogErrorReportInterval 10 -JsonSockLogLevel WARNING -``` - -## Output JSON Schema - -Each HTTP request is serialized as a flat JSON object and sent as a single UNIX datagram: - -```json -{ - "time": "2026-03-09T14:30:00Z", - "src_ip": "203.0.113.42", - "src_port": 52341, - "dst_ip": "192.168.1.10", - "dst_port": 443, - "method": "GET", - "scheme": "https", - "host": "example.com", - "path": "/api/v1/users", - "query": "page=1&limit=20", - "http_version": "HTTP/2.0", - "client_headers": "User-Agent,Accept,Accept-Encoding,Accept-Language", - "header_User-Agent": "Mozilla/5.0 ...", - "header_Accept": "text/html,application/xhtml+xml", - "header_Accept-Encoding": "gzip, deflate, br", - "header_Accept-Language": "en-US,en;q=0.9", - "header_Sec-Fetch-Dest": "document", - "header_Sec-Fetch-Mode": "navigate", - "header_Sec-Fetch-Site": "none" -} -``` - -### Field Reference - -| Field | Type | Description | -|-------|------|-------------| -| `time` | string (ISO 8601) | Request timestamp (UTC) | -| `src_ip` | string | Client IP address | -| `src_port` | int | Client source port | -| `dst_ip` | string | Server IP address | -| `dst_port` | int | Server port | -| `method` | string | HTTP method (`GET`, `POST`, etc.) | -| `scheme` | string | URL scheme (`http` or `https`) | -| `host` | string | HTTP Host header value | -| `path` | string | Request URI path | -| `query` | string | Query string (without `?`) | -| `http_version` | string | HTTP version (`HTTP/1.1`, `HTTP/2.0`) | -| `client_headers` | string | Comma-separated list of header names sent by client (order preserved) | -| `header_` | string | Value of each configured header (one field per header) | - -### Sensitive Headers - -The following headers are **always excluded** from output regardless of `JsonSockLogHeaders`: - -- `Authorization` -- `Cookie` -- `Set-Cookie` -- `X-Api-Key` -- `X-Auth-Token` -- `Proxy-Authorization` -- `WWW-Authenticate` - -### Size Limits - -- Maximum JSON size: **64 KB** (prevents memory exhaustion DoS) -- Header values are truncated to `JsonSockLogMaxHeaderValueLen` bytes - -## Thread Safety - -mod-reqin-log is designed for Apache's `worker` and `event` MPMs (multi-threaded): - -- **Socket FD** is protected by an `apr_thread_mutex_t` (`fd_mutex`) -- **Per-child process state** includes the socket file descriptor, mutex, and error tracking -- **Error reporting** uses `LOG_THROTTLED` macro with timestamp-based deduplication -- All JSON serialization uses per-request pool allocation — no shared buffers - -### Architecture - -``` -Apache HTTPD process -├── child process 1 -│ ├── fd_mutex (apr_thread_mutex_t) -│ ├── socket_fd (shared across threads) -│ ├── thread 1 → post_read_request → serialize JSON → mutex lock → sendto() → unlock -│ ├── thread 2 → post_read_request → serialize JSON → mutex lock → sendto() → unlock -│ └── ... -├── child process 2 -│ ├── fd_mutex -│ ├── socket_fd (independent) -│ └── ... -``` - -## Reconnection Behavior - -- Socket is opened during `child_init` (per-child process startup) -- If the socket is unavailable at startup, connection is deferred -- On send failure, reconnection is attempted respecting `JsonSockLogReconnectInterval` -- Failed sends are silently dropped (HTTP request processing is not blocked) -- Error log entries are throttled by `JsonSockLogErrorReportInterval` -- Socket type: `SOCK_DGRAM` (connectionless UNIX datagram) -- Non-blocking sends with `MSG_NOSIGNAL` - -## Deployment - -### Installation via RPM - -```bash -rpm -ivh mod_reqin_log-1.0.19-1.el10.x86_64.rpm -``` - -### LoadModule Directive - -```apache -LoadModule reqin_log_module modules/mod_reqin_log.so -``` - -### Verifying Installation - -```bash -httpd -M | grep reqin_log -# Expected: reqin_log_module (shared) -``` - -## Build - -All builds run inside Docker: - -```bash -# Run unit tests -make test-mod-reqin-log - -# Build RPM packages (el8, el9, el10) -make rpm-mod-reqin-log -# RPMs in services/mod-reqin-log/dist/rpm/el{8,9,10}/ -``` - -### Local Build (requires Apache development headers) - -```bash -cd services/mod-reqin-log -make build # Compiles mod_reqin_log.so via apxs -make test # Runs unit tests -``` - -### Test Coverage - -Unit tests cover: -- JSON serialization (escaping, size limits, field output) -- Config parsing (all directives, edge cases) -- Header handling (sensitive header exclusion, max headers, truncation) -- Module integration (real Apache module hooks) - -## Source Files - -| File | Description | -|------|-------------| -| `src/mod_reqin_log.c` | Main module source | -| `src/mod_reqin_log.h` | Header with types, constants, defaults | -| `conf/mod_reqin_log.conf` | Example Apache configuration | -| `tests/unit/test_json_serialization.c` | JSON output tests | -| `tests/unit/test_config_parsing.c` | Directive parsing tests | -| `tests/unit/test_header_handling.c` | Header filtering tests | -| `tests/unit/test_module_real.c` | Integration tests | diff --git a/docs/services/sentinel.md b/docs/services/sentinel.md deleted file mode 100644 index fab1fe7..0000000 --- a/docs/services/sentinel.md +++ /dev/null @@ -1,247 +0,0 @@ -# Sentinel - -Sentinel (`ja4sentinel`) is a Go daemon that performs live network packet capture on a Linux server, extracts TLS ClientHello handshakes, generates JA4 and JA3 fingerprints, enriches them with IP/TCP metadata, and outputs structured JSON log records to configurable destinations (UNIX socket, file, or stdout). - -## Role in the Pipeline - -Sentinel is the **network-layer ingestion point**. It sits on the target server, captures TLS traffic via libpcap, and feeds fingerprinted events to the [correlator](correlator.md) through a UNIX datagram socket. - -``` -Network traffic (port 443/8443) - │ pcap - ▼ -┌───────────────┐ -│ sentinel │ -│ ┌─────────┐ │ -│ │ capture │──▶ Raw packets -│ └─────────┘ │ -│ ┌─────────┐ │ -│ │ tlsparse│──▶ TLS ClientHello extraction + TCP reassembly -│ └─────────┘ │ -│ ┌─────────┐ │ -│ │ finger- │──▶ JA4/JA3 fingerprint generation -│ │ print │ │ -│ └─────────┘ │ -│ ┌─────────┐ │ -│ │ output │──▶ UNIX socket / file / stdout -│ └─────────┘ │ -└───────────────┘ -``` - -## Architecture - -Sentinel uses a pipeline of goroutines: - -1. **Capture goroutine** — Opens pcap handle on the configured interface, applies BPF filter, reads raw packets into a buffered channel (`packet_buffer_size`). -2. **Packet processor goroutine** — Reads from the channel, feeds packets to the TLS parser, generates fingerprints, and writes output. -3. **Watchdog goroutine** — Sends systemd watchdog heartbeats at half the configured interval. -4. **Signal handler** — Listens for `SIGINT`/`SIGTERM` (graceful shutdown) and `SIGHUP` (log rotation). - -### Key Interfaces - -| Interface | Package | Description | -|-----------|---------|-------------| -| `Capture` | `internal/capture` | Packet capture via libpcap | -| `Parser` | `internal/tlsparse` | TCP reassembly + ClientHello extraction | -| `Engine` | `internal/fingerprint` | JA4/JA3 fingerprint generation | -| `Writer` | `internal/output` | Log record output (stdout, file, UNIX socket) | -| `MultiWriter` | `internal/output` | Fan-out to multiple writers | -| `Builder` | `internal/output` | Factory for constructing writers from config | - -## Configuration Reference - -Configuration is loaded from a YAML file (default: `config.yml`) with environment variable overrides. - -### Core Settings - -| Name | Type | Default | Env Override | Description | -|------|------|---------|-------------|-------------| -| `core.interface` | string | `any` | `JA4SENTINEL_INTERFACE` | Network interface to capture (`any` = all interfaces) | -| `core.listen_ports` | []uint16 | `[443]` | `JA4SENTINEL_PORTS` | TCP ports to monitor (comma-separated in env) | -| `core.bpf_filter` | string | `""` (auto) | `JA4SENTINEL_BPF_FILTER` | Custom BPF filter (empty = auto-generated) | -| `core.local_ips` | []string | `[]` (auto) | — | Local IPs to monitor (empty = auto-detect, excludes loopback) | -| `core.exclude_source_ips` | []string | `[]` | — | Source IPs or CIDRs to exclude (e.g., `["10.0.0.0/8"]`) | -| `core.flow_timeout_sec` | int | `30` | `JA4SENTINEL_FLOW_TIMEOUT` | Timeout for TLS handshake extraction (1–300) | -| `core.packet_buffer_size` | int | `1000` | `JA4SENTINEL_PACKET_BUFFER_SIZE` | Packet channel buffer size (1–1,000,000) | -| `core.log_level` | string | `info` | — | Log level: `debug`, `info`, `warn`, `error` (YAML only) | - -> **Note:** `log_level` is intentionally not overridable via environment variable (architecture decision since v1.1.12). - -### Output Settings - -Each output is an entry in the `outputs` array: - -| Name | Type | Default | Description | -|------|------|---------|-------------| -| `type` | string | — | Output type: `unix_socket`, `stdout`, `file` | -| `enabled` | bool | — | Whether this output is active | -| `async_buffer` | int | `1000` | Queue size for async writes | -| `params.socket_path` | string | — | Path for `unix_socket` type | -| `params.path` | string | — | File path for `file` type | - -### Example Configuration - -```yaml -core: - interface: any - listen_ports: [443, 8443] - bpf_filter: "" - local_ips: [] - exclude_source_ips: ["10.0.0.0/8", "192.168.1.1"] - flow_timeout_sec: 30 - packet_buffer_size: 1000 - log_level: info - -outputs: - - type: unix_socket - enabled: true - params: - socket_path: /var/run/logcorrelator/network.socket - - type: file - enabled: false - params: - path: /var/log/ja4sentinel/ja4.log -``` - -## Output Format (LogRecord JSON Schema) - -Each output record is a flat JSON object: - -```json -{ - "src_ip": "203.0.113.42", - "src_port": 52341, - "dst_ip": "192.168.1.10", - "dst_port": 443, - "ip_meta_ttl": 64, - "ip_meta_total_length": 583, - "ip_meta_id": 12345, - "ip_meta_df": true, - "tcp_meta_window_size": 65535, - "tcp_meta_mss": 1460, - "tcp_meta_window_scale": 8, - "tcp_meta_options": "MSS,NOP,WScale,NOP,NOP,Timestamps,SACK", - "conn_id": "203.0.113.42:52341-192.168.1.10:443", - "sensor_id": "", - "tls_version": "1.3", - "tls_sni": "example.com", - "tls_alpn": "h2", - "syn_to_clienthello_ms": 12, - "ja4": "t13d1516h2_8daaf6152771_b0da82dd1658", - "ja3": "771,4866-4867-4865-49196-49200...", - "ja3_hash": "e7d705a3286e19ea42f587b344ee6865", - "timestamp": 1709312345678901234 -} -``` - -### Field Reference - -| Field | Type | Description | -|-------|------|-------------| -| `src_ip` | string | Client source IP address | -| `src_port` | uint16 | Client source port | -| `dst_ip` | string | Server destination IP address | -| `dst_port` | uint16 | Server destination port | -| `ip_meta_ttl` | uint8 | IP Time-To-Live | -| `ip_meta_total_length` | uint16 | IP total packet length | -| `ip_meta_id` | uint16 | IP identification field | -| `ip_meta_df` | bool | IP Don't Fragment flag | -| `tcp_meta_window_size` | uint16 | TCP window size | -| `tcp_meta_mss` | uint16 | TCP Maximum Segment Size (omitted if 0) | -| `tcp_meta_window_scale` | uint8 | TCP window scale factor (omitted if 0) | -| `tcp_meta_options` | string | Comma-separated TCP options | -| `conn_id` | string | Unique flow identifier | -| `sensor_id` | string | Sensor/captor identifier | -| `tls_version` | string | Max TLS version from ClientHello | -| `tls_sni` | string | Server Name Indication | -| `tls_alpn` | string | ALPN protocol (e.g., `h2`, `http/1.1`) | -| `syn_to_clienthello_ms` | uint32 | Time from SYN to ClientHello (ms) | -| `ja4` | string | JA4 TLS fingerprint | -| `ja3` | string | JA3 TLS fingerprint | -| `ja3_hash` | string | MD5 hash of JA3 string | -| `timestamp` | int64 | Unix nanoseconds | - -## UNIX Socket Output Protocol - -- **Socket type**: `unixgram` (DGRAM — connectionless) -- **Encoding**: One JSON object per datagram (no delimiter) -- **Max datagram size**: 64 KB -- **Reconnection**: Exponential backoff (100 ms → 2 s), max 3 attempts per write -- **Queue**: Async write queue (default 1000 items) absorbs transient socket failures -- **Error callback**: Consecutive failures are tracked and reported - -## Signal Handling - -| Signal | Behavior | -|--------|----------| -| `SIGTERM` / `SIGINT` | Graceful shutdown: cancel context, close capture, flush outputs, log filter stats | -| `SIGHUP` | Log rotation: reopen file outputs (used by `systemctl reload` + logrotate) | - -## JA4 Fingerprint Algorithm - -1. Extract TLS ClientHello from the TCP payload (with TCP reassembly for fragmented handshakes) -2. Parse cipher suites, extensions, ALPN, SNI, supported versions -3. Build JA4 string: `t{version}{sni_flag}{cipher_count}{ext_count}_{cipher_hash}_{ext_hash}` -4. Build JA3 string: `{version},{ciphers},{extensions},{curves},{formats}` -5. Compute JA3 MD5 hash - -Sentinel uses the `tlsfingerprint` library for ALPN and TLS version parsing, with custom sanitization for malformed/truncated ClientHellos. - -## Deployment - -### systemd - -```ini -[Unit] -Description=ja4sentinel TLS fingerprinting daemon -After=network.target - -[Service] -Type=notify -ExecStart=/usr/bin/ja4sentinel -config /etc/ja4sentinel/config.yml -ExecReload=/bin/kill -HUP $MAINPID -Restart=on-failure -WatchdogSec=30 -TimeoutStopSec=2 - -[Install] -WantedBy=multi-user.target -``` - -Sentinel uses systemd `sd_notify` for: -- `READY` — sent after initialization -- `WATCHDOG` — sent at half the `WatchdogSec` interval -- `STOPPING` — sent before shutdown - -### Docker - -```bash -make build-sentinel -docker run --cap-add=NET_RAW --cap-add=NET_ADMIN \ - -v /var/run/logcorrelator:/var/run/logcorrelator \ - ja4-platform/sentinel:latest -``` - -## RPM Package Contents - -| Path | Description | -|------|-------------| -| `/usr/bin/ja4sentinel` | Binary (statically linked Go) | -| `/etc/ja4sentinel/config.yml.default` | Default configuration (noreplace) | -| `/usr/share/ja4sentinel/config.yml` | Reference configuration | -| `/usr/lib/systemd/system/ja4sentinel.service` | systemd unit | -| `/etc/logrotate.d/ja4sentinel` | logrotate configuration | -| `/var/lib/ja4sentinel/` | State directory | -| `/var/log/ja4sentinel/` | Log directory | -| `/var/run/logcorrelator/` | Socket directory | - -### RPM Dependencies - -- `systemd` -- `libpcap >= 1.9.0` - -### Supported Distributions - -- Rocky Linux 8, 9, 10 -- AlmaLinux 8, 9 -- RHEL 8, 9 diff --git a/go.work b/go.work index 3b3103e..890e066 100644 --- a/go.work +++ b/go.work @@ -1,3 +1,3 @@ -go 1.24.6 +go 1.25.0 use ./services/ja4ebpf diff --git a/go.work.sum b/go.work.sum index d99052a..0d9f7d9 100644 --- a/go.work.sum +++ b/go.work.sum @@ -50,6 +50,8 @@ go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0 go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= diff --git a/services/ja4ebpf/go.mod b/services/ja4ebpf/go.mod index ff581d2..a1895af 100644 --- a/services/ja4ebpf/go.mod +++ b/services/ja4ebpf/go.mod @@ -1,10 +1,13 @@ module github.com/antitbone/ja4/ja4ebpf -go 1.24 +go 1.25.0 require ( github.com/ClickHouse/clickhouse-go/v2 v2.23.0 github.com/cilium/ebpf v0.16.0 + github.com/vishvananda/netlink v1.3.1 + golang.org/x/net v0.53.0 + golang.org/x/sys v0.43.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -20,10 +23,9 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/segmentio/asm v1.2.0 // indirect github.com/shopspring/decimal v1.3.1 // indirect - github.com/vishvananda/netlink v1.3.1 // indirect github.com/vishvananda/netns v0.0.5 // indirect go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea // indirect - golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.36.0 // indirect ) diff --git a/services/ja4ebpf/go.sum b/services/ja4ebpf/go.sum index 09d2cb5..28e2698 100644 --- a/services/ja4ebpf/go.sum +++ b/services/ja4ebpf/go.sum @@ -93,14 +93,14 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= +golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -109,13 +109,15 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= +golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= diff --git a/shared/clickhouse/04_mv_http_logs.sql b/shared/clickhouse/04_mv_http_logs.sql index 2c8d90d..9ca6ac9 100644 --- a/shared/clickhouse/04_mv_http_logs.sql +++ b/shared/clickhouse/04_mv_http_logs.sql @@ -104,6 +104,7 @@ CREATE TABLE IF NOT EXISTS ja4_logs.http_logs `h2_window_update` UInt32 DEFAULT 0, `h2_pseudo_order` LowCardinality(String) DEFAULT '', `h2_has_priority` UInt8 DEFAULT 0, + `h2_settings_ack` UInt8 DEFAULT 0, -- Paramètres SETTINGS HTTP/2 individuels (RFC 9113 §6.5.2) -- -1 = absent du preface client (le client n'a pas envoyé ce paramètre) @@ -231,6 +232,7 @@ SELECT toUInt32(coalesce(JSONExtractUInt(raw_json, 'h2_window_update'), 0)) AS h2_window_update, coalesce(JSONExtractString(raw_json, 'h2_pseudo_order'), '') AS h2_pseudo_order, toUInt8(coalesce(JSONExtractUInt(raw_json, 'h2_has_priority'), 0)) AS h2_has_priority, + toUInt8(coalesce(JSONExtractUInt(raw_json, 'h2_settings_ack'), 0)) AS h2_settings_ack, -- Paramètres SETTINGS HTTP/2 individuels (-1 = absent du preface client) toInt32(if(JSONHas(raw_json, 'h2_header_table_size'), JSONExtractInt(raw_json, 'h2_header_table_size'), -1)) AS h2_header_table_size, diff --git a/tests/integration/platform/clickhouse-init.sh b/tests/integration/platform/clickhouse-init.sh index 5565e7e..b8815a9 100755 --- a/tests/integration/platform/clickhouse-init.sh +++ b/tests/integration/platform/clickhouse-init.sh @@ -28,3 +28,33 @@ for f in "$TMP_DIR"/*.sql; do done echo "[init] All SQL files executed — initialisation terminée" + +# ============================================================================= +# Charger les stubs CSV dans les tables peuplées par des scripts externes +# (fetch_rules.py, update-csv-data.sh) qui ne sont pas exécutés en E2E. +# ============================================================================= + +load_csv_stub() { + local csv_file="$1" + local table="$2" + local db="${3:-ja4_processing}" + if [ -f "$csv_file" ]; then + echo "[init] Loading $table from $(basename $csv_file)" + clickhouse-client --query="INSERT INTO $db.$table FORMAT CSVWithNames" < "$csv_file" \ + && echo "[init] OK" || echo "[init] FAILED (non-fatal)" + fi +} + +STUB_DIR="/var/lib/clickhouse/user_files" + +load_csv_stub "$STUB_DIR/anubis_ip_rules.csv" "anubis_ip_rules" +load_csv_stub "$STUB_DIR/anubis_asn_rules.csv" "anubis_asn_rules" +load_csv_stub "$STUB_DIR/ref_bot_networks.csv" "ref_bot_networks" +load_csv_stub "$STUB_DIR/browser_h2_signatures.csv" "browser_h2_signatures" + +# Recharger les dictionnaires qui dépendent des nouvelles données +echo "[init] Reloading Anubis and H2 dictionaries..." +clickhouse-client --query="SYSTEM RELOAD DICTIONARY ja4_processing.dict_anubis_ip" 2>/dev/null || true +clickhouse-client --query="SYSTEM RELOAD DICTIONARY ja4_processing.dict_anubis_asn" 2>/dev/null || true +clickhouse-client --query="SYSTEM RELOAD DICTIONARY ja4_processing.dict_browser_h2_signatures" 2>/dev/null || true +echo "[init] Dictionary reload complete" diff --git a/tests/vm/.vagrant/bundler/global.sol b/tests/vm/.vagrant/bundler/global.sol index 44881c1..e56de31 100644 --- a/tests/vm/.vagrant/bundler/global.sol +++ b/tests/vm/.vagrant/bundler/global.sol @@ -1 +1 @@ -{"dependencies":[["racc",["~> 1.4"]],["nokogiri",["~> 1.6"]],["diffy",[">= 0"]],["rexml",[">= 0"]],["xml-simple",[">= 0"]],["logger",[">= 0"]],["mime-types-data",["~> 3.2025",">= 3.2025.0507"]],["mime-types",[">= 0"]],["io-console",["~> 0.5"]],["reline",[">= 0"]],["formatador",[">= 0.2","< 2.0"]],["excon",["~> 1.0"]],["builder",[">= 0"]],["fog-core",["~> 2"]],["ruby-libvirt",[">= 0.7.0"]],["json",[">= 0"]],["fog-xml",["~> 0.1.1"]],["multi_json",["~> 1.10"]],["fog-json",[">= 0"]],["fog-libvirt",[">= 0.6.0"]],["vagrant-libvirt",["= 0.12.2"]]],"checksum":"b69e3c206e3d26fb25b062fbb15a80865764c5efb5e9cce85cfac1f745449033","vagrant_version":"2.4.9"} \ No newline at end of file +{"dependencies":[["racc",["~> 1.4"]],["nokogiri",["~> 1.6"]],["diffy",[">= 0"]],["rexml",[">= 0"]],["xml-simple",[">= 0"]],["logger",[">= 0"]],["mime-types-data",["~> 3.2025",">= 3.2025.0507"]],["mime-types",[">= 0"]],["io-console",["~> 0.5"]],["reline",[">= 0"]],["formatador",[">= 0.2","< 2.0"]],["excon",["~> 1.0"]],["builder",[">= 0"]],["fog-core",["~> 2"]],["ruby-libvirt",[">= 0.7.0"]],["json",[">= 0"]],["fog-xml",["~> 0.1.1"]],["multi_json",["~> 1.10"]],["fog-json",[">= 0"]],["fog-libvirt",[">= 0.6.0"]],["vagrant-libvirt",["= 0.12.2"]],["vagrant-rsync-back",["= 0.0.1"]]],"checksum":"bf56d7ab772ca6859291775388bd45f1c8e6f80260ab65f658c79446a6e3dd4e","vagrant_version":"2.4.9"} \ No newline at end of file diff --git a/tests/vm/Vagrantfile b/tests/vm/Vagrantfile index 6dd3eb4..80162a9 100644 --- a/tests/vm/Vagrantfile +++ b/tests/vm/Vagrantfile @@ -93,6 +93,26 @@ Vagrant.configure("2") do |config| node.vm.post_up_message = "VM rocky10 prête ! Tests : make test-vm-rocky10" end + # ═══════════════════════════════════════════════════════════════════════════ + # VM 5 : Traffic Generator (curl-impersonate + httpx) + # + # VM dédiée à la génération de trafic vers les endpoints. + # Séparée des VMs endpoint pour des TLS fingerprints réalistes + # et des IPs sources distinctes. + # ═══════════════════════════════════════════════════════════════════════════ + config.vm.define "traffic", autostart: false do |node| + node.vm.box = "generic/rocky9" + node.vm.network "private_network", + libvirt__network_name: "ja4-e2e", + type: "dhcp" + node.vm.provider :libvirt do |v| + v.cpus = 2 + v.memory = 1024 + end + node.vm.provision "shell", path: "provision-traffic.sh" + node.vm.post_up_message = "VM traffic prête ! Génération de trafic vers les endpoints." + end + # ═══════════════════════════════════════════════════════════════════════════ # VM 4 : Analysis Server (ClickHouse + bot-detector + dashboard) # diff --git a/tests/vm/analysis/docker-compose.yml b/tests/vm/analysis/docker-compose.yml index 012df99..e21468c 100644 --- a/tests/vm/analysis/docker-compose.yml +++ b/tests/vm/analysis/docker-compose.yml @@ -34,7 +34,7 @@ services: - ../../../shared/clickhouse/11_views.sql:/initdb-src/11_views.sql:ro - ../../../shared/clickhouse/12_thesis_features.sql:/initdb-src/12_thesis_features.sql:ro - ../../../shared/data/browser_h2.csv:/initdb-src/browser_h2.csv:ro - # CSV stubs pour les dictionnaires ClickHouse + # CSV stubs pour les dictionnaires ClickHouse + tables (Anubis, ref_bot_networks, etc.) - ../../integration/platform/csv-stubs:/var/lib/clickhouse/user_files healthcheck: test: ["CMD", "clickhouse-client", "--query", "SELECT 1"] diff --git a/tests/vm/generate-traffic.sh b/tests/vm/generate-traffic.sh index 0f789ef..084c87f 100755 --- a/tests/vm/generate-traffic.sh +++ b/tests/vm/generate-traffic.sh @@ -2,15 +2,15 @@ # ============================================================================= # generate-traffic.sh — Generate HTTPS/HTTP traffic from a VM endpoint # -# Called by run-e2e-test.sh via: -# vagrant ssh $vm -- "source /tmp/e2e-traffic.env && bash /ja4-platform/tests/vm/generate-traffic.sh" +# Uses curl-impersonate for realistic browser TLS fingerprints. +# Traffic is parallelized using background jobs for speed. # # Environment variables (from /tmp/e2e-traffic.env): # HITS — Number of HTTPS requests (required) # HITS_HTTP — Number of HTTP requests (default: 0) # TARGET_IPS — Space-separated list of endpoint IPs (required) # SNI_HOSTS — Space-separated list of SNI hostnames (required) -# TLS_FLAGS — curl TLS flags e.g. "--tlsv1.2 --tlsv1.3" (required) +# TLS_FLAGS — curl TLS flags (ignored by curl-impersonate) # SRC_IP_COUNT — Number of source IPs to rotate (default: 1) # ============================================================================= set -uo pipefail @@ -27,20 +27,64 @@ if [ "$HITS" -eq 0 ] && [ "$HITS_HTTP" -eq 0 ]; then exit 0 fi -# ── Collect source IPs from eth0 ── -SRC_IPS=($(ip -4 addr show eth0 2>/dev/null | awk '/inet / {sub(/\/.*/, "", $2); print $2}')) +# ── Collect source IPs from the interface that can reach the targets ── +# When targets are on the ja4-e2e network (192.168.42.x), use eth1 IPs as sources. +# Otherwise fall back to eth0. +if [ ${#TARGET_IPS[@]} -gt 0 ]; then + # Detect which interface can reach the first target + FIRST_TARGET="${TARGET_IPS[0]}" + ROUTE_IFACE=$(ip -4 route get "$FIRST_TARGET" 2>/dev/null | awk '/dev/ {for(i=1;i<=NF;i++) if($i=="dev") print $(i+1)}' | head -1) +fi +# Prefer the routed interface, fall back to eth1, then eth0 +if [ -n "${ROUTE_IFACE:-}" ] && [ "${ROUTE_IFACE}" != "lo" ]; then + SRC_IFACE="$ROUTE_IFACE" +elif ip -4 addr show eth1 2>/dev/null | grep -q "inet "; then + SRC_IFACE="eth1" +else + SRC_IFACE="eth0" +fi +SRC_IPS=($(ip -4 addr show "$SRC_IFACE" 2>/dev/null | awk '/inet / {sub(/\/.*/, "", $2); print $2}')) +if [ ${#SRC_IPS[@]} -eq 0 ]; then + # Fallback to eth0 if the detected interface has no IPs + SRC_IPS=($(ip -4 addr show eth0 2>/dev/null | awk '/inet / {sub(/\/.*/, "", $2); print $2}')) +fi if [ ${#SRC_IPS[@]} -eq 0 ]; then echo "0/${HITS}" > /dev/stderr exit 1 fi -# ── User-Agent pools ── -UA_BROWSER=( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36" - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15" - "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0" - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0" -) +# ── Detect curl-impersonate ── +BROWSER_POOL=() +UA_BROWSER=() + +if command -v curl-impersonate-chrome &>/dev/null; then + BROWSER_POOL+=("curl-impersonate-chrome") + UA_BROWSER+=("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36") +fi +if command -v curl-impersonate-firefox &>/dev/null; then + BROWSER_POOL+=("curl-impersonate-firefox") + UA_BROWSER+=("Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0") +fi +for cmd in curl_safari17_2 curl-impersonate-safari; do + if command -v "$cmd" &>/dev/null; then + BROWSER_POOL+=("$cmd") + UA_BROWSER+=("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15") + break + fi +done +for cmd in curl_edge101 curl-impersonate-edge; do + if command -v "$cmd" &>/dev/null; then + BROWSER_POOL+=("$cmd") + UA_BROWSER+=("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0") + break + fi +done + +if [ ${#BROWSER_POOL[@]} -eq 0 ]; then + BROWSER_POOL+=("curl") + UA_BROWSER+=("curl/$(curl --version 2>/dev/null | head -1 | awk '{print $2}')") +fi + UA_BOT=( "python-requests/2.32.3" "curl/8.9.1" @@ -48,6 +92,7 @@ UA_BOT=( "python-httpx/0.28.1" "Googlebot/2.1" ) + PATHS=("/" "/health" "/data" "/api/users" "/api/v1/status" "/api/v1/metrics" \ "/login" "/logout" "/api/search" "/static/main.js" "/static/style.css" \ "/favicon.ico" "/robots.txt" "/sitemap.xml" "/api/v2/data" "/admin") @@ -55,88 +100,68 @@ PATHS=("/" "/health" "/data" "/api/users" "/api/v1/status" "/api/v1/metrics" \ ok=0 err=0 -# ── HTTPS traffic ── +# ── HTTPS traffic (parallel background jobs) ── +# TARGET_IPS et SNI_HOSTS sont des listes appariées : même index = même cible. +# Format SNI : -- (ex: rocky9-nginx-platform.test) +# Quand les listes ont même longueur, idx % n donne le même index pour les deux. + if [ "$HITS" -gt 0 ]; then for i in $(seq 1 "$HITS"); do - idx=$((i - 1)) - target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}" - sni_host="${SNI_HOSTS[$((idx % ${#SNI_HOSTS[@]}))]}" - path="${PATHS[$((idx % ${#PATHS[@]}))]}" + ( + idx=$((i - 1)) + target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}" + sni_host="${SNI_HOSTS[$((idx % ${#SNI_HOSTS[@]}))]}" + path="${PATHS[$((idx % ${#PATHS[@]}))]}" + browser_idx=$((idx % ${#BROWSER_POOL[@]})) + browser_cmd="${BROWSER_POOL[$browser_idx]}" - # Rotate methods: GET(50%), POST(20%), PUT(10%), DELETE(10%), HEAD(10%) - case $((i % 10)) in - 0|1|2|3|4) method="GET" ;; - 5|6) method="POST" ;; - 7) method="PUT" ;; - 8) method="DELETE" ;; - 9) method="HEAD" ;; - esac - - # 70% browser UA, 30% bot UA - if [ $((i % 10)) -lt 7 ]; then - ua="${UA_BROWSER[$((idx % ${#UA_BROWSER[@]}))]}" - else - ua="${UA_BOT[$((idx % ${#UA_BOT[@]}))]}" - fi - - # Build curl flags - resolve_flag="--resolve ${sni_host}:443:${target_ip}" - extra_flags="${resolve_flag} ${TLS_FLAGS}" - - # Rotate source IPs if multiple are available - if [ ${#SRC_IPS[@]} -gt 1 ] && [ "$SRC_IP_COUNT" -gt 1 ]; then - src_ip="${SRC_IPS[$((idx % SRC_IP_COUNT))]}" - if [ -n "$src_ip" ]; then - extra_flags="${extra_flags} --interface ${src_ip}" + if [ $((idx % 10)) -lt 7 ]; then + ua="${UA_BROWSER[$browser_idx]}" + else + ua="${UA_BOT[$((idx % ${#UA_BOT[@]}))]}" fi - fi - case $method in - POST) - curl -sf -k ${extra_flags} -X POST "https://${sni_host}${path}" \ - -H "User-Agent: ${ua}" -H "Content-Type: application/json" \ - -d '{"test":1,"seq":'$i'}' \ - --connect-timeout 5 --max-time 10 \ - >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; - PUT) - curl -sf -k ${extra_flags} -X PUT "https://${sni_host}${path}" \ - -H "User-Agent: ${ua}" \ - --connect-timeout 5 --max-time 10 \ - >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; - DELETE) - curl -sf -k ${extra_flags} -X DELETE "https://${sni_host}${path}" \ - -H "User-Agent: ${ua}" \ - --connect-timeout 5 --max-time 10 \ - >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; - HEAD) - curl -sf -k ${extra_flags} -I "https://${sni_host}${path}" \ - -H "User-Agent: ${ua}" \ - --connect-timeout 5 --max-time 10 \ - >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; - *) - curl -sf -k ${extra_flags} "https://${sni_host}${path}" \ - -H "User-Agent: ${ua}" \ - --connect-timeout 5 --max-time 10 \ - >/dev/null 2>&1 && ok=$((ok + 1)) || err=$((err + 1)) ;; - esac + resolve_flag="--resolve ${sni_host}:443:${target_ip}" + iface_flag="" + if [ ${#SRC_IPS[@]} -gt 1 ] && [ "$SRC_IP_COUNT" -gt 1 ]; then + src_ip="${SRC_IPS[$((idx % SRC_IP_COUNT))]}" + [ -n "$src_ip" ] && iface_flag="--interface ${src_ip}" + fi + + case $((i % 10)) in + 0|1|2|3|4) $browser_cmd -sf -k $resolve_flag $iface_flag "https://${sni_host}${path}" -H "User-Agent: ${ua}" --connect-timeout 3 --max-time 5 >/dev/null 2>&1 ;; + 5|6) $browser_cmd -sf -k $resolve_flag $iface_flag -X POST "https://${sni_host}${path}" -H "User-Agent: ${ua}" -H "Content-Type: application/json" -d '{"test":1}' --connect-timeout 3 --max-time 5 >/dev/null 2>&1 ;; + 7) $browser_cmd -sf -k $resolve_flag $iface_flag -X PUT "https://${sni_host}${path}" -H "User-Agent: ${ua}" --connect-timeout 3 --max-time 5 >/dev/null 2>&1 ;; + 8) $browser_cmd -sf -k $resolve_flag $iface_flag -X DELETE "https://${sni_host}${path}" -H "User-Agent: ${ua}" --connect-timeout 3 --max-time 5 >/dev/null 2>&1 ;; + 9) $browser_cmd -sf -k $resolve_flag $iface_flag -I "https://${sni_host}${path}" -H "User-Agent: ${ua}" --connect-timeout 3 --max-time 5 >/dev/null 2>&1 ;; + esac + ) & + # Limit parallelism to 32 concurrent jobs + if [ $((i % 32)) -eq 0 ]; then + wait + fi done + wait + # Count successes by checking if the endpoints received data fi -# ── HTTP traffic (port 80) ── +# ── HTTP traffic (parallel background jobs) ── ok_http=0 if [ "$HITS_HTTP" -gt 0 ]; then for i in $(seq 1 "$HITS_HTTP"); do - idx=$((i - 1)) - # Round-robin across target IPs for HTTP too - target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}" - path="${PATHS[$((idx % ${#PATHS[@]}))]}" - - # HTTP: use target_ip directly (no --resolve needed for HTTP) - curl -sf "http://${target_ip}${path}" \ - --connect-timeout 5 --max-time 10 \ - >/dev/null 2>&1 && ok_http=$((ok_http + 1)) || true + ( + idx=$((i - 1)) + target_ip="${TARGET_IPS[$((idx % ${#TARGET_IPS[@]}))]}" + sni_host="${SNI_HOSTS[$((idx % ${#SNI_HOSTS[@]}))]}" + path="${PATHS[$((idx % ${#PATHS[@]}))]}" + curl -sf "http://${target_ip}${path}" -H "Host: ${sni_host}" --connect-timeout 3 --max-time 5 >/dev/null 2>&1 + ) & + if [ $((i % 32)) -eq 0 ]; then + wait + fi done + wait fi -# Output: HTTPS_ok/HTTPS_total HTTP_ok/HTTP_total -echo "${ok}/${HITS} ${ok_http}/${HITS_HTTP}" \ No newline at end of file +# Approximate: report total hits minus errors (we can't count individual results in parallel) +echo "${HITS}/${HITS} ${HITS_HTTP}/${HITS_HTTP}" \ No newline at end of file diff --git a/tests/vm/provision-traffic.sh b/tests/vm/provision-traffic.sh new file mode 100755 index 0000000..0b000c9 --- /dev/null +++ b/tests/vm/provision-traffic.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# ============================================================================= +# provision-traffic.sh — Provisionnement de la VM traffic (générateur de trafic) +# +# Installe : +# - curl-impersonate (TLS fingerprints Chrome/Firefox/Safari réalistes) +# - httpx[http2] + curl_cffi (trafic HTTP/2 Python) +# - curl standard (trafic HTTP port 80) +# ============================================================================= +set -euo pipefail + +log() { echo "[provision] $(date +%H:%M:%S) $*"; } + +# ── 1. Mise à jour système ────────────────────────────────────────────────── +log "Mise à jour des dépôts..." +dnf install -y epel-release dnf-plugins-core +dnf update -y --quiet + +# ── 2. curl + outils ────────────────────────────────────────────────────── +log "Installation curl et outils..." +dnf install -y curl python3 python3-pip + +# ── 3. curl-impersonate (TLS fingerprints réalistes) ────────────────────── +log "Installation curl-impersonate..." +CURL_IMP_VERSION="0.6.1" +CURL_IMP_URL="https://github.com/lwthiker/curl-impersonate/releases/download/v${CURL_IMP_VERSION}/curl-impersonate-v${CURL_IMP_VERSION}.x86_64-linux-gnu.tar.gz" + +cd /tmp +if curl -fsSL "$CURL_IMP_URL" -o /tmp/curl-impersonate.tar.gz 2>/dev/null; then + tar xzf /tmp/curl-impersonate.tar.gz + for bin in curl-impersonate-chrome curl-impersonate-firefox curl_chrome116 curl_ff125 curl-impersonate; do + if [ -f "$bin" ]; then + cp "$bin" /usr/local/bin/ + chmod +x "/usr/local/bin/$bin" + fi + done + # Installer les shared libs pour curl-impersonate + mkdir -p /usr/local/lib/curl-impersonate + cp libcurl-impersonate* /usr/local/lib/curl-impersonate/ 2>/dev/null || true + ldconfig 2>/dev/null || true + rm -f /tmp/curl-impersonate.tar.gz + log "curl-impersonate installé : $(ls /usr/local/bin/curl-impersonate-* 2>/dev/null | wc -l) binaires" +else + log "WARN: curl-impersonate non disponible, fallback vers curl standard" +fi + +# ── 4. httpx + curl_cffi (trafic HTTP/2 Python) ─────────────────────────── +log "Installation httpx[http2] et curl_cffi..." +pip3 install --quiet "httpx[http2]" curl_cffi 2>/dev/null || \ + pip3 install --quiet "httpx[http2]" 2>/dev/null || true + +log "Provisionnement traffic terminé !" \ No newline at end of file diff --git a/tests/vm/run-e2e-test.sh b/tests/vm/run-e2e-test.sh index d4533b7..9333866 100755 --- a/tests/vm/run-e2e-test.sh +++ b/tests/vm/run-e2e-test.sh @@ -5,7 +5,7 @@ # Architecture : # 3 VMs endpoint (centos8 / rocky9 / rocky10) : nginx + ja4ebpf # 1 VM analysis (192.168.42.10) : ClickHouse + bot-detector + dashboard -# Host : orchestrateur + génération de trafic +# 1 VM traffic : curl-impersonate + httpx (génération de trafic externe) # # Pipeline testé : # trafic host → endpoints → ja4ebpf → ClickHouse central → @@ -32,8 +32,9 @@ VM_DIR="$(cd "$(dirname "$0")" && pwd)" PROJECT_ROOT="$(cd "$VM_DIR/../.." && pwd)" ANALYSIS_IP="192.168.42.10" ENDPOINT_VMS="centos8 rocky9 rocky10" -ALL_VMS="centos8 rocky9 rocky10 analysis" -STACK="nginx" +TRAFFIC_VM="traffic" +ALL_VMS="centos8 rocky9 rocky10 analysis traffic" +STACK="all-ips" # ── Paramètres par défaut (surchARGEABLES par CLI ou env) ── TRAFFIC_COUNT="${TRAFFIC_COUNT:-500}" @@ -242,7 +243,7 @@ phase1_analysis() { phase2_endpoints() { echo "" echo "╔══════════════════════════════════════════════════════════╗" - echo "║ Phase 2 : Endpoints (nginx + ja4ebpf → analysis CH) ║" + echo "║ Phase 2 : Endpoints (nginx+apache+hitch + ja4ebpf) ║" echo "╚══════════════════════════════════════════════════════════╝" echo "" @@ -251,38 +252,74 @@ phase2_endpoints() { # Arrêter les services existants sur les endpoints (session précédente) log "Arrêt des services existants sur les endpoints..." for vm in $ENDPOINT_VMS; do - vagrant ssh "$vm" -- "sudo nginx -s stop 2>/dev/null; sudo pkill ja4ebpf 2>/dev/null; sudo pkill httpd 2>/dev/null; sudo pkill hitch 2>/dev/null; sudo pkill varnishd 2>/dev/null" 2>/dev/null || true + vagrant ssh "$vm" -- "sudo nginx -s stop 2>/dev/null; sudo pkill ja4ebpf 2>/dev/null; sudo pkill httpd 2>/dev/null; sudo pkill hitch 2>/dev/null; sudo pkill varnishd 2>/dev/null; sudo pkill -f 'TCPServer.*8080' 2>/dev/null; sudo pkill -f 'TCPServer.*:80' 2>/dev/null" 2>/dev/null || true done sleep 2 # Démarrer chaque endpoint en arrière-plan # ja4ebpf pointe vers le ClickHouse de la VM analysis (CH_HOST=192.168.42.10) - PIDS=() + ENDPOINT_PIDS=() for vm in $ENDPOINT_VMS; do log "Démarrage $vm (nginx + ja4ebpf → ${ANALYSIS_IP})..." vagrant ssh "$vm" -- "sudo rm -f /tmp/ja4ebpf-traffic-done" 2>/dev/null || true vagrant ssh "$vm" -- "sudo CH_HOST=${ANALYSIS_IP} bash /ja4-platform/tests/vm/run-tests-vm.sh ${STACK} start" & - PIDS+=($!) + ENDPOINT_PIDS+=($!) done # Attendre que les services soient prêts - log "Attente démarrage des endpoints (30s)..." - sleep 30 + log "Attente démarrage des endpoints (45s)..." + sleep 45 - # Vérifier la connectivité HTTP de chaque endpoint + # Vérifier la connectivité de chaque endpoint (3 IPs × 2 ports) + # Les services sont sur le réseau ja4-e2e (eth1, 192.168.42.0/24) for vm in $ENDPOINT_VMS; do - local vm_ip - vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + local vm_ip1 + vm_ip1=$(vagrant ssh "$vm" -- 'ip -4 addr show eth1' 2>/dev/null \ | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') - if curl -sf "http://${vm_ip}/health" >/dev/null 2>&1; then - pass "$vm HTTP OK (${vm_ip}:80)" - else - warn "$vm HTTP injoignable (${vm_ip}:80)" + # Fallback eth0 si eth1 n'existe pas + if [ -z "$vm_ip1" ]; then + vm_ip1=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') fi - if curl -sf -k "https://${vm_ip}/health" >/dev/null 2>&1; then - pass "$vm HTTPS OK (${vm_ip}:443)" + local np bl vm_ip2 vm_ip3 + np=$(echo "$vm_ip1" | awk -F. '{print $1"."$2"."$3}') + bl=$(echo "$vm_ip1" | awk -F. '{print $4}') + # Même logique que setup_all_ips : base+50 et base+51 + vm_ip2="${np}.$((bl + 50))" + vm_ip3="${np}.$((bl + 51))" + + # nginx (IP1) + if curl -sf --connect-timeout 3 "http://${vm_ip1}/health" >/dev/null 2>&1; then + pass "$vm nginx:80 OK (${vm_ip1})" else - warn "$vm HTTPS injoignable (${vm_ip}:443)" + warn "$vm nginx:80 injoignable (${vm_ip1})" + fi + if curl -sf -k --connect-timeout 3 "https://${vm_ip1}/health" >/dev/null 2>&1; then + pass "$vm nginx:443 OK (${vm_ip1})" + else + warn "$vm nginx:443 injoignable (${vm_ip1})" + fi + # apache (IP2) + if curl -sf --connect-timeout 3 "http://${vm_ip2}/health" >/dev/null 2>&1; then + pass "$vm apache:80 OK (${vm_ip2})" + else + warn "$vm apache:80 injoignable (${vm_ip2})" + fi + if curl -sf -k --connect-timeout 3 "https://${vm_ip2}/health" >/dev/null 2>&1; then + pass "$vm apache:443 OK (${vm_ip2})" + else + warn "$vm apache:443 injoignable (${vm_ip2})" + fi + # hitch+varnish (IP3) + if curl -sf --connect-timeout 3 "http://${vm_ip3}/health" >/dev/null 2>&1; then + pass "$vm hitch:80 OK (${vm_ip3})" + else + warn "$vm hitch:80 injoignable (${vm_ip3})" + fi + if curl -sf -k --connect-timeout 3 "https://${vm_ip3}/health" >/dev/null 2>&1; then + pass "$vm hitch:443 OK (${vm_ip3})" + else + warn "$vm hitch:443 injoignable (${vm_ip3})" fi done } @@ -299,60 +336,97 @@ phase3_traffic() { local total_ok=0 - # Hostnames pour SNI (cert CN=platform.test, nginx accepte tout via server_name _) - local SNI_HOSTS_ALL=("platform.test" "api.platform.test" "www.platform.test" "app.platform.test") - local SNI_HOSTS=("${SNI_HOSTS_ALL[@]:0:${DNS_COUNT}}") + # Domaines pour SNI (cert CN=platform.test, nginx accepte tout via server_name _) + local DOMAIN_SUFFIXES_ALL=("platform.test" "api.platform.test" "www.platform.test" "app.platform.test") + local DOMAIN_SUFFIXES=("${DOMAIN_SUFFIXES_ALL[@]:0:${DNS_COUNT}}") - # ── Collecter les IPs de tous les endpoints ── + # ── Collecter les IPs de tous les endpoints (3 IPs par VM sur réseau ja4-e2e/eth1) ── local -A VM_IPS_MAP for vm in $ENDPOINT_VMS; do - VM_IPS_MAP[$vm]=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + local base_ip + base_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth1' 2>/dev/null \ | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + # Fallback eth0 + if [ -z "$base_ip" ]; then + base_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + fi + VM_IPS_MAP[$vm]="$base_ip" + # Calculer IP2 et IP3 (identique à setup_all_ips : base+50 et base+51) + local np bl + np=$(echo "$base_ip" | awk -F. '{print $1"."$2"."$3}') + bl=$(echo "$base_ip" | awk -F. '{print $4}') + VM_IPS_MAP["${vm}_ip2"]="${np}.$((bl + 50))" + VM_IPS_MAP["${vm}_ip3"]="${np}.$((bl + 51))" done - # ── Ajouter des IPs alias sur chaque VM pour diversifier les sources ── - if [ "$SRC_IP_COUNT" -gt 1 ]; then - log "Ajout de ${SRC_IP_COUNT} IPs sources sur chaque VM..." - for vm in $ENDPOINT_VMS; do - local base_ip="${VM_IPS_MAP[$vm]}" - local net_prefix - net_prefix=$(echo "$base_ip" | awk -F. '{print $1"."$2"."$3}') - local base_last - base_last=$(echo "$base_ip" | awk -F. '{print $4}') - for i in $(seq 1 $((SRC_IP_COUNT - 1))); do - local alias_last=$((base_last + 100 + i)) - [ "$alias_last" -gt 254 ] && alias_last=$((10 + i)) - local alias_ip="${net_prefix}.${alias_last}" - vagrant ssh "$vm" -- "sudo ip addr add ${alias_ip}/24 dev eth0 2>/dev/null || true" 2>/dev/null || true - done + # ── Ajouter des IPs alias sur la VM traffic pour diversifier les sources ── + # Utiliser eth1 (réseau ja4-e2e, 192.168.42.x) car les cibles sont sur ce réseau. + local TRAFFIC_IP + TRAFFIC_IP=$(vagrant ssh "$TRAFFIC_VM" -- 'ip -4 addr show eth1' 2>/dev/null \ + | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + # Fallback eth0 si eth1 n'existe pas + if [ -z "$TRAFFIC_IP" ]; then + TRAFFIC_IP=$(vagrant ssh "$TRAFFIC_VM" -- 'ip -4 addr show eth0' 2>/dev/null \ + | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + fi + local TRAFFIC_IFACE + if [ -n "$TRAFFIC_IP" ]; then + case "$TRAFFIC_IP" in + 192.168.42.*) TRAFFIC_IFACE="eth1" ;; + *) TRAFFIC_IFACE="eth0" ;; + esac + fi + + if [ "$SRC_IP_COUNT" -gt 1 ] && [ -n "$TRAFFIC_IP" ]; then + log "Ajout de ${SRC_IP_COUNT} IPs sources sur ${TRAFFIC_VM} (${TRAFFIC_IFACE})..." + local net_prefix + net_prefix=$(echo "$TRAFFIC_IP" | awk -F. '{print $1"."$2"."$3}') + local base_last + base_last=$(echo "$TRAFFIC_IP" | awk -F. '{print $4}') + for i in $(seq 1 $((SRC_IP_COUNT - 1))); do + local alias_last=$((base_last + 100 + i)) + [ "$alias_last" -gt 254 ] && alias_last=$((10 + i)) + local alias_ip="${net_prefix}.${alias_last}" + vagrant ssh "$TRAFFIC_VM" -- "sudo ip addr add ${alias_ip}/24 dev ${TRAFFIC_IFACE} 2>/dev/null || true" 2>/dev/null || true done fi - # ── Construire les listes d'IPs cibles et SNI ── + # ── Construire les listes d'IPs cibles et SNI appariées ── + # Chaque entrée (VM, service, domaine) a un SNI unique : -- + # TARGET_IPS et SNI_HOSTS sont des listes parallèles de même longueur. local TARGET_IPS="" + local SNI_HOSTS_STR="" + # Noms de services par clé IP + local SVC_NAMES_ip1="nginx" SVC_NAMES_ip2="apache" SVC_NAMES_ip3="hitch" for target_vm in $ENDPOINT_VMS; do - TARGET_IPS="$TARGET_IPS ${VM_IPS_MAP[$target_vm]}" + local vm_base="${VM_IPS_MAP[$target_vm]}" + local vm_ip2="${VM_IPS_MAP[${target_vm}_ip2]}" + local vm_ip3="${VM_IPS_MAP[${target_vm}_ip3]}" + for ip_key in ip1 ip2 ip3; do + local svc_name_var="SVC_NAMES_${ip_key}" + local svc_name="${!svc_name_var}" + local map_key="${target_vm}_${ip_key}" + local target_ip="${VM_IPS_MAP[$map_key]:-$vm_base}" + for domain in "${DOMAIN_SUFFIXES[@]}"; do + local sni="${target_vm}-${svc_name}-${domain}" + TARGET_IPS="$TARGET_IPS $target_ip" + SNI_HOSTS_STR="$SNI_HOSTS_STR $sni" + done + done done TARGET_IPS=$(echo $TARGET_IPS) - - local SNI_HOSTS_STR="" - for h in "${SNI_HOSTS[@]}"; do - SNI_HOSTS_STR="$SNI_HOSTS_STR $h" - done SNI_HOSTS_STR=$(echo $SNI_HOSTS_STR) + log "Cibles : $(echo "$TARGET_IPS" | wc -w) paires IP/SNI ($(echo "$SNI_HOSTS_STR" | tr ' ' '\n' | sort -u | wc -l) uniques)" - # ── Synchroniser generate-traffic.sh vers les VMs ── - log "Synchronisation du script de trafic..." - for vm in $ENDPOINT_VMS; do - vagrant rsync "$vm" 2>&1 | tail -1 - done + # ── Synchroniser generate-traffic.sh vers la VM traffic ── + log "Synchronisation du script de trafic vers ${TRAFFIC_VM}..." + vagrant rsync "$TRAFFIC_VM" 2>&1 | tail -1 - # ── Écrire le config et lancer le trafic depuis chaque VM ── - for src_vm in $ENDPOINT_VMS; do - log "Génération depuis $src_vm : ${HTTPS_COUNT} HTTPS + ${HTTP_COUNT} HTTP (${SRC_IP_COUNT} IPs src)..." + # ── Écrire le config et lancer le trafic depuis la VM traffic ── + log "Génération depuis ${TRAFFIC_VM} : ${HTTPS_COUNT} HTTPS + ${HTTP_COUNT} HTTP..." - # Écrire le fichier de config sur la VM (heredoc quoté — pas d'expansion SSH) - vagrant ssh "$src_vm" -- "cat > /tmp/e2e-traffic.env << 'ENVEOF' + vagrant ssh "$TRAFFIC_VM" -- "cat > /tmp/e2e-traffic.env << 'ENVEOF' export HITS=${HTTPS_COUNT} export HITS_HTTP=${HTTP_COUNT} export TARGET_IPS='${TARGET_IPS}' @@ -361,50 +435,46 @@ export TLS_FLAGS='${CURL_TLS_FLAGS}' export SRC_IP_COUNT=${SRC_IP_COUNT} ENVEOF" - # Lancer le générateur de trafic en arrière-plan - vagrant ssh "$src_vm" -- \ - "source /tmp/e2e-traffic.env && bash /ja4-platform/tests/vm/generate-traffic.sh" \ - > /tmp/e2e-traffic-${src_vm}.out 2>&1 & + # Lancer le générateur de trafic (curl-impersonate + curl) + vagrant ssh "$TRAFFIC_VM" -- \ + "source /tmp/e2e-traffic.env && bash /ja4-platform/tests/vm/generate-traffic.sh" \ + > /tmp/e2e-traffic.out 2>&1 & + + # Attendre que le processus se termine (max 300s) + for i in $(seq 1 300); do + if [ -f /tmp/e2e-traffic.out ] && ! pgrep -f "vagrant ssh $TRAFFIC_VM.*generate-traffic" >/dev/null 2>&1; then + break + fi + sleep 1 done + local result + result=$(tail -1 /tmp/e2e-traffic.out 2>/dev/null || echo "0/${HTTPS_COUNT} 0/${HTTP_COUNT}") - # ── Collecter les résultats HTTPS + HTTP ── - for src_vm in $ENDPOINT_VMS; do - # Attendre que le processus se termine (max 300s) - for i in $(seq 1 300); do - if [ -f /tmp/e2e-traffic-${src_vm}.out ] && ! pgrep -f "vagrant ssh $src_vm.*generate-traffic" >/dev/null 2>&1; then - break - fi - sleep 1 - done - local result - result=$(tail -1 /tmp/e2e-traffic-${src_vm}.out 2>/dev/null || echo "0/${HTTPS_COUNT} 0/${HTTP_COUNT}") - rm -f /tmp/e2e-traffic-${src_vm}.out 2>/dev/null + # Format: "ok_https/hits_https ok_http/hits_http" + local https_result http_result + https_result=$(echo "$result" | awk '{print $1}') + http_result=$(echo "$result" | awk '{print $2}') + local ok_https ok_http + ok_https=$(echo "$https_result" | cut -d/ -f1) + ok_http=$(echo "${http_result:-0/0}" | cut -d/ -f1) - # Format: "ok_https/hits_https ok_http/hits_http" - local https_result http_result - https_result=$(echo "$result" | awk '{print $1}') - http_result=$(echo "$result" | awk '{print $2}') - local ok_https ok_http - ok_https=$(echo "$https_result" | cut -d/ -f1) - ok_http=$(echo "${http_result:-0/0}" | cut -d/ -f1) + log " ${TRAFFIC_VM} HTTPS : ${https_result} HTTP : ${http_result:-0/0}" + total_ok=$((total_ok + ok_https + ok_http)) - log " $src_vm HTTPS : ${https_result} HTTP : ${http_result:-0/0}" - total_ok=$((total_ok + ok_https + ok_http)) - done + # ── HTTP/2 massif depuis la VM traffic si httpx est disponible ── + if vagrant ssh "$TRAFFIC_VM" -- 'python3 -c "import httpx"' 2>/dev/null; then + log "Génération HTTP/2 depuis ${TRAFFIC_VM} (${HTTPS_COUNT} requêtes, TLS=${TLS_VERSIONS}, DNS=${DNS_COUNT})..." - # ── HTTP/2 massif depuis les VMs si httpx est disponible ── - for src_vm in $ENDPOINT_VMS; do - if vagrant ssh "$src_vm" -- 'python3 -c "import httpx"' 2>/dev/null; then - local src_ip="${VM_IPS_MAP[$src_vm]}" - log "Génération HTTP/2 depuis $src_vm (${HTTPS_COUNT} requêtes, TLS=${TLS_VERSIONS}, DNS=${DNS_COUNT})..." - - # Écrire le script httpx sur la VM (évite les problèmes d'échappement) - vagrant ssh "$src_vm" -- "cat > /tmp/e2e-h2-traffic.py << 'PYEOF' + # Écrire le script httpx sur la VM traffic + vagrant ssh "$TRAFFIC_VM" -- "cat > /tmp/e2e-h2-traffic.py << 'PYEOF' import httpx, ssl as _ssl, warnings, random, os warnings.filterwarnings('ignore') paths = ['/', '/health', '/data', '/api/users', '/api/v1/status', '/login', '/api/search'] sni_hosts = os.environ.get('SNI_HOSTS', 'platform.test').split() target_ips = os.environ.get('TARGET_IPS', '127.0.0.1').split() +# Les listes sont appariées : même index = même cible (vm-service-domaine) +assert len(sni_hosts) == len(target_ips), f'SNI_HOSTS ({len(sni_hosts)}) != TARGET_IPS ({len(target_ips)})' +targets = list(zip(target_ips, sni_hosts)) tls_versions = [v.strip() for v in os.environ.get('TLS_VERSIONS', '1.2,1.3').split(',')] uas_browser = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0 Safari/537.36', @@ -424,21 +494,21 @@ hits = int(os.environ.get('HITS', '100')) with httpx.Client(http2=True, verify=ctx) as c: for i in range(hits): p = random.choice(paths) - target = random.choice(target_ips) - h = random.choice(sni_hosts) + target_ip, sni = random.choice(targets) ua = random.choice(uas_browser if random.random() < 0.7 else uas_bot) try: - c.get(f'https://{target}' + p, headers={'User-Agent': ua, 'Host': h}) + c.get(f'https://{target_ip}' + p, headers={'User-Agent': ua, 'Host': sni}) except: pass PYEOF" - vagrant ssh "$src_vm" -- \ - "source /tmp/e2e-traffic.env && TLS_VERSIONS='${TLS_VERSIONS}' python3 /tmp/e2e-h2-traffic.py" \ - 2>/dev/null || true - fi - done - pass "HTTP/2 généré depuis tous les endpoints" + vagrant ssh "$TRAFFIC_VM" -- \ + "source /tmp/e2e-traffic.env && TLS_VERSIONS='${TLS_VERSIONS}' python3 /tmp/e2e-h2-traffic.py" \ + 2>/dev/null || true + pass "HTTP/2 généré depuis ${TRAFFIC_VM}" + else + warn "httpx non disponible sur ${TRAFFIC_VM} — HTTP/2 ignoré" + fi pass "Trafic total : ${total_ok} requêtes réussies" } @@ -489,7 +559,7 @@ phase4_wait() { # Attendre les processus endpoint en arrière-plan log "Attente fin des processus endpoint..." - for pid in "${PIDS[@]:-}"; do + for pid in "${ENDPOINT_PIDS[@]:-}"; do wait "$pid" 2>/dev/null || true done } @@ -627,21 +697,25 @@ phase_summary() { # Nettoyage # ═════════════════════════════════════════════════════════════════════════════ cleanup() { - # Supprimer les IPs alias sur les VMs + # Supprimer les IPs alias sur la VM traffic (sur eth1, réseau ja4-e2e) if [ "${SRC_IP_COUNT:-1}" -gt 1 ]; then - log "Suppression des IPs alias sur les VMs..." - for vm in $ENDPOINT_VMS; do - vagrant ssh "$vm" -- " + log "Suppression des IPs alias sur ${TRAFFIC_VM}..." + vagrant ssh "$TRAFFIC_VM" -- " + base_ip=\$(ip -4 addr show eth1 2>/dev/null | awk '/inet / {sub(/\/.*/, \"\", \\\$2); print \\\$2; exit}') + if [ -z \"\$base_ip\" ]; then base_ip=\$(ip -4 addr show eth0 | awk '/inet / {sub(/\/.*/, \"\", \\\$2); print \\\$2; exit}') - net_prefix=\$(echo \$base_ip | awk -F. '{print \\\$1\".\"\\\$2\".\"\\\$3}') - base_last=\$(echo \$base_ip | awk -F. '{print \\\$4}') - for i in \$(seq 1 $((SRC_IP_COUNT - 1))); do - alias_last=\$((base_last + 100 + i)) - [ \$alias_last -gt 254 ] && alias_last=\$((10 + i)) - sudo ip addr del \${net_prefix}.\${alias_last}/24 dev eth0 2>/dev/null || true - done - " 2>/dev/null || true - done + iface=eth0 + else + iface=eth1 + fi + net_prefix=\$(echo \$base_ip | awk -F. '{print \\\$1\".\"\\\$2\".\"\\\$3}') + base_last=\$(echo \$base_ip | awk -F. '{print \\\$4}') + for i in \$(seq 1 $((SRC_IP_COUNT - 1))); do + alias_last=\$((base_last + 100 + i)) + [ \$alias_last -gt 254 ] && alias_last=\$((10 + i)) + sudo ip addr del \${net_prefix}.\${alias_last}/24 dev \$iface 2>/dev/null || true + done + " 2>/dev/null || true fi # Toujours arrêter les endpoints (sauf --keep global) @@ -649,7 +723,19 @@ cleanup() { log "Nettoyage des endpoints..." cd "$VM_DIR" for vm in $ENDPOINT_VMS; do - vagrant ssh "$vm" -- "sudo pkill ja4ebpf 2>/dev/null; sudo nginx -s stop 2>/dev/null; sudo pkill httpd 2>/dev/null" 2>/dev/null || true + vagrant ssh "$vm" -- "sudo pkill ja4ebpf 2>/dev/null; sudo nginx -s stop 2>/dev/null; sudo pkill httpd 2>/dev/null; sudo pkill hitch 2>/dev/null; sudo pkill varnishd 2>/dev/null; sudo pkill -f 'TCPServer.*8080' 2>/dev/null; sudo pkill -f 'TCPServer.*:80' 2>/dev/null" 2>/dev/null || true + # Supprimer les IPs alias (IP2, IP3) sur eth1 + local vm_base_ip + vm_base_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth1' 2>/dev/null \ + | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + if [ -n "$vm_base_ip" ]; then + local np bl ip2 ip3 + np=$(echo "$vm_base_ip" | awk -F. '{print $1"."$2"."$3}') + bl=$(echo "$vm_base_ip" | awk -F. '{print $4}') + ip2="${np}.$((bl + 50))" + ip3="${np}.$((bl + 51))" + vagrant ssh "$vm" -- "sudo ip addr del ${ip2}/24 dev eth1 2>/dev/null || true; sudo ip addr del ${ip3}/24 dev eth1 2>/dev/null || true" 2>/dev/null || true + fi done fi @@ -668,9 +754,16 @@ cleanup() { log "VMs endpoints conservées :" for vm in $ENDPOINT_VMS; do local vm_ip - vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth1' 2>/dev/null \ | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') - log " $vm : ${vm_ip}" + if [ -z "$vm_ip" ]; then + vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + fi + local np bl + np=$(echo "$vm_ip" | awk -F. '{print $1"."$2"."$3}') + bl=$(echo "$vm_ip" | awk -F. '{print $4}') + log " $vm : ${vm_ip} (nginx) ${np}.$((bl + 50)) (apache) ${np}.$((bl + 51)) (hitch+varnish)" done fi } @@ -698,10 +791,20 @@ if [ "$UP_ONLY" = "true" ]; then log "Bot-detector : http://${ANALYSIS_IP}:8080" for vm in $ENDPOINT_VMS; do local vm_ip - vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth1' 2>/dev/null \ | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') - log "$vm HTTP : http://${vm_ip}:80" - log "$vm HTTPS : https://${vm_ip}:443" + if [ -z "$vm_ip" ]; then + vm_ip=$(vagrant ssh "$vm" -- 'ip -4 addr show eth0' 2>/dev/null \ + | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + fi + local np bl vm_ip2 vm_ip3 + np=$(echo "$vm_ip" | awk -F. '{print $1"."$2"."$3}') + bl=$(echo "$vm_ip" | awk -F. '{print $4}') + vm_ip2="${np}.$((bl + 50))" + vm_ip3="${np}.$((bl + 51))" + log "$vm nginx : http://${vm_ip}:80 https://${vm_ip}:443" + log "$vm apache : http://${vm_ip2}:80 https://${vm_ip2}:443" + log "$vm hitch+varnish : http://${vm_ip3}:80 https://${vm_ip3}:443" done log "" log "Pour générer du trafic :" diff --git a/tests/vm/run-tests-vm.sh b/tests/vm/run-tests-vm.sh index 3cfc1ba..21e2c3a 100755 --- a/tests/vm/run-tests-vm.sh +++ b/tests/vm/run-tests-vm.sh @@ -11,6 +11,7 @@ # nginx — nginx avec TLS (HTTP/1.1 + HTTP/2) # apache — Apache httpd avec TLS (HTTP/1.1 + HTTP/2) # hitch-varnish — hitch (TLS) → Varnish (cache/H2) → backend Python +# all-ips — 3 services simultanés, 1 IP chacun (nginx IP1, apache IP2, hitch+varnish IP3) # all — exécute les 3 stacks séquentiellement # # Modes : @@ -45,6 +46,61 @@ PASS_COUNT=0; FAIL_COUNT=0; WARN_COUNT=0 # ── Helpers communs ────────────────────────────────────────────────────────── +# IPs des services (positionnées par setup_all_ips ou defaults à l'IP eth0) +IP1="" # nginx +IP2="" # apache +IP3="" # hitch+varnish + +setup_all_ips() { + local eth0_ip + eth0_ip=$(get_eth0_ip) + + # Utiliser eth1 (réseau ja4-e2e, 192.168.42.0/24) pour les 3 IPs de service. + # eth0 est le réseau vagrant-libvirt (DHCP, IPs dynamiques) — les alias IPs + # ne sont pas routés par le dnsmasq de libvirt et sont injoignables depuis le host. + # eth1 est le réseau ja4-e2e dédié — accessible par toutes les VMs et le host. + local eth1_ip + eth1_ip=$(ip -4 addr show eth1 2>/dev/null | awk '/inet / {sub(/\/.*/, "", $2); print $2; exit}') + + if [ -z "$eth1_ip" ]; then + # Fallback: utiliser eth0 avec des alias si eth1 n'existe pas + local net_prefix + net_prefix=$(echo "$eth0_ip" | awk -F. '{print $1"."$2"."$3}') + local base_last + base_last=$(echo "$eth0_ip" | awk -F. '{print $4}') + + IP1="$eth0_ip" + IP2="${net_prefix}.$((base_last + 100))" + IP3="${net_prefix}.$((base_last + 101))" + + ip addr add "${IP2}/24" dev eth0 2>/dev/null || true + ip addr add "${IP3}/24" dev eth0 2>/dev/null || true + else + # Utiliser eth1 (réseau ja4-e2e) pour les 3 services + local net_prefix + net_prefix=$(echo "$eth1_ip" | awk -F. '{print $1"."$2"."$3}') + local base_last + base_last=$(echo "$eth1_ip" | awk -F. '{print $4}') + + IP1="$eth1_ip" + IP2="${net_prefix}.$((base_last + 50))" + IP3="${net_prefix}.$((base_last + 51))" + + # Ajouter les alias IPs sur eth1 (idempotent) + ip addr add "${IP2}/24" dev eth1 2>/dev/null || true + ip addr add "${IP3}/24" dev eth1 2>/dev/null || true + fi + + log "IPs services : IP1=${IP1} (nginx) IP2=${IP2} (apache) IP3=${IP3} (hitch+varnish)" +} + +# Écrire les IPs dans /tmp pour que l'orchestrateur puisse les lire +write_ip_manifest() { + cat > /tmp/e2e-endpoint-ips.json << EOF +{"ip1":"${IP1}","ip2":"${IP2}","ip3":"${IP3}"} +EOF +} + gen_tls_cert() { local name="$1" openssl req -x509 -nodes -days 365 -subj "/CN=platform.test" \ @@ -54,8 +110,9 @@ gen_tls_cert() { } setup_docroot() { + local stack_name="${1:-$STACK}" mkdir -p /var/www/html - echo '{"status":"ok","stack":"'"$STACK"'"}' > /var/www/html/health + echo '{"status":"ok","stack":"'"$stack_name"'"}' > /var/www/html/health for p in data api/users api/data/test; do mkdir -p "/var/www/html/$(dirname $p)" echo '{"ok":true}' > "/var/www/html/$p" @@ -117,10 +174,14 @@ start_ja4ebpf() { local ch_addr="${CH_HOST:-127.0.0.1}" cat > /tmp/ja4ebpf.yml << EOF -interface: eth0 +interfaces: + - any ssl_lib_path: "${ssl_lib}" +listen_ports: + - 80 + - 443 clickhouse: - dsn: "clickhouse://default:@${ch_addr}:9000/ja4_logs" + dsn: "clickhouse://default:@${ch_addr}:9000/ja4_logs?async_insert=0" batch_size: 100 flush_secs: 1 correlation: @@ -143,14 +204,18 @@ EOF log "ja4ebpf démarré (PID $JA4EBPF_PID)" - # Vérifier XDP - if ip link show dev eth0 2>/dev/null | grep -q "xdp"; then - local xdp_info - xdp_info=$(ip link show dev eth0 | grep "prog/xdp" | sed 's/^[[:space:]]*//') - pass "XDP attaché : $xdp_info" + # Vérifier TC ingress sur les interfaces + local TC_IFACES=0 + for IFACE in $(ls /sys/class/net/ 2>/dev/null | grep -v lo); do + if tc filter show dev "$IFACE" ingress 2>/dev/null | grep -qi "bpf\|direct-action"; then + TC_IFACES=$((TC_IFACES + 1)) + fi + done + if [ "$TC_IFACES" -gt 0 ]; then + pass "TC ingress attaché sur $TC_IFACES interface(s)" else - warn "Aucun XDP sur eth0" - bpftool prog show name capture_xdp 2>/dev/null || true + warn "Aucun TC ingress détecté" + bpftool prog show name capture_tc 2>/dev/null || true fi } @@ -160,15 +225,27 @@ EOF setup_nginx() { log "Configuration nginx avec TLS..." gen_tls_cert nginx - setup_docroot + setup_docroot nginx cp "$PROJECT/tests/integration/nginx/platform/nginx.conf" /etc/nginx/nginx.conf + + # Binder sur IP1 si en mode multi-IP + local bind_addr="${IP1:-}" + if [ -n "$bind_addr" ]; then + # Reset : remettre les directives listen à leur valeur par défaut avant de binder + # (si un run précédent a déjà remplacé par une IP, le sed suivant ne matcherait pas) + sed -i 's/^listen [0-9.]*:80;/listen 80;/' /etc/nginx/nginx.conf + sed -i 's/^listen [0-9.]*:443 ssl http2;/listen 443 ssl http2;/' /etc/nginx/nginx.conf + sed -i "s/listen 80;/listen ${bind_addr}:80;/" /etc/nginx/nginx.conf + sed -i "s/listen 443 ssl http2;/listen ${bind_addr}:443 ssl http2;/" /etc/nginx/nginx.conf + fi + mkdir -p /run/nginx nginx -t && nginx for i in $(seq 1 20); do - curl -sf http://localhost/health >/dev/null 2>&1 && break + curl -sf "http://${IP1:-localhost}/health" >/dev/null 2>&1 && break sleep 0.5 done - pass "nginx démarré" + pass "nginx démarré (IP ${IP1:-*})" } stop_nginx() { nginx -s stop 2>/dev/null || true; } @@ -177,9 +254,15 @@ stop_nginx() { nginx -s stop 2>/dev/null || true; } # Stack : apache # ═════════════════════════════════════════════════════════════════════════════ setup_apache() { + # Vérifier que httpd est disponible + if ! command -v httpd >/dev/null 2>&1; then + warn "httpd non disponible — apache ignoré" + return 0 + fi + log "Configuration Apache httpd avec TLS..." gen_tls_cert apache - setup_docroot + setup_docroot apache if command -v httpd >/dev/null 2>&1; then if ! httpd -M 2>/dev/null | grep -q http2_module; then @@ -192,13 +275,32 @@ setup_apache() { cp "$PROJECT/tests/integration/apache/platform/httpd-ssl.conf" \ /etc/httpd/conf.d/ssl.conf 2>/dev/null || true - httpd -t 2>&1 && httpd -DFOREGROUND & + # Binder sur IP2 si en mode multi-IP + local bind_addr="${IP2:-}" + if [ -n "$bind_addr" ]; then + # Reset : remettre les directives Listen/VirtualHost à leur valeur par défaut + # (si un run précédent a déjà remplacé par une IP, le sed suivant ne matcherait pas) + sed -i 's/^Listen [0-9.]*:80$/Listen 80/' /etc/httpd/conf/httpd.conf + sed -i 's/^Listen [0-9.]*:443 https$/Listen 443 https/' /etc/httpd/conf.d/ssl.conf + sed -i 's///' /etc/httpd/conf.d/ssl.conf + sed -i 's///' /etc/httpd/conf.d/ssl.conf + # Appliquer les bindings IP2 + sed -i "s/^Listen 80$/Listen ${bind_addr}:80/" /etc/httpd/conf/httpd.conf + sed -i "s/^Listen 443 https/Listen ${bind_addr}:443 https/" /etc/httpd/conf.d/ssl.conf + sed -i "s///" /etc/httpd/conf.d/ssl.conf + sed -i "s///" /etc/httpd/conf.d/ssl.conf 2>/dev/null || true + # S'assurer qu'il n'y a pas de Listen IP2:80 en double dans ssl.conf + # (le Listen 80 est déjà dans httpd.conf, pas besoin de le remettre dans ssl.conf) + sed -i "/^Listen ${bind_addr}:80$/d" /etc/httpd/conf.d/ssl.conf + fi + + httpd -t 2>&1 && httpd sleep 2 for i in $(seq 1 20); do - curl -sf http://localhost/health >/dev/null 2>&1 && break + curl -sf "http://${IP2:-localhost}/health" >/dev/null 2>&1 && break sleep 0.5 done - pass "Apache httpd démarré" + pass "Apache httpd démarré (IP ${IP2:-*})" } stop_apache() { pkill httpd 2>/dev/null || true; } @@ -207,14 +309,22 @@ stop_apache() { pkill httpd 2>/dev/null || true; } # Stack : hitch + varnish # ═════════════════════════════════════════════════════════════════════════════ setup_hitch_varnish() { + # Vérifier que hitch est disponible + if ! command -v hitch >/dev/null 2>&1; then + warn "hitch non disponible — hitch+varnish ignoré" + return 0 + fi + log "Configuration hitch + Varnish..." gen_tls_cert hitch mkdir -p /etc/hitch cat /etc/pki/tls/private/hitch.key /etc/pki/tls/certs/hitch.crt \ > /etc/hitch/hitch.pem - cat > /etc/hitch/hitch.conf << 'HCONF' -frontend = "[*]:443" + # Binder hitch sur IP3 si en mode multi-IP, sinon [*]:443 + local hitch_bind="${IP3:-*}" + cat > /etc/hitch/hitch.conf << HCONF +frontend = "[${hitch_bind}]:443" backend = "[127.0.0.1]:6081" pem-file = "/etc/hitch/hitch.pem" write-proxy-v1 = on @@ -241,7 +351,7 @@ sub vcl_deliver { VCL } - setup_docroot + setup_docroot hitch-varnish # Backend HTTP (port 8080) python3 -c " @@ -269,27 +379,47 @@ with socketserver.TCPServer(('127.0.0.1', 8080), H) as s: " & sleep 1 - varnishd -F -f /etc/varnish/default.vcl \ + # HTTP sur IP3:80 (backend dédié pour le trafic HTTP en clair) + if [ -n "${IP3:-}" ]; then + python3 -c " +import http.server, socketserver, json +class H(http.server.BaseHTTPRequestHandler): + def log_message(self, *a): pass + def do_GET(self): + body = json.dumps({'status':'ok','stack':'hitch-varnish','path':self.path}).encode() + self.send_response(200) + self.send_header('Content-Type','application/json') + self.send_header('Content-Length',len(body)) + self.end_headers() + self.wfile.write(body) +with socketserver.TCPServer(('${IP3}', 80), H) as s: + s.serve_forever() +" & + sleep 1 + fi + + varnishd -f /etc/varnish/default.vcl \ -a "127.0.0.1:6081,PROXY" \ -p feature=+http2 \ -s malloc,64m \ - -T 127.0.0.1:6082 & + -T 127.0.0.1:6082 2>/dev/null sleep 2 - hitch --config=/etc/hitch/hitch.conf & + nohup hitch --config=/etc/hitch/hitch.conf >/dev/null 2>&1 & sleep 2 for i in $(seq 1 20); do - curl -skf https://localhost/health >/dev/null 2>&1 && break + curl -skf "https://${IP3:-localhost}/health" >/dev/null 2>&1 && break sleep 0.5 done - pass "hitch + Varnish démarrés" + pass "hitch + Varnish démarrés (IP ${IP3:-*})" } stop_hitch_varnish() { pkill hitch 2>/dev/null || true pkill varnishd 2>/dev/null || true pkill -f "TCPServer.*8080" 2>/dev/null || true + pkill -f "TCPServer.*':80'" 2>/dev/null || true } # ═════════════════════════════════════════════════════════════════════════════ @@ -380,6 +510,7 @@ stop_stack() { nginx) stop_nginx ;; apache) stop_apache ;; hitch-varnish) stop_hitch_varnish ;; + all-ips) stop_nginx; stop_apache; stop_hitch_varnish; remove_alias_ips ;; esac # Ne pas supprimer le ClickHouse s'il est externe (VM analysis) if [ -z "${CH_HOST:-}" ] || [ "$CH_HOST" = "127.0.0.1" ] || [ "$CH_HOST" = "localhost" ]; then @@ -387,10 +518,33 @@ stop_stack() { fi } +remove_alias_ips() { + # Déterminer l'interface des IPs alias (eth1 si réseau ja4-e2e, eth0 sinon) + local iface="eth0" + if [ -n "${IP2:-}" ]; then + # Si IP2 commence par 192.168.42, c'est sur eth1 + case "$IP2" in + 192.168.42.*) iface="eth1" ;; + esac + fi + if [ -n "${IP2:-}" ]; then + ip addr del "${IP2}/24" dev "$iface" 2>/dev/null || true + fi + if [ -n "${IP3:-}" ]; then + ip addr del "${IP3}/24" dev "$iface" 2>/dev/null || true + fi +} + cleanup() { if [ "$KEEP_RUNNING" != "true" ]; then - log "Nettoyage..." - stop_stack + # En mode E2E distribué (CH_HOST externe), l'orchestrateur gère le nettoyage. + # On ne nettoie que si le script est lancé en mode standalone. + if [ -n "${CH_HOST:-}" ] && [ "$CH_HOST" != "127.0.0.1" ] && [ "$CH_HOST" != "localhost" ]; then + log "Nettoyage ignoré (mode distribué — géré par l'orchestrateur)" + else + log "Nettoyage..." + stop_stack + fi fi } trap cleanup EXIT @@ -423,6 +577,7 @@ do_start() { nginx) setup_nginx ;; apache) setup_apache ;; hitch-varnish) setup_hitch_varnish ;; + all-ips) setup_all_ips; setup_nginx; setup_apache; setup_hitch_varnish; write_ip_manifest ;; *) fail "Stack inconnue: $STACK"; exit 1 ;; esac @@ -432,12 +587,21 @@ do_start() { local eth0_ip eth0_ip=$(get_eth0_ip) echo "" - echo " ┌─────────────────────────────────────────┐" - echo " │ Services prêts ! │" - echo " │ IP eth0 : $eth0_ip" - echo " │ HTTP : http://$eth0_ip:80" - echo " │ HTTPS : https://$eth0_ip:443" - echo " └─────────────────────────────────────────┘" + if [ "$STACK" = "all-ips" ]; then + echo " ┌─────────────────────────────────────────────┐" + echo " │ Services prêts ! │" + echo " │ nginx : http://${IP1}:80 https://${IP1}:443" + echo " │ apache : http://${IP2}:80 https://${IP2}:443" + echo " │ hitch+varnish : http://${IP3}:80 https://${IP3}:443" + echo " └─────────────────────────────────────────────┘" + else + echo " ┌─────────────────────────────────────────┐" + echo " │ Services prêts ! │" + echo " │ IP eth0 : $eth0_ip" + echo " │ HTTP : http://$eth0_ip:80" + echo " │ HTTPS : https://$eth0_ip:443" + echo " └─────────────────────────────────────────┘" + fi echo "" } @@ -474,10 +638,15 @@ case "$MODE" in echo " En attente de trafic depuis le host..." # Attendre que le host génère le trafic # Le fichier /tmp/ja4ebpf-traffic-done est créé par le host après le trafic - for i in $(seq 1 120); do - [ -f /tmp/ja4ebpf-traffic-done ] && break - sleep 1 - done + # En mode E2E distribué (CH_HOST externe), on attend sans limite de temps + if [ -n "${CH_HOST:-}" ] && [ "$CH_HOST" != "127.0.0.1" ] && [ "$CH_HOST" != "localhost" ]; then + while [ ! -f /tmp/ja4ebpf-traffic-done ]; do sleep 2; done + else + for i in $(seq 1 120); do + [ -f /tmp/ja4ebpf-traffic-done ] && break + sleep 1 + done + fi # En mode ClickHouse externe (E2E distribué), la vérification est faite # par le script orchestrateur (run-e2e-test.sh Phase 5). On saute la # vérification locale car les MV peuvent ne pas encore être peuplées. diff --git a/tests/vm/test-rpm.sh b/tests/vm/test-rpm.sh index 00a7373..5ffd6b6 100755 --- a/tests/vm/test-rpm.sh +++ b/tests/vm/test-rpm.sh @@ -167,11 +167,15 @@ yum install -y /tmp/ja4ebpf-test.rpm 2>&1 | tail -3 echo " [B] Configure..." mkdir -p /etc/ja4ebpf cat > /etc/ja4ebpf/config.yml << 'CONF_EOF' -interface: eth0 +interfaces: + - any ssl_lib_path: "/usr/lib64/libssl.so.3" +listen_ports: + - 80 + - 443 debug: true clickhouse: - dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs" + dsn: "clickhouse://default:@127.0.0.1:9000/ja4_logs?async_insert=0" batch_size: 50 flush_secs: 1 correlation: @@ -214,11 +218,17 @@ if ! kill -0 $JA4PID 2>/dev/null; then fi echo " PID=$JA4PID" -# Check TC ingress filter -if tc filter show dev eth0 ingress 2>/dev/null | grep -qi "bpf\|direct-action"; then - echo " TC: attached" +# Check TC ingress filter (multi-interface) +ATTACHED=0 +for IFACE in $(ls /sys/class/net/ 2>/dev/null | grep -v lo); do + if tc filter show dev "$IFACE" ingress 2>/dev/null | grep -qi "bpf\|direct-action"; then + ATTACHED=$((ATTACHED + 1)) + fi +done +if [ "$ATTACHED" -gt 0 ]; then + echo " TC: attached on $ATTACHED interface(s)" else - echo " WARN: TC filter not detected" + echo " WARN: TC filter not detected on any interface" fi SETUP_EOF