feat: multi-distro VM tests, ja4ebpf eBPF improvements, bot-detector scoring

ja4ebpf:
- Refactor BPF TC capture with improved SYN offset handling and TCP option parsing
- Enhance TLS uprobe SSL hooking for better key extraction
- Add ClickHouse writer improvements for HTTP log materialized views
- Update RPM spec for Rocky Linux 8/9/10, fix systemd service
- Simplify loader with cleaner bpf2go integration

bot-detector:
- Add H2 SETTINGS per-parameter comparison in browser_matcher
- Enhance browser signatures and scoring pipeline
- Improve preprocessing and cycle detection

infra:
- Multi-distro Vagrantfile (centos8, rocky9, rocky10) with per-distro provisioning
- New Makefile targets: vm-up-all, test-vm-matrix, test-vm-centos8/rocky10
- Add debug helpers and run-test-from-host.sh for host-driven VM testing
- Update run-tests-vm.sh for cross-distro compatibility
- Remove accidental binary blob (\004)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jacquin Antoine
2026-04-13 01:09:33 +02:00
parent d81463a589
commit d75825278e
32 changed files with 2148 additions and 890 deletions

View File

@ -295,3 +295,50 @@ TTL observed_at + INTERVAL 30 DAY
SETTINGS
index_granularity = 8192,
ttl_only_drop_parts = 1;
-- -----------------------------------------------------------------------------
-- browser_h2_signatures: structured H2 signature base, organised by browser
-- family.
-- Thesis §3.9.5: reloaded every 24h by the Python module.
--
-- Engine notes:
--   * ReplacingMergeTree(created_at) keeps, for each (family, version_min)
--     sorting key, the row with the greatest created_at. Replacement only
--     happens during background merges, so superseded rows can still be
--     returned by a plain SELECT; read with FINAL when exactly one row per
--     key is required.
--   * The larger String payload columns are compressed with ZSTD level 3.
--
-- NOTE(review): h2_settings_json defaults to '' while tls_json defaults to
-- '{}'. If consumers parse both as JSON, the empty string is not a valid
-- document; confirm the asymmetry is intentional.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ja4_processing.browser_h2_signatures
(
    -- Sorting-key columns (see ORDER BY below), plus the upper version bound.
    family                      LowCardinality(String),
    version_min                 String  DEFAULT '',
    version_max                 String  DEFAULT '',

    -- H2-level expectations. The *_json / list-like columns are stored as
    -- plain strings (presumably JSON-encoded; TODO confirm against the writer).
    h2_settings_json            String  DEFAULT ''      CODEC(ZSTD(3)),
    h2_settings_forbidden       String  DEFAULT '[]'    CODEC(ZSTD(3)),
    h2_window_update            UInt32  DEFAULT 0,
    h2_window_update_tolerance  UInt32  DEFAULT 1000,
    h2_priority_expected        UInt8   DEFAULT 0,
    pseudo_header_order         String  DEFAULT '',

    -- TLS and HTTP header expectations.
    tls_json                    String  DEFAULT '{}'    CODEC(ZSTD(3)),
    headers_required            String  DEFAULT '[]'    CODEC(ZSTD(3)),
    headers_forbidden           String  DEFAULT '[]'    CODEC(ZSTD(3)),

    -- Bookkeeping: created_at doubles as the ReplacingMergeTree version column.
    created_at                  DateTime DEFAULT now(),
    is_active                   UInt8   DEFAULT 1
)
ENGINE = ReplacingMergeTree(created_at)
ORDER BY (family, version_min)
SETTINGS
    index_granularity = 8192;
-- ClickHouse dictionary for fast lookups by browser family.
--
-- Loaded from ja4_processing.browser_h2_signatures and refreshed roughly every
-- 24h: ClickHouse picks a reload time between 82800 and 86400 seconds.
-- The COMPLEX_KEY_HASHED layout expects the key to be passed as a tuple, e.g.
-- dictGet('ja4_processing.dict_browser_h2_signatures', 'tls_json', tuple(family)).
--
-- NOTE(review): the dictionary key is (family) only, whereas the source table
-- is keyed on (family, version_min) and is read here without FINAL. When a
-- family has several rows (several version ranges, or not-yet-merged
-- replacements), it is not evident from this file which row the dictionary
-- ends up holding; confirm this is acceptable or select one row per family
-- explicitly.
-- NOTE(review): for a family that is absent from the dictionary, dictGet
-- returns the attribute DEFAULT values below (is_active = 1 included); callers
-- presumably need dictHas to tell "unknown family" apart; verify.
CREATE DICTIONARY IF NOT EXISTS ja4_processing.dict_browser_h2_signatures
(
    -- Lookup key.
    family                      String,

    -- Attributes, copied by name from the source table.
    version_min                 String,
    version_max                 String,
    h2_settings_json            String,
    h2_settings_forbidden       String,
    h2_window_update            UInt32  DEFAULT 0,
    h2_window_update_tolerance  UInt32  DEFAULT 1000,
    h2_priority_expected        UInt8   DEFAULT 0,
    pseudo_header_order         String  DEFAULT '',
    tls_json                    String  DEFAULT '{}',
    headers_required            String  DEFAULT '[]',
    headers_forbidden           String  DEFAULT '[]',
    is_active                   UInt8   DEFAULT 1
)
PRIMARY KEY (family)
SOURCE(CLICKHOUSE(DB 'ja4_processing' TABLE 'browser_h2_signatures'))
LAYOUT(COMPLEX_KEY_HASHED())
-- Reload about every 24h (between 82800 and 86400 seconds).
LIFETIME(MIN 82800 MAX 86400);