Feat: Détection menaces HTTP via vues ClickHouse + simplification shutdown
Some checks failed
Build and Test / test (push) Has been cancelled
Build and Test / build (push) Has been cancelled
Build and Test / docker (push) Has been cancelled

Nouvelles vues de détection (sql/views.sql) :
- Identification hosts par IP/JA4 (view_host_identification, view_host_ja4_anomalies)
- Détection brute force POST et query params variables
- Header fingerprinting (ordre, headers modernes manquants, Sec-CH-UA)
- ALPN mismatch detection (h2 déclaré mais HTTP/1.1 parlé)
- Rate limiting & burst detection (50 req/min, 20 req/10s)
- Path enumeration/scanning (paths sensibles)
- Payload attacks (SQLi, XSS, path traversal)
- JA4 botnet detection (même fingerprint sur 20+ IPs)
- Correlation quality (orphan ratio >80%)

ClickHouse (sql/init.sql) :
- Compression ZSTD(3) sur champs texte (path, query, headers, ja3/ja4)
- TTL automatique : 1 jour (raw) + 7 jours (http_logs)
- Paramètre ttl_only_drop_parts = 1

Shutdown simplifié (internal/app/orchestrator.go) :
- Suppression ShutdownTimeout et logique de flush/attente
- Stop() = cancel() + Close() uniquement
- systemd TimeoutStopSec gère l'arrêt forcé si besoin

File output toggle (internal/config/*.go) :
- Ajout champ Enabled dans FileOutputConfig
- Le sink fichier n'est créé que si enabled && path != ''
- Tests : TestValidate_FileOutputDisabled, TestLoadConfig_FileOutputDisabled

RPM packaging (packaging/rpm/logcorrelator.spec) :
- Changelog 1.1.18 → 1.1.22
- Suppression logcorrelator-tmpfiles.conf (redondant RuntimeDirectory=)

Nettoyage :
- idees.txt → idees/ (dossier)
- Suppression 91.224.92.185.txt (logs exemple)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
toto
2026-03-11 18:28:07 +01:00
parent 5df2fd965b
commit 20ebe7240e
17 changed files with 1089 additions and 6598 deletions

View File

@ -19,96 +19,101 @@ CREATE DATABASE IF NOT EXISTS mabase_prod;
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.http_logs_raw
(
`raw_json` String,
`raw_json` String CODEC(ZSTD(3)),
`ingest_time` DateTime DEFAULT now()
)
ENGINE = MergeTree
PARTITION BY toDate(ingest_time)
ORDER BY ingest_time
SETTINGS index_granularity = 8192;
TTL ingest_time + INTERVAL 1 DAY
SETTINGS
index_granularity = 8192,
ttl_only_drop_parts = 1;
-- -----------------------------------------------------------------------------
-- Table parsée : alimentée automatiquement par la vue matérialisée
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.http_logs
CREATE TABLE mabase_prod.http_logs
(
-- Temporel
`time` DateTime,
`log_date` Date DEFAULT toDate(time),
`time` DateTime,
`log_date` Date DEFAULT toDate(time),
-- Réseau
`src_ip` IPv4,
`src_port` UInt16,
`dst_ip` IPv4,
`dst_port` UInt16,
`src_ip` IPv4,
`src_port` UInt16,
`dst_ip` IPv4,
`dst_port` UInt16,
-- Enrichissement IPLocate
`src_asn` UInt32,
`src_country_code` LowCardinality(String),
`src_as_name` LowCardinality(String),
`src_org` LowCardinality(String),
`src_domain` LowCardinality(String),
`src_asn` UInt32,
`src_country_code` LowCardinality(String),
`src_as_name` LowCardinality(String),
`src_org` LowCardinality(String),
`src_domain` LowCardinality(String),
-- HTTP
`method` LowCardinality(String),
`scheme` LowCardinality(String),
`host` LowCardinality(String),
`path` String,
`query` String,
`http_version` LowCardinality(String),
`method` LowCardinality(String),
`scheme` LowCardinality(String),
`host` LowCardinality(String),
`path` String CODEC(ZSTD(3)),
`query` String CODEC(ZSTD(3)),
`http_version` LowCardinality(String),
-- Corrélation
`orphan_side` LowCardinality(String),
`correlated` UInt8,
`keepalives` UInt16,
`a_timestamp` UInt64,
`b_timestamp` UInt64,
`conn_id` String,
`orphan_side` LowCardinality(String),
`correlated` UInt8,
`keepalives` UInt16,
`a_timestamp` UInt64,
`b_timestamp` UInt64,
`conn_id` String CODEC(ZSTD(3)),
-- Métadonnées IP
`ip_meta_df` UInt8,
`ip_meta_id` UInt16,
`ip_meta_total_length` UInt16,
`ip_meta_ttl` UInt8,
`ip_meta_df` UInt8,
`ip_meta_id` UInt16,
`ip_meta_total_length` UInt16,
`ip_meta_ttl` UInt8,
-- Métadonnées TCP
`tcp_meta_options` LowCardinality(String),
`tcp_meta_window_size` UInt32,
`tcp_meta_mss` UInt16,
`tcp_meta_window_scale` UInt8,
`syn_to_clienthello_ms` Int32,
`tcp_meta_options` LowCardinality(String),
`tcp_meta_window_size` UInt32,
`tcp_meta_mss` UInt16,
`tcp_meta_window_scale` UInt8,
`syn_to_clienthello_ms` Int32,
-- TLS / fingerprint
`tls_version` LowCardinality(String),
`tls_sni` LowCardinality(String),
`tls_alpn` LowCardinality(String),
`ja3` String,
`ja3_hash` String,
`ja4` String,
`tls_version` LowCardinality(String),
`tls_sni` LowCardinality(String),
`tls_alpn` LowCardinality(String),
`ja3` String CODEC(ZSTD(3)),
`ja3_hash` String CODEC(ZSTD(3)),
`ja4` String CODEC(ZSTD(3)),
-- En-têtes HTTP
`client_headers` String,
`header_user_agent` String,
`header_accept` String,
`header_accept_encoding` String,
`header_accept_language` String,
`header_content_type` String,
`header_x_request_id` String,
`header_x_trace_id` String,
`header_x_forwarded_for` String,
`header_sec_ch_ua` String,
`header_sec_ch_ua_mobile` String,
`header_sec_ch_ua_platform` String,
`header_sec_fetch_dest` String,
`header_sec_fetch_mode` String,
`header_sec_fetch_site` String
`client_headers` String CODEC(ZSTD(3)),
`header_user_agent` String CODEC(ZSTD(3)),
`header_accept` String CODEC(ZSTD(3)),
`header_accept_encoding` String CODEC(ZSTD(3)),
`header_accept_language` String CODEC(ZSTD(3)),
`header_content_type` String CODEC(ZSTD(3)),
`header_x_request_id` String CODEC(ZSTD(3)),
`header_x_trace_id` String CODEC(ZSTD(3)),
`header_x_forwarded_for` String CODEC(ZSTD(3)),
`header_sec_ch_ua` String CODEC(ZSTD(3)),
`header_sec_ch_ua_mobile` String CODEC(ZSTD(3)),
`header_sec_ch_ua_platform` String CODEC(ZSTD(3)),
`header_sec_fetch_dest` String CODEC(ZSTD(3)),
`header_sec_fetch_mode` String CODEC(ZSTD(3)),
`header_sec_fetch_site` String CODEC(ZSTD(3))
)
ENGINE = MergeTree
PARTITION BY log_date
ORDER BY (time, src_ip, dst_ip, ja4)
SETTINGS index_granularity = 8192;
TTL log_date + INTERVAL 7 DAY
SETTINGS
index_granularity = 8192,
ttl_only_drop_parts = 1;
-- -----------------------------------------------------------------------------
-- Vue matérialisée : parse le JSON de http_logs_raw vers http_logs

View File

@ -1,154 +0,0 @@
-- ============================================================================
-- PROJET : Moteur de Détection de Menaces HTTP (Full Spectrum)
-- DESCRIPTION : Configuration complète des tables d'agrégation et du scoring.
-- COUVRE : Spoofing UA/TLS, TCP Fingerprinting, Anomalies comportementales.
-- DATE : 2026-03-08
-- ============================================================================
-- ----------------------------------------------------------------------------
-- 1. NETTOYAGE (Ordre inverse des dépendances)
-- ----------------------------------------------------------------------------
-- Drop every object this script manages so the statements below can recreate
-- them from scratch: the scoring view first, then the materialized views,
-- then the aggregation tables.
-- NOTE(review): mv_baseline_update, mv_novelty and mv_traffic_1d are dropped
-- here but no CREATE statement for them is visible in this script — confirm
-- they are defined elsewhere.
DROP VIEW IF EXISTS mabase_prod.live_threat_scores;
DROP VIEW IF EXISTS mabase_prod.mv_baseline_update;
DROP VIEW IF EXISTS mabase_prod.mv_novelty;
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1d;
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1h;
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1m;
-- Aggregation tables go last, after the views that reference them.
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1d;
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1h;
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1m;
-- ----------------------------------------------------------------------------
-- 2. TABLES DE DESTINATION (STORAGE)
-- ----------------------------------------------------------------------------
-- Per-minute aggregation table. The first eight columns are grouping
-- dimensions; every other column stores an aggregate-function state.
CREATE TABLE mabase_prod.agg_traffic_1m
(
    -- Grouping dimensions
    minute                    DateTime,
    host                      LowCardinality(String),
    src_ip                    IPv4,
    src_asn                   UInt32,
    src_country_code          LowCardinality(String),
    ja4                       String,
    ja3_hash                  String,
    header_user_agent         String,

    -- Base metrics
    hits                      AggregateFunction(count, UInt64),
    uniq_paths                AggregateFunction(uniq, String),

    -- Layer 4: TCP and handshake
    avg_syn_to_clienthello_ms AggregateFunction(avg, Int32),
    var_syn_to_clienthello_ms AggregateFunction(varPop, Int32),
    tcp_fingerprint           AggregateFunction(uniq, UInt64), -- MSS + Window + Scale

    -- Layer 7: HTTP fingerprinting
    avg_headers_count         AggregateFunction(avg, Float64),
    host_sni_mismatch         AggregateFunction(countIf, UInt8),

    -- Spoofing and inconsistency counters
    spoofing_ua_tls           AggregateFunction(countIf, UInt8),
    spoofing_ua_alpn          AggregateFunction(countIf, UInt8),
    spoofing_os_ttl           AggregateFunction(countIf, UInt8),
    missing_human_headers     AggregateFunction(countIf, UInt8),

    -- Behaviour and payload counters
    sensitive_path_hits       AggregateFunction(countIf, UInt8),
    suspicious_methods        AggregateFunction(countIf, UInt8),
    suspicious_queries        AggregateFunction(countIf, UInt8)
)
ENGINE = AggregatingMergeTree()
PARTITION BY toYYYYMM(minute)
ORDER BY (host, ja4, src_ip, minute);
-- Tables 1h et 1d (Simplifiées pour le stockage long terme)
-- Hourly aggregation table keyed by (host, ja4, hour): hit-count state and
-- distinct source-IP state per host, country and JA4.
CREATE TABLE mabase_prod.agg_traffic_1h
(
    hour             DateTime,
    host             LowCardinality(String),
    src_country_code LowCardinality(String),
    ja4              String,
    hits             AggregateFunction(count, UInt64),
    uniq_ips         AggregateFunction(uniq, IPv4)
)
ENGINE = AggregatingMergeTree()
ORDER BY (host, ja4, hour);
-- Daily aggregation table keyed by (host, ja4, day): hit-count state and
-- distinct source-IP state per host and JA4.
CREATE TABLE mabase_prod.agg_traffic_1d
(
    day      Date,
    host     LowCardinality(String),
    ja4      String,
    hits     AggregateFunction(count, UInt64),
    uniq_ips AggregateFunction(uniq, IPv4)
)
ENGINE = AggregatingMergeTree()
ORDER BY (host, ja4, day);
-- ----------------------------------------------------------------------------
-- 3. VUES MATÉRIALISÉES (MOTEUR DE CALCUL)
-- ----------------------------------------------------------------------------
-- Materialized view feeding agg_traffic_1m from http_logs. Rows are grouped per
-- minute and per (host, src_ip, src_asn, src_country_code, ja4, ja3_hash,
-- header_user_agent); every metric is emitted as an aggregate-function state.
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1m TO mabase_prod.agg_traffic_1m
AS SELECT
toStartOfMinute(time) AS minute,
host, src_ip, src_asn, src_country_code, ja4, ja3_hash, header_user_agent,
countState() AS hits,
uniqState(path) AS uniq_paths,
avgState(syn_to_clienthello_ms) AS avg_syn_to_clienthello_ms,
varPopState(syn_to_clienthello_ms) AS var_syn_to_clienthello_ms,
-- TCP Fingerprint Hash
-- Distinct-count state over a hash of (MSS, window size, window scale).
uniqState(cityHash64(toString(tcp_meta_mss), toString(tcp_meta_window_size), toString(tcp_meta_window_scale))) AS tcp_fingerprint,
-- HTTP Metrics
-- Header count is computed as the number of commas in client_headers plus one.
avgState(toFloat64(length(client_headers) - length(replaceAll(client_headers, ',', '')) + 1)) AS avg_headers_count,
-- Counts requests whose Host differs from a non-empty TLS SNI.
countIfState(host != tls_sni AND tls_sni != '') AS host_sni_mismatch,
-- Spoofing Logic
-- Each counter pairs a User-Agent substring with a lower-layer attribute that
-- is treated as inconsistent with it.
countIfState((header_user_agent ILIKE '%Chrome%') AND (ja4 NOT ILIKE 't13d%')) AS spoofing_ua_tls,
countIfState((header_user_agent ILIKE '%Chrome%') AND (tls_alpn NOT ILIKE '%h2%')) AS spoofing_ua_alpn,
countIfState((header_user_agent ILIKE '%Windows%') AND (ip_meta_ttl <= 64)) AS spoofing_os_ttl,
countIfState((header_user_agent ILIKE '%Mozilla%') AND (header_sec_ch_ua = '')) AS missing_human_headers,
-- Behavior & Payloads
-- Regex match on the path, a fixed method list, and a length/regex test on the query string.
countIfState(match(path, 'login|auth|admin|password|config|wp-admin|api/v[0-9]/auth')) AS sensitive_path_hits,
countIfState(method IN ('PUT', 'DELETE', 'OPTIONS', 'TRACE')) AS suspicious_methods,
countIfState((length(query) > 250) OR match(query, '(<script|union|select|etc/passwd|%00)')) AS suspicious_queries
FROM mabase_prod.http_logs
GROUP BY minute, host, src_ip, src_asn, src_country_code, ja4, ja3_hash, header_user_agent;
-- Cascading to 1h
-- Rolls agg_traffic_1m up into agg_traffic_1h: per-minute hit-count states are
-- merged into hourly states and a distinct source-IP state is built per group.
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1h
TO mabase_prod.agg_traffic_1h
AS
SELECT
    toStartOfHour(minute) AS hour,
    host,
    src_country_code,
    ja4,
    countMergeState(hits) AS hits,
    uniqState(src_ip) AS uniq_ips
FROM mabase_prod.agg_traffic_1m
GROUP BY
    hour,
    host,
    src_country_code,
    ja4;
-- ----------------------------------------------------------------------------
-- 4. VUE DE SCORING FINAL (VERDICT)
-- ----------------------------------------------------------------------------
-- Scoring view over the last 5 minutes of agg_traffic_1m. For each
-- (src_ip, ja4, src_asn, src_country_code) it sums fixed weights for every
-- signal that fires and keeps only rows with a positive score, highest first.
-- NOTE(review): mabase_prod.tbl_baseline_ja4_7d is joined here but is not
-- created in the visible part of this script — confirm it exists before deploying.
CREATE VIEW mabase_prod.live_threat_scores AS
SELECT
T1.src_ip,
T1.ja4,
T1.src_asn,
T1.src_country_code,
(
-- 1. Signature inconsistencies (high weight: 40-50)
if(countMerge(T1.spoofing_ua_tls) > 0, 50, 0) +
if(countMerge(T1.spoofing_os_ttl) > 0, 40, 0) +
if(countMerge(T1.host_sni_mismatch) > 0, 45, 0) +
if(countMerge(T1.missing_human_headers) > 0, 30, 0) +
-- 2. Network anomalies (medium weight: 20-30)
-- Near-zero handshake-timing variance only counts once more than 5 hits were seen.
if(varPopMerge(T1.var_syn_to_clienthello_ms) < 0.5 AND countMerge(T1.hits) > 5, 30, 0) +
if(avgMerge(T1.avg_headers_count) < 6, 25, 0) +
-- 3. Behaviour (variable weight)
if(countMerge(T1.sensitive_path_hits) > 5, 40, 0) +
if(countMerge(T1.suspicious_queries) > 0, 60, 0) +
if(uniqMerge(T1.uniq_paths) > 50, 40, 0) + -- Sweep (scanner)
-- 4. Volume vs baseline
-- NOTE(review): the hit count covers the 5-minute window of the WHERE clause,
-- while the column name p99_hits_per_hour suggests an hourly figure — confirm
-- the units are meant to be compared directly.
if(countMerge(T1.hits) > (B.p99_hits_per_hour * 3), 50, 0)
) AS final_threat_score,
countMerge(T1.hits) AS request_count,
B.p99_hits_per_hour AS baseline
FROM mabase_prod.agg_traffic_1m AS T1
LEFT JOIN mabase_prod.tbl_baseline_ja4_7d AS B ON T1.ja4 = B.ja4
WHERE T1.minute >= now() - INTERVAL 5 MINUTE
GROUP BY T1.src_ip, T1.ja4, T1.src_asn, T1.src_country_code, B.p99_hits_per_hour
HAVING final_threat_score > 0
ORDER BY final_threat_score DESC;

251
sql/views.sql Normal file
View File

@ -0,0 +1,251 @@
-- ============================================================================
-- SCRIPT DE DÉPLOIEMENT DES VUES DE DÉTECTION DE BOTS & SPAM (CLICKHOUSE)
-- ============================================================================
-- ----------------------------------------------------------------------------
-- 1. NETTOYAGE STRICT
-- ----------------------------------------------------------------------------
-- Drop every object this script manages so the statements below can recreate
-- them with plain CREATE statements.
-- NOTE(review): ml_detected_anomalies is a regular table that this script
-- recreates empty further down, so each run discards its stored rows —
-- confirm that is intended.
DROP TABLE IF EXISTS mabase_prod.ml_detected_anomalies;
-- Views that read from the aggregation tables.
DROP VIEW IF EXISTS mabase_prod.view_ai_features_1h;
DROP VIEW IF EXISTS mabase_prod.view_host_ip_ja4_rotation;
DROP VIEW IF EXISTS mabase_prod.view_host_ja4_anomalies;
DROP VIEW IF EXISTS mabase_prod.view_form_bruteforce_detected;
DROP VIEW IF EXISTS mabase_prod.view_alpn_mismatch_detected;
DROP VIEW IF EXISTS mabase_prod.view_tcp_spoofing_detected;
-- Each materialized view is dropped before the table it writes to.
DROP VIEW IF EXISTS mabase_prod.mv_agg_host_ip_ja4_1h;
DROP TABLE IF EXISTS mabase_prod.agg_host_ip_ja4_1h;
DROP VIEW IF EXISTS mabase_prod.mv_agg_header_fingerprint_1h;
DROP TABLE IF EXISTS mabase_prod.agg_header_fingerprint_1h;
-- ----------------------------------------------------------------------------
-- 2. TABLES D'AGRÉGATION ET VUES MATÉRIALISÉES (TEMPS RÉEL)
-- ----------------------------------------------------------------------------
-- Hourly aggregation table keyed by (window_start, src_ip, ja4, host).
-- uniq_paths, uniq_query_params and tcp_jitter_variance hold aggregate states
-- and have to be read with the matching -Merge function.
-- Rows expire 7 days after window_start.
CREATE TABLE mabase_prod.agg_host_ip_ja4_1h
(
    -- Key columns
    window_start        DateTime,
    src_ip              String,
    ja4                 String,
    host                String,

    -- Activity window and counters
    first_seen          SimpleAggregateFunction(min, DateTime),
    last_seen           SimpleAggregateFunction(max, DateTime),
    hits                SimpleAggregateFunction(sum, UInt64),
    count_post          SimpleAggregateFunction(sum, UInt64),
    uniq_paths          AggregateFunction(uniq, String),
    uniq_query_params   AggregateFunction(uniq, String),
    src_country_code    SimpleAggregateFunction(any, String),

    -- TCP / IP attributes
    tcp_fingerprint     SimpleAggregateFunction(any, String),
    tcp_jitter_variance AggregateFunction(varPop, Float64),
    tcp_window_size     SimpleAggregateFunction(any, UInt32),
    tcp_window_scale    SimpleAggregateFunction(any, UInt32),
    tcp_mss             SimpleAggregateFunction(any, UInt32),
    tcp_ttl             SimpleAggregateFunction(any, UInt32),

    -- HTTP attributes
    http_version        SimpleAggregateFunction(any, String),
    first_ua            SimpleAggregateFunction(any, String)
)
ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip, ja4, host)
TTL window_start + INTERVAL 7 DAY;
-- Materialized view feeding agg_host_ip_ja4_1h from http_logs, one row per
-- inserted block and per (hour, src_ip, ja4, host).
CREATE MATERIALIZED VIEW mabase_prod.mv_agg_host_ip_ja4_1h
TO mabase_prod.agg_host_ip_ja4_1h AS
SELECT
    toStartOfHour(time) AS window_start,
    src_ip,
    ja4,
    host,
    min(time) AS first_seen,
    max(time) AS last_seen,
    count() AS hits,
    countIf(method = 'POST') AS count_post,
    uniqState(path) AS uniq_paths,
    -- Distinct-count state over the whole query string.
    uniqState(query) AS uniq_query_params,
    any(src_country_code) AS src_country_code,
    -- The TCP fields are hashed as separate arguments. Concatenating their
    -- decimal strings without a separator let different tuples collide
    -- (window 1234 / mss 5 and window 123 / mss 45 both gave '12345...').
    any(toString(cityHash64(tcp_meta_window_size, tcp_meta_mss, tcp_meta_window_scale, tcp_meta_options))) AS tcp_fingerprint,
    varPopState(toFloat64(syn_to_clienthello_ms)) AS tcp_jitter_variance,
    any(tcp_meta_window_size) AS tcp_window_size,
    any(tcp_meta_window_scale) AS tcp_window_scale,
    any(tcp_meta_mss) AS tcp_mss,
    -- The column is named tcp_ttl but carries the IP-level TTL.
    any(ip_meta_ttl) AS tcp_ttl,
    any(http_version) AS http_version,
    any(header_user_agent) AS first_ua
FROM mabase_prod.http_logs
GROUP BY window_start, src_ip, ja4, host;
-- Hourly per-source-IP header features, keyed by (window_start, src_ip).
-- Rows expire 7 days after window_start.
CREATE TABLE mabase_prod.agg_header_fingerprint_1h
(
    window_start         DateTime,
    src_ip               String,
    header_order_hash    SimpleAggregateFunction(any, String),
    modern_browser_score SimpleAggregateFunction(max, UInt8),
    sec_fetch_mode       SimpleAggregateFunction(any, String),
    sec_fetch_dest       SimpleAggregateFunction(any, String),
    count_site_none      SimpleAggregateFunction(sum, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (window_start, src_ip)
TTL window_start + INTERVAL 7 DAY;
-- Materialized view feeding agg_header_fingerprint_1h from http_logs,
-- grouped per hour and source IP.
CREATE MATERIALIZED VIEW mabase_prod.mv_agg_header_fingerprint_1h
TO mabase_prod.agg_header_fingerprint_1h
AS
SELECT
    toStartOfHour(time) AS window_start,
    src_ip,
    any(toString(cityHash64(client_headers))) AS header_order_hash,
    -- 100 when Sec-CH-UA is present, 50 when only a User-Agent is, else 0.
    max(
        toUInt8(
            multiIf(
                length(header_sec_ch_ua) > 0, 100,
                length(header_user_agent) > 0, 50,
                0
            )
        )
    ) AS modern_browser_score,
    any(header_sec_fetch_mode) AS sec_fetch_mode,
    any(header_sec_fetch_dest) AS sec_fetch_dest,
    countIf(header_sec_fetch_site = 'none') AS count_site_none
FROM mabase_prod.http_logs
GROUP BY
    window_start,
    src_ip;
-- ----------------------------------------------------------------------------
-- 3. TABLE DE DESTINATION POUR LE MACHINE LEARNING
-- ----------------------------------------------------------------------------
-- Destination table for anomaly detections: one row per detection with its
-- score and a free-text reason. Rows expire 30 days after detected_at.
CREATE TABLE mabase_prod.ml_detected_anomalies
(
    detected_at   DateTime,
    src_ip        String,
    ja4           String,
    host          String,
    anomaly_score Float32,
    reason        String
)
ENGINE = MergeTree()
ORDER BY (detected_at, src_ip, ja4)
TTL detected_at + INTERVAL 30 DAY;
-- ----------------------------------------------------------------------------
-- 4. VUE DE FEATURE ENGINEERING POUR L'ISOLATION FOREST (RÉSOLUE)
-- ----------------------------------------------------------------------------
-- Utilisation de sous-requêtes agrégées (GROUP BY explicite) avant la jointure
-- pour éviter les erreurs d'état et le produit cartésien.
-- Feature view: one row per (window_start, src_ip, ja4, host) aggregate from
-- agg_host_ip_ja4_1h, starting at the hour that contains now() - 2 hours,
-- left-joined to the per-IP header features of the same hourly window.
CREATE VIEW mabase_prod.view_ai_features_1h AS
SELECT
    a.src_ip,
    a.ja4,
    a.host,
    a.hits,
    a.uniq_paths,
    a.uniq_query_params,
    a.count_post,
    -- L4/L7 correlation indicator: both a JA4 and a TCP fingerprint are present
    IF(length(a.ja4) > 0 AND length(a.tcp_fingerprint) > 0, 1, 0) AS correlated,
    -- Behavioural dimensions (the +1 in each denominator avoids division by zero)
    (a.count_post / (a.hits + 1)) AS post_ratio,
    (a.uniq_query_params / (a.uniq_paths + 1)) AS fuzzing_index,
    (a.hits / (dateDiff('second', a.first_seen, a.last_seen) + 1)) AS hit_velocity,
    -- TCP / L4 dimensions
    COALESCE(a.tcp_jitter_variance, 0) AS tcp_jitter_variance,
    -- Number of result rows sharing the same TCP fingerprint
    count() OVER (PARTITION BY a.tcp_fingerprint) AS tcp_shared_count,
    a.tcp_window_size * exp2(a.tcp_window_scale) AS true_window_size,
    IF(a.tcp_mss > 0, a.tcp_window_size / a.tcp_mss, 0) AS window_mss_ratio,
    -- TLS / L5 dimensions (mismatch).
    -- In the JA4_a section (e.g. 't13d1516h2_') the two-character ALPN code
    -- occupies characters 9-10; substring() is 1-based, so the offset is 9.
    -- The previous offset of 10 read '2_' and could never match.
    -- NOTE(review): assumes HTTP/2 is stored as http_version = '2' — confirm
    -- against the ingest format.
    IF(substring(a.ja4, 9, 2) = 'h2' AND a.http_version != '2', 1, 0) AS alpn_http_mismatch,
    IF(substring(a.ja4, 9, 2) = '00', 1, 0) AS is_alpn_missing,
    -- HTTP / L7 dimensions
    COALESCE(h.modern_browser_score, 0) AS modern_browser_score,
    IF(h.sec_fetch_mode = 'navigate' AND h.sec_fetch_dest != 'document', 1, 0) AS is_fake_navigation,
    (h.count_site_none / (a.hits + 1)) AS site_none_ratio
FROM (
    -- Consolidate the per-host aggregate rows: the table can hold several
    -- unmerged rows per key, so they are re-aggregated explicitly here.
    SELECT
        window_start, src_ip, ja4, host,
        sum(hits) AS hits,
        uniqMerge(uniq_paths) AS uniq_paths,
        uniqMerge(uniq_query_params) AS uniq_query_params,
        sum(count_post) AS count_post,
        min(first_seen) AS first_seen,
        max(last_seen) AS last_seen,
        any(tcp_fingerprint) AS tcp_fingerprint,
        varPopMerge(tcp_jitter_variance) AS tcp_jitter_variance,
        any(tcp_window_size) AS tcp_window_size,
        any(tcp_window_scale) AS tcp_window_scale,
        any(tcp_mss) AS tcp_mss,
        any(http_version) AS http_version
    FROM mabase_prod.agg_host_ip_ja4_1h
    WHERE window_start >= toStartOfHour(now() - INTERVAL 2 HOUR)
    GROUP BY window_start, src_ip, ja4, host
) a
LEFT JOIN (
    -- Consolidate the header features the same way
    SELECT
        window_start, src_ip,
        max(modern_browser_score) AS modern_browser_score,
        any(sec_fetch_mode) AS sec_fetch_mode,
        any(sec_fetch_dest) AS sec_fetch_dest,
        sum(count_site_none) AS count_site_none
    FROM mabase_prod.agg_header_fingerprint_1h
    WHERE window_start >= toStartOfHour(now() - INTERVAL 2 HOUR)
    GROUP BY window_start, src_ip
) h
ON a.src_ip = h.src_ip AND a.window_start = h.window_start;
-- ----------------------------------------------------------------------------
-- 5. VUES DE DÉTECTION HEURISTIQUES STATIQUES (RÉSOLUES)
-- ----------------------------------------------------------------------------
-- Source IPs that presented at least 5 distinct JA4 values and more than 100
-- hits since the start of the previous hour.
CREATE VIEW mabase_prod.view_host_ip_ja4_rotation
AS
SELECT
    src_ip,
    uniqExact(ja4) AS distinct_ja4_count,
    sum(hits)      AS total_hits
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY
    src_ip
HAVING
    distinct_ja4_count >= 5
    AND total_hits > 100;
-- JA4 values seen from at least 20 distinct source IPs and against at least
-- 3 distinct hosts since the start of the previous hour.
CREATE VIEW mabase_prod.view_host_ja4_anomalies
AS
SELECT
    ja4,
    uniqExact(src_ip)           AS unique_ips,
    uniqExact(src_country_code) AS unique_countries,
    uniqExact(host)             AS targeted_hosts
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY
    ja4
HAVING
    unique_ips >= 20
    AND targeted_hosts >= 3;
-- (src_ip, ja4, host) triples with at least 20 hits and at least 10 distinct
-- query strings since the start of the previous hour.
-- The count_post column is not consulted by this view.
CREATE VIEW mabase_prod.view_form_bruteforce_detected
AS
SELECT
    src_ip,
    ja4,
    host,
    sum(hits)                     AS hits,
    uniqMerge(uniq_query_params)  AS query_params_count
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY
    src_ip,
    ja4,
    host
HAVING
    query_params_count >= 10
    AND hits >= 20;
-- Clients whose JA4 advertises ALPN h2 or h3 but whose recorded HTTP version
-- is '1.1', with at least 10 hits since the start of the previous hour.
CREATE VIEW mabase_prod.view_alpn_mismatch_detected AS
SELECT
    src_ip, ja4, host,
    sum(hits) AS hits,
    any(http_version) AS http_version
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
    -- In the JA4_a section (e.g. 't13d1516h2_') the two-character ALPN code
    -- occupies characters 9-10; substring() is 1-based, so the offset is 9.
    -- The previous offset of 10 read '2_' and the filter never matched.
    AND substring(ja4, 9, 2) IN ('h2', 'h3')
GROUP BY src_ip, ja4, host
HAVING http_version = '1.1' AND hits >= 10;
-- (src_ip, ja4) pairs whose observed IP TTL contradicts the operating system
-- claimed in the User-Agent, since the start of the previous hour.
--   * Windows starts at TTL 128, so an observed TTL <= 64 is inconsistent.
--   * iOS starts at TTL 64, so genuine iPhones always arrive with TTL <= 64;
--     only an observed TTL above 64 is inconsistent. The previous predicate
--     (tcp_ttl <= 64 for both) flagged every real iPhone.
-- NOTE(review): rows with tcp_ttl = 0 may simply lack network-side data —
-- confirm whether they should be excluded.
CREATE VIEW mabase_prod.view_tcp_spoofing_detected AS
SELECT
    src_ip, ja4,
    any(tcp_ttl) AS tcp_ttl,
    any(tcp_window_size) AS tcp_window_size,
    any(first_ua) AS first_ua
FROM mabase_prod.agg_host_ip_ja4_1h
WHERE window_start >= toStartOfHour(now() - INTERVAL 1 HOUR)
GROUP BY src_ip, ja4
HAVING (tcp_ttl <= 64 AND first_ua ILIKE '%Windows%')
    OR (tcp_ttl > 64 AND first_ua ILIKE '%iPhone%');