view
This commit is contained in:
10
sql/bots.sql
Normal file
10
sql/bots.sql
Normal file
@ -0,0 +1,10 @@
|
||||
DROP TABLE IF EXISTS mabase_prod.ref_bot_networks;
|
||||
|
||||
CREATE TABLE mabase_prod.ref_bot_networks (
|
||||
-- On utilise IPv6CIDR car il accepte aussi les IPv4 au format ::ffff:1.2.3.4/120
|
||||
network IPv6CIDR,
|
||||
bot_name LowCardinality(String),
|
||||
is_legitimate UInt8,
|
||||
last_update DateTime
|
||||
) ENGINE = ReplacingMergeTree(last_update)
|
||||
ORDER BY (network, bot_name);
|
||||
@ -1,15 +0,0 @@
|
||||
DROP DICTIONARY IF EXISTS mabase_prod.dict_iplocate_asn;
|
||||
|
||||
CREATE DICTIONARY IF NOT EXISTS mabase_prod.dict_iplocate_asn
|
||||
(
|
||||
network String,
|
||||
asn UInt32,
|
||||
country_code String,
|
||||
name String,
|
||||
org String,
|
||||
domain String
|
||||
)
|
||||
PRIMARY KEY network
|
||||
SOURCE(FILE(path '/var/lib/clickhouse/user_files/iplocate-ip-to-asn.csv' format 'CSVWithNames'))
|
||||
LAYOUT(IP_TRIE())
|
||||
LIFETIME(MIN 3600 MAX 7200);
|
||||
190
sql/mv1.sql
190
sql/mv1.sql
@ -1,11 +1,12 @@
|
||||
-- ============================================================================
|
||||
-- PROJET : Moteur de Détection de Menaces HTTP
|
||||
-- DESCRIPTION : Reconfiguration des vues d'agrégation, nouveauté et scoring.
|
||||
-- PROJET : Moteur de Détection de Menaces HTTP (Full Spectrum)
|
||||
-- DESCRIPTION : Configuration complète des tables d'agrégation et du scoring.
|
||||
-- COUVRE : Spoofing UA/TLS, TCP Fingerprinting, Anomalies comportementales.
|
||||
-- DATE : 2026-03-08
|
||||
-- ============================================================================
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- 1. NETTOYAGE DES OBJETS EXISTANTS (Ordre inverse des dépendances)
|
||||
-- 1. NETTOYAGE (Ordre inverse des dépendances)
|
||||
-- ----------------------------------------------------------------------------
|
||||
DROP VIEW IF EXISTS mabase_prod.live_threat_scores;
|
||||
DROP VIEW IF EXISTS mabase_prod.mv_baseline_update;
|
||||
@ -14,13 +15,76 @@ DROP VIEW IF EXISTS mabase_prod.mv_traffic_1d;
|
||||
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1h;
|
||||
DROP VIEW IF EXISTS mabase_prod.mv_traffic_1m;
|
||||
|
||||
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1d;
|
||||
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1h;
|
||||
DROP TABLE IF EXISTS mabase_prod.agg_traffic_1m;
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- 2. RECONSTRUCTION DE LA CHAÎNE DE ROLLUP (Aggrégations temporelles)
|
||||
-- 2. TABLES DE DESTINATION (STORAGE)
|
||||
-- ----------------------------------------------------------------------------
|
||||
|
||||
-- MV 1 Minute : Transformation des logs bruts en métriques techniques
|
||||
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1m
|
||||
TO mabase_prod.agg_traffic_1m
|
||||
CREATE TABLE mabase_prod.agg_traffic_1m (
|
||||
minute DateTime,
|
||||
host LowCardinality(String),
|
||||
src_ip IPv4,
|
||||
src_asn UInt32,
|
||||
src_country_code LowCardinality(String),
|
||||
ja4 String,
|
||||
ja3_hash String,
|
||||
header_user_agent String,
|
||||
|
||||
-- Métriques de Base
|
||||
hits AggregateFunction(count, UInt64),
|
||||
uniq_paths AggregateFunction(uniq, String),
|
||||
|
||||
-- Couche 4 : TCP & Handshake
|
||||
avg_syn_to_clienthello_ms AggregateFunction(avg, Int32),
|
||||
var_syn_to_clienthello_ms AggregateFunction(varPop, Int32),
|
||||
tcp_fingerprint AggregateFunction(uniq, UInt64), -- MSS + Window + Scale
|
||||
|
||||
-- Couche 7 : HTTP Fingerprinting
|
||||
avg_headers_count AggregateFunction(avg, Float64),
|
||||
host_sni_mismatch AggregateFunction(countIf, UInt8),
|
||||
|
||||
-- Détection Spoofing & Incohérences
|
||||
spoofing_ua_tls AggregateFunction(countIf, UInt8),
|
||||
spoofing_ua_alpn AggregateFunction(countIf, UInt8),
|
||||
spoofing_os_ttl AggregateFunction(countIf, UInt8),
|
||||
missing_human_headers AggregateFunction(countIf, UInt8),
|
||||
|
||||
-- Comportement & Payloads
|
||||
sensitive_path_hits AggregateFunction(countIf, UInt8),
|
||||
suspicious_methods AggregateFunction(countIf, UInt8),
|
||||
suspicious_queries AggregateFunction(countIf, UInt8)
|
||||
) ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY toYYYYMM(minute)
|
||||
ORDER BY (host, ja4, src_ip, minute);
|
||||
|
||||
-- Tables 1h et 1d (Simplifiées pour le stockage long terme)
|
||||
CREATE TABLE mabase_prod.agg_traffic_1h (
|
||||
hour DateTime,
|
||||
host LowCardinality(String),
|
||||
src_country_code LowCardinality(String),
|
||||
ja4 String,
|
||||
hits AggregateFunction(count, UInt64),
|
||||
uniq_ips AggregateFunction(uniq, IPv4)
|
||||
) ENGINE = AggregatingMergeTree() ORDER BY (host, ja4, hour);
|
||||
|
||||
CREATE TABLE mabase_prod.agg_traffic_1d (
|
||||
day Date,
|
||||
host LowCardinality(String),
|
||||
ja4 String,
|
||||
hits AggregateFunction(count, UInt64),
|
||||
uniq_ips AggregateFunction(uniq, IPv4)
|
||||
) ENGINE = AggregatingMergeTree() ORDER BY (host, ja4, day);
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- 3. VUES MATÉRIALISÉES (MOTEUR DE CALCUL)
|
||||
-- ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1m TO mabase_prod.agg_traffic_1m
|
||||
AS SELECT
|
||||
toStartOfMinute(time) AS minute,
|
||||
host, src_ip, src_asn, src_country_code, ja4, ja3_hash, header_user_agent,
|
||||
@ -28,80 +92,30 @@ AS SELECT
|
||||
uniqState(path) AS uniq_paths,
|
||||
avgState(syn_to_clienthello_ms) AS avg_syn_to_clienthello_ms,
|
||||
varPopState(syn_to_clienthello_ms) AS var_syn_to_clienthello_ms,
|
||||
avgState(toFloat64((length(client_headers) - length(replaceAll(client_headers, ',', ''))) + 1)) AS avg_headers_count,
|
||||
-- TCP Fingerprint Hash
|
||||
uniqState(cityHash64(toString(tcp_meta_mss), toString(tcp_meta_window_size), toString(tcp_meta_window_scale))) AS tcp_fingerprint,
|
||||
-- HTTP Metrics
|
||||
avgState(toFloat64(length(client_headers) - length(replaceAll(client_headers, ',', '')) + 1)) AS avg_headers_count,
|
||||
countIfState(host != tls_sni AND tls_sni != '') AS host_sni_mismatch,
|
||||
-- Spoofing Logic
|
||||
countIfState((header_user_agent ILIKE '%Chrome%') AND (ja4 NOT ILIKE 't13d%')) AS spoofing_ua_tls,
|
||||
countIfState((header_user_agent ILIKE '%Chrome%') AND (tls_alpn NOT ILIKE '%h2%')) AS spoofing_ua_alpn,
|
||||
countIfState((header_user_agent ILIKE '%Windows%') AND (ip_meta_ttl <= 64)) AS spoofing_os_ttl,
|
||||
countIfState((header_accept_language = '') OR (header_sec_ch_ua = '')) AS missing_human_headers,
|
||||
countIfState((header_user_agent ILIKE '%Mozilla%') AND (header_sec_ch_ua = '')) AS missing_human_headers,
|
||||
-- Behavior & Payloads
|
||||
countIfState(match(path, 'login|auth|admin|password|config|wp-admin|api/v[0-9]/auth')) AS sensitive_path_hits,
|
||||
countIfState(method IN ('PUT', 'DELETE', 'OPTIONS', 'TRACE')) AS suspicious_methods,
|
||||
countIfState((length(query) > 200) OR match(query, '(%[0-9A-Fa-f]{2}){5,}')) AS suspicious_queries
|
||||
countIfState((length(query) > 250) OR match(query, '(<script|union|select|etc/passwd|%00)')) AS suspicious_queries
|
||||
FROM mabase_prod.http_logs
|
||||
GROUP BY minute, host, src_ip, src_asn, src_country_code, ja4, ja3_hash, header_user_agent;
|
||||
|
||||
-- MV 1 Heure : Agrégation secondaire (Cascading)
|
||||
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1h
|
||||
TO mabase_prod.agg_traffic_1h
|
||||
AS SELECT
|
||||
toStartOfHour(minute) AS hour,
|
||||
host, src_country_code, ja4, ja3_hash, header_user_agent,
|
||||
countMergeState(hits) AS hits,
|
||||
uniqMergeState(uniq_paths) AS uniq_paths,
|
||||
countIfMergeState(missing_human_headers) AS missing_human_headers,
|
||||
uniqState(src_ip) AS uniq_ips
|
||||
FROM mabase_prod.agg_traffic_1m
|
||||
GROUP BY hour, host, src_country_code, ja4, ja3_hash, header_user_agent;
|
||||
|
||||
-- MV 1 Jour : Agrégation tertiaire pour archivage et baseline
|
||||
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1d
|
||||
TO mabase_prod.agg_traffic_1d
|
||||
AS SELECT
|
||||
toDate(hour) AS day,
|
||||
host, src_country_code, ja4, ja3_hash, header_user_agent,
|
||||
countMergeState(hits) AS hits,
|
||||
uniqMergeState(uniq_ips) AS uniq_ips,
|
||||
uniqMergeState(uniq_paths) AS uniq_paths,
|
||||
countIfMergeState(missing_human_headers) AS missing_human_headers
|
||||
FROM mabase_prod.agg_traffic_1h
|
||||
GROUP BY day, host, src_country_code, ja4, ja3_hash, header_user_agent;
|
||||
-- Cascading to 1h
|
||||
CREATE MATERIALIZED VIEW mabase_prod.mv_traffic_1h TO mabase_prod.agg_traffic_1h
|
||||
AS SELECT toStartOfHour(minute) AS hour, host, src_country_code, ja4, countMergeState(hits) AS hits, uniqState(src_ip) AS uniq_ips
|
||||
FROM mabase_prod.agg_traffic_1m GROUP BY hour, host, src_country_code, ja4;
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- 3. RECONSTRUCTION DES BRIQUES D'INTELLIGENCE (Novelty & Baseline)
|
||||
-- ----------------------------------------------------------------------------
|
||||
|
||||
-- MV Novelty : Détection de nouvelles empreintes (HTTP + TLS)
|
||||
CREATE MATERIALIZED VIEW mabase_prod.mv_novelty
|
||||
TO mabase_prod.agg_novelty
|
||||
AS SELECT
|
||||
host,
|
||||
ja4,
|
||||
cityHash64(client_headers) AS http_fp,
|
||||
minState(time) AS first_seen,
|
||||
maxState(time) AS last_seen,
|
||||
countState() AS total_hits
|
||||
FROM mabase_prod.http_logs
|
||||
GROUP BY host, ja4, http_fp;
|
||||
|
||||
-- MV Baseline : Calcul statistique du trafic normal par JA4
|
||||
CREATE MATERIALIZED VIEW mabase_prod.mv_baseline_update
|
||||
TO mabase_prod.tbl_baseline_ja4_7d
|
||||
AS SELECT
|
||||
ja4,
|
||||
quantile(0.99)(hourly_hits) AS p99_hits_per_hour,
|
||||
avg(hourly_hits) AS avg_hits_per_hour,
|
||||
now() AS last_update
|
||||
FROM (
|
||||
SELECT
|
||||
ja4,
|
||||
toStartOfHour(minute) as hour,
|
||||
countMerge(hits) AS hourly_hits
|
||||
FROM mabase_prod.agg_traffic_1m
|
||||
WHERE minute >= now() - INTERVAL 7 DAY
|
||||
GROUP BY ja4, hour
|
||||
)
|
||||
GROUP BY ja4;
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- 4. VUE DE SCORING FINAL (Verdict Temps Réel)
|
||||
-- 4. VUE DE SCORING FINAL (VERDICT)
|
||||
-- ----------------------------------------------------------------------------
|
||||
|
||||
CREATE VIEW mabase_prod.live_threat_scores AS
|
||||
@ -111,16 +125,30 @@ SELECT
|
||||
T1.src_asn,
|
||||
T1.src_country_code,
|
||||
(
|
||||
if(countMerge(T1.spoofing_ua_tls) > 0, 40, 0) +
|
||||
-- 1. Incohérences de Signature (Poids Fort : 40-50)
|
||||
if(countMerge(T1.spoofing_ua_tls) > 0, 50, 0) +
|
||||
if(countMerge(T1.spoofing_os_ttl) > 0, 40, 0) +
|
||||
if(varPopMerge(T1.var_syn_to_clienthello_ms) < 1.0, 20, 0) +
|
||||
if(dateDiff('hour', minMerge(N.first_seen), now()) < 2, 30, 0) +
|
||||
if(countMerge(T1.hits) > coalesce(B.p99_hits_per_hour * 3, 1000), 50, 0)
|
||||
) AS final_score,
|
||||
countMerge(T1.hits) AS current_hits,
|
||||
B.p99_hits_per_hour AS historical_baseline
|
||||
if(countMerge(T1.host_sni_mismatch) > 0, 45, 0) +
|
||||
if(countMerge(T1.missing_human_headers) > 0, 30, 0) +
|
||||
|
||||
-- 2. Anomalies Réseau (Poids Moyen : 20-30)
|
||||
if(varPopMerge(T1.var_syn_to_clienthello_ms) < 0.5 AND countMerge(T1.hits) > 5, 30, 0) +
|
||||
if(avgMerge(T1.avg_headers_count) < 6, 25, 0) +
|
||||
|
||||
-- 3. Comportement (Poids Variable)
|
||||
if(countMerge(T1.sensitive_path_hits) > 5, 40, 0) +
|
||||
if(countMerge(T1.suspicious_queries) > 0, 60, 0) +
|
||||
if(uniqMerge(T1.uniq_paths) > 50, 40, 0) + -- Balayage (Scanner)
|
||||
|
||||
-- 4. Volumétrie vs Baseline
|
||||
if(countMerge(T1.hits) > (B.p99_hits_per_hour * 3), 50, 0)
|
||||
|
||||
) AS final_threat_score,
|
||||
countMerge(T1.hits) AS request_count,
|
||||
B.p99_hits_per_hour AS baseline
|
||||
FROM mabase_prod.agg_traffic_1m AS T1
|
||||
LEFT JOIN mabase_prod.agg_novelty AS N ON T1.ja4 = N.ja4 AND T1.host = N.host
|
||||
LEFT JOIN mabase_prod.tbl_baseline_ja4_7d AS B ON T1.ja4 = B.ja4
|
||||
WHERE T1.minute >= now() - INTERVAL 5 MINUTE
|
||||
GROUP BY T1.src_ip, T1.ja4, T1.src_asn, T1.src_country_code, B.p99_hits_per_hour;
|
||||
GROUP BY T1.src_ip, T1.ja4, T1.src_asn, T1.src_country_code, B.p99_hits_per_hour
|
||||
HAVING final_threat_score > 0
|
||||
ORDER BY final_threat_score DESC;
|
||||
|
||||
29
sql/tables.sql
Normal file
29
sql/tables.sql
Normal file
@ -0,0 +1,29 @@
|
||||
DROP DICTIONARY IF EXISTS mabase_prod.dict_iplocate_asn;
|
||||
|
||||
CREATE DICTIONARY IF NOT EXISTS mabase_prod.dict_iplocate_asn
|
||||
(
|
||||
network String,
|
||||
asn UInt32,
|
||||
country_code String,
|
||||
name String,
|
||||
org String,
|
||||
domain String
|
||||
)
|
||||
PRIMARY KEY network
|
||||
SOURCE(FILE(path '/var/lib/clickhouse/user_files/iplocate-ip-to-asn.csv' format 'CSVWithNames'))
|
||||
LAYOUT(IP_TRIE())
|
||||
LIFETIME(MIN 3600 MAX 7200);
|
||||
|
||||
|
||||
|
||||
-- Suppression si existe pour reconfiguration
|
||||
DROP TABLE IF EXISTS mabase_prod.ref_bot_networks;
|
||||
|
||||
-- Table optimisée pour le filtrage binaire de CIDR
|
||||
CREATE TABLE mabase_prod.ref_bot_networks (
|
||||
network IPv6CIDR, -- Gère nativement '1.2.3.0/24' et '2001:db8::/32'
|
||||
bot_name LowCardinality(String),
|
||||
is_legitimate UInt8, -- 1 = Whitelist, 0 = Blacklist
|
||||
last_update DateTime
|
||||
) ENGINE = ReplacingMergeTree(last_update)
|
||||
ORDER BY (network, bot_name)
|
||||
Reference in New Issue
Block a user