docs: update ClickHouse schema with new table structure
- http_logs_raw: partition by toDate(ingest_time), order by ingest_time
- http_logs: explicit columns (no DEFAULT except log_date), extracted by MV
- mv_http_logs: full SELECT with JSONExtract* + coalesce for all fields
- Add 13 HTTP header fields (User-Agent, Accept, Sec-CH-UA, etc.)
- New ORDER BY: (time, src_ip, dst_ip, ja4)
- architecture.yml: match new schema with MV query details

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
153
README.md
153
README.md
@ -207,63 +207,72 @@ DROP TABLE IF EXISTS mabase_prod.http_logs_raw;
|
|||||||
|
|
||||||
CREATE TABLE mabase_prod.http_logs_raw
|
CREATE TABLE mabase_prod.http_logs_raw
|
||||||
(
|
(
|
||||||
raw_json String,
|
raw_json String,
|
||||||
ingest_time DateTime DEFAULT now()
|
ingest_time DateTime DEFAULT now()
|
||||||
)
|
)
|
||||||
ENGINE = MergeTree
|
ENGINE = MergeTree
|
||||||
ORDER BY tuple()
|
PARTITION BY toDate(ingest_time)
|
||||||
TTL ingest_time + INTERVAL 1 DAY
|
ORDER BY ingest_time;
|
||||||
SETTINGS ttl_only_drop_parts = 1;
|
|
||||||
|
|
||||||
-- 3. Table parsée
|
-- 3. Table parsée
|
||||||
DROP TABLE IF EXISTS mabase_prod.http_logs;
|
DROP TABLE IF EXISTS mabase_prod.http_logs;
|
||||||
|
|
||||||
CREATE TABLE mabase_prod.http_logs
|
CREATE TABLE mabase_prod.http_logs
|
||||||
(
|
(
|
||||||
raw_json String,
|
time DateTime,
|
||||||
|
log_date Date DEFAULT toDate(time),
|
||||||
|
|
||||||
-- champs de base
|
src_ip IPv4,
|
||||||
time_str String DEFAULT JSONExtractString(raw_json, 'time'),
|
src_port UInt16,
|
||||||
timestamp_str String DEFAULT JSONExtractString(raw_json, 'timestamp'),
|
dst_ip IPv4,
|
||||||
time DateTime DEFAULT parseDateTimeBestEffort(time_str),
|
dst_port UInt16,
|
||||||
log_date Date DEFAULT toDate(time),
|
|
||||||
|
|
||||||
src_ip IPv4 DEFAULT toIPv4(JSONExtractString(raw_json, 'src_ip')),
|
method LowCardinality(String),
|
||||||
src_port UInt16 DEFAULT toUInt16(JSONExtractUInt(raw_json, 'src_port')),
|
scheme LowCardinality(String),
|
||||||
dst_ip IPv4 DEFAULT toIPv4(JSONExtractString(raw_json, 'dst_ip')),
|
host LowCardinality(String),
|
||||||
dst_port UInt16 DEFAULT toUInt16(JSONExtractUInt(raw_json, 'dst_port')),
|
path String,
|
||||||
|
query String,
|
||||||
|
http_version LowCardinality(String),
|
||||||
|
orphan_side LowCardinality(String),
|
||||||
|
|
||||||
correlated UInt8 DEFAULT JSONExtractBool(raw_json, 'correlated'),
|
correlated UInt8,
|
||||||
keepalives UInt16 DEFAULT toUInt16(JSONExtractUInt(raw_json, 'keepalives')),
|
keepalives UInt16,
|
||||||
method LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'method'),
|
a_timestamp UInt64,
|
||||||
scheme LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'scheme'),
|
b_timestamp UInt64,
|
||||||
host LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'host'),
|
conn_id String,
|
||||||
path String DEFAULT JSONExtractString(raw_json, 'path'),
|
|
||||||
query String DEFAULT JSONExtractString(raw_json, 'query'),
|
|
||||||
http_version LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'http_version'),
|
|
||||||
orphan_side LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'orphan_side'),
|
|
||||||
|
|
||||||
a_timestamp UInt64 DEFAULT JSONExtractUInt(raw_json, 'a_timestamp'),
|
ip_meta_df UInt8,
|
||||||
b_timestamp UInt64 DEFAULT JSONExtractUInt(raw_json, 'b_timestamp'),
|
ip_meta_id UInt32,
|
||||||
conn_id String DEFAULT JSONExtractString(raw_json, 'conn_id'),
|
ip_meta_total_length UInt32,
|
||||||
ip_meta_df UInt8 DEFAULT JSONExtractBool(raw_json, 'ip_meta_df'),
|
ip_meta_ttl UInt8,
|
||||||
ip_meta_id UInt32 DEFAULT JSONExtractUInt(raw_json, 'ip_meta_id'),
|
tcp_meta_options LowCardinality(String),
|
||||||
ip_meta_total_length UInt32 DEFAULT JSONExtractUInt(raw_json, 'ip_meta_total_length'),
|
tcp_meta_window_size UInt32,
|
||||||
ip_meta_ttl UInt8 DEFAULT JSONExtractUInt(raw_json, 'ip_meta_ttl'),
|
syn_to_clienthello_ms Int32,
|
||||||
tcp_meta_options LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'tcp_meta_options'),
|
|
||||||
tcp_meta_window_size UInt32 DEFAULT JSONExtractUInt(raw_json, 'tcp_meta_window_size'),
|
|
||||||
syn_to_clienthello_ms Int32 DEFAULT toInt32(JSONExtractInt(raw_json, 'syn_to_clienthello_ms')),
|
|
||||||
tls_version LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'tls_version'),
|
|
||||||
tls_sni LowCardinality(String) DEFAULT JSONExtractString(raw_json, 'tls_sni'),
|
|
||||||
ja3 String DEFAULT JSONExtractString(raw_json, 'ja3'),
|
|
||||||
ja3_hash String DEFAULT JSONExtractString(raw_json, 'ja3_hash'),
|
|
||||||
ja4 String DEFAULT JSONExtractString(raw_json, 'ja4'),
|
|
||||||
|
|
||||||
extra JSON DEFAULT raw_json
|
tls_version LowCardinality(String),
|
||||||
|
tls_sni LowCardinality(String),
|
||||||
|
ja3 String,
|
||||||
|
ja3_hash String,
|
||||||
|
ja4 String,
|
||||||
|
|
||||||
|
header_user_agent String,
|
||||||
|
header_accept String,
|
||||||
|
header_accept_encoding String,
|
||||||
|
header_accept_language String,
|
||||||
|
header_x_request_id String,
|
||||||
|
header_x_trace_id String,
|
||||||
|
header_x_forwarded_for String,
|
||||||
|
|
||||||
|
header_sec_ch_ua String,
|
||||||
|
header_sec_ch_ua_mobile String,
|
||||||
|
header_sec_ch_ua_platform String,
|
||||||
|
header_sec_fetch_dest String,
|
||||||
|
header_sec_fetch_mode String,
|
||||||
|
header_sec_fetch_site String
|
||||||
)
|
)
|
||||||
ENGINE = MergeTree
|
ENGINE = MergeTree
|
||||||
PARTITION BY toYYYYMM(log_date)
|
PARTITION BY log_date
|
||||||
ORDER BY (log_date, dst_ip, src_ip, time);
|
ORDER BY (time, src_ip, dst_ip, ja4);
|
||||||
|
|
||||||
-- 4. Vue matérialisée (RAW → logs)
|
-- 4. Vue matérialisée (RAW → logs)
|
||||||
DROP VIEW IF EXISTS mabase_prod.mv_http_logs;
|
DROP VIEW IF EXISTS mabase_prod.mv_http_logs;
|
||||||
@ -271,7 +280,67 @@ DROP VIEW IF EXISTS mabase_prod.mv_http_logs;
|
|||||||
CREATE MATERIALIZED VIEW mabase_prod.mv_http_logs
|
CREATE MATERIALIZED VIEW mabase_prod.mv_http_logs
|
||||||
TO mabase_prod.http_logs
|
TO mabase_prod.http_logs
|
||||||
AS
|
AS
|
||||||
SELECT raw_json
|
SELECT
|
||||||
|
-- 1. Temps
|
||||||
|
parseDateTimeBestEffort(
|
||||||
|
coalesce(JSONExtractString(raw_json, 'time'), '1970-01-01T00:00:00Z')
|
||||||
|
) AS time,
|
||||||
|
toDate(time) AS log_date,
|
||||||
|
|
||||||
|
-- 2. Réseau L3/L4
|
||||||
|
toIPv4(coalesce(JSONExtractString(raw_json, 'src_ip'), '0.0.0.0')) AS src_ip,
|
||||||
|
toUInt16(coalesce(JSONExtractUInt(raw_json, 'src_port'), 0)) AS src_port,
|
||||||
|
toIPv4(coalesce(JSONExtractString(raw_json, 'dst_ip'), '0.0.0.0')) AS dst_ip,
|
||||||
|
toUInt16(coalesce(JSONExtractUInt(raw_json, 'dst_port'), 0)) AS dst_port,
|
||||||
|
|
||||||
|
-- 3. HTTP de base
|
||||||
|
coalesce(JSONExtractString(raw_json, 'method'), '') AS method,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'scheme'), '') AS scheme,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'host'), '') AS host,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'path'), '') AS path,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'query'), '') AS query,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'http_version'), '') AS http_version,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'orphan_side'), '') AS orphan_side,
|
||||||
|
|
||||||
|
-- 4. Connexion / corrélation
|
||||||
|
toUInt8(coalesce(JSONExtractBool(raw_json, 'correlated'), 0)) AS correlated,
|
||||||
|
toUInt16(coalesce(JSONExtractUInt(raw_json, 'keepalives'), 0)) AS keepalives,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'a_timestamp'), 0) AS a_timestamp,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'b_timestamp'), 0) AS b_timestamp,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'conn_id'), '') AS conn_id,
|
||||||
|
|
||||||
|
-- 5. IP/TCP
|
||||||
|
toUInt8(coalesce(JSONExtractBool(raw_json, 'ip_meta_df'), 0)) AS ip_meta_df,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'ip_meta_id'), 0) AS ip_meta_id,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'ip_meta_total_length'), 0) AS ip_meta_total_length,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'ip_meta_ttl'), 0) AS ip_meta_ttl,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'tcp_meta_options'), '') AS tcp_meta_options,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'tcp_meta_window_size'), 0) AS tcp_meta_window_size,
|
||||||
|
toInt32(coalesce(JSONExtractInt(raw_json, 'syn_to_clienthello_ms'), 0)) AS syn_to_clienthello_ms,
|
||||||
|
|
||||||
|
-- 6. TLS / JA3/JA4
|
||||||
|
coalesce(JSONExtractString(raw_json, 'tls_version'), '') AS tls_version,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'tls_sni'), '') AS tls_sni,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'ja3'), '') AS ja3,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'ja3_hash'), '') AS ja3_hash,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'ja4'), '') AS ja4,
|
||||||
|
|
||||||
|
-- 7. Headers HTTP
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_User-Agent'), '') AS header_user_agent,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Accept'), '') AS header_accept,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Accept-Encoding'), '') AS header_accept_encoding,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Accept-Language'), '') AS header_accept_language,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_X-Request-Id'), '') AS header_x_request_id,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_X-Trace-Id'), '') AS header_x_trace_id,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_X-Forwarded-For'), '') AS header_x_forwarded_for,
|
||||||
|
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA'), '') AS header_sec_ch_ua,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA-Mobile'), '') AS header_sec_ch_ua_mobile,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA-Platform'), '') AS header_sec_ch_ua_platform,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Dest'), '') AS header_sec_fetch_dest,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Mode'), '') AS header_sec_fetch_mode,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Site'), '') AS header_sec_fetch_site
|
||||||
|
|
||||||
FROM mabase_prod.http_logs_raw;
|
FROM mabase_prod.http_logs_raw;
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
164
architecture.yml
164
architecture.yml
@ -404,19 +404,17 @@ clickhouse_schema:
|
|||||||
database: mabase_prod
|
database: mabase_prod
|
||||||
description: >
|
description: >
|
||||||
La table ClickHouse est gérée en dehors du service. Deux tables sont utilisées :
|
La table ClickHouse est gérée en dehors du service. Deux tables sont utilisées :
|
||||||
http_logs_raw (table d'ingestion avec TTL 1 jour) et http_logs (table enrichie
|
http_logs_raw (table d'ingestion partitionnée par jour) et http_logs (table parsée
|
||||||
avec extraction des champs via des colonnes matérialisées). Une vue matérialisée
|
avec extraction explicite des champs). Une vue matérialisée transfère automatiquement
|
||||||
transfère automatiquement les données de RAW vers parsée.
|
les données de RAW vers parsée.
|
||||||
tables:
|
tables:
|
||||||
- name: http_logs_raw
|
- name: http_logs_raw
|
||||||
description: >
|
description: >
|
||||||
Table d'ingestion brute avec TTL. Une seule colonne raw_json contient le log
|
Table d'ingestion brute. Une seule colonne raw_json contient le log corrélé
|
||||||
corrélé complet sérialisé en JSON. TTL de 1 jour pour limiter le stockage.
|
complet sérialisé en JSON. Partitionnée par jour (toDate(ingest_time)) ; aucun TTL n'est défini sur cette table.
|
||||||
engine: MergeTree
|
engine: MergeTree
|
||||||
order_by: tuple()
|
partition_by: toDate(ingest_time)
|
||||||
ttl: ingest_time + INTERVAL 1 DAY
|
order_by: ingest_time
|
||||||
settings:
|
|
||||||
ttl_only_drop_parts: 1
|
|
||||||
columns:
|
columns:
|
||||||
- name: raw_json
|
- name: raw_json
|
||||||
type: String
|
type: String
|
||||||
@ -429,122 +427,168 @@ clickhouse_schema:
|
|||||||
|
|
||||||
- name: http_logs
|
- name: http_logs
|
||||||
description: >
|
description: >
|
||||||
Table enrichie avec extraction des champs du JSON brut via des expressions DEFAULT.
|
Table parsée avec tous les champs extraits explicitement par la vue matérialisée.
|
||||||
Partitionnée par mois, optimisée pour les requêtes analytiques.
|
Partitionnée par log_date, optimisée pour les requêtes analytiques.
|
||||||
engine: MergeTree
|
engine: MergeTree
|
||||||
partition_by: toYYYYMM(log_date)
|
partition_by: log_date
|
||||||
order_by: (log_date, dst_ip, src_ip, time)
|
order_by: (time, src_ip, dst_ip, ja4)
|
||||||
columns:
|
columns:
|
||||||
- name: raw_json
|
|
||||||
type: String
|
|
||||||
- name: time_str
|
|
||||||
type: String
|
|
||||||
default: JSONExtractString(raw_json, 'time')
|
|
||||||
- name: timestamp_str
|
|
||||||
type: String
|
|
||||||
default: JSONExtractString(raw_json, 'timestamp')
|
|
||||||
- name: time
|
- name: time
|
||||||
type: DateTime
|
type: DateTime
|
||||||
default: parseDateTimeBestEffort(time_str)
|
|
||||||
- name: log_date
|
- name: log_date
|
||||||
type: Date
|
type: Date
|
||||||
default: toDate(time)
|
default: toDate(time)
|
||||||
- name: src_ip
|
- name: src_ip
|
||||||
type: IPv4
|
type: IPv4
|
||||||
default: toIPv4(JSONExtractString(raw_json, 'src_ip'))
|
|
||||||
- name: src_port
|
- name: src_port
|
||||||
type: UInt16
|
type: UInt16
|
||||||
default: toUInt16(JSONExtractUInt(raw_json, 'src_port'))
|
|
||||||
- name: dst_ip
|
- name: dst_ip
|
||||||
type: IPv4
|
type: IPv4
|
||||||
default: toIPv4(JSONExtractString(raw_json, 'dst_ip'))
|
|
||||||
- name: dst_port
|
- name: dst_port
|
||||||
type: UInt16
|
type: UInt16
|
||||||
default: toUInt16(JSONExtractUInt(raw_json, 'dst_port'))
|
|
||||||
- name: correlated
|
|
||||||
type: UInt8
|
|
||||||
default: JSONExtractBool(raw_json, 'correlated')
|
|
||||||
- name: keepalives
|
|
||||||
type: UInt16
|
|
||||||
default: toUInt16(JSONExtractUInt(raw_json, 'keepalives'))
|
|
||||||
- name: method
|
- name: method
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'method')
|
|
||||||
- name: scheme
|
- name: scheme
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'scheme')
|
|
||||||
- name: host
|
- name: host
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'host')
|
|
||||||
- name: path
|
- name: path
|
||||||
type: String
|
type: String
|
||||||
default: JSONExtractString(raw_json, 'path')
|
|
||||||
- name: query
|
- name: query
|
||||||
type: String
|
type: String
|
||||||
default: JSONExtractString(raw_json, 'query')
|
|
||||||
- name: http_version
|
- name: http_version
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'http_version')
|
|
||||||
- name: orphan_side
|
- name: orphan_side
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'orphan_side')
|
- name: correlated
|
||||||
|
type: UInt8
|
||||||
|
- name: keepalives
|
||||||
|
type: UInt16
|
||||||
- name: a_timestamp
|
- name: a_timestamp
|
||||||
type: UInt64
|
type: UInt64
|
||||||
default: JSONExtractUInt(raw_json, 'a_timestamp')
|
|
||||||
- name: b_timestamp
|
- name: b_timestamp
|
||||||
type: UInt64
|
type: UInt64
|
||||||
default: JSONExtractUInt(raw_json, 'b_timestamp')
|
|
||||||
- name: conn_id
|
- name: conn_id
|
||||||
type: String
|
type: String
|
||||||
default: JSONExtractString(raw_json, 'conn_id')
|
|
||||||
- name: ip_meta_df
|
- name: ip_meta_df
|
||||||
type: UInt8
|
type: UInt8
|
||||||
default: JSONExtractBool(raw_json, 'ip_meta_df')
|
|
||||||
- name: ip_meta_id
|
- name: ip_meta_id
|
||||||
type: UInt32
|
type: UInt32
|
||||||
default: JSONExtractUInt(raw_json, 'ip_meta_id')
|
|
||||||
- name: ip_meta_total_length
|
- name: ip_meta_total_length
|
||||||
type: UInt32
|
type: UInt32
|
||||||
default: JSONExtractUInt(raw_json, 'ip_meta_total_length')
|
|
||||||
- name: ip_meta_ttl
|
- name: ip_meta_ttl
|
||||||
type: UInt8
|
type: UInt8
|
||||||
default: JSONExtractUInt(raw_json, 'ip_meta_ttl')
|
|
||||||
- name: tcp_meta_options
|
- name: tcp_meta_options
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'tcp_meta_options')
|
|
||||||
- name: tcp_meta_window_size
|
- name: tcp_meta_window_size
|
||||||
type: UInt32
|
type: UInt32
|
||||||
default: JSONExtractUInt(raw_json, 'tcp_meta_window_size')
|
|
||||||
- name: syn_to_clienthello_ms
|
- name: syn_to_clienthello_ms
|
||||||
type: Int32
|
type: Int32
|
||||||
default: toInt32(JSONExtractInt(raw_json, 'syn_to_clienthello_ms'))
|
|
||||||
- name: tls_version
|
- name: tls_version
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'tls_version')
|
|
||||||
- name: tls_sni
|
- name: tls_sni
|
||||||
type: LowCardinality(String)
|
type: LowCardinality(String)
|
||||||
default: JSONExtractString(raw_json, 'tls_sni')
|
|
||||||
- name: ja3
|
- name: ja3
|
||||||
type: String
|
type: String
|
||||||
default: JSONExtractString(raw_json, 'ja3')
|
|
||||||
- name: ja3_hash
|
- name: ja3_hash
|
||||||
type: String
|
type: String
|
||||||
default: JSONExtractString(raw_json, 'ja3_hash')
|
|
||||||
- name: ja4
|
- name: ja4
|
||||||
type: String
|
type: String
|
||||||
default: JSONExtractString(raw_json, 'ja4')
|
- name: header_user_agent
|
||||||
- name: extra
|
type: String
|
||||||
type: JSON
|
- name: header_accept
|
||||||
default: raw_json
|
type: String
|
||||||
|
- name: header_accept_encoding
|
||||||
|
type: String
|
||||||
|
- name: header_accept_language
|
||||||
|
type: String
|
||||||
|
- name: header_x_request_id
|
||||||
|
type: String
|
||||||
|
- name: header_x_trace_id
|
||||||
|
type: String
|
||||||
|
- name: header_x_forwarded_for
|
||||||
|
type: String
|
||||||
|
- name: header_sec_ch_ua
|
||||||
|
type: String
|
||||||
|
- name: header_sec_ch_ua_mobile
|
||||||
|
type: String
|
||||||
|
- name: header_sec_ch_ua_platform
|
||||||
|
type: String
|
||||||
|
- name: header_sec_fetch_dest
|
||||||
|
type: String
|
||||||
|
- name: header_sec_fetch_mode
|
||||||
|
type: String
|
||||||
|
- name: header_sec_fetch_site
|
||||||
|
type: String
|
||||||
|
|
||||||
- name: mv_http_logs
|
- name: mv_http_logs
|
||||||
type: materialized_view
|
type: materialized_view
|
||||||
description: >
|
description: >
|
||||||
Vue matérialisée qui transfère automatiquement les données de http_logs_raw
|
Vue matérialisée qui transfère les données de http_logs_raw vers http_logs
|
||||||
vers http_logs lors de chaque INSERT.
|
en extrayant tous les champs du JSON via JSONExtract* et coalesce pour les
|
||||||
|
valeurs par défaut.
|
||||||
target: mabase_prod.http_logs
|
target: mabase_prod.http_logs
|
||||||
query: |
|
query: |
|
||||||
SELECT raw_json FROM mabase_prod.http_logs_raw
|
SELECT
|
||||||
|
-- 1. Temps
|
||||||
|
parseDateTimeBestEffort(
|
||||||
|
coalesce(JSONExtractString(raw_json, 'time'), '1970-01-01T00:00:00Z')
|
||||||
|
) AS time,
|
||||||
|
toDate(time) AS log_date,
|
||||||
|
|
||||||
|
-- 2. Réseau L3/L4
|
||||||
|
toIPv4(coalesce(JSONExtractString(raw_json, 'src_ip'), '0.0.0.0')) AS src_ip,
|
||||||
|
toUInt16(coalesce(JSONExtractUInt(raw_json, 'src_port'), 0)) AS src_port,
|
||||||
|
toIPv4(coalesce(JSONExtractString(raw_json, 'dst_ip'), '0.0.0.0')) AS dst_ip,
|
||||||
|
toUInt16(coalesce(JSONExtractUInt(raw_json, 'dst_port'), 0)) AS dst_port,
|
||||||
|
|
||||||
|
-- 3. HTTP de base
|
||||||
|
coalesce(JSONExtractString(raw_json, 'method'), '') AS method,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'scheme'), '') AS scheme,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'host'), '') AS host,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'path'), '') AS path,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'query'), '') AS query,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'http_version'), '') AS http_version,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'orphan_side'), '') AS orphan_side,
|
||||||
|
|
||||||
|
-- 4. Connexion / corrélation
|
||||||
|
toUInt8(coalesce(JSONExtractBool(raw_json, 'correlated'), 0)) AS correlated,
|
||||||
|
toUInt16(coalesce(JSONExtractUInt(raw_json, 'keepalives'), 0)) AS keepalives,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'a_timestamp'), 0) AS a_timestamp,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'b_timestamp'), 0) AS b_timestamp,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'conn_id'), '') AS conn_id,
|
||||||
|
|
||||||
|
-- 5. IP/TCP
|
||||||
|
toUInt8(coalesce(JSONExtractBool(raw_json, 'ip_meta_df'), 0)) AS ip_meta_df,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'ip_meta_id'), 0) AS ip_meta_id,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'ip_meta_total_length'), 0) AS ip_meta_total_length,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'ip_meta_ttl'), 0) AS ip_meta_ttl,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'tcp_meta_options'), '') AS tcp_meta_options,
|
||||||
|
coalesce(JSONExtractUInt(raw_json, 'tcp_meta_window_size'), 0) AS tcp_meta_window_size,
|
||||||
|
toInt32(coalesce(JSONExtractInt(raw_json, 'syn_to_clienthello_ms'), 0)) AS syn_to_clienthello_ms,
|
||||||
|
|
||||||
|
-- 6. TLS / JA3/JA4
|
||||||
|
coalesce(JSONExtractString(raw_json, 'tls_version'), '') AS tls_version,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'tls_sni'), '') AS tls_sni,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'ja3'), '') AS ja3,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'ja3_hash'), '') AS ja3_hash,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'ja4'), '') AS ja4,
|
||||||
|
|
||||||
|
-- 7. Headers HTTP
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_User-Agent'), '') AS header_user_agent,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Accept'), '') AS header_accept,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Accept-Encoding'), '') AS header_accept_encoding,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Accept-Language'), '') AS header_accept_language,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_X-Request-Id'), '') AS header_x_request_id,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_X-Trace-Id'), '') AS header_x_trace_id,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_X-Forwarded-For'), '') AS header_x_forwarded_for,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA'), '') AS header_sec_ch_ua,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA-Mobile'), '') AS header_sec_ch_ua_mobile,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-CH-UA-Platform'), '') AS header_sec_ch_ua_platform,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Dest'), '') AS header_sec_fetch_dest,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Mode'), '') AS header_sec_fetch_mode,
|
||||||
|
coalesce(JSONExtractString(raw_json, 'header_Sec-Fetch-Site'), '') AS header_sec_fetch_site
|
||||||
|
FROM mabase_prod.http_logs_raw;
|
||||||
|
|
||||||
users:
|
users:
|
||||||
- name: data_writer
|
- name: data_writer
|
||||||
|
|||||||
Reference in New Issue
Block a user