-- =============================================================================
-- logcorrelator - ClickHouse initialisation
-- =============================================================================
-- This script creates the database, the raw and parsed tables, the
-- materialized view that links them, and the users/grants used by
-- logcorrelator.
--
-- The script is meant to be re-runnable: every CREATE uses IF NOT EXISTS and
-- the materialized view is dropped and re-created so that its definition is
-- always the one in this file.
--
-- Prerequisite: the materialized view reads the dictionary
-- mabase_prod.dict_iplocate_asn, which is not created by this script.
-- NOTE(review): presumably it must exist before the view receives inserts;
-- confirm where it is provisioned.
--
-- Usage:
--   clickhouse-client --multiquery < sql/init.sql
-- =============================================================================

-- -----------------------------------------------------------------------------
-- Database
-- -----------------------------------------------------------------------------
CREATE DATABASE IF NOT EXISTS mabase_prod;

-- -----------------------------------------------------------------------------
-- Raw table: direct target of the service's inserts.
-- The service only writes the raw_json column; ingest_time defaults to now().
-- Rows are kept for 1 day, partitions are dropped as a whole.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.http_logs_raw
(
    `raw_json`    String CODEC(ZSTD(3)),
    `ingest_time` DateTime DEFAULT now()
)
ENGINE = MergeTree
PARTITION BY toDate(ingest_time)
ORDER BY ingest_time
TTL ingest_time + INTERVAL 1 DAY
SETTINGS
    index_granularity = 8192,
    ttl_only_drop_parts = 1;

-- -----------------------------------------------------------------------------
-- Parsed table: filled automatically by the materialized view below.
-- Rows are kept for 7 days, partitions are dropped as a whole.
-- IF NOT EXISTS keeps the script re-runnable, like the other CREATE statements.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mabase_prod.http_logs
(
    -- Time
    `time`                      DateTime,
    `log_date`                  Date DEFAULT toDate(time),

    -- Network
    `src_ip`                    IPv4,
    `src_port`                  UInt16,
    `dst_ip`                    IPv4,
    `dst_port`                  UInt16,

    -- IPLocate enrichment (looked up from dict_iplocate_asn by the view)
    `src_asn`                   UInt32,
    `src_country_code`          LowCardinality(String),
    `src_as_name`               LowCardinality(String),
    `src_org`                   LowCardinality(String),
    `src_domain`                LowCardinality(String),

    -- HTTP
    `method`                    LowCardinality(String),
    `scheme`                    LowCardinality(String),
    `host`                      LowCardinality(String),
    `path`                      String CODEC(ZSTD(3)),
    `query`                     String CODEC(ZSTD(3)),
    `http_version`              LowCardinality(String),

    -- Correlation
    `orphan_side`               LowCardinality(String),
    `correlated`                UInt8,
    `keepalives`                UInt16,
    `a_timestamp`               UInt64,
    `b_timestamp`               UInt64,
    `conn_id`                   String CODEC(ZSTD(3)),

    -- IP metadata
    `ip_meta_df`                UInt8,
    `ip_meta_id`                UInt16,
    `ip_meta_total_length`      UInt16,
    `ip_meta_ttl`               UInt8,

    -- TCP metadata
    `tcp_meta_options`          LowCardinality(String),
    `tcp_meta_window_size`      UInt32,
    `tcp_meta_mss`              UInt16,
    `tcp_meta_window_scale`     UInt8,
    `syn_to_clienthello_ms`     Int32,

    -- TLS / fingerprint
    `tls_version`               LowCardinality(String),
    `tls_sni`                   LowCardinality(String),
    `tls_alpn`                  LowCardinality(String),
    `ja3`                       String CODEC(ZSTD(3)),
    `ja3_hash`                  String CODEC(ZSTD(3)),
    `ja4`                       String CODEC(ZSTD(3)),

    -- HTTP headers
    `client_headers`            String CODEC(ZSTD(3)),
    `header_user_agent`         String CODEC(ZSTD(3)),
    `header_accept`             String CODEC(ZSTD(3)),
    `header_accept_encoding`    String CODEC(ZSTD(3)),
    `header_accept_language`    String CODEC(ZSTD(3)),
    `header_content_type`       String CODEC(ZSTD(3)),
    `header_x_request_id`       String CODEC(ZSTD(3)),
    `header_x_trace_id`         String CODEC(ZSTD(3)),
    `header_x_forwarded_for`    String CODEC(ZSTD(3)),
    `header_sec_ch_ua`          String CODEC(ZSTD(3)),
    `header_sec_ch_ua_mobile`   String CODEC(ZSTD(3)),
    `header_sec_ch_ua_platform` String CODEC(ZSTD(3)),
    `header_sec_fetch_dest`     String CODEC(ZSTD(3)),
    `header_sec_fetch_mode`     String CODEC(ZSTD(3)),
    `header_sec_fetch_site`     String CODEC(ZSTD(3))
)
ENGINE = MergeTree
PARTITION BY log_date
ORDER BY (time, src_ip, dst_ip, ja4)
TTL log_date + INTERVAL 7 DAY
SETTINGS
    index_granularity = 8192,
    ttl_only_drop_parts = 1;

-- -----------------------------------------------------------------------------
-- Materialized view: parses the JSON of http_logs_raw into http_logs.
--
-- The view is dropped first so that a re-run always installs the definition
-- below; rows inserted into http_logs_raw between the DROP and the CREATE are
-- not parsed.
--
-- JSONExtractString / JSONExtractUInt / JSONExtractInt / JSONExtractBool never
-- return NULL: a missing or wrongly typed key yields '' or 0. Wrapping them in
-- coalesce() therefore has no effect, so the defaults are applied as follows:
--   * strings and numbers rely on the '' / 0 returned by JSONExtract*;
--   * `time` uses parseDateTimeBestEffortOrZero, so a missing or unparsable
--     value becomes the zero DateTime (1970-01-01) instead of raising an
--     exception that would abort the insert into the raw table;
--   * `src_ip` / `dst_ip` use toIPv4OrDefault, so a missing or malformed
--     address becomes 0.0.0.0 instead of raising an exception.
-- -----------------------------------------------------------------------------
DROP VIEW IF EXISTS mabase_prod.mv_http_logs;

CREATE MATERIALIZED VIEW IF NOT EXISTS mabase_prod.mv_http_logs
TO mabase_prod.http_logs
AS
SELECT
    -- Time
    parseDateTimeBestEffortOrZero(JSONExtractString(raw_json, 'time')) AS time,
    toDate(time) AS log_date,

    -- Network
    toIPv4OrDefault(JSONExtractString(raw_json, 'src_ip')) AS src_ip,
    toUInt16(JSONExtractUInt(raw_json, 'src_port')) AS src_port,
    toIPv4OrDefault(JSONExtractString(raw_json, 'dst_ip')) AS dst_ip,
    toUInt16(JSONExtractUInt(raw_json, 'dst_port')) AS dst_port,

    -- IPLocate enrichment: the dictionary is queried with the IPv6-mapped
    -- form of src_ip; the last argument is the value used when no entry matches.
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'asn',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        toUInt32(0)
    ) AS src_asn,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'country_code',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_country_code,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'name',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_as_name,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'org',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_org,
    dictGetOrDefault(
        'mabase_prod.dict_iplocate_asn',
        'domain',
        IPv4ToIPv6(IPv4StringToNum(toString(src_ip))),
        ''
    ) AS src_domain,

    -- HTTP
    JSONExtractString(raw_json, 'method') AS method,
    JSONExtractString(raw_json, 'scheme') AS scheme,
    JSONExtractString(raw_json, 'host') AS host,
    JSONExtractString(raw_json, 'path') AS path,
    JSONExtractString(raw_json, 'query') AS query,
    JSONExtractString(raw_json, 'http_version') AS http_version,

    -- Correlation
    JSONExtractString(raw_json, 'orphan_side') AS orphan_side,
    toUInt8(JSONExtractBool(raw_json, 'correlated')) AS correlated,
    toUInt16(JSONExtractUInt(raw_json, 'keepalives')) AS keepalives,
    JSONExtractUInt(raw_json, 'a_timestamp') AS a_timestamp,
    JSONExtractUInt(raw_json, 'b_timestamp') AS b_timestamp,
    JSONExtractString(raw_json, 'conn_id') AS conn_id,

    -- IP metadata
    toUInt8(JSONExtractBool(raw_json, 'ip_meta_df')) AS ip_meta_df,
    toUInt16(JSONExtractUInt(raw_json, 'ip_meta_id')) AS ip_meta_id,
    toUInt16(JSONExtractUInt(raw_json, 'ip_meta_total_length')) AS ip_meta_total_length,
    toUInt8(JSONExtractUInt(raw_json, 'ip_meta_ttl')) AS ip_meta_ttl,

    -- TCP metadata
    JSONExtractString(raw_json, 'tcp_meta_options') AS tcp_meta_options,
    toUInt32(JSONExtractUInt(raw_json, 'tcp_meta_window_size')) AS tcp_meta_window_size,
    toUInt16(JSONExtractUInt(raw_json, 'tcp_meta_mss')) AS tcp_meta_mss,
    toUInt8(JSONExtractUInt(raw_json, 'tcp_meta_window_scale')) AS tcp_meta_window_scale,
    toInt32(JSONExtractInt(raw_json, 'syn_to_clienthello_ms')) AS syn_to_clienthello_ms,

    -- TLS / fingerprint
    JSONExtractString(raw_json, 'tls_version') AS tls_version,
    JSONExtractString(raw_json, 'tls_sni') AS tls_sni,
    JSONExtractString(raw_json, 'tls_alpn') AS tls_alpn,
    JSONExtractString(raw_json, 'ja3') AS ja3,
    JSONExtractString(raw_json, 'ja3_hash') AS ja3_hash,
    JSONExtractString(raw_json, 'ja4') AS ja4,

    -- HTTP headers (JSON keys keep the original header casing)
    JSONExtractString(raw_json, 'client_headers') AS client_headers,
    JSONExtractString(raw_json, 'header_User-Agent') AS header_user_agent,
    JSONExtractString(raw_json, 'header_Accept') AS header_accept,
    JSONExtractString(raw_json, 'header_Accept-Encoding') AS header_accept_encoding,
    JSONExtractString(raw_json, 'header_Accept-Language') AS header_accept_language,
    JSONExtractString(raw_json, 'header_Content-Type') AS header_content_type,
    JSONExtractString(raw_json, 'header_X-Request-Id') AS header_x_request_id,
    JSONExtractString(raw_json, 'header_X-Trace-Id') AS header_x_trace_id,
    JSONExtractString(raw_json, 'header_X-Forwarded-For') AS header_x_forwarded_for,
    JSONExtractString(raw_json, 'header_Sec-CH-UA') AS header_sec_ch_ua,
    JSONExtractString(raw_json, 'header_Sec-CH-UA-Mobile') AS header_sec_ch_ua_mobile,
    JSONExtractString(raw_json, 'header_Sec-CH-UA-Platform') AS header_sec_ch_ua_platform,
    JSONExtractString(raw_json, 'header_Sec-Fetch-Dest') AS header_sec_fetch_dest,
    JSONExtractString(raw_json, 'header_Sec-Fetch-Mode') AS header_sec_fetch_mode,
    JSONExtractString(raw_json, 'header_Sec-Fetch-Site') AS header_sec_fetch_site
FROM mabase_prod.http_logs_raw;

-- -----------------------------------------------------------------------------
-- Users and permissions
--
-- 'ChangeMe' is a placeholder: change both passwords after installation.
-- sha256_password is used so that the server stores a hash of the password
-- rather than the clear text; clients still authenticate with the same
-- password.
-- -----------------------------------------------------------------------------
CREATE USER IF NOT EXISTS data_writer IDENTIFIED WITH sha256_password BY 'ChangeMe';
CREATE USER IF NOT EXISTS analyst IDENTIFIED WITH sha256_password BY 'ChangeMe';

-- data_writer: INSERT and SELECT on the raw table only.
-- NOTE(review): SELECT is presumably needed by the writer's client; drop the
-- second grant if the service never reads from http_logs_raw.
GRANT INSERT ON mabase_prod.http_logs_raw TO data_writer;
GRANT SELECT ON mabase_prod.http_logs_raw TO data_writer;

-- analyst: read-only access to the parsed table.
GRANT SELECT ON mabase_prod.http_logs TO analyst;

-- -----------------------------------------------------------------------------
-- Post-installation checks (run manually)
-- -----------------------------------------------------------------------------
-- SELECT count(*), min(ingest_time), max(ingest_time) FROM mabase_prod.http_logs_raw;
-- SELECT count(*), min(time), max(time) FROM mabase_prod.http_logs;
-- SELECT time, src_ip, dst_ip, method, host, path, ja4 FROM mabase_prod.http_logs ORDER BY time DESC LIMIT 10;