From a6327cc36f24b218c6c5299c1a0a0fa27caa4bcb Mon Sep 17 00:00:00 2001 From: toto Date: Tue, 3 Mar 2026 14:40:35 +0100 Subject: [PATCH] docs: add sanity check queries for ClickHouse ingestion - Add 6 verification queries in README - Check tables exist, MV definition, row counts - Display raw and parsed log samples - Add interpretation guide for troubleshooting Co-authored-by: Qwen-Coder --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/README.md b/README.md index 7ec5bd1..4a59d6d 100644 --- a/README.md +++ b/README.md @@ -378,6 +378,65 @@ SELECT raw_json FROM mabase_prod.http_logs_raw; ``` +### Sanity checks - Vérification de l'ingestion + +Après avoir déployé le service, vérifiez que les données circulent correctement : + +```sql +-- 1. Tables présentes +SELECT + database, + table, + engine +FROM system.tables +WHERE database = currentDatabase() + AND table IN ('http_logs_raw', 'http_logs', 'mv_http_logs'); + +-- 2. Définition de la vue matérialisée +SHOW CREATE TABLE mv_http_logs; + +-- 3. Vérifier que les inserts bruts arrivent +SELECT + count(*) AS rows_raw, + min(ingest_time) AS min_ingest, + max(ingest_time) AS max_ingest +FROM http_logs_raw; + +-- 4. Voir les derniers logs bruts +SELECT + ingest_time, + raw_json +FROM http_logs_raw +ORDER BY ingest_time DESC +LIMIT 5; + +-- 5. Vérifier que la MV alimente http_logs +SELECT + count(*) AS rows_flat, + min(time) AS min_time, + max(time) AS max_time +FROM http_logs; + +-- 6. Voir les derniers logs parsés +SELECT + time, + src_ip, + dst_ip, + method, + host, + path, + header_user_agent, + tls_version, + ja4 +FROM http_logs +ORDER BY time DESC +LIMIT 10; +``` + +**Interprétation :** +- Si `rows_raw` > 0 mais `rows_flat` = 0 : la vue matérialisée ne fonctionne pas (vérifiez les droits SELECT sur `http_logs_raw`) +- Si les deux compteurs sont > 0 : l'ingestion et le parsing fonctionnent correctement + ## Tests ```bash