From 1fa6aec784e5ebdb9a5c1f7955560f755fe661fb Mon Sep 17 00:00:00 2001
From: toto
Date: Thu, 9 Apr 2026 22:39:25 +0200
Subject: [PATCH] fix: SQL view ordering, purge-db flag, ctest directory

- 12_thesis_features.sql: move view_resource_cascade_1h before view_thesis_features_1h
- Makefile: purge-db uses --reset (not --clean)
- mod-reqin-log: ctest --test-dir build/tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 Makefile                                 |   2 +-
 shared/clickhouse/12_thesis_features.sql | 103 ++++++++++++-----------
 2 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/Makefile b/Makefile
index 83b5a8c..6e8e0f5 100644
--- a/Makefile
+++ b/Makefile
@@ -199,4 +199,4 @@ init-and-import:
 	./scripts/init-stack.sh --import-prod
 
 purge-db:
-	./scripts/init-stack.sh --clean
+	./scripts/init-stack.sh --reset
diff --git a/shared/clickhouse/12_thesis_features.sql b/shared/clickhouse/12_thesis_features.sql
index 3e78968..53acf70 100644
--- a/shared/clickhouse/12_thesis_features.sql
+++ b/shared/clickhouse/12_thesis_features.sql
@@ -200,6 +200,58 @@ FROM ja4_logs.http_logs
 GROUP BY window_start, src_ip, ja4, host;
 
 
+-- =============================================================================
+-- §5.4 — resource_cascade view (Resource Dependency Tree)
+--
+-- Computes the delay between the first document and the first asset,
+-- and the standard deviation of the asset timestamps (simultaneity).
+-- Must be created BEFORE view_thesis_features_1h, which references it.
+-- =============================================================================
+
+CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
+WITH
+cascade_raw AS (
+    SELECT
+        window_start, src_ip, ja4, host,
+        arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
+    FROM ja4_processing.agg_resource_cascade_1h
+    WHERE window_start >= now() - INTERVAL 24 HOUR
+    GROUP BY window_start, src_ip, ja4, host
+    HAVING length(sorted_loads) >= 3
+),
+cascade_split AS (
+    SELECT
+        window_start, src_ip, ja4, host,
+        -- Document entries of sorted_loads (is_asset = 0), still ordered by x.1
+        arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
+        -- Asset entries of sorted_loads (is_asset = 1), still ordered by x.1
+        arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
+    FROM cascade_raw
+)
+SELECT
+    window_start, src_ip, ja4, host,
+    length(docs) AS doc_count,
+    length(assets) AS asset_count,
+    -- Delay from the earliest document to the earliest asset (seconds; not an average); -1.0 if either is missing
+    -- Real browser: 0.05–0.2s ; Playwright: <0.01s ; Scraper: >1s or 0
+    if(
+        length(docs) > 0 AND length(assets) > 0,
+        toFloat64(assets[1].1 - docs[1].1),
+        -1.0
+    ) AS root_to_first_asset_delay,
+    -- Asset simultaneity: population standard deviation of the asset timestamps; -1.0 with fewer than 2 assets
+    -- Browser: low (parallel batch) ; Scraper: high (sequential)
+    if(
+        length(assets) >= 2,
+        sqrt(arrayReduce('varPop',
+            arrayMap(x -> toFloat64(x.1), assets)
+        )),
+        -1.0
+    ) AS asset_load_stddev
+FROM cascade_split
+WHERE length(docs) > 0 OR length(assets) > 0;
+
+
 -- =============================================================================
 -- view_thesis_features_1h — Vue unifiée des features avancées
 --
@@ -463,57 +515,6 @@ LEFT JOIN ja4_processing.view_resource_cascade_1h rc
     AND p.host = rc.host;
 
 
--- =============================================================================
--- §5.4 — Vue resource_cascade (Resource Dependency Tree)
---
--- Calcule le délai moyen entre le premier document et le premier asset,
--- et l'écart-type des timestamps des assets (simultanéité).
--- =============================================================================
-
-CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
-WITH
-cascade_raw AS (
-    SELECT
-        window_start, src_ip, ja4, host,
-        arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
-    FROM ja4_processing.agg_resource_cascade_1h
-    WHERE window_start >= now() - INTERVAL 24 HOUR
-    GROUP BY window_start, src_ip, ja4, host
-    HAVING length(sorted_loads) >= 3
-),
-cascade_split AS (
-    SELECT
-        window_start, src_ip, ja4, host,
-        -- Timestamps des documents (is_asset = 0)
-        arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
-        -- Timestamps des assets (is_asset = 1)
-        arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
-    FROM cascade_raw
-)
-SELECT
-    window_start, src_ip, ja4, host,
-    length(docs) AS doc_count,
-    length(assets) AS asset_count,
-    -- Délai moyen premier document → premier asset (secondes)
-    -- Navigateur réel : 0.05–0.2s ; Playwright : <0.01s ; Scraper : >1s ou 0
-    if(
-        length(docs) > 0 AND length(assets) > 0,
-        toFloat64(assets[1].1 - docs[1].1),
-        -1.0
-    ) AS root_to_first_asset_delay,
-    -- Simultanéité des assets : écart-type des timestamps des assets
-    -- Navigateur : faible (batch parallèle) ; Scraper : élevé (séquentiel)
-    if(
-        length(assets) >= 2,
-        sqrt(arrayReduce('varPop',
-            arrayMap(x -> toFloat64(x.1), assets)
-        )),
-        -1.0
-    ) AS asset_load_stddev
-FROM cascade_split
-WHERE length(docs) > 0 OR length(assets) > 0;
-
-
 -- =============================================================================
 -- §5.2 — Graphe bipartite JA4×ASN (Bipartite Bot Fleet Detection)
 --