fix: SQL view ordering, purge-db flag, ctest directory

- 12_thesis_features.sql: move view_resource_cascade_1h before view_thesis_features_1h
- Makefile: purge-db uses --reset (not --clean)
- mod-reqin-log: ctest --test-dir build/tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
toto
2026-04-09 22:39:25 +02:00
parent 6d64c2a8a8
commit 1fa6aec784
2 changed files with 53 additions and 52 deletions

View File

@ -199,4 +199,4 @@ init-and-import:
./scripts/init-stack.sh --import-prod ./scripts/init-stack.sh --import-prod
purge-db: purge-db:
./scripts/init-stack.sh --clean ./scripts/init-stack.sh --reset

View File

@ -200,6 +200,58 @@ FROM ja4_logs.http_logs
GROUP BY window_start, src_ip, ja4, host; GROUP BY window_start, src_ip, ja4, host;
-- =============================================================================
-- §5.4 — Vue resource_cascade (Resource Dependency Tree)
--
-- Calcule le délai moyen entre le premier document et le premier asset,
-- et l'écart-type des timestamps des assets (simultanéité).
-- Doit être créée AVANT view_thesis_features_1h qui la référence.
-- =============================================================================
-- Resource-cascade timing features per (window_start, src_ip, ja4, host),
-- computed from ja4_processing.agg_resource_cascade_1h over the last 24 hours.
--
-- Output columns:
--   doc_count                  entries whose second tuple element is 0 (documents)
--   asset_count                entries whose second tuple element is 1 (assets)
--   root_to_first_asset_delay  earliest asset timestamp minus earliest document
--                              timestamp; -1.0 when either side is missing
--   asset_load_stddev          population standard deviation of the asset
--                              timestamps; -1.0 when fewer than two assets exist
CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
WITH
    -- Merge the stored aggregate states into one array per group and order it
    -- by the first tuple element (the load timestamp).
    cascade_raw AS (
        SELECT
            window_start,
            src_ip,
            ja4,
            host,
            arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
        FROM ja4_processing.agg_resource_cascade_1h
        WHERE window_start >= now() - INTERVAL 24 HOUR
        GROUP BY window_start, src_ip, ja4, host
        -- Groups with fewer than three loads are discarded.
        HAVING length(sorted_loads) >= 3
    ),
    -- Split the ordered loads on the is_asset flag (second tuple element).
    -- Both arrays keep the timestamp ordering of sorted_loads.
    cascade_split AS (
        SELECT
            window_start,
            src_ip,
            ja4,
            host,
            -- Document loads (is_asset = 0)
            arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
            -- Asset loads (is_asset = 1)
            arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
        FROM cascade_raw
    )
SELECT
    window_start,
    src_ip,
    ja4,
    host,
    length(docs) AS doc_count,
    length(assets) AS asset_count,
    -- Delay from the first document to the first asset (a single difference,
    -- not an average). Reference ranges from the original note, in seconds:
    -- real browser 0.05–0.2 s; Playwright < 0.01 s; scraper > 1 s or 0.
    -- NOTE(review): a genuine delay of exactly -1 (asset one unit before the
    -- document) would be indistinguishable from the -1.0 "missing" sentinel.
    if(
        length(docs) > 0 AND length(assets) > 0,
        toFloat64(assets[1].1 - docs[1].1),
        -1.0
    ) AS root_to_first_asset_delay,
    -- Simultaneity of asset loads: standard deviation of the asset timestamps.
    -- Browser: low (parallel batch); scraper: high (sequential).
    -- Each timestamp is shifted by the first asset's timestamp before the
    -- reduction. Variance is shift-invariant, so the result is mathematically
    -- the same, but varPop is documented as numerically unstable and would
    -- otherwise operate on large absolute values where small spreads are lost
    -- to Float64 cancellation (presumably epoch-scale timestamps — confirm).
    if(
        length(assets) >= 2,
        sqrt(arrayReduce(
            'varPop',
            arrayMap(x -> toFloat64(x.1) - toFloat64(assets[1].1), assets)
        )),
        -1.0
    ) AS asset_load_stddev
FROM cascade_split
WHERE length(docs) > 0 OR length(assets) > 0;
-- ============================================================================= -- =============================================================================
-- view_thesis_features_1h — Vue unifiée des features avancées -- view_thesis_features_1h — Vue unifiée des features avancées
-- --
@ -463,57 +515,6 @@ LEFT JOIN ja4_processing.view_resource_cascade_1h rc
AND p.host = rc.host; AND p.host = rc.host;
-- =============================================================================
-- §5.4 — Vue resource_cascade (Resource Dependency Tree)
--
-- Calcule le délai moyen entre le premier document et le premier asset,
-- et l'écart-type des timestamps des assets (simultanéité).
-- =============================================================================
-- Resource-cascade timing features per (window_start, src_ip, ja4, host),
-- computed from ja4_processing.agg_resource_cascade_1h over the last 24 hours.
--
-- Output columns:
--   doc_count                  entries whose second tuple element is 0 (documents)
--   asset_count                entries whose second tuple element is 1 (assets)
--   root_to_first_asset_delay  earliest asset timestamp minus earliest document
--                              timestamp; -1.0 when either side is missing
--   asset_load_stddev          population standard deviation of the asset
--                              timestamps; -1.0 when fewer than two assets exist
CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
WITH
    -- Merge the stored aggregate states into one array per group and order it
    -- by the first tuple element (the load timestamp).
    cascade_raw AS (
        SELECT
            window_start,
            src_ip,
            ja4,
            host,
            arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
        FROM ja4_processing.agg_resource_cascade_1h
        WHERE window_start >= now() - INTERVAL 24 HOUR
        GROUP BY window_start, src_ip, ja4, host
        -- Groups with fewer than three loads are discarded.
        HAVING length(sorted_loads) >= 3
    ),
    -- Split the ordered loads on the is_asset flag (second tuple element).
    -- Both arrays keep the timestamp ordering of sorted_loads.
    cascade_split AS (
        SELECT
            window_start,
            src_ip,
            ja4,
            host,
            -- Document loads (is_asset = 0)
            arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
            -- Asset loads (is_asset = 1)
            arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
        FROM cascade_raw
    )
SELECT
    window_start,
    src_ip,
    ja4,
    host,
    length(docs) AS doc_count,
    length(assets) AS asset_count,
    -- Delay from the first document to the first asset (a single difference,
    -- not an average). Reference ranges from the original note, in seconds:
    -- real browser 0.05–0.2 s; Playwright < 0.01 s; scraper > 1 s or 0.
    -- NOTE(review): a genuine delay of exactly -1 (asset one unit before the
    -- document) would be indistinguishable from the -1.0 "missing" sentinel.
    if(
        length(docs) > 0 AND length(assets) > 0,
        toFloat64(assets[1].1 - docs[1].1),
        -1.0
    ) AS root_to_first_asset_delay,
    -- Simultaneity of asset loads: standard deviation of the asset timestamps.
    -- Browser: low (parallel batch); scraper: high (sequential).
    -- Each timestamp is shifted by the first asset's timestamp before the
    -- reduction. Variance is shift-invariant, so the result is mathematically
    -- the same, but varPop is documented as numerically unstable and would
    -- otherwise operate on large absolute values where small spreads are lost
    -- to Float64 cancellation (presumably epoch-scale timestamps — confirm).
    if(
        length(assets) >= 2,
        sqrt(arrayReduce(
            'varPop',
            arrayMap(x -> toFloat64(x.1) - toFloat64(assets[1].1), assets)
        )),
        -1.0
    ) AS asset_load_stddev
FROM cascade_split
WHERE length(docs) > 0 OR length(assets) > 0;
-- ============================================================================= -- =============================================================================
-- §5.2 — Graphe bipartite JA4×ASN (Bipartite Bot Fleet Detection) -- §5.2 — Graphe bipartite JA4×ASN (Bipartite Bot Fleet Detection)
-- --