fix: SQL view ordering, purge-db flag, ctest directory
- 12_thesis_features.sql: move view_resource_cascade_1h before view_thesis_features_1h
- Makefile: purge-db uses --reset (not --clean)
- mod-reqin-log: ctest --test-dir build/tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2
Makefile
2
Makefile
@ -199,4 +199,4 @@ init-and-import:
|
||||
./scripts/init-stack.sh --import-prod
|
||||
|
||||
purge-db:
|
||||
./scripts/init-stack.sh --clean
|
||||
./scripts/init-stack.sh --reset
|
||||
|
||||
@ -200,6 +200,58 @@ FROM ja4_logs.http_logs
|
||||
GROUP BY window_start, src_ip, ja4, host;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- §5.4 — Vue resource_cascade (Resource Dependency Tree)
|
||||
--
|
||||
-- Calcule le délai entre le premier document et le premier asset,
|
||||
-- et l'écart-type des timestamps des assets (simultanéité).
|
||||
-- Doit être créée AVANT view_thesis_features_1h qui la référence.
|
||||
-- =============================================================================
|
||||
|
||||
-- Resource-cascade view: for each (window_start, src_ip, ja4, host) group of the
-- last 24 hours, exposes the number of document and asset loads, the delay from
-- the first document to the first asset, and the spread of the asset timestamps.
CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
|
||||
WITH
|
||||
-- cascade_raw: one row per group, holding its loads sorted by tuple element 1.
cascade_raw AS (
|
||||
SELECT
|
||||
window_start, src_ip, ja4, host,
|
||||
-- Merge the aggregate states of resource_loads, then sort ascending by the
-- first tuple element (the timestamp, per the comments further down).
-- NOTE(review): presumably resource_loads is a groupArray(200) state column,
-- i.e. at most 200 loads are kept per group — confirm against the table DDL.
arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
|
||||
FROM ja4_processing.agg_resource_cascade_1h
|
||||
-- Only windows that started within the last 24 hours are considered.
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY window_start, src_ip, ja4, host
|
||||
-- Groups with fewer than 3 recorded loads are dropped.
HAVING length(sorted_loads) >= 3
|
||||
),
|
||||
-- cascade_split: partitions the sorted loads on the second tuple element.
cascade_split AS (
|
||||
SELECT
|
||||
window_start, src_ip, ja4, host,
|
||||
-- Document entries (is_asset = 0), still in sorted order
|
||||
arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
|
||||
-- Asset entries (is_asset = 1), still in sorted order
|
||||
arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
|
||||
FROM cascade_raw
|
||||
)
|
||||
SELECT
|
||||
window_start, src_ip, ja4, host,
|
||||
length(docs) AS doc_count,
|
||||
length(assets) AS asset_count,
|
||||
-- Delay from the first document to the first asset (a single difference, not a mean); -1.0 when either list is empty
|
||||
-- Author's reference values (in seconds): real browser: 0.05–0.2s ; Playwright: <0.01s ; scraper: >1s or 0
-- NOTE(review): the unit depends on the type of tuple element 1 — confirm it yields seconds.
-- NOTE(review): the -1.0 sentinel can collide with a real negative delay when an asset sorts before the first document.
|
||||
if(
|
||||
length(docs) > 0 AND length(assets) > 0,
|
||||
toFloat64(assets[1].1 - docs[1].1),
|
||||
-1.0
|
||||
) AS root_to_first_asset_delay,
|
||||
-- Asset simultaneity: population standard deviation (sqrt of varPop) of the asset timestamps; -1.0 with fewer than 2 assets
|
||||
-- Author's interpretation: browser: low (parallel batch) ; scraper: high (sequential)
|
||||
if(
|
||||
length(assets) >= 2,
|
||||
sqrt(arrayReduce('varPop',
|
||||
arrayMap(x -> toFloat64(x.1), assets)
|
||||
)),
|
||||
-1.0
|
||||
) AS asset_load_stddev
|
||||
FROM cascade_split
|
||||
-- Drop groups in which no load has is_asset equal to 0 or 1.
WHERE length(docs) > 0 OR length(assets) > 0;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- view_thesis_features_1h — Vue unifiée des features avancées
|
||||
--
|
||||
@ -463,57 +515,6 @@ LEFT JOIN ja4_processing.view_resource_cascade_1h rc
|
||||
AND p.host = rc.host;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- §5.4 — Vue resource_cascade (Resource Dependency Tree)
|
||||
--
|
||||
-- Calcule le délai entre le premier document et le premier asset,
|
||||
-- et l'écart-type des timestamps des assets (simultanéité).
|
||||
-- =============================================================================
|
||||
|
||||
-- Resource-cascade view: for each (window_start, src_ip, ja4, host) group of the
-- last 24 hours, exposes the number of document and asset loads, the delay from
-- the first document to the first asset, and the spread of the asset timestamps.
CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
|
||||
WITH
|
||||
-- cascade_raw: one row per group, holding its loads sorted by tuple element 1.
cascade_raw AS (
|
||||
SELECT
|
||||
window_start, src_ip, ja4, host,
|
||||
-- Merge the aggregate states of resource_loads, then sort ascending by the
-- first tuple element (the timestamp, per the comments further down).
-- NOTE(review): presumably resource_loads is a groupArray(200) state column,
-- i.e. at most 200 loads are kept per group — confirm against the table DDL.
arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
|
||||
FROM ja4_processing.agg_resource_cascade_1h
|
||||
-- Only windows that started within the last 24 hours are considered.
WHERE window_start >= now() - INTERVAL 24 HOUR
|
||||
GROUP BY window_start, src_ip, ja4, host
|
||||
-- Groups with fewer than 3 recorded loads are dropped.
HAVING length(sorted_loads) >= 3
|
||||
),
|
||||
-- cascade_split: partitions the sorted loads on the second tuple element.
cascade_split AS (
|
||||
SELECT
|
||||
window_start, src_ip, ja4, host,
|
||||
-- Document entries (is_asset = 0), still in sorted order
|
||||
arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
|
||||
-- Asset entries (is_asset = 1), still in sorted order
|
||||
arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
|
||||
FROM cascade_raw
|
||||
)
|
||||
SELECT
|
||||
window_start, src_ip, ja4, host,
|
||||
length(docs) AS doc_count,
|
||||
length(assets) AS asset_count,
|
||||
-- Delay from the first document to the first asset (a single difference, not a mean); -1.0 when either list is empty
|
||||
-- Author's reference values (in seconds): real browser: 0.05–0.2s ; Playwright: <0.01s ; scraper: >1s or 0
-- NOTE(review): the unit depends on the type of tuple element 1 — confirm it yields seconds.
-- NOTE(review): the -1.0 sentinel can collide with a real negative delay when an asset sorts before the first document.
|
||||
if(
|
||||
length(docs) > 0 AND length(assets) > 0,
|
||||
toFloat64(assets[1].1 - docs[1].1),
|
||||
-1.0
|
||||
) AS root_to_first_asset_delay,
|
||||
-- Asset simultaneity: population standard deviation (sqrt of varPop) of the asset timestamps; -1.0 with fewer than 2 assets
|
||||
-- Author's interpretation: browser: low (parallel batch) ; scraper: high (sequential)
|
||||
if(
|
||||
length(assets) >= 2,
|
||||
sqrt(arrayReduce('varPop',
|
||||
arrayMap(x -> toFloat64(x.1), assets)
|
||||
)),
|
||||
-1.0
|
||||
) AS asset_load_stddev
|
||||
FROM cascade_split
|
||||
-- Drop groups in which no load has is_asset equal to 0 or 1.
WHERE length(docs) > 0 OR length(assets) > 0;
|
||||
|
||||
|
||||
-- =============================================================================
|
||||
-- §5.2 — Graphe bipartite JA4×ASN (Bipartite Bot Fleet Detection)
|
||||
--
|
||||
|
||||
Reference in New Issue
Block a user