fix: SQL view ordering, purge-db flag, ctest directory
- 12_thesis_features.sql: move view_resource_cascade_1h before view_thesis_features_1h
- Makefile: purge-db uses --reset (not --clean)
- mod-reqin-log: ctest --test-dir build/tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2
Makefile
2
Makefile
@ -199,4 +199,4 @@ init-and-import:
|
|||||||
./scripts/init-stack.sh --import-prod
|
./scripts/init-stack.sh --import-prod
|
||||||
|
|
||||||
purge-db:
|
purge-db:
|
||||||
./scripts/init-stack.sh --clean
|
./scripts/init-stack.sh --reset
|
||||||
|
|||||||
@ -200,6 +200,58 @@ FROM ja4_logs.http_logs
|
|||||||
GROUP BY window_start, src_ip, ja4, host;
|
GROUP BY window_start, src_ip, ja4, host;
|
||||||
|
|
||||||
|
|
||||||
|
-- =============================================================================
-- §5.4 — resource_cascade view (Resource Dependency Tree)
--
-- For every (window_start, src_ip, ja4, host) group of the last 24 hours that
-- has at least 3 recorded resource loads, exposes:
--   * doc_count / asset_count   — number of document / asset loads;
--   * root_to_first_asset_delay — delay between the first document and the
--                                 first asset (a single difference, not a
--                                 mean); -1.0 when either side is missing;
--   * asset_load_stddev         — population standard deviation of the asset
--                                 timestamps (simultaneity); -1.0 when fewer
--                                 than 2 assets were seen.
--
-- Must be created BEFORE view_thesis_features_1h, which references it.
-- =============================================================================

CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
WITH
    -- Merge the aggregate states of each group and sort the resulting tuples by
    -- their first element (the load timestamp). Groups with fewer than 3 loads
    -- are discarded.
    cascade_raw AS (
        SELECT
            window_start,
            src_ip,
            ja4,
            host,
            arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
        FROM ja4_processing.agg_resource_cascade_1h
        WHERE window_start >= now() - INTERVAL 24 HOUR
        GROUP BY window_start, src_ip, ja4, host
        HAVING length(sorted_loads) >= 3
    ),
    -- Split the sorted loads on the second tuple element (is_asset flag).
    -- arrayFilter keeps the input order, so docs[1] / assets[1] are the
    -- earliest document / asset of the group.
    cascade_split AS (
        SELECT
            window_start,
            src_ip,
            ja4,
            host,
            -- Document loads (is_asset = 0)
            arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
            -- Asset loads (is_asset = 1)
            arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
        FROM cascade_raw
    )
SELECT
    window_start,
    src_ip,
    ja4,
    host,
    length(docs)   AS doc_count,
    length(assets) AS asset_count,
    -- Delay from the first document to the first asset (seconds, per the
    -- original author's note — the timestamp type is not visible here).
    -- Real browser: 0.05–0.2 s; Playwright: < 0.01 s; scraper: > 1 s or 0.
    -- NOTE(review): an asset recorded before the first document yields a
    -- negative delay, which can be confused with the -1.0 "missing" sentinel.
    if(
        length(docs) > 0 AND length(assets) > 0,
        toFloat64(assets[1].1 - docs[1].1),
        -1.0
    ) AS root_to_first_asset_delay,
    -- Simultaneity of the assets: standard deviation of their timestamps.
    -- Browser: low (parallel batch); scraper: high (sequential).
    -- Timestamps are taken relative to the earliest asset before the reduction:
    -- variance is translation-invariant, and keeping the operands small avoids
    -- the precision loss of varPop (documented as numerically unstable) on
    -- large absolute timestamp values.
    if(
        length(assets) >= 2,
        sqrt(arrayReduce(
            'varPop',
            arrayMap(x -> toFloat64(x.1 - assets[1].1), assets)
        )),
        -1.0
    ) AS asset_load_stddev
FROM cascade_split
WHERE length(docs) > 0 OR length(assets) > 0;
|
||||||
|
|
||||||
|
|
||||||
-- =============================================================================
|
-- =============================================================================
|
||||||
-- view_thesis_features_1h — Vue unifiée des features avancées
|
-- view_thesis_features_1h — Vue unifiée des features avancées
|
||||||
--
|
--
|
||||||
@ -463,57 +515,6 @@ LEFT JOIN ja4_processing.view_resource_cascade_1h rc
|
|||||||
AND p.host = rc.host;
|
AND p.host = rc.host;
|
||||||
|
|
||||||
|
|
||||||
-- =============================================================================
|
|
||||||
-- §5.4 — Vue resource_cascade (Resource Dependency Tree)
|
|
||||||
--
|
|
||||||
-- Calcule le délai moyen entre le premier document et le premier asset,
|
|
||||||
-- et l'écart-type des timestamps des assets (simultanéité).
|
|
||||||
-- =============================================================================
|
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ja4_processing.view_resource_cascade_1h AS
|
|
||||||
WITH
|
|
||||||
cascade_raw AS (
|
|
||||||
SELECT
|
|
||||||
window_start, src_ip, ja4, host,
|
|
||||||
arraySort(x -> x.1, groupArrayMerge(200)(resource_loads)) AS sorted_loads
|
|
||||||
FROM ja4_processing.agg_resource_cascade_1h
|
|
||||||
WHERE window_start >= now() - INTERVAL 24 HOUR
|
|
||||||
GROUP BY window_start, src_ip, ja4, host
|
|
||||||
HAVING length(sorted_loads) >= 3
|
|
||||||
),
|
|
||||||
cascade_split AS (
|
|
||||||
SELECT
|
|
||||||
window_start, src_ip, ja4, host,
|
|
||||||
-- Timestamps des documents (is_asset = 0)
|
|
||||||
arrayFilter(x -> x.2 = 0, sorted_loads) AS docs,
|
|
||||||
-- Timestamps des assets (is_asset = 1)
|
|
||||||
arrayFilter(x -> x.2 = 1, sorted_loads) AS assets
|
|
||||||
FROM cascade_raw
|
|
||||||
)
|
|
||||||
SELECT
|
|
||||||
window_start, src_ip, ja4, host,
|
|
||||||
length(docs) AS doc_count,
|
|
||||||
length(assets) AS asset_count,
|
|
||||||
-- Délai moyen premier document → premier asset (secondes)
|
|
||||||
-- Navigateur réel : 0.05–0.2s ; Playwright : <0.01s ; Scraper : >1s ou 0
|
|
||||||
if(
|
|
||||||
length(docs) > 0 AND length(assets) > 0,
|
|
||||||
toFloat64(assets[1].1 - docs[1].1),
|
|
||||||
-1.0
|
|
||||||
) AS root_to_first_asset_delay,
|
|
||||||
-- Simultanéité des assets : écart-type des timestamps des assets
|
|
||||||
-- Navigateur : faible (batch parallèle) ; Scraper : élevé (séquentiel)
|
|
||||||
if(
|
|
||||||
length(assets) >= 2,
|
|
||||||
sqrt(arrayReduce('varPop',
|
|
||||||
arrayMap(x -> toFloat64(x.1), assets)
|
|
||||||
)),
|
|
||||||
-1.0
|
|
||||||
) AS asset_load_stddev
|
|
||||||
FROM cascade_split
|
|
||||||
WHERE length(docs) > 0 OR length(assets) > 0;
|
|
||||||
|
|
||||||
|
|
||||||
-- =============================================================================
|
-- =============================================================================
|
||||||
-- §5.2 — Graphe bipartite JA4×ASN (Bipartite Bot Fleet Detection)
|
-- §5.2 — Graphe bipartite JA4×ASN (Bipartite Bot Fleet Detection)
|
||||||
--
|
--
|
||||||
|
|||||||
Reference in New Issue
Block a user