feat: implement thesis §5 advanced detection techniques as ClickHouse MVs
New aggregation tables + materialized views:
- agg_path_sequences_1h + MV (§5.1 Path Sequence Entropy)
- agg_request_timing_1h + MV (§5.3 Request Cadence Fingerprint)
- agg_ip_behavior_1h + MV (§5.5 JA4 Drift + §5.8 Cross-Domain)
- agg_resource_cascade_1h + MV (§5.4 Resource Dependency Tree)

New analytical views:
- view_thesis_features_1h: unified view exposing all computable features (path_transition_entropy, cadence_cv, burst_ratio, pause_ratio, ja4_drift_ratio, host_diversity, host_sweep_speed, host_coverage_uniformity)
- view_resource_cascade_1h: root_to_first_asset_delay, asset_load_stddev

Documented future techniques (not feasible as MV):
- §5.2 Bipartite Fleet Graph (needs Python networkx)
- §5.6 DNS Shadow Analysis (needs sentinel UDP/53 extension)
- §5.7 Compression Ratio Invariant (needs mod_reqin_log extension)

Updated: deploy_schema.sh, verify_mvs.py (sections 8-10)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -176,6 +176,106 @@ def main() -> None:
|
||||
except Exception as exc:
|
||||
print(f" \033[93m?\033[0m {view:40s} ERREUR : {exc}")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 8. Tables d'agrégation avancées (thèse §5)
|
||||
# ------------------------------------------------------------------
|
||||
print("\n── 8. Tables d'agrégation thèse §5 ─────────────────────────")
|
||||
thesis_tables = [
|
||||
("agg_path_sequences_1h", "§5.1 Path Sequence Entropy"),
|
||||
("agg_request_timing_1h", "§5.3 Request Cadence"),
|
||||
("agg_ip_behavior_1h", "§5.5/§5.8 JA4 Drift + Cross-Domain"),
|
||||
("agg_resource_cascade_1h", "§5.4 Resource Dependency Tree"),
|
||||
]
|
||||
for table, desc in thesis_tables:
|
||||
try:
|
||||
n = _count(client, f"SELECT count(*) FROM {CLICKHOUSE_DB}.{table}")
|
||||
if not _check(f"{table} ({desc})", n, ">", 0):
|
||||
failures += 1
|
||||
except Exception as exc:
|
||||
print(f" {FAIL} {table:40s} ERREUR : {exc}")
|
||||
failures += 1
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 9. Vue view_thesis_features_1h — features avancées
|
||||
# ------------------------------------------------------------------
|
||||
print("\n── 9. Vue view_thesis_features_1h (thèse §5) ───────────────")
|
||||
try:
|
||||
n = _count(client, f"SELECT count(*) FROM {CLICKHOUSE_DB}.view_thesis_features_1h")
|
||||
if not _check("view_thesis_features_1h count", n, ">", 0):
|
||||
failures += 1
|
||||
|
||||
# Vérification des colonnes §5.1
|
||||
result = client.query(
|
||||
f"SELECT avg(path_transition_entropy) AS avg_entropy "
|
||||
f"FROM {CLICKHOUSE_DB}.view_thesis_features_1h "
|
||||
f"WHERE path_transition_entropy >= 0"
|
||||
)
|
||||
avg_ent = float(result.result_rows[0][0]) if result.result_rows else -1
|
||||
ok = 0 <= avg_ent <= 1.0
|
||||
print(f" {'✓' if ok else '✗'} §5.1 path_transition_entropy avg {avg_ent:.4f} (attendu [0, 1])")
|
||||
if not ok:
|
||||
failures += 1
|
||||
|
||||
# Vérification des colonnes §5.3
|
||||
result = client.query(
|
||||
f"SELECT avg(cadence_cv) AS avg_cv, avg(burst_ratio) AS avg_burst "
|
||||
f"FROM {CLICKHOUSE_DB}.view_thesis_features_1h "
|
||||
f"WHERE cadence_cv IS NOT NULL"
|
||||
)
|
||||
if result.result_rows:
|
||||
avg_cv = float(result.result_rows[0][0])
|
||||
avg_burst = float(result.result_rows[0][1])
|
||||
print(f" {PASS} §5.3 cadence_cv avg {avg_cv:.4f}")
|
||||
print(f" {PASS} §5.3 burst_ratio avg {avg_burst:.4f}")
|
||||
else:
|
||||
print(f" \033[93m?\033[0m §5.3 cadence features pas de données")
|
||||
|
||||
# Vérification des colonnes §5.5
|
||||
result = client.query(
|
||||
f"SELECT avg(ja4_drift_ratio) AS avg_drift, "
|
||||
f" avg(host_diversity) AS avg_hosts "
|
||||
f"FROM {CLICKHOUSE_DB}.view_thesis_features_1h "
|
||||
f"WHERE ja4_drift_ratio IS NOT NULL"
|
||||
)
|
||||
if result.result_rows:
|
||||
avg_drift = float(result.result_rows[0][0])
|
||||
avg_hosts = float(result.result_rows[0][1])
|
||||
ok_drift = 0 <= avg_drift <= 1.0
|
||||
print(f" {'✓' if ok_drift else '✗'} §5.5 ja4_drift_ratio avg {avg_drift:.4f} (attendu [0, 1])")
|
||||
print(f" {PASS} §5.8 host_diversity avg {avg_hosts:.2f}")
|
||||
if not ok_drift:
|
||||
failures += 1
|
||||
else:
|
||||
print(f" \033[93m?\033[0m §5.5/§5.8 drift/cross-domain pas de données")
|
||||
|
||||
except Exception as exc:
|
||||
print(f" {FAIL} view_thesis_features_1h ERREUR : {exc}")
|
||||
failures += 1
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 10. Vue view_resource_cascade_1h (thèse §5.4)
|
||||
# ------------------------------------------------------------------
|
||||
print("\n── 10. Vue view_resource_cascade_1h (thèse §5.4) ───────────")
|
||||
try:
|
||||
n = _count(client, f"SELECT count(*) FROM {CLICKHOUSE_DB}.view_resource_cascade_1h")
|
||||
_check("view_resource_cascade_1h count", n, ">=", 0)
|
||||
|
||||
if n > 0:
|
||||
result = client.query(
|
||||
f"SELECT avg(root_to_first_asset_delay), avg(asset_load_stddev) "
|
||||
f"FROM {CLICKHOUSE_DB}.view_resource_cascade_1h "
|
||||
f"WHERE root_to_first_asset_delay >= 0"
|
||||
)
|
||||
if result.result_rows:
|
||||
avg_delay = float(result.result_rows[0][0])
|
||||
avg_stddev = float(result.result_rows[0][1])
|
||||
print(f" {PASS} §5.4 root_to_first_asset_delay avg {avg_delay:.2f}s")
|
||||
print(f" {PASS} §5.4 asset_load_stddev avg {avg_stddev:.2f}s")
|
||||
else:
|
||||
print(f" (peut être 0 si pas de mix document/asset dans le trafic test)")
|
||||
except Exception as exc:
|
||||
print(f" {FAIL} view_resource_cascade_1h ERREUR : {exc}")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Résumé
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user