fix: init-stack rock-solid — drop/recreate derived tables before views
Root cause: CREATE TABLE IF NOT EXISTS is a no-op on existing tables, so tables created under an older schema never gain newly added columns. Views (07+) then fail with UNKNOWN_IDENTIFIER errors.

Fix: split SQL execution into 3 phases:
- Phase 1: databases, raw tables, dictionaries (00-04)
- Phase 2: DROP all derived tables (agg_*, ml_*) — safe, repopulated by MVs
- Phase 3: recreate derived tables + views with full current schema (05-12)

This removes the incomplete inline migrations and makes the script truly idempotent regardless of prior schema version.

Tested: fresh --reset, existing stale DB, idempotent re-run.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -104,12 +104,21 @@ if [ "${RESET}" = true ]; then
|
||||
fi
|
||||
|
||||
# ── Exécution des fichiers SQL ───────────────────────────────────────────────
|
||||
SQL_FILES=(
|
||||
# Phase 1: databases + persistent tables + dictionaries + primary MVs (00-04)
# Phase 2: drop the derived tables (agg_*, ml_*) so that no stale schema survives
# Phase 3: recreate the derived tables, then the views (05-12)
# This ordering guarantees that the views (07+) find every column they expect,
# even if the schema has changed since the last initialisation.
|
||||
|
||||
SQL_PHASE1=(
|
||||
00_database.sql
|
||||
01_raw_tables.sql
|
||||
02_dictionaries.sql
|
||||
03_anubis_tables.sql
|
||||
04_mv_http_logs.sql
|
||||
)
|
||||
|
||||
SQL_PHASE3=(
|
||||
05_aggregation_tables.sql
|
||||
06_ml_tables.sql
|
||||
07_ai_features_view.sql
|
||||
@ -120,17 +129,19 @@ SQL_FILES=(
|
||||
12_thesis_features.sql
|
||||
)
|
||||
|
||||
log "Application du schéma SQL (${#SQL_FILES[@]} fichiers)…"
|
||||
ALL_SQL=("${SQL_PHASE1[@]}" "${SQL_PHASE3[@]}")
|
||||
log "Application du schéma SQL (${#ALL_SQL[@]} fichiers)…"
|
||||
ERRORS=0
|
||||
|
||||
for f in "${SQL_FILES[@]}"; do
|
||||
filepath="${SQL_DIR}/${f}"
|
||||
# Fonction commune d'exécution SQL avec substitution
|
||||
run_sql_file() {
|
||||
local f="$1"
|
||||
local filepath="${SQL_DIR}/${f}"
|
||||
if [[ ! -f "${filepath}" ]]; then
|
||||
echo " WARN: ${f} non trouvé, ignoré" >&2
|
||||
continue
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Substitution des noms de bases et des credentials
|
||||
local SQL_PATCHED
|
||||
SQL_PATCHED=$(sed \
|
||||
-e "s/ja4_logs/${DB_LOGS}/g" \
|
||||
-e "s/ja4_processing/${DB_PROC}/g" \
|
||||
@ -139,7 +150,6 @@ for f in "${SQL_FILES[@]}"; do
|
||||
-e "s/PASSWORD 'ChangeMe'/PASSWORD '${DEV_PASSWORD}'/g" \
|
||||
"${filepath}")
|
||||
|
||||
# 10_perf_indexes.sql peut échouer si les index existent déjà
|
||||
if [[ "${f}" == 10_* ]]; then
|
||||
if ch_multiquery "${SQL_PATCHED}" 2>/dev/null; then
|
||||
ok "${f}"
|
||||
@ -154,27 +164,55 @@ for f in "${SQL_FILES[@]}"; do
|
||||
ERRORS=$((ERRORS + 1))
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# Phase 1: databases, persistent tables, dictionaries.
# Apply each phase-1 file in array order through run_sql_file.
for f in "${SQL_PHASE1[@]}"; do run_sql_file "${f}"; done
|
||||
|
||||
# Phase 2: drop the derived objects before phase 3 recreates them.
# Dropping first is what lets files 05/06/12 recreate the tables with ALL the
# columns of the current schema, even when an earlier version is already
# deployed (CREATE TABLE IF NOT EXISTS leaves an existing table untouched).
# The original note states the tables are repopulated automatically by the MVs.
# NOTE(review): materialized views presumably only process rows inserted after
# they are created, so previously aggregated history may not come back — confirm.
log "Nettoyage des tables dérivées (agg_*, ml_*) pour garantir le schéma…"

# Materialized views, dropped before the tables listed below.
DERIVED_MVS=(
  "${DB_PROC}.mv_agg_host_ip_ja4_1h"
  "${DB_PROC}.mv_agg_header_fingerprint_1h"
  "${DB_PROC}.mv_agg_path_sequences_1h"
  "${DB_PROC}.mv_agg_request_timing_1h"
  "${DB_PROC}.mv_agg_ip_behavior_1h"
  "${DB_PROC}.mv_agg_resource_cascade_1h"
)

# Derived tables (agg_* and ml_*) that phase 3 recreates.
DERIVED_TABLES=(
  "${DB_PROC}.agg_host_ip_ja4_1h"
  "${DB_PROC}.agg_header_fingerprint_1h"
  "${DB_PROC}.agg_path_sequences_1h"
  "${DB_PROC}.agg_request_timing_1h"
  "${DB_PROC}.agg_ip_behavior_1h"
  "${DB_PROC}.agg_resource_cascade_1h"
  "${DB_PROC}.ml_detected_anomalies"
  "${DB_PROC}.ml_all_scores"
)

# The drops stay best-effort (a failure never aborts the script), but they are
# no longer silent: a table that could not be dropped keeps its old schema, and
# phase 3 would then skip it without any visible hint of the cause.
DROP_FAILURES=0

for mv in "${DERIVED_MVS[@]}"; do
  if ! ch "DROP VIEW IF EXISTS ${mv}" 2>/dev/null; then
    echo " WARN: DROP VIEW ${mv} a échoué" >&2
    DROP_FAILURES=$((DROP_FAILURES + 1))
  fi
done

for tbl in "${DERIVED_TABLES[@]}"; do
  if ! ch "DROP TABLE IF EXISTS ${tbl}" 2>/dev/null; then
    echo " WARN: DROP TABLE ${tbl} a échoué" >&2
    DROP_FAILURES=$((DROP_FAILURES + 1))
  fi
done

# Only report success when every drop actually went through.
if [[ "${DROP_FAILURES}" -gt 0 ]]; then
  echo " WARN: ${DROP_FAILURES} objet(s) dérivé(s) non supprimé(s), le schéma peut rester obsolète" >&2
else
  ok "Tables dérivées nettoyées"
fi
|
||||
|
||||
# Phase 3: derived tables + views, created from the current schema files.
# Apply each phase-3 file in array order through run_sql_file.
for f in "${SQL_PHASE3[@]}"; do run_sql_file "${f}"; done
|
||||
|
||||
# Report how many SQL files failed, if any did.
if (( ERRORS > 0 )); then
  err "${ERRORS} fichier(s) SQL en erreur"
fi
|
||||
|
||||
# ── Migrations post-schéma (colonnes manquantes sur DB existante) ────────────
|
||||
log "Application des migrations post-schéma…"
|
||||
MIGRATIONS=(
|
||||
"ALTER TABLE ${DB_PROC}.agg_host_ip_ja4_1h ADD COLUMN IF NOT EXISTS count_xff SimpleAggregateFunction(sum, UInt64)"
|
||||
"ALTER TABLE ${DB_PROC}.agg_host_ip_ja4_1h ADD COLUMN IF NOT EXISTS count_unusual_ct SimpleAggregateFunction(sum, UInt64)"
|
||||
"ALTER TABLE ${DB_PROC}.agg_host_ip_ja4_1h ADD COLUMN IF NOT EXISTS count_non_std_port SimpleAggregateFunction(sum, UInt64)"
|
||||
"ALTER TABLE ${DB_PROC}.agg_host_ip_ja4_1h ADD COLUMN IF NOT EXISTS count_login_post SimpleAggregateFunction(sum, UInt64)"
|
||||
"ALTER TABLE ${DB_PROC}.agg_header_fingerprint_1h ADD COLUMN IF NOT EXISTS sec_ch_mobile_mismatch SimpleAggregateFunction(max, UInt8)"
|
||||
)
|
||||
for mig in "${MIGRATIONS[@]}"; do
|
||||
ch "${mig}" 2>/dev/null || true
|
||||
done
|
||||
ok "Migrations appliquées"
|
||||
|
||||
# ── Nettoyage des tables Anubis obsolètes (UA, Country) ─────────────────────
|
||||
# ── Nettoyage post-schéma (tables Anubis obsolètes) ──────────────────────────
|
||||
# Note : les migrations inline ne sont plus nécessaires — les tables dérivées
|
||||
# sont DROP+CREATE en phase 2/3, garantissant le schéma complet.
|
||||
log "Nettoyage des tables Anubis obsolètes…"
|
||||
ch "DROP DICTIONARY IF EXISTS ${DB_PROC}.dict_anubis_ua" 2>/dev/null || true
|
||||
ch "DROP DICTIONARY IF EXISTS ${DB_PROC}.dict_anubis_country" 2>/dev/null || true
|
||||
|
||||
Reference in New Issue
Block a user