Three SQL files were missing from the docker-compose.yml volume mounts: 10_perf_indexes.sql (performance indexes), 11_views.sql (dashboard views), and 12_thesis_features.sql (thesis §5 MVs and views). Also make 10_perf_indexes.sql non-fatal in the init script, since ALTER TABLE ADD INDEX may fail if the index already exists. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
50 lines
2.0 KiB
Bash
Executable File
50 lines
2.0 KiB
Bash
Executable File
#!/bin/bash
# =============================================================================
# clickhouse-init.sh — Pre-process shared SQL files for integration testing
#
# Copies SQL from /initdb-src/ to /tmp, patches credentials, then executes.
#
# Strict mode: abort on a failed command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
# =============================================================================
set -euo pipefail

# Directory the original *.sql files are read from.
SRC_DIR="/initdb-src"
# Scratch directory that receives the credential-patched copies.
TMP_DIR="/tmp/initdb-patched"

# Start from an empty scratch directory. Every *.sql found in TMP_DIR is
# executed later, so a leftover copy from a previous run (for example a file
# that has since been removed from SRC_DIR) would otherwise be run again.
# ${TMP_DIR:?} aborts instead of expanding to an empty path.
rm -rf -- "${TMP_DIR:?}"
mkdir -p -- "$TMP_DIR"

# Stage 1: write a credential-patched copy of every source SQL file into
# $TMP_DIR. The files under $SRC_DIR are only read, never modified.
#
# The sed expressions are the same for every file, so they are built once:
# USER 'admin' becomes USER 'default', and both placeholder passwords
# ('CHANGE_ME' and 'ChangeMe') become the empty string.
credential_patches=(
  -e "s/USER 'admin'/USER 'default'/g"
  -e "s/PASSWORD 'CHANGE_ME'/PASSWORD ''/g"
  -e "s/PASSWORD 'ChangeMe'/PASSWORD ''/g"
)

for src_sql in "$SRC_DIR"/*.sql; do
  # An unmatched glob is left as the literal pattern; skip anything that is
  # not a regular file.
  if [[ ! -f "$src_sql" ]]; then
    continue
  fi
  sql_name=${src_sql##*/}
  printf '[init] Patching %s\n' "$sql_name"
  sed "${credential_patches[@]}" "$src_sql" > "$TMP_DIR/$sql_name"
done

# Stage 2: feed each patched SQL file to clickhouse-client on stdin, in the
# order the *.sql glob expands.
for f in "$TMP_DIR"/*.sql; do
  # An unmatched glob is left as the literal pattern; skip non-files.
  [[ -f "$f" ]] || continue
  base=$(basename "$f")
  echo "[init] Executing $base"
  if [[ "$base" == 10_* ]]; then
    # 10_perf_indexes.sql uses ALTER TABLE ADD INDEX which may fail if index
    # already exists — allow non-zero exit for migration/perf scripts.
    # --ignore-error makes the client continue with the remaining statements
    # after one fails; without it the first duplicate index would stop the
    # file and every later ALTER would be silently skipped. The `||` arm keeps
    # a non-zero exit status from aborting the script under `set -e`.
    clickhouse-client --multiquery --ignore-error < "$f" \
      || echo "[init] WARNING: $base had errors (expected for duplicate indexes)" >&2
  else
    # Any other file must succeed; a failure here aborts the script.
    clickhouse-client --multiquery < "$f"
  fi
done

# ---------------------------------------------------------------------------
# Dictionary seed data
#
# A REGEXP_TREE dictionary needs at least one rule. With none, every INSERT
# into http_logs_raw fails, because the materialized view mv_http_logs calls
# dictGet() on the empty dictionary. A single catch-all rule ('.*' with
# blank/zero attribute values) is inserted so the pipeline works.
# ---------------------------------------------------------------------------
printf '%s\n' '[init] Seeding anubis_ua_rules (REGEXP_TREE needs ≥1 rule)...'

# The statement contains no shell metacharacters that expand inside double
# quotes, so it reaches the client exactly as written here.
seed_rule_sql="INSERT INTO ja4_processing.anubis_ua_rules (id, parent_id, regexp, keys, values) VALUES
(1, 0, '.*', ['bot_name','action','has_ip','rule_id','category'], ['','','0','0','']);"

# The here-string supplies the statement on stdin followed by a newline.
clickhouse-client --multiquery <<<"$seed_rule_sql"

printf '%s\n' '[init] All SQL files executed and seed data inserted'