#!/bin/bash
# =============================================================================
# clickhouse-init.sh — Pre-process shared SQL files for integration testing
#
# Copies every *.sql file from /initdb-src/ into a staging directory under
# /tmp, patches credentials with sed, then feeds each patched file to
# clickhouse-client. Finally seeds one catch-all rule into
# ja4_processing.anubis_ua_rules.
#
# Phases:
#   1. Patch   — rewrite USER 'admin' to USER 'default' and blank out the
#                placeholder passwords ('CHANGE_ME' / 'ChangeMe').
#   2. Execute — run the patched files in shell glob order. A failure aborts
#                the script (set -e), except for files named 10_*, whose
#                errors are reported as a warning only.
#   3. Seed    — insert the rule the REGEXP_TREE dictionary needs.
# =============================================================================

set -e

SRC_DIR="/initdb-src"
TMP_DIR="/tmp/initdb-patched"

# Start from an empty staging directory. Without this, *.sql files left behind
# by an earlier run (or by a source file that has since been removed or
# renamed) would still match the glob in phase 2 and be executed.
# The :? guard aborts instead of expanding to an empty path.
rm -rf -- "${TMP_DIR:?}"
mkdir -p "$TMP_DIR"

# ---------------------------------------------------------------------------
# Phase 1: write a credential-patched copy of each source file
# ---------------------------------------------------------------------------
for f in "$SRC_DIR"/*.sql; do
  # An unmatched glob stays as the literal pattern; skip it.
  [[ -f "$f" ]] || continue
  base=${f##*/}
  echo "[init] Patching $base"
  sed \
    -e "s/USER 'admin'/USER 'default'/g" \
    -e "s/PASSWORD 'CHANGE_ME'/PASSWORD ''/g" \
    -e "s/PASSWORD 'ChangeMe'/PASSWORD ''/g" \
    "$f" > "$TMP_DIR/$base"
done

# ---------------------------------------------------------------------------
# Phase 2: execute the patched files
# ---------------------------------------------------------------------------
for f in "$TMP_DIR"/*.sql; do
  [[ -f "$f" ]] || continue
  base=${f##*/}
  echo "[init] Executing $base"
  # 10_perf_indexes.sql uses ALTER TABLE ADD INDEX which may fail if index
  # already exists — allow non-zero exit for migration/perf scripts
  if [[ "$base" == 10_* ]]; then
    clickhouse-client --multiquery < "$f" \
      || echo "[init] WARNING: $base had errors (expected for duplicate indexes)" >&2
  else
    clickhouse-client --multiquery < "$f"
  fi
done

# ---------------------------------------------------------------------------
# Phase 3: seed data required for dictionaries to function
# REGEXP_TREE dictionaries require at least one rule; without it, any INSERT
# into http_logs_raw fails because the MV mv_http_logs calls dictGet() on
# the empty dict. Insert a catch-all "unknown" rule so the pipeline works.
#
# NOTE(review): this INSERT runs unconditionally, so re-running the script
# issues it again — confirm whether the target table tolerates a duplicate
# rule with id 1.
# ---------------------------------------------------------------------------
echo "[init] Seeding anubis_ua_rules (REGEXP_TREE needs ≥1 rule)..."
clickhouse-client --multiquery <<'SEED'
INSERT INTO ja4_processing.anubis_ua_rules (id, parent_id, regexp, keys, values) VALUES (1, 0, '.*', ['bot_name','action','has_ip','rule_id','category'], ['','','0','0','']);
SEED

echo "[init] All SQL files executed and seed data inserted"