Files
ja4-platform/shared/clickhouse/deploy_schema.sh
toto 14323f7b05 perf(clickhouse): P10 — créer les 4 vues métier manquantes + corriger préfixes DB
Bug de production : view_form_bruteforce_detected, view_host_ip_ja4_rotation,
view_dashboard_entities, view_dashboard_user_agents étaient référencées dans
13 endpoints du dashboard mais n'existaient nulle part dans le schéma.
Tous ces endpoints retournaient HTTP 500 en production.

shared/clickhouse/11_views.sql (nouveau) :

  view_form_bruteforce_detected
    Source : agg_host_ip_ja4_1h (24h)
    Logique : GROUP BY (src_ip, host) HAVING count_post >= 10
    Usage   : bruteforce.py (3 endpoints), investigation_summary.py

  view_host_ip_ja4_rotation
    Source : agg_host_ip_ja4_1h (24h)
    Logique : uniqExact(ja4) par src_ip, HAVING >= 2 (rotation de fingerprint)
    Usage   : rotation.py (3 endpoints), investigation_summary.py

  view_dashboard_entities
    Source : http_logs (7 jours), UNION ALL 5 branches (ip/ja4/country/asn/host)
    Colonnes : entity_type, entity_value, src_ip, ja4, host, log_date,
               client_headers Array(String), asns Array, countries Array,
               user_agents Array
    Usage   : entities.py (5 endpoints), clustering.py

  view_dashboard_user_agents
    Source : http_logs (7 jours), GROUP BY (src_ip, ja4, hour)
    Colonnes : src_ip, ja4, hour, log_date, user_agents Array(String), requests
    Usage   : variability.py (4 endpoints), fingerprints.py (5 endpoints)
              attributes.py (2 endpoints)

deploy_schema.sh : ajout de 10_perf_indexes.sql et 11_views.sql dans la liste

routes/variability.py + fingerprints.py :
  Correction de 9 requêtes utilisant view_dashboard_user_agents sans préfixe
  de base de données → remplacé par {settings.CLICKHOUSE_DB_PROCESSING}.view_*

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-07 22:30:09 +02:00

70 lines
2.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# deploy_schema.sh — Apply ClickHouse schema migrations with configurable
# database names via environment variables.
#
# Usage:
# CLICKHOUSE_DB_LOGS=my_logs CLICKHOUSE_DB_PROCESSING=my_proc ./deploy_schema.sh
#
# Env vars:
# CLICKHOUSE_DB_LOGS — logs database name (default: ja4_logs)
# CLICKHOUSE_DB_PROCESSING — processing database name (default: ja4_processing)
# CLICKHOUSE_HOST — ClickHouse host (default: localhost)
# CLICKHOUSE_PORT — ClickHouse native port (default: 9000)
# CLICKHOUSE_USER — ClickHouse user (default: default)
# CLICKHOUSE_PASSWORD — ClickHouse password (default: empty)
# =============================================================================
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Connection and database settings. Each one keeps the caller's value and
# falls back to its default only when it is unset or empty.
: "${CLICKHOUSE_DB_LOGS:=ja4_logs}"
: "${CLICKHOUSE_DB_PROCESSING:=ja4_processing}"
: "${CLICKHOUSE_HOST:=localhost}"
: "${CLICKHOUSE_PORT:=9000}"
: "${CLICKHOUSE_USER:=default}"
: "${CLICKHOUSE_PASSWORD:=}"

# Absolute path of the directory that contains this script; the SQL files are
# looked up relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Arguments shared by every clickhouse-client invocation.
CH_ARGS=(--host "$CLICKHOUSE_HOST" --port "$CLICKHOUSE_PORT" --user "$CLICKHOUSE_USER")

# --password is appended only when a non-empty password was provided.
[[ -z "$CLICKHOUSE_PASSWORD" ]] || CH_ARGS+=(--password "$CLICKHOUSE_PASSWORD")
# Schema files to apply. Array order is application order and follows the
# numeric prefix of each file name.
SQL_FILES=()
SQL_FILES+=(00_database.sql 01_raw_tables.sql 02_dictionaries.sql)
SQL_FILES+=(03_anubis_tables.sql 04_mv_http_logs.sql 05_aggregation_tables.sql)
SQL_FILES+=(06_ml_tables.sql 07_ai_features_view.sql 08_users.sql)
SQL_FILES+=(09_audit_table.sql 10_perf_indexes.sql 11_views.sql)
#######################################
# Escape a string so sed treats it literally in the replacement part of an
# `s/…/…/` command: backslash, the `/` delimiter and `&` are each prefixed
# with a backslash.
# Arguments: $1 - raw replacement text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
sed_escape_replacement() {
  local raw=$1 escaped='' ch i
  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      '\' | '/' | '&') escaped+="\\${ch}" ;;
      *) escaped+="$ch" ;;
    esac
  done
  printf '%s' "$escaped"
}

#######################################
# Print a SQL file with the default database names replaced.
# Arguments: $1 - path to the SQL file
#            $2 - name substituted for every "ja4_logs"
#            $3 - name substituted for every "ja4_processing"
# Outputs:   rewritten SQL on stdout
# Returns:   exit status of sed
#######################################
render_sql() {
  local sql_file=$1 logs_db processing_db
  logs_db=$(sed_escape_replacement "$2")
  processing_db=$(sed_escape_replacement "$3")
  # Two phases: default names -> placeholders -> final names. Substituting the
  # final names directly would let the second expression rewrite text that the
  # first one just inserted (e.g. a logs DB called "ja4_processing_raw").
  sed \
    -e 's/ja4_logs/@@JA4_DB_LOGS@@/g' \
    -e 's/ja4_processing/@@JA4_DB_PROCESSING@@/g' \
    -e "s/@@JA4_DB_LOGS@@/${logs_db}/g" \
    -e "s/@@JA4_DB_PROCESSING@@/${processing_db}/g" \
    "$sql_file"
}

# Apply each schema file in list order. A missing file is reported on stderr
# and skipped rather than aborting the run; any failure of the rendering or of
# clickhouse-client stops the script (errexit + pipefail).
skipped=0
for f in "${SQL_FILES[@]}"; do
  filepath="${SCRIPT_DIR}/${f}"
  if [[ ! -f "$filepath" ]]; then
    echo "WARN: ${f} not found, skipping" >&2
    skipped=$((skipped + 1))
    continue
  fi
  echo ">>> Applying ${f} ..."
  render_sql "$filepath" "$CLICKHOUSE_DB_LOGS" "$CLICKHOUSE_DB_PROCESSING" \
    | clickhouse-client "${CH_ARGS[@]}" --multiquery
done

echo "=== Schema deployment complete ==="
echo " logs db: ${CLICKHOUSE_DB_LOGS}"
echo " processing db: ${CLICKHOUSE_DB_PROCESSING}"
# Make skipped files visible next to the success banner so an incomplete
# schema does not go unnoticed.
if ((skipped > 0)); then
  echo "WARN: ${skipped} schema file(s) were not found and were skipped" >&2
fi