|
|
|
|
@ -6,7 +6,7 @@ The ClickHouse schema for ja4-platform is managed through numbered SQL migration
|
|
|
|
|
|
|
|
|
|
| File | Purpose |
|
|
|
|
|
|------|---------|
|
|
|
|
|
| `00_database.sql` | Creates the `mabase_prod` database |
|
|
|
|
|
| `00_database.sql` | Creates the `ja4_processing` database |
|
|
|
|
|
| `01_raw_tables.sql` | Creates `http_logs_raw` ingest table (MergeTree, 1-day TTL) |
|
|
|
|
|
| `02_dictionaries.sql` | Creates ASN geo dictionary (`dict_iplocate_asn`), bot IP/JA4 reference tables, `ref_bot_networks` |
|
|
|
|
|
| `03_anubis_tables.sql` | Creates Anubis crawler rule tables (`anubis_ua_rules`, `anubis_ip_rules`, `anubis_asn_rules`, `anubis_country_rules`) and their dictionaries (`dict_anubis_ua`, `dict_anubis_ip`, `dict_anubis_asn`, `dict_anubis_country`) |
|
|
|
|
|
@ -87,21 +87,21 @@ After applying all migrations, run these queries to verify each migration was su
|
|
|
|
|
### 00 — Database
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
SHOW DATABASES LIKE 'mabase_prod';
|
|
|
|
|
-- Expected: mabase_prod
|
|
|
|
|
SHOW DATABASES LIKE 'ja4_processing';
|
|
|
|
|
-- Expected: ja4_processing
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### 01 — Raw Tables
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
EXISTS mabase_prod.http_logs_raw;
|
|
|
|
|
EXISTS ja4_logs.http_logs_raw;
|
|
|
|
|
-- Expected: 1
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### 02 — Dictionaries
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
SELECT dictGetOrDefault('mabase_prod.dict_iplocate_asn', 'country_code',
|
|
|
|
|
SELECT dictGetOrDefault('ja4_processing.dict_iplocate_asn', 'country_code',
|
|
|
|
|
toIPv6(toIPv4('8.8.8.8')), 'MISSING');
|
|
|
|
|
-- Expected: US (if CSV loaded) or MISSING
|
|
|
|
|
```
|
|
|
|
|
@ -109,43 +109,43 @@ SELECT dictGetOrDefault('mabase_prod.dict_iplocate_asn', 'country_code',
|
|
|
|
|
### 03 — Anubis Tables
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
EXISTS mabase_prod.anubis_ua_rules;
|
|
|
|
|
EXISTS mabase_prod.anubis_ip_rules;
|
|
|
|
|
EXISTS mabase_prod.anubis_asn_rules;
|
|
|
|
|
EXISTS mabase_prod.anubis_country_rules;
|
|
|
|
|
EXISTS ja4_processing.anubis_ua_rules;
|
|
|
|
|
EXISTS ja4_processing.anubis_ip_rules;
|
|
|
|
|
EXISTS ja4_processing.anubis_asn_rules;
|
|
|
|
|
EXISTS ja4_processing.anubis_country_rules;
|
|
|
|
|
-- Expected: 1 for each
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### 04 — MV + http_logs
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
EXISTS mabase_prod.http_logs;
|
|
|
|
|
SELECT name FROM system.tables WHERE database = 'mabase_prod' AND name = 'mv_http_logs';
|
|
|
|
|
EXISTS ja4_logs.http_logs;
|
|
|
|
|
SELECT name FROM system.tables WHERE database = 'ja4_logs' AND name = 'mv_http_logs';
|
|
|
|
|
-- Expected: mv_http_logs
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### 05 — Aggregation Tables
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
EXISTS mabase_prod.agg_host_ip_ja4_1h;
|
|
|
|
|
EXISTS mabase_prod.agg_header_fingerprint_1h;
|
|
|
|
|
SELECT name FROM system.dictionaries WHERE database = 'mabase_prod' AND name = 'dict_bot_ip';
|
|
|
|
|
EXISTS ja4_processing.agg_host_ip_ja4_1h;
|
|
|
|
|
EXISTS ja4_processing.agg_header_fingerprint_1h;
|
|
|
|
|
SELECT name FROM system.dictionaries WHERE database = 'ja4_processing' AND name = 'dict_bot_ip';
|
|
|
|
|
-- Expected: dict_bot_ip
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### 06 — ML Tables
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
EXISTS mabase_prod.ml_detected_anomalies;
|
|
|
|
|
EXISTS mabase_prod.ml_all_scores;
|
|
|
|
|
SELECT name FROM system.tables WHERE database = 'mabase_prod' AND name LIKE 'view_ip%';
|
|
|
|
|
EXISTS ja4_processing.ml_detected_anomalies;
|
|
|
|
|
EXISTS ja4_processing.ml_all_scores;
|
|
|
|
|
SELECT name FROM system.tables WHERE database = 'ja4_processing' AND name LIKE 'view_ip%';
|
|
|
|
|
-- Expected: view_ip_recurrence
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### 07 — AI Features View
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
SELECT name FROM system.tables WHERE database = 'mabase_prod' AND name = 'view_ai_features_1h';
|
|
|
|
|
SELECT name FROM system.tables WHERE database = 'ja4_processing' AND name = 'view_ai_features_1h';
|
|
|
|
|
-- Expected: view_ai_features_1h
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
@ -153,7 +153,7 @@ SELECT name FROM system.tables WHERE database = 'mabase_prod' AND name = 'view_a
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
SHOW GRANTS FOR data_writer;
|
|
|
|
|
-- Expected: GRANT INSERT, SELECT ON mabase_prod.http_logs_raw TO data_writer
|
|
|
|
|
-- Expected: GRANT INSERT, SELECT ON ja4_logs.http_logs_raw TO data_writer
|
|
|
|
|
SHOW GRANTS FOR analyst;
|
|
|
|
|
-- Expected: GRANT SELECT ON multiple tables
|
|
|
|
|
```
|
|
|
|
|
@ -161,7 +161,7 @@ SHOW GRANTS FOR analyst;
|
|
|
|
|
### 09 — Audit Table
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
EXISTS mabase_prod.audit_logs;
|
|
|
|
|
EXISTS ja4_processing.audit_logs;
|
|
|
|
|
-- Expected: 1
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
@ -171,7 +171,7 @@ EXISTS mabase_prod.audit_logs;
|
|
|
|
|
SELECT
|
|
|
|
|
count() AS total_tables
|
|
|
|
|
FROM system.tables
|
|
|
|
|
WHERE database = 'mabase_prod'
|
|
|
|
|
WHERE database IN ('ja4_processing', 'ja4_logs')
|
|
|
|
|
AND name IN (
|
|
|
|
|
'http_logs_raw', 'http_logs', 'agg_host_ip_ja4_1h', 'agg_header_fingerprint_1h',
|
|
|
|
|
'ml_detected_anomalies', 'ml_all_scores', 'ref_bot_networks',
|
|
|
|
|
@ -187,64 +187,64 @@ WHERE database = 'mabase_prod'
|
|
|
|
|
|
|
|
|
|
ClickHouse does not support transactional DDL. To roll back a migration:
|
|
|
|
|
|
|
|
|
|
1. **Tables**: `DROP TABLE IF EXISTS mabase_prod.<table_name>`
|
|
|
|
|
2. **Materialized Views**: `DROP VIEW IF EXISTS mabase_prod.<mv_name>` (drop MV before its target table)
|
|
|
|
|
3. **Dictionaries**: `DROP DICTIONARY IF EXISTS mabase_prod.<dict_name>`
|
|
|
|
|
4. **Views**: `DROP VIEW IF EXISTS mabase_prod.<view_name>`
|
|
|
|
|
1. **Tables**: `DROP TABLE IF EXISTS ja4_processing.<table_name>` (use the `ja4_logs.` prefix for `http_logs_raw` and `http_logs`)
|
|
|
|
|
2. **Materialized Views**: `DROP VIEW IF EXISTS ja4_processing.<mv_name>` (drop MV before its target table; `mv_http_logs` lives in `ja4_logs`)
|
|
|
|
|
3. **Dictionaries**: `DROP DICTIONARY IF EXISTS ja4_processing.<dict_name>`
|
|
|
|
|
4. **Views**: `DROP VIEW IF EXISTS ja4_processing.<view_name>`
|
|
|
|
|
5. **Users**: `DROP USER IF EXISTS <username>`
|
|
|
|
|
|
|
|
|
|
### Rollback Order (Reverse of Apply)
|
|
|
|
|
|
|
|
|
|
```sql
|
|
|
|
|
-- 09: Audit
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.audit_logs;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.audit_logs;
|
|
|
|
|
|
|
|
|
|
-- 08: Users
|
|
|
|
|
DROP USER IF EXISTS data_writer;
|
|
|
|
|
DROP USER IF EXISTS analyst;
|
|
|
|
|
|
|
|
|
|
-- 07: AI Features View
|
|
|
|
|
DROP VIEW IF EXISTS mabase_prod.view_ai_features_1h;
|
|
|
|
|
DROP VIEW IF EXISTS ja4_processing.view_ai_features_1h;
|
|
|
|
|
|
|
|
|
|
-- 06: ML Tables
|
|
|
|
|
DROP VIEW IF EXISTS mabase_prod.view_ip_recurrence;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.ml_all_scores;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.ml_detected_anomalies;
|
|
|
|
|
DROP VIEW IF EXISTS ja4_processing.view_ip_recurrence;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.ml_all_scores;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.ml_detected_anomalies;
|
|
|
|
|
|
|
|
|
|
-- 05: Aggregation
|
|
|
|
|
DROP VIEW IF EXISTS mabase_prod.mv_agg_header_fingerprint_1h;
|
|
|
|
|
DROP VIEW IF EXISTS mabase_prod.mv_agg_host_ip_ja4_1h;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.agg_header_fingerprint_1h;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.agg_host_ip_ja4_1h;
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_asn_reputation;
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_bot_ja4;
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_bot_ip;
|
|
|
|
|
DROP VIEW IF EXISTS ja4_processing.mv_agg_header_fingerprint_1h;
|
|
|
|
|
DROP VIEW IF EXISTS ja4_processing.mv_agg_host_ip_ja4_1h;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.agg_header_fingerprint_1h;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.agg_host_ip_ja4_1h;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_asn_reputation;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_bot_ja4;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_bot_ip;
|
|
|
|
|
|
|
|
|
|
-- 04: MV + http_logs
|
|
|
|
|
DROP VIEW IF EXISTS mabase_prod.mv_http_logs;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.http_logs;
|
|
|
|
|
DROP VIEW IF EXISTS ja4_logs.mv_http_logs;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_logs.http_logs;
|
|
|
|
|
|
|
|
|
|
-- 03: Anubis
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_country;
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_asn;
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_ip;
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_anubis_ua;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.anubis_country_rules;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.anubis_asn_rules;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.anubis_ip_rules;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.anubis_ua_rules;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_anubis_country;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_anubis_asn;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_anubis_ip;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_anubis_ua;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.anubis_country_rules;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.anubis_asn_rules;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.anubis_ip_rules;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.anubis_ua_rules;
|
|
|
|
|
|
|
|
|
|
-- 02: Dictionaries
|
|
|
|
|
DROP DICTIONARY IF EXISTS mabase_prod.dict_iplocate_asn;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.bot_ja4;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.bot_ip;
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.ref_bot_networks;
|
|
|
|
|
DROP DICTIONARY IF EXISTS ja4_processing.dict_iplocate_asn;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.bot_ja4;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.bot_ip;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_processing.ref_bot_networks;
|
|
|
|
|
|
|
|
|
|
-- 01: Raw Tables
|
|
|
|
|
DROP TABLE IF EXISTS mabase_prod.http_logs_raw;
|
|
|
|
|
DROP TABLE IF EXISTS ja4_logs.http_logs_raw;
|
|
|
|
|
|
|
|
|
|
-- 00: Database
|
|
|
|
|
DROP DATABASE IF EXISTS mabase_prod;
|
|
|
|
|
DROP DATABASE IF EXISTS ja4_processing;
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Important Notes
|
|
|
|
|
|