feat: CSV generation scripts, API filter params, enriched CSV stubs
- scripts/generate_bot_ip.py: download Tor exit nodes + curate scanner IPs (1353 entries)
- scripts/generate_bot_ja4.py: 31 bot JA4 fingerprints across 16 families
- scripts/generate_asn_data.py: 38 ASNs + 96 IP-to-ASN prefixes
- scripts/update-csv-data.sh: master orchestrator with --install-stubs
- api.py: add asn_org/country_code/ja4/bot_name filters on detections+scores
- pages.py: add /network route
- csv-stubs: enriched with generated data (Tor nodes, scanner IPs, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -26,11 +26,13 @@ _DETECTION_SORT_COLS = {
|
||||
"detected_at", "src_ip", "ja4", "host", "anomaly_score",
|
||||
"threat_level", "recurrence", "hits", "hit_velocity",
|
||||
"fuzzing_index", "post_ratio", "campaign_id",
|
||||
"asn_org", "country_code", "bot_name",
|
||||
}
|
||||
_SCORE_SORT_COLS = {
|
||||
"detected_at", "window_start", "src_ip", "ja4", "host",
|
||||
"anomaly_score", "raw_anomaly_score", "threat_level",
|
||||
"hits", "hit_velocity", "xgb_prob", "ae_recon_error",
|
||||
"asn_org", "country_code",
|
||||
}
|
||||
_TRAFFIC_SORT_COLS = {
|
||||
"time", "src_ip", "method", "host", "path", "http_version",
|
||||
@ -137,6 +139,10 @@ async def detections(
|
||||
order: str = Query("DESC"),
|
||||
threat_level: str | None = Query(None),
|
||||
search: str | None = Query(None),
|
||||
asn_org: str | None = Query(None),
|
||||
country_code: str | None = Query(None),
|
||||
ja4: str | None = Query(None),
|
||||
bot_name: str | None = Query(None),
|
||||
) -> dict[str, Any]:
|
||||
sort = _validate_sort(sort, _DETECTION_SORT_COLS, "detected_at")
|
||||
order = _validate_order(order)
|
||||
@ -155,6 +161,22 @@ async def detections(
|
||||
)
|
||||
params["search"] = f"%{search}%"
|
||||
|
||||
if asn_org:
|
||||
where_clauses.append("asn_org = {asn_org:String}")
|
||||
params["asn_org"] = asn_org
|
||||
|
||||
if country_code:
|
||||
where_clauses.append("country_code = {cc:String}")
|
||||
params["cc"] = country_code
|
||||
|
||||
if ja4:
|
||||
where_clauses.append("ja4 = {ja4:String}")
|
||||
params["ja4"] = ja4
|
||||
|
||||
if bot_name:
|
||||
where_clauses.append("bot_name = {bn:String}")
|
||||
params["bn"] = bot_name
|
||||
|
||||
where = " AND ".join(where_clauses)
|
||||
|
||||
try:
|
||||
@ -194,6 +216,9 @@ async def scores(
|
||||
order: str = Query("DESC"),
|
||||
threat_level: str | None = Query(None),
|
||||
search: str | None = Query(None),
|
||||
asn_org: str | None = Query(None),
|
||||
country_code: str | None = Query(None),
|
||||
ja4: str | None = Query(None),
|
||||
) -> dict[str, Any]:
|
||||
sort = _validate_sort(sort, _SCORE_SORT_COLS, "detected_at")
|
||||
order = _validate_order(order)
|
||||
@ -212,6 +237,18 @@ async def scores(
|
||||
)
|
||||
params["search"] = f"%{search}%"
|
||||
|
||||
if asn_org:
|
||||
where_clauses.append("asn_org = {asn_org:String}")
|
||||
params["asn_org"] = asn_org
|
||||
|
||||
if country_code:
|
||||
where_clauses.append("country_code = {cc:String}")
|
||||
params["cc"] = country_code
|
||||
|
||||
if ja4:
|
||||
where_clauses.append("ja4 = {ja4:String}")
|
||||
params["ja4"] = ja4
|
||||
|
||||
where = " AND ".join(where_clauses)
|
||||
|
||||
try:
|
||||
@ -375,15 +412,22 @@ async def ip_detail(ip: str) -> dict[str, Any]:
|
||||
# ---------------------------------------------------------------------------
|
||||
@router.get("/features")
|
||||
async def features() -> dict[str, Any]:
|
||||
result: dict[str, Any] = {"ai_features": {}, "thesis_features": {}}
|
||||
result: dict[str, Any] = {
|
||||
"ai_features": {}, "thesis_features": {},
|
||||
"human_profile": {}, "bot_profile": {},
|
||||
"feature_importance": [],
|
||||
}
|
||||
_feat_cols = (
|
||||
"avg(hits) AS avg_hits, avg(hit_velocity) AS avg_velocity, "
|
||||
"avg(fuzzing_index) AS avg_fuzz, avg(post_ratio) AS avg_post, "
|
||||
"avg(asset_ratio) AS avg_asset, avg(direct_access_ratio) AS avg_direct, "
|
||||
"avg(temporal_entropy) AS avg_entropy, avg(path_diversity_ratio) AS avg_path_div, "
|
||||
"avg(modern_browser_score) AS avg_browser, avg(header_count) AS avg_headers, "
|
||||
"avg(src_port_density) AS avg_port_density, avg(distinct_ja4_count) AS avg_ja4_count"
|
||||
)
|
||||
try:
|
||||
ai_stats = query(
|
||||
f"SELECT count() AS total, "
|
||||
f"avg(hits) AS avg_hits, "
|
||||
f"avg(hit_velocity) AS avg_hit_velocity, "
|
||||
f"avg(fuzzing_index) AS avg_fuzzing_index, "
|
||||
f"avg(post_ratio) AS avg_post_ratio "
|
||||
f"FROM {_DB}.view_ai_features_1h"
|
||||
f"SELECT count() AS total, {_feat_cols} FROM {_DB}.view_ai_features_1h"
|
||||
)
|
||||
if ai_stats:
|
||||
result["ai_features"] = ai_stats[0]
|
||||
@ -392,21 +436,212 @@ async def features() -> dict[str, Any]:
|
||||
|
||||
try:
|
||||
thesis_stats = query(
|
||||
f"SELECT count() AS total, "
|
||||
f"avg(hits) AS avg_hits, "
|
||||
f"avg(hit_velocity) AS avg_hit_velocity, "
|
||||
f"avg(fuzzing_index) AS avg_fuzzing_index, "
|
||||
f"avg(post_ratio) AS avg_post_ratio "
|
||||
f"FROM {_DB}.view_thesis_features_1h"
|
||||
f"SELECT count() AS total, {_feat_cols} FROM {_DB}.view_thesis_features_1h"
|
||||
)
|
||||
if thesis_stats:
|
||||
result["thesis_features"] = thesis_stats[0]
|
||||
except Exception:
|
||||
logger.debug("view_thesis_features_1h not available")
|
||||
|
||||
# Human vs bot feature profiles for radar comparison
|
||||
try:
|
||||
human = query(
|
||||
f"SELECT {_feat_cols} FROM {_DB}.view_ai_features_1h "
|
||||
"WHERE asn_label = 'human'"
|
||||
)
|
||||
if human:
|
||||
result["human_profile"] = human[0]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
bot = query(
|
||||
f"SELECT {_feat_cols} FROM {_DB}.view_ai_features_1h "
|
||||
"WHERE asn_label IN ('datacenter', 'hosting')"
|
||||
)
|
||||
if bot:
|
||||
result["bot_profile"] = bot[0]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Feature variance (importance proxy)
|
||||
try:
|
||||
variance_rows = query(
|
||||
f"SELECT "
|
||||
f"varPop(hit_velocity) AS v_velocity, "
|
||||
f"varPop(fuzzing_index) AS v_fuzz, "
|
||||
f"varPop(post_ratio) AS v_post, "
|
||||
f"varPop(asset_ratio) AS v_asset, "
|
||||
f"varPop(direct_access_ratio) AS v_direct, "
|
||||
f"varPop(temporal_entropy) AS v_entropy, "
|
||||
f"varPop(path_diversity_ratio) AS v_path_div, "
|
||||
f"varPop(src_port_density) AS v_port_density "
|
||||
f"FROM {_DB}.view_ai_features_1h"
|
||||
)
|
||||
if variance_rows:
|
||||
row = variance_rows[0]
|
||||
result["feature_importance"] = [
|
||||
{"name": k.replace("v_", ""), "variance": v}
|
||||
for k, v in sorted(row.items(), key=lambda x: -(x[1] or 0))
|
||||
]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/geo — Geographic & ASN breakdown
|
||||
# ---------------------------------------------------------------------------
|
||||
@router.get("/geo")
async def geo() -> dict[str, Any]:
    """Return session and hit aggregates grouped by country and by ASN.

    Both aggregates are read from ``view_ai_features_1h``:

    * ``countries`` -- one row per (country_code, asn_label) with a non-empty
      country code, ordered by session count descending.
    * ``asns`` -- the 50 busiest (asn_org, asn_label, country_code) groups with
      a non-empty ASN organisation, including average hit velocity and
      fuzzing index.

    Returns:
        ``{"countries": [...], "asns": [...]}`` holding whatever ``query``
        returns for each statement. If either query raises, the failure is
        logged and both lists come back empty.
    """
    source = f"{_DB}.view_ai_features_1h"
    country_sql = (
        "SELECT country_code, asn_label, "
        "count() AS sessions, sum(hits) AS total_hits "
        f"FROM {source} "
        "WHERE country_code != '' "
        "GROUP BY country_code, asn_label ORDER BY sessions DESC"
    )
    asn_sql = (
        "SELECT asn_org, asn_label, country_code, "
        "count() AS sessions, sum(hits) AS total_hits, "
        "avg(hit_velocity) AS avg_velocity, avg(fuzzing_index) AS avg_fuzz "
        f"FROM {source} "
        "WHERE asn_org != '' "
        "GROUP BY asn_org, asn_label, country_code ORDER BY sessions DESC LIMIT 50"
    )
    try:
        # Countries first, then ASNs: a failure in either empties the response.
        payload = {"countries": query(country_sql), "asns": query(asn_sql)}
    except Exception:
        logger.exception("geo query failed")
        payload = {"countries": [], "asns": []}
    return payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/fingerprints — JA4 fingerprint analysis
|
||||
# ---------------------------------------------------------------------------
|
||||
@router.get("/fingerprints")
async def fingerprints() -> dict[str, Any]:
    """Return JA4 fingerprint aggregates from ``view_ai_features_1h``.

    * ``ja4_stats`` -- the 100 busiest (ja4, asn_label) groups with a
      non-empty fingerprint, with session count, total hits and the averages
      of hit velocity, fuzzing index and modern-browser score.
    * ``bot_ja4`` -- session counts per (ja4, bot_name) where both values are
      non-empty, ordered by session count descending.

    Returns:
        ``{"ja4_stats": [...], "bot_ja4": [...]}`` holding whatever ``query``
        returns for each statement. If either query raises, the failure is
        logged and both lists come back empty.
    """
    source = f"{_DB}.view_ai_features_1h"
    stats_sql = (
        "SELECT ja4, asn_label, "
        "count() AS sessions, sum(hits) AS total_hits, "
        "avg(hit_velocity) AS avg_velocity, "
        "avg(fuzzing_index) AS avg_fuzz, "
        "avg(modern_browser_score) AS avg_browser_score "
        f"FROM {source} "
        "WHERE ja4 != '' "
        "GROUP BY ja4, asn_label ORDER BY sessions DESC LIMIT 100"
    )
    bot_sql = (
        "SELECT ja4, bot_name, count() AS sessions "
        f"FROM {source} "
        "WHERE bot_name != '' AND ja4 != '' "
        "GROUP BY ja4, bot_name ORDER BY sessions DESC"
    )
    try:
        # Stats first, then the bot mapping: one failure empties both lists.
        payload = {"ja4_stats": query(stats_sql), "bot_ja4": query(bot_sql)}
    except Exception:
        logger.exception("fingerprints query failed")
        payload = {"ja4_stats": [], "bot_ja4": []}
    return payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/behavior — Feature scatter + distributions
|
||||
# ---------------------------------------------------------------------------
|
||||
_BEHAVIOR_FEATURES = [
|
||||
"hit_velocity", "fuzzing_index", "post_ratio", "asset_ratio",
|
||||
"direct_access_ratio", "temporal_entropy", "path_diversity_ratio",
|
||||
"modern_browser_score", "header_count", "is_ua_rotating",
|
||||
"distinct_ja4_count", "src_port_density",
|
||||
]
|
||||
|
||||
|
||||
@router.get("/behavior")
async def behavior() -> dict[str, Any]:
    """Return scatter-plot rows and per-feature value distributions.

    * ``scatter`` -- the 500 rows of ``view_ai_features_1h`` with the most
      hits, carrying the source IP as a string, ``asn_label``, ``bot_name``,
      ``hits`` and every column named in ``_BEHAVIOR_FEATURES``.
    * ``distributions`` -- for each of six features, row counts grouped by the
      feature value rounded to two decimals, ordered by bucket.

    Returns:
        ``{"scatter": [...], "distributions": {feature: [...]}}``. If any of
        the queries raises, the failure is logged and an empty scatter list
        and empty distributions mapping are returned.
    """
    feature_cols = ", ".join(_BEHAVIOR_FEATURES)
    histogram_features = (
        "hit_velocity", "fuzzing_index", "post_ratio",
        "asset_ratio", "temporal_entropy", "path_diversity_ratio",
    )
    source = f"{_DB}.view_ai_features_1h"
    try:
        scatter_rows = query(
            f"SELECT toString(src_ip) AS ip, asn_label, bot_name, hits, {feature_cols} "
            f"FROM {source} "
            "ORDER BY hits DESC LIMIT 500"
        )
        # One histogram query per feature, issued in the order listed above.
        histograms: dict[str, list] = {
            name: query(
                f"SELECT round({name}, 2) AS bucket, count() AS cnt "
                f"FROM {source} "
                "GROUP BY bucket ORDER BY bucket"
            )
            for name in histogram_features
        }
    except Exception:
        logger.exception("behavior query failed")
        return {"scatter": [], "distributions": {}}
    return {"scatter": scatter_rows, "distributions": histograms}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/heatmap — Temporal heatmap (hour × day)
|
||||
# ---------------------------------------------------------------------------
|
||||
@router.get("/heatmap")
async def heatmap() -> dict[str, Any]:
    """Return request counts per (day-of-week, hour) for the last 7 days.

    Rows are read from the ``http_logs`` table in the logs database, grouped
    and ordered by ``toDayOfWeek(time)`` and ``toHour(time)``.

    Returns:
        ``{"cells": [...]}`` holding whatever ``query`` returns. If the query
        raises, the failure is logged and ``cells`` is an empty list.
    """
    heatmap_sql = (
        "SELECT toDayOfWeek(time) AS dow, toHour(time) AS hour, count() AS cnt "
        f"FROM {_DB_LOGS}.http_logs "
        "WHERE time >= now() - INTERVAL 7 DAY "
        "GROUP BY dow, hour ORDER BY dow, hour"
    )
    try:
        return {"cells": query(heatmap_sql)}
    except Exception:
        logger.exception("heatmap query failed")
    # Reached only when the query failed.
    return {"cells": []}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/ip/{ip}/radar — Radar comparison vs human baseline
|
||||
# ---------------------------------------------------------------------------
|
||||
_RADAR_FEATURES = [
|
||||
"hit_velocity", "fuzzing_index", "post_ratio", "asset_ratio",
|
||||
"direct_access_ratio", "temporal_entropy", "path_diversity_ratio",
|
||||
"modern_browser_score",
|
||||
]
|
||||
|
||||
|
||||
@router.get("/ip/{ip}/radar")
async def ip_radar(ip: str) -> dict[str, Any]:
    """Return radar-chart feature values for one IP plus two baselines.

    Args:
        ip: Address from the URL path. A leading ``::ffff:`` (IPv4-mapped
            IPv6 prefix) is stripped before the value is bound as a query
            parameter.

    Returns:
        A dict with:

        * ``features`` -- the ``_RADAR_FEATURES`` list.
        * ``ip_values`` -- the first row ``view_ai_features_1h`` yields for
          the IP, or ``{}`` when there is none.
        * ``human_baseline`` -- per-feature averages over rows whose
          ``asn_label`` is ``'human'``.
        * ``bot_baseline`` -- per-feature averages over rows whose
          ``asn_label`` is ``'datacenter'`` or ``'hosting'``.

        If any query raises, the failure is logged and the three value
        mappings are returned empty.
    """
    # Strip only a *leading* IPv4-mapped prefix. A blanket str.replace would
    # also rewrite native IPv6 addresses that merely contain "::ffff:"
    # (e.g. "2001:db8::ffff:1" would become "2001:db81").
    clean_ip = ip.removeprefix("::ffff:")
    feature_cols = ", ".join(_RADAR_FEATURES)
    cols_avg = ", ".join(f"avg({f}) AS {f}" for f in _RADAR_FEATURES)
    try:
        # NOTE(review): LIMIT 1 without ORDER BY -- if the view holds several
        # rows for this IP, which one is returned is up to the database.
        # Confirm whether a specific (e.g. most recent) row is intended.
        ip_data = query(
            f"SELECT {feature_cols} "
            f"FROM {_DB}.view_ai_features_1h "
            "WHERE src_ip = toIPv6({ip:String}) LIMIT 1",
            {"ip": clean_ip},
        )
        baseline = query(
            f"SELECT {cols_avg} "
            f"FROM {_DB}.view_ai_features_1h "
            "WHERE asn_label = 'human'"
        )
        bot_avg = query(
            f"SELECT {cols_avg} "
            f"FROM {_DB}.view_ai_features_1h "
            "WHERE asn_label IN ('datacenter', 'hosting')"
        )
        return {
            "features": _RADAR_FEATURES,
            "ip_values": ip_data[0] if ip_data else {},
            "human_baseline": baseline[0] if baseline else {},
            "bot_baseline": bot_avg[0] if bot_avg else {},
        }
    except Exception:
        # Log the address exactly as received, not the stripped form.
        logger.exception("ip radar query failed for %s", ip)
        return {
            "features": _RADAR_FEATURES,
            "ip_values": {},
            "human_baseline": {},
            "bot_baseline": {},
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user