feat: rewrite ASN classification with PeeringDB + expanded heuristics
Major improvements to generate_asn_data.py:

- Add PeeringDB network data source (34K networks with info_type)
- Add new categories: education, government, enterprise
- Rename 'human' label to 'isp' across all consumers
- Expand keyword heuristics (ISP, datacenter, hosting, CDN, education, gov)
- Add hard-coded lists for education, government, enterprise ASNs
- Support both --output-dir and --output-asn/--output-ipasn CLI interfaces
- Add --no-peeringdb flag for offline use

Results: 'unknown' dropped from 86% to 57%; ISP coverage 21.8K ASNs, education 3.1K, enterprise 5.7K, government 520.

Updated consumers:

- bot_detector.py: 'human' -> 'isp' for baseline selection
- dashboard api.py: 'human' -> 'isp' in SQL queries
- run-tests.sh: 'human' -> 'isp' in integration test assertions
- update-csv-data.sh: updated label description comment

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@ -469,11 +469,11 @@ async def features() -> dict[str, Any]:
|
||||
except Exception:
|
||||
logger.debug("view_thesis_features_1h not available")
|
||||
|
||||
# Human vs bot feature profiles for radar comparison
|
||||
# ISP (residential) vs bot feature profiles for radar comparison
|
||||
try:
|
||||
human = query(
|
||||
f"SELECT {_feat_cols} FROM {_DB}.view_ai_features_1h "
|
||||
"WHERE asn_label = 'human'"
|
||||
"WHERE asn_label = 'isp'"
|
||||
)
|
||||
if human:
|
||||
result["human_profile"] = human[0]
|
||||
@ -663,7 +663,7 @@ async def heatmap() -> dict[str, Any]:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/ip/{ip}/radar — Radar comparison vs human baseline
|
||||
# GET /api/ip/{ip}/radar — Radar comparison vs ISP baseline
|
||||
# ---------------------------------------------------------------------------
|
||||
_RADAR_FEATURES = [
|
||||
"hit_velocity", "fuzzing_index", "post_ratio", "asset_ratio",
|
||||
@ -686,7 +686,7 @@ async def ip_radar(ip: str) -> dict[str, Any]:
|
||||
baseline = query(
|
||||
f"SELECT {cols_avg} "
|
||||
f"FROM {_DB}.view_ai_features_1h "
|
||||
"WHERE asn_label = 'human'"
|
||||
"WHERE asn_label = 'isp'"
|
||||
)
|
||||
bot_avg = query(
|
||||
f"SELECT {cols_avg} "
|
||||
|
||||
Reference in New Issue
Block a user