Add dashboard statistics module and refactor views for metrics handling
All checks were successful
CI / ci (push) Successful in 14s

- Introduced a new `stats.py` module to encapsulate dashboard statistics building and cache key constants.
- Refactored `views.py` to utilize the new `build_stats` function for constructing metrics context, improving code organization and readability.
- Updated Prometheus query handling to streamline metrics fetching with a new `fetch_dashboard_metrics` function.
- Enhanced test cases to reflect changes in metrics fetching and context building, ensuring accurate functionality.
- Added new HTML templates for displaying detailed resource allocation and flavor statistics on the dashboard.
This commit is contained in:
2026-02-07 18:39:28 +03:00
parent 9fa0a78eb6
commit 6a27fecb13
11 changed files with 925 additions and 1083 deletions

View File

@@ -1,9 +1,37 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from watcher_visio.settings import PROMETHEUS_URL
# Timeout for the lightweight Prometheus health check, in seconds.
CHECK_TIMEOUT = 5

# Dashboard Prometheus queries (metric key -> PromQL expression).
# Every entry is executed concurrently, one request per key.
DASHBOARD_QUERIES = {
    # --- Hosts / physical CPU ---
    "hosts_total": "count(node_exporter_build_info{job='node_exporter_compute'})",
    "pcpu_total": (
        "sum(count(node_cpu_seconds_total{job='node_exporter_compute', mode='idle'}) "
        "without (cpu,mode))"
    ),
    "pcpu_usage": "sum(node_load5{job='node_exporter_compute'})",
    # --- Virtual CPU ---
    "vcpu_allocated": "sum(libvirt_domain_info_virtual_cpus)",
    "vcpu_overcommit_max": (
        "avg(openstack_placement_resource_allocation_ratio{resourcetype='VCPU'})"
    ),
    # --- Physical / virtual RAM ---
    "pram_total": "sum(node_memory_MemTotal_bytes{job='node_exporter_compute'})",
    "pram_usage": "sum(node_memory_Active_bytes{job='node_exporter_compute'})",
    "vram_allocated": "sum(libvirt_domain_info_maximum_memory_bytes)",
    "vram_overcommit_max": (
        "avg(avg_over_time("
        "openstack_placement_resource_allocation_ratio{resourcetype='MEMORY_MB'}[5m]))"
    ),
    # --- Virtual machines ---
    # count(), not sum(): the sample value of this metric appears to be the
    # numeric libvirt state code (described by the stateDesc label used below),
    # so summing it would weight each non-running domain by its code instead of
    # counting it once.
    # NOTE(review): assumes one series per domain - confirm against the exporter.
    "vm_count": "count(libvirt_domain_state_code)",
    "vm_active": "sum(libvirt_domain_state_code{stateDesc='the domain is running'})",
}

# Keys whose values are parsed as float; every other key is parsed as int.
DASHBOARD_FLOAT_KEYS = frozenset(("pcpu_usage", "vcpu_overcommit_max", "vram_overcommit_max"))
def check_prometheus() -> dict:
"""
@@ -36,3 +64,23 @@ def query_prometheus(query: str) -> str | list[str]:
return result
else:
return result[0]["value"][1]
def fetch_dashboard_metrics() -> dict:
    """Run all dashboard Prometheus queries in parallel.

    Each entry of ``DASHBOARD_QUERIES`` is submitted to a thread pool sized to
    the number of queries, and results are collected as they complete.

    Returns:
        Mapping of metric key to a number: ``float`` for keys listed in
        ``DASHBOARD_FLOAT_KEYS``, ``int`` for all others. A metric whose result
        is empty, malformed or not parsable is reported as zero of the
        matching type.

    Raises:
        Any exception raised by ``query_prometheus`` other than the ones
        handled below is re-raised by ``future.result()`` and propagates.
    """
    result = {}
    with ThreadPoolExecutor(max_workers=len(DASHBOARD_QUERIES)) as executor:
        future_to_key = {
            executor.submit(query_prometheus, query=q): key for key, q in DASHBOARD_QUERIES.items()
        }
        for future in as_completed(future_to_key):
            key = future_to_key[future]
            cast = float if key in DASHBOARD_FLOAT_KEYS else int
            try:
                result[key] = cast(future.result())
            except (ValueError, TypeError, IndexError, KeyError):
                # ValueError/TypeError: the value is not a parsable scalar
                # (query_prometheus may also return a list).
                # IndexError/KeyError: query_prometheus indexes
                # result[0]["value"][1], which fails on an empty or
                # unexpectedly shaped result; treat that as "no data".
                result[key] = cast(0)
    return result

76
dashboard/stats.py Normal file
View File

@@ -0,0 +1,76 @@
"""Dashboard statistics building and cache key constants."""
# Keys under which the dashboard views store their cached payloads.
CACHE_KEY_STATS = "dashboard_stats"
CACHE_KEY_AUDITS = "dashboard_audits"
CACHE_KEY_CURRENT_CLUSTER = "dashboard_current_cluster"
CACHE_KEY_SOURCE_STATUS = "dashboard_source_status"

# Placeholder flavor data for the skeleton context: one blank "most common"
# flavor and no second or third entry.
EMPTY_FLAVORS = dict(
    first_common_flavor=dict(name="", count=0),
    second_common_flavor=None,
    third_common_flavor=None,
)
def build_stats(metrics: dict, region_name: str, flavors: dict) -> dict:
    """Build the dashboard stats dict from raw metrics and OpenStack-derived data.

    Args:
        metrics: Mapping of metric key to number. The keys read are
            hosts_total, pcpu_total, pcpu_usage, vcpu_allocated,
            vcpu_overcommit_max, pram_total, pram_usage, vram_allocated,
            vram_overcommit_max, vm_count and vm_active. A missing key or a
            ``None`` value is treated as 0.
        region_name: Value stored under ``region.name``.
        flavors: Stored unchanged under ``flavors``.

    Returns:
        Dict with the sections ``region``, ``pcpu``, ``vcpu``, ``pram``,
        ``vram``, ``vm`` and ``flavors`` (no audits/current_cluster). Every
        ratio or percentage whose denominator is zero is reported as 0.
    """

    def _metric(name):
        # Missing and None both mean "no data"; keep real values (including
        # float zeros) exactly as supplied.
        value = metrics.get(name)
        return 0 if value is None else value

    # Report the real host count. Zero hosts is handled by the density guard
    # below rather than by pretending there is one host.
    hosts_total = _metric("hosts_total")
    pcpu_total = _metric("pcpu_total")
    pcpu_usage = _metric("pcpu_usage")
    vcpu_allocated = _metric("vcpu_allocated")
    vcpu_overcommit_max = _metric("vcpu_overcommit_max")
    pram_total = _metric("pram_total")
    pram_usage = _metric("pram_usage")
    vram_allocated = _metric("vram_allocated")
    vram_overcommit_max = _metric("vram_overcommit_max")
    vm_count = _metric("vm_count")
    vm_active = _metric("vm_active")

    # Virtual capacity = physical capacity scaled by the maximum overcommit ratio.
    vcpu_total = pcpu_total * vcpu_overcommit_max
    vram_total = pram_total * vram_overcommit_max

    return {
        "region": {"name": region_name, "hosts_total": hosts_total},
        "pcpu": {
            "total": pcpu_total,
            "usage": pcpu_usage,
            "free": pcpu_total - pcpu_usage,
            "used_percentage": (pcpu_usage / pcpu_total * 100) if pcpu_total else 0,
        },
        "vcpu": {
            "total": vcpu_total,
            "allocated": vcpu_allocated,
            "free": vcpu_total - vcpu_allocated,
            "allocated_percentage": (vcpu_allocated / vcpu_total * 100) if vcpu_total else 0,
            "overcommit_ratio": (vcpu_allocated / pcpu_total) if pcpu_total else 0,
            "overcommit_max": vcpu_overcommit_max,
        },
        "pram": {
            "total": pram_total,
            "usage": pram_usage,
            "free": pram_total - pram_usage,
            "used_percentage": (pram_usage / pram_total * 100) if pram_total else 0,
        },
        "vram": {
            "total": vram_total,
            "allocated": vram_allocated,
            "free": vram_total - vram_allocated,
            "allocated_percentage": (vram_allocated / vram_total * 100) if vram_total else 0,
            "overcommit_ratio": (vram_allocated / pram_total) if pram_total else 0,
            "overcommit_max": vram_overcommit_max,
        },
        "vm": {
            "count": vm_count,
            "active": vm_active,
            "stopped": vm_count - vm_active,
            "avg_cpu": vcpu_allocated / vm_count if vm_count else 0,
            "avg_ram": vram_allocated / vm_count if vm_count else 0,
            "density": vm_count / hosts_total if hosts_total else 0,
        },
        "flavors": flavors,
    }

View File

@@ -96,7 +96,7 @@ class CollectContextTest(TestCase):
return conn
@patch("dashboard.views.get_current_cluster_cpu")
@patch("dashboard.views._fetch_prometheus_metrics")
@patch("dashboard.views.fetch_dashboard_metrics")
@patch("dashboard.views.get_audits")
@patch("dashboard.views.get_flavor_list")
@patch("dashboard.views.get_connection")
@@ -152,8 +152,6 @@ class CollectContextTest(TestCase):
self.assertEqual(context["flavors"]["first_common_flavor"]["name"], "m1.small")
self.assertEqual(len(context["audits"]), 1)
# Serialized for JS
import json
self.assertIsInstance(context["audits"][0]["migrations"], str)
self.assertEqual(json.loads(context["audits"][0]["host_labels"]), ["h0", "h1"])
self.assertIn("current_cluster", context)
@@ -167,7 +165,7 @@ class ApiStatsTest(TestCase):
def setUp(self):
self.factory = RequestFactory()
@patch("dashboard.views._fetch_prometheus_metrics")
@patch("dashboard.views.fetch_dashboard_metrics")
@patch("dashboard.views.get_flavor_list")
@patch("dashboard.views.get_connection")
def test_api_stats_returns_json_with_expected_keys(

View File

@@ -1,5 +1,4 @@
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from django.conf import settings
from django.core.cache import cache
@@ -10,53 +9,32 @@ from dashboard.mock_data import get_mock_context
from dashboard.openstack_utils.audits import get_audits, get_current_cluster_cpu
from dashboard.openstack_utils.connect import check_openstack, get_connection
from dashboard.openstack_utils.flavor import get_flavor_list
from dashboard.prometheus_utils.query import check_prometheus, query_prometheus
# Prometheus queries run in parallel (query_key -> query string)
_PROMETHEUS_QUERIES = {
"hosts_total": "count(node_exporter_build_info{job='node_exporter_compute'})",
"pcpu_total": (
"sum(count(node_cpu_seconds_total{job='node_exporter_compute', mode='idle'}) "
"without (cpu,mode))"
),
"pcpu_usage": "sum(node_load5{job='node_exporter_compute'})",
"vcpu_allocated": "sum(libvirt_domain_info_virtual_cpus)",
"vcpu_overcommit_max": (
"avg(openstack_placement_resource_allocation_ratio{resourcetype='VCPU'})"
),
"pram_total": "sum(node_memory_MemTotal_bytes{job='node_exporter_compute'})",
"pram_usage": "sum(node_memory_Active_bytes{job='node_exporter_compute'})",
"vram_allocated": "sum(libvirt_domain_info_maximum_memory_bytes)",
"vram_overcommit_max": (
"avg(avg_over_time("
"openstack_placement_resource_allocation_ratio{resourcetype='MEMORY_MB'}[5m]))"
),
"vm_count": "sum(libvirt_domain_state_code)",
"vm_active": "sum(libvirt_domain_state_code{stateDesc='the domain is running'})",
}
from dashboard.prometheus_utils.query import check_prometheus, fetch_dashboard_metrics
from dashboard.stats import (
CACHE_KEY_AUDITS,
CACHE_KEY_CURRENT_CLUSTER,
CACHE_KEY_SOURCE_STATUS,
CACHE_KEY_STATS,
EMPTY_FLAVORS,
build_stats,
)
def _fetch_prometheus_metrics():
"""Run all Prometheus queries in parallel and return a dict of name -> value."""
result = {}
with ThreadPoolExecutor(max_workers=len(_PROMETHEUS_QUERIES)) as executor:
future_to_key = {
executor.submit(query_prometheus, query=q): key
for key, q in _PROMETHEUS_QUERIES.items()
}
for future in as_completed(future_to_key):
key = future_to_key[future]
try:
raw = future.result()
if key in ("pcpu_usage", "vcpu_overcommit_max", "vram_overcommit_max"):
result[key] = float(raw)
else:
result[key] = int(raw)
except (ValueError, TypeError):
result[key] = (
0 if key in ("pcpu_usage", "vcpu_overcommit_max", "vram_overcommit_max") else 0
)
return result
def _empty_metrics():
"""Metrics dict with zero/default values for skeleton context."""
return {
"hosts_total": 0,
"pcpu_total": 0,
"pcpu_usage": 0,
"vcpu_allocated": 0,
"vcpu_overcommit_max": 0,
"pram_total": 0,
"pram_usage": 0,
"vram_allocated": 0,
"vram_overcommit_max": 0,
"vm_count": 0,
"vm_active": 0,
}
def collect_context():
@@ -64,81 +42,14 @@ def collect_context():
region_name = connection._compute_region
flavors = get_flavor_list(connection=connection)
audits = get_audits(connection=connection)
metrics = _fetch_prometheus_metrics()
hosts_total = metrics.get("hosts_total") or 1
pcpu_total = metrics.get("pcpu_total", 0)
pcpu_usage = metrics.get("pcpu_usage", 0)
vcpu_allocated = metrics.get("vcpu_allocated", 0)
vcpu_overcommit_max = metrics.get("vcpu_overcommit_max", 0)
pram_total = metrics.get("pram_total", 0)
pram_usage = metrics.get("pram_usage", 0)
vram_allocated = metrics.get("vram_allocated", 0)
vram_overcommit_max = metrics.get("vram_overcommit_max", 0)
vm_count = metrics.get("vm_count", 0)
vm_active = metrics.get("vm_active", 0)
vcpu_total = pcpu_total * vcpu_overcommit_max
vram_total = pram_total * vram_overcommit_max
context = {
# <--- Region data --->
"region": {
"name": region_name,
"hosts_total": hosts_total,
},
# <--- CPU data --->
# pCPU data
"pcpu": {
"total": pcpu_total,
"usage": pcpu_usage,
"free": pcpu_total - pcpu_usage,
"used_percentage": (pcpu_usage / pcpu_total * 100) if pcpu_total else 0,
},
# vCPU data
"vcpu": {
"total": vcpu_total,
"allocated": vcpu_allocated,
"free": vcpu_total - vcpu_allocated,
"allocated_percentage": (vcpu_allocated / vcpu_total * 100) if vcpu_total else 0,
"overcommit_ratio": (vcpu_allocated / pcpu_total) if pcpu_total else 0,
"overcommit_max": vcpu_overcommit_max,
},
# <--- RAM data --->
# pRAM data
"pram": {
"total": pram_total,
"usage": pram_usage,
"free": pram_total - pram_usage,
"used_percentage": (pram_usage / pram_total * 100) if pram_total else 0,
},
# vRAM data
"vram": {
"total": vram_total,
"allocated": vram_allocated,
"free": vram_total - vram_allocated,
"allocated_percentage": (vram_allocated / vram_total * 100) if vram_total else 0,
"overcommit_ratio": (vram_allocated / pram_total) if pram_total else 0,
"overcommit_max": vram_overcommit_max,
},
# <--- VM data --->
"vm": {
"count": vm_count,
"active": vm_active,
"stopped": vm_count - vm_active,
"avg_cpu": vcpu_allocated / vm_count if vm_count else 0,
"avg_ram": vram_allocated / vm_count if vm_count else 0,
"density": vm_count / hosts_total if hosts_total else 0,
},
"flavors": flavors,
"audits": audits,
}
metrics = fetch_dashboard_metrics()
context = build_stats(metrics, region_name, flavors)
context["audits"] = audits
current_cluster = get_current_cluster_cpu(connection)
context["current_cluster"] = {
"host_labels": json.dumps(current_cluster["host_labels"]),
"cpu_current": json.dumps(current_cluster["cpu_current"]),
}
# Serialize audit list fields for JavaScript so cached context is render-ready
for audit in context["audits"]:
audit["migrations"] = json.dumps(audit["migrations"])
audit["host_labels"] = json.dumps(audit["host_labels"])
@@ -152,60 +63,8 @@ def collect_stats():
connection = get_connection()
region_name = connection._compute_region
flavors = get_flavor_list(connection=connection)
metrics = _fetch_prometheus_metrics()
hosts_total = metrics.get("hosts_total") or 1
pcpu_total = metrics.get("pcpu_total", 0)
pcpu_usage = metrics.get("pcpu_usage", 0)
vcpu_allocated = metrics.get("vcpu_allocated", 0)
vcpu_overcommit_max = metrics.get("vcpu_overcommit_max", 0)
pram_total = metrics.get("pram_total", 0)
pram_usage = metrics.get("pram_usage", 0)
vram_allocated = metrics.get("vram_allocated", 0)
vram_overcommit_max = metrics.get("vram_overcommit_max", 0)
vm_count = metrics.get("vm_count", 0)
vm_active = metrics.get("vm_active", 0)
vcpu_total = pcpu_total * vcpu_overcommit_max
vram_total = pram_total * vram_overcommit_max
return {
"region": {"name": region_name, "hosts_total": hosts_total},
"pcpu": {
"total": pcpu_total,
"usage": pcpu_usage,
"free": pcpu_total - pcpu_usage,
"used_percentage": (pcpu_usage / pcpu_total * 100) if pcpu_total else 0,
},
"vcpu": {
"total": vcpu_total,
"allocated": vcpu_allocated,
"free": vcpu_total - vcpu_allocated,
"allocated_percentage": (vcpu_allocated / vcpu_total * 100) if vcpu_total else 0,
"overcommit_ratio": (vcpu_allocated / pcpu_total) if pcpu_total else 0,
"overcommit_max": vcpu_overcommit_max,
},
"pram": {
"total": pram_total,
"usage": pram_usage,
"free": pram_total - pram_usage,
"used_percentage": (pram_usage / pram_total * 100) if pram_total else 0,
},
"vram": {
"total": vram_total,
"allocated": vram_allocated,
"free": vram_total - vram_allocated,
"allocated_percentage": (vram_allocated / vram_total * 100) if vram_total else 0,
"overcommit_ratio": (vram_allocated / pram_total) if pram_total else 0,
"overcommit_max": vram_overcommit_max,
},
"vm": {
"count": vm_count,
"active": vm_active,
"stopped": vm_count - vm_active,
"avg_cpu": vcpu_allocated / vm_count if vm_count else 0,
"avg_ram": vram_allocated / vm_count if vm_count else 0,
"density": vm_count / hosts_total if hosts_total else 0,
},
"flavors": flavors,
}
metrics = fetch_dashboard_metrics()
return build_stats(metrics, region_name, flavors)
def collect_audits():
@@ -222,40 +81,14 @@ def collect_audits():
def _skeleton_context():
"""Minimal context for skeleton-only index render."""
empty_flavors = {
"first_common_flavor": {"name": "", "count": 0},
"second_common_flavor": None,
"third_common_flavor": None,
}
return {
"skeleton": True,
"region": {"name": "", "hosts_total": 0},
"pcpu": {"total": 0, "usage": 0, "free": 0, "used_percentage": 0},
"pram": {"total": 0, "usage": 0, "free": 0, "used_percentage": 0},
"vcpu": {
"total": 0,
"allocated": 0,
"free": 0,
"allocated_percentage": 0,
"overcommit_ratio": 0,
"overcommit_max": 0,
},
"vram": {
"total": 0,
"allocated": 0,
"free": 0,
"allocated_percentage": 0,
"overcommit_ratio": 0,
"overcommit_max": 0,
},
"vm": {"count": 0, "active": 0, "stopped": 0, "avg_cpu": 0, "avg_ram": 0, "density": 0},
"flavors": empty_flavors,
"audits": [],
"current_cluster": {
"host_labels": "[]",
"cpu_current": "[]",
},
context = build_stats(_empty_metrics(), "", EMPTY_FLAVORS)
context["skeleton"] = True
context["audits"] = []
context["current_cluster"] = {
"host_labels": "[]",
"cpu_current": "[]",
}
return context
def index(request):
@@ -267,28 +100,25 @@ def index(request):
def api_stats(request):
cache_key = "dashboard_stats"
cache_ttl = getattr(settings, "DASHBOARD_CACHE_TTL", 120)
data = cache.get(cache_key)
data = cache.get(CACHE_KEY_STATS)
if data is None:
data = collect_stats()
cache.set(cache_key, data, timeout=cache_ttl)
cache.set(CACHE_KEY_STATS, data, timeout=cache_ttl)
return JsonResponse(data)
def api_audits(request):
cache_key_audits = "dashboard_audits"
cache_key_cluster = "dashboard_current_cluster"
cache_ttl = getattr(settings, "DASHBOARD_CACHE_TTL", 120)
audits = cache.get(cache_key_audits)
current_cluster = cache.get(cache_key_cluster)
audits = cache.get(CACHE_KEY_AUDITS)
current_cluster = cache.get(CACHE_KEY_CURRENT_CLUSTER)
if audits is None:
audits = collect_audits()
cache.set(cache_key_audits, audits, timeout=cache_ttl)
cache.set(CACHE_KEY_AUDITS, audits, timeout=cache_ttl)
if current_cluster is None:
connection = get_connection()
current_cluster = get_current_cluster_cpu(connection)
cache.set(cache_key_cluster, current_cluster, timeout=cache_ttl)
cache.set(CACHE_KEY_CURRENT_CLUSTER, current_cluster, timeout=cache_ttl)
return JsonResponse({"audits": audits, "current_cluster": current_cluster})
@@ -302,13 +132,12 @@ def api_source_status(request):
}
)
cache_key = "dashboard_source_status"
cache_ttl = getattr(settings, "SOURCE_STATUS_CACHE_TTL", 30)
data = cache.get(cache_key)
data = cache.get(CACHE_KEY_SOURCE_STATUS)
if data is None:
data = {
"prometheus": check_prometheus(),
"openstack": check_openstack(),
}
cache.set(cache_key, data, timeout=cache_ttl)
cache.set(CACHE_KEY_SOURCE_STATUS, data, timeout=cache_ttl)
return JsonResponse(data)