Add dashboard statistics module and refactor views for metrics handling

- Introduced a new `stats.py` module that holds the `build_stats` function, the dashboard cache key constants, and the `EMPTY_FLAVORS` placeholder.
- Refactored `views.py` to build the metrics context through `build_stats`, replacing the duplicated context-building code in `collect_context`, `collect_stats`, and `_skeleton_context`.
- Moved the Prometheus dashboard queries and their parallel fetching out of `views.py` into a new `fetch_dashboard_metrics` function in `prometheus_utils/query.py`.
- Updated the test cases to patch `fetch_dashboard_metrics` instead of the removed `_fetch_prometheus_metrics`.
- Added new HTML templates for displaying detailed resource allocation and flavor statistics on the dashboard.
2026-02-07 18:39:28 +03:00
parent 9fa0a78eb6
commit 6a27fecb13
11 changed files with 925 additions and 1083 deletions
--- a/dashboard/prometheus_utils/query.py
+++ b/dashboard/prometheus_utils/query.py
@@ -1,9 +1,37 @@
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
 import requests
 from watcher_visio.settings import PROMETHEUS_URL

 # Timeout for lightweight health check (seconds)
 CHECK_TIMEOUT = 5

+# Dashboard Prometheus queries (query_key -> query string), run in parallel
+DASHBOARD_QUERIES = {
+    "hosts_total": "count(node_exporter_build_info{job='node_exporter_compute'})",
+    "pcpu_total": (
+        "sum(count(node_cpu_seconds_total{job='node_exporter_compute', mode='idle'}) "
+        "without (cpu,mode))"
+    ),
+    "pcpu_usage": "sum(node_load5{job='node_exporter_compute'})",
+    "vcpu_allocated": "sum(libvirt_domain_info_virtual_cpus)",
+    "vcpu_overcommit_max": (
+        "avg(openstack_placement_resource_allocation_ratio{resourcetype='VCPU'})"
+    ),
+    "pram_total": "sum(node_memory_MemTotal_bytes{job='node_exporter_compute'})",
+    "pram_usage": "sum(node_memory_Active_bytes{job='node_exporter_compute'})",
+    "vram_allocated": "sum(libvirt_domain_info_maximum_memory_bytes)",
+    "vram_overcommit_max": (
+        "avg(avg_over_time("
+        "openstack_placement_resource_allocation_ratio{resourcetype='MEMORY_MB'}[5m]))"
+    ),
+    "vm_count": "sum(libvirt_domain_state_code)",
+    "vm_active": "sum(libvirt_domain_state_code{stateDesc='the domain is running'})",
+}
+
+# Keys that should be parsed as float (rest as int)
+DASHBOARD_FLOAT_KEYS = frozenset(("pcpu_usage", "vcpu_overcommit_max", "vram_overcommit_max"))
+

 def check_prometheus() -> dict:
    """
@@ -36,3 +64,23 @@ def query_prometheus(query: str) -> str | list[str]:
        return result
    else:
        return result[0]["value"][1]
+
+
+def fetch_dashboard_metrics() -> dict:
+    """Run all dashboard Prometheus queries in parallel and return a dict of name -> value."""
+    result = {}
+    with ThreadPoolExecutor(max_workers=len(DASHBOARD_QUERIES)) as executor:
+        future_to_key = {
+            executor.submit(query_prometheus, query=q): key for key, q in DASHBOARD_QUERIES.items()
+        }
+        for future in as_completed(future_to_key):
+            key = future_to_key[future]
+            try:
+                raw = future.result()
+                if key in DASHBOARD_FLOAT_KEYS:
+                    result[key] = float(raw)
+                else:
+                    result[key] = int(raw)
+            except (ValueError, TypeError):
+                result[key] = float(0) if key in DASHBOARD_FLOAT_KEYS else 0
+    return result
--- a/dashboard/stats.py
+++ b/dashboard/stats.py
@@ -0,0 +1,76 @@
+"""Dashboard statistics building and cache key constants."""
+
+# Cache keys used by views
+CACHE_KEY_STATS = "dashboard_stats"
+CACHE_KEY_AUDITS = "dashboard_audits"
+CACHE_KEY_CURRENT_CLUSTER = "dashboard_current_cluster"
+CACHE_KEY_SOURCE_STATUS = "dashboard_source_status"
+
+# Empty structures for skeleton context (same shape as build_stats output)
+EMPTY_FLAVORS = {
+    "first_common_flavor": {"name": "—", "count": 0},
+    "second_common_flavor": None,
+    "third_common_flavor": None,
+}
+
+
+def build_stats(metrics: dict, region_name: str, flavors: dict) -> dict:
+    """
+    Build stats dict from raw metrics and OpenStack-derived data.
+    Returns region, pcpu, vcpu, pram, vram, vm, flavors (no audits/current_cluster).
+    """
+    hosts_total = metrics.get("hosts_total") or 1
+    pcpu_total = metrics.get("pcpu_total", 0)
+    pcpu_usage = metrics.get("pcpu_usage", 0)
+    vcpu_allocated = metrics.get("vcpu_allocated", 0)
+    vcpu_overcommit_max = metrics.get("vcpu_overcommit_max", 0)
+    pram_total = metrics.get("pram_total", 0)
+    pram_usage = metrics.get("pram_usage", 0)
+    vram_allocated = metrics.get("vram_allocated", 0)
+    vram_overcommit_max = metrics.get("vram_overcommit_max", 0)
+    vm_count = metrics.get("vm_count", 0)
+    vm_active = metrics.get("vm_active", 0)
+
+    vcpu_total = pcpu_total * vcpu_overcommit_max
+    vram_total = pram_total * vram_overcommit_max
+
+    return {
+        "region": {"name": region_name, "hosts_total": hosts_total},
+        "pcpu": {
+            "total": pcpu_total,
+            "usage": pcpu_usage,
+            "free": pcpu_total - pcpu_usage,
+            "used_percentage": (pcpu_usage / pcpu_total * 100) if pcpu_total else 0,
+        },
+        "vcpu": {
+            "total": vcpu_total,
+            "allocated": vcpu_allocated,
+            "free": vcpu_total - vcpu_allocated,
+            "allocated_percentage": (vcpu_allocated / vcpu_total * 100) if vcpu_total else 0,
+            "overcommit_ratio": (vcpu_allocated / pcpu_total) if pcpu_total else 0,
+            "overcommit_max": vcpu_overcommit_max,
+        },
+        "pram": {
+            "total": pram_total,
+            "usage": pram_usage,
+            "free": pram_total - pram_usage,
+            "used_percentage": (pram_usage / pram_total * 100) if pram_total else 0,
+        },
+        "vram": {
+            "total": vram_total,
+            "allocated": vram_allocated,
+            "free": vram_total - vram_allocated,
+            "allocated_percentage": (vram_allocated / vram_total * 100) if vram_total else 0,
+            "overcommit_ratio": (vram_allocated / pram_total) if pram_total else 0,
+            "overcommit_max": vram_overcommit_max,
+        },
+        "vm": {
+            "count": vm_count,
+            "active": vm_active,
+            "stopped": vm_count - vm_active,
+            "avg_cpu": vcpu_allocated / vm_count if vm_count else 0,
+            "avg_ram": vram_allocated / vm_count if vm_count else 0,
+            "density": vm_count / hosts_total if hosts_total else 0,
+        },
+        "flavors": flavors,
+    }
--- a/dashboard/tests/test_views.py
+++ b/dashboard/tests/test_views.py
@@ -96,7 +96,7 @@ class CollectContextTest(TestCase):
        return conn

    @patch("dashboard.views.get_current_cluster_cpu")
-    @patch("dashboard.views._fetch_prometheus_metrics")
+    @patch("dashboard.views.fetch_dashboard_metrics")
    @patch("dashboard.views.get_audits")
    @patch("dashboard.views.get_flavor_list")
    @patch("dashboard.views.get_connection")
@@ -152,8 +152,6 @@ class CollectContextTest(TestCase):
        self.assertEqual(context["flavors"]["first_common_flavor"]["name"], "m1.small")
        self.assertEqual(len(context["audits"]), 1)
        # Serialized for JS
-        import json
-
        self.assertIsInstance(context["audits"][0]["migrations"], str)
        self.assertEqual(json.loads(context["audits"][0]["host_labels"]), ["h0", "h1"])
        self.assertIn("current_cluster", context)
@@ -167,7 +165,7 @@ class ApiStatsTest(TestCase):
    def setUp(self):
        self.factory = RequestFactory()

-    @patch("dashboard.views._fetch_prometheus_metrics")
+    @patch("dashboard.views.fetch_dashboard_metrics")
    @patch("dashboard.views.get_flavor_list")
    @patch("dashboard.views.get_connection")
    def test_api_stats_returns_json_with_expected_keys(
--- a/dashboard/views.py
+++ b/dashboard/views.py
@@ -1,5 +1,4 @@
 import json
-from concurrent.futures import ThreadPoolExecutor, as_completed

 from django.conf import settings
 from django.core.cache import cache
@@ -10,53 +9,32 @@ from dashboard.mock_data import get_mock_context
 from dashboard.openstack_utils.audits import get_audits, get_current_cluster_cpu
 from dashboard.openstack_utils.connect import check_openstack, get_connection
 from dashboard.openstack_utils.flavor import get_flavor_list
-from dashboard.prometheus_utils.query import check_prometheus, query_prometheus
-
-# Prometheus queries run in parallel (query_key -> query string)
-_PROMETHEUS_QUERIES = {
-    "hosts_total": "count(node_exporter_build_info{job='node_exporter_compute'})",
-    "pcpu_total": (
-        "sum(count(node_cpu_seconds_total{job='node_exporter_compute', mode='idle'}) "
-        "without (cpu,mode))"
-    ),
-    "pcpu_usage": "sum(node_load5{job='node_exporter_compute'})",
-    "vcpu_allocated": "sum(libvirt_domain_info_virtual_cpus)",
-    "vcpu_overcommit_max": (
-        "avg(openstack_placement_resource_allocation_ratio{resourcetype='VCPU'})"
-    ),
-    "pram_total": "sum(node_memory_MemTotal_bytes{job='node_exporter_compute'})",
-    "pram_usage": "sum(node_memory_Active_bytes{job='node_exporter_compute'})",
-    "vram_allocated": "sum(libvirt_domain_info_maximum_memory_bytes)",
-    "vram_overcommit_max": (
-        "avg(avg_over_time("
-        "openstack_placement_resource_allocation_ratio{resourcetype='MEMORY_MB'}[5m]))"
-    ),
-    "vm_count": "sum(libvirt_domain_state_code)",
-    "vm_active": "sum(libvirt_domain_state_code{stateDesc='the domain is running'})",
-}
+from dashboard.prometheus_utils.query import check_prometheus, fetch_dashboard_metrics
+from dashboard.stats import (
+    CACHE_KEY_AUDITS,
+    CACHE_KEY_CURRENT_CLUSTER,
+    CACHE_KEY_SOURCE_STATUS,
+    CACHE_KEY_STATS,
+    EMPTY_FLAVORS,
+    build_stats,
+)


-def _fetch_prometheus_metrics():
-    """Run all Prometheus queries in parallel and return a dict of name -> value."""
-    result = {}
-    with ThreadPoolExecutor(max_workers=len(_PROMETHEUS_QUERIES)) as executor:
-        future_to_key = {
-            executor.submit(query_prometheus, query=q): key
-            for key, q in _PROMETHEUS_QUERIES.items()
-        }
-        for future in as_completed(future_to_key):
-            key = future_to_key[future]
-            try:
-                raw = future.result()
-                if key in ("pcpu_usage", "vcpu_overcommit_max", "vram_overcommit_max"):
-                    result[key] = float(raw)
-                else:
-                    result[key] = int(raw)
-            except (ValueError, TypeError):
-                result[key] = (
-                    0 if key in ("pcpu_usage", "vcpu_overcommit_max", "vram_overcommit_max") else 0
-                )
-    return result
+def _empty_metrics():
+    """Metrics dict with zero/default values for skeleton context."""
+    return {
+        "hosts_total": 0,
+        "pcpu_total": 0,
+        "pcpu_usage": 0,
+        "vcpu_allocated": 0,
+        "vcpu_overcommit_max": 0,
+        "pram_total": 0,
+        "pram_usage": 0,
+        "vram_allocated": 0,
+        "vram_overcommit_max": 0,
+        "vm_count": 0,
+        "vm_active": 0,
+    }


 def collect_context():
@@ -64,81 +42,14 @@ def collect_context():
    region_name = connection._compute_region
    flavors = get_flavor_list(connection=connection)
    audits = get_audits(connection=connection)
-
-    metrics = _fetch_prometheus_metrics()
-    hosts_total = metrics.get("hosts_total") or 1
-    pcpu_total = metrics.get("pcpu_total", 0)
-    pcpu_usage = metrics.get("pcpu_usage", 0)
-    vcpu_allocated = metrics.get("vcpu_allocated", 0)
-    vcpu_overcommit_max = metrics.get("vcpu_overcommit_max", 0)
-    pram_total = metrics.get("pram_total", 0)
-    pram_usage = metrics.get("pram_usage", 0)
-    vram_allocated = metrics.get("vram_allocated", 0)
-    vram_overcommit_max = metrics.get("vram_overcommit_max", 0)
-    vm_count = metrics.get("vm_count", 0)
-    vm_active = metrics.get("vm_active", 0)
-
-    vcpu_total = pcpu_total * vcpu_overcommit_max
-    vram_total = pram_total * vram_overcommit_max
-
-    context = {
-        # <--- Region data --->
-        "region": {
-            "name": region_name,
-            "hosts_total": hosts_total,
-        },
-        # <--- CPU data --->
-        # pCPU data
-        "pcpu": {
-            "total": pcpu_total,
-            "usage": pcpu_usage,
-            "free": pcpu_total - pcpu_usage,
-            "used_percentage": (pcpu_usage / pcpu_total * 100) if pcpu_total else 0,
-        },
-        # vCPU data
-        "vcpu": {
-            "total": vcpu_total,
-            "allocated": vcpu_allocated,
-            "free": vcpu_total - vcpu_allocated,
-            "allocated_percentage": (vcpu_allocated / vcpu_total * 100) if vcpu_total else 0,
-            "overcommit_ratio": (vcpu_allocated / pcpu_total) if pcpu_total else 0,
-            "overcommit_max": vcpu_overcommit_max,
-        },
-        # <--- RAM data --->
-        # pRAM data
-        "pram": {
-            "total": pram_total,
-            "usage": pram_usage,
-            "free": pram_total - pram_usage,
-            "used_percentage": (pram_usage / pram_total * 100) if pram_total else 0,
-        },
-        # vRAM data
-        "vram": {
-            "total": vram_total,
-            "allocated": vram_allocated,
-            "free": vram_total - vram_allocated,
-            "allocated_percentage": (vram_allocated / vram_total * 100) if vram_total else 0,
-            "overcommit_ratio": (vram_allocated / pram_total) if pram_total else 0,
-            "overcommit_max": vram_overcommit_max,
-        },
-        # <--- VM data --->
-        "vm": {
-            "count": vm_count,
-            "active": vm_active,
-            "stopped": vm_count - vm_active,
-            "avg_cpu": vcpu_allocated / vm_count if vm_count else 0,
-            "avg_ram": vram_allocated / vm_count if vm_count else 0,
-            "density": vm_count / hosts_total if hosts_total else 0,
-        },
-        "flavors": flavors,
-        "audits": audits,
-    }
+    metrics = fetch_dashboard_metrics()
+    context = build_stats(metrics, region_name, flavors)
+    context["audits"] = audits
    current_cluster = get_current_cluster_cpu(connection)
    context["current_cluster"] = {
        "host_labels": json.dumps(current_cluster["host_labels"]),
        "cpu_current": json.dumps(current_cluster["cpu_current"]),
    }
-    # Serialize audit list fields for JavaScript so cached context is render-ready
    for audit in context["audits"]:
        audit["migrations"] = json.dumps(audit["migrations"])
        audit["host_labels"] = json.dumps(audit["host_labels"])
@@ -152,60 +63,8 @@ def collect_stats():
    connection = get_connection()
    region_name = connection._compute_region
    flavors = get_flavor_list(connection=connection)
-    metrics = _fetch_prometheus_metrics()
-    hosts_total = metrics.get("hosts_total") or 1
-    pcpu_total = metrics.get("pcpu_total", 0)
-    pcpu_usage = metrics.get("pcpu_usage", 0)
-    vcpu_allocated = metrics.get("vcpu_allocated", 0)
-    vcpu_overcommit_max = metrics.get("vcpu_overcommit_max", 0)
-    pram_total = metrics.get("pram_total", 0)
-    pram_usage = metrics.get("pram_usage", 0)
-    vram_allocated = metrics.get("vram_allocated", 0)
-    vram_overcommit_max = metrics.get("vram_overcommit_max", 0)
-    vm_count = metrics.get("vm_count", 0)
-    vm_active = metrics.get("vm_active", 0)
-    vcpu_total = pcpu_total * vcpu_overcommit_max
-    vram_total = pram_total * vram_overcommit_max
-    return {
-        "region": {"name": region_name, "hosts_total": hosts_total},
-        "pcpu": {
-            "total": pcpu_total,
-            "usage": pcpu_usage,
-            "free": pcpu_total - pcpu_usage,
-            "used_percentage": (pcpu_usage / pcpu_total * 100) if pcpu_total else 0,
-        },
-        "vcpu": {
-            "total": vcpu_total,
-            "allocated": vcpu_allocated,
-            "free": vcpu_total - vcpu_allocated,
-            "allocated_percentage": (vcpu_allocated / vcpu_total * 100) if vcpu_total else 0,
-            "overcommit_ratio": (vcpu_allocated / pcpu_total) if pcpu_total else 0,
-            "overcommit_max": vcpu_overcommit_max,
-        },
-        "pram": {
-            "total": pram_total,
-            "usage": pram_usage,
-            "free": pram_total - pram_usage,
-            "used_percentage": (pram_usage / pram_total * 100) if pram_total else 0,
-        },
-        "vram": {
-            "total": vram_total,
-            "allocated": vram_allocated,
-            "free": vram_total - vram_allocated,
-            "allocated_percentage": (vram_allocated / vram_total * 100) if vram_total else 0,
-            "overcommit_ratio": (vram_allocated / pram_total) if pram_total else 0,
-            "overcommit_max": vram_overcommit_max,
-        },
-        "vm": {
-            "count": vm_count,
-            "active": vm_active,
-            "stopped": vm_count - vm_active,
-            "avg_cpu": vcpu_allocated / vm_count if vm_count else 0,
-            "avg_ram": vram_allocated / vm_count if vm_count else 0,
-            "density": vm_count / hosts_total if hosts_total else 0,
-        },
-        "flavors": flavors,
-    }
+    metrics = fetch_dashboard_metrics()
+    return build_stats(metrics, region_name, flavors)


 def collect_audits():
@@ -222,40 +81,14 @@ def collect_audits():

 def _skeleton_context():
    """Minimal context for skeleton-only index render."""
-    empty_flavors = {
-        "first_common_flavor": {"name": "—", "count": 0},
-        "second_common_flavor": None,
-        "third_common_flavor": None,
-    }
-    return {
-        "skeleton": True,
-        "region": {"name": "—", "hosts_total": 0},
-        "pcpu": {"total": 0, "usage": 0, "free": 0, "used_percentage": 0},
-        "pram": {"total": 0, "usage": 0, "free": 0, "used_percentage": 0},
-        "vcpu": {
-            "total": 0,
-            "allocated": 0,
-            "free": 0,
-            "allocated_percentage": 0,
-            "overcommit_ratio": 0,
-            "overcommit_max": 0,
-        },
-        "vram": {
-            "total": 0,
-            "allocated": 0,
-            "free": 0,
-            "allocated_percentage": 0,
-            "overcommit_ratio": 0,
-            "overcommit_max": 0,
-        },
-        "vm": {"count": 0, "active": 0, "stopped": 0, "avg_cpu": 0, "avg_ram": 0, "density": 0},
-        "flavors": empty_flavors,
-        "audits": [],
-        "current_cluster": {
-            "host_labels": "[]",
-            "cpu_current": "[]",
-        },
+    context = build_stats(_empty_metrics(), "—", EMPTY_FLAVORS)
+    context["skeleton"] = True
+    context["audits"] = []
+    context["current_cluster"] = {
+        "host_labels": "[]",
+        "cpu_current": "[]",
    }
+    return context


 def index(request):
@@ -267,28 +100,25 @@ def index(request):


 def api_stats(request):
-    cache_key = "dashboard_stats"
    cache_ttl = getattr(settings, "DASHBOARD_CACHE_TTL", 120)
-    data = cache.get(cache_key)
+    data = cache.get(CACHE_KEY_STATS)
    if data is None:
        data = collect_stats()
-        cache.set(cache_key, data, timeout=cache_ttl)
+        cache.set(CACHE_KEY_STATS, data, timeout=cache_ttl)
    return JsonResponse(data)


 def api_audits(request):
-    cache_key_audits = "dashboard_audits"
-    cache_key_cluster = "dashboard_current_cluster"
    cache_ttl = getattr(settings, "DASHBOARD_CACHE_TTL", 120)
-    audits = cache.get(cache_key_audits)
-    current_cluster = cache.get(cache_key_cluster)
+    audits = cache.get(CACHE_KEY_AUDITS)
+    current_cluster = cache.get(CACHE_KEY_CURRENT_CLUSTER)
    if audits is None:
        audits = collect_audits()
-        cache.set(cache_key_audits, audits, timeout=cache_ttl)
+        cache.set(CACHE_KEY_AUDITS, audits, timeout=cache_ttl)
    if current_cluster is None:
        connection = get_connection()
        current_cluster = get_current_cluster_cpu(connection)
-        cache.set(cache_key_cluster, current_cluster, timeout=cache_ttl)
+        cache.set(CACHE_KEY_CURRENT_CLUSTER, current_cluster, timeout=cache_ttl)
    return JsonResponse({"audits": audits, "current_cluster": current_cluster})


@@ -302,13 +132,12 @@ def api_source_status(request):
            }
        )

-    cache_key = "dashboard_source_status"
    cache_ttl = getattr(settings, "SOURCE_STATUS_CACHE_TTL", 30)
-    data = cache.get(cache_key)
+    data = cache.get(CACHE_KEY_SOURCE_STATUS)
    if data is None:
        data = {
            "prometheus": check_prometheus(),
            "openstack": check_openstack(),
        }
-        cache.set(cache_key, data, timeout=cache_ttl)
+        cache.set(CACHE_KEY_SOURCE_STATUS, data, timeout=cache_ttl)
    return JsonResponse(data)