# watcher-visio/dashboard/views.py

import json
from concurrent.futures import ThreadPoolExecutor, as_completed

from django.conf import settings
from django.core.cache import cache
from django.http import JsonResponse
from django.shortcuts import render
from dashboard.openstack_utils.connect import check_openstack, get_connection
from dashboard.openstack_utils.flavor import get_flavor_list
from dashboard.prometheus_utils.query import check_prometheus, query_prometheus
from dashboard.openstack_utils.audits import get_audits, get_current_cluster_cpu
from dashboard.mock_data import get_mock_context

# PromQL expressions that are executed in parallel, keyed by the name under
# which each result is reported (result key -> query string).
_PROMETHEUS_QUERIES = dict(
    # Hosts: number of node_exporter_build_info series for the compute job.
    hosts_total="count(node_exporter_build_info{job='node_exporter_compute'})",
    # Physical CPU: idle-mode CPU series counted without the cpu/mode labels,
    # then summed; usage is the summed 5-minute load.
    pcpu_total="sum(count(node_cpu_seconds_total{job='node_exporter_compute', mode='idle'}) without (cpu,mode))",
    pcpu_usage="sum(node_load5{job='node_exporter_compute'})",
    # Virtual CPU: summed libvirt vCPU counts and the average VCPU allocation ratio.
    vcpu_allocated="sum(libvirt_domain_info_virtual_cpus)",
    vcpu_overcommit_max="avg(openstack_placement_resource_allocation_ratio{resourcetype='VCPU'})",
    # Physical RAM: summed MemTotal / Active bytes for the compute job.
    pram_total="sum(node_memory_MemTotal_bytes{job='node_exporter_compute'})",
    pram_usage="sum(node_memory_Active_bytes{job='node_exporter_compute'})",
    # Virtual RAM: summed libvirt maximum memory and the MEMORY_MB allocation
    # ratio averaged over a 5-minute window.
    vram_allocated="sum(libvirt_domain_info_maximum_memory_bytes)",
    vram_overcommit_max="avg(avg_over_time(openstack_placement_resource_allocation_ratio{resourcetype='MEMORY_MB'}[5m]))",
    # VMs.
    # NOTE(review): both queries sum the sample *values* of
    # libvirt_domain_state_code rather than counting series -- confirm that
    # this yields the intended VM counts for the exporter in use.
    vm_count="sum(libvirt_domain_state_code)",
    vm_active="sum(libvirt_domain_state_code{stateDesc='the domain is running'})",
)


def _fetch_prometheus_metrics(queries=None, query_fn=None):
    """Run Prometheus queries in parallel and return a dict of name -> value.

    Args:
        queries: Optional mapping of result key -> PromQL string. When omitted,
            the module-level ``_PROMETHEUS_QUERIES`` mapping is used.
        query_fn: Optional callable invoked as ``query_fn(query=<PromQL>)`` for
            every entry. When omitted, ``query_prometheus`` is used.

    Returns:
        dict: one entry per key of ``queries``. The keys ``pcpu_usage``,
        ``vcpu_overcommit_max`` and ``vram_overcommit_max`` are converted with
        ``float()``; every other key is converted to ``int``. A result that
        cannot be converted (or a ``ValueError``/``TypeError`` raised while
        obtaining it) is reported as ``0``.

    Raises:
        Any other exception raised by ``query_fn`` propagates unchanged.
    """
    if queries is None:
        queries = _PROMETHEUS_QUERIES
    if query_fn is None:
        query_fn = query_prometheus
    if not queries:
        # ThreadPoolExecutor rejects max_workers=0, so there is nothing to run.
        return {}

    float_keys = frozenset(("pcpu_usage", "vcpu_overcommit_max", "vram_overcommit_max"))

    def _convert(key, raw):
        """Convert one raw query result to the numeric type used for *key*."""
        if key in float_keys:
            return float(raw)
        try:
            # Try an exact integer parse first so large values keep full precision.
            return int(raw)
        except ValueError:
            # Integral values may be written in float notation ("12.0", "1e3").
            return int(float(raw))

    result = {}
    with ThreadPoolExecutor(max_workers=len(queries)) as executor:
        future_to_key = {
            executor.submit(query_fn, query=q): key
            for key, q in queries.items()
        }
        for future in as_completed(future_to_key):
            key = future_to_key[future]
            try:
                # future.result() stays inside the try so a ValueError/TypeError
                # raised by the query itself still degrades to 0.
                result[key] = _convert(key, future.result())
            except (ValueError, TypeError, OverflowError):
                # OverflowError covers int() of an infinite value.
                result[key] = 0
    return result


def collect_context():
    """Build the full, render-ready dashboard context in one pass.

    Uses a single OpenStack connection for the region name, flavors, audits and
    the current cluster data (``host_labels`` / ``cpu_current``), and
    ``_fetch_prometheus_metrics()`` for the capacity numbers.

    Returns:
        dict: keys ``region``, ``pcpu``, ``vcpu``, ``pram``, ``vram``, ``vm``,
        ``flavors``, ``audits`` and ``current_cluster``. The two
        ``current_cluster`` values and the ``migrations``, ``host_labels``,
        ``cpu_current`` and ``cpu_projected`` fields of every audit are
        JSON-encoded strings.
    """
    conn = get_connection()
    region = conn._compute_region
    flavor_info = get_flavor_list(connection=conn)
    audit_list = get_audits(connection=conn)

    fetched = _fetch_prometheus_metrics()

    def metric(name):
        """Return the fetched metric, or 0 when it is absent."""
        return fetched.get(name, 0)

    def safe_div(numerator, denominator):
        """Divide, yielding 0 when the denominator is falsy."""
        return numerator / denominator if denominator else 0

    # A missing or zero host count is reported as 1.
    host_count = fetched.get("hosts_total") or 1
    cpu_phys_total = metric("pcpu_total")
    cpu_phys_used = metric("pcpu_usage")
    cpu_virt_allocated = metric("vcpu_allocated")
    cpu_overcommit_limit = metric("vcpu_overcommit_max")
    ram_phys_total = metric("pram_total")
    ram_phys_used = metric("pram_usage")
    ram_virt_allocated = metric("vram_allocated")
    ram_overcommit_limit = metric("vram_overcommit_max")
    vms_total = metric("vm_count")
    vms_running = metric("vm_active")

    # Virtual capacity is the physical capacity scaled by the overcommit limit.
    cpu_virt_total = cpu_phys_total * cpu_overcommit_limit
    ram_virt_total = ram_phys_total * ram_overcommit_limit

    region_block = {"name": region, "hosts_total": host_count}

    pcpu_block = {
        "total": cpu_phys_total,
        "usage": cpu_phys_used,
        "free": cpu_phys_total - cpu_phys_used,
        "used_percentage": safe_div(cpu_phys_used, cpu_phys_total) * 100,
    }
    vcpu_block = {
        "total": cpu_virt_total,
        "allocated": cpu_virt_allocated,
        "free": cpu_virt_total - cpu_virt_allocated,
        "allocated_percentage": safe_div(cpu_virt_allocated, cpu_virt_total) * 100,
        "overcommit_ratio": safe_div(cpu_virt_allocated, cpu_phys_total),
        "overcommit_max": cpu_overcommit_limit,
    }

    pram_block = {
        "total": ram_phys_total,
        "usage": ram_phys_used,
        "free": ram_phys_total - ram_phys_used,
        "used_percentage": safe_div(ram_phys_used, ram_phys_total) * 100,
    }
    vram_block = {
        "total": ram_virt_total,
        "allocated": ram_virt_allocated,
        "free": ram_virt_total - ram_virt_allocated,
        "allocated_percentage": safe_div(ram_virt_allocated, ram_virt_total) * 100,
        "overcommit_ratio": safe_div(ram_virt_allocated, ram_phys_total),
        "overcommit_max": ram_overcommit_limit,
    }

    vm_block = {
        "count": vms_total,
        "active": vms_running,
        "stopped": vms_total - vms_running,
        "avg_cpu": safe_div(cpu_virt_allocated, vms_total),
        "avg_ram": safe_div(ram_virt_allocated, vms_total),
        "density": safe_div(vms_total, host_count),
    }

    context = {
        "region": region_block,
        "pcpu": pcpu_block,
        "vcpu": vcpu_block,
        "pram": pram_block,
        "vram": vram_block,
        "vm": vm_block,
        "flavors": flavor_info,
        "audits": audit_list,
    }

    cluster = get_current_cluster_cpu(conn)
    context["current_cluster"] = {
        field: json.dumps(cluster[field])
        for field in ("host_labels", "cpu_current")
    }

    # JSON-encode these audit fields in place so the context is render-ready.
    for entry in context["audits"]:
        for field in ("migrations", "host_labels", "cpu_current", "cpu_projected"):
            entry[field] = json.dumps(entry[field])
    return context


def collect_stats():
    """Build the stats dict: region, pcpu, vcpu, pram, vram, vm and flavors.

    Audits are not included. The region name and flavors come from an OpenStack
    connection; the numeric values come from ``_fetch_prometheus_metrics()``.

    Returns:
        dict: keys ``region``, ``pcpu``, ``vcpu``, ``pram``, ``vram``, ``vm``
        and ``flavors``.
    """
    conn = get_connection()
    region = conn._compute_region
    flavor_info = get_flavor_list(connection=conn)
    values = _fetch_prometheus_metrics()

    def share(part, whole):
        """Return part / whole, or 0 when whole is falsy."""
        return part / whole if whole else 0

    def physical(total, used):
        """Describe a physical resource: total, usage, free, used_percentage."""
        return {
            "total": total,
            "usage": used,
            "free": total - used,
            "used_percentage": share(used, total) * 100,
        }

    def virtual(total, allocated, physical_total, overcommit_max):
        """Describe a virtual resource relative to its physical backing."""
        return {
            "total": total,
            "allocated": allocated,
            "free": total - allocated,
            "allocated_percentage": share(allocated, total) * 100,
            "overcommit_ratio": share(allocated, physical_total),
            "overcommit_max": overcommit_max,
        }

    # A missing or zero host count is reported as 1.
    hosts = values.get("hosts_total") or 1
    cpu_total = values.get("pcpu_total", 0)
    cpu_used = values.get("pcpu_usage", 0)
    cpu_allocated = values.get("vcpu_allocated", 0)
    cpu_limit = values.get("vcpu_overcommit_max", 0)
    ram_total = values.get("pram_total", 0)
    ram_used = values.get("pram_usage", 0)
    ram_allocated = values.get("vram_allocated", 0)
    ram_limit = values.get("vram_overcommit_max", 0)
    vms = values.get("vm_count", 0)
    vms_active = values.get("vm_active", 0)

    stats = {"region": {"name": region, "hosts_total": hosts}}
    stats["pcpu"] = physical(cpu_total, cpu_used)
    stats["vcpu"] = virtual(cpu_total * cpu_limit, cpu_allocated, cpu_total, cpu_limit)
    stats["pram"] = physical(ram_total, ram_used)
    stats["vram"] = virtual(ram_total * ram_limit, ram_allocated, ram_total, ram_limit)
    stats["vm"] = {
        "count": vms,
        "active": vms_active,
        "stopped": vms - vms_active,
        "avg_cpu": share(cpu_allocated, vms),
        "avg_ram": share(ram_allocated, vms),
        "density": share(vms, hosts),
    }
    stats["flavors"] = flavor_info
    return stats


def collect_audits():
    """Fetch the audits and JSON-encode selected fields for the frontend.

    The ``migrations``, ``host_labels``, ``cpu_current`` and ``cpu_projected``
    entries of every audit are replaced, in place, by their ``json.dumps``
    string.

    Returns:
        The audit collection returned by ``get_audits`` after encoding.
    """
    encoded_fields = ("migrations", "host_labels", "cpu_current", "cpu_projected")
    audit_list = get_audits(connection=get_connection())
    for entry in audit_list:
        for field in encoded_fields:
            entry[field] = json.dumps(entry[field])
    return audit_list


def _skeleton_context():
    """Return a placeholder context for rendering the index page as a skeleton.

    Every numeric field is 0, names are an em dash, the audit list is empty and
    the ``current_cluster`` values are the JSON text ``"[]"``. The ``skeleton``
    flag is set to ``True``.
    """
    placeholder = "—"

    def zeroed(*keys):
        """Build a fresh dict mapping each key to 0."""
        return dict.fromkeys(keys, 0)

    physical_keys = ("total", "usage", "free", "used_percentage")
    virtual_keys = (
        "total",
        "allocated",
        "free",
        "allocated_percentage",
        "overcommit_ratio",
        "overcommit_max",
    )

    context = {"skeleton": True}
    context["region"] = {"name": placeholder, "hosts_total": 0}
    context["pcpu"] = zeroed(*physical_keys)
    context["pram"] = zeroed(*physical_keys)
    context["vcpu"] = zeroed(*virtual_keys)
    context["vram"] = zeroed(*virtual_keys)
    context["vm"] = zeroed("count", "active", "stopped", "avg_cpu", "avg_ram", "density")
    context["flavors"] = {
        "first_common_flavor": {"name": placeholder, "count": 0},
        "second_common_flavor": None,
        "third_common_flavor": None,
    }
    context["audits"] = []
    context["current_cluster"] = {"host_labels": "[]", "cpu_current": "[]"}
    return context


def index(request):
    """Render ``index.html``.

    When ``settings.USE_MOCK_DATA`` is truthy the page is rendered with the
    context from ``get_mock_context()``; otherwise it is rendered with the
    placeholder context from ``_skeleton_context()``.
    """
    use_mock = getattr(settings, "USE_MOCK_DATA", False)
    page_context = get_mock_context() if use_mock else _skeleton_context()
    return render(request, "index.html", page_context)


def api_stats(request):
    """Return the dashboard stats as JSON, served from the cache when present.

    On a cache miss the stats are built with ``collect_stats()`` and stored
    under ``dashboard_stats`` for ``settings.DASHBOARD_CACHE_TTL`` seconds
    (120 when the setting is absent).
    """
    ttl = getattr(settings, "DASHBOARD_CACHE_TTL", 120)
    stats = cache.get("dashboard_stats")
    if stats is not None:
        return JsonResponse(stats)

    stats = collect_stats()
    cache.set("dashboard_stats", stats, timeout=ttl)
    return JsonResponse(stats)


def api_audits(request):
    """Return audits and current cluster data as JSON, each cached separately.

    The two values live under ``dashboard_audits`` and
    ``dashboard_current_cluster``; whichever one is missing from the cache is
    rebuilt and stored for ``settings.DASHBOARD_CACHE_TTL`` seconds (120 when
    the setting is absent).
    """
    ttl = getattr(settings, "DASHBOARD_CACHE_TTL", 120)
    audit_list = cache.get("dashboard_audits")
    cluster = cache.get("dashboard_current_cluster")

    if audit_list is None:
        audit_list = collect_audits()
        cache.set("dashboard_audits", audit_list, timeout=ttl)

    if cluster is None:
        # The cluster data is fetched through its own connection.
        cluster = get_current_cluster_cpu(get_connection())
        cache.set("dashboard_current_cluster", cluster, timeout=ttl)

    payload = {"audits": audit_list, "current_cluster": cluster}
    return JsonResponse(payload)


def api_source_status(request):
    """Report the status of the Prometheus and OpenStack data sources as JSON.

    With ``settings.USE_MOCK_DATA`` truthy both sources report ``"mock"`` and
    nothing is checked or cached. Otherwise the results of
    ``check_prometheus()`` and ``check_openstack()`` are returned, cached under
    ``dashboard_source_status`` for ``settings.SOURCE_STATUS_CACHE_TTL``
    seconds (30 when the setting is absent).
    """
    if getattr(settings, "USE_MOCK_DATA", False):
        mock_status = {
            source: {"status": "mock"} for source in ("prometheus", "openstack")
        }
        return JsonResponse(mock_status)

    ttl = getattr(settings, "SOURCE_STATUS_CACHE_TTL", 30)
    status = cache.get("dashboard_source_status")
    if status is None:
        # Prometheus is probed first, then OpenStack.
        prometheus_status = check_prometheus()
        openstack_status = check_openstack()
        status = {"prometheus": prometheus_status, "openstack": openstack_status}
        cache.set("dashboard_source_status", status, timeout=ttl)
    return JsonResponse(status)