mirror of
https://gitlab.com/polloloco/vgpu-proxmox.git
synced 2026-04-05 15:05:49 +03:00
Compare commits
29 Commits
450-driver
...
fdaa594afc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fdaa594afc | ||
|
|
c32b4c329f | ||
|
|
8e85d32eb0 | ||
|
|
3110f37d80 | ||
|
|
0c1d8e6bea | ||
|
|
028d78af09 | ||
|
|
9e3df0bdff | ||
|
|
e2955b232a | ||
|
|
a577cc6625 | ||
|
|
df93332349 | ||
|
|
ba197fb9ac | ||
|
|
646e599bce | ||
|
|
0e51ef508e | ||
|
|
dcf58742b8 | ||
|
|
5ec737e1a3 | ||
|
|
d1009fd47a | ||
|
|
8cef2c6082 | ||
|
|
ea99035a5b | ||
|
|
ba4b4b4787 | ||
|
|
22bd687e6d | ||
|
|
f2b1c0c1e9 | ||
|
|
7f5762abb3 | ||
|
|
e811051d09 | ||
|
|
cfd7da5ed2 | ||
|
|
cb0ed9baf3 | ||
|
|
f224154ea5 | ||
|
|
919b49ed74 | ||
|
|
dc9585f91e | ||
|
|
845180dfcd |
452
450_5.15.patch
452
450_5.15.patch
@@ -1,452 +0,0 @@
|
||||
--- ./kernel/Kbuild
|
||||
+++ ./kernel/Kbuild
|
||||
@@ -72,6 +72,7 @@ EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM -DNV_VERSION_STRING=\"450.156\" -Wno-unused-function -Wuninitialized -fno-strict-aliasing -mno-red-zone -mcmodel=kernel -DNV_UVM_ENABLE
|
||||
EXTRA_CFLAGS += $(call cc-option,-Werror=undef,)
|
||||
EXTRA_CFLAGS += -DNV_SPECTRE_V2=$(NV_SPECTRE_V2)
|
||||
+EXTRA_CFLAGS += -DNV_KERNEL_INTERFACE_LAYER -Wfatal-errors
|
||||
|
||||
#
|
||||
# Detect SGI UV systems and apply system-specific optimizations.
|
||||
|
||||
--- ./kernel/conftest.sh
|
||||
+++ ./kernel/conftest.sh
|
||||
@@ -4576,7 +4576,7 @@ case "$5" in
|
||||
#
|
||||
VERBOSE=$6
|
||||
iommu=CONFIG_VFIO_IOMMU_TYPE1
|
||||
- mdev=CONFIG_VFIO_MDEV_DEVICE
|
||||
+ mdev=CONFIG_VFIO_MDEV
|
||||
kvm=CONFIG_KVM_VFIO
|
||||
VFIO_IOMMU_PRESENT=0
|
||||
VFIO_MDEV_DEVICE_PRESENT=0
|
||||
|
||||
--- ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.c
|
||||
+++ ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.c
|
||||
@@ -24,6 +24,10 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/err.h>
|
||||
+#include <linux/eventfd.h>
|
||||
+#include <uapi/linux/uuid.h>
|
||||
+#include <linux/device.h>
|
||||
+#include <linux/mdev.h>
|
||||
#include "nvstatus.h"
|
||||
#include "nv-misc.h"
|
||||
#include "nv-linux.h"
|
||||
@@ -37,6 +41,25 @@
|
||||
struct vgpu_devs vgpu_devices;
|
||||
struct phys_devs phys_devices;
|
||||
|
||||
+struct mdev_parent {
|
||||
+ struct device *dev;
|
||||
+ const struct mdev_parent_ops *ops;
|
||||
+ struct kref ref;
|
||||
+ struct list_head next;
|
||||
+ struct kset *mdev_types_kset;
|
||||
+ struct list_head type_list;
|
||||
+ /* Synchronize device creation/removal with parent unregistration */
|
||||
+ struct rw_semaphore unreg_sem;
|
||||
+};
|
||||
+
|
||||
+struct mdev_type {
|
||||
+ struct kobject kobj;
|
||||
+ struct kobject *devices_kobj;
|
||||
+ struct mdev_parent *parent;
|
||||
+ struct list_head next;
|
||||
+ unsigned int type_group_id;
|
||||
+};
|
||||
+
|
||||
#define SLEEP_TIME_MILLISECONDS 20
|
||||
#define VGPU_EXIT_TIMEOUT_MILLISECONDS 5000
|
||||
#define WAITQUEUE_TIMEOUT_SECONDS 25000
|
||||
@@ -162,8 +185,8 @@ struct parent_ops vgpu_fops = {
|
||||
.remove = nv_vgpu_vfio_destroy,
|
||||
.read = nv_vgpu_vfio_read,
|
||||
.write = nv_vgpu_vfio_write,
|
||||
- .open = nv_vgpu_vfio_open,
|
||||
- .release = nv_vgpu_vfio_close,
|
||||
+ .open_device = nv_vgpu_vfio_open,
|
||||
+ .close_device = nv_vgpu_vfio_close,
|
||||
.ioctl = nv_vgpu_vfio_ioctl,
|
||||
.mmap = nv_vgpu_vfio_mmap,
|
||||
};
|
||||
@@ -368,9 +391,9 @@ static NV_STATUS nv_get_vgpu_type_id(const char *kobj_name, struct device *dev,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
-static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
|
||||
+static ssize_t name_show(struct mdev_type *mtype, struct mdev_type_attribute *attr, char *buf)
|
||||
{
|
||||
- struct pci_dev *pdev = to_pci_dev(dev);
|
||||
+ struct pci_dev *pdev = to_pci_dev(mtype->parent->dev);
|
||||
struct pci_dev *parent_device;
|
||||
NvU32 vgpu_type_id;
|
||||
NV_STATUS status;
|
||||
@@ -381,7 +404,7 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
|
||||
|
||||
parent_device = pdev;
|
||||
|
||||
- if (nv_get_vgpu_type_id(kobj->name, dev, &vgpu_type_id)
|
||||
+ if (nv_get_vgpu_type_id(mtype->kobj.name, mtype->parent->dev, &vgpu_type_id)
|
||||
== NV_OK)
|
||||
status = rm_vgpu_vfio_ops.get_name(parent_device, vgpu_type_id, buf);
|
||||
else
|
||||
@@ -394,9 +417,9 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
|
||||
}
|
||||
MDEV_TYPE_ATTR_RO(name);
|
||||
|
||||
-static ssize_t description_show(struct kobject *kobj, struct device *dev, char *buf)
|
||||
+static ssize_t description_show(struct mdev_type *mtype, struct mdev_type_attribute *attr, char *buf)
|
||||
{
|
||||
- struct pci_dev *pdev = to_pci_dev(dev);
|
||||
+ struct pci_dev *pdev = to_pci_dev(mtype->parent->dev);
|
||||
struct pci_dev *parent_device;
|
||||
NvU32 vgpu_type_id;
|
||||
NV_STATUS status;
|
||||
@@ -407,7 +430,7 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, char *
|
||||
|
||||
parent_device = pdev;
|
||||
|
||||
- if (nv_get_vgpu_type_id(kobj->name, dev, &vgpu_type_id)
|
||||
+ if (nv_get_vgpu_type_id(mtype->kobj.name, mtype->parent->dev, &vgpu_type_id)
|
||||
== NV_OK)
|
||||
status = rm_vgpu_vfio_ops.get_description(parent_device, vgpu_type_id, buf);
|
||||
else
|
||||
@@ -420,13 +443,13 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, char *
|
||||
}
|
||||
MDEV_TYPE_ATTR_RO(description);
|
||||
|
||||
-static ssize_t available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
|
||||
+static ssize_t available_instances_show(struct mdev_type *t, struct mdev_type_attribute *ta, char *buf)
|
||||
{
|
||||
- struct pci_dev *pdev = to_pci_dev(dev);
|
||||
+ struct pci_dev *pdev = to_pci_dev(t->parent->dev);
|
||||
NvU32 vgpu_type_id;
|
||||
NV_STATUS status;
|
||||
|
||||
- if ((nv_get_vgpu_type_id(kobj->name, dev, &vgpu_type_id)) == NV_OK)
|
||||
+ if ((nv_get_vgpu_type_id(t->kobj.name, t->parent->dev, &vgpu_type_id)) == NV_OK)
|
||||
status = rm_vgpu_vfio_ops.get_instances(pdev, vgpu_type_id, buf);
|
||||
else
|
||||
return -EINVAL;
|
||||
@@ -438,8 +461,7 @@ static ssize_t available_instances_show(struct kobject *kobj, struct device *dev
|
||||
}
|
||||
MDEV_TYPE_ATTR_RO(available_instances);
|
||||
|
||||
-static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
|
||||
- char *buf)
|
||||
+static ssize_t device_api_show(struct mdev_type *t, struct mdev_type_attribute *ta, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n",
|
||||
VFIO_DEVICE_API_PCI_STRING);
|
||||
@@ -534,7 +556,7 @@ destroy_exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static int nv_vgpu_vfio_create(struct kobject *kobj, struct mdev_device *mdev)
|
||||
+static int nv_vgpu_vfio_create(struct mdev_device *mdev)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
vgpu_dev_t *vgpu_dev = NULL;
|
||||
@@ -556,7 +578,7 @@ static int nv_vgpu_vfio_create(struct kobject *kobj, struct mdev_device *mdev)
|
||||
if (!pdev)
|
||||
return -EINVAL;
|
||||
|
||||
- if (nv_get_vgpu_type_id(kobj->name, NV_GET_MDEV_PARENT(mdev), &vgpu_type_id)
|
||||
+ if (nv_get_vgpu_type_id(mdev->type->kobj.name, NV_GET_MDEV_PARENT(mdev), &vgpu_type_id)
|
||||
!= NV_OK)
|
||||
{
|
||||
ret = -EINVAL;
|
||||
@@ -631,12 +653,7 @@ static int nv_vgpu_vfio_create(struct kobject *kobj, struct mdev_device *mdev)
|
||||
if (pdev->is_virtfn)
|
||||
{
|
||||
#if defined(NV_MDEV_SET_IOMMU_DEVICE_PRESENT)
|
||||
- ret = mdev_set_iommu_device(NV_GET_MDEV_DEV(mdev), NV_GET_MDEV_PARENT(mdev));
|
||||
- if (ret != 0)
|
||||
- {
|
||||
- NV_VGPU_DEV_LOG(VGPU_ERR, mdev, "Failed to set IOMMU device. ret: %d \n", ret);
|
||||
- goto remove_vgpu;
|
||||
- }
|
||||
+ mdev_set_iommu_device(mdev, NV_GET_MDEV_PARENT(mdev));
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -2479,19 +2496,18 @@ invalidate_exit:
|
||||
|
||||
static int vgpu_save_fd(vgpu_dev_t *vgpu_dev, int fd, NvU32 index)
|
||||
{
|
||||
- struct fd irqfd;
|
||||
+ struct eventfd_ctx *evt;
|
||||
|
||||
- irqfd = fdget(fd);
|
||||
- if (!irqfd.file)
|
||||
- return -EBADF;
|
||||
+ evt = eventfd_ctx_fdget(fd);
|
||||
+ if (IS_ERR(evt))
|
||||
+ return PTR_ERR(evt);
|
||||
|
||||
if (index == VFIO_PCI_INTX_IRQ_INDEX)
|
||||
- vgpu_dev->intr_info.intx_file = irqfd.file;
|
||||
- else if (index == VFIO_PCI_MSI_IRQ_INDEX)
|
||||
- vgpu_dev->intr_info.msi_file = irqfd.file;
|
||||
+ vgpu_dev->intr_info.intx_evtfd = evt;
|
||||
+ else if (index == VFIO_PCI_MSI_IRQ_INDEX)
|
||||
+ vgpu_dev->intr_info.msi_evtfd = evt;
|
||||
|
||||
vgpu_dev->intr_info.index = index;
|
||||
- fdput(irqfd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2500,11 +2516,8 @@ static int vgpu_save_fd(vgpu_dev_t *vgpu_dev, int fd, NvU32 index)
|
||||
static irqreturn_t vgpu_msix_handler(int irq, void *arg)
|
||||
{
|
||||
vgpu_dev_t *vgpu_dev = (vgpu_dev_t *)arg;
|
||||
- struct file *pfile = NULL;
|
||||
- mm_segment_t old_fs;
|
||||
- NvU64 val = 1;
|
||||
+ struct eventfd_ctx *evt = NULL;
|
||||
int ret = 0;
|
||||
- loff_t offset = 0;
|
||||
int i;
|
||||
unsigned long eflags;
|
||||
|
||||
@@ -2512,21 +2525,16 @@ static irqreturn_t vgpu_msix_handler(int irq, void *arg)
|
||||
{
|
||||
if (vgpu_dev->intr_info.allocated_irq[i] == irq)
|
||||
{
|
||||
- pfile = vgpu_dev->intr_info.msix_fd[i].file;
|
||||
+ evt = vgpu_dev->intr_info.msix_evtfd[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
- if (pfile && pfile->f_op && pfile->f_op->write)
|
||||
+ if (evt)
|
||||
{
|
||||
- old_fs = get_fs();
|
||||
- set_fs(KERNEL_DS);
|
||||
-
|
||||
NV_SAVE_FLAGS(eflags);
|
||||
- ret = pfile->f_op->write(pfile, (char *)&val, sizeof(val), &offset);
|
||||
+ ret = eventfd_signal(evt, 1);
|
||||
NV_RESTORE_FLAGS(eflags);
|
||||
-
|
||||
- set_fs(old_fs);
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
@@ -2537,23 +2545,24 @@ static int vgpu_msix_set_vector_signal(vgpu_dev_t *vgpu_dev,
|
||||
{
|
||||
struct pci_dev *pdev;
|
||||
int irq = INVALID_IRQ, ret;
|
||||
- struct fd irqfd;
|
||||
+ struct eventfd_ctx *evt;
|
||||
|
||||
pdev = to_pci_dev(NV_GET_MDEV_PARENT(vgpu_dev->mdev));
|
||||
|
||||
- if (vgpu_dev->intr_info.msix_fd[vector].file)
|
||||
+ if (vgpu_dev->intr_info.msix_evtfd[vector])
|
||||
{
|
||||
free_irq(vgpu_dev->intr_info.allocated_irq[vector], vgpu_dev);
|
||||
- vgpu_dev->intr_info.msix_fd[vector].file = NULL;
|
||||
+ eventfd_ctx_put(vgpu_dev->intr_info.msix_evtfd[vector]);
|
||||
+ vgpu_dev->intr_info.msix_evtfd[vector] = NULL;
|
||||
vgpu_dev->intr_info.allocated_irq[vector] = INVALID_IRQ;
|
||||
}
|
||||
|
||||
if (fd < 0)
|
||||
return 0;
|
||||
|
||||
- irqfd = fdget(fd);
|
||||
- if (!irqfd.file)
|
||||
- return -EBADF;
|
||||
+ evt = eventfd_ctx_fdget(fd);
|
||||
+ if (IS_ERR(evt))
|
||||
+ return PTR_ERR(evt);
|
||||
|
||||
if (vector < 0 || vector >= vgpu_dev->intr_info.num_ctx)
|
||||
return -EINVAL;
|
||||
@@ -2569,7 +2578,7 @@ static int vgpu_msix_set_vector_signal(vgpu_dev_t *vgpu_dev,
|
||||
|
||||
vgpu_dev->intr_info.allocated_irq[vector] = irq;
|
||||
|
||||
- vgpu_dev->intr_info.msix_fd[vector]= irqfd;
|
||||
+ vgpu_dev->intr_info.msix_evtfd[vector]= evt;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2586,7 +2595,12 @@ static void vgpu_msix_disable(vgpu_dev_t *vgpu_dev)
|
||||
if (vgpu_dev->intr_info.allocated_irq[i] != INVALID_IRQ)
|
||||
{
|
||||
free_irq(vgpu_dev->intr_info.allocated_irq[i], vgpu_dev);
|
||||
- vgpu_dev->intr_info.msix_fd[i].file = NULL;
|
||||
+
|
||||
+ if (vgpu_dev->intr_info.msix_evtfd[i]) {
|
||||
+ eventfd_ctx_put(vgpu_dev->intr_info.msix_evtfd[i]);
|
||||
+ vgpu_dev->intr_info.msix_evtfd[i] = NULL;
|
||||
+ }
|
||||
+
|
||||
vgpu_dev->intr_info.allocated_irq[i] = INVALID_IRQ;
|
||||
}
|
||||
}
|
||||
@@ -2675,7 +2689,10 @@ static int nv_vgpu_vfio_set_irqs(vgpu_dev_t *vgpu_dev, uint32_t flags,
|
||||
{
|
||||
if (flags & VFIO_IRQ_SET_DATA_NONE)
|
||||
{
|
||||
- vgpu_dev->intr_info.intx_file = NULL;
|
||||
+ if (vgpu_dev->intr_info.intx_evtfd) {
|
||||
+ eventfd_ctx_put(vgpu_dev->intr_info.intx_evtfd);
|
||||
+ vgpu_dev->intr_info.intx_evtfd = NULL;
|
||||
+ }
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -2700,7 +2717,10 @@ static int nv_vgpu_vfio_set_irqs(vgpu_dev_t *vgpu_dev, uint32_t flags,
|
||||
{
|
||||
if (flags & VFIO_IRQ_SET_DATA_NONE)
|
||||
{
|
||||
- vgpu_dev->intr_info.msi_file = NULL;
|
||||
+ if (vgpu_dev->intr_info.msi_evtfd) {
|
||||
+ eventfd_ctx_put(vgpu_dev->intr_info.msi_evtfd);
|
||||
+ vgpu_dev->intr_info.msi_evtfd = NULL;
|
||||
+ }
|
||||
vgpu_dev->intr_info.index = VFIO_PCI_INTX_IRQ_INDEX;
|
||||
break;
|
||||
}
|
||||
@@ -2708,10 +2728,9 @@ static int nv_vgpu_vfio_set_irqs(vgpu_dev_t *vgpu_dev, uint32_t flags,
|
||||
if (flags & VFIO_IRQ_SET_DATA_EVENTFD)
|
||||
{
|
||||
int fd = *(int *)data;
|
||||
- if (fd > 0)
|
||||
+ if (fd > 0 && !vgpu_dev->intr_info.msi_evtfd)
|
||||
{
|
||||
- if (vgpu_dev->intr_info.msi_file == NULL)
|
||||
- ret = vgpu_save_fd(vgpu_dev, fd, index);
|
||||
+ ret = vgpu_save_fd(vgpu_dev, fd, index);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -2766,12 +2785,9 @@ exit:
|
||||
|
||||
NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef)
|
||||
{
|
||||
- mm_segment_t old_fs;
|
||||
- NvU64 val = 1;
|
||||
int ret = 0;
|
||||
- loff_t offset = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
- struct file *pfile = NULL;
|
||||
+ struct eventfd_ctx *evt = NULL;
|
||||
vgpu_dev_t *vgpu_dev = vgpuRef;
|
||||
unsigned long eflags;
|
||||
|
||||
@@ -2780,12 +2796,12 @@ NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef)
|
||||
|
||||
NV_SPIN_LOCK_IRQSAVE(&vgpu_dev->intr_info_lock, eflags);
|
||||
|
||||
- if ((vgpu_dev->intr_info.index == VFIO_PCI_MSI_IRQ_INDEX) && (vgpu_dev->intr_info.msi_file == NULL))
|
||||
+ if ((vgpu_dev->intr_info.index == VFIO_PCI_MSI_IRQ_INDEX) && (!vgpu_dev->intr_info.msi_evtfd))
|
||||
{
|
||||
NV_SPIN_UNLOCK_IRQRESTORE(&vgpu_dev->intr_info_lock, eflags);
|
||||
return NV_ERR_INVALID_REQUEST;
|
||||
}
|
||||
- else if ((vgpu_dev->intr_info.index == VFIO_PCI_INTX_IRQ_INDEX) && (vgpu_dev->intr_info.intx_file == NULL))
|
||||
+ else if ((vgpu_dev->intr_info.index == VFIO_PCI_INTX_IRQ_INDEX) && (!vgpu_dev->intr_info.intx_evtfd))
|
||||
{
|
||||
NV_SPIN_UNLOCK_IRQRESTORE(&vgpu_dev->intr_info_lock, eflags);
|
||||
return NV_ERR_INVALID_REQUEST;
|
||||
@@ -2797,9 +2813,9 @@ NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef)
|
||||
}
|
||||
|
||||
if (vgpu_dev->intr_info.index == VFIO_PCI_MSI_IRQ_INDEX)
|
||||
- pfile = vgpu_dev->intr_info.msi_file;
|
||||
+ evt = vgpu_dev->intr_info.msi_evtfd;
|
||||
else
|
||||
- pfile = vgpu_dev->intr_info.intx_file;
|
||||
+ evt = vgpu_dev->intr_info.intx_evtfd;
|
||||
|
||||
// QEMU has exited. So, safe to ignore interrupts.
|
||||
if (vgpu_dev->intr_info.ignore_interrupts == NV_TRUE)
|
||||
@@ -2809,19 +2825,14 @@ NV_STATUS nv_vgpu_inject_interrupt(void *vgpuRef)
|
||||
}
|
||||
NV_SPIN_UNLOCK_IRQRESTORE(&vgpu_dev->intr_info_lock, eflags);
|
||||
|
||||
- old_fs = get_fs();
|
||||
- set_fs(KERNEL_DS);
|
||||
-
|
||||
- if (pfile->f_op && pfile->f_op->write)
|
||||
- ret = pfile->f_op->write(pfile, (char *)&val, sizeof(val), &offset);
|
||||
- else
|
||||
- status = NV_ERR_INVALID_REQUEST;
|
||||
+ if (evt)
|
||||
+ ret = eventfd_signal(evt, 1);
|
||||
+ else
|
||||
+ status = NV_ERR_INVALID_REQUEST;
|
||||
|
||||
if (ret < 0)
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
|
||||
- set_fs(old_fs);
|
||||
-
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -4165,6 +4176,6 @@ static void __exit nv_vgpu_vfio_exit(void)
|
||||
module_init(nv_vgpu_vfio_init);
|
||||
module_exit(nv_vgpu_vfio_exit);
|
||||
|
||||
-MODULE_LICENSE("MIT");
|
||||
+MODULE_LICENSE("GPL");
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
||||
--- ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.h
|
||||
+++ ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.h
|
||||
@@ -51,7 +51,7 @@ static NV_STATUS nv_vgpu_probe(struct pci_dev *dev, NvU32, NvU32 *);
|
||||
static NV_STATUS nv_vgpu_vfio_validate_map_request(struct mdev_device *, loff_t, NvU64 *,
|
||||
NvU64 *, NvU64 *, pgprot_t *, NvBool *);
|
||||
static void nv_vgpu_remove(struct pci_dev *);
|
||||
-static int nv_vgpu_vfio_create(struct kobject *, struct mdev_device *);
|
||||
+static int nv_vgpu_vfio_create(struct mdev_device *);
|
||||
static int nv_vgpu_vfio_destroy(struct mdev_device *mdev);
|
||||
static int nv_vgpu_vfio_open(struct mdev_device *);
|
||||
static void nv_vgpu_vfio_close(struct mdev_device *);
|
||||
@@ -293,19 +293,20 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
- struct file *intx_file;
|
||||
- struct file *msi_file;
|
||||
+ struct eventfd_ctx *intx_evtfd;
|
||||
+ struct eventfd_ctx *msi_evtfd;
|
||||
int index;
|
||||
NvBool ignore_interrupts;
|
||||
|
||||
NvU32 allocated_irq[MAX_NUM_VECTORS];
|
||||
NvU32 num_ctx;
|
||||
#if defined(NV_VGPU_KVM_BUILD)
|
||||
- struct fd msix_fd[MAX_NUM_VECTORS];
|
||||
+ struct eventfd_ctx *msix_evtfd[MAX_NUM_VECTORS];
|
||||
#endif
|
||||
|
||||
} intr_info_t;
|
||||
|
||||
+
|
||||
typedef struct
|
||||
{
|
||||
NvU64 pending;
|
||||
|
||||
--- ./kernel/nvidia/nv-frontend.c
|
||||
+++ ./kernel/nvidia/nv-frontend.c
|
||||
@@ -15,7 +15,7 @@
|
||||
#include "nv-frontend.h"
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
-MODULE_LICENSE("NVIDIA");
|
||||
+MODULE_LICENSE("GPL");
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
|
||||
BIN
510.108.03.patch
Normal file
BIN
510.108.03.patch
Normal file
Binary file not shown.
BIN
510.85.03.patch
Normal file
BIN
510.85.03.patch
Normal file
Binary file not shown.
BIN
525.60.12.patch
Normal file
BIN
525.60.12.patch
Normal file
Binary file not shown.
BIN
525.85.07.patch
Normal file
BIN
525.85.07.patch
Normal file
Binary file not shown.
BIN
535.104.06.patch
Normal file
BIN
535.104.06.patch
Normal file
Binary file not shown.
BIN
535.129.03.patch
Normal file
BIN
535.129.03.patch
Normal file
Binary file not shown.
BIN
535.54.06.patch
Normal file
BIN
535.54.06.patch
Normal file
Binary file not shown.
526
README.md
526
README.md
@@ -1,15 +1,45 @@
|
||||
# NVIDIA vGPU on PVE 7.1 with a NVIDIA T1000 GPU
|
||||
# NVIDIA vGPU on Proxmox
|
||||
|
||||
This tutorial (and included patches) should allow you to use vGPU unlock on PVE 7.1 with the opt-in 5.15 Linux Kernel with a NVIDIA T1000 GPU. The GPU uses the TU117 Chip so other GPUs with the same Chip (T400, T600, GTX 1650 **NOT** Super) will probably work (no guarantees).
|
||||
[](https://www.buymeacoffee.com/polloloco)
|
||||
|
||||
### This tutorial assumes you are using a clean install of PVE 7.1, or ymmv when using an existing installation. Make sure to always have backups!
|
||||
This document serves as a guide to install NVIDIA vGPU host drivers on the latest Proxmox VE version, at time of writing this its pve 8.1.
|
||||
|
||||
You can follow this guide if you have a vGPU supported card from [this list](https://docs.nvidia.com/grid/gpus-supported-by-vgpu.html), or if you are using a consumer GPU from the GeForce series or a non-vGPU qualified Quadro GPU. There are several sections with a title similar to "Have a vGPU supported GPU? Read here" in this document, make sure to read those very carefully as this is where the instructions differ for a vGPU qualified card and a consumer card.
|
||||
|
||||
## Supported cards
|
||||
|
||||
The following consumer/not-vGPU-qualified NVIDIA GPUs can be used with vGPU:
|
||||
- Most GPUs from the Maxwell 2.0 generation (GTX 9xx, Quadro Mxxxx, Tesla Mxx) **EXCEPT the GTX 970**
|
||||
- All GPUs from the Pascal generation (GTX 10xx, Quadro Pxxxx, Tesla Pxx)
|
||||
- All GPUs from the Turing generation (GTX 16xx, RTX 20xx, Txxxx)
|
||||
|
||||
If you have GPUs from the Ampere and Ada Lovelace generation, you are out of luck, unless you have a vGPU qualified card from [this list](https://docs.nvidia.com/grid/gpus-supported-by-vgpu.html) like the A5000 or RTX 6000 Ada. If you have one of those cards, please consult the [NVIDIA documentation](https://docs.nvidia.com/grid/15.0/grid-vgpu-user-guide/index.html) for help with setting it up.
|
||||
|
||||
> **!!! THIS MEANS THAT YOUR RTX 30XX or 40XX WILL NOT WORK !!!**
|
||||
|
||||
This guide and all my tests were done on a RTX 2080 Ti which is based on the Turing architechture.
|
||||
|
||||
## Important notes before starting
|
||||
- This tutorial assumes you are using a clean install of Proxmox VE 8.1.
|
||||
- If you are using Proxmox VE 8.1, you **MUST** use 16.x drivers. Older versions only work with pve 7
|
||||
- If you tried GPU-passthrough before, you absolutely **MUST** revert all of the steps you did to set that up.
|
||||
- If you only have one GPU in your system with no iGPU, your local monitor will **NOT** give you any output anymore after the system boots up. Use SSH or a serial connection if you want terminal access to your machine.
|
||||
- Most of the steps can be applied to other linux distributions, however I'm only covering Proxmox VE here.
|
||||
|
||||
> ## Are you upgrading from a previous version of this guide?
|
||||
>
|
||||
> If you are upgrading from a previous version of this guide, you should uninstall the old driver by running `nvidia-uninstall` first.
|
||||
>
|
||||
> Then you also have to make sure that you are using the latest version of `vgpu_unlock-rs`, otherwise it won't work with the latest driver.
|
||||
>
|
||||
> Either delete the folder `/opt/vgpu_unlock-rs` or enter the folder and run `git pull` and then recompile the library again using `cargo build --release`
|
||||
|
||||
## Packages
|
||||
|
||||
Make sure to add the community pve repo and get rid of the enterprise repo (you can skip this step if you have a valid enterprise subscription)
|
||||
|
||||
```bash
|
||||
echo "deb http://download.proxmox.com/debian/pve bullseye pve-no-subscription" >> /etc/apt/sources.list
|
||||
echo "deb http://download.proxmox.com/debian/pve bookworm pve-no-subscription" >> /etc/apt/sources.list
|
||||
rm /etc/apt/sources.list.d/pve-enterprise.list
|
||||
```
|
||||
|
||||
@@ -19,33 +49,27 @@ apt update
|
||||
apt dist-upgrade
|
||||
```
|
||||
|
||||
PVE 7.1 comes with version 5.13 of the Linux Kernel, that version is incompatible with vGPU. For this guide you will have to install version 5.15, which will probably come with PVE 7.2 (~Q2 2022) but is opt-in on current PVE versions
|
||||
We need to install a few more packages like git, a compiler and some other tools.
|
||||
```bash
|
||||
apt install -y pve-kernel-5.15 pve-headers-5.15
|
||||
apt install -y git build-essential dkms pve-headers mdevctl
|
||||
```
|
||||
|
||||
Next we need to install a few more packages like git, a compiler and some other tools
|
||||
```bash
|
||||
apt install -y git build-essential dkms jq pve-headers mdevctl
|
||||
```
|
||||
|
||||
## Git repos and glorious [Rust](https://www.rust-lang.org/) compiler
|
||||
## Git repos and [Rust](https://www.rust-lang.org/) compiler
|
||||
|
||||
First, clone this repo to your home folder (in this case `/root/`)
|
||||
```bash
|
||||
git clone https://gitlab.com/polloloco/vgpu-5.15.git
|
||||
git clone https://gitlab.com/polloloco/vgpu-proxmox.git
|
||||
```
|
||||
|
||||
Clone two additional git repos for vGPU unlock
|
||||
You also need the vgpu_unlock-rs repo
|
||||
```bash
|
||||
cd /opt
|
||||
git clone https://github.com/DualCoder/vgpu_unlock
|
||||
git clone https://github.com/p0lloloco/vgpu_unlock-rs
|
||||
git clone https://github.com/mbilker/vgpu_unlock-rs.git
|
||||
```
|
||||
|
||||
After that, install the rust compiler
|
||||
```bash
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal
|
||||
```
|
||||
|
||||
Now make the rust binaries available in your $PATH (you only have to do it the first time after installing rust)
|
||||
@@ -76,40 +100,99 @@ echo -e "[Service]\nEnvironment=LD_PRELOAD=/opt/vgpu_unlock-rs/target/release/li
|
||||
echo -e "[Service]\nEnvironment=LD_PRELOAD=/opt/vgpu_unlock-rs/target/release/libvgpu_unlock_rs.so" > /etc/systemd/system/nvidia-vgpu-mgr.service.d/vgpu_unlock.conf
|
||||
```
|
||||
|
||||
> ### Have a vgpu supported card? Read here!
|
||||
>
|
||||
> If you don't have a card like the Tesla P4, or any other gpu from [this list](https://docs.nvidia.com/grid/gpus-supported-by-vgpu.html), please continue reading at [Enabling IOMMU](#enabling-iommu)
|
||||
>
|
||||
> Disable the unlock part as doing this on a gpu that already supports vgpu, could break things as it introduces unnecessary complexity and more points of possible failure:
|
||||
> ```bash
|
||||
> echo "unlock = false" > /etc/vgpu_unlock/config.toml
|
||||
> ```
|
||||
|
||||
## Enabling IOMMU
|
||||
#### Note: Usually this isn't required for vGPU to work, but it doesn't hurt to enable it. You can skip this section, but if you run into problems later on, make sure to enable IOMMU.
|
||||
|
||||
Assuming you installed PVE with ZFS-on-root and efi, you are booting with systemd-boot. All other installations use grub. The following instructions *ONLY* apply to systemd-boot, grub is different.
|
||||
To enable IOMMU you have to enable it in your BIOS/UEFI first. Due to it being vendor specific, I am unable to provide instructions for that, but usually for Intel systems the option you are looking for is called something like "Vt-d", AMD systems tend to call it "IOMMU".
|
||||
|
||||
To enable IOMMU you have to enable it in your UEFI first. Due to it being vendor specific, I am unable to provide instructions for that, but usually for Intel systems the option you are looking for is called something like "Vt-d", AMD systems tend to call it "IOMMU".
|
||||
After enabling it in your BIOS/UEFI, you also have to enable it in your kernel. Depending on how your system is booting, there are two ways to do that.
|
||||
|
||||
After enabling IOMMU in your UEFI, you have to add some options to your kernel to enable it in proxmox. Edit the kernel command line like this
|
||||
```bash
|
||||
nano /etc/kernel/cmdline
|
||||
```
|
||||
If you installed your system with ZFS-on-root and in UEFI mode, then you are using systemd-boot, everything else is GRUB. GRUB is way more common so if you are unsure, you are probably using that.
|
||||
|
||||
On a clean installation the file might look similar to this:
|
||||
```
|
||||
root=ZFS=rpool/ROOT/pve-1 boot=zfs
|
||||
```
|
||||
Depending on which system you are using to boot, you have to chose from the following two options:
|
||||
|
||||
On Intel systems, append this line at the end
|
||||
```
|
||||
intel_iommu=on iommu=pt
|
||||
```
|
||||
<details>
|
||||
<summary>GRUB</summary>
|
||||
|
||||
For AMD, use this
|
||||
```
|
||||
amd_iommu=on iommu=pt
|
||||
```
|
||||
Open the file `/etc/default/grub` in your favorite editor
|
||||
```bash
|
||||
nano /etc/default/grub
|
||||
```
|
||||
|
||||
After editing the file, it should look similar to this
|
||||
```
|
||||
root=ZFS=rpool/ROOT/pve-1 boot=zfs intel_iommu=on iommu=pt
|
||||
```
|
||||
The kernel parameters have to be appended to the variable `GRUB_CMDLINE_LINUX_DEFAULT`. On a clean installation that line should look like this
|
||||
```
|
||||
GRUB_CMDLINE_LINUX_DEFAULT="quiet"
|
||||
```
|
||||
|
||||
Save and exit using Ctrl+O and then Ctrl+X
|
||||
If you are using an Intel system, append this after `quiet`:
|
||||
```
|
||||
intel_iommu=on
|
||||
```
|
||||
|
||||
On AMD systems, you don't have to add anything and amd_iommu=on does not exist:
|
||||
https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html?highlight=amd_iommu
|
||||
|
||||
For either AMD or Intel there is an option incase you have heavy performance issues, but with the loss of security and stability of the system:
|
||||
```
|
||||
iommu=pt
|
||||
```
|
||||
|
||||
The result should look like this (for intel systems):
|
||||
```
|
||||
GRUB_CMDLINE_LINUX_DEFAULT="quiet intel_iommu=on"
|
||||
```
|
||||
|
||||
Now, save and exit from the editor using Ctrl+O and then Ctrl+X and then apply your changes:
|
||||
```bash
|
||||
update-grub
|
||||
```
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>systemd-boot</summary>
|
||||
|
||||
The kernel parameters have to be appended to the commandline in the file `/etc/kernel/cmdline`, so open that in your favorite editor:
|
||||
```bash
|
||||
nano /etc/kernel/cmdline
|
||||
```
|
||||
|
||||
On a clean installation the file might look similar to this:
|
||||
```
|
||||
root=ZFS=rpool/ROOT/pve-1 boot=zfs
|
||||
```
|
||||
|
||||
On Intel systems, append this at the end
|
||||
```
|
||||
intel_iommu=on
|
||||
```
|
||||
|
||||
On AMD systems, you don't have to add anything and amd_iommu=on does not exist:
|
||||
https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html?highlight=amd_iommu
|
||||
|
||||
For either AMD or Intel there is an option incase you have heavy performance issues, but with the loss of security and stability of the system:
|
||||
```
|
||||
iommu=pt
|
||||
```
|
||||
|
||||
After editing the file, it should look similar to this
|
||||
```
|
||||
root=ZFS=rpool/ROOT/pve-1 boot=zfs intel_iommu=on
|
||||
```
|
||||
|
||||
Now, save and exit from the editor using Ctrl+O and then Ctrl+X and then apply your changes:
|
||||
```bash
|
||||
proxmox-boot-tool refresh
|
||||
```
|
||||
</details>
|
||||
|
||||
## Loading required kernel modules and blacklisting the open source nvidia driver
|
||||
|
||||
@@ -123,11 +206,12 @@ Proxmox comes with the open source nouveau driver for nvidia gpus, however we ha
|
||||
echo "blacklist nouveau" >> /etc/modprobe.d/blacklist.conf
|
||||
```
|
||||
|
||||
## IMPORTANT: Apply our kernel configuration
|
||||
#### Note: This only applies to systemd-boot, if you are using grub, you can't use these instructions
|
||||
## Applying our kernel configuration
|
||||
|
||||
I'm not sure if this is needed, but it doesn't hurt :)
|
||||
|
||||
```bash
|
||||
proxmox-boot-tool refresh
|
||||
update-initramfs -u -k all
|
||||
```
|
||||
|
||||
...and reboot
|
||||
@@ -175,75 +259,90 @@ Depending on your mainboard and cpu, the output will be different, in my output
|
||||
|
||||
## NVIDIA Driver
|
||||
|
||||
### Choosing the right driver version
|
||||
This repo contains patches that allow you to use vGPU on not-qualified-vGPU cards (consumer GPUs). Those patches are binary patches, which means that each patch works **ONLY** for a specific driver version.
|
||||
|
||||
This is the tricky part, at the time of writing (Jan 2022), there are [three active branches](https://docs.nvidia.com/grid/) of the NVIDIA vGPU driver. The latest is branch 13 (long term support branch until mid 2024) with driver version 470. I had no luck getting *any* version of that driver to work with vGPU at all but as always - ymmv.
|
||||
I've created patches for the following driver versions:
|
||||
- 16.2 (535.129.03) - Use this if you are on pve 8.1 (kernel 6.2, 6.5 should work too)
|
||||
- 16.1 (535.104.06)
|
||||
- 16.0 (535.54.06)
|
||||
|
||||
Branch 12 is a "regular" production branch with support until January of 2022 and has driver version number 460. Lots of people are running that driver in combination with the Linux Kernel 5.15. I got it installed with my gpu, but as soon as I tried to use the gpu in my VM, the display would freeze every 30-ish seconds and `nvidia-vgpu-mgr.service` would report an error similar to `error: vmiop_log: (0x0): XID 43 detected on physical_chid:0x1c, guest_chid:0x14`. At first I thought I messed up some of the driver patches required to get the driver working on kernels newer than 5.11 - so I tried on PVE 6.4 without any patches (5.4 kernel) but got the same errors there. If anyone knows what's causing this error, or even how to fix it, **please** let me know :)
|
||||
> ### The following versions are EOL, don't use them unless you have a very specific reason!
|
||||
> - 15.1 (525.85.07)
|
||||
> - 15.0 (525.60.12)
|
||||
> - 14.4 (510.108.03)
|
||||
> - 14.3 (510.108.03)
|
||||
> - 14.2 (510.85.03)
|
||||
|
||||
Ruling out those two branches only leaves the older long term support branch 11: It is supported until mid 2023 and has the driver version 450. Like the other branch (12), you have to patch some parts of the driver to get it working on the Linux Kernel 5.15. I tried every patch I could find on the Internet (mostly twelve.patch and fourteen.patch and their variations) but no combination of them allowed me to install the driver - the installer would always complain about my system being incompatible. So I spent a few hours looking at the existing patches and reviewing the files they patch to finally come up with my own patch: Basically, it adapts twelve.patch and fourteen.patch to this older driver (they seem to be designed for the branch 12 driver) and merges them into a single patch.
|
||||
You can choose which of those you want to use, but generally it's recommended to use the latest, most up-to-date version (16.2 in this case).
|
||||
|
||||
If you have a vGPU qualified GPU, you can use other versions too, because you don't need to patch the driver. However, you still have to make sure they are compatible with your proxmox version and kernel. Also I would not recommend using any older versions unless you have a very specific requirement.
|
||||
|
||||
### Obtaining the driver
|
||||
|
||||
I will be using the latest driver from branch 11 (at the time of writing that would be 11.6 / 450.156).
|
||||
|
||||
NVIDIA doesn't let you freely download vGPU drivers like they do with GeForce or normal Quadro drivers, instead you have to download them through the [NVIDIA Licensing Portal](https://nvid.nvidia.com/dashboard/) (see: [https://www.nvidia.com/en-us/drivers/vgpu-software-driver/](https://www.nvidia.com/en-us/drivers/vgpu-software-driver/)). You can sign up for a free evaluation to get access to the download page.
|
||||
|
||||
After downloading version 11.6 you should have a zip file called `NVIDIA-GRID-Linux-KVM-450.156-450.156.00-453.23.zip`, extract that and copy the file `NVIDIA-Linux-x86_64-450.156-vgpu-kvm.run` to your PVE host into the `/root/` folder
|
||||
```bash
|
||||
scp NVIDIA-Linux-x86_64-450.156-vgpu-kvm.run root@pve:/root/
|
||||
```
|
||||
NB: When applying for an eval license, do NOT use your personal email or other email at a free email provider like gmail.com. You will probably have to go through manual review if you use such emails. I have very good experience using a custom domain for my email address, that way the automatic verification usually lets me in after about five minutes.
|
||||
|
||||
I've created a small video tutorial to find the right driver version on the NVIDIA Enterprise Portal. In the video I'm downloading the 15.0 driver, if you want a different one just replace 15.0 with the version you want:
|
||||
|
||||

|
||||
|
||||
After downloading, extract the zip file and then copy the file called `NVIDIA-Linux-x86_64-DRIVERVERSION-vgpu-kvm.run` (where DRIVERVERSION is a string like `535.129.03`) from the `Host_Drivers` folder to your Proxmox host into the `/root/` folder using tools like FileZilla, WinSCP, scp or rsync.
|
||||
|
||||
### ⚠️ From here on, I will be using the 16.2 driver, but the steps are the same for other driver versions
|
||||
|
||||
For example when I run a command like `chmod +x NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm.run`, you should replace `535.129.03` with the driver version you are using (if you are using a different one). You can get the list of version numbers [here](#nvidia-driver).
|
||||
|
||||
Every step where you potentially have to replace the version name will have this warning emoji next to it: ⚠️
|
||||
|
||||
> ### Have a vgpu supported card? Read here!
|
||||
>
|
||||
> If you don't have a card like the Tesla P4, or any other gpu from [this list](https://docs.nvidia.com/grid/gpus-supported-by-vgpu.html), please continue reading at [Patching the driver](#patching-the-driver)
|
||||
>
|
||||
> With a supported gpu, patching the driver is not needed, so you should skip the next section. You can simply install the driver package like this:
|
||||
>
|
||||
> ⚠️
|
||||
> ```bash
|
||||
> chmod +x NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm.run
|
||||
> ./NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm.run --dkms
|
||||
> ```
|
||||
>
|
||||
> To finish the installation, reboot the system
|
||||
> ```bash
|
||||
> reboot
|
||||
> ```
|
||||
>
|
||||
> Now, skip the following two sections and continue at [Finishing touches](#finishing-touches)
|
||||
|
||||
### Patching the driver
|
||||
|
||||
Now, on the proxmox host, make the driver executable
|
||||
|
||||
⚠️
|
||||
```bash
|
||||
chmod +x NVIDIA-Linux-x86_64-450.156-vgpu-kvm.run
|
||||
chmod +x NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm.run
|
||||
```
|
||||
|
||||
And then unpack it
|
||||
And then patch it
|
||||
|
||||
⚠️
|
||||
```bash
|
||||
./NVIDIA-Linux-x86_64-450.156-vgpu-kvm.run -x
|
||||
./NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm.run --apply-patch ~/vgpu-proxmox/535.129.03.patch
|
||||
```
|
||||
That should output a lot of lines ending with
|
||||
```
|
||||
Self-extractible archive "NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm-custom.run" successfully created.
|
||||
```
|
||||
|
||||
Go inside the extracted folder
|
||||
```bash
|
||||
cd NVIDIA-Linux-x86_64-450.156-vgpu-kvm/
|
||||
```
|
||||
|
||||
To be able to install the driver on your proxmox host, apply the driver patch
|
||||
```bash
|
||||
patch -p0 < ~/vgpu-5.15/450_5.15.patch
|
||||
```
|
||||
|
||||
If everything went right (and you are using the exact same nvidia driver version 11.6), the output should be exactly this
|
||||
```
|
||||
patching file ./kernel/Kbuild
|
||||
patching file ./kernel/conftest.sh
|
||||
patching file ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.c
|
||||
patching file ./kernel/nvidia-vgpu-vfio/nvidia-vgpu-vfio.h
|
||||
patching file ./kernel/nvidia/nv-frontend.c
|
||||
```
|
||||
|
||||
There is a second patch you need to apply.
|
||||
|
||||
#### Warning: If you followed every step of this tutorial it should be safe to just apply it, but if you did anything different than I, you should check if the paths in the patch are valid for you.
|
||||
|
||||
```bash
|
||||
patch -p0 < ~/vgpu-5.15/unlock.patch
|
||||
```
|
||||
|
||||
The output should be exactly this
|
||||
```
|
||||
patching file ./kernel/nvidia/nvidia.Kbuild
|
||||
patching file ./kernel/nvidia/os-interface.c
|
||||
```
|
||||
You should now have a file called `NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm-custom.run`, that is your patched driver.
|
||||
|
||||
### Installing the driver
|
||||
|
||||
Now that all the required patches are applied, you can install the driver
|
||||
Now that the required patch is applied, you can install the driver
|
||||
|
||||
⚠️
|
||||
```bash
|
||||
./nvidia-installer --dkms
|
||||
./NVIDIA-Linux-x86_64-535.129.03-vgpu-kvm-custom.run --dkms
|
||||
```
|
||||
|
||||
The installer will ask you `Would you like to register the kernel module sources with DKMS? This will allow DKMS to automatically build a new module, if you install a different kernel later.`, answer with `Yes`.
|
||||
@@ -252,7 +351,7 @@ Depending on your hardware, the installation could take a minute or two.
|
||||
|
||||
If everything went right, you will be presented with this message.
|
||||
```
|
||||
Installation of the NVIDIA Accelerated Graphics Driver for Linux-x86_64 (version: 450.156) is now complete.
|
||||
Installation of the NVIDIA Accelerated Graphics Driver for Linux-x86_64 (version: 535.129.03) is now complete.
|
||||
```
|
||||
|
||||
Click `Ok` to exit the installer.
|
||||
@@ -271,16 +370,16 @@ nvidia-smi
|
||||
|
||||
You should get an output similar to this one
|
||||
```
|
||||
Mon Jan 3 20:41:15 2022
|
||||
Tue Jan 24 20:21:28 2023
|
||||
+-----------------------------------------------------------------------------+
|
||||
| NVIDIA-SMI 450.156 Driver Version: 450.156 CUDA Version: N/A |
|
||||
| NVIDIA-SMI 525.85.07 Driver Version: 525.85.07 CUDA Version: N/A |
|
||||
|-------------------------------+----------------------+----------------------+
|
||||
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
|
||||
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|
||||
| | | MIG M. |
|
||||
|===============================+======================+======================|
|
||||
| 0 T1000 On | 00000000:01:00.0 Off | N/A |
|
||||
| 32% 39C P8 N/A / 50W | 30MiB / 4095MiB | 0% Default |
|
||||
| 0 NVIDIA GeForce ... On | 00000000:01:00.0 Off | N/A |
|
||||
| 26% 33C P8 43W / 260W | 85MiB / 11264MiB | 0% Default |
|
||||
| | | N/A |
|
||||
+-------------------------------+----------------------+----------------------+
|
||||
|
||||
@@ -321,40 +420,211 @@ The output will be similar to this
|
||||
|
||||
If this command doesn't return any output, vGPU unlock isn't working.
|
||||
|
||||
### Bonus: working `nvidia-smi vgpu` command
|
||||
|
||||
I've included an adapted version of the `nvidia-smi` [wrapper script](https://github.com/erin-allison/nvidia-merged-arch/blob/d2ce752cd38461b53b7e017612410a3348aa86e5/nvidia-smi) to get useful output from `nvidia-smi vgpu`.
|
||||
|
||||
Without that wrapper script, running `nvidia-smi vgpu` in your shell results in this output
|
||||
```
|
||||
No supported devices in vGPU mode
|
||||
```
|
||||
|
||||
With the wrapper script, the output looks similar to this
|
||||
```
|
||||
Mon Jan 3 20:54:35 2022
|
||||
+-----------------------------------------------------------------------------+
|
||||
| NVIDIA-SMI 450.156 Driver Version: 450.156 |
|
||||
|---------------------------------+------------------------------+------------+
|
||||
| GPU Name | Bus-Id | GPU-Util |
|
||||
| vGPU ID Name | VM ID VM Name | vGPU-Util |
|
||||
|=================================+==============================+============|
|
||||
| 0 T1000 | 00000000:01:00.0 | 0% |
|
||||
+---------------------------------+------------------------------+------------+
|
||||
```
|
||||
|
||||
To install this script, copy the `nvidia-smi` file from this repo to `/usr/local/bin` and make it executable
|
||||
```bash
|
||||
cp ~/vgpu-5.15/nvidia-smi /usr/local/bin/
|
||||
chmod +x /usr/local/bin/nvidia-smi
|
||||
```
|
||||
|
||||
Run this in your shell (you might have to logout and back in first) to see if it worked
|
||||
Another command you can try to see if your card is recognized as being vgpu enabled is this one:
|
||||
```bash
|
||||
nvidia-smi vgpu
|
||||
```
|
||||
|
||||
## Credits
|
||||
If everything worked right with the unlock, the output should be similar to this:
|
||||
```
|
||||
Tue Jan 24 20:21:43 2023
|
||||
+-----------------------------------------------------------------------------+
|
||||
| NVIDIA-SMI 525.85.07 Driver Version: 525.85.07 |
|
||||
|---------------------------------+------------------------------+------------+
|
||||
| GPU Name | Bus-Id | GPU-Util |
|
||||
| vGPU ID Name | VM ID VM Name | vGPU-Util |
|
||||
|=================================+==============================+============|
|
||||
| 0 NVIDIA GeForce RTX 208... | 00000000:01:00.0 | 0% |
|
||||
+---------------------------------+------------------------------+------------+
|
||||
```
|
||||
|
||||
However, if you get this output, then something went wrong
|
||||
```
|
||||
No supported devices in vGPU mode
|
||||
```
|
||||
|
||||
If any of those commands give the wrong output, you cannot continue. Please make sure to read everything here very carefully and when in doubt, create an issue or join the [discord server](#support) and ask for help there.
|
||||
|
||||
## vGPU overrides
|
||||
|
||||
Further up we have created the file `/etc/vgpu_unlock/profile_override.toml` and I didn't explain what it was for yet. Using that file you can override lots of parameters for your vGPU instances: For example you can change the maximum resolution, enable/disable the frame rate limiter, enable/disable support for CUDA or change the vram size of your virtual gpus.
|
||||
|
||||
If we take a look at the output of `mdevctl types` we see lots of different types that we can choose from. However, if we for example chose `GRID RTX6000-4Q`, which gives us 4GB of vram in a VM, we are locked to that type for all of our VMs. Meaning we can only have 4GB VMs; it's not possible to mix different types to have one 4GB VM and two 2GB VMs.
|
||||
|
||||
> ### Important notes
|
||||
>
|
||||
> Q profiles *can* give you horrible performance in OpenGL applications/games. To fix that, switch to an equivalent A or B profile (for example `GRID RTX6000-4B`)
|
||||
>
|
||||
> C profiles (for example `GRID RTX6000-4C`) only work on Linux, don't try using those on Windows, it will not work - at all.
|
||||
>
|
||||
> A profiles (for example `GRID RTX6000-4A`) will NOT work on Linux, they only work on Windows.
|
||||
|
||||
All of that changes with the override config file. Technically we are still locked to only using one profile, but now it's possible to change the vram of the profile on a per-VM basis, so even though we have three `GRID RTX6000-4Q` instances, one VM can have 4GB of vram but we can override the vram size for the other two VMs to only 2GB.
|
||||
|
||||
Lets take a look at this example config override file (its in TOML format)
|
||||
```toml
|
||||
[profile.nvidia-259]
|
||||
num_displays = 1 # Max number of virtual displays. Usually 1 if you want a simple remote gaming VM
|
||||
display_width = 1920 # Maximum display width in the VM
|
||||
display_height = 1080 # Maximum display height in the VM
|
||||
max_pixels = 2073600 # This is the product of display_width and display_height so 1920 * 1080 = 2073600
|
||||
cuda_enabled = 1 # Enables CUDA support. Either 1 or 0 for enabled/disabled
|
||||
frl_enabled = 1 # This controls the frame rate limiter, if you enable it your fps in the VM get locked to 60fps. Either 1 or 0 for enabled/disabled
|
||||
framebuffer = 0x74000000
|
||||
framebuffer_reservation = 0xC000000 # In combination with the framebuffer size
|
||||
# above, these two lines will give you a VM
|
||||
# with 2GB of VRAM (framebuffer + framebuffer_reservation = VRAM size in bytes).
|
||||
# See below for some other sizes
|
||||
|
||||
[vm.100]
|
||||
frl_enabled = 0
|
||||
# You can override all the options from above here too. If you want to add more overrides for a new VM, just copy this block and change the VM ID
|
||||
```
|
||||
|
||||
There are two blocks here, the first being `[profile.nvidia-259]` and the second `[vm.100]`.
|
||||
The first one applies the overrides to all VM instances of the `nvidia-259` type (that's `GRID RTX6000-4Q`) and the second one applies its overrides only to one specific VM, the one with the proxmox VM ID `100`.
|
||||
|
||||
The proxmox VM ID is the same number that you see in the proxmox webinterface, next to the VM name.
|
||||
|
||||
You don't have to specify all parameters, only the ones you need/want. There are some more that I didn't mention here, you can find them by going through the source code of the `vgpu_unlock-rs` repo.
|
||||
|
||||
For a simple 1080p remote gaming VM I recommend going with something like this
|
||||
```toml
|
||||
[profile.nvidia-259] # choose the profile you want here
|
||||
num_displays = 1
|
||||
display_width = 1920
|
||||
display_height = 1080
|
||||
max_pixels = 2073600
|
||||
```
|
||||
|
||||
### Common VRAM sizes
|
||||
|
||||
Here are some common framebuffer sizes that you might want to use:
|
||||
|
||||
- 512MB:
|
||||
```toml
|
||||
framebuffer = 0x1A000000
|
||||
framebuffer_reservation = 0x6000000
|
||||
```
|
||||
- 1GB:
|
||||
```toml
|
||||
framebuffer = 0x38000000
|
||||
framebuffer_reservation = 0x8000000
|
||||
```
|
||||
- 2GB:
|
||||
```toml
|
||||
framebuffer = 0x74000000
|
||||
framebuffer_reservation = 0xC000000
|
||||
```
|
||||
- 3GB:
|
||||
```toml
|
||||
framebuffer = 0xB0000000
|
||||
framebuffer_reservation = 0x10000000
|
||||
```
|
||||
- 4GB:
|
||||
```toml
|
||||
framebuffer = 0xEC000000
|
||||
framebuffer_reservation = 0x14000000
|
||||
```
|
||||
- 5GB:
|
||||
```toml
|
||||
framebuffer = 0x128000000
|
||||
framebuffer_reservation = 0x18000000
|
||||
```
|
||||
- 6GB:
|
||||
```toml
|
||||
framebuffer = 0x164000000
|
||||
framebuffer_reservation = 0x1C000000
|
||||
```
|
||||
- 8GB:
|
||||
```toml
|
||||
framebuffer = 0x1DC000000
|
||||
framebuffer_reservation = 0x24000000
|
||||
```
|
||||
- 10GB:
|
||||
```toml
|
||||
framebuffer = 0x254000000
|
||||
framebuffer_reservation = 0x2C000000
|
||||
```
|
||||
- 12GB:
|
||||
```toml
|
||||
framebuffer = 0x2CC000000
|
||||
framebuffer_reservation = 0x34000000
|
||||
```
|
||||
- 16GB:
|
||||
```toml
|
||||
framebuffer = 0x3BC000000
|
||||
framebuffer_reservation = 0x44000000
|
||||
```
|
||||
- 20GB:
|
||||
```toml
|
||||
framebuffer = 0x4AC000000
|
||||
framebuffer_reservation = 0x54000000
|
||||
```
|
||||
- 24GB:
|
||||
```toml
|
||||
framebuffer = 0x59C000000
|
||||
framebuffer_reservation = 0x64000000
|
||||
```
|
||||
- 32GB:
|
||||
```toml
|
||||
framebuffer = 0x77C000000
|
||||
framebuffer_reservation = 0x84000000
|
||||
```
|
||||
- 48GB:
|
||||
```toml
|
||||
framebuffer = 0xB2D200000
|
||||
framebuffer_reservation = 0xD2E00000
|
||||
```
|
||||
|
||||
`framebuffer` and `framebuffer_reservation` will always equal the VRAM size in bytes when added together.
|
||||
|
||||
## Adding a vGPU to a Proxmox VM
|
||||
|
||||
Go to the proxmox webinterface, go to your VM, then to `Hardware`, then to `Add` and select `PCI Device`.
|
||||
You should be able to choose from a list of pci devices. Choose your GPU there, its entry should say `Yes` in the `Mediated Devices` column.
|
||||
|
||||
Now you should be able to also select the `MDev Type`. Choose whatever profile you want, if you don't remember which one you want, you can see the list of all available types with `mdevctl types`.
|
||||
|
||||
Finish by clicking `Add`, start the VM and install the required drivers. After installing the drivers you can shut the VM down and remove the virtual display adapter by selecting `Display` in the `Hardware` section and selecting `none (none)`. ONLY do that if you have some other way to access the Virtual Machine like Parsec or Remote Desktop because the Proxmox Console won't work anymore.
|
||||
|
||||
Enjoy your new vGPU VM :)
|
||||
|
||||
## Licensing
|
||||
|
||||
Usually a license is required to use vGPU, but luckily the community found several ways around that. Spoofing the vGPU instance to a Quadro GPU used to be very popular, but I don't recommend it anymore. I've also removed the related sections from this guide. If you still want it for whatever reason, you can go back in the commit history to find the instructions on how to use that.
|
||||
|
||||
The recommended way to get around the license is to set up your own license server. Follow the instructions [here](https://git.collinwebdesigns.de/oscar.krause/fastapi-dls) (or [here](https://gitea.publichub.eu/oscar.krause/fastapi-dls) if the other link is down).
|
||||
|
||||
## Common problems
|
||||
|
||||
Most problems can be solved by reading the instructions very carefully. For some very common problems, read here:
|
||||
|
||||
- The nvidia driver won't install/load
|
||||
- If you were using gpu passthrough before, revert **ALL** of the steps you did or start with a fresh proxmox installation. If you run `lspci -knnd 10de:` and see `vfio-pci` under `Kernel driver in use:` then you have to fix that
|
||||
- Make sure that you are using a supported kernel version (check `uname -a`)
|
||||
- My OpenGL performance is absolute garbage, what can I do?
|
||||
- Read [here](#important-notes)
|
||||
- `mdevctl types` doesn't output anything, how to fix it?
|
||||
- Make sure that you don't have unlock disabled if you have a consumer gpu ([more information](#have-a-vgpu-supported-card-read-here))
|
||||
- vGPU doesn't work on my RTX 3080! What to do?
|
||||
- [Learn to read](#your-rtx-30xx-or-40xx-will-not-work-at-this-point-in-time)
|
||||
- Make sure that you don't have any dummy plugs connected to the GPU ports, they may cause problems as [reported by a user from the vgpu discord](https://discord.com/channels/829786927829745685/1182258311014400040/1187339682082721822)
|
||||
|
||||
## Support
|
||||
|
||||
If something isn't working, please create an issue or join the [Discord server](https://discord.gg/5rQsSV3Byq) and ask for help in the `#proxmox-support` channel so that the community can help you.
|
||||
|
||||
> ### DO NOT SEND ME A DM, I'M NOT YOUR PERSONAL SUPPORT
|
||||
|
||||
When asking for help, please describe your problem in detail instead of just saying "vgpu doesn't work". Usually a rough overview over your system (gpu, mainboard, proxmox version, kernel version, ...) and full output of `dmesg` and/or `journalctl --no-pager -b 0 -u nvidia-vgpu-mgr.service` (<-- this only after starting the VM that causes trouble) is helpful.
|
||||
Please also provide the output of `uname -a` and `cat /proc/cmdline`
|
||||
|
||||
## Feed my coffee addiction ☕
|
||||
|
||||
If you found this guide helpful and want to support me, please feel free to [buy me a coffee](https://www.buymeacoffee.com/polloloco). Thank you very much!
|
||||
|
||||
## Further reading
|
||||
|
||||
Thanks to all these people (in no particular order) for making this project possible
|
||||
- [DualCoder](https://github.com/DualCoder) for his original [vgpu_unlock](https://github.com/DualCoder/vgpu_unlock) repo with the kernel hooks
|
||||
@@ -364,13 +634,9 @@ Thanks to all these people (in no particular order) for making this project poss
|
||||
- [rupansh](https://github.com/rupansh) for the original [twelve.patch](https://github.com/rupansh/vgpu_unlock_5.12/blob/master/twelve.patch) to patch the driver on kernels >= 5.12
|
||||
- mbuchel#1878 on the [GPU Unlocking discord](https://discord.gg/5rQsSV3Byq) for [fourteen.patch](https://gist.github.com/erin-allison/5f8acc33fa1ac2e4c0f77fdc5d0a3ed1) to patch the driver on kernels >= 5.14
|
||||
- [erin-allison](https://github.com/erin-allison) for the [nvidia-smi wrapper script](https://github.com/erin-allison/nvidia-merged-arch/blob/d2ce752cd38461b53b7e017612410a3348aa86e5/nvidia-smi)
|
||||
- LIL'pingu#9069 on the [GPU Unlocking discord](https://discord.gg/5rQsSV3Byq) for his patch to nop out code that NVIDIA added to prevent usage of drivers with a version 460 - 470 with consumer cards
|
||||
|
||||
If I forgot to mention someone, please create an issue or let me know otherwise.
|
||||
|
||||
## TODO (soon tm)
|
||||
|
||||
- Add basic profile_override.toml config
|
||||
- Add proxmox VM installation guide
|
||||
|
||||
## Contributing
|
||||
Pull requests are welcome (factual errors, amendments, grammar/spelling mistakes, etc.).
|
||||
BIN
downloading_driver.mp4
Normal file
BIN
downloading_driver.mp4
Normal file
Binary file not shown.
12
nvidia-smi
12
nvidia-smi
@@ -1,12 +0,0 @@
|
||||
#!/usr/bin/bash
|
||||
|
||||
for a in $*
|
||||
do
|
||||
case $a in
|
||||
vgpu)
|
||||
export LD_PRELOAD="/opt/vgpu_unlock-rs/target/release/libvgpu_unlock_rs.so"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
exec /usr/bin/nvidia-smi $@
|
||||
17
unlock.patch
17
unlock.patch
@@ -1,17 +0,0 @@
|
||||
--- ./kernel/nvidia/nvidia.Kbuild
|
||||
+++ ./kernel/nvidia/nvidia.Kbuild
|
||||
@@ -203,3 +203,4 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += pm_runtime_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += pci_class_multimedia_hd_audio
|
||||
+ldflags-y += -T /opt/vgpu_unlock/kern.ld
|
||||
|
||||
--- ./kernel/nvidia/os-interface.c
|
||||
+++ ./kernel/nvidia/os-interface.c
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
#include "nv-time.h"
|
||||
|
||||
-
|
||||
+#include "/opt/vgpu_unlock/vgpu_unlock_hooks.c"
|
||||
|
||||
Reference in New Issue
Block a user