Same issue as #5 - https://github.com/mbilker/vgpu_unlock-rs/issues/5
Host Setup:
- Proxmox 7.2 on Debian
- Quadro RTX 4000
Guest Setup
- Ubuntu 20.04 Desktop
- Quadro RTX 6000
I followed Jeff's (Craft Computing) guide for this software and am able to list the available vGPU types using "mdevctl types". I am looking to split the RTX 4000 into 4 instances of 2 GB each.
Error
root@p53:~# qm start 311
no efidisk configured! Using temporary efivars disk.
mdev instance '00000000-0000-0000-0000-000000000311' already existed, using it.
kvm: -device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:01:00.0/00000000-0000-0000-0000-000000000311,id=hostpci0,bus=pci.0,addr=0x10: warning: vfio 00000000-0000-0000-0000-000000000311: Could not enable error recovery for the device
kvm: -device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:01:00.0/00000000-0000-0000-0000-000000000311,id=hostpci0,bus=pci.0,addr=0x10: vfio 00000000-0000-0000-0000-000000000311: failed to read device config space: Bad address
start failed: QEMU exited with code 1
root@p53:~#
.
Setup
.
root@p53:~# nvidia-smi
Mon May 23 17:13:41 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03 Driver Version: 510.47.03 CUDA Version: N/A |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 Quadro RTX 4000 On | 00000000:01:00.0 Off | N/A |
| N/A 42C P8 15W / N/A | 54MiB / 8192MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
root@p53:~#
.
root@p53:~# systemctl stop nvidia-vgpu-mgr.service
root@p53:~# systemctl status nvidia-vgpu-mgr.service
● nvidia-vgpu-mgr.service - NVIDIA vGPU Manager Daemon
Loaded: loaded (/lib/systemd/system/nvidia-vgpu-mgr.service; enabled; vendor preset: enabled)
Drop-In: /etc/systemd/system/nvidia-vgpu-mgr.service.d
└─vgpu_unlock.conf
Active: inactive (dead) since Mon 2022-05-23 17:04:21 +08; 39s ago
Process: 48056 ExecStart=/usr/bin/nvidia-vgpu-mgr (code=exited, status=0/SUCCESS)
Process: 53989 ExecStopPost=/bin/rm -rf /var/run/nvidia-vgpu-mgr (code=exited, status=0/SUCCESS)
Main PID: 48057 (code=exited, status=0/SUCCESS)
CPU: 18ms
May 23 16:47:35 p53.rubicon.local nvidia-vgpu-mgr[49013]: error: vmiop_log: /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xea) [0x7f>
May 23 16:47:35 p53.rubicon.local nvidia-vgpu-mgr[49013]: error: vmiop_log: vgpu(+0x3c1d) [0x562973203c1d]
May 23 16:47:35 p53.rubicon.local nvidia-vgpu-mgr[49013]: error: vmiop_log: (0x0): init_device_instance failed for inst 0 with error 3 (v>
May 23 16:47:35 p53.rubicon.local nvidia-vgpu-mgr[49013]: error: vmiop_log: (0x0): Initialization: init_device_instance failed error 3
May 23 16:47:35 p53.rubicon.local nvidia-vgpu-mgr[49013]: error: vmiop_log: display_init failed for inst: 0
May 23 16:47:35 p53.rubicon.local nvidia-vgpu-mgr[49013]: error: vmiop_env_log: (0x0): vmiope_process_configuration: plugin registration >
May 23 16:47:35 p53.rubicon.local nvidia-vgpu-mgr[49013]: error: vmiop_env_log: (0x0): vmiope_process_configuration failed with 0x5b
May 23 17:04:20 p53.rubicon.local systemd[1]: Stopping NVIDIA vGPU Manager Daemon...
May 23 17:04:21 p53.rubicon.local systemd[1]: nvidia-vgpu-mgr.service: Succeeded.
May 23 17:04:21 p53.rubicon.local systemd[1]: Stopped NVIDIA vGPU Manager Daemon.
root@p53:~# systemctl start nvidia-vgpu-mgr.service
root@p53:~# systemctl status nvidia-vgpu-mgr.service
● nvidia-vgpu-mgr.service - NVIDIA vGPU Manager Daemon
Loaded: loaded (/lib/systemd/system/nvidia-vgpu-mgr.service; enabled; vendor preset: enabled)
Drop-In: /etc/systemd/system/nvidia-vgpu-mgr.service.d
└─vgpu_unlock.conf
Active: active (running) since Mon 2022-05-23 17:05:34 +08; 3s ago
Process: 54378 ExecStart=/usr/bin/nvidia-vgpu-mgr (code=exited, status=0/SUCCESS)
Main PID: 54379 (nvidia-vgpu-mgr)
Tasks: 1 (limit: 76740)
Memory: 304.0K
CPU: 6ms
CGroup: /system.slice/nvidia-vgpu-mgr.service
└─54379 /usr/bin/nvidia-vgpu-mgr
May 23 17:05:34 p53.rubicon.local systemd[1]: Starting NVIDIA vGPU Manager Daemon...
May 23 17:05:34 p53.rubicon.local systemd[1]: Started NVIDIA vGPU Manager Daemon.
May 23 17:05:34 p53.rubicon.local nvidia-vgpu-mgr[54379]: notice: vmiop_env_log: nvidia-vgpu-mgr daemon started
root@p53:~#
.
root@p53:~# systemctl status nvidia-vgpud.service
● nvidia-vgpud.service - NVIDIA vGPU Daemon
Loaded: loaded (/lib/systemd/system/nvidia-vgpud.service; enabled; vendor preset: enabled)
Drop-In: /etc/systemd/system/nvidia-vgpud.service.d
└─vgpu_unlock.conf
Active: inactive (dead) since Mon 2022-05-23 17:16:45 +08; 1s ago
Process: 58289 ExecStart=/usr/bin/nvidia-vgpud (code=exited, status=0/SUCCESS)
Process: 58291 ExecStopPost=/bin/rm -rf /var/run/nvidia-vgpud (code=exited, status=0/SUCCESS)
Main PID: 58290 (code=exited, status=0/SUCCESS)
CPU: 245ms
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: BAR1 Length: 0x100
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: Frame Rate Limiter enabled: 0x1
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: Number of Displays: 1
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: Max pixels: 1310720
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: Display: width 1280, height 1024
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: License: GRID-Virtual-Apps,3.0
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: PID file unlocked.
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: PID file closed.
May 23 17:16:45 p53.rubicon.local nvidia-vgpud[58290]: Shutdown (58290)
May 23 17:16:45 p53.rubicon.local systemd[1]: nvidia-vgpud.service: Succeeded.
root@p53:~#
.
root@p53:~# cat /etc/vgpu_unlock/profile_override.toml
[profile.nvidia-257]
num_displays = 1
display_width = 1920
display_height = 1080
max_pixels = 2073600
cuda_enabled = 1
frl_enabled = 60
framebuffer = 1968526677
pci_id = 0x1E3012BA
pci_device_id = 0x1E30
root@p53:~#
.
root@p53:~# cat /etc/pve/nodes/p53/qemu-server/311.conf
agent: 1
args: -uuid 00000000-0000-0000-0000-000000000311
bios: ovmf
boot: order=scsi0;ide2;net0
cores: 4
hostpci0: 0000:01:00.0,mdev=nvidia-257
ide2: local:iso/ubuntu-20.04.4-desktop-amd64.iso,media=cdrom,size=3299872K
machine: q35
memory: 2048
meta: creation-qemu=6.2.0,ctime=1653292108
name: u311.u2004
net0: virtio=26:69:CE:5F:52:16,bridge=vmbr1,firewall=1,tag=30
numa: 0
ostype: l26
scsi0: local:311/vm-311-disk-0.qcow2,size=60G
scsihw: virtio-scsi-pci
smbios1: uuid=d2e8b2af-319f-44a5-aeb5-c0c449b7d92d
sockets: 1
vmgenid: f2a8f0f4-7b18-4a9c-903e-c4161e6d7abc
root@p53:~#
.
Even if I proceed with the guest VM (Ubuntu 20.04) setup and install the NVIDIA driver, the driver will not run:
(from guest vm)
$ sudo apt install nvidia-driver-470
$ nvidia-smi
NVIDIA_SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running
Would appreciate any help to move this along. Thanks!