PCI Reset Bug unter Ryzen + KVM VFIO PCIe Passthrough

Hast Du Probleme mit Hardware, die durch die anderen Foren nicht abgedeckt werden? Schau auch in den "Tipps und Tricks"-Bereich.
Antworten
Knogle
Beiträge: 465
Registriert: 06.05.2016 19:29:00
Lizenz eigener Beiträge: MIT Lizenz

PCI Reset Bug unter Ryzen + KVM VFIO PCIe Passthrough

Beitrag von Knogle » 31.01.2021 00:01:53

Ich grüße euch Freunde.

Aktuell habe ich ein Ryzen-System unter Debian am Laufen und recht viele KVM-Guests. Das Ganze läuft prima. Nun habe ich eine SAS-Karte + Expander und eine 56G-InfiniBand-Karte, welche ich an meinen Guest durchreichen möchte, um RDMA zu fahren.
Nun ist jedoch das Problem: Ich kann die Karten (SAS + Expander) und die Netzwerkkarte attachen, jedoch bekomme ich dann im Zielsystem, dem BSD, eine Kernel Panic. Da bin ich mir nicht sicher, ob es sich um ein Problem beim Durchreichen handelt oder um ein Problem mit den Treibern im Zielsystem, was jedoch aufgrund der super Unterstützung und meiner bisherigen Erfahrungen damit eher rausfällt.

Nun habe ich mir statt meiner Mellanox ConnectX2 testweise eine Mellanox ConnectX3 gekauft, um zu schauen, wie es damit klappt. Jedoch tritt dort wohl der PCI Reset Bug unter Ryzen auf, wodurch die PCIe-Devices nicht korrekt resettet werden und daher nicht eingebunden werden können.
Das führt meistens auch kurz danach zu einem Crash des Host-Systems.
So sieht das ganze dann aus:

Code: Alles auswählen

internal error: Unknown PCI header type '127' for device '0000:2b:00.0'
Hier mal ein

Code: Alles auswählen

virsh nodedev-dumpxml

Code: Alles auswählen

virsh nodedev-dumpxml pci_0000_2b_00_0 
<device>
  <name>pci_0000_2b_00_0</name>
  <path>/sys/devices/pci0000:00/0000:00:03.1/0000:2b:00.0</path>
  <parent>pci_0000_00_03_1</parent>
  <driver>
    <name>vfio-pci</name>
  </driver>
  <capability type='pci'>
    <class>0x020000</class>
    <domain>0</domain>
    <bus>43</bus>
    <slot>0</slot>
    <function>0</function>
    <product id='0x1003'>MT27500 Family [ConnectX-3]</product>
    <vendor id='0x15b3'>Mellanox Technologies</vendor>
    <iommuGroup number='14'>
      <address domain='0x0000' bus='0x2b' slot='0x00' function='0x0'/>
    </iommuGroup>
    <pci-express>
      <link validity='cap' port='8' speed='8' width='4'/>
      <link validity='sta' speed='8' width='4'/>
    </pci-express>
  </capability>
</device>

Entsprechend habe ich die XML angepasst, um das Device mit

Code: Alles auswählen

virsh attach-device
dem Guest hinzuzufügen.

Code: Alles auswählen

<hostdev mode='subsystem' type='pci' managed='yes'>
  <source>
      <address domain='0x0000' bus='0x2b' slot='0x00' function='0x00'/>
  </source>
</hostdev>
Daher hier das

Code: Alles auswählen

virsh dumpxml
von meinem Guest.

Code: Alles auswählen

<hostdev mode='subsystem' type='pci' managed='yes'>
  <source>
      <address domain='0x0000' bus='0x2b' slot='0x00' function='0x00'/>
  </source>
</hostdev>
[root@millenium-fbe48 ~]# virsh dumpxml FreeNAS 
<domain type='kvm'>
  <name>FreeNAS</name>
  <uuid>47f2aa38-60ad-41e5-a0ee-42d763621df6</uuid>
  <metadata>
    <libosinfo:libosinfo xmlns:libosinfo="http://libosinfo.org/xmlns/libvirt/domain/1.0">
      <libosinfo:os id="http://freebsd.org/freebsd/11.4"/>
    </libosinfo:libosinfo>
    <cockpit_machines:data xmlns:cockpit_machines="https://github.com/cockpit-project/cockpit/tree/master/pkg/machines">
      <cockpit_machines:has_install_phase>false</cockpit_machines:has_install_phase>
      <cockpit_machines:install_source_type>url</cockpit_machines:install_source_type>
      <cockpit_machines:install_source>http://download.freenas.org/11.3/STABLE/U4.1/x64/FreeNAS-11.3-U4.1.iso</cockpit_machines:install_source>
      <cockpit_machines:os_variant>freebsd11.4</cockpit_machines:os_variant>
    </cockpit_machines:data>
  </metadata>
  <memory unit='KiB'>16777216</memory>
  <currentMemory unit='KiB'>16777216</currentMemory>
  <vcpu placement='static'>16</vcpu>
  <os>
    <type arch='x86_64' machine='pc-q35-5.1'>hvm</type>
    <loader readonly='yes' type='pflash'>/usr/share/edk2/ovmf/OVMF_CODE.fd</loader>
    <nvram>/var/lib/libvirt/qemu/nvram/FreeNAS.fd</nvram>
    <boot dev='hd'/>
  </os>
  <features>
    <acpi/>
    <apic/>
    <vmport state='off'/>
    <ioapic driver='kvm'/>
  </features>
  <cpu mode='host-model' check='partial'/>
  <clock offset='utc'>
    <timer name='rtc' tickpolicy='catchup'/>
    <timer name='pit' tickpolicy='delay'/>
    <timer name='hpet' present='no'/>
  </clock>
  <on_poweroff>destroy</on_poweroff>
  <on_reboot>restart</on_reboot>
  <on_crash>destroy</on_crash>
  <pm>
    <suspend-to-mem enabled='no'/>
    <suspend-to-disk enabled='no'/>
  </pm>
  <devices>
    <emulator>/usr/bin/qemu-system-x86_64</emulator>
    <disk type='file' device='disk'>
      <driver name='qemu' type='qcow2'/>
      <source file='/var/lib/libvirt/images/FreeNAS.qcow2'/>
      <target dev='sda' bus='sata'/>
      <address type='drive' controller='0' bus='0' target='0' unit='0'/>
    </disk>
    <disk type='volume' device='disk'>
      <driver name='qemu'/>
      <source pool='images' volume='FreeNAS-11.3-U4.1.iso'/>
      <target dev='sdb' bus='sata'/>
      <address type='drive' controller='0' bus='0' target='0' unit='1'/>
    </disk>
    <controller type='usb' index='0' model='ich9-ehci1'>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' function='0x7'/>
    </controller>
    <controller type='usb' index='0' model='ich9-uhci1'>
      <master startport='0'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' function='0x0' multifunction='on'/>
    </controller>
    <controller type='usb' index='0' model='ich9-uhci2'>
      <master startport='2'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' function='0x1'/>
    </controller>
    <controller type='usb' index='0' model='ich9-uhci3'>
      <master startport='4'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' function='0x2'/>
    </controller>
    <controller type='sata' index='0'>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/>
    </controller>
    <controller type='pci' index='0' model='pcie-root'/>
    <controller type='pci' index='1' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='1' port='0x10'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0' multifunction='on'/>
    </controller>
    <controller type='pci' index='2' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='2' port='0x11'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x1'/>
    </controller>
    <controller type='pci' index='3' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='3' port='0x12'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x2'/>
    </controller>
    <controller type='pci' index='4' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='4' port='0x13'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x3'/>
    </controller>
    <controller type='pci' index='5' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='5' port='0x14'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x4'/>
    </controller>
    <controller type='pci' index='6' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='6' port='0x15'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x5'/>
    </controller>
    <controller type='pci' index='7' model='pcie-to-pci-bridge'>
      <model name='pcie-pci-bridge'/>
      <address type='pci' domain='0x0000' bus='0x04' slot='0x00' function='0x0'/>
    </controller>
    <controller type='virtio-serial' index='0'>
      <address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/>
    </controller>
    <interface type='direct'>
      <mac address='52:54:00:6d:0f:28'/>
      <source dev='bridge200' mode='vepa'/>
      <model type='e1000e'/>
      <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
    </interface>
    <serial type='pty'>
      <target type='isa-serial' port='0'>
        <model name='isa-serial'/>
      </target>
    </serial>
    <console type='pty'>
      <target type='serial' port='0'/>
    </console>
    <channel type='spicevmc'>
      <target type='virtio' name='com.redhat.spice.0'/>
      <address type='virtio-serial' controller='0' bus='0' port='1'/>
    </channel>
    <input type='tablet' bus='usb'>
      <address type='usb' bus='0' port='1'/>
    </input>
    <input type='mouse' bus='ps2'/>
    <input type='keyboard' bus='ps2'/>
    <graphics type='spice' autoport='yes' listen='127.0.0.1'>
      <listen type='address' address='127.0.0.1'/>
      <image compression='off'/>
    </graphics>
    <graphics type='vnc' port='-1' autoport='yes' listen='127.0.0.1'>
      <listen type='address' address='127.0.0.1'/>
    </graphics>
    <sound model='ich9'>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' function='0x0'/>
    </sound>
    <video>
      <model type='qxl' ram='65536' vram='65536' vgamem='16384' heads='1' primary='yes'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/>
    </video>
    <hostdev mode='subsystem' type='pci' managed='yes'>
      <source>
        <address domain='0x0000' bus='0x2c' slot='0x00' function='0x0'/>
      </source>
      <address type='pci' domain='0x0000' bus='0x05' slot='0x00' function='0x0'/>
    </hostdev>
    <hostdev mode='subsystem' type='pci' managed='yes'>
      <source>
        <address domain='0x0000' bus='0x2b' slot='0x00' function='0x0'/>
      </source>
      <address type='pci' domain='0x0000' bus='0x07' slot='0x01' function='0x0'/>
    </hostdev>
    <redirdev bus='usb' type='spicevmc'>
      <address type='usb' bus='0' port='2'/>
    </redirdev>
    <redirdev bus='usb' type='spicevmc'>
      <address type='usb' bus='0' port='3'/>
    </redirdev>
    <memballoon model='virtio'>
      <address type='pci' domain='0x0000' bus='0x03' slot='0x00' function='0x0'/>
    </memballoon>
  </devices>
</domain>
Hat jemand entsprechende Tipps, wie ich die Karte doch einbinden kann?
In einigen Foren haben andere ebenfalls das Problem unter der Plattform, jedoch beim Durchreichen von Grafikkarten, da Ryzen wohl als Serverplattform eher weniger eine Rolle spielt.
Dazu gibt es bereits einen Patch, aber hat jemand eine Idee, wie ich den auf meine Mellanox-Karte abwandeln kann?
Das Durchreichen zum Guest als virtio-Device ist leider keine Option. Oder soll ich eventuell wieder die ConnectX2 probieren, ob man damit vielleicht was erreichen kann? Denn dort tritt der Header-Bug zumindest nicht auf.

Code: Alles auswählen

From 69ea42207b544b6e3fa9755022bff09d2ce953d9 Mon Sep 17 00:00:00 2001
From: Geoffrey McRae <geoff@hostfission.com>
Date: Thu, 12 Sep 2019 03:19:28 +1000
Subject: [PATCH] pci quirk: AMD Navi 10 series vendor specific reset

Signed-off-by: Geoffrey McRae <geoff@hostfission.com>
---
 drivers/pci/quirks.c | 98 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 44c4ae1abd00..d94ddb1c6832 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3825,6 +3825,97 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
 	return 0;
 }
 
+/*
+ * AMD Navi 10 series GPUs require a vendor specific reset procedure.
+ * According to AMD a PSP mode 2 reset should be enough however at this
+ * time the details of how to perform this are not available to us.
+ * Instead we can signal the SMU to enter and exit BACO which has the same
+ * desired effect.
+ */
+static int reset_amd_navi10(struct pci_dev *dev, int probe)
+{
+	const int mmMP0_SMN_C2PMSG_81 = 0x16091;
+	const int mmMP1_SMN_C2PMSG_66 = 0x16282;
+	const int mmMP1_SMN_C2PMSG_82 = 0x16292;
+	const int mmMP1_SMN_C2PMSG_90 = 0x1629a;
+
+	u16 cfg;
+	resource_size_t mmio_base, mmio_size;
+	uint32_t __iomem * mmio;
+	unsigned int sol;
+	unsigned int timeout;
+
+	/* bus resets still cause navi to flake out */
+	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
+
+	if (probe)
+		return 0;
+
+	/* save the PCI state and enable memory access */
+	pci_save_state(dev);
+	pci_read_config_word(dev, PCI_COMMAND, &cfg);
+	pci_write_config_word(dev, PCI_COMMAND, cfg | PCI_COMMAND_MEMORY);
+
+	/* map BAR5 */
+	mmio_base = pci_resource_start(dev, 5);
+	mmio_size = pci_resource_len(dev, 5);
+	mmio = ioremap_nocache(mmio_base, mmio_size);
+	if (mmio == NULL) {
+		pci_disable_device(dev);
+		pci_err(dev, "Navi10: cannot iomap device\n");
+		return 0;
+	}
+
+	/* check the sign of life indicator */
+	sol = readl(mmio + mmMP0_SMN_C2PMSG_81);
+	pci_info(dev, "Navi10: SOL 0x%x\n", sol);
+	if (sol == 0 || sol == 0xffffffff) {
+		pci_info(dev, "Navi10: device doesn't need to be reset\n");
+		goto out;
+	}
+
+	pci_info(dev, "Navi10: performing BACO reset\n");
+
+	/* the SMU might be busy already, wait for it */
+	for(timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+	readl(mmio + mmMP1_SMN_C2PMSG_90);
+
+	/* send PPSMC_MSG_ArmD3 */
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_90);
+	writel(0x46, mmio + mmMP1_SMN_C2PMSG_66);
+	for(timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+
+	/* send PPSMC_MSG_EnterBaco with param */
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_90);
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_82);
+	writel(0x18, mmio + mmMP1_SMN_C2PMSG_66);
+	for(timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+
+	/* wait for the regulators to shutdown */
+	msleep(400);
+
+	/* send PPSMC_MSG_ExitBaco */
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_90);
+	writel(0x19, mmio + mmMP1_SMN_C2PMSG_66);
+	for(timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+
+	/* wait for regulators to startup again */
+	msleep(400);
+
+out:
+	/* unmap BAR5 */
+	iounmap(mmio);
+
+	/* restore the PCI state and command register */
+	pci_restore_state(dev);
+	pci_write_config_word(dev, PCI_COMMAND, cfg);
+	return 0;
+}
+
 static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
 		 reset_intel_82599_sfp_virtfn },
@@ -3836,6 +3927,13 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
 	{ PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr },
 	{ PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
 		reset_chelsio_generic_dev },
+	{ PCI_VENDOR_ID_ATI, 0x7310, reset_amd_navi10 },
+	{ PCI_VENDOR_ID_ATI, 0x7312, reset_amd_navi10 },
+	{ PCI_VENDOR_ID_ATI, 0x7318, reset_amd_navi10 },
+	{ PCI_VENDOR_ID_ATI, 0x7319, reset_amd_navi10 },
+	{ PCI_VENDOR_ID_ATI, 0x731a, reset_amd_navi10 },
+	{ PCI_VENDOR_ID_ATI, 0x731b, reset_amd_navi10 },
+	{ PCI_VENDOR_ID_ATI, 0x731f, reset_amd_navi10 },
 	{ 0 }
 };
 
-- 
2.20.1

Antworten