296 lines
11 KiB
Diff
296 lines
11 KiB
Diff
|
# HG changeset patch
|
||
|
# User Jan Beulich <jbeulich@novell.com>
|
||
|
# Date 1313226898 -3600
|
||
|
# Node ID 8d6edc3d26d26931f3732a2008fb4818bc7bab2d
|
||
|
# Parent 68b903bb1b01b2a6ef9c6e8ead3be3c1c2208341
|
||
|
x86/PCI-MSI: properly determine VF BAR values
|
||
|
|
||
|
As was discussed a couple of times on this list, SR-IOV virtual
|
||
|
functions have their BARs read as zero - the physical function's
|
||
|
SR-IOV capability structure must be consulted instead. The bogus
|
||
|
warnings people complained about are being eliminated with this
|
||
|
change.
|
||
|
|
||
|
Signed-off-by: Jan Beulich <jbeulich@novell.com>
|
||
|
|
||
|
--- a/xen/arch/x86/msi.c
|
||
|
+++ b/xen/arch/x86/msi.c
|
||
|
@@ -522,12 +522,48 @@ static int msi_capability_init(struct pc
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
-static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
|
||
|
+static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf)
|
||
|
{
|
||
|
u8 limit;
|
||
|
- u32 addr;
|
||
|
+ u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0;
|
||
|
|
||
|
- switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
|
||
|
+ if ( vf >= 0 )
|
||
|
+ {
|
||
|
+ struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func));
|
||
|
+ unsigned int pos = pci_find_ext_capability(0, bus,
|
||
|
+ PCI_DEVFN(slot, func),
|
||
|
+ PCI_EXT_CAP_ID_SRIOV);
|
||
|
+ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
|
||
|
+ u16 num_vf = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_NUM_VF);
|
||
|
+ u16 offset = pci_conf_read16(bus, slot, func,
|
||
|
+ pos + PCI_SRIOV_VF_OFFSET);
|
||
|
+ u16 stride = pci_conf_read16(bus, slot, func,
|
||
|
+ pos + PCI_SRIOV_VF_STRIDE);
|
||
|
+
|
||
|
+ if ( !pdev || !pos ||
|
||
|
+ !(ctrl & PCI_SRIOV_CTRL_VFE) ||
|
||
|
+ !(ctrl & PCI_SRIOV_CTRL_MSE) ||
|
||
|
+ !num_vf || !offset || (num_vf > 1 && !stride) ||
|
||
|
+ bir >= PCI_SRIOV_NUM_BARS ||
|
||
|
+ !pdev->vf_rlen[bir] )
|
||
|
+ return 0;
|
||
|
+ base = pos + PCI_SRIOV_BAR;
|
||
|
+ vf -= PCI_BDF(bus, slot, func) + offset;
|
||
|
+ if ( vf < 0 || (vf && vf % stride) )
|
||
|
+ return 0;
|
||
|
+ if ( stride )
|
||
|
+ {
|
||
|
+ if ( vf % stride )
|
||
|
+ return 0;
|
||
|
+ vf /= stride;
|
||
|
+ }
|
||
|
+ if ( vf >= num_vf )
|
||
|
+ return 0;
|
||
|
+ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
|
||
|
+ disp = vf * pdev->vf_rlen[bir];
|
||
|
+ limit = PCI_SRIOV_NUM_BARS;
|
||
|
+ }
|
||
|
+ else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
|
||
|
{
|
||
|
case PCI_HEADER_TYPE_NORMAL:
|
||
|
limit = 6;
|
||
|
@@ -544,7 +580,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
|
||
|
|
||
|
if ( bir >= limit )
|
||
|
return 0;
|
||
|
- addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4);
|
||
|
+ addr = pci_conf_read32(bus, slot, func, base + bir * 4);
|
||
|
if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
|
||
|
return 0;
|
||
|
if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
|
||
|
@@ -552,11 +588,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
|
||
|
addr &= PCI_BASE_ADDRESS_MEM_MASK;
|
||
|
if ( ++bir >= limit )
|
||
|
return 0;
|
||
|
- return addr |
|
||
|
- ((u64)pci_conf_read32(bus, slot, func,
|
||
|
- PCI_BASE_ADDRESS_0 + bir * 4) << 32);
|
||
|
+ return addr + disp +
|
||
|
+ ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32);
|
||
|
}
|
||
|
- return addr & PCI_BASE_ADDRESS_MEM_MASK;
|
||
|
+ return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
@@ -629,11 +664,29 @@ static int msix_capability_init(struct p
|
||
|
|
||
|
if ( !dev->msix_nr_entries )
|
||
|
{
|
||
|
+ u8 pbus, pslot, pfunc;
|
||
|
+ int vf;
|
||
|
u64 pba_paddr;
|
||
|
u32 pba_offset;
|
||
|
|
||
|
+ if ( !dev->info.is_virtfn )
|
||
|
+ {
|
||
|
+ pbus = bus;
|
||
|
+ pslot = slot;
|
||
|
+ pfunc = func;
|
||
|
+ vf = -1;
|
||
|
+ }
|
||
|
+ else
|
||
|
+ {
|
||
|
+ pbus = dev->info.physfn.bus;
|
||
|
+ pslot = PCI_SLOT(dev->info.physfn.devfn);
|
||
|
+ pfunc = PCI_FUNC(dev->info.physfn.devfn);
|
||
|
+ vf = PCI_BDF2(dev->bus, dev->devfn);
|
||
|
+ }
|
||
|
+
|
||
|
ASSERT(!dev->msix_used_entries);
|
||
|
- WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir));
|
||
|
+ WARN_ON(msi->table_base !=
|
||
|
+ read_pci_mem_bar(pbus, pslot, pfunc, bir, vf));
|
||
|
|
||
|
dev->msix_nr_entries = nr_entries;
|
||
|
dev->msix_table.first = PFN_DOWN(table_paddr);
|
||
|
@@ -645,7 +698,7 @@ static int msix_capability_init(struct p
|
||
|
pba_offset = pci_conf_read32(bus, slot, func,
|
||
|
msix_pba_offset_reg(pos));
|
||
|
bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
|
||
|
- pba_paddr = read_pci_mem_bar(bus, slot, func, bir);
|
||
|
+ pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf);
|
||
|
WARN_ON(!pba_paddr);
|
||
|
pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
|
||
|
|
||
|
--- a/xen/drivers/passthrough/pci.c
|
||
|
+++ b/xen/drivers/passthrough/pci.c
|
||
|
@@ -145,6 +145,7 @@ void pci_enable_acs(struct pci_dev *pdev
|
||
|
int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
|
||
|
{
|
||
|
struct pci_dev *pdev;
|
||
|
+ unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
|
||
|
const char *pdev_type;
|
||
|
int ret = -ENOMEM;
|
||
|
|
||
|
@@ -153,7 +154,14 @@ int pci_add_device(u8 bus, u8 devfn, con
|
||
|
else if (info->is_extfn)
|
||
|
pdev_type = "extended function";
|
||
|
else if (info->is_virtfn)
|
||
|
+ {
|
||
|
+ spin_lock(&pcidevs_lock);
|
||
|
+ pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn);
|
||
|
+ spin_unlock(&pcidevs_lock);
|
||
|
+ if ( !pdev )
|
||
|
+ pci_add_device(info->physfn.bus, info->physfn.devfn, NULL);
|
||
|
pdev_type = "virtual function";
|
||
|
+ }
|
||
|
else
|
||
|
return -EINVAL;
|
||
|
|
||
|
@@ -164,6 +172,70 @@ int pci_add_device(u8 bus, u8 devfn, con
|
||
|
|
||
|
if ( info )
|
||
|
pdev->info = *info;
|
||
|
+ else if ( !pdev->vf_rlen[0] )
|
||
|
+ {
|
||
|
+ unsigned int pos = pci_find_ext_capability(0, bus, devfn,
|
||
|
+ PCI_EXT_CAP_ID_SRIOV);
|
||
|
+ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
|
||
|
+
|
||
|
+ if ( !pos )
|
||
|
+ /* Nothing */;
|
||
|
+ else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
|
||
|
+ {
|
||
|
+ unsigned int i;
|
||
|
+
|
||
|
+ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
|
||
|
+ for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
|
||
|
+ {
|
||
|
+ unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
|
||
|
+ u32 bar = pci_conf_read32(bus, slot, func, idx);
|
||
|
+ u32 hi = 0;
|
||
|
+
|
||
|
+ if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
|
||
|
+ PCI_BASE_ADDRESS_SPACE_IO )
|
||
|
+ {
|
||
|
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf"
|
||
|
+ " BAR%u in IO space\n",
|
||
|
+ bus, slot, func, i);
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+ pci_conf_write32(bus, slot, func, idx, ~0);
|
||
|
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
|
||
|
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
|
||
|
+ {
|
||
|
+ if ( i >= PCI_SRIOV_NUM_BARS )
|
||
|
+ {
|
||
|
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with"
|
||
|
+ " 64-bit vf BAR in last slot\n",
|
||
|
+ bus, slot, func);
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ hi = pci_conf_read32(bus, slot, func, idx + 4);
|
||
|
+ pci_conf_write32(bus, slot, func, idx + 4, ~0);
|
||
|
+ }
|
||
|
+ pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) &
|
||
|
+ PCI_BASE_ADDRESS_MEM_MASK;
|
||
|
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
|
||
|
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
|
||
|
+ {
|
||
|
+ pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func,
|
||
|
+ idx + 4) << 32;
|
||
|
+ pci_conf_write32(bus, slot, func, idx + 4, hi);
|
||
|
+ }
|
||
|
+ else if ( pdev->vf_rlen[i] )
|
||
|
+ pdev->vf_rlen[i] |= (u64)~0 << 32;
|
||
|
+ pci_conf_write32(bus, slot, func, idx, bar);
|
||
|
+ pdev->vf_rlen[i] = -pdev->vf_rlen[i];
|
||
|
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
|
||
|
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
|
||
|
+ ++i;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ else
|
||
|
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual"
|
||
|
+ " functions already enabled (%04x)\n",
|
||
|
+ bus, slot, func, ctrl);
|
||
|
+ }
|
||
|
|
||
|
ret = 0;
|
||
|
if ( !pdev->domain )
|
||
|
@@ -183,7 +255,7 @@ int pci_add_device(u8 bus, u8 devfn, con
|
||
|
out:
|
||
|
spin_unlock(&pcidevs_lock);
|
||
|
printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
|
||
|
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
|
||
|
+ bus, slot, func);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
--- a/xen/include/xen/pci.h
|
||
|
+++ b/xen/include/xen/pci.h
|
||
|
@@ -57,6 +57,7 @@ struct pci_dev {
|
||
|
const u8 bus;
|
||
|
const u8 devfn;
|
||
|
struct pci_dev_info info;
|
||
|
+ u64 vf_rlen[6];
|
||
|
};
|
||
|
|
||
|
#define for_each_pdev(domain, pdev) \
|
||
|
--- a/xen/include/xen/pci_regs.h
|
||
|
+++ b/xen/include/xen/pci_regs.h
|
||
|
@@ -425,7 +425,7 @@
|
||
|
#define PCI_EXT_CAP_ID_ACS 13
|
||
|
#define PCI_EXT_CAP_ID_ARI 14
|
||
|
#define PCI_EXT_CAP_ID_ATS 15
|
||
|
-#define PCI_EXT_CAP_ID_IOV 16
|
||
|
+#define PCI_EXT_CAP_ID_SRIOV 16
|
||
|
|
||
|
/* Advanced Error Reporting */
|
||
|
#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
|
||
|
@@ -545,4 +545,35 @@
|
||
|
#define PCI_ACS_CTRL 0x06 /* ACS Control Register */
|
||
|
#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */
|
||
|
|
||
|
+/* Single Root I/O Virtualization */
|
||
|
+#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
|
||
|
+#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */
|
||
|
+#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
|
||
|
+#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */
|
||
|
+#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */
|
||
|
+#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */
|
||
|
+#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */
|
||
|
+#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */
|
||
|
+#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */
|
||
|
+#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */
|
||
|
+#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */
|
||
|
+#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */
|
||
|
+#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
|
||
|
+#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */
|
||
|
+#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */
|
||
|
+#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */
|
||
|
+#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */
|
||
|
+#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */
|
||
|
+#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */
|
||
|
+#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */
|
||
|
+#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */
|
||
|
+#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */
|
||
|
+#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset */
|
||
|
+#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */
|
||
|
+#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */
|
||
|
+#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */
|
||
|
+#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */
|
||
|
+#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */
|
||
|
+#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */
|
||
|
+
|
||
|
#endif /* LINUX_PCI_REGS_H */
|