19d8f590f0
xen.spec - bsc#945167 - Running command ’ xl pci-assignable-add 03:10.1’ secondly show errors 55f7f9d2-libxl-slightly-refine-pci-assignable-add-remove-handling.patch - Upstream patches from Jan 55f2e438-x86-hvm-fix-saved-pmtimer-and-hpet-values.patch 55f9345b-x86-MSI-fail-if-no-hardware-support.patch 5604f239-x86-PV-properly-populate-descriptor-tables.patch 5604f2e6-vt-d-fix-IM-bit-mask-and-unmask-of-FECTL_REG.patch 560a4af9-x86-EPT-tighten-conditions-of-IOMMU-mapping-updates.patch 560a7c36-x86-p2m-pt-delay-freeing-of-intermediate-page-tables.patch 560a7c53-x86-p2m-pt-ignore-pt-share-flag-for-shadow-mode-guests.patch 560bd926-credit1-fix-tickling-when-it-happens-from-a-remote-pCPU.patch 560e6d34-x86-p2m-pt-tighten-conditions-of-IOMMU-mapping-updates.patch - bsc#941074 - VmError: Device 51728 (vbd) could not be connected. Hotplug scripts not working. hotplug-Linux-block-performance-fix.patch - bsc#947165 - VUL-0: CVE-2015-7311: xen: libxl fails to honour readonly flag on disks with qemu-xen (xsa-142) CVE-2015-7311-xsa142.patch OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=378
352 lines
12 KiB
Diff
352 lines
12 KiB
Diff
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
|
|
|
|
# Commit 082fdc6ce85e5b603f8fb24553cf200e3b67889f
|
|
# Date 2015-07-23 10:14:59 +0200
|
|
# Author Jan Beulich <jbeulich@suse.com>
|
|
# Committer Jan Beulich <jbeulich@suse.com>
|
|
x86/MSI-X: be more careful during teardown
|
|
|
|
When a device gets detached from a guest, pciback will clear its
|
|
command register, thus disabling both memory and I/O decoding. The
|
|
disabled memory decoding, however, has an effect on the MSI-X table
|
|
accesses the hypervisor does: These won't have the intended effect
|
|
anymore. Even worse, for PCIe devices (but not SR-IOV virtual
|
|
functions) such accesses may (will?) be treated as Unsupported
|
|
Requests, causing respective errors to be surfaced, potentially in the
|
|
form of NMIs that may be fatal to the hypervisor or Dom0 in different
|
|
ways. Hence rather than carrying out these accesses, we should avoid
|
|
them where we can, and use alternative (e.g. PCI config space based)
|
|
mechanisms to achieve at least the same effect.
|
|
|
|
At this time it continues to be unclear whether this is fixing an
|
|
actual bug or is rather just working around bogus (but apparently
|
|
common) system behavior.
|
|
|
|
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
|
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
|
---
|
|
Backporting note (largely to myself):
|
|
Depends on (not yet backported to 4.4 and earlier) commit 061eebe0e
|
|
"x86/MSI: drop workaround for insecure Dom0 kernels" (due to re-use
|
|
of struct arch_msix's warned field).
|
|
|
|
--- a/xen/arch/x86/irq.c
|
|
+++ b/xen/arch/x86/irq.c
|
|
@@ -217,9 +217,9 @@ void destroy_irq(unsigned int irq)
|
|
}
|
|
|
|
spin_lock_irqsave(&desc->lock, flags);
|
|
- desc->status |= IRQ_DISABLED;
|
|
desc->status &= ~IRQ_GUEST;
|
|
desc->handler->shutdown(desc);
|
|
+ desc->status |= IRQ_DISABLED;
|
|
action = desc->action;
|
|
desc->action = NULL;
|
|
desc->msi_desc = NULL;
|
|
@@ -995,8 +995,8 @@ void __init release_irq(unsigned int irq
|
|
spin_lock_irqsave(&desc->lock,flags);
|
|
action = desc->action;
|
|
desc->action = NULL;
|
|
- desc->status |= IRQ_DISABLED;
|
|
desc->handler->shutdown(desc);
|
|
+ desc->status |= IRQ_DISABLED;
|
|
spin_unlock_irqrestore(&desc->lock,flags);
|
|
|
|
/* Wait to make sure it's not being used on another CPU */
|
|
@@ -1732,8 +1732,8 @@ static irq_guest_action_t *__pirq_guest_
|
|
BUG_ON(action->in_flight != 0);
|
|
|
|
/* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
|
|
- desc->status |= IRQ_DISABLED;
|
|
desc->handler->disable(desc);
|
|
+ desc->status |= IRQ_DISABLED;
|
|
|
|
/*
|
|
* Mark any remaining pending EOIs as ready to flush.
|
|
--- a/xen/arch/x86/msi.c
|
|
+++ b/xen/arch/x86/msi.c
|
|
@@ -123,6 +123,27 @@ static void msix_put_fixmap(struct arch_
|
|
spin_unlock(&msix->table_lock);
|
|
}
|
|
|
|
+static bool_t memory_decoded(const struct pci_dev *dev)
|
|
+{
|
|
+ u8 bus, slot, func;
|
|
+
|
|
+ if ( !dev->info.is_virtfn )
|
|
+ {
|
|
+ bus = dev->bus;
|
|
+ slot = PCI_SLOT(dev->devfn);
|
|
+ func = PCI_FUNC(dev->devfn);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ bus = dev->info.physfn.bus;
|
|
+ slot = PCI_SLOT(dev->info.physfn.devfn);
|
|
+ func = PCI_FUNC(dev->info.physfn.devfn);
|
|
+ }
|
|
+
|
|
+ return !!(pci_conf_read16(dev->seg, bus, slot, func, PCI_COMMAND) &
|
|
+ PCI_COMMAND_MEMORY);
|
|
+}
|
|
+
|
|
/*
|
|
* MSI message composition
|
|
*/
|
|
@@ -166,7 +187,7 @@ void msi_compose_msg(unsigned vector, co
|
|
}
|
|
}
|
|
|
|
-static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
|
|
+static bool_t read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
|
|
{
|
|
switch ( entry->msi_attrib.type )
|
|
{
|
|
@@ -201,6 +222,8 @@ static void read_msi_msg(struct msi_desc
|
|
{
|
|
void __iomem *base = entry->mask_base;
|
|
|
|
+ if ( unlikely(!memory_decoded(entry->dev)) )
|
|
+ return 0;
|
|
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
|
|
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
|
|
msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
|
|
@@ -212,6 +235,8 @@ static void read_msi_msg(struct msi_desc
|
|
|
|
if ( iommu_intremap )
|
|
iommu_read_msi_from_ire(entry, msg);
|
|
+
|
|
+ return 1;
|
|
}
|
|
|
|
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
|
|
@@ -262,6 +287,8 @@ static int write_msi_msg(struct msi_desc
|
|
{
|
|
void __iomem *base = entry->mask_base;
|
|
|
|
+ if ( unlikely(!memory_decoded(entry->dev)) )
|
|
+ return -ENXIO;
|
|
writel(msg->address_lo,
|
|
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
|
|
writel(msg->address_hi,
|
|
@@ -289,7 +316,8 @@ void set_msi_affinity(struct irq_desc *d
|
|
ASSERT(spin_is_locked(&desc->lock));
|
|
|
|
memset(&msg, 0, sizeof(msg));
|
|
- read_msi_msg(msi_desc, &msg);
|
|
+ if ( !read_msi_msg(msi_desc, &msg) )
|
|
+ return;
|
|
|
|
msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
|
|
@@ -349,23 +377,27 @@ int msi_maskable_irq(const struct msi_de
|
|
|| entry->msi_attrib.maskbit;
|
|
}
|
|
|
|
-static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
|
|
+static bool_t msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
|
|
{
|
|
struct msi_desc *entry = desc->msi_desc;
|
|
+ struct pci_dev *pdev;
|
|
+ u16 seg;
|
|
+ u8 bus, slot, func;
|
|
bool_t flag = host || guest;
|
|
|
|
ASSERT(spin_is_locked(&desc->lock));
|
|
BUG_ON(!entry || !entry->dev);
|
|
+ pdev = entry->dev;
|
|
+ seg = pdev->seg;
|
|
+ bus = pdev->bus;
|
|
+ slot = PCI_SLOT(pdev->devfn);
|
|
+ func = PCI_FUNC(pdev->devfn);
|
|
switch ( entry->msi_attrib.type )
|
|
{
|
|
case PCI_CAP_ID_MSI:
|
|
if ( entry->msi_attrib.maskbit )
|
|
{
|
|
u32 mask_bits;
|
|
- u16 seg = entry->dev->seg;
|
|
- u8 bus = entry->dev->bus;
|
|
- u8 slot = PCI_SLOT(entry->dev->devfn);
|
|
- u8 func = PCI_FUNC(entry->dev->devfn);
|
|
|
|
mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
|
|
mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
|
|
@@ -374,25 +406,54 @@ static void msi_set_mask_bit(struct irq_
|
|
}
|
|
break;
|
|
case PCI_CAP_ID_MSIX:
|
|
- {
|
|
- int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
|
|
- writel(flag, entry->mask_base + offset);
|
|
- readl(entry->mask_base + offset);
|
|
- break;
|
|
- }
|
|
+ if ( likely(memory_decoded(pdev)) )
|
|
+ {
|
|
+ writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
|
+ readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
|
+ break;
|
|
+ }
|
|
+ if ( flag )
|
|
+ {
|
|
+ u16 control;
|
|
+ domid_t domid = pdev->domain->domain_id;
|
|
+
|
|
+ pdev->msix->host_maskall = 1;
|
|
+ control = pci_conf_read16(seg, bus, slot, func,
|
|
+ msix_control_reg(entry->msi_attrib.pos));
|
|
+ if ( control & PCI_MSIX_FLAGS_MASKALL )
|
|
+ break;
|
|
+ pci_conf_write16(seg, bus, slot, func,
|
|
+ msix_control_reg(entry->msi_attrib.pos),
|
|
+ control | PCI_MSIX_FLAGS_MASKALL);
|
|
+ if ( pdev->msix->warned != domid )
|
|
+ {
|
|
+ pdev->msix->warned = domid;
|
|
+ printk(XENLOG_G_WARNING
|
|
+ "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
|
|
+ desc->irq, domid, pdev->seg, pdev->bus,
|
|
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ /* fall through */
|
|
default:
|
|
- BUG();
|
|
- break;
|
|
+ return 0;
|
|
}
|
|
entry->msi_attrib.host_masked = host;
|
|
entry->msi_attrib.guest_masked = guest;
|
|
+
|
|
+ return 1;
|
|
}
|
|
|
|
static int msi_get_mask_bit(const struct msi_desc *entry)
|
|
{
|
|
- switch (entry->msi_attrib.type) {
|
|
+ if ( !entry->dev )
|
|
+ return -1;
|
|
+
|
|
+ switch ( entry->msi_attrib.type )
|
|
+ {
|
|
case PCI_CAP_ID_MSI:
|
|
- if (!entry->dev || !entry->msi_attrib.maskbit)
|
|
+ if ( !entry->msi_attrib.maskbit )
|
|
break;
|
|
return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
|
|
PCI_SLOT(entry->dev->devfn),
|
|
@@ -400,6 +461,8 @@ static int msi_get_mask_bit(const struct
|
|
entry->msi.mpos) >>
|
|
entry->msi_attrib.entry_nr) & 1;
|
|
case PCI_CAP_ID_MSIX:
|
|
+ if ( unlikely(!memory_decoded(entry->dev)) )
|
|
+ break;
|
|
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
|
|
}
|
|
return -1;
|
|
@@ -407,12 +470,16 @@ static int msi_get_mask_bit(const struct
|
|
|
|
void mask_msi_irq(struct irq_desc *desc)
|
|
{
|
|
- msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
|
|
+ if ( unlikely(!msi_set_mask_bit(desc, 1,
|
|
+ desc->msi_desc->msi_attrib.guest_masked)) )
|
|
+ BUG_ON(!(desc->status & IRQ_DISABLED));
|
|
}
|
|
|
|
void unmask_msi_irq(struct irq_desc *desc)
|
|
{
|
|
- msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
|
|
+ if ( unlikely(!msi_set_mask_bit(desc, 0,
|
|
+ desc->msi_desc->msi_attrib.guest_masked)) )
|
|
+ WARN();
|
|
}
|
|
|
|
void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask)
|
|
@@ -422,13 +489,15 @@ void guest_mask_msi_irq(struct irq_desc
|
|
|
|
static unsigned int startup_msi_irq(struct irq_desc *desc)
|
|
{
|
|
- msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
|
|
+ if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
|
|
+ WARN();
|
|
return 0;
|
|
}
|
|
|
|
static void shutdown_msi_irq(struct irq_desc *desc)
|
|
{
|
|
- msi_set_mask_bit(desc, 1, 1);
|
|
+ if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
|
|
+ BUG_ON(!(desc->status & IRQ_DISABLED));
|
|
}
|
|
|
|
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
|
|
@@ -742,6 +811,9 @@ static int msix_capability_init(struct p
|
|
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
|
|
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
|
|
|
|
+ if ( unlikely(!memory_decoded(dev)) )
|
|
+ return -ENXIO;
|
|
+
|
|
if ( desc )
|
|
{
|
|
entry = alloc_msi_entry(1);
|
|
@@ -881,7 +953,8 @@ static int msix_capability_init(struct p
|
|
++msix->used_entries;
|
|
|
|
/* Restore MSI-X enabled bits */
|
|
- pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
|
|
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
|
|
+ control & ~PCI_MSIX_FLAGS_MASKALL);
|
|
|
|
return 0;
|
|
}
|
|
@@ -1026,8 +1099,16 @@ static void __pci_disable_msix(struct ms
|
|
|
|
BUG_ON(list_empty(&dev->msi_list));
|
|
|
|
- writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
|
-
|
|
+ if ( likely(memory_decoded(dev)) )
|
|
+ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
|
+ else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
|
|
+ {
|
|
+ printk(XENLOG_WARNING
|
|
+ "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
|
|
+ entry->irq, dev->seg, dev->bus,
|
|
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
|
|
+ control |= PCI_MSIX_FLAGS_MASKALL;
|
|
+ }
|
|
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
|
|
|
|
_pci_cleanup_msix(dev->msix);
|
|
@@ -1201,15 +1282,24 @@ int pci_restore_msi_state(struct pci_dev
|
|
nr = entry->msi.nvec;
|
|
}
|
|
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
|
|
+ {
|
|
msix_set_enable(pdev, 0);
|
|
+ if ( unlikely(!memory_decoded(pdev)) )
|
|
+ {
|
|
+ spin_unlock_irqrestore(&desc->lock, flags);
|
|
+ return -ENXIO;
|
|
+ }
|
|
+ }
|
|
|
|
msg = entry->msg;
|
|
write_msi_msg(entry, &msg);
|
|
|
|
for ( i = 0; ; )
|
|
{
|
|
- msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked,
|
|
- entry[i].msi_attrib.guest_masked);
|
|
+ if ( unlikely(!msi_set_mask_bit(desc,
|
|
+ entry[i].msi_attrib.host_masked,
|
|
+ entry[i].msi_attrib.guest_masked)) )
|
|
+ BUG();
|
|
|
|
if ( !--nr )
|
|
break;
|