xen/55b0a283-x86-MSI-X-teardown.patch
Charles Arnold 9e9b5acb9c
- Upstream patches from Jan
  5604f239-x86-PV-properly-populate-descriptor-tables.patch
  561bbc8b-VT-d-don-t-suppress-invalidation-address-write-when-0.patch
  561d2046-VT-d-use-proper-error-codes-in-iommu_enable_x2apic_IR.patch
  561d20a0-x86-hide-MWAITX-from-PV-domains.patch
  561e3283-x86-NUMA-fix-SRAT-table-processor-entry-handling.patch

- bsc#951845 - VUL-0: CVE-2015-7972: xen: x86: populate-on-demand
  balloon size inaccuracy can crash guests (XSA-153)
  xsa153-libxl.patch

- bsc#950703 - VUL-1: CVE-2015-7969: xen: leak of main per-domain
  vcpu pointer array (DoS) (XSA-149)
  xsa149.patch
- bsc#950705 - VUL-1: CVE-2015-7969: xen: x86: leak of per-domain
  profiling-related vcpu pointer array (DoS) (XSA-151)
  xsa151.patch
- bsc#950706 - VUL-0: CVE-2015-7971: xen: x86: some pmu and
  profiling hypercalls log without rate limiting (XSA-152)
  xsa152.patch
- Dropped
  55dc7937-x86-IO-APIC-don-t-create-pIRQ-mapping-from-masked-RTE.patch
  5604f239-x86-PV-properly-populate-descriptor-tables.patch

- bsc#932267 - VUL-1: CVE-2015-4037: qemu,kvm,xen: insecure
  temporary file use in /net/slirp.c
  CVE-2015-4037-qemuu-smb-config-dir-name.patch
  CVE-2015-4037-qemut-smb-config-dir-name.patch
- bsc#877642 - VUL-0: CVE-2014-0222: qemu: qcow1: validate L2 table
  size to avoid integer overflows

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=382

References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 082fdc6ce85e5b603f8fb24553cf200e3b67889f
# Date 2015-07-23 10:14:59 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI-X: be more careful during teardown

When a device gets detached from a guest, pciback will clear its
command register, thus disabling both memory and I/O decoding. The
disabled memory decoding, however, has an effect on the MSI-X table
accesses the hypervisor does: These won't have the intended effect
anymore. Even worse, for PCIe devices (but not SR-IOV virtual
functions) such accesses may (will?) be treated as Unsupported
Requests, causing respective errors to be surfaced, potentially in the
form of NMIs that may be fatal to the hypervisor or Dom0 in different
ways. Hence rather than carrying out these accesses, we should avoid
them where we can, and use alternative (e.g. PCI config space based)
mechanisms to achieve at least the same effect.
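
For illustration, the check boils down to a single config space read
(a sketch of the memory_decoded() helper the patch introduces below;
seg/bus/slot/func name the device as elsewhere in xen/arch/x86/msi.c):

  /* MSI-X table MMIO accesses are only reliable while the memory
   * decode bit of the command register is set; for an SR-IOV virtual
   * function, the owning physical function's register governs this. */
  if ( !(pci_conf_read16(seg, bus, slot, func, PCI_COMMAND) &
         PCI_COMMAND_MEMORY) )
      return -ENXIO;    /* leave the MSI-X table untouched */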

At this time it continues to be unclear whether this is fixing an
actual bug or is rather just working around bogus (but apparently
common) system behavior.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
Backporting note (largely to myself):
Depends on (not yet backported to 4.4 and earlier) commit 061eebe0e
"x86/MSI: drop workaround for insecure Dom0 kernels" (due to re-use
of struct arch_msix's warned field).
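
Note on the fallback used below: when memory decoding is found to be
off, msi_set_mask_bit() can no longer reach the per-entry mask bit in
the MSI-X table, so it instead sets the function-wide "mask all" bit,
which lives in PCI config space and hence stays accessible. In rough
outline (seg/bus/slot/func as above, pos = entry->msi_attrib.pos):

  /* The per-entry vector control word is MMIO and unreachable; mask
   * the whole function via the MSI-X control register instead. */
  u16 control = pci_conf_read16(seg, bus, slot, func,
                                msix_control_reg(pos));

  if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
      pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                       control | PCI_MSIX_FLAGS_MASKALL);
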
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -217,9 +217,9 @@ void destroy_irq(unsigned int irq)
}
spin_lock_irqsave(&desc->lock, flags);
- desc->status |= IRQ_DISABLED;
desc->status &= ~IRQ_GUEST;
desc->handler->shutdown(desc);
+ desc->status |= IRQ_DISABLED;
action = desc->action;
desc->action = NULL;
desc->msi_desc = NULL;
@@ -995,8 +995,8 @@ void __init release_irq(unsigned int irq
spin_lock_irqsave(&desc->lock,flags);
action = desc->action;
desc->action = NULL;
- desc->status |= IRQ_DISABLED;
desc->handler->shutdown(desc);
+ desc->status |= IRQ_DISABLED;
spin_unlock_irqrestore(&desc->lock,flags);
/* Wait to make sure it's not being used on another CPU */
@@ -1732,8 +1732,8 @@ static irq_guest_action_t *__pirq_guest_
BUG_ON(action->in_flight != 0);
/* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
- desc->status |= IRQ_DISABLED;
desc->handler->disable(desc);
+ desc->status |= IRQ_DISABLED;
/*
* Mark any remaining pending EOIs as ready to flush.
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -123,6 +123,27 @@ static void msix_put_fixmap(struct arch_
spin_unlock(&msix->table_lock);
}
+static bool_t memory_decoded(const struct pci_dev *dev)
+{
+ u8 bus, slot, func;
+
+ if ( !dev->info.is_virtfn )
+ {
+ bus = dev->bus;
+ slot = PCI_SLOT(dev->devfn);
+ func = PCI_FUNC(dev->devfn);
+ }
+ else
+ {
+ bus = dev->info.physfn.bus;
+ slot = PCI_SLOT(dev->info.physfn.devfn);
+ func = PCI_FUNC(dev->info.physfn.devfn);
+ }
+
+ return !!(pci_conf_read16(dev->seg, bus, slot, func, PCI_COMMAND) &
+ PCI_COMMAND_MEMORY);
+}
+
/*
* MSI message composition
*/
@@ -166,7 +187,7 @@ void msi_compose_msg(unsigned vector, co
}
}
-static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+static bool_t read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
switch ( entry->msi_attrib.type )
{
@@ -201,6 +222,8 @@ static void read_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ return 0;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
@@ -212,6 +235,8 @@ static void read_msi_msg(struct msi_desc
if ( iommu_intremap )
iommu_read_msi_from_ire(entry, msg);
+
+ return 1;
}
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
@@ -262,6 +287,8 @@ static int write_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ return -ENXIO;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
@@ -289,7 +316,8 @@ void set_msi_affinity(struct irq_desc *d
ASSERT(spin_is_locked(&desc->lock));
memset(&msg, 0, sizeof(msg));
- read_msi_msg(msi_desc, &msg);
+ if ( !read_msi_msg(msi_desc, &msg) )
+ return;
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
@@ -349,23 +377,27 @@ int msi_maskable_irq(const struct msi_de
|| entry->msi_attrib.maskbit;
}
-static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
+static bool_t msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
{
struct msi_desc *entry = desc->msi_desc;
+ struct pci_dev *pdev;
+ u16 seg;
+ u8 bus, slot, func;
bool_t flag = host || guest;
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
+ pdev = entry->dev;
+ seg = pdev->seg;
+ bus = pdev->bus;
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
switch ( entry->msi_attrib.type )
{
case PCI_CAP_ID_MSI:
if ( entry->msi_attrib.maskbit )
{
u32 mask_bits;
- u16 seg = entry->dev->seg;
- u8 bus = entry->dev->bus;
- u8 slot = PCI_SLOT(entry->dev->devfn);
- u8 func = PCI_FUNC(entry->dev->devfn);
mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
@@ -374,25 +406,54 @@ static void msi_set_mask_bit(struct irq_
}
break;
case PCI_CAP_ID_MSIX:
- {
- int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
- writel(flag, entry->mask_base + offset);
- readl(entry->mask_base + offset);
- break;
- }
+ if ( likely(memory_decoded(pdev)) )
+ {
+ writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ break;
+ }
+ if ( flag )
+ {
+ u16 control;
+ domid_t domid = pdev->domain->domain_id;
+
+ pdev->msix->host_maskall = 1;
+ control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
+ if ( control & PCI_MSIX_FLAGS_MASKALL )
+ break;
+ pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | PCI_MSIX_FLAGS_MASKALL);
+ if ( pdev->msix->warned != domid )
+ {
+ pdev->msix->warned = domid;
+ printk(XENLOG_G_WARNING
+ "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
+ desc->irq, domid, pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ }
+ break;
+ }
+ /* fall through */
default:
- BUG();
- break;
+ return 0;
}
entry->msi_attrib.host_masked = host;
entry->msi_attrib.guest_masked = guest;
+
+ return 1;
}
static int msi_get_mask_bit(const struct msi_desc *entry)
{
- switch (entry->msi_attrib.type) {
+ if ( !entry->dev )
+ return -1;
+
+ switch ( entry->msi_attrib.type )
+ {
case PCI_CAP_ID_MSI:
- if (!entry->dev || !entry->msi_attrib.maskbit)
+ if ( !entry->msi_attrib.maskbit )
break;
return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
PCI_SLOT(entry->dev->devfn),
@@ -400,6 +461,8 @@ static int msi_get_mask_bit(const struct
entry->msi.mpos) >>
entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ break;
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
}
return -1;
@@ -407,12 +470,16 @@ static int msi_get_mask_bit(const struct
void mask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc, 1,
+ desc->msi_desc->msi_attrib.guest_masked)) )
+ BUG_ON(!(desc->status & IRQ_DISABLED));
}
void unmask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc, 0,
+ desc->msi_desc->msi_attrib.guest_masked)) )
+ WARN();
}
void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask)
@@ -422,13 +489,15 @@ void guest_mask_msi_irq(struct irq_desc
static unsigned int startup_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
+ if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
+ WARN();
return 0;
}
static void shutdown_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 1, 1);
+ if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
+ BUG_ON(!(desc->status & IRQ_DISABLED));
}
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
@@ -740,6 +809,9 @@ static int msix_capability_init(struct p
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
+ if ( unlikely(!memory_decoded(dev)) )
+ return -ENXIO;
+
if ( desc )
{
entry = alloc_msi_entry(1);
@@ -879,7 +951,8 @@ static int msix_capability_init(struct p
++msix->used_entries;
/* Restore MSI-X enabled bits */
- pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control & ~PCI_MSIX_FLAGS_MASKALL);
return 0;
}
@@ -1024,8 +1097,16 @@ static void __pci_disable_msix(struct ms
BUG_ON(list_empty(&dev->msi_list));
- writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-
+ if ( likely(memory_decoded(dev)) )
+ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
+ {
+ printk(XENLOG_WARNING
+ "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
+ entry->irq, dev->seg, dev->bus,
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ control |= PCI_MSIX_FLAGS_MASKALL;
+ }
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
_pci_cleanup_msix(dev->msix);
@@ -1199,15 +1280,24 @@ int pci_restore_msi_state(struct pci_dev
nr = entry->msi.nvec;
}
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+ {
msix_set_enable(pdev, 0);
+ if ( unlikely(!memory_decoded(pdev)) )
+ {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return -ENXIO;
+ }
+ }
msg = entry->msg;
write_msi_msg(entry, &msg);
for ( i = 0; ; )
{
- msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked,
- entry[i].msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc,
+ entry[i].msi_attrib.host_masked,
+ entry[i].msi_attrib.guest_masked)) )
+ BUG();
if ( !--nr )
break;