xen/26692-x86-fully-protect-MSI-X-table-from-PV-guest-accesses.patch
Charles Arnold 387129caad
- bnc#797014 - no way to control live migrations
- bnc#803712 - after live migration rcu_sched_state detected stalls
  xen.migrate.tools-xend_move_assert_to_exception_block.patch
  xen.migrate.tools-libxc_print_stats_if_migration_is_aborted.patch
  xen.migrate.tools_set_number_of_dirty_pages_during_migration.patch
  xen.migrate.tools_notify_restore_to_hangup_during_migration_--abort_if_busy.patch

- bnc#811764 - XEN (hypervisor or kernel) has a problem with EFI
  variable services
  x86-EFI-set-variable-permit-attrs.patch
- Upstream patches from Jan
  26060-ACPI-ERST-table-size-checks.patch
  26692-x86-fully-protect-MSI-X-table-from-PV-guest-accesses.patch
  26702-powernow-add-fixups-for-AMD-P-state-figures.patch
  26704-x86-MCA-suppress-bank-clearing-for-certain-injected-events.patch (bnc#805579)
  26731-AMD-IOMMU-Process-softirqs-while-building-dom0-iommu-mappings.patch
  26733-VT-d-Enumerate-IOMMUs-when-listing-capabilities.patch
  26734-ACPI-ERST-Name-table-in-otherwise-opaque-error-messages.patch
  26736-ACPI-APEI-Unlock-apei_iomaps_lock-on-error-path.patch
  26737-ACPI-APEI-Add-apei_exec_run_optional.patch
  26742-IOMMU-properly-check-whether-interrupt-remapping-is-enabled.patch
  26743-VT-d-deal-with-5500-5520-X58-errata.patch (bnc#801910)
  26744-AMD-IOMMU-allow-disabling-only-interrupt-remapping.patch
- PVonHVM: __devinit was removed in linux-3.8

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=234
2013-04-04 17:05:57 +00:00

# Commit 4245d331e0e75de8d1bddbbb518f3a8ce6d0bb7e
# Date 2013-03-08 14:05:34 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI: add mechanism to fully protect MSI-X table from PV guest accesses

This adds two new physdev operations for Dom0 to invoke when resource
allocation for devices is known to be complete, so that the hypervisor
can arrange for the respective MMIO ranges to be marked read-only
before an eventual guest getting such a device assigned even gets
started, such that it won't be able to set up writable mappings for
these MMIO ranges before Xen has a chance to protect them.

This also addresses another issue with the code being modified here,
in that so far write protection for the address ranges in question got
set up only once during the lifetime of a device (i.e. until either
system shutdown or device hot removal), while teardown happened when
the last interrupt was disposed of by the guest (which at least allowed
the tables to be writable when the device got assigned to a second
guest [instance] after the first terminated).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
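
An illustrative, non-normative sketch of the Dom0 side (not part of the
patch below): how a Dom0 kernel could invoke the two new operations once
BAR assignment for an MSI-X capable device is final.  It assumes the usual
Linux HYPERVISOR_physdev_op() hypercall wrapper and that the
PHYSDEVOP_prepare_msix / PHYSDEVOP_release_msix constants and
struct physdev_pci_device introduced here are mirrored into the kernel's
copy of the Xen headers; the helper name announce_msix_resources is made
up for the example.

    /* Sketch only: tell Xen whether MSI-X MMIO resources are now fixed. */
    #include <linux/types.h>
    #include <xen/interface/physdev.h>
    #include <asm/xen/hypercall.h>

    static int announce_msix_resources(u16 seg, u8 bus, u8 devfn, bool final)
    {
        struct physdev_pci_device dev = {
            .seg   = seg,
            .bus   = bus,
            .devfn = devfn,
        };

        /*
         * prepare: Xen marks the device's MSI-X table/PBA pages read-only
         * before any guest can map them; release: Xen drops the protection
         * again, e.g. when resource assignment may still change.
         */
        return HYPERVISOR_physdev_op(final ? PHYSDEVOP_prepare_msix
                                           : PHYSDEVOP_release_msix, &dev);
    }
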
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -649,8 +649,8 @@ static u64 read_pci_mem_bar(u16 seg, u8
* @entries: pointer to an array of struct msix_entry entries
* @nvec: number of @entries
*
- * Setup the MSI-X capability structure of device function with a
- * single MSI-X irq. A return of zero indicates the successful setup of
+ * Setup the MSI-X capability structure of device function with the requested
+ * number MSI-X irqs. A return of zero indicates the successful setup of
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
**/
static int msix_capability_init(struct pci_dev *dev,
@@ -658,86 +658,69 @@ static int msix_capability_init(struct p
struct msi_desc **desc,
unsigned int nr_entries)
{
- struct msi_desc *entry;
- int pos;
+ struct msi_desc *entry = NULL;
+ int pos, vf;
u16 control;
- u64 table_paddr, entry_paddr;
- u32 table_offset, entry_offset;
- u8 bir;
- void __iomem *base;
- int idx;
+ u64 table_paddr;
+ u32 table_offset;
+ u8 bir, pbus, pslot, pfunc;
u16 seg = dev->seg;
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
ASSERT(spin_is_locked(&pcidevs_lock));
- ASSERT(desc);
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
- /* MSI-X Table Initialization */
- entry = alloc_msi_entry();
- if ( !entry )
- return -ENOMEM;
+ if ( desc )
+ {
+ entry = alloc_msi_entry();
+ if ( !entry )
+ return -ENOMEM;
+ ASSERT(msi);
+ }
- /* Request & Map MSI-X table region */
+ /* Locate MSI-X table region */
table_offset = pci_conf_read32(seg, bus, slot, func,
msix_table_offset_reg(pos));
bir = (u8)(table_offset & PCI_MSIX_BIRMASK);
table_offset &= ~PCI_MSIX_BIRMASK;
- entry_offset = msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
- table_paddr = msi->table_base + table_offset;
- entry_paddr = table_paddr + entry_offset;
- idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
- if ( idx < 0 )
- {
- xfree(entry);
- return idx;
- }
- base = (void *)(fix_to_virt(idx) +
- ((unsigned long)entry_paddr & ((1UL << PAGE_SHIFT) - 1)));
-
- entry->msi_attrib.type = PCI_CAP_ID_MSIX;
- entry->msi_attrib.is_64 = 1;
- entry->msi_attrib.entry_nr = msi->entry_nr;
- entry->msi_attrib.maskbit = 1;
- entry->msi_attrib.masked = 1;
- entry->msi_attrib.pos = pos;
- entry->irq = msi->irq;
- entry->dev = dev;
- entry->mask_base = base;
-
- list_add_tail(&entry->list, &dev->msi_list);
-
- if ( !dev->msix_nr_entries )
+ if ( !dev->info.is_virtfn )
{
- u8 pbus, pslot, pfunc;
- int vf;
- u64 pba_paddr;
- u32 pba_offset;
+ pbus = bus;
+ pslot = slot;
+ pfunc = func;
+ vf = -1;
+ }
+ else
+ {
+ pbus = dev->info.physfn.bus;
+ pslot = PCI_SLOT(dev->info.physfn.devfn);
+ pfunc = PCI_FUNC(dev->info.physfn.devfn);
+ vf = PCI_BDF2(dev->bus, dev->devfn);
+ }
- if ( !dev->info.is_virtfn )
- {
- pbus = bus;
- pslot = slot;
- pfunc = func;
- vf = -1;
- }
- else
+ table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
+ WARN_ON(msi && msi->table_base != table_paddr);
+ if ( !table_paddr )
+ {
+ if ( !msi || !msi->table_base )
{
- pbus = dev->info.physfn.bus;
- pslot = PCI_SLOT(dev->info.physfn.devfn);
- pfunc = PCI_FUNC(dev->info.physfn.devfn);
- vf = PCI_BDF2(dev->bus, dev->devfn);
+ xfree(entry);
+ return -ENXIO;
}
+ table_paddr = msi->table_base;
+ }
+ table_paddr += table_offset;
- ASSERT(!dev->msix_used_entries);
- WARN_ON(msi->table_base !=
- read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf));
+ if ( !dev->msix_used_entries )
+ {
+ u64 pba_paddr;
+ u32 pba_offset;
dev->msix_nr_entries = nr_entries;
dev->msix_table.first = PFN_DOWN(table_paddr);
@@ -758,7 +741,42 @@ static int msix_capability_init(struct p
BITS_TO_LONGS(nr_entries) - 1);
WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, dev->msix_pba.first,
dev->msix_pba.last));
+ }
+
+ if ( entry )
+ {
+ /* Map MSI-X table region */
+ u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
+ int idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
+ void __iomem *base;
+
+ if ( idx < 0 )
+ {
+ xfree(entry);
+ return idx;
+ }
+ base = (void *)(fix_to_virt(idx) +
+ ((unsigned long)entry_paddr & (PAGE_SIZE - 1)));
+ /* Mask interrupt here */
+ writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+
+ entry->msi_attrib.type = PCI_CAP_ID_MSIX;
+ entry->msi_attrib.is_64 = 1;
+ entry->msi_attrib.entry_nr = msi->entry_nr;
+ entry->msi_attrib.maskbit = 1;
+ entry->msi_attrib.masked = 1;
+ entry->msi_attrib.pos = pos;
+ entry->irq = msi->irq;
+ entry->dev = dev;
+ entry->mask_base = base;
+
+ list_add_tail(&entry->list, &dev->msi_list);
+ *desc = entry;
+ }
+
+ if ( !dev->msix_used_entries )
+ {
if ( rangeset_add_range(mmio_ro_ranges, dev->msix_table.first,
dev->msix_table.last) )
WARN();
@@ -769,7 +787,7 @@ static int msix_capability_init(struct p
if ( dev->domain )
p2m_change_entry_type_global(dev->domain,
p2m_mmio_direct, p2m_mmio_direct);
- if ( !dev->domain || !paging_mode_translate(dev->domain) )
+ if ( desc && (!dev->domain || !paging_mode_translate(dev->domain)) )
{
struct domain *d = dev->domain;
@@ -783,6 +801,13 @@ static int msix_capability_init(struct p
break;
if ( d )
{
+ if ( !IS_PRIV(d) && dev->msix_warned != d->domain_id )
+ {
+ dev->msix_warned = d->domain_id;
+ printk(XENLOG_ERR
+ "Potentially insecure use of MSI-X on %04x:%02x:%02x.%u by Dom%d\n",
+ seg, bus, slot, func, d->domain_id);
+ }
/* XXX How to deal with existing mappings? */
}
}
@@ -791,10 +816,6 @@ static int msix_capability_init(struct p
WARN_ON(dev->msix_table.first != (table_paddr >> PAGE_SHIFT));
++dev->msix_used_entries;
- /* Mask interrupt here */
- writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-
- *desc = entry;
/* Restore MSI-X enabled bits */
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
@@ -919,6 +940,19 @@ static int __pci_enable_msix(struct msi_
return status;
}
+static void _pci_cleanup_msix(struct pci_dev *dev)
+{
+ if ( !--dev->msix_used_entries )
+ {
+ if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first,
+ dev->msix_table.last) )
+ WARN();
+ if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first,
+ dev->msix_pba.last) )
+ WARN();
+ }
+}
+
static void __pci_disable_msix(struct msi_desc *entry)
{
struct pci_dev *dev;
@@ -942,15 +976,45 @@ static void __pci_disable_msix(struct ms
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
- if ( !--dev->msix_used_entries )
+ _pci_cleanup_msix(dev);
+}
+
+int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off)
+{
+ int rc;
+ struct pci_dev *pdev;
+ u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
+ unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
+ PCI_CAP_ID_MSIX);
+
+ if ( !use_msi )
+ return 0;
+
+ if ( !pos )
+ return -ENODEV;
+
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( !pdev )
+ rc = -ENODEV;
+ else if ( pdev->msix_used_entries != !!off )
+ rc = -EBUSY;
+ else if ( off )
{
- if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first,
- dev->msix_table.last) )
- WARN();
- if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first,
- dev->msix_pba.last) )
- WARN();
+ _pci_cleanup_msix(pdev);
+ rc = 0;
}
+ else
+ {
+ u16 control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(pos));
+
+ rc = msix_capability_init(pdev, NULL, NULL,
+ multi_msix_capable(control));
+ }
+ spin_unlock(&pcidevs_lock);
+
+ return rc;
}
/*
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -609,6 +609,18 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
}
+ case PHYSDEVOP_prepare_msix:
+ case PHYSDEVOP_release_msix: {
+ struct physdev_pci_device dev;
+
+ if ( copy_from_guest(&dev, arg, 1) )
+ ret = -EFAULT;
+ else
+ ret = pci_prepare_msix(dev.seg, dev.bus, dev.devfn,
+ cmd != PHYSDEVOP_prepare_msix);
+ break;
+ }
+
#ifdef __x86_64__
case PHYSDEVOP_pci_mmcfg_reserved: {
struct physdev_pci_mmcfg_reserved info;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -80,6 +80,7 @@ struct msi_desc;
/* Helper functions */
extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
extern void pci_disable_msi(struct msi_desc *desc);
+extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
extern void pci_cleanup_msi(struct pci_dev *pdev);
extern void setup_msi_handler(struct irq_desc *, struct msi_desc *);
extern void setup_msi_irq(struct irq_desc *);
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -303,6 +303,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_devi
#define PHYSDEVOP_pci_device_remove 26
#define PHYSDEVOP_restore_msi_ext 27
+/*
+ * Dom0 should use these two to announce MMIO resources assigned to
+ * MSI-X capable devices won't (prepare) or may (release) change.
+ */
+#define PHYSDEVOP_prepare_msix 30
+#define PHYSDEVOP_release_msix 31
struct physdev_pci_device {
/* IN */
uint16_t seg;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -57,6 +57,7 @@ struct pci_dev {
int msix_table_refcnt[MAX_MSIX_TABLE_PAGES];
int msix_table_idx[MAX_MSIX_TABLE_PAGES];
spinlock_t msix_table_lock;
+ domid_t msix_warned;
struct domain *domain;
const u16 seg;