- bnc#803712 - after live migration rcu_sched_state detected stalls
  xen.migrate.tools-xend_move_assert_to_exception_block.patch
  xen.migrate.tools-libxc_print_stats_if_migration_is_aborted.patch
  xen.migrate.tools_set_number_of_dirty_pages_during_migration.patch
  xen.migrate.tools_notify_restore_to_hangup_during_migration_--abort_if_busy.patch
- bnc#811764 - XEN (hypervisor or kernel) has a problem with EFI
  variable services
  x86-EFI-set-variable-permit-attrs.patch
- Upstream patches from Jan
  26060-ACPI-ERST-table-size-checks.patch
  26692-x86-fully-protect-MSI-X-table-from-PV-guest-accesses.patch
  26702-powernow-add-fixups-for-AMD-P-state-figures.patch
  26704-x86-MCA-suppress-bank-clearing-for-certain-injected-events.patch (bnc#805579)
  26731-AMD-IOMMU-Process-softirqs-while-building-dom0-iommu-mappings.patch
  26733-VT-d-Enumerate-IOMMUs-when-listing-capabilities.patch
  26734-ACPI-ERST-Name-table-in-otherwise-opaque-error-messages.patch
  26736-ACPI-APEI-Unlock-apei_iomaps_lock-on-error-path.patch
  26737-ACPI-APEI-Add-apei_exec_run_optional.patch
  26742-IOMMU-properly-check-whether-interrupt-remapping-is-enabled.patch
  26743-VT-d-deal-with-5500-5520-X58-errata.patch (bnc#801910)
  26744-AMD-IOMMU-allow-disabling-only-interrupt-remapping.patch
- PVonHVM: __devinit was removed in linux-3.8

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=234
370 lines
12 KiB
Diff
370 lines
12 KiB
Diff
# Commit 4245d331e0e75de8d1bddbbb518f3a8ce6d0bb7e
|
|
# Date 2013-03-08 14:05:34 +0100
|
|
# Author Jan Beulich <jbeulich@suse.com>
|
|
# Committer Jan Beulich <jbeulich@suse.com>
|
|
x86/MSI: add mechanism to fully protect MSI-X table from PV guest accesses
|
|
|
|
This adds two new physdev operations for Dom0 to invoke when resource
|
|
allocation for devices is known to be complete, so that the hypervisor
|
|
can arrange for the respective MMIO ranges to be marked read-only
|
|
before an eventual guest getting such a device assigned even gets
|
|
started, such that it won't be able to set up writable mappings for
|
|
these MMIO ranges before Xen has a chance to protect them.
|
|
|
|
This also addresses another issue with the code being modified here,
|
|
in that so far write protection for the address ranges in question got
|
|
set up only once during the lifetime of a device (i.e. until either
|
|
system shutdown or device hot removal), while teardown happened when
|
|
the last interrupt was disposed of by the guest (which at least allowed
|
|
the tables to be writable when the device got assigned to a second
|
|
guest [instance] after the first terminated).
|
|
|
|
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
|
|
|
--- a/xen/arch/x86/msi.c
|
|
+++ b/xen/arch/x86/msi.c
|
|
@@ -649,8 +649,8 @@ static u64 read_pci_mem_bar(u16 seg, u8
|
|
* @entries: pointer to an array of struct msix_entry entries
|
|
* @nvec: number of @entries
|
|
*
|
|
- * Setup the MSI-X capability structure of device function with a
|
|
- * single MSI-X irq. A return of zero indicates the successful setup of
|
|
+ * Setup the MSI-X capability structure of device function with the requested
|
|
+ * number MSI-X irqs. A return of zero indicates the successful setup of
|
|
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
|
|
**/
|
|
static int msix_capability_init(struct pci_dev *dev,
|
|
@@ -658,86 +658,69 @@ static int msix_capability_init(struct p
|
|
struct msi_desc **desc,
|
|
unsigned int nr_entries)
|
|
{
|
|
- struct msi_desc *entry;
|
|
- int pos;
|
|
+ struct msi_desc *entry = NULL;
|
|
+ int pos, vf;
|
|
u16 control;
|
|
- u64 table_paddr, entry_paddr;
|
|
- u32 table_offset, entry_offset;
|
|
- u8 bir;
|
|
- void __iomem *base;
|
|
- int idx;
|
|
+ u64 table_paddr;
|
|
+ u32 table_offset;
|
|
+ u8 bir, pbus, pslot, pfunc;
|
|
u16 seg = dev->seg;
|
|
u8 bus = dev->bus;
|
|
u8 slot = PCI_SLOT(dev->devfn);
|
|
u8 func = PCI_FUNC(dev->devfn);
|
|
|
|
ASSERT(spin_is_locked(&pcidevs_lock));
|
|
- ASSERT(desc);
|
|
|
|
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
|
|
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
|
|
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
|
|
|
|
- /* MSI-X Table Initialization */
|
|
- entry = alloc_msi_entry();
|
|
- if ( !entry )
|
|
- return -ENOMEM;
|
|
+ if ( desc )
|
|
+ {
|
|
+ entry = alloc_msi_entry();
|
|
+ if ( !entry )
|
|
+ return -ENOMEM;
|
|
+ ASSERT(msi);
|
|
+ }
|
|
|
|
- /* Request & Map MSI-X table region */
|
|
+ /* Locate MSI-X table region */
|
|
table_offset = pci_conf_read32(seg, bus, slot, func,
|
|
msix_table_offset_reg(pos));
|
|
bir = (u8)(table_offset & PCI_MSIX_BIRMASK);
|
|
table_offset &= ~PCI_MSIX_BIRMASK;
|
|
- entry_offset = msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
|
|
|
|
- table_paddr = msi->table_base + table_offset;
|
|
- entry_paddr = table_paddr + entry_offset;
|
|
- idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
|
|
- if ( idx < 0 )
|
|
- {
|
|
- xfree(entry);
|
|
- return idx;
|
|
- }
|
|
- base = (void *)(fix_to_virt(idx) +
|
|
- ((unsigned long)entry_paddr & ((1UL << PAGE_SHIFT) - 1)));
|
|
-
|
|
- entry->msi_attrib.type = PCI_CAP_ID_MSIX;
|
|
- entry->msi_attrib.is_64 = 1;
|
|
- entry->msi_attrib.entry_nr = msi->entry_nr;
|
|
- entry->msi_attrib.maskbit = 1;
|
|
- entry->msi_attrib.masked = 1;
|
|
- entry->msi_attrib.pos = pos;
|
|
- entry->irq = msi->irq;
|
|
- entry->dev = dev;
|
|
- entry->mask_base = base;
|
|
-
|
|
- list_add_tail(&entry->list, &dev->msi_list);
|
|
-
|
|
- if ( !dev->msix_nr_entries )
|
|
+ if ( !dev->info.is_virtfn )
|
|
{
|
|
- u8 pbus, pslot, pfunc;
|
|
- int vf;
|
|
- u64 pba_paddr;
|
|
- u32 pba_offset;
|
|
+ pbus = bus;
|
|
+ pslot = slot;
|
|
+ pfunc = func;
|
|
+ vf = -1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ pbus = dev->info.physfn.bus;
|
|
+ pslot = PCI_SLOT(dev->info.physfn.devfn);
|
|
+ pfunc = PCI_FUNC(dev->info.physfn.devfn);
|
|
+ vf = PCI_BDF2(dev->bus, dev->devfn);
|
|
+ }
|
|
|
|
- if ( !dev->info.is_virtfn )
|
|
- {
|
|
- pbus = bus;
|
|
- pslot = slot;
|
|
- pfunc = func;
|
|
- vf = -1;
|
|
- }
|
|
- else
|
|
+ table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
|
|
+ WARN_ON(msi && msi->table_base != table_paddr);
|
|
+ if ( !table_paddr )
|
|
+ {
|
|
+ if ( !msi || !msi->table_base )
|
|
{
|
|
- pbus = dev->info.physfn.bus;
|
|
- pslot = PCI_SLOT(dev->info.physfn.devfn);
|
|
- pfunc = PCI_FUNC(dev->info.physfn.devfn);
|
|
- vf = PCI_BDF2(dev->bus, dev->devfn);
|
|
+ xfree(entry);
|
|
+ return -ENXIO;
|
|
}
|
|
+ table_paddr = msi->table_base;
|
|
+ }
|
|
+ table_paddr += table_offset;
|
|
|
|
- ASSERT(!dev->msix_used_entries);
|
|
- WARN_ON(msi->table_base !=
|
|
- read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf));
|
|
+ if ( !dev->msix_used_entries )
|
|
+ {
|
|
+ u64 pba_paddr;
|
|
+ u32 pba_offset;
|
|
|
|
dev->msix_nr_entries = nr_entries;
|
|
dev->msix_table.first = PFN_DOWN(table_paddr);
|
|
@@ -758,7 +741,42 @@ static int msix_capability_init(struct p
|
|
BITS_TO_LONGS(nr_entries) - 1);
|
|
WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, dev->msix_pba.first,
|
|
dev->msix_pba.last));
|
|
+ }
|
|
+
|
|
+ if ( entry )
|
|
+ {
|
|
+ /* Map MSI-X table region */
|
|
+ u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
|
|
+ int idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
|
|
+ void __iomem *base;
|
|
+
|
|
+ if ( idx < 0 )
|
|
+ {
|
|
+ xfree(entry);
|
|
+ return idx;
|
|
+ }
|
|
+ base = (void *)(fix_to_virt(idx) +
|
|
+ ((unsigned long)entry_paddr & (PAGE_SIZE - 1)));
|
|
|
|
+ /* Mask interrupt here */
|
|
+ writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
|
+
|
|
+ entry->msi_attrib.type = PCI_CAP_ID_MSIX;
|
|
+ entry->msi_attrib.is_64 = 1;
|
|
+ entry->msi_attrib.entry_nr = msi->entry_nr;
|
|
+ entry->msi_attrib.maskbit = 1;
|
|
+ entry->msi_attrib.masked = 1;
|
|
+ entry->msi_attrib.pos = pos;
|
|
+ entry->irq = msi->irq;
|
|
+ entry->dev = dev;
|
|
+ entry->mask_base = base;
|
|
+
|
|
+ list_add_tail(&entry->list, &dev->msi_list);
|
|
+ *desc = entry;
|
|
+ }
|
|
+
|
|
+ if ( !dev->msix_used_entries )
|
|
+ {
|
|
if ( rangeset_add_range(mmio_ro_ranges, dev->msix_table.first,
|
|
dev->msix_table.last) )
|
|
WARN();
|
|
@@ -769,7 +787,7 @@ static int msix_capability_init(struct p
|
|
if ( dev->domain )
|
|
p2m_change_entry_type_global(dev->domain,
|
|
p2m_mmio_direct, p2m_mmio_direct);
|
|
- if ( !dev->domain || !paging_mode_translate(dev->domain) )
|
|
+ if ( desc && (!dev->domain || !paging_mode_translate(dev->domain)) )
|
|
{
|
|
struct domain *d = dev->domain;
|
|
|
|
@@ -783,6 +801,13 @@ static int msix_capability_init(struct p
|
|
break;
|
|
if ( d )
|
|
{
|
|
+ if ( !IS_PRIV(d) && dev->msix_warned != d->domain_id )
|
|
+ {
|
|
+ dev->msix_warned = d->domain_id;
|
|
+ printk(XENLOG_ERR
|
|
+ "Potentially insecure use of MSI-X on %04x:%02x:%02x.%u by Dom%d\n",
|
|
+ seg, bus, slot, func, d->domain_id);
|
|
+ }
|
|
/* XXX How to deal with existing mappings? */
|
|
}
|
|
}
|
|
@@ -791,10 +816,6 @@ static int msix_capability_init(struct p
|
|
WARN_ON(dev->msix_table.first != (table_paddr >> PAGE_SHIFT));
|
|
++dev->msix_used_entries;
|
|
|
|
- /* Mask interrupt here */
|
|
- writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
|
-
|
|
- *desc = entry;
|
|
/* Restore MSI-X enabled bits */
|
|
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
|
|
|
|
@@ -919,6 +940,19 @@ static int __pci_enable_msix(struct msi_
|
|
return status;
|
|
}
|
|
|
|
+static void _pci_cleanup_msix(struct pci_dev *dev)
|
|
+{
|
|
+ if ( !--dev->msix_used_entries )
|
|
+ {
|
|
+ if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first,
|
|
+ dev->msix_table.last) )
|
|
+ WARN();
|
|
+ if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first,
|
|
+ dev->msix_pba.last) )
|
|
+ WARN();
|
|
+ }
|
|
+}
|
|
+
|
|
static void __pci_disable_msix(struct msi_desc *entry)
|
|
{
|
|
struct pci_dev *dev;
|
|
@@ -942,15 +976,45 @@ static void __pci_disable_msix(struct ms
|
|
|
|
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
|
|
|
|
- if ( !--dev->msix_used_entries )
|
|
+ _pci_cleanup_msix(dev);
|
|
+}
|
|
+
|
|
+int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off)
|
|
+{
|
|
+ int rc;
|
|
+ struct pci_dev *pdev;
|
|
+ u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
|
|
+ unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
|
|
+ PCI_CAP_ID_MSIX);
|
|
+
|
|
+ if ( !use_msi )
|
|
+ return 0;
|
|
+
|
|
+ if ( !pos )
|
|
+ return -ENODEV;
|
|
+
|
|
+ spin_lock(&pcidevs_lock);
|
|
+ pdev = pci_get_pdev(seg, bus, devfn);
|
|
+ if ( !pdev )
|
|
+ rc = -ENODEV;
|
|
+ else if ( pdev->msix_used_entries != !!off )
|
|
+ rc = -EBUSY;
|
|
+ else if ( off )
|
|
{
|
|
- if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first,
|
|
- dev->msix_table.last) )
|
|
- WARN();
|
|
- if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first,
|
|
- dev->msix_pba.last) )
|
|
- WARN();
|
|
+ _pci_cleanup_msix(pdev);
|
|
+ rc = 0;
|
|
}
|
|
+ else
|
|
+ {
|
|
+ u16 control = pci_conf_read16(seg, bus, slot, func,
|
|
+ msix_control_reg(pos));
|
|
+
|
|
+ rc = msix_capability_init(pdev, NULL, NULL,
|
|
+ multi_msix_capable(control));
|
|
+ }
|
|
+ spin_unlock(&pcidevs_lock);
|
|
+
|
|
+ return rc;
|
|
}
|
|
|
|
/*
|
|
--- a/xen/arch/x86/physdev.c
|
|
+++ b/xen/arch/x86/physdev.c
|
|
@@ -609,6 +609,18 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
|
|
break;
|
|
}
|
|
|
|
+ case PHYSDEVOP_prepare_msix:
|
|
+ case PHYSDEVOP_release_msix: {
|
|
+ struct physdev_pci_device dev;
|
|
+
|
|
+ if ( copy_from_guest(&dev, arg, 1) )
|
|
+ ret = -EFAULT;
|
|
+ else
|
|
+ ret = pci_prepare_msix(dev.seg, dev.bus, dev.devfn,
|
|
+ cmd != PHYSDEVOP_prepare_msix);
|
|
+ break;
|
|
+ }
|
|
+
|
|
#ifdef __x86_64__
|
|
case PHYSDEVOP_pci_mmcfg_reserved: {
|
|
struct physdev_pci_mmcfg_reserved info;
|
|
--- a/xen/include/asm-x86/msi.h
|
|
+++ b/xen/include/asm-x86/msi.h
|
|
@@ -80,6 +80,7 @@ struct msi_desc;
|
|
/* Helper functions */
|
|
extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
|
|
extern void pci_disable_msi(struct msi_desc *desc);
|
|
+extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
|
|
extern void pci_cleanup_msi(struct pci_dev *pdev);
|
|
extern void setup_msi_handler(struct irq_desc *, struct msi_desc *);
|
|
extern void setup_msi_irq(struct irq_desc *);
|
|
--- a/xen/include/public/physdev.h
|
|
+++ b/xen/include/public/physdev.h
|
|
@@ -303,6 +303,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_devi
|
|
|
|
#define PHYSDEVOP_pci_device_remove 26
|
|
#define PHYSDEVOP_restore_msi_ext 27
|
|
+/*
|
|
+ * Dom0 should use these two to announce MMIO resources assigned to
|
|
+ * MSI-X capable devices won't (prepare) or may (release) change.
|
|
+ */
|
|
+#define PHYSDEVOP_prepare_msix 30
|
|
+#define PHYSDEVOP_release_msix 31
|
|
struct physdev_pci_device {
|
|
/* IN */
|
|
uint16_t seg;
|
|
--- a/xen/include/xen/pci.h
|
|
+++ b/xen/include/xen/pci.h
|
|
@@ -57,6 +57,7 @@ struct pci_dev {
|
|
int msix_table_refcnt[MAX_MSIX_TABLE_PAGES];
|
|
int msix_table_idx[MAX_MSIX_TABLE_PAGES];
|
|
spinlock_t msix_table_lock;
|
|
+ domid_t msix_warned;
|
|
|
|
struct domain *domain;
|
|
const u16 seg;
|