- bnc#935634 - VUL-0: CVE-2015-3259: xen: XSA-137: xl command line
  config handling stack overflow
  CVE-2015-3259-xsa137.patch
- Upstream patches from Jan
  558bfaa0-x86-traps-avoid-using-current-too-early.patch
  5592a116-nested-EPT-fix-the-handling-of-nested-EPT.patch
  559b9dd6-x86-p2m-ept-don-t-unmap-in-use-EPT-pagetable.patch
  559bdde5-pull-in-latest-linux-earlycpio.patch
- Upstream patches from Jan pending review
  552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch
  552d0fe8-x86-mtrr-include-asm-atomic.h.patch
  552d293b-x86-vMSI-X-honor-all-mask-requests.patch
  552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
  554c7aee-x86-provide-arch_fetch_and_add.patch
  554c7b00-arm-provide-arch_fetch_and_add.patch
  55534b0a-x86-provide-add_sized.patch
  55534b25-arm-provide-add_sized.patch
  5555a4f8-use-ticket-locks-for-spin-locks.patch
  5555a5b9-x86-arm-remove-asm-spinlock-h.patch
  5555a8ec-introduce-non-contiguous-allocation.patch
  55795a52-x86-vMSI-X-support-qword-MMIO-access.patch
  557eb55f-gnttab-per-active-entry-locking.patch
  557eb5b6-gnttab-introduce-maptrack-lock.patch
  557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch
  557ffab8-evtchn-factor-out-freeing-an-event-channel.patch
  5582bf43-evtchn-simplify-port_is_valid.patch
  5582bf81-evtchn-remove-the-locking-when-unmasking-an-event-channel.patch
  5583d9c5-x86-MSI-X-cleanup.patch
  5583da09-x86-MSI-track-host-and-guest-masking-separately.patch
  5583da64-gnttab-use-per-VCPU-maptrack-free-lists.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=369
Commit 763b78040d (parent d9b8b1278d), authored by Charles Arnold on 2015-07-10 15:21:29 +00:00 and committed by Git OBS Bridge.
43 changed files with 5205 additions and 1056 deletions

@@ -0,0 +1,41 @@
# Commit 63dcef9fe5b880007075b5eb53f9950a826519ce
# Date 2015-04-14 15:02:10 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/hvm: don't include asm/spinlock.h
asm/spinlock.h should not be included directly.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- sle12sp1.orig/xen/arch/x86/hvm/hvm.c 2015-07-08 14:13:16.000000000 +0200
+++ sle12sp1/xen/arch/x86/hvm/hvm.c 2015-07-08 14:13:38.000000000 +0200
@@ -52,7 +52,6 @@
#include <asm/xstate.h>
#include <asm/traps.h>
#include <asm/mc146818rtc.h>
-#include <asm/spinlock.h>
#include <asm/mce.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
--- sle12sp1.orig/xen/arch/x86/hvm/svm/svm.c 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/arch/x86/hvm/svm/svm.c 2015-07-08 14:13:38.000000000 +0200
@@ -41,7 +41,6 @@
#include <asm/msr.h>
#include <asm/i387.h>
#include <asm/iocap.h>
-#include <asm/spinlock.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
--- sle12sp1.orig/xen/arch/x86/hvm/vmx/vmx.c 2015-05-19 23:16:48.000000000 +0200
+++ sle12sp1/xen/arch/x86/hvm/vmx/vmx.c 2015-07-08 14:13:38.000000000 +0200
@@ -35,7 +35,6 @@
#include <asm/types.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
-#include <asm/spinlock.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mem_sharing.h>

@@ -0,0 +1,22 @@
# Commit f70df9ec1ab72b6bbebad72d81109c1b214007e1
# Date 2015-04-14 15:02:32 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mtrr: include asm/atomic.h
asm/atomic.h is needed but only included indirectly via
asm/spinlock.h.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- sle12sp1.orig/xen/arch/x86/cpu/mtrr/main.c 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/arch/x86/cpu/mtrr/main.c 2015-07-08 14:13:42.000000000 +0200
@@ -36,6 +36,7 @@
#include <xen/lib.h>
#include <xen/smp.h>
#include <xen/spinlock.h>
+#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>

@@ -0,0 +1,44 @@
# Commit 70a3cbb8c9cb17a61fa25c48ba3d7b44fd059c90
# Date 2015-04-14 16:50:35 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/vMSI-X: honor all mask requests
Commit 74fd0036de ("x86: properly handle MSI-X unmask operation from
guests") didn't go far enough: it fixed an issue with unmasking, but
left an issue with masking in place: Due to the (late) point in time
when qemu requests the hypervisor to set up MSI-X interrupts (which is
where the MMIO intercept gets put in place), the hypervisor doesn't
see all guest writes, and hence shouldn't make assumptions on the state
the virtual MSI-X resources are in. Bypassing the rest of the logic on
a guest mask operation leads to
[00:04.0] pci_msix_write: Error: Can't update msix entry 1 since MSI-X is already enabled.
which, surprisingly enough, doesn't stop the device from working
(I didn't dig deep enough to figure out why that is). But it
does prevent the IRQ from being migrated inside the guest, i.e. all
interrupts will always arrive on vCPU 0.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
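As a hedged aside (not part of the patch below): the mask state referred to above lives in bit 0 of an MSI-X entry's Vector Control dword, which Xen exposes as PCI_MSIX_VECTOR_BITMASK; the helper name here is illustrative only.

    /* Sketch: a write to the vector control word is an unmask request
     * when the per-vector mask bit is clear - the only case that may
     * still need forwarding to the device model, and then only if the
     * address/data fields changed while the vector was masked. */
    static bool_t is_unmask_request(uint32_t vector_ctrl)
    {
        return !(vector_ctrl & PCI_MSIX_VECTOR_BITMASK);
    }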
--- sle12sp1.orig/xen/arch/x86/hvm/vmsi.c 2015-07-08 11:22:13.000000000 +0200
+++ sle12sp1/xen/arch/x86/hvm/vmsi.c 2015-04-20 09:30:29.000000000 +0200
@@ -286,11 +286,11 @@ static int msixtbl_write(struct vcpu *v,
goto out;
}
- /* exit to device model if address/data has been modified */
- if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
+ /* Exit to device model when unmasking and address/data got modified. */
+ if ( !(val & PCI_MSIX_VECTOR_BITMASK) &&
+ test_and_clear_bit(nr_entry, &entry->table_flags) )
{
- if ( !(val & PCI_MSIX_VECTOR_BITMASK) )
- v->arch.hvm_vcpu.hvm_io.msix_unmask_address = address;
+ v->arch.hvm_vcpu.hvm_io.msix_unmask_address = address;
goto out;
}

@@ -0,0 +1,56 @@
# Commit df9f5676b3711c95127d44e871ad7ca38d6ed28a
# Date 2015-04-14 16:51:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/vMSI-X: add valid bits for read acceleration
Again because Xen doesn't get to see all guest writes, it shouldn't
serve reads from its cache before having seen a write to the respective
address.
Also use DECLARE_BITMAP() in a related field declaration instead of
open coding it.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- sle12sp1.orig/xen/arch/x86/hvm/vmsi.c 2015-04-20 09:30:29.000000000 +0200
+++ sle12sp1/xen/arch/x86/hvm/vmsi.c 2015-04-20 09:32:57.000000000 +0200
@@ -154,11 +154,14 @@ struct msixtbl_entry
struct pci_dev *pdev;
unsigned long gtable; /* gpa of msix table */
unsigned long table_len;
- unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
+ DECLARE_BITMAP(table_flags, MAX_MSIX_TABLE_ENTRIES);
#define MAX_MSIX_ACC_ENTRIES 3
struct {
uint32_t msi_ad[3]; /* Shadow of address low, high and data */
} gentries[MAX_MSIX_ACC_ENTRIES];
+ DECLARE_BITMAP(acc_valid, 3 * MAX_MSIX_ACC_ENTRIES);
+#define acc_bit(what, ent, slot, idx) \
+ what##_bit((slot) * 3 + (idx), (ent)->acc_valid)
struct rcu_head rcu;
};
@@ -233,9 +236,10 @@ static int msixtbl_read(
if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
- if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
- goto out;
index = offset / sizeof(uint32_t);
+ if ( nr_entry >= MAX_MSIX_ACC_ENTRIES ||
+ !acc_bit(test, entry, nr_entry, index) )
+ goto out;
*pval = entry->gentries[nr_entry].msi_ad[index];
}
else
@@ -281,6 +285,7 @@ static int msixtbl_write(struct vcpu *v,
{
index = offset / sizeof(uint32_t);
entry->gentries[nr_entry].msi_ad[index] = val;
+ acc_bit(set, entry, nr_entry, index);
}
set_bit(nr_entry, &entry->table_flags);
goto out;

@@ -0,0 +1,68 @@
# Commit 2bfc9fc52ce8485fa43e79bbdc32360c74e12fe8
# Date 2015-05-08 10:59:26 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: provide arch_fetch_and_add()
arch_fetch_and_add() atomically adds a value and returns the previous
value.
This is needed to implement ticket locks.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
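A minimal usage sketch (not part of the patch below; the counter and function name are made up): the distinguishing property is that the caller gets back the value that was in memory before the addition.

    /* Sketch: two CPUs calling this concurrently are guaranteed to
     * observe different previous values, so each gets a unique number. */
    static unsigned long next_seq(unsigned long *counter)
    {
        return arch_fetch_and_add(counter, 1);
    }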
--- sle12sp1.orig/xen/include/asm-x86/system.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/asm-x86/system.h 2015-07-08 12:35:11.000000000 +0200
@@ -118,6 +118,52 @@ static always_inline unsigned long __cmp
})
/*
+ * Undefined symbol to cause link failure if a wrong size is used with
+ * arch_fetch_and_add().
+ */
+extern unsigned long __bad_fetch_and_add_size(void);
+
+static always_inline unsigned long __xadd(
+ volatile void *ptr, unsigned long v, int size)
+{
+ switch ( size )
+ {
+ case 1:
+ asm volatile ( "lock; xaddb %b0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+ return v;
+ case 2:
+ asm volatile ( "lock; xaddw %w0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+ return v;
+ case 4:
+ asm volatile ( "lock; xaddl %k0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+ return v;
+ case 8:
+ asm volatile ( "lock; xaddq %q0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+
+ return v;
+ default:
+ return __bad_fetch_and_add_size();
+ }
+}
+
+/*
+ * Atomically add @v to the 1, 2, 4, or 8 byte value at @ptr. Returns
+ * the previous value.
+ *
+ * This is a full memory barrier.
+ */
+#define arch_fetch_and_add(ptr, v) \
+ ((typeof(*(ptr)))__xadd(ptr, (typeof(*(ptr)))(v), sizeof(*(ptr))))
+
+/*
* Both Intel and AMD agree that, from a programmer's viewpoint:
* Loads cannot be reordered relative to other loads.
* Stores cannot be reordered relative to other stores.

@@ -0,0 +1,29 @@
# Commit f9cc3cd9b4de58cf032c8624406384c172937e57
# Date 2015-05-08 10:59:44 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
arm: provide arch_fetch_and_add()
arch_fetch_and_add() atomically adds a value and returns the previous
value.
This generic arm implementation uses the GCC __sync_fetch_and_add()
builtin. This builtin resulted in suitable inlined asm for GCC 4.8.3
(arm64) and GCC 4.6.3 (arm32).
This is needed to implement ticket locks.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- sle12sp1.orig/xen/include/asm-arm/system.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/asm-arm/system.h 2015-07-08 12:35:16.000000000 +0200
@@ -51,6 +51,8 @@
# error "unknown ARM variant"
#endif
+#define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v)
+
extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next);
#endif

@@ -0,0 +1,65 @@
# Commit 3c694aec08dda782d9c866e599b848dff86f474f
# Date 2015-05-13 15:00:58 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: provide add_sized()
add_sized(ptr, inc) adds inc to the value at ptr using only the correct
size of loads and stores for the type of *ptr. The add is /not/ atomic.
This is needed for ticket locks to ensure the increment of the head ticket
does not affect the tail ticket.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
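A minimal sketch of the point made above (not part of the patch below; the struct mirrors the spinlock_tickets_t layout introduced later in this series, and the names are illustrative): releasing a ticket lock must bump the 16-bit head with a same-width instruction so the adjacent tail is never part of the read-modify-write.

    struct tickets { uint16_t head, tail; };   /* share one 32-bit word */

    static void ticket_release(struct tickets *t)
    {
        add_sized(&t->head, 1);   /* expands to add_u16_sized(), i.e. "addw" */
    }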
--- sle12sp1.orig/xen/include/asm-x86/atomic.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/asm-x86/atomic.h 2015-07-08 12:35:20.000000000 +0200
@@ -14,6 +14,14 @@ static inline void name(volatile type *a
{ asm volatile("mov" size " %1,%0": "=m" (*(volatile type *)addr) \
:reg (val) barrier); }
+#define build_add_sized(name, size, type, reg) \
+ static inline void name(volatile type *addr, type val) \
+ { \
+ asm volatile("add" size " %1,%0" \
+ : "=m" (*addr) \
+ : reg (val)); \
+ }
+
build_read_atomic(read_u8_atomic, "b", uint8_t, "=q", )
build_read_atomic(read_u16_atomic, "w", uint16_t, "=r", )
build_read_atomic(read_u32_atomic, "l", uint32_t, "=r", )
@@ -25,8 +33,14 @@ build_write_atomic(write_u32_atomic, "l"
build_read_atomic(read_u64_atomic, "q", uint64_t, "=r", )
build_write_atomic(write_u64_atomic, "q", uint64_t, "r", )
+build_add_sized(add_u8_sized, "b", uint8_t, "qi")
+build_add_sized(add_u16_sized, "w", uint16_t, "ri")
+build_add_sized(add_u32_sized, "l", uint32_t, "ri")
+build_add_sized(add_u64_sized, "q", uint64_t, "ri")
+
#undef build_read_atomic
#undef build_write_atomic
+#undef build_add_sized
void __bad_atomic_size(void);
@@ -54,6 +68,18 @@ void __bad_atomic_size(void);
__x; \
})
+#define add_sized(p, x) ({ \
+ typeof(*(p)) x_ = (x); \
+ switch ( sizeof(*(p)) ) \
+ { \
+ case 1: add_u8_sized((uint8_t *)(p), x_); break; \
+ case 2: add_u16_sized((uint16_t *)(p), x_); break; \
+ case 4: add_u32_sized((uint32_t *)(p), x_); break; \
+ case 8: add_u64_sized((uint64_t *)(p), x_); break; \
+ default: __bad_atomic_size(); break; \
+ } \
+})
+
/*
* NB. I've pushed the volatile qualifier into the operations. This allows
* fast accessors such as _atomic_read() and _atomic_set() which don't give

@@ -0,0 +1,64 @@
# Commit 890674d13feb4a270aa112ca452dcf62fdd53f34
# Date 2015-05-13 15:01:25 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
arm: provide add_sized()
add_sized(ptr, inc) adds inc to the value at ptr using only the correct
size of loads and stores for the type of *ptr. The add is /not/ atomic.
This is needed for ticket locks to ensure the increment of the head ticket
does not affect the tail ticket.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- sle12sp1.orig/xen/include/asm-arm/atomic.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/asm-arm/atomic.h 2015-07-08 12:35:55.000000000 +0200
@@ -23,6 +23,17 @@ static inline void name(volatile type *a
: reg (val)); \
}
+#define build_add_sized(name, size, width, type, reg) \
+static inline void name(volatile type *addr, type val) \
+{ \
+ type t; \
+ asm volatile("ldr" size " %"width"1,%0\n" \
+ "add %"width"1,%"width"1,%"width"2\n" \
+ "str" size " %"width"1,%0" \
+ : "=m" (*(volatile type *)addr), "=r" (t) \
+ : reg (val)); \
+}
+
#if defined (CONFIG_ARM_32)
#define BYTE ""
#define WORD ""
@@ -46,6 +57,10 @@ build_atomic_read(read_u64_atomic, "x",
build_atomic_write(write_u64_atomic, "x", uint64_t, "r")
#endif
+build_add_sized(add_u8_sized, "b", BYTE, uint8_t, "ri")
+build_add_sized(add_u16_sized, "h", WORD, uint16_t, "ri")
+build_add_sized(add_u32_sized, "", WORD, uint32_t, "ri")
+
void __bad_atomic_size(void);
#define read_atomic(p) ({ \
@@ -70,6 +85,17 @@ void __bad_atomic_size(void);
__x; \
})
+#define add_sized(p, x) ({ \
+ typeof(*(p)) __x = (x); \
+ switch ( sizeof(*(p)) ) \
+ { \
+ case 1: add_u8_sized((uint8_t *)(p), __x); break; \
+ case 2: add_u16_sized((uint16_t *)(p), __x); break; \
+ case 4: add_u32_sized((uint32_t *)(p), __x); break; \
+ default: __bad_atomic_size(); break; \
+ } \
+})
+
/*
* NB. I've pushed the volatile qualifier into the operations. This allows
* fast accessors such as _atomic_read() and _atomic_set() which don't give

@@ -0,0 +1,305 @@
# Commit 45fcc4568c5162b00fb3907fb158af82dd484a3d
# Date 2015-05-15 09:49:12 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
use ticket locks for spin locks
Replace the byte locks with ticket locks. Ticket locks are: a) fair;
and b) perform better when contended since they spin without an atomic
operation.
The lock is split into two ticket values: head and tail. A locker
acquires a ticket by (atomically) increasing tail and using the
previous tail value. A CPU holds the lock if its ticket == head. The
lock is released by increasing head.
spin_lock_irq() and spin_lock_irqsave() now spin with irqs disabled
(previously, they would spin with irqs enabled if possible). This is
required to prevent deadlocks when the irq handler tries to take the
same lock with a higher ticket.
Architectures need only provide arch_fetch_and_add() and two barriers:
arch_lock_acquire_barrier() and arch_lock_release_barrier().
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
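A simplified sketch of the algorithm described above (the real implementation follows in the diff below and additionally handles lock profiling, preempt_disable() and the SMP read barriers):

    void ticket_lock(spinlock_t *lock)
    {
        spinlock_tickets_t me = SPINLOCK_TICKET_INC;      /* tail += 1 */

        /* Take a ticket: our ticket is the previous tail value. */
        me.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
                                          me.head_tail);
        /* Spin until the head catches up with our ticket. */
        while ( me.tail != read_atomic(&lock->tickets.head) )
            cpu_relax();
        arch_lock_acquire_barrier();
    }

    void ticket_unlock(spinlock_t *lock)
    {
        arch_lock_release_barrier();
        add_sized(&lock->tickets.head, 1);                /* head += 1 */
    }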
--- sle12sp1.orig/xen/common/spinlock.c 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/common/spinlock.c 2015-07-08 12:37:59.000000000 +0200
@@ -115,125 +115,134 @@ void spin_debug_disable(void)
#endif
+static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t)
+{
+ spinlock_tickets_t v;
+
+ smp_rmb();
+ v.head_tail = read_atomic(&t->head_tail);
+ return v;
+}
+
+static always_inline u16 observe_head(spinlock_tickets_t *t)
+{
+ smp_rmb();
+ return read_atomic(&t->head);
+}
+
void _spin_lock(spinlock_t *lock)
{
+ spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
LOCK_PROFILE_VAR;
check_lock(&lock->debug);
- while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
+ tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
+ tickets.head_tail);
+ while ( tickets.tail != observe_head(&lock->tickets) )
{
LOCK_PROFILE_BLOCK;
- while ( likely(_raw_spin_is_locked(&lock->raw)) )
- cpu_relax();
+ cpu_relax();
}
LOCK_PROFILE_GOT;
preempt_disable();
+ arch_lock_acquire_barrier();
}
void _spin_lock_irq(spinlock_t *lock)
{
- LOCK_PROFILE_VAR;
-
ASSERT(local_irq_is_enabled());
local_irq_disable();
- check_lock(&lock->debug);
- while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
- {
- LOCK_PROFILE_BLOCK;
- local_irq_enable();
- while ( likely(_raw_spin_is_locked(&lock->raw)) )
- cpu_relax();
- local_irq_disable();
- }
- LOCK_PROFILE_GOT;
- preempt_disable();
+ _spin_lock(lock);
}
unsigned long _spin_lock_irqsave(spinlock_t *lock)
{
unsigned long flags;
- LOCK_PROFILE_VAR;
local_irq_save(flags);
- check_lock(&lock->debug);
- while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
- {
- LOCK_PROFILE_BLOCK;
- local_irq_restore(flags);
- while ( likely(_raw_spin_is_locked(&lock->raw)) )
- cpu_relax();
- local_irq_save(flags);
- }
- LOCK_PROFILE_GOT;
- preempt_disable();
+ _spin_lock(lock);
return flags;
}
void _spin_unlock(spinlock_t *lock)
{
+ arch_lock_release_barrier();
preempt_enable();
LOCK_PROFILE_REL;
- _raw_spin_unlock(&lock->raw);
+ add_sized(&lock->tickets.head, 1);
}
void _spin_unlock_irq(spinlock_t *lock)
{
- preempt_enable();
- LOCK_PROFILE_REL;
- _raw_spin_unlock(&lock->raw);
+ _spin_unlock(lock);
local_irq_enable();
}
void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
- preempt_enable();
- LOCK_PROFILE_REL;
- _raw_spin_unlock(&lock->raw);
+ _spin_unlock(lock);
local_irq_restore(flags);
}
int _spin_is_locked(spinlock_t *lock)
{
check_lock(&lock->debug);
- return _raw_spin_is_locked(&lock->raw);
+ return lock->tickets.head != lock->tickets.tail;
}
int _spin_trylock(spinlock_t *lock)
{
+ spinlock_tickets_t old, new;
+
check_lock(&lock->debug);
- if ( !_raw_spin_trylock(&lock->raw) )
+ old = observe_lock(&lock->tickets);
+ if ( old.head != old.tail )
+ return 0;
+ new = old;
+ new.tail++;
+ if ( cmpxchg(&lock->tickets.head_tail,
+ old.head_tail, new.head_tail) != old.head_tail )
return 0;
#ifdef LOCK_PROFILE
if (lock->profile)
lock->profile->time_locked = NOW();
#endif
preempt_disable();
+ /*
+ * cmpxchg() is a full barrier so no need for an
+ * arch_lock_acquire_barrier().
+ */
return 1;
}
void _spin_barrier(spinlock_t *lock)
{
+ spinlock_tickets_t sample;
#ifdef LOCK_PROFILE
s_time_t block = NOW();
- u64 loop = 0;
+#endif
check_barrier(&lock->debug);
- do { smp_mb(); loop++;} while ( _raw_spin_is_locked(&lock->raw) );
- if ((loop > 1) && lock->profile)
+ smp_mb();
+ sample = observe_lock(&lock->tickets);
+ if ( sample.head != sample.tail )
{
- lock->profile->time_block += NOW() - block;
- lock->profile->block_cnt++;
- }
-#else
- check_barrier(&lock->debug);
- do { smp_mb(); } while ( _raw_spin_is_locked(&lock->raw) );
+ while ( observe_head(&lock->tickets) == sample.head )
+ cpu_relax();
+#ifdef LOCK_PROFILE
+ if ( lock->profile )
+ {
+ lock->profile->time_block += NOW() - block;
+ lock->profile->block_cnt++;
+ }
#endif
+ }
smp_mb();
}
int _spin_trylock_recursive(spinlock_t *lock)
{
- int cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
/* Don't allow overflow of recurse_cpu field. */
BUILD_BUG_ON(NR_CPUS > 0xfffu);
@@ -256,8 +265,17 @@ int _spin_trylock_recursive(spinlock_t *
void _spin_lock_recursive(spinlock_t *lock)
{
- while ( !spin_trylock_recursive(lock) )
- cpu_relax();
+ unsigned int cpu = smp_processor_id();
+
+ if ( likely(lock->recurse_cpu != cpu) )
+ {
+ _spin_lock(lock);
+ lock->recurse_cpu = cpu;
+ }
+
+ /* We support only fairly shallow recursion, else the counter overflows. */
+ ASSERT(lock->recurse_cnt < 0xfu);
+ lock->recurse_cnt++;
}
void _spin_unlock_recursive(spinlock_t *lock)
--- sle12sp1.orig/xen/include/asm-arm/system.h 2015-07-08 12:35:16.000000000 +0200
+++ sle12sp1/xen/include/asm-arm/system.h 2015-07-08 12:37:59.000000000 +0200
@@ -53,6 +53,9 @@
#define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v)
+#define arch_lock_acquire_barrier() smp_mb()
+#define arch_lock_release_barrier() smp_mb()
+
extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next);
#endif
--- sle12sp1.orig/xen/include/asm-x86/system.h 2015-07-08 12:35:11.000000000 +0200
+++ sle12sp1/xen/include/asm-x86/system.h 2015-07-08 12:37:59.000000000 +0200
@@ -185,6 +185,17 @@ static always_inline unsigned long __xad
#define set_mb(var, value) do { xchg(&var, value); } while (0)
#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+/*
+ * On x86 the only reordering is of reads with older writes. In the
+ * lock case, the read in observe_head() can only be reordered with
+ * writes that precede it, and moving a write _into_ a locked section
+ * is OK. In the release case, the write in add_sized() can only be
+ * reordered with reads that follow it, and hoisting a read _into_ a
+ * locked region is OK.
+ */
+#define arch_lock_acquire_barrier() barrier()
+#define arch_lock_release_barrier() barrier()
+
#define local_irq_disable() asm volatile ( "cli" : : : "memory" )
#define local_irq_enable() asm volatile ( "sti" : : : "memory" )
--- sle12sp1.orig/xen/include/xen/spinlock.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/xen/spinlock.h 2015-07-08 12:37:59.000000000 +0200
@@ -80,8 +80,7 @@ struct lock_profile_qhead {
static struct lock_profile *__lock_profile_##name \
__used_section(".lockprofile.data") = \
&__lock_profile_data_##name
-#define _SPIN_LOCK_UNLOCKED(x) { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, \
- _LOCK_DEBUG, x }
+#define _SPIN_LOCK_UNLOCKED(x) { { 0 }, 0xfffu, 0, _LOCK_DEBUG, x }
#define SPIN_LOCK_UNLOCKED _SPIN_LOCK_UNLOCKED(NULL)
#define DEFINE_SPINLOCK(l) \
spinlock_t l = _SPIN_LOCK_UNLOCKED(NULL); \
@@ -117,8 +116,7 @@ extern void spinlock_profile_reset(unsig
struct lock_profile_qhead { };
-#define SPIN_LOCK_UNLOCKED \
- { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, _LOCK_DEBUG }
+#define SPIN_LOCK_UNLOCKED { { 0 }, 0xfffu, 0, _LOCK_DEBUG }
#define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
#define spin_lock_init_prof(s, l) spin_lock_init(&((s)->l))
@@ -127,8 +125,18 @@ struct lock_profile_qhead { };
#endif
+typedef union {
+ u32 head_tail;
+ struct {
+ u16 head;
+ u16 tail;
+ };
+} spinlock_tickets_t;
+
+#define SPINLOCK_TICKET_INC { .head_tail = 0x10000, }
+
typedef struct spinlock {
- raw_spinlock_t raw;
+ spinlock_tickets_t tickets;
u16 recurse_cpu:12;
u16 recurse_cnt:4;
struct lock_debug debug;

@@ -0,0 +1,266 @@
# Commit e62e49e6d5d4e8d22f3df0b75443ede65a812435
# Date 2015-05-15 09:52:25 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86,arm: remove asm/spinlock.h from all architectures
Now that all architectures use a common ticket lock implementation for
spinlocks, remove the architecture specific byte lock implementations.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- sle12sp1.orig/xen/arch/arm/README.LinuxPrimitives 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/arch/arm/README.LinuxPrimitives 2015-07-08 12:41:16.000000000 +0200
@@ -25,16 +25,6 @@ linux/arch/arm64/include/asm/atomic.h
---------------------------------------------------------------------
-spinlocks: last sync @ v3.16-rc6 (last commit: 95c4189689f9)
-
-linux/arch/arm64/include/asm/spinlock.h xen/include/asm-arm/arm64/spinlock.h
-
-Skipped:
- 5686b06 arm64: lockref: add support for lockless lockrefs using cmpxchg
- 52ea2a5 arm64: locks: introduce ticket-based spinlock implementation
-
----------------------------------------------------------------------
-
mem*: last sync @ v3.16-rc6 (last commit: d875c9b37240)
linux/arch/arm64/lib/memchr.S xen/arch/arm/arm64/lib/memchr.S
@@ -103,24 +93,6 @@ linux/arch/arm/include/asm/atomic.h
---------------------------------------------------------------------
-spinlocks: last sync: 15e7e5c1ebf5
-
-linux/arch/arm/include/asm/spinlock.h xen/include/asm-arm/arm32/spinlock.h
-
-*** Linux has switched to ticket locks but we still use bitlocks.
-
-resync to v3.14-rc7:
-
- 7c8746a ARM: 7955/1: spinlock: ensure we have a compiler barrier before sev
- 0cbad9c ARM: 7854/1: lockref: add support for lockless lockrefs using cmpxchg64
- 9bb17be ARM: locks: prefetch the destination word for write prior to strex
- 27a8479 ARM: smp_on_up: move inline asm ALT_SMP patching macro out of spinlock.
- 00efaa0 ARM: 7812/1: rwlocks: retry trylock operation if strex fails on free lo
- afa31d8 ARM: 7811/1: locks: use early clobber in arch_spin_trylock
- 73a6fdc ARM: spinlock: use inner-shareable dsb variant prior to sev instruction
-
----------------------------------------------------------------------
-
mem*: last sync @ v3.16-rc6 (last commit: d98b90ea22b0)
linux/arch/arm/lib/copy_template.S xen/arch/arm/arm32/lib/copy_template.S
--- sle12sp1.orig/xen/include/asm-arm/arm32/spinlock.h 2015-01-14 18:44:18.000000000 +0100
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,66 +0,0 @@
-#ifndef __ASM_ARM32_SPINLOCK_H
-#define __ASM_ARM32_SPINLOCK_H
-
-static inline void dsb_sev(void)
-{
- __asm__ __volatile__ (
- "dsb\n"
- "sev\n"
- );
-}
-
-typedef struct {
- volatile unsigned int lock;
-} raw_spinlock_t;
-
-#define _RAW_SPIN_LOCK_UNLOCKED { 0 }
-
-#define _raw_spin_is_locked(x) ((x)->lock != 0)
-
-static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
-{
- ASSERT(_raw_spin_is_locked(lock));
-
- smp_mb();
-
- __asm__ __volatile__(
-" str %1, [%0]\n"
- :
- : "r" (&lock->lock), "r" (0)
- : "cc");
-
- dsb_sev();
-}
-
-static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
-{
- unsigned long contended, res;
-
- do {
- __asm__ __volatile__(
- " ldrex %0, [%2]\n"
- " teq %0, #0\n"
- " strexeq %1, %3, [%2]\n"
- " movne %1, #0\n"
- : "=&r" (contended), "=r" (res)
- : "r" (&lock->lock), "r" (1)
- : "cc");
- } while (res);
-
- if (!contended) {
- smp_mb();
- return 1;
- } else {
- return 0;
- }
-}
-
-#endif /* __ASM_SPINLOCK_H */
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
--- sle12sp1.orig/xen/include/asm-arm/arm64/spinlock.h 2015-01-14 18:44:18.000000000 +0100
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,63 +0,0 @@
-/*
- * Derived from Linux arch64 spinlock.h which is:
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ASM_ARM64_SPINLOCK_H
-#define __ASM_ARM64_SPINLOCK_H
-
-typedef struct {
- volatile unsigned int lock;
-} raw_spinlock_t;
-
-#define _RAW_SPIN_LOCK_UNLOCKED { 0 }
-
-#define _raw_spin_is_locked(x) ((x)->lock != 0)
-
-static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
-{
- ASSERT(_raw_spin_is_locked(lock));
-
- asm volatile(
- " stlr %w1, %0\n"
- : "=Q" (lock->lock) : "r" (0) : "memory");
-}
-
-static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
-{
- unsigned int tmp;
-
- asm volatile(
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1f\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- "1:\n"
- : "=&r" (tmp), "+Q" (lock->lock)
- : "r" (1)
- : "cc", "memory");
-
- return !tmp;
-}
-
-#endif /* __ASM_SPINLOCK_H */
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
--- sle12sp1.orig/xen/include/asm-arm/spinlock.h 2013-07-09 20:57:12.000000000 +0200
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,23 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <xen/config.h>
-#include <xen/lib.h>
-
-#if defined(CONFIG_ARM_32)
-# include <asm/arm32/spinlock.h>
-#elif defined(CONFIG_ARM_64)
-# include <asm/arm64/spinlock.h>
-#else
-# error "unknown ARM variant"
-#endif
-
-#endif /* __ASM_SPINLOCK_H */
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
--- sle12sp1.orig/xen/include/asm-x86/spinlock.h 2015-01-14 18:44:18.000000000 +0100
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,34 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <asm/atomic.h>
-
-typedef struct {
- volatile s16 lock;
-} raw_spinlock_t;
-
-#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 1 }
-
-#define _raw_spin_is_locked(x) ((x)->lock <= 0)
-
-static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
-{
- ASSERT(_raw_spin_is_locked(lock));
- asm volatile (
- "movw $1,%0"
- : "=m" (lock->lock) : : "memory" );
-}
-
-static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
-{
- s16 oldval;
- asm volatile (
- "xchgw %w0,%1"
- :"=r" (oldval), "=m" (lock->lock)
- :"0" ((s16)0) : "memory" );
- return (oldval > 0);
-}
-
-#endif /* __ASM_SPINLOCK_H */
--- sle12sp1.orig/xen/include/xen/spinlock.h 2015-07-08 12:37:59.000000000 +0200
+++ sle12sp1/xen/include/xen/spinlock.h 2015-07-08 12:41:16.000000000 +0200
@@ -2,7 +2,6 @@
#define __SPINLOCK_H__
#include <asm/system.h>
-#include <asm/spinlock.h>
#ifndef NDEBUG
struct lock_debug {

@@ -0,0 +1,141 @@
# Commit f278fcf19ce15f7b7ee69181560b5884a5e12b66
# Date 2015-05-15 10:06:04 +0200
# Author Roger Pau Monné <roger.pau@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
introduce a helper to allocate non-contiguous memory
The allocator uses independent calls to alloc_domheap_pages in order to get
the desired amount of memory and then maps all the independent physical
addresses into a contiguous virtual address space.
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Tested-by: Julien Grall <julien.grall@citrix.com> (ARM)
Reviewed-by: Tim Deegan <tim@xen.org>
# Commit 640f891eb258563bb155e577389e8c5e6541a59a
# Date 2015-05-21 08:57:19 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
vmap: avoid hitting an ASSERT with vfree(NULL)
and unconditionally defer the vm_size() call, as it doesn't have a NULL
short circuit.
Reported-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
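A small usage sketch of the new interface (the calling context, struct name and count are made up): the buffer is virtually contiguous but assembled from independently allocated domheap pages, and per the folded-in fix vfree(NULL) is safe.

    struct stats *s = vzalloc(nr_cpus * sizeof(*s));   /* zero-filled */

    if ( !s )
        return -ENOMEM;
    /* ... use s ... */
    vfree(s);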
--- sle12sp1.orig/xen/common/vmap.c 2013-10-31 22:33:32.000000000 +0100
+++ sle12sp1/xen/common/vmap.c 2015-07-08 14:18:50.000000000 +0200
@@ -215,4 +215,75 @@ void vunmap(const void *va)
#endif
vm_free(va);
}
+
+void *vmalloc(size_t size)
+{
+ unsigned long *mfn;
+ size_t pages, i;
+ struct page_info *pg;
+ void *va;
+
+ ASSERT(size);
+
+ pages = PFN_UP(size);
+ mfn = xmalloc_array(unsigned long, pages);
+ if ( mfn == NULL )
+ return NULL;
+
+ for ( i = 0; i < pages; i++ )
+ {
+ pg = alloc_domheap_page(NULL, 0);
+ if ( pg == NULL )
+ goto error;
+ mfn[i] = page_to_mfn(pg);
+ }
+
+ va = vmap(mfn, pages);
+ if ( va == NULL )
+ goto error;
+
+ xfree(mfn);
+ return va;
+
+ error:
+ while ( i-- )
+ free_domheap_page(mfn_to_page(mfn[i]));
+ xfree(mfn);
+ return NULL;
+}
+
+void *vzalloc(size_t size)
+{
+ void *p = vmalloc(size);
+ int i;
+
+ if ( p == NULL )
+ return NULL;
+
+ for ( i = 0; i < size; i += PAGE_SIZE )
+ clear_page(p + i);
+
+ return p;
+}
+
+void vfree(void *va)
+{
+ unsigned int i, pages;
+ struct page_info *pg;
+ PAGE_LIST_HEAD(pg_list);
+
+ if ( !va )
+ return;
+
+ pages = vm_size(va);
+ ASSERT(pages);
+
+ for ( i = 0; i < pages; i++ )
+ page_list_add(vmap_to_page(va + i * PAGE_SIZE), &pg_list);
+
+ vunmap(va);
+
+ while ( (pg = page_list_remove_head(&pg_list)) != NULL )
+ free_domheap_page(pg);
+}
#endif
--- sle12sp1.orig/xen/include/asm-arm/mm.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/asm-arm/mm.h 2015-07-08 14:18:50.000000000 +0200
@@ -208,6 +208,8 @@ static inline void __iomem *ioremap_wc(p
#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
#define paddr_to_pdx(pa) pfn_to_pdx(paddr_to_pfn(pa))
+#define vmap_to_mfn(va) paddr_to_pfn(virt_to_maddr((vaddr_t)va))
+#define vmap_to_page(va) mfn_to_page(vmap_to_mfn(va))
/* Page-align address and convert to frame number format */
#define paddr_to_pfn_aligned(paddr) paddr_to_pfn(PAGE_ALIGN(paddr))
--- sle12sp1.orig/xen/include/asm-x86/page.h 2015-06-03 16:55:05.000000000 +0200
+++ sle12sp1/xen/include/asm-x86/page.h 2015-07-08 14:18:50.000000000 +0200
@@ -262,6 +262,8 @@ void copy_page_sse2(void *, const void *
#define pfn_to_paddr(pfn) __pfn_to_paddr(pfn)
#define paddr_to_pfn(pa) __paddr_to_pfn(pa)
#define paddr_to_pdx(pa) pfn_to_pdx(paddr_to_pfn(pa))
+#define vmap_to_mfn(va) l1e_get_pfn(*virt_to_xen_l1e((unsigned long)(va)))
+#define vmap_to_page(va) mfn_to_page(vmap_to_mfn(va))
#endif /* !defined(__ASSEMBLY__) */
--- sle12sp1.orig/xen/include/xen/vmap.h 2013-07-09 20:57:12.000000000 +0200
+++ sle12sp1/xen/include/xen/vmap.h 2015-07-08 14:18:50.000000000 +0200
@@ -11,6 +11,9 @@ void *__vmap(const unsigned long *mfn, u
unsigned int nr, unsigned int align, unsigned int flags);
void *vmap(const unsigned long *mfn, unsigned int nr);
void vunmap(const void *);
+void *vmalloc(size_t size);
+void *vzalloc(size_t size);
+void vfree(void *va);
void __iomem *ioremap(paddr_t, size_t);

@@ -0,0 +1,97 @@
# Commit 284ffb4f9b0d5c3a33c4c5bd87645d0cc342ca96
# Date 2015-06-11 11:52:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/vMSI-X: support qword MMIO access
The specification explicitly provides for this, so we should have
supported this from the beginning.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -223,7 +223,7 @@ static int msixtbl_read(
unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
- if ( len != 4 || (address & 3) )
+ if ( (len != 4 && len != 8) || (address & (len - 1)) )
return r;
rcu_read_lock(&msixtbl_rcu_lock);
@@ -241,13 +241,25 @@ static int msixtbl_read(
!acc_bit(test, entry, nr_entry, index) )
goto out;
*pval = entry->gentries[nr_entry].msi_ad[index];
+ if ( len == 8 )
+ {
+ if ( index )
+ offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+ else if ( acc_bit(test, entry, nr_entry, 1) )
+ *pval |= (u64)entry->gentries[nr_entry].msi_ad[1] << 32;
+ else
+ goto out;
+ }
}
- else
+ if ( offset == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
virt = msixtbl_addr_to_virt(entry, address);
if ( !virt )
goto out;
- *pval = readl(virt);
+ if ( len == 4 )
+ *pval = readl(virt);
+ else
+ *pval |= (u64)readl(virt) << 32;
}
r = X86EMUL_OKAY;
@@ -268,7 +280,7 @@ static int msixtbl_write(struct vcpu *v, unsigned long address,
unsigned long flags, orig;
struct irq_desc *desc;
- if ( len != 4 || (address & 3) )
+ if ( (len != 4 && len != 8) || (address & (len - 1)) )
return r;
rcu_read_lock(&msixtbl_rcu_lock);
@@ -279,16 +291,23 @@ static int msixtbl_write(struct vcpu *v, unsigned long address,
nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
- if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
+ if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
+ index = offset / sizeof(uint32_t);
if ( nr_entry < MAX_MSIX_ACC_ENTRIES )
{
- index = offset / sizeof(uint32_t);
entry->gentries[nr_entry].msi_ad[index] = val;
acc_bit(set, entry, nr_entry, index);
+ if ( len == 8 && !index )
+ {
+ entry->gentries[nr_entry].msi_ad[1] = val >> 32;
+ acc_bit(set, entry, nr_entry, 1);
+ }
}
set_bit(nr_entry, &entry->table_flags);
- goto out;
+ if ( len != 8 || !index )
+ goto out;
+ val >>= 32;
}
/* Exit to device model when unmasking and address/data got modified. */
@@ -352,7 +371,8 @@ static int msixtbl_write(struct vcpu *v, unsigned long address,
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
- r = X86EMUL_OKAY;
+ if ( len == 4 )
+ r = X86EMUL_OKAY;
out:
rcu_read_unlock(&msixtbl_rcu_lock);

@@ -0,0 +1,551 @@
# Commit b4650e9a96d78b87ccf7deb4f74733ccfcc64db5
# Date 2015-06-15 13:22:07 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: per-active entry locking
Introduce a per-active entry spin lock to protect active entry state.
The grant table lock must be locked before acquiring (locking) an
active entry.
This is a step in reducing contention on the grant table lock, but
will only do so once the grant table lock is turned into a read-write
lock.
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- sle12sp1.orig/docs/misc/grant-tables.txt 2008-10-14 19:44:06.000000000 +0200
+++ sle12sp1/docs/misc/grant-tables.txt 2015-07-08 13:49:42.000000000 +0200
@@ -63,6 +63,7 @@ is complete.
act->domid : remote domain being granted rights
act->frame : machine frame being granted
act->pin : used to hold reference counts
+ act->lock : spinlock used to serialize access to active entry state
Map tracking
~~~~~~~~~~~~
@@ -74,7 +75,46 @@ is complete.
matching map track entry is then removed, as if unmap had been invoked.
These are not used by the transfer mechanism.
map->domid : owner of the mapped frame
- map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+ map->ref : grant reference
+ map->flags : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+ grant_table->lock : lock used to prevent readers from accessing
+ inconsistent grant table state such as current
+ version, partially initialized active table pages,
+ etc.
+ active_grant_entry->lock : spinlock used to serialize modifications to
+ active entries
+
+ The primary lock for the grant table is a spinlock. All functions
+ that access members of struct grant_table must acquire the lock
+ around critical sections.
+
+ Active entries are obtained by calling active_entry_acquire(gt, ref).
+ This function returns a pointer to the active entry after locking its
+ spinlock. The caller must hold the grant table lock for the gt in
+ question before calling active_entry_acquire(). This is because the
+ grant table can be dynamically extended via gnttab_grow_table() while
+ a domain is running and must be fully initialized. Once all access to
+ the active entry is complete, release the lock by calling
+ active_entry_release(act).
+
+ Summary of rules for locking:
+ active_entry_acquire() and active_entry_release() can only be
+ called when holding the relevant grant table's lock. I.e.:
+ spin_lock(&gt->lock);
+ act = active_entry_acquire(gt, ref);
+ ...
+ active_entry_release(act);
+ spin_unlock(&gt->lock);
+
+ Active entries cannot be acquired while holding the maptrack lock.
+ Multiple active entries can be acquired while holding the grant table
+ lock.
********************************************************************************
--- sle12sp1.orig/xen/common/grant_table.c 2015-06-26 15:38:17.000000000 +0200
+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:49:42.000000000 +0200
@@ -157,10 +157,13 @@ struct active_grant_entry {
in the page. */
unsigned length:16; /* For sub-page grants, the length of the
grant. */
+ spinlock_t lock; /* lock to protect access of this entry.
+ see docs/misc/grant-tables.txt for
+ locking protocol */
};
#define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
-#define active_entry(t, e) \
+#define _active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
static inline void gnttab_flush_tlb(const struct domain *d)
@@ -188,6 +191,24 @@ nr_active_grant_frames(struct grant_tabl
return num_act_frames_from_sha_frames(nr_grant_frames(gt));
}
+static inline struct active_grant_entry *
+active_entry_acquire(struct grant_table *t, grant_ref_t e)
+{
+ struct active_grant_entry *act;
+
+ ASSERT(spin_is_locked(&t->lock));
+
+ act = &_active_entry(t, e);
+ spin_lock(&act->lock);
+
+ return act;
+}
+
+static inline void active_entry_release(struct active_grant_entry *act)
+{
+ spin_unlock(&act->lock);
+}
+
/* Check if the page has been paged out, or needs unsharing.
If rc == GNTST_okay, *page contains the page struct with a ref taken.
Caller must do put_page(*page).
@@ -505,7 +526,6 @@ static int grant_map_exists(const struct
unsigned long mfn,
unsigned int *ref_count)
{
- const struct active_grant_entry *act;
unsigned int ref, max_iter;
ASSERT(spin_is_locked(&rgt->lock));
@@ -514,18 +534,19 @@ static int grant_map_exists(const struct
nr_grant_entries(rgt));
for ( ref = *ref_count; ref < max_iter; ref++ )
{
- act = &active_entry(rgt, ref);
+ struct active_grant_entry *act;
+ bool_t exists;
- if ( !act->pin )
- continue;
+ act = active_entry_acquire(rgt, ref);
- if ( act->domid != ld->domain_id )
- continue;
+ exists = act->pin
+ && act->domid == ld->domain_id
+ && act->frame == mfn;
- if ( act->frame != mfn )
- continue;
+ active_entry_release(act);
- return 0;
+ if ( exists )
+ return 0;
}
if ( ref < nr_grant_entries(rgt) )
@@ -546,13 +567,24 @@ static void mapcount(
*wrc = *rdc = 0;
+ /*
+ * Must have the local domain's grant table lock when iterating
+ * over its maptrack entries.
+ */
+ ASSERT(spin_is_locked(&lgt->lock));
+ /*
+ * Must have the remote domain's grant table lock while counting
+ * its active entries.
+ */
+ ASSERT(spin_is_locked(&rd->grant_table->lock));
+
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
map = &maptrack_entry(lgt, handle);
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
map->domid != rd->domain_id )
continue;
- if ( active_entry(rd->grant_table, map->ref).frame == mfn )
+ if ( _active_entry(rd->grant_table, map->ref).frame == mfn )
(map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++;
}
}
@@ -639,7 +671,7 @@ __gnttab_map_grant_ref(
if ( unlikely(op->ref >= nr_grant_entries(rgt)))
PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
shah = shared_entry_header(rgt, op->ref);
if (rgt->gt_version == 1) {
sha1 = &shared_entry_v1(rgt, op->ref);
@@ -656,7 +688,7 @@ __gnttab_map_grant_ref(
((act->domid != ld->domain_id) ||
(act->pin & 0x80808080U) != 0 ||
(act->is_sub_page)) )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
act->domid, ld->domain_id, act->pin, act->is_sub_page);
@@ -667,7 +699,7 @@ __gnttab_map_grant_ref(
if ( (rc = _set_status(rgt->gt_version, ld->domain_id,
op->flags & GNTMAP_readonly,
1, shah, act, status) ) != GNTST_okay )
- goto unlock_out;
+ goto act_release_out;
if ( !act->pin )
{
@@ -702,6 +734,7 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
+ active_entry_release(act);
spin_unlock(&rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
@@ -839,7 +872,7 @@ __gnttab_map_grant_ref(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
if ( op->flags & GNTMAP_device_map )
act->pin -= (op->flags & GNTMAP_readonly) ?
@@ -856,6 +889,9 @@ __gnttab_map_grant_ref(
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
+ act_release_out:
+ active_entry_release(act);
+
unlock_out:
spin_unlock(&rgt->lock);
op->status = rc;
@@ -950,7 +986,7 @@ __gnttab_unmap_common(
}
op->rd = rd;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
if ( op->frame == 0 )
{
@@ -959,7 +995,7 @@ __gnttab_unmap_common(
else
{
if ( unlikely(op->frame != act->frame) )
- PIN_FAIL(unmap_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad frame number doesn't match gntref. (%lx != %lx)\n",
op->frame, act->frame);
if ( op->flags & GNTMAP_device_map )
@@ -978,7 +1014,7 @@ __gnttab_unmap_common(
if ( (rc = replace_grant_host_mapping(op->host_addr,
op->frame, op->new_addr,
op->flags)) < 0 )
- goto unmap_out;
+ goto act_release_out;
ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
op->map->flags &= ~GNTMAP_host_map;
@@ -1000,7 +1036,7 @@ __gnttab_unmap_common(
if ( err )
{
rc = GNTST_general_error;
- goto unmap_out;
+ goto act_release_out;
}
}
@@ -1008,8 +1044,11 @@ __gnttab_unmap_common(
if ( !(op->flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
+ act_release_out:
+ active_entry_release(act);
unmap_out:
double_gt_unlock(lgt, rgt);
+
op->status = rc;
rcu_unlock_domain(rd);
}
@@ -1042,9 +1081,9 @@ __gnttab_unmap_common_complete(struct gn
spin_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- goto unmap_out;
+ goto unlock_out;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
sha = shared_entry_header(rgt, op->map->ref);
if ( rgt->gt_version == 1 )
@@ -1058,7 +1097,7 @@ __gnttab_unmap_common_complete(struct gn
* Suggests that __gntab_unmap_common failed early and so
* nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
pg = mfn_to_page(op->frame);
@@ -1082,7 +1121,7 @@ __gnttab_unmap_common_complete(struct gn
* Suggests that __gntab_unmap_common failed in
* replace_grant_host_mapping() so nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
if ( !is_iomem_page(op->frame) )
@@ -1103,8 +1142,11 @@ __gnttab_unmap_common_complete(struct gn
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
- unmap_out:
+ act_release_out:
+ active_entry_release(act);
+ unlock_out:
spin_unlock(&rgt->lock);
+
if ( put_handle )
{
op->map->flags = 0;
@@ -1296,7 +1338,7 @@ gnttab_grow_table(struct domain *d, unsi
/* d's grant table lock must be held by the caller */
struct grant_table *gt = d->grant_table;
- unsigned int i;
+ unsigned int i, j;
ASSERT(req_nr_frames <= max_grant_frames);
@@ -1311,6 +1353,8 @@ gnttab_grow_table(struct domain *d, unsi
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
clear_page(gt->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&gt->active[i][j].lock);
}
/* Shared */
@@ -1805,7 +1849,7 @@ __release_grant_for_copy(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
r_frame = act->frame;
@@ -1844,6 +1888,7 @@ __release_grant_for_copy(
released_read = 1;
}
+ active_entry_release(act);
spin_unlock(&rgt->lock);
if ( td != rd )
@@ -1905,14 +1950,14 @@ __acquire_grant_for_copy(
spin_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(gt_unlock_out, GNTST_general_error,
"remote grant table not ready\n");
if ( unlikely(gref >= nr_grant_entries(rgt)) )
- PIN_FAIL(unlock_out, GNTST_bad_gntref,
+ PIN_FAIL(gt_unlock_out, GNTST_bad_gntref,
"Bad grant reference %ld\n", gref);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
shah = shared_entry_header(rgt, gref);
if ( rgt->gt_version == 1 )
{
@@ -1971,6 +2016,13 @@ __acquire_grant_for_copy(
PIN_FAIL(unlock_out_clear, GNTST_general_error,
"transitive grant referenced bad domain %d\n",
trans_domid);
+
+ /*
+ * __acquire_grant_for_copy() could take the lock on the
+ * remote table (if rd == td), so we have to drop the lock
+ * here and reacquire
+ */
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
@@ -1978,9 +2030,12 @@ __acquire_grant_for_copy(
&trans_page_off, &trans_length, 0);
spin_lock(&rgt->lock);
+ act = active_entry_acquire(rgt, gref);
+
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
}
@@ -1993,6 +2048,7 @@ __acquire_grant_for_copy(
{
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
@@ -2061,6 +2117,7 @@ __acquire_grant_for_copy(
*length = act->length;
*frame = act->frame;
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
@@ -2073,7 +2130,11 @@ __acquire_grant_for_copy(
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
+ active_entry_release(act);
+
+ gt_unlock_out:
spin_unlock(&rgt->lock);
+
return rc;
}
@@ -2231,7 +2292,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
gnttab_set_version_t op;
struct domain *d = current->domain;
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES];
long res;
int i;
@@ -2256,8 +2316,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
{
for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ )
{
- act = &active_entry(gt, i);
- if ( act->pin != 0 )
+ if ( read_atomic(&_active_entry(gt, i).pin) != 0 )
{
gdprintk(XENLOG_WARNING,
"tried to change grant table version from %d to %d, but some grant entries still in use\n",
@@ -2444,7 +2503,8 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
{
struct domain *d = rcu_lock_current_domain();
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
+ struct active_grant_entry *act_a = NULL;
+ struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
spin_lock(&gt->lock);
@@ -2458,12 +2518,16 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
if ( unlikely(ref_b >= nr_grant_entries(d->grant_table)))
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b);
- act = &active_entry(gt, ref_a);
- if ( act->pin )
+ /* Swapping the same ref is a no-op. */
+ if ( ref_a == ref_b )
+ goto out;
+
+ act_a = active_entry_acquire(gt, ref_a);
+ if ( act_a->pin )
PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a);
- act = &active_entry(gt, ref_b);
- if ( act->pin )
+ act_b = active_entry_acquire(gt, ref_b);
+ if ( act_b->pin )
PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b);
if ( gt->gt_version == 1 )
@@ -2490,6 +2554,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
}
out:
+ if ( act_b != NULL )
+ active_entry_release(act_b);
+ if ( act_a != NULL )
+ active_entry_release(act_a);
spin_unlock(&gt->lock);
rcu_unlock_domain(d);
@@ -2799,7 +2867,7 @@ grant_table_create(
struct domain *d)
{
struct grant_table *t;
- int i;
+ unsigned int i, j;
if ( (t = xzalloc(struct grant_table)) == NULL )
goto no_mem_0;
@@ -2818,6 +2886,8 @@ grant_table_create(
if ( (t->active[i] = alloc_xenheap_page()) == NULL )
goto no_mem_2;
clear_page(t->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&t->active[i][j].lock);
}
/* Tracking of mapped foreign frames table */
@@ -2914,7 +2984,7 @@ gnttab_release_mappings(
rgt = rd->grant_table;
spin_lock(&rgt->lock);
- act = &active_entry(rgt, ref);
+ act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
if (rgt->gt_version == 1)
status = &sha->flags;
@@ -2972,6 +3042,7 @@ gnttab_release_mappings(
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rcu_unlock_domain(rd);
@@ -3034,9 +3105,12 @@ static void gnttab_usage_print(struct do
uint16_t status;
uint64_t frame;
- act = &active_entry(gt, ref);
+ act = active_entry_acquire(gt, ref);
if ( !act->pin )
+ {
+ active_entry_release(act);
continue;
+ }
sha = shared_entry_header(gt, ref);
@@ -3066,6 +3140,7 @@ static void gnttab_usage_print(struct do
printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n",
ref, act->domid, act->frame, act->pin,
sha->domid, frame, status);
+ active_entry_release(act);
}
out:

@@ -0,0 +1,86 @@
# Commit 5a9899ddc42040e139233a6b1f0f65f3b65eda6d
# Date 2015-06-15 13:23:34 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: introduce maptrack lock
Split grant table lock into two separate locks. One to protect
maptrack free list (maptrack_lock) and one for everything else (lock).
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
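A sketch of the documented nesting rule (gt stands for an arbitrary struct grant_table pointer; this is an illustration, not code from the patch): the maptrack lock may be taken while the grant table lock is held, never the other way around.

    spin_lock(&gt->lock);              /* grant table state */
    spin_lock(&gt->maptrack_lock);     /* maptrack free list */
    /* ... manipulate the maptrack free list ... */
    spin_unlock(&gt->maptrack_lock);
    spin_unlock(&gt->lock);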
--- sle12sp1.orig/docs/misc/grant-tables.txt 2015-07-08 13:49:42.000000000 +0200
+++ sle12sp1/docs/misc/grant-tables.txt 2015-07-08 13:49:46.000000000 +0200
@@ -87,6 +87,7 @@ is complete.
inconsistent grant table state such as current
version, partially initialized active table pages,
etc.
+ grant_table->maptrack_lock : spinlock used to protect the maptrack free list
active_grant_entry->lock : spinlock used to serialize modifications to
active entries
@@ -94,6 +95,9 @@ is complete.
that access members of struct grant_table must acquire the lock
around critical sections.
+ The maptrack free list is protected by its own spinlock. The maptrack
+ lock may be locked while holding the grant table lock.
+
Active entries are obtained by calling active_entry_acquire(gt, ref).
This function returns a pointer to the active entry after locking its
spinlock. The caller must hold the grant table lock for the gt in
--- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:49:42.000000000 +0200
+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:49:46.000000000 +0200
@@ -288,10 +288,10 @@ static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
{
- spin_lock(&t->lock);
+ spin_lock(&t->maptrack_lock);
maptrack_entry(t, handle).ref = t->maptrack_head;
t->maptrack_head = handle;
- spin_unlock(&t->lock);
+ spin_unlock(&t->maptrack_lock);
}
static inline int
@@ -303,7 +303,7 @@ get_maptrack_handle(
struct grant_mapping *new_mt;
unsigned int new_mt_limit, nr_frames;
- spin_lock(&lgt->lock);
+ spin_lock(&lgt->maptrack_lock);
while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
{
@@ -332,7 +332,7 @@ get_maptrack_handle(
nr_frames + 1);
}
- spin_unlock(&lgt->lock);
+ spin_unlock(&lgt->maptrack_lock);
return handle;
}
@@ -2874,6 +2874,7 @@ grant_table_create(
/* Simple stuff. */
spin_lock_init(&t->lock);
+ spin_lock_init(&t->maptrack_lock);
t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
/* Active grant table. */
--- sle12sp1.orig/xen/include/xen/grant_table.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/xen/grant_table.h 2015-07-08 13:49:46.000000000 +0200
@@ -82,6 +82,8 @@ struct grant_table {
struct grant_mapping **maptrack;
unsigned int maptrack_head;
unsigned int maptrack_limit;
+ /* Lock protecting the maptrack page list, head, and limit */
+ spinlock_t maptrack_lock;
/* Lock protecting updates to active and shared grant tables. */
spinlock_t lock;
/* The defined versions are 1 and 2. Set to 0 if we don't know

@@ -0,0 +1,733 @@
# Commit 40de9fffb4cc0b0485aa3391d72e2220b8e1ce12
# Date 2015-06-15 13:25:20 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: make the grant table lock a read-write lock
In combination with the per-active entry locks, the grant table lock
can be made a read-write lock, since in the majority of cases only the
read lock is required. The grant table read lock protects against
changes to the table version or size (which are done with the write
lock held).
The write lock is also required when two active entries must be
acquired.
The double lock is still required when updating IOMMU page tables.
With lock contention now only on the maptrack lock (unless IOMMU
updates are required), performance and scalability are improved.
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
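A minimal sketch of the resulting locking discipline (illustrative only; the real call sites are in the hunks below, and rgt/gt stand for the remote/local struct grant_table):

    /* Common map/copy path: shared read lock plus the per-entry spinlock. */
    read_lock(&rgt->lock);
    act = active_entry_acquire(rgt, ref);
    /* ... inspect or update the active entry ... */
    active_entry_release(act);
    read_unlock(&rgt->lock);

    /* Version or size changes need exclusive access. */
    write_lock(&gt->lock);
    gnttab_grow_table(d, req_nr_frames);
    write_unlock(&gt->lock);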
--- sle12sp1.orig/docs/misc/grant-tables.txt 2015-07-08 13:49:46.000000000 +0200
+++ sle12sp1/docs/misc/grant-tables.txt 2015-07-08 13:49:47.000000000 +0200
@@ -83,7 +83,7 @@ is complete.
~~~~~~~
Xen uses several locks to serialize access to the internal grant table state.
- grant_table->lock : lock used to prevent readers from accessing
+ grant_table->lock : rwlock used to prevent readers from accessing
inconsistent grant table state such as current
version, partially initialized active table pages,
etc.
@@ -91,34 +91,43 @@ is complete.
active_grant_entry->lock : spinlock used to serialize modifications to
active entries
- The primary lock for the grant table is a spinlock. All functions
- that access members of struct grant_table must acquire the lock
- around critical sections.
+ The primary lock for the grant table is a read/write spinlock. All
+ functions that access members of struct grant_table must acquire a
+ read lock around critical sections. Any modification to the members
+ of struct grant_table (e.g., nr_status_frames, nr_grant_frames,
+ active frames, etc.) must only be made if the write lock is
+ held. These elements are read-mostly, and read critical sections can
+ be large, which makes a rwlock a good choice.
The maptrack free list is protected by its own spinlock. The maptrack
lock may be locked while holding the grant table lock.
Active entries are obtained by calling active_entry_acquire(gt, ref).
This function returns a pointer to the active entry after locking its
- spinlock. The caller must hold the grant table lock for the gt in
- question before calling active_entry_acquire(). This is because the
- grant table can be dynamically extended via gnttab_grow_table() while
- a domain is running and must be fully initialized. Once all access to
- the active entry is complete, release the lock by calling
- active_entry_release(act).
+ spinlock. The caller must hold the grant table read lock before
+ calling active_entry_acquire(). This is because the grant table can
+ be dynamically extended via gnttab_grow_table() while a domain is
+ running and must be fully initialized. Once all access to the active
+ entry is complete, release the lock by calling active_entry_release(act).
Summary of rules for locking:
active_entry_acquire() and active_entry_release() can only be
- called when holding the relevant grant table's lock. I.e.:
- spin_lock(&gt->lock);
+ called when holding the relevant grant table's read lock. I.e.:
+ read_lock(&gt->lock);
act = active_entry_acquire(gt, ref);
...
active_entry_release(act);
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
Active entries cannot be acquired while holding the maptrack lock.
Multiple active entries can be acquired while holding the grant table
- lock.
+ _write_ lock.
+
+ Maptrack entries are protected by the corresponding active entry
+ lock. As an exception, new maptrack entries may be populated without
+ holding the lock, provided the flags field is written last. This
+ requires any maptrack entry user validates the flags field as
+ non-zero first.
********************************************************************************
--- sle12sp1.orig/xen/arch/arm/mm.c 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/arch/arm/mm.c 2015-07-08 13:49:47.000000000 +0200
@@ -1037,7 +1037,7 @@ int xenmem_add_to_physmap_one(
switch ( space )
{
case XENMAPSPACE_grant_table:
- spin_lock(&d->grant_table->lock);
+ write_lock(&d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -1067,7 +1067,7 @@ int xenmem_add_to_physmap_one(
t = p2m_ram_rw;
- spin_unlock(&d->grant_table->lock);
+ write_unlock(&d->grant_table->lock);
break;
case XENMAPSPACE_shared_info:
if ( idx != 0 )
--- sle12sp1.orig/xen/arch/x86/mm.c 2015-07-08 00:00:00.000000000 +0200
+++ sle12sp1/xen/arch/x86/mm.c 2015-07-08 13:49:47.000000000 +0200
@@ -4594,7 +4594,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->shared_info);
break;
case XENMAPSPACE_grant_table:
- spin_lock(&d->grant_table->lock);
+ write_lock(&d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -4616,7 +4616,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
}
- spin_unlock(&d->grant_table->lock);
+ write_unlock(&d->grant_table->lock);
break;
case XENMAPSPACE_gmfn_range:
case XENMAPSPACE_gmfn:
--- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:49:46.000000000 +0200
+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:49:47.000000000 +0200
@@ -196,7 +196,7 @@ active_entry_acquire(struct grant_table
{
struct active_grant_entry *act;
- ASSERT(spin_is_locked(&t->lock));
+ ASSERT(rw_is_locked(&t->lock));
act = &_active_entry(t, e);
spin_lock(&act->lock);
@@ -252,25 +252,29 @@ static int __get_paged_frame(unsigned lo
static inline void
double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
{
+ /*
+ * See mapcount() for why the write lock is also required for the
+ * remote domain.
+ */
if ( lgt < rgt )
{
- spin_lock(&lgt->lock);
- spin_lock(&rgt->lock);
+ write_lock(&lgt->lock);
+ write_lock(&rgt->lock);
}
else
{
if ( lgt != rgt )
- spin_lock(&rgt->lock);
- spin_lock(&lgt->lock);
+ write_lock(&rgt->lock);
+ write_lock(&lgt->lock);
}
}
static inline void
double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
{
- spin_unlock(&lgt->lock);
+ write_unlock(&lgt->lock);
if ( lgt != rgt )
- spin_unlock(&rgt->lock);
+ write_unlock(&rgt->lock);
}
static inline int
@@ -528,7 +532,7 @@ static int grant_map_exists(const struct
{
unsigned int ref, max_iter;
- ASSERT(spin_is_locked(&rgt->lock));
+ ASSERT(rw_is_locked(&rgt->lock));
max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
nr_grant_entries(rgt));
@@ -568,15 +572,15 @@ static void mapcount(
*wrc = *rdc = 0;
/*
- * Must have the local domain's grant table lock when iterating
- * over its maptrack entries.
+ * Must have the local domain's grant table write lock when
+ * iterating over its maptrack entries.
*/
- ASSERT(spin_is_locked(&lgt->lock));
+ ASSERT(rw_is_write_locked(&lgt->lock));
/*
- * Must have the remote domain's grant table lock while counting
- * its active entries.
+ * Must have the remote domain's grant table write lock while
+ * counting its active entries.
*/
- ASSERT(spin_is_locked(&rd->grant_table->lock));
+ ASSERT(rw_is_write_locked(&rd->grant_table->lock));
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
@@ -616,6 +620,7 @@ __gnttab_map_grant_ref(
grant_entry_v2_t *sha2;
grant_entry_header_t *shah;
uint16_t *status;
+ bool_t need_iommu;
led = current;
ld = led->domain;
@@ -661,7 +666,7 @@ __gnttab_map_grant_ref(
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(unlock_out, GNTST_general_error,
@@ -735,7 +740,7 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
if ( !pg )
@@ -811,12 +816,14 @@ __gnttab_map_grant_ref(
goto undo_out;
}
- double_gt_lock(lgt, rgt);
-
- if ( gnttab_need_iommu_mapping(ld) )
+ need_iommu = gnttab_need_iommu_mapping(ld);
+ if ( need_iommu )
{
unsigned int wrc, rdc;
int err = 0;
+
+ double_gt_lock(lgt, rgt);
+
/* We're not translated, so we know that gmfns and mfns are
the same things, so the IOMMU entry is always 1-to-1. */
mapcount(lgt, rd, frame, &wrc, &rdc);
@@ -842,12 +849,22 @@ __gnttab_map_grant_ref(
TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
+ /*
+ * All maptrack entry users check mt->flags first before using the
+ * other fields so just ensure the flags field is stored last.
+ *
+ * However, if gnttab_need_iommu_mapping() then this would race
+ * with a concurrent mapcount() call (on an unmap, for example)
+ * and a lock is required.
+ */
mt = &maptrack_entry(lgt, handle);
mt->domid = op->dom;
mt->ref = op->ref;
- mt->flags = op->flags;
+ wmb();
+ write_atomic(&mt->flags, op->flags);
- double_gt_unlock(lgt, rgt);
+ if ( need_iommu )
+ double_gt_unlock(lgt, rgt);
op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
op->handle = handle;
@@ -870,7 +887,7 @@ __gnttab_map_grant_ref(
put_page(pg);
}
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, op->ref);
@@ -893,7 +910,7 @@ __gnttab_map_grant_ref(
active_entry_release(act);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
op->status = rc;
put_maptrack_handle(lgt, handle);
rcu_unlock_domain(rd);
@@ -943,18 +960,19 @@ __gnttab_unmap_common(
}
op->map = &maptrack_entry(lgt, op->handle);
- spin_lock(&lgt->lock);
- if ( unlikely(!op->map->flags) )
+ read_lock(&lgt->lock);
+
+ if ( unlikely(!read_atomic(&op->map->flags)) )
{
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
op->status = GNTST_bad_handle;
return;
}
dom = op->map->domid;
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
{
@@ -975,9 +993,10 @@ __gnttab_unmap_common(
TRACE_1D(TRC_MEM_PAGE_GRANT_UNMAP, dom);
rgt = rd->grant_table;
- double_gt_lock(lgt, rgt);
- op->flags = op->map->flags;
+ read_lock(&rgt->lock);
+
+ op->flags = read_atomic(&op->map->flags);
if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) )
{
gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle);
@@ -1024,31 +1043,34 @@ __gnttab_unmap_common(
act->pin -= GNTPIN_hstw_inc;
}
- if ( gnttab_need_iommu_mapping(ld) )
+ act_release_out:
+ active_entry_release(act);
+ unmap_out:
+ read_unlock(&rgt->lock);
+
+ if ( rc == GNTST_okay && gnttab_need_iommu_mapping(ld) )
{
unsigned int wrc, rdc;
int err = 0;
+
+ double_gt_lock(lgt, rgt);
+
mapcount(lgt, rd, op->frame, &wrc, &rdc);
if ( (wrc + rdc) == 0 )
err = iommu_unmap_page(ld, op->frame);
else if ( wrc == 0 )
err = iommu_map_page(ld, op->frame, op->frame, IOMMUF_readable);
+
+ double_gt_unlock(lgt, rgt);
+
if ( err )
- {
rc = GNTST_general_error;
- goto act_release_out;
- }
}
/* If just unmapped a writable mapping, mark as dirtied */
- if ( !(op->flags & GNTMAP_readonly) )
+ if ( rc == GNTST_okay && !(op->flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
- act_release_out:
- active_entry_release(act);
- unmap_out:
- double_gt_unlock(lgt, rgt);
-
op->status = rc;
rcu_unlock_domain(rd);
}
@@ -1078,8 +1100,8 @@ __gnttab_unmap_common_complete(struct gn
rcu_lock_domain(rd);
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
goto unlock_out;
@@ -1145,7 +1167,7 @@ __gnttab_unmap_common_complete(struct gn
act_release_out:
active_entry_release(act);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( put_handle )
{
@@ -1332,11 +1354,13 @@ gnttab_unpopulate_status_frames(struct d
gt->nr_status_frames = 0;
}
+/*
+ * Grow the grant table. The caller must hold the grant table's
+ * write lock before calling this function.
+ */
int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
{
- /* d's grant table lock must be held by the caller */
-
struct grant_table *gt = d->grant_table;
unsigned int i, j;
@@ -1442,7 +1466,7 @@ gnttab_setup_table(
}
gt = d->grant_table;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
if ( gt->gt_version == 0 )
gt->gt_version = 1;
@@ -1470,7 +1494,7 @@ gnttab_setup_table(
}
out3:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -1512,13 +1536,13 @@ gnttab_query_size(
goto query_out_unlock;
}
- spin_lock(&d->grant_table->lock);
+ read_lock(&d->grant_table->lock);
op.nr_frames = nr_grant_frames(d->grant_table);
op.max_nr_frames = max_grant_frames;
op.status = GNTST_okay;
- spin_unlock(&d->grant_table->lock);
+ read_unlock(&d->grant_table->lock);
query_out_unlock:
@@ -1544,7 +1568,7 @@ gnttab_prepare_for_transfer(
union grant_combo scombo, prev_scombo, new_scombo;
int retries = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
{
@@ -1595,11 +1619,11 @@ gnttab_prepare_for_transfer(
scombo = prev_scombo;
}
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 1;
fail:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 0;
}
@@ -1614,6 +1638,7 @@ gnttab_transfer(
struct gnttab_transfer gop;
unsigned long mfn;
unsigned int max_bitsize;
+ struct active_grant_entry *act;
for ( i = 0; i < count; i++ )
{
@@ -1791,7 +1816,8 @@ gnttab_transfer(
TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
/* Tell the guest about its new page frame. */
- spin_lock(&e->grant_table->lock);
+ read_lock(&e->grant_table->lock);
+ act = active_entry_acquire(e->grant_table, gop.ref);
if ( e->grant_table->gt_version == 1 )
{
@@ -1809,7 +1835,8 @@ gnttab_transfer(
shared_entry_header(e->grant_table, gop.ref)->flags |=
GTF_transfer_completed;
- spin_unlock(&e->grant_table->lock);
+ active_entry_release(act);
+ read_unlock(&e->grant_table->lock);
rcu_unlock_domain(e);
@@ -1847,7 +1874,7 @@ __release_grant_for_copy(
released_read = 0;
released_write = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
@@ -1889,7 +1916,7 @@ __release_grant_for_copy(
}
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( td != rd )
{
@@ -1947,7 +1974,7 @@ __acquire_grant_for_copy(
*page = NULL;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(gt_unlock_out, GNTST_general_error,
@@ -2023,20 +2050,20 @@ __acquire_grant_for_copy(
* here and reacquire
*/
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
readonly, &grant_frame, page,
&trans_page_off, &trans_length, 0);
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, gref);
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
}
@@ -2049,7 +2076,7 @@ __acquire_grant_for_copy(
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
frame, page, page_off, length,
@@ -2118,7 +2145,7 @@ __acquire_grant_for_copy(
*frame = act->frame;
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
unlock_out_clear:
@@ -2133,7 +2160,7 @@ __acquire_grant_for_copy(
active_entry_release(act);
gt_unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
}
@@ -2307,7 +2334,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
if ( gt->gt_version == op.version )
goto out;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
/* Make sure that the grant table isn't currently in use when we
change the version number, except for the first 8 entries which
are allowed to be in use (xenstore/xenconsole keeps them mapped).
@@ -2392,7 +2419,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
gt->gt_version = op.version;
out_unlock:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out:
op.version = gt->gt_version;
@@ -2448,7 +2475,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
op.status = GNTST_okay;
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
for ( i = 0; i < op.nr_frames; i++ )
{
@@ -2457,7 +2484,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
op.status = GNTST_bad_virt_addr;
}
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -2507,7 +2534,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
if ( gt->gt_version == 0 )
PIN_FAIL(out, GNTST_general_error, "grant table not yet set up\n");
@@ -2558,7 +2585,7 @@ out:
active_entry_release(act_b);
if ( act_a != NULL )
active_entry_release(act_a);
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
rcu_unlock_domain(d);
@@ -2629,12 +2656,12 @@ static int __gnttab_cache_flush(gnttab_c
if ( d != owner )
{
- spin_lock(&owner->grant_table->lock);
+ read_lock(&owner->grant_table->lock);
ret = grant_map_exists(d, owner->grant_table, mfn, ref_count);
if ( ret != 0 )
{
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
rcu_unlock_domain(d);
put_page(page);
return ret;
@@ -2654,7 +2681,7 @@ static int __gnttab_cache_flush(gnttab_c
ret = 0;
if ( d != owner )
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
unmap_domain_page(v);
put_page(page);
@@ -2873,7 +2900,7 @@ grant_table_create(
goto no_mem_0;
/* Simple stuff. */
- spin_lock_init(&t->lock);
+ rwlock_init(&t->lock);
spin_lock_init(&t->maptrack_lock);
t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
@@ -2983,7 +3010,7 @@ gnttab_release_mappings(
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
@@ -3044,7 +3071,7 @@ gnttab_release_mappings(
gnttab_clear_flag(_GTF_reading, status);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
rcu_unlock_domain(rd);
@@ -3092,7 +3119,7 @@ static void gnttab_usage_print(struct do
printk(" -------- active -------- -------- shared --------\n");
printk("[ref] localdom mfn pin localdom gmfn flags\n");
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
if ( gt->gt_version == 0 )
goto out;
@@ -3145,7 +3172,7 @@ static void gnttab_usage_print(struct do
}
out:
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
if ( first )
printk("grant-table for remote domain:%5d ... "
--- sle12sp1.orig/xen/include/xen/grant_table.h 2015-07-08 13:49:46.000000000 +0200
+++ sle12sp1/xen/include/xen/grant_table.h 2015-07-08 13:49:47.000000000 +0200
@@ -64,6 +64,11 @@ struct grant_mapping {
/* Per-domain grant information. */
struct grant_table {
+ /*
+ * Lock protecting updates to grant table state (version, active
+ * entry list, etc.)
+ */
+ rwlock_t lock;
/* Table size. Number of frames shared with guest */
unsigned int nr_grant_frames;
/* Shared grant table (see include/public/grant_table.h). */
@@ -84,8 +89,6 @@ struct grant_table {
unsigned int maptrack_limit;
/* Lock protecting the maptrack page list, head, and limit */
spinlock_t maptrack_lock;
- /* Lock protecting updates to active and shared grant tables. */
- spinlock_t lock;
/* The defined versions are 1 and 2. Set to 0 if we don't know
what version to use yet. */
unsigned gt_version;
@@ -103,7 +106,7 @@ gnttab_release_mappings(
struct domain *d);
/* Increase the size of a domain's grant table.
- * Caller must hold d's grant table lock.
+ * Caller must hold d's grant table write lock.
*/
int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames);

View File

@ -0,0 +1,47 @@
# Commit a622b5ade2bdf79ad95e6088a4041e75253c43f3
# Date 2015-06-16 12:30:16 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: factor out freeing an event channel
We're going to want to free an event channel from two places. Factor out
the code into a free_evtchn() function.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 12:33:47.000000000 +0200
+++ sle12sp1/xen/common/event_channel.c 2015-07-08 13:53:49.000000000 +0200
@@ -194,6 +194,17 @@ static int get_free_port(struct domain *
return port;
}
+static void free_evtchn(struct domain *d, struct evtchn *chn)
+{
+ /* Clear pending event to avoid unexpected behavior on re-bind. */
+ evtchn_port_clear_pending(d, chn);
+
+ /* Reset binding to vcpu0 when the channel is freed. */
+ chn->state = ECS_FREE;
+ chn->notify_vcpu_id = 0;
+
+ xsm_evtchn_close_post(chn);
+}
static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
{
@@ -571,14 +582,7 @@ static long __evtchn_close(struct domain
BUG();
}
- /* Clear pending event to avoid unexpected behavior on re-bind. */
- evtchn_port_clear_pending(d1, chn1);
-
- /* Reset binding to vcpu0 when the channel is freed. */
- chn1->state = ECS_FREE;
- chn1->notify_vcpu_id = 0;
-
- xsm_evtchn_close_post(chn1);
+ free_evtchn(d1, chn1);
out:
if ( d2 != NULL )

View File

@ -0,0 +1,69 @@
# Commit 01280dc19cf3da089f98faf4f524b54b5a191df0
# Date 2015-06-18 14:53:23 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: simplify port_is_valid()
By keeping a count of the number of currently valid event channels,
port_is_valid() can be simplified.
d->valid_evtchns is only increased (while holding d->event_lock), so
port_is_valid() may be safely called without taking the lock (this
will be useful later).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
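The lock-free check relies only on d->valid_evtchns growing monotonically while d->event_lock is held. A sketch of the writer/reader pairing introduced below (illustrative only):

    /*
     * Writer, in get_free_port() with d->event_lock held: publish the
     * new bucket first, then raise the count that makes its ports valid.
     */
    bucket_from_port(d, port) = chn;
    write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET);

    /* Reader, any context, no lock taken: */
    if ( p < read_atomic(&d->valid_evtchns) )
    {
        /* Port p is backed by an allocated struct evtchn. */
    }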
Index: xen-4.5.1-testing/xen/common/event_channel.c
===================================================================
--- xen-4.5.1-testing.orig/xen/common/event_channel.c
+++ xen-4.5.1-testing/xen/common/event_channel.c
@@ -191,6 +191,8 @@ static int get_free_port(struct domain *
return -ENOMEM;
bucket_from_port(d, port) = chn;
+ write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET);
+
return port;
}
@@ -1267,6 +1269,7 @@ int evtchn_init(struct domain *d)
d->evtchn = alloc_evtchn_bucket(d, 0);
if ( !d->evtchn )
return -ENOMEM;
+ d->valid_evtchns = EVTCHNS_PER_BUCKET;
spin_lock_init(&d->event_lock);
if ( get_free_port(d) != 0 )
Index: xen-4.5.1-testing/xen/include/xen/event.h
===================================================================
--- xen-4.5.1-testing.orig/xen/include/xen/event.h
+++ xen-4.5.1-testing/xen/include/xen/event.h
@@ -90,11 +90,7 @@ static inline bool_t port_is_valid(struc
{
if ( p >= d->max_evtchns )
return 0;
- if ( !d->evtchn )
- return 0;
- if ( p < EVTCHNS_PER_BUCKET )
- return 1;
- return group_from_port(d, p) != NULL && bucket_from_port(d, p) != NULL;
+ return p < read_atomic(&d->valid_evtchns);
}
static inline struct evtchn *evtchn_from_port(struct domain *d, unsigned int p)
Index: xen-4.5.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.5.1-testing.orig/xen/include/xen/sched.h
+++ xen-4.5.1-testing/xen/include/xen/sched.h
@@ -335,8 +335,9 @@ struct domain
/* Event channel information. */
struct evtchn *evtchn; /* first bucket only */
struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
- unsigned int max_evtchns;
- unsigned int max_evtchn_port;
+ unsigned int max_evtchns; /* number supported by ABI */
+ unsigned int max_evtchn_port; /* max permitted port number */
+ unsigned int valid_evtchns; /* number of allocated event channels */
spinlock_t event_lock;
const struct evtchn_port_ops *evtchn_port_ops;
struct evtchn_fifo_domain *evtchn_fifo;

View File

@ -0,0 +1,32 @@
# Commit e156654d4eb2fdeb524e6b40838767a5dc918966
# Date 2015-06-18 14:54:25 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: remove the locking when unmasking an event channel
The event channel lock is no longer required to check if the port is
valid.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 13:53:50.000000000 +0200
+++ sle12sp1/xen/common/event_channel.c 2015-07-08 13:54:42.000000000 +0200
@@ -934,8 +934,6 @@ int evtchn_unmask(unsigned int port)
struct domain *d = current->domain;
struct evtchn *evtchn;
- ASSERT(spin_is_locked(&d->event_lock));
-
if ( unlikely(!port_is_valid(d, port)) )
return -EINVAL;
@@ -1102,9 +1100,7 @@ long do_event_channel_op(int cmd, XEN_GU
struct evtchn_unmask unmask;
if ( copy_from_guest(&unmask, arg, 1) != 0 )
return -EFAULT;
- spin_lock(&current->domain->event_lock);
rc = evtchn_unmask(unmask.port);
- spin_unlock(&current->domain->event_lock);
break;
}

View File

@ -0,0 +1,285 @@
# Commit 236e13ce60e1c0eb0535ad258e74a3789bc0d074
# Date 2015-06-19 10:58:45 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI-X: cleanup
- __pci_enable_msix() now checks that an MSI-X capability was actually
found
- pass "pos" to msix_capability_init() as both callers already know it
(and hence there's no need to re-obtain it)
- call __pci_disable_msi{,x}() directly instead of via
pci_disable_msi() from __pci_enable_msi{x,}() state validation paths
- use msix_control_reg() instead of open coding it
- log message adjustments
- coding style corrections
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -35,6 +35,8 @@
static s8 __read_mostly use_msi = -1;
boolean_param("msi", use_msi);
+static void __pci_disable_msix(struct msi_desc *);
+
/* bitmap indicate which fixed map is free */
static DEFINE_SPINLOCK(msix_fixmap_lock);
static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
@@ -129,12 +131,14 @@ void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg
unsigned dest;
memset(msg, 0, sizeof(*msg));
- if ( !cpumask_intersects(cpu_mask, &cpu_online_map) ) {
+ if ( !cpumask_intersects(cpu_mask, &cpu_online_map) )
+ {
dprintk(XENLOG_ERR,"%s, compose msi message error!!\n", __func__);
return;
}
- if ( vector ) {
+ if ( vector )
+ {
cpumask_t *mask = this_cpu(scratch_mask);
cpumask_and(mask, cpu_mask, &cpu_online_map);
@@ -195,8 +199,7 @@ static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
}
case PCI_CAP_ID_MSIX:
{
- void __iomem *base;
- base = entry->mask_base;
+ void __iomem *base = entry->mask_base;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -257,8 +260,7 @@ static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
}
case PCI_CAP_ID_MSIX:
{
- void __iomem *base;
- base = entry->mask_base;
+ void __iomem *base = entry->mask_base;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -281,7 +283,7 @@ void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
struct msi_desc *msi_desc = desc->msi_desc;
dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID || !msi_desc)
+ if ( dest == BAD_APICID || !msi_desc )
return;
ASSERT(spin_is_locked(&desc->lock));
@@ -332,11 +334,11 @@ static void msix_set_enable(struct pci_dev *dev, int enable)
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
if ( pos )
{
- control = pci_conf_read16(seg, bus, slot, func, pos + PCI_MSIX_FLAGS);
+ control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
control &= ~PCI_MSIX_FLAGS_ENABLE;
if ( enable )
control |= PCI_MSIX_FLAGS_ENABLE;
- pci_conf_write16(seg, bus, slot, func, pos + PCI_MSIX_FLAGS, control);
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
}
}
@@ -353,9 +355,11 @@ static void msi_set_mask_bit(struct irq_desc *desc, int flag)
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
- switch (entry->msi_attrib.type) {
+ switch ( entry->msi_attrib.type )
+ {
case PCI_CAP_ID_MSI:
- if (entry->msi_attrib.maskbit) {
+ if ( entry->msi_attrib.maskbit )
+ {
u32 mask_bits;
u16 seg = entry->dev->seg;
u8 bus = entry->dev->bus;
@@ -701,13 +705,14 @@ static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf)
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
**/
static int msix_capability_init(struct pci_dev *dev,
+ unsigned int pos,
struct msi_info *msi,
struct msi_desc **desc,
unsigned int nr_entries)
{
struct arch_msix *msix = dev->msix;
struct msi_desc *entry = NULL;
- int pos, vf;
+ int vf;
u16 control;
u64 table_paddr;
u32 table_offset;
@@ -719,7 +724,6 @@ static int msix_capability_init(struct pci_dev *dev,
ASSERT(spin_is_locked(&pcidevs_lock));
- pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
@@ -884,10 +888,9 @@ static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "irq %d has already mapped to MSI on "
- "device %04x:%02x:%02x.%01x\n",
- msi->irq, msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
+ msi->irq, msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
*desc = old_desc;
return 0;
}
@@ -895,10 +898,10 @@ static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "MSI-X is already in use on "
- "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
- pci_disable_msi(old_desc);
+ printk(XENLOG_WARNING "MSI-X already in use on %04x:%02x:%02x.%u\n",
+ msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ __pci_disable_msix(old_desc);
}
return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
@@ -912,7 +915,6 @@ static void __pci_disable_msi(struct msi_desc *entry)
msi_set_enable(dev, 0);
BUG_ON(list_empty(&dev->msi_list));
-
}
/**
@@ -932,7 +934,7 @@ static void __pci_disable_msi(struct msi_desc *entry)
**/
static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
{
- int status, pos, nr_entries;
+ int pos, nr_entries;
struct pci_dev *pdev;
u16 control;
u8 slot = PCI_SLOT(msi->devfn);
@@ -941,23 +943,22 @@ static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
ASSERT(spin_is_locked(&pcidevs_lock));
pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
- if ( !pdev )
+ pos = pci_find_cap_offset(msi->seg, msi->bus, slot, func, PCI_CAP_ID_MSIX);
+ if ( !pdev || !pos )
return -ENODEV;
- pos = pci_find_cap_offset(msi->seg, msi->bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(msi->seg, msi->bus, slot, func,
msix_control_reg(pos));
nr_entries = multi_msix_capable(control);
- if (msi->entry_nr >= nr_entries)
+ if ( msi->entry_nr >= nr_entries )
return -EINVAL;
old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "irq %d has already mapped to MSIX on "
- "device %04x:%02x:%02x.%01x\n",
- msi->irq, msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
+ msi->irq, msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
*desc = old_desc;
return 0;
}
@@ -965,15 +966,13 @@ static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "MSI is already in use on "
- "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
- pci_disable_msi(old_desc);
-
+ printk(XENLOG_WARNING "MSI already in use on %04x:%02x:%02x.%u\n",
+ msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ __pci_disable_msi(old_desc);
}
- status = msix_capability_init(pdev, msi, desc, nr_entries);
- return status;
+ return msix_capability_init(pdev, pos, msi, desc, nr_entries);
}
static void _pci_cleanup_msix(struct arch_msix *msix)
@@ -991,19 +990,16 @@ static void _pci_cleanup_msix(struct arch_msix *msix)
static void __pci_disable_msix(struct msi_desc *entry)
{
- struct pci_dev *dev;
- int pos;
- u16 control, seg;
- u8 bus, slot, func;
-
- dev = entry->dev;
- seg = dev->seg;
- bus = dev->bus;
- slot = PCI_SLOT(dev->devfn);
- func = PCI_FUNC(dev->devfn);
+ struct pci_dev *dev = entry->dev;
+ u16 seg = dev->seg;
+ u8 bus = dev->bus;
+ u8 slot = PCI_SLOT(dev->devfn);
+ u8 func = PCI_FUNC(dev->devfn);
+ unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
+ PCI_CAP_ID_MSIX);
+ u16 control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
- pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
- control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);
BUG_ON(list_empty(&dev->msi_list));
@@ -1045,7 +1041,7 @@ int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off)
u16 control = pci_conf_read16(seg, bus, slot, func,
msix_control_reg(pos));
- rc = msix_capability_init(pdev, NULL, NULL,
+ rc = msix_capability_init(pdev, pos, NULL, NULL,
multi_msix_capable(control));
}
spin_unlock(&pcidevs_lock);
@@ -1064,8 +1060,8 @@ int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
if ( !use_msi )
return -EPERM;
- return msi->table_base ? __pci_enable_msix(msi, desc) :
- __pci_enable_msi(msi, desc);
+ return msi->table_base ? __pci_enable_msix(msi, desc) :
+ __pci_enable_msi(msi, desc);
}
/*
@@ -1115,7 +1111,9 @@ int pci_restore_msi_state(struct pci_dev *pdev)
if ( !pdev )
return -EINVAL;
- ret = xsm_resource_setup_pci(XSM_PRIV, (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn);
+ ret = xsm_resource_setup_pci(XSM_PRIV,
+ (pdev->seg << 16) | (pdev->bus << 8) |
+ pdev->devfn);
if ( ret )
return ret;

View File

@ -1,5 +1,7 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit ad28e42bd1d28d746988ed71654e8aa670629753
# Date 2015-06-19 10:59:53 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI: track host and guest masking separately
In particular we want to avoid losing track of our own intention to
@ -9,8 +11,8 @@ host and guest requested so.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- trunk.orig/xen/arch/x86/hpet.c 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/arch/x86/hpet.c 2015-03-09 09:44:33.000000000 +0100
--- sle12sp1.orig/xen/arch/x86/hpet.c 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/arch/x86/hpet.c 2015-07-08 00:00:00.000000000 +0200
@@ -240,7 +240,7 @@ static void hpet_msi_unmask(struct irq_d
cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
cfg |= HPET_TN_ENABLE;
@ -29,9 +31,9 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
}
static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
--- trunk.orig/xen/arch/x86/hvm/vmsi.c 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/arch/x86/hvm/vmsi.c 2015-03-09 14:40:46.000000000 +0100
@@ -216,7 +216,6 @@ static int msixtbl_read(
--- sle12sp1.orig/xen/arch/x86/hvm/vmsi.c 2015-07-08 00:00:00.000000000 +0200
+++ sle12sp1/xen/arch/x86/hvm/vmsi.c 2015-07-08 00:00:00.000000000 +0200
@@ -219,7 +219,6 @@ static int msixtbl_read(
{
unsigned long offset;
struct msixtbl_entry *entry;
@ -39,9 +41,9 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
@@ -240,10 +239,16 @@ static int msixtbl_read(
@@ -253,13 +252,20 @@ static int msixtbl_read(
}
else
if ( offset == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
- virt = msixtbl_addr_to_virt(entry, address);
+ const struct msi_desc *msi_desc;
@ -49,16 +51,21 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
if ( !virt )
goto out;
- *pval = readl(virt);
+ msi_desc = virt_to_msi_desc(entry->pdev, virt);
+ if ( !msi_desc )
+ goto out;
+ *pval = MASK_INSR(msi_desc->msi_attrib.guest_masked,
+ PCI_MSIX_VECTOR_BITMASK);
if ( len == 4 )
- *pval = readl(virt);
+ *pval = MASK_INSR(msi_desc->msi_attrib.guest_masked,
+ PCI_MSIX_VECTOR_BITMASK);
else
- *pval |= (u64)readl(virt) << 32;
+ *pval |= (u64)MASK_INSR(msi_desc->msi_attrib.guest_masked,
+ PCI_MSIX_VECTOR_BITMASK) << 32;
}
r = X86EMUL_OKAY;
@@ -261,7 +266,7 @@ static int msixtbl_write(struct vcpu *v,
@@ -277,7 +283,7 @@ static int msixtbl_write(struct vcpu *v,
void *virt;
unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
@ -66,8 +73,8 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+ unsigned long flags;
struct irq_desc *desc;
if ( len != 4 || (address & 3) )
@@ -313,37 +318,7 @@ static int msixtbl_write(struct vcpu *v,
if ( (len != 4 && len != 8) || (address & (len - 1)) )
@@ -337,37 +343,7 @@ static int msixtbl_write(struct vcpu *v,
ASSERT(msi_desc == desc->msi_desc);
@ -106,77 +113,68 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
--- trunk.orig/xen/arch/x86/msi.c 2015-05-18 11:39:36.000000000 +0200
+++ trunk/xen/arch/x86/msi.c 2015-05-18 11:44:39.000000000 +0200
@@ -388,12 +388,13 @@ int msi_maskable_irq(const struct msi_de
--- sle12sp1.orig/xen/arch/x86/msi.c 2015-07-08 00:00:00.000000000 +0200
+++ sle12sp1/xen/arch/x86/msi.c 2015-07-08 00:00:00.000000000 +0200
@@ -349,9 +349,10 @@ int msi_maskable_irq(const struct msi_de
|| entry->msi_attrib.maskbit;
}
-static bool_t msi_set_mask_bit(struct irq_desc *desc, int flag)
+static bool_t msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
-static void msi_set_mask_bit(struct irq_desc *desc, int flag)
+static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
{
struct msi_desc *entry = desc->msi_desc;
struct pci_dev *pdev;
u16 seg, control;
u8 bus, slot, func;
+ bool_t flag = host || guest;
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
@@ -449,7 +450,8 @@ static bool_t msi_set_mask_bit(struct ir
default:
return 0;
@@ -383,7 +384,8 @@ static void msi_set_mask_bit(struct irq_
BUG();
break;
}
- entry->msi_attrib.masked = !!flag;
+ entry->msi_attrib.host_masked = host;
+ entry->msi_attrib.guest_masked = guest;
return 1;
}
@@ -480,22 +482,39 @@ static int msi_get_mask_bit(const struct
static int msi_get_mask_bit(const struct msi_desc *entry)
@@ -405,20 +407,33 @@ static int msi_get_mask_bit(const struct
void mask_msi_irq(struct irq_desc *desc)
{
- if ( unlikely(!msi_set_mask_bit(desc, 1)) )
+ if ( unlikely(!msi_set_mask_bit(desc, 1,
+ desc->msi_desc->msi_attrib.guest_masked)) )
BUG_ON(!(desc->status & IRQ_DISABLED));
- msi_set_mask_bit(desc, 1);
+ msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
}
void unmask_msi_irq(struct irq_desc *desc)
{
- if ( unlikely(!msi_set_mask_bit(desc, 0)) )
+ if ( unlikely(!msi_set_mask_bit(desc, 0,
+ desc->msi_desc->msi_attrib.guest_masked)) )
WARN();
}
- msi_set_mask_bit(desc, 0);
+ msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
+}
+
+void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask)
+{
+ msi_set_mask_bit(desc, desc->msi_desc->msi_attrib.host_masked, mask);
+}
+
}
static unsigned int startup_msi_irq(struct irq_desc *desc)
{
- unmask_msi_irq(desc);
+ bool_t guest_masked = (desc->status & IRQ_GUEST) &&
+ is_hvm_domain(desc->msi_desc->dev->domain);
+
+ if ( unlikely(!msi_set_mask_bit(desc, 0, guest_masked)) )
+ WARN();
+ msi_set_mask_bit(desc, 0, guest_masked);
return 0;
}
+static void shutdown_msi_irq(struct irq_desc *desc)
+{
+ if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
+ BUG_ON(!(desc->status & IRQ_DISABLED));
+ msi_set_mask_bit(desc, 1, 1);
+}
+
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
{
irq_complete_move(desc);
@@ -520,7 +539,7 @@ void end_nonmaskable_msi_irq(struct irq_
@@ -443,7 +458,7 @@ void end_nonmaskable_msi_irq(struct irq_
static hw_irq_controller pci_msi_maskable = {
.typename = "PCI-MSI/-X",
.startup = startup_msi_irq,
@ -185,7 +183,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
.enable = unmask_msi_irq,
.disable = mask_msi_irq,
.ack = ack_maskable_msi_irq,
@@ -690,7 +709,8 @@ static int msi_capability_init(struct pc
@@ -591,7 +606,8 @@ static int msi_capability_init(struct pc
entry[i].msi_attrib.is_64 = is_64bit_address(control);
entry[i].msi_attrib.entry_nr = i;
entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
@ -195,7 +193,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
entry[i].msi_attrib.pos = pos;
if ( entry[i].msi_attrib.maskbit )
entry[i].msi.mpos = mpos;
@@ -939,7 +959,8 @@ static int msix_capability_init(struct p
@@ -817,7 +833,8 @@ static int msix_capability_init(struct p
entry->msi_attrib.is_64 = 1;
entry->msi_attrib.entry_nr = msi->entry_nr;
entry->msi_attrib.maskbit = 1;
@ -205,17 +203,17 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
entry->msi_attrib.pos = pos;
entry->irq = msi->irq;
entry->dev = dev;
@@ -1309,7 +1330,8 @@ int pci_restore_msi_state(struct pci_dev
@@ -1152,7 +1169,8 @@ int pci_restore_msi_state(struct pci_dev
for ( i = 0; ; )
{
if ( unlikely(!msi_set_mask_bit(desc,
- entry[i].msi_attrib.masked)) )
+ entry[i].msi_attrib.host_masked,
+ entry[i].msi_attrib.guest_masked)) )
BUG();
- msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+ msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked,
+ entry[i].msi_attrib.guest_masked);
if ( !--nr )
@@ -1462,7 +1484,7 @@ static void dump_msi(unsigned char key)
break;
@@ -1304,7 +1322,7 @@ static void dump_msi(unsigned char key)
else
mask = '?';
printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
@ -224,7 +222,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
type, irq,
(data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
@@ -1470,7 +1492,10 @@ static void dump_msi(unsigned char key)
@@ -1312,7 +1330,10 @@ static void dump_msi(unsigned char key)
data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
@ -236,8 +234,8 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
}
}
--- trunk.orig/xen/drivers/passthrough/amd/iommu_init.c 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/drivers/passthrough/amd/iommu_init.c 2015-03-09 09:44:48.000000000 +0100
--- sle12sp1.orig/xen/drivers/passthrough/amd/iommu_init.c 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/drivers/passthrough/amd/iommu_init.c 2015-07-08 00:00:00.000000000 +0200
@@ -451,7 +451,7 @@ static void iommu_msi_unmask(struct irq_
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
@ -256,8 +254,8 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
}
static unsigned int iommu_msi_startup(struct irq_desc *desc)
--- trunk.orig/xen/drivers/passthrough/vtd/iommu.c 2015-05-19 23:16:48.000000000 +0200
+++ trunk/xen/drivers/passthrough/vtd/iommu.c 2015-03-09 09:44:58.000000000 +0100
--- sle12sp1.orig/xen/drivers/passthrough/vtd/iommu.c 2015-05-19 23:16:48.000000000 +0200
+++ sle12sp1/xen/drivers/passthrough/vtd/iommu.c 2015-07-08 00:00:00.000000000 +0200
@@ -996,7 +996,7 @@ static void dma_msi_unmask(struct irq_de
spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
@ -276,8 +274,8 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
}
static unsigned int dma_msi_startup(struct irq_desc *desc)
--- trunk.orig/xen/include/asm-x86/msi.h 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/include/asm-x86/msi.h 2015-03-09 09:42:49.000000000 +0100
--- sle12sp1.orig/xen/include/asm-x86/msi.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/asm-x86/msi.h 2015-07-08 00:00:00.000000000 +0200
@@ -90,12 +90,13 @@ extern unsigned int pci_msix_get_table_l
struct msi_desc {

View File

@ -0,0 +1,284 @@
# Commit dff515dfeac4c1c13422a128c558ac21ddc6c8db
# Date 2015-06-19 11:01:24 +0200
# Author Malcolm Crossley <malcolm.crossley@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: use per-VCPU maptrack free lists
Performance analysis of aggregate network throughput with many VMs
shows that performance is significantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.
Instead of a single free list, use a per-VCPU list. This avoids any
contention when obtaining a handle. Handles must be released back to
their original list, and since this may occur on a different VCPU, there
is some contention on the destination VCPU's free list tail pointer
(but this is much better than a per-domain lock).
Increase the default maximum number of maptrack frames by 4 times
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
b) a guest may not evenly distribute all the grant map operations
across the VCPUs (meaning some VCPUs need more maptrack entries than
others).
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
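A sketch of the per-VCPU free list this introduces (illustrative only; "owner" below is shorthand for the VCPU recorded in the maptrack entry, and the real code in the hunks also handles page allocation and the tail sentinel):

    /* Allocation: pop from the current VCPU's own list, no lock needed. */
    handle = v->maptrack_head;
    v->maptrack_head = maptrack_entry(t, handle).ref;

    /*
     * Release, possibly from a different VCPU: swing the owner's tail to
     * the freed handle (retrying if another releaser got there first),
     * then link the previous tail entry to it.
     */
    maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
    cur_tail = read_atomic(&owner->maptrack_tail);
    do {
        prev_tail = cur_tail;
        cur_tail = cmpxchg(&owner->maptrack_tail, prev_tail, handle);
    } while ( cur_tail != prev_tail );
    write_atomic(&maptrack_entry(t, prev_tail).ref, handle);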
--- sle12sp1.orig/xen/common/domain.c 2015-07-08 00:00:00.000000000 +0200
+++ sle12sp1/xen/common/domain.c 2015-07-08 13:52:23.000000000 +0200
@@ -126,6 +126,8 @@ struct vcpu *alloc_vcpu(
tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
+ grant_table_init_vcpu(v);
+
if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
!zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
!zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
--- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:49:47.000000000 +0200
+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:52:23.000000000 +0200
@@ -37,6 +37,7 @@
#include <xen/iommu.h>
#include <xen/paging.h>
#include <xen/keyhandler.h>
+#include <xen/vmap.h>
#include <xsm/xsm.h>
#include <asm/flushtlb.h>
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_g
* New options allow to set max_maptrack_frames and
* map_grant_table_frames independently.
*/
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
static unsigned int __read_mostly max_maptrack_frames;
integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
@@ -279,62 +280,103 @@ double_gt_unlock(struct grant_table *lgt
static inline int
__get_maptrack_handle(
- struct grant_table *t)
+ struct grant_table *t,
+ struct vcpu *v)
{
- unsigned int h;
- if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+ unsigned int head, next;
+
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = v->maptrack_head;
+ if ( unlikely(head == MAPTRACK_TAIL) )
return -1;
- t->maptrack_head = maptrack_entry(t, h).ref;
- return h;
+
+ /*
+ * Always keep one entry in the free list to make it easier to add
+ * free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
+ return -1;
+
+ v->maptrack_head = next;
+
+ return head;
}
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
{
- spin_lock(&t->maptrack_lock);
- maptrack_entry(t, handle).ref = t->maptrack_head;
- t->maptrack_head = handle;
- spin_unlock(&t->maptrack_lock);
+ struct domain *currd = current->domain;
+ struct vcpu *v;
+ unsigned int prev_tail, cur_tail;
+
+ /* 1. Set entry to be a tail. */
+ maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+
+ /* 2. Add entry to the tail of the list on the original VCPU. */
+ v = currd->vcpu[maptrack_entry(t, handle).vcpu];
+
+ cur_tail = read_atomic(&v->maptrack_tail);
+ do {
+ prev_tail = cur_tail;
+ cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
+ } while ( cur_tail != prev_tail );
+
+ /* 3. Update the old tail entry to point to the new entry. */
+ write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
}
static inline int
get_maptrack_handle(
struct grant_table *lgt)
{
+ struct vcpu *curr = current;
int i;
grant_handle_t handle;
struct grant_mapping *new_mt;
- unsigned int new_mt_limit, nr_frames;
+
+ handle = __get_maptrack_handle(lgt, curr);
+ if ( likely(handle != -1) )
+ return handle;
spin_lock(&lgt->maptrack_lock);
- while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
+ if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
- nr_frames = nr_maptrack_frames(lgt);
- if ( nr_frames >= max_maptrack_frames )
- break;
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
- new_mt = alloc_xenheap_page();
- if ( !new_mt )
- break;
+ new_mt = alloc_xenheap_page();
+ if ( !new_mt )
+ {
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
+ clear_page(new_mt);
- clear_page(new_mt);
+ /*
+ * Use the first new entry and add the remaining entries to the
+ * head of the free list.
+ */
+ handle = lgt->maptrack_limit;
- new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+ for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
+ {
+ new_mt[i].ref = handle + i + 1;
+ new_mt[i].vcpu = curr->vcpu_id;
+ }
+ new_mt[i - 1].ref = curr->maptrack_head;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- new_mt[i - 1].ref = lgt->maptrack_limit + i;
- new_mt[i - 1].ref = lgt->maptrack_head;
- lgt->maptrack_head = lgt->maptrack_limit;
+ /* Set tail directly if this is the first page for this VCPU. */
+ if ( curr->maptrack_tail == MAPTRACK_TAIL )
+ curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
- lgt->maptrack[nr_frames] = new_mt;
- smp_wmb();
- lgt->maptrack_limit = new_mt_limit;
+ curr->maptrack_head = handle + 1;
- gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
- nr_frames + 1);
- }
+ lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
+ lgt->maptrack_limit += MAPTRACK_PER_PAGE;
spin_unlock(&lgt->maptrack_lock);
@@ -2919,16 +2961,9 @@ grant_table_create(
}
/* Tracking of mapped foreign frames table */
- if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
- max_maptrack_frames)) == NULL )
+ t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
+ if ( t->maptrack == NULL )
goto no_mem_2;
- if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
- goto no_mem_3;
- clear_page(t->maptrack[0]);
- t->maptrack_limit = MAPTRACK_PER_PAGE;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- t->maptrack[0][i - 1].ref = i;
- t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
/* Shared grant table. */
if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
@@ -2960,8 +2995,7 @@ grant_table_create(
free_xenheap_page(t->shared_raw[i]);
xfree(t->shared_raw);
no_mem_3:
- free_xenheap_page(t->maptrack[0]);
- xfree(t->maptrack);
+ vfree(t->maptrack);
no_mem_2:
for ( i = 0;
i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
@@ -3096,7 +3130,7 @@ grant_table_destroy(
for ( i = 0; i < nr_maptrack_frames(t); i++ )
free_xenheap_page(t->maptrack[i]);
- xfree(t->maptrack);
+ vfree(t->maptrack);
for ( i = 0; i < nr_active_grant_frames(t); i++ )
free_xenheap_page(t->active[i]);
@@ -3110,6 +3144,12 @@ grant_table_destroy(
d->grant_table = NULL;
}
+void grant_table_init_vcpu(struct vcpu *v)
+{
+ v->maptrack_head = MAPTRACK_TAIL;
+ v->maptrack_tail = MAPTRACK_TAIL;
+}
+
static void gnttab_usage_print(struct domain *rd)
{
int first = 1;
--- sle12sp1.orig/xen/include/xen/grant_table.h 2015-07-08 13:49:47.000000000 +0200
+++ sle12sp1/xen/include/xen/grant_table.h 2015-07-08 13:52:23.000000000 +0200
@@ -60,6 +60,8 @@ struct grant_mapping {
u32 ref; /* grant ref */
u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */
domid_t domid; /* granting domain */
+ u32 vcpu; /* vcpu which created the grant mapping */
+ u32 pad; /* round size to a power of 2 */
};
/* Per-domain grant information. */
@@ -83,9 +85,8 @@ struct grant_table {
grant_status_t **status;
/* Active grant table. */
struct active_grant_entry **active;
- /* Mapping tracking table. */
+ /* Mapping tracking table per vcpu. */
struct grant_mapping **maptrack;
- unsigned int maptrack_head;
unsigned int maptrack_limit;
/* Lock protecting the maptrack page list, head, and limit */
spinlock_t maptrack_lock;
@@ -99,6 +100,7 @@ int grant_table_create(
struct domain *d);
void grant_table_destroy(
struct domain *d);
+void grant_table_init_vcpu(struct vcpu *v);
/* Domain death release of granted mappings of other domains' memory. */
void
--- sle12sp1.orig/xen/include/xen/sched.h 2015-01-14 18:44:18.000000000 +0100
+++ sle12sp1/xen/include/xen/sched.h 2015-07-08 13:52:23.000000000 +0200
@@ -219,6 +219,10 @@ struct vcpu
/* VCPU paused by system controller. */
int controller_pause_count;
+ /* Maptrack */
+ unsigned int maptrack_head;
+ unsigned int maptrack_tail;
+
/* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
evtchn_port_t virq_to_evtchn[NR_VIRQS];
spinlock_t virq_lock;

View File

@ -0,0 +1,153 @@
# Commit e76ff6c156906b515c2a4300a81c95886ece5d5f
# Date 2015-06-19 11:02:04 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: steal maptrack entries from other VCPUs
If a guest does not grant map evenly across its VCPUs, one of the
VCPUs may run out of free maptrack entries even though other VCPUs
have many free.
If this happens, "steal" free entries from other VCPUs. We want to
steal entries such that:
a) We avoid ping-ponging stolen entries between VCPUs.
b) The number of free entries owned by each VCPU tends (over time) to
the number it uses.
So when stealing, we select a VCPU at random (reducing (a)) and we
transfer the stolen entries to the thief VCPU (aiming for (b)).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
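The resulting allocation order in get_maptrack_handle() can be summarised as follows (illustrative sketch, simplified from the hunks below):

    /* 1. Try the current VCPU's own free list (lock free). */
    handle = __get_maptrack_handle(lgt, curr);
    if ( handle != -1 )
        return handle;

    /*
     * 2. If the table is already at max_maptrack_frames, steal an entry
     *    from a randomly chosen victim VCPU instead of failing outright.
     */
    if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
        return steal_maptrack_handle(lgt, curr);

    /* 3. Otherwise allocate a fresh maptrack page for this VCPU
     *    (allocation path elided here). */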
--- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:52:23.000000000 +0200
+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:52:31.000000000 +0200
@@ -283,26 +283,70 @@ __get_maptrack_handle(
struct grant_table *t,
struct vcpu *v)
{
- unsigned int head, next;
+ unsigned int head, next, prev_head;
- /* No maptrack pages allocated for this VCPU yet? */
- head = v->maptrack_head;
- if ( unlikely(head == MAPTRACK_TAIL) )
- return -1;
-
- /*
- * Always keep one entry in the free list to make it easier to add
- * free entries to the tail.
- */
- next = read_atomic(&maptrack_entry(t, head).ref);
- if ( unlikely(next == MAPTRACK_TAIL) )
- return -1;
+ do {
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = read_atomic(&v->maptrack_head);
+ if ( unlikely(head == MAPTRACK_TAIL) )
+ return -1;
- v->maptrack_head = next;
+ /*
+ * Always keep one entry in the free list to make it easier to
+ * add free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
+ return -1;
+
+ prev_head = head;
+ head = cmpxchg(&v->maptrack_head, prev_head, next);
+ } while ( head != prev_head );
return head;
}
+/*
+ * Try to "steal" a free maptrack entry from another VCPU.
+ *
+ * A stolen entry is transferred to the thief, so the number of
+ * entries for each VCPU should tend to the usage pattern.
+ *
+ * To avoid having to atomically count the number of free entries on
+ * each VCPU and to avoid two VCPU repeatedly stealing entries from
+ * each other, the initial victim VCPU is selected randomly.
+ */
+static int steal_maptrack_handle(struct grant_table *t,
+ const struct vcpu *curr)
+{
+ const struct domain *currd = curr->domain;
+ unsigned int first, i;
+
+ /* Find an initial victim. */
+ first = i = get_random() % currd->max_vcpus;
+
+ do {
+ if ( currd->vcpu[i] )
+ {
+ int handle;
+
+ handle = __get_maptrack_handle(t, currd->vcpu[i]);
+ if ( handle != -1 )
+ {
+ maptrack_entry(t, handle).vcpu = curr->vcpu_id;
+ return handle;
+ }
+ }
+
+ i++;
+ if ( i == currd->max_vcpus )
+ i = 0;
+ } while ( i != first );
+
+ /* No free handles on any VCPU. */
+ return -1;
+}
+
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
@@ -342,10 +386,31 @@ get_maptrack_handle(
spin_lock(&lgt->maptrack_lock);
+ /*
+ * If we've run out of frames, try stealing an entry from another
+ * VCPU (in case the guest isn't mapping across its VCPUs evenly).
+ */
if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
+ /*
+ * Can drop the lock since no other VCPU can be adding a new
+ * frame once they've run out.
+ */
spin_unlock(&lgt->maptrack_lock);
- return -1;
+
+ /*
+ * Uninitialized free list? Steal an extra entry for the tail
+ * sentinel.
+ */
+ if ( curr->maptrack_tail == MAPTRACK_TAIL )
+ {
+ handle = steal_maptrack_handle(lgt, curr);
+ if ( handle == -1 )
+ return -1;
+ curr->maptrack_tail = handle;
+ write_atomic(&curr->maptrack_head, handle);
+ }
+ return steal_maptrack_handle(lgt, curr);
}
new_mt = alloc_xenheap_page();
@@ -373,7 +438,7 @@ get_maptrack_handle(
if ( curr->maptrack_tail == MAPTRACK_TAIL )
curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
- curr->maptrack_head = handle + 1;
+ write_atomic(&curr->maptrack_head, handle + 1);
lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
lgt->maptrack_limit += MAPTRACK_PER_PAGE;

View File

@ -0,0 +1,105 @@
# Commit b399386bcdb9d458f5647476a06fe86f5968d87e
# Date 2015-06-22 11:36:17 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: clear xen_consumer when clearing state
Freeing a xen event channel would clear xen_consumer before clearing
the channel state, leaving a window where the channel is in a funny
state (still bound but no consumer).
Move the clear of xen_consumer into free_evtchn() where the state is
also cleared.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Ditch the pointless evtchn_close() wrapper around __evtchn_close()
(renaming the latter) as well as some bogus casts of function results
to void.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 13:54:42.000000000 +0200
+++ sle12sp1/xen/common/event_channel.c 2015-07-08 13:57:44.000000000 +0200
@@ -204,6 +204,7 @@ static void free_evtchn(struct domain *d
/* Reset binding to vcpu0 when the channel is freed. */
chn->state = ECS_FREE;
chn->notify_vcpu_id = 0;
+ chn->xen_consumer = 0;
xsm_evtchn_close_post(chn);
}
@@ -470,7 +471,7 @@ static long evtchn_bind_pirq(evtchn_bind
}
-static long __evtchn_close(struct domain *d1, int port1)
+static long evtchn_close(struct domain *d1, int port1, bool_t guest)
{
struct domain *d2 = NULL;
struct vcpu *v;
@@ -490,7 +491,7 @@ static long __evtchn_close(struct domain
chn1 = evtchn_from_port(d1, port1);
/* Guest cannot close a Xen-attached event channel. */
- if ( unlikely(consumer_is_xen(chn1)) )
+ if ( unlikely(consumer_is_xen(chn1)) && guest )
{
rc = -EINVAL;
goto out;
@@ -599,12 +600,6 @@ static long __evtchn_close(struct domain
return rc;
}
-
-static long evtchn_close(evtchn_close_t *close)
-{
- return __evtchn_close(current->domain, close->port);
-}
-
int evtchn_send(struct domain *d, unsigned int lport)
{
struct evtchn *lchn, *rchn;
@@ -959,7 +954,7 @@ static long evtchn_reset(evtchn_reset_t
goto out;
for ( i = 0; port_is_valid(d, i); i++ )
- (void)__evtchn_close(d, i);
+ evtchn_close(d, i, 1);
spin_lock(&d->event_lock);
@@ -1066,7 +1061,7 @@ long do_event_channel_op(int cmd, XEN_GU
struct evtchn_close close;
if ( copy_from_guest(&close, arg, 1) != 0 )
return -EFAULT;
- rc = evtchn_close(&close);
+ rc = evtchn_close(current->domain, close.port, 1);
break;
}
@@ -1196,11 +1191,10 @@ void free_xen_event_channel(
BUG_ON(!port_is_valid(d, port));
chn = evtchn_from_port(d, port);
BUG_ON(!consumer_is_xen(chn));
- chn->xen_consumer = 0;
spin_unlock(&d->event_lock);
- (void)__evtchn_close(d, port);
+ evtchn_close(d, port, 0);
}
@@ -1299,10 +1293,7 @@ void evtchn_destroy(struct domain *d)
/* Close all existing event channels. */
for ( i = 0; port_is_valid(d, i); i++ )
- {
- evtchn_from_port(d, i)->xen_consumer = 0;
- (void)__evtchn_close(d, i);
- }
+ evtchn_close(d, i, 0);
/* Free all event-channel buckets. */
spin_lock(&d->event_lock);


@@ -0,0 +1,110 @@
# Commit a753f0e53ff973a8a066e86c1cb3d6dd5c68d59f
# Date 2015-06-22 11:38:01 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: defer freeing struct evtchn's until evtchn_destroy_final()
notify_via_xen_event_channel() and free_xen_event_channel() had to
check if the domain was dying because they may be called while the
domain is being destroyed and the struct evtchn's are being freed.
By deferring the freeing of the struct evtchn's until all references
to the domain are dropped, these functions can rely on the channel
state being present and valid.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
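As a rough standalone sketch of the resulting two-phase teardown (illustrative only; the names are invented and the real code walks the domain's event-channel buckets): the first phase closes channels while the domain is dying but frees nothing, so late callers still find valid state; the second phase frees the memory only once the last reference to the domain is gone.

#include <stdlib.h>

struct channel { int state; };                 /* stand-in for struct evtchn */

struct dom {
    struct channel **buckets;
    unsigned int nr_buckets;
};

/* Phase 1: called from domain teardown - close, but do not free. */
static void dom_destroy(struct dom *d)
{
    for (unsigned int i = 0; i < d->nr_buckets; i++)
        if (d->buckets[i])
            d->buckets[i]->state = 0;          /* the ECS_FREE equivalent */
}

/* Phase 2: called only after the last reference to the domain is dropped. */
static void dom_destroy_final(struct dom *d)
{
    for (unsigned int i = 0; i < d->nr_buckets; i++)
        free(d->buckets[i]);
    free(d->buckets);
    d->buckets = NULL;
    d->nr_buckets = 0;
}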
--- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 13:57:44.000000000 +0200
+++ sle12sp1/xen/common/event_channel.c 2015-07-08 14:00:53.000000000 +0200
@@ -1177,22 +1177,8 @@ int alloc_unbound_xen_event_channel(
void free_xen_event_channel(
struct vcpu *local_vcpu, int port)
{
- struct evtchn *chn;
struct domain *d = local_vcpu->domain;
-
- spin_lock(&d->event_lock);
-
- if ( unlikely(d->is_dying) )
- {
- spin_unlock(&d->event_lock);
- return;
- }
-
BUG_ON(!port_is_valid(d, port));
- chn = evtchn_from_port(d, port);
- BUG_ON(!consumer_is_xen(chn));
-
- spin_unlock(&d->event_lock);
evtchn_close(d, port, 0);
}
@@ -1206,18 +1192,12 @@ void notify_via_xen_event_channel(struct
spin_lock(&ld->event_lock);
- if ( unlikely(ld->is_dying) )
- {
- spin_unlock(&ld->event_lock);
- return;
- }
-
ASSERT(port_is_valid(ld, lport));
lchn = evtchn_from_port(ld, lport);
- ASSERT(consumer_is_xen(lchn));
if ( likely(lchn->state == ECS_INTERDOMAIN) )
{
+ ASSERT(consumer_is_xen(lchn));
rd = lchn->u.interdomain.remote_dom;
rport = lchn->u.interdomain.remote_port;
rchn = evtchn_from_port(rd, rport);
@@ -1285,7 +1265,7 @@ int evtchn_init(struct domain *d)
void evtchn_destroy(struct domain *d)
{
- unsigned int i, j;
+ unsigned int i;
/* After this barrier no new event-channel allocations can occur. */
BUG_ON(!d->is_dying);
@@ -1295,8 +1275,17 @@ void evtchn_destroy(struct domain *d)
for ( i = 0; port_is_valid(d, i); i++ )
evtchn_close(d, i, 0);
+ clear_global_virq_handlers(d);
+
+ evtchn_fifo_destroy(d);
+}
+
+
+void evtchn_destroy_final(struct domain *d)
+{
+ unsigned int i, j;
+
/* Free all event-channel buckets. */
- spin_lock(&d->event_lock);
for ( i = 0; i < NR_EVTCHN_GROUPS; i++ )
{
if ( !d->evtchn_group[i] )
@@ -1304,20 +1293,9 @@ void evtchn_destroy(struct domain *d)
for ( j = 0; j < BUCKETS_PER_GROUP; j++ )
free_evtchn_bucket(d, d->evtchn_group[i][j]);
xfree(d->evtchn_group[i]);
- d->evtchn_group[i] = NULL;
}
free_evtchn_bucket(d, d->evtchn);
- d->evtchn = NULL;
- spin_unlock(&d->event_lock);
- clear_global_virq_handlers(d);
-
- evtchn_fifo_destroy(d);
-}
-
-
-void evtchn_destroy_final(struct domain *d)
-{
#if MAX_VIRT_CPUS > BITS_PER_LONG
xfree(d->poll_mask);
d->poll_mask = NULL;


@@ -0,0 +1,257 @@
# Commit de6acb78bf0e137cbe5b72cee4a35ca018d759cc
# Date 2015-06-22 11:39:03 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: use a per-event channel lock for sending events
When sending an event, use a new per-event channel lock to safely
validate the event channel state.
This new lock must be held when changing event channel state. Note
that the event channel lock must also be held when changing state from
ECS_FREE or it will race with a concurrent get_free_port() call.
To avoid having to take the remote event channel locks when sending to
an interdomain event channel, the local and remote channel locks are
both held when binding or closing an interdomain event channel.
This significantly increases the number of events that can be sent
from multiple VCPUs. But struct evtchn increases in size, reducing
the number that fit into a single page to 64 (instead of 128).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
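The deadlock-avoidance rule described above - lock the lower-addressed channel first, and take only a single lock when both ends are the same channel - can be sketched on its own. This is not the Xen code; pthread mutexes merely stand in for Xen's spinlocks:

#include <pthread.h>

struct chan { pthread_mutex_t lock; };

static void double_chan_lock(struct chan *lchn, struct chan *rchn)
{
    if (lchn < rchn) {
        pthread_mutex_lock(&lchn->lock);
        pthread_mutex_lock(&rchn->lock);
    } else {
        if (lchn != rchn)
            pthread_mutex_lock(&rchn->lock);
        pthread_mutex_lock(&lchn->lock);
    }
}

static void double_chan_unlock(struct chan *lchn, struct chan *rchn)
{
    pthread_mutex_unlock(&lchn->lock);
    if (lchn != rchn)
        pthread_mutex_unlock(&rchn->lock);
}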
--- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 14:00:53.000000000 +0200
+++ sle12sp1/xen/common/event_channel.c 2015-07-08 14:04:08.000000000 +0200
@@ -141,6 +141,7 @@ static struct evtchn *alloc_evtchn_bucke
return NULL;
}
chn[i].port = port + i;
+ spin_lock_init(&chn[i].lock);
}
return chn;
}
@@ -231,11 +232,15 @@ static long evtchn_alloc_unbound(evtchn_
if ( rc )
goto out;
+ spin_lock(&chn->lock);
+
chn->state = ECS_UNBOUND;
if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF )
chn->u.unbound.remote_domid = current->domain->domain_id;
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
alloc->port = port;
out:
@@ -246,6 +251,28 @@ static long evtchn_alloc_unbound(evtchn_
}
+static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
+{
+ if ( lchn < rchn )
+ {
+ spin_lock(&lchn->lock);
+ spin_lock(&rchn->lock);
+ }
+ else
+ {
+ if ( lchn != rchn )
+ spin_lock(&rchn->lock);
+ spin_lock(&lchn->lock);
+ }
+}
+
+static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn)
+{
+ spin_unlock(&lchn->lock);
+ if ( lchn != rchn )
+ spin_unlock(&rchn->lock);
+}
+
static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
{
struct evtchn *lchn, *rchn;
@@ -288,6 +315,8 @@ static long evtchn_bind_interdomain(evtc
if ( rc )
goto out;
+ double_evtchn_lock(lchn, rchn);
+
lchn->u.interdomain.remote_dom = rd;
lchn->u.interdomain.remote_port = rport;
lchn->state = ECS_INTERDOMAIN;
@@ -303,6 +332,8 @@ static long evtchn_bind_interdomain(evtc
*/
evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
+ double_evtchn_unlock(lchn, rchn);
+
bind->local_port = lport;
out:
@@ -343,11 +374,16 @@ static long evtchn_bind_virq(evtchn_bind
ERROR_EXIT(port);
chn = evtchn_from_port(d, port);
+
+ spin_lock(&chn->lock);
+
chn->state = ECS_VIRQ;
chn->notify_vcpu_id = vcpu;
chn->u.virq = virq;
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
v->virq_to_evtchn[virq] = bind->port = port;
out:
@@ -374,10 +410,15 @@ static long evtchn_bind_ipi(evtchn_bind_
ERROR_EXIT(port);
chn = evtchn_from_port(d, port);
+
+ spin_lock(&chn->lock);
+
chn->state = ECS_IPI;
chn->notify_vcpu_id = vcpu;
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
bind->port = port;
out:
@@ -452,11 +493,15 @@ static long evtchn_bind_pirq(evtchn_bind
goto out;
}
+ spin_lock(&chn->lock);
+
chn->state = ECS_PIRQ;
chn->u.pirq.irq = pirq;
link_pirq_port(port, chn, v);
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
bind->port = port;
#ifdef CONFIG_X86
@@ -577,15 +622,24 @@ static long evtchn_close(struct domain *
BUG_ON(chn2->state != ECS_INTERDOMAIN);
BUG_ON(chn2->u.interdomain.remote_dom != d1);
+ double_evtchn_lock(chn1, chn2);
+
+ free_evtchn(d1, chn1);
+
chn2->state = ECS_UNBOUND;
chn2->u.unbound.remote_domid = d1->domain_id;
- break;
+
+ double_evtchn_unlock(chn1, chn2);
+
+ goto out;
default:
BUG();
}
+ spin_lock(&chn1->lock);
free_evtchn(d1, chn1);
+ spin_unlock(&chn1->lock);
out:
if ( d2 != NULL )
@@ -607,21 +661,18 @@ int evtchn_send(struct domain *d, unsign
struct vcpu *rvcpu;
int rport, ret = 0;
- spin_lock(&ld->event_lock);
-
- if ( unlikely(!port_is_valid(ld, lport)) )
- {
- spin_unlock(&ld->event_lock);
+ if ( !port_is_valid(ld, lport) )
return -EINVAL;
- }
lchn = evtchn_from_port(ld, lport);
+ spin_lock(&lchn->lock);
+
/* Guest cannot send via a Xen-attached event channel. */
if ( unlikely(consumer_is_xen(lchn)) )
{
- spin_unlock(&ld->event_lock);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
ret = xsm_evtchn_send(XSM_HOOK, ld, lchn);
@@ -651,7 +702,7 @@ int evtchn_send(struct domain *d, unsign
}
out:
- spin_unlock(&ld->event_lock);
+ spin_unlock(&lchn->lock);
return ret;
}
@@ -1162,11 +1213,15 @@ int alloc_unbound_xen_event_channel(
if ( rc )
goto out;
+ spin_lock(&chn->lock);
+
chn->state = ECS_UNBOUND;
chn->xen_consumer = get_xen_consumer(notification_fn);
chn->notify_vcpu_id = local_vcpu->vcpu_id;
chn->u.unbound.remote_domid = remote_domid;
+ spin_unlock(&chn->lock);
+
out:
spin_unlock(&d->event_lock);
@@ -1190,11 +1245,11 @@ void notify_via_xen_event_channel(struct
struct domain *rd;
int rport;
- spin_lock(&ld->event_lock);
-
ASSERT(port_is_valid(ld, lport));
lchn = evtchn_from_port(ld, lport);
+ spin_lock(&lchn->lock);
+
if ( likely(lchn->state == ECS_INTERDOMAIN) )
{
ASSERT(consumer_is_xen(lchn));
@@ -1204,7 +1259,7 @@ void notify_via_xen_event_channel(struct
evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
}
- spin_unlock(&ld->event_lock);
+ spin_unlock(&lchn->lock);
}
void evtchn_check_pollers(struct domain *d, unsigned int port)
--- sle12sp1.orig/xen/include/xen/sched.h 2015-07-08 13:53:50.000000000 +0200
+++ sle12sp1/xen/include/xen/sched.h 2015-07-08 14:04:08.000000000 +0200
@@ -79,6 +79,7 @@ extern domid_t hardware_domid;
struct evtchn
{
+ spinlock_t lock;
#define ECS_FREE 0 /* Channel is available for use. */
#define ECS_RESERVED 1 /* Channel is reserved. */
#define ECS_UNBOUND 2 /* Channel is waiting to bind to a remote domain. */


@@ -0,0 +1,27 @@
# Commit b58214a24231a1f2a7e09ae9cc3014eff752918b
# Date 2015-06-22 11:39:46 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: pad struct evtchn to 64 bytes
The number of struct evtchn in a page must be a power of two. Under
some workloads performance is improved slightly by padding struct
evtchn to 64 bytes (a typical cache line size), thus putting fewer
per-channel locks into each cache line.
This does not decrease the number of struct evtchn's per-page.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
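A standalone illustration of the size constraint (not the Xen header; the struct members are placeholders): aligning the structure to 64 bytes keeps a whole, power-of-two number of entries in each 4096-byte page, which compile-time checks can enforce.

#include <stddef.h>

struct chan {                        /* stand-in for struct evtchn */
    unsigned long lock;
    unsigned int state;
    unsigned int notify_vcpu_id;
    /* ... remaining per-channel state ... */
} __attribute__((aligned(64)));      /* one typical cache line per entry */

/* 4096 / 64 = 64 entries per page, still a power of two. */
typedef char entry_is_one_line[(sizeof(struct chan) == 64) ? 1 : -1];
typedef char per_page_is_pow2[(((4096 / sizeof(struct chan)) &
                                ((4096 / sizeof(struct chan)) - 1)) == 0) ? 1 : -1];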
--- sle12sp1.orig/xen/include/xen/sched.h 2015-07-08 14:04:08.000000000 +0200
+++ sle12sp1/xen/include/xen/sched.h 2015-07-08 14:04:21.000000000 +0200
@@ -129,7 +129,7 @@ struct evtchn
#endif
} ssid;
#endif
-};
+} __attribute__((aligned(64)));
int evtchn_init(struct domain *d); /* from domain_create */
void evtchn_destroy(struct domain *d); /* from domain_kill */


@@ -0,0 +1,23 @@
# Commit 142473cfce41a565898e0fa33dc98a1f5e41abe4
# Date 2015-06-25 14:57:04 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/traps: avoid using current too early on boot
Early on boot, current has the sentinel value 0xfffff000. Blindly using it in
show_registers() causes a nested failure and no useful information printed
from an early crash.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -84,7 +84,7 @@ void show_registers(const struct cpu_use
struct cpu_user_regs fault_regs = *regs;
unsigned long fault_crs[8];
enum context context;
- struct vcpu *v = current;
+ struct vcpu *v = system_state >= SYS_STATE_smp_boot ? current : NULL;
if ( guest_mode(regs) && has_hvm_container_vcpu(v) )
{


@@ -0,0 +1,50 @@
# Commit 71bb7304e7a7a35ea6df4b0cedebc35028e4c159
# Date 2015-06-30 15:00:54 +0100
# Author Liang Li <liang.z.li@intel.com>
# Committer Ian Campbell <ian.campbell@citrix.com>
nested EPT: fix the handling of nested EPT
If the host EPT entry is changed, the nested EPT should be updated.
The current code does not do this, and it's wrong.
I have tested this patch, the L2 guest can boot and run as normal.
Signed-off-by: Liang Li <liang.z.li@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Reported-by: Tim Deegan <tim@xen.org>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -26,6 +26,7 @@
#include <asm/p2m.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/nestedhvm.h>
#include <xen/iommu.h>
#include <asm/mtrr.h>
#include <asm/hvm/cacheattr.h>
@@ -1040,6 +1041,9 @@ void ept_sync_domain(struct p2m_domain *
ASSERT(local_irq_is_enabled());
+ if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) )
+ p2m_flush_nestedp2m(d);
+
/*
* Flush active cpus synchronously. Flush others the next time this domain
* is scheduled onto them. We accept the race of other CPUs adding to
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1713,6 +1713,12 @@ p2m_flush_table(struct p2m_domain *p2m)
ASSERT(page_list_empty(&p2m->pod.super));
ASSERT(page_list_empty(&p2m->pod.single));
+ if ( p2m->np2m_base == P2M_BASE_EADDR )
+ {
+ p2m_unlock(p2m);
+ return;
+ }
+
/* This is no longer a valid nested p2m for any address space */
p2m->np2m_base = P2M_BASE_EADDR;


@@ -0,0 +1,64 @@
# Commit e4e9d2d4e76bd8fe229c124bd57fc6ba824271b3
# Date 2015-07-07 11:37:26 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/p2m-ept: don't unmap the EPT pagetable while it is still in use
The call to iommu_pte_flush() between the two hunks uses &ept_entry->epte
which is a pointer into the mapped page.
It is eventually passed to the `clflush` instruction, which will suffer a page fault
if the virtual mapping has fallen out of the TLB.
(XEN) ----[ Xen-4.5.0-xs102594-d x86_64 debug=y Not tainted ]----
(XEN) CPU: 7
(XEN) RIP: e008:[<ffff82d0801572f0>] cacheline_flush+0x4/0x9
<snip>
(XEN) Xen call trace:
(XEN) [<ffff82d0801572f0>] cacheline_flush+0x4/0x9
(XEN) [<ffff82d08014ffff>] __iommu_flush_cache+0x4a/0x6a
(XEN) [<ffff82d0801532e2>] iommu_pte_flush+0x2b/0xd5
(XEN) [<ffff82d0801f909a>] ept_set_entry+0x4bc/0x61f
(XEN) [<ffff82d0801f0c25>] p2m_set_entry+0xd1/0x112
(XEN) [<ffff82d0801f25b1>] clear_mmio_p2m_entry+0x1a0/0x200
(XEN) [<ffff82d0801f4aac>] unmap_mmio_regions+0x49/0x73
(XEN) [<ffff82d080106292>] do_domctl+0x15bd/0x1edb
(XEN) [<ffff82d080234fcb>] syscall_enter+0xeb/0x145
(XEN)
(XEN) Pagetable walk from ffff820040004ae0:
(XEN) L4[0x104] = 00000008668a5063 ffffffffffffffff
(XEN) L3[0x001] = 00000008668a3063 ffffffffffffffff
(XEN) L2[0x000] = 000000086689c063 ffffffffffffffff
(XEN) L1[0x004] = 000000056f078063 000000000007f678
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 7:
(XEN) FATAL PAGE FAULT
(XEN) [error_code=0000]
(XEN) Faulting linear address: ffff820040004ae0
(XEN) ****************************************
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -764,8 +764,6 @@ ept_set_entry(struct p2m_domain *p2m, un
p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
out:
- unmap_domain_page(table);
-
if ( needs_sync != sync_off )
ept_sync_domain(p2m);
@@ -788,6 +786,8 @@ out:
}
}
+ unmap_domain_page(table);
+
/* Release the old intermediate tables, if any. This has to be the
last thing we do, after the ept_sync_domain() and removal
from the iommu tables, so as to avoid a potential


@@ -0,0 +1,102 @@
# Commit 39c6664a0e6e1b4ed80660d545dff34ce41bee31
# Date 2015-07-07 15:10:45 +0100
# Author Ian Campbell <ian.campbell@citrix.com>
# Committer Ian Campbell <ian.campbell@citrix.com>
xen: earlycpio: Pull in latest linux earlycpio.[ch]
AFAICT our current version does not correspond to any version in the
Linux history. This commit resynchronised to the state in Linux
commit 598bae70c2a8e35c8d39b610cca2b32afcf047af.
Differences from upstream: find_cpio_data is __init, printk instead of
pr_*.
This appears to fix Debian bug #785187. "Appears" because my test box
happens to be AMD and the issue is that the (valid) cpio generated by
the Intel ucode is not liked by the old Xen code. I've tested by
hacking the hypervisor to look for the Intel path.
Reported-by: Stephan Seitz <stse+debianbugs@fsing.rootsland.net>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Stephan Seitz <stse+debianbugs@fsing.rootsland.net>
Cc: 785187@bugs.debian.org
Acked-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/earlycpio.c
+++ b/xen/common/earlycpio.c
@@ -54,25 +54,26 @@ enum cpio_fields {
/**
* cpio_data find_cpio_data - Search for files in an uncompressed cpio
- * @path: The directory to search for, including a slash at the end
- * @data: Pointer to the the cpio archive or a header inside
- * @len: Remaining length of the cpio based on data pointer
- * @offset: When a matching file is found, this is the offset to the
- * beginning of the cpio. It can be used to iterate through
- * the cpio to find all files inside of a directory path
+ * @path: The directory to search for, including a slash at the end
+ * @data: Pointer to the the cpio archive or a header inside
+ * @len: Remaining length of the cpio based on data pointer
+ * @nextoff: When a matching file is found, this is the offset from the
+ * beginning of the cpio to the beginning of the next file, not the
+ * matching file itself. It can be used to iterate through the cpio
+ * to find all files inside of a directory path.
*
- * @return: struct cpio_data containing the address, length and
- * filename (with the directory path cut off) of the found file.
- * If you search for a filename and not for files in a directory,
- * pass the absolute path of the filename in the cpio and make sure
- * the match returned an empty filename string.
+ * @return: struct cpio_data containing the address, length and
+ * filename (with the directory path cut off) of the found file.
+ * If you search for a filename and not for files in a directory,
+ * pass the absolute path of the filename in the cpio and make sure
+ * the match returned an empty filename string.
*/
struct cpio_data __init find_cpio_data(const char *path, void *data,
- size_t len, long *offset)
+ size_t len, long *nextoff)
{
const size_t cpio_header_len = 8*C_NFIELDS - 2;
- struct cpio_data cd = { NULL, 0 };
+ struct cpio_data cd = { NULL, 0, "" };
const char *p, *dptr, *nptr;
unsigned int ch[C_NFIELDS], *chp, v;
unsigned char c, x;
@@ -129,17 +130,17 @@ struct cpio_data __init find_cpio_data(c
if ((ch[C_MODE] & 0170000) == 0100000 &&
ch[C_NAMESIZE] >= mypathsize &&
!memcmp(p, path, mypathsize)) {
- *offset = (long)nptr - (long)data;
+ *nextoff = (long)nptr - (long)data;
if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
printk(
"File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
p, MAX_CPIO_FILE_NAME);
}
- if (ch[C_NAMESIZE] - 1 /* includes \0 */ == mypathsize) {
- cd.data = (void *)dptr;
- cd.size = ch[C_FILESIZE];
- return cd; /* Found it! */
- }
+ strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME);
+
+ cd.data = (void *)dptr;
+ cd.size = ch[C_FILESIZE];
+ return cd; /* Found it! */
}
len -= (nptr - p);
p = nptr;
--- a/xen/include/xen/earlycpio.h
+++ b/xen/include/xen/earlycpio.h
@@ -6,6 +6,7 @@
struct cpio_data {
void *data;
size_t size;
+ char name[MAX_CPIO_FILE_NAME];
};
struct cpio_data find_cpio_data(const char *path, void *data, size_t len,

CVE-2015-3259-xsa137.patch (new file, 216 lines)

@@ -0,0 +1,216 @@
xl: Sane handling of extra config file arguments
Various xl sub-commands take additional parameters containing = as
additional config fragments.
The handling of these config fragments has a number of bugs:
1. Use of a static 1024-byte buffer. (If truncation would occur,
with semi-trusted input, a security risk arises due to quotes
being lost.)
2. Mishandling of the return value from snprintf, so that if
truncation occurs, the to-write pointer is updated with the
wanted-to-write length, resulting in stack corruption. (This is
XSA-137.)
3. Clone-and-hack of the code for constructing the appended
config file.
These are fixed here, by introducing a new function
`string_realloc_append' and using it everywhere. The `extra_info'
buffers are replaced by pointers, which start off NULL and are
explicitly freed on all return paths.
The separate variable which will become dom_info.extra_config is
abolished (which involves moving the clearing of dom_info).
Additional bugs I observe, not fixed here:
4. The functions which now call string_realloc_append use ad-hoc
error returns, with multiple calls to `return'. This currently
necessitates multiple new calls to `free'.
5. Many of the paths in xl call exit(-rc) where rc is a libxl status
code. This is a ridiculous exit status `convention'.
6. The loops for handling extra config data are clone-and-hacks.
7. Once the extra config buffer is accumulated, it must be combined
with the appropriate main config file. The code to do this
combining is clone-and-hacked too.
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
Tested-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
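Point 2 and its replacement are easy to reproduce in a standalone program. The append() below mirrors the idea of the patch's string_realloc_append() (minus xl's xrealloc() wrapper and size-limit check) and is only an illustration, not the xl source:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Grow a malloc'd, NUL-terminated string. Unlike accumulating with snprintf()
 * into a fixed buffer - whose return value on truncation is the length it
 * *wanted* to write - this never advances past the allocation. */
static void append(char **acc, const char *more)
{
    size_t oldlen = *acc ? strlen(*acc) : 0;
    size_t morelen = strlen(more) + 1;          /* include the NUL */
    char *p = realloc(*acc, oldlen + morelen);
    if (!p) { perror("realloc"); exit(1); }
    memcpy(p + oldlen, more, morelen);
    *acc = p;
}

int main(int argc, char **argv)
{
    char *extra = NULL;

    for (int i = 1; i < argc; i++) {
        if (strchr(argv[i], '=')) {             /* key=value config fragments */
            append(&extra, argv[i]);
            append(&extra, "\n");
        }
    }
    if (extra)
        fputs(extra, stdout);
    free(extra);
    return 0;
}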
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -151,7 +151,7 @@ struct domain_create {
int console_autoconnect;
int checkpointed_stream;
const char *config_file;
- const char *extra_config; /* extra config string */
+ char *extra_config; /* extra config string */
const char *restore_file;
int migrate_fd; /* -1 means none */
char **migration_domname_r; /* from malloc */
@@ -4572,11 +4572,25 @@ int main_vm_list(int argc, char **argv)
return 0;
}
+static void string_realloc_append(char **accumulate, const char *more)
+{
+ /* Appends more to accumulate. Accumulate is either NULL, or
+ * points (always) to a malloc'd nul-terminated string. */
+
+ size_t oldlen = *accumulate ? strlen(*accumulate) : 0;
+ size_t morelen = strlen(more) + 1/*nul*/;
+ if (oldlen > SSIZE_MAX || morelen > SSIZE_MAX - oldlen) {
+ fprintf(stderr,"Additional config data far too large\n");
+ exit(-ERROR_FAIL);
+ }
+
+ *accumulate = xrealloc(*accumulate, oldlen + morelen);
+ memcpy(*accumulate + oldlen, more, morelen);
+}
+
int main_create(int argc, char **argv)
{
const char *filename = NULL;
- char *p;
- char extra_config[1024];
struct domain_create dom_info;
int paused = 0, debug = 0, daemonize = 1, console_autoconnect = 0,
quiet = 0, monitor = 1, vnc = 0, vncautopass = 0;
@@ -4591,6 +4605,8 @@ int main_create(int argc, char **argv)
{0, 0, 0, 0}
};
+ dom_info.extra_config = NULL;
+
if (argv[1] && argv[1][0] != '-' && !strchr(argv[1], '=')) {
filename = argv[1];
argc--; argv++;
@@ -4630,20 +4646,21 @@ int main_create(int argc, char **argv)
break;
}
- extra_config[0] = '\0';
- for (p = extra_config; optind < argc; optind++) {
+ memset(&dom_info, 0, sizeof(dom_info));
+
+ for (; optind < argc; optind++) {
if (strchr(argv[optind], '=') != NULL) {
- p += snprintf(p, sizeof(extra_config) - (p - extra_config),
- "%s\n", argv[optind]);
+ string_realloc_append(&dom_info.extra_config, argv[optind]);
+ string_realloc_append(&dom_info.extra_config, "\n");
} else if (!filename) {
filename = argv[optind];
} else {
help("create");
+ free(dom_info.extra_config);
return 2;
}
}
- memset(&dom_info, 0, sizeof(dom_info));
dom_info.debug = debug;
dom_info.daemonize = daemonize;
dom_info.monitor = monitor;
@@ -4651,16 +4668,18 @@ int main_create(int argc, char **argv)
dom_info.dryrun = dryrun_only;
dom_info.quiet = quiet;
dom_info.config_file = filename;
- dom_info.extra_config = extra_config;
dom_info.migrate_fd = -1;
dom_info.vnc = vnc;
dom_info.vncautopass = vncautopass;
dom_info.console_autoconnect = console_autoconnect;
rc = create_domain(&dom_info);
- if (rc < 0)
+ if (rc < 0) {
+ free(dom_info.extra_config);
return -rc;
+ }
+ free(dom_info.extra_config);
return 0;
}
@@ -4668,8 +4687,7 @@ int main_config_update(int argc, char **
{
uint32_t domid;
const char *filename = NULL;
- char *p;
- char extra_config[1024];
+ char *extra_config = NULL;
void *config_data = 0;
int config_len = 0;
libxl_domain_config d_config;
@@ -4707,15 +4725,15 @@ int main_config_update(int argc, char **
break;
}
- extra_config[0] = '\0';
- for (p = extra_config; optind < argc; optind++) {
+ for (; optind < argc; optind++) {
if (strchr(argv[optind], '=') != NULL) {
- p += snprintf(p, sizeof(extra_config) - (p - extra_config),
- "%s\n", argv[optind]);
+ string_realloc_append(&extra_config, argv[optind]);
+ string_realloc_append(&extra_config, "\n");
} else if (!filename) {
filename = argv[optind];
} else {
help("create");
+ free(extra_config);
return 2;
}
}
@@ -4724,7 +4742,8 @@ int main_config_update(int argc, char **
rc = libxl_read_file_contents(ctx, filename,
&config_data, &config_len);
if (rc) { fprintf(stderr, "Failed to read config file: %s: %s\n",
- filename, strerror(errno)); return ERROR_FAIL; }
+ filename, strerror(errno));
+ free(extra_config); return ERROR_FAIL; }
if (strlen(extra_config)) {
if (config_len > INT_MAX - (strlen(extra_config) + 2 + 1)) {
fprintf(stderr, "Failed to attach extra configration\n");
@@ -4765,7 +4784,7 @@ int main_config_update(int argc, char **
libxl_domain_config_dispose(&d_config);
free(config_data);
-
+ free(extra_config);
return 0;
}
@@ -7022,7 +7041,7 @@ int main_cpupoolcreate(int argc, char **
{
const char *filename = NULL, *config_src=NULL;
const char *p;
- char extra_config[1024];
+ char *extra_config = NULL;
int opt;
static struct option opts[] = {
{"defconfig", 1, 0, 'f'},
@@ -7056,13 +7075,10 @@ int main_cpupoolcreate(int argc, char **
break;
}
- memset(extra_config, 0, sizeof(extra_config));
while (optind < argc) {
if ((p = strchr(argv[optind], '='))) {
- if (strlen(extra_config) + 1 + strlen(argv[optind]) < sizeof(extra_config)) {
- strcat(extra_config, "\n");
- strcat(extra_config, argv[optind]);
- }
+ string_realloc_append(&extra_config, "\n");
+ string_realloc_append(&extra_config, argv[optind]);
} else if (!filename) {
filename = argv[optind];
} else {


@ -31,10 +31,8 @@ ee2e7e5 Merge pull request #1 from aaannz/pvscsi
7de6f49 support character devices too
c84381b allow /dev/sda as scsi devspec
f11e3a2 pvscsi
Index: xen-4.5.1-testing/docs/man/xl.cfg.pod.5
===================================================================
--- xen-4.5.1-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.5.1-testing/docs/man/xl.cfg.pod.5
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -448,6 +448,36 @@ value is optional if this is a guest dom
=back
@ -72,10 +70,8 @@ Index: xen-4.5.1-testing/docs/man/xl.cfg.pod.5
=item B<vfb=[ "VFB_SPEC_STRING", "VFB_SPEC_STRING", ...]>
Specifies the paravirtual framebuffer devices which should be supplied
Index: xen-4.5.1-testing/docs/man/xl.pod.1
===================================================================
--- xen-4.5.1-testing.orig/docs/man/xl.pod.1
+++ xen-4.5.1-testing/docs/man/xl.pod.1
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -1323,6 +1323,26 @@ List virtual trusted platform modules fo
=back
@ -103,10 +99,8 @@ Index: xen-4.5.1-testing/docs/man/xl.pod.1
=head1 PCI PASS-THROUGH
=over 4
Index: xen-4.5.1-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/libxl.c
+++ xen-4.5.1-testing/tools/libxl/libxl.c
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -2310,6 +2310,273 @@ int libxl_devid_to_device_vtpm(libxl_ctx
return rc;
}
@ -440,10 +434,8 @@ Index: xen-4.5.1-testing/tools/libxl/libxl.c
/*
* Local variables:
* mode: C
Index: xen-4.5.1-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/libxl.h
+++ xen-4.5.1-testing/tools/libxl/libxl.h
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1238,6 +1238,26 @@ libxl_device_vtpm *libxl_device_vtpm_lis
int libxl_device_vtpm_getinfo(libxl_ctx *ctx, uint32_t domid,
libxl_device_vtpm *vtpm, libxl_vtpminfo *vtpminfo);
@ -499,10 +491,8 @@ Index: xen-4.5.1-testing/tools/libxl/libxl.h
#endif /* LIBXL_H */
/*
Index: xen-4.5.1-testing/tools/libxl/libxl_create.c
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.5.1-testing/tools/libxl/libxl_create.c
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1141,6 +1141,7 @@ static void domcreate_rebuild_done(libxl
libxl__multidev_begin(ao, &dcs->multidev);
dcs->multidev.callback = domcreate_launch_dm;
@ -511,10 +501,8 @@ Index: xen-4.5.1-testing/tools/libxl/libxl_create.c
libxl__multidev_prepared(egc, &dcs->multidev, 0);
return;
Index: xen-4.5.1-testing/tools/libxl/libxl_device.c
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/libxl_device.c
+++ xen-4.5.1-testing/tools/libxl/libxl_device.c
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -541,6 +541,7 @@ void libxl__multidev_prepared(libxl__egc
* The following functions are defined:
* libxl__add_disks
@ -556,10 +544,8 @@ Index: xen-4.5.1-testing/tools/libxl/libxl_device.c
/******************************************************************************/
int libxl__device_destroy(libxl__gc *gc, libxl__device *dev)
Index: xen-4.5.1-testing/tools/libxl/libxl_internal.h
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.5.1-testing/tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1079,6 +1079,7 @@ _hidden int libxl__device_disk_setdefaul
_hidden int libxl__device_nic_setdefault(libxl__gc *gc, libxl_device_nic *nic,
uint32_t domid);
@ -590,10 +576,8 @@ Index: xen-4.5.1-testing/tools/libxl/libxl_internal.h
/*----- device model creation -----*/
/* First layer; wraps libxl__spawn_spawn. */
Index: xen-4.5.1-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.5.1-testing/tools/libxl/libxl_types.idl
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -540,6 +540,26 @@ libxl_device_channel = Struct("device_ch
])),
])
@ -659,10 +643,8 @@ Index: xen-4.5.1-testing/tools/libxl/libxl_types.idl
libxl_vcpuinfo = Struct("vcpuinfo", [
("vcpuid", uint32),
("cpu", uint32),
Index: xen-4.5.1-testing/tools/libxl/libxl_types_internal.idl
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/libxl_types_internal.idl
+++ xen-4.5.1-testing/tools/libxl/libxl_types_internal.idl
--- a/tools/libxl/libxl_types_internal.idl
+++ b/tools/libxl/libxl_types_internal.idl
@@ -22,6 +22,7 @@ libxl__device_kind = Enumeration("device
(6, "VKBD"),
(7, "CONSOLE"),
@ -671,10 +653,8 @@ Index: xen-4.5.1-testing/tools/libxl/libxl_types_internal.idl
])
libxl__console_backend = Enumeration("console_backend", [
Index: xen-4.5.1-testing/tools/libxl/xl.h
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/xl.h
+++ xen-4.5.1-testing/tools/libxl/xl.h
--- a/tools/libxl/xl.h
+++ b/tools/libxl/xl.h
@@ -83,6 +83,9 @@ int main_channellist(int argc, char **ar
int main_blockattach(int argc, char **argv);
int main_blocklist(int argc, char **argv);
@ -685,10 +665,8 @@ Index: xen-4.5.1-testing/tools/libxl/xl.h
int main_vtpmattach(int argc, char **argv);
int main_vtpmlist(int argc, char **argv);
int main_vtpmdetach(int argc, char **argv);
Index: xen-4.5.1-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.5.1-testing/tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -17,6 +17,7 @@
#include "libxl_osdeps.h"
@ -904,7 +882,7 @@ Index: xen-4.5.1-testing/tools/libxl/xl_cmdimpl.c
if (!xlu_cfg_get_list(config, "vtpm", &vtpms, 0, 0)) {
d_config->num_vtpms = 0;
d_config->vtpms = NULL;
@@ -6492,6 +6670,256 @@ int main_blockdetach(int argc, char **ar
@@ -6511,6 +6689,256 @@ int main_blockdetach(int argc, char **ar
return rc;
}
@ -1161,10 +1139,8 @@ Index: xen-4.5.1-testing/tools/libxl/xl_cmdimpl.c
int main_vtpmattach(int argc, char **argv)
{
int opt;
Index: xen-4.5.1-testing/tools/libxl/xl_cmdtable.c
===================================================================
--- xen-4.5.1-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.5.1-testing/tools/libxl/xl_cmdtable.c
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -372,6 +372,21 @@ struct cmd_spec cmd_table[] = {
"Destroy a domain's virtual block device",
"<Domain> <DevId>",


@@ -1,333 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
xen/MSI-X: drive maskall and enable bits through hypercalls
Particularly the maskall bit has to be under exclusive hypervisor
control (and since they live in the same config space field, the
enable bit has to follow suit). Use the replacement hypercall
interfaces.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pass-through.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pass-through.c
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pass-through.c
@@ -814,9 +814,12 @@ static struct pt_reg_info_tbl pt_emu_reg
.offset = PCI_MSI_FLAGS, // 2
.size = 2,
.init_val = 0x0000,
- .res_mask = 0x3800,
- .ro_mask = 0x07FF,
- .emu_mask = 0x0000,
+ /* This must not be split into res_mask (0x3800) and ro_mask (0x07FF)
+ * because even in permissive mode there must not be any write back
+ * to this register.
+ */
+ .ro_mask = 0x3FFF,
+ .emu_mask = 0xC000,
.init = pt_msixctrl_reg_init,
.u.w.read = pt_word_reg_read,
.u.w.write = pt_msixctrl_reg_write,
@@ -4135,30 +4138,52 @@ static int pt_msixctrl_reg_write(struct
uint16_t *value, uint16_t dev_value, uint16_t valid_mask)
{
struct pt_reg_info_tbl *reg = cfg_entry->reg;
- uint16_t writable_mask = 0;
+ uint16_t writable_mask, val;
uint16_t throughable_mask = get_throughable_mask(ptdev, reg, valid_mask);
uint16_t old_ctrl = cfg_entry->data;
/* modify emulate register */
writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
- cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+ val = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+ cfg_entry->data = val;
/* create value for writing to I/O device register */
*value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
/* update MSI-X */
- if ((*value & PCI_MSIX_ENABLE) && !(*value & PCI_MSIX_MASK))
+ if ((val & PCI_MSIX_ENABLE) && !(val & PCI_MSIX_MASK))
{
if (ptdev->msi_trans_en) {
PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n");
pt_disable_msi_translate(ptdev);
}
+ if (!ptdev->msix->enabled) {
+ if (!ptdev->msix->maskall)
+ pt_msix_maskall(ptdev, 1);
+ pt_msix_enable(ptdev);
+ }
pt_msix_update(ptdev);
- } else if (!(*value & PCI_MSIX_ENABLE) && ptdev->msix->enabled) {
- pt_msix_disable(ptdev);
+ ptdev->msix->enabled = 1;
+ ptdev->msix->maskall = 0;
+ pt_msix_maskall(ptdev, 0);
+ } else if (ptdev->msix->enabled) {
+ if (!(val & PCI_MSIX_ENABLE)) {
+ pt_msix_disable(ptdev);
+ ptdev->msix->enabled = 0;
+ } else if (!ptdev->msix->maskall) {
+ ptdev->msix->maskall = 1;
+ pt_msix_maskall(ptdev, 1);
+ }
}
- ptdev->msix->enabled = !!(*value & PCI_MSIX_ENABLE);
+ dev_value = pci_read_word(ptdev->pci_dev, ptdev->msix->ctrl_offset);
+
+ if (ptdev->msix->enabled && !(dev_value & PCI_MSIX_ENABLE))
+ PT_ERR("MSI-X unexpectedly disabled\n");
+ else if ((dev_value & PCI_MSIX_ENABLE) &&
+ ptdev->msix->maskall &&
+ !(dev_value & PCI_MSIX_MASK))
+ PT_ERR("MSI-X unexpectedly unmasked\n");
return 0;
}
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pass-through.h
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pass-through.h
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pass-through.h
@@ -206,6 +206,7 @@ struct msix_entry_info {
struct pt_msix_info {
uint32_t ctrl_offset;
int enabled;
+ int maskall;
int total_entries;
int bar_index;
uint64_t table_base;
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pt-msi.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pt-msi.c
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pt-msi.c
@@ -41,20 +41,12 @@ void msi_set_enable(struct pt_dev *dev,
static void msix_set_enable(struct pt_dev *dev, int en)
{
- uint16_t val = 0;
- uint32_t address = 0;
if (!dev->msix)
return;
- address = dev->msix->ctrl_offset;
- if (!address)
- return;
-
- val = pci_read_word(dev->pci_dev, address);
- val &= ~PCI_MSIX_ENABLE;
- if (en)
- val |= PCI_MSIX_ENABLE;
- pci_write_word(dev->pci_dev, address, val);
+ xc_physdev_msix_enable(xc_handle, dev->pci_dev->domain, dev->pci_dev->bus,
+ PCI_DEVFN(dev->pci_dev->dev, dev->pci_dev->func),
+ en);
}
/* MSI virtuailization functions */
@@ -349,6 +341,11 @@ int pt_msix_update(struct pt_dev *dev)
return 0;
}
+void pt_msix_enable(struct pt_dev *dev)
+{
+ msix_set_enable(dev, 1);
+}
+
void pt_msix_disable(struct pt_dev *dev)
{
PCIDevice *d = &dev->dev;
@@ -394,6 +391,15 @@ void pt_msix_disable(struct pt_dev *dev)
}
}
+int pt_msix_maskall(struct pt_dev *dev, int mask)
+{
+ return xc_physdev_msix_mask_all(xc_handle, dev->pci_dev->domain,
+ dev->pci_dev->bus,
+ PCI_DEVFN(dev->pci_dev->dev,
+ dev->pci_dev->func),
+ mask);
+}
+
int pt_msix_update_remap(struct pt_dev *dev, int bar_index)
{
struct msix_entry_info *entry;
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pt-msi.h
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pt-msi.h
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/pt-msi.h
@@ -106,9 +106,15 @@ int
pt_msix_update(struct pt_dev *dev);
void
+pt_msix_enable(struct pt_dev *dev);
+
+void
pt_msix_disable(struct pt_dev *dev);
int
+pt_msix_maskall(struct pt_dev *dev, int mask);
+
+int
has_msix_mapping(struct pt_dev *dev, int bar_index);
int
Index: xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/xen/xen_pt.h
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-dir-remote/hw/xen/xen_pt.h
+++ xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/xen/xen_pt.h
@@ -181,6 +181,7 @@ typedef struct XenPTMSIXEntry {
typedef struct XenPTMSIX {
uint32_t ctrl_offset;
bool enabled;
+ bool maskall;
int total_entries;
int bar_index;
uint64_t table_base;
@@ -293,7 +294,9 @@ int xen_pt_msix_init(XenPCIPassthroughSt
void xen_pt_msix_delete(XenPCIPassthroughState *s);
int xen_pt_msix_update(XenPCIPassthroughState *s);
int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index);
+void xen_pt_msix_enable(XenPCIPassthroughState *s);
void xen_pt_msix_disable(XenPCIPassthroughState *s);
+int xen_pt_msix_maskall(XenPCIPassthroughState *s, bool mask);
static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar)
{
Index: xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/xen/xen_pt_config_init.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-dir-remote/hw/xen/xen_pt_config_init.c
+++ xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/xen/xen_pt_config_init.c
@@ -1436,32 +1436,58 @@ static int xen_pt_msixctrl_reg_write(Xen
uint16_t dev_value, uint16_t valid_mask)
{
XenPTRegInfo *reg = cfg_entry->reg;
- uint16_t writable_mask = 0;
+ uint16_t writable_mask, value;
uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask);
int debug_msix_enabled_old;
/* modify emulate register */
writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
- cfg_entry->data = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+ value = XEN_PT_MERGE_VALUE(*val, cfg_entry->data, writable_mask);
+ cfg_entry->data = value;
/* create value for writing to I/O device register */
*val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask);
+ debug_msix_enabled_old = s->msix->enabled;
+
/* update MSI-X */
- if ((*val & PCI_MSIX_FLAGS_ENABLE)
- && !(*val & PCI_MSIX_FLAGS_MASKALL)) {
+ if ((value & PCI_MSIX_FLAGS_ENABLE)
+ && !(value & PCI_MSIX_FLAGS_MASKALL)) {
+ if (!s->msix->enabled) {
+ if (!s->msix->maskall) {
+ xen_pt_msix_maskall(s, true);
+ }
+ xen_pt_msix_enable(s);
+ }
xen_pt_msix_update(s);
- } else if (!(*val & PCI_MSIX_FLAGS_ENABLE) && s->msix->enabled) {
- xen_pt_msix_disable(s);
+ s->msix->enabled = true;
+ s->msix->maskall = false;
+ xen_pt_msix_maskall(s, false);
+ } else if (s->msix->enabled) {
+ if (!(value & PCI_MSIX_FLAGS_ENABLE)) {
+ xen_pt_msix_disable(s);
+ s->msix->enabled = false;
+ } else if (!s->msix->maskall) {
+ s->msix->maskall = true;
+ xen_pt_msix_maskall(s, true);
+ }
}
- debug_msix_enabled_old = s->msix->enabled;
- s->msix->enabled = !!(*val & PCI_MSIX_FLAGS_ENABLE);
if (s->msix->enabled != debug_msix_enabled_old) {
XEN_PT_LOG(&s->dev, "%s MSI-X\n",
s->msix->enabled ? "enable" : "disable");
}
+ xen_host_pci_get_word(&s->real_device, s->msix->ctrl_offset, &dev_value);
+
+ if (s->msix->enabled && !(dev_value & PCI_MSIX_FLAGS_ENABLE)) {
+ XEN_PT_ERR(&s->dev, "MSI-X unexpectedly disabled\n");
+ } else if ((dev_value & PCI_MSIX_FLAGS_ENABLE) &&
+ s->msix->maskall &&
+ !(dev_value & PCI_MSIX_FLAGS_MASKALL)) {
+ XEN_PT_ERR(&s->dev, "MSI-X unexpectedly unmasked\n");
+ }
+
return 0;
}
@@ -1483,9 +1509,12 @@ static XenPTRegInfo xen_pt_emu_reg_msix[
.offset = PCI_MSI_FLAGS,
.size = 2,
.init_val = 0x0000,
- .res_mask = 0x3800,
- .ro_mask = 0x07FF,
- .emu_mask = 0x0000,
+ /* This must not be split into res_mask (0x3800) and ro_mask (0x07FF)
+ * because even in permissive mode there must not be any write back
+ * to this register.
+ */
+ .ro_mask = 0x3FFF,
+ .emu_mask = 0xC000,
.init = xen_pt_msixctrl_reg_init,
.u.w.read = xen_pt_word_reg_read,
.u.w.write = xen_pt_msixctrl_reg_write,
Index: xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/xen/xen_pt_msi.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-dir-remote/hw/xen/xen_pt_msi.c
+++ xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/xen/xen_pt_msi.c
@@ -301,8 +301,11 @@ static int msix_set_enable(XenPCIPassthr
return -1;
}
- return msi_msix_enable(s, s->msix->ctrl_offset, PCI_MSIX_FLAGS_ENABLE,
- enabled);
+ return xc_physdev_msix_enable(xen_xc, s->real_device.domain,
+ s->real_device.bus,
+ PCI_DEVFN(s->real_device.dev,
+ s->real_device.func),
+ enabled);
}
static int xen_pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr)
@@ -361,6 +364,11 @@ int xen_pt_msix_update(XenPCIPassthrough
return 0;
}
+void xen_pt_msix_enable(XenPCIPassthroughState *s)
+{
+ msix_set_enable(s, true);
+}
+
void xen_pt_msix_disable(XenPCIPassthroughState *s)
{
int i = 0;
@@ -378,6 +386,15 @@ void xen_pt_msix_disable(XenPCIPassthrou
}
}
+int xen_pt_msix_maskall(XenPCIPassthroughState *s, bool mask)
+{
+ return xc_physdev_msix_mask_all(xen_xc, s->real_device.domain,
+ s->real_device.bus,
+ PCI_DEVFN(s->real_device.dev,
+ s->real_device.func),
+ mask);
+}
+
int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index)
{
XenPTMSIXEntry *entry;


@@ -1,141 +0,0 @@
References: bsc#931627
xen/MSI-X: latch MSI-X table writes
The remaining log message in pci_msix_write() is wrong, as there the guest
behavior may only appear to be wrong: for one, the old logic didn't
take the mask-all bit into account. And this shouldn't depend on
host device state (i.e. the host may have masked the entry without the
guest having done so). Plus these writes shouldn't be dropped even when
an entry gets unmasked. Instead, if they can't be made to take effect
right away, they should take effect on the next unmasking or enabling
operation - the specification explicitly describes such caching
behavior.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- trunk.orig/tools/qemu-xen-dir-remote/hw/xen/xen_pt.h 2015-06-10 00:00:00.000000000 +0200
+++ trunk/tools/qemu-xen-dir-remote/hw/xen/xen_pt.h 2015-04-14 08:58:43.000000000 +0200
@@ -175,9 +175,8 @@ typedef struct XenPTMSIXEntry {
int pirq;
uint64_t addr;
uint32_t data;
- uint32_t vector_ctrl;
+ uint32_t latch[4];
bool updated; /* indicate whether MSI ADDR or DATA is updated */
- bool warned; /* avoid issuing (bogus) warning more than once */
} XenPTMSIXEntry;
typedef struct XenPTMSIX {
uint32_t ctrl_offset;
--- trunk.orig/tools/qemu-xen-dir-remote/hw/xen/xen_pt_msi.c 2015-06-10 00:00:00.000000000 +0200
+++ trunk/tools/qemu-xen-dir-remote/hw/xen/xen_pt_msi.c 2015-05-07 12:46:09.000000000 +0200
@@ -25,6 +25,7 @@
#define XEN_PT_GFLAGSSHIFT_DELIV_MODE 12
#define XEN_PT_GFLAGSSHIFT_TRG_MODE 15
+#define latch(fld) latch[PCI_MSIX_ENTRY_##fld / sizeof(uint32_t)]
/*
* Helpers
@@ -322,6 +323,13 @@ static int xen_pt_msix_update_one(XenPCI
pirq = entry->pirq;
+ if (pirq == XEN_PT_UNASSIGNED_PIRQ || s->msix->maskall ||
+ (entry->latch(VECTOR_CTRL) & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
+ entry->addr = entry->latch(LOWER_ADDR) |
+ ((uint64_t)entry->latch(UPPER_ADDR) << 32);
+ entry->data = entry->latch(DATA);
+ }
+
rc = msi_msix_setup(s, entry->addr, entry->data, &pirq, true, entry_nr,
entry->pirq == XEN_PT_UNASSIGNED_PIRQ);
if (rc) {
@@ -396,35 +404,15 @@ int xen_pt_msix_update_remap(XenPCIPasst
static uint32_t get_entry_value(XenPTMSIXEntry *e, int offset)
{
- switch (offset) {
- case PCI_MSIX_ENTRY_LOWER_ADDR:
- return e->addr & UINT32_MAX;
- case PCI_MSIX_ENTRY_UPPER_ADDR:
- return e->addr >> 32;
- case PCI_MSIX_ENTRY_DATA:
- return e->data;
- case PCI_MSIX_ENTRY_VECTOR_CTRL:
- return e->vector_ctrl;
- default:
- return 0;
- }
+ return !(offset % sizeof(*e->latch))
+ ? e->latch[offset / sizeof(*e->latch)] : 0;
}
static void set_entry_value(XenPTMSIXEntry *e, int offset, uint32_t val)
{
- switch (offset) {
- case PCI_MSIX_ENTRY_LOWER_ADDR:
- e->addr = (e->addr & ((uint64_t)UINT32_MAX << 32)) | val;
- break;
- case PCI_MSIX_ENTRY_UPPER_ADDR:
- e->addr = (uint64_t)val << 32 | (e->addr & UINT32_MAX);
- break;
- case PCI_MSIX_ENTRY_DATA:
- e->data = val;
- break;
- case PCI_MSIX_ENTRY_VECTOR_CTRL:
- e->vector_ctrl = val;
- break;
+ if (!(offset % sizeof(*e->latch)))
+ {
+ e->latch[offset / sizeof(*e->latch)] = val;
}
}
@@ -444,39 +432,28 @@ static void pci_msix_write(void *opaque,
offset = addr % PCI_MSIX_ENTRY_SIZE;
if (offset != PCI_MSIX_ENTRY_VECTOR_CTRL) {
- const volatile uint32_t *vec_ctrl;
-
if (get_entry_value(entry, offset) == val
&& entry->pirq != XEN_PT_UNASSIGNED_PIRQ) {
return;
}
+ entry->updated = true;
+ } else if (msix->enabled && entry->updated &&
+ !(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
+ const volatile uint32_t *vec_ctrl;
+
/*
* If Xen intercepts the mask bit access, entry->vec_ctrl may not be
* up-to-date. Read from hardware directly.
*/
vec_ctrl = s->msix->phys_iomem_base + entry_nr * PCI_MSIX_ENTRY_SIZE
+ PCI_MSIX_ENTRY_VECTOR_CTRL;
+ set_entry_value(entry, offset, *vec_ctrl);
- if (msix->enabled && !(*vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
- if (!entry->warned) {
- entry->warned = true;
- XEN_PT_ERR(&s->dev, "Can't update msix entry %d since MSI-X is"
- " already enabled.\n", entry_nr);
- }
- return;
- }
-
- entry->updated = true;
+ xen_pt_msix_update_one(s, entry_nr);
}
set_entry_value(entry, offset, val);
-
- if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL) {
- if (msix->enabled && !(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
- xen_pt_msix_update_one(s, entry_nr);
- }
- }
}
static uint64_t pci_msix_read(void *opaque, hwaddr addr,


@@ -1,5 +1,3 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
x86/MSI-X: access MSI-X table only after having enabled MSI-X
As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
@ -9,12 +7,10 @@ instead to prevent interrupts from occurring.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
--- trunk.orig/xen/arch/x86/msi.c 2015-03-25 09:35:38.000000000 +0100
+++ trunk/xen/arch/x86/msi.c 2015-05-18 11:39:36.000000000 +0200
@@ -142,6 +142,17 @@ static bool_t memory_decoded(const struc
--- sle12sp1.orig/xen/arch/x86/msi.c 2015-07-07 18:01:16.000000000 +0200
+++ sle12sp1/xen/arch/x86/msi.c 2015-07-07 18:01:41.000000000 +0200
@@ -144,6 +144,17 @@ static bool_t memory_decoded(const struc
PCI_COMMAND_MEMORY);
}
@ -32,9 +28,9 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
/*
* MSI message composition
*/
@@ -219,7 +230,8 @@ static bool_t read_msi_msg(struct msi_de
void __iomem *base;
base = entry->mask_base;
@@ -222,7 +233,8 @@ static bool_t read_msi_msg(struct msi_de
{
void __iomem *base = entry->mask_base;
- if ( unlikely(!memory_decoded(entry->dev)) )
+ if ( unlikely(!msix_memory_decoded(entry->dev,
@ -42,9 +38,9 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
return 0;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -285,7 +297,8 @@ static int write_msi_msg(struct msi_desc
void __iomem *base;
base = entry->mask_base;
@@ -287,7 +299,8 @@ static int write_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
- if ( unlikely(!memory_decoded(entry->dev)) )
+ if ( unlikely(!msix_memory_decoded(entry->dev,
@ -52,26 +48,33 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
return -ENXIO;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -379,7 +392,7 @@ static bool_t msi_set_mask_bit(struct ir
@@ -381,9 +394,9 @@ static bool_t msi_set_mask_bit(struct ir
{
struct msi_desc *entry = desc->msi_desc;
struct pci_dev *pdev;
- u16 seg;
+ u16 seg, control;
u8 bus, slot, func;
- bool_t flag = host || guest;
+ bool_t flag = host || guest, maskall;
ASSERT(spin_is_locked(&desc->lock));
@@ -401,35 +414,38 @@ static bool_t msi_set_mask_bit(struct ir
BUG_ON(!entry || !entry->dev);
@@ -406,36 +419,45 @@ static bool_t msi_set_mask_bit(struct ir
}
break;
case PCI_CAP_ID_MSIX:
+ maskall = pdev->msix->host_maskall;
+ control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
+ if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+ {
+ pdev->msix->host_maskall = 1;
+ pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
+ }
if ( likely(memory_decoded(pdev)) )
{
writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
@ -87,6 +90,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
- u16 control;
domid_t domid = pdev->domain->domain_id;
- pdev->msix->host_maskall = 1;
- control = pci_conf_read16(seg, bus, slot, func,
- msix_control_reg(entry->msi_attrib.pos));
- if ( control & PCI_MSIX_FLAGS_MASKALL )
@ -94,7 +98,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
- pci_conf_write16(seg, bus, slot, func,
- msix_control_reg(entry->msi_attrib.pos),
- control | PCI_MSIX_FLAGS_MASKALL);
+ control |= PCI_MSIX_FLAGS_MASKALL;
+ maskall = 1;
if ( pdev->msix->warned != domid )
{
pdev->msix->warned = domid;
@ -107,13 +111,16 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
- break;
}
- /* fall through */
+ pdev->msix->host_maskall = maskall;
+ if ( maskall || pdev->msix->guest_maskall )
+ control |= PCI_MSIX_FLAGS_MASKALL;
+ pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos), control);
+ return flag;
default:
return 0;
}
@@ -454,7 +470,8 @@ static int msi_get_mask_bit(const struct
@@ -461,7 +483,8 @@ static int msi_get_mask_bit(const struct
entry->msi.mpos) >>
entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
@ -123,7 +130,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
break;
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
}
@@ -543,9 +560,31 @@ static struct msi_desc *alloc_msi_entry(
@@ -564,9 +587,31 @@ static struct msi_desc *alloc_msi_entry(
int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
{
@ -158,9 +165,14 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
}
int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
@@ -785,16 +824,32 @@ static int msix_capability_init(struct p
@@ -803,20 +848,38 @@ static int msix_capability_init(struct p
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ bool_t maskall = msix->host_maskall;
ASSERT(spin_is_locked(&pcidevs_lock));
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
- msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
+ /*
@ -169,6 +181,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
+ * to mask all the vectors to prevent interrupts coming in before they're
+ * fully set up.
+ */
+ msix->host_maskall = 1;
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
@ -192,7 +205,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
ASSERT(msi);
}
@@ -825,6 +880,8 @@ static int msix_capability_init(struct p
@@ -847,6 +910,8 @@ static int msix_capability_init(struct p
{
if ( !msi || !msi->table_base )
{
@ -201,7 +214,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
xfree(entry);
return -ENXIO;
}
@@ -867,6 +924,8 @@ static int msix_capability_init(struct p
@@ -889,6 +954,8 @@ static int msix_capability_init(struct p
if ( idx < 0 )
{
@ -210,29 +223,57 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
xfree(entry);
return idx;
}
@@ -922,8 +981,7 @@ static int msix_capability_init(struct p
@@ -915,7 +982,7 @@ static int msix_capability_init(struct p
if ( !msix->used_entries )
{
- msix->host_maskall = 0;
+ maskall = 0;
if ( !msix->guest_maskall )
control &= ~PCI_MSIX_FLAGS_MASKALL;
else
@@ -951,8 +1018,8 @@ static int msix_capability_init(struct p
++msix->used_entries;
/* Restore MSI-X enabled bits */
- pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
- control & ~PCI_MSIX_FLAGS_MASKALL);
+ msix->host_maskall = maskall;
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
return 0;
}
@@ -1072,7 +1130,10 @@ static void __pci_disable_msix(struct ms
@@ -1092,8 +1159,15 @@ static void __pci_disable_msix(struct ms
PCI_CAP_ID_MSIX);
u16 control = pci_conf_read16(seg, bus, slot, func,
msix_control_reg(entry->msi_attrib.pos));
+ bool_t maskall = dev->msix->host_maskall;
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
- msix_set_enable(dev, 0);
+ if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+ {
+ dev->msix->host_maskall = 1;
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
+ }
BUG_ON(list_empty(&dev->msi_list));
@@ -1198,6 +1259,8 @@ int pci_restore_msi_state(struct pci_dev
@@ -1105,8 +1179,11 @@ static void __pci_disable_msix(struct ms
"cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
entry->irq, dev->seg, dev->bus,
PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
- control |= PCI_MSIX_FLAGS_MASKALL;
+ maskall = 1;
}
+ dev->msix->host_maskall = maskall;
+ if ( maskall || dev->msix->guest_maskall )
+ control |= PCI_MSIX_FLAGS_MASKALL;
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
_pci_cleanup_msix(dev->msix);
@@ -1255,6 +1332,8 @@ int pci_restore_msi_state(struct pci_dev
list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
{
unsigned int i = 0, nr = 1;
@ -241,7 +282,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
irq = entry->irq;
desc = &irq_desc[irq];
@@ -1224,10 +1287,18 @@ int pci_restore_msi_state(struct pci_dev
@@ -1281,10 +1360,18 @@ int pci_restore_msi_state(struct pci_dev
}
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
{
@ -261,7 +302,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
return -ENXIO;
}
}
@@ -1256,11 +1327,9 @@ int pci_restore_msi_state(struct pci_dev
@@ -1314,11 +1401,9 @@ int pci_restore_msi_state(struct pci_dev
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
{
unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
@ -275,7 +316,7 @@ v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled
multi_msi_enable(control, entry->msi.nvec);
pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn), cpos, control);
@@ -1268,7 +1337,9 @@ int pci_restore_msi_state(struct pci_dev
@@ -1326,7 +1411,9 @@ int pci_restore_msi_state(struct pci_dev
msi_set_enable(pdev, 1);
}
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )


@ -1,308 +1,65 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
x86/MSI-X: track host and guest mask-all requests separately
x86/MSI-X: provide hypercall interface for mask-all control
Qemu shouldn't be fiddling with this bit directly, as the hypervisor
may (and now does) use it for its own purposes. Provide it with a
replacement interface, allowing the hypervisor to track host and guest
masking intentions independently (clearing the bit only when both want
it clear).
Host uses of the bits will be added subsequently, and must not be
overridden by guests (including Dom0, namely when acting on behalf of
a guest).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Whether the permission check should really be an XSM_TARGET one needs
to be determined: allowing the guest to issue the hypercalls on itself
means permitting it to bypass the device model, and thus rendering
device model state stale.
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
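As a usage illustration only (not part of the patch), a device model built
against this libxc extension might drive the new operations roughly as
follows; the helper name and the seg/bus/devfn values are placeholders for
whatever identifies the passed-through device:

    #include <xenctrl.h>

    /* Hypothetical helper: have Xen enable MSI-X on the device's behalf and
     * request mask-all while the vector table is still being programmed. */
    int devmodel_msix_prepare(xc_interface *xch, int seg, int bus, int devfn)
    {
        int rc = xc_physdev_msix_enable(xch, seg, bus, devfn, 1);

        if ( rc )
            return rc;

        /* Guest-side mask-all request; Xen combines it with host_maskall. */
        return xc_physdev_msix_mask_all(xch, seg, bus, devfn, 1);
    }
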
--- trunk.orig/tools/libxc/include/xenctrl.h 2015-01-14 18:44:18.000000000 +0100
+++ trunk/tools/libxc/include/xenctrl.h 2015-03-25 13:51:05.000000000 +0100
@@ -1793,6 +1793,17 @@ int xc_physdev_unmap_pirq(xc_interface *
int domid,
int pirq);
+int xc_physdev_msix_enable(xc_interface *xch,
+ int segment,
+ int bus,
+ int devfn,
+ int on);
+int xc_physdev_msix_mask_all(xc_interface *xch,
+ int segment,
+ int bus,
+ int devfn,
+ int mask);
+
int xc_hvm_set_pci_intx_level(
xc_interface *xch, domid_t dom,
uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
--- trunk.orig/tools/libxc/xc_physdev.c 2013-07-09 20:57:12.000000000 +0200
+++ trunk/tools/libxc/xc_physdev.c 2015-03-24 15:59:43.000000000 +0100
@@ -108,3 +108,38 @@ int xc_physdev_unmap_pirq(xc_interface *
return rc;
}
+int xc_physdev_msix_enable(xc_interface *xch,
+ int segment,
+ int bus,
+ int devfn,
+ int on)
+{
+ struct physdev_pci_device dev = {
+ .seg = segment,
+ .bus = bus,
+ .devfn = devfn
+ };
+
+ return do_physdev_op(xch,
+ on ? PHYSDEVOP_msix_enable
+ : PHYSDEVOP_msix_disable,
+ &dev, sizeof(dev));
+}
+
+int xc_physdev_msix_mask_all(xc_interface *xch,
+ int segment,
+ int bus,
+ int devfn,
+ int mask)
+{
+ struct physdev_pci_device dev = {
+ .seg = segment,
+ .bus = bus,
+ .devfn = devfn
+ };
+
+ return do_physdev_op(xch,
+ mask ? PHYSDEVOP_msix_mask_all
+ : PHYSDEVOP_msix_unmask_all,
+ &dev, sizeof(dev));
+}
--- trunk.orig/xen/arch/x86/msi.c 2015-05-18 11:44:39.000000000 +0200
+++ trunk/xen/arch/x86/msi.c 2015-06-10 12:53:52.000000000 +0200
@@ -394,7 +394,7 @@ static bool_t msi_set_mask_bit(struct ir
struct pci_dev *pdev;
u16 seg, control;
u8 bus, slot, func;
- bool_t flag = host || guest;
+ bool_t flag = host || guest, maskall;
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
@@ -415,13 +415,17 @@ static bool_t msi_set_mask_bit(struct ir
}
break;
case PCI_CAP_ID_MSIX:
+ maskall = pdev->msix->host_maskall;
control = pci_conf_read16(seg, bus, slot, func,
msix_control_reg(entry->msi_attrib.pos));
if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+ {
+ pdev->msix->host_maskall = 1;
pci_conf_write16(seg, bus, slot, func,
msix_control_reg(entry->msi_attrib.pos),
control | (PCI_MSIX_FLAGS_ENABLE |
PCI_MSIX_FLAGS_MASKALL));
+ }
if ( likely(memory_decoded(pdev)) )
{
writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
@@ -434,7 +438,7 @@ static bool_t msi_set_mask_bit(struct ir
{
domid_t domid = pdev->domain->domain_id;
- control |= PCI_MSIX_FLAGS_MASKALL;
+ maskall = 1;
if ( pdev->msix->warned != domid )
{
pdev->msix->warned = domid;
@@ -444,6 +448,9 @@ static bool_t msi_set_mask_bit(struct ir
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}
}
+ pdev->msix->host_maskall = maskall;
+ if ( maskall || pdev->msix->guest_maskall )
+ control |= PCI_MSIX_FLAGS_MASKALL;
pci_conf_write16(seg, bus, slot, func,
msix_control_reg(entry->msi_attrib.pos), control);
return flag;
@@ -839,6 +846,7 @@ static int msix_capability_init(struct p
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ bool_t maskall = msix->host_maskall;
ASSERT(spin_is_locked(&pcidevs_lock));
@@ -850,6 +858,7 @@ static int msix_capability_init(struct p
* to mask all the vectors to prevent interrupts coming in before they're
* fully set up.
*/
+ msix->host_maskall = 1;
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
control | (PCI_MSIX_FLAGS_ENABLE |
PCI_MSIX_FLAGS_MASKALL));
@@ -972,6 +981,10 @@ static int msix_capability_init(struct p
--- sle12sp1.orig/xen/arch/x86/msi.c 2015-06-22 09:06:30.000000000 +0200
+++ sle12sp1/xen/arch/x86/msi.c 2015-06-22 09:23:08.000000000 +0200
@@ -843,6 +843,12 @@ static int msix_capability_init(struct p
if ( !msix->used_entries )
{
+ maskall = 0;
+ msix->guest_maskall = 0;
+ control &= ~PCI_MSIX_FLAGS_MASKALL;
+ msix->host_maskall = 0;
+ if ( !msix->guest_maskall )
+ control &= ~PCI_MSIX_FLAGS_MASKALL;
+ else
+ control |= PCI_MSIX_FLAGS_MASKALL;
+
if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
msix->table.last) )
WARN();
@@ -1002,6 +1015,7 @@ static int msix_capability_init(struct p
++msix->used_entries;
/* Restore MSI-X enabled bits */
+ msix->host_maskall = maskall;
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
return 0;
@@ -1142,6 +1156,7 @@ static void __pci_disable_msix(struct ms
int pos;
u16 control, seg;
u8 bus, slot, func;
+ bool_t maskall;
dev = entry->dev;
seg = dev->seg;
@@ -1151,10 +1166,14 @@ static void __pci_disable_msix(struct ms
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
+ maskall = dev->msix->host_maskall;
if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
@@ -1111,6 +1117,34 @@ void pci_cleanup_msi(struct pci_dev *pde
int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
unsigned int size, uint32_t *data)
{
+ u16 seg = pdev->seg;
+ u8 bus = pdev->bus;
+ u8 slot = PCI_SLOT(pdev->devfn);
+ u8 func = PCI_FUNC(pdev->devfn);
+ struct msi_desc *entry;
+ unsigned int pos;
+
+ if ( pdev->msix )
+ {
+ dev->msix->host_maskall = 1;
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
control | (PCI_MSIX_FLAGS_ENABLE |
PCI_MSIX_FLAGS_MASKALL));
+ entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
+ pos = entry ? entry->msi_attrib.pos
+ : pci_find_cap_offset(seg, bus, slot, func,
+ PCI_CAP_ID_MSIX);
+ ASSERT(pos);
+
+ if ( reg < pos || reg >= msix_pba_offset_reg(pos) + 4 )
+ return 0;
+
+ if ( reg != msix_control_reg(pos) || size != 2 )
+ return -EACCES;
+
+ pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
+ if ( pdev->msix->host_maskall )
+ *data |= PCI_MSIX_FLAGS_MASKALL;
+
+ return 1;
+ }
BUG_ON(list_empty(&dev->msi_list));
@@ -1166,8 +1185,11 @@ static void __pci_disable_msix(struct ms
"cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
entry->irq, dev->seg, dev->bus,
PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
- control |= PCI_MSIX_FLAGS_MASKALL;
+ maskall = 1;
}
+ dev->msix->host_maskall = maskall;
+ if ( maskall || dev->msix->guest_maskall )
+ control |= PCI_MSIX_FLAGS_MASKALL;
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
_pci_cleanup_msix(dev->msix);
@@ -1211,6 +1233,62 @@ int pci_prepare_msix(u16 seg, u8 bus, u8
return rc;
+
return 0;
}
+int pci_msix_enable(u16 seg, u8 bus, u8 devfn, bool_t on)
+{
+ int rc;
+ struct pci_dev *pdev;
+
+ if ( !use_msi )
+ return -EOPNOTSUPP;
+
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( !pdev || !pdev->msix || !pdev->domain )
+ rc = -ENODEV;
+ else if ( !is_hvm_domain(pdev->domain) )
+ rc = -ENXIO;
+ else if ( (rc = xsm_manage_domain_pirq(XSM_TARGET, pdev->domain)) == 0 )
+ msix_set_enable(pdev, on);
+ spin_unlock(&pcidevs_lock);
+
+ return rc;
+}
+
+int pci_msix_maskall(u16 seg, u8 bus, u8 devfn, bool_t mask)
+{
+ int rc;
+ struct pci_dev *pdev;
+ u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
+
+ if ( !use_msi )
+ return -EOPNOTSUPP;
+
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( !pdev || !pdev->msix || !pdev->domain )
+ rc = -ENODEV;
+ else if ( !is_hvm_domain(pdev->domain) )
+ rc = -ENXIO;
+ else if ( (rc = xsm_manage_domain_pirq(XSM_TARGET, pdev->domain)) == 0 )
+ {
+ unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
+ PCI_CAP_ID_MSIX);
+ u16 control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(pos));
+
+ BUG_ON(!pos);
+ pdev->msix->guest_maskall = mask;
+ if ( pdev->msix->host_maskall || mask )
+ control |= PCI_MSIX_FLAGS_MASKALL;
+ else
+ control &= ~PCI_MSIX_FLAGS_MASKALL;
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
+ }
+ spin_unlock(&pcidevs_lock);
+
+ return rc;
+}
+
/*
* Notice: only construct the msi_desc
* no change to irq_desc here, and the interrupt is masked
--- trunk.orig/xen/arch/x86/physdev.c 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/arch/x86/physdev.c 2015-03-25 14:02:24.000000000 +0100
@@ -648,6 +648,30 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
}
+ case PHYSDEVOP_msix_enable:
+ case PHYSDEVOP_msix_disable: {
+ struct physdev_pci_device dev;
+
+ if ( copy_from_guest(&dev, arg, 1) )
+ ret = -EFAULT;
+ else
+ ret = pci_msix_enable(dev.seg, dev.bus, dev.devfn,
+ cmd == PHYSDEVOP_msix_enable);
+ break;
+ }
+
+ case PHYSDEVOP_msix_mask_all:
+ case PHYSDEVOP_msix_unmask_all: {
+ struct physdev_pci_device dev;
+
+ if ( copy_from_guest(&dev, arg, 1) )
+ ret = -EFAULT;
+ else
+ ret = pci_msix_maskall(dev.seg, dev.bus, dev.devfn,
+ cmd == PHYSDEVOP_msix_mask_all);
+ break;
+ }
+
case PHYSDEVOP_pci_mmcfg_reserved: {
struct physdev_pci_mmcfg_reserved info;
--- trunk.orig/xen/include/asm-x86/msi.h 2015-03-09 09:42:49.000000000 +0100
+++ trunk/xen/include/asm-x86/msi.h 2015-03-25 14:01:00.000000000 +0100
@@ -78,6 +78,8 @@ struct msi_desc;
extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
extern void pci_disable_msi(struct msi_desc *desc);
extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
+extern int pci_msix_enable(u16 seg, u8 bus, u8 devfn, bool_t on);
+extern int pci_msix_maskall(u16 seg, u8 bus, u8 devfn, bool_t mask);
extern void pci_cleanup_msi(struct pci_dev *pdev);
extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *,
@@ -228,6 +230,7 @@ struct arch_msix {
--- sle12sp1.orig/xen/include/asm-x86/msi.h 2015-07-08 00:00:00.000000000 +0200
+++ sle12sp1/xen/include/asm-x86/msi.h 2015-06-19 09:32:02.000000000 +0200
@@ -228,6 +228,7 @@ struct arch_msix {
int table_refcnt[MAX_MSIX_TABLE_PAGES];
int table_idx[MAX_MSIX_TABLE_PAGES];
spinlock_t table_lock;
@ -310,89 +67,3 @@ device model state stale.
domid_t warned;
};
--- trunk.orig/xen/include/public/physdev.h 2013-12-24 18:25:25.000000000 +0100
+++ trunk/xen/include/public/physdev.h 2015-03-24 15:54:54.000000000 +0100
@@ -310,6 +310,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_devi
*/
#define PHYSDEVOP_prepare_msix 30
#define PHYSDEVOP_release_msix 31
+/*
+ * The device model domain for a guest should be using these instead of
+ * fiddling with the respective flags in the MSI-X capability structure.
+ */
+#define PHYSDEVOP_msix_enable 32
+#define PHYSDEVOP_msix_disable 33
+#define PHYSDEVOP_msix_mask_all 34
+#define PHYSDEVOP_msix_unmask_all 35
struct physdev_pci_device {
/* IN */
uint16_t seg;
--- trunk.orig/xen/include/xsm/dummy.h 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/include/xsm/dummy.h 2015-03-23 11:13:16.000000000 +0100
@@ -439,6 +439,12 @@ static XSM_INLINE int xsm_map_domain_irq
return xsm_default_action(action, current->domain, d);
}
+static XSM_INLINE int xsm_manage_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
+{
+ XSM_ASSERT_ACTION(XSM_TARGET);
+ return xsm_default_action(action, current->domain, d);
+}
+
static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
{
XSM_ASSERT_ACTION(XSM_TARGET);
--- trunk.orig/xen/include/xsm/xsm.h 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/include/xsm/xsm.h 2015-05-15 10:28:19.000000000 +0200
@@ -105,6 +105,7 @@ struct xsm_operations {
char *(*show_irq_sid) (int irq);
int (*map_domain_pirq) (struct domain *d);
int (*map_domain_irq) (struct domain *d, int irq, void *data);
+ int (*manage_domain_pirq) (struct domain *d);
int (*unmap_domain_pirq) (struct domain *d);
int (*unmap_domain_irq) (struct domain *d, int irq, void *data);
int (*irq_permission) (struct domain *d, int pirq, uint8_t allow);
@@ -409,6 +410,11 @@ static inline int xsm_map_domain_irq (xs
return xsm_ops->map_domain_irq(d, irq, data);
}
+static inline int xsm_manage_domain_pirq(xsm_default_t def, struct domain *d)
+{
+ return xsm_ops->manage_domain_pirq(d);
+}
+
static inline int xsm_unmap_domain_pirq (xsm_default_t def, struct domain *d)
{
return xsm_ops->unmap_domain_pirq(d);
--- trunk.orig/xen/xsm/dummy.c 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/xsm/dummy.c 2015-05-15 10:27:35.000000000 +0200
@@ -79,6 +79,7 @@ void xsm_fixup_ops (struct xsm_operation
set_to_dummy_if_null(ops, show_irq_sid);
set_to_dummy_if_null(ops, map_domain_pirq);
set_to_dummy_if_null(ops, map_domain_irq);
+ set_to_dummy_if_null(ops, manage_domain_pirq);
set_to_dummy_if_null(ops, unmap_domain_pirq);
set_to_dummy_if_null(ops, unmap_domain_irq);
set_to_dummy_if_null(ops, irq_permission);
--- trunk.orig/xen/xsm/flask/hooks.c 2015-01-14 18:44:18.000000000 +0100
+++ trunk/xen/xsm/flask/hooks.c 2015-05-15 10:27:50.000000000 +0200
@@ -875,6 +875,11 @@ static int flask_map_domain_irq (struct
return rc;
}
+static int flask_manage_domain_pirq(struct domain *d)
+{
+ return current_has_perm(d, SECCLASS_RESOURCE, RESOURCE__USE);
+}
+
static int flask_unmap_domain_pirq (struct domain *d)
{
return current_has_perm(d, SECCLASS_RESOURCE, RESOURCE__REMOVE);
@@ -1556,6 +1561,7 @@ static struct xsm_operations flask_ops =
.map_domain_pirq = flask_map_domain_pirq,
.map_domain_irq = flask_map_domain_irq,
+ .manage_domain_pirq = flask_manage_domain_pirq,
.unmap_domain_pirq = flask_unmap_domain_pirq,
.unmap_domain_irq = flask_unmap_domain_irq,
.irq_permission = flask_irq_permission,


@ -1,5 +1,3 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
x86/MSI-X: be more careful during teardown
When a device gets detached from a guest, pciback will clear its
@ -21,19 +19,13 @@ common) system behavior.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
The use of the mask-all bit here collides with qemu's incorrect use
of that same bit. This would become a security issue if released that
way. A later patch in this series will provide the infrastructure for
qemu to stop direct access to that bit. A qemu series including a patch
making use of the new interface will be sent subsequently.
Backporting note (largely to myself):
Depends on (not yet backported to 4.4 and earlier) commit 061eebe0e
"x86/MSI: drop workaround for insecure Dom0 kernels" (due to re-use
of struct arch_msix's warned field).
--- trunk.orig/xen/arch/x86/irq.c 2015-06-03 16:55:05.000000000 +0200
+++ trunk/xen/arch/x86/irq.c 2015-03-25 09:36:52.000000000 +0100
--- sle12sp1.orig/xen/arch/x86/irq.c 2015-07-08 11:47:52.000000000 +0200
+++ sle12sp1/xen/arch/x86/irq.c 2015-07-07 18:01:32.000000000 +0200
@@ -217,9 +217,9 @@ void destroy_irq(unsigned int irq)
}
@ -65,9 +57,9 @@ Backporting note (largely to myself):
/*
* Mark any remaining pending EOIs as ready to flush.
--- trunk.orig/xen/arch/x86/msi.c 2015-05-19 23:16:48.000000000 +0200
+++ trunk/xen/arch/x86/msi.c 2015-03-25 09:35:38.000000000 +0100
@@ -121,6 +121,27 @@ static void msix_put_fixmap(struct arch_
--- sle12sp1.orig/xen/arch/x86/msi.c 2015-06-22 09:23:08.000000000 +0200
+++ sle12sp1/xen/arch/x86/msi.c 2015-07-07 18:01:16.000000000 +0200
@@ -123,6 +123,27 @@ static void msix_put_fixmap(struct arch_
spin_unlock(&msix->table_lock);
}
@ -95,7 +87,7 @@ Backporting note (largely to myself):
/*
* MSI message composition
*/
@@ -162,7 +183,7 @@ void msi_compose_msg(unsigned vector, co
@@ -166,7 +187,7 @@ void msi_compose_msg(unsigned vector, co
}
}
@ -104,16 +96,16 @@ Backporting note (largely to myself):
{
switch ( entry->msi_attrib.type )
{
@@ -198,6 +219,8 @@ static void read_msi_msg(struct msi_desc
void __iomem *base;
base = entry->mask_base;
@@ -201,6 +222,8 @@ static void read_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ return 0;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
@@ -209,6 +232,8 @@ static void read_msi_msg(struct msi_desc
@@ -212,6 +235,8 @@ static void read_msi_msg(struct msi_desc
if ( iommu_intremap )
iommu_read_msi_from_ire(entry, msg);
@ -122,16 +114,16 @@ Backporting note (largely to myself):
}
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
@@ -260,6 +285,8 @@ static int write_msi_msg(struct msi_desc
void __iomem *base;
base = entry->mask_base;
@@ -262,6 +287,8 @@ static int write_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ return -ENXIO;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
@@ -287,7 +314,8 @@ void set_msi_affinity(struct irq_desc *d
@@ -289,7 +316,8 @@ void set_msi_affinity(struct irq_desc *d
ASSERT(spin_is_locked(&desc->lock));
memset(&msg, 0, sizeof(msg));
@ -141,17 +133,18 @@ Backporting note (largely to myself):
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
@@ -347,20 +375,24 @@ int msi_maskable_irq(const struct msi_de
@@ -349,23 +377,27 @@ int msi_maskable_irq(const struct msi_de
|| entry->msi_attrib.maskbit;
}
-static void msi_set_mask_bit(struct irq_desc *desc, int flag)
+static bool_t msi_set_mask_bit(struct irq_desc *desc, int flag)
-static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
+static bool_t msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
{
struct msi_desc *entry = desc->msi_desc;
+ struct pci_dev *pdev;
+ u16 seg;
+ u8 bus, slot, func;
bool_t flag = host || guest;
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
@ -160,9 +153,11 @@ Backporting note (largely to myself):
+ bus = pdev->bus;
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
switch (entry->msi_attrib.type) {
switch ( entry->msi_attrib.type )
{
case PCI_CAP_ID_MSI:
if (entry->msi_attrib.maskbit) {
if ( entry->msi_attrib.maskbit )
{
u32 mask_bits;
- u16 seg = entry->dev->seg;
- u8 bus = entry->dev->bus;
@ -171,7 +166,7 @@ Backporting note (largely to myself):
mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
@@ -369,24 +401,52 @@ static void msi_set_mask_bit(struct irq_
@@ -374,25 +406,54 @@ static void msi_set_mask_bit(struct irq_
}
break;
case PCI_CAP_ID_MSIX:
@ -192,6 +187,7 @@ Backporting note (largely to myself):
+ u16 control;
+ domid_t domid = pdev->domain->domain_id;
+
+ pdev->msix->host_maskall = 1;
+ control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
+ if ( control & PCI_MSIX_FLAGS_MASKALL )
@ -215,7 +211,8 @@ Backporting note (largely to myself):
- break;
+ return 0;
}
entry->msi_attrib.masked = !!flag;
entry->msi_attrib.host_masked = host;
entry->msi_attrib.guest_masked = guest;
+
+ return 1;
}
@ -234,7 +231,7 @@ Backporting note (largely to myself):
break;
return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
PCI_SLOT(entry->dev->devfn),
@@ -394,6 +454,8 @@ static int msi_get_mask_bit(const struct
@@ -400,6 +461,8 @@ static int msi_get_mask_bit(const struct
entry->msi.mpos) >>
entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
@ -243,24 +240,44 @@ Backporting note (largely to myself):
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
}
return -1;
@@ -401,12 +463,14 @@ static int msi_get_mask_bit(const struct
@@ -407,12 +470,16 @@ static int msi_get_mask_bit(const struct
void mask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 1);
+ if ( unlikely(!msi_set_mask_bit(desc, 1)) )
- msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc, 1,
+ desc->msi_desc->msi_attrib.guest_masked)) )
+ BUG_ON(!(desc->status & IRQ_DISABLED));
}
void unmask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 0);
+ if ( unlikely(!msi_set_mask_bit(desc, 0)) )
- msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc, 0,
+ desc->msi_desc->msi_attrib.guest_masked)) )
+ WARN();
}
void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask)
@@ -422,13 +489,15 @@ void guest_mask_msi_irq(struct irq_desc
static unsigned int startup_msi_irq(struct irq_desc *desc)
@@ -723,6 +787,9 @@ static int msix_capability_init(struct p
{
- msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
+ if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
+ WARN();
return 0;
}
static void shutdown_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 1, 1);
+ if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
+ BUG_ON(!(desc->status & IRQ_DISABLED));
}
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
@@ -740,6 +809,9 @@ static int msix_capability_init(struct p
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
@ -270,7 +287,7 @@ Backporting note (largely to myself):
if ( desc )
{
entry = alloc_msi_entry(1);
@@ -855,7 +922,8 @@ static int msix_capability_init(struct p
@@ -879,7 +951,8 @@ static int msix_capability_init(struct p
++msix->used_entries;
/* Restore MSI-X enabled bits */
@ -280,7 +297,7 @@ Backporting note (largely to myself):
return 0;
}
@@ -1008,8 +1076,16 @@ static void __pci_disable_msix(struct ms
@@ -1024,8 +1097,16 @@ static void __pci_disable_msix(struct ms
BUG_ON(list_empty(&dev->msi_list));
@ -299,7 +316,7 @@ Backporting note (largely to myself):
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
_pci_cleanup_msix(dev->msix);
@@ -1147,14 +1223,23 @@ int pci_restore_msi_state(struct pci_dev
@@ -1199,15 +1280,24 @@ int pci_restore_msi_state(struct pci_dev
nr = entry->msi.nvec;
}
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
@ -317,9 +334,11 @@ Backporting note (largely to myself):
for ( i = 0; ; )
{
- msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
- msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked,
- entry[i].msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc,
+ entry[i].msi_attrib.masked)) )
+ entry[i].msi_attrib.host_masked,
+ entry[i].msi_attrib.guest_masked)) )
+ BUG();
if ( !--nr )

x86-MSI-mask.patch Normal file

@ -0,0 +1,48 @@
x86/MSI: properly track guest masking requests
... by monitoring writes to the mask register.
This allows reverting the main effect of the XSA-129 patches in qemu.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
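As a worked illustration only (not taken from the patch), the intended effect
can be pictured as combining two per-vector mask sources; the vector numbers
and mask values below are made up:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t guest_write = 0x1; /* guest masks vector 0 via the mask register */
        uint32_t host_mask   = 0x4; /* Xen itself currently masks vector 2        */

        /* The guest's intention is recorded per vector ...                */
        uint32_t guest_masked = guest_write;
        /* ... while host-masked vectors stay set in what the guest reads
         * back, so it cannot unmask them behind the hypervisor's back.    */
        uint32_t effective = guest_write | host_mask;

        assert(guest_masked == 0x1 && effective == 0x5);
        return 0;
    }
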
--- sle12sp1.orig/xen/arch/x86/msi.c 2015-07-07 18:01:41.000000000 +0200
+++ sle12sp1/xen/arch/x86/msi.c 2015-07-07 18:01:47.000000000 +0200
@@ -1303,6 +1303,37 @@ int pci_msi_conf_write_intercept(struct
return 1;
}
+ entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
+ if ( entry && entry->msi_attrib.maskbit )
+ {
+ uint16_t cntl;
+ uint32_t unused;
+
+ pos = entry->msi_attrib.pos;
+ if ( reg < pos || reg >= entry->msi.mpos + 8 )
+ return 0;
+
+ if ( reg == msi_control_reg(pos) )
+ return size == 2 ? 1 : -EACCES;
+ if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
+ return -EACCES;
+
+ cntl = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
+ unused = ~(uint32_t)0 >> (32 - multi_msi_capable(cntl));
+ for ( pos = 0; pos < entry->msi.nvec; ++pos, ++entry )
+ {
+ entry->msi_attrib.guest_masked =
+ *data >> entry->msi_attrib.entry_nr;
+ if ( entry->msi_attrib.host_masked )
+ *data |= 1 << pos;
+ unused &= ~(1 << pos);
+ }
+
+ *data |= unused;
+
+ return 1;
+ }
+
return 0;
}

x86-MSI-pv-unmask.patch Normal file

@ -0,0 +1,93 @@
x86/MSI: fix guest unmasking when handling IRQ via event channel
Rather than assuming only PV guests need special treatment (and
dealing with that directly when an IRQ gets set up), keep all guest MSI
IRQs masked until either the (HVM) guest unmasks them via vMSI or the
(PV, PVHVM, or PVH) guest sets up an event channel for it.
To not further clutter the common evtchn_bind_pirq() with x86-specific
code, introduce an arch_evtchn_bind_pirq() hook instead.
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- sle12sp1.orig/xen/arch/x86/irq.c 2015-07-08 12:33:47.000000000 +0200
+++ sle12sp1/xen/arch/x86/irq.c 2015-07-07 17:04:08.000000000 +0200
@@ -2502,6 +2502,25 @@ int unmap_domain_pirq_emuirq(struct doma
return ret;
}
+void arch_evtchn_bind_pirq(struct domain *d, int pirq)
+{
+ int irq = domain_pirq_to_irq(d, pirq);
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ if ( irq <= 0 )
+ return;
+
+ if ( is_hvm_domain(d) )
+ map_domain_emuirq_pirq(d, pirq, IRQ_PT);
+
+ desc = irq_to_desc(irq);
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( desc->msi_desc )
+ guest_mask_msi_irq(desc, 0);
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
bool_t hvm_domain_use_pirq(const struct domain *d, const struct pirq *pirq)
{
return is_hvm_domain(d) && pirq &&
--- sle12sp1.orig/xen/arch/x86/msi.c 2015-07-08 00:00:00.000000000 +0200
+++ sle12sp1/xen/arch/x86/msi.c 2015-07-07 16:50:02.000000000 +0200
@@ -422,10 +422,7 @@ void guest_mask_msi_irq(struct irq_desc
static unsigned int startup_msi_irq(struct irq_desc *desc)
{
- bool_t guest_masked = (desc->status & IRQ_GUEST) &&
- is_hvm_domain(desc->msi_desc->dev->domain);
-
- msi_set_mask_bit(desc, 0, guest_masked);
+ msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
return 0;
}
--- sle12sp1.orig/xen/common/event_channel.c 2015-07-08 14:04:08.000000000 +0200
+++ sle12sp1/xen/common/event_channel.c 2015-07-07 16:53:47.000000000 +0200
@@ -504,10 +504,7 @@ static long evtchn_bind_pirq(evtchn_bind
bind->port = port;
-#ifdef CONFIG_X86
- if ( is_hvm_domain(d) && domain_pirq_to_irq(d, pirq) > 0 )
- map_domain_emuirq_pirq(d, pirq, IRQ_PT);
-#endif
+ arch_evtchn_bind_pirq(d, pirq);
out:
spin_unlock(&d->event_lock);
--- sle12sp1.orig/xen/include/asm-arm/irq.h 2015-07-08 12:33:47.000000000 +0200
+++ sle12sp1/xen/include/asm-arm/irq.h 2015-07-07 17:02:00.000000000 +0200
@@ -44,6 +44,8 @@ int route_irq_to_guest(struct domain *d,
const char *devname);
void arch_move_irqs(struct vcpu *v);
+#define arch_evtchn_bind_pirq(d, pirq) ((void)((d) + (pirq)))
+
/* Set IRQ type for an SPI */
int irq_set_spi_type(unsigned int spi, unsigned int type);
--- sle12sp1.orig/xen/include/xen/irq.h 2015-07-08 12:33:47.000000000 +0200
+++ sle12sp1/xen/include/xen/irq.h 2015-07-07 17:02:49.000000000 +0200
@@ -172,4 +172,8 @@ unsigned int set_desc_affinity(struct ir
unsigned int arch_hwdom_irqs(domid_t);
#endif
+#ifndef arch_evtchn_bind_pirq
+void arch_evtchn_bind_pirq(struct domain *, int pirq);
+#endif
+
#endif /* __XEN_IRQ_H__ */


@ -0,0 +1,114 @@
x86/PCI: add config space abstract write intercept logic
This is to be used by MSI code, and later to also be hooked up to
MMCFG accesses by Dom0.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
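A minimal mock (not part of the patch) of the contract pci_cfg_ok() relies on
below: a negative return denies the write, zero passes it through unchanged,
and a positive return means *data was adjusted and should be written in its
modified form. The register ranges, the forced bit, and the helper name are
made-up stand-ins for the real MSI/MSI-X handling added later in this series:

    #include <stdint.h>
    #include <stdio.h>

    /* Mock intercept: reject odd-sized accesses to a watched register and
     * force a host-owned bit (here bit 14, mask-all in MSI-X terms) to stay
     * set in the value that actually gets written. */
    int conf_write_intercept_mock(unsigned int reg, unsigned int size,
                                  uint32_t *data)
    {
        if ( reg < 64 || reg >= 256 )
            return 0;            /* standard config space: nothing to do */
        if ( size != 2 )
            return -13;          /* -EACCES */
        *data |= 0x4000;
        return 1;
    }

    int main(void)
    {
        uint32_t data = 0x0001;
        int rc = conf_write_intercept_mock(0x62, 2, &data);

        if ( rc >= 0 )
            printf("write allowed, data now %#x\n", (unsigned int)data);
        else
            printf("write denied (%d)\n", rc);
        return 0;
    }
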
--- sle12sp1.orig/xen/arch/x86/msi.c 2015-07-08 11:45:59.000000000 +0200
+++ sle12sp1/xen/arch/x86/msi.c 2015-06-22 09:06:30.000000000 +0200
@@ -1108,6 +1108,12 @@ void pci_cleanup_msi(struct pci_dev *pde
msi_free_irqs(pdev);
}
+int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
+ unsigned int size, uint32_t *data)
+{
+ return 0;
+}
+
int pci_restore_msi_state(struct pci_dev *pdev)
{
unsigned long flags;
--- sle12sp1.orig/xen/arch/x86/pci.c 2015-07-08 11:45:59.000000000 +0200
+++ sle12sp1/xen/arch/x86/pci.c 2015-06-19 16:08:11.000000000 +0200
@@ -67,3 +67,28 @@ void pci_conf_write(uint32_t cf8, uint8_
spin_unlock_irqrestore(&pci_config_lock, flags);
}
+
+int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
+ unsigned int reg, unsigned int size,
+ uint32_t *data)
+{
+ struct pci_dev *pdev;
+ int rc = 0;
+
+ /*
+ * Avoid expensive operations when no hook is going to do anything
+ * for the access anyway.
+ */
+ if ( reg < 64 || reg >= 256 )
+ return 0;
+
+ spin_lock(&pcidevs_lock);
+
+ pdev = pci_get_pdev(seg, PCI_BUS(bdf), PCI_DEVFN2(bdf));
+ if ( pdev )
+ rc = pci_msi_conf_write_intercept(pdev, reg, size, data);
+
+ spin_unlock(&pcidevs_lock);
+
+ return rc;
+}
--- sle12sp1.orig/xen/arch/x86/traps.c 2015-07-08 11:45:59.000000000 +0200
+++ sle12sp1/xen/arch/x86/traps.c 2015-06-19 15:52:47.000000000 +0200
@@ -1708,8 +1708,8 @@ static int admin_io_okay(
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
-static bool_t pci_cfg_ok(struct domain *currd, bool_t write,
- unsigned int start, unsigned int size)
+static bool_t pci_cfg_ok(struct domain *currd, unsigned int start,
+ unsigned int size, uint32_t *write)
{
uint32_t machine_bdf;
@@ -1741,8 +1741,12 @@ static bool_t pci_cfg_ok(struct domain *
start |= CF8_ADDR_HI(currd->arch.pci_cf8);
}
- return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
- start, start + size - 1, write);
+ if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
+ start, start + size - 1, !!write) != 0 )
+ return 0;
+
+ return !write ||
+ pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0;
}
uint32_t guest_io_read(
@@ -1796,7 +1800,7 @@ uint32_t guest_io_read(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 0, port & 3, size) )
+ if ( pci_cfg_ok(v->domain, port & 3, size, NULL) )
sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
}
@@ -1869,7 +1873,7 @@ void guest_io_write(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 1, port & 3, size) )
+ if ( pci_cfg_ok(v->domain, port & 3, size, &data) )
pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
}
--- sle12sp1.orig/xen/include/asm-x86/pci.h 2015-07-08 11:45:59.000000000 +0200
+++ sle12sp1/xen/include/asm-x86/pci.h 2015-06-19 15:52:03.000000000 +0200
@@ -15,4 +15,11 @@ struct arch_pci_dev {
vmask_t used_vectors;
};
+struct pci_dev;
+int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
+ unsigned int reg, unsigned int size,
+ uint32_t *data);
+int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg,
+ unsigned int size, uint32_t *data);
+
#endif /* __X86_PCI_H__ */

x86-pci_cfg_okay.patch Normal file

@ -0,0 +1,156 @@
# Commit 85baced14dec2fafa9fe560969dba2ae28e8bebb
# Date 2015-06-09 15:59:31 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: adjust PV I/O emulation functions' types
admin_io_okay(), guest_io_read(), and guest_io_write() all don't need
their current "regs" parameter at all, and they don't use the vCPU
passed to them for other than obtaining its domain. Drop the former and
replace the latter by a struct domain pointer.
pci_cfg_ok() returns a boolean type, and its "write" parameter is of
boolean kind too.
All of them get called for the current vCPU (and hence current domain)
only, so name the domain parameters accordingly except in the
admin_io_okay() case, which a subsequent patch will use for simplifying
setup_io_bitmap().
Latch current->domain into a local variable in emulate_privileged_op().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
# Commit 2d67a7a4d37a4759bcd7f2ee2d740497ad669c7d
# Date 2015-06-18 15:07:10 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: synchronize PCI config space access decoding
Both PV and HVM logic have similar but not similar enough code here.
Synchronize the two so that
- in the HVM case we don't unconditionally try to access extended
config space
- in the PV case we pass a correct range to the XSM hook
- in the PV case we don't needlessly deny access when the operation
isn't really on PCI config space
All this along with sharing the macros HVM already had here.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Backport stripped down to just the pci_cfg_ok() adjustments.
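For reference (not part of the patch), the CF8_* helpers this backport shares
decode the legacy 0xCF8 config-address value as shown here; the macros are
copied from the asm-x86/pci.h hunk below and the example value is arbitrary:

    #include <assert.h>
    #include <stdint.h>

    #define CF8_BDF(cf8)     (((cf8) & 0x00ffff00) >> 8)
    #define CF8_ADDR_LO(cf8) ( (cf8) & 0x000000fc)
    #define CF8_ADDR_HI(cf8) (((cf8) & 0x0f000000) >> 16)
    #define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))

    int main(void)
    {
        uint32_t cf8 = 0x80000a44; /* enable | bus 0, dev 1, fn 2 | reg 0x44 */

        assert(CF8_ENABLED(cf8));
        assert(CF8_BDF(cf8) == 0x0a);     /* devfn 0x0a: device 1, function 2 */
        assert(CF8_ADDR_LO(cf8) == 0x44); /* dword-aligned register offset    */
        return 0;
    }
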
--- sle12sp1.orig/xen/arch/x86/traps.c 2015-07-08 14:13:16.000000000 +0200
+++ sle12sp1/xen/arch/x86/traps.c 2015-07-08 11:43:22.000000000 +0200
@@ -1708,14 +1708,18 @@ static int admin_io_okay(
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
-static int pci_cfg_ok(struct domain *d, int write, int size)
+static bool_t pci_cfg_ok(struct domain *currd, bool_t write,
+ unsigned int start, unsigned int size)
{
uint32_t machine_bdf;
- uint16_t start, end;
- if (!is_hardware_domain(d))
+
+ if ( !is_hardware_domain(currd) )
return 0;
- machine_bdf = (d->arch.pci_cf8 >> 8) & 0xFFFF;
+ if ( !CF8_ENABLED(currd->arch.pci_cf8) )
+ return 1;
+
+ machine_bdf = CF8_BDF(currd->arch.pci_cf8);
if ( write )
{
const unsigned long *ro_map = pci_get_ro_map(0);
@@ -1723,9 +1727,9 @@ static int pci_cfg_ok(struct domain *d,
if ( ro_map && test_bit(machine_bdf, ro_map) )
return 0;
}
- start = d->arch.pci_cf8 & 0xFF;
+ start |= CF8_ADDR_LO(currd->arch.pci_cf8);
/* AMD extended configuration space access? */
- if ( (d->arch.pci_cf8 & 0x0F000000) &&
+ if ( CF8_ADDR_HI(currd->arch.pci_cf8) &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86 >= 0x10 && boot_cpu_data.x86 <= 0x17 )
{
@@ -1734,12 +1738,11 @@ static int pci_cfg_ok(struct domain *d,
if ( rdmsr_safe(MSR_AMD64_NB_CFG, msr_val) )
return 0;
if ( msr_val & (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT) )
- start |= (d->arch.pci_cf8 >> 16) & 0xF00;
+ start |= CF8_ADDR_HI(currd->arch.pci_cf8);
}
- end = start + size - 1;
- if (xsm_pci_config_permission(XSM_HOOK, d, machine_bdf, start, end, write))
- return 0;
- return 1;
+
+ return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
+ start, start + size - 1, write);
}
uint32_t guest_io_read(
@@ -1793,7 +1796,7 @@ uint32_t guest_io_read(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 0, size) )
+ if ( pci_cfg_ok(v->domain, 0, port & 3, size) )
sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
}
@@ -1866,7 +1869,7 @@ void guest_io_write(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 1, size) )
+ if ( pci_cfg_ok(v->domain, 1, port & 3, size) )
pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
}
--- sle12sp1.orig/xen/arch/x86/hvm/hvm.c 2015-07-08 14:13:38.000000000 +0200
+++ sle12sp1/xen/arch/x86/hvm/hvm.c 2015-07-08 11:43:22.000000000 +0200
@@ -2356,11 +2356,6 @@ void hvm_vcpu_down(struct vcpu *v)
static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
ioreq_t *p)
{
-#define CF8_BDF(cf8) (((cf8) & 0x00ffff00) >> 8)
-#define CF8_ADDR_LO(cf8) ((cf8) & 0x000000fc)
-#define CF8_ADDR_HI(cf8) (((cf8) & 0x0f000000) >> 16)
-#define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))
-
struct hvm_ioreq_server *s;
uint32_t cf8;
uint8_t type;
@@ -2445,11 +2440,6 @@ static struct hvm_ioreq_server *hvm_sele
}
return d->arch.hvm_domain.default_ioreq_server;
-
-#undef CF8_ADDR_ENABLED
-#undef CF8_ADDR_HI
-#undef CF8_ADDR_LO
-#undef CF8_BDF
}
int hvm_buffered_io_send(ioreq_t *p)
--- sle12sp1.orig/xen/include/asm-x86/pci.h 2015-07-08 14:13:16.000000000 +0200
+++ sle12sp1/xen/include/asm-x86/pci.h 2015-07-08 11:43:22.000000000 +0200
@@ -1,6 +1,11 @@
#ifndef __X86_PCI_H__
#define __X86_PCI_H__
+#define CF8_BDF(cf8) ( ((cf8) & 0x00ffff00) >> 8)
+#define CF8_ADDR_LO(cf8) ( (cf8) & 0x000000fc)
+#define CF8_ADDR_HI(cf8) ( ((cf8) & 0x0f000000) >> 16)
+#define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))
+
#define IS_SNB_GFX(id) (id == 0x01068086 || id == 0x01168086 \
|| id == 0x01268086 || id == 0x01028086 \
|| id == 0x01128086 || id == 0x01228086 \


@ -1,3 +1,53 @@
-------------------------------------------------------------------
Wed Jul 8 11:38:26 MDT 2015 - carnold@suse.com
- bnc#935634 - VUL-0: CVE-2015-3259: xen: XSA-137: xl command line
config handling stack overflow
CVE-2015-3259-xsa137.patch
- Upstream patches from Jan
558bfaa0-x86-traps-avoid-using-current-too-early.patch
5592a116-nested-EPT-fix-the-handling-of-nested-EPT.patch
559b9dd6-x86-p2m-ept-don-t-unmap-in-use-EPT-pagetable.patch
559bdde5-pull-in-latest-linux-earlycpio.patch
- Upstream patches from Jan pending review
552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch
552d0fe8-x86-mtrr-include-asm-atomic.h.patch
552d293b-x86-vMSI-X-honor-all-mask-requests.patch
552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
554c7aee-x86-provide-arch_fetch_and_add.patch
554c7b00-arm-provide-arch_fetch_and_add.patch
55534b0a-x86-provide-add_sized.patch
55534b25-arm-provide-add_sized.patch
5555a4f8-use-ticket-locks-for-spin-locks.patch
5555a5b9-x86-arm-remove-asm-spinlock-h.patch
5555a8ec-introduce-non-contiguous-allocation.patch
55795a52-x86-vMSI-X-support-qword-MMIO-access.patch
557eb55f-gnttab-per-active-entry-locking.patch
557eb5b6-gnttab-introduce-maptrack-lock.patch
557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch
557ffab8-evtchn-factor-out-freeing-an-event-channel.patch
5582bf43-evtchn-simplify-port_is_valid.patch
5582bf81-evtchn-remove-the-locking-when-unmasking-an-event-channel.patch
5583d9c5-x86-MSI-X-cleanup.patch
5583da09-x86-MSI-track-host-and-guest-masking-separately.patch
5583da64-gnttab-use-per-VCPU-maptrack-free-lists.patch
5583da8c-gnttab-steal-maptrack-entries-from-other-VCPUs.patch
5587d711-evtchn-clear-xen_consumer-when-clearing-state.patch
5587d779-evtchn-defer-freeing-struct-evtchn-s-until-evtchn_destroy_final.patch
5587d7b7-evtchn-use-a-per-event-channel-lock-for-sending-events.patch
5587d7e2-evtchn-pad-struct-evtchn-to-64-bytes.patch
x86-MSI-pv-unmask.patch
x86-pci_cfg_okay.patch
x86-PCI-CFG-write-intercept.patch
x86-MSI-X-maskall.patch
x86-MSI-X-teardown.patch
x86-MSI-X-enable.patch
x86-MSI-mask.patch
- Dropped
qemu-MSI-X-enable-maskall.patch
qemu-MSI-X-latch-writes.patch
x86-MSI-X-guest-mask.patch
-------------------------------------------------------------------
Tue Jun 30 08:25:35 MDT 2015 - carnold@suse.com

xen.spec

@ -90,6 +90,7 @@ BuildRequires: dev86
#!BuildIgnore: gcc-PIE
BuildRequires: bison
BuildRequires: fdupes
BuildRequires: figlet
BuildRequires: flex
BuildRequires: glib2-devel
BuildRequires: libaio-devel
@ -201,13 +202,20 @@ Source99: baselibs.conf
# http://xenbits.xensource.com/ext/xenalyze
Source20000: xenalyze.hg.tar.bz2
# Upstream patches
Patch1: 551ac326-xentop-add-support-for-qdisk.patch
Patch2: 5548e903-domctl-don-t-truncate-XEN_DOMCTL_max_mem-requests.patch
Patch3: 554cc211-libxl-add-qxl.patch
Patch4: 556d973f-unmodified-drivers-tolerate-IRQF_DISABLED-being-undefined.patch
Patch5: 5576f178-kexec-add-more-pages-to-v1-environment.patch
Patch6: 55780be1-x86-EFI-adjust-EFI_MEMORY_WP-handling-for-spec-version-2.5.patch
Patch1: 55103616-vm-assist-prepare-for-discontiguous-used-bit-numbers.patch
Patch2: 551ac326-xentop-add-support-for-qdisk.patch
Patch3: 5548e903-domctl-don-t-truncate-XEN_DOMCTL_max_mem-requests.patch
Patch4: 5548e95d-x86-allow-to-suppress-M2P-user-mode-exposure.patch
Patch5: 554cc211-libxl-add-qxl.patch
Patch6: 556d973f-unmodified-drivers-tolerate-IRQF_DISABLED-being-undefined.patch
Patch7: 5576f178-kexec-add-more-pages-to-v1-environment.patch
Patch8: 55780be1-x86-EFI-adjust-EFI_MEMORY_WP-handling-for-spec-version-2.5.patch
Patch9: 558bfaa0-x86-traps-avoid-using-current-too-early.patch
Patch10: 5592a116-nested-EPT-fix-the-handling-of-nested-EPT.patch
Patch11: 559b9dd6-x86-p2m-ept-don-t-unmap-in-use-EPT-pagetable.patch
Patch12: 559bdde5-pull-in-latest-linux-earlycpio.patch
Patch131: CVE-2015-4106-xsa131-9.patch
Patch137: CVE-2015-3259-xsa137.patch
# Upstream qemu
Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch
Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch
@ -218,15 +226,6 @@ Patch255: 0005-e1000-multi-buffer-packet-support.patch
Patch256: 0006-e1000-clear-EOP-for-multi-buffer-descriptors.patch
Patch257: 0007-e1000-verify-we-have-buffers-upfront.patch
Patch258: 0008-e1000-check-buffer-availability.patch
# Extra patches pending review
Patch150: 55103616-vm-assist-prepare-for-discontiguous-used-bit-numbers.patch
Patch151: 5548e95d-x86-allow-to-suppress-M2P-user-mode-exposure.patch
Patch156: x86-MSI-X-teardown.patch
Patch157: x86-MSI-X-enable.patch
Patch158: x86-MSI-X-guest-mask.patch
Patch159: x86-MSI-X-maskall.patch
Patch160: qemu-MSI-X-latch-writes.patch
Patch161: qemu-MSI-X-enable-maskall.patch
# Our platform specific patches
Patch301: xen-destdir.patch
Patch302: vif-bridge-no-iptables.patch
@ -309,6 +308,40 @@ Patch605: xen.build-compare.vgabios.patch
Patch606: xen.build-compare.seabios.patch
Patch607: xen.build-compare.man.patch
Patch608: ipxe-no-error-logical-not-parentheses.patch
# Extra patches pending review
Patch801: 552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch
Patch802: 552d0fe8-x86-mtrr-include-asm-atomic.h.patch
Patch803: 552d293b-x86-vMSI-X-honor-all-mask-requests.patch
Patch804: 552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
Patch805: 554c7aee-x86-provide-arch_fetch_and_add.patch
Patch806: 554c7b00-arm-provide-arch_fetch_and_add.patch
Patch807: 55534b0a-x86-provide-add_sized.patch
Patch808: 55534b25-arm-provide-add_sized.patch
Patch809: 5555a4f8-use-ticket-locks-for-spin-locks.patch
Patch810: 5555a5b9-x86-arm-remove-asm-spinlock-h.patch
Patch811: 5555a8ec-introduce-non-contiguous-allocation.patch
Patch812: 55795a52-x86-vMSI-X-support-qword-MMIO-access.patch
Patch813: 557eb55f-gnttab-per-active-entry-locking.patch
Patch814: 557eb5b6-gnttab-introduce-maptrack-lock.patch
Patch815: 557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch
Patch816: 557ffab8-evtchn-factor-out-freeing-an-event-channel.patch
Patch817: 5582bf43-evtchn-simplify-port_is_valid.patch
Patch818: 5582bf81-evtchn-remove-the-locking-when-unmasking-an-event-channel.patch
Patch819: 5583d9c5-x86-MSI-X-cleanup.patch
Patch820: 5583da09-x86-MSI-track-host-and-guest-masking-separately.patch
Patch821: 5583da64-gnttab-use-per-VCPU-maptrack-free-lists.patch
Patch822: 5583da8c-gnttab-steal-maptrack-entries-from-other-VCPUs.patch
Patch823: 5587d711-evtchn-clear-xen_consumer-when-clearing-state.patch
Patch824: 5587d779-evtchn-defer-freeing-struct-evtchn-s-until-evtchn_destroy_final.patch
Patch825: 5587d7b7-evtchn-use-a-per-event-channel-lock-for-sending-events.patch
Patch826: 5587d7e2-evtchn-pad-struct-evtchn-to-64-bytes.patch
Patch850: x86-MSI-pv-unmask.patch
Patch851: x86-pci_cfg_okay.patch
Patch852: x86-PCI-CFG-write-intercept.patch
Patch853: x86-MSI-X-maskall.patch
Patch854: x86-MSI-X-teardown.patch
Patch855: x86-MSI-X-enable.patch
Patch856: x86-MSI-mask.patch
# Build patches
Patch99996: xen.stubdom.newlib.patch
Patch99998: tmp_build.patch
@ -521,7 +554,14 @@ Authors:
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%patch12 -p1
%patch131 -p1
%patch137 -p1
# Upstream qemu patches
%patch250 -p1
%patch251 -p1
@ -532,15 +572,6 @@ Authors:
%patch256 -p1
%patch257 -p1
%patch258 -p1
# Extra patches pending review
%patch150 -p1
%patch151 -p1
%patch156 -p1
%patch157 -p1
%patch158 -p1
%patch159 -p1
%patch160 -p1
%patch161 -p1
# Our platform specific patches
%patch301 -p1
%patch302 -p1
@ -622,6 +653,40 @@ Authors:
%patch606 -p1
%patch607 -p1
%patch608 -p1
# Extra patches pending review
%patch801 -p1
%patch802 -p1
%patch803 -p1
%patch804 -p1
%patch805 -p1
%patch806 -p1
%patch807 -p1
%patch808 -p1
%patch809 -p1
%patch810 -p1
%patch811 -p1
%patch812 -p1
%patch813 -p1
%patch814 -p1
%patch815 -p1
%patch816 -p1
%patch817 -p1
%patch818 -p1
%patch819 -p1
%patch820 -p1
%patch821 -p1
%patch822 -p1
%patch823 -p1
%patch824 -p1
%patch825 -p1
%patch826 -p1
%patch850 -p1
%patch851 -p1
%patch852 -p1
%patch853 -p1
%patch854 -p1
%patch855 -p1
%patch856 -p1
# Build patches
%patch99996 -p1
%patch99998 -p1