Accepting request 204765 from Virtualization

Security and other fixes for os13.1 RC2

OBS-URL: https://build.opensuse.org/request/show/204765
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/xen?expand=0&rev=179
Tomáš Chvátal 2013-10-25 11:38:55 +00:00 committed by Git OBS Bridge
commit 1add73c8a5
26 changed files with 1902 additions and 15 deletions

View File

@ -1,5 +1,9 @@
References: bnc#840592 CVE-2013-4355 XSA-63
# Commit 6bb838e7375f5b031e9ac346b353775c90de45dc
# Date 2013-09-30 14:17:46 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: properly handle hvm_copy_from_guest_{phys,virt}() errors
Ignoring them generally implies using uninitialized data and, in all

View File

@ -1,5 +1,9 @@
References: bnc#840593 CVE-2013-4356 XSA-64
# Commit f46befdd825c8a459c5eb21adb7d5b0dc6e30ad5
# Date 2013-09-30 14:18:25 +0200
# Author Tim Deegan <tim@xen.org>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mm/shadow: Fix initialization of PV shadow L4 tables.
Shadowed PV L4 tables must have the same Xen mappings as their
@ -23,9 +27,7 @@ old name (with its new semantics).
This is CVE-2013-4356 / XSA-64.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Tim Deegan <tim@xen.org>
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm/shadow/multi.c

View File

@ -1,5 +1,9 @@
References: bnc#841766 CVE-2013-4361 XSA-66
# Commit 28b706efb6abb637fabfd74cde70a50935a5640b
# Date 2013-09-30 14:18:58 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: properly set up fbld emulation operand address
This is CVE-2013-4361 / XSA-66.

View File

@ -0,0 +1,116 @@
# Commit ca145fe70bad3a25ad54c6ded1ef237e45a2311e
# Date 2013-09-30 15:28:12 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: don't blindly create L3 tables for the direct map
Now that the direct map area can extend all the way up to almost the
end of address space, this is wasteful.
Also fold two almost redundant messages in SRAT parsing into one.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -137,7 +137,7 @@ l1_pgentry_t __attribute__ ((__section__
#define PTE_UPDATE_WITH_CMPXCHG
#endif
-bool_t __read_mostly mem_hotplug = 0;
+paddr_t __read_mostly mem_hotplug;
/* Private domain structs for DOMID_XEN and DOMID_IO. */
struct domain *dom_xen, *dom_io, *dom_cow;
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -113,6 +113,7 @@ static __init void bad_srat(void)
apicid_to_node[i] = NUMA_NO_NODE;
for (i = 0; i < ARRAY_SIZE(pxm2node); i++)
pxm2node[i] = NUMA_NO_NODE;
+ mem_hotplug = 0;
}
/*
@@ -257,13 +258,6 @@ acpi_numa_memory_affinity_init(struct ac
return;
}
/* It is fine to add this area to the nodes data it will be used later*/
- if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
- {
- printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n",
- start, end);
- mem_hotplug = 1;
- }
-
i = conflicting_memblks(start, end);
if (i == node) {
printk(KERN_WARNING
@@ -287,8 +281,11 @@ acpi_numa_memory_affinity_init(struct ac
if (nd->end < end)
nd->end = end;
}
- printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm,
- start, end);
+ if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && end > mem_hotplug)
+ mem_hotplug = end;
+ printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"%s\n",
+ node, pxm, start, end,
+ ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
node_memblk_range[num_node_memblks].start = start;
node_memblk_range[num_node_memblks].end = end;
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -621,25 +621,20 @@ void __init paging_init(void)
* We setup the L3s for 1:1 mapping if host support memory hotplug
* to avoid sync the 1:1 mapping on page fault handler
*/
- if ( mem_hotplug )
+ for ( va = DIRECTMAP_VIRT_START;
+ va < DIRECTMAP_VIRT_END && (void *)va < __va(mem_hotplug);
+ va += (1UL << L4_PAGETABLE_SHIFT) )
{
- unsigned long va;
-
- for ( va = DIRECTMAP_VIRT_START;
- va < DIRECTMAP_VIRT_END;
- va += (1UL << L4_PAGETABLE_SHIFT) )
+ if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) &
+ _PAGE_PRESENT) )
{
- if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) &
- _PAGE_PRESENT) )
- {
- l3_pg = alloc_domheap_page(NULL, 0);
- if ( !l3_pg )
- goto nomem;
- l3_ro_mpt = page_to_virt(l3_pg);
- clear_page(l3_ro_mpt);
- l4e_write(&idle_pg_table[l4_table_offset(va)],
- l4e_from_page(l3_pg, __PAGE_HYPERVISOR));
- }
+ l3_pg = alloc_domheap_page(NULL, 0);
+ if ( !l3_pg )
+ goto nomem;
+ l3_ro_mpt = page_to_virt(l3_pg);
+ clear_page(l3_ro_mpt);
+ l4e_write(&idle_pg_table[l4_table_offset(va)],
+ l4e_from_page(l3_pg, __PAGE_HYPERVISOR));
}
}
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -399,7 +399,7 @@ static inline int get_page_and_type(stru
int check_descriptor(const struct domain *, struct desc_struct *d);
extern bool_t opt_allow_superpage;
-extern bool_t mem_hotplug;
+extern paddr_t mem_hotplug;
/******************************************************************************
* With shadow pagetables, the different kinds of address start

View File

@ -0,0 +1,82 @@
# Commit 0aa27ce3351f7eb09d13e863a1d5f303086aa32a
# Date 2013-10-04 12:23:23 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/idle: Fix get_cpu_idle_time()'s interaction with offline pcpus
Checking for "idle_vcpu[cpu] != NULL" is insufficient protection against
offline pcpus. From a hypercall, vcpu_runstate_get() will determine "v !=
current", and try to take the vcpu_schedule_lock(). This will try to look up
per_cpu(schedule_data, v->processor) and promptly suffer a NULL structure
dereference, as v->processor's __per_cpu_offset is INVALID_PERCPU_AREA.
One example might look like this:
...
Xen call trace:
[<ffff82c4c0126ddb>] vcpu_runstate_get+0x50/0x113
[<ffff82c4c0126ec6>] get_cpu_idle_time+0x28/0x2e
[<ffff82c4c012b5cb>] do_sysctl+0x3db/0xeb8
[<ffff82c4c023280d>] compat_hypercall+0xbd/0x116
Pagetable walk from 0000000000000040:
L4[0x000] = 0000000186df8027 0000000000028207
L3[0x000] = 0000000188e36027 00000000000261c9
L2[0x000] = 0000000000000000 ffffffffffffffff
****************************************
Panic on CPU 11:
...
get_cpu_idle_time() has been updated to correctly deal with offline pcpus
itself by returning 0, in the same way as it would if it was missing the
idle_vcpu[] pointer.
In doing so, XENPF_getidletime needed updating to correctly retain its
described behaviour of clearing bits in the cpumap for offline pcpus.
As this crash can only be triggered with toolstack hypercalls, it is not a
security issue and just a simple bug.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -355,10 +355,14 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PA
for_each_cpu ( cpu, cpumap )
{
- if ( idle_vcpu[cpu] == NULL )
- cpumask_clear_cpu(cpu, cpumap);
idletime = get_cpu_idle_time(cpu);
+ if ( !idletime )
+ {
+ cpumask_clear_cpu(cpu, cpumap);
+ continue;
+ }
+
if ( copy_to_guest_offset(idletimes, cpu, &idletime, 1) )
{
ret = -EFAULT;
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -176,13 +176,12 @@ void vcpu_runstate_get(struct vcpu *v, s
uint64_t get_cpu_idle_time(unsigned int cpu)
{
- struct vcpu_runstate_info state;
- struct vcpu *v;
+ struct vcpu_runstate_info state = { 0 };
+ struct vcpu *v = idle_vcpu[cpu];
- if ( (v = idle_vcpu[cpu]) == NULL )
- return 0;
+ if ( cpu_online(cpu) && v )
+ vcpu_runstate_get(v, &state);
- vcpu_runstate_get(v, &state);
return state.time[RUNSTATE_running];
}

View File

@ -0,0 +1,35 @@
# Commit 7cfb0053629c4dd1a6f01dc43cca7c0c25b8b7bf
# Date 2013-10-04 12:24:34 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/percpu: Force INVALID_PERCPU_AREA into the non-canonical address region
This causes accidental uses of per_cpu() on a pcpu with an INVALID_PERCPU_AREA
to result in a #GP for attempting to access the middle of the non-canonical
virtual address region.
This is preferable to the current behaviour, where incorrect use of per_cpu()
will result in an effective NULL structure dereference which has security
implication in the context of PV guests.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
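The effect of the new constant can be checked in isolation: with the old offset a stray per_cpu() access resolves to an address just above NULL, while with the new one it resolves into the non-canonical hole, which the CPU rejects with #GP before any page-table walk. The sketch below is illustrative only and is not Xen code; the __per_cpu_start value and the flat model of per_cpu() address formation are assumptions, and 48-bit virtual addresses are assumed.
#include <stdint.h>
#include <stdio.h>

/* Canonical on x86-64 (48-bit VA): bits 63..47 are all copies of bit 47. */
static int is_canonical(uint64_t va)
{
    return ((int64_t)va >> 47) == 0 || ((int64_t)va >> 47) == -1;
}

int main(void)
{
    uint64_t per_cpu_start   = 0xffff82d080300000ULL;  /* assumed link address */
    uint64_t percpu_var_addr = per_cpu_start + 0x40;   /* some per-CPU variable */

    uint64_t old_off = 0 - per_cpu_start;                      /* -(long)__per_cpu_start */
    uint64_t new_off = 0x8000000000000000ULL - per_cpu_start;  /* new INVALID_PERCPU_AREA */

    /* per_cpu() roughly adds __per_cpu_offset[cpu] to the variable's address. */
    printf("old: %#llx canonical=%d (near NULL -> #PF)\n",
           (unsigned long long)(percpu_var_addr + old_off),
           is_canonical(percpu_var_addr + old_off));
    printf("new: %#llx canonical=%d (non-canonical -> #GP)\n",
           (unsigned long long)(percpu_var_addr + new_off),
           is_canonical(percpu_var_addr + new_off));
    return 0;
}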
--- a/xen/arch/x86/percpu.c
+++ b/xen/arch/x86/percpu.c
@@ -6,7 +6,14 @@
#include <xen/rcupdate.h>
unsigned long __per_cpu_offset[NR_CPUS];
-#define INVALID_PERCPU_AREA (-(long)__per_cpu_start)
+
+/*
+ * Force uses of per_cpu() with an invalid area to attempt to access the
+ * middle of the non-canonical address space resulting in a #GP, rather than a
+ * possible #PF at (NULL + a little) which has security implications in the
+ * context of PV guests.
+ */
+#define INVALID_PERCPU_AREA (0x8000000000000000L - (long)__per_cpu_start)
#define PERCPU_ORDER (get_order_from_bytes(__per_cpu_data_end-__per_cpu_start))
void __init percpu_init_areas(void)

View File

@ -0,0 +1,82 @@
# Commit 190b667ac20e8175758f4a3a0f13c4d990e6af7e
# Date 2013-10-04 12:28:14 +0200
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: check VMX capability before read VMX related MSRs
VMX MSRs are only available when the CPU supports the VMX feature. In addition,
the VMX_TRUE* MSRs are only available when bit 55 of the VMX_BASIC MSR is set.
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Cleanup.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Jun Nakajima <jun.nakajima@intel.com>
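The gating rule described above condenses to a small decision helper; the sketch below is not the hypervisor's nvmx_msr_read_intercept(), the vmx_msr_visible() name is invented for illustration, and the MSR indices are the architectural ones from the Intel SDM.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CPUID1_ECX_VMX       (1u << 5)     /* CPUID.1:ECX.VMX, X86_FEATURE_VMXE */
#define VMX_BASIC_TRUE_CTLS  (1ULL << 55)  /* same bit as VMX_BASIC_DEFAULT1_ZERO */

#define MSR_IA32_VMX_TRUE_PINBASED_CTLS   0x48d
#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS  0x48e
#define MSR_IA32_VMX_TRUE_EXIT_CTLS       0x48f
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS      0x490

static bool vmx_msr_visible(uint32_t msr, uint32_t guest_cpuid1_ecx,
                            uint64_t vmx_basic)
{
    if (!(guest_cpuid1_ecx & CPUID1_ECX_VMX))
        return false;                            /* guest has no VMX at all */

    switch (msr) {
    case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
    case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
    case MSR_IA32_VMX_TRUE_EXIT_CTLS:
    case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
        return vmx_basic & VMX_BASIC_TRUE_CTLS;  /* only with bit 55 set */
    default:
        return true;
    }
}

int main(void)
{
    /* Guest advertises VMX, but bit 55 of VMX_BASIC is clear: TRUE MSRs hidden. */
    printf("%d\n", vmx_msr_visible(MSR_IA32_VMX_TRUE_EXIT_CTLS,
                                   CPUID1_ECX_VMX, 0));  /* prints 0 */
    return 0;
}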
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -78,6 +78,7 @@ static DEFINE_PER_CPU(struct list_head,
static DEFINE_PER_CPU(bool_t, vmxon);
static u32 vmcs_revision_id __read_mostly;
+u64 __read_mostly vmx_basic_msr;
static void __init vmx_display_features(void)
{
@@ -301,6 +302,8 @@ static int vmx_init_vmcs_config(void)
vmx_vmexit_control = _vmx_vmexit_control;
vmx_vmentry_control = _vmx_vmentry_control;
cpu_has_vmx_ins_outs_instr_info = !!(vmx_basic_msr_high & (1U<<22));
+ vmx_basic_msr = ((u64)vmx_basic_msr_high << 32) |
+ vmx_basic_msr_low;
vmx_display_features();
}
else
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1814,12 +1814,33 @@ int nvmx_handle_invvpid(struct cpu_user_
int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
{
struct vcpu *v = current;
+ unsigned int ecx, dummy;
u64 data = 0, host_data = 0;
int r = 1;
if ( !nestedhvm_enabled(v->domain) )
return 0;
+ /* VMX capablity MSRs are available only when guest supports VMX. */
+ hvm_cpuid(0x1, &dummy, &dummy, &ecx, &dummy);
+ if ( !(ecx & cpufeat_mask(X86_FEATURE_VMXE)) )
+ return 0;
+
+ /*
+ * Those MSRs are available only when bit 55 of
+ * MSR_IA32_VMX_BASIC is set.
+ */
+ switch ( msr )
+ {
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+ case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+ if ( !(vmx_basic_msr & VMX_BASIC_DEFAULT1_ZERO) )
+ return 0;
+ break;
+ }
+
rdmsrl(msr, host_data);
/*
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -284,6 +284,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
*/
#define VMX_BASIC_DEFAULT1_ZERO (1ULL << 55)
+extern u64 vmx_basic_msr;
+
/* Guest interrupt status */
#define VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK 0x0FF
#define VMX_GUEST_INTR_STATUS_SVI_OFFSET 8

View File

@ -0,0 +1,115 @@
# Commit c6f92aed0e209df823d2cb5780dbb1ea12fc6d4a
# Date 2013-10-04 12:30:09 +0200
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: fix IA32_VMX_CR4_FIXED1 msr emulation
Currently, a hardcoded value is used for IA32_VMX_CR4_FIXED1. This is wrong.
We should check the guest's CPUID to know which CR4 bits are writable by the
guest, and allow the guest to set a bit only when it has the corresponding feature.
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Cleanup.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Jun Nakajima <jun.nakajima@intel.com>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1814,7 +1814,7 @@ int nvmx_handle_invvpid(struct cpu_user_
int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
{
struct vcpu *v = current;
- unsigned int ecx, dummy;
+ unsigned int eax, ebx, ecx, edx, dummy;
u64 data = 0, host_data = 0;
int r = 1;
@@ -1822,7 +1822,7 @@ int nvmx_msr_read_intercept(unsigned int
return 0;
/* VMX capablity MSRs are available only when guest supports VMX. */
- hvm_cpuid(0x1, &dummy, &dummy, &ecx, &dummy);
+ hvm_cpuid(0x1, &dummy, &dummy, &ecx, &edx);
if ( !(ecx & cpufeat_mask(X86_FEATURE_VMXE)) )
return 0;
@@ -1946,8 +1946,55 @@ int nvmx_msr_read_intercept(unsigned int
data = X86_CR4_VMXE;
break;
case MSR_IA32_VMX_CR4_FIXED1:
- /* allow 0-settings except SMXE */
- data = 0x267ff & ~X86_CR4_SMXE;
+ if ( edx & cpufeat_mask(X86_FEATURE_VME) )
+ data |= X86_CR4_VME | X86_CR4_PVI;
+ if ( edx & cpufeat_mask(X86_FEATURE_TSC) )
+ data |= X86_CR4_TSD;
+ if ( edx & cpufeat_mask(X86_FEATURE_DE) )
+ data |= X86_CR4_DE;
+ if ( edx & cpufeat_mask(X86_FEATURE_PSE) )
+ data |= X86_CR4_PSE;
+ if ( edx & cpufeat_mask(X86_FEATURE_PAE) )
+ data |= X86_CR4_PAE;
+ if ( edx & cpufeat_mask(X86_FEATURE_MCE) )
+ data |= X86_CR4_MCE;
+ if ( edx & cpufeat_mask(X86_FEATURE_PGE) )
+ data |= X86_CR4_PGE;
+ if ( edx & cpufeat_mask(X86_FEATURE_FXSR) )
+ data |= X86_CR4_OSFXSR;
+ if ( edx & cpufeat_mask(X86_FEATURE_XMM) )
+ data |= X86_CR4_OSXMMEXCPT;
+ if ( ecx & cpufeat_mask(X86_FEATURE_VMXE) )
+ data |= X86_CR4_VMXE;
+ if ( ecx & cpufeat_mask(X86_FEATURE_SMXE) )
+ data |= X86_CR4_SMXE;
+ if ( ecx & cpufeat_mask(X86_FEATURE_PCID) )
+ data |= X86_CR4_PCIDE;
+ if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
+ data |= X86_CR4_OSXSAVE;
+
+ hvm_cpuid(0x0, &eax, &dummy, &dummy, &dummy);
+ switch ( eax )
+ {
+ default:
+ hvm_cpuid(0xa, &eax, &dummy, &dummy, &dummy);
+ /* Check whether guest has the perf monitor feature. */
+ if ( (eax & 0xff) && (eax & 0xff00) )
+ data |= X86_CR4_PCE;
+ /* fall through */
+ case 0x7 ... 0x9:
+ ecx = 0;
+ hvm_cpuid(0x7, &dummy, &ebx, &ecx, &dummy);
+ if ( ebx & cpufeat_mask(X86_FEATURE_FSGSBASE) )
+ data |= X86_CR4_FSGSBASE;
+ if ( ebx & cpufeat_mask(X86_FEATURE_SMEP) )
+ data |= X86_CR4_SMEP;
+ if ( ebx & cpufeat_mask(X86_FEATURE_SMAP) )
+ data |= X86_CR4_SMAP;
+ /* fall through */
+ case 0x0 ... 0x6:
+ break;
+ }
break;
case MSR_IA32_VMX_MISC:
/* Do not support CR3-target feature now */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -148,6 +148,7 @@
#define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */
#define X86_FEATURE_RTM (7*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_NO_FPU_SEL (7*32+13) /* FPU CS/DS stored as zero */
+#define X86_FEATURE_SMAP (7*32+20) /* Supervisor Mode Access Prevention */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -87,6 +87,7 @@
#define X86_CR4_PCIDE 0x20000 /* enable PCID */
#define X86_CR4_OSXSAVE 0x40000 /* enable XSAVE/XRSTOR */
#define X86_CR4_SMEP 0x100000/* enable SMEP */
+#define X86_CR4_SMAP 0x200000/* enable SMAP */
/*
* Trap/fault mnemonics.

View File

@ -0,0 +1,28 @@
# Commit 65ba631bcb62c79eb33ebfde8a0471fd012c37a8
# Date 2013-10-04 12:51:44 +0200
# Author Daniel De Graaf <dgdegra@tycho.nsa.gov>
# Committer Jan Beulich <jbeulich@suse.com>
xsm: forbid PV guest console reads
The CONSOLEIO_read operation was incorrectly allowed to PV guests if the
hypervisor was compiled in debug mode (with VERBOSE defined).
Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -222,10 +222,10 @@ static XSM_INLINE int xsm_console_io(XSM
{
XSM_ASSERT_ACTION(XSM_OTHER);
#ifdef VERBOSE
- return xsm_default_action(XSM_HOOK, current->domain, NULL);
-#else
- return xsm_default_action(XSM_PRIV, current->domain, NULL);
+ if ( cmd == CONSOLEIO_write )
+ return xsm_default_action(XSM_HOOK, d, NULL);
#endif
+ return xsm_default_action(XSM_PRIV, d, NULL);
}
static XSM_INLINE int xsm_profile(XSM_DEFAULT_ARG struct domain *d, int op)

View File

@ -0,0 +1,43 @@
References: bnc#842511 CVE-2013-4368 XSA-67
# Commit 0771faba163769089c9f05f7f76b63e397677613
# Date 2013-10-10 15:19:53 +0200
# Author Matthew Daley <mattjd@gmail.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: check segment descriptor read result in 64-bit OUTS emulation
When emulating such an operation from a 64-bit context (CS has long
mode set), and the data segment is overridden to FS/GS, the result of
reading the overridden segment's descriptor (read_descriptor) is not
checked. If it fails, data_base is left uninitialized.
This can lead to 8 bytes of Xen's stack being leaked to the guest
(implicitly, i.e. via the address given in a #PF).
Coverity-ID: 1055116
This is CVE-2013-4368 / XSA-67.
Signed-off-by: Matthew Daley <mattjd@gmail.com>
Fix formatting.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1990,10 +1990,10 @@ static int emulate_privileged_op(struct
break;
}
}
- else
- read_descriptor(data_sel, v, regs,
- &data_base, &data_limit, &ar,
- 0);
+ else if ( !read_descriptor(data_sel, v, regs,
+ &data_base, &data_limit, &ar, 0) ||
+ !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) )
+ goto fail;
data_limit = ~0UL;
ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
}

View File

@ -0,0 +1,71 @@
References: bnc#842512 CVE-2013-4369 XSA-68
# Commit c53702cee1d6f9f1b72f0cae0b412e21bcda8724
# Date 2013-10-10 15:48:55 +0100
# Author Ian Jackson <ian.jackson@eu.citrix.com>
# Committer Ian Jackson <Ian.Jackson@eu.citrix.com>
libxl: fix vif rate parsing
strtok can return NULL here. We don't need to use strtok anyway, so just
use a simple strchr method.
Coverity-ID: 1055642
This is CVE-2013-4369 / XSA-68
Signed-off-by: Matthew Daley <mattjd@gmail.com>
Fix type. Add test case
Signed-off-by: Ian Campbell <Ian.campbell@citrix.com>
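The strchr-based split is straightforward to see in a standalone sketch; the real parsing helpers (vif_parse_rate_bytes_per_sec() and vif_parse_rate_interval_usecs()) are reduced to printfs here, so this illustrates the splitting logic only and is not libxl code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    const char *rate = "100MB/s@20ms";        /* try "@" to see the failure path */
    char *tmprate = strdup(rate);
    if (tmprate == NULL)
        return 1;                             /* ENOMEM in libxl terms */

    char *p = strchr(tmprate, '@');
    if (p != NULL)
        *p++ = '\0';                          /* tmprate = "100MB/s", p = "20ms" */

    if (*tmprate == '\0') {                   /* a lone "@" now fails cleanly... */
        fprintf(stderr, "no rate specified\n");  /* ...instead of strtok handing back NULL */
        free(tmprate);
        return 1;
    }

    printf("rate part: %s\n", tmprate);
    printf("interval part: %s\n", p != NULL ? p : "(default 50ms)");
    free(tmprate);
    return 0;
}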
--- a/tools/libxl/check-xl-vif-parse
+++ b/tools/libxl/check-xl-vif-parse
@@ -206,4 +206,8 @@ expected </dev/null
one $e rate=4294967295GB/s@5us
one $e rate=4296MB/s@4294s
+# test include of single '@'
+expected </dev/null
+one $e rate=@
+
complete
--- a/tools/libxl/libxlu_vif.c
+++ b/tools/libxl/libxlu_vif.c
@@ -95,23 +95,30 @@ int xlu_vif_parse_rate(XLU_Config *cfg,
uint64_t bytes_per_sec = 0;
uint64_t bytes_per_interval = 0;
uint32_t interval_usecs = 50000UL; /* Default to 50ms */
- char *ratetok, *tmprate;
+ char *p, *tmprate;
int rc = 0;
tmprate = strdup(rate);
+ if (tmprate == NULL) {
+ rc = ENOMEM;
+ goto out;
+ }
+
+ p = strchr(tmprate, '@');
+ if (p != NULL)
+ *p++ = 0;
+
if (!strcmp(tmprate,"")) {
xlu__vif_err(cfg, "no rate specified", rate);
rc = EINVAL;
goto out;
}
- ratetok = strtok(tmprate, "@");
- rc = vif_parse_rate_bytes_per_sec(cfg, ratetok, &bytes_per_sec);
+ rc = vif_parse_rate_bytes_per_sec(cfg, tmprate, &bytes_per_sec);
if (rc) goto out;
- ratetok = strtok(NULL, "@");
- if (ratetok != NULL) {
- rc = vif_parse_rate_interval_usecs(cfg, ratetok, &interval_usecs);
+ if (p != NULL) {
+ rc = vif_parse_rate_interval_usecs(cfg, p, &interval_usecs);
if (rc) goto out;
}

View File

@ -0,0 +1,28 @@
References: bnc#842513 CVE-2013-4370 XSA-69
# Commit 3cd10fd21220f2b814324e6e732004f8f0487d0a
# Date 2013-10-10 15:49:40 +0100
# Author Matthew Daley <mattjd@gmail.com>
# Committer Ian Jackson <Ian.Jackson@eu.citrix.com>
tools/ocaml: fix erroneous free of cpumap in stub_xc_vcpu_getaffinity
Not sure how it got there...
Coverity-ID: 1056196
This is CVE-2013-4370 / XSA-69
Signed-off-by: Matthew Daley <mattjd@gmail.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -461,8 +461,6 @@ CAMLprim value stub_xc_vcpu_getaffinity(
retval = xc_vcpu_getaffinity(_H(xch), _D(domid),
Int_val(vcpu), c_cpumap);
- free(c_cpumap);
-
if (retval < 0) {
free(c_cpumap);
failwith_xc(_H(xch));

View File

@ -0,0 +1,28 @@
References: bnc#842514 CVE-2013-4371 XSA-70
# Commit 4c37ed562224295c0f8b00211287d57cae629782
# Date 2013-10-10 15:49:54 +0100
# Author Matthew Daley <mattjd@gmail.com>
# Committer Ian Jackson <Ian.Jackson@eu.citrix.com>
libxl: fix out-of-memory error handling in libxl_list_cpupool
...otherwise it will return freed memory. All the current users of this
function already check for a NULL return, so use that.
Coverity-ID: 1056194
This is CVE-2013-4371 / XSA-70
Signed-off-by: Matthew Daley <mattjd@gmail.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -649,6 +649,7 @@ libxl_cpupoolinfo * libxl_list_cpupool(l
if (!tmp) {
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "allocating cpupool info");
libxl_cpupoolinfo_list_free(ptr, i);
+ ptr = NULL;
goto out;
}
ptr = tmp;

View File

@ -0,0 +1,176 @@
# Commit 40d66baa46ca8a9ffa6df3e063a967d08ec92bcf
# Date 2013-10-11 09:28:26 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: correct LDT checks
- MMUEXT_SET_LDT should behave as similarly to the LLDT instruction as
possible: fail only if the base address is non-canonical
- instead LDT descriptor accesses should fault if the descriptor
address ends up being non-canonical (by ensuring this we at once
avoid reading an entry from the mach-to-phys table and consider it a
page table entry)
- fault propagation on using LDT selectors must distinguish #PF and #GP
(the latter must be raised for a non-canonical descriptor address,
which also applies to several other uses of propagate_page_fault(),
and hence the problem is being fixed there)
- map_ldt_shadow_page() should properly wrap addresses for 32-bit VMs
At once remove the odd invocation of map_ldt_shadow_page() from the
MMUEXT_SET_LDT handler: There's nothing really telling us that the
first LDT page is going to be preferred over others.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
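The descriptor-address rule at the heart of the change can be sketched on its own, assuming 48-bit virtual addresses; ldt_descriptor_addr() and its example values are illustrative only and not part of the patch.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_canonical_address(uint64_t va)
{
    return ((int64_t)va >> 47) == 0 || ((int64_t)va >> 47) == -1;
}

/* Returns true if the access may be propagated as #PF; false means raise #GP. */
static bool ldt_descriptor_addr(uint64_t ldt_base, unsigned int sel,
                                bool is_32bit_guest, uint64_t *out)
{
    uint64_t addr = ldt_base + (sel & ~7u);   /* descriptor index * 8 */

    if (is_32bit_guest)
        addr = (uint32_t)addr;                /* wrap like map_ldt_shadow_page() now does */

    *out = addr;
    return is_canonical_address(addr);
}

int main(void)
{
    uint64_t addr;
    /* 64-bit guest, selector 0x10, and an LDT base that passes the relaxed
       SET_LDT check but yields a non-canonical descriptor address. */
    bool pf = ldt_descriptor_addr(0x0000800000000000ULL, 0x10, false, &addr);
    printf("addr=%#llx -> %s\n", (unsigned long long)addr,
           pf ? "propagate #PF" : "raise #GP");
    return 0;
}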
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -674,12 +674,7 @@ int arch_set_info_guest(
fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs);
}
- /* LDT safety checks. */
- if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) ||
- (c.nat->ldt_ents > 8192) ||
- !array_access_ok(c.nat->ldt_base,
- c.nat->ldt_ents,
- LDT_ENTRY_SIZE) )
+ if ( !__addr_ok(c.nat->ldt_base) )
return -EINVAL;
}
else
@@ -692,15 +687,12 @@ int arch_set_info_guest(
for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); i++ )
fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs);
-
- /* LDT safety checks. */
- if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) ||
- (c.cmp->ldt_ents > 8192) ||
- !compat_array_access_ok(c.cmp->ldt_base,
- c.cmp->ldt_ents,
- LDT_ENTRY_SIZE) )
- return -EINVAL;
}
+
+ /* LDT safety checks. */
+ if ( ((c(ldt_base) & (PAGE_SIZE - 1)) != 0) ||
+ (c(ldt_ents) > 8192) )
+ return -EINVAL;
}
v->fpu_initialised = !!(flags & VGCF_I387_VALID);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -582,6 +582,8 @@ int map_ldt_shadow_page(unsigned int off
BUG_ON(unlikely(in_irq()));
+ if ( is_pv_32bit_domain(d) )
+ gva = (u32)gva;
guest_get_eff_kern_l1e(v, gva, &l1e);
if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
return 0;
@@ -3229,9 +3231,8 @@ long do_mmuext_op(
MEM_LOG("ignoring SET_LDT hypercall from external domain");
okay = 0;
}
- else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
- (ents > 8192) ||
- !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
+ else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) ||
+ (ents > 8192) )
{
okay = 0;
MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
@@ -3244,8 +3245,6 @@ long do_mmuext_op(
curr->arch.pv_vcpu.ldt_base = ptr;
curr->arch.pv_vcpu.ldt_ents = ents;
load_LDT(curr);
- if ( ents != 0 )
- (void)map_ldt_shadow_page(0);
}
break;
}
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1070,12 +1070,24 @@ static void reserved_bit_page_fault(
show_execution_state(regs);
}
-void propagate_page_fault(unsigned long addr, u16 error_code)
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code)
{
struct trap_info *ti;
struct vcpu *v = current;
struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
+ if ( unlikely(!is_canonical_address(addr)) )
+ {
+ ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_gp_fault];
+ tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
+ tb->error_code = 0;
+ tb->cs = ti->cs;
+ tb->eip = ti->address;
+ if ( TI_GET_IF(ti) )
+ tb->flags |= TBF_INTERRUPT;
+ return tb;
+ }
+
v->arch.pv_vcpu.ctrlreg[2] = addr;
arch_set_cr2(v, addr);
@@ -1102,6 +1114,8 @@ void propagate_page_fault(unsigned long
if ( unlikely(error_code & PFEC_reserved_bit) )
reserved_bit_page_fault(addr, guest_cpu_user_regs());
+
+ return NULL;
}
static int handle_gdt_ldt_mapping_fault(
@@ -1135,13 +1149,16 @@ static int handle_gdt_ldt_mapping_fault(
}
else
{
+ struct trap_bounce *tb;
+
/* In hypervisor mode? Leave it to the #PF handler to fix up. */
if ( !guest_mode(regs) )
return 0;
- /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
- propagate_page_fault(
- curr->arch.pv_vcpu.ldt_base + offset,
- regs->error_code);
+ /* In guest mode? Propagate fault to guest, with adjusted %cr2. */
+ tb = propagate_page_fault(curr->arch.pv_vcpu.ldt_base + offset,
+ regs->error_code);
+ if ( tb )
+ tb->error_code = ((u16)offset & ~3) | 4;
}
}
else
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -555,7 +555,7 @@ int new_guest_cr3(unsigned long pfn);
void make_cr3(struct vcpu *v, unsigned long mfn);
void update_cr3(struct vcpu *v);
int vcpu_destroy_pagetables(struct vcpu *);
-void propagate_page_fault(unsigned long addr, u16 error_code);
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code);
void *do_page_walk(struct vcpu *v, unsigned long addr);
int __sync_local_execstate(void);
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -386,7 +386,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign
if ( likely(!paging_mode_translate(v->domain)) )
{
ASSERT(!paging_mode_external(v->domain));
- if ( __copy_from_user(eff_l1e,
+ if ( !__addr_ok(addr) ||
+ __copy_from_user(eff_l1e,
&__linear_l1_table[l1_linear_offset(addr)],
sizeof(l1_pgentry_t)) != 0 )
*(l1_pgentry_t *)eff_l1e = l1e_empty();

View File

@ -0,0 +1,26 @@
# Commit d06a0d715ec1423b6c42141ab1b0ff69a3effb56
# Date 2013-10-11 09:29:43 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: add address validity check to guest_map_l1e()
Just like for guest_get_eff_l1e() this prevents accessing as page
tables (and with the wrong memory attribute) internal data inside Xen
happening to be mapped with 1Gb pages.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -360,7 +360,8 @@ guest_map_l1e(struct vcpu *v, unsigned l
return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn);
/* Find this l1e and its enclosing l1mfn in the linear map */
- if ( __copy_from_user(&l2e,
+ if ( !__addr_ok(addr) ||
+ __copy_from_user(&l2e,
&__linear_l2_table[l2_linear_offset(addr)],
sizeof(l2_pgentry_t)) != 0 )
return NULL;

View File

@ -0,0 +1,38 @@
# Commit 6fd9b0361e2eb5a7f12bdd5cbf7e42c0d1937d26
# Date 2013-10-11 09:31:16 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: check for canonical address before doing page walks
... as there doesn't really exist any valid mapping for them.
Particularly in the case of do_page_walk() this also avoids returning
non-NULL for such invalid input.
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -135,7 +135,7 @@ void *do_page_walk(struct vcpu *v, unsig
l2_pgentry_t l2e, *l2t;
l1_pgentry_t l1e, *l1t;
- if ( is_hvm_vcpu(v) )
+ if ( is_hvm_vcpu(v) || !is_canonical_address(addr) )
return NULL;
l4t = map_domain_page(mfn);
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -169,6 +169,8 @@ void show_page_walk(unsigned long addr)
l1_pgentry_t l1e, *l1t;
printk("Pagetable walk from %016lx:\n", addr);
+ if ( !is_canonical_address(addr) )
+ return;
l4t = map_domain_page(mfn);
l4e = l4t[l4_table_offset(addr)];

View File

@ -0,0 +1,632 @@
# Commit eedd60391610629b4e8a2e8278b857ff884f750d
# Date 2013-10-14 08:57:56 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
scheduler: adjust internal locking interface
Make the locking functions return the lock pointers, so they can be
passed to the unlocking functions (which in turn can check that the
lock is still actually providing the intended protection, i.e. the
parameters determining which lock is the right one didn't change).
Further use proper spin lock primitives rather than open coded
local_irq_...() constructs, so that interrupts can be re-enabled as
appropriate while spinning.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
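The shape of the converted interface can be modelled outside Xen with plain pthread mutexes; the structure and function names below are invented, and the unsynchronised re-read of the lock pointer is a simplification of what the real per-CPU schedule_lock code does.
#include <assert.h>
#include <pthread.h>

/* Model: a CPU's scheduler lock pointer may be repointed by cpupool moves. */
struct sched_cpu {
    pthread_mutex_t *schedule_lock;
};

static pthread_mutex_t *cpu_schedule_lock(struct sched_cpu *sd)
{
    for ( ; ; )
    {
        pthread_mutex_t *lock = sd->schedule_lock;   /* snapshot */
        pthread_mutex_lock(lock);
        if (lock == sd->schedule_lock)               /* still the right lock? */
            return lock;                             /* caller keeps the pointer */
        pthread_mutex_unlock(lock);                  /* repointed meanwhile: retry */
    }
}

static void cpu_schedule_unlock(pthread_mutex_t *lock, struct sched_cpu *sd)
{
    assert(lock == sd->schedule_lock);   /* returning the pointer makes this check possible */
    pthread_mutex_unlock(lock);
}

int main(void)
{
    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    struct sched_cpu sd = { .schedule_lock = &m };

    pthread_mutex_t *lock = cpu_schedule_lock(&sd);
    /* ... critical section over this CPU's runqueue ... */
    cpu_schedule_unlock(lock, &sd);
    return 0;
}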
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1170,6 +1170,7 @@ csched_runq_sort(struct csched_private *
struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
struct list_head *runq, *elem, *next, *last_under;
struct csched_vcpu *svc_elem;
+ spinlock_t *lock;
unsigned long flags;
int sort_epoch;
@@ -1179,7 +1180,7 @@ csched_runq_sort(struct csched_private *
spc->runq_sort_last = sort_epoch;
- pcpu_schedule_lock_irqsave(cpu, flags);
+ lock = pcpu_schedule_lock_irqsave(cpu, &flags);
runq = &spc->runq;
elem = runq->next;
@@ -1204,7 +1205,7 @@ csched_runq_sort(struct csched_private *
elem = next;
}
- pcpu_schedule_unlock_irqrestore(cpu, flags);
+ pcpu_schedule_unlock_irqrestore(lock, flags, cpu);
}
static void
@@ -1568,7 +1569,9 @@ csched_load_balance(struct csched_privat
* could cause a deadlock if the peer CPU is also load
* balancing and trying to lock this CPU.
*/
- if ( !pcpu_schedule_trylock(peer_cpu) )
+ spinlock_t *lock = pcpu_schedule_trylock(peer_cpu);
+
+ if ( !lock )
{
SCHED_STAT_CRANK(steal_trylock_failed);
peer_cpu = cpumask_cycle(peer_cpu, &workers);
@@ -1578,7 +1581,7 @@ csched_load_balance(struct csched_privat
/* Any work over there to steal? */
speer = cpumask_test_cpu(peer_cpu, online) ?
csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL;
- pcpu_schedule_unlock(peer_cpu);
+ pcpu_schedule_unlock(lock, peer_cpu);
/* As soon as one vcpu is found, balancing ends */
if ( speer != NULL )
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -881,15 +881,17 @@ csched_vcpu_insert(const struct schedule
*/
if ( ! is_idle_vcpu(vc) )
{
+ spinlock_t *lock;
+
/* FIXME: Do we need the private lock here? */
list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu);
/* Add vcpu to runqueue of initial processor */
- vcpu_schedule_lock_irq(vc);
+ lock = vcpu_schedule_lock_irq(vc);
runq_assign(ops, vc);
- vcpu_schedule_unlock_irq(vc);
+ vcpu_schedule_unlock_irq(lock, vc);
sdom->nr_vcpus++;
}
@@ -916,14 +918,16 @@ csched_vcpu_remove(const struct schedule
if ( ! is_idle_vcpu(vc) )
{
+ spinlock_t *lock;
+
SCHED_STAT_CRANK(vcpu_destroy);
/* Remove from runqueue */
- vcpu_schedule_lock_irq(vc);
+ lock = vcpu_schedule_lock_irq(vc);
runq_deassign(ops, vc);
- vcpu_schedule_unlock_irq(vc);
+ vcpu_schedule_unlock_irq(lock, vc);
/* Remove from sdom list. Don't need a lock for this, as it's called
* syncronously when nothing else can happen. */
@@ -1010,8 +1014,7 @@ csched_context_saved(const struct schedu
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
s_time_t now = NOW();
-
- vcpu_schedule_lock_irq(vc);
+ spinlock_t *lock = vcpu_schedule_lock_irq(vc);
BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor));
@@ -1037,7 +1040,7 @@ csched_context_saved(const struct schedu
else if ( !is_idle_vcpu(vc) )
update_load(ops, svc->rqd, svc, -1, now);
- vcpu_schedule_unlock_irq(vc);
+ vcpu_schedule_unlock_irq(lock, vc);
}
#define MAX_LOAD (1ULL<<60);
@@ -1454,14 +1457,14 @@ csched_dom_cntl(
* must never lock csched_priv.lock if we're holding a runqueue lock.
* Also, calling vcpu_schedule_lock() is enough, since IRQs have already
* been disabled. */
- vcpu_schedule_lock(svc->vcpu);
+ spinlock_t *lock = vcpu_schedule_lock(svc->vcpu);
BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor));
svc->weight = sdom->weight;
update_max_weight(svc->rqd, svc->weight, old_weight);
- vcpu_schedule_unlock(svc->vcpu);
+ vcpu_schedule_unlock(lock, svc->vcpu);
}
}
}
@@ -1991,6 +1994,7 @@ static void init_pcpu(const struct sched
cpumask_set_cpu(cpu, &rqd->idle);
cpumask_set_cpu(cpu, &rqd->active);
+ /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */
spin_unlock(old_lock);
cpumask_set_cpu(cpu, &prv->initialized);
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -1350,14 +1350,16 @@ static int sedf_adjust_weights(struct cp
if ( EDOM_INFO(p)->weight )
{
/* Interrupts already off */
- vcpu_schedule_lock(p);
+ spinlock_t *lock = vcpu_schedule_lock(p);
+
EDOM_INFO(p)->period_orig =
EDOM_INFO(p)->period = WEIGHT_PERIOD;
EDOM_INFO(p)->slice_orig =
EDOM_INFO(p)->slice =
(EDOM_INFO(p)->weight *
(WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
- vcpu_schedule_unlock(p);
+
+ vcpu_schedule_unlock(lock, p);
}
}
}
@@ -1418,21 +1420,24 @@ static int sedf_adjust(const struct sche
{
/* (Here and everywhere in the following) IRQs are already off,
* hence vcpu_spin_lock() is the one. */
- vcpu_schedule_lock(v);
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->extraweight = op->u.sedf.weight;
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->slice = 0;
EDOM_INFO(v)->period = WEIGHT_PERIOD;
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
else
{
/* Weight-driven domains with real-time execution */
- for_each_vcpu ( p, v ) {
- vcpu_schedule_lock(v);
+ for_each_vcpu ( p, v )
+ {
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->weight = op->u.sedf.weight;
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
}
@@ -1454,14 +1459,15 @@ static int sedf_adjust(const struct sche
/* Time-driven domains */
for_each_vcpu ( p, v )
{
- vcpu_schedule_lock(v);
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->extraweight = 0;
EDOM_INFO(v)->period_orig =
EDOM_INFO(v)->period = op->u.sedf.period;
EDOM_INFO(v)->slice_orig =
EDOM_INFO(v)->slice = op->u.sedf.slice;
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
@@ -1471,13 +1477,14 @@ static int sedf_adjust(const struct sche
for_each_vcpu ( p, v )
{
- vcpu_schedule_lock(v);
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
EDOM_INFO(v)->status =
(EDOM_INFO(v)->status &
~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
EDOM_INFO(v)->latency = op->u.sedf.latency;
extraq_check(v);
- vcpu_schedule_unlock(v);
+ vcpu_schedule_unlock(lock, v);
}
}
else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -160,18 +160,16 @@ static inline void vcpu_runstate_change(
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
{
+ spinlock_t *lock = likely(v == current) ? NULL : vcpu_schedule_lock_irq(v);
s_time_t delta;
- if ( unlikely(v != current) )
- vcpu_schedule_lock_irq(v);
-
memcpy(runstate, &v->runstate, sizeof(*runstate));
delta = NOW() - runstate->state_entry_time;
if ( delta > 0 )
runstate->time[runstate->state] += delta;
- if ( unlikely(v != current) )
- vcpu_schedule_unlock_irq(v);
+ if ( unlikely(lock != NULL) )
+ vcpu_schedule_unlock_irq(lock, v);
}
uint64_t get_cpu_idle_time(unsigned int cpu)
@@ -333,8 +331,7 @@ void sched_destroy_domain(struct domain
void vcpu_sleep_nosync(struct vcpu *v)
{
unsigned long flags;
-
- vcpu_schedule_lock_irqsave(v, flags);
+ spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
if ( likely(!vcpu_runnable(v)) )
{
@@ -344,7 +341,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
SCHED_OP(VCPU2OP(v), sleep, v);
}
- vcpu_schedule_unlock_irqrestore(v, flags);
+ vcpu_schedule_unlock_irqrestore(lock, flags, v);
TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
}
@@ -362,8 +359,7 @@ void vcpu_sleep_sync(struct vcpu *v)
void vcpu_wake(struct vcpu *v)
{
unsigned long flags;
-
- vcpu_schedule_lock_irqsave(v, flags);
+ spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
if ( likely(vcpu_runnable(v)) )
{
@@ -377,7 +373,7 @@ void vcpu_wake(struct vcpu *v)
vcpu_runstate_change(v, RUNSTATE_offline, NOW());
}
- vcpu_schedule_unlock_irqrestore(v, flags);
+ vcpu_schedule_unlock_irqrestore(lock, flags, v);
TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}
@@ -528,10 +524,11 @@ static void vcpu_migrate(struct vcpu *v)
*/
void vcpu_force_reschedule(struct vcpu *v)
{
- vcpu_schedule_lock_irq(v);
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
+
if ( v->is_running )
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
if ( test_bit(_VPF_migrating, &v->pause_flags) )
{
@@ -546,7 +543,7 @@ void restore_vcpu_affinity(struct domain
for_each_vcpu ( d, v )
{
- vcpu_schedule_lock_irq(v);
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
if ( v->affinity_broken )
{
@@ -559,13 +556,13 @@ void restore_vcpu_affinity(struct domain
if ( v->processor == smp_processor_id() )
{
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
else
{
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
}
}
@@ -592,7 +589,7 @@ int cpu_disable_scheduler(unsigned int c
{
for_each_vcpu ( d, v )
{
- vcpu_schedule_lock_irq(v);
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid);
if ( cpumask_empty(&online_affinity) &&
@@ -613,13 +610,13 @@ int cpu_disable_scheduler(unsigned int c
if ( v->processor == cpu )
{
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
else
{
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
}
/*
@@ -646,6 +643,7 @@ int vcpu_set_affinity(struct vcpu *v, co
{
cpumask_t online_affinity;
cpumask_t *online;
+ spinlock_t *lock;
if ( v->domain->is_pinned )
return -EINVAL;
@@ -654,7 +652,7 @@ int vcpu_set_affinity(struct vcpu *v, co
if ( cpumask_empty(&online_affinity) )
return -EINVAL;
- vcpu_schedule_lock_irq(v);
+ lock = vcpu_schedule_lock_irq(v);
cpumask_copy(v->cpu_affinity, affinity);
@@ -662,7 +660,7 @@ int vcpu_set_affinity(struct vcpu *v, co
* when changing the affinity */
set_bit(_VPF_migrating, &v->pause_flags);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
domain_update_node_affinity(v->domain);
@@ -776,10 +774,10 @@ static long do_poll(struct sched_poll *s
static long do_yield(void)
{
struct vcpu * v=current;
+ spinlock_t *lock = vcpu_schedule_lock_irq(v);
- vcpu_schedule_lock_irq(v);
SCHED_OP(VCPU2OP(v), yield, v);
- vcpu_schedule_unlock_irq(v);
+ vcpu_schedule_unlock_irq(lock, v);
TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
raise_softirq(SCHEDULE_SOFTIRQ);
@@ -1140,6 +1138,7 @@ static void schedule(void)
unsigned long *tasklet_work = &this_cpu(tasklet_work_to_do);
bool_t tasklet_work_scheduled = 0;
struct schedule_data *sd;
+ spinlock_t *lock;
struct task_slice next_slice;
int cpu = smp_processor_id();
@@ -1166,7 +1165,7 @@ static void schedule(void)
BUG();
}
- pcpu_schedule_lock_irq(cpu);
+ lock = pcpu_schedule_lock_irq(cpu);
stop_timer(&sd->s_timer);
@@ -1183,7 +1182,7 @@ static void schedule(void)
if ( unlikely(prev == next) )
{
- pcpu_schedule_unlock_irq(cpu);
+ pcpu_schedule_unlock_irq(lock, cpu);
trace_continue_running(next);
return continue_running(prev);
}
@@ -1221,7 +1220,7 @@ static void schedule(void)
ASSERT(!next->is_running);
next->is_running = 1;
- pcpu_schedule_unlock_irq(cpu);
+ pcpu_schedule_unlock_irq(lock, cpu);
SCHED_STAT_CRANK(sched_ctx);
@@ -1408,6 +1407,7 @@ int schedule_cpu_switch(unsigned int cpu
{
unsigned long flags;
struct vcpu *idle;
+ spinlock_t *lock;
void *ppriv, *ppriv_old, *vpriv, *vpriv_old;
struct scheduler *old_ops = per_cpu(scheduler, cpu);
struct scheduler *new_ops = (c == NULL) ? &ops : c->sched;
@@ -1426,7 +1426,7 @@ int schedule_cpu_switch(unsigned int cpu
return -ENOMEM;
}
- pcpu_schedule_lock_irqsave(cpu, flags);
+ lock = pcpu_schedule_lock_irqsave(cpu, &flags);
SCHED_OP(old_ops, tick_suspend, cpu);
vpriv_old = idle->sched_priv;
@@ -1437,7 +1437,7 @@ int schedule_cpu_switch(unsigned int cpu
SCHED_OP(new_ops, tick_resume, cpu);
SCHED_OP(new_ops, insert_vcpu, idle);
- pcpu_schedule_unlock_irqrestore(cpu, flags);
+ pcpu_schedule_unlock_irqrestore(lock, flags, cpu);
SCHED_OP(old_ops, free_vdata, vpriv_old);
SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
@@ -1495,10 +1495,11 @@ void schedule_dump(struct cpupool *c)
for_each_cpu (i, cpus)
{
- pcpu_schedule_lock(i);
+ spinlock_t *lock = pcpu_schedule_lock(i);
+
printk("CPU[%02d] ", i);
SCHED_OP(sched, dump_cpu_state, i);
- pcpu_schedule_unlock(i);
+ pcpu_schedule_unlock(lock, i);
}
}
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc
DECLARE_PER_CPU(struct scheduler *, scheduler);
DECLARE_PER_CPU(struct cpupool *, cpupool);
-static inline spinlock_t * pcpu_schedule_lock(int cpu)
-{
- spinlock_t * lock=NULL;
-
- for ( ; ; )
- {
- /* The per_cpu(v->processor) may also change, if changing
- * cpu pool also changes the scheduler lock. Retry
- * until they match.
- */
- lock=per_cpu(schedule_data, cpu).schedule_lock;
-
- spin_lock(lock);
- if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) )
- break;
- spin_unlock(lock);
- }
- return lock;
+#define sched_lock(kind, param, cpu, irq, arg...) \
+static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+{ \
+ for ( ; ; ) \
+ { \
+ spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
+ /* \
+ * v->processor may change when grabbing the lock; but \
+ * per_cpu(v->processor) may also change, if changing cpu pool \
+ * also changes the scheduler lock. Retry until they match. \
+ * \
+ * It may also be the case that v->processor may change but the \
+ * lock may be the same; this will succeed in that case. \
+ */ \
+ spin_lock##irq(lock, ## arg); \
+ if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
+ return lock; \
+ spin_unlock##irq(lock, ## arg); \
+ } \
}
-static inline int pcpu_schedule_trylock(int cpu)
-{
- spinlock_t * lock=NULL;
-
- lock=per_cpu(schedule_data, cpu).schedule_lock;
- if ( ! spin_trylock(lock) )
- return 0;
- if ( lock == per_cpu(schedule_data, cpu).schedule_lock )
- return 1;
- else
- {
- spin_unlock(lock);
- return 0;
- }
+#define sched_unlock(kind, param, cpu, irq, arg...) \
+static inline void kind##_schedule_unlock##irq(spinlock_t *lock \
+ EXTRA_TYPE(arg), param) \
+{ \
+ ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \
+ spin_unlock##irq(lock, ## arg); \
}
-#define pcpu_schedule_lock_irq(p) \
- do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 )
-#define pcpu_schedule_lock_irqsave(p, flags) \
- do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 )
+#define EXTRA_TYPE(arg)
+sched_lock(pcpu, unsigned int cpu, cpu, )
+sched_lock(vcpu, const struct vcpu *v, v->processor, )
+sched_lock(pcpu, unsigned int cpu, cpu, _irq)
+sched_lock(vcpu, const struct vcpu *v, v->processor, _irq)
+sched_unlock(pcpu, unsigned int cpu, cpu, )
+sched_unlock(vcpu, const struct vcpu *v, v->processor, )
+sched_unlock(pcpu, unsigned int cpu, cpu, _irq)
+sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq)
+#undef EXTRA_TYPE
+
+#define EXTRA_TYPE(arg) , unsigned long arg
+#define spin_unlock_irqsave spin_unlock_irqrestore
+sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags)
+sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags)
+#undef spin_unlock_irqsave
+sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags)
+sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags)
+#undef EXTRA_TYPE
+
+#undef sched_unlock
+#undef sched_lock
-static inline void pcpu_schedule_unlock(int cpu)
+static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu)
{
- spin_unlock(per_cpu(schedule_data, cpu).schedule_lock);
-}
+ spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock;
-#define pcpu_schedule_unlock_irq(p) \
- do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 )
-#define pcpu_schedule_unlock_irqrestore(p, flags) \
- do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 )
-
-static inline void vcpu_schedule_lock(struct vcpu *v)
-{
- spinlock_t * lock;
-
- for ( ; ; )
- {
- /* v->processor may change when grabbing the lock; but
- * per_cpu(v->processor) may also change, if changing
- * cpu pool also changes the scheduler lock. Retry
- * until they match.
- *
- * It may also be the case that v->processor may change
- * but the lock may be the same; this will succeed
- * in that case.
- */
- lock=per_cpu(schedule_data, v->processor).schedule_lock;
-
- spin_lock(lock);
- if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) )
- break;
- spin_unlock(lock);
- }
-}
-
-#define vcpu_schedule_lock_irq(v) \
- do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 )
-#define vcpu_schedule_lock_irqsave(v, flags) \
- do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 )
-
-static inline void vcpu_schedule_unlock(struct vcpu *v)
-{
- spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock);
+ if ( !spin_trylock(lock) )
+ return NULL;
+ if ( lock == per_cpu(schedule_data, cpu).schedule_lock )
+ return lock;
+ spin_unlock(lock);
+ return NULL;
}
-#define vcpu_schedule_unlock_irq(v) \
- do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 )
-#define vcpu_schedule_unlock_irqrestore(v, flags) \
- do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 )
-
struct task_slice {
struct vcpu *task;
s_time_t time;

View File

@ -0,0 +1,63 @@
# Commit ef55257bc81204e34691f1c2aa9e01f2d0768bdd
# Date 2013-10-14 08:58:31 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
sched: fix race between sched_move_domain() and vcpu_wake()
From: David Vrabel <david.vrabel@citrix.com>
sched_move_domain() changes v->processor for all the domain's VCPUs.
If another domain, softirq etc. triggers a simultaneous call to
vcpu_wake() (e.g., by setting an event channel as pending), then
vcpu_wake() may lock one schedule lock and try to unlock another.
vcpu_schedule_lock() attempts to handle this but only does so for the
window between reading the schedule_lock from the per-CPU data and the
spin_lock() call. This does not help with sched_move_domain()
changing v->processor between the calls to vcpu_schedule_lock() and
vcpu_schedule_unlock().
Fix the race by taking the schedule_lock for v->processor in
sched_move_domain().
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Use vcpu_schedule_lock_irq() (which now returns the lock) to properly
retry the locking should the to be used lock have changed in the course
of acquiring it (issue pointed out by George Dunlap).
Add a comment explaining the state after the v->processor adjustment.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -276,6 +276,8 @@ int sched_move_domain(struct domain *d,
new_p = cpumask_first(c->cpu_valid);
for_each_vcpu ( d, v )
{
+ spinlock_t *lock;
+
vcpudata = v->sched_priv;
migrate_timer(&v->periodic_timer, new_p);
@@ -283,7 +285,16 @@ int sched_move_domain(struct domain *d,
migrate_timer(&v->poll_timer, new_p);
cpumask_setall(v->cpu_affinity);
+
+ lock = vcpu_schedule_lock_irq(v);
v->processor = new_p;
+ /*
+ * With v->processor modified we must not
+ * - make any further changes assuming we hold the scheduler lock,
+ * - use vcpu_schedule_unlock_irq().
+ */
+ spin_unlock_irq(lock);
+
v->sched_priv = vcpu_priv[v->vcpu_id];
evtchn_move_pirqs(v);

View File

@ -0,0 +1,27 @@
# Commit d38a668b6ef8c84d1d3fda9947ffb0056d01fe3a
# Date 2013-10-16 12:26:48 +0200
# Author Juergen Gross <juergen.gross@ts.fujitsu.com>
# Committer Jan Beulich <jbeulich@suse.com>
credit: unpause parked vcpu before destroying it
A capped-out vcpu must be unpaused when moving it to another cpupool,
otherwise it will be paused forever.
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -931,6 +931,12 @@ csched_vcpu_remove(const struct schedule
SCHED_STAT_CRANK(vcpu_destroy);
+ if ( test_and_clear_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
+ {
+ SCHED_STAT_CRANK(vcpu_unpark);
+ vcpu_unpause(svc->vcpu);
+ }
+
if ( __vcpu_on_runq(svc) )
__runq_remove(svc);

View File

@ -0,0 +1,77 @@
# Commit f72cb6bbc10348f4f7671428e5db509731e9e6a5
# Date 2013-10-17 11:35:26 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: print relevant (tail) part of filename for warnings and crashes
In particular when the origin construct is in a header file (and
hence the file name is an absolute path instead of just the file name
portion) the information can otherwise become rather useless when the
build tree isn't sitting relatively close to the file system root.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
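The truncation rule itself is small enough to show standalone: keep the last 47 characters of an over-long name and prepend "...", so the printed location still fits the old 50-character budget. The sketch below is illustrative only.
#include <stdio.h>
#include <string.h>

static void print_warn_location(const char *filename, int lineno)
{
    const char *prefix = "";
    size_t len = strlen(filename);

    if (len > 50) {
        prefix = "...";
        filename += len - 47;   /* 3 chars of "..." + 47 of tail = 50 */
    }
    printf("Xen WARN at %s%s:%d\n", prefix, filename, lineno);
}

int main(void)
{
    print_warn_location("/home/user/build/xen-unstable/xen/include/asm-x86/example.h", 123);
    return 0;
}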
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -953,7 +953,7 @@ void do_invalid_op(struct cpu_user_regs
{
struct bug_frame bug;
struct bug_frame_str bug_str;
- const char *p, *filename, *predicate, *eip = (char *)regs->eip;
+ const char *p, *prefix = "", *filename, *predicate, *eip = (char *)regs->eip;
unsigned long fixup;
int id, lineno;
@@ -995,12 +995,19 @@ void do_invalid_op(struct cpu_user_regs
}
/* WARN, BUG or ASSERT: decode the filename pointer and line number. */
- filename = p;
+ fixup = strlen(p);
+ if ( fixup > 50 )
+ {
+ filename = p + fixup - 47;
+ prefix = "...";
+ }
+ else
+ filename = p;
lineno = bug.id >> 2;
if ( id == BUGFRAME_warn )
{
- printk("Xen WARN at %.50s:%d\n", filename, lineno);
+ printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno);
show_execution_state(regs);
regs->eip = (unsigned long)eip;
return;
@@ -1008,10 +1015,10 @@ void do_invalid_op(struct cpu_user_regs
if ( id == BUGFRAME_bug )
{
- printk("Xen BUG at %.50s:%d\n", filename, lineno);
+ printk("Xen BUG at %s%s:%d\n", prefix, filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
- panic("Xen BUG at %.50s:%d\n", filename, lineno);
+ panic("Xen BUG at %s%s:%d\n", prefix, filename, lineno);
}
/* ASSERT: decode the predicate string pointer. */
@@ -1025,12 +1032,12 @@ void do_invalid_op(struct cpu_user_regs
if ( !is_kernel(predicate) )
predicate = "<unknown>";
- printk("Assertion '%s' failed at %.50s:%d\n",
- predicate, filename, lineno);
+ printk("Assertion '%s' failed at %s%s:%d\n",
+ predicate, prefix, filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
- panic("Assertion '%s' failed at %.50s:%d\n",
- predicate, filename, lineno);
+ panic("Assertion '%s' failed at %s%s:%d\n",
+ predicate, prefix, filename, lineno);
die:
if ( (fixup = search_exception_table(regs->eip)) != 0 )

CVE-2013-4375-xsa71.patch
View File

@ -0,0 +1,33 @@
References: bnc#842515 CVE-2013-4375 XSA-71
xen_disk: mark ioreq as mapped before unmapping in error case
Commit c6961b7d ("xen_disk: use bdrv_aio_flush instead of bdrv_flush")
modified the semantics of ioreq_{un,}map so that they are idempotent if
called when they're not needed (ie., twice in a row). However, it neglected
to handle the case where batch mapping is not being used (the default), and
one of the grants fails to map. In this case, ioreq_unmap will be called to
unwind and unmap any mappings already performed, but ioreq_unmap simply
returns due to the aforementioned change (the ioreq has not already been
marked as mapped).
The frontend user can therefore force xen_disk to leak grant mappings, a
per-backend-domain limited resource.
Fix by marking the ioreq as mapped before calling ioreq_unmap in this
situation.
This is XSA-71 / CVE-2013-4375
Signed-off-by: Matthew Daley <mattjd@gmail.com>
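A reduced model of the control flow, not the actual xen_disk.c code: ioreq_unmap() early-returns unless the request is marked mapped, so the partial-failure path has to set that flag before calling it, otherwise the grants mapped so far are never unwound. All names carry a _model suffix to mark them as illustrative.
#include <stdio.h>

struct ioreq_model {
    int mapped;
    int num_mapped_grants;
};

static void ioreq_unmap_model(struct ioreq_model *ioreq)
{
    if (!ioreq->mapped)               /* idempotence guard added by c6961b7d */
        return;
    /* ... unmap ioreq->num_mapped_grants grants here ... */
    ioreq->num_mapped_grants = 0;
    ioreq->mapped = 0;
}

static int ioreq_map_model(struct ioreq_model *ioreq, int nr_grants)
{
    for (int i = 0; i < nr_grants; i++) {
        int ok = (i < 2);                 /* pretend the third grant fails */
        if (!ok) {
            ioreq->mapped = 1;            /* the XSA-71 fix: mark as mapped first... */
            ioreq_unmap_model(ioreq);     /* ...so the unwind actually runs */
            return -1;
        }
        ioreq->num_mapped_grants++;
    }
    ioreq->mapped = 1;
    return 0;
}

int main(void)
{
    struct ioreq_model req = { 0, 0 };
    int rc = ioreq_map_model(&req, 3);
    printf("map result: %d, grants left mapped: %d\n", rc, req.num_mapped_grants);
    return 0;
}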
--- a/tools/qemu-xen-dir-remote/hw/xen_disk.c
+++ b/tools/qemu-xen-dir-remote/hw/xen_disk.c
@@ -406,6 +406,7 @@ static int ioreq_map(struct ioreq *ioreq
xen_be_printf(&ioreq->blkdev->xendev, 0,
"can't map grant ref %d (%s, %d maps)\n",
refs[i], strerror(errno), ioreq->blkdev->cnt_map);
+ ioreq->mapped = 1;
ioreq_unmap(ioreq);
return -1;
}

View File

@ -0,0 +1,34 @@
Index: xen-4.3.0-testing/tools/pygrub/src/pygrub
===================================================================
--- xen-4.3.0-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.3.0-testing/tools/pygrub/src/pygrub
@@ -606,6 +606,14 @@ def run_grub(file, entry, fs, cfg_args):
print " args: %s" % img.args
print " initrd: %s" % img.initrd[1]
+ # If grub has no menu entries to select, look for vmlinuz-xen and initrd-xen in /boot
+ if len(g.cf.images) == 0:
+ chosencfg = { "kernel": None, "ramdisk": None, "args": "" }
+ chosencfg = sniff_xen_kernel(fs, incfg)
+ if chosencfg["kernel"] and chosencfg["ramdisk"]:
+ chosencfg["args"] = cfg_args
+ return chosencfg
+
if interactive and not list_entries:
curses.wrapper(run_main)
else:
@@ -692,6 +700,14 @@ def sniff_netware(fs, cfg):
return cfg
+def sniff_xen_kernel(fs, cfg):
+ if not cfg["kernel"] and fs.file_exists('/boot/vmlinuz-xen'):
+ cfg["kernel"] = '/boot/vmlinuz-xen'
+ if cfg["kernel"] and not cfg["ramdisk"]:
+ if fs.file_exists('/boot/initrd-xen'):
+ cfg["ramdisk"] = '/boot/initrd-xen'
+ return cfg
+
def format_sxp(kernel, ramdisk, args):
s = "linux (kernel %s)" % kernel
if ramdisk:

View File

@ -0,0 +1,61 @@
# HG changeset patch
# User Charles Arnold <carnold@suse.com>
# Date 1379427987 -3600
# Node ID e6da6ffd6749237316d4440799f0a0272bbdae9c
# Parent 5597ce99ec7f2587a29f3b2dee0bde98d59bf327
tools/hotplug: set mtu from bridge for tap interface
With changeset 22885 support was added for setting the MTU in the vif-bridge
script for when a vif interface was set to 'online'. The was not done for the
'add' operation. The 'add' operation was added to the script for when tap
devices were specified (c/s 21944). With the setting of the MTU for the
'online' case was there a reason for omitting the 'add'?
This patch sets the MTU for both 'online' and 'add' in the vif-bridge script.
Signed-off-by: Charles Arnold <carnold@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Index: xen-4.3.0-testing/tools/hotplug/Linux/vif-bridge
===================================================================
--- xen-4.3.0-testing.orig/tools/hotplug/Linux/vif-bridge
+++ xen-4.3.0-testing/tools/hotplug/Linux/vif-bridge
@@ -89,11 +89,7 @@ fi
case "$command" in
online)
setup_virtual_bridge_port "$dev"
- mtu="`ip link show $bridge | awk '/mtu/ { print $5 }'`"
- if [ -n "$mtu" ] && [ "$mtu" -gt 0 ]
- then
- ip link set $dev mtu $mtu || :
- fi
+ set_mtu "$bridge" "$dev"
add_to_bridge "$bridge" "$dev"
;;
@@ -104,6 +100,7 @@ case "$command" in
add)
setup_virtual_bridge_port "$dev"
+ set_mtu "$bridge" "$dev"
add_to_bridge "$bridge" "$dev"
;;
esac
Index: xen-4.3.0-testing/tools/hotplug/Linux/xen-network-common.sh
===================================================================
--- xen-4.3.0-testing.orig/tools/hotplug/Linux/xen-network-common.sh
+++ xen-4.3.0-testing/tools/hotplug/Linux/xen-network-common.sh
@@ -132,3 +132,13 @@ add_to_bridge () {
ip link set ${dev} up
}
+# Usage: set_mtu bridge dev
+set_mtu () {
+ local bridge=$1
+ local dev=$2
+ mtu="`ip link show ${bridge}| awk '/mtu/ { print $5 }'`"
+ if [ -n "$mtu" ] && [ "$mtu" -gt 0 ]
+ then
+ ip link set ${dev} mtu $mtu || :
+ fi
+}

View File

@ -9,7 +9,7 @@
#include <asm/edd.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
@@ -597,6 +597,41 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PA
@@ -601,6 +601,41 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PA
}
break;

View File

@ -1,9 +1,51 @@
-------------------------------------------------------------------
Tue Oct 22 13:42:54 MDT 2013 - carnold@suse.com
- domUloader can no longer be used with the xl toolstack to boot
sles10. Patch pygrub to get the kernel and initrd from the image.
pygrub-boot-legacy-sles.patch
-------------------------------------------------------------------
Mon Oct 21 09:57:54 MDT 2013 - carnold@suse.com
- bnc#842515 - VUL-0: CVE-2013-4375: XSA-71: xen: qemu disk backend
(qdisk) resource leak
CVE-2013-4375-xsa71.patch
- Upstream patches from Jan
52496bea-x86-properly-handle-hvm_copy_from_guest_-phys-virt-errors.patch (Replaces CVE-2013-4355-xsa63.patch)
52496c11-x86-mm-shadow-Fix-initialization-of-PV-shadow-L4-tables.patch (Replaces CVE-2013-4356-xsa64.patch)
52496c32-x86-properly-set-up-fbld-emulation-operand-address.patch (Replaces CVE-2013-4361-xsa66.patch)
52497c6c-x86-don-t-blindly-create-L3-tables-for-the-direct-map.patch
524e971b-x86-idle-Fix-get_cpu_idle_time-s-interaction-with-offline-pcpus.patch
524e9762-x86-percpu-Force-INVALID_PERCPU_AREA-to-non-canonical.patch
524e983e-Nested-VMX-check-VMX-capability-before-read-VMX-related-MSRs.patch
524e98b1-Nested-VMX-fix-IA32_VMX_CR4_FIXED1-msr-emulation.patch
524e9dc0-xsm-forbid-PV-guest-console-reads.patch
5256a979-x86-check-segment-descriptor-read-result-in-64-bit-OUTS-emulation.patch
5256be57-libxl-fix-vif-rate-parsing.patch
5256be84-tools-ocaml-fix-erroneous-free-of-cpumap-in-stub_xc_vcpu_getaffinity.patch
5256be92-libxl-fix-out-of-memory-error-handling-in-libxl_list_cpupool.patch
5257a89a-x86-correct-LDT-checks.patch
5257a8e7-x86-add-address-validity-check-to-guest_map_l1e.patch
5257a944-x86-check-for-canonical-address-before-doing-page-walks.patch
525b95f4-scheduler-adjust-internal-locking-interface.patch
525b9617-sched-fix-race-between-sched_move_domain-and-vcpu_wake.patch
525e69e8-credit-unpause-parked-vcpu-before-destroying-it.patch
525faf5e-x86-print-relevant-tail-part-of-filename-for-warnings-and-crashes.patch
-------------------------------------------------------------------
Wed Oct 2 15:58:47 MDT 2013 - jfehlig@suse.com
- Improvements to block-dmmd script
bnc#828623
-------------------------------------------------------------------
Tue Oct 1 15:28:25 MDT 2013 - carnold@suse.com
- bnc#840196 - L3: MTU size on Dom0 gets reset when booting DomU
with e1000 device
set-mtu-from-bridge-for-tap-interface.patch
-------------------------------------------------------------------
Mon Sep 30 10:48:29 MDT 2013 - carnold@suse.com

View File

@ -139,7 +139,7 @@ BuildRequires: xorg-x11
BuildRequires: lndir
%endif
%endif
Version: 4.3.0_12
Version: 4.3.0_14
Release: 0
PreReq: %insserv_prereq %fillup_prereq
Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
@ -254,9 +254,27 @@ Patch57: 523c1834-unmodified_drivers-enable-unplug-per-default.patch
Patch58: 523ff393-x86-HVM-linear-address-must-be-canonical-for-the-whole-accessed-range.patch
Patch59: 523ff3e2-x86-HVM-refuse-doing-string-operations-in-certain-situations.patch
Patch60: 5242a1b5-x86-xsave-initialize-extended-register-state-when-guests-enable-it.patch
Patch6300: CVE-2013-4355-xsa63.patch
Patch6400: CVE-2013-4356-xsa64.patch
Patch6600: CVE-2013-4361-xsa66.patch
Patch61: 52496bea-x86-properly-handle-hvm_copy_from_guest_-phys-virt-errors.patch
Patch62: 52496c11-x86-mm-shadow-Fix-initialization-of-PV-shadow-L4-tables.patch
Patch63: 52496c32-x86-properly-set-up-fbld-emulation-operand-address.patch
Patch64: 52497c6c-x86-don-t-blindly-create-L3-tables-for-the-direct-map.patch
Patch65: 524e971b-x86-idle-Fix-get_cpu_idle_time-s-interaction-with-offline-pcpus.patch
Patch66: 524e9762-x86-percpu-Force-INVALID_PERCPU_AREA-to-non-canonical.patch
Patch67: 524e983e-Nested-VMX-check-VMX-capability-before-read-VMX-related-MSRs.patch
Patch68: 524e98b1-Nested-VMX-fix-IA32_VMX_CR4_FIXED1-msr-emulation.patch
Patch69: 524e9dc0-xsm-forbid-PV-guest-console-reads.patch
Patch70: 5256a979-x86-check-segment-descriptor-read-result-in-64-bit-OUTS-emulation.patch
Patch71: 5256be57-libxl-fix-vif-rate-parsing.patch
Patch72: 5256be84-tools-ocaml-fix-erroneous-free-of-cpumap-in-stub_xc_vcpu_getaffinity.patch
Patch73: 5256be92-libxl-fix-out-of-memory-error-handling-in-libxl_list_cpupool.patch
Patch74: 5257a89a-x86-correct-LDT-checks.patch
Patch75: 5257a8e7-x86-add-address-validity-check-to-guest_map_l1e.patch
Patch76: 5257a944-x86-check-for-canonical-address-before-doing-page-walks.patch
Patch77: 525b95f4-scheduler-adjust-internal-locking-interface.patch
Patch78: 525b9617-sched-fix-race-between-sched_move_domain-and-vcpu_wake.patch
Patch79: 525e69e8-credit-unpause-parked-vcpu-before-destroying-it.patch
Patch80: 525faf5e-x86-print-relevant-tail-part-of-filename-for-warnings-and-crashes.patch
Patch7100: CVE-2013-4375-xsa71.patch
# Upstream qemu patches
# Our patches
Patch301: xen-destdir.patch
@ -283,10 +301,12 @@ Patch350: hibernate.patch
Patch351: stdvga-cache.patch
Patch352: ipxe-enable-nics.patch
Patch353: pygrub-netware-xnloader.patch
Patch354: pygrub-boot-legacy-sles.patch
Patch360: blktapctrl-close-fifos.patch
Patch361: blktapctrl-default-to-ioemu.patch
Patch362: blktapctrl-disable-debug-printf.patch
Patch363: blktap-pv-cdrom.patch
Patch364: set-mtu-from-bridge-for-tap-interface.patch
# Hypervisor and PV driver Patches
Patch501: x86-ioapic-ack-default.patch
Patch502: x86-cpufreq-report.patch
@ -605,9 +625,27 @@ Authors
%patch58 -p1
%patch59 -p1
%patch60 -p1
%patch6300 -p1
%patch6400 -p1
%patch6600 -p1
%patch61 -p1
%patch62 -p1
%patch63 -p1
%patch64 -p1
%patch65 -p1
%patch66 -p1
%patch67 -p1
%patch68 -p1
%patch69 -p1
%patch70 -p1
%patch71 -p1
%patch72 -p1
%patch73 -p1
%patch74 -p1
%patch75 -p1
%patch76 -p1
%patch77 -p1
%patch78 -p1
%patch79 -p1
%patch80 -p1
%patch7100 -p1
%patch301 -p1
%patch302 -p1
%patch303 -p1
@ -630,10 +668,12 @@ Authors
%patch351 -p1
%patch352 -p1
%patch353 -p1
%patch354 -p1
%patch360 -p1
%patch361 -p1
%patch362 -p1
%patch363 -p1
%patch364 -p1
%patch501 -p1
%patch502 -p1
%patch503 -p1