diff --git a/57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch b/57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch new file mode 100644 index 0000000..69015db --- /dev/null +++ b/57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch @@ -0,0 +1,25 @@ +# Commit 81caac0cd0f56b0052a7884e6bd99e3a652ddd59 +# Date 2016-08-29 16:05:31 +0200 +# Author Suravee Suthikulpanit +# Committer Jan Beulich +x86/HVM: add guarding logic for VMX specific code + +The struct hvm_domain.vmx is defined in a union along with the svm. +This can causes issue for SVM since this code is used in the common +scheduling code for x86. The logic must check for cpu_has_vmx before +accessing the hvm_domain.vmx sturcture. + +Signed-off-by: Suravee Suthikulpanit +Acked-by: Jan Beulich + +--- a/xen/include/asm-x86/hvm/hvm.h ++++ b/xen/include/asm-x86/hvm/hvm.h +@@ -611,7 +611,7 @@ unsigned long hvm_cr4_guest_reserved_bit + struct vcpu *v_ = (v); \ + struct domain *d_ = v_->domain; \ + if ( has_hvm_container_domain(d_) && \ +- d_->arch.hvm_domain.vmx.vcpu_block ) \ ++ (cpu_has_vmx && d_->arch.hvm_domain.vmx.vcpu_block) ) \ + d_->arch.hvm_domain.vmx.vcpu_block(v_); \ + }) + diff --git a/57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch b/57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch new file mode 100644 index 0000000..3e7d4ad --- /dev/null +++ b/57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch @@ -0,0 +1,30 @@ +# Commit 9daed8321b44c3ca82e412eb130f84e6b6c17dc5 +# Date 2016-08-30 13:43:31 +0100 +# Author Juergen Gross +# Committer Wei Liu +libxc: correct max_pfn calculation for saving domain + +Commit 91e204d37f44913913776d0a89279721694f8b32 ("libxc: try to find +last used pfn when migrating") introduced a bug for the case of a +domain supporting the virtual mapped linear p2m list: the maximum pfn +of the domain calculated from the p2m memory allocation might be too +low. + +Correct this. 
+ +Reported-by: Stefan Bader +Signed-off-by: Juergen Gross +Tested-by: Stefan Bader +Acked-by: Wei Liu + +--- a/tools/libxc/xc_sr_save_x86_pv.c ++++ b/tools/libxc/xc_sr_save_x86_pv.c +@@ -430,6 +430,8 @@ static int map_p2m_list(struct xc_sr_con + + if ( level == 2 ) + { ++ if ( saved_idx == idx_end ) ++ saved_idx++; + max_pfn = ((xen_pfn_t)saved_idx << 9) * fpp - 1; + if ( max_pfn < ctx->x86_pv.max_pfn ) + { diff --git a/57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch b/57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch new file mode 100644 index 0000000..d033379 --- /dev/null +++ b/57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch @@ -0,0 +1,51 @@ +# Commit 3b7cac5232012e167b284aba738fef1eceda33f8 +# Date 2016-09-01 11:41:03 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/levelling: Restrict non-architectural OSXSAVE handling to emulated CPUID + +There is no need to extend the workaround to the faulted CPUID view, as +Linux's dependence on the workaround is stricly via the emulated view. + +This causes a guest kernel faulted CPUID to observe architectural behaviour +with respect to its CR4.OSXSAVE setting. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -972,6 +972,8 @@ void pv_cpuid(struct cpu_user_regs *regs + * + * Therefore, the leaking of Xen's OSXSAVE setting has become a + * defacto part of the PV ABI and can't reasonably be corrected. ++ * It can however be restricted to only the enlightened CPUID ++ * view, as seen by the guest kernel. + * + * The following situations and logic now applies: + * +@@ -985,14 +987,18 @@ void pv_cpuid(struct cpu_user_regs *regs + * + * - Enlightened CPUID or CPUID faulting available: + * Xen can fully control what is seen here. 
Guest kernels need +- * to see the leaked OSXSAVE, but guest userspace is given +- * architectural behaviour, to reflect the guest kernels +- * intentions. ++ * to see the leaked OSXSAVE via the enlightened path, but ++ * guest userspace and the native is given architectural ++ * behaviour. ++ * ++ * Emulated vs Faulted CPUID is distinguised based on whether a ++ * #UD or #GP is currently being serviced. + */ + /* OSXSAVE cleared by pv_featureset. Fast-forward CR4 back in. */ +- if ( (guest_kernel_mode(curr, regs) && +- (read_cr4() & X86_CR4_OSXSAVE)) || +- (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ) ++ if ( (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) || ++ (regs->entry_vector == TRAP_invalid_op && ++ guest_kernel_mode(curr, regs) && ++ (read_cr4() & X86_CR4_OSXSAVE)) ) + c |= cpufeat_mask(X86_FEATURE_OSXSAVE); + + /* diff --git a/57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch b/57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch new file mode 100644 index 0000000..c8821e7 --- /dev/null +++ b/57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch @@ -0,0 +1,80 @@ +# Commit 33b23e5ab319a6bf9bfd38c4d9268fa6d9d072c6 +# Date 2016-09-01 11:41:05 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/levelling: Pass a vcpu rather than a domain to ctxt_switch_levelling() + +A subsequent change needs to special-case OSXSAVE handling, which is per-vcpu +rather than per-domain. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -203,9 +203,10 @@ static void __init noinline probe_maskin + * used to context switch to the default host state (by the cpu bringup-code, + * crash path, etc). + */ +-static void amd_ctxt_switch_levelling(const struct domain *nextd) ++static void amd_ctxt_switch_levelling(const struct vcpu *next) + { + struct cpuidmasks *these_masks = &this_cpu(cpuidmasks); ++ const struct domain *nextd = next ? 
next->domain : NULL; + const struct cpuidmasks *masks = + (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks) + ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults; +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -90,11 +90,11 @@ static const struct cpu_dev default_cpu + }; + static const struct cpu_dev *this_cpu = &default_cpu; + +-static void default_ctxt_switch_levelling(const struct domain *nextd) ++static void default_ctxt_switch_levelling(const struct vcpu *next) + { + /* Nop */ + } +-void (* __read_mostly ctxt_switch_levelling)(const struct domain *nextd) = ++void (* __read_mostly ctxt_switch_levelling)(const struct vcpu *next) = + default_ctxt_switch_levelling; + + bool_t opt_cpu_info; +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -151,9 +151,10 @@ static void __init probe_masking_msrs(vo + * used to context switch to the default host state (by the cpu bringup-code, + * crash path, etc). + */ +-static void intel_ctxt_switch_levelling(const struct domain *nextd) ++static void intel_ctxt_switch_levelling(const struct vcpu *next) + { + struct cpuidmasks *these_masks = &this_cpu(cpuidmasks); ++ const struct domain *nextd = next ? 
next->domain : NULL; + const struct cpuidmasks *masks; + + if (cpu_has_cpuid_faulting) { +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -2107,7 +2107,7 @@ void context_switch(struct vcpu *prev, s + load_segments(next); + } + +- ctxt_switch_levelling(nextd); ++ ctxt_switch_levelling(next); + } + + context_saved(prev); +--- a/xen/include/asm-x86/processor.h ++++ b/xen/include/asm-x86/processor.h +@@ -211,7 +211,7 @@ extern struct cpuinfo_x86 boot_cpu_data; + extern struct cpuinfo_x86 cpu_data[]; + #define current_cpu_data cpu_data[smp_processor_id()] + +-extern void (*ctxt_switch_levelling)(const struct domain *nextd); ++extern void (*ctxt_switch_levelling)(const struct vcpu *next); + + extern u64 host_pat; + extern bool_t opt_cpu_info; diff --git a/57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch b/57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch new file mode 100644 index 0000000..c8a9982 --- /dev/null +++ b/57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch @@ -0,0 +1,164 @@ +# Commit 08e7738ec3644350fbac0325085baac6b3c7cd11 +# Date 2016-09-01 11:41:07 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/levelling: Provide architectural OSXSAVE handling to masked native CPUID + +Contrary to c/s b2507fe7 "x86/domctl: Update PV domain cpumasks when setting +cpuid policy", Intel CPUID masks are applied after fast forwarding hardware +state, rather than before. (All behaviour in this regard appears completely +undocumented by both Intel and AMD). + +Therefore, a set bit in the MSR causes hardware to be fast-forwarded, while a +clear bit forces the guest's view to 0, even if Xen's CR4.OSXSAVE is actually +set. + +This allows Xen to provide an architectural view of a guest kernel's +CR4.OSXSAVE setting to any native CPUID instruction issued by guest kernel or +userspace, even when masking is used. 
+ +The masking value defaults to 1 (if the guest has XSAVE available) to cause +fast-forwarding to occur for the HVM and idle vcpus. + +When setting the MSRs, a PV guest kernel's choice of OSXSAVE is taken into +account, and clobbered from the MSR if not set. This causes the +fast-forwarding of Xen's CR4 state not to happen. + +As a side effect however, levelling potentially needs updating on all PV CR4 +changes. + +Reported-by: Jan Beulich +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +# Commit 1461504ce3c414fc5dc717ce16f039d0742b455a +# Date 2016-09-02 08:12:29 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/levelling: fix breakage on older Intel boxes from c/s 08e7738 + +cpufeat_mask() yields an unsigned integer constant. As a result, taking its +complement causes zero extension rather than sign extension. + +The result is that, when a guest OS has OSXSAVE disabled, all features in 1d +are hidden from native CPUID. Amongst other things, this causes the early +code in Linux to find no LAPIC, but for everything to appear fine later when +userspace is up and running. + +Signed-off-by: Andrew Cooper +Tested-by: Jan Beulich + +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -211,6 +211,24 @@ static void amd_ctxt_switch_levelling(co + (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks) + ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults; + ++ if ((levelling_caps & LCAP_1cd) == LCAP_1cd) { ++ uint64_t val = masks->_1cd; ++ ++ /* ++ * OSXSAVE defaults to 1, which causes fast-forwarding of ++ * Xen's real setting. Clobber it if disabled by the guest ++ * kernel. 
++ */ ++ if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) && ++ !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE)) ++ val &= ~((uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32); ++ ++ if (unlikely(these_masks->_1cd != val)) { ++ wrmsr_amd(MSR_K8_FEATURE_MASK, val); ++ these_masks->_1cd = val; ++ } ++ } ++ + #define LAZY(cap, msr, field) \ + ({ \ + if (unlikely(these_masks->field != masks->field) && \ +@@ -221,7 +239,6 @@ static void amd_ctxt_switch_levelling(co + } \ + }) + +- LAZY(LCAP_1cd, MSR_K8_FEATURE_MASK, _1cd); + LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK, e1cd); + LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0); + LAZY(LCAP_6c, MSR_AMD_THRM_FEATURE_MASK, _6c); +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -182,6 +182,24 @@ static void intel_ctxt_switch_levelling( + masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks) + ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults; + ++ if (msr_basic) { ++ uint64_t val = masks->_1cd; ++ ++ /* ++ * OSXSAVE defaults to 1, which causes fast-forwarding of ++ * Xen's real setting. Clobber it if disabled by the guest ++ * kernel. ++ */ ++ if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) && ++ !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE)) ++ val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE); ++ ++ if (unlikely(these_masks->_1cd != val)) { ++ wrmsrl(msr_basic, val); ++ these_masks->_1cd = val; ++ } ++ } ++ + #define LAZY(msr, field) \ + ({ \ + if (unlikely(these_masks->field != masks->field) && \ +@@ -192,7 +210,6 @@ static void intel_ctxt_switch_levelling( + } \ + }) + +- LAZY(msr_basic, _1cd); + LAZY(msr_ext, e1cd); + LAZY(msr_xsave, Da1); + +@@ -218,6 +235,11 @@ static void __init noinline intel_init_l + ecx &= opt_cpuid_mask_ecx; + edx &= opt_cpuid_mask_edx; + ++ /* Fast-forward bits - Must be set. 
*/ ++ if (ecx & cpufeat_mask(X86_FEATURE_XSAVE)) ++ ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE); ++ edx |= cpufeat_mask(X86_FEATURE_APIC); ++ + cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx; + } + +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -110,10 +110,18 @@ static void update_domain_cpuid_info(str + case X86_VENDOR_INTEL: + /* + * Intel masking MSRs are documented as AND masks. +- * Experimentally, they are applied before OSXSAVE and APIC ++ * Experimentally, they are applied after OSXSAVE and APIC + * are fast-forwarded from real hardware state. + */ + mask &= ((uint64_t)edx << 32) | ecx; ++ ++ if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) ) ++ ecx = cpufeat_mask(X86_FEATURE_OSXSAVE); ++ else ++ ecx = 0; ++ edx = cpufeat_mask(X86_FEATURE_APIC); ++ ++ mask |= ((uint64_t)edx << 32) | ecx; + break; + + case X86_VENDOR_AMD: +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -2696,6 +2696,7 @@ static int emulate_privileged_op(struct + case 4: /* Write CR4 */ + v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg); + write_cr4(pv_guest_cr4_to_real_cr4(v)); ++ ctxt_switch_levelling(v); + break; + + default: diff --git a/57c82be2-x86-32on64-adjust-call-gate-emulation.patch b/57c82be2-x86-32on64-adjust-call-gate-emulation.patch new file mode 100644 index 0000000..08ffc46 --- /dev/null +++ b/57c82be2-x86-32on64-adjust-call-gate-emulation.patch @@ -0,0 +1,48 @@ +# Commit ee1cc4bfdca84d526805c4c72302c026f5e9cd94 +# Date 2016-09-01 15:23:46 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/32on64: misc adjustments to call gate emulation + +- There's no 32-bit displacement in 16-bit addressing mode. +- It is wrong to ASSERT() anything on parts of an instruction fetched + from guest memory. +- The two scaling bits of a SIB byte don't affect whether there is a + scaled index register or not. 
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -3176,7 +3176,7 @@ static void emulate_gate_op(struct cpu_u + sib = insn_fetch(u8, base, eip, limit); + + modrm = (modrm & ~7) | (sib & 7); +- if ( (sib >>= 3) != 4 ) ++ if ( ((sib >>= 3) & 7) != 4 ) + opnd_off = *(unsigned long *) + decode_register(sib & 7, regs, 0); + opnd_off <<= sib >> 3; +@@ -3236,7 +3236,10 @@ static void emulate_gate_op(struct cpu_u + opnd_off += insn_fetch(s8, base, eip, limit); + break; + case 0x80: +- opnd_off += insn_fetch(s32, base, eip, limit); ++ if ( ad_bytes > 2 ) ++ opnd_off += insn_fetch(s32, base, eip, limit); ++ else ++ opnd_off += insn_fetch(s16, base, eip, limit); + break; + } + if ( ad_bytes == 4 ) +@@ -3273,8 +3276,7 @@ static void emulate_gate_op(struct cpu_u + #define ad_default ad_bytes + opnd_sel = insn_fetch(u16, base, opnd_off, limit); + #undef ad_default +- ASSERT((opnd_sel & ~3) == regs->error_code); +- if ( dpl < (opnd_sel & 3) ) ++ if ( (opnd_sel & ~3) != regs->error_code || dpl < (opnd_sel & 3) ) + { + do_guest_trap(TRAP_gp_fault, regs, 1); + return; diff --git a/57c93e52-fix-error-in-libxl_device_usbdev_list.patch b/57c93e52-fix-error-in-libxl_device_usbdev_list.patch new file mode 100644 index 0000000..1c6edce --- /dev/null +++ b/57c93e52-fix-error-in-libxl_device_usbdev_list.patch @@ -0,0 +1,27 @@ +References: bsc#989679 + +Subject: libxl: fix libxl_device_usbdev_list() +From: Juergen Gross jgross@suse.com Fri Sep 2 10:16:14 2016 +0200 +Date: Fri Sep 2 09:54:42 2016 +0100: +Git: 74157a2f9886b55cd45714e58c80035bfe3e080c + +Commit 03814de1d2ecdabedabceb8e728d934a632a43b9 ("libxl: Do not trust +frontend for vusb") introduced an error in libxl_device_usbdev_list(). +Fix it. 
+ +Signed-off-by: Juergen Gross +Acked-by: Wei Liu + +Index: xen-4.7.0-testing/tools/libxl/libxl_pvusb.c +=================================================================== +--- xen-4.7.0-testing.orig/tools/libxl/libxl_pvusb.c ++++ xen-4.7.0-testing/tools/libxl/libxl_pvusb.c +@@ -732,7 +732,7 @@ libxl_device_usbdev_list(libxl_ctx *ctx, + *num = 0; + + libxl_vusbs_path = GCSPRINTF("%s/device/vusb", +- libxl__xs_libxl_path(gc, !domid)); ++ libxl__xs_libxl_path(gc, domid)); + usbctrls = libxl__xs_directory(gc, XBT_NULL, libxl_vusbs_path, &nc); + + for (i = 0; i < nc; i++) { diff --git a/57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch b/57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch new file mode 100644 index 0000000..2492a12 --- /dev/null +++ b/57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch @@ -0,0 +1,146 @@ +References: bsc#991934 + +# Commit 9109bf55084398c4547b8956906410c158eb9a17 +# Date 2016-09-02 14:17:55 +0200 +# Author Dario Faggioli +# Committer Jan Beulich +credit1: fix a race when picking initial pCPU for a vCPU + +In the Credit1 hunk of 9f358ddd69463 ("xen: Have +schedulers revise initial placement") csched_cpu_pick() +is called without taking the runqueue lock of the +(temporary) pCPU that the vCPU has been assigned to +(e.g., in XEN_DOMCTL_max_vcpus). + +However, although 'hidden' in the IS_RUNQ_IDLE() macro, +that function does access the runq (for doing load +balancing calculations). Two scenarios are possible: + 1) we are on cpu X, and IS_RUNQ_IDLE() peeks at cpu + X's own runq; + 2) we are on cpu X, but IS_RUNQ_IDLE() peeks at some + other cpu's runq. + +Scenario 2) absolutely requires that the appropriate +runq lock is taken. Scenario 1) works even without +taking the cpu's own runq lock. That is actually what +happens when _csched_cpu_pick() is called from +csched_vcpu_acct() (in turn, called by csched_tick()). 
+ +Races have been observed and reported (by both XenServer +own testing and OSSTest [1]), in the form of +IS_RUNQ_IDLE() falling over LIST_POISON, because we're +not currently holding the proper lock, in +csched_vcpu_insert(), when scenario 1) occurs. + +However, for better robustness, from now on we always +ask for the proper runq lock to be held when calling +IS_RUNQ_IDLE() (which is also becoming a static inline +function instead of macro). + +In order to comply with that, we take the lock around +the call to _csched_cpu_pick() in csched_vcpu_acct(). + +[1] https://lists.xen.org/archives/html/xen-devel/2016-08/msg02144.html + +Reported-by: Andrew Cooper +Signed-off-by: Dario Faggioli +Reviewed-by: George Dunlap + +--- a/xen/common/sched_credit.c ++++ b/xen/common/sched_credit.c +@@ -84,9 +84,6 @@ + #define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv) + #define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv) + #define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq)) +-/* Is the first element of _cpu's runq its idle vcpu? */ +-#define IS_RUNQ_IDLE(_cpu) (list_empty(RUNQ(_cpu)) || \ +- is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu)) + + + /* +@@ -248,6 +245,18 @@ __runq_elem(struct list_head *elem) + return list_entry(elem, struct csched_vcpu, runq_elem); + } + ++/* Is the first element of cpu's runq (if any) cpu's idle vcpu? */ ++static inline bool_t is_runq_idle(unsigned int cpu) ++{ ++ /* ++ * We're peeking at cpu's runq, we must hold the proper lock. ++ */ ++ ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock)); ++ ++ return list_empty(RUNQ(cpu)) || ++ is_idle_vcpu(__runq_elem(RUNQ(cpu)->next)->vcpu); ++} ++ + static inline void + __runq_insert(struct csched_vcpu *svc) + { +@@ -767,7 +776,7 @@ _csched_cpu_pick(const struct scheduler + * runnable vcpu on cpu, we add cpu to the idlers. 
+ */ + cpumask_and(&idlers, &cpu_online_map, CSCHED_PRIV(ops)->idlers); +- if ( vc->processor == cpu && IS_RUNQ_IDLE(cpu) ) ++ if ( vc->processor == cpu && is_runq_idle(cpu) ) + __cpumask_set_cpu(cpu, &idlers); + cpumask_and(&cpus, &cpus, &idlers); + +@@ -947,21 +956,33 @@ csched_vcpu_acct(struct csched_private * + /* + * Put this VCPU and domain back on the active list if it was + * idling. +- * +- * If it's been active a while, check if we'd be better off +- * migrating it to run elsewhere (see multi-core and multi-thread +- * support in csched_cpu_pick()). + */ + if ( list_empty(&svc->active_vcpu_elem) ) + { + __csched_vcpu_acct_start(prv, svc); + } +- else if ( _csched_cpu_pick(ops, current, 0) != cpu ) ++ else + { +- SCHED_VCPU_STAT_CRANK(svc, migrate_r); +- SCHED_STAT_CRANK(migrate_running); +- set_bit(_VPF_migrating, &current->pause_flags); +- cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); ++ unsigned int new_cpu; ++ unsigned long flags; ++ spinlock_t *lock = vcpu_schedule_lock_irqsave(current, &flags); ++ ++ /* ++ * If it's been active a while, check if we'd be better off ++ * migrating it to run elsewhere (see multi-core and multi-thread ++ * support in csched_cpu_pick()). ++ */ ++ new_cpu = _csched_cpu_pick(ops, current, 0); ++ ++ vcpu_schedule_unlock_irqrestore(lock, flags, current); ++ ++ if ( new_cpu != cpu ) ++ { ++ SCHED_VCPU_STAT_CRANK(svc, migrate_r); ++ SCHED_STAT_CRANK(migrate_running); ++ set_bit(_VPF_migrating, &current->pause_flags); ++ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); ++ } + } + } + +@@ -994,9 +1015,13 @@ csched_vcpu_insert(const struct schedule + + BUG_ON( is_idle_vcpu(vc) ); + +- /* This is safe because vc isn't yet being scheduled */ ++ /* csched_cpu_pick() looks in vc->processor's runq, so we need the lock. 
*/ ++ lock = vcpu_schedule_lock_irq(vc); ++ + vc->processor = csched_cpu_pick(ops, vc); + ++ spin_unlock_irq(lock); ++ + lock = vcpu_schedule_lock_irq(vc); + + if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running ) diff --git a/57c96e2c-x86-correct-PT_NOTE-file-position.patch b/57c96e2c-x86-correct-PT_NOTE-file-position.patch new file mode 100644 index 0000000..765e4e3 --- /dev/null +++ b/57c96e2c-x86-correct-PT_NOTE-file-position.patch @@ -0,0 +1,25 @@ +# Commit f8f185dc4359a1cd8e7896dfbcacb54b473436c8 +# Date 2016-09-02 14:18:52 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86: correct PT_NOTE file position + +Program and section headers disagreed about the file offset at which +the build ID note lives. + +Reported-by: Sylvain Munaut +Signed-off-by: Jan Beulich +Reviewed-by: Konrad Rzeszutek Wilk +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/boot/mkelf32.c ++++ b/xen/arch/x86/boot/mkelf32.c +@@ -394,7 +394,7 @@ int main(int argc, char **argv) + note_phdr.p_paddr = note_base; + note_phdr.p_filesz = note_sz; + note_phdr.p_memsz = note_sz; +- note_phdr.p_offset = offset; ++ note_phdr.p_offset = RAW_OFFSET + offset; + + /* Tack on the .note\0 */ + out_shdr[2].sh_size += sizeof(out_shstrtab_extra); diff --git a/57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch b/57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch new file mode 100644 index 0000000..2770441 --- /dev/null +++ b/57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch @@ -0,0 +1,146 @@ +# Commit 68eb1a4d92be58e26bd11d02b8e0317bd56294ac +# Date 2016-09-07 12:34:43 +0200 +# Author Jan Beulich +# Committer Jan Beulich +VMX: correct feature checks for MPX and XSAVES + +Their VMCS fields aren't tied to the respective base CPU feature flags +but instead to VMX specific ones. + +Note that while the VMCS GUEST_BNDCFGS field exists if either of the +two respective features is available, MPX continues to get exposed to +guests only with both features present. 
+ +Also add the so far missing handling of +- GUEST_BNDCFGS in construct_vmcs() +- MSR_IA32_BNDCFGS in vmx_msr_{read,write}_intercept() +and mirror the extra correctness checks during MSR write to +vmx_load_msr(). + +Reported-by: "Rockosov, Dmitry" +Signed-off-by: Jan Beulich +Tested-by: "Rockosov, Dmitry" +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -168,8 +168,7 @@ static void __init calculate_hvm_feature + */ + if ( cpu_has_vmx ) + { +- if ( !(vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) || +- !(vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS) ) ++ if ( !cpu_has_vmx_mpx ) + __clear_bit(X86_FEATURE_MPX, hvm_featureset); + + if ( !cpu_has_vmx_xsaves ) +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -1281,6 +1281,8 @@ static int construct_vmcs(struct vcpu *v + __vmwrite(HOST_PAT, host_pat); + __vmwrite(GUEST_PAT, guest_pat); + } ++ if ( cpu_has_vmx_mpx ) ++ __vmwrite(GUEST_BNDCFGS, 0); + if ( cpu_has_vmx_xsaves ) + __vmwrite(XSS_EXIT_BITMAP, 0); + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -786,14 +786,15 @@ static int vmx_load_vmcs_ctxt(struct vcp + + static unsigned int __init vmx_init_msr(void) + { +- return !!cpu_has_mpx + !!cpu_has_xsaves; ++ return (cpu_has_mpx && cpu_has_vmx_mpx) + ++ (cpu_has_xsaves && cpu_has_vmx_xsaves); + } + + static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt) + { + vmx_vmcs_enter(v); + +- if ( cpu_has_mpx ) ++ if ( cpu_has_mpx && cpu_has_vmx_mpx ) + { + __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val); + if ( ctxt->msr[ctxt->count].val ) +@@ -802,7 +803,7 @@ static void vmx_save_msr(struct vcpu *v, + + vmx_vmcs_exit(v); + +- if ( cpu_has_xsaves ) ++ if ( cpu_has_xsaves && cpu_has_vmx_xsaves ) + { + ctxt->msr[ctxt->count].val = v->arch.hvm_vcpu.msr_xss; + if ( ctxt->msr[ctxt->count].val ) +@@ -822,13 +823,15 @@ static int vmx_load_msr(struct vcpu *v, + switch ( ctxt->msr[i].index ) + { + case MSR_IA32_BNDCFGS: +- if ( 
cpu_has_mpx ) ++ if ( cpu_has_mpx && cpu_has_vmx_mpx && ++ is_canonical_address(ctxt->msr[i].val) && ++ !(ctxt->msr[i].val & IA32_BNDCFGS_RESERVED) ) + __vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val); + else if ( ctxt->msr[i].val ) + err = -ENXIO; + break; + case MSR_IA32_XSS: +- if ( cpu_has_xsaves ) ++ if ( cpu_has_xsaves && cpu_has_vmx_xsaves ) + v->arch.hvm_vcpu.msr_xss = ctxt->msr[i].val; + else + err = -ENXIO; +@@ -2640,6 +2643,11 @@ static int vmx_msr_read_intercept(unsign + case MSR_IA32_DEBUGCTLMSR: + __vmread(GUEST_IA32_DEBUGCTL, msr_content); + break; ++ case MSR_IA32_BNDCFGS: ++ if ( !cpu_has_mpx || !cpu_has_vmx_mpx ) ++ goto gp_fault; ++ __vmread(GUEST_BNDCFGS, msr_content); ++ break; + case IA32_FEATURE_CONTROL_MSR: + case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC: + if ( !nvmx_msr_read_intercept(msr, msr_content) ) +@@ -2866,6 +2874,13 @@ static int vmx_msr_write_intercept(unsig + + break; + } ++ case MSR_IA32_BNDCFGS: ++ if ( !cpu_has_mpx || !cpu_has_vmx_mpx || ++ !is_canonical_address(msr_content) || ++ (msr_content & IA32_BNDCFGS_RESERVED) ) ++ goto gp_fault; ++ __vmwrite(GUEST_BNDCFGS, msr_content); ++ break; + case IA32_FEATURE_CONTROL_MSR: + case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_TRUE_ENTRY_CTLS: + if ( !nvmx_msr_write_intercept(msr, msr_content) ) +--- a/xen/include/asm-x86/hvm/vmx/vmcs.h ++++ b/xen/include/asm-x86/hvm/vmx/vmcs.h +@@ -375,6 +375,9 @@ extern u64 vmx_ept_vpid_cap; + (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS) + #define cpu_has_vmx_pml \ + (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_PML) ++#define cpu_has_vmx_mpx \ ++ ((vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) && \ ++ (vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS)) + #define cpu_has_vmx_xsaves \ + (vmx_secondary_exec_control & SECONDARY_EXEC_XSAVES) + #define cpu_has_vmx_tsc_scaling \ +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -56,7 +56,10 @@ + #define MSR_IA32_DS_AREA 0x00000600 + #define 
MSR_IA32_PERF_CAPABILITIES 0x00000345 + +-#define MSR_IA32_BNDCFGS 0x00000D90 ++#define MSR_IA32_BNDCFGS 0x00000d90 ++#define IA32_BNDCFGS_ENABLE 0x00000001 ++#define IA32_BNDCFGS_PRESERVE 0x00000002 ++#define IA32_BNDCFGS_RESERVED 0x00000ffc + + #define MSR_IA32_XSS 0x00000da0 + diff --git a/57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch b/57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch new file mode 100644 index 0000000..4eb32e7 --- /dev/null +++ b/57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch @@ -0,0 +1,33 @@ +References: bsc#995785 CVE-2016-7092 XSA-185 + +# Commit c844d637d92a75854ea5c8d4e5ca34302a9f623c +# Date 2016-09-08 14:14:53 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/32on64: don't allow recursive page tables from L3 + +L3 entries are special in PAE mode, and hence can't reasonably be used +for setting up recursive (and hence linear) page table mappings. Since +abuse is possible when the guest in fact gets run on 4-level page +tables, this needs to be excluded explicitly. + +This is XSA-185 / CVE-2016-7092. 
+ +Reported-by: Jérémie Boutoille +Reported-by: "栾尚聪(好风)" +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -1123,7 +1123,9 @@ get_page_from_l3e( + + rc = get_page_and_type_from_pagenr( + l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1); +- if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) ) ++ if ( unlikely(rc == -EINVAL) && ++ !is_pv_32bit_domain(d) && ++ get_l3_linear_pagetable(l3e, pfn, d) ) + rc = 0; + + return rc; diff --git a/57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch b/57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch new file mode 100644 index 0000000..0bcb0da --- /dev/null +++ b/57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch @@ -0,0 +1,67 @@ +References: bsc#995789 CVE-2016-7093 XSA-186 + +# Commit e9575f980df81aeb0e5b6139f485fd6f7bb7f5b6 +# Date 2016-09-08 14:15:53 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/emulate: Correct boundary interactions of emulated instructions + +This reverts most of c/s 0640ffb6 "x86emul: fix rIP handling". + +Experimentally, in long mode processors will execute an instruction stream +which crosses the 64bit -1 -> 0 virtual boundary, whether the instruction +boundary is aligned on the virtual boundary, or is misaligned. + +In compatibility mode, Intel processors will execute an instruction stream +which crosses the 32bit -1 -> 0 virtual boundary, while AMD processors raise a +segmentation fault. Xen's segmentation behaviour matches AMD. + +For 16bit code, hardware does not ever truncate %ip. %eip is always used and +behaves normally as a 32bit register, including in 16bit protected mode +segments, as well as in Real and Unreal mode. + +This is XSA-186 / CVE-2016-7093. 
+ +Reported-by: Brian Marcotte +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -1538,10 +1538,6 @@ x86_emulate( + #endif + } + +- /* Truncate rIP to def_ad_bytes (2 or 4) if necessary. */ +- if ( def_ad_bytes < sizeof(_regs.eip) ) +- _regs.eip &= (1UL << (def_ad_bytes * 8)) - 1; +- + /* Prefix bytes. */ + for ( ; ; ) + { +@@ -3843,21 +3839,11 @@ x86_emulate( + + /* Commit shadow register state. */ + _regs.eflags &= ~EFLG_RF; +- switch ( __builtin_expect(def_ad_bytes, sizeof(_regs.eip)) ) +- { +- uint16_t ip; + +- case 2: +- ip = _regs.eip; +- _regs.eip = ctxt->regs->eip; +- *(uint16_t *)&_regs.eip = ip; +- break; +-#ifdef __x86_64__ +- case 4: +- _regs.rip = _regs._eip; +- break; +-#endif +- } ++ /* Zero the upper 32 bits of %rip if not in long mode. */ ++ if ( def_ad_bytes < sizeof(_regs.eip) ) ++ _regs.eip = (uint32_t)_regs.eip; ++ + *ctxt->regs = _regs; + + done: diff --git a/57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch b/57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch new file mode 100644 index 0000000..6f8e8fe --- /dev/null +++ b/57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch @@ -0,0 +1,47 @@ +References: bsc#995792 CVE-2016-7094 XSA-187 + +# Commit a9f3b3bad17d91e2067fc00d51b0302349570d08 +# Date 2016-09-08 14:16:26 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/shadow: Avoid overflowing sh_ctxt->seg_reg[] + +hvm_get_seg_reg() does not perform a range check on its input segment, calls +hvm_get_segment_register() and writes straight into sh_ctxt->seg_reg[]. + +x86_seg_none is outside the bounds of sh_ctxt->seg_reg[], and will hit a BUG() +in {vmx,svm}_get_segment_register(). + +HVM guests running with shadow paging can end up performing a virtual to +linear translation with x86_seg_none. This is used for addresses which are +already linear. 
However, none of this is a legitimate pagetable update, so +fail the emulation in such a case. + +This is XSA-187 / CVE-2016-7094. + +Reported-by: Andrew Cooper +Signed-off-by: Andrew Cooper +Reviewed-by: Tim Deegan + +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -140,9 +140,18 @@ static int hvm_translate_linear_addr( + struct sh_emulate_ctxt *sh_ctxt, + unsigned long *paddr) + { +- struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt); ++ struct segment_register *reg; + int okay; + ++ /* ++ * Can arrive here with non-user segments. However, no such cirucmstance ++ * is part of a legitimate pagetable update, so fail the emulation. ++ */ ++ if ( !is_x86_user_segment(seg) ) ++ return X86EMUL_UNHANDLEABLE; ++ ++ reg = hvm_get_seg_reg(seg, sh_ctxt); ++ + okay = hvm_virtual_to_linear_addr( + seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr); + diff --git a/57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch b/57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch new file mode 100644 index 0000000..9ad283b --- /dev/null +++ b/57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch @@ -0,0 +1,32 @@ +References: bsc#995789 + +# Commit 7b5cee79dad24e7006059667b02bd7de685d8ee5 +# Date 2016-09-08 16:39:46 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +hvm/fep: Allow testing of instructions crossing the -1 -> 0 virtual boundary + +The Force Emulation Prefix is named to follow its PV counterpart for cpuid or +rdtsc, but isn't really an instruction prefix. It behaves as a break-out into +Xen, with the purpose of emulating the next instruction in the current state. + +It is important to be able to test legal situations which occur in real +hardware, including instructions which cross certain boundaries, and +instructions starting at 0. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -3905,6 +3905,10 @@ void hvm_ud_intercept(struct cpu_user_re + { + regs->eip += sizeof(sig); + regs->eflags &= ~X86_EFLAGS_RF; ++ ++ /* Zero the upper 32 bits of %rip if not in long mode. */ ++ if ( !(hvm_long_mode_enabled(cur) && cs.attr.fields.l) ) ++ regs->eip = regs->_eip; + } + } + diff --git a/57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch b/57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch new file mode 100644 index 0000000..03b4fec --- /dev/null +++ b/57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch @@ -0,0 +1,203 @@ +References: bsc#995792 + +# Commit 4fa0105d95be6e7145a1f6fd1036ccd43976228c +# Date 2016-09-08 16:39:46 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/segment: Bounds check accesses to emulation ctxt->seg_reg[] + +HVM HAP codepaths have space for all segment registers in the seg_reg[] +cache (with x86_seg_none still risking an array overrun), while the shadow +codepaths only have space for the user segments. + +Range check the input segment of *_get_seg_reg() against the size of the array +used to cache the results, to avoid overruns in the case that the callers +don't filter their input suitably. + +Subsume the is_x86_user_segment(seg) checks from the shadow code, which were +an incomplete attempt at range checking, and are now superseded. Make +hvm_get_seg_reg() static, as it is not used outside of shadow/common.c + +No functional change, but far easier to reason that no overflow is possible. 
+ +Reported-by: Andrew Cooper +Signed-off-by: Andrew Cooper +Acked-by: Tim Deegan +Acked-by: Jan Beulich + +# Commit 4c47c47938ea24c73d9459f9f0b6923513772b5d +# Date 2016-09-09 15:31:01 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +xen/x86: Fix build with clang following c/s 4fa0105 + +https://travis-ci.org/xen-project/xen/jobs/158494027#L2344 + +Clang complains: + + emulate.c:2016:14: error: comparison of unsigned enum expression < 0 + is always false [-Werror,-Wtautological-compare] + if ( seg < 0 || seg >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) ) + ~~~ ^ ~ + +Clang is wrong to raise a warning like this. The signed-ness of an enum is +implementation defined in C, and robust code must not assume the choices made +by the compiler. + +In this case, dropping the < 0 check creates a latent bug which would result +in an array underflow when compiled with a compiler which chooses a signed +enum. + +Work around the bug by explicitly pulling seg into an unsigned integer, and +only perform the upper bounds check. + +No functional change. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Reviewed-by: George Dunlap + +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -534,6 +534,8 @@ static int hvmemul_virtual_to_linear( + *reps = min_t(unsigned long, *reps, max_reps); + + reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); ++ if ( IS_ERR(reg) ) ++ return -PTR_ERR(reg); + + if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) ) + { +@@ -1369,6 +1371,10 @@ static int hvmemul_read_segment( + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); ++ ++ if ( IS_ERR(sreg) ) ++ return -PTR_ERR(sreg); ++ + memcpy(reg, sreg, sizeof(struct segment_register)); + return X86EMUL_OKAY; + } +@@ -1382,6 +1388,9 @@ static int hvmemul_write_segment( + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt); + ++ if ( IS_ERR(sreg) ) ++ return -PTR_ERR(sreg); ++ + memcpy(sreg, reg, sizeof(struct segment_register)); + __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty); + +@@ -1934,13 +1943,22 @@ void hvm_emulate_writeback( + } + } + ++/* ++ * Callers which pass a known in-range x86_segment can rely on the return ++ * pointer being valid. Other callers must explicitly check for errors. 
++ */ + struct segment_register *hvmemul_get_seg_reg( + enum x86_segment seg, + struct hvm_emulate_ctxt *hvmemul_ctxt) + { +- if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) ) +- hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]); +- return &hvmemul_ctxt->seg_reg[seg]; ++ unsigned int idx = seg; ++ ++ if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) ) ++ return ERR_PTR(-X86EMUL_UNHANDLEABLE); ++ ++ if ( !__test_and_set_bit(idx, &hvmemul_ctxt->seg_reg_accessed) ) ++ hvm_get_segment_register(current, idx, &hvmemul_ctxt->seg_reg[idx]); ++ return &hvmemul_ctxt->seg_reg[idx]; + } + + static const char *guest_x86_mode_to_str(int mode) +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -123,12 +123,22 @@ __initcall(shadow_audit_key_init); + /* x86 emulator support for the shadow code + */ + +-struct segment_register *hvm_get_seg_reg( ++/* ++ * Callers which pass a known in-range x86_segment can rely on the return ++ * pointer being valid. Other callers must explicitly check for errors. ++ */ ++static struct segment_register *hvm_get_seg_reg( + enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt) + { +- struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg]; +- if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) ) +- hvm_get_segment_register(current, seg, seg_reg); ++ unsigned int idx = seg; ++ struct segment_register *seg_reg; ++ ++ if ( idx >= ARRAY_SIZE(sh_ctxt->seg_reg) ) ++ return ERR_PTR(-X86EMUL_UNHANDLEABLE); ++ ++ seg_reg = &sh_ctxt->seg_reg[idx]; ++ if ( !__test_and_set_bit(idx, &sh_ctxt->valid_seg_regs) ) ++ hvm_get_segment_register(current, idx, seg_reg); + return seg_reg; + } + +@@ -143,14 +153,9 @@ static int hvm_translate_linear_addr( + struct segment_register *reg; + int okay; + +- /* +- * Can arrive here with non-user segments. However, no such cirucmstance +- * is part of a legitimate pagetable update, so fail the emulation. 
+- */ +- if ( !is_x86_user_segment(seg) ) +- return X86EMUL_UNHANDLEABLE; +- + reg = hvm_get_seg_reg(seg, sh_ctxt); ++ if ( IS_ERR(reg) ) ++ return -PTR_ERR(reg); + + okay = hvm_virtual_to_linear_addr( + seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr); +@@ -253,9 +258,6 @@ hvm_emulate_write(enum x86_segment seg, + unsigned long addr; + int rc; + +- if ( !is_x86_user_segment(seg) ) +- return X86EMUL_UNHANDLEABLE; +- + /* How many emulations could we save if we unshadowed on stack writes? */ + if ( seg == x86_seg_ss ) + perfc_incr(shadow_fault_emulate_stack); +@@ -283,7 +285,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg + unsigned long addr, old, new; + int rc; + +- if ( !is_x86_user_segment(seg) || bytes > sizeof(long) ) ++ if ( bytes > sizeof(long) ) + return X86EMUL_UNHANDLEABLE; + + rc = hvm_translate_linear_addr( +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -740,8 +740,6 @@ const struct x86_emulate_ops *shadow_ini + struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs); + void shadow_continue_emulation( + struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs); +-struct segment_register *hvm_get_seg_reg( +- enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt); + + #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) + /**************************************************************************/ +--- a/xen/include/asm-x86/hvm/emulate.h ++++ b/xen/include/asm-x86/hvm/emulate.h +@@ -13,6 +13,7 @@ + #define __ASM_X86_HVM_EMULATE_H__ + + #include ++#include + #include + #include + diff --git a/xen.changes b/xen.changes index 8e307cf..9cae873 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,29 @@ +------------------------------------------------------------------- +Mon Sep 12 08:44:11 MDT 2016 - carnold@suse.com + +- bsc#995785 - VUL-0: CVE-2016-7092: xen: x86: Disallow L3 + recursive pagetable for 32-bit PV guests (XSA-185) + 57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch +- 
bsc#995789 - VUL-0: CVE-2016-7093: xen: x86: Mishandling of + instruction pointer truncation during emulation (XSA-186) + 57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch + 57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch +- bsc#995792 - VUL-0: CVE-2016-7094: xen: x86 HVM: Overflow of + sh_ctxt->seg_reg[] (XSA-187) + 57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch + 57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch +- bsc#991934 - xen hypervisor crash in csched_acct + 57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch +- Upstream patches from Jan + 57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch + 57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch + 57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch + 57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch + 57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch + 57c82be2-x86-32on64-adjust-call-gate-emulation.patch + 57c96e2c-x86-correct-PT_NOTE-file-position.patch + 57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch + ------------------------------------------------------------------- Mon Sep 12 13:10:21 UTC 2016 - ohering@suse.de @@ -10,6 +36,13 @@ Mon Sep 5 11:39:21 UTC 2016 - ohering@suse.de - bnc#953518 - unplug also SCSI disks in qemu-xen-traditional for upstream unplug protocol +------------------------------------------------------------------- +Fri Sep 2 08:32:44 MDT 2016 - carnold@suse.com + +- bsc#989679 - [pvusb feature] USB device not found when + 'virsh detach-device guest usb.xml' + 57c93e52-fix-error-in-libxl_device_usbdev_list.patch + ------------------------------------------------------------------- Tue Aug 23 08:07:46 MDT 2016 - carnold@suse.com diff --git a/xen.spec b/xen.spec index 5356afa..83263f3 100644 --- a/xen.spec +++ b/xen.spec @@ -165,7 +165,7 @@ BuildRequires: xorg-x11-util-devel %endif %endif -Version: 4.7.0_11 
+Version: 4.7.0_12 Release: 0 Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) License: GPL-2.0 @@ -219,6 +219,21 @@ Patch13: 57a30261-x86-support-newer-Intel-CPU-models.patch Patch14: 57ac6316-don-t-restrict-DMA-heap-to-node-0.patch Patch15: 57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch Patch16: 57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch +Patch17: 57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch +Patch18: 57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch +Patch19: 57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch +Patch20: 57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch +Patch21: 57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch +Patch22: 57c82be2-x86-32on64-adjust-call-gate-emulation.patch +Patch23: 57c93e52-fix-error-in-libxl_device_usbdev_list.patch +Patch24: 57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch +Patch25: 57c96e2c-x86-correct-PT_NOTE-file-position.patch +Patch26: 57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch +Patch27: 57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch +Patch28: 57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch +Patch29: 57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch +Patch30: 57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch +Patch31: 57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch # Upstream qemu-traditional patches Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch @@ -557,6 +572,21 @@ Authors: %patch14 -p1 %patch15 -p1 %patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 -p1 +%patch26 -p1 +%patch27 -p1 +%patch28 -p1 +%patch29 -p1 +%patch30 -p1 +%patch31 -p1 # Upstream qemu patches %patch250 -p1 %patch251 -p1