# Commit 08e7738ec3644350fbac0325085baac6b3c7cd11
# Date 2016-09-01 11:41:07 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/levelling: Provide architectural OSXSAVE handling to masked native CPUID

Contrary to c/s b2507fe7 "x86/domctl: Update PV domain cpumasks when setting
cpuid policy", Intel CPUID masks are applied after fast-forwarding hardware
state, rather than before. (All behaviour in this regard appears completely
undocumented by both Intel and AMD.)

Therefore, a set bit in the MSR causes the hardware state to be
fast-forwarded, while a clear bit forces the guest's view to 0, even if Xen's
CR4.OSXSAVE is actually set.

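As a rough sketch of that behaviour (illustrative C only, not Xen code:
guest_leaf1_ecx() and its parameters are invented for this example, and bit 27
is CPUID.1:ECX.OSXSAVE), the leaf 1 ECX value a guest reads behaves as if
computed by:

    #include <stdbool.h>
    #include <stdint.h>

    #define OSXSAVE (1u << 27)          /* CPUID.1:ECX.OSXSAVE */

    /* Illustrative only - not Xen code.  Fast-forwarding from the real CR4
     * happens first; the AND-style masking MSR is applied afterwards. */
    uint32_t guest_leaf1_ecx(uint32_t hw_ecx, uint32_t mask_msr,
                             bool real_cr4_osxsave)
    {
        if (real_cr4_osxsave)
            hw_ecx |= OSXSAVE;
        else
            hw_ecx &= ~OSXSAVE;

        return hw_ecx & mask_msr;       /* a clear mask bit forces 0 */
    }
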
This allows Xen to provide an architectural view of a guest kernel's
CR4.OSXSAVE setting to any native CPUID instruction issued by the guest kernel
or userspace, even when masking is used.

The masking value defaults to 1 (if the guest has XSAVE available) to cause
fast-forwarding to occur for the HVM and idle vcpus.

When setting the MSRs, a PV guest kernel's choice of OSXSAVE is taken into
account, and the bit is clobbered from the MSR if not set. This prevents the
fast-forwarding of Xen's CR4 state.

As a side effect, however, levelling potentially needs updating on all PV CR4
changes.

Reported-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

# Commit 1461504ce3c414fc5dc717ce16f039d0742b455a
# Date 2016-09-02 08:12:29 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/levelling: fix breakage on older Intel boxes from c/s 08e7738

cpufeat_mask() yields an unsigned integer constant. As a result, taking its
complement causes zero extension rather than sign extension.

The result is that, when a guest OS has OSXSAVE disabled, all features in 1d
are hidden from native CPUID. Amongst other things, this causes the early code
in Linux to find no LAPIC, although everything appears fine later once
userspace is up and running.

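A minimal standalone snippet (again illustrative C, not Xen code; OSXSAVE_MASK
stands in for cpufeat_mask(X86_FEATURE_OSXSAVE), and the Intel mask MSR layout
keeps 1d in bits 63:32 and 1c in bits 31:0) showing the promotion pitfall and
the fix:

    #include <inttypes.h>
    #include <stdio.h>

    #define OSXSAVE_MASK (1u << 27)   /* unsigned int, as cpufeat_mask() yields */

    int main(void)
    {
        uint64_t val = UINT64_MAX;    /* 1d in bits 63:32, 1c in bits 31:0 */

        /* Broken: ~ is evaluated on the 32-bit unsigned constant, which is
         * then zero-extended, so the AND also clears the whole 1d half. */
        uint64_t broken = val & ~OSXSAVE_MASK;

        /* Fixed: widen to 64 bits before taking the complement. */
        uint64_t fixed = val & ~(uint64_t)OSXSAVE_MASK;

        printf("broken %016" PRIx64 "\n", broken);  /* 00000000f7ffffff */
        printf("fixed  %016" PRIx64 "\n", fixed);   /* fffffffff7ffffff */
        return 0;
    }
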
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -211,6 +211,24 @@ static void amd_ctxt_switch_levelling(co
 		(nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
 		? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
 
+	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
+		uint64_t val = masks->_1cd;
+
+		/*
+		 * OSXSAVE defaults to 1, which causes fast-forwarding of
+		 * Xen's real setting. Clobber it if disabled by the guest
+		 * kernel.
+		 */
+		if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
+		    !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
+			val &= ~((uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32);
+
+		if (unlikely(these_masks->_1cd != val)) {
+			wrmsr_amd(MSR_K8_FEATURE_MASK, val);
+			these_masks->_1cd = val;
+		}
+	}
+
 #define LAZY(cap, msr, field) \
 	({ \
 		if (unlikely(these_masks->field != masks->field) && \
@@ -221,7 +239,6 @@ static void amd_ctxt_switch_levelling(co
 		} \
 	})
 
-	LAZY(LCAP_1cd, MSR_K8_FEATURE_MASK, _1cd);
 	LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK, e1cd);
 	LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0);
 	LAZY(LCAP_6c, MSR_AMD_THRM_FEATURE_MASK, _6c);
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -182,6 +182,24 @@ static void intel_ctxt_switch_levelling(
 	masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
 		? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
 
+	if (msr_basic) {
+		uint64_t val = masks->_1cd;
+
+		/*
+		 * OSXSAVE defaults to 1, which causes fast-forwarding of
+		 * Xen's real setting. Clobber it if disabled by the guest
+		 * kernel.
+		 */
+		if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
+		    !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
+			val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE);
+
+		if (unlikely(these_masks->_1cd != val)) {
+			wrmsrl(msr_basic, val);
+			these_masks->_1cd = val;
+		}
+	}
+
 #define LAZY(msr, field) \
 	({ \
 		if (unlikely(these_masks->field != masks->field) && \
@@ -192,7 +210,6 @@ static void intel_ctxt_switch_levelling(
 		} \
 	})
 
-	LAZY(msr_basic, _1cd);
 	LAZY(msr_ext, e1cd);
 	LAZY(msr_xsave, Da1);
 
@@ -218,6 +235,11 @@ static void __init noinline intel_init_l
 		ecx &= opt_cpuid_mask_ecx;
 		edx &= opt_cpuid_mask_edx;
 
+		/* Fast-forward bits - Must be set. */
+		if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
+			ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+		edx |= cpufeat_mask(X86_FEATURE_APIC);
+
 		cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx;
 	}
 
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -110,10 +110,18 @@ static void update_domain_cpuid_info(str
         case X86_VENDOR_INTEL:
             /*
              * Intel masking MSRs are documented as AND masks.
-             * Experimentally, they are applied before OSXSAVE and APIC
+             * Experimentally, they are applied after OSXSAVE and APIC
              * are fast-forwarded from real hardware state.
              */
             mask &= ((uint64_t)edx << 32) | ecx;
+
+            if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
+                ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
+            else
+                ecx = 0;
+            edx = cpufeat_mask(X86_FEATURE_APIC);
+
+            mask |= ((uint64_t)edx << 32) | ecx;
             break;
 
         case X86_VENDOR_AMD:
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2696,6 +2696,7 @@ static int emulate_privileged_op(struct
         case 4: /* Write CR4 */
             v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg);
             write_cr4(pv_guest_cr4_to_real_cr4(v));
+            ctxt_switch_levelling(v);
             break;
 
         default: