References: bnc#833796

# Commit 4cc1344447a0458df5d222960f2adf1b65084fa8
# Date 2013-09-09 14:36:54 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/xsave: fix migration from xsave-capable to xsave-incapable host

With CPUID features suitably masked this is supposed to work, but was
completely broken (i.e. the case wasn't even considered when the
original xsave save/restore code was written).

First of all, xsave_enabled() wrongly returned the value of
cpu_has_xsave, i.e. not even taking into consideration attributes of
the vCPU in question. Instead this function ought to check whether the
guest ever enabled xsave support (by writing a [non-zero] value to
XCR0). As a result of this, a vCPU's xcr0 and xcr0_accum must no longer
be initialized to XSTATE_FP_SSE (since that's a valid value a guest
could write to XCR0), and the xsave/xrstor as well as the context
switch code need to suitably account for this (by always enforcing at
least this part of the state to be saved/loaded).
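
As an illustration only - a standalone C sketch, not part of the patch,
with names merely mirroring the hunks below - the intended semantics are:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define XSTATE_FP     (1ULL << 0)
    #define XSTATE_SSE    (1ULL << 1)
    #define XSTATE_FP_SSE (XSTATE_FP | XSTATE_SSE)

    struct vcpu_xstate {
        uint64_t xcr0;       /* last value the guest wrote to XCR0, 0 if never */
        uint64_t xcr0_accum; /* every bit the guest ever enabled in XCR0 */
    };

    /* "Has the guest ever turned xsave on?", not "does the host have xsave?" */
    static bool vcpu_uses_xsave(const struct vcpu_xstate *v)
    {
        return v->xcr0_accum != 0;
    }

    /* Value loaded into hardware XCR0 around save/restore and context switch:
     * never less than FP+SSE, even for a guest that never touched XCR0. */
    static uint64_t effective_xcr0(const struct vcpu_xstate *v)
    {
        return v->xcr0 ? v->xcr0 : XSTATE_FP_SSE;
    }

    int main(void)
    {
        struct vcpu_xstate fresh = { 0, 0 };
        printf("uses xsave: %d, effective XCR0: %#llx\n",
               vcpu_uses_xsave(&fresh),
               (unsigned long long)effective_xcr0(&fresh));
        return 0;
    }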

This involves undoing large parts of c/s 22945:13a7d1f7f62c ("x86: add
strictly sanity check for XSAVE/XRSTOR") - we need to cleanly
distinguish between hardware capabilities and vCPU used features.

Next both HVM and PV save code needed tweaking to not always save the
full state supported by the underlying hardware, but just the parts
that the guest actually used. Similarly the restore code should bail
not just on state being restored that the hardware cannot handle, but
also on inconsistent save state (inconsistent XCR0 settings or size of
saved state not in line with XCR0).
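
The consistency rules the restore path enforces can be sketched as follows
(standalone C, not part of the patch; it mirrors the validate_xstate()
helper introduced below, with host_xfeature_mask standing in for the
restoring host's supported-feature mask):

    #include <stdint.h>
    #include <errno.h>

    #define XSTATE_FP  (1ULL << 0)
    #define XSTATE_SSE (1ULL << 1)
    #define XSTATE_YMM (1ULL << 2)

    int check_xstate(uint64_t xcr0, uint64_t xcr0_accum, uint64_t xstate_bv,
                     uint64_t saved_feat_mask, uint64_t host_xfeature_mask)
    {
        if ( (xcr0_accum & ~saved_feat_mask) || /* accum exceeds saved mask */
             (xstate_bv & ~xcr0_accum) ||       /* state present, never enabled */
             (xcr0 & ~xcr0_accum) ||            /* XCR0 exceeds accumulated set */
             !(xcr0 & XSTATE_FP) ||             /* x87 bit must always be set */
             ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) ||
             ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
            return -EINVAL;     /* inconsistent save image */

        if ( xcr0_accum & ~host_xfeature_mask )
            return -EOPNOTSUPP; /* target host cannot handle these features */

        return 0;
    }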

And finally the PV extended context get/set code needs to use slightly
different logic than the HVM one, as here we can't just key off of
xsave_enabled() (i.e. avoid doing anything if a guest doesn't use
xsave) because the tools use this function to determine host
capabilities as well as read/write vCPU state. The set operation in
particular needs to be capable of cleanly dealing with input that
consists of only the xcr0 and xcr0_accum values (if they're both zero
then no further data is required).
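
A rough sketch of how such a minimal "set" input can be handled (standalone
C, not part of the patch; parse_pv_xstate_buf is an illustrative helper and
the minimum-area size is the 512-byte FXSAVE image plus 64-byte header):

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>
    #include <errno.h>

    #define XSTATE_AREA_MIN_SIZE (512 + 64) /* FXSAVE image + xsave header */

    /* Returns 0 if the two-word header alone is acceptable (both zero),
     * 1 if an xsave area follows and still needs validation,
     * or a negative errno value for malformed input. */
    int parse_pv_xstate_buf(const void *buf, size_t size,
                            uint64_t *xcr0, uint64_t *xcr0_accum)
    {
        if ( size < 2 * sizeof(uint64_t) )
            return -EINVAL;              /* header is the required minimum */

        memcpy(xcr0, buf, sizeof(*xcr0));
        memcpy(xcr0_accum, (const uint8_t *)buf + sizeof(*xcr0),
               sizeof(*xcr0_accum));

        if ( !*xcr0 && !*xcr0_accum )
            return 0;                    /* no further data required */

        if ( size < 2 * sizeof(uint64_t) + XSTATE_AREA_MIN_SIZE )
            return -EINVAL;              /* too small to hold an xsave area */

        return 1;                        /* caller must validate the area */
    }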

While for things to work correctly both sides (saving _and_ restoring
host) need to run with the fixed code, afaict no breakage should occur
if either side isn't up to date (other than the breakage that this
patch attempts to fix).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Yang Zhang <yang.z.zhang@intel.com>
Acked-by: Keir Fraser <keir@xen.org>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -618,7 +618,7 @@ unsigned long pv_guest_cr4_fixup(const s
         hv_cr4_mask &= ~X86_CR4_DE;
     if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
         hv_cr4_mask &= ~X86_CR4_FSGSBASE;
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave )
         hv_cr4_mask &= ~X86_CR4_OSXSAVE;
 
     if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
@@ -1347,9 +1347,13 @@ static void __context_switch(void)
     if ( !is_idle_vcpu(n) )
     {
         memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
-        if ( xsave_enabled(n) && n->arch.xcr0 != get_xcr0() &&
-             !set_xcr0(n->arch.xcr0) )
-            BUG();
+        if ( cpu_has_xsave )
+        {
+            u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;
+
+            if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
+                BUG();
+        }
         vcpu_restore_fpu_eager(n);
         n->arch.ctxt_switch_to(n);
     }
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1047,11 +1047,8 @@ long arch_do_domctl(
         struct xen_domctl_vcpuextstate *evc;
         struct vcpu *v;
         uint32_t offset = 0;
-        uint64_t _xfeature_mask = 0;
-        uint64_t _xcr0, _xcr0_accum;
-        void *receive_buf = NULL, *_xsave_area;
 
-#define PV_XSAVE_SIZE (2 * sizeof(uint64_t) + xsave_cntxt_size)
+#define PV_XSAVE_SIZE(xcr0) (2 * sizeof(uint64_t) + xstate_ctxt_size(xcr0))
 
         evc = &domctl->u.vcpuextstate;
 
@@ -1062,15 +1059,16 @@ long arch_do_domctl(
 
         if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
         {
+            unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
+
             if ( !evc->size && !evc->xfeature_mask )
             {
                 evc->xfeature_mask = xfeature_mask;
-                evc->size = PV_XSAVE_SIZE;
+                evc->size = size;
                 ret = 0;
                 goto vcpuextstate_out;
             }
-            if ( evc->size != PV_XSAVE_SIZE ||
-                 evc->xfeature_mask != xfeature_mask )
+            if ( evc->size != size || evc->xfeature_mask != xfeature_mask )
             {
                 ret = -EINVAL;
                 goto vcpuextstate_out;
@@ -1093,7 +1091,7 @@ long arch_do_domctl(
             offset += sizeof(v->arch.xcr0_accum);
             if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
                                       offset, (void *)v->arch.xsave_area,
-                                      xsave_cntxt_size) )
+                                      size - 2 * sizeof(uint64_t)) )
             {
                 ret = -EFAULT;
                 goto vcpuextstate_out;
@@ -1101,13 +1099,14 @@ long arch_do_domctl(
         }
         else
         {
-            ret = -EINVAL;
+            void *receive_buf;
+            uint64_t _xcr0, _xcr0_accum;
+            const struct xsave_struct *_xsave_area;
 
-            _xfeature_mask = evc->xfeature_mask;
-            /* xsave context must be restored on compatible target CPUs */
-            if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
-                goto vcpuextstate_out;
-            if ( evc->size > PV_XSAVE_SIZE || evc->size < 2 * sizeof(uint64_t) )
+            ret = -EINVAL;
+            if ( evc->size < 2 * sizeof(uint64_t) ||
+                 evc->size > 2 * sizeof(uint64_t) +
+                             xstate_ctxt_size(xfeature_mask) )
                 goto vcpuextstate_out;
 
             receive_buf = xmalloc_bytes(evc->size);
@@ -1128,20 +1127,30 @@ long arch_do_domctl(
             _xcr0_accum = *(uint64_t *)(receive_buf + sizeof(uint64_t));
             _xsave_area = receive_buf + 2 * sizeof(uint64_t);
 
-            if ( !(_xcr0 & XSTATE_FP) || _xcr0 & ~xfeature_mask )
+            if ( _xcr0_accum )
             {
-                xfree(receive_buf);
-                goto vcpuextstate_out;
+                if ( evc->size >= 2 * sizeof(uint64_t) + XSTATE_AREA_MIN_SIZE )
+                    ret = validate_xstate(_xcr0, _xcr0_accum,
+                                          _xsave_area->xsave_hdr.xstate_bv,
+                                          evc->xfeature_mask);
             }
-            if ( (_xcr0 & _xcr0_accum) != _xcr0 )
+            else if ( !_xcr0 )
+                ret = 0;
+            if ( ret )
             {
                 xfree(receive_buf);
                 goto vcpuextstate_out;
             }
 
-            v->arch.xcr0 = _xcr0;
-            v->arch.xcr0_accum = _xcr0_accum;
-            memcpy(v->arch.xsave_area, _xsave_area, evc->size - 2 * sizeof(uint64_t) );
+            if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) )
+            {
+                v->arch.xcr0 = _xcr0;
+                v->arch.xcr0_accum = _xcr0_accum;
+                memcpy(v->arch.xsave_area, _xsave_area,
+                       evc->size - 2 * sizeof(uint64_t));
+            }
+            else
+                ret = -EINVAL;
 
             xfree(receive_buf);
         }
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -906,14 +906,12 @@ static int hvm_load_cpu_ctxt(struct doma
     hvm_set_segment_register(v, x86_seg_ldtr, &seg);
 
     /* In case xsave-absent save file is restored on a xsave-capable host */
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave && !xsave_enabled(v) )
     {
         struct xsave_struct *xsave_area = v->arch.xsave_area;
 
         memcpy(v->arch.xsave_area, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
         xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
-        v->arch.xcr0_accum = XSTATE_FP_SSE;
-        v->arch.xcr0 = XSTATE_FP_SSE;
     }
     else
         memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
@@ -957,7 +955,9 @@ static int hvm_load_cpu_ctxt(struct doma
 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                           1, HVMSR_PER_VCPU);
 
-#define HVM_CPU_XSAVE_SIZE (3 * sizeof(uint64_t) + xsave_cntxt_size)
+#define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \
+                                           save_area) + \
+                                  xstate_ctxt_size(xcr0))
 
 static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
 {
@@ -969,20 +969,20 @@ static int hvm_save_cpu_xsave_states(str
 
     for_each_vcpu ( d, v )
     {
+        unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum);
+
         if ( !xsave_enabled(v) )
            continue;
-        if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, HVM_CPU_XSAVE_SIZE) )
+        if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size) )
            return 1;
        ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
-        h->cur += HVM_CPU_XSAVE_SIZE;
-        memset(ctxt, 0, HVM_CPU_XSAVE_SIZE);
+        h->cur += size;
 
        ctxt->xfeature_mask = xfeature_mask;
        ctxt->xcr0 = v->arch.xcr0;
        ctxt->xcr0_accum = v->arch.xcr0_accum;
-        if ( v->fpu_initialised )
-            memcpy(&ctxt->save_area,
-                v->arch.xsave_area, xsave_cntxt_size);
+        memcpy(&ctxt->save_area, v->arch.xsave_area,
+               size - offsetof(struct hvm_hw_cpu_xsave, save_area));
     }
 
     return 0;
@@ -990,11 +990,11 @@ static int hvm_save_cpu_xsave_states(str
 
 static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
 {
-    int vcpuid;
+    unsigned int vcpuid, size;
+    int err;
     struct vcpu *v;
     struct hvm_hw_cpu_xsave *ctxt;
     struct hvm_save_descriptor *desc;
-    uint64_t _xfeature_mask;
 
     /* Which vcpu is this? */
     vcpuid = hvm_load_instance(h);
@@ -1006,47 +1006,74 @@ static int hvm_load_cpu_xsave_states(str
     }
 
     /* Fails since we can't restore an img saved on xsave-capable host. */
-    if ( !xsave_enabled(v) )
-        return -EINVAL;
+    if ( !cpu_has_xsave )
+        return -EOPNOTSUPP;
 
     /* Customized checking for entry since our entry is of variable length */
     desc = (struct hvm_save_descriptor *)&h->data[h->cur];
     if ( sizeof (*desc) > h->size - h->cur)
     {
         printk(XENLOG_G_WARNING
-               "HVM%d restore: not enough data left to read descriptor"
-               "for type %u\n", d->domain_id, CPU_XSAVE_CODE);
-        return -1;
+               "HVM%d.%d restore: not enough data left to read xsave descriptor\n",
+               d->domain_id, vcpuid);
+        return -ENODATA;
     }
     if ( desc->length + sizeof (*desc) > h->size - h->cur)
     {
         printk(XENLOG_G_WARNING
-               "HVM%d restore: not enough data left to read %u bytes "
-               "for type %u\n", d->domain_id, desc->length, CPU_XSAVE_CODE);
-        return -1;
+               "HVM%d.%d restore: not enough data left to read %u xsave bytes\n",
+               d->domain_id, vcpuid, desc->length);
+        return -ENODATA;
+    }
+    if ( desc->length < offsetof(struct hvm_hw_cpu_xsave, save_area) +
+                        XSTATE_AREA_MIN_SIZE )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore mismatch: xsave length %u < %zu\n",
+               d->domain_id, vcpuid, desc->length,
+               offsetof(struct hvm_hw_cpu_xsave,
+                        save_area) + XSTATE_AREA_MIN_SIZE);
+        return -EINVAL;
     }
-    if ( CPU_XSAVE_CODE != desc->typecode || (desc->length > HVM_CPU_XSAVE_SIZE) )
+    size = HVM_CPU_XSAVE_SIZE(xfeature_mask);
+    if ( desc->length > size )
     {
         printk(XENLOG_G_WARNING
-               "HVM%d restore mismatch: expected type %u with max length %u, "
-               "saw type %u length %u\n", d->domain_id, CPU_XSAVE_CODE,
-               (unsigned int)HVM_CPU_XSAVE_SIZE,
-               desc->typecode, desc->length);
-        return -1;
+               "HVM%d.%d restore mismatch: xsave length %u > %u\n",
+               d->domain_id, vcpuid, desc->length, size);
+        return -EOPNOTSUPP;
     }
     h->cur += sizeof (*desc);
-    /* Checking finished */
 
     ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
     h->cur += desc->length;
 
-    _xfeature_mask = ctxt->xfeature_mask;
-    if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
-        return -EINVAL;
+    err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum,
+                          ctxt->save_area.xsave_hdr.xstate_bv,
+                          ctxt->xfeature_mask);
+    if ( err )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore: inconsistent xsave state (feat=%#"PRIx64
+               " accum=%#"PRIx64" xcr0=%#"PRIx64" bv=%#"PRIx64" err=%d)\n",
+               d->domain_id, vcpuid, ctxt->xfeature_mask, ctxt->xcr0_accum,
+               ctxt->xcr0, ctxt->save_area.xsave_hdr.xstate_bv, err);
+        return err;
+    }
+    size = HVM_CPU_XSAVE_SIZE(ctxt->xcr0_accum);
+    if ( desc->length > size )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore mismatch: xsave length %u > %u\n",
+               d->domain_id, vcpuid, desc->length, size);
+        return -EOPNOTSUPP;
+    }
+    /* Checking finished */
 
     v->arch.xcr0 = ctxt->xcr0;
     v->arch.xcr0_accum = ctxt->xcr0_accum;
-    memcpy(v->arch.xsave_area, &ctxt->save_area, xsave_cntxt_size);
+    memcpy(v->arch.xsave_area, &ctxt->save_area,
+           desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area));
 
     return 0;
 }
@@ -1060,7 +1087,8 @@ static int __init __hvm_register_CPU_XSA
                         "CPU_XSAVE",
                         hvm_save_cpu_xsave_states,
                         hvm_load_cpu_xsave_states,
-                        HVM_CPU_XSAVE_SIZE + sizeof (struct hvm_save_descriptor),
+                        HVM_CPU_XSAVE_SIZE(xfeature_mask) +
+                        sizeof(struct hvm_save_descriptor),
                         HVMSR_PER_VCPU);
     return 0;
 }
@@ -2767,7 +2795,7 @@ void hvm_cpuid(unsigned int input, unsig
             __clear_bit(X86_FEATURE_APIC & 31, edx);
 
         /* Fix up OSXSAVE. */
-        if ( xsave_enabled(v) )
+        if ( cpu_has_xsave )
             *ecx |= (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ?
                     cpufeat_mask(X86_FEATURE_OSXSAVE) : 0;
 
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -947,8 +947,7 @@ static int construct_vmcs(struct vcpu *v
     /* Host control registers. */
     v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
     __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
-    __vmwrite(HOST_CR4,
-              mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
+    __vmwrite(HOST_CR4, mmu_cr4_features);
 
     /* Host CS:RIP. */
     __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -38,14 +38,15 @@ static inline void fpu_xrstor(struct vcp
 {
     bool_t ok;
 
+    ASSERT(v->arch.xsave_area);
     /*
      * XCR0 normally represents what guest OS set. In case of Xen itself,
-     * we set all supported feature mask before doing save/restore.
+     * we set the accumulated feature mask before doing save/restore.
      */
-    ok = set_xcr0(v->arch.xcr0_accum);
+    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
     ASSERT(ok);
     xrstor(v, mask);
-    ok = set_xcr0(v->arch.xcr0);
+    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
     ASSERT(ok);
 }
 
@@ -124,13 +125,15 @@ static inline void fpu_xsave(struct vcpu
 {
     bool_t ok;
 
-    /* XCR0 normally represents what guest OS set. In case of Xen itself,
-     * we set all accumulated feature mask before doing save/restore.
+    ASSERT(v->arch.xsave_area);
+    /*
+     * XCR0 normally represents what guest OS set. In case of Xen itself,
+     * we set the accumulated feature mask before doing save/restore.
      */
-    ok = set_xcr0(v->arch.xcr0_accum);
+    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
     ASSERT(ok);
     xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY);
-    ok = set_xcr0(v->arch.xcr0);
+    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
     ASSERT(ok);
 }
 
@@ -238,7 +241,7 @@ void vcpu_restore_fpu_lazy(struct vcpu *
     if ( v->fpu_dirtied )
         return;
 
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave )
         fpu_xrstor(v, XSTATE_LAZY);
     else if ( v->fpu_initialised )
     {
@@ -268,7 +271,7 @@ void vcpu_save_fpu(struct vcpu *v)
     /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
    clts();
 
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave )
         fpu_xsave(v);
     else if ( cpu_has_fxsr )
         fpu_fxsave(v);
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -816,7 +816,7 @@ static void pv_cpuid(struct cpu_user_reg
         __clear_bit(X86_FEATURE_PDCM % 32, &c);
         __clear_bit(X86_FEATURE_PCID % 32, &c);
         __clear_bit(X86_FEATURE_DCA % 32, &c);
-        if ( !xsave_enabled(current) )
+        if ( !cpu_has_xsave )
         {
             __clear_bit(X86_FEATURE_XSAVE % 32, &c);
             __clear_bit(X86_FEATURE_AVX % 32, &c);
@@ -841,7 +841,7 @@ static void pv_cpuid(struct cpu_user_reg
         break;
 
     case 0x0000000d: /* XSAVE */
-        if ( !xsave_enabled(current) )
+        if ( !cpu_has_xsave )
            goto unsupported;
         break;
 
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -21,7 +21,7 @@ bool_t __read_mostly cpu_has_xsaveopt;
  * the supported and enabled features on the processor, including the
  * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known.
  */
-u32 xsave_cntxt_size;
+static u32 __read_mostly xsave_cntxt_size;
 
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 xfeature_mask;
@@ -206,13 +206,13 @@ void xrstor(struct vcpu *v, uint64_t mas
 
 bool_t xsave_enabled(const struct vcpu *v)
 {
-    if ( cpu_has_xsave )
-    {
-        ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
-        ASSERT(v->arch.xsave_area);
-    }
+    if ( !cpu_has_xsave )
+        return 0;
 
-    return cpu_has_xsave;
+    ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
+    ASSERT(v->arch.xsave_area);
+
+    return !!v->arch.xcr0_accum;
 }
 
 int xstate_alloc_save_area(struct vcpu *v)
@@ -234,8 +234,8 @@ int xstate_alloc_save_area(struct vcpu *
     save_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
 
     v->arch.xsave_area = save_area;
-    v->arch.xcr0 = XSTATE_FP_SSE;
-    v->arch.xcr0_accum = XSTATE_FP_SSE;
+    v->arch.xcr0 = 0;
+    v->arch.xcr0_accum = 0;
 
     return 0;
 }
@@ -253,7 +253,11 @@ void xstate_init(bool_t bsp)
     u64 feature_mask;
 
     if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
+    {
+        BUG_ON(!bsp);
+        setup_clear_cpu_cap(X86_FEATURE_XSAVE);
         return;
+    }
 
     cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 
@@ -273,7 +277,6 @@ void xstate_init(bool_t bsp)
     set_in_cr4(X86_CR4_OSXSAVE);
     if ( !set_xcr0(feature_mask) )
         BUG();
-    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 
     if ( bsp )
     {
@@ -282,14 +285,14 @@ void xstate_init(bool_t bsp)
          * xsave_cntxt_size is the max size required by enabled features.
          * We know FP/SSE and YMM about eax, and nothing about edx at present.
          */
-        xsave_cntxt_size = ebx;
+        xsave_cntxt_size = xstate_ctxt_size(feature_mask);
         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
                __func__, xsave_cntxt_size, xfeature_mask);
     }
     else
     {
         BUG_ON(xfeature_mask != feature_mask);
-        BUG_ON(xsave_cntxt_size != ebx);
+        BUG_ON(xsave_cntxt_size != xstate_ctxt_size(feature_mask));
     }
 
     /* Check XSAVEOPT feature. */
@@ -300,6 +303,42 @@ void xstate_init(bool_t bsp)
     BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT));
 }
 
+unsigned int xstate_ctxt_size(u64 xcr0)
+{
+    u32 ebx = 0;
+
+    if ( xcr0 )
+    {
+        u64 act_xcr0 = get_xcr0();
+        u32 eax, ecx, edx;
+        bool_t ok = set_xcr0(xcr0);
+
+        ASSERT(ok);
+        cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+        ASSERT(ebx <= ecx);
+        ok = set_xcr0(act_xcr0);
+        ASSERT(ok);
+    }
+
+    return ebx;
+}
+
+int validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv, u64 xfeat_mask)
+{
+    if ( (xcr0_accum & ~xfeat_mask) ||
+         (xstate_bv & ~xcr0_accum) ||
+         (xcr0 & ~xcr0_accum) ||
+         !(xcr0 & XSTATE_FP) ||
+         ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) ||
+         ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
+        return -EINVAL;
+
+    if ( xcr0_accum & ~xfeature_mask )
+        return -EOPNOTSUPP;
+
+    return 0;
+}
+
 int handle_xsetbv(u32 index, u64 new_bv)
 {
     struct vcpu *curr = current;
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -456,9 +456,9 @@ unsigned long pv_guest_cr4_fixup(const s
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
       | (mmu_cr4_features                                   \
-         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP))      \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)          \
-      | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0))          \
+         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
+            X86_CR4_OSXSAVE))                               \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
      & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c)                         \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -368,7 +368,7 @@ static inline int hvm_event_pending(stru
         ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)   \
                       ? X86_CR4_VMXE : 0)  |                \
         (cpu_has_pcid ? X86_CR4_PCIDE : 0) |                \
-        (xsave_enabled(_v) ? X86_CR4_OSXSAVE : 0))))
+        (cpu_has_xsave ? X86_CR4_OSXSAVE : 0))))
 
 /* These exceptions must always be intercepted. */
 #define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op))
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -33,7 +33,6 @@
 #define XSTATE_NONLAZY (XSTATE_LWP)
 #define XSTATE_LAZY    (XSTATE_ALL & ~XSTATE_NONLAZY)
 
-extern unsigned int xsave_cntxt_size;
 extern u64 xfeature_mask;
 
 /* extended state save area */
@@ -76,11 +75,14 @@ uint64_t get_xcr0(void);
 void xsave(struct vcpu *v, uint64_t mask);
 void xrstor(struct vcpu *v, uint64_t mask);
 bool_t xsave_enabled(const struct vcpu *v);
+int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv,
+                                 u64 xfeat_mask);
 int __must_check handle_xsetbv(u32 index, u64 new_bv);
 
 /* extended state init and cleanup functions */
 void xstate_free_save_area(struct vcpu *v);
 int xstate_alloc_save_area(struct vcpu *v);
 void xstate_init(bool_t bsp);
+unsigned int xstate_ctxt_size(u64 xcr0);
 
 #endif /* __ASM_XSTATE_H */