# Commit ef437690af8b75e6758dce77af75a22b63982883 # Date 2014-03-28 13:33:34 +0100 # Author Jan Beulich # Committer Jan Beulich x86/HVM: correct CPUID leaf 80000008 handling CPUID[80000008].EAX[23:16] have been given the meaning of the guest physical address restriction (in case it needs to be smaller than the host's), hence we need to mirror that into vCPUID[80000008].EAX[7:0]. Enforce a lower limit at the same time, as well as a fixed value for the virtual address bits, and zero for the guest physical address ones. In order for the vMTRR code to see these overrides we need to make it call hvm_cpuid() instead of domain_cpuid(), which in turn requires special casing (and relaxing) the controlling domain. This additionally should hide an ordering problem in the tools: Both xend and xl appear to be restoring a guest from its image before setting up the CPUID policy in the hypervisor, resulting in domain_cpuid() returning all zeros and hence the check in mtrr_var_range_msr_set() failing if the guest previously had more than the minimum 36 physical address bits. Signed-off-by: Jan Beulich Reviewed-by: Tim Deegan --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -2885,6 +2885,8 @@ void hvm_cpuid(unsigned int input, unsig switch ( input ) { + unsigned int sub_leaf, _eax, _ebx, _ecx, _edx; + case 0x1: /* Fix up VLAPIC details. */ *ebx &= 0x00FFFFFFu; @@ -2918,8 +2920,6 @@ void hvm_cpuid(unsigned int input, unsig *edx = v->vcpu_id * 2; break; case 0xd: - { - unsigned int sub_leaf, _eax, _ebx, _ecx, _edx; /* EBX value of main leaf 0 depends on enabled xsave features */ if ( count == 0 && v->arch.xcr0 ) { @@ -2936,7 +2936,7 @@ void hvm_cpuid(unsigned int input, unsig } } break; - } + case 0x80000001: /* We expose RDTSCP feature to guest only when tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */ @@ -2950,6 +2950,23 @@ void hvm_cpuid(unsigned int input, unsig if ( !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) ) *edx &= ~cpufeat_mask(X86_FEATURE_PSE36); break; + + case 0x80000008: + count = cpuid_eax(0x80000008); + count = (count >> 16) & 0xff ?: count & 0xff; + if ( (*eax & 0xff) > count ) + *eax = (*eax & ~0xff) | count; + + hvm_cpuid(1, NULL, NULL, NULL, &_edx); + count = _edx & (cpufeat_mask(X86_FEATURE_PAE) | + cpufeat_mask(X86_FEATURE_PSE36)) ? 36 : 32; + if ( (*eax & 0xff) < count ) + *eax = (*eax & ~0xff) | count; + + hvm_cpuid(0x80000001, NULL, NULL, NULL, &_edx); + *eax = (*eax & ~0xffff00) | (_edx & cpufeat_mask(X86_FEATURE_LM) + ? 0x3000 : 0x2000); + break; } } --- a/xen/arch/x86/hvm/mtrr.c +++ b/xen/arch/x86/hvm/mtrr.c @@ -145,7 +145,7 @@ bool_t is_var_mtrr_overlapped(struct mtr static int hvm_mtrr_pat_init(void) { - unsigned int i, j, phys_addr; + unsigned int i, j; memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl)); for ( i = 0; i < MTRR_NUM_TYPES; i++ ) @@ -172,11 +172,7 @@ static int hvm_mtrr_pat_init(void) } } - phys_addr = 36; - if ( cpuid_eax(0x80000000) >= 0x80000008 ) - phys_addr = (uint8_t)cpuid_eax(0x80000008); - - size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); + size_or_mask = ~((1 << (paddr_bits - PAGE_SHIFT)) - 1); return 0; } @@ -455,7 +451,7 @@ bool_t mtrr_fix_range_msr_set(struct mtr bool_t mtrr_var_range_msr_set( struct domain *d, struct mtrr_state *m, uint32_t msr, uint64_t msr_content) { - uint32_t index, type, phys_addr, eax, ebx, ecx, edx; + uint32_t index, type, phys_addr, eax; uint64_t msr_mask; uint64_t *var_range_base = (uint64_t*)m->var_ranges; @@ -468,16 +464,21 @@ bool_t mtrr_var_range_msr_set( type == 4 || type == 5 || type == 6)) ) return 0; - phys_addr = 36; - domain_cpuid(d, 0x80000000, 0, &eax, &ebx, &ecx, &edx); - if ( eax >= 0x80000008 ) + if ( d == current->domain ) { - domain_cpuid(d, 0x80000008, 0, &eax, &ebx, &ecx, &edx); - phys_addr = (uint8_t)eax; + phys_addr = 36; + hvm_cpuid(0x80000000, &eax, NULL, NULL, NULL); + if ( eax >= 0x80000008 ) + { + hvm_cpuid(0x80000008, &eax, NULL, NULL, NULL); + phys_addr = (uint8_t)eax; + } } + else + phys_addr = paddr_bits; msr_mask = ~((((uint64_t)1) << phys_addr) - 1); msr_mask |= (index & 1) ? 0x7ffUL : 0xf00UL; - if ( unlikely(msr_content && (msr_content & msr_mask)) ) + if ( unlikely(msr_content & msr_mask) ) { HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n", msr_content);