Index: xen-3.2-testing/tools/libxc/xc_hvm_build.c
===================================================================
--- xen-3.2-testing.orig/tools/libxc/xc_hvm_build.c
+++ xen-3.2-testing/tools/libxc/xc_hvm_build.c
@@ -20,6 +20,7 @@
 #include
 
 #define SCRATCH_PFN 0xFFFFF
+#define HVM_IDENT_PT_PAGE 0xE8000
 
 static void build_e820map(void *e820_page, unsigned long long mem_size)
 {
@@ -154,6 +155,7 @@ static int setup_guest(int xc_handle,
     struct xen_add_to_physmap xatp;
     struct shared_info *shared_info;
     void *e820_page;
+    uint32_t *ident_pt;
     struct elf_binary elf;
     uint64_t v_start, v_end;
     int rc;
@@ -254,6 +256,18 @@ static int setup_guest(int xc_handle,
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
 
+    /* Set up the identity-map page table for EPT real mode: fill it with
+     * 32-bit, non-PAE superpage entries, each mapping 4MB of virtual
+     * address space onto the same physical address range. */
+    if ( (ident_pt = xc_map_foreign_range(
+              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
+              HVM_IDENT_PT_PAGE >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
+        ident_pt[i] = (i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
+                      _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;
+    munmap(ident_pt, PAGE_SIZE);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT, HVM_IDENT_PT_PAGE);
+
     /* Insert JMP instruction at address 0x0 to reach entry point. */
     entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
     if ( entry_eip != 0 )
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm.c
@@ -1969,6 +1969,10 @@ long do_hvm_op(unsigned long op, XEN_GUE
         }
         d->arch.hvm_domain.params[a.index] = a.value;
         rc = 0;
+
+        if ( paging_mode_hap(d) && (a.index == HVM_PARAM_IDENT_PT) )
+            for_each_vcpu(d, v)
+                paging_update_cr3(v);
     }
     else
     {
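
The identity-map table exists because VMX of this generation cannot run a guest with paging disabled: when an EPT guest clears CR0.PG, vmx_update_guest_cr() later in this patch points the hardware CR3 at this pre-built table (via HVM_PARAM_IDENT_PT) and forces CR4.PSE on, so real-mode physical addresses translate 1:1 through 4MB superpages. A minimal user-space sketch, not part of the patch, of what the builder loop writes, assuming the standard x86 flag values (P=0x001, RW=0x002, US=0x004, A=0x020, D=0x040, PSE=0x080):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t flags = 0x001 | 0x002 | 0x004 | 0x020 | 0x040 | 0x080;
        for ( uint32_t i = 0; i < 1024; i += 256 )
            /* Entry i maps virtual [i<<22, (i+1)<<22) onto the identical
             * physical range; the 1024 entries in one 4KB page cover the
             * whole 4GB address space. */
            printf("PDE[%4u] = 0x%08x -> maps 0x%08x\n",
                   i, (i << 22) | flags, i << 22);
        return 0;
    }
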
Index: xen-3.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ xen-3.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
@@ -105,11 +105,23 @@ static void vmx_init_vmcs_config(void)
 
     if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
     {
-        min = 0;
-        opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-               SECONDARY_EXEC_WBINVD_EXITING);
+        u32 min2 = 0, opt2;
+
+        opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_WBINVD_EXITING |
+               SECONDARY_EXEC_ENABLE_EPT;
         _vmx_secondary_exec_control = adjust_vmx_controls(
-            min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
+            min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2);
+
+        if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
+        {
+            /* INVLPG and CR3 accesses don't need to cause VM exits. */
+            min &= ~(CPU_BASED_INVLPG_EXITING |
+                     CPU_BASED_CR3_LOAD_EXITING |
+                     CPU_BASED_CR3_STORE_EXITING);
+            _vmx_cpu_based_exec_control = adjust_vmx_controls(
+                min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
+        }
     }
 
 #if defined(__i386__)
@@ -301,6 +313,8 @@ int vmx_cpu_up(void)
         return 0;
     }
 
+    ept_sync_all();
+
     return 1;
 }
 
@@ -439,6 +453,7 @@ void vmx_disable_intercept_for_msr(struc
 
 static int construct_vmcs(struct vcpu *v)
 {
+    struct domain *d = v->domain;
     uint16_t sysenter_cs;
     unsigned long sysenter_eip;
 
@@ -448,10 +463,23 @@ static int construct_vmcs(struct vcpu *v
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
     __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
     __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
-    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
+    v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
-    if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
-        __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
+    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
+
+    if ( paging_mode_shadow(d) )
+    {
+        v->arch.hvm_vmx.exec_control |= CPU_BASED_INVLPG_EXITING |
+                                        CPU_BASED_CR3_LOAD_EXITING |
+                                        CPU_BASED_CR3_STORE_EXITING;
+        v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+    }
+
+    if ( cpu_has_vmx_secondary_exec_control )
+        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+                  v->arch.hvm_vmx.secondary_exec_control);
+
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
 
     /* MSR access bitmap. */
     if ( cpu_has_vmx_msr_bitmap )
@@ -569,7 +597,10 @@ static int construct_vmcs(struct vcpu *v
     __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
 #endif
 
-    __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
+    if ( paging_mode_hap(d) )
+        __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK);
+    else
+        __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
 
     v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
     hvm_update_guest_cr(v, 0);
@@ -584,6 +615,19 @@ static int construct_vmcs(struct vcpu *v
         __vmwrite(TPR_THRESHOLD, 0);
     }
 
+    if ( paging_mode_hap(d) )
+    {
+        v->arch.hvm_vmx.ept_control.etmt = EPT_DEFAULT_MT;
+        v->arch.hvm_vmx.ept_control.gaw  = EPT_DEFAULT_GAW;
+        v->arch.hvm_vmx.ept_control.asr  =
+            pagetable_get_pfn(d->arch.phys_table);
+
+        __vmwrite(EPT_POINTER, v->arch.hvm_vmx.ept_control.eptp);
+#ifdef CONFIG_X86_PAE
+        __vmwrite(EPT_POINTER_HIGH, v->arch.hvm_vmx.ept_control.eptp >> 32);
+#endif
+    }
+
     vmx_vmcs_exit(v);
 
     paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
@@ -929,6 +973,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
            (uint32_t)vmr(IDT_VECTORING_ERROR_CODE));
     printk("TPR Threshold = 0x%02x\n",
            (uint32_t)vmr(TPR_THRESHOLD));
+    printk("EPT pointer = 0x%08x%08x\n",
+           (uint32_t)vmr(EPT_POINTER_HIGH), (uint32_t)vmr(EPT_POINTER));
 
     vmx_vmcs_exit(v);
 }
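
vmx_init_vmcs_config() discovers EPT support by offering SECONDARY_EXEC_ENABLE_EPT as an optional bit and letting adjust_vmx_controls() mask it against MSR_IA32_VMX_PROCBASED_CTLS2. A simplified sketch of that negotiation pattern (the real Xen helper additionally fails the boot-time check if a bit in `min` cannot be enabled); in every VMX capability MSR the low 32 bits are the allowed-0 settings (bits that must be 1) and the high 32 bits the allowed-1 settings (bits that may be 1):

    static u32 adjust_controls_sketch(u32 min, u32 opt, u64 msr_value)
    {
        u32 must_be_one = (u32)msr_value;
        u32 may_be_one  = (u32)(msr_value >> 32);
        u32 ctl = (min | opt) & may_be_one;  /* drop unsupported options */
        return ctl | must_be_one;            /* force required-on bits   */
    }
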
Index: xen-3.2-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-3.2-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -90,6 +90,8 @@ static int vmx_vcpu_initialise(struct vc
         return rc;
     }
 
+    ept_sync_domain(v);
+
     vmx_install_vlapic_mapping(v);
 
 #ifndef VMXASSIST
@@ -530,20 +532,23 @@ static int vmx_restore_cr0_cr3(
     unsigned long mfn = 0;
     p2m_type_t p2mt;
 
-    if ( cr0 & X86_CR0_PG )
+    if ( paging_mode_shadow(v->domain) )
     {
-        mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
-        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
+        if ( cr0 & X86_CR0_PG )
         {
-            gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
-            return -EINVAL;
+            mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
+            {
+                gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
+                return -EINVAL;
+            }
         }
-    }
 
-    if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
-        put_page(pagetable_get_page(v->arch.guest_table));
+        if ( hvm_paging_enabled(v) )
+            put_page(pagetable_get_page(v->arch.guest_table));
 
-    v->arch.guest_table = pagetable_from_pfn(mfn);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
+    }
 
     v->arch.hvm_vcpu.guest_cr[0] = cr0 | X86_CR0_ET;
     v->arch.hvm_vcpu.guest_cr[3] = cr3;
@@ -1014,6 +1019,45 @@ static enum hvm_intblk vmx_interrupt_blo
     return hvm_intblk_none;
 }
 
+/* The caller must check that the guest is switching to PAE mode. */
+static void vmx_load_pdptrs(struct vcpu *v)
+{
+    unsigned long cr3 = v->arch.hvm_vcpu.guest_cr[3], mfn;
+    p2m_type_t p2mt;
+    char *p;
+    u64 *guest_pdptrs;
+
+    if ( cr3 & 0x1fUL )
+    {
+        domain_crash(v->domain);
+        return;
+    }
+
+    mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
+    p = map_domain_page(mfn);
+
+    guest_pdptrs = (u64 *)(p + (cr3 & ~PAGE_MASK));
+
+    /* TODO: check that the guest PDPTRs are valid. */
+
+    vmx_vmcs_enter(v);
+
+    __vmwrite(GUEST_PDPTR0, guest_pdptrs[0]);
+    __vmwrite(GUEST_PDPTR1, guest_pdptrs[1]);
+    __vmwrite(GUEST_PDPTR2, guest_pdptrs[2]);
+    __vmwrite(GUEST_PDPTR3, guest_pdptrs[3]);
+#ifdef CONFIG_X86_PAE
+    __vmwrite(GUEST_PDPTR0_HIGH, guest_pdptrs[0] >> 32);
+    __vmwrite(GUEST_PDPTR1_HIGH, guest_pdptrs[1] >> 32);
+    __vmwrite(GUEST_PDPTR2_HIGH, guest_pdptrs[2] >> 32);
+    __vmwrite(GUEST_PDPTR3_HIGH, guest_pdptrs[3] >> 32);
+#endif
+
+    vmx_vmcs_exit(v);
+
+    unmap_domain_page(p);
+}
+
 static void vmx_update_host_cr3(struct vcpu *v)
 {
     ASSERT((v == current) || !vcpu_runnable(v));
@@ -1039,21 +1083,57 @@ static void vmx_update_guest_cr(struct v
             __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
         }
 
+        if ( paging_mode_hap(v->domain) )
+        {
+            if ( hvm_paging_enabled(v) )
+                v->arch.hvm_vmx.exec_control &=
+                    ~(CPU_BASED_CR3_LOAD_EXITING |
+                      CPU_BASED_CR3_STORE_EXITING);
+            else
+                v->arch.hvm_vmx.exec_control |=
+                    CPU_BASED_CR3_LOAD_EXITING |
+                    CPU_BASED_CR3_STORE_EXITING;
+            __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                      v->arch.hvm_vmx.exec_control);
+        }
+
         v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] |
-            X86_CR0_NE | X86_CR0_PG | X86_CR0_WP | X86_CR0_PE;
+            X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
+
+        if ( paging_mode_shadow(v->domain) )
+            v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_WP;
+
         __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
         __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
+        vmx_update_guest_cr(v, 4);
+        break;
     case 2:
         /* CR2 is updated in exit stub. */
         break;
     case 3:
+        if ( paging_mode_hap(v->domain) )
+        {
+            if ( !hvm_paging_enabled(v) )
+                v->arch.hvm_vcpu.hw_cr[3] =
+                    v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT];
+
+            /* With EPT, PAE guests need their PDPTRs loaded into the VMCS. */
+            if ( hvm_pae_enabled(v) &&
+                 !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
+                vmx_load_pdptrs(v);
+        }
+
         __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
         break;
     case 4:
-        v->arch.hvm_vcpu.hw_cr[4] =
-            v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
+        v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
+        if ( paging_mode_hap(v->domain) )
+            v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
+        v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
+        if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
+        {
+            v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
+            if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE )
+                v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
+        }
         __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
         __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
         break;
@@ -1121,6 +1201,7 @@ static int vmx_event_pending(struct vcpu
 
 static struct hvm_function_table vmx_function_table = {
     .name                 = "VMX",
+    .p2m_init             = ept_p2m_init,
     .domain_initialise    = vmx_domain_initialise,
     .domain_destroy       = vmx_domain_destroy,
     .vcpu_initialise      = vmx_vcpu_initialise,
@@ -1178,6 +1259,12 @@ void start_vmx(void)
         return;
     }
 
+    if ( cpu_has_vmx_ept )
+    {
+        printk("VMX: EPT is available.\n");
+        vmx_function_table.hap_supported = 1;
+    }
+
     setup_vmcs_dump();
 
     hvm_enable(&vmx_function_table);
@@ -2690,6 +2777,18 @@ void vmx_wbinvd_intercept(void)
         wbinvd();
 }
 
+static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
+{
+    if ( unlikely(((qualification >> 7) & 0x3) != 0x3) )
+    {
+        domain_crash(current->domain);
+        return;
+    }
+
+    /* Must be an MMIO access. */
+    handle_mmio(gpa);
+}
+
 static void vmx_failed_vmentry(unsigned int exit_reason,
                                struct cpu_user_regs *regs)
 {
@@ -2729,6 +2828,15 @@ asmlinkage void vmx_vmexit_handler(struc
     unsigned long exit_qualification, inst_len = 0;
     struct vcpu *v = current;
 
+    if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
+    {
+        __asm__ __volatile__ ("mov"__OS" %%cr2, %0"
+                              : "=r"(v->arch.hvm_vcpu.guest_cr[2]));
+
+        /* __hvm_copy() needs this when paging is enabled. */
+        v->arch.hvm_vcpu.guest_cr[3] = __vmread(GUEST_CR3);
+    }
+
     exit_reason = __vmread(VM_EXIT_REASON);
 
     hvmtrace_vmexit(v, regs->eip, exit_reason);
@@ -2969,6 +3077,21 @@ asmlinkage void vmx_vmexit_handler(struc
         break;
     }
 
+    case EXIT_REASON_EPT_VIOLATION:
+    {
+        paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+#ifdef CONFIG_X86_PAE
+        gpa |= (paddr_t)__vmread(GUEST_PHYSICAL_ADDRESS_HIGH) << 32;
+#endif
+        exit_qualification = __vmread(EXIT_QUALIFICATION);
+        ept_handle_violation(exit_qualification, gpa);
+        break;
+    }
+
+    case EXIT_REASON_EPT_MISCONFIG:
+        domain_crash(current->domain);
+        break;
+
     default:
     exit_and_crash:
         gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
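
ept_handle_violation() accepts only qualifications with bits 7 and 8 both set: bit 7 says the guest-linear-address field is valid, and bit 8 says the fault hit the final guest-physical translation rather than a guest paging structure. Together they are the signature of an ordinary access to unmapped guest-physical space, i.e. emulated MMIO; everything else crashes the domain. A hypothetical decoder for the bits this handler relies on (field layout per the VMX specification):

    static void decode_ept_qualification(unsigned long q)
    {
        int read       = !!(q & (1UL << 0));  /* data read            */
        int write      = !!(q & (1UL << 1));  /* data write           */
        int fetch      = !!(q & (1UL << 2));  /* instruction fetch    */
        int gla_valid  = !!(q & (1UL << 7));  /* linear addr valid    */
        int final_xlat = !!(q & (1UL << 8));  /* fault on the final
                                                 translation, not on a
                                                 paging structure     */

        gdprintk(XENLOG_INFO, "EPT: r=%d w=%d x=%d gla=%d final=%d\n",
                 read, write, fetch, gla_valid, final_xlat);
    }
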
Index: xen-3.2-testing/xen/arch/x86/mm/Makefile
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/mm/Makefile
+++ xen-3.2-testing/xen/arch/x86/mm/Makefile
@@ -3,3 +3,4 @@ subdir-y += hap
 
 obj-y += paging.o
 obj-y += p2m.o
+obj-y += p2m-ept.o
Index: xen-3.2-testing/xen/arch/x86/mm/p2m-ept.c
===================================================================
--- /dev/null
+++ xen-3.2-testing/xen/arch/x86/mm/p2m-ept.c
@@ -0,0 +1,177 @@
+/*
+ * p2m-ept.c: use the EPT page table as p2m
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/types.h>
+#include <asm/p2m.h>
+#include <asm/hvm/vmx/vmx.h>
+
+static int ept_next_level(struct domain *d, bool_t read_only,
+                          ept_entry_t **table, unsigned long *gfn_remainder,
+                          u32 shift)
+{
+    ept_entry_t *ept_entry, *next;
+    u32 index;
+
+    index = *gfn_remainder >> shift;
+    *gfn_remainder &= (1UL << shift) - 1;
+
+    ept_entry = (*table) + index;
+
+    if ( !(ept_entry->epte & 0x7) )
+    {
+        struct page_info *pg;
+
+        if ( read_only )
+            return 0;
+
+        pg = d->arch.p2m.alloc_page(d);
+        if ( pg == NULL )
+            return 0;
+        pg->count_info = 1;
+        pg->u.inuse.type_info = 1 | PGT_validated;
+        list_add_tail(&pg->list, &d->arch.p2m.pages);
+
+        ept_entry->emt = 0;
+        ept_entry->sp_avail = 0;
+        ept_entry->avail1 = 0;
+        ept_entry->mfn = page_to_mfn(pg);
+        ept_entry->rsvd = 0;
+        ept_entry->avail2 = 0;
+        /* last step */
+        ept_entry->r = ept_entry->w = ept_entry->x = 1;
+    }
+
+    next = map_domain_page(ept_entry->mfn);
+    unmap_domain_page(*table);
+    *table = next;
+
+    return 1;
+}
+
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+{
+    ept_entry_t *table =
+        map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+    unsigned long gfn_remainder = gfn;
+    ept_entry_t *ept_entry;
+    u32 index;
+    int i, rv = 0;
+
+    /* should check if gfn obeys GAW here */
+
+    for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+        if ( !ept_next_level(d, 0, &table, &gfn_remainder,
+                             i * EPT_TABLE_ORDER) )
+            goto out;
+
+    index = gfn_remainder;
+    ept_entry = table + index;
+
+    if ( mfn_valid(mfn_x(mfn)) )
+    {
+        /* Track the highest gfn for which we have ever had a valid mapping */
+        if ( gfn > d->arch.p2m.max_mapped_pfn )
+            d->arch.p2m.max_mapped_pfn = gfn;
+
+        ept_entry->emt = EPT_DEFAULT_MT;
+        ept_entry->sp_avail = 0;
+        ept_entry->avail1 = p2mt;
+        ept_entry->mfn = mfn_x(mfn);
+        ept_entry->rsvd = 0;
+        ept_entry->avail2 = 0;
+        /* last step */
+        ept_entry->r = ept_entry->w = ept_entry->x = 1;
+    }
+    else
+        ept_entry->epte = 0;
+
+    /* Success */
+    rv = 1;
+
+ out:
+    unmap_domain_page(table);
+    if ( d->vcpu[0] )
+        ept_sync_domain(d->vcpu[0]);
+    return rv;
+}
+
+/* Read ept p2m entries */
+static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+    ept_entry_t *table =
+        map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+    unsigned long gfn_remainder = gfn;
+    ept_entry_t *ept_entry;
+    u32 index;
+    int i;
+    mfn_t mfn = _mfn(INVALID_MFN);
+
+    *t = p2m_mmio_dm;
+
+    /* This pfn is higher than the highest the p2m map currently holds */
+    if ( gfn > d->arch.p2m.max_mapped_pfn )
+        goto out;
+
+    /* should check if gfn obeys GAW here */
+
+    for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+        if ( !ept_next_level(d, 1, &table, &gfn_remainder,
+                             i * EPT_TABLE_ORDER) )
+            goto out;
+
+    index = gfn_remainder;
+    ept_entry = table + index;
+
+    if ( (ept_entry->epte & 0x7) == 0x7 )
+    {
+        if ( ept_entry->avail1 != p2m_invalid )
+        {
+            *t = ept_entry->avail1;
+            mfn = _mfn(ept_entry->mfn);
+        }
+    }
+
+ out:
+    unmap_domain_page(table);
+    return mfn;
+}
+
+static mfn_t ept_get_entry_fast(unsigned long gfn, p2m_type_t *t)
+{
+    return ept_get_entry(current->domain, gfn, t);
+}
+
+void ept_p2m_init(struct domain *d)
+{
+    d->arch.p2m.set_entry = ept_set_entry;
+    d->arch.p2m.get_entry = ept_get_entry;
+    d->arch.p2m.get_entry_fast = ept_get_entry_fast;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
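
With EPT_DEFAULT_GAW of 3 and EPT_TABLE_ORDER of 9, ept_set_entry() and ept_get_entry() make three ept_next_level() calls, each peeling 9 bits off the gfn, and the remaining low 9 bits index the leaf table: a 4-level, 48-bit guest-physical walk. A stand-alone sketch of the index arithmetic, with an illustrative gfn:

    #include <stdio.h>

    int main(void)
    {
        unsigned long gfn = 0x12345678UL, remainder = gfn;
        for ( int i = 3; i > 0; i-- )
        {
            unsigned index = remainder >> (i * 9);
            remainder &= (1UL << (i * 9)) - 1;
            printf("level %d index: %u\n", i, index);
        }
        printf("leaf index: %lu\n", remainder);  /* gfn & 0x1ff */
        return 0;
    }
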
Index: xen-3.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-3.2-testing/xen/arch/x86/mm/p2m.c
@@ -28,6 +28,7 @@
 #include <asm/page.h>
 #include <asm/paging.h>
 #include <asm/p2m.h>
+#include <asm/hvm/hvm.h>
 
 /* Debugging and auditing of the P2M code? */
 #define P2M_AUDIT 0
@@ -202,7 +203,7 @@ p2m_next_level(struct domain *d, mfn_t *
 
 // Returns 0 on error (out of memory)
 static int
-set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
 {
     // XXX -- this might be able to be faster iff current->domain == d
     mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -266,14 +267,28 @@ set_p2m_entry(struct domain *d, unsigned
     return rv;
 }
 
+static mfn_t
+p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t);
+
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d)
 {
     p2m_lock_init(d);
     INIT_LIST_HEAD(&d->arch.p2m.pages);
+
+    d->arch.p2m.set_entry = p2m_set_entry;
+    d->arch.p2m.get_entry = p2m_gfn_to_mfn;
+    d->arch.p2m.get_entry_fast = p2m_gfn_to_mfn_fast;
+
+    if ( is_hvm_domain(d) )
+        hvm_p2m_init(d);
 }
 
+static inline
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+                  p2m_type_t p2mt)
+{
+    return d->arch.p2m.set_entry(d, gfn, mfn, p2mt);
+}
 
 // Allocate a new p2m table for a domain.
 //
@@ -392,8 +407,8 @@ void p2m_teardown(struct domain *d)
     p2m_unlock(d);
 }
 
-mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
+static mfn_t
+p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t)
 /* Read another domain's p2m entries */
 {
     mfn_t mfn;
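
The rename of set_p2m_entry() to p2m_set_entry() frees the old name to become a trampoline through d->arch.p2m.set_entry, so existing call sites compile unchanged while EPT domains transparently get ept_set_entry(). A caller-side sketch (the wrapper name below is illustrative, not a function in the patch):

    static int add_ram_page_sketch(struct domain *d, unsigned long gfn,
                                   mfn_t mfn)
    {
        /* Trampoline dispatches to p2m_set_entry() or ept_set_entry(),
         * depending on what p2m_init()/hvm_p2m_init() installed. */
        return set_p2m_entry(d, gfn, mfn, p2m_ram_rw);
    }
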
Index: xen-3.2-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-3.2-testing.orig/xen/include/asm-x86/domain.h
+++ xen-3.2-testing/xen/include/asm-x86/domain.h
@@ -131,6 +131,27 @@ struct hap_domain {
 /************************************************/
 /*          p2m handling                        */
 /************************************************/
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all
+ * variation in the access control bits is made in the level-1 PTEs.
+ *
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame.  We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+    p2m_invalid = 0,        /* Nothing mapped here */
+    p2m_ram_rw = 1,         /* Normal read/write guest RAM */
+    p2m_ram_logdirty = 2,   /* Temporarily read-only for log-dirty */
+    p2m_ram_ro = 3,         /* Read-only; writes go to the device model */
+    p2m_mmio_dm = 4,        /* Reads and writes go to the device model */
+    p2m_mmio_direct = 5,    /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
     spinlock_t         lock;
@@ -144,6 +165,11 @@ struct p2m_domain {
     struct page_info * (*alloc_page  )(struct domain *d);
     void               (*free_page   )(struct domain *d,
                                        struct page_info *pg);
+    int                (*set_entry   )(struct domain *d, unsigned long gfn,
+                                       mfn_t mfn, p2m_type_t p2mt);
+    mfn_t              (*get_entry   )(struct domain *d, unsigned long gfn,
+                                       p2m_type_t *p2mt);
+    mfn_t              (*get_entry_fast)(unsigned long gfn, p2m_type_t *p2mt);
 
     /* Highest guest frame that's ever been mapped in the p2m */
     unsigned long max_mapped_pfn;
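
Moving p2m_type_t into domain.h lets both backends share it: the classic p2m keeps the type in the PTE software-available bits 9-11 (see p2m_flags_to_type() retained in p2m.h below), while the EPT backend stores it in the 4-bit avail1 field of each ept_entry_t. A sketch of the shadow-side encoding, whose decode half mirrors the code the patch leaves in place:

    static inline unsigned long p2m_type_to_flags_sketch(p2m_type_t t)
    {
        return (unsigned long)t << 9;   /* bits 9-11 are sw-available */
    }

    static inline p2m_type_t p2m_flags_to_type_sketch(unsigned long flags)
    {
        return (flags >> 9) & 0x7;      /* 3 bits -> 8 possible types */
    }
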
Index: xen-3.2-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-3.2-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-3.2-testing/xen/include/asm-x86/hvm/hvm.h
@@ -60,6 +60,9 @@ struct hvm_function_table {
     /* Support Hardware-Assisted Paging? */
     int hap_supported;
 
+    /* Initialise p2m resources */
+    void (*p2m_init)(struct domain *d);
+
     /*
      * Initialise/destroy HVM domain/vcpu resources
      */
@@ -127,6 +130,12 @@ struct hvm_function_table {
 extern struct hvm_function_table hvm_funcs;
 extern int hvm_enabled;
 
+static inline void hvm_p2m_init(struct domain *d)
+{
+    if ( hvm_funcs.p2m_init )
+        return hvm_funcs.p2m_init(d);
+}
+
 int hvm_domain_initialise(struct domain *d);
 void hvm_domain_relinquish_resources(struct domain *d);
 void hvm_domain_destroy(struct domain *d);
Index: xen-3.2-testing/xen/include/asm-x86/hvm/vmx/vmcs.h
===================================================================
--- xen-3.2-testing.orig/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ xen-3.2-testing/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -57,6 +57,9 @@ struct vmx_msr_state {
     unsigned long msrs[VMX_MSR_COUNT];
 };
 
+#define EPT_DEFAULT_MT      6
+#define EPT_DEFAULT_GAW     3
+
 struct arch_vmx_struct {
     /* Virtual address of VMCS. */
     struct vmcs_struct  *vmcs;
@@ -73,8 +76,19 @@ struct arch_vmx_struct {
     int                  active_cpu;
     int                  launched;
 
+    union {
+        struct {
+            u64 etmt :3,
+                gaw  :3,
+                rsvd :6,
+                asr  :52;
+        };
+        u64 eptp;
+    } ept_control;
+
     /* Cache of cpu execution control. */
     u32                  exec_control;
+    u32                  secondary_exec_control;
 
 #ifdef __x86_64__
     struct vmx_msr_state msr_state;
@@ -111,6 +125,8 @@ void vmx_vmcs_exit(struct vcpu *v);
 #define CPU_BASED_MWAIT_EXITING               0x00000400
 #define CPU_BASED_RDPMC_EXITING               0x00000800
 #define CPU_BASED_RDTSC_EXITING               0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING            0x00008000
+#define CPU_BASED_CR3_STORE_EXITING           0x00010000
 #define CPU_BASED_CR8_LOAD_EXITING            0x00080000
 #define CPU_BASED_CR8_STORE_EXITING           0x00100000
 #define CPU_BASED_TPR_SHADOW                  0x00200000
@@ -139,6 +155,7 @@ extern u32 vmx_vmexit_control;
 extern u32 vmx_vmentry_control;
 
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 #define SECONDARY_EXEC_WBINVD_EXITING           0x00000040
 extern u32 vmx_secondary_exec_control;
@@ -154,6 +171,10 @@ extern bool_t cpu_has_vmx_ins_outs_instr
     (vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
+#define cpu_has_vmx_secondary_exec_control \
+    (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+#define cpu_has_vmx_ept \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define VMX_INTR_SHADOW_STI             0x00000001
@@ -195,11 +216,23 @@ enum vmcs_field {
     VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
     VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
     APIC_ACCESS_ADDR                = 0x00002014,
-    APIC_ACCESS_ADDR_HIGH           = 0x00002015, 
+    APIC_ACCESS_ADDR_HIGH           = 0x00002015,
+    EPT_POINTER                     = 0x0000201a,
+    EPT_POINTER_HIGH                = 0x0000201b,
+    GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+    GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
     VMCS_LINK_POINTER               = 0x00002800,
     VMCS_LINK_POINTER_HIGH          = 0x00002801,
     GUEST_IA32_DEBUGCTL             = 0x00002802,
     GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+    GUEST_PDPTR0                    = 0x0000280a,
+    GUEST_PDPTR0_HIGH               = 0x0000280b,
+    GUEST_PDPTR1                    = 0x0000280c,
+    GUEST_PDPTR1_HIGH               = 0x0000280d,
+    GUEST_PDPTR2                    = 0x0000280e,
+    GUEST_PDPTR2_HIGH               = 0x0000280f,
+    GUEST_PDPTR3                    = 0x00002810,
+    GUEST_PDPTR3_HIGH               = 0x00002811,
     PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
     CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
     EXCEPTION_BITMAP                = 0x00004004,
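
The ept_control union is a structured view of the architectural EPTP: bits 2:0 hold the memory type (EPT_DEFAULT_MT, 6, is write-back), bits 5:3 the guest address width (EPT_DEFAULT_GAW, 3, encodes (3+1)*9 + 12 = 48 bits, i.e. a 4-level table), and asr holds the p2m root's frame number from bit 12 upward. An equivalent scalar construction, as a sketch:

    static inline u64 make_eptp_sketch(unsigned long asr_mfn)
    {
        return ((u64)asr_mfn << 12) | (EPT_DEFAULT_GAW << 3) |
               EPT_DEFAULT_MT;
    }
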
Index: xen-3.2-testing/xen/include/asm-x86/hvm/vmx/vmx.h
===================================================================
--- xen-3.2-testing.orig/xen/include/asm-x86/hvm/vmx/vmx.h
+++ xen-3.2-testing/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -23,9 +23,27 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
+#include
+
+typedef union {
+    struct {
+        u64 r        :1,
+            w        :1,
+            x        :1,
+            emt      :4,
+            sp_avail :1,
+            avail1   :4,
+            mfn      :45,
+            rsvd     :5,
+            avail2   :2;
+    };
+    u64 epte;
+} ept_entry_t;
+
+#define EPT_TABLE_ORDER 9
 
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
@@ -85,6 +103,8 @@ int vmx_realmode_io_complete(void);
 #define EXIT_REASON_MACHINE_CHECK       41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD              54
 
 /*
@@ -151,12 +171,14 @@ int vmx_realmode_io_complete(void);
 #define VMREAD_OPCODE   ".byte 0x0f,0x78\n"
 #define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
 #define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
+#define INVEPT_OPCODE   ".byte 0x66,0x0f,0x38,0x80\n"   /* m128,r64/32 */
 #define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
 #define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
 
+#define MODRM_EAX_08    ".byte 0x08\n" /* ECX, [EAX] */
 #define MODRM_EAX_06    ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
 #define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
-#define MODRM_EAX_ECX   ".byte 0xc1\n" /* [EAX], [ECX] */
+#define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX */
 
 static inline void __vmptrld(u64 addr)
 {
@@ -240,6 +262,21 @@ static inline void __vm_clear_bit(unsign
     __vmwrite(field, __vmread(field) & ~(1UL << bit));
 }
 
+static inline void __invept(int ext, u64 eptp, u64 gpa)
+{
+    struct {
+        u64 eptp, gpa;
+    } operand = {eptp, gpa};
+
+    __asm__ __volatile__ ( INVEPT_OPCODE
+                           MODRM_EAX_08
+                           /* CF==1 or ZF==1 --> crash (ud2) */
+                           "ja 1f ; ud2 ; 1:\n"
+                           :
+                           : "a" (&operand), "c" (ext)
+                           : "memory" );
+}
+
 static inline void __vmxoff(void)
 {
     asm volatile (
@@ -269,6 +306,29 @@ static inline int __vmxon(u64 addr)
     return rc;
 }
 
+static inline void __ept_sync_domain(void *info)
+{
+    struct vcpu *v = info;
+
+    if ( !hvm_funcs.hap_supported )
+        return;
+
+    __invept(1, v->arch.hvm_vmx.ept_control.eptp, 0);
+}
+
+static inline void ept_sync_domain(struct vcpu *v)
+{
+    __ept_sync_domain(v);
+    smp_call_function(__ept_sync_domain, v, 1, 0);
+}
+
+static inline void ept_sync_all(void)
+{
+    if ( !hvm_funcs.hap_supported )
+        return;
+
+    __invept(2, 0, 0);
+}
+
 static inline void __vmx_inject_exception(
     struct vcpu *v, int trap, int type, int error_code)
 {
@@ -314,4 +374,6 @@ static inline void vmx_inject_nmi(struct
                              HVM_DELIVER_NO_ERROR_CODE);
 }
 
+void ept_p2m_init(struct domain *d);
+
 #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
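
Because the binutils of this era has no INVEPT mnemonic, the instruction is emitted by hand: the 0x66 0x0F 0x38 0x80 opcode plus ModRM byte 0x08 selects ECX as the invalidation-type register and [EAX] as the 128-bit descriptor (eptp plus a reserved gpa field). Type 1 invalidates a single EPT context, type 2 all contexts. A usage sketch of the two wrappers (function name illustrative), matching what ept_set_entry() and vmx_cpu_up() above actually do:

    static void ept_flush_examples(struct vcpu *v)
    {
        ept_sync_domain(v);  /* INVEPT type 1: this domain's eptp only */
        ept_sync_all();      /* INVEPT type 2: every cached context    */
    }
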
Index: xen-3.2-testing/xen/include/asm-x86/p2m.h
===================================================================
--- xen-3.2-testing.orig/xen/include/asm-x86/p2m.h
+++ xen-3.2-testing/xen/include/asm-x86/p2m.h
@@ -43,27 +43,6 @@
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
-/*
- * The upper levels of the p2m pagetable always contain full rights; all
- * variation in the access control bits is made in the level-1 PTEs.
- *
- * In addition to the phys-to-machine translation, each p2m PTE contains
- * *type* information about the gfn it translates, helping Xen to decide
- * on the correct course of action when handling a page-fault to that
- * guest frame.  We store the type in the "available" bits of the PTEs
- * in the table, which gives us 8 possible types on 32-bit systems.
- * Further expansions of the type system will only be supported on
- * 64-bit Xen.
- */
-typedef enum {
-    p2m_invalid = 0,        /* Nothing mapped here */
-    p2m_ram_rw = 1,         /* Normal read/write guest RAM */
-    p2m_ram_logdirty = 2,   /* Temporarily read-only for log-dirty */
-    p2m_ram_ro = 3,         /* Read-only; writes go to the device model */
-    p2m_mmio_dm = 4,        /* Reads and write go to the device model */
-    p2m_mmio_direct = 5,    /* Read/write mapping of genuine MMIO area */
-} p2m_type_t;
-
 /* We use bitmaps and masks to handle groups of types */
 #define p2m_to_mask(_t) (1UL << (_t))
 
@@ -92,10 +71,16 @@ static inline p2m_type_t p2m_flags_to_ty
     /* Type is stored in the "available" bits, 9, 10 and 11 */
     return (flags >> 9) & 0x7;
 }
- 
+
 /* Read the current domain's p2m table (through the linear mapping). */
 static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
 {
+    return current->domain->arch.p2m.get_entry_fast(gfn, t);
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t p2m_gfn_to_mfn_fast(unsigned long gfn, p2m_type_t *t)
+{
     mfn_t mfn = _mfn(INVALID_MFN);
     p2m_type_t p2mt = p2m_mmio_dm;
     /* XXX This is for compatibility with the old model, where anything not
@@ -133,7 +118,11 @@ static inline mfn_t gfn_to_mfn_current(u
 }
 
 /* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
+static inline
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+    return d->arch.p2m.get_entry(d, gfn, t);
+}
 
 /* General conversion function from gfn to mfn */
 #define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
@@ -149,7 +138,7 @@ static inline mfn_t _gfn_to_mfn(struct d
     }
     if ( likely(current->domain == d) )
        return gfn_to_mfn_current(gfn, t);
-    else 
+    else
        return gfn_to_mfn_foreign(d, gfn, t);
 }
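
After this change every p2m lookup, fast path or foreign, dispatches through the hooks installed by p2m_init(), so generic code never needs to know whether a domain's p2m lives in a regular pagetable or an EPT table. A caller-side sketch (the helper name is illustrative), built only from functions shown in this patch:

    static inline int gfn_backed_by_ram(struct domain *d, unsigned long gfn)
    {
        p2m_type_t t;
        mfn_t mfn = gfn_to_mfn(d, gfn, &t);

        /* Identical behaviour for shadow and EPT domains. */
        return p2m_is_ram(t) && mfn_valid(mfn_x(mfn));
    }
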
Index: xen-3.2-testing/xen/include/public/hvm/params.h
===================================================================
--- xen-3.2-testing.orig/xen/include/public/hvm/params.h
+++ xen-3.2-testing/xen/include/public/hvm/params.h
@@ -76,6 +76,7 @@
  * Guest time always tracks wallclock (i.e., real) time.
  */
 #define HVM_PARAM_TIMER_MODE   10
+#define HVM_PARAM_IDENT_PT     12
 #define HVMPTM_delay_for_missed_ticks    0
 #define HVMPTM_no_delay_for_missed_ticks 1
 #define HVMPTM_no_missed_ticks_pending   2
@@ -84,6 +85,6 @@
 /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
 #define HVM_PARAM_HPET_ENABLED 11
 
-#define HVM_NR_PARAMS          12
+#define HVM_NR_PARAMS          13
 
 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */