xen/npt_part1.patch

Index: xen-3.0.4-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-3.0.4-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-3.0.4-testing/xen/arch/x86/hvm/svm/svm.c
@@ -30,6 +30,7 @@
#include <asm/current.h>
#include <asm/io.h>
#include <asm/shadow.h>
+#include <asm/hap.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -53,7 +54,7 @@
__asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
/* External functions. We should move these to some suitable header file(s) */
-
+extern int opt_hap_enabled;
extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
int inst_len);
extern asmlinkage void do_IRQ(struct cpu_user_regs *);
@@ -800,6 +801,8 @@ static int svm_vcpu_initialise(struct vc
v->arch.hvm_svm.saved_irq_vector = -1;
+ hap_deactivate(v);
+
if ( (rc = svm_create_vmcb(v)) != 0 )
{
dprintk(XENLOG_WARNING,
@@ -814,6 +817,7 @@ static int svm_vcpu_initialise(struct vc
static void svm_vcpu_destroy(struct vcpu *v)
{
svm_destroy_vmcb(v);
+ hap_deactivate(v);
}
static void svm_hvm_inject_exception(
@@ -856,6 +860,7 @@ int start_svm(void)
asidpool_init( cpu );
printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
+ npt_detect();
/* Initialize the HSA for this core */
phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
phys_hsa_lo = (u32) phys_hsa;
@@ -1705,6 +1710,120 @@ static inline int svm_pgbit_test(struct
return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
}
+/************************************************/
+/* nested paging functions */
+/************************************************/
+void npt_activate(struct vcpu *v)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct domain *d = v->domain;
+
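+ /* hCR3 points at the domain's phys_table, which serves as the nested (host) page table */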
+ vmcb->h_cr3 = pagetable_get_paddr(d->arch.phys_table);
+ /* use guest's CR register values */
+ vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+ vmcb->cr3 = v->arch.hvm_svm.cpu_cr3;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ vmcb->np_enable = 1; /* enable nested paging */
+ vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
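+ /* no need to intercept guest page faults once nested paging is on */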
+ vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG;
+ npt_update_guest_paging_mode(v);
+ hap_activate(v);
+}
+
+int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
+{
+ unsigned long value;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ ASSERT(vmcb);
+
+ value = get_reg(gpreg, regs, vmcb);
+
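+ /* With nested paging the guest's CR values go straight into the VMCB;
+ * no shadow page-table maintenance is needed here. */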
+ switch (cr) {
+ case 0:
+ vmcb->cr0 = value;
+ v->arch.hvm_svm.cpu_shadow_cr0 = value;
+ npt_update_guest_paging_mode(v);
+ break;
+ case 3:
+ vmcb->cr3 = value;
+ v->arch.hvm_svm.cpu_cr3 = value;
+ set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+ break;
+ case 4: /* CR4 */
+ vmcb->cr4 = value;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ npt_update_guest_paging_mode(v);
+ set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+ break;
+ case 8:
+ vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+ break;
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+ domain_crash(v->domain);
+ return 0;
+ }
+
+ return 1;
+}
+
+void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+ unsigned long value = 0;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ vmcb = v->arch.hvm_svm.vmcb;
+ ASSERT(vmcb);
+
+ switch(cr)
+ {
+ case 0:
+ value = vmcb->cr0;
+ break;
+ case 2:
+ value = vmcb->cr2;
+ break;
+ case 3:
+ value = vmcb->cr3;
+ break;
+ case 4:
+ value = vmcb->cr4;
+ break;
+ case 8:
+ value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+ value = (value & 0xF0) >> 4;
+ break;
+ default:
+ domain_crash(v->domain);
+ return;
+ }
+
+ set_reg(gp, value, regs, vmcb);
+}
+
+int npt_do_nested_page_fault(unsigned long va, struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+ unsigned long gpa = va;
+ int result = 1;
+
+ if (mmio_space(gpa)) {
+ handle_mmio(gpa);
+ return 1;
+ }
+
+ /* The P2M table is fully constructed at domain-build time, so any other
+ * nested page fault is unexpected and fatal. */
+ domain_crash(v->domain);
+ return result;
+}
+/*************************************************/
+/* end of nested paging functions */
+/*************************************************/
/*
* Write to control registers
@@ -1905,12 +2024,21 @@ static int svm_cr_access(struct vcpu *v,
{
case INSTR_MOV2CR:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- result = mov_to_cr(gpreg, cr, regs);
+ if ( hap_is_activated(v) )
+ result = npt_mov_to_cr(gpreg, cr, regs);
+ else {
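+ /* Fall back to the shadow CR handler; once the guest has enabled
+ * paging, switch this vcpu over to nested paging. */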
+ result = mov_to_cr(gpreg, cr, regs);
+ if ( opt_hap_enabled && svm_paging_enabled(v) )
+ npt_activate(v);
+ }
break;
case INSTR_MOVCR2:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- mov_from_cr(cr, gpreg, regs);
+ if ( hap_is_activated(v) )
+ npt_mov_from_cr(cr, gpreg, regs);
+ else
+ mov_from_cr(cr, gpreg, regs);
break;
case INSTR_CLTS:
@@ -2897,6 +3025,19 @@ asmlinkage void svm_vmexit_handler(struc
svm_do_msr_access(v, regs);
break;
+ case VMEXIT_NPF:
+ {
+ unsigned long gpa;
+
+ gpa = vmcb->exitinfo2;
+ regs->error_code = vmcb->exitinfo1;
+
+ if (!(error = npt_do_nested_page_fault(gpa, regs)))
+ domain_crash(v->domain);
+
+ break;
+ }
+
case VMEXIT_SHUTDOWN:
gdprintk(XENLOG_ERR, "Guest shutdown exit\n");
goto exit_and_crash;
Index: xen-3.0.4-testing/xen/arch/x86/mm/Makefile
===================================================================
--- xen-3.0.4-testing.orig/xen/arch/x86/mm/Makefile
+++ xen-3.0.4-testing/xen/arch/x86/mm/Makefile
@@ -1 +1,2 @@
subdir-y += shadow
+subdir-y += hap
Index: xen-3.0.4-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-3.0.4-testing.orig/xen/include/asm-x86/domain.h
+++ xen-3.0.4-testing/xen/include/asm-x86/domain.h
@@ -206,6 +206,8 @@ struct arch_vcpu
unsigned long shadow_ldt_mapcnt;
struct shadow_vcpu shadow;
+
+ unsigned int hap_activated:1; /* hardware assisted paging */
} __cacheline_aligned;
/* shorthands to improve code legibility */
Index: xen-3.0.4-testing/xen/arch/x86/mm/hap/Makefile
===================================================================
--- /dev/null
+++ xen-3.0.4-testing/xen/arch/x86/mm/hap/Makefile
@@ -0,0 +1,3 @@
+subdir-y += npt
+
+obj-y += hap.o
Index: xen-3.0.4-testing/xen/arch/x86/mm/hap/hap.c
===================================================================
--- /dev/null
+++ xen-3.0.4-testing/xen/arch/x86/mm/hap/hap.c
@@ -0,0 +1,36 @@
+/******************************************************************************
+ * arch/x86/mm/hap/hap.c
+ *
+ * hardware assisted paging support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/init.h>
+
+/* Hardware assisted paging is enabled via the "hap" Xen boot option;
+ * it is disabled by default.
+ */
+int opt_hap_enabled = 0;
+boolean_param("hap", opt_hap_enabled);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/Makefile
===================================================================
--- /dev/null
+++ xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/Makefile
@@ -0,0 +1 @@
+obj-y += npt.o
Index: xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/npt.c
===================================================================
--- /dev/null
+++ xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/npt.c
@@ -0,0 +1,464 @@
+/*
+ * npt.c: AMD SVM nested paging implementation for Xen
+ *
+ * Copyright (c) 2006, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <asm/hap.h>
+
+#include "private.h"
+#include "page-guest32.h"
+
+extern int opt_hap_enabled;
+/*******************************************/
+/* Platform Specific Functions */
+/*******************************************/
+
+/* Translate a guest virtual address to a guest physical address,
+ * specifically for a real-mode guest.
+ */
+
+static paddr_t npt_gva_to_gpa_real_mode(struct vcpu *v, unsigned long gva)
+{
+ HERE_I_AM;
+ return (paddr_t)gva;
+}
+
+/* Translate a guest virtual address to a guest physical address,
+ * specifically for a 2-level (non-PAE) guest.
+ */
+static paddr_t npt_gva_to_gpa_protected_mode(struct vcpu *v, unsigned long gva)
+{
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 2; /* two-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int result = 1;
+ l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */
+ l1_pgentry_32_t *l1e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ NPT_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ result = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ NPT_PRINTK("l2 page table entry is 0x%x at index = %d\n",
+ l2e[index].l2, index);
+ if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 2 entry not present at index = %d\n", index);
+ result = 0;
+ }
+
+ if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ NPT_PRINTK("guest page table is PSE\n");
+ if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /*[13:20] */
+ printk("guest physical memory size is too large!\n");
+ domain_crash(v->domain);
+ }
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) +
+ (gva & ~PHYSICAL_PAGE_4M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, return from here */
+ }
+ else {
+ gpfn = l2e_get_pfn( l2e[index] );
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ NPT_PRINTK("l1 page table entry is 0x%x at index = %d\n",
+ l1e[index].l1, index);
+ if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 1 entry not present at index = %d\n", index);
+ result = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( result != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ NPT_PRINTK("result = %d, gva = %lx, gpa = %lx\n", result, gva, gpa);
+ return (paddr_t)gpa;
+}
+
+/* Translate a guest virtual address to a guest physical address,
+ * specifically for a 3-level (PAE) guest.
+ */
+#if CONFIG_PAGING_LEVELS >= 3
+static paddr_t npt_gva_to_gpa_pae_mode(struct vcpu *v, unsigned long gva)
+{
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 3; /* three-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int result = 1;
+ l1_pgentry_t *l1e;
+ l2_pgentry_t *l2e;
+ l3_pgentry_t *l3e;
+
+ HERE_I_AM;
+
+ NPT_PRINTK("npt_gva_to_gpa:mode = %d, gcr3 = 0x%lx, gva = 0x%lx, "
+ "entry size = %ld\n", mode, gcr3, gva, sizeof(*l3e));
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ NPT_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ result = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
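+ /* PAE CR3 is only 32-byte aligned: bits [11:5] select a 32-byte block
+ * of four 8-byte PDPTEs within the page, so adjust the index */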
+ index += ( ((gcr3 >> 5 ) & 127 ) * 4 );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 3 entry not present at index = %d\n", index);
+ result = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 2 entry not present at index = %d\n", index);
+ result = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ NPT_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) +
+ (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 1 entry not present at index = %d\n", index);
+ result = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( result != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ NPT_PRINTK("result = %d, gva = %lx, gpa = %lx\n", result, gva, gpa);
+ return (paddr_t)gpa;
+}
+#else
+static paddr_t npt_gva_to_gpa_pae_mode(struct vcpu *v, unsigned long gva)
+{
+ HERE_I_AM;
+ printk("guest paging level (3) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return 0UL;
+}
+#endif
+
+/* Translate a guest virtual address to a guest physical address,
+ * specifically for a 4-level (long-mode) guest.
+ */
+#if CONFIG_PAGING_LEVELS == 4
+static paddr_t npt_gva_to_gpa_long_mode(struct vcpu *v, unsigned long gva)
+{
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 4; /* four-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int result = 1;
+ l4_pgentry_t *l4e;
+ l3_pgentry_t *l3e;
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ HERE_I_AM;
+
+ NPT_PRINTK("npt_gva_to_gpa:mode = %d, gcr3 = 0x%lx, gva = 0x%lx, "
+ "entry size is = %ld\n", mode, gcr3, gva, sizeof(*l4e));
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ NPT_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ result = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 4 ) {
+ l4e = map_domain_page( mfn );
+ if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 4 entry not present at index = %d\n", index);
+ result = 0;
+ }
+ gpfn = l4e_get_pfn( l4e[index] );
+ unmap_domain_page(l4e);
+ }
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 3 entry not present at index = %d\n", index);
+ result = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 2 entry not present at index = %d\n", index);
+ result = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ NPT_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) +
+ (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ NPT_PRINTK("Level 1 entry not present at index = %d\n", index);
+ result = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( result != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ NPT_PRINTK("result = %d, gva = %lx, gpa = %lx\n", result, gva, gpa);
+ return (paddr_t)gpa;
+}
+#else
+static paddr_t npt_gva_to_gpa_long_mode(struct vcpu *v, unsigned long gva)
+{
+ HERE_I_AM;
+ printk("guest paging level (4) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return 0UL;
+}
+#endif
+
+static unsigned long
+npt_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva)
+{
+ return (npt_gva_to_gpa_real_mode(v, gva) >> PAGE_SHIFT);
+}
+
+static unsigned long
+npt_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva)
+{
+ return (npt_gva_to_gpa_protected_mode(v, gva) >> PAGE_SHIFT);
+}
+
+static unsigned long
+npt_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva)
+{
+ return (npt_gva_to_gpa_pae_mode(v, gva) >> PAGE_SHIFT);
+}
+
+static unsigned long
+npt_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva)
+{
+ return (npt_gva_to_gpa_long_mode(v, gva) >> PAGE_SHIFT);
+}
+
+/********************************************/
+/* AMD NPT Platform Specific Functions */
+/********************************************/
+void npt_detect(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ /* check CPUID for nested paging support */
+ cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
+ if ( !(edx & 0x01) && opt_hap_enabled ) {
+ printk("Nested paging is not supported by this CPU.\n");
+ opt_hap_enabled = 0; /* no nested paging support, so disable opt_hap_enabled */
+ }
+}
+
+/*******************************************/
+/* Public Interface to Xen */
+/*******************************************/
+
+void npt_set_guest_paging_levels(struct vcpu *v, int levels)
+{
+ HERE_I_AM;
+ switch(levels) {
+ case 1:
+ NPT_PRINTK("Install real mode guest with ID = %d\n", v->vcpu_id);
+ v->arch.shadow.mode->gva_to_gpa = &npt_gva_to_gpa_real_mode;
+ v->arch.shadow.mode->gva_to_gfn = &npt_gva_to_gfn_real_mode;
+ break;
+ case 2:
+ NPT_PRINTK("Install 32-bit non-PAE guest with ID = %d\n", v->vcpu_id);
+ v->arch.shadow.mode->gva_to_gpa = &npt_gva_to_gpa_protected_mode;
+ v->arch.shadow.mode->gva_to_gfn = &npt_gva_to_gfn_protected_mode;
+ break;
+ case 3:
+ NPT_PRINTK("Install 32-bit PAE guest with ID = %d\n", v->vcpu_id);
+ v->arch.shadow.mode->gva_to_gpa = &npt_gva_to_gpa_pae_mode;
+ v->arch.shadow.mode->gva_to_gfn = &npt_gva_to_gfn_pae_mode;
+ break;
+ case 4:
+ NPT_PRINTK("Install 64-bit guest with ID = %d\n", v->vcpu_id);
+ v->arch.shadow.mode->gva_to_gpa = &npt_gva_to_gpa_long_mode;
+ v->arch.shadow.mode->gva_to_gfn = &npt_gva_to_gfn_long_mode;
+ break;
+ default:
+ printk("Unsupported guest paging level: %d\n", levels);
+ domain_crash(v->domain);
+ break;
+ }
+}
+
+/* dom_init_npt: initialize the resources for nested paging support.
+ * Returns 1 on success, 0 on failure.
+ */
+
+int dom_init_npt(struct domain *d)
+{
+ u32 eax, ebx, ecx, edx;
+
+ HERE_I_AM;
+
+ /* check CPUID for nested paging support first */
+ cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
+ if ( !(edx & 0x01) ) {
+ printk("AMD nested paging is not supported.\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* reclaim the resource */
+void dom_destroy_npt(struct domain *d)
+{
+ HERE_I_AM;
+ NPT_PRINTK("dom_destroy_npt() done!\n");
+}
+
+/* This function updates the guest paging mode based on the guest's accesses
+ * (both reads and writes) to the CR registers and to the EFER register. It
+ * also installs the corresponding guest handlers for the guest's paging level.
+ */
+void npt_update_guest_paging_mode(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ u64 cr0_value = vmcb->cr0;
+ u64 cr4_value = vmcb->cr4;
+ u64 efer_value = vmcb->efer;
+
+ HERE_I_AM;
+
+ npt_lock(d);
+
+ if ( (cr0_value & X86_CR0_PE) && (cr0_value & X86_CR0_PG)) {
+ if ( (efer_value & EFER_LME) && (cr4_value & X86_CR4_PAE) )
+ npt_set_guest_paging_levels(v, PAGING_L4);
+ else if ( cr4_value & X86_CR4_PAE)
+ npt_set_guest_paging_levels(v, PAGING_L3);
+ else
+ npt_set_guest_paging_levels(v, PAGING_L2);
+ }
+ else {
+ NPT_PRINTK("paging is turned off by the guest\n");
+ npt_set_guest_paging_levels(v, PAGING_REAL_MODE);
+ }
+
+ v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v);
+
+ npt_unlock(d);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
Index: xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/page-guest32.h
===================================================================
--- /dev/null
+++ xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/page-guest32.h
@@ -0,0 +1,105 @@
+
+#ifndef __X86_PAGE_GUEST_H__
+#define __X86_PAGE_GUEST_H__
+
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+#endif
+
+#define PAGETABLE_ORDER_32 10
+#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
+#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
+#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32
+
+
+#define L1_PAGETABLE_SHIFT_32 12
+#define L2_PAGETABLE_SHIFT_32 22
+
+/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
+
+#ifndef __ASSEMBLY__
+
+typedef u32 intpte_32_t;
+
+typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
+typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
+typedef l2_pgentry_t root_pgentry_32_t;
+#endif
+
+#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
+#define put_pte_flags_32(x) ((intpte_32_t)(x))
+
+/* Get pte access flags (unsigned int). */
+#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1))
+#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2))
+
+#define l1e_get_paddr_32(x) \
+ ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
+#define l2e_get_paddr_32(x) \
+ ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
+
+/* Construct an empty pte. */
+#define l1e_empty_32() ((l1_pgentry_32_t) { 0 })
+#define l2e_empty_32() ((l2_pgentry_32_t) { 0 })
+
+/* Construct a pte from a pfn and access flags. */
+#define l1e_from_pfn_32(pfn, flags) \
+ ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+#define l2e_from_pfn_32(pfn, flags) \
+ ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+
+/* Construct a pte from a physical address and access flags. */
+#ifndef __ASSEMBLY__
+static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+#endif /* !__ASSEMBLY__ */
+
+
+/* Construct a pte from a page pointer and access flags. */
+#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
+#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
+
+/* Add extra flags to an existing pte. */
+#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags))
+#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags))
+
+/* Remove flags from an existing pte. */
+#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
+#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
+
+/* Check if a pte's page mapping or significant access flags have changed. */
+#define l1e_has_changed_32(x,y,flags) \
+ ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+#define l2e_has_changed_32(x,y,flags) \
+ ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset_32(a) \
+ (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
+#define l2_table_offset_32(a) \
+ (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
+
+#define linear_l1_table_32 \
+ ((l1_pgentry_32_t *)(LINEAR_PT_VIRT_START))
+
+#define linear_pg_table_32 linear_l1_table_32
+
+#endif /* __X86_PAGE_GUEST_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/private.h
===================================================================
--- /dev/null
+++ xen-3.0.4-testing/xen/arch/x86/mm/hap/npt/private.h
@@ -0,0 +1,165 @@
+/*
+ * private.h: SVM Nested Paging Related Definitions
+ * Copyright (c) 2006, Wei Huang, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __NPT_PRIVATE_H__
+#define __NPT_PRIVATE_H__
+
+#include <asm/flushtlb.h>
+#include <asm/hvm/support.h>
+
+/********************************************/
+/* NPT Debugging Utilities */
+/********************************************/
+#define HERE_I_AM \
+ debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__)
+#define NPT_PRINTK(_f, _a...) \
+ debugtrace_printk("npt: %s(): " _f, __func__, ##_a)
+#define NPT_ERROR(_f, _a...) \
+ printk("npt error: %s(): " _f, __func__, ##_a)
+#define NPT_DEBUG(flag, _f, _a...) \
+ do { \
+ debugtrace_printk("npt debug: %s(): " _f, __func__, ##_a); \
+ } while (0)
+/********************************************/
+/* NPT Inline Functions and Tools */
+/********************************************/
+#define npt_lock(_d) \
+ do { \
+ if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
+ { \
+ printk("Error: shadow lock held by %s\n", \
+ (_d)->arch.shadow.locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_d)->arch.shadow.lock); \
+ ASSERT((_d)->arch.shadow.locker == -1); \
+ (_d)->arch.shadow.locker = current->processor; \
+ (_d)->arch.shadow.locker_function = __func__; \
+ } while (0)
+
+#define npt_unlock(_d) \
+ do { \
+ ASSERT((_d)->arch.shadow.locker == current->processor); \
+ (_d)->arch.shadow.locker = -1; \
+ (_d)->arch.shadow.locker_function = "nobody"; \
+ spin_unlock(&(_d)->arch.shadow.lock); \
+ } while (0)
+/********************************************/
+/* Variable Definition */
+/********************************************/
+#define NPT_HOST_PAGE_TABLE_FLAGS ((_PAGE_PRESENT | _PAGE_RW | _PAGE_USER) & ~_PAGE_DIRTY & ~_PAGE_ACCESSED)
+#define NPT_HOST_PAGE_TABLE_MMIO_FLAGS (_PAGE_PRESENT & ~_PAGE_DIRTY & ~_PAGE_ACCESSED)
+#if CONFIG_PAGING_LEVELS == 3
+#define NPT_HOST_PAGE_TABLE_PDPE_FLAGS _PAGE_PRESENT
+#else
+#define NPT_HOST_PAGE_TABLE_PDPE_FLAGS ((_PAGE_PRESENT | _PAGE_RW | _PAGE_USER) & ~_PAGE_DIRTY & ~_PAGE_ACCESSED)
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+#define INIT_NUM_OF_NPT_PAGES 1
+#elif CONFIG_PAGING_LEVELS == 3
+#define INIT_NUM_OF_NPT_PAGES (1 + L3_PAGETABLE_ENTRIES)
+#elif CONFIG_PAGING_LEVELS == 4
+#define INIT_NUM_OF_NPT_PAGES 1
+#endif
+
+/* The following is the maximum number of host page table pages required.
+ * In practice far fewer are normally used.
+ */
+#if CONFIG_PAGING_LEVELS == 2
+#define MAX_NUM_OF_NPT_PAGES (1 + L2_PAGETABLE_ENTRIES)
+#elif CONFIG_PAGING_LEVELS == 3
+#define MAX_NUM_OF_NPT_PAGES (1 + L3_PAGETABLE_ENTRIES * (1 + L2_PAGETABLE_ENTRIES))
+#elif CONFIG_PAGING_LEVELS == 4
+#define MAX_NUM_OF_NPT_PAGES (1 + L4_PAGETABLE_ENTRIES * (1 + L3_PAGETABLE_ENTRIES * (1 + L2_PAGETABLE_ENTRIES)))
+#endif
+
+
+
+/********************************************/
+/* MISC DEFINITIONS */
+/********************************************/
+
+/* PT_SHIFT describes the amount by which a virtual address is shifted right
+ * to right justify the portion to be used for indexing into a page
+ * table, given the guest memory model (i.e. number of levels) and the level
+ * of the page table being accessed. The idea is from Virtual Iron's code.
+ */
+static const int PT_SHIFT[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 12, 22, 0, 0}, /* 2 */
+ { 0, 12, 21, 30, 0}, /* 3 */
+ { 0, 12, 21, 30, 39} /* 4 */
+ };
+
+/* PT_ENTRIES describes the number of entries in a page table, given the
+ * memory model (i.e. number of levels) and the level of the page table
+ * being considered. This idea is from Virtual Iron's shadow code. */
+static const int PT_ENTRIES[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 1024, 1024, 0, 0}, /* 2 */
+ { 0, 512, 512, 4, 0}, /* 3 */
+ { 0, 512, 512, 512, 512} /* 4 */
+ };
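+
+/* Example: for a 2-level (non-PAE) guest at level 2, the page directory
+ * index is (gva >> PT_SHIFT[2][2]) & (PT_ENTRIES[2][2] - 1), i.e.
+ * (gva >> 22) & 1023.
+ */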
+
+/********************************************/
+/* PAGING DEFINITION FOR GUEST */
+/********************************************/
+#define PHYSICAL_PAGE_4K_SIZE (1UL << 12)
+#define PHYSICAL_PAGE_2M_SIZE (1UL << 21)
+#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
+#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) )
+#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) )
+#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )
+#define NPT_GUEST_CR3_SHIFT_NON_PAE 12 /* both legacy mode and long mode */
+#define NPT_GUEST_CR3_SHIFT_PAE 5 /* PAE mode */
+
+#define PAGING_REAL_MODE 1
+#define PAGING_L2 2
+#define PAGING_L3 3
+#define PAGING_L4 4
+
+#define PAGE_NX_BIT (1ULL << 63)
+/********************************************/
+/* MISC. DEFINITIONS */
+/********************************************/
+#if CONFIG_PAGING_LEVELS == 2
+#define NPT_PRI_LONG "08x"
+#define NPT_PRI_ULONG "08x"
+#else /* CONFIG_PAGING_LEVELS >= 3 */
+#ifndef __x86_64__
+#define NPT_PRI_LONG "016llx"
+#define NPT_PRI_ULONG "016llx"
+#else
+#define NPT_PRI_LONG "016lx"
+#define NPT_PRI_ULONG "016lx"
+#endif
+#endif
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+#define NPT_MAX_ORDER 0 /* Only ever need 4k allocations */
+#else
+#define NPT_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+#endif
+#endif /* __NPT_PRIVATE_H__ */
Index: xen-3.0.4-testing/xen/include/asm-x86/hap.h
===================================================================
--- /dev/null
+++ xen-3.0.4-testing/xen/include/asm-x86/hap.h
@@ -0,0 +1,56 @@
+/******************************************************************************
+ * include/asm-x86/hap.h
+ * hardware assisted paging support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _HAP_H_
+#define _HAP_H_
+
+#include <public/domctl.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/domain_page.h>
+#include <asm/flushtlb.h>
+
+static inline unsigned int hap_is_activated(struct vcpu *v)
+{
+ return v->arch.hap_activated;
+}
+
+static inline void hap_activate(struct vcpu *v)
+{
+ v->arch.hap_activated = 1;
+}
+
+static inline void hap_deactivate(struct vcpu *v)
+{
+ v->arch.hap_activated = 0;
+}
+
+void npt_update_guest_paging_mode(struct vcpu *v);
+void npt_detect(void);
+#endif
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */