2008-10-11 16:22:01 +02:00
|
|
|
# HG changeset patch
|
|
|
|
# User Keir Fraser <keir.fraser@citrix.com>
|
|
|
|
# Date 1222087617 -3600
|
|
|
|
# Node ID 7f1a36b834e183904f069948d3037d50492d98d2
|
|
|
|
# Parent 3c42b5ad0a4f607749426f82ecf11f75d84699c5
|
|
|
|
x86: make GDT per-CPU
|
|
|
|
|
|
|
|
The major issue with supporting a significantly larger number of
|
|
|
|
physical CPUs appears to be the use of per-CPU GDT entries - at
|
|
|
|
present, x86-64 could support only up to 126 CPUs (with code changes
|
|
|
|
to also use the top-most GDT page, that would be 254). Instead of
|
|
|
|
trying to go with incremental steps here, by converting the GDT itself
|
|
|
|
to be per-CPU, limitations in that respect go away entirely.
|
|
|
|
|
|
|
|
Signed-off-by: Jan Beulich <jbeulich@novell.com>
|
|
|
|
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/boot/wakeup.S
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/boot/wakeup.S
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/boot/wakeup.S
|
|
|
|
@@ -168,7 +168,7 @@ wakeup_32:
|
|
|
|
.word 0,0,0
|
|
|
|
lgdt_descr:
|
|
|
|
.word LAST_RESERVED_GDT_BYTE
|
|
|
|
- .quad gdt_table - FIRST_RESERVED_GDT_BYTE
|
|
|
|
+ .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
|
|
|
|
|
|
|
|
wakeup_64:
|
|
|
|
lgdt lgdt_descr(%rip)
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/boot/x86_32.S
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/boot/x86_32.S
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/boot/x86_32.S
|
|
|
|
@@ -78,7 +78,7 @@ idt_descr:
|
|
|
|
.word 0
|
|
|
|
gdt_descr:
|
|
|
|
.word LAST_RESERVED_GDT_BYTE
|
|
|
|
- .long gdt_table - FIRST_RESERVED_GDT_BYTE
|
|
|
|
+ .long boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
|
|
|
|
|
|
|
|
|
|
|
|
.align 32
|
|
|
|
@@ -94,7 +94,7 @@ ENTRY(idle_pg_table)
|
|
|
|
#define GUEST_DESC(d) \
|
|
|
|
.long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff, \
|
|
|
|
((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
|
|
|
|
-ENTRY(gdt_table)
|
|
|
|
+ENTRY(boot_cpu_gdt_table)
|
|
|
|
.quad 0x0000000000000000 /* unused */
|
|
|
|
.quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */
|
|
|
|
.quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */
|
|
|
|
@@ -102,4 +102,6 @@ ENTRY(gdt_table)
|
|
|
|
GUEST_DESC(0x00c0b200) /* 0xe021 ring 1 3.xxGB data at 0x0 */
|
|
|
|
GUEST_DESC(0x00c0fa00) /* 0xe02b ring 3 3.xxGB code at 0x0 */
|
|
|
|
GUEST_DESC(0x00c0f200) /* 0xe033 ring 3 3.xxGB data at 0x0 */
|
|
|
|
+ .fill (PER_CPU_GDT_ENTRY - FLAT_RING3_DS / 8 - 1), 8, 0
|
|
|
|
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
|
|
|
|
.align PAGE_SIZE,0
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/boot/x86_64.S
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/boot/x86_64.S
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/boot/x86_64.S
|
|
|
|
@@ -85,7 +85,7 @@ multiboot_ptr:
|
|
|
|
.word 0
|
|
|
|
gdt_descr:
|
|
|
|
.word LAST_RESERVED_GDT_BYTE
|
|
|
|
- .quad gdt_table - FIRST_RESERVED_GDT_BYTE
|
|
|
|
+ .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
|
|
|
|
|
|
|
|
.word 0,0,0
|
|
|
|
idt_descr:
|
|
|
|
@@ -96,7 +96,7 @@ ENTRY(stack_start)
|
|
|
|
.quad cpu0_stack
|
|
|
|
|
|
|
|
.align PAGE_SIZE, 0
|
|
|
|
-ENTRY(gdt_table)
|
|
|
|
+ENTRY(boot_cpu_gdt_table)
|
|
|
|
.quad 0x0000000000000000 /* unused */
|
|
|
|
.quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
|
|
|
|
.quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
|
|
|
|
@@ -105,11 +105,13 @@ ENTRY(gdt_table)
|
|
|
|
.quad 0x00cff2000000ffff /* 0xe02b ring 3 data */
|
|
|
|
.quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */
|
|
|
|
.quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
|
|
|
|
+ .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
|
|
|
|
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
|
|
|
|
|
|
|
|
.align PAGE_SIZE, 0
|
|
|
|
/* NB. Even rings != 0 get access to the full 4Gb, as only the */
|
|
|
|
/* (compatibility) machine->physical mapping table lives there. */
|
|
|
|
-ENTRY(compat_gdt_table)
|
|
|
|
+ENTRY(boot_cpu_compat_gdt_table)
|
|
|
|
.quad 0x0000000000000000 /* unused */
|
|
|
|
.quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
|
|
|
|
.quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
|
|
|
|
@@ -118,4 +120,6 @@ ENTRY(compat_gdt_table)
|
|
|
|
.quad 0x00cffa000000ffff /* 0xe02b ring 3 code, compatibility */
|
|
|
|
.quad 0x00cff2000000ffff /* 0xe033 ring 3 data */
|
|
|
|
.quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
|
|
|
|
+ .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
|
|
|
|
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
|
|
|
|
.align PAGE_SIZE, 0
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/cpu/common.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/cpu/common.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/cpu/common.c
|
|
|
|
@@ -575,6 +575,9 @@ void __cpuinit cpu_init(void)
|
|
|
|
if (cpu_has_pat)
|
|
|
|
wrmsrl(MSR_IA32_CR_PAT, host_pat);
|
|
|
|
|
|
|
|
+ /* Install correct page table. */
|
|
|
|
+ write_ptbase(current);
|
|
|
|
+
|
|
|
|
*(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
|
|
|
|
*(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(current);
|
|
|
|
asm volatile ( "lgdt %0" : "=m" (gdt_load) );
|
|
|
|
@@ -605,9 +608,6 @@ void __cpuinit cpu_init(void)
|
|
|
|
#define CD(register) asm volatile ( "mov %0,%%db" #register : : "r"(0UL) );
|
|
|
|
CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
|
|
|
|
#undef CD
|
|
|
|
-
|
|
|
|
- /* Install correct page table. */
|
|
|
|
- write_ptbase(current);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
|
|
|
|
@@ -211,7 +211,6 @@ static inline int may_switch_mode(struct
|
|
|
|
|
|
|
|
int switch_native(struct domain *d)
|
|
|
|
{
|
|
|
|
- l1_pgentry_t gdt_l1e;
|
|
|
|
unsigned int vcpuid;
|
|
|
|
|
|
|
|
if ( d == NULL )
|
|
|
|
@@ -223,12 +222,8 @@ int switch_native(struct domain *d)
|
|
|
|
|
|
|
|
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
|
|
|
|
|
|
|
|
- /* switch gdt */
|
|
|
|
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
|
|
|
|
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
|
|
|
|
{
|
|
|
|
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
|
|
|
|
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
|
|
|
|
if (d->vcpu[vcpuid])
|
|
|
|
release_compat_l4(d->vcpu[vcpuid]);
|
|
|
|
}
|
|
|
|
@@ -238,7 +233,6 @@ int switch_native(struct domain *d)
|
|
|
|
|
|
|
|
int switch_compat(struct domain *d)
|
|
|
|
{
|
|
|
|
- l1_pgentry_t gdt_l1e;
|
|
|
|
unsigned int vcpuid;
|
|
|
|
|
|
|
|
if ( d == NULL )
|
|
|
|
@@ -250,15 +244,11 @@ int switch_compat(struct domain *d)
|
|
|
|
|
|
|
|
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
|
|
|
|
|
|
|
|
- /* switch gdt */
|
|
|
|
- gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
|
|
|
|
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
|
|
|
|
{
|
|
|
|
if ( (d->vcpu[vcpuid] != NULL) &&
|
|
|
|
(setup_compat_l4(d->vcpu[vcpuid]) != 0) )
|
|
|
|
goto undo_and_fail;
|
|
|
|
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
|
|
|
|
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
|
|
|
|
}
|
|
|
|
|
|
|
|
domain_set_alloc_bitsize(d);
|
|
|
|
@@ -267,13 +257,10 @@ int switch_compat(struct domain *d)
|
|
|
|
|
|
|
|
undo_and_fail:
|
|
|
|
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
|
|
|
|
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
|
|
|
|
while ( vcpuid-- != 0 )
|
|
|
|
{
|
|
|
|
if ( d->vcpu[vcpuid] != NULL )
|
|
|
|
release_compat_l4(d->vcpu[vcpuid]);
|
|
|
|
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
|
|
|
|
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
|
|
|
|
}
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
@@ -322,7 +309,12 @@ int vcpu_initialise(struct vcpu *v)
|
|
|
|
if ( is_idle_domain(d) )
|
|
|
|
{
|
|
|
|
v->arch.schedule_tail = continue_idle_domain;
|
|
|
|
- v->arch.cr3 = __pa(idle_pg_table);
|
|
|
|
+ if ( v->vcpu_id )
|
|
|
|
+ v->arch.cr3 = d->vcpu[0]->arch.cr3;
|
|
|
|
+ else if ( !*idle_vcpu )
|
|
|
|
+ v->arch.cr3 = __pa(idle_pg_table);
|
|
|
|
+ else if ( !(v->arch.cr3 = clone_idle_pagetable(v)) )
|
|
|
|
+ return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
v->arch.guest_context.ctrlreg[4] =
|
|
|
|
@@ -349,8 +341,7 @@ int arch_domain_create(struct domain *d,
|
|
|
|
#ifdef __x86_64__
|
|
|
|
struct page_info *pg;
|
|
|
|
#endif
|
|
|
|
- l1_pgentry_t gdt_l1e;
|
|
|
|
- int i, vcpuid, pdpt_order, paging_initialised = 0;
|
|
|
|
+ int i, pdpt_order, paging_initialised = 0;
|
|
|
|
int rc = -ENOMEM;
|
|
|
|
|
|
|
|
d->arch.hvm_domain.hap_enabled =
|
|
|
|
@@ -369,18 +360,6 @@ int arch_domain_create(struct domain *d,
|
|
|
|
goto fail;
|
|
|
|
memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
|
|
|
|
|
|
|
|
- /*
|
|
|
|
- * Map Xen segments into every VCPU's GDT, irrespective of whether every
|
|
|
|
- * VCPU will actually be used. This avoids an NMI race during context
|
|
|
|
- * switch: if we take an interrupt after switching CR3 but before switching
|
|
|
|
- * GDT, and the old VCPU# is invalid in the new domain, we would otherwise
|
|
|
|
- * try to load CS from an invalid table.
|
|
|
|
- */
|
|
|
|
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
|
|
|
|
- for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
|
|
|
|
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
|
|
|
|
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
|
|
|
|
-
|
|
|
|
#if defined(__i386__)
|
|
|
|
|
|
|
|
mapcache_domain_init(d);
|
|
|
|
@@ -1193,9 +1172,12 @@ static void paravirt_ctxt_switch_to(stru
|
|
|
|
static void __context_switch(void)
|
|
|
|
{
|
|
|
|
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
|
|
|
|
- unsigned int cpu = smp_processor_id();
|
|
|
|
+ unsigned int i, cpu = smp_processor_id();
|
|
|
|
struct vcpu *p = per_cpu(curr_vcpu, cpu);
|
|
|
|
struct vcpu *n = current;
|
|
|
|
+ struct desc_struct *gdt;
|
|
|
|
+ struct page_info *page;
|
|
|
|
+ struct desc_ptr gdt_desc;
|
|
|
|
|
|
|
|
ASSERT(p != n);
|
|
|
|
ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
|
|
|
|
@@ -1221,14 +1203,30 @@ static void __context_switch(void)
|
|
|
|
cpu_set(cpu, n->domain->domain_dirty_cpumask);
|
|
|
|
cpu_set(cpu, n->vcpu_dirty_cpumask);
|
|
|
|
|
|
|
|
+ gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu) :
|
|
|
|
+ per_cpu(compat_gdt_table, cpu);
|
|
|
|
+ page = virt_to_page(gdt);
|
|
|
|
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
|
|
|
|
+ {
|
|
|
|
+ l1e_write(n->domain->arch.mm_perdomain_pt +
|
|
|
|
+ (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
|
|
|
|
+ FIRST_RESERVED_GDT_PAGE + i,
|
|
|
|
+ l1e_from_page(page + i, __PAGE_HYPERVISOR));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if ( p->vcpu_id != n->vcpu_id )
|
|
|
|
+ {
|
|
|
|
+ gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
|
|
|
|
+ gdt_desc.base = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
|
|
|
|
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
write_ptbase(n);
|
|
|
|
|
|
|
|
if ( p->vcpu_id != n->vcpu_id )
|
|
|
|
{
|
|
|
|
- char gdt_load[10];
|
|
|
|
- *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
|
|
|
|
- *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(n);
|
|
|
|
- asm volatile ( "lgdt %0" : "=m" (gdt_load) );
|
|
|
|
+ gdt_desc.base = GDT_VIRT_START(n);
|
|
|
|
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
|
|
|
|
}
|
|
|
|
|
|
|
|
if ( p->domain != n->domain )
|
|
|
|
@@ -1279,8 +1277,6 @@ void context_switch(struct vcpu *prev, s
|
|
|
|
uint64_t efer = read_efer();
|
|
|
|
if ( !(efer & EFER_SCE) )
|
|
|
|
write_efer(efer | EFER_SCE);
|
|
|
|
- flush_tlb_one_local(GDT_VIRT_START(next) +
|
|
|
|
- FIRST_RESERVED_GDT_BYTE);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/domain_build.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/domain_build.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/domain_build.c
|
|
|
|
@@ -314,24 +314,11 @@ int __init construct_dom0(
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
if ( compat32 )
|
|
|
|
{
|
|
|
|
- l1_pgentry_t gdt_l1e;
|
|
|
|
-
|
|
|
|
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
|
|
|
|
v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];
|
|
|
|
|
|
|
|
if ( nr_pages != (unsigned int)nr_pages )
|
|
|
|
nr_pages = UINT_MAX;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * Map compatibility Xen segments into every VCPU's GDT. See
|
|
|
|
- * arch_domain_create() for further comments.
|
|
|
|
- */
|
|
|
|
- gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
|
|
|
|
- PAGE_HYPERVISOR);
|
|
|
|
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
|
|
|
|
- d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
|
|
|
|
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
|
|
|
|
- flush_tlb_one_local(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c
|
|
|
|
@@ -446,7 +446,7 @@ static void vmx_set_host_env(struct vcpu
|
|
|
|
|
|
|
|
__vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
|
|
|
|
|
|
|
|
- __vmwrite(HOST_TR_SELECTOR, __TSS(cpu) << 3);
|
|
|
|
+ __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
|
|
|
|
__vmwrite(HOST_TR_BASE, (unsigned long)&init_tss[cpu]);
|
|
|
|
|
|
|
|
__vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/setup.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/setup.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/setup.c
|
|
|
|
@@ -115,6 +115,12 @@ extern void early_cpu_init(void);
|
|
|
|
extern void vesa_init(void);
|
|
|
|
extern void vesa_mtrr_init(void);
|
|
|
|
|
|
|
|
+DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
|
|
|
|
+#ifdef CONFIG_COMPAT
|
|
|
|
+DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
|
|
|
|
+ = boot_cpu_compat_gdt_table;
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
struct tss_struct init_tss[NR_CPUS];
|
|
|
|
|
|
|
|
char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
|
|
|
|
@@ -224,6 +230,7 @@ static void __init percpu_init_areas(voi
|
|
|
|
static void __init init_idle_domain(void)
|
|
|
|
{
|
|
|
|
struct domain *idle_domain;
|
|
|
|
+ unsigned int i;
|
|
|
|
|
|
|
|
/* Domain creation requires that scheduler structures are initialised. */
|
|
|
|
scheduler_init();
|
|
|
|
@@ -236,6 +243,12 @@ static void __init init_idle_domain(void
|
|
|
|
idle_vcpu[0] = this_cpu(curr_vcpu) = current;
|
|
|
|
|
|
|
|
setup_idle_pagetable();
|
|
|
|
+
|
|
|
|
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
|
|
|
|
+ idle_domain->arch.mm_perdomain_pt[FIRST_RESERVED_GDT_PAGE + i] =
|
|
|
|
+ l1e_from_page(virt_to_page(boot_cpu_gdt_table) + i,
|
|
|
|
+ __PAGE_HYPERVISOR);
|
|
|
|
+
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __init srat_detect_node(int cpu)
|
|
|
|
@@ -443,7 +456,6 @@ void __init __start_xen(unsigned long mb
|
|
|
|
parse_video_info();
|
|
|
|
|
|
|
|
set_current((struct vcpu *)0xfffff000); /* debug sanity */
|
|
|
|
- idle_vcpu[0] = current;
|
|
|
|
set_processor_id(0); /* needed early, for smp_processor_id() */
|
|
|
|
if ( cpu_has_efer )
|
|
|
|
rdmsrl(MSR_EFER, this_cpu(efer));
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
|
2008-11-08 20:32:12 +01:00
|
|
|
@@ -828,10 +828,15 @@ static int __devinit do_boot_cpu(int api
|
2008-10-11 16:22:01 +02:00
|
|
|
*/
|
|
|
|
{
|
|
|
|
unsigned long boot_error;
|
|
|
|
+ unsigned int i;
|
|
|
|
int timeout;
|
|
|
|
unsigned long start_eip;
|
|
|
|
unsigned short nmi_high = 0, nmi_low = 0;
|
|
|
|
struct vcpu *v;
|
|
|
|
+ struct desc_struct *gdt;
|
|
|
|
+#ifdef __x86_64__
|
|
|
|
+ struct page_info *page;
|
|
|
|
+#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Save current MTRR state in case it was changed since early boot
|
2008-11-08 20:32:12 +01:00
|
|
|
@@ -857,6 +862,37 @@ static int __devinit do_boot_cpu(int api
|
2008-10-11 16:22:01 +02:00
|
|
|
/* Debug build: detect stack overflow by setting up a guard page. */
|
|
|
|
memguard_guard_stack(stack_start.esp);
|
|
|
|
|
|
|
|
+ gdt = per_cpu(gdt_table, cpu);
|
|
|
|
+ if (gdt == boot_cpu_gdt_table) {
|
|
|
|
+ i = get_order_from_pages(NR_RESERVED_GDT_PAGES);
|
|
|
|
+#ifdef __x86_64__
|
|
|
|
+#ifdef CONFIG_COMPAT
|
|
|
|
+ page = alloc_domheap_pages(NULL, i,
|
|
|
|
+ MEMF_node(cpu_to_node(cpu)));
|
|
|
|
+ per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
|
|
|
|
+ memcpy(gdt, boot_cpu_compat_gdt_table,
|
|
|
|
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE);
|
|
|
|
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
|
|
|
|
+#endif
|
|
|
|
+ page = alloc_domheap_pages(NULL, i,
|
|
|
|
+ MEMF_node(cpu_to_node(cpu)));
|
|
|
|
+ per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
|
|
|
|
+#else
|
|
|
|
+ per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(i);
|
|
|
|
+#endif
|
|
|
|
+ memcpy(gdt, boot_cpu_gdt_table,
|
|
|
|
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE);
|
|
|
|
+ BUILD_BUG_ON(NR_CPUS > 0x10000);
|
|
|
|
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
|
|
|
|
+ v->domain->arch.mm_perdomain_pt
|
|
|
|
+ [(v->vcpu_id << GDT_LDT_VCPU_SHIFT) +
|
|
|
|
+ FIRST_RESERVED_GDT_PAGE + i]
|
|
|
|
+ = l1e_from_page(virt_to_page(gdt) + i,
|
|
|
|
+ __PAGE_HYPERVISOR);
|
|
|
|
+
|
|
|
|
/*
|
|
|
|
* This grunge runs the startup process for
|
|
|
|
* the targeted processor.
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/traps.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/traps.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/traps.c
|
2008-12-05 15:30:41 +01:00
|
|
|
@@ -2978,13 +2978,13 @@ void set_intr_gate(unsigned int n, void
|
2008-10-11 16:22:01 +02:00
|
|
|
void set_tss_desc(unsigned int n, void *addr)
|
|
|
|
{
|
|
|
|
_set_tssldt_desc(
|
|
|
|
- gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
|
|
|
|
+ per_cpu(gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
|
|
|
|
(unsigned long)addr,
|
|
|
|
offsetof(struct tss_struct, __cacheline_filler) - 1,
|
|
|
|
9);
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
_set_tssldt_desc(
|
|
|
|
- compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
|
|
|
|
+ per_cpu(compat_gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
|
|
|
|
(unsigned long)addr,
|
|
|
|
offsetof(struct tss_struct, __cacheline_filler) - 1,
|
|
|
|
11);
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/mm.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/mm.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/mm.c
|
|
|
|
@@ -132,6 +132,30 @@ void __init setup_idle_pagetable(void)
|
|
|
|
__PAGE_HYPERVISOR));
|
|
|
|
}
|
|
|
|
|
|
|
|
+unsigned long clone_idle_pagetable(struct vcpu *v)
|
|
|
|
+{
|
|
|
|
+ unsigned int i;
|
|
|
|
+ struct domain *d = v->domain;
|
|
|
|
+ l3_pgentry_t *l3_table = v->arch.pae_l3_cache.table[0];
|
|
|
|
+ l2_pgentry_t *l2_table = alloc_xenheap_page();
|
|
|
|
+
|
|
|
|
+ if ( !l2_table )
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ memcpy(l3_table, idle_pg_table, L3_PAGETABLE_ENTRIES * sizeof(*l3_table));
|
|
|
|
+ l3_table[l3_table_offset(PERDOMAIN_VIRT_START)] =
|
|
|
|
+ l3e_from_page(virt_to_page(l2_table), _PAGE_PRESENT);
|
|
|
|
+
|
|
|
|
+ copy_page(l2_table, idle_pg_table_l2 +
|
|
|
|
+ l3_table_offset(PERDOMAIN_VIRT_START) * L2_PAGETABLE_ENTRIES);
|
|
|
|
+ for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
|
|
|
|
+ l2_table[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
|
|
|
|
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
|
|
|
|
+ __PAGE_HYPERVISOR);
|
|
|
|
+
|
|
|
|
+ return __pa(l3_table);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
void __init zap_low_mappings(l2_pgentry_t *dom0_l2)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
@@ -186,7 +210,7 @@ void __init subarch_init_memory(void)
|
|
|
|
{
|
|
|
|
/* Guest kernel runs in ring 0, not ring 1. */
|
|
|
|
struct desc_struct *d;
|
|
|
|
- d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
|
|
|
|
+ d = &boot_cpu_gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
|
|
|
|
d[0].b &= ~_SEGMENT_DPL;
|
|
|
|
d[1].b &= ~_SEGMENT_DPL;
|
|
|
|
}
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/supervisor_mode_kernel.S
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/supervisor_mode_kernel.S
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/supervisor_mode_kernel.S
|
|
|
|
@@ -100,15 +100,10 @@ ENTRY(fixup_ring0_guest_stack)
|
|
|
|
# %gs:%esi now points to the guest stack before the
|
|
|
|
# interrupt/exception occured.
|
|
|
|
|
|
|
|
- /*
|
|
|
|
- * Reverse the __TSS macro, giving us the CPU number.
|
|
|
|
- * The TSS for this cpu is at init_tss + ( cpu * 128 ).
|
|
|
|
- */
|
|
|
|
- str %ecx
|
|
|
|
- shrl $3,%ecx # Calculate GDT index for TSS.
|
|
|
|
- subl $(FIRST_RESERVED_GDT_ENTRY+8),%ecx # %ecx = 2*cpu.
|
|
|
|
- shll $6,%ecx # Each TSS entry is 0x80 bytes
|
|
|
|
- addl $init_tss,%ecx # but we have 2*cpu from above.
|
|
|
|
+ movl $PER_CPU_GDT_ENTRY*8,%ecx
|
|
|
|
+ lsll %ecx,%ecx
|
|
|
|
+ shll $7,%ecx # Each TSS entry is 0x80 bytes
|
|
|
|
+ addl $init_tss,%ecx
|
|
|
|
|
|
|
|
# Load Xen stack from TSS.
|
|
|
|
movw TSS_ss0(%ecx),%ax
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/traps.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/traps.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/traps.c
|
|
|
|
@@ -194,13 +194,15 @@ static unsigned char doublefault_stack[D
|
|
|
|
|
|
|
|
asmlinkage void do_double_fault(void)
|
|
|
|
{
|
|
|
|
- struct tss_struct *tss = &doublefault_tss;
|
|
|
|
- unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
|
|
|
|
+ struct tss_struct *tss;
|
|
|
|
+ unsigned int cpu;
|
|
|
|
|
|
|
|
watchdog_disable();
|
|
|
|
|
|
|
|
console_force_unlock();
|
|
|
|
|
|
|
|
+ asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
|
|
|
|
+
|
|
|
|
/* Find information saved during fault and dump it to the console. */
|
|
|
|
tss = &init_tss[cpu];
|
|
|
|
printk("*** DOUBLE FAULT ***\n");
|
|
|
|
@@ -325,7 +327,7 @@ void __devinit subarch_percpu_traps_init
|
|
|
|
tss->eflags = 2;
|
|
|
|
tss->bitmap = IOBMP_INVALID_OFFSET;
|
|
|
|
_set_tssldt_desc(
|
|
|
|
- gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
|
|
|
|
+ boot_cpu_gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
|
|
|
|
(unsigned long)tss, 235, 9);
|
|
|
|
|
|
|
|
set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/x86_64/mm.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/mm.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/x86_64/mm.c
|
|
|
|
@@ -21,6 +21,7 @@
|
|
|
|
#include <xen/lib.h>
|
|
|
|
#include <xen/init.h>
|
|
|
|
#include <xen/mm.h>
|
|
|
|
+#include <xen/numa.h>
|
|
|
|
#include <xen/sched.h>
|
|
|
|
#include <xen/guest_access.h>
|
|
|
|
#include <asm/current.h>
|
|
|
|
@@ -206,6 +207,24 @@ void __init setup_idle_pagetable(void)
|
|
|
|
__PAGE_HYPERVISOR));
|
|
|
|
}
|
|
|
|
|
|
|
|
+unsigned long clone_idle_pagetable(struct vcpu *v)
|
|
|
|
+{
|
|
|
|
+ struct domain *d = v->domain;
|
|
|
|
+ struct page_info *page = alloc_domheap_page(NULL,
|
|
|
|
+ MEMF_node(vcpu_to_node(v)));
|
|
|
|
+ l4_pgentry_t *l4_table = page_to_virt(page);
|
|
|
|
+
|
|
|
|
+ if ( !page )
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ copy_page(l4_table, idle_pg_table);
|
|
|
|
+ l4_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
|
|
|
|
+ l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
|
|
|
|
+ __PAGE_HYPERVISOR);
|
|
|
|
+
|
|
|
|
+ return __pa(l4_table);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
void __init zap_low_mappings(void)
|
|
|
|
{
|
|
|
|
BUG_ON(num_online_cpus() != 1);
|
|
|
|
Index: xen-3.3.1-testing/xen/arch/x86/x86_64/traps.c
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/traps.c
|
|
|
|
+++ xen-3.3.1-testing/xen/arch/x86/x86_64/traps.c
|
|
|
|
@@ -213,15 +213,14 @@ void show_page_walk(unsigned long addr)
|
|
|
|
asmlinkage void double_fault(void);
|
|
|
|
asmlinkage void do_double_fault(struct cpu_user_regs *regs)
|
|
|
|
{
|
|
|
|
- unsigned int cpu, tr;
|
|
|
|
-
|
|
|
|
- asm volatile ( "str %0" : "=r" (tr) );
|
|
|
|
- cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
|
|
|
|
+ unsigned int cpu;
|
|
|
|
|
|
|
|
watchdog_disable();
|
|
|
|
|
|
|
|
console_force_unlock();
|
|
|
|
|
|
|
|
+ asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
|
|
|
|
+
|
|
|
|
/* Find information saved during fault and dump it to the console. */
|
|
|
|
printk("*** DOUBLE FAULT ***\n");
|
|
|
|
print_xen_info();
|
|
|
|
Index: xen-3.3.1-testing/xen/include/asm-x86/desc.h
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/include/asm-x86/desc.h
|
|
|
|
+++ xen-3.3.1-testing/xen/include/asm-x86/desc.h
|
|
|
|
@@ -34,11 +34,9 @@
|
|
|
|
#define FLAT_COMPAT_USER_CS FLAT_COMPAT_RING3_CS
|
|
|
|
#define FLAT_COMPAT_USER_SS FLAT_COMPAT_RING3_SS
|
|
|
|
|
|
|
|
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
|
|
|
|
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
|
|
|
|
-
|
|
|
|
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
|
|
|
|
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
|
|
|
|
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
|
|
|
|
+#define LDT_ENTRY (TSS_ENTRY + 2)
|
|
|
|
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 2)
|
|
|
|
|
|
|
|
#elif defined(__i386__)
|
|
|
|
|
|
|
|
@@ -51,17 +49,15 @@
|
|
|
|
|
|
|
|
#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
|
|
|
|
|
|
|
|
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
|
|
|
|
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
|
|
|
|
-
|
|
|
|
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
|
|
|
|
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
|
|
|
|
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
|
|
|
|
+#define LDT_ENTRY (TSS_ENTRY + 1)
|
|
|
|
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 1)
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
-#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
|
|
|
|
+#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (TSS_ENTRY<<3) )
|
|
|
|
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
#define GUEST_KERNEL_RPL(d) (is_pv_32bit_domain(d) ? 1 : 3)
|
|
|
|
@@ -205,11 +201,19 @@ do {
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
-extern struct desc_struct gdt_table[];
|
|
|
|
+struct desc_ptr {
|
|
|
|
+ unsigned short limit;
|
|
|
|
+ unsigned long base;
|
|
|
|
+} __attribute__((__packed__)) ;
|
|
|
|
+
|
|
|
|
+extern struct desc_struct boot_cpu_gdt_table[];
|
|
|
|
+DECLARE_PER_CPU(struct desc_struct *, gdt_table);
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
-extern struct desc_struct compat_gdt_table[];
|
|
|
|
+extern struct desc_struct boot_cpu_compat_gdt_table[];
|
|
|
|
+DECLARE_PER_CPU(struct desc_struct *, compat_gdt_table);
|
|
|
|
#else
|
|
|
|
-# define compat_gdt_table gdt_table
|
|
|
|
+# define boot_cpu_compat_gdt_table boot_cpu_gdt_table
|
|
|
|
+# define per_cpu__compat_gdt_table per_cpu__gdt_table
|
|
|
|
#endif
|
|
|
|
|
|
|
|
extern void set_intr_gate(unsigned int irq, void * addr);
|
|
|
|
Index: xen-3.3.1-testing/xen/include/asm-x86/ldt.h
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/include/asm-x86/ldt.h
|
|
|
|
+++ xen-3.3.1-testing/xen/include/asm-x86/ldt.h
|
|
|
|
@@ -6,7 +6,6 @@
|
|
|
|
|
|
|
|
static inline void load_LDT(struct vcpu *v)
|
|
|
|
{
|
|
|
|
- unsigned int cpu;
|
|
|
|
struct desc_struct *desc;
|
|
|
|
unsigned long ents;
|
|
|
|
|
|
|
|
@@ -16,11 +15,11 @@ static inline void load_LDT(struct vcpu
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
- cpu = smp_processor_id();
|
|
|
|
- desc = (!is_pv_32on64_vcpu(v) ? gdt_table : compat_gdt_table)
|
|
|
|
- + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
|
|
|
|
+ desc = (!is_pv_32on64_vcpu(v)
|
|
|
|
+ ? this_cpu(gdt_table) : this_cpu(compat_gdt_table))
|
|
|
|
+ + LDT_ENTRY - FIRST_RESERVED_GDT_ENTRY;
|
|
|
|
_set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
|
|
|
|
- __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
|
|
|
|
+ __asm__ __volatile__ ( "lldt %%ax" : : "a" (LDT_ENTRY << 3) );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Index: xen-3.3.1-testing/xen/include/asm-x86/page.h
|
|
|
|
===================================================================
|
|
|
|
--- xen-3.3.1-testing.orig/xen/include/asm-x86/page.h
|
|
|
|
+++ xen-3.3.1-testing/xen/include/asm-x86/page.h
|
|
|
|
@@ -278,6 +278,7 @@ extern unsigned int m2p_compat_vstart;
|
|
|
|
#endif
|
|
|
|
void paging_init(void);
|
|
|
|
void setup_idle_pagetable(void);
|
|
|
|
+unsigned long clone_idle_pagetable(struct vcpu *);
|
|
|
|
#endif /* !defined(__ASSEMBLY__) */
|
|
|
|
|
|
|
|
#define _PAGE_PRESENT 0x001U
|