SHA256
1
0
forked from pool/xen

- bnc#735806 - VF doesn't work after hot-plug for many times

24448-x86-pt-irq-leak.patch
- Upstream patches from Jan
  24261-x86-cpuidle-Westmere-EX.patch
  24417-amd-erratum-573.patch
  24429-mceinj-tool.patch
  24447-x86-TXT-INIT-SIPI-delay.patch
  ioemu-9868-MSI-X.patch 

- bnc#732884 - remove private runlevel 4 from init scripts
  xen.no-default-runlevel-4.patch

- bnc#727515 - Fragmented packets hang network boot of HVM guest 
  ipxe-gcc45-warnings.patch
  ipxe-ipv4-fragment.patch
  ipxe-enable-nics.patch

- fate#310510 - fix xenpaging
  update xenpaging.autostart.patch, make changes with mem-swap-target 
  permanent
  update xenpaging.doc.patch, mention issues with live migration

- fate#310510 - fix xenpaging
  add xenpaging.evict_mmap_readonly.patch
  update xenpaging.error-handling.patch, reduce debug output

- bnc#736824 - Microcode patches for AMD's 15h processors panic the 
  system
  24189-x86-p2m-pod-locking.patch
  24412-x86-AMD-errata-model-shift.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=164
This commit is contained in:
Charles Arnold 2012-01-05 19:41:54 +00:00 committed by Git OBS Bridge
parent 3e2a25f4c0
commit c9e3853c04
68 changed files with 4696 additions and 479 deletions

View File

@ -16,10 +16,17 @@ a new physdevop hypercall).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Index: xen-4.1.2-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/physdev.c
+++ xen-4.1.2-testing/xen/arch/x86/physdev.c
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1322813126 -3600
# Node ID 60d4e257d04ba0bd663bbef5e93a97b6d8b66e54
# Parent 3f815406feb25a9348d8be9bc49fdc8c93ccb7c2
x86-64/mmcfg: remove __initdata annotation overlooked in 23749:e8d1c8f074ba
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -16,6 +16,10 @@
#include <xsm/xsm.h>
#include <asm/p2m.h>
@ -56,10 +63,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/physdev.c
case PHYSDEVOP_restore_msi: {
struct physdev_restore_msi restore_msi;
struct pci_dev *pdev;
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig.h
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig.h
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig.h
--- a/xen/arch/x86/x86_64/mmconfig.h
+++ b/xen/arch/x86/x86_64/mmconfig.h
@@ -84,6 +84,11 @@ extern int pci_mmcfg_config_num;
extern struct acpi_mcfg_allocation *pci_mmcfg_config;
@ -73,10 +78,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig.h
-void pci_mmcfg_arch_free(void);
+int pci_mmcfg_arch_enable(unsigned int);
+void pci_mmcfg_arch_disable(unsigned int);
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig-shared.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c
--- a/xen/arch/x86/x86_64/mmconfig-shared.c
+++ b/xen/arch/x86/x86_64/mmconfig-shared.c
@@ -22,10 +22,10 @@
#include <asm/e820.h>
#include <asm/msr.h>
@ -182,12 +185,12 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c
- /* for late to exit */
- if (known_bridge)
- return;
-
- if (pci_mmcfg_check_hostbridge())
- known_bridge = 1;
+ if (pci_mmcfg_check_hostbridge()) {
+ unsigned int i;
- if (pci_mmcfg_check_hostbridge())
- known_bridge = 1;
-
- if (!known_bridge) {
+ pci_mmcfg_arch_init();
+ for (i = 0; i < pci_mmcfg_config_num; ++i)
@ -244,10 +247,17 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c
}
/**
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig_64.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
--- a/xen/arch/x86/x86_64/mmconfig_64.c
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
@@ -23,7 +23,7 @@ struct mmcfg_virt {
char __iomem *virt;
};
static struct mmcfg_virt *pci_mmcfg_virt;
-static int __initdata mmcfg_pci_segment_shift;
+static unsigned int mmcfg_pci_segment_shift;
static char __iomem *get_virt(unsigned int seg, unsigned int *bus)
{
@@ -112,7 +112,8 @@ int pci_mmcfg_write(unsigned int seg, un
return 0;
}
@ -350,10 +360,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
- xfree(pci_mmcfg_virt);
- pci_mmcfg_virt = NULL;
-}
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/physdev.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/physdev.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/physdev.c
--- a/xen/arch/x86/x86_64/physdev.c
+++ b/xen/arch/x86/x86_64/physdev.c
@@ -54,6 +54,10 @@
#define physdev_get_free_pirq compat_physdev_get_free_pirq
#define physdev_get_free_pirq_t physdev_get_free_pirq_compat_t
@ -365,10 +373,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/physdev.c
#define COMPAT
#undef guest_handle_okay
#define guest_handle_okay compat_handle_okay
Index: xen-4.1.2-testing/xen/include/public/physdev.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/physdev.h
+++ xen-4.1.2-testing/xen/include/public/physdev.h
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -255,6 +255,19 @@ struct physdev_get_free_pirq {
typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
@ -389,10 +395,8 @@ Index: xen-4.1.2-testing/xen/include/public/physdev.h
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
Index: xen-4.1.2-testing/xen/include/xlat.lst
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xlat.lst
+++ xen-4.1.2-testing/xen/include/xlat.lst
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -60,6 +60,7 @@
! memory_map memory.h
! memory_reservation memory.h

View File

@ -25,7 +25,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
mfn_t mfn;
int ret;
+ p2m_unlock(p2m);
+ p2m_lock(p2m);
+
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);

View File

@ -0,0 +1,107 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1321459471 0
# Node ID 644ca5d3ec435f3372ce88a4de86909bd4033819
# Parent 1cbb3c1dfb3203f5344a6c1c52507b9e75af6742
x86/emulator: add feature checks for newer instructions
Certain instructions were introduced only after the i686 or original
x86-64 architecture, so we should not try to emulate them if the guest
is not seeing the respective feature enabled (or, worse, if the
underlying hardware doesn't support them). This affects fisttp,
movnti, and cmpxchg16b.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -955,6 +955,47 @@ in_protmode(
return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & EFLG_VM));
}
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+
+static bool_t vcpu_has(
+ unsigned int eax,
+ unsigned int reg,
+ unsigned int bit,
+ struct x86_emulate_ctxt *ctxt,
+ const struct x86_emulate_ops *ops)
+{
+ unsigned int ebx = 0, ecx = 0, edx = 0;
+ int rc;
+
+ fail_if(!ops->cpuid);
+ rc = ops->cpuid(&eax, &ebx, &ecx, &edx, ctxt);
+ if ( rc == X86EMUL_OKAY )
+ {
+ switch ( reg )
+ {
+ case EAX: reg = eax; break;
+ case EBX: reg = ebx; break;
+ case ECX: reg = ecx; break;
+ case EDX: reg = edx; break;
+ default: BUG();
+ }
+ if ( !(reg & (1U << bit)) )
+ rc = ~X86EMUL_OKAY;
+ }
+
+ done:
+ return rc == X86EMUL_OKAY;
+}
+
+#define vcpu_must_have(leaf, reg, bit) \
+ generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
+#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
+#define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0)
+#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13)
+
static int
in_longmode(
struct x86_emulate_ctxt *ctxt,
@@ -2738,6 +2779,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fildl", src.val);
break;
case 1: /* fisttp m32i */
+ vcpu_must_have_sse3();
ea.bytes = 4;
dst = ea;
dst.type = OP_MEM;
@@ -2846,6 +2888,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fldl", src.val);
break;
case 1: /* fisttp m64i */
+ vcpu_must_have_sse3();
ea.bytes = 8;
dst = ea;
dst.type = OP_MEM;
@@ -2953,6 +2996,7 @@ x86_emulate(
emulate_fpu_insn_memsrc("fild", src.val);
break;
case 1: /* fisttp m16i */
+ vcpu_must_have_sse3();
ea.bytes = 2;
dst = ea;
dst.type = OP_MEM;
@@ -4141,6 +4185,7 @@ x86_emulate(
case 0xc3: /* movnti */
/* Ignore the non-temporal hint for now. */
+ vcpu_must_have_sse2();
generate_exception_if(dst.bytes <= 2, EXC_UD, -1);
dst.val = src.val;
break;
@@ -4151,6 +4196,8 @@ x86_emulate(
generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ if ( op_bytes == 8 )
+ vcpu_must_have_cx16();
op_bytes *= 2;
/* Get actual old value. */

View File

@ -9,8 +9,8 @@ Signed-off-by: Keir Fraser <keir@xen.org>
---
xen/common/wait.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
xen/common/wait.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
@ -25,10 +25,16 @@ Index: xen-4.1.2-testing/xen/common/wait.c
#endif
};
@@ -45,6 +45,15 @@ int init_waitqueue_vcpu(struct vcpu *v)
@@ -41,11 +41,19 @@ int init_waitqueue_vcpu(struct vcpu *v)
{
struct waitqueue_vcpu *wqv;
- wqv = xmalloc(struct waitqueue_vcpu);
+ wqv = xzalloc(struct waitqueue_vcpu);
if ( wqv == NULL )
return -ENOMEM;
- memset(wqv, 0, sizeof(*wqv));
+#ifdef CONFIG_X86
+ wqv->stack = alloc_xenheap_page();
+ if ( wqv->stack == NULL )
@ -38,10 +44,10 @@ Index: xen-4.1.2-testing/xen/common/wait.c
+ }
+#endif
+
memset(wqv, 0, sizeof(*wqv));
INIT_LIST_HEAD(&wqv->list);
wqv->vcpu = v;
@@ -63,6 +72,9 @@ void destroy_waitqueue_vcpu(struct vcpu
@@ -63,6 +71,9 @@ void destroy_waitqueue_vcpu(struct vcpu
return;
BUG_ON(!list_empty(&wqv->list));
@ -51,7 +57,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
xfree(wqv);
v->waitqueue_vcpu = NULL;
@@ -115,7 +127,7 @@ static void __prepare_to_wait(struct wai
@@ -115,7 +126,7 @@ static void __prepare_to_wait(struct wai
: "=S" (wqv->esp)
: "c" (cpu_info), "D" (wqv->stack)
: "memory" );

View File

@ -0,0 +1,53 @@
changeset: 24178:1f2a06dbbb69
user: Keir Fraser <keir@xen.org>
date: Tue Nov 22 15:35:26 2011 +0000
files: xen/common/keyhandler.c
description:
debug: Add domain/vcpu pause_count info to 'd' key.
Signed-off-by: Keir Fraser <keir@xen.org>
---
xen/common/keyhandler.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/common/keyhandler.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/keyhandler.c
+++ xen-4.1.2-testing/xen/common/keyhandler.c
@@ -244,9 +244,10 @@ static void dump_domains(unsigned char k
unsigned int i;
printk("General information for domain %u:\n", d->domain_id);
cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
- printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
- "dirty_cpus=%s max_pages=%u\n",
+ printk(" refcnt=%d dying=%d pause_count=%d\n",
atomic_read(&d->refcnt), d->is_dying,
+ atomic_read(&d->pause_count));
+ printk(" nr_pages=%d xenheap_pages=%d dirty_cpus=%s max_pages=%u\n",
d->tot_pages, d->xenheap_pages, tmpstr, d->max_pages);
printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
@@ -270,17 +271,18 @@ static void dump_domains(unsigned char k
d->domain_id);
for_each_vcpu ( d, v )
{
- printk(" VCPU%d: CPU%d [has=%c] flags=%lx poll=%d "
+ printk(" VCPU%d: CPU%d [has=%c] poll=%d "
"upcall_pend = %02x, upcall_mask = %02x ",
v->vcpu_id, v->processor,
- v->is_running ? 'T':'F',
- v->pause_flags, v->poll_evtchn,
+ v->is_running ? 'T':'F', v->poll_evtchn,
vcpu_info(v, evtchn_upcall_pending),
vcpu_info(v, evtchn_upcall_mask));
cpuset_print(tmpstr, sizeof(tmpstr), v->vcpu_dirty_cpumask);
printk("dirty_cpus=%s ", tmpstr);
cpuset_print(tmpstr, sizeof(tmpstr), v->cpu_affinity);
printk("cpu_affinity=%s\n", tmpstr);
+ printk(" pause_count=%d pause_flags=%lx\n",
+ atomic_read(&v->pause_count), v->pause_flags);
arch_dump_vcpu_info(v);
periodic_timer_print(tmpstr, sizeof(tmpstr), v->periodic_period);
printk(" %s\n", tmpstr);

View File

@ -0,0 +1,50 @@
# HG changeset patch
# User Andres Lagar-Cavilla <andres@lagarcavilla.org>
# Date 1322148057 0
# Node ID 7da681c490e0a8a2b3f1fb311d254dc7ce618a43
# Parent b082fdc52ad7607d93b59148fb289aafe21f294b
x86/mm/p2m: fix pod locking
The path p2m-lookup -> p2m-pt->get_entry -> 1GB PoD superpage ->
pod_demand_populate ends in the pod code performing a p2m_set_entry with
no locks held (in order to split the 1GB superpage into 512 2MB ones)
Further, it calls p2m_unlock after that, which will break the spinlock.
This patch attempts to fix that.
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1244,7 +1244,6 @@ p2m_pod_demand_populate(struct p2m_domai
set_p2m_entry(p2m, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9,
p2m_populate_on_demand, p2m->default_access);
audit_p2m(p2m, 1);
- p2m_unlock(p2m);
return 0;
}
@@ -1602,7 +1601,8 @@ pod_retry_l3:
{
if ( q != p2m_query )
{
- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
+ if ( !p2m_pod_check_and_populate(p2m, gfn,
+ (l1_pgentry_t *) &l3e, 18, q) )
goto pod_retry_l3;
}
else
@@ -1733,7 +1733,8 @@ static mfn_t p2m_gfn_to_mfn_current(stru
/* The read has succeeded, so we know that mapping exists */
if ( q != p2m_query )
{
- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
+ if ( !p2m_pod_check_and_populate(p2m, gfn,
+ (l1_pgentry_t *) &l3e, 18, q) )
goto pod_retry_l3;
p2mt = p2m_invalid;
printk("%s: Allocate 1GB failed!\n", __func__);

View File

@ -16,7 +16,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -107,13 +107,16 @@ void wake_up(struct waitqueue_head *wq)
@@ -106,13 +106,16 @@ void wake_up(struct waitqueue_head *wq)
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
char *cpu_info = (char *)get_cpu_info();
@ -34,7 +34,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
"pop %%r15; pop %%r14; pop %%r13; pop %%r12; "
"pop %%r11; pop %%r10; pop %%r9; pop %%r8; "
"pop %%rbp; pop %%rdi; pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax"
@@ -121,13 +124,20 @@ static void __prepare_to_wait(struct wai
@@ -120,13 +123,20 @@ static void __prepare_to_wait(struct wai
"push %%eax; push %%ebx; push %%ecx; push %%edx; push %%edi; "
"push %%ebp; call 1f; "
"1: mov 8(%%esp),%%edi; mov 16(%%esp),%%ecx; mov %%esp,%%esi; "
@ -58,7 +58,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
@@ -163,6 +173,7 @@ void prepare_to_wait(struct waitqueue_he
@@ -162,6 +172,7 @@ void prepare_to_wait(struct waitqueue_he
struct vcpu *curr = current;
struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

View File

@ -21,7 +21,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -108,6 +108,8 @@ static void __prepare_to_wait(struct wai
@@ -107,6 +107,8 @@ static void __prepare_to_wait(struct wai
{
char *cpu_info = (char *)get_cpu_info();
@ -30,7 +30,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -174,14 +176,13 @@ void prepare_to_wait(struct waitqueue_he
@@ -173,14 +175,13 @@ void prepare_to_wait(struct waitqueue_he
struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
ASSERT(!in_atomic());

View File

@ -27,7 +27,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
#endif
};
@@ -107,9 +109,19 @@ void wake_up(struct waitqueue_head *wq)
@@ -106,9 +108,19 @@ void wake_up(struct waitqueue_head *wq)
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
char *cpu_info = (char *)get_cpu_info();
@ -47,7 +47,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
asm volatile (
#ifdef CONFIG_X86_64
"push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -145,6 +157,7 @@ static void __prepare_to_wait(struct wai
@@ -144,6 +156,7 @@ static void __prepare_to_wait(struct wai
static void __finish_wait(struct waitqueue_vcpu *wqv)
{
wqv->esp = NULL;
@ -55,7 +55,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
}
void check_wakeup_from_wait(void)
@@ -156,6 +169,20 @@ void check_wakeup_from_wait(void)
@@ -155,6 +168,20 @@ void check_wakeup_from_wait(void)
if ( likely(wqv->esp == NULL) )
return;

View File

@ -17,7 +17,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -88,13 +88,13 @@ void init_waitqueue_head(struct waitqueu
@@ -87,13 +87,13 @@ void init_waitqueue_head(struct waitqueu
INIT_LIST_HEAD(&wq->list);
}
@ -33,7 +33,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
{
wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
list_del_init(&wqv->list);
@@ -104,6 +104,16 @@ void wake_up(struct waitqueue_head *wq)
@@ -103,6 +103,16 @@ void wake_up(struct waitqueue_head *wq)
spin_unlock(&wq->lock);
}

View File

@ -19,7 +19,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/wait.c
+++ xen-4.1.2-testing/xen/common/wait.c
@@ -88,6 +88,11 @@ void init_waitqueue_head(struct waitqueu
@@ -87,6 +87,11 @@ void init_waitqueue_head(struct waitqueu
INIT_LIST_HEAD(&wq->list);
}
@ -31,7 +31,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
struct waitqueue_vcpu *wqv;
@@ -99,6 +104,7 @@ void wake_up_nr(struct waitqueue_head *w
@@ -98,6 +103,7 @@ void wake_up_nr(struct waitqueue_head *w
wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
list_del_init(&wqv->list);
vcpu_unpause(wqv->vcpu);
@ -39,7 +39,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
}
spin_unlock(&wq->lock);
@@ -219,6 +225,7 @@ void prepare_to_wait(struct waitqueue_he
@@ -218,6 +224,7 @@ void prepare_to_wait(struct waitqueue_he
spin_lock(&wq->lock);
list_add_tail(&wqv->list, &wq->list);
vcpu_pause_nosync(curr);
@ -47,7 +47,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c
spin_unlock(&wq->lock);
}
@@ -237,6 +244,7 @@ void finish_wait(struct waitqueue_head *
@@ -236,6 +243,7 @@ void finish_wait(struct waitqueue_head *
{
list_del_init(&wqv->list);
vcpu_unpause(curr);

View File

@ -0,0 +1,23 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1322645021 -3600
# Node ID 64088ba60263d3a623851b43a872c93c71cc3cbf
# Parent df7cec2c6c03f07932555954948ce7c8d09e88f4
x86/cpuidle: add Westmere-EX support to hw residencies reading logic
This is in accordance with
http://software.intel.com/en-us/articles/intel-processor-identification-with-cpuid-model-and-family-numbers/
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Haitao Shan <maillists.shan@gmail.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -113,6 +113,7 @@ static void do_get_hw_residencies(void *
/* Westmere */
case 0x25:
case 0x2C:
+ case 0x2F:
GET_PC3_RES(hw_res->pc3);
GET_PC6_RES(hw_res->pc6);
GET_PC7_RES(hw_res->pc7);

View File

@ -25,10 +25,8 @@ Committed-by: Keir Fraser <keir@xen.org>
xen/include/xen/sched.h | 18 ++++++++++++------
6 files changed, 34 insertions(+), 23 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3909,7 +3909,7 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
@ -47,10 +45,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
return 1;
}
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -252,7 +252,7 @@ int mem_event_domctl(struct domain *d, x
{
case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
@ -78,10 +74,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
}
break;
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_sharing.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -322,12 +322,12 @@ static struct page_info* mem_sharing_all
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
@ -106,11 +100,9 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c
/* Unpause domain/vcpu */
if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
@@ -2995,7 +2995,7 @@ void p2m_mem_paging_drop_page(struct p2m
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2996,7 +2996,7 @@ void p2m_mem_paging_drop_page(struct p2m
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
@ -119,7 +111,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
{
/* Send release notification to pager */
memset(&req, 0, sizeof(req));
@@ -3003,7 +3003,7 @@ void p2m_mem_paging_drop_page(struct p2m
@@ -3004,7 +3004,7 @@ void p2m_mem_paging_drop_page(struct p2m
req.gfn = gfn;
req.vcpu_id = v->vcpu_id;
@ -128,7 +120,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
}
}
@@ -3038,7 +3038,7 @@ void p2m_mem_paging_populate(struct p2m_
@@ -3039,7 +3039,7 @@ void p2m_mem_paging_populate(struct p2m_
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
@ -137,7 +129,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
return;
memset(&req, 0, sizeof(req));
@@ -3069,7 +3069,7 @@ void p2m_mem_paging_populate(struct p2m_
@@ -3070,7 +3070,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
@ -146,7 +138,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
return;
}
@@ -3078,7 +3078,7 @@ void p2m_mem_paging_populate(struct p2m_
@@ -3079,7 +3079,7 @@ void p2m_mem_paging_populate(struct p2m_
req.p2mt = p2mt;
req.vcpu_id = v->vcpu_id;
@ -155,7 +147,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
}
/**
@@ -3156,7 +3156,7 @@ void p2m_mem_paging_resume(struct p2m_do
@@ -3157,7 +3157,7 @@ void p2m_mem_paging_resume(struct p2m_do
mfn_t mfn;
/* Pull the response off the ring */
@ -164,7 +156,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
@@ -3209,7 +3209,7 @@ void p2m_mem_access_check(unsigned long
@@ -3210,7 +3210,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
@ -173,7 +165,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
if ( res < 0 )
{
/* No listener */
@@ -3253,7 +3253,7 @@ void p2m_mem_access_check(unsigned long
@@ -3254,7 +3254,7 @@ void p2m_mem_access_check(unsigned long
req.vcpu_id = v->vcpu_id;
@ -182,7 +174,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
/* VCPU paused, mem event request sent */
}
@@ -3263,7 +3263,7 @@ void p2m_mem_access_resume(struct p2m_do
@@ -3264,7 +3264,7 @@ void p2m_mem_access_resume(struct p2m_do
struct domain *d = p2m->domain;
mem_event_response_t rsp;
@ -191,10 +183,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
Index: xen-4.1.2-testing/xen/common/domain.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/domain.c
+++ xen-4.1.2-testing/xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -298,6 +298,10 @@ struct domain *domain_create(
init_status |= INIT_gnttab;
@ -214,10 +204,8 @@ Index: xen-4.1.2-testing/xen/common/domain.c
if ( init_status & INIT_arch )
arch_domain_destroy(d);
if ( init_status & INIT_gnttab )
Index: xen-4.1.2-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/xen/sched.h
+++ xen-4.1.2-testing/xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -201,6 +201,16 @@ struct mem_event_domain
int xen_port;
};

View File

@ -0,0 +1,88 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1322725849 -3600
# Node ID 76ea126f21724b72c120aff59460f7bbe9e6960d
# Parent 07cf778d517fdf661a34027af653a489489bf222
x86/emulator: properly handle lzcnt and tzcnt
These instructions are prefix selected flavors of bsf and bsr
respectively, and hence the presence of the F3 prefix must be handled
in the emulation code in order to avoid running into problems on newer
CPUs.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -990,6 +990,9 @@ static bool_t vcpu_has(
return rc == X86EMUL_OKAY;
}
+#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops)
+#define vcpu_has_bmi1() vcpu_has(0x00000007, EBX, 3, ctxt, ops)
+
#define vcpu_must_have(leaf, reg, bit) \
generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26)
@@ -4114,13 +4117,24 @@ x86_emulate(
dst.val = (uint8_t)src.val;
break;
- case 0xbc: /* bsf */ {
- int zf;
+ case 0xbc: /* bsf or tzcnt */ {
+ bool_t zf;
asm ( "bsf %2,%0; setz %b1"
: "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
+ : "r" (src.val) );
_regs.eflags &= ~EFLG_ZF;
- if ( zf )
+ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_bmi1() )
+ {
+ _regs.eflags &= ~EFLG_CF;
+ if ( zf )
+ {
+ _regs.eflags |= EFLG_CF;
+ dst.val = op_bytes * 8;
+ }
+ else if ( !dst.val )
+ _regs.eflags |= EFLG_ZF;
+ }
+ else if ( zf )
{
_regs.eflags |= EFLG_ZF;
dst.type = OP_NONE;
@@ -4128,13 +4142,28 @@ x86_emulate(
break;
}
- case 0xbd: /* bsr */ {
- int zf;
+ case 0xbd: /* bsr or lzcnt */ {
+ bool_t zf;
asm ( "bsr %2,%0; setz %b1"
: "=r" (dst.val), "=q" (zf)
- : "r" (src.val), "1" (0) );
+ : "r" (src.val) );
_regs.eflags &= ~EFLG_ZF;
- if ( zf )
+ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_lzcnt() )
+ {
+ _regs.eflags &= ~EFLG_CF;
+ if ( zf )
+ {
+ _regs.eflags |= EFLG_CF;
+ dst.val = op_bytes * 8;
+ }
+ else
+ {
+ dst.val = op_bytes * 8 - 1 - dst.val;
+ if ( !dst.val )
+ _regs.eflags |= EFLG_ZF;
+ }
+ }
+ else if ( zf )
{
_regs.eflags |= EFLG_ZF;
dst.type = OP_NONE;

View File

@ -0,0 +1,58 @@
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1322738484 -3600
# Node ID 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317
# Parent 89f7273681696022cc44db4f2ec5b22560482869
X86: expose Intel new features to dom0
This patch exposes new Intel features to dom0, including
FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -848,8 +848,11 @@ static void pv_cpuid(struct cpu_user_reg
break;
case 7:
if ( regs->ecx == 0 )
- b &= (cpufeat_mask(X86_FEATURE_FSGSBASE) |
- cpufeat_mask(X86_FEATURE_ERMS));
+ b &= (cpufeat_mask(X86_FEATURE_BMI1) |
+ cpufeat_mask(X86_FEATURE_AVX2) |
+ cpufeat_mask(X86_FEATURE_BMI2) |
+ cpufeat_mask(X86_FEATURE_ERMS) |
+ cpufeat_mask(X86_FEATURE_FSGSBASE));
else
b = 0;
a = c = d = 0;
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -93,6 +93,7 @@
#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental Streaming SIMD Extensions-3 */
#define X86_FEATURE_CID (4*32+10) /* Context ID */
+#define X86_FEATURE_FMA (4*32+12) /* Fused Multiply Add */
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
@@ -100,6 +101,7 @@
#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_X2APIC (4*32+21) /* Extended xAPIC */
+#define X86_FEATURE_MOVBE (4*32+22) /* movbe instruction */
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline Timer */
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
@@ -144,7 +146,10 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
#define X86_FEATURE_FSGSBASE (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_BMI1 (7*32+ 3) /* 1st bit manipulation extensions */
+#define X86_FEATURE_AVX2 (7*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */
#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)

View File

@ -0,0 +1,49 @@
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1322738563 -3600
# Node ID d9cb04ed55398ea4043c85573460afaf023aa1e9
# Parent 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317
X86: Disable PCID/INVPCID for dom0
PCID (Process-context identifier) is a facility by which a logical
processor may cache information for multiple linear-address spaces.
INVPCID is a new instruction to invalidate the TLB. Refer to the latest Intel SDM
http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html
We disable PCID/INVPCID for dom0 and pv. Exposing them into dom0 and pv
may result in performance regression, and it would trigger GP or UD
depending on whether the platform supports INVPCID or not.
This patch disables PCID/INVPCID for dom0.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -836,6 +836,7 @@ static void pv_cpuid(struct cpu_user_reg
__clear_bit(X86_FEATURE_CX16 % 32, &c);
__clear_bit(X86_FEATURE_XTPR % 32, &c);
__clear_bit(X86_FEATURE_PDCM % 32, &c);
+ __clear_bit(X86_FEATURE_PCID % 32, &c);
__clear_bit(X86_FEATURE_DCA % 32, &c);
if ( !xsave_enabled(current) )
{
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -97,6 +97,7 @@
#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */
+#define X86_FEATURE_PCID (4*32+17) /* Process Context ID */
#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
@@ -151,6 +152,7 @@
#define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */
#define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)

View File

@ -0,0 +1,38 @@
# HG changeset patch
# User Tim Deegan <tim@xen.org>
# Date 1322749036 0
# Node ID a06cda9fb25f2d7b7b5c7da170813e4a8bb0cd67
# Parent 75f4e4d9f039ea656051e6dfd73e40d4cb32896b
x86/mm: Don't lose track of the log dirty bitmap
hap_log_dirty_init unconditionally sets the top of the log dirty
bitmap to INVALID_MFN. If there had been a bitmap allocated, it is
then leaked, and the host crashes on an ASSERT when the domain is
cleaned up.
Signed-off-by: Tim Deegan <tim@xen.org>
Acked-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Committed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -665,7 +665,6 @@ void paging_log_dirty_init(struct domain
d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
}
/* This function fress log dirty bitmap resources. */
@@ -686,6 +685,11 @@ int paging_domain_init(struct domain *d,
if ( (rc = p2m_init(d)) != 0 )
return rc;
+ /* This must be initialized separately from the rest of the
+ * log-dirty init code as that can be called more than once and we
+ * don't want to leak any active log-dirty bitmaps */
+ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
+
/* The order of the *_init calls below is important, as the later
* ones may rewrite some common fields. Shadow pagetables are the
* default... */

View File

@ -0,0 +1,139 @@
changeset: 24327:8529bca7a3f0
parent: 24322:6bac46816504
user: Andres Lagar-Cavilla <andres@lagarcavilla.org>
date: Thu Dec 01 18:14:24 2011 +0000
files: xen/arch/x86/mm/mem_event.c xen/arch/x86/mm/mem_paging.c xen/arch/x86/mm/p2m.c xen/include/asm-x86/p2m.h xen/include/public/domctl.h
description:
After preparing a page for page-in, allow immediate fill-in of the page contents
p2m_mem_paging_prep ensures that an mfn is backing the paged-out gfn, and
transitions to the next state in the paging state machine for that page.
Foreign mappings of the gfn will now succeed. This is the key idea, as
it allows the pager to now map the gfn and fill in its contents.
Unfortunately, it also allows any other foreign mapper to map the gfn and read
its contents. This is particularly dangerous when the populate is launched
by a foreign mapper in the first place, which will be actively retrying the
map operation and might race with the pager. Qemu-dm being a prime example.
Fix the race by allowing a buffer to be optionally passed in the prep
operation, and having the hypervisor memcpy from that buffer into the newly
prepped page before promoting the gfn type.
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
---
xen/arch/x86/mm/mem_event.c | 2 +-
xen/arch/x86/mm/mem_paging.c | 2 +-
xen/arch/x86/mm/p2m.c | 32 ++++++++++++++++++++++++++++++--
xen/include/asm-x86/p2m.h | 2 +-
xen/include/public/domctl.h | 8 ++++++--
5 files changed, 39 insertions(+), 7 deletions(-)
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -45,7 +45,7 @@ static int mem_event_enable(struct domai
struct domain *dom_mem_event = current->domain;
struct vcpu *v = current;
unsigned long ring_addr = mec->ring_addr;
- unsigned long shared_addr = mec->shared_addr;
+ unsigned long shared_addr = mec->u.shared_addr;
l1_pgentry_t l1e;
unsigned long gfn;
p2m_type_t p2mt;
--- a/xen/arch/x86/mm/mem_paging.c
+++ b/xen/arch/x86/mm/mem_paging.c
@@ -50,7 +50,7 @@ int mem_paging_domctl(struct domain *d,
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP:
{
unsigned long gfn = mec->gfn;
- rc = p2m_mem_paging_prep(p2m, gfn);
+ rc = p2m_mem_paging_prep(p2m, gfn, mec->u.buffer);
}
break;
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3093,13 +3093,20 @@ void p2m_mem_paging_populate(struct p2m_
* mfn if populate was called for gfn which was nominated but not evicted. In
* this case only the p2mt needs to be forwarded.
*/
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
+int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t buffer)
{
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
- int ret;
+ int ret, page_extant = 1;
+ const void *user_ptr = (const void *) buffer;
+
+ if ( user_ptr )
+ /* Sanity check the buffer and bail out early if trouble */
+ if ( (buffer & (PAGE_SIZE - 1)) ||
+ (!access_ok(user_ptr, PAGE_SIZE)) )
+ return -EINVAL;
p2m_lock(p2m);
@@ -3119,6 +3126,28 @@ int p2m_mem_paging_prep(struct p2m_domai
if ( unlikely(page == NULL) )
goto out;
mfn = page_to_mfn(page);
+ page_extant = 0;
+ }
+
+ /* If we were given a buffer, now is the time to use it */
+ if ( !page_extant && user_ptr )
+ {
+ void *guest_map;
+ int rc;
+
+ ASSERT( mfn_valid(mfn) );
+ guest_map = map_domain_page(mfn_x(mfn));
+ rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE);
+ unmap_domain_page(guest_map);
+ if ( rc )
+ {
+ gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u "
+ "bytes left %d\n",
+ gfn, p2m->domain->domain_id, rc);
+ ret = -EFAULT;
+ put_page(page); /* Don't leak pages */
+ goto out;
+ }
}
/* Fix p2m mapping */
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -524,7 +524,7 @@ void p2m_mem_paging_drop_page(struct p2m
/* Start populating a paged out frame */
void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn);
/* Prepare the p2m for paging a frame in */
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn);
+int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t buffer);
/* Resume normal operation (in case a domain was paused) */
void p2m_mem_paging_resume(struct p2m_domain *p2m);
#else
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -741,8 +741,12 @@ struct xen_domctl_mem_event_op {
uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */
- /* OP_ENABLE */
- uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */
+ union {
+ /* OP_ENABLE IN: Virtual address of shared page */
+ uint64_aligned_t shared_addr;
+ /* PAGING_PREP IN: buffer to immediately fill page in */
+ uint64_aligned_t buffer;
+ } u;
uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */
/* Other OPs */

View File

@ -0,0 +1,86 @@
changeset: 24328:8ad47b48047d
user: Andres Lagar-Cavilla <andres@lagarcavilla.org>
date: Thu Dec 01 18:14:24 2011 +0000
files: tools/libxc/xc_mem_event.c tools/libxc/xc_mem_paging.c tools/libxc/xenctrl.h
description:
Tools: Libxc wrappers to automatically fill in paged out page contents on prepare
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Tim Deegan <tim@xen.org>
---
tools/libxc/xc_mem_event.c | 4 ++--
tools/libxc/xc_mem_paging.c | 23 +++++++++++++++++++++++
tools/libxc/xenctrl.h | 2 ++
3 files changed, 27 insertions(+), 2 deletions(-)
Index: xen-4.1.2-testing/tools/libxc/xc_mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_event.c
+++ xen-4.1.2-testing/tools/libxc/xc_mem_event.c
@@ -24,7 +24,7 @@
#include "xc_private.h"
int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
- unsigned int mode, void *shared_page,
+ unsigned int mode, void *page,
void *ring_page, unsigned long gfn)
{
DECLARE_DOMCTL;
@@ -34,7 +34,7 @@ int xc_mem_event_control(xc_interface *x
domctl.u.mem_event_op.op = op;
domctl.u.mem_event_op.mode = mode;
- domctl.u.mem_event_op.shared_addr = (unsigned long)shared_page;
+ domctl.u.mem_event_op.u.shared_addr = (unsigned long)page;
domctl.u.mem_event_op.ring_addr = (unsigned long)ring_page;
domctl.u.mem_event_op.gfn = gfn;
Index: xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_paging.c
+++ xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
@@ -65,6 +65,29 @@ int xc_mem_paging_prep(xc_interface *xch
NULL, NULL, gfn);
}
+int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
+ unsigned long gfn, void *buffer)
+{
+ int rc;
+
+ if ( !buffer )
+ return -EINVAL;
+
+ if ( ((unsigned long) buffer) & (XC_PAGE_SIZE - 1) )
+ return -EINVAL;
+
+ if ( mlock(buffer, XC_PAGE_SIZE) )
+ return -errno;
+
+ rc = xc_mem_event_control(xch, domain_id,
+ XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP,
+ XEN_DOMCTL_MEM_EVENT_OP_PAGING,
+ buffer, NULL, gfn);
+
+ (void)munlock(buffer, XC_PAGE_SIZE);
+ return rc;
+}
+
int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id, unsigned long gfn)
{
return xc_mem_event_control(xch, domain_id,
Index: xen-4.1.2-testing/tools/libxc/xenctrl.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h
+++ xen-4.1.2-testing/tools/libxc/xenctrl.h
@@ -1742,6 +1742,8 @@ int xc_mem_paging_nominate(xc_interface
unsigned long gfn);
int xc_mem_paging_evict(xc_interface *xch, domid_t domain_id, unsigned long gfn);
int xc_mem_paging_prep(xc_interface *xch, domid_t domain_id, unsigned long gfn);
+int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
+ unsigned long gfn, void *buffer);
int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id,
unsigned long gfn);

View File

@ -0,0 +1,100 @@
changeset: 24329:a8f5faa127c4
user: Andres Lagar-Cavilla <andres@lagarcavilla.org>
date: Thu Dec 01 18:14:24 2011 +0000
files: tools/xenpaging/xenpaging.c
description:
Teach xenpaging to use the new and non-racy xc_mem_paging_load interface
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Acked-by: Olaf Hering <olaf@aepfle.de>
Committed-by: Tim Deegan <tim@xen.org>
---
tools/xenpaging/xenpaging.c | 43 +++++++++++++++++++++----------------------
1 file changed, 21 insertions(+), 22 deletions(-)
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -45,6 +45,7 @@ static char *dom_path;
static char watch_token[16];
static char *filename;
static int interrupted;
+static void *paging_buffer = NULL;
static void unlink_pagefile(void)
{
@@ -438,6 +439,13 @@ static xenpaging_t *xenpaging_init(int a
goto err;
}
+ paging_buffer = init_page();
+ if ( !paging_buffer )
+ {
+ ERROR("Creating page aligned load buffer");
+ goto err;
+ }
+
return paging;
err:
@@ -649,10 +657,20 @@ static int xenpaging_populate_page(xenpa
unsigned char oom = 0;
DPRINTF("populate_page < gfn %"PRI_xen_pfn" pageslot %d\n", gfn, i);
+
+ /* Read page */
+ ret = read_page(fd, paging_buffer, i);
+ if ( ret != 0 )
+ {
+ ERROR("Error reading page");
+ goto out;
+ }
+
do
{
/* Tell Xen to allocate a page for the domain */
- ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id, gfn);
+ ret = xc_mem_paging_load(xch, paging->mem_event.domain_id, gfn,
+ paging_buffer);
if ( ret != 0 )
{
if ( errno == ENOMEM )
@@ -662,33 +680,14 @@ static int xenpaging_populate_page(xenpa
sleep(1);
continue;
}
- PERROR("Error preparing %"PRI_xen_pfn" for page-in", gfn);
- goto out_map;
+ PERROR("Error loading %"PRI_xen_pfn" during page-in", gfn);
+ goto out;
}
}
while ( ret && !interrupted );
- /* Map page */
- ret = -EFAULT;
- page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
- PROT_READ | PROT_WRITE, &gfn, 1);
- if ( page == NULL )
- {
- PERROR("Error mapping page %"PRI_xen_pfn": page is null", gfn);
- goto out_map;
- }
-
- /* Read page */
- ret = read_page(fd, page, i);
- if ( ret != 0 )
- {
- PERROR("Error reading page %"PRI_xen_pfn"", gfn);
- goto out;
- }
out:
- munmap(page, PAGE_SIZE);
- out_map:
return ret;
}

View File

@ -1,27 +0,0 @@
changeset: 24341:60d4e257d04b
user: Jan Beulich <jbeulich@suse.com>
date: Fri Dec 02 09:05:26 2011 +0100
files: xen/arch/x86/x86_64/mmconfig_64.c
description:
x86-64/mmcfg: remove __initdata annotation overlooked in 23749:e8d1c8f074ba
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/x86_64/mmconfig_64.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig_64.c
+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c
@@ -23,7 +23,7 @@ struct mmcfg_virt {
char __iomem *virt;
};
static struct mmcfg_virt *pci_mmcfg_virt;
-static int __initdata mmcfg_pci_segment_shift;
+static unsigned int mmcfg_pci_segment_shift;
static char __iomem *get_virt(unsigned int seg, unsigned int *bus)
{

View File

@ -1,8 +1,8 @@
changeset: 24344:72f4e4cb7440
user: Keir Fraser <keir@xen.org>
date: Fri Dec 02 06:31:14 2011 -0800
files: tools/libxc/xc_cpuid_x86.c tools/misc/xen-detect.c
description:
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1322836274 28800
# Node ID 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac
# Parent 109b99239b21275ee2249873dcdb9a413741142d
tools/x86_64: Fix cpuid() inline asm to not clobber stack's red zone
Pushing stuff onto the stack on x86-64 when we do not specify
@ -13,16 +13,17 @@ for x86-64.
Signed-off-by: Keir Fraser <keir@xen.org>
Acked-by: Jan Beulich <jbeulich@novell.com>
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1322844002 28800
# Node ID 491c3ebf1d371d03fdd0aabe82b0f422037c67ba
# Parent 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac
tools/libxc: Fix x86_32 build breakage in previous changeset.
---
tools/libxc/xc_cpuid_x86.c | 18 +++++++++---------
tools/misc/xen-detect.c | 17 ++++++++++-------
2 files changed, 19 insertions(+), 16 deletions(-)
Signed-off-by: Keir Fraser <keir@xen.org>
Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_cpuid_x86.c
+++ xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -42,23 +42,23 @@ static int hypervisor_is_64bit(xc_interf
static void cpuid(const unsigned int *input, unsigned int *regs)
{
@ -44,8 +45,7 @@ Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
- "pop %%rdx; pop %%rbx\n\t"
-#endif
: "=a" (regs[0]), "=c" (regs[2])
- : "0" (input[0]), "1" (count), "S" (regs)
+ : "0" (input[0]), "1" (count), "S" (_regs)
: "0" (input[0]), "1" (count), "S" (regs)
: "memory" );
+#else
+ asm (
@ -56,10 +56,8 @@ Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
}
/* Get the manufacturer brand name of the host processor. */
Index: xen-4.1.2-testing/tools/misc/xen-detect.c
===================================================================
--- xen-4.1.2-testing.orig/tools/misc/xen-detect.c
+++ xen-4.1.2-testing/tools/misc/xen-detect.c
--- a/tools/misc/xen-detect.c
+++ b/tools/misc/xen-detect.c
@@ -35,18 +35,21 @@
static void cpuid(uint32_t idx, uint32_t *regs, int pv_context)

View File

@ -1,28 +0,0 @@
changeset: 24345:491c3ebf1d37
tag: tip
user: Keir Fraser <keir@xen.org>
date: Fri Dec 02 08:40:02 2011 -0800
files: tools/libxc/xc_cpuid_x86.c
description:
tools/libxc: Fix x86_32 build breakage in previous changeset.
Signed-off-by: Keir Fraser <keir@xen.org>
---
tools/libxc/xc_cpuid_x86.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_cpuid_x86.c
+++ xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c
@@ -51,7 +51,7 @@ static void cpuid(const unsigned int *in
"mov %%edx,12(%4)\n\t"
"pop %%edx; pop %%ebx\n\t"
: "=a" (regs[0]), "=c" (regs[2])
- : "0" (input[0]), "1" (count), "S" (_regs)
+ : "0" (input[0]), "1" (count), "S" (regs)
: "memory" );
#else
asm (

View File

@ -0,0 +1,49 @@
References: bnc#711219
# HG changeset patch
# User Xudong Hao <xudong.hao@intel.com>
# Date 1323113706 0
# Node ID 832fa3f3543298a7125cd5f996d1e28dd7ba47b1
# Parent 60ea36c0512b779f291bb6c007e1f05c16054ec2
tools/firmware: remove "_PS0/3" Method
Do not expose the ACPI power management "_PS0/3" Method to guest
firmware. According to section 3.4 of the APCI specification 4.0, PCI
device control the device power through its own specification but not
through APCI.
Qemu pushing "_PS0/3" to the guest will cause a mess between ACPI PM and
PCI PM, as a result of an incorrect ACPI table shipped with the guest
BIOS; it may cause a failure of the PCI device PM state transition (from
PCI_UNKNOWN to PCI_D0).
Signed-off-by: Xudong Hao <xudong.hao@intel.com>
Signed-off-by: Haitao Shan <haitao.shan@intel.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/tools/firmware/hvmloader/acpi/mk_dsdt.c
+++ b/tools/firmware/hvmloader/acpi/mk_dsdt.c
@@ -251,8 +251,6 @@ int main(int argc, char **argv)
* the ACPI event:
* _EJ0: eject a device
* _STA: return a device's status, e.g. enabled or removed
- * Other methods are optional:
- * _PS0/3: put them here for debug purpose
*
* Eject button would generate a general-purpose event, then the
* control method for this event uses Notify() to inform OSPM which
@@ -271,14 +269,6 @@ int main(int argc, char **argv)
stmt("Name", "_ADR, 0x%08x", ((slot & ~7) << 13) | (slot & 7));
/* _SUN == dev */
stmt("Name", "_SUN, 0x%08x", slot >> 3);
- push_block("Method", "_PS0, 0");
- stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
- stmt("Store", "0x80, \\_GPE.DPT2");
- pop_block();
- push_block("Method", "_PS3, 0");
- stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
- stmt("Store", "0x83, \\_GPE.DPT2");
- pop_block();
push_block("Method", "_EJ0, 1");
stmt("Store", "0x%02x, \\_GPE.DPT1", slot);
stmt("Store", "0x88, \\_GPE.DPT2");

View File

@ -0,0 +1,29 @@
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@citrix.com>
# Date 1323114166 0
# Node ID 9961a6d5356a57685b06f65133c6ade5041e3356
# Parent 832fa3f3543298a7125cd5f996d1e28dd7ba47b1
KEXEC: fix kexec_get_range_compat to fail vocally.
Fail with -ERANGE rather than silently truncating 64bit values (a
physical address and size) into 32bit integers for dom0 to consume.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Simplify the bitwise arithmetic a bit.
Signed-off-by: Keir Fraser <keir@xen.org>
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -395,6 +395,10 @@ static int kexec_get_range_compat(XEN_GU
ret = kexec_get_range_internal(&range);
+ /* Dont silently truncate physical addresses or sizes. */
+ if ( (range.start | range.size) & ~(unsigned long)(~0u) )
+ return -ERANGE;
+
if ( ret == 0 ) {
XLAT_kexec_range(&compat_range, &range);
if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )

View File

@ -0,0 +1,94 @@
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1323170838 0
# Node ID a0befa32e927cc147aaee9bce42c51f53580a875
# Parent 9961a6d5356a57685b06f65133c6ade5041e3356
X86: expose Intel new features to pv/hvm
Intel recently release some new features, including
FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE.
Refer to http://software.intel.com/file/36945
This patch expose these new features to pv and hvm.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -74,6 +74,7 @@
#define X86_FEATURE_TM2 8 /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 9 /* Supplemental Streaming SIMD Exts-3 */
#define X86_FEATURE_CID 10 /* Context ID */
+#define X86_FEATURE_FMA 12 /* Fused Multiply Add */
#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
@@ -81,6 +82,7 @@
#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_X2APIC 21 /* x2APIC */
+#define X86_FEATURE_MOVBE 22 /* movbe instruction */
#define X86_FEATURE_POPCNT 23 /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE 24 /* "tdt" TSC Deadline Timer */
#define X86_FEATURE_AES 25 /* AES acceleration instructions */
@@ -125,7 +127,10 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */
+#define X86_FEATURE_AVX2 5 /* AVX2 instructions */
#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
#endif /* __LIBXC_CPUFEATURE_H */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -148,7 +148,8 @@ static void intel_xc_cpuid_policy(
int is_64bit = hypervisor_is_64bit(xch) && is_pae;
/* Only a few features are advertised in Intel's 0x80000001. */
- regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0);
+ regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0) |
+ bitmaskof(X86_FEATURE_ABM);
regs[3] &= ((is_pae ? bitmaskof(X86_FEATURE_NX) : 0) |
(is_64bit ? bitmaskof(X86_FEATURE_LM) : 0) |
(is_64bit ? bitmaskof(X86_FEATURE_SYSCALL) : 0) |
@@ -256,9 +257,11 @@ static void xc_cpuid_hvm_policy(
regs[2] &= (bitmaskof(X86_FEATURE_XMM3) |
bitmaskof(X86_FEATURE_PCLMULQDQ) |
bitmaskof(X86_FEATURE_SSSE3) |
+ bitmaskof(X86_FEATURE_FMA) |
bitmaskof(X86_FEATURE_CX16) |
bitmaskof(X86_FEATURE_SSE4_1) |
bitmaskof(X86_FEATURE_SSE4_2) |
+ bitmaskof(X86_FEATURE_MOVBE) |
bitmaskof(X86_FEATURE_POPCNT) |
bitmaskof(X86_FEATURE_AES) |
bitmaskof(X86_FEATURE_F16C) |
@@ -303,7 +306,10 @@ static void xc_cpuid_hvm_policy(
case 0x00000007: /* Intel-defined CPU features */
if ( input[1] == 0 ) {
- regs[1] &= (bitmaskof(X86_FEATURE_SMEP) |
+ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_AVX2) |
+ bitmaskof(X86_FEATURE_SMEP) |
+ bitmaskof(X86_FEATURE_BMI2) |
bitmaskof(X86_FEATURE_ERMS) |
bitmaskof(X86_FEATURE_FSGSBASE));
} else
@@ -427,8 +433,11 @@ static void xc_cpuid_pv_policy(
case 7:
if ( input[1] == 0 )
- regs[1] &= (bitmaskof(X86_FEATURE_FSGSBASE) |
- bitmaskof(X86_FEATURE_ERMS));
+ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_AVX2) |
+ bitmaskof(X86_FEATURE_BMI2) |
+ bitmaskof(X86_FEATURE_ERMS) |
+ bitmaskof(X86_FEATURE_FSGSBASE));
else
regs[1] = 0;
regs[0] = regs[2] = regs[3] = 0;

View File

@ -0,0 +1,39 @@
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1323170884 0
# Node ID d313582d4fa2157332f1d50e599aebca36c41b3b
# Parent a0befa32e927cc147aaee9bce42c51f53580a875
X86: Disable PCID/INVPCID for pv
This patch disable PCID/INVPCID for pv.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -78,6 +78,7 @@
#define X86_FEATURE_CX16 13 /* CMPXCHG16B */
#define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */
#define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */
+#define X86_FEATURE_PCID 17 /* Process Context ID */
#define X86_FEATURE_DCA 18 /* Direct Cache Access */
#define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */
@@ -132,5 +133,6 @@
#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID 10 /* Invalidate Process Context ID */
#endif /* __LIBXC_CPUFEATURE_H */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -427,6 +427,7 @@ static void xc_cpuid_pv_policy(
}
clear_bit(X86_FEATURE_XTPR, regs[2]);
clear_bit(X86_FEATURE_PDCM, regs[2]);
+ clear_bit(X86_FEATURE_PCID, regs[2]);
clear_bit(X86_FEATURE_DCA, regs[2]);
set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
break;

View File

@ -0,0 +1,109 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1323765911 -3600
# Node ID 868d82faf6511de3b3edce18cc6a9e1c938f0b8f
# Parent 7ca56cca09ade16645fb4806be2c5b2b0bc3332b
x86, amd: Disable GartTlbWlkErr when BIOS forgets it
This patch disables GartTlbWlk errors on AMD Fam10h CPUs if the BIOS
forgets to do it (or is just too old). Leaving these errors enabled
can cause a sync-flood on the CPU, causing a reboot.
The AMD BKDG recommends disabling GART TLB Wlk Error completely.
Based on a Linux patch from Joerg Roedel <joerg.roedel@amd.com>; see e.g.
https://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=patch;h=5bbc097d890409d8eff4e3f1d26f11a9d6b7c07e
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/cpu/mcheck/amd_f10.c
+++ b/xen/arch/x86/cpu/mcheck/amd_f10.c
@@ -46,6 +46,7 @@
#include <asm/msr.h>
#include "mce.h"
+#include "mce_quirks.h"
#include "x86_mca.h"
@@ -91,9 +92,14 @@ amd_f10_handler(struct mc_info *mi, uint
/* AMD Family10 machine check */
enum mcheck_type amd_f10_mcheck_init(struct cpuinfo_x86 *c)
{
+ enum mcequirk_amd_flags quirkflag = mcequirk_lookup_amd_quirkdata(c);
+
if (amd_k8_mcheck_init(c) == mcheck_none)
return mcheck_none;
+ if (quirkflag == MCEQUIRK_F10_GART)
+ mcequirk_amd_apply(quirkflag);
+
x86_mce_callback_register(amd_f10_handler);
return mcheck_amd_famXX;
--- a/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c
+++ b/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c
@@ -29,6 +29,8 @@ static const struct mce_quirkdata mce_am
MCEQUIRK_K7_BANK0 },
{ 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */,
MCEQUIRK_K8_GART },
+ { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */,
+ MCEQUIRK_F10_GART },
};
enum mcequirk_amd_flags
@@ -54,6 +56,8 @@ mcequirk_lookup_amd_quirkdata(struct cpu
int mcequirk_amd_apply(enum mcequirk_amd_flags flags)
{
+ u64 val;
+
switch (flags) {
case MCEQUIRK_K7_BANK0:
return 1; /* first bank */
@@ -67,6 +71,10 @@ int mcequirk_amd_apply(enum mcequirk_amd
wrmsrl(MSR_IA32_MC4_CTL, ~(1ULL << 10));
wrmsrl(MSR_IA32_MC4_STATUS, 0ULL);
break;
+ case MCEQUIRK_F10_GART:
+ if (rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0)
+ wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10));
+ break;
}
return 0;
--- a/xen/arch/x86/cpu/mcheck/mce_quirks.h
+++ b/xen/arch/x86/cpu/mcheck/mce_quirks.h
@@ -33,8 +33,9 @@ struct mce_quirkdata {
*/
enum mcequirk_amd_flags {
- MCEQUIRK_K7_BANK0 = 0x1,
- MCEQUIRK_K8_GART = 0x2,
+ MCEQUIRK_K7_BANK0 = 1,
+ MCEQUIRK_K8_GART,
+ MCEQUIRK_F10_GART
};
enum mcequirk_intel_flags {
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -98,6 +98,8 @@
#define CMCI_EN (1UL<<30)
#define CMCI_THRESHOLD_MASK 0x7FFF
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
#define MSR_IA32_MC1_CTL 0x00000404
#define MSR_IA32_MC1_CTL2 0x00000281
#define MSR_IA32_MC1_STATUS 0x00000405
@@ -151,6 +153,8 @@
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186

View File

@ -0,0 +1,155 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1323766131 -3600
# Node ID 3f4ffde189f228d88e534865023fd795f77f0d05
# Parent 77528dbced3ea74901be6b1aeddedda22bfdaf63
x86: add platform hypercall to retrieve pCPU-s' family, model, and stepping
With the recent hotplug changes to the Xen part of the microcode
loading, this allows the kernel driver to avoid unnecessary calls into
the hypervisor during pCPU hot-enabling: Knowing that the hypervisor
retains the data for already booted CPUs, only data for CPUs with a
different signature needs to be passed down. Since the microcode
loading code can be pretty verbose, avoiding to invoke it can make the
log much easier to look at in case of problems.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -469,6 +469,42 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
}
break;
+ case XENPF_get_cpu_version:
+ {
+ struct xenpf_pcpu_version *ver = &op->u.pcpu_version;
+
+ if ( !get_cpu_maps() )
+ {
+ ret = -EBUSY;
+ break;
+ }
+
+ if ( (ver->xen_cpuid >= NR_CPUS) || !cpu_online(ver->xen_cpuid) )
+ {
+ memset(ver->vendor_id, 0, sizeof(ver->vendor_id));
+ ver->family = 0;
+ ver->model = 0;
+ ver->stepping = 0;
+ }
+ else
+ {
+ const struct cpuinfo_x86 *c = &cpu_data[ver->xen_cpuid];
+
+ memcpy(ver->vendor_id, c->x86_vendor_id, sizeof(ver->vendor_id));
+ ver->family = c->x86;
+ ver->model = c->x86_model;
+ ver->stepping = c->x86_mask;
+ }
+
+ ver->max_present = cpumask_last(&cpu_present_map);
+
+ put_cpu_maps();
+
+ if ( copy_field_to_guest(u_xenpf_op, op, u.pcpu_version) )
+ ret = -EFAULT;
+ }
+ break;
+
case XENPF_cpu_online:
{
int cpu = op->u.cpu_ol.cpuid;
--- a/xen/arch/x86/x86_64/platform_hypercall.c
+++ b/xen/arch/x86/x86_64/platform_hypercall.c
@@ -3,7 +3,7 @@
*/
#include <xen/config.h>
-#include <xen/types.h>
+#include <xen/lib.h>
#include <compat/platform.h>
DEFINE_XEN_GUEST_HANDLE(compat_platform_op_t);
@@ -26,8 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_
#define xen_processor_power_t compat_processor_power_t
#define set_cx_pminfo compat_set_cx_pminfo
-#define xenpf_pcpuinfo compat_pf_pcpuinfo
-#define xenpf_pcpuinfo_t compat_pf_pcpuinfo_t
+#define xen_pf_pcpuinfo xenpf_pcpuinfo
+CHECK_pf_pcpuinfo;
+#undef xen_pf_pcpuinfo
+
+#define xen_pf_pcpu_version xenpf_pcpu_version
+CHECK_pf_pcpu_version;
+#undef xen_pf_pcpu_version
#define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -425,6 +425,21 @@ struct xenpf_pcpuinfo {
typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t);
+#define XENPF_get_cpu_version 48
+struct xenpf_pcpu_version {
+ /* IN */
+ uint32_t xen_cpuid;
+ /* OUT */
+ /* The maxium cpu_id that is present */
+ uint32_t max_present;
+ char vendor_id[12];
+ uint32_t family;
+ uint32_t model;
+ uint32_t stepping;
+};
+typedef struct xenpf_pcpu_version xenpf_pcpu_version_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t);
+
#define XENPF_cpu_online 56
#define XENPF_cpu_offline 57
struct xenpf_cpu_ol
@@ -468,6 +483,7 @@ struct xen_platform_op {
struct xenpf_getidletime getidletime;
struct xenpf_set_processor_pminfo set_pminfo;
struct xenpf_pcpuinfo pcpu_info;
+ struct xenpf_pcpu_version pcpu_version;
struct xenpf_cpu_ol cpu_ol;
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -61,6 +61,17 @@
! memory_reservation memory.h
! pod_target memory.h
? physdev_pci_mmcfg_reserved physdev.h
+! pct_register platform.h
+! power_register platform.h
+? processor_csd platform.h
+! processor_cx platform.h
+! processor_flags platform.h
+! processor_performance platform.h
+! processor_power platform.h
+? processor_px platform.h
+! psd_package platform.h
+? xenpf_pcpuinfo platform.h
+? xenpf_pcpu_version platform.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
@@ -73,12 +84,3 @@
! vcpu_set_singleshot_timer vcpu.h
? xenoprof_init xenoprof.h
? xenoprof_passive xenoprof.h
-! power_register platform.h
-? processor_csd platform.h
-! processor_cx platform.h
-! processor_flags platform.h
-! processor_power platform.h
-! pct_register platform.h
-? processor_px platform.h
-! psd_package platform.h
-! processor_performance platform.h

View File

@ -0,0 +1,143 @@
References: bnc#736824
# HG changeset patch
# User Christoph Egger <Christoph.Egger@amd.com>
# Date 1323943209 -3600
# Node ID ca5f588bd203c9207e0988fcc80f43d83eed5420
# Parent 25f8952313ae683f41b634163f62651185d7be38
x86/ucode: fix for AMD Fam15 CPUs
Remove hardcoded maximum size a microcode patch can have. This is
dynamic now.
The microcode patch for family15h can be larger than 2048 bytes and
gets silently truncated.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- 2011-12-14.orig/xen/arch/x86/microcode_amd.c 2011-12-15 14:55:15.000000000 +0100
+++ 2011-12-14/xen/arch/x86/microcode_amd.c 2011-12-15 14:59:47.000000000 +0100
@@ -27,18 +27,10 @@
#include <asm/processor.h>
#include <asm/microcode.h>
-#define pr_debug(x...) ((void)0)
-
#define UCODE_MAGIC 0x00414d44
#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
#define UCODE_UCODE_TYPE 0x00000001
-#define UCODE_MAX_SIZE (2048)
-#define DEFAULT_UCODE_DATASIZE (896)
-#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define DWSIZE (sizeof(uint32_t))
-
/* serialize access to the physical write */
static DEFINE_SPINLOCK(microcode_update_lock);
@@ -99,7 +91,7 @@ static int microcode_fits(void *mc, int
}
if ( mc_header->patch_id <= uci->cpu_sig.rev )
- return -EINVAL;
+ return 0;
printk(KERN_DEBUG "microcode: CPU%d found a matching microcode "
"update with version 0x%x (current=0x%x)\n",
@@ -147,8 +139,12 @@ static int apply_microcode(int cpu)
return 0;
}
-static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
- size_t size, unsigned long *offset)
+static int get_next_ucode_from_buffer_amd(
+ void **mc,
+ size_t *mc_size,
+ const void *buf,
+ size_t size,
+ unsigned long *offset)
{
struct microcode_header_amd *mc_header;
size_t total_size;
@@ -181,8 +177,17 @@ static int get_next_ucode_from_buffer_am
return -EINVAL;
}
- memset(mc, 0, UCODE_MAX_SIZE);
- memcpy(mc, (const void *)(&bufp[off + 8]), total_size);
+ if ( *mc_size < total_size )
+ {
+ xfree(*mc);
+ *mc = xmalloc_bytes(total_size);
+ if ( !*mc )
+ return -ENOMEM;
+ *mc_size = total_size;
+ }
+ else if ( *mc_size > total_size )
+ memset(*mc + total_size, 0, *mc_size - total_size);
+ memcpy(*mc, mc_header, total_size);
*offset = off + total_size + 8;
@@ -236,10 +241,10 @@ static int cpu_request_microcode(int cpu
{
const uint32_t *buf_pos;
unsigned long offset = 0;
- int error = 0;
- int ret;
+ int error;
struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
void *mc;
+ size_t mc_size;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
@@ -260,7 +265,9 @@ static int cpu_request_microcode(int cpu
return -EINVAL;
}
- mc = xmalloc_bytes(UCODE_MAX_SIZE);
+ /* Size of 1st microcode patch in bytes */
+ mc_size = buf_pos[offset / sizeof(*buf_pos) + 1];
+ mc = xmalloc_bytes(mc_size);
if ( mc == NULL )
{
printk(KERN_ERR "microcode: error! "
@@ -276,24 +284,33 @@ static int cpu_request_microcode(int cpu
* It's possible the data file has multiple matching ucode,
* lets keep searching till the latest version
*/
- while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) == 0)
+ while ( (error = get_next_ucode_from_buffer_amd(&mc, &mc_size, buf, size,
+ &offset)) == 0 )
{
+ uci->mc.mc_amd = mc;
+
error = microcode_fits(mc, cpu);
if (error <= 0)
continue;
error = apply_microcode(cpu);
if (error == 0)
+ {
+ error = 1;
break;
+ }
}
/* On success keep the microcode patch for
* re-apply on resume.
*/
- if (error) {
+ if ( error <= 0 )
+ {
xfree(mc);
mc = NULL;
}
+ else
+ error = 0;
uci->mc.mc_amd = mc;
out:

View File

@ -0,0 +1,23 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1323955725 -3600
# Node ID 99caac2e35df41cbece606f663cb5570a62613c3
# Parent ca5f588bd203c9207e0988fcc80f43d83eed5420
x86/AMD: use correct shift count when merging model and stepping
... for legacy errata matching.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -216,7 +216,7 @@ int cpu_has_amd_erratum(const struct cpu
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 8) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_mask;
while ((range = va_arg(ap, int))) {
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&

View File

@ -0,0 +1,85 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1324046740 -3600
# Node ID 1452fb248cd513832cfbbd1100b9b72a0dde7ea6
# Parent 01c8b27e3d7d4ad2b469be9922bb04b5eb0195e8
x86/emulator: workaround for AMD erratum 573
The only cases where we might end up emulating fsincos (as any other
x87 operations without memory operands) are
- when a HVM guest is in real mode (not applicable on AMD)
- between two half page table updates in PAE mode (unlikely, and not
doing the emulation here does affect only performance, not
correctness)
- when a guest maliciously (or erroneously) modifies an (MMIO or page
table update) instruction under emulation (unspecified behavior)
Hence, in order to avoid the erratum to cause harm to the entire host,
don't emulate fsincos on the affected AMD CPU families.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -3,5 +3,7 @@
#include <string.h>
#include <public/xen.h>
+#define cpu_has_amd_erratum(nr) 0
+
#include "x86_emulate/x86_emulate.h"
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,8 +10,15 @@
*/
#include <asm/x86_emulate.h>
+#include <asm/processor.h> /* current_cpu_info */
+#include <asm/amd.h> /* cpu_has_amd_erratum() */
/* Avoid namespace pollution. */
#undef cmpxchg
+#undef cpuid
+#undef wbinvd
+
+#define cpu_has_amd_erratum(nr) \
+ cpu_has_amd_erratum(&current_cpu_data, AMD_ERRATUM_##nr)
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2621,6 +2621,9 @@ x86_emulate(
case 0xd9: /* FPU 0xd9 */
switch ( modrm )
{
+ case 0xfb: /* fsincos */
+ fail_if(cpu_has_amd_erratum(573));
+ /* fall through */
case 0xc0 ... 0xc7: /* fld %stN */
case 0xc8 ... 0xcf: /* fxch %stN */
case 0xd0: /* fnop */
@@ -2646,7 +2649,6 @@ x86_emulate(
case 0xf8: /* fprem */
case 0xf9: /* fyl2xp1 */
case 0xfa: /* fsqrt */
- case 0xfb: /* fsincos */
case 0xfc: /* frndint */
case 0xfd: /* fscale */
case 0xfe: /* fsin */
--- a/xen/include/asm-x86/amd.h
+++ b/xen/include/asm-x86/amd.h
@@ -138,6 +138,12 @@
AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), \
AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf))
+#define AMD_ERRATUM_573 \
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x10, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf))
+
struct cpuinfo_x86;
int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);

28
24429-mceinj-tool.patch Normal file
View File

@ -0,0 +1,28 @@
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1324219200 0
# Node ID 9587ccc2ae3192fd5625a87fa58e840377471867
# Parent 5b4b7e565ab82b06940889f2be7e30042b2881fc
X86-MCE: fix a bug of xen-mceinj tool
Fix a bug of xen-mceinj tool which used to test mce by software way.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -134,8 +134,12 @@ static int mca_cpuinfo(xc_interface *xc_
{
struct xen_mc mc;
+ memset(&mc, 0, sizeof(struct xen_mc));
+
mc.cmd = XEN_MC_physcpuinfo;
- if (xc_mca_op(xc_handle, &mc))
+ mc.interface_version = XEN_MCA_INTERFACE_VERSION;
+
+ if (!xc_mca_op(xc_handle, &mc))
return mc.u.mc_physcpuinfo.ncpus;
else
return 0;

View File

@ -0,0 +1,46 @@
# HG changeset patch
# User Gang Wei <gang.wei@intel.com>
# Date 1325153274 0
# Node ID a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199
# Parent 2863b2f43a3bc9268885379d6fd55ed325b8c0a2
X86: Add a delay between INIT & SIPIs for tboot AP bring-up in X2APIC case
Without this delay, Xen could not bring APs up while working with
TXT/tboot, because tboot needs some time in APs to handle INIT before
becoming ready for receiving SIPIs (this delay was removed as part of
c/s 23724 by Tim Deegan).
Signed-off-by: Gang Wei <gang.wei@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -42,6 +42,7 @@
#include <asm/msr.h>
#include <asm/mtrr.h>
#include <asm/time.h>
+#include <asm/tboot.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
@@ -473,6 +474,18 @@ static int wakeup_secondary_cpu(int phys
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
} while ( send_status && (timeout++ < 1000) );
}
+ else if ( tboot_in_measured_env() )
+ {
+ /*
+ * With tboot AP is actually spinning in a mini-guest before
+ * receiving INIT. Upon receiving INIT ipi, AP need time to VMExit,
+ * update VMCS to tracking SIPIs and VMResume.
+ *
+ * While AP is in root mode handling the INIT the CPU will drop
+ * any SIPIs
+ */
+ udelay(10);
+ }
/*
* Should we send STARTUP IPIs ?

View File

@ -0,0 +1,31 @@
References: bnc#735806
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1325492779 -3600
# Node ID 3a22ed3ec534799b3cab55b0dc0a7380e701ecbe
# Parent a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199
x86/passthrough: don't leak guest IRQs
As unmap_domain_pirq_emuirq() fails on a never mapped pIRQ, it must not
be called for the non-emu-IRQ case (to prevent the entire unmap
operation failing).
Based on a suggestion from Stefano.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Yongjie Ren <yongjie.ren@intel.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -228,7 +228,8 @@ static int physdev_unmap_pirq(struct phy
if ( is_hvm_domain(d) )
{
spin_lock(&d->event_lock);
- ret = unmap_domain_pirq_emuirq(d, unmap->pirq);
+ if ( domain_pirq_to_emuirq(d, unmap->pirq) != IRQ_UNBOUND )
+ ret = unmap_domain_pirq_emuirq(d, unmap->pirq);
spin_unlock(&d->event_lock);
if ( unmap->domid == DOMID_SELF || ret )
goto free_domain;

View File

@ -2,7 +2,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2930,7 +2930,7 @@ class XendDomainInfo:
@@ -2931,7 +2931,7 @@ class XendDomainInfo:
self.guest_bitsize = self.image.getBitSize()
# Make sure there's enough RAM available for the domain

View File

@ -2,7 +2,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3130,6 +3130,11 @@ class XendDomainInfo:
@@ -3131,6 +3131,11 @@ class XendDomainInfo:
self._cleanup_phantom_devs(paths)
self._cleanupVm()

241
ioemu-9868-MSI-X.patch Normal file
View File

@ -0,0 +1,241 @@
# HG changeset patch
# User Ian Jackson <ian.jackson@eu.citrix.com>
# Date 1324565191 0
# Node ID 11ca857d983420a9f54e4d0e6919f8e6bd5fca48
# Parent 533ebc61dfef98d55f054c97ec30179660214852
qemu: clean up MSI-X table handling
This patch does cleaning up of QEMU MSI handling. The fixes are:
1. Changes made to MSI-X table mapping handling to eliminate the small
windows in which guest could have access to physical MSI-X table.
2. MSI-X table is mapped as read-only to QEMU, as masking of MSI-X is
already in Xen now.
3. For registers that coexists inside the MSI-X table (this could be
only PBA I think), value read from physical page would be returned.
Signed-off-by: Shan Haitao <maillists.shan@gmail.com>
Consolidated duplicate code into _pt_iomem_helper(). Fixed formatting.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Haitao Shan <haitao.shan@intel.com>
Acked-by: Stefano Stabellini <Stefano.Stabellini@eu.citrix.com>
committer: Ian Jackson <Ian.Jackson@eu.citrix.com>
--- a/tools/ioemu-qemu-xen/hw/pass-through.c
+++ b/tools/ioemu-qemu-xen/hw/pass-through.c
@@ -92,6 +92,7 @@
#include <unistd.h>
#include <sys/ioctl.h>
+#include <assert.h>
extern int gfx_passthru;
int igd_passthru = 0;
@@ -1097,6 +1098,44 @@ uint8_t pci_intx(struct pt_dev *ptdev)
return r_val;
}
+static int _pt_iomem_helper(struct pt_dev *assigned_device, int i,
+ uint32_t e_base, uint32_t e_size, int op)
+{
+ if ( has_msix_mapping(assigned_device, i) )
+ {
+ uint32_t msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 +
+ assigned_device->msix->total_entries * 16) >> XC_PAGE_SHIFT;
+ uint32_t bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT;
+ int ret = 0;
+
+ if ( assigned_device->msix->table_off )
+ ret = xc_domain_memory_mapping(xc_handle, domid,
+ e_base >> XC_PAGE_SHIFT,
+ assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT,
+ (assigned_device->msix->mmio_base_addr >> XC_PAGE_SHIFT)
+ - (e_base >> XC_PAGE_SHIFT), op);
+
+ if ( ret == 0 && msix_last_pfn != bar_last_pfn )
+ {
+ assert(msix_last_pfn < bar_last_pfn);
+ ret = xc_domain_memory_mapping(xc_handle, domid,
+ msix_last_pfn + 1,
+ (assigned_device->bases[i].access.maddr +
+ assigned_device->msix->table_off +
+ assigned_device->msix->total_entries * 16 +
+ XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
+ bar_last_pfn - msix_last_pfn, op);
+ }
+
+ return ret;
+ }
+
+ return xc_domain_memory_mapping(xc_handle, domid,
+ e_base >> XC_PAGE_SHIFT,
+ assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT,
+ (e_size + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT, op);
+}
+
/* Being called each time a mmio region has been updated */
static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
int type)
@@ -1118,13 +1157,11 @@ static void pt_iomem_map(PCIDevice *d, i
if ( !first_map && old_ebase != -1 )
{
- add_msix_mapping(assigned_device, i);
- /* Remove old mapping */
- ret = xc_domain_memory_mapping(xc_handle, domid,
- old_ebase >> XC_PAGE_SHIFT,
- assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT,
- (e_size+XC_PAGE_SIZE-1) >> XC_PAGE_SHIFT,
- DPCI_REMOVE_MAPPING);
+ if ( has_msix_mapping(assigned_device, i) )
+ unregister_iomem(assigned_device->msix->mmio_base_addr);
+
+ ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size,
+ DPCI_REMOVE_MAPPING);
if ( ret != 0 )
{
PT_LOG("Error: remove old mapping failed!\n");
@@ -1135,22 +1172,26 @@ static void pt_iomem_map(PCIDevice *d, i
/* map only valid guest address */
if (e_phys != -1)
{
- /* Create new mapping */
- ret = xc_domain_memory_mapping(xc_handle, domid,
- assigned_device->bases[i].e_physbase >> XC_PAGE_SHIFT,
- assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT,
- (e_size+XC_PAGE_SIZE-1) >> XC_PAGE_SHIFT,
- DPCI_ADD_MAPPING);
+ if ( has_msix_mapping(assigned_device, i) )
+ {
+ assigned_device->msix->mmio_base_addr =
+ assigned_device->bases[i].e_physbase
+ + assigned_device->msix->table_off;
+
+ cpu_register_physical_memory(assigned_device->msix->mmio_base_addr,
+ (assigned_device->msix->total_entries * 16 + XC_PAGE_SIZE - 1)
+ & XC_PAGE_MASK,
+ assigned_device->msix->mmio_index);
+ }
+ ret = _pt_iomem_helper(assigned_device, i, e_phys, e_size,
+ DPCI_ADD_MAPPING);
if ( ret != 0 )
{
PT_LOG("Error: create new mapping failed!\n");
+ return;
}
- ret = remove_msix_mapping(assigned_device, i);
- if ( ret != 0 )
- PT_LOG("Error: remove MSI-X mmio mapping failed!\n");
-
if ( old_ebase != e_phys && old_ebase != -1 )
pt_msix_update_remap(assigned_device, i);
}
--- a/tools/ioemu-qemu-xen/hw/pt-msi.c
+++ b/tools/ioemu-qemu-xen/hw/pt-msi.c
@@ -284,15 +284,6 @@ void pt_disable_msi_translate(struct pt_
dev->msi_trans_en = 0;
}
-/* MSI-X virtulization functions */
-static void mask_physical_msix_entry(struct pt_dev *dev, int entry_nr, int mask)
-{
- void *phys_off;
-
- phys_off = dev->msix->phys_iomem_base + 16 * entry_nr + 12;
- *(uint32_t *)phys_off = mask;
-}
-
static int pt_msix_update_one(struct pt_dev *dev, int entry_nr)
{
struct msix_entry_info *entry = &dev->msix->msix_entry[entry_nr];
@@ -486,7 +477,6 @@ static void pci_msix_writel(void *opaque
{
if ( msix->enabled && !(val & 0x1) )
pt_msix_update_one(dev, entry_nr);
- mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1);
}
}
@@ -519,7 +509,11 @@ static uint32_t pci_msix_readl(void *opa
entry_nr = (addr - msix->mmio_base_addr) / 16;
offset = ((addr - msix->mmio_base_addr) % 16) / 4;
- return msix->msix_entry[entry_nr].io_mem[offset];
+ if ( addr - msix->mmio_base_addr < msix->total_entries * 16 )
+ return msix->msix_entry[entry_nr].io_mem[offset];
+ else
+ return *(uint32_t *)(msix->phys_iomem_base +
+ (addr - msix->mmio_base_addr));
}
static CPUReadMemoryFunc *pci_msix_read[] = {
@@ -528,39 +522,12 @@ static CPUReadMemoryFunc *pci_msix_read[
pci_msix_readl
};
-int add_msix_mapping(struct pt_dev *dev, int bar_index)
+int has_msix_mapping(struct pt_dev *dev, int bar_index)
{
if ( !(dev->msix && dev->msix->bar_index == bar_index) )
return 0;
- return xc_domain_memory_mapping(xc_handle, domid,
- dev->msix->mmio_base_addr >> XC_PAGE_SHIFT,
- (dev->bases[bar_index].access.maddr
- + dev->msix->table_off) >> XC_PAGE_SHIFT,
- (dev->msix->total_entries * 16
- + XC_PAGE_SIZE -1) >> XC_PAGE_SHIFT,
- DPCI_ADD_MAPPING);
-}
-
-int remove_msix_mapping(struct pt_dev *dev, int bar_index)
-{
- if ( !(dev->msix && dev->msix->bar_index == bar_index) )
- return 0;
-
- dev->msix->mmio_base_addr = dev->bases[bar_index].e_physbase
- + dev->msix->table_off;
-
- cpu_register_physical_memory(dev->msix->mmio_base_addr,
- dev->msix->total_entries * 16,
- dev->msix->mmio_index);
-
- return xc_domain_memory_mapping(xc_handle, domid,
- dev->msix->mmio_base_addr >> XC_PAGE_SHIFT,
- (dev->bases[bar_index].access.maddr
- + dev->msix->table_off) >> XC_PAGE_SHIFT,
- (dev->msix->total_entries * 16
- + XC_PAGE_SIZE -1) >> XC_PAGE_SHIFT,
- DPCI_REMOVE_MAPPING);
+ return 1;
}
int pt_msix_init(struct pt_dev *dev, int pos)
@@ -616,7 +583,7 @@ int pt_msix_init(struct pt_dev *dev, int
PT_LOG("table_off = %x, total_entries = %d\n", table_off, total_entries);
dev->msix->table_offset_adjust = table_off & 0x0fff;
dev->msix->phys_iomem_base = mmap(0, total_entries * 16 + dev->msix->table_offset_adjust,
- PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED,
+ PROT_READ, MAP_SHARED | MAP_LOCKED,
fd, dev->msix->table_base + table_off - dev->msix->table_offset_adjust);
dev->msix->phys_iomem_base = (void *)((char *)dev->msix->phys_iomem_base +
dev->msix->table_offset_adjust);
--- a/tools/ioemu-qemu-xen/hw/pt-msi.h
+++ b/tools/ioemu-qemu-xen/hw/pt-msi.h
@@ -107,10 +107,7 @@ void
pt_msix_disable(struct pt_dev *dev);
int
-remove_msix_mapping(struct pt_dev *dev, int bar_index);
-
-int
-add_msix_mapping(struct pt_dev *dev, int bar_index);
+has_msix_mapping(struct pt_dev *dev, int bar_index);
int
pt_msix_init(struct pt_dev *dev, int pos);

17
ipxe-enable-nics.patch Normal file
View File

@ -0,0 +1,17 @@
Index: xen-4.1.2-testing/tools/firmware/etherboot/Config
===================================================================
--- xen-4.1.2-testing.orig/tools/firmware/etherboot/Config
+++ xen-4.1.2-testing/tools/firmware/etherboot/Config
@@ -1,11 +1,8 @@
-NICS = rtl8139 8086100e
+NICS = rtl8139 8086100e eepro100 e1000 pcnet32 10ec8029
CFLAGS += -UPXE_DHCP_STRICT
CFLAGS += -DPXE_DHCP_STRICT
CFLAGS += -UNO_POST_PROMPT
CFLAGS += -DNO_POST_PROMPT
-
-CFLAGS += -UCONSOLE_SERIAL
-CFLAGS += -DCONSOLE_SERIAL=1

75
ipxe-gcc45-warnings.patch Normal file
View File

@ -0,0 +1,75 @@
Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-f7c5918b179b
===================================================================
--- /dev/null
+++ xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-f7c5918b179b
@@ -0,0 +1,61 @@
+
+Subject: [drivers] Fix warnings identified by gcc 4.5
+From: Bruce Rogers brogers@novell.com Fri Apr 2 18:16:38 2010 -0600
+Date: Fri Apr 16 07:32:49 2010 -0400:
+Git: f7c5918b179be57fc7f352cb33664eb43de02c30
+
+In building gpxe for openSUSE Factory (part of kvm package), there were
+a few problems identified by the compiler. This patch addresses them.
+
+Signed-off-by: Bruce Rogers <brogers@novell.com>
+Signed-off-by: Stefan Hajnoczi <stefanha@gmail.com>
+Signed-off-by: Marty Connor <mdc@etherboot.org>
+
+diff --git a/src/drivers/net/ath5k/ath5k_qcu.c b/src/drivers/net/ath5k/ath5k_qcu.c
+index a674b85..cb25029 100644
+--- a/src/drivers/net/ath5k/ath5k_qcu.c
++++ b/src/drivers/net/ath5k/ath5k_qcu.c
+@@ -268,7 +268,7 @@ int ath5k_hw_reset_tx_queue(struct ath5k_hw *ah)
+ }
+
+ if (tq->tqi_ready_time &&
+- (tq->tqi_type != AR5K_TX_QUEUE_ID_CAB))
++ (tq->tqi_type != AR5K_TX_QUEUE_CAB))
+ ath5k_hw_reg_write(ah, AR5K_REG_SM(tq->tqi_ready_time,
+ AR5K_QCU_RDYTIMECFG_INTVAL) |
+ AR5K_QCU_RDYTIMECFG_ENABLE,
+diff --git a/src/drivers/net/ns83820.c b/src/drivers/net/ns83820.c
+index 44d875f..c5f2153 100644
+--- a/src/drivers/net/ns83820.c
++++ b/src/drivers/net/ns83820.c
+@@ -687,7 +687,7 @@ static int ns83820_poll(struct nic *nic, int retrieve)
+ // rx_ring[entry].link = 0;
+ rx_ring[entry].cmdsts = cpu_to_le32(CMDSTS_OWN);
+
+- ns->cur_rx = ++ns->cur_rx % NR_RX_DESC;
++ ns->cur_rx = (ns->cur_rx + 1) % NR_RX_DESC;
+
+ if (ns->cur_rx == 0) /* We have wrapped the ring */
+ kick_rx();
+diff --git a/src/drivers/net/tulip.c b/src/drivers/net/tulip.c
+index e08e0d8..af30ec6 100644
+--- a/src/drivers/net/tulip.c
++++ b/src/drivers/net/tulip.c
+@@ -1171,7 +1171,7 @@ static int tulip_poll(struct nic *nic, int retrieve)
+ if (rx_ring[tp->cur_rx].status & 0x00008000) {
+ /* return the descriptor and buffer to receive ring */
+ rx_ring[tp->cur_rx].status = 0x80000000;
+- tp->cur_rx = (++tp->cur_rx) % RX_RING_SIZE;
++ tp->cur_rx = (tp->cur_rx + 1) % RX_RING_SIZE;
+ return 0;
+ }
+
+@@ -1180,7 +1180,7 @@ static int tulip_poll(struct nic *nic, int retrieve)
+
+ /* return the descriptor and buffer to receive ring */
+ rx_ring[tp->cur_rx].status = 0x80000000;
+- tp->cur_rx = (++tp->cur_rx) % RX_RING_SIZE;
++ tp->cur_rx = (tp->cur_rx + 1) % RX_RING_SIZE;
+
+ return 1;
+ }
Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/series
===================================================================
--- xen-4.1.2-testing.orig/tools/firmware/etherboot/patches/series
+++ xen-4.1.2-testing/tools/firmware/etherboot/patches/series
@@ -1,3 +1,4 @@
boot_prompt_option.patch
gpxe-git-0edf2405b457
gpxe-git-a803ef3dfeac
+ipxe-git-f7c5918b179b

368
ipxe-ipv4-fragment.patch Normal file
View File

@ -0,0 +1,368 @@
Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-13186b64b6c3
===================================================================
--- /dev/null 2010-05-08 03:31:08.000000000 -0600
+++ xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-13186b64b6c3 2011-12-19 15:05:32.000000000 -0700
@@ -0,0 +1,354 @@
+commit 13186b64b6c3d5cbe9ed13bda1532e79b1afe81d
+Author: Michael Brown <mcb30@ipxe.org>
+Date: Sat Jul 16 01:15:53 2011 +0100
+
+ [ipv4] Fix fragment reassembly
+
+ Signed-off-by: Michael Brown <mcb30@ipxe.org>
+ Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
+
+diff -up a/src/include/gpxe/ip.h.orig-frag b/src/include/gpxe/ip.h
+--- a/src/include/gpxe/ip.h.orig-frag 2010-02-02 17:12:44.000000000 +0100
++++ b/src/include/gpxe/ip.h 2011-11-18 15:49:17.202660163 +0100
+@@ -32,9 +32,6 @@ struct net_protocol;
+ #define IP_TOS 0
+ #define IP_TTL 64
+
+-#define IP_FRAG_IOB_SIZE 1500
+-#define IP_FRAG_TIMEOUT 50
+-
+ /** An IPv4 packet header */
+ struct iphdr {
+ uint8_t verhdrlen;
+@@ -74,20 +71,16 @@ struct ipv4_miniroute {
+ struct in_addr gateway;
+ };
+
+-/* Fragment reassembly buffer */
+-struct frag_buffer {
+- /* Identification number */
+- uint16_t ident;
+- /* Source network address */
+- struct in_addr src;
+- /* Destination network address */
+- struct in_addr dest;
+- /* Reassembled I/O buffer */
+- struct io_buffer *frag_iob;
+- /* Reassembly timer */
+- struct retry_timer frag_timer;
++/* IPv4 fragment reassembly buffer */
++struct ipv4_fragment {
+ /* List of fragment reassembly buffers */
+ struct list_head list;
++ /** Reassembled packet */
++ struct io_buffer *iobuf;
++ /** Current offset */
++ size_t offset;
++ /** Reassembly timer */
++ struct retry_timer timer;
+ };
+
+ extern struct list_head ipv4_miniroutes;
+diff -up a/src/include/gpxe/retry.h.orig-frag b/src/include/gpxe/retry.h
+--- a/src/include/gpxe/retry.h.orig-frag 2010-02-02 17:12:44.000000000 +0100
++++ b/src/include/gpxe/retry.h 2011-11-18 15:59:25.258837891 +0100
+@@ -51,6 +51,19 @@ struct retry_timer {
+ void ( * expired ) ( struct retry_timer *timer, int over );
+ };
+
++/**
++ * Initialise a timer
++ *
++ * @v timer Retry timer
++ * @v expired Timer expired callback
++ */
++static inline __attribute__ (( always_inline )) void
++timer_init ( struct retry_timer *timer,
++ void ( * expired ) ( struct retry_timer *timer, int over ) )
++{
++ timer->expired = expired;
++}
++
+ extern void start_timer ( struct retry_timer *timer );
+ extern void start_timer_fixed ( struct retry_timer *timer,
+ unsigned long timeout );
+diff -up a/src/net/ipv4.c.orig-frag b/src/net/ipv4.c
+--- a/src/net/ipv4.c.orig-frag 2010-02-02 17:12:44.000000000 +0100
++++ b/src/net/ipv4.c 2011-11-18 15:49:17.203660142 +0100
+@@ -14,6 +14,7 @@
+ #include <gpxe/tcpip.h>
+ #include <gpxe/dhcp.h>
+ #include <gpxe/settings.h>
++#include <gpxe/timer.h>
+
+ /** @file
+ *
+@@ -32,7 +33,10 @@ struct net_protocol ipv4_protocol;
+ struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
+
+ /** List of fragment reassembly buffers */
+-static LIST_HEAD ( frag_buffers );
++static LIST_HEAD ( ipv4_fragments );
++
++/** Fragment reassembly timeout */
++#define IP_FRAG_TIMEOUT ( TICKS_PER_SEC / 2 )
+
+ /**
+ * Add IPv4 minirouting table entry
+@@ -134,104 +138,126 @@ static struct ipv4_miniroute * ipv4_rout
+ }
+
+ /**
+- * Fragment reassembly counter timeout
++ * Expire fragment reassembly buffer
+ *
+- * @v timer Retry timer
+- * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
++ * @v timer Retry timer
++ * @v fail Failure indicator
+ */
+-static void ipv4_frag_expired ( struct retry_timer *timer __unused,
+- int over ) {
+- if ( over ) {
+- DBG ( "Fragment reassembly timeout" );
+- /* Free the fragment buffer */
+- }
++static void ipv4_fragment_expired ( struct retry_timer *timer,
++ int fail __unused ) {
++ struct ipv4_fragment *frag =
++ container_of ( timer, struct ipv4_fragment, timer );
++ struct iphdr *iphdr = frag->iobuf->data;
++
++ DBG ( "IPv4 fragment %04x expired\n", ntohs ( iphdr->ident ) );
++ free_iob ( frag->iobuf );
++ list_del ( &frag->list );
++ free ( frag );
+ }
+
+ /**
+- * Free fragment buffer
++ * Find matching fragment reassembly buffer
+ *
+- * @v fragbug Fragment buffer
++ * @v iphdr IPv4 header
++ * @ret frag Fragment reassembly buffer, or NULL
+ */
+-static void free_fragbuf ( struct frag_buffer *fragbuf ) {
+- free ( fragbuf );
++static struct ipv4_fragment * ipv4_fragment ( struct iphdr *iphdr ) {
++ struct ipv4_fragment *frag;
++ struct iphdr *frag_iphdr;
++
++ list_for_each_entry ( frag, &ipv4_fragments, list ) {
++ frag_iphdr = frag->iobuf->data;
++
++ if ( ( iphdr->src.s_addr == frag_iphdr->src.s_addr ) &&
++ ( iphdr->ident == frag_iphdr->ident ) ) {
++ return frag;
++ }
++ }
++
++ return NULL;
+ }
+
+ /**
+ * Fragment reassembler
+ *
+- * @v iobuf I/O buffer, fragment of the datagram
+- * @ret frag_iob Reassembled packet, or NULL
++ * @v iobuf I/O buffer
++ * @ret iobuf Reassembled packet, or NULL
+ */
+-static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
++static struct io_buffer * ipv4_reassemble ( struct io_buffer *iobuf ) {
+ struct iphdr *iphdr = iobuf->data;
+- struct frag_buffer *fragbuf;
+-
+- /**
+- * Check if the fragment belongs to any fragment series
+- */
+- list_for_each_entry ( fragbuf, &frag_buffers, list ) {
+- if ( fragbuf->ident == iphdr->ident &&
+- fragbuf->src.s_addr == iphdr->src.s_addr ) {
+- /**
+- * Check if the packet is the expected fragment
+- *
+- * The offset of the new packet must be equal to the
+- * length of the data accumulated so far (the length of
+- * the reassembled I/O buffer
+- */
+- if ( iob_len ( fragbuf->frag_iob ) ==
+- ( iphdr->frags & IP_MASK_OFFSET ) ) {
+- /**
+- * Append the contents of the fragment to the
+- * reassembled I/O buffer
+- */
+- iob_pull ( iobuf, sizeof ( *iphdr ) );
+- memcpy ( iob_put ( fragbuf->frag_iob,
+- iob_len ( iobuf ) ),
+- iobuf->data, iob_len ( iobuf ) );
+- free_iob ( iobuf );
+-
+- /** Check if the fragment series is over */
+- if ( ! ( iphdr->frags & IP_MASK_MOREFRAGS ) ) {
+- iobuf = fragbuf->frag_iob;
+- free_fragbuf ( fragbuf );
+- return iobuf;
+- }
+-
+- } else {
+- /* Discard the fragment series */
+- free_fragbuf ( fragbuf );
+- free_iob ( iobuf );
+- }
+- return NULL;
++ size_t offset = ( ( ntohs ( iphdr->frags ) & IP_MASK_OFFSET ) << 3 );
++ unsigned int more_frags = ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ));
++ size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
++ struct ipv4_fragment *frag;
++ size_t expected_offset;
++ struct io_buffer *new_iobuf;
++
++ /* Find matching fragment reassembly buffer, if any */
++ frag = ipv4_fragment ( iphdr );
++
++ /* Drop out-of-order fragments */
++ expected_offset = ( frag ? frag->offset : 0 );
++ if ( offset != expected_offset ) {
++ DBG ( "IPv4 dropping out-of-sequence fragment %04x (%zd+%zd, "
++ "expected %zd)\n", ntohs ( iphdr->ident ), offset,
++ ( iob_len ( iobuf ) - hdrlen ), expected_offset );
++ goto drop;
++ }
++
++ /* Create or extend fragment reassembly buffer as applicable */
++ if ( frag == NULL ) {
++
++ /* Create new fragment reassembly buffer */
++ frag = zalloc ( sizeof ( *frag ) );
++ if ( ! frag )
++ goto drop;
++ list_add ( &frag->list, &ipv4_fragments );
++ frag->iobuf = iobuf;
++ frag->offset = ( iob_len ( iobuf ) - hdrlen );
++ timer_init ( &frag->timer, ipv4_fragment_expired );
++
++ } else {
++
++ /* Extend reassembly buffer */
++ iob_pull ( iobuf, hdrlen );
++ new_iobuf = alloc_iob ( iob_len ( frag->iobuf ) +
++ iob_len ( iobuf ) );
++ if ( ! new_iobuf ) {
++ DBG ( "IPv4 could not extend reassembly buffer to "
++ "%zd bytes\n",
++ ( iob_len ( frag->iobuf ) + iob_len ( iobuf ) ) );
++ goto drop;
+ }
+- }
+-
+- /** Check if the fragment is the first in the fragment series */
+- if ( iphdr->frags & IP_MASK_MOREFRAGS &&
+- ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
+-
+- /** Create a new fragment buffer */
+- fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
+- fragbuf->ident = iphdr->ident;
+- fragbuf->src = iphdr->src;
+-
+- /* Set up the reassembly I/O buffer */
+- fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
+- iob_pull ( iobuf, sizeof ( *iphdr ) );
+- memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
++ memcpy ( iob_put ( new_iobuf, iob_len ( frag->iobuf ) ),
++ frag->iobuf->data, iob_len ( frag->iobuf ) );
++ memcpy ( iob_put ( new_iobuf, iob_len ( iobuf ) ),
+ iobuf->data, iob_len ( iobuf ) );
++ free_iob ( frag->iobuf );
++ frag->iobuf = new_iobuf;
++ frag->offset += iob_len ( iobuf );
+ free_iob ( iobuf );
++ iphdr = frag->iobuf->data;
++ iphdr->len = ntohs ( iob_len ( frag->iobuf ) );
+
+- /* Set the reassembly timer */
+- fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
+- fragbuf->frag_timer.expired = ipv4_frag_expired;
+- start_timer ( &fragbuf->frag_timer );
++ /* Stop fragment reassembly timer */
++ stop_timer ( &frag->timer );
+
+- /* Add the fragment buffer to the list of fragment buffers */
+- list_add ( &fragbuf->list, &frag_buffers );
++ /* If this is the final fragment, return it */
++ if ( ! more_frags ) {
++ iobuf = frag->iobuf;
++ list_del ( &frag->list );
++ free ( frag );
++ return iobuf;
++ }
+ }
+-
++
++ /* (Re)start fragment reassembly timer */
++ start_timer_fixed ( &frag->timer, IP_FRAG_TIMEOUT );
++
++ return NULL;
++
++ drop:
++ free_iob ( iobuf );
+ return NULL;
+ }
+
+@@ -432,37 +458,38 @@ static int ipv4_rx ( struct io_buffer *i
+ goto err;
+ }
+
++ /* Truncate packet to correct length */
++ iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
++
+ /* Print IPv4 header for debugging */
+ DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
+ DBG ( "%s len %d proto %d id %04x csum %04x\n",
+ inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
+ ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
+
+- /* Truncate packet to correct length, calculate pseudo-header
+- * checksum and then strip off the IPv4 header.
+- */
+- iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
+- pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
+- iob_pull ( iobuf, hdrlen );
+-
+- /* Fragment reassembly */
+- if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
+- ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
+- /* Pass the fragment to ipv4_reassemble() which either
+- * returns a fully reassembled I/O buffer or NULL.
++ /* Perform fragment reassembly if applicable */
++ if ( iphdr->frags & htons ( IP_MASK_OFFSET | IP_MASK_MOREFRAGS ) ) {
++ /* Pass the fragment to ipv4_reassemble() which returns
++ * either a fully reassembled I/O buffer or NULL.
+ */
+ iobuf = ipv4_reassemble ( iobuf );
+ if ( ! iobuf )
+ return 0;
++ iphdr = iobuf->data;
++ hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
+ }
+
+- /* Construct socket addresses and hand off to transport layer */
++ /* Construct socket addresses, calculate pseudo-header
++ * checksum, and hand off to transport layer
++ */
+ memset ( &src, 0, sizeof ( src ) );
+ src.sin.sin_family = AF_INET;
+ src.sin.sin_addr = iphdr->src;
+ memset ( &dest, 0, sizeof ( dest ) );
+ dest.sin.sin_family = AF_INET;
+ dest.sin.sin_addr = iphdr->dest;
++ pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
++ iob_pull ( iobuf, hdrlen );
+ if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
+ &dest.st, pshdr_csum ) ) != 0 ) {
+ DBG ( "IPv4 received packet rejected by stack: %s\n",
Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/series
===================================================================
--- xen-4.1.2-testing.orig/tools/firmware/etherboot/patches/series
+++ xen-4.1.2-testing/tools/firmware/etherboot/patches/series
@@ -2,3 +2,4 @@ boot_prompt_option.patch
gpxe-git-0edf2405b457
gpxe-git-a803ef3dfeac
ipxe-git-f7c5918b179b
+ipxe-git-13186b64b6c3

View File

@ -18,7 +18,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
xc = xen.lowlevel.xc.xc()
xoptions = XendOptions.instance()
@@ -3299,33 +3299,38 @@ class XendDomainInfo:
@@ -3300,33 +3300,38 @@ class XendDomainInfo:
# This is a file, not a device. pygrub can cope with a
# file if it's raw, but if it's QCOW or other such formats
# used through blktap, then we need to mount it first.

View File

@ -699,7 +699,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
self._endRestore()
except:
log.exception('VM resume failed')
@@ -2369,7 +2367,7 @@ class XendDomainInfo:
@@ -2370,7 +2368,7 @@ class XendDomainInfo:
return self.getDeviceController(deviceClass).reconfigureDevice(
devid, devconfig)
@ -708,7 +708,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
"""Create the devices for a vm.
@raise: VmError for invalid devices
@@ -2418,7 +2416,7 @@ class XendDomainInfo:
@@ -2419,7 +2417,7 @@ class XendDomainInfo:
if self.image:
@ -717,7 +717,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
#if have pass-through devs, need the virtual pci slots info from qemu
self.pci_device_configure_boot()
@@ -3044,7 +3042,7 @@ class XendDomainInfo:
@@ -3045,7 +3043,7 @@ class XendDomainInfo:
self._introduceDomain()
self.image = image.create(self, self.info)
if self.image:

View File

@ -1,22 +0,0 @@
Index: xen-4.0.2-testing/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
===================================================================
--- xen-4.0.2-testing.orig/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
+++ xen-4.0.2-testing/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
@@ -90,7 +90,7 @@ INCLUDE_CFLAGS = -I. -I${srcdir} -I$(src
GLOBAL_CFLAGS = ${MT_CFLAGS} ${MH_CFLAGS}
#PROFILE_CFLAGS = -pg
-WARN_CFLAGS = -Wall
+WARN_CFLAGS = -Wall -Wno-sequence-point
# CFLAGS is specifically reserved for setting from the command line
# when running make. I.E. "make CFLAGS=-Wmissing-prototypes".
@@ -260,7 +260,7 @@ linux-low.o: linux-low.c $(linux_low_h)
$(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@
linux-xen-low.o: linux-xen-low.c $(linux_low_h) $(server_h)
- $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@
+ $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) -I../../../../../include/ $< @USE_THREAD_DB@
linux-arm-low.o: linux-arm-low.c $(linux_low_h) $(server_h)
linux-i386-low.o: linux-i386-low.c $(linux_low_h) $(server_h)

View File

@ -9,20 +9,18 @@
#include <asm/edd.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
@@ -63,6 +63,7 @@ long cpu_down_helper(void *data);
ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
{
ret_t ret = 0;
+ struct vcpu *v;
struct xen_platform_op curop, *op = &curop;
if ( !IS_PRIV(current->domain) )
@@ -529,6 +530,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
@@ -565,6 +565,42 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
op->u.mem_add.epfn,
op->u.mem_add.pxm);
break;
+
+ case XENPF_get_cpu_freq:
+ case XENPF_get_cpu_freq_min:
+ case XENPF_get_cpu_freq_max:
+ {
+ struct vcpu *v;
+ const struct cpufreq_policy *policy;
+
+ if ( op->u.get_cpu_freq.vcpu >= current->domain->max_vcpus ||
+ !(v = current->domain->vcpu[op->u.get_cpu_freq.vcpu]) )
+ {
@ -30,13 +28,25 @@
+ break;
+ }
+
+ op->u.get_cpu_freq.freq = per_cpu(cpufreq_cpu_policy, v->processor)
+ ? cpufreq_driver->get
+ ? cpufreq_driver->get(v->processor)
+ : per_cpu(cpufreq_cpu_policy, v->processor)->cur
+ : 0;
+ policy = per_cpu(cpufreq_cpu_policy, v->processor);
+ switch ( op->cmd & -!!policy )
+ {
+ case XENPF_get_cpu_freq:
+ op->u.get_cpu_freq.freq = policy->cur;
+ break;
+ case XENPF_get_cpu_freq_min:
+ op->u.get_cpu_freq.freq = policy->min;
+ break;
+ case XENPF_get_cpu_freq_max:
+ op->u.get_cpu_freq.freq = policy->max;
+ break;
+ default:
+ op->u.get_cpu_freq.freq = 0;
+ break;
+ }
+ if ( copy_field_to_guest(u_xenpf_op, op, u.get_cpu_freq.freq) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
@ -44,11 +54,13 @@
break;
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -451,6 +451,14 @@ struct xenpf_mem_hotadd
@@ -466,6 +466,16 @@ struct xenpf_mem_hotadd
uint32_t flags;
};
+#define XENPF_get_cpu_freq ('N' << 24)
+#define XENPF_get_cpu_freq_min (XENPF_get_cpu_freq + 1)
+#define XENPF_get_cpu_freq_max (XENPF_get_cpu_freq_min + 1)
+struct xenpf_get_cpu_freq {
+ /* IN variables */
+ uint32_t vcpu;
@ -59,7 +71,7 @@
struct xen_platform_op {
uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -471,6 +479,7 @@ struct xen_platform_op {
@@ -487,6 +497,7 @@ struct xen_platform_op {
struct xenpf_cpu_ol cpu_ol;
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;

View File

@ -1,8 +1,6 @@
Index: xen-4.1.2-testing/Config.mk
===================================================================
--- xen-4.1.2-testing.orig/Config.mk
+++ xen-4.1.2-testing/Config.mk
@@ -177,7 +177,7 @@ endif
--- a/Config.mk
+++ b/Config.mk
@@ -178,7 +178,7 @@ endif
# Specify which qemu-dm to use. This may be `ioemu' to use the old
# Mercurial in-tree version, or a local directory, or a git URL.
# CONFIG_QEMU ?= `pwd`/$(XEN_ROOT)/../qemu-xen.git
@ -11,7 +9,7 @@ Index: xen-4.1.2-testing/Config.mk
QEMU_TAG := xen-4.1.2
#QEMU_TAG ?= e073e69457b4d99b6da0b6536296e3498f7f6599
@@ -187,7 +187,7 @@ QEMU_TAG := xen-4.1.2
@@ -188,7 +188,7 @@ QEMU_TAG := xen-4.1.2
# Optional components
XENSTAT_XENTOP ?= y
VTPM_TOOLS ?= n
@ -20,10 +18,8 @@ Index: xen-4.1.2-testing/Config.mk
PYTHON_TOOLS ?= y
OCAML_TOOLS ?= y
CONFIG_MINITERM ?= n
Index: xen-4.1.2-testing/tools/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/Makefile
+++ xen-4.1.2-testing/tools/Makefile
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -79,14 +79,16 @@ IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TAR
--interp-prefix=$(CROSS_SYS_ROOT)
endif
@ -54,10 +50,8 @@ Index: xen-4.1.2-testing/tools/Makefile
.PHONY: ioemu-dir-force-update
ioemu-dir-force-update:
Index: xen-4.1.2-testing/tools/libxc/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/Makefile
+++ xen-4.1.2-testing/tools/libxc/Makefile
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -195,7 +195,7 @@ xc_dom_bzimageloader.opic: CFLAGS += $(c
libxenguest.so.$(MAJOR).$(MINOR): COMPRESSION_LIBS = $(call zlib-options,l)
@ -67,10 +61,8 @@ Index: xen-4.1.2-testing/tools/libxc/Makefile
xenctrl_osdep_ENOSYS.so: $(OSDEP_PIC_OBJS) libxenctrl.so
$(CC) -g $(CFLAGS) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $(OSDEP_PIC_OBJS) -lxenctrl
Index: xen-4.1.2-testing/tools/firmware/etherboot/Makefile
===================================================================
--- xen-4.1.2-testing.orig/tools/firmware/etherboot/Makefile
+++ xen-4.1.2-testing/tools/firmware/etherboot/Makefile
--- a/tools/firmware/etherboot/Makefile
+++ b/tools/firmware/etherboot/Makefile
@@ -35,11 +35,13 @@ eb-roms.h: Config
mv -f $@.new $@

View File

@ -250,17 +250,6 @@
u8 bus, slot, func;
dev = entry->dev;
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -150,7 +150,7 @@ static int apply_microcode(int cpu)
static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
size_t size, unsigned long *offset)
{
- struct microcode_header_amd *mc_header;
+ struct microcode_header_amd __attribute__((__unused__)) *mc_header;
size_t total_size;
const uint8_t *bufp = buf;
unsigned long off;
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -356,7 +356,7 @@ int cpupool_add_domain(struct domain *d,
@ -296,7 +285,7 @@
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -569,7 +569,8 @@ static int kexec_exec(XEN_GUEST_HANDLE(v
@@ -573,7 +573,8 @@ static int kexec_exec(XEN_GUEST_HANDLE(v
{
xen_kexec_exec_t exec;
xen_kexec_image_t *image;
@ -374,7 +363,7 @@
unsigned long long value;
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2338,7 +2338,7 @@ p2m_remove_page(struct p2m_domain *p2m,
@@ -2339,7 +2339,7 @@ p2m_remove_page(struct p2m_domain *p2m,
unsigned int page_order)
{
unsigned long i;
@ -383,7 +372,7 @@
p2m_type_t t;
p2m_access_t a;
@@ -2407,7 +2407,7 @@ guest_physmap_mark_populate_on_demand(st
@@ -2408,7 +2408,7 @@ guest_physmap_mark_populate_on_demand(st
struct p2m_domain *p2m = p2m_get_hostp2m(d);
unsigned long i;
p2m_type_t ot;
@ -426,7 +415,7 @@
{
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -275,7 +275,7 @@ static void acpi_processor_ffh_cstate_en
@@ -276,7 +276,7 @@ static void acpi_processor_ffh_cstate_en
static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
@ -471,7 +460,7 @@
union hypercall_input {
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4906,7 +4906,7 @@ static int ptwr_emulated_update(
@@ -4914,7 +4914,7 @@ static int ptwr_emulated_update(
{
unsigned long mfn;
unsigned long unaligned_addr = addr;
@ -591,7 +580,7 @@
if ( tmh->persistent_pool == NULL )
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -574,7 +574,7 @@ int is_vmce_ready(struct mcinfo_bank *ba
@@ -571,7 +571,7 @@ int is_vmce_ready(struct mcinfo_bank *ba
*/
int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn)
{
@ -634,7 +623,7 @@
case 3: /* x86_32p */
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1854,7 +1854,11 @@ static int emulate_privileged_op(struct
@@ -1858,7 +1858,11 @@ static int emulate_privileged_op(struct
struct vcpu *v = current;
unsigned long *reg, eip = regs->eip;
u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;

View File

@ -1,3 +1,140 @@
-------------------------------------------------------------------
Tue Jan 3 08:26:42 MST 2012 - carnold@novell.com
- bnc#735806 - VF doesn't work after hot-plug for many times
24448-x86-pt-irq-leak.patch
- Upstream patches from Jan
24261-x86-cpuidle-Westmere-EX.patch
24417-amd-erratum-573.patch
24429-mceinj-tool.patch
24447-x86-TXT-INIT-SIPI-delay.patch
ioemu-9868-MSI-X.patch
-------------------------------------------------------------------
Mon Jan 2 10:05:57 CET 2012 - ohering@suse.de
- bnc#732884 - remove private runlevel 4 from init scripts
xen.no-default-runlevel-4.patch
-------------------------------------------------------------------
Mon Dec 19 15:22:13 MST 2011 - carnold@novell.com
- bnc#727515 - Fragmented packets hang network boot of HVM guest
ipxe-gcc45-warnings.patch
ipxe-ipv4-fragment.patch
ipxe-enable-nics.patch
-------------------------------------------------------------------
Mon Dec 19 12:43:11 CET 2011 - ohering@suse.de
- fate#310510 - fix xenpaging
update xenpaging.autostart.patch, make changes with mem-swap-target
permanent
update xenpaging.doc.patch, mention issues with live migration
-------------------------------------------------------------------
Thu Dec 15 17:53:51 CET 2011 - ohering@suse.de
- fate#310510 - fix xenpaging
add xenpaging.evict_mmap_readonly.patch
update xenpaging.error-handling.patch, reduce debug output
-------------------------------------------------------------------
Thu Dec 15 08:35:27 MST 2011 - carnold@novell.com
- bnc#736824 - Microcode patches for AMD's 15h processors panic the
system
24189-x86-p2m-pod-locking.patch
24412-x86-AMD-errata-model-shift.patch
24411-x86-ucode-AMD-Fam15.patch
-------------------------------------------------------------------
Wed Dec 14 10:08:24 MST 2011 - carnold@novell.com
- bnc#711219 - SR-IOV VF doesn't work in SLES11 sp2 guest
24357-firmware-no-_PS0-_PS3.patch
- Upstream patches from Jan
24153-x86-emul-feature-checks.patch
24275-x86-emul-lzcnt.patch
24277-x86-dom0-features.patch
24278-x86-dom0-no-PCID.patch
24282-x86-log-dirty-bitmap-leak.patch
24359-x86-domU-features.patch
24360-x86-pv-domU-no-PCID.patch
24389-amd-fam10-gart-tlb-walk-err.patch
24391-x86-pcpu-version.patch
-------------------------------------------------------------------
Thu Dec 8 14:19:49 CET 2011 - ohering@suse.de
- bnc#729208 - xenpaging=-1 doesn't work
xenpaging.doc.patch
-------------------------------------------------------------------
Thu Dec 8 08:41:36 CET 2011 - ohering@suse.de
- fate#310510 - fix xenpaging
readd xenpaging.qemu.flush-cache.patch
-------------------------------------------------------------------
Wed Dec 7 11:01:43 MST 2011 - jfehlig@suse.com
- bnc#732782 - L3: xm create hangs when maxmem value is enclosed
  in "quotes"
xm-create-maxmem.patch
-------------------------------------------------------------------
Wed Dec 7 10:44:06 MST 2011 - carnold@novell.com
- Upstream patches / changes from Jan
Added 24358-kexec-compat-overflow.patch
Removed 24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch
Removed 24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch
-------------------------------------------------------------------
Wed Dec 7 16:42:44 CET 2011 - ohering@suse.de
- fate#310510 - fix xenpaging
24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch
Use wait queues for paging, improve foreign mappings.
xenpaging.versioned-interface.patch
xenpaging.mmap-before-nominate.patch
xenpaging.p2m_is_paged.patch
xenpaging.evict_fail_fast_forward.patch
xenpaging.error-handling.patch
xenpaging.mem_event-use-wait_queue.patch
xenpaging.waitqueue-paging.patch
Remove obsolete patch, not needed with wait queue usage
xenpaging.HVMCOPY_gfn_paged_out.patch
-------------------------------------------------------------------
Wed Dec 7 16:23:49 CET 2011 - ohering@suse.de
- fate#310510 - fix xenpaging
Fix incorrect backport, remove double memset, use xzalloc
24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch
-------------------------------------------------------------------
Wed Dec 7 12:08:31 CET 2011 - ohering@suse.de
- fate#310510 - fix xenpaging
fix typo in nominate, use lock instead of double unlock
23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch
-------------------------------------------------------------------
Wed Dec 7 11:07:23 CET 2011 - ohering@suse.de
- fate#310510 - fix xenpaging
24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
-------------------------------------------------------------------
Tue Dec 6 11:14:51 MST 2011 - jfehlig@suse.com
- bnc#734826 - xm rename doesn't work anymore
Updated xend-migration-domname-fix.patch
-------------------------------------------------------------------
Fri Dec 2 20:35:29 CET 2011 - ohering@suse.de

View File

@ -0,0 +1,77 @@
Related to bnc#732884
Runlevel 4 is reserved for the local sysadmin,
who is responsible for creating all required symlinks in this private runlevel.
---
tools/hotplug/Linux/init.d/xen-watchdog | 2 +-
tools/hotplug/Linux/init.d/xencommons | 2 +-
tools/hotplug/Linux/init.d/xend | 2 +-
tools/hotplug/Linux/init.d/xendomains | 2 +-
tools/xenballoon/xenballoond.init | 2 +-
5 files changed, 5 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xen-watchdog
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xen-watchdog
+++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xen-watchdog
@@ -10,7 +10,7 @@
# Should-Start: xend
# Required-Stop: $syslog $remote_fs
# Should-Stop: xend
-# Default-Start: 2 3 4 5
+# Default-Start: 2 3 5
# Default-Stop: 0 1 6
# Short-Description: Start/stop xen-watchdog
# Description: Run domain watchdog daemon.
Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xencommons
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xencommons
+++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xencommons
@@ -12,7 +12,7 @@
# Should-Start:
# Required-Stop: $syslog $remote_fs
# Should-Stop:
-# Default-Start: 2 3 4 5
+# Default-Start: 2 3 5
# Default-Stop: 0 1 6
# Short-Description: Start/stop xenstored and xenconsoled
# Description: Starts and stops the daemons neeeded for xl/xend
Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xend
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xend
+++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xend
@@ -12,7 +12,7 @@
# Should-Start:
# Required-Stop: $syslog $remote_fs xenstored xenconsoled
# Should-Stop:
-# Default-Start: 2 3 4 5
+# Default-Start: 2 3 5
# Default-Stop: 0 1 6
# Short-Description: Start/stop xend
# Description: Starts and stops the Xen control daemon.
Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xendomains
===================================================================
--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xendomains
+++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xendomains
@@ -20,7 +20,7 @@
# Should-Start: xend
# Required-Stop: $syslog $remote_fs xenstored xenconsoled
# Should-Stop: xend
-# Default-Start: 2 3 4 5
+# Default-Start: 2 3 5
# Default-Stop: 0 1 6
# Short-Description: Start/stop secondary xen domains
# Description: Start / stop domains automatically when domain 0
Index: xen-4.1.2-testing/tools/xenballoon/xenballoond.init
===================================================================
--- xen-4.1.2-testing.orig/tools/xenballoon/xenballoond.init
+++ xen-4.1.2-testing/tools/xenballoon/xenballoond.init
@@ -14,7 +14,7 @@
# Should-Start:
# Required-Stop: $syslog $remote_fs
# Should-Stop:
-# Default-Start: 3 4 5
+# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Start/stop xenballoond
# Description: Starts and stops the Xen ballooning daemon.

169
xen.spec
View File

@ -1,7 +1,7 @@
#
# spec file for package xen
#
# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
# Copyright (c) 2012 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@ -15,9 +15,6 @@
# Please submit bugfixes or comments via http://bugs.opensuse.org/
#
# norootforbuild
Name: xen
ExclusiveArch: %ix86 x86_64
%define xvers 4.1
@ -52,6 +49,7 @@ BuildRequires: curl-devel
BuildRequires: dev86
BuildRequires: graphviz
BuildRequires: latex2html
BuildRequires: libbz2-devel
BuildRequires: libjpeg-devel
BuildRequires: libxml2-devel
BuildRequires: ncurses-devel
@ -61,7 +59,6 @@ BuildRequires: pciutils-devel
BuildRequires: python-devel
BuildRequires: texinfo
BuildRequires: transfig
BuildRequires: libbz2-devel
%if %suse_version >= 1120
BuildRequires: xz-devel
%endif
@ -81,9 +78,11 @@ BuildRequires: tetex
%ifarch x86_64
%if %{?with_gcc46}0
BuildRequires: gcc46
BuildRequires: libgcc46 libgcc46-32bit
BuildRequires: libgcc46
BuildRequires: libgcc46-32bit
%endif
BuildRequires: glibc-32bit glibc-devel-32bit
BuildRequires: glibc-32bit
BuildRequires: glibc-devel-32bit
BuildRequires: gcc-32bit
BuildRequires: gcc43-32bit
%define max_cpus 256
@ -94,15 +93,17 @@ BuildRequires: gcc43-32bit
%endif
BuildRequires: glibc-devel
%if %{?with_kmp}0
BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11
BuildRequires: kernel-source
BuildRequires: kernel-syms
BuildRequires: module-init-tools
BuildRequires: xorg-x11
%endif
Version: 4.1.2_09
Release: 1
License: GPLv2+
Group: System/Kernel
AutoReqProv: on
Version: 4.1.2_11
Release: 0
PreReq: %insserv_prereq %fillup_prereq
Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
License: GPL-2.0+
Group: System/Kernel
Source0: xen-4.1.2-testing-src.tar.bz2
Source1: stubdom.tar.bz2
Source2: xen-utils-0.1.tar.bz2
@ -275,11 +276,14 @@ Patch24137: 24137-revert-23666.patch
Patch24138: 24138-xenpaging_munmap_all_pages_after_page-in.patch
Patch24144: 24144-cpufreq-turbo-crash.patch
Patch24148: 24148-shadow-pgt-dying-op-performance.patch
Patch24153: 24153-x86-emul-feature-checks.patch
Patch24155: 24155-x86-ioapic-EOI-after-migration.patch
Patch24156: 24156-x86-ioapic-shared-vectors.patch
Patch24157: 24157-x86-xstate-init.patch
Patch24168: 24168-x86-vioapic-clear-remote_irr.patch
Patch24171: 24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch
Patch24178: 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch
Patch24189: 24189-x86-p2m-pod-locking.patch
Patch24190: 24190-hap-log-dirty-disable-rc.patch
Patch24193: 24193-hap-track-dirty-vram-rc.patch
Patch24195: 24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch
@ -308,14 +312,33 @@ Patch24226: 24226-xenpaging_add_debug_to_show_received_watch_event..patch
Patch24227: 24227-xenpaging_restrict_pagefile_permissions.patch
Patch24231: 24231-waitqueue_Implement_wake_up_nroneall..patch
Patch24232: 24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch
Patch24261: 24261-x86-cpuidle-Westmere-EX.patch
Patch24269: 24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch
Patch24270: 24270-Free_d-mem_event_on_domain_destruction..patch
Patch24272: 24272-xenpaging_Fix_c-s_235070a29c8c3ddf7_update_machine_to_phys_mapping_during_page_deallocation.patch
Patch24275: 24275-x86-emul-lzcnt.patch
Patch24277: 24277-x86-dom0-features.patch
Patch24278: 24278-x86-dom0-no-PCID.patch
Patch24282: 24282-x86-log-dirty-bitmap-leak.patch
Patch24318: 24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch
Patch24341: 24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch
Patch24344: 24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch
Patch24345: 24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch
Patch24327: 24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch
Patch24328: 24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch
Patch24329: 24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch
Patch24357: 24357-firmware-no-_PS0-_PS3.patch
Patch24358: 24358-kexec-compat-overflow.patch
Patch24359: 24359-x86-domU-features.patch
Patch24360: 24360-x86-pv-domU-no-PCID.patch
Patch24389: 24389-amd-fam10-gart-tlb-walk-err.patch
Patch24391: 24391-x86-pcpu-version.patch
Patch24411: 24411-x86-ucode-AMD-Fam15.patch
Patch24412: 24412-x86-AMD-errata-model-shift.patch
Patch24417: 24417-amd-erratum-573.patch
Patch24429: 24429-mceinj-tool.patch
Patch24447: 24447-x86-TXT-INIT-SIPI-delay.patch
Patch24448: 24448-x86-pt-irq-leak.patch
# Upstream qemu patches
Patch100: ioemu-9868-MSI-X.patch
# Our patches
Patch300: xen-config.diff
Patch301: xend-config.diff
@ -355,12 +378,11 @@ Patch351: xend-core-dump-loc.diff
Patch352: blktap.patch
Patch353: xen-qemu-iscsi-fix.patch
Patch354: xen-api-auth.patch
Patch355: tools-gdbserver-build.diff
Patch356: ioemu-vnc-resize.patch
Patch357: ioemu-debuginfo.patch
Patch358: vif-bridge-no-iptables.patch
Patch359: xenconsole-no-multiple-connections.patch
Patch360: disable-xl-when-using-xend.patch
Patch355: ioemu-vnc-resize.patch
Patch356: ioemu-debuginfo.patch
Patch357: vif-bridge-no-iptables.patch
Patch358: xenconsole-no-multiple-connections.patch
Patch359: disable-xl-when-using-xend.patch
# Needs to go upstream
Patch370: checkpoint-rename.patch
Patch371: xm-save-check-file.patch
@ -372,8 +394,9 @@ Patch376: xend-devid-or-name.patch
Patch377: suspend_evtchn_lock.patch
Patch378: log-guest-console.patch
Patch379: xend-migration-domname-fix.patch
Patch380: xm-create-maxmem.patch
# Sent upstream and tentatively ACK'ed, but not yet committed
Patch380: 2XXXX-vif-bridge.patch
Patch381: 2XXXX-vif-bridge.patch
# Patches for snapshot support
Patch400: snapshot-ioemu-save.patch
Patch401: snapshot-ioemu-restore.patch
@ -423,6 +446,9 @@ Patch456: xend-vcpu-affinity-fix.patch
Patch457: xenstored.XS_RESET_WATCHES.patch
Patch458: xen-cpupool-xl-config-format.patch
Patch459: xl-create-pv-with-qcow2-img.patch
Patch460: ipxe-gcc45-warnings.patch
Patch461: ipxe-ipv4-fragment.patch
Patch462: ipxe-enable-nics.patch
# Jim's domain lock patch
Patch480: xend-domain-lock.patch
Patch481: xend-domain-lock-sfex.patch
@ -444,10 +470,20 @@ Patch650: disable_emulated_device.diff
Patch651: ioemu-disable-scsi.patch
Patch652: ioemu-disable-emulated-ide-if-pv.patch
Patch700: hv_extid_compatibility.patch
Patch701: xen.no-default-runlevel-4.patch
# FATE 310510
Patch1100: xenpaging.versioned-interface.patch
Patch1101: xenpaging.mmap-before-nominate.patch
Patch1102: xenpaging.p2m_is_paged.patch
Patch1103: xenpaging.evict_fail_fast_forward.patch
Patch1104: xenpaging.error-handling.patch
Patch1105: xenpaging.mem_event-use-wait_queue.patch
Patch1106: xenpaging.waitqueue-paging.patch
Patch1107: xenpaging.evict_mmap_readonly.patch
Patch1126: xenpaging.guest-memusage.patch
Patch1129: xenpaging.autostart.patch
Patch1130: xenpaging.HVMCOPY_gfn_paged_out.patch
Patch1130: xenpaging.doc.patch
Patch1142: xenpaging.qemu.flush-cache.patch
# xenalyze
Patch20000: xenalyze.gcc46.patch
# Build patch
@ -460,7 +496,6 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build
%suse_kernel_module_package -n xen um xen -f kmp_filelist
%endif
%description
Xen is a virtual machine monitor for x86 that supports execution of
multiple guest operating systems with unprecedented levels of
@ -516,12 +551,9 @@ Authors:
...
%package libs
License: GPLv2+
Summary: Xen Virtualization: Libraries
Group: System/Kernel
#Requires: xen = %{version}
AutoReqProv: on
%description libs
Xen is a virtual machine monitor for x86 that supports execution of
@ -568,9 +600,7 @@ Authors:
%if %{?with_dom0_support}0
%package tools
License: GPLv2+
Summary: Xen Virtualization: Control tools for domain 0
Group: System/Kernel
Requires: xen-libs = %{version}
@ -578,8 +608,6 @@ Requires: bridge-utils multipath-tools python python-curses python-openssl
# subpackage existed in 10.3
Provides: xen-tools-ioemu = 3.2
Obsoletes: xen-tools-ioemu <= 3.2
AutoReqProv: on
%description tools
Xen is a virtual machine monitor for x86 that supports execution of
@ -628,14 +656,10 @@ Authors:
Ian Pratt <ian.pratt@cl.cam.ac.uk>
%endif
%package tools-domU
License: GPLv2+
Summary: Xen Virtualization: Control tools for domain U
Group: System/Kernel
Conflicts: xen-tools
AutoReqProv: on
%description tools-domU
Xen is a virtual machine monitor for x86 that supports execution of
@ -652,12 +676,10 @@ Authors:
Ian Pratt <ian.pratt@cl.cam.ac.uk>
%package devel
License: GPLv2+
Summary: Xen Virtualization: Headers and libraries for development
Group: System/Kernel
Requires: xen-libs = %{version}
%description devel
Xen is a virtual machine monitor for x86 that supports execution of
multiple guest operating systems with unprecedented levels of
@ -703,14 +725,11 @@ Authors:
%if %{?with_kmp}0
%package KMP
License: GPLv2+
Group: System/Kernel
Summary: Xen para-virtual device drivers for fully virtualized guests
Group: System/Kernel
Conflicts: xen
%description KMP
Xen para-virtual device drivers for fully virtualized guests
@ -756,13 +775,10 @@ Xen, but is not available for release due to license restrictions.
%if %{?with_dom0_support}0
%package doc-html
License: GPLv2+
Summary: Xen Virtualization: HTML documentation
Group: Documentation/HTML
%description doc-html
Xen is a virtual machine monitor for x86 that supports execution of
multiple guest operating systems with unprecedented levels of
@ -778,11 +794,9 @@ Authors:
Ian Pratt <ian.pratt@cl.cam.ac.uk>
%package doc-pdf
License: GPLv2+
Summary: Xen Virtualization: PDF documentation
Group: Documentation/Other
%description doc-pdf
Xen is a virtual machine monitor for x86 that supports execution of
multiple guest operating systems with unprecedented levels of
@ -799,7 +813,6 @@ Authors:
Ian Pratt <ian.pratt@cl.cam.ac.uk>
%endif
%prep
%setup -q -n %xen_build_dir -a 1 -a 20000
%patch20000 -p1
@ -938,11 +951,14 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools
%patch24138 -p1
%patch24144 -p1
%patch24148 -p1
%patch24153 -p1
%patch24155 -p1
%patch24156 -p1
%patch24157 -p1
%patch24168 -p1
%patch24171 -p1
%patch24178 -p1
%patch24189 -p1
%patch24190 -p1
%patch24193 -p1
%patch24195 -p1
@ -971,14 +987,34 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools
%patch24227 -p1
%patch24231 -p1
%patch24232 -p1
%patch24261 -p1
%patch24269 -p1
%patch24270 -p1
%patch24272 -p1
%patch24275 -p1
%patch24277 -p1
%patch24278 -p1
%patch24282 -p1
%patch24318 -p1
%patch24341 -p1
%patch24344 -p1
%patch24345 -p1
# Upstream patches
%patch24327 -p1
%patch24328 -p1
%patch24329 -p1
%patch24357 -p1
%patch24358 -p1
%patch24359 -p1
%patch24360 -p1
%patch24389 -p1
%patch24391 -p1
%patch24411 -p1
%patch24412 -p1
%patch24417 -p1
%patch24429 -p1
%patch24447 -p1
%patch24448 -p1
# Qemu
%patch100 -p1
# Our patches
%patch300 -p1
%patch301 -p1
%patch302 -p1
@ -1017,12 +1053,11 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools
%patch352 -p1
%patch353 -p1
%patch354 -p1
###%patch355 -p1 gdbserver
%patch355 -p1
%patch356 -p1
%patch357 -p1
%patch358 -p1
%patch359 -p1
%patch360 -p1
%patch370 -p1
%patch371 -p1
%patch372 -p1
@ -1034,6 +1069,7 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools
%patch378 -p1
%patch379 -p1
%patch380 -p1
%patch381 -p1
%patch400 -p1
%patch401 -p1
%patch402 -p1
@ -1080,6 +1116,9 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools
%patch457 -p1
%patch458 -p1
%patch459 -p1
%patch460 -p1
%patch461 -p1
%patch462 -p1
%patch480 -p1
%patch481 -p1
%patch500 -p1
@ -1099,15 +1138,24 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools
%patch651 -p1
%patch652 -p1
%patch700 -p1
%patch701 -p1
# FATE 310510
%patch1100 -p1
%patch1101 -p1
%patch1102 -p1
%patch1103 -p1
%patch1104 -p1
%patch1105 -p1
%patch1106 -p1
%patch1107 -p1
%patch1126 -p1
%patch1129 -p1
%patch1130 -p1
%patch1142 -p1
#
%patch99998 -p1
%patch99999 -p1
%build
XEN_EXTRAVERSION=%version-%release
XEN_EXTRAVERSION=${XEN_EXTRAVERSION#%{xvers}}
@ -1143,7 +1191,6 @@ for flavor in %flavors_to_build; do
done
%endif
%install
export CFLAGS="$RPM_OPT_FLAGS"
%if %{?with_dom0_support}0
@ -1346,7 +1393,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons
%if %{?with_dom0_support}0
%files -f xen.files.txt
%defattr(-,root,root)
/boot/xen-%{version}-%{release}.gz
@ -1363,7 +1409,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons
/boot/xen.gz
%endif
%files libs
%defattr(-,root,root)
%{_libdir}/fs/
@ -1371,7 +1416,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons
%if %{?with_dom0_support}0
%files tools
%defattr(-,root,root)
/usr/bin/xenalyze
@ -1475,14 +1519,12 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons
%config %{_fwdefdir}/xend-relocation-server
%endif
%files tools-domU
%defattr(-,root,root)
/usr/bin/xen-detect
/bin/domu-xenstore
/bin/xenstore-*
%files devel
%defattr(-,root,root)
%{_bindir}/serial-split
@ -1492,12 +1534,10 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons
%if %{?with_dom0_support}0
%files doc-html
%defattr(-,root,root)
%{_defaultdocdir}/xen/html
%files doc-pdf
%defattr(-,root,root)
%{_defaultdocdir}/xen/pdf
@ -1505,7 +1545,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons
%if %{?with_dom0_support}0
%post tools
%if %{?with_xend}0
# with_xend
@ -1551,11 +1590,9 @@ if [ -f /usr/bin/qemu-nbd ]; then
ln -s /usr/bin/qemu-nbd /usr/bin/qemu-nbd-xen
fi
%preun tools
%{stop_on_removal xendomains xend xencommons}
%postun tools
%if %{?with_xend}0
# with_xend
@ -1570,12 +1607,8 @@ if [ -f /usr/bin/qemu-nbd-xen ]; then
fi
%endif
%post libs -p /sbin/ldconfig
%postun libs -p /sbin/ldconfig
%changelog

View File

@ -21,7 +21,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3053,7 +3053,7 @@ class XendDomainInfo:
@@ -3054,7 +3054,7 @@ class XendDomainInfo:
# TODO: recategorise - called from XendCheckpoint
#
@ -30,7 +30,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
log.debug("XendDomainInfo.completeRestore")
@@ -3064,6 +3064,7 @@ class XendDomainInfo:
@@ -3065,6 +3065,7 @@ class XendDomainInfo:
self.image = image.create(self, self.info)
if self.image:
self._createDevices(True)

View File

@ -223,7 +223,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -4517,8 +4517,14 @@ class XendDomainInfo:
@@ -4518,8 +4518,14 @@ class XendDomainInfo:
# Return name of host contained in lock file.
def get_lock_host(self, path):
@ -240,7 +240,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
hostname = "unknown"
try:
@@ -4540,6 +4546,16 @@ class XendDomainInfo:
@@ -4541,6 +4547,16 @@ class XendDomainInfo:
path = xoptions.get_xend_domain_lock_path()
path = os.path.join(path, self.get_uuid())
@ -257,7 +257,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
try:
if not os.path.exists(path):
mkdir.parents(path, stat.S_IRWXU)
@@ -4547,12 +4563,7 @@ class XendDomainInfo:
@@ -4548,12 +4564,7 @@ class XendDomainInfo:
log.exception("%s could not be created." % path)
raise XendError("%s could not be created." % path)
@ -271,7 +271,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
if status != 0:
log.debug("Failed to aqcuire lock: status = %d" % status)
raise XendError("The VM is locked and appears to be running on host %s." % self.get_lock_host(path))
@@ -4569,12 +4580,18 @@ class XendDomainInfo:
@@ -4570,12 +4581,18 @@ class XendDomainInfo:
path = xoptions.get_xend_domain_lock_path()
path = os.path.join(path, self.get_uuid())

View File

@ -257,7 +257,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
XendTask.log_progress(0, 30, self._constructDomain)
XendTask.log_progress(31, 60, self._initDomain)
@@ -3001,6 +3002,11 @@ class XendDomainInfo:
@@ -3002,6 +3003,11 @@ class XendDomainInfo:
self._stateSet(DOM_STATE_HALTED)
self.domid = None # Do not push into _stateSet()!
@ -269,7 +269,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
finally:
self.refresh_shutdown_lock.release()
@@ -4509,6 +4515,74 @@ class XendDomainInfo:
@@ -4510,6 +4516,74 @@ class XendDomainInfo:
def has_device(self, dev_class, dev_uuid):
return (dev_uuid in self.info['%s_refs' % dev_class.lower()])

View File

@ -8,10 +8,11 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1946,6 +1946,7 @@ class XendDomainInfo:
@@ -1946,6 +1946,8 @@ class XendDomainInfo:
self.info['name_label'] = name
if to_store:
self.storeVm("name", name)
+ if self.dompath:
+ self.storeDom("name", name)
def getName(self):

View File

@ -2,7 +2,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2776,7 +2776,10 @@ class XendDomainInfo:
@@ -2777,7 +2777,10 @@ class XendDomainInfo:
from xen.xend import XendDomain
doms = XendDomain.instance().list('all')
for dom in filter (lambda d: d.domid != self.domid, doms):

View File

@ -1,151 +0,0 @@
xenpaging: handle HVMCOPY_gfn_paged_out in copy_from/to_user
copy_from_user_hvm can fail when __hvm_copy returns
HVMCOPY_gfn_paged_out for a referenced gfn, for example during guests
pagetable walk. This has to be handled in some way.
For the time being, return -EAGAIN for the most common case (xen_balloon
driver crashing in guest) until the recently added waitqueues will be
used.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/hvm.c | 4 ++++
xen/common/memory.c | 39 ++++++++++++++++++++++++++++++++++-----
2 files changed, 38 insertions(+), 5 deletions(-)
Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c
@@ -2247,6 +2247,8 @@ unsigned long copy_to_user_hvm(void *to,
rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from,
len, 0);
+ if ( unlikely(rc == HVMCOPY_gfn_paged_out) )
+ return -EAGAIN;
return rc ? len : 0; /* fake a copy_to_user() return code */
}
@@ -2264,6 +2266,8 @@ unsigned long copy_from_user_hvm(void *t
#endif
rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0);
+ if ( unlikely(rc == HVMCOPY_gfn_paged_out) )
+ return -EAGAIN;
return rc ? len : 0; /* fake a copy_from_user() return code */
}
Index: xen-4.1.2-testing/xen/common/memory.c
===================================================================
--- xen-4.1.2-testing.orig/xen/common/memory.c
+++ xen-4.1.2-testing/xen/common/memory.c
@@ -48,6 +48,7 @@ static void increase_reservation(struct
{
struct page_info *page;
unsigned long i;
+ unsigned long ctg_ret;
xen_pfn_t mfn;
struct domain *d = a->domain;
@@ -81,8 +82,13 @@ static void increase_reservation(struct
if ( !guest_handle_is_null(a->extent_list) )
{
mfn = page_to_mfn(page);
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
+ ctg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1);
+ if ( unlikely(ctg_ret) )
+ {
+ if ( (long)ctg_ret == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
}
}
@@ -94,6 +100,7 @@ static void populate_physmap(struct memo
{
struct page_info *page;
unsigned long i, j;
+ unsigned long cftg_ret;
xen_pfn_t gpfn, mfn;
struct domain *d = a->domain;
@@ -112,8 +119,13 @@ static void populate_physmap(struct memo
goto out;
}
- if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
+ cftg_ret = __copy_from_guest_offset(&gpfn, a->extent_list, i, 1);
+ if ( unlikely(cftg_ret) )
+ {
+ if ( (long)cftg_ret == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
if ( a->memflags & MEMF_populate_on_demand )
{
@@ -143,8 +155,13 @@ static void populate_physmap(struct memo
set_gpfn_from_mfn(mfn + j, gpfn + j);
/* Inform the domain of the new page's machine address. */
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
+ cftg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1);
+ if ( unlikely(cftg_ret) )
+ {
+ if ( (long)cftg_ret == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
}
}
}
@@ -213,6 +230,7 @@ int guest_remove_page(struct domain *d,
static void decrease_reservation(struct memop_args *a)
{
unsigned long i, j;
+ unsigned long cfg_ret;
xen_pfn_t gmfn;
if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
@@ -227,8 +245,13 @@ static void decrease_reservation(struct
goto out;
}
- if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
+ cfg_ret = __copy_from_guest_offset(&gmfn, a->extent_list, i, 1);
+ if ( unlikely(cfg_ret) )
+ {
+ if ( (long)cfg_ret == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
if ( tb_init_done )
{
@@ -509,6 +532,7 @@ long do_memory_op(unsigned long cmd, XEN
int rc, op;
unsigned int address_bits;
unsigned long start_extent;
+ unsigned long cfg_ret;
struct xen_memory_reservation reservation;
struct memop_args args;
domid_t domid;
@@ -522,8 +546,13 @@ long do_memory_op(unsigned long cmd, XEN
case XENMEM_populate_physmap:
start_extent = cmd >> MEMOP_EXTENT_SHIFT;
- if ( copy_from_guest(&reservation, arg, 1) )
+ cfg_ret = copy_from_guest(&reservation, arg, 1);
+ if ( unlikely(cfg_ret) )
+ {
+ if ( (long)cfg_ret == -EAGAIN )
+ return hypercall_create_continuation(__HYPERVISOR_memory_op, "lh", cmd, arg);
return start_extent;
+ }
/* Is size too large for us to encode a continuation? */
if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )

View File

@ -38,12 +38,12 @@ v2:
tools/python/README.sxpcfg | 3 +
tools/python/xen/xend/XendConfig.py | 9 +++
tools/python/xen/xend/XendDomain.py | 15 +++++
tools/python/xen/xend/XendDomainInfo.py | 22 ++++++++
tools/python/xen/xend/XendDomainInfo.py | 23 ++++++++
tools/python/xen/xend/image.py | 85 ++++++++++++++++++++++++++++++++
tools/python/xen/xm/create.py | 15 +++++
tools/python/xen/xm/main.py | 14 +++++
tools/python/xen/xm/xenapi_create.py | 3 +
10 files changed, 178 insertions(+)
10 files changed, 179 insertions(+)
Index: xen-4.1.2-testing/tools/examples/xmexample.hvm
===================================================================
@ -150,7 +150,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1503,6 +1503,16 @@ class XendDomainInfo:
@@ -1503,6 +1503,17 @@ class XendDomainInfo:
break
xen.xend.XendDomain.instance().managed_config_save(self)
@ -163,11 +163,12 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
+
+ if self.domid > 0:
+ self.storeDom("memory/target-tot_pages", target * 1024)
+ self.info['platform']['actmem'] = str(target)
+
def setMemoryTarget(self, target):
"""Set the memory target of this domain.
@param target: In MiB.
@@ -2291,6 +2301,8 @@ class XendDomainInfo:
@@ -2292,6 +2303,8 @@ class XendDomainInfo:
self.info['name_label'], self.domid, self.info['uuid'],
new_name, new_uuid)
self._unwatchVm()
@ -176,7 +177,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
self._releaseDevices()
# Remove existing vm node in xenstore
self._removeVm()
@@ -2965,6 +2977,9 @@ class XendDomainInfo:
@@ -2966,6 +2979,9 @@ class XendDomainInfo:
self._createDevices()
@ -186,7 +187,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
self.image.cleanupTmpImages()
self.info['start_time'] = time.time()
@@ -2989,6 +3004,8 @@ class XendDomainInfo:
@@ -2990,6 +3006,8 @@ class XendDomainInfo:
self.refresh_shutdown_lock.acquire()
try:
self.unwatchShutdown()
@ -195,7 +196,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
self._releaseDevices()
bootloader_tidy(self)
@@ -3073,6 +3090,7 @@ class XendDomainInfo:
@@ -3074,6 +3092,7 @@ class XendDomainInfo:
self.image = image.create(self, self.info)
if self.image:
self._createDevices(True)
@ -203,7 +204,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
self.console_port = console_port
self._storeDomDetails()
self._registerWatches()
@@ -3214,6 +3232,8 @@ class XendDomainInfo:
@@ -3215,6 +3234,8 @@ class XendDomainInfo:
# could also fetch a parsed note from xenstore
fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0
if not fast:
@ -212,7 +213,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py
self._releaseDevices()
self.testDeviceComplete()
self.testvifsComplete()
@@ -3229,6 +3249,8 @@ class XendDomainInfo:
@@ -3230,6 +3251,8 @@ class XendDomainInfo:
self._storeDomDetails()
self._createDevices()

92
xenpaging.doc.patch Normal file
View File

@ -0,0 +1,92 @@
---
docs/misc/xenpaging.txt | 66 +++++++++++++++++++++++++++++-------------------
1 file changed, 40 insertions(+), 26 deletions(-)
Index: xen-4.1.2-testing/docs/misc/xenpaging.txt
===================================================================
--- xen-4.1.2-testing.orig/docs/misc/xenpaging.txt
+++ xen-4.1.2-testing/docs/misc/xenpaging.txt
@@ -1,8 +1,6 @@
Warning:
The xenpaging code is new and not fully debugged.
-Usage of xenpaging can crash Xen or cause severe data corruption in the
-guest memory and its filesystems!
Description:
@@ -14,34 +12,50 @@ than physically available on the host.
Usage:
-Once the guest is running, run xenpaging with the guest_id and the
-number of pages to page-out:
+To enable xenpaging for a guest add the option 'actmem=' to the guests
+config file and run 'xm new <vm_config_file>' to make the changes
+active. actmem= takes the amount of memory in MB which a guest is
+allowed to use at a given time. Everything above this limit will be
+paged out. This paging is transparent to the guest.
+
+Example:
+ memory=4096
+ actmem=1024
+In this example a guest gets the impression it has 4GB of memory and
+the guest OS has to configure itself for this amount of memory. But
+xenpaging will page-out 3072MB, leaving only 1024MB active at a time.
+
+At runtime the configured value of actmem= can be changed with the "xm
+mem-swap-target" command.
+ xm mem-swap-target <domain_name> 512
+
+Additional cmdline options for the xenpaging binary can be specified
+with the xenpaging_extra= config file option:
+
+ xenpaging_extra=[ '-f', '/dev/shm/pagefile-guest_name', '-v' ]
+
+To get a list of available options, run /usr/lib/xen/bin/xenpaging -h:
+
+ xenpaging [options] -f <pagefile> -d <domain_id>
+
+options:
+ -d <domid> --domain=<domid> numerical domain_id of guest. This option is required.
+ -f <file> --pagefile=<file> pagefile to use. This option is required.
+ -m <max_memkb> --max_memkb=<max_memkb> maximum amount of memory to handle.
+ -r <num> --mru_size=<num> number of paged-in pages to keep in memory.
+ -v --verbose enable debug output.
+ -h --help this output.
+
+
+Caveats:
+Live migration with a paged guest does currently not work, the guest
+will crash once it starts on the target host. As a workaround stop
+paging before starting the migration:
- chdir /var/lib/xen/xenpaging
- xenpaging <guest_id> <number_of_pages>
-
-To obtain the guest_id, run 'xm list'.
-xenpaging will write the pagefile to the current directory.
-Example with 128MB pagefile on guest 1:
-
- xenpaging 1 32768
-
-Caution: stopping xenpaging manually will cause the guest to stall or
-crash because the paged-out memory is not written back into the guest!
-
-After a reboot of a guest, its guest_id changes, the current xenpaging
-binary has no target anymore. To automate restarting of xenpaging after
-guest reboot, specify the number if pages in the guest configuration
-file /etc/xen/vm/<guest_name>:
-
-xenpaging=32768
-
-Redo the guest with 'xm create /etc/xen/vm/<guest_name>' to activate the
-changes.
+xm mem-swap-target <dom_name> 0 && xm migrate -l <dom_name> <remote_host>
Todo:
-- implement stopping of xenpaging
- implement/test live migration

View File

@ -0,0 +1,183 @@
# HG changeset patch
# Parent 5a299906312e606553e6dd2acbe44ab692722a75
xenpaging: improve evict error handling
Adjust return codes in Xen and handle errors in evict_victim() properly.
p2m_mem_paging_nominate() returns -EAGAIN, p2m_mem_paging_evict()
returns -EBUSY. Other errors indicate guest failures, which
xenpaging_evict_page() can now catch correctly. Also write() failures
are fatal.
Without this change, evict_victim() may spin forever if the guest is
killed because this function does not get a signal.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/xenpaging.c | 47 ++++++++++++++++++++++++++---------------
xen/arch/x86/mm/p2m.c | 7 +-----
xen/include/public/mem_event.h | 2 -
3 files changed, 33 insertions(+), 23 deletions(-)
--- a/tools/xenpaging/xenpaging.c
+++ b/tools/xenpaging/xenpaging.c
@@ -569,29 +569,35 @@ static int xenpaging_evict_page(xenpagin
xc_interface *xch = paging->xc_handle;
void *page;
unsigned long gfn;
- int ret;
+ int ret = -1;
DECLARE_DOMCTL;
/* Map page to get a handle */
gfn = victim->gfn;
- ret = -EFAULT;
page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
PROT_READ | PROT_WRITE, &gfn, 1);
if ( page == NULL )
{
- PERROR("Error mapping page %lx", victim->gfn);
+ if ( errno == EINVAL )
+ ret = 1;
+ else
+ PERROR("Error mapping page %lx", victim->gfn);
goto out;
}
/* Nominate the page */
- ret = xc_mem_paging_nominate(xch, paging->mem_event.domain_id, gfn);
- if ( ret != 0 )
+ if ( xc_mem_paging_nominate(xch, paging->mem_event.domain_id, gfn) )
+ {
+ if ( errno == EAGAIN )
+ ret = 1;
+ else
+ PERROR("Error nominating page %lx", victim->gfn);
goto out;
+ }
/* Copy page */
- ret = write_page(fd, page, i);
- if ( ret != 0 )
+ if ( write_page(fd, page, i) )
{
PERROR("Error copying page %lx", victim->gfn);
goto out;
@@ -601,10 +607,10 @@ static int xenpaging_evict_page(xenpagin
page = NULL;
/* Tell Xen to evict page */
- ret = xc_mem_paging_evict(xch, paging->mem_event.domain_id,
- victim->gfn);
- if ( ret != 0 )
+ if ( xc_mem_paging_evict(xch, paging->mem_event.domain_id, victim->gfn) )
{
+ if ( errno == EBUSY )
+ ret = 1;
PERROR("Error evicting page %lx", victim->gfn);
goto out;
}
@@ -616,6 +622,8 @@ static int xenpaging_evict_page(xenpagin
/* Record number of evicted pages */
paging->num_paged_out++;
+ ret = 0;
+
out:
if (page)
munmap(page, PAGE_SIZE);
@@ -724,7 +732,7 @@ static int evict_victim(xenpaging_t *pag
xenpaging_victim_t *victim, int fd, int i)
{
xc_interface *xch = paging->xc_handle;
- int j = 0;
+ int flushed = 0;
int ret;
do
@@ -732,9 +740,13 @@ static int evict_victim(xenpaging_t *pag
ret = policy_choose_victim(paging, victim);
if ( ret != 0 )
{
- if ( ret != -ENOSPC )
- ERROR("Error choosing victim");
- goto out;
+ if ( !flushed ) {
+ DPRINTF("Flushing qemu cache\n");
+ xenpaging_mem_paging_flush_ioemu_cache(paging);
+ flushed = 1;
+ continue;
+ }
+ goto out;
}
if ( interrupted )
@@ -742,11 +754,12 @@ static int evict_victim(xenpaging_t *pag
ret = -EINTR;
goto out;
}
+
ret = xenpaging_evict_page(paging, victim, fd, i);
- if ( ret && j++ % 1000 == 0 )
+ if ( ret < 0 )
{
- if ( xenpaging_mem_paging_flush_ioemu_cache(paging) )
- PERROR("Error flushing ioemu cache");
+ ret = -EINTR;
+ goto out;
}
}
while ( ret );
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2863,19 +2863,17 @@ int p2m_mem_paging_nominate(struct p2m_d
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
- int ret;
+ int ret = -EAGAIN;
p2m_lock(p2m);
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Check if mfn is valid */
- ret = -EINVAL;
if ( !mfn_valid(mfn) )
goto out;
/* Check p2m type */
- ret = -EAGAIN;
if ( !p2m_is_pageable(p2mt) )
goto out;
@@ -2928,7 +2926,7 @@ int p2m_mem_paging_evict(struct p2m_doma
p2m_access_t a;
mfn_t mfn;
struct domain *d = p2m->domain;
- int ret = -EINVAL;
+ int ret = -EBUSY;
p2m_lock(p2m);
@@ -2941,7 +2939,6 @@ int p2m_mem_paging_evict(struct p2m_doma
if ( p2mt != p2m_ram_paging_out )
goto out;
- ret = -EBUSY;
/* Get the page so it doesn't get modified under Xen's feet */
page = mfn_to_page(mfn);
if ( unlikely(!get_page(page, d)) )
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -49,7 +49,7 @@
#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */
#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */
-#define MEM_EVENT_PAGING_AGE 2UL /* Number distinguish the mem_paging <-> pager interface */
+#define MEM_EVENT_PAGING_AGE 3UL /* Number distinguish the mem_paging <-> pager interface */
typedef struct mem_event_shared_page {
uint32_t port;

View File

@ -0,0 +1,57 @@
# HG changeset patch
# Parent 00989d5f44b59ba7f3a467342a14b9c7621fa926
xenpaging: restore p2mt if gfn is needed before evict
In the rare case that a gfn is needed by a guest or a foreign domain
between nominate and evict, restore the p2mt and skip sending a request.
A request is not needed because the pager will notice the evict failure.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/mm/p2m.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3036,6 +3036,7 @@ void p2m_mem_paging_populate(struct p2m_
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
+ int restored = 0;
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
@@ -3051,23 +3052,25 @@ void p2m_mem_paging_populate(struct p2m_
/* Allow only nominated or evicted pages to enter page-in path */
if ( p2m_do_populate(p2mt) )
{
- /* Evict will fail now, tag this request for pager */
- if ( p2mt == p2m_ram_paging_out )
- req.flags |= MEM_EVENT_FLAG_EVICT_FAIL;
-
- set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
+ /* Restore page state if gfn was requested before evict */
+ if ( p2mt == p2m_ram_paging_out && mfn_valid(mfn) ) {
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw, a);
+ restored = 1;
+ } else {
+ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
+ }
audit_p2m(p2m, 1);
}
p2m_unlock(p2m);
/* Pause domain if request came from guest and gfn has paging type */
- if ( p2m_is_paging(p2mt) && v->domain == d )
+ if ( !restored && p2m_is_paging(p2mt) && v->domain == d )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
/* No need to inform pager if the gfn is not in the page-out path */
- else if ( !p2m_do_populate(p2mt) )
+ else if ( restored || !p2m_do_populate(p2mt) )
{
/* gfn is already on its way back and vcpu is not paused */
mem_event_put_req_producers(&d->mem_event->paging);

View File

@ -0,0 +1,20 @@
xenpaging: mmap gfn to evict in readonly mode
nominate/evict will not modify the page so there is no need to map the page rw.
---
tools/xenpaging/xenpaging.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/tools/xenpaging/xenpaging.c
+++ b/tools/xenpaging/xenpaging.c
@@ -575,8 +575,7 @@ static int xenpaging_evict_page(xenpagin
/* Map page to get a handle */
gfn = victim->gfn;
- page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
- PROT_READ | PROT_WRITE, &gfn, 1);
+ page = xc_map_foreign_pages(xch, paging->mem_event.domain_id, PROT_READ, &gfn, 1);
if ( page == NULL )
{
if ( errno == EINVAL )

View File

@ -0,0 +1,559 @@
# HG changeset patch
# Parent aa97fafb53fc95aaec8d9890635d14304f24c362
mem_event: use wait queue when ring is full
This change is based on an idea/patch from Adin Scannell.
If the ring is full, put the current vcpu to sleep if it belongs to the
target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
will take the number of free slots into account.
A request from a foreign domain has to succeed once a slot has been
claimed, because such vcpus cannot sleep.
This change also fixes a bug in p2m_mem_paging_drop_page(): up to now a
full ring would lead to a harmless inconsistency in the pager.
v6:
- take foreign requests into account before calling wake_up_nr()
- call wake_up_nr() outside of ring lock
- rename ->bit to ->pause_flag
v5:
- rename mem_event_check_ring() to mem_event_claim_slot()
- rename mem_event_put_req_producers() to mem_event_release_slot()
- add local/foreign request accounting
- keep room for at least one guest request
v4:
- fix off-by-one bug in _mem_event_put_request
- add mem_event_wake_requesters() and use wake_up_nr()
- rename mem_event_mark_and_pause() and mem_event_unpause_vcpus() functions
- req_producers counts foreign request producers, rename member
v3:
- rename ->mem_event_bit to ->bit
- remove me_ from new VPF_ defines
v2:
- p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
vcpu will not wake_up after a wait_event because the pause_count was
increased twice. Fixes guest hangs.
- update free space check in _mem_event_put_request()
- simplify mem_event_put_request()
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/hvm.c | 4 -
xen/arch/x86/mm/mem_event.c | 147 ++++++++++++++++++++++++++++++++++------
xen/arch/x86/mm/mem_sharing.c | 46 ++++--------
xen/arch/x86/mm/p2m.c | 36 ++++-----
xen/include/asm-x86/mem_event.h | 10 +-
xen/include/xen/sched.h | 17 +++-
6 files changed, 179 insertions(+), 81 deletions(-)
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3915,8 +3915,8 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d, &d->mem_event->access);
- if ( rc )
+ rc = mem_event_claim_slot(d, &d->mem_event->access);
+ if ( rc < 0 )
return rc;
memset(&req, 0, sizeof(req));
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
@@ -39,6 +40,7 @@
static int mem_event_enable(struct domain *d,
xen_domctl_mem_event_op_t *mec,
+ int pause_flag,
struct mem_event_domain *med)
{
int rc;
@@ -94,8 +96,12 @@ static int mem_event_enable(struct domai
mem_event_ring_lock_init(med);
+ med->pause_flag = pause_flag;
+
+ init_waitqueue_head(&med->wq);
+
/* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ mem_event_wake_waiters(d, med);
return 0;
@@ -111,6 +117,9 @@ static int mem_event_enable(struct domai
static int mem_event_disable(struct mem_event_domain *med)
{
+ if (!list_empty(&med->wq.list))
+ return -EBUSY;
+
unmap_domain_page(med->ring_page);
med->ring_page = NULL;
@@ -120,13 +129,24 @@ static int mem_event_disable(struct mem_
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static int _mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req)
{
mem_event_front_ring_t *front_ring;
+ int free_req, claimed_req;
RING_IDX req_prod;
mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ /* Foreign requests must succeed because their vcpus can not sleep */
+ claimed_req = med->foreign_producers;
+ if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
+ mem_event_ring_unlock(med);
+ return 0;
+ }
+
front_ring = &med->front_ring;
req_prod = front_ring->req_prod_pvt;
@@ -134,14 +154,35 @@ void mem_event_put_request(struct domain
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
+ /* Update accounting */
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
+
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
+
+ return 1;
+}
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ /* Go to sleep if request came from guest */
+ if (current->domain == d) {
+ wait_event(med->wq, _mem_event_put_request(d, med, req));
+ return;
+ }
+ /* Ring was full anyway, unable to sleep in non-guest context */
+ if (!_mem_event_put_request(d, med, req))
+ printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
+ req->type, req->flags, (unsigned long)req->gfn);
}
void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
@@ -165,32 +206,97 @@ void mem_event_get_response(struct mem_e
mem_event_ring_unlock(med);
}
-void mem_event_unpause_vcpus(struct domain *d)
+/**
+ * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
+ * ring. Only as many as can place another request in the ring will
+ * resume execution.
+ */
+void mem_event_wake_requesters(struct mem_event_domain *med)
+{
+ int free_req;
+
+ mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ free_req -= med->foreign_producers;
+ mem_event_ring_unlock(med);
+
+ if ( free_req )
+ wake_up_nr(&med->wq, free_req);
+}
+
+/**
+ * mem_event_wake_waiters - Wake all vcpus waiting for the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
+ * become available.
+ */
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med)
{
struct vcpu *v;
for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
vcpu_wake(v);
}
-void mem_event_mark_and_pause(struct vcpu *v)
+/**
+ * mem_event_mark_and_sleep - Put vcpu to sleep
+ * @v: guest vcpu
+ * @med: mem_event ring
+ *
+ * mem_event_mark_and_sleep() tags vcpu and put it to sleep.
+ * The vcpu will resume execution in mem_event_wake_waiters().
+ */
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med)
{
- set_bit(_VPF_mem_event, &v->pause_flags);
+ set_bit(med->pause_flag, &v->pause_flags);
vcpu_sleep_nosync(v);
}
-void mem_event_put_req_producers(struct mem_event_domain *med)
+/**
+ * mem_event_release_slot - Release a claimed slot
+ * @med: mem_event ring
+ *
+ * mem_event_release_slot() releases a claimed slot in the mem_event ring.
+ */
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+/**
+ * mem_event_claim_slot - Check state of a mem_event ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * Return codes: < 0: the ring is not yet configured
+ * 0: the ring has some room
+ * > 0: the ring is full
+ *
+ * mem_event_claim_slot() checks the state of the given mem_event ring.
+ * If the current vcpu belongs to the guest domain, the function assumes that
+ * mem_event_put_request() will sleep until the ring has room again.
+ * A guest can always place at least one request.
+ *
+ * If the current vcpu does not belong to the target domain the caller must try
+ * again until there is room. A slot is claimed and the caller can place a
+ * request. If the caller does not need to send a request, the claimed slot has
+ * to be released with mem_event_release_slot().
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
{
- struct vcpu *curr = current;
- int free_requests;
+ int free_req;
int ring_full = 1;
if ( !med->ring_page )
@@ -198,16 +304,17 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+
+ if ( current->domain == d ) {
+ med->target_producers++;
+ ring_full = 0;
+ } else if ( med->foreign_producers + med->target_producers + 1 < free_req )
{
- med->req_producers++;
+ med->foreign_producers++;
ring_full = 0;
}
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
-
mem_event_ring_unlock(med);
return ring_full;
@@ -283,7 +390,7 @@ int mem_event_domctl(struct domain *d, x
break;
}
- rc = mem_event_enable(d, mec, med);
+ rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
}
break;
@@ -322,7 +429,7 @@ int mem_event_domctl(struct domain *d, x
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med);
+ rc = mem_event_enable(d, mec, _VPF_mem_access, med);
}
break;
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -292,44 +292,32 @@ static void mem_sharing_audit(void)
#endif
-static struct page_info* mem_sharing_alloc_page(struct domain *d,
- unsigned long gfn,
- int must_succeed)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
{
- struct page_info* page;
struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
- page = alloc_domheap_page(d, 0);
- if(page != NULL) return page;
-
- memset(&req, 0, sizeof(req));
- req.type = MEM_EVENT_TYPE_SHARED;
-
- if(must_succeed)
+ if ( v->domain != d )
{
- /* We do not support 'must_succeed' any more. External operations such
- * as grant table mappings may fail with OOM condition!
- */
- BUG();
- }
- else
- {
- /* All foreign attempts to unshare pages should be handled through
- * 'must_succeed' case. */
- ASSERT(v->domain->domain_id == d->domain_id);
- vcpu_pause_nosync(v);
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+ /* XXX This path needs some attention. For now, just fail foreign
+ * XXX requests to unshare if there's no memory. This replaces
+ * XXX old code that BUG()ed here; the callers now BUG()
+ * XXX elewhere. */
+ gdprintk(XENLOG_ERR,
+ "Failed alloc on unshare path for foreign (%d) lookup\n",
+ d->domain_id);
+ return;
}
- if(mem_event_check_ring(d, &d->mem_event->share)) return page;
+ if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+ return;
+ req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
mem_event_put_request(d, &d->mem_event->share, &req);
-
- return page;
+ vcpu_pause_nosync(v);
}
unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -692,14 +680,14 @@ gfn_found:
if(ret == 0) goto private_page_found;
old_page = page;
- page = mem_sharing_alloc_page(d, gfn, flags & MEM_SHARING_MUST_SUCCEED);
- BUG_ON(!page && (flags & MEM_SHARING_MUST_SUCCEED));
+ page = alloc_domheap_page(d, 0);
if(!page)
{
/* We've failed to obtain memory for private page. Need to re-add the
* gfn_info to relevant list */
list_add(&gfn_info->list, &hash_entry->gfns);
shr_unlock();
+ mem_sharing_notify_helper(d, gfn);
return -ENOMEM;
}
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2988,21 +2988,13 @@ int p2m_mem_paging_evict(struct p2m_doma
*/
void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
{
- struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
struct domain *d = p2m->domain;
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* Send release notification to pager */
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -3037,7 +3029,7 @@ void p2m_mem_paging_populate(struct p2m_
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ if ( mem_event_claim_slot(d, &d->mem_event->paging) )
return;
memset(&req, 0, sizeof(req));
@@ -3070,7 +3062,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( restored || !p2m_do_populate(p2mt) )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_release_slot(d, &d->mem_event->paging);
return;
}
@@ -3209,8 +3201,8 @@ void p2m_mem_paging_resume(struct p2m_do
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->paging);
}
void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
@@ -3239,7 +3231,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
+ res = mem_event_claim_slot(d, &d->mem_event->access);
if ( res < 0 )
{
/* No listener */
@@ -3249,7 +3241,7 @@ void p2m_mem_access_check(unsigned long
"Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
v->vcpu_id, d->domain_id);
- mem_event_mark_and_pause(v);
+ mem_event_mark_and_sleep(v, &d->mem_event->access);
}
else
{
@@ -3299,9 +3291,11 @@ void p2m_mem_access_resume(struct p2m_do
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->access);
+
+ /* Unpause all vcpus that were paused because no listener was available */
+ mem_event_wake_waiters(d, &d->mem_event->access);
}
#endif /* __x86_64__ */
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,13 +24,13 @@
#ifndef __MEM_EVENT_H__
#define __MEM_EVENT_H__
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med);
void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_wake_requesters(struct mem_event_domain *med);
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med);
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
XEN_GUEST_HANDLE(void) u_domctl);
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -26,6 +26,7 @@
#include <xen/cpumask.h>
#include <xen/nodemask.h>
#include <xen/multicall.h>
+#include <xen/wait.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
@@ -190,7 +191,8 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
- unsigned int req_producers;
+ unsigned short foreign_producers;
+ unsigned short target_producers;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
@@ -199,6 +201,10 @@ struct mem_event_domain
mem_event_front_ring_t front_ring;
/* event channel port (vcpu0 only) */
int xen_port;
+ /* mem_event bit for vcpu->pause_flags */
+ int pause_flag;
+ /* list of vcpus waiting for room in the ring */
+ struct waitqueue_head wq;
};
struct mem_event_per_domain
@@ -601,9 +607,12 @@ extern struct domain *domain_list;
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating 3
#define VPF_migrating (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event 4
-#define VPF_mem_event (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging 4
+#define VPF_mem_paging (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access 5
+#define VPF_mem_access (1UL<<_VPF_mem_access)
static inline int vcpu_runnable(struct vcpu *v)
{

View File

@ -0,0 +1,114 @@
# HG changeset patch
# Parent 4019436855ff3d44228c8eb3e78a9133a9caf870
xenpaging: map gfn before nomination
If the gfn is mapped before nomination, all special cases in do_mmu_update()
for paged gfns can be removed. If a gfn is actually in any of the paging
states the caller has to try again.
Bump interface age.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
--- a/tools/xenpaging/xenpaging.c
+++ b/tools/xenpaging/xenpaging.c
@@ -573,7 +573,7 @@ static int xenpaging_evict_page(xenpagin
DECLARE_DOMCTL;
- /* Map page */
+ /* Map page to get a handle */
gfn = victim->gfn;
ret = -EFAULT;
page = xc_map_foreign_pages(xch, paging->mem_event.domain_id,
@@ -584,16 +584,21 @@ static int xenpaging_evict_page(xenpagin
goto out;
}
+ /* Nominate the page */
+ ret = xc_mem_paging_nominate(xch, paging->mem_event.domain_id, gfn);
+ if ( ret != 0 )
+ goto out;
+
/* Copy page */
ret = write_page(fd, page, i);
if ( ret != 0 )
{
PERROR("Error copying page %lx", victim->gfn);
- munmap(page, PAGE_SIZE);
goto out;
}
munmap(page, PAGE_SIZE);
+ page = NULL;
/* Tell Xen to evict page */
ret = xc_mem_paging_evict(xch, paging->mem_event.domain_id,
@@ -612,6 +617,8 @@ static int xenpaging_evict_page(xenpagin
paging->num_paged_out++;
out:
+ if (page)
+ munmap(page, PAGE_SIZE);
return ret;
}
@@ -735,14 +742,11 @@ static int evict_victim(xenpaging_t *pag
ret = -EINTR;
goto out;
}
- ret = xc_mem_paging_nominate(xch, paging->mem_event.domain_id, victim->gfn);
- if ( ret == 0 )
- ret = xenpaging_evict_page(paging, victim, fd, i);
- else
+ ret = xenpaging_evict_page(paging, victim, fd, i);
+ if ( ret && j++ % 1000 == 0 )
{
- if ( j++ % 1000 == 0 )
- if ( xenpaging_mem_paging_flush_ioemu_cache(paging) )
- PERROR("Error flushing ioemu cache");
+ if ( xenpaging_mem_paging_flush_ioemu_cache(paging) )
+ PERROR("Error flushing ioemu cache");
}
}
while ( ret );
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2852,7 +2852,7 @@ set_shared_p2m_entry(struct p2m_domain *
* - the gfn is backed by a mfn
* - the p2mt of the gfn is pageable
* - the mfn is not used for IO
- * - the mfn has exactly one user and has no special meaning
+ * - the mfn has exactly two users (guest+pager) and has no special meaning
*
* Once the p2mt is changed the page is readonly for the guest. On success the
* pager can write the page contents to disk and later evict the page.
@@ -2886,7 +2886,7 @@ int p2m_mem_paging_nominate(struct p2m_d
/* Check page count and type */
page = mfn_to_page(mfn);
if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
- (1 | PGC_allocated) )
+ (2 | PGC_allocated) )
goto out;
if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_none )
@@ -2914,7 +2914,7 @@ int p2m_mem_paging_nominate(struct p2m_d
* freed:
* - the gfn is backed by a mfn
* - the gfn was nominated
- * - the mfn has still exactly one user and has no special meaning
+ * - the mfn has still exactly one user (the guest) and has no special meaning
*
* After successful nomination some other process could have mapped the page. In
* this case eviction can not be done. If the gfn was populated before the pager
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -49,7 +49,7 @@
#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */
#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */
-#define MEM_EVENT_PAGING_AGE 1UL /* Number distinguish the mem_paging <-> pager interface */
+#define MEM_EVENT_PAGING_AGE 2UL /* Number to distinguish the mem_paging <-> pager interface */
typedef struct mem_event_shared_page {
uint32_t port;

View File

@ -0,0 +1,335 @@
# HG changeset patch
# Parent 4a0a6a1cd56a8f3d242f323fb5161c2d1f52dccb
xenpaging: add need_populate and paged_no_mfn checks
There is currently a mix of p2mt checks for the various paging types.
Some mean the p2mt needs to be populated, others mean a gfn without mfn.
Add a new p2m_do_populate() helper which covers the p2m_ram_paged and
p2m_ram_paging_out types. If a gfn is not in these states anymore another
populate request for the pager is not needed. This avoids a call to
p2m_mem_paging_populate() which in turn reduces the pressure on the ring
buffer because no temporary slot needs to be claimed. As such, this helper is
an optimization.
Modify the existing p2m_is_paged() helper which now covers also
p2m_ram_paging_in_start in addition to the current p2m_ram_paged type. A gfn
in these two states is not backed by a mfn.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/emulate.c | 3 +
xen/arch/x86/hvm/hvm.c | 17 ++++++----
xen/arch/x86/mm.c | 63 ++++++++++++---------------------------
xen/arch/x86/mm/guest_walk.c | 3 +
xen/arch/x86/mm/hap/guest_walk.c | 6 ++-
xen/arch/x86/mm/hap/p2m-ept.c | 3 -
xen/arch/x86/mm/p2m.c | 4 +-
xen/common/grant_table.c | 3 +
xen/include/asm-x86/p2m.h | 9 ++++-
9 files changed, 51 insertions(+), 60 deletions(-)
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -66,7 +66,8 @@ static int hvmemul_do_io(
ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m, ram_gfn);
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m, ram_gfn);
return X86EMUL_RETRY;
}
if ( p2m_is_shared(p2mt) )
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -360,7 +360,8 @@ static int hvm_set_ioreq_page(
return -EINVAL;
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m, gmfn);
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m, gmfn);
return -ENOENT;
}
if ( p2m_is_shared(p2mt) )
@@ -1174,7 +1175,7 @@ bool_t hvm_hap_nested_page_fault(unsigne
#ifdef __x86_64__
/* Check if the page has been paged out */
- if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) )
+ if ( p2m_do_populate(p2mt) )
p2m_mem_paging_populate(p2m, gfn);
/* Mem sharing: unshare the page and try again */
@@ -1662,7 +1663,8 @@ static void *__hvm_map_guest_frame(unsig
return NULL;
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m, gfn);
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m, gfn);
return NULL;
}
@@ -2120,7 +2122,8 @@ static enum hvm_copy_result __hvm_copy(
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m, gfn);
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m, gfn);
return HVMCOPY_gfn_paged_out;
}
if ( p2m_is_shared(p2mt) )
@@ -3497,7 +3500,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
mfn_t mfn = gfn_to_mfn(p2m, pfn, &t);
if ( p2m_is_paging(t) )
{
- p2m_mem_paging_populate(p2m, pfn);
+ if ( p2m_do_populate(t) )
+ p2m_mem_paging_populate(p2m, pfn);
rc = -EINVAL;
goto param_fail3;
@@ -3594,7 +3598,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
mfn = gfn_to_mfn_unshare(p2m, pfn, &t, 0);
if ( p2m_is_paging(t) )
{
- p2m_mem_paging_populate(p2m, pfn);
+ if ( p2m_do_populate(t) )
+ p2m_mem_paging_populate(p2m, pfn);
rc = -EINVAL;
goto param_fail4;
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3465,9 +3465,10 @@ int do_mmu_update(
if ( !p2m_is_valid(p2mt) )
mfn = INVALID_MFN;
- if ( p2m_is_paged(p2mt) )
+ if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), gmfn);
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), gmfn);
rc = -ENOENT;
break;
@@ -3492,24 +3493,18 @@ int do_mmu_update(
{
l1_pgentry_t l1e = l1e_from_intpte(req.val);
p2m_type_t l1e_p2mt;
- unsigned long l1emfn = mfn_x(
gfn_to_mfn(p2m_get_hostp2m(pg_owner),
- l1e_get_pfn(l1e), &l1e_p2mt));
+ l1e_get_pfn(l1e), &l1e_p2mt);
- if ( p2m_is_paged(l1e_p2mt) )
+#ifdef __x86_64__
+ if ( p2m_is_paging(l1e_p2mt) )
{
- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
+ if ( p2m_do_populate(l1e_p2mt) )
+ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
l1e_get_pfn(l1e));
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l1e_p2mt &&
- !mfn_valid(l1emfn) )
- {
- rc = -ENOENT;
- break;
- }
-#ifdef __x86_64__
/* XXX: Ugly: pull all the checks into a separate function.
* Don't want to do it now, not to interfere with mem_paging
* patches */
@@ -3536,22 +3531,16 @@ int do_mmu_update(
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
p2m_type_t l2e_p2mt;
- unsigned long l2emfn = mfn_x(
- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e), &l2e_p2mt));
+ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e), &l2e_p2mt);
- if ( p2m_is_paged(l2e_p2mt) )
+ if ( p2m_is_paging(l2e_p2mt) )
{
- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
+ if ( p2m_do_populate(l2e_p2mt) )
+ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
l2e_get_pfn(l2e));
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l2e_p2mt &&
- !mfn_valid(l2emfn) )
- {
- rc = -ENOENT;
- break;
- }
else if ( p2m_ram_shared == l2e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
@@ -3567,22 +3556,16 @@ int do_mmu_update(
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
p2m_type_t l3e_p2mt;
- unsigned long l3emfn = mfn_x(
- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e), &l3e_p2mt));
+ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e), &l3e_p2mt);
- if ( p2m_is_paged(l3e_p2mt) )
+ if ( p2m_is_paging(l3e_p2mt) )
{
- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
+ if ( p2m_do_populate(l3e_p2mt) )
+ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
l3e_get_pfn(l3e));
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l3e_p2mt &&
- !mfn_valid(l3emfn) )
- {
- rc = -ENOENT;
- break;
- }
else if ( p2m_ram_shared == l3e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
@@ -3598,23 +3581,17 @@ int do_mmu_update(
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
p2m_type_t l4e_p2mt;
- unsigned long l4emfn = mfn_x(
gfn_to_mfn(p2m_get_hostp2m(pg_owner),
- l4e_get_pfn(l4e), &l4e_p2mt));
+ l4e_get_pfn(l4e), &l4e_p2mt);
- if ( p2m_is_paged(l4e_p2mt) )
+ if ( p2m_is_paging(l4e_p2mt) )
{
- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
+ if ( p2m_do_populate(l4e_p2mt) )
+ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
l4e_get_pfn(l4e));
rc = -ENOENT;
break;
}
- else if ( p2m_ram_paging_in_start == l4e_p2mt &&
- !mfn_valid(l4emfn) )
- {
- rc = -ENOENT;
- break;
- }
else if ( p2m_ram_shared == l4e_p2mt )
{
MEM_LOG("Unexpected attempt to map shared page.\n");
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -96,7 +96,8 @@ static inline void *map_domain_gfn(struc
*mfn = gfn_to_mfn_unshare(p2m, gfn_x(gfn), p2mt, 0);
if ( p2m_is_paging(*p2mt) )
{
- p2m_mem_paging_populate(p2m, gfn_x(gfn));
+ if ( p2m_do_populate(*p2mt) )
+ p2m_mem_paging_populate(p2m, gfn_x(gfn));
*rc = _PAGE_PAGED;
return NULL;
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -50,7 +50,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
top_mfn = gfn_to_mfn_unshare(p2m, cr3 >> PAGE_SHIFT, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m, cr3 >> PAGE_SHIFT);
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m, cr3 >> PAGE_SHIFT);
pfec[0] = PFEC_page_paged;
return INVALID_GFN;
@@ -82,7 +83,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
gfn_to_mfn_unshare(p2m, gfn_x(gfn), &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m, gfn_x(gfn));
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m, gfn_x(gfn));
pfec[0] = PFEC_page_paged;
return INVALID_GFN;
--- a/xen/arch/x86/mm/hap/p2m-ept.c
+++ b/xen/arch/x86/mm/hap/p2m-ept.c
@@ -377,8 +377,7 @@ ept_set_entry(struct p2m_domain *p2m, un
* the intermediate tables will be freed below after the ept flush */
old_entry = *ept_entry;
- if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
- (p2mt == p2m_ram_paging_in_start) )
+ if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) )
{
/* Construct the new entry, and then write it once */
new_entry.emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat,
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3049,7 +3049,7 @@ void p2m_mem_paging_populate(struct p2m_
p2m_lock(p2m);
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
/* Allow only nominated or evicted pages to enter page-in path */
- if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
+ if ( p2m_do_populate(p2mt) )
{
/* Evict will fail now, tag this request for pager */
if ( p2mt == p2m_ram_paging_out )
@@ -3067,7 +3067,7 @@ void p2m_mem_paging_populate(struct p2m_
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
/* No need to inform pager if the gfn is not in the page-out path */
- else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
+ else if ( !p2m_do_populate(p2mt) )
{
/* gfn is already on its way back and vcpu is not paused */
mem_event_put_req_producers(&d->mem_event->paging);
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -158,7 +158,8 @@ static int __get_paged_frame(unsigned lo
*frame = mfn_x(mfn);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(p2m, gfn);
+ if ( p2m_do_populate(p2mt) )
+ p2m_mem_paging_populate(p2m, gfn);
rc = GNTST_eagain;
}
} else {
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -157,7 +157,11 @@ typedef enum {
| p2m_to_mask(p2m_ram_paging_in_start) \
| p2m_to_mask(p2m_ram_paging_in))
-#define P2M_PAGED_TYPES (p2m_to_mask(p2m_ram_paged))
+#define P2M_POPULATE_TYPES (p2m_to_mask(p2m_ram_paged) \
+ | p2m_to_mask(p2m_ram_paging_out) )
+
+#define P2M_PAGED_NO_MFN_TYPES (p2m_to_mask(p2m_ram_paged) \
+ | p2m_to_mask(p2m_ram_paging_in_start) )
/* Shared types */
/* XXX: Sharable types could include p2m_ram_ro too, but we would need to
@@ -179,7 +183,8 @@ typedef enum {
#define p2m_has_emt(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | p2m_to_mask(p2m_mmio_direct)))
#define p2m_is_pageable(_t) (p2m_to_mask(_t) & P2M_PAGEABLE_TYPES)
#define p2m_is_paging(_t) (p2m_to_mask(_t) & P2M_PAGING_TYPES)
-#define p2m_is_paged(_t) (p2m_to_mask(_t) & P2M_PAGED_TYPES)
+#define p2m_is_paged(_t) (p2m_to_mask(_t) & P2M_PAGED_NO_MFN_TYPES)
+#define p2m_do_populate(_t) (p2m_to_mask(_t) & P2M_POPULATE_TYPES)
#define p2m_is_sharable(_t) (p2m_to_mask(_t) & P2M_SHARABLE_TYPES)
#define p2m_is_shared(_t) (p2m_to_mask(_t) & P2M_SHARED_TYPES)
#define p2m_is_broken(_t) (p2m_to_mask(_t) & P2M_BROKEN_TYPES)

View File

@ -0,0 +1,31 @@
Subject: xenpaging/qemu-dm: add command to flush buffer cache.
Add support for a xenstore dm command to flush qemu's buffer cache.
qemu will just keep mapping pages and not release them, which causes problems
for the memory pager (since the page is mapped, it won't get paged out). When
the pager has trouble finding a page to page out, it asks qemu to flush its
buffer, which releases all the page mappings. This makes it possible to find
pages to swap out again.
Already-Signed-off-by: Patrick Colp <Patrick.Colp@citrix.com>
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/ioemu-qemu-xen/xenstore.c | 3 +++
1 file changed, 3 insertions(+)
Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
===================================================================
--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -1082,6 +1082,9 @@ static void xenstore_process_dm_command_
do_pci_add(par);
free(par);
#endif
+ } else if (!strncmp(command, "flush-cache", len)) {
+ fprintf(logfile, "dm-command: flush caches\n");
+ qemu_invalidate_map_cache();
} else {
fprintf(logfile, "dm-command: unknown command\"%*s\"\n", len, command);
}

View File

@ -0,0 +1,87 @@
# HG changeset patch
# Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00
xenpaging: extend xc_mem_paging_enable() to handle interface version
Since upcoming patches will change the way how paging internally works, add a
new interface to xc_mem_paging_enable() to make sure the pager is not
out-of-date. This is similar to XEN_DOMCTL_INTERFACE_VERSION in do_domctl()
where the tools have to match the running hypervisor.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Index: xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_paging.c
+++ xen-4.1.2-testing/tools/libxc/xc_mem_paging.c
@@ -25,12 +25,13 @@
int xc_mem_paging_enable(xc_interface *xch, domid_t domain_id,
+ unsigned long interface_age,
void *shared_page, void *ring_page)
{
return xc_mem_event_control(xch, domain_id,
XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE,
XEN_DOMCTL_MEM_EVENT_OP_PAGING,
- shared_page, ring_page, INVALID_MFN);
+ shared_page, ring_page, interface_age);
}
int xc_mem_paging_disable(xc_interface *xch, domid_t domain_id)
Index: xen-4.1.2-testing/tools/libxc/xenctrl.h
===================================================================
--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h
+++ xen-4.1.2-testing/tools/libxc/xenctrl.h
@@ -1736,6 +1736,7 @@ int xc_mem_event_control(xc_interface *x
void *ring_page, unsigned long gfn);
int xc_mem_paging_enable(xc_interface *xch, domid_t domain_id,
+ unsigned long interface_age,
void *shared_page, void *ring_page);
int xc_mem_paging_disable(xc_interface *xch, domid_t domain_id);
int xc_mem_paging_nominate(xc_interface *xch, domid_t domain_id,
Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c
===================================================================
--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c
@@ -366,6 +366,7 @@ static xenpaging_t *xenpaging_init(int a
/* Initialise Xen */
rc = xc_mem_paging_enable(xch, paging->mem_event.domain_id,
+ MEM_EVENT_PAGING_AGE,
paging->mem_event.shared_page,
paging->mem_event.ring_page);
if ( rc != 0 )
Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
===================================================================
--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c
@@ -274,6 +274,15 @@ int mem_event_domctl(struct domain *d, x
if ( p2m->pod.entry_count )
break;
+ rc = -ENOEXEC;
+ /* Reject pagers built against a different mem_paging interface version */
+ if ( mec->gfn != MEM_EVENT_PAGING_AGE )
+ {
+ gdprintk(XENLOG_INFO, "Expected paging age %lx, got %lx\n",
+ MEM_EVENT_PAGING_AGE, mec->gfn);
+ break;
+ }
+
rc = mem_event_enable(d, mec, med);
}
break;
Index: xen-4.1.2-testing/xen/include/public/mem_event.h
===================================================================
--- xen-4.1.2-testing.orig/xen/include/public/mem_event.h
+++ xen-4.1.2-testing/xen/include/public/mem_event.h
@@ -49,6 +49,8 @@
#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */
#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */
+#define MEM_EVENT_PAGING_AGE 1UL /* Number distinguish the mem_paging <-> pager interface */
+
typedef struct mem_event_shared_page {
uint32_t port;
} mem_event_shared_page_t;

View File

@ -0,0 +1,387 @@
# HG changeset patch
# Parent 427c10f8e1e28d942886f89ebc79ffa93cb7fce9
xenpaging: use wait queues
Use a wait queue to put a guest vcpu to sleep while the requested gfn is
in paging state. This adds missing p2m_mem_paging_populate() calls to
some callers of the new get_gfn* variants, which would crash now
because they get an invalid mfn. It also fixes guest crashes due to
unexpected returns from do_memory_op because copy_to/from_guest ran into
a paged gfn. Now those places will always get a valid mfn.
Since each gfn could be requested by several guest vcpus at the same
time a queue of paged gfns is maintained. Each vcpu will be attached to
that queue. Once p2m_mem_paging_resume restored the gfn the waiting
vcpus will resume execution.
There is untested code in p2m_mem_paging_init_queue() to allow cpu
hotplug. Since each vcpu may wait on a different gfn there have to be as
many queues as vcpus. But xl vcpu-set does not seem to work right now,
so this code path can't be exercised right now.
TODO:
- use hash in p2m_mem_paging_queue_head
- rename gfn_lock
- use mm_lock_t for gfn_lock
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/hvm.c | 2
xen/arch/x86/mm/p2m.c | 220 +++++++++++++++++++++++++++++++++------
xen/common/domctl.c | 3
xen/include/asm-x86/hvm/domain.h | 3
xen/include/asm-x86/p2m.h | 7 +
5 files changed, 205 insertions(+), 30 deletions(-)
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -442,6 +442,8 @@ int hvm_domain_initialise(struct domain
spin_lock_init(&d->arch.hvm_domain.irq_lock);
spin_lock_init(&d->arch.hvm_domain.uc_lock);
+ spin_lock_init(&d->arch.hvm_domain.gfn_lock);
+
INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -30,6 +30,7 @@
#include <asm/p2m.h>
#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
#include <xen/iommu.h>
+#include <xen/wait.h>
#include <asm/mem_event.h>
#include <public/mem_event.h>
#include <asm/mem_sharing.h>
@@ -2839,6 +2840,182 @@ set_shared_p2m_entry(struct p2m_domain *
}
#ifdef __x86_64__
+struct p2m_mem_paging_queue {
+ struct list_head list;
+ struct waitqueue_head wq;
+ unsigned long gfn;
+ unsigned short waiters;
+ unsigned short woken;
+ unsigned short index;
+};
+
+struct p2m_mem_paging_queue_head {
+ struct list_head list;
+ unsigned int max;
+};
+
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max)
+{
+ struct p2m_mem_paging_queue_head *h;
+ struct p2m_mem_paging_queue *q;
+ unsigned int i, nr;
+ int ret = 0;
+
+ if (!is_hvm_domain(d))
+ return 0;
+
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+ if (!d->arch.hvm_domain.gfn_queue) {
+ ret = -ENOMEM;
+ h = xzalloc(struct p2m_mem_paging_queue_head);
+ if (!h) {
+ domain_crash(d);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&h->list);
+ nr = max;
+ } else {
+ h = d->arch.hvm_domain.gfn_queue;
+ if (max <= h->max)
+ goto out;
+ nr = max - h->max;
+ }
+
+ ret = -ENOMEM;
+ q = xzalloc_array(struct p2m_mem_paging_queue, nr);
+ if (!q) {
+ if (!d->arch.hvm_domain.gfn_queue)
+ xfree(h);
+ domain_crash(d);
+ goto out;
+ }
+
+ for (i = 0; i < nr; i++) {
+ init_waitqueue_head(&q[i].wq);
+ INIT_LIST_HEAD(&q[i].list);
+ q[i].index = h->max + i + 1;
+ list_add_tail(&q[i].list, &h->list);
+ }
+
+ h->max = max;
+ d->arch.hvm_domain.gfn_queue = h;
+ ret = 0;
+
+out:
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
+ return ret;
+}
+
+static struct p2m_mem_paging_queue *p2m_mem_paging_get_queue(struct domain *d, unsigned long gfn)
+{
+ struct p2m_mem_paging_queue_head *h;
+ struct p2m_mem_paging_queue *q, *q_match, *q_free;
+
+ h = d->arch.hvm_domain.gfn_queue;
+ q_match = q_free = NULL;
+
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+ list_for_each_entry(q, &h->list, list) {
+ if (q->gfn == gfn) {
+ q_match = q;
+ break;
+ }
+ if (!q_free && !q->waiters)
+ q_free = q;
+ }
+
+ if (!q_match && q_free)
+ q_match = q_free;
+
+ if (q_match) {
+ if (q_match->woken)
+ printk("wq woken for gfn %u:%u %lx %u %u %u\n", current->domain->domain_id, current->vcpu_id, gfn, q_match->index, q_match->woken, q_match->waiters);
+ q_match->waiters++;
+ q_match->gfn = gfn;
+ }
+
+ if (!q_match)
+ printk("No wq_get for gfn %u:%u %lx\n", current->domain->domain_id, current->vcpu_id, gfn);
+
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
+ return q_match;
+}
+
+static void p2m_mem_paging_put_queue(struct domain *d, struct p2m_mem_paging_queue *q_match)
+{
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+ if (q_match->waiters == 0)
+ printk("wq_put no waiters, gfn %u:%u %lx %u\n", current->domain->domain_id, current->vcpu_id, q_match->gfn, q_match->woken);
+ else if (--q_match->waiters == 0)
+ q_match->gfn = q_match->woken = 0;;
+
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
+}
+
+static void p2m_mem_paging_wake_queue(struct domain *d, unsigned long gfn)
+{
+ struct p2m_mem_paging_queue_head *h;
+ struct p2m_mem_paging_queue *q, *q_match = NULL;
+
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
+
+ h = d->arch.hvm_domain.gfn_queue;
+ list_for_each_entry(q, &h->list, list) {
+ if (q->gfn == gfn) {
+ q_match = q;
+ break;
+ }
+ }
+ if (q_match) {
+ if (q_match->woken || q_match->waiters == 0)
+ printk("Wrong wake for gfn %u:%u %p %lx %u %u\n", current->domain->domain_id, current->vcpu_id, q_match, gfn, q_match->woken, q_match->waiters);
+ q_match->woken++;
+ wake_up_all(&q_match->wq);
+ }
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
+}
+
+/* Returns 0 if the gfn is still paged */
+static int p2m_mem_paging_get_entry(mfn_t *mfn,
+ struct p2m_domain *p2m, unsigned long gfn,
+ p2m_type_t *t, p2m_query_t q)
+{
+ p2m_access_t a = 0;
+ *mfn = p2m->get_entry(p2m, gfn, t, &a, q);
+
+ return p2m_is_paging(*t) ? 0 : 1;
+}
+
+/* Go to sleep in case of guest access */
+void p2m_mem_paging_wait(mfn_t *mfn,
+ struct p2m_domain *p2m, unsigned long gfn,
+ p2m_type_t *t, p2m_query_t q)
+{
+ struct p2m_mem_paging_queue *pmpq;
+
+ /* Return p2mt as is in case of query */
+ if ( q == p2m_query )
+ return;
+ /* Foreign domains can not go to sleep */
+ if ( current->domain != p2m->domain )
+ return;
+
+ pmpq = p2m_mem_paging_get_queue(p2m->domain, gfn);
+ if ( !pmpq )
+ return;
+
+ /* Populate the page once */
+ if ( *t == p2m_ram_paging_out || *t == p2m_ram_paged )
+ p2m_mem_paging_populate(p2m, gfn);
+
+ wait_event(pmpq->wq, p2m_mem_paging_get_entry(mfn, p2m, gfn, t, q));
+ p2m_mem_paging_put_queue(p2m->domain, pmpq);
+}
+
/**
* p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
* @d: guest domain
@@ -3020,21 +3197,17 @@ void p2m_mem_paging_drop_page(struct p2m
*/
void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)
{
- struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
p2m_type_t p2mt;
p2m_access_t a;
mfn_t mfn;
- int restored = 0;
struct domain *d = p2m->domain;
+ int put_request = 0;
/* Check that there's space on the ring for this request */
if ( mem_event_claim_slot(d, &d->mem_event->paging) )
return;
- memset(&req, 0, sizeof(req));
- req.type = MEM_EVENT_TYPE_PAGING;
-
/* Fix p2m mapping */
p2m_lock(p2m);
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
@@ -3043,35 +3216,23 @@ void p2m_mem_paging_populate(struct p2m_
{
/* Restore page state if gfn was requested before evict */
if ( p2mt == p2m_ram_paging_out && mfn_valid(mfn) ) {
+ /* Restore gfn because it is needed by guest before evict */
set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw, a);
- restored = 1;
} else {
set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
+ put_request = 1;
}
+ /* Evict will fail now, the pager has to try another gfn */
+
audit_p2m(p2m, 1);
}
p2m_unlock(p2m);
- /* Pause domain if request came from guest and gfn has paging type */
- if ( !restored && p2m_is_paging(p2mt) && v->domain == d )
- {
- vcpu_pause_nosync(v);
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
- }
- /* No need to inform pager if the gfn is not in the page-out path */
- else if ( restored || !p2m_do_populate(p2mt) )
- {
- /* gfn is already on its way back and vcpu is not paused */
+ /* One request per gfn, guest vcpus go to sleep, foreigners try again */
+ if ( put_request )
+ mem_event_put_request(d, &d->mem_event->paging, &req);
+ else
mem_event_release_slot(d, &d->mem_event->paging);
- return;
- }
-
- /* Send request to pager */
- req.gfn = gfn;
- req.p2mt = p2mt;
- req.vcpu_id = v->vcpu_id;
-
- mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -3197,12 +3358,11 @@ void p2m_mem_paging_resume(struct p2m_do
p2m_unlock(p2m);
}
- /* Unpause domain */
- if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
- vcpu_unpause(d->vcpu[rsp.vcpu_id]);
-
/* Wake vcpus waiting for room in the ring */
mem_event_wake_requesters(&d->mem_event->paging);
+
+ /* Unpause all vcpus that were paused because the gfn was paged */
+ p2m_mem_paging_wake_queue(d, rsp.gfn);
}
void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -536,6 +536,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
goto maxvcpu_out;
}
+ if ( p2m_mem_paging_init_queue(d, max) )
+ goto maxvcpu_out;
+
ret = 0;
maxvcpu_out:
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -87,6 +87,9 @@ struct hvm_domain {
struct viridian_domain viridian;
+ spinlock_t gfn_lock;
+ struct p2m_mem_paging_queue_head *gfn_queue;
+
bool_t hap_enabled;
bool_t mem_sharing_enabled;
bool_t qemu_mapcache_invalidate;
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -343,6 +343,8 @@ gfn_to_mfn_type_p2m(struct p2m_domain *p
}
+extern void p2m_mem_paging_wait(mfn_t *mfn, struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_query_t q);
+
/* General conversion function from gfn to mfn */
static inline mfn_t _gfn_to_mfn_type(struct p2m_domain *p2m,
unsigned long gfn, p2m_type_t *t,
@@ -364,6 +366,9 @@ static inline mfn_t _gfn_to_mfn_type(str
mfn = gfn_to_mfn_type_p2m(p2m, gfn, t, q);
#ifdef __x86_64__
+ if (unlikely(p2m_is_paging(*t)) )
+ p2m_mem_paging_wait(&mfn, p2m, gfn, t, q);
+
if (unlikely((p2m_is_broken(*t))))
{
/* Return invalid_mfn to avoid caller's access */
@@ -520,6 +525,8 @@ int clear_mmio_p2m_entry(struct p2m_doma
/* Modify p2m table for shared gfn */
int set_shared_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn);
+/* Initialize per-gfn wait queue */
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max);
/* Check if a nominated gfn is valid to be paged out */
int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn);
/* Evict a frame */
@@ -533,6 +540,8 @@ int p2m_mem_paging_prep(struct p2m_domai
/* Resume normal operation (in case a domain was paused) */
void p2m_mem_paging_resume(struct p2m_domain *p2m);
#else
+static inline int p2m_mem_paging_init_queue(struct domain *d, unsigned int max)
+{ return 0; }
static inline void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
{ }
static inline void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)

19
xm-create-maxmem.patch Normal file
View File

@ -0,0 +1,19 @@
Cast maxmem to int before computation
Reported in L3 bnc#732782
From: Dario Abatianni <dabatianni@novell.com>
Index: xen-4.1.2-testing/tools/python/xen/xm/xenapi_create.py
===================================================================
--- xen-4.1.2-testing.orig/tools/python/xen/xm/xenapi_create.py
+++ xen-4.1.2-testing/tools/python/xen/xm/xenapi_create.py
@@ -764,7 +764,7 @@ class sxp2xml:
if get_child_by_name(config, "maxmem"):
memory.attributes["static_max"] = \
- str(int(get_child_by_name(config, "maxmem")*1024*1024))
+ str(int(get_child_by_name(config, "maxmem"))*1024*1024)
vm.appendChild(memory)