From c9e3853c04a02ce719b4aa21e152ff6d14c0ef6d8e1a2364ef67fbd5c5badbea Mon Sep 17 00:00:00 2001 From: Charles Arnold Date: Thu, 5 Jan 2012 19:41:54 +0000 Subject: [PATCH] - bnc#735806 - VF doesn't work after hot-plug for many times 24448-x86-pt-irq-leak.patch - Upstream patches from Jan 24261-x86-cpuidle-Westmere-EX.patch 24417-amd-erratum-573.patch 24429-mceinj-tool.patch 24447-x86-TXT-INIT-SIPI-delay.patch ioemu-9868-MSI-X.patch - bnc#732884 - remove private runlevel 4 from init scripts xen.no-default-runlevel-4.patch - bnc#727515 - Fragmented packets hang network boot of HVM guest ipxe-gcc45-warnings.patch ipxe-ipv4-fragment.patch ipxe-enable-nics.patch - fate#310510 - fix xenpaging update xenpaging.autostart.patch, make changes with mem-swap-target permanent update xenpaging.doc.patch, mention issues with live migration - fate#310510 - fix xenpaging add xenpaging.evict_mmap_readonly.patch update xenpaging.error-handling.patch, reduce debug output - bnc#736824 - Microcode patches for AMD's 15h processors panic the system 24189-x86-p2m-pod-locking.patch 24412-x86-AMD-errata-model-shift.patch OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=164 --- 23749-mmcfg-reservation.patch | 66 ++- ..._locking_in_p2m_mem_paging_functions.patch | 2 +- 24153-x86-emul-feature-checks.patch | 107 ++++ ...llocate_whole_page_for_shadow_stack..patch | 18 +- ...main-vcpu_pause_count_info_to_d_key..patch | 53 ++ 24189-x86-p2m-pod-locking.patch | 50 ++ ...-stack_overflow_and_crash_the_guest..patch | 6 +- ...it_so_that_vcpu_is_definitely_on_the.patch | 4 +- ...er-cpu_stacks_we_must_wake_up_on_teh.patch | 6 +- ...aitqueue_Implement_wake_up_nroneall..patch | 4 +- ...eference_to_a_domain_on_a_waitqueue..patch | 8 +- 24261-x86-cpuidle-Westmere-EX.patch | 23 + ...em_event_domain_out_of_struct_domain.patch | 54 +- 24275-x86-emul-lzcnt.patch | 88 +++ 24277-x86-dom0-features.patch | 58 ++ 24278-x86-dom0-no-PCID.patch | 49 ++ 24282-x86-log-dirty-bitmap-leak.patch | 
38 ++ ...mediate_fill-in_of_the_page_contents.patch | 139 +++++ ...in_page_oud_page_contents_on_prepare.patch | 86 +++ ...on-racy_xc_mem_paging_load_interface.patch | 100 ++++ ...tion_overlooked_in_23749e8d1c8f074ba.patch | 27 - ...e_asm_to_not_clobber_stacks_red_zone.patch | 36 +- ...uild_breakage_in_previous_changeset..patch | 28 - 24357-firmware-no-_PS0-_PS3.patch | 49 ++ 24358-kexec-compat-overflow.patch | 29 + 24359-x86-domU-features.patch | 94 +++ 24360-x86-pv-domU-no-PCID.patch | 39 ++ 24389-amd-fam10-gart-tlb-walk-err.patch | 109 ++++ 24391-x86-pcpu-version.patch | 155 +++++ 24411-x86-ucode-AMD-Fam15.patch | 143 +++++ 24412-x86-AMD-errata-model-shift.patch | 23 + 24417-amd-erratum-573.patch | 85 +++ 24429-mceinj-tool.patch | 28 + 24447-x86-TXT-INIT-SIPI-delay.patch | 46 ++ 24448-x86-pt-irq-leak.patch | 31 + 32on64-extra-mem.patch | 2 +- change_home_server.patch | 2 +- ioemu-9868-MSI-X.patch | 241 ++++++++ ipxe-enable-nics.patch | 17 + ipxe-gcc45-warnings.patch | 75 +++ ipxe-ipv4-fragment.patch | 368 ++++++++++++ multi-xvdp.patch | 2 +- snapshot-xend.patch | 6 +- tools-gdbserver-build.diff | 22 - x86-cpufreq-report.patch | 46 +- xen-config.diff | 28 +- xen-warnings-unused.diff | 25 +- xen.changes | 137 +++++ xen.no-default-runlevel-4.patch | 77 +++ xen.spec | 169 +++--- xend-console-port-restore.patch | 4 +- xend-domain-lock-sfex.patch | 8 +- xend-domain-lock.patch | 4 +- xend-migration-domname-fix.patch | 5 +- xend-vcpu-affinity-fix.patch | 2 +- xenpaging.HVMCOPY_gfn_paged_out.patch | 151 ----- xenpaging.autostart.patch | 19 +- xenpaging.doc.patch | 92 +++ xenpaging.error-handling.patch | 183 ++++++ xenpaging.evict_fail_fast_forward.patch | 57 ++ xenpaging.evict_mmap_readonly.patch | 20 + xenpaging.mem_event-use-wait_queue.patch | 559 ++++++++++++++++++ xenpaging.mmap-before-nominate.patch | 114 ++++ xenpaging.p2m_is_paged.patch | 335 +++++++++++ xenpaging.qemu.flush-cache.patch | 31 + xenpaging.versioned-interface.patch | 87 +++ 
xenpaging.waitqueue-paging.patch | 387 ++++++++++++ xm-create-maxmem.patch | 19 + 68 files changed, 4696 insertions(+), 479 deletions(-) create mode 100644 24153-x86-emul-feature-checks.patch create mode 100644 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch create mode 100644 24189-x86-p2m-pod-locking.patch create mode 100644 24261-x86-cpuidle-Westmere-EX.patch create mode 100644 24275-x86-emul-lzcnt.patch create mode 100644 24277-x86-dom0-features.patch create mode 100644 24278-x86-dom0-no-PCID.patch create mode 100644 24282-x86-log-dirty-bitmap-leak.patch create mode 100644 24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch create mode 100644 24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch create mode 100644 24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch delete mode 100644 24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch delete mode 100644 24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch create mode 100644 24357-firmware-no-_PS0-_PS3.patch create mode 100644 24358-kexec-compat-overflow.patch create mode 100644 24359-x86-domU-features.patch create mode 100644 24360-x86-pv-domU-no-PCID.patch create mode 100644 24389-amd-fam10-gart-tlb-walk-err.patch create mode 100644 24391-x86-pcpu-version.patch create mode 100644 24411-x86-ucode-AMD-Fam15.patch create mode 100644 24412-x86-AMD-errata-model-shift.patch create mode 100644 24417-amd-erratum-573.patch create mode 100644 24429-mceinj-tool.patch create mode 100644 24447-x86-TXT-INIT-SIPI-delay.patch create mode 100644 24448-x86-pt-irq-leak.patch create mode 100644 ioemu-9868-MSI-X.patch create mode 100644 ipxe-enable-nics.patch create mode 100644 ipxe-gcc45-warnings.patch create mode 100644 ipxe-ipv4-fragment.patch delete mode 100644 tools-gdbserver-build.diff create mode 100644 xen.no-default-runlevel-4.patch delete mode 
100644 xenpaging.HVMCOPY_gfn_paged_out.patch create mode 100644 xenpaging.doc.patch create mode 100644 xenpaging.error-handling.patch create mode 100644 xenpaging.evict_fail_fast_forward.patch create mode 100644 xenpaging.evict_mmap_readonly.patch create mode 100644 xenpaging.mem_event-use-wait_queue.patch create mode 100644 xenpaging.mmap-before-nominate.patch create mode 100644 xenpaging.p2m_is_paged.patch create mode 100644 xenpaging.qemu.flush-cache.patch create mode 100644 xenpaging.versioned-interface.patch create mode 100644 xenpaging.waitqueue-paging.patch create mode 100644 xm-create-maxmem.patch diff --git a/23749-mmcfg-reservation.patch b/23749-mmcfg-reservation.patch index 8d69c33..e5ad5c3 100644 --- a/23749-mmcfg-reservation.patch +++ b/23749-mmcfg-reservation.patch @@ -16,10 +16,17 @@ a new physdevop hypercall). Signed-off-by: Jan Beulich -Index: xen-4.1.2-testing/xen/arch/x86/physdev.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/physdev.c -+++ xen-4.1.2-testing/xen/arch/x86/physdev.c +# HG changeset patch +# User Jan Beulich +# Date 1322813126 -3600 +# Node ID 60d4e257d04ba0bd663bbef5e93a97b6d8b66e54 +# Parent 3f815406feb25a9348d8be9bc49fdc8c93ccb7c2 +x86-64/mmcfg: remove __initdata annotation overlooked in 23749:e8d1c8f074ba + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/physdev.c ++++ b/xen/arch/x86/physdev.c @@ -16,6 +16,10 @@ #include #include @@ -56,10 +63,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/physdev.c case PHYSDEVOP_restore_msi: { struct physdev_restore_msi restore_msi; struct pci_dev *pdev; -Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig.h -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig.h -+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig.h +--- a/xen/arch/x86/x86_64/mmconfig.h ++++ b/xen/arch/x86/x86_64/mmconfig.h @@ -84,6 +84,11 @@ extern int pci_mmcfg_config_num; extern struct 
acpi_mcfg_allocation *pci_mmcfg_config; @@ -73,10 +78,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig.h -void pci_mmcfg_arch_free(void); +int pci_mmcfg_arch_enable(unsigned int); +void pci_mmcfg_arch_disable(unsigned int); -Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig-shared.c -+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c +--- a/xen/arch/x86/x86_64/mmconfig-shared.c ++++ b/xen/arch/x86/x86_64/mmconfig-shared.c @@ -22,10 +22,10 @@ #include #include @@ -182,12 +185,12 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c - /* for late to exit */ - if (known_bridge) - return; +- +- if (pci_mmcfg_check_hostbridge()) +- known_bridge = 1; + if (pci_mmcfg_check_hostbridge()) { + unsigned int i; -- if (pci_mmcfg_check_hostbridge()) -- known_bridge = 1; -- - if (!known_bridge) { + pci_mmcfg_arch_init(); + for (i = 0; i < pci_mmcfg_config_num; ++i) @@ -244,10 +247,17 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig-shared.c } /** -Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig_64.c -+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c +--- a/xen/arch/x86/x86_64/mmconfig_64.c ++++ b/xen/arch/x86/x86_64/mmconfig_64.c +@@ -23,7 +23,7 @@ struct mmcfg_virt { + char __iomem *virt; + }; + static struct mmcfg_virt *pci_mmcfg_virt; +-static int __initdata mmcfg_pci_segment_shift; ++static unsigned int mmcfg_pci_segment_shift; + + static char __iomem *get_virt(unsigned int seg, unsigned int *bus) + { @@ -112,7 +112,8 @@ int pci_mmcfg_write(unsigned int seg, un return 0; } @@ -350,10 +360,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c - xfree(pci_mmcfg_virt); - pci_mmcfg_virt = NULL; -} -Index: xen-4.1.2-testing/xen/arch/x86/x86_64/physdev.c 
-=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/physdev.c -+++ xen-4.1.2-testing/xen/arch/x86/x86_64/physdev.c +--- a/xen/arch/x86/x86_64/physdev.c ++++ b/xen/arch/x86/x86_64/physdev.c @@ -54,6 +54,10 @@ #define physdev_get_free_pirq compat_physdev_get_free_pirq #define physdev_get_free_pirq_t physdev_get_free_pirq_compat_t @@ -365,10 +373,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/x86_64/physdev.c #define COMPAT #undef guest_handle_okay #define guest_handle_okay compat_handle_okay -Index: xen-4.1.2-testing/xen/include/public/physdev.h -=================================================================== ---- xen-4.1.2-testing.orig/xen/include/public/physdev.h -+++ xen-4.1.2-testing/xen/include/public/physdev.h +--- a/xen/include/public/physdev.h ++++ b/xen/include/public/physdev.h @@ -255,6 +255,19 @@ struct physdev_get_free_pirq { typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); @@ -389,10 +395,8 @@ Index: xen-4.1.2-testing/xen/include/public/physdev.h /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** -Index: xen-4.1.2-testing/xen/include/xlat.lst -=================================================================== ---- xen-4.1.2-testing.orig/xen/include/xlat.lst -+++ xen-4.1.2-testing/xen/include/xlat.lst +--- a/xen/include/xlat.lst ++++ b/xen/include/xlat.lst @@ -60,6 +60,7 @@ ! memory_map memory.h ! 
memory_reservation memory.h diff --git a/23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch b/23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch index 0d3f175..a7464c4 100644 --- a/23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch +++ b/23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch @@ -25,7 +25,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c mfn_t mfn; int ret; -+ p2m_unlock(p2m); ++ p2m_lock(p2m); + mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query); diff --git a/24153-x86-emul-feature-checks.patch b/24153-x86-emul-feature-checks.patch new file mode 100644 index 0000000..7126f25 --- /dev/null +++ b/24153-x86-emul-feature-checks.patch @@ -0,0 +1,107 @@ +# HG changeset patch +# User Jan Beulich +# Date 1321459471 0 +# Node ID 644ca5d3ec435f3372ce88a4de86909bd4033819 +# Parent 1cbb3c1dfb3203f5344a6c1c52507b9e75af6742 +x86/emulator: add feature checks for newer instructions + +Certain instructions were introduced only after the i686 or original +x86-64 architecture, so we should not try to emulate them if the guest +is not seeing the respective feature enabled (or, worse, if the +underlying hardware doesn't support them). This affects fisttp, +movnti, and cmpxchg16b. 
+ +Signed-off-by: Jan Beulich +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -955,6 +955,47 @@ in_protmode( + return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & EFLG_VM)); + } + ++#define EAX 0 ++#define ECX 1 ++#define EDX 2 ++#define EBX 3 ++ ++static bool_t vcpu_has( ++ unsigned int eax, ++ unsigned int reg, ++ unsigned int bit, ++ struct x86_emulate_ctxt *ctxt, ++ const struct x86_emulate_ops *ops) ++{ ++ unsigned int ebx = 0, ecx = 0, edx = 0; ++ int rc; ++ ++ fail_if(!ops->cpuid); ++ rc = ops->cpuid(&eax, &ebx, &ecx, &edx, ctxt); ++ if ( rc == X86EMUL_OKAY ) ++ { ++ switch ( reg ) ++ { ++ case EAX: reg = eax; break; ++ case EBX: reg = ebx; break; ++ case ECX: reg = ecx; break; ++ case EDX: reg = edx; break; ++ default: BUG(); ++ } ++ if ( !(reg & (1U << bit)) ) ++ rc = ~X86EMUL_OKAY; ++ } ++ ++ done: ++ return rc == X86EMUL_OKAY; ++} ++ ++#define vcpu_must_have(leaf, reg, bit) \ ++ generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1) ++#define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26) ++#define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0) ++#define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13) ++ + static int + in_longmode( + struct x86_emulate_ctxt *ctxt, +@@ -2738,6 +2779,7 @@ x86_emulate( + emulate_fpu_insn_memsrc("fildl", src.val); + break; + case 1: /* fisttp m32i */ ++ vcpu_must_have_sse3(); + ea.bytes = 4; + dst = ea; + dst.type = OP_MEM; +@@ -2846,6 +2888,7 @@ x86_emulate( + emulate_fpu_insn_memsrc("fldl", src.val); + break; + case 1: /* fisttp m64i */ ++ vcpu_must_have_sse3(); + ea.bytes = 8; + dst = ea; + dst.type = OP_MEM; +@@ -2953,6 +2996,7 @@ x86_emulate( + emulate_fpu_insn_memsrc("fild", src.val); + break; + case 1: /* fisttp m16i */ ++ vcpu_must_have_sse3(); + ea.bytes = 2; + dst = ea; + dst.type = OP_MEM; +@@ -4141,6 +4185,7 @@ x86_emulate( + + case 0xc3: /* movnti */ + /* Ignore the non-temporal hint 
for now. */ ++ vcpu_must_have_sse2(); + generate_exception_if(dst.bytes <= 2, EXC_UD, -1); + dst.val = src.val; + break; +@@ -4151,6 +4196,8 @@ x86_emulate( + + generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); + generate_exception_if(ea.type != OP_MEM, EXC_UD, -1); ++ if ( op_bytes == 8 ) ++ vcpu_must_have_cx16(); + op_bytes *= 2; + + /* Get actual old value. */ diff --git a/24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch b/24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch index 0f930e0..3b57079 100644 --- a/24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch +++ b/24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch @@ -9,8 +9,8 @@ Signed-off-by: Keir Fraser --- - xen/common/wait.c | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) + xen/common/wait.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) Index: xen-4.1.2-testing/xen/common/wait.c =================================================================== @@ -25,10 +25,16 @@ Index: xen-4.1.2-testing/xen/common/wait.c #endif }; -@@ -45,6 +45,15 @@ int init_waitqueue_vcpu(struct vcpu *v) +@@ -41,11 +41,19 @@ int init_waitqueue_vcpu(struct vcpu *v) + { + struct waitqueue_vcpu *wqv; + +- wqv = xmalloc(struct waitqueue_vcpu); ++ wqv = xzalloc(struct waitqueue_vcpu); if ( wqv == NULL ) return -ENOMEM; +- memset(wqv, 0, sizeof(*wqv)); +#ifdef CONFIG_X86 + wqv->stack = alloc_xenheap_page(); + if ( wqv->stack == NULL ) @@ -38,10 +44,10 @@ Index: xen-4.1.2-testing/xen/common/wait.c + } +#endif + - memset(wqv, 0, sizeof(*wqv)); INIT_LIST_HEAD(&wqv->list); wqv->vcpu = v; -@@ -63,6 +72,9 @@ void destroy_waitqueue_vcpu(struct vcpu + +@@ -63,6 +71,9 @@ void destroy_waitqueue_vcpu(struct vcpu return; BUG_ON(!list_empty(&wqv->list)); @@ -51,7 +57,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c xfree(wqv); v->waitqueue_vcpu = NULL; -@@ -115,7 +127,7 @@ static void __prepare_to_wait(struct wai +@@ -115,7 +126,7 @@ static void 
__prepare_to_wait(struct wai : "=S" (wqv->esp) : "c" (cpu_info), "D" (wqv->stack) : "memory" ); diff --git a/24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch b/24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch new file mode 100644 index 0000000..f368990 --- /dev/null +++ b/24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch @@ -0,0 +1,53 @@ +changeset: 24178:1f2a06dbbb69 +user: Keir Fraser +date: Tue Nov 22 15:35:26 2011 +0000 +files: xen/common/keyhandler.c +description: +debug: Add domain/vcpu pause_count info to 'd' key. + +Signed-off-by: Keir Fraser + + +--- + xen/common/keyhandler.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +Index: xen-4.1.2-testing/xen/common/keyhandler.c +=================================================================== +--- xen-4.1.2-testing.orig/xen/common/keyhandler.c ++++ xen-4.1.2-testing/xen/common/keyhandler.c +@@ -244,9 +244,10 @@ static void dump_domains(unsigned char k + unsigned int i; + printk("General information for domain %u:\n", d->domain_id); + cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask); +- printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d " +- "dirty_cpus=%s max_pages=%u\n", ++ printk(" refcnt=%d dying=%d pause_count=%d\n", + atomic_read(&d->refcnt), d->is_dying, ++ atomic_read(&d->pause_count)); ++ printk(" nr_pages=%d xenheap_pages=%d dirty_cpus=%s max_pages=%u\n", + d->tot_pages, d->xenheap_pages, tmpstr, d->max_pages); + printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-" + "%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n", +@@ -270,17 +271,18 @@ static void dump_domains(unsigned char k + d->domain_id); + for_each_vcpu ( d, v ) + { +- printk(" VCPU%d: CPU%d [has=%c] flags=%lx poll=%d " ++ printk(" VCPU%d: CPU%d [has=%c] poll=%d " + "upcall_pend = %02x, upcall_mask = %02x ", + v->vcpu_id, v->processor, +- v->is_running ? 'T':'F', +- v->pause_flags, v->poll_evtchn, ++ v->is_running ? 
'T':'F', v->poll_evtchn, + vcpu_info(v, evtchn_upcall_pending), + vcpu_info(v, evtchn_upcall_mask)); + cpuset_print(tmpstr, sizeof(tmpstr), v->vcpu_dirty_cpumask); + printk("dirty_cpus=%s ", tmpstr); + cpuset_print(tmpstr, sizeof(tmpstr), v->cpu_affinity); + printk("cpu_affinity=%s\n", tmpstr); ++ printk(" pause_count=%d pause_flags=%lx\n", ++ atomic_read(&v->pause_count), v->pause_flags); + arch_dump_vcpu_info(v); + periodic_timer_print(tmpstr, sizeof(tmpstr), v->periodic_period); + printk(" %s\n", tmpstr); diff --git a/24189-x86-p2m-pod-locking.patch b/24189-x86-p2m-pod-locking.patch new file mode 100644 index 0000000..dfc25ab --- /dev/null +++ b/24189-x86-p2m-pod-locking.patch @@ -0,0 +1,50 @@ +# HG changeset patch +# User Andres Lagar-Cavilla +# Date 1322148057 0 +# Node ID 7da681c490e0a8a2b3f1fb311d254dc7ce618a43 +# Parent b082fdc52ad7607d93b59148fb289aafe21f294b +x86/mm/p2m: fix pod locking + +The path p2m-lookup -> p2m-pt->get_entry -> 1GB PoD superpage -> +pod_demand_populate ends in the pod code performing a p2m_set_entry with +no locks held (in order to split the 1GB superpage into 512 2MB ones) + +Further, it calls p2m_unlock after that, which will break the spinlock. + +This patch attempts to fix that. 
+ +Signed-off-by: Andres Lagar-Cavilla +Acked-by: George Dunlap +Acked-by: Tim Deegan +Committed-by: Tim Deegan + +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -1244,7 +1244,6 @@ p2m_pod_demand_populate(struct p2m_domai + set_p2m_entry(p2m, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9, + p2m_populate_on_demand, p2m->default_access); + audit_p2m(p2m, 1); +- p2m_unlock(p2m); + return 0; + } + +@@ -1602,7 +1601,8 @@ pod_retry_l3: + { + if ( q != p2m_query ) + { +- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) ) ++ if ( !p2m_pod_check_and_populate(p2m, gfn, ++ (l1_pgentry_t *) &l3e, 18, q) ) + goto pod_retry_l3; + } + else +@@ -1733,7 +1733,8 @@ static mfn_t p2m_gfn_to_mfn_current(stru + /* The read has succeeded, so we know that mapping exists */ + if ( q != p2m_query ) + { +- if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) ) ++ if ( !p2m_pod_check_and_populate(p2m, gfn, ++ (l1_pgentry_t *) &l3e, 18, q) ) + goto pod_retry_l3; + p2mt = p2m_invalid; + printk("%s: Allocate 1GB failed!\n", __func__); diff --git a/24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch b/24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch index 82685d7..4a9eca1 100644 --- a/24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch +++ b/24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch @@ -16,7 +16,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c =================================================================== --- xen-4.1.2-testing.orig/xen/common/wait.c +++ xen-4.1.2-testing/xen/common/wait.c -@@ -107,13 +107,16 @@ void wake_up(struct waitqueue_head *wq) +@@ -106,13 +106,16 @@ void wake_up(struct waitqueue_head *wq) static void __prepare_to_wait(struct waitqueue_vcpu *wqv) { char *cpu_info = (char *)get_cpu_info(); @@ -34,7 +34,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c "pop %%r15; pop %%r14; pop %%r13; pop %%r12; " "pop %%r11; pop %%r10; pop %%r9; pop %%r8; " "pop %%rbp; pop %%rdi; pop %%rdx; pop 
%%rcx; pop %%rbx; pop %%rax" -@@ -121,13 +124,20 @@ static void __prepare_to_wait(struct wai +@@ -120,13 +123,20 @@ static void __prepare_to_wait(struct wai "push %%eax; push %%ebx; push %%ecx; push %%edx; push %%edi; " "push %%ebp; call 1f; " "1: mov 8(%%esp),%%edi; mov 16(%%esp),%%ecx; mov %%esp,%%esi; " @@ -58,7 +58,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c } static void __finish_wait(struct waitqueue_vcpu *wqv) -@@ -163,6 +173,7 @@ void prepare_to_wait(struct waitqueue_he +@@ -162,6 +172,7 @@ void prepare_to_wait(struct waitqueue_he struct vcpu *curr = current; struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu; diff --git a/24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch b/24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch index 909dfa6..26ce7d9 100644 --- a/24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch +++ b/24196-waitqueue_Reorder_prepare_to_wait_so_that_vcpu_is_definitely_on_the.patch @@ -21,7 +21,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c =================================================================== --- xen-4.1.2-testing.orig/xen/common/wait.c +++ xen-4.1.2-testing/xen/common/wait.c -@@ -108,6 +108,8 @@ static void __prepare_to_wait(struct wai +@@ -107,6 +107,8 @@ static void __prepare_to_wait(struct wai { char *cpu_info = (char *)get_cpu_info(); @@ -30,7 +30,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c asm volatile ( #ifdef CONFIG_X86_64 "push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; " -@@ -174,14 +176,13 @@ void prepare_to_wait(struct waitqueue_he +@@ -173,14 +175,13 @@ void prepare_to_wait(struct waitqueue_he struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu; ASSERT(!in_atomic()); diff --git a/24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch b/24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch index 44592b2..7639099 100644 --- 
a/24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch +++ b/24197-x86-waitqueue_Because_we_have_per-cpu_stacks_we_must_wake_up_on_teh.patch @@ -27,7 +27,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c #endif }; -@@ -107,9 +109,19 @@ void wake_up(struct waitqueue_head *wq) +@@ -106,9 +108,19 @@ void wake_up(struct waitqueue_head *wq) static void __prepare_to_wait(struct waitqueue_vcpu *wqv) { char *cpu_info = (char *)get_cpu_info(); @@ -47,7 +47,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c asm volatile ( #ifdef CONFIG_X86_64 "push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; " -@@ -145,6 +157,7 @@ static void __prepare_to_wait(struct wai +@@ -144,6 +156,7 @@ static void __prepare_to_wait(struct wai static void __finish_wait(struct waitqueue_vcpu *wqv) { wqv->esp = NULL; @@ -55,7 +55,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c } void check_wakeup_from_wait(void) -@@ -156,6 +169,20 @@ void check_wakeup_from_wait(void) +@@ -155,6 +168,20 @@ void check_wakeup_from_wait(void) if ( likely(wqv->esp == NULL) ) return; diff --git a/24231-waitqueue_Implement_wake_up_nroneall..patch b/24231-waitqueue_Implement_wake_up_nroneall..patch index c588f5d..817d90d 100644 --- a/24231-waitqueue_Implement_wake_up_nroneall..patch +++ b/24231-waitqueue_Implement_wake_up_nroneall..patch @@ -17,7 +17,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c =================================================================== --- xen-4.1.2-testing.orig/xen/common/wait.c +++ xen-4.1.2-testing/xen/common/wait.c -@@ -88,13 +88,13 @@ void init_waitqueue_head(struct waitqueu +@@ -87,13 +87,13 @@ void init_waitqueue_head(struct waitqueu INIT_LIST_HEAD(&wq->list); } @@ -33,7 +33,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c { wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list); list_del_init(&wqv->list); -@@ -104,6 +104,16 @@ void wake_up(struct waitqueue_head *wq) +@@ -103,6 +103,16 @@ void wake_up(struct waitqueue_head *wq) spin_unlock(&wq->lock); } 
diff --git a/24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch b/24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch index 6546df4..b233e98 100644 --- a/24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch +++ b/24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch @@ -19,7 +19,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c =================================================================== --- xen-4.1.2-testing.orig/xen/common/wait.c +++ xen-4.1.2-testing/xen/common/wait.c -@@ -88,6 +88,11 @@ void init_waitqueue_head(struct waitqueu +@@ -87,6 +87,11 @@ void init_waitqueue_head(struct waitqueu INIT_LIST_HEAD(&wq->list); } @@ -31,7 +31,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c void wake_up_nr(struct waitqueue_head *wq, unsigned int nr) { struct waitqueue_vcpu *wqv; -@@ -99,6 +104,7 @@ void wake_up_nr(struct waitqueue_head *w +@@ -98,6 +103,7 @@ void wake_up_nr(struct waitqueue_head *w wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list); list_del_init(&wqv->list); vcpu_unpause(wqv->vcpu); @@ -39,7 +39,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c } spin_unlock(&wq->lock); -@@ -219,6 +225,7 @@ void prepare_to_wait(struct waitqueue_he +@@ -218,6 +224,7 @@ void prepare_to_wait(struct waitqueue_he spin_lock(&wq->lock); list_add_tail(&wqv->list, &wq->list); vcpu_pause_nosync(curr); @@ -47,7 +47,7 @@ Index: xen-4.1.2-testing/xen/common/wait.c spin_unlock(&wq->lock); } -@@ -237,6 +244,7 @@ void finish_wait(struct waitqueue_head * +@@ -236,6 +243,7 @@ void finish_wait(struct waitqueue_head * { list_del_init(&wqv->list); vcpu_unpause(curr); diff --git a/24261-x86-cpuidle-Westmere-EX.patch b/24261-x86-cpuidle-Westmere-EX.patch new file mode 100644 index 0000000..337a00c --- /dev/null +++ b/24261-x86-cpuidle-Westmere-EX.patch @@ -0,0 +1,23 @@ +# HG changeset patch +# User Jan Beulich +# Date 1322645021 -3600 +# Node ID 64088ba60263d3a623851b43a872c93c71cc3cbf +# Parent 
df7cec2c6c03f07932555954948ce7c8d09e88f4 +x86/cpuidle: add Westmere-EX support to hw residencies reading logic + +This is in accordance with +http://software.intel.com/en-us/articles/intel-processor-identification-with-cpuid-model-and-family-numbers/ + +Signed-off-by: Jan Beulich +Acked-by: Haitao Shan + +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -113,6 +113,7 @@ static void do_get_hw_residencies(void * + /* Westmere */ + case 0x25: + case 0x2C: ++ case 0x2F: + GET_PC3_RES(hw_res->pc3); + GET_PC6_RES(hw_res->pc6); + GET_PC7_RES(hw_res->pc7); diff --git a/24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch b/24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch index c803449..5fa7cf0 100644 --- a/24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch +++ b/24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch @@ -25,10 +25,8 @@ Committed-by: Keir Fraser xen/include/xen/sched.h | 18 ++++++++++++------ 6 files changed, 34 insertions(+), 23 deletions(-) -Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c -+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c @@ -3909,7 +3909,7 @@ static int hvm_memory_event_traps(long p if ( (p & HVMPME_onchangeonly) && (value == old) ) return 1; @@ -47,10 +45,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c return 1; } -Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c -+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c +--- a/xen/arch/x86/mm/mem_event.c ++++ b/xen/arch/x86/mm/mem_event.c @@ -252,7 +252,7 @@ int mem_event_domctl(struct domain *d, x { case XEN_DOMCTL_MEM_EVENT_OP_PAGING: @@ -78,10 +74,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c } 
break; -Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_sharing.c -+++ xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c +--- a/xen/arch/x86/mm/mem_sharing.c ++++ b/xen/arch/x86/mm/mem_sharing.c @@ -322,12 +322,12 @@ static struct page_info* mem_sharing_all req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; } @@ -106,11 +100,9 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_sharing.c /* Unpause domain/vcpu */ if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) -Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/mm/p2m.c -+++ xen-4.1.2-testing/xen/arch/x86/mm/p2m.c -@@ -2995,7 +2995,7 @@ void p2m_mem_paging_drop_page(struct p2m +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -2996,7 +2996,7 @@ void p2m_mem_paging_drop_page(struct p2m struct domain *d = p2m->domain; /* Check that there's space on the ring for this request */ @@ -119,7 +111,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c { /* Send release notification to pager */ memset(&req, 0, sizeof(req)); -@@ -3003,7 +3003,7 @@ void p2m_mem_paging_drop_page(struct p2m +@@ -3004,7 +3004,7 @@ void p2m_mem_paging_drop_page(struct p2m req.gfn = gfn; req.vcpu_id = v->vcpu_id; @@ -128,7 +120,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c } } -@@ -3038,7 +3038,7 @@ void p2m_mem_paging_populate(struct p2m_ +@@ -3039,7 +3039,7 @@ void p2m_mem_paging_populate(struct p2m_ struct domain *d = p2m->domain; /* Check that there's space on the ring for this request */ @@ -137,7 +129,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c return; memset(&req, 0, sizeof(req)); -@@ -3069,7 +3069,7 @@ void p2m_mem_paging_populate(struct p2m_ +@@ -3070,7 +3070,7 @@ void p2m_mem_paging_populate(struct p2m_ else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) { /* gfn is already on its way back and 
vcpu is not paused */ @@ -146,7 +138,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c return; } -@@ -3078,7 +3078,7 @@ void p2m_mem_paging_populate(struct p2m_ +@@ -3079,7 +3079,7 @@ void p2m_mem_paging_populate(struct p2m_ req.p2mt = p2mt; req.vcpu_id = v->vcpu_id; @@ -155,7 +147,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c } /** -@@ -3156,7 +3156,7 @@ void p2m_mem_paging_resume(struct p2m_do +@@ -3157,7 +3157,7 @@ void p2m_mem_paging_resume(struct p2m_do mfn_t mfn; /* Pull the response off the ring */ @@ -164,7 +156,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c /* Fix p2m entry if the page was not dropped */ if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) ) -@@ -3209,7 +3209,7 @@ void p2m_mem_access_check(unsigned long +@@ -3210,7 +3210,7 @@ void p2m_mem_access_check(unsigned long p2m_unlock(p2m); /* Otherwise, check if there is a memory event listener, and send the message along */ @@ -173,7 +165,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c if ( res < 0 ) { /* No listener */ -@@ -3253,7 +3253,7 @@ void p2m_mem_access_check(unsigned long +@@ -3254,7 +3254,7 @@ void p2m_mem_access_check(unsigned long req.vcpu_id = v->vcpu_id; @@ -182,7 +174,7 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c /* VCPU paused, mem event request sent */ } -@@ -3263,7 +3263,7 @@ void p2m_mem_access_resume(struct p2m_do +@@ -3264,7 +3264,7 @@ void p2m_mem_access_resume(struct p2m_do struct domain *d = p2m->domain; mem_event_response_t rsp; @@ -191,10 +183,8 @@ Index: xen-4.1.2-testing/xen/arch/x86/mm/p2m.c /* Unpause domain */ if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) -Index: xen-4.1.2-testing/xen/common/domain.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/common/domain.c -+++ xen-4.1.2-testing/xen/common/domain.c +--- a/xen/common/domain.c ++++ b/xen/common/domain.c @@ -298,6 +298,10 @@ struct domain *domain_create( init_status |= INIT_gnttab; @@ -214,10 +204,8 @@ Index: 
xen-4.1.2-testing/xen/common/domain.c if ( init_status & INIT_arch ) arch_domain_destroy(d); if ( init_status & INIT_gnttab ) -Index: xen-4.1.2-testing/xen/include/xen/sched.h -=================================================================== ---- xen-4.1.2-testing.orig/xen/include/xen/sched.h -+++ xen-4.1.2-testing/xen/include/xen/sched.h +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h @@ -201,6 +201,16 @@ struct mem_event_domain int xen_port; }; diff --git a/24275-x86-emul-lzcnt.patch b/24275-x86-emul-lzcnt.patch new file mode 100644 index 0000000..62c51e2 --- /dev/null +++ b/24275-x86-emul-lzcnt.patch @@ -0,0 +1,88 @@ +# HG changeset patch +# User Jan Beulich +# Date 1322725849 -3600 +# Node ID 76ea126f21724b72c120aff59460f7bbe9e6960d +# Parent 07cf778d517fdf661a34027af653a489489bf222 +x86/emulator: properly handle lzcnt and tzcnt + +These instructions are prefix selected flavors of bsf and bsr +respectively, and hence the presences of the F3 prefix must be handled +in the emulation code in order to avoid running into problems on newer +CPUs. 
+ +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -990,6 +990,9 @@ static bool_t vcpu_has( + return rc == X86EMUL_OKAY; + } + ++#define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX, 5, ctxt, ops) ++#define vcpu_has_bmi1() vcpu_has(0x00000007, EBX, 3, ctxt, ops) ++ + #define vcpu_must_have(leaf, reg, bit) \ + generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1) + #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26) +@@ -4114,13 +4117,24 @@ x86_emulate( + dst.val = (uint8_t)src.val; + break; + +- case 0xbc: /* bsf */ { +- int zf; ++ case 0xbc: /* bsf or tzcnt */ { ++ bool_t zf; + asm ( "bsf %2,%0; setz %b1" + : "=r" (dst.val), "=q" (zf) +- : "r" (src.val), "1" (0) ); ++ : "r" (src.val) ); + _regs.eflags &= ~EFLG_ZF; +- if ( zf ) ++ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_bmi1() ) ++ { ++ _regs.eflags &= ~EFLG_CF; ++ if ( zf ) ++ { ++ _regs.eflags |= EFLG_CF; ++ dst.val = op_bytes * 8; ++ } ++ else if ( !dst.val ) ++ _regs.eflags |= EFLG_ZF; ++ } ++ else if ( zf ) + { + _regs.eflags |= EFLG_ZF; + dst.type = OP_NONE; +@@ -4128,13 +4142,28 @@ x86_emulate( + break; + } + +- case 0xbd: /* bsr */ { +- int zf; ++ case 0xbd: /* bsr or lzcnt */ { ++ bool_t zf; + asm ( "bsr %2,%0; setz %b1" + : "=r" (dst.val), "=q" (zf) +- : "r" (src.val), "1" (0) ); ++ : "r" (src.val) ); + _regs.eflags &= ~EFLG_ZF; +- if ( zf ) ++ if ( (rep_prefix == REPE_PREFIX) && vcpu_has_lzcnt() ) ++ { ++ _regs.eflags &= ~EFLG_CF; ++ if ( zf ) ++ { ++ _regs.eflags |= EFLG_CF; ++ dst.val = op_bytes * 8; ++ } ++ else ++ { ++ dst.val = op_bytes * 8 - 1 - dst.val; ++ if ( !dst.val ) ++ _regs.eflags |= EFLG_ZF; ++ } ++ } ++ else if ( zf ) + { + _regs.eflags |= EFLG_ZF; + dst.type = OP_NONE; diff --git a/24277-x86-dom0-features.patch b/24277-x86-dom0-features.patch new file mode 100644 index 0000000..0c8cd2a --- /dev/null +++ b/24277-x86-dom0-features.patch @@ -0,0 +1,58 @@ +# HG changeset patch +# 
User Liu, Jinsong +# Date 1322738484 -3600 +# Node ID 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317 +# Parent 89f7273681696022cc44db4f2ec5b22560482869 +X86: expose Intel new features to dom0 + +This patch expose Intel new features to dom0, including +FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE. + +Signed-off-by: Liu, Jinsong +Committed-by: Jan Beulich + +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -848,8 +848,11 @@ static void pv_cpuid(struct cpu_user_reg + break; + case 7: + if ( regs->ecx == 0 ) +- b &= (cpufeat_mask(X86_FEATURE_FSGSBASE) | +- cpufeat_mask(X86_FEATURE_ERMS)); ++ b &= (cpufeat_mask(X86_FEATURE_BMI1) | ++ cpufeat_mask(X86_FEATURE_AVX2) | ++ cpufeat_mask(X86_FEATURE_BMI2) | ++ cpufeat_mask(X86_FEATURE_ERMS) | ++ cpufeat_mask(X86_FEATURE_FSGSBASE)); + else + b = 0; + a = c = d = 0; +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -93,6 +93,7 @@ + #define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ + #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental Streaming SIMD Extensions-3 */ + #define X86_FEATURE_CID (4*32+10) /* Context ID */ ++#define X86_FEATURE_FMA (4*32+12) /* Fused Multiply Add */ + #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ + #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ + #define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */ +@@ -100,6 +101,7 @@ + #define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */ + #define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */ + #define X86_FEATURE_X2APIC (4*32+21) /* Extended xAPIC */ ++#define X86_FEATURE_MOVBE (4*32+22) /* movbe instruction */ + #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ + #define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline Timer */ + #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +@@ -144,7 +146,10 @@ + + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */ + #define X86_FEATURE_FSGSBASE (7*32+ 
0) /* {RD,WR}{FS,GS}BASE instructions */ ++#define X86_FEATURE_BMI1 (7*32+ 3) /* 1st bit manipulation extensions */ ++#define X86_FEATURE_AVX2 (7*32+ 5) /* AVX2 instructions */ + #define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution Protection */ ++#define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */ + #define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */ + + #define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) diff --git a/24278-x86-dom0-no-PCID.patch b/24278-x86-dom0-no-PCID.patch new file mode 100644 index 0000000..e3ff165 --- /dev/null +++ b/24278-x86-dom0-no-PCID.patch @@ -0,0 +1,49 @@ +# HG changeset patch +# User Liu, Jinsong +# Date 1322738563 -3600 +# Node ID d9cb04ed55398ea4043c85573460afaf023aa1e9 +# Parent 1f6b58c8e1ba8d27dfb97f0da96d18d3ad163317 +X86: Disable PCID/INVPCID for dom0 + +PCID (Process-context identifier) is a facility by which a logical +processor may cache information for multiple linear-address spaces. +INVPCID is an new instruction to invalidate TLB. Refer latest Intel SDM +http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html + +We disable PCID/INVPCID for dom0 and pv. Exposing them into dom0 and pv +may result in performance regression, and it would trigger GP or UD +depending on whether platform suppport INVPCID or not. + +This patch disables PCID/INVPCID for dom0. 
+ +Signed-off-by: Liu, Jinsong +Committed-by: Jan Beulich + +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -836,6 +836,7 @@ static void pv_cpuid(struct cpu_user_reg + __clear_bit(X86_FEATURE_CX16 % 32, &c); + __clear_bit(X86_FEATURE_XTPR % 32, &c); + __clear_bit(X86_FEATURE_PDCM % 32, &c); ++ __clear_bit(X86_FEATURE_PCID % 32, &c); + __clear_bit(X86_FEATURE_DCA % 32, &c); + if ( !xsave_enabled(current) ) + { +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -97,6 +97,7 @@ + #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ + #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ + #define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capability MSR */ ++#define X86_FEATURE_PCID (4*32+17) /* Process Context ID */ + #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ + #define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */ + #define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */ +@@ -151,6 +152,7 @@ + #define X86_FEATURE_SMEP (7*32+ 7) /* Supervisor Mode Execution Protection */ + #define X86_FEATURE_BMI2 (7*32+ 8) /* 2nd bit manipulation extensions */ + #define X86_FEATURE_ERMS (7*32+ 9) /* Enhanced REP MOVSB/STOSB */ ++#define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */ + + #define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) + #define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) diff --git a/24282-x86-log-dirty-bitmap-leak.patch b/24282-x86-log-dirty-bitmap-leak.patch new file mode 100644 index 0000000..78432f5 --- /dev/null +++ b/24282-x86-log-dirty-bitmap-leak.patch @@ -0,0 +1,38 @@ +# HG changeset patch +# User Tim Deegan +# Date 1322749036 0 +# Node ID a06cda9fb25f2d7b7b5c7da170813e4a8bb0cd67 +# Parent 75f4e4d9f039ea656051e6dfd73e40d4cb32896b +x86/mm: Don't lose track of the log dirty bitmap + +hap_log_dirty_init unconditionally sets the top of the log dirty +bitmap to INVALID_MFN. 
If there had been a bitmap allocated, it is +then leaked, and the host crashes on an ASSERT when the domain is +cleaned up. + +Signed-off-by: Tim Deegan +Acked-by: Andres Lagar-Cavilla +Committed-by: Tim Deegan + +--- a/xen/arch/x86/mm/paging.c ++++ b/xen/arch/x86/mm/paging.c +@@ -665,7 +665,6 @@ void paging_log_dirty_init(struct domain + d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty; + d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty; + d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap; +- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN); + } + + /* This function fress log dirty bitmap resources. */ +@@ -686,6 +685,11 @@ int paging_domain_init(struct domain *d, + if ( (rc = p2m_init(d)) != 0 ) + return rc; + ++ /* This must be initialized separately from the rest of the ++ * log-dirty init code as that can be called more than once and we ++ * don't want to leak any active log-dirty bitmaps */ ++ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN); ++ + /* The order of the *_init calls below is important, as the later + * ones may rewrite some common fields. Shadow pagetables are the + * default... 
*/ diff --git a/24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch b/24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch new file mode 100644 index 0000000..6a80d5b --- /dev/null +++ b/24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch @@ -0,0 +1,139 @@ +changeset: 24327:8529bca7a3f0 +parent: 24322:6bac46816504 +user: Andres Lagar-Cavilla +date: Thu Dec 01 18:14:24 2011 +0000 +files: xen/arch/x86/mm/mem_event.c xen/arch/x86/mm/mem_paging.c xen/arch/x86/mm/p2m.c xen/include/asm-x86/p2m.h xen/include/public/domctl.h +description: +After preparing a page for page-in, allow immediate fill-in of the page contents + +p2m_mem_paging_prep ensures that an mfn is backing the paged-out gfn, and +transitions to the next state in the paging state machine for that page. +Foreign mappings of the gfn will now succeed. This is the key idea, as +it allows the pager to now map the gfn and fill in its contents. + +Unfortunately, it also allows any other foreign mapper to map the gfn and read +its contents. This is particularly dangerous when the populate is launched +by a foreign mapper in the first place, which will be actively retrying the +map operation and might race with the pager. Qemu-dm being a prime example. + +Fix the race by allowing a buffer to be optionally passed in the prep +operation, and having the hypervisor memcpy from that buffer into the newly +prepped page before promoting the gfn type. 
+ +Signed-off-by: Andres Lagar-Cavilla +Acked-by: Tim Deegan +Committed-by: Tim Deegan + + +--- + xen/arch/x86/mm/mem_event.c | 2 +- + xen/arch/x86/mm/mem_paging.c | 2 +- + xen/arch/x86/mm/p2m.c | 32 ++++++++++++++++++++++++++++++-- + xen/include/asm-x86/p2m.h | 2 +- + xen/include/public/domctl.h | 8 ++++++-- + 5 files changed, 39 insertions(+), 7 deletions(-) + +--- a/xen/arch/x86/mm/mem_event.c ++++ b/xen/arch/x86/mm/mem_event.c +@@ -45,7 +45,7 @@ static int mem_event_enable(struct domai + struct domain *dom_mem_event = current->domain; + struct vcpu *v = current; + unsigned long ring_addr = mec->ring_addr; +- unsigned long shared_addr = mec->shared_addr; ++ unsigned long shared_addr = mec->u.shared_addr; + l1_pgentry_t l1e; + unsigned long gfn; + p2m_type_t p2mt; +--- a/xen/arch/x86/mm/mem_paging.c ++++ b/xen/arch/x86/mm/mem_paging.c +@@ -50,7 +50,7 @@ int mem_paging_domctl(struct domain *d, + case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP: + { + unsigned long gfn = mec->gfn; +- rc = p2m_mem_paging_prep(p2m, gfn); ++ rc = p2m_mem_paging_prep(p2m, gfn, mec->u.buffer); + } + break; + +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -3093,13 +3093,20 @@ void p2m_mem_paging_populate(struct p2m_ + * mfn if populate was called for gfn which was nominated but not evicted. In + * this case only the p2mt needs to be forwarded. 
+ */ +-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn) ++int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t buffer) + { + struct page_info *page; + p2m_type_t p2mt; + p2m_access_t a; + mfn_t mfn; +- int ret; ++ int ret, page_extant = 1; ++ const void *user_ptr = (const void *) buffer; ++ ++ if ( user_ptr ) ++ /* Sanity check the buffer and bail out early if trouble */ ++ if ( (buffer & (PAGE_SIZE - 1)) || ++ (!access_ok(user_ptr, PAGE_SIZE)) ) ++ return -EINVAL; + + p2m_lock(p2m); + +@@ -3119,6 +3126,28 @@ int p2m_mem_paging_prep(struct p2m_domai + if ( unlikely(page == NULL) ) + goto out; + mfn = page_to_mfn(page); ++ page_extant = 0; ++ } ++ ++ /* If we were given a buffer, now is the time to use it */ ++ if ( !page_extant && user_ptr ) ++ { ++ void *guest_map; ++ int rc; ++ ++ ASSERT( mfn_valid(mfn) ); ++ guest_map = map_domain_page(mfn_x(mfn)); ++ rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE); ++ unmap_domain_page(guest_map); ++ if ( rc ) ++ { ++ gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u " ++ "bytes left %d\n", ++ gfn, p2m->domain->domain_id, rc); ++ ret = -EFAULT; ++ put_page(page); /* Don't leak pages */ ++ goto out; ++ } + } + + /* Fix p2m mapping */ +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -524,7 +524,7 @@ void p2m_mem_paging_drop_page(struct p2m + /* Start populating a paged out frame */ + void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn); + /* Prepare the p2m for paging a frame in */ +-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn); ++int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn, uint64_t buffer); + /* Resume normal operation (in case a domain was paused) */ + void p2m_mem_paging_resume(struct p2m_domain *p2m); + #else +--- a/xen/include/public/domctl.h ++++ b/xen/include/public/domctl.h +@@ -741,8 +741,12 @@ struct xen_domctl_mem_event_op { + uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ + 
uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ + +- /* OP_ENABLE */ +- uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */ ++ union { ++ /* OP_ENABLE IN: Virtual address of shared page */ ++ uint64_aligned_t shared_addr; ++ /* PAGING_PREP IN: buffer to immediately fill page in */ ++ uint64_aligned_t buffer; ++ } u; + uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */ + + /* Other OPs */ diff --git a/24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch b/24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch new file mode 100644 index 0000000..b4f6ede --- /dev/null +++ b/24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch @@ -0,0 +1,86 @@ +changeset: 24328:8ad47b48047d +user: Andres Lagar-Cavilla +date: Thu Dec 01 18:14:24 2011 +0000 +files: tools/libxc/xc_mem_event.c tools/libxc/xc_mem_paging.c tools/libxc/xenctrl.h +description: +Tools: Libxc wrappers to automatically fill in page oud page contents on prepare +Signed-off-by: Andres Lagar-Cavilla +Acked-by: Ian Jackson +Committed-by: Tim Deegan + + +--- + tools/libxc/xc_mem_event.c | 4 ++-- + tools/libxc/xc_mem_paging.c | 23 +++++++++++++++++++++++ + tools/libxc/xenctrl.h | 2 ++ + 3 files changed, 27 insertions(+), 2 deletions(-) + +Index: xen-4.1.2-testing/tools/libxc/xc_mem_event.c +=================================================================== +--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_event.c ++++ xen-4.1.2-testing/tools/libxc/xc_mem_event.c +@@ -24,7 +24,7 @@ + #include "xc_private.h" + + int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op, +- unsigned int mode, void *shared_page, ++ unsigned int mode, void *page, + void *ring_page, unsigned long gfn) + { + DECLARE_DOMCTL; +@@ -34,7 +34,7 @@ int xc_mem_event_control(xc_interface *x + domctl.u.mem_event_op.op = op; + domctl.u.mem_event_op.mode = mode; + +- 
domctl.u.mem_event_op.shared_addr = (unsigned long)shared_page; ++ domctl.u.mem_event_op.u.shared_addr = (unsigned long)page; + domctl.u.mem_event_op.ring_addr = (unsigned long)ring_page; + + domctl.u.mem_event_op.gfn = gfn; +Index: xen-4.1.2-testing/tools/libxc/xc_mem_paging.c +=================================================================== +--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_paging.c ++++ xen-4.1.2-testing/tools/libxc/xc_mem_paging.c +@@ -65,6 +65,29 @@ int xc_mem_paging_prep(xc_interface *xch + NULL, NULL, gfn); + } + ++int xc_mem_paging_load(xc_interface *xch, domid_t domain_id, ++ unsigned long gfn, void *buffer) ++{ ++ int rc; ++ ++ if ( !buffer ) ++ return -EINVAL; ++ ++ if ( ((unsigned long) buffer) & (XC_PAGE_SIZE - 1) ) ++ return -EINVAL; ++ ++ if ( mlock(buffer, XC_PAGE_SIZE) ) ++ return -errno; ++ ++ rc = xc_mem_event_control(xch, domain_id, ++ XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP, ++ XEN_DOMCTL_MEM_EVENT_OP_PAGING, ++ buffer, NULL, gfn); ++ ++ (void)munlock(buffer, XC_PAGE_SIZE); ++ return rc; ++} ++ + int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id, unsigned long gfn) + { + return xc_mem_event_control(xch, domain_id, +Index: xen-4.1.2-testing/tools/libxc/xenctrl.h +=================================================================== +--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h ++++ xen-4.1.2-testing/tools/libxc/xenctrl.h +@@ -1742,6 +1742,8 @@ int xc_mem_paging_nominate(xc_interface + unsigned long gfn); + int xc_mem_paging_evict(xc_interface *xch, domid_t domain_id, unsigned long gfn); + int xc_mem_paging_prep(xc_interface *xch, domid_t domain_id, unsigned long gfn); ++int xc_mem_paging_load(xc_interface *xch, domid_t domain_id, ++ unsigned long gfn, void *buffer); + int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id, + unsigned long gfn); + diff --git a/24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch 
b/24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch new file mode 100644 index 0000000..d314934 --- /dev/null +++ b/24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch @@ -0,0 +1,100 @@ +changeset: 24329:a8f5faa127c4 +user: Andres Lagar-Cavilla +date: Thu Dec 01 18:14:24 2011 +0000 +files: tools/xenpaging/xenpaging.c +description: +Teach xenpaging to use the new and non-racy xc_mem_paging_load interface + +Signed-off-by: Andres Lagar-Cavilla +Acked-by: Olaf Hering +Committed-by: Tim Deegan + + +--- + tools/xenpaging/xenpaging.c | 43 +++++++++++++++++++++---------------------- + 1 file changed, 21 insertions(+), 22 deletions(-) + +Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c +=================================================================== +--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c +@@ -45,6 +45,7 @@ static char *dom_path; + static char watch_token[16]; + static char *filename; + static int interrupted; ++static void *paging_buffer = NULL; + + static void unlink_pagefile(void) + { +@@ -438,6 +439,13 @@ static xenpaging_t *xenpaging_init(int a + goto err; + } + ++ paging_buffer = init_page(); ++ if ( !paging_buffer ) ++ { ++ ERROR("Creating page aligned load buffer"); ++ goto err; ++ } ++ + return paging; + + err: +@@ -649,10 +657,20 @@ static int xenpaging_populate_page(xenpa + unsigned char oom = 0; + + DPRINTF("populate_page < gfn %"PRI_xen_pfn" pageslot %d\n", gfn, i); ++ ++ /* Read page */ ++ ret = read_page(fd, paging_buffer, i); ++ if ( ret != 0 ) ++ { ++ ERROR("Error reading page"); ++ goto out; ++ } ++ + do + { + /* Tell Xen to allocate a page for the domain */ +- ret = xc_mem_paging_prep(xch, paging->mem_event.domain_id, gfn); ++ ret = xc_mem_paging_load(xch, paging->mem_event.domain_id, gfn, ++ paging_buffer); + if ( ret != 0 ) + { + if ( errno == ENOMEM ) +@@ -662,33 +680,14 @@ static int 
xenpaging_populate_page(xenpa + sleep(1); + continue; + } +- PERROR("Error preparing %"PRI_xen_pfn" for page-in", gfn); +- goto out_map; ++ PERROR("Error loading %"PRI_xen_pfn" during page-in", gfn); ++ goto out; + } + } + while ( ret && !interrupted ); + +- /* Map page */ +- ret = -EFAULT; +- page = xc_map_foreign_pages(xch, paging->mem_event.domain_id, +- PROT_READ | PROT_WRITE, &gfn, 1); +- if ( page == NULL ) +- { +- PERROR("Error mapping page %"PRI_xen_pfn": page is null", gfn); +- goto out_map; +- } +- +- /* Read page */ +- ret = read_page(fd, page, i); +- if ( ret != 0 ) +- { +- PERROR("Error reading page %"PRI_xen_pfn"", gfn); +- goto out; +- } + + out: +- munmap(page, PAGE_SIZE); +- out_map: + return ret; + } + diff --git a/24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch b/24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch deleted file mode 100644 index 939ba2b..0000000 --- a/24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch +++ /dev/null @@ -1,27 +0,0 @@ -changeset: 24341:60d4e257d04b -user: Jan Beulich -date: Fri Dec 02 09:05:26 2011 +0100 -files: xen/arch/x86/x86_64/mmconfig_64.c -description: -x86-64/mmcfg: remove __initdata annotation overlooked in 23749:e8d1c8f074ba - -Signed-off-by: Jan Beulich - - ---- - xen/arch/x86/x86_64/mmconfig_64.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -Index: xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/x86_64/mmconfig_64.c -+++ xen-4.1.2-testing/xen/arch/x86/x86_64/mmconfig_64.c -@@ -23,7 +23,7 @@ struct mmcfg_virt { - char __iomem *virt; - }; - static struct mmcfg_virt *pci_mmcfg_virt; --static int __initdata mmcfg_pci_segment_shift; -+static unsigned int mmcfg_pci_segment_shift; - - static char __iomem *get_virt(unsigned int seg, unsigned int *bus) - { diff --git 
a/24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch b/24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch index a20da91..ec2ad0a 100644 --- a/24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch +++ b/24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch @@ -1,8 +1,8 @@ -changeset: 24344:72f4e4cb7440 -user: Keir Fraser -date: Fri Dec 02 06:31:14 2011 -0800 -files: tools/libxc/xc_cpuid_x86.c tools/misc/xen-detect.c -description: +# HG changeset patch +# User Keir Fraser +# Date 1322836274 28800 +# Node ID 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac +# Parent 109b99239b21275ee2249873dcdb9a413741142d tools/x86_64: Fix cpuid() inline asm to not clobber stack's red zone Pushing stuff onto the stack on x86-64 when we do not specify @@ -13,16 +13,17 @@ for x86-64. Signed-off-by: Keir Fraser Acked-by: Jan Beulich +# HG changeset patch +# User Keir Fraser +# Date 1322844002 28800 +# Node ID 491c3ebf1d371d03fdd0aabe82b0f422037c67ba +# Parent 72f4e4cb7440c6ab64d4c08dfdc3158112cc95ac +tools/libxc: Fix x86_32 build breakage in previous changeset. 
---- - tools/libxc/xc_cpuid_x86.c | 18 +++++++++--------- - tools/misc/xen-detect.c | 17 ++++++++++------- - 2 files changed, 19 insertions(+), 16 deletions(-) +Signed-off-by: Keir Fraser -Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c -=================================================================== ---- xen-4.1.2-testing.orig/tools/libxc/xc_cpuid_x86.c -+++ xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c +--- a/tools/libxc/xc_cpuid_x86.c ++++ b/tools/libxc/xc_cpuid_x86.c @@ -42,23 +42,23 @@ static int hypervisor_is_64bit(xc_interf static void cpuid(const unsigned int *input, unsigned int *regs) { @@ -44,8 +45,7 @@ Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c - "pop %%rdx; pop %%rbx\n\t" -#endif : "=a" (regs[0]), "=c" (regs[2]) -- : "0" (input[0]), "1" (count), "S" (regs) -+ : "0" (input[0]), "1" (count), "S" (_regs) + : "0" (input[0]), "1" (count), "S" (regs) : "memory" ); +#else + asm ( @@ -56,10 +56,8 @@ Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c } /* Get the manufacturer brand name of the host processor. */ -Index: xen-4.1.2-testing/tools/misc/xen-detect.c -=================================================================== ---- xen-4.1.2-testing.orig/tools/misc/xen-detect.c -+++ xen-4.1.2-testing/tools/misc/xen-detect.c +--- a/tools/misc/xen-detect.c ++++ b/tools/misc/xen-detect.c @@ -35,18 +35,21 @@ static void cpuid(uint32_t idx, uint32_t *regs, int pv_context) diff --git a/24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch b/24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch deleted file mode 100644 index 1e76459..0000000 --- a/24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch +++ /dev/null @@ -1,28 +0,0 @@ -changeset: 24345:491c3ebf1d37 -tag: tip -user: Keir Fraser -date: Fri Dec 02 08:40:02 2011 -0800 -files: tools/libxc/xc_cpuid_x86.c -description: -tools/libxc: Fix x86_32 build breakage in previous changeset. 
- -Signed-off-by: Keir Fraser - - ---- - tools/libxc/xc_cpuid_x86.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -Index: xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c -=================================================================== ---- xen-4.1.2-testing.orig/tools/libxc/xc_cpuid_x86.c -+++ xen-4.1.2-testing/tools/libxc/xc_cpuid_x86.c -@@ -51,7 +51,7 @@ static void cpuid(const unsigned int *in - "mov %%edx,12(%4)\n\t" - "pop %%edx; pop %%ebx\n\t" - : "=a" (regs[0]), "=c" (regs[2]) -- : "0" (input[0]), "1" (count), "S" (_regs) -+ : "0" (input[0]), "1" (count), "S" (regs) - : "memory" ); - #else - asm ( diff --git a/24357-firmware-no-_PS0-_PS3.patch b/24357-firmware-no-_PS0-_PS3.patch new file mode 100644 index 0000000..1392d7b --- /dev/null +++ b/24357-firmware-no-_PS0-_PS3.patch @@ -0,0 +1,49 @@ +References: bnc#711219 + +# HG changeset patch +# User Xudong Hao +# Date 1323113706 0 +# Node ID 832fa3f3543298a7125cd5f996d1e28dd7ba47b1 +# Parent 60ea36c0512b779f291bb6c007e1f05c16054ec2 +tools/firmware: remove "_PS0/3" Method + +Do not expose the ACPI power management "_PS0/3" Method to guest +firmware. According to section 3.4 of the APCI specification 4.0, PCI +device control the device power through its own specification but not +through APCI. + +Qemu pushes "_PS0/3" to guest will cause a mess between ACPI PM and +PCI PM as a result of incorrect ACPI table shipped with the guest +BIOS, it may cause a failure of PCI device PM state transition(from +PCI_UNKNOWN to PCI_D0). + +Signed-off-by: Xudong Hao +Signed-off-by: Haitao Shan +Committed-by: Keir Fraser + +--- a/tools/firmware/hvmloader/acpi/mk_dsdt.c ++++ b/tools/firmware/hvmloader/acpi/mk_dsdt.c +@@ -251,8 +251,6 @@ int main(int argc, char **argv) + * the ACPI event: + * _EJ0: eject a device + * _STA: return a device's status, e.g. 
enabled or removed +- * Other methods are optional: +- * _PS0/3: put them here for debug purpose + * + * Eject button would generate a general-purpose event, then the + * control method for this event uses Notify() to inform OSPM which +@@ -271,14 +269,6 @@ int main(int argc, char **argv) + stmt("Name", "_ADR, 0x%08x", ((slot & ~7) << 13) | (slot & 7)); + /* _SUN == dev */ + stmt("Name", "_SUN, 0x%08x", slot >> 3); +- push_block("Method", "_PS0, 0"); +- stmt("Store", "0x%02x, \\_GPE.DPT1", slot); +- stmt("Store", "0x80, \\_GPE.DPT2"); +- pop_block(); +- push_block("Method", "_PS3, 0"); +- stmt("Store", "0x%02x, \\_GPE.DPT1", slot); +- stmt("Store", "0x83, \\_GPE.DPT2"); +- pop_block(); + push_block("Method", "_EJ0, 1"); + stmt("Store", "0x%02x, \\_GPE.DPT1", slot); + stmt("Store", "0x88, \\_GPE.DPT2"); diff --git a/24358-kexec-compat-overflow.patch b/24358-kexec-compat-overflow.patch new file mode 100644 index 0000000..4fd6985 --- /dev/null +++ b/24358-kexec-compat-overflow.patch @@ -0,0 +1,29 @@ +# HG changeset patch +# User Andrew Cooper +# Date 1323114166 0 +# Node ID 9961a6d5356a57685b06f65133c6ade5041e3356 +# Parent 832fa3f3543298a7125cd5f996d1e28dd7ba47b1 +KEXEC: fix kexec_get_range_compat to fail vocally. + +Fail with -ERANGE rather than silently truncating 64bit values (a +physical address and size) into 32bit integers for dom0 to consume. + +Signed-off-by: Andrew Cooper + +Simplify the bitwise arithmetic a bit. + +Signed-off-by: Keir Fraser + +--- a/xen/common/kexec.c ++++ b/xen/common/kexec.c +@@ -395,6 +395,10 @@ static int kexec_get_range_compat(XEN_GU + + ret = kexec_get_range_internal(&range); + ++ /* Dont silently truncate physical addresses or sizes. 
*/ ++ if ( (range.start | range.size) & ~(unsigned long)(~0u) ) ++ return -ERANGE; ++ + if ( ret == 0 ) { + XLAT_kexec_range(&compat_range, &range); + if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) ) diff --git a/24359-x86-domU-features.patch b/24359-x86-domU-features.patch new file mode 100644 index 0000000..8ee24f2 --- /dev/null +++ b/24359-x86-domU-features.patch @@ -0,0 +1,94 @@ +# HG changeset patch +# User Liu, Jinsong +# Date 1323170838 0 +# Node ID a0befa32e927cc147aaee9bce42c51f53580a875 +# Parent 9961a6d5356a57685b06f65133c6ade5041e3356 +X86: expose Intel new features to pv/hvm + +Intel recently release some new features, including +FMA/AVX2/BMI1/BMI2/LZCNT/MOVBE. +Refer to http://software.intel.com/file/36945 +This patch expose these new features to pv and hvm. + +Signed-off-by: Liu, Jinsong +Committed-by: Keir Fraser + +--- a/tools/libxc/xc_cpufeature.h ++++ b/tools/libxc/xc_cpufeature.h +@@ -74,6 +74,7 @@ + #define X86_FEATURE_TM2 8 /* Thermal Monitor 2 */ + #define X86_FEATURE_SSSE3 9 /* Supplemental Streaming SIMD Exts-3 */ + #define X86_FEATURE_CID 10 /* Context ID */ ++#define X86_FEATURE_FMA 12 /* Fused Multiply Add */ + #define X86_FEATURE_CX16 13 /* CMPXCHG16B */ + #define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */ + #define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */ +@@ -81,6 +82,7 @@ + #define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */ + #define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */ + #define X86_FEATURE_X2APIC 21 /* x2APIC */ ++#define X86_FEATURE_MOVBE 22 /* movbe instruction */ + #define X86_FEATURE_POPCNT 23 /* POPCNT instruction */ + #define X86_FEATURE_TSC_DEADLINE 24 /* "tdt" TSC Deadline Timer */ + #define X86_FEATURE_AES 25 /* AES acceleration instructions */ +@@ -125,7 +127,10 @@ + + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */ + #define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */ ++#define X86_FEATURE_BMI1 3 /* 1st group bit 
manipulation extensions */ ++#define X86_FEATURE_AVX2 5 /* AVX2 instructions */ + #define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */ ++#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */ + #define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */ + + #endif /* __LIBXC_CPUFEATURE_H */ +--- a/tools/libxc/xc_cpuid_x86.c ++++ b/tools/libxc/xc_cpuid_x86.c +@@ -148,7 +148,8 @@ static void intel_xc_cpuid_policy( + int is_64bit = hypervisor_is_64bit(xch) && is_pae; + + /* Only a few features are advertised in Intel's 0x80000001. */ +- regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0); ++ regs[2] &= (is_64bit ? bitmaskof(X86_FEATURE_LAHF_LM) : 0) | ++ bitmaskof(X86_FEATURE_ABM); + regs[3] &= ((is_pae ? bitmaskof(X86_FEATURE_NX) : 0) | + (is_64bit ? bitmaskof(X86_FEATURE_LM) : 0) | + (is_64bit ? bitmaskof(X86_FEATURE_SYSCALL) : 0) | +@@ -256,9 +257,11 @@ static void xc_cpuid_hvm_policy( + regs[2] &= (bitmaskof(X86_FEATURE_XMM3) | + bitmaskof(X86_FEATURE_PCLMULQDQ) | + bitmaskof(X86_FEATURE_SSSE3) | ++ bitmaskof(X86_FEATURE_FMA) | + bitmaskof(X86_FEATURE_CX16) | + bitmaskof(X86_FEATURE_SSE4_1) | + bitmaskof(X86_FEATURE_SSE4_2) | ++ bitmaskof(X86_FEATURE_MOVBE) | + bitmaskof(X86_FEATURE_POPCNT) | + bitmaskof(X86_FEATURE_AES) | + bitmaskof(X86_FEATURE_F16C) | +@@ -303,7 +306,10 @@ static void xc_cpuid_hvm_policy( + + case 0x00000007: /* Intel-defined CPU features */ + if ( input[1] == 0 ) { +- regs[1] &= (bitmaskof(X86_FEATURE_SMEP) | ++ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) | ++ bitmaskof(X86_FEATURE_AVX2) | ++ bitmaskof(X86_FEATURE_SMEP) | ++ bitmaskof(X86_FEATURE_BMI2) | + bitmaskof(X86_FEATURE_ERMS) | + bitmaskof(X86_FEATURE_FSGSBASE)); + } else +@@ -427,8 +433,11 @@ static void xc_cpuid_pv_policy( + + case 7: + if ( input[1] == 0 ) +- regs[1] &= (bitmaskof(X86_FEATURE_FSGSBASE) | +- bitmaskof(X86_FEATURE_ERMS)); ++ regs[1] &= (bitmaskof(X86_FEATURE_BMI1) | ++ bitmaskof(X86_FEATURE_AVX2) | ++ bitmaskof(X86_FEATURE_BMI2) | ++ 
bitmaskof(X86_FEATURE_ERMS) | ++ bitmaskof(X86_FEATURE_FSGSBASE)); + else + regs[1] = 0; + regs[0] = regs[2] = regs[3] = 0; diff --git a/24360-x86-pv-domU-no-PCID.patch b/24360-x86-pv-domU-no-PCID.patch new file mode 100644 index 0000000..401b41e --- /dev/null +++ b/24360-x86-pv-domU-no-PCID.patch @@ -0,0 +1,39 @@ +# HG changeset patch +# User Liu, Jinsong +# Date 1323170884 0 +# Node ID d313582d4fa2157332f1d50e599aebca36c41b3b +# Parent a0befa32e927cc147aaee9bce42c51f53580a875 +X86: Disable PCID/INVPCID for pv + +This patch disable PCID/INVPCID for pv. + +Signed-off-by: Liu, Jinsong +Committed-by: Keir Fraser + +--- a/tools/libxc/xc_cpufeature.h ++++ b/tools/libxc/xc_cpufeature.h +@@ -78,6 +78,7 @@ + #define X86_FEATURE_CX16 13 /* CMPXCHG16B */ + #define X86_FEATURE_XTPR 14 /* Send Task Priority Messages */ + #define X86_FEATURE_PDCM 15 /* Perf/Debug Capability MSR */ ++#define X86_FEATURE_PCID 17 /* Process Context ID */ + #define X86_FEATURE_DCA 18 /* Direct Cache Access */ + #define X86_FEATURE_SSE4_1 19 /* Streaming SIMD Extensions 4.1 */ + #define X86_FEATURE_SSE4_2 20 /* Streaming SIMD Extensions 4.2 */ +@@ -132,5 +133,6 @@ + #define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */ + #define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */ + #define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */ ++#define X86_FEATURE_INVPCID 10 /* Invalidate Process Context ID */ + + #endif /* __LIBXC_CPUFEATURE_H */ +--- a/tools/libxc/xc_cpuid_x86.c ++++ b/tools/libxc/xc_cpuid_x86.c +@@ -427,6 +427,7 @@ static void xc_cpuid_pv_policy( + } + clear_bit(X86_FEATURE_XTPR, regs[2]); + clear_bit(X86_FEATURE_PDCM, regs[2]); ++ clear_bit(X86_FEATURE_PCID, regs[2]); + clear_bit(X86_FEATURE_DCA, regs[2]); + set_bit(X86_FEATURE_HYPERVISOR, regs[2]); + break; diff --git a/24389-amd-fam10-gart-tlb-walk-err.patch b/24389-amd-fam10-gart-tlb-walk-err.patch new file mode 100644 index 0000000..6972509 --- /dev/null +++ b/24389-amd-fam10-gart-tlb-walk-err.patch 
@@ -0,0 +1,109 @@ +# HG changeset patch +# User Jan Beulich +# Date 1323765911 -3600 +# Node ID 868d82faf6511de3b3edce18cc6a9e1c938f0b8f +# Parent 7ca56cca09ade16645fb4806be2c5b2b0bc3332b +x86, amd: Disable GartTlbWlkErr when BIOS forgets it + +This patch disables GartTlbWlk errors on AMD Fam10h CPUs if the BIOS +forgets to do is (or is just too old). Letting these errors enabled +can cause a sync-flood on the CPU causing a reboot. + +The AMD BKDG recommends disabling GART TLB Wlk Error completely. + +Based on a Linux patch from Joerg Roedel ; see e.g. +https://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=patch;h=5bbc097d890409d8eff4e3f1d26f11a9d6b7c07e + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/cpu/mcheck/amd_f10.c ++++ b/xen/arch/x86/cpu/mcheck/amd_f10.c +@@ -46,6 +46,7 @@ + #include + + #include "mce.h" ++#include "mce_quirks.h" + #include "x86_mca.h" + + +@@ -91,9 +92,14 @@ amd_f10_handler(struct mc_info *mi, uint + /* AMD Family10 machine check */ + enum mcheck_type amd_f10_mcheck_init(struct cpuinfo_x86 *c) + { ++ enum mcequirk_amd_flags quirkflag = mcequirk_lookup_amd_quirkdata(c); ++ + if (amd_k8_mcheck_init(c) == mcheck_none) + return mcheck_none; + ++ if (quirkflag == MCEQUIRK_F10_GART) ++ mcequirk_amd_apply(quirkflag); ++ + x86_mce_callback_register(amd_f10_handler); + + return mcheck_amd_famXX; +--- a/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c ++++ b/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c +@@ -29,6 +29,8 @@ static const struct mce_quirkdata mce_am + MCEQUIRK_K7_BANK0 }, + { 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */, + MCEQUIRK_K8_GART }, ++ { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */, ++ MCEQUIRK_F10_GART }, + }; + + enum mcequirk_amd_flags +@@ -54,6 +56,8 @@ mcequirk_lookup_amd_quirkdata(struct cpu + + int mcequirk_amd_apply(enum mcequirk_amd_flags flags) + { ++ u64 val; ++ + switch (flags) { + case MCEQUIRK_K7_BANK0: + return 1; /* first bank */ +@@ -67,6 
+71,10 @@ int mcequirk_amd_apply(enum mcequirk_amd + wrmsrl(MSR_IA32_MC4_CTL, ~(1ULL << 10)); + wrmsrl(MSR_IA32_MC4_STATUS, 0ULL); + break; ++ case MCEQUIRK_F10_GART: ++ if (rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0) ++ wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10)); ++ break; + } + + return 0; +--- a/xen/arch/x86/cpu/mcheck/mce_quirks.h ++++ b/xen/arch/x86/cpu/mcheck/mce_quirks.h +@@ -33,8 +33,9 @@ struct mce_quirkdata { + */ + + enum mcequirk_amd_flags { +- MCEQUIRK_K7_BANK0 = 0x1, +- MCEQUIRK_K8_GART = 0x2, ++ MCEQUIRK_K7_BANK0 = 1, ++ MCEQUIRK_K8_GART, ++ MCEQUIRK_F10_GART + }; + + enum mcequirk_intel_flags { +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -98,6 +98,8 @@ + #define CMCI_EN (1UL<<30) + #define CMCI_THRESHOLD_MASK 0x7FFF + ++#define MSR_AMD64_MC0_MASK 0xc0010044 ++ + #define MSR_IA32_MC1_CTL 0x00000404 + #define MSR_IA32_MC1_CTL2 0x00000281 + #define MSR_IA32_MC1_STATUS 0x00000405 +@@ -151,6 +153,8 @@ + #define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) + #define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + ++#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) ++ + #define MSR_P6_PERFCTR0 0x000000c1 + #define MSR_P6_PERFCTR1 0x000000c2 + #define MSR_P6_EVNTSEL0 0x00000186 diff --git a/24391-x86-pcpu-version.patch b/24391-x86-pcpu-version.patch new file mode 100644 index 0000000..519c57b --- /dev/null +++ b/24391-x86-pcpu-version.patch @@ -0,0 +1,155 @@ +# HG changeset patch +# User Jan Beulich +# Date 1323766131 -3600 +# Node ID 3f4ffde189f228d88e534865023fd795f77f0d05 +# Parent 77528dbced3ea74901be6b1aeddedda22bfdaf63 +x86: add platform hypercall to retrieve pCPU-s' family, model, and stepping + +With the recent hotplug changes to the Xen part of the microcode +loading, this allows the kernel driver to avoid unnecessary calls into +the hypervisor during pCPU hot-enabling: Knowing that the hypervisor +retains the data for already booted CPUs, only data for CPUs with a +different signature 
needs to be passed down. Since the microcode +loading code can be pretty verbose, avoiding to invoke it can make the +log much easier to look at in case of problems. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/platform_hypercall.c ++++ b/xen/arch/x86/platform_hypercall.c +@@ -469,6 +469,42 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe + } + break; + ++ case XENPF_get_cpu_version: ++ { ++ struct xenpf_pcpu_version *ver = &op->u.pcpu_version; ++ ++ if ( !get_cpu_maps() ) ++ { ++ ret = -EBUSY; ++ break; ++ } ++ ++ if ( (ver->xen_cpuid >= NR_CPUS) || !cpu_online(ver->xen_cpuid) ) ++ { ++ memset(ver->vendor_id, 0, sizeof(ver->vendor_id)); ++ ver->family = 0; ++ ver->model = 0; ++ ver->stepping = 0; ++ } ++ else ++ { ++ const struct cpuinfo_x86 *c = &cpu_data[ver->xen_cpuid]; ++ ++ memcpy(ver->vendor_id, c->x86_vendor_id, sizeof(ver->vendor_id)); ++ ver->family = c->x86; ++ ver->model = c->x86_model; ++ ver->stepping = c->x86_mask; ++ } ++ ++ ver->max_present = cpumask_last(&cpu_present_map); ++ ++ put_cpu_maps(); ++ ++ if ( copy_field_to_guest(u_xenpf_op, op, u.pcpu_version) ) ++ ret = -EFAULT; ++ } ++ break; ++ + case XENPF_cpu_online: + { + int cpu = op->u.cpu_ol.cpuid; +--- a/xen/arch/x86/x86_64/platform_hypercall.c ++++ b/xen/arch/x86/x86_64/platform_hypercall.c +@@ -3,7 +3,7 @@ + */ + + #include +-#include ++#include + #include + + DEFINE_XEN_GUEST_HANDLE(compat_platform_op_t); +@@ -26,8 +26,13 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_ + #define xen_processor_power_t compat_processor_power_t + #define set_cx_pminfo compat_set_cx_pminfo + +-#define xenpf_pcpuinfo compat_pf_pcpuinfo +-#define xenpf_pcpuinfo_t compat_pf_pcpuinfo_t ++#define xen_pf_pcpuinfo xenpf_pcpuinfo ++CHECK_pf_pcpuinfo; ++#undef xen_pf_pcpuinfo ++ ++#define xen_pf_pcpu_version xenpf_pcpu_version ++CHECK_pf_pcpu_version; ++#undef xen_pf_pcpu_version + + #define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep + +--- a/xen/include/public/platform.h ++++ 
b/xen/include/public/platform.h +@@ -425,6 +425,21 @@ struct xenpf_pcpuinfo { + typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; + DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t); + ++#define XENPF_get_cpu_version 48 ++struct xenpf_pcpu_version { ++ /* IN */ ++ uint32_t xen_cpuid; ++ /* OUT */ ++ /* The maxium cpu_id that is present */ ++ uint32_t max_present; ++ char vendor_id[12]; ++ uint32_t family; ++ uint32_t model; ++ uint32_t stepping; ++}; ++typedef struct xenpf_pcpu_version xenpf_pcpu_version_t; ++DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t); ++ + #define XENPF_cpu_online 56 + #define XENPF_cpu_offline 57 + struct xenpf_cpu_ol +@@ -468,6 +483,7 @@ struct xen_platform_op { + struct xenpf_getidletime getidletime; + struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; ++ struct xenpf_pcpu_version pcpu_version; + struct xenpf_cpu_ol cpu_ol; + struct xenpf_cpu_hotadd cpu_add; + struct xenpf_mem_hotadd mem_add; +--- a/xen/include/xlat.lst ++++ b/xen/include/xlat.lst +@@ -61,6 +61,17 @@ + ! memory_reservation memory.h + ! pod_target memory.h + ? physdev_pci_mmcfg_reserved physdev.h ++! pct_register platform.h ++! power_register platform.h ++? processor_csd platform.h ++! processor_cx platform.h ++! processor_flags platform.h ++! processor_performance platform.h ++! processor_power platform.h ++? processor_px platform.h ++! psd_package platform.h ++? xenpf_pcpuinfo platform.h ++? xenpf_pcpu_version platform.h + ! sched_poll sched.h + ? sched_remote_shutdown sched.h + ? sched_shutdown sched.h +@@ -73,12 +84,3 @@ + ! vcpu_set_singleshot_timer vcpu.h + ? xenoprof_init xenoprof.h + ? xenoprof_passive xenoprof.h +-! power_register platform.h +-? processor_csd platform.h +-! processor_cx platform.h +-! processor_flags platform.h +-! processor_power platform.h +-! pct_register platform.h +-? processor_px platform.h +-! psd_package platform.h +-! 
processor_performance platform.h diff --git a/24411-x86-ucode-AMD-Fam15.patch b/24411-x86-ucode-AMD-Fam15.patch new file mode 100644 index 0000000..bf93e91 --- /dev/null +++ b/24411-x86-ucode-AMD-Fam15.patch @@ -0,0 +1,143 @@ +References: bnc#736824 + +# HG changeset patch +# User Christoph Egger +# Date 1323943209 -3600 +# Node ID ca5f588bd203c9207e0988fcc80f43d83eed5420 +# Parent 25f8952313ae683f41b634163f62651185d7be38 +x86/ucode: fix for AMD Fam15 CPUs + +Remove hardcoded maximum size a microcode patch can have. This is +dynamic now. + +The microcode patch for family15h can be larger than 2048 bytes and +gets silently truncated. + +Signed-off-by: Christoph Egger +Signed-off-by: Jan Beulich + +--- 2011-12-14.orig/xen/arch/x86/microcode_amd.c 2011-12-15 14:55:15.000000000 +0100 ++++ 2011-12-14/xen/arch/x86/microcode_amd.c 2011-12-15 14:59:47.000000000 +0100 +@@ -27,18 +27,10 @@ + #include + #include + +-#define pr_debug(x...) ((void)0) +- + #define UCODE_MAGIC 0x00414d44 + #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 + #define UCODE_UCODE_TYPE 0x00000001 + +-#define UCODE_MAX_SIZE (2048) +-#define DEFAULT_UCODE_DATASIZE (896) +-#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) +-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +-#define DWSIZE (sizeof(uint32_t)) +- + /* serialize access to the physical write */ + static DEFINE_SPINLOCK(microcode_update_lock); + +@@ -99,7 +91,7 @@ static int microcode_fits(void *mc, int + } + + if ( mc_header->patch_id <= uci->cpu_sig.rev ) +- return -EINVAL; ++ return 0; + + printk(KERN_DEBUG "microcode: CPU%d found a matching microcode " + "update with version 0x%x (current=0x%x)\n", +@@ -147,8 +139,12 @@ static int apply_microcode(int cpu) + return 0; + } + +-static int get_next_ucode_from_buffer_amd(void *mc, const void *buf, +- size_t size, unsigned long *offset) ++static int get_next_ucode_from_buffer_amd( ++ void **mc, ++ size_t *mc_size, ++ const void *buf, ++ size_t size, ++ unsigned 
long *offset) + { + struct microcode_header_amd *mc_header; + size_t total_size; +@@ -181,8 +177,17 @@ static int get_next_ucode_from_buffer_am + return -EINVAL; + } + +- memset(mc, 0, UCODE_MAX_SIZE); +- memcpy(mc, (const void *)(&bufp[off + 8]), total_size); ++ if ( *mc_size < total_size ) ++ { ++ xfree(*mc); ++ *mc = xmalloc_bytes(total_size); ++ if ( !*mc ) ++ return -ENOMEM; ++ *mc_size = total_size; ++ } ++ else if ( *mc_size > total_size ) ++ memset(*mc + total_size, 0, *mc_size - total_size); ++ memcpy(*mc, mc_header, total_size); + + *offset = off + total_size + 8; + +@@ -236,10 +241,10 @@ static int cpu_request_microcode(int cpu + { + const uint32_t *buf_pos; + unsigned long offset = 0; +- int error = 0; +- int ret; ++ int error; + struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu); + void *mc; ++ size_t mc_size; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); +@@ -260,7 +265,9 @@ static int cpu_request_microcode(int cpu + return -EINVAL; + } + +- mc = xmalloc_bytes(UCODE_MAX_SIZE); ++ /* Size of 1st microcode patch in bytes */ ++ mc_size = buf_pos[offset / sizeof(*buf_pos) + 1]; ++ mc = xmalloc_bytes(mc_size); + if ( mc == NULL ) + { + printk(KERN_ERR "microcode: error! " +@@ -276,24 +284,33 @@ static int cpu_request_microcode(int cpu + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ +- while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) == 0) ++ while ( (error = get_next_ucode_from_buffer_amd(&mc, &mc_size, buf, size, ++ &offset)) == 0 ) + { ++ uci->mc.mc_amd = mc; ++ + error = microcode_fits(mc, cpu); + if (error <= 0) + continue; + + error = apply_microcode(cpu); + if (error == 0) ++ { ++ error = 1; + break; ++ } + } + + /* On success keep the microcode patch for + * re-apply on resume. 
+ */ +- if (error) { ++ if ( error <= 0 ) ++ { + xfree(mc); + mc = NULL; + } ++ else ++ error = 0; + uci->mc.mc_amd = mc; + + out: diff --git a/24412-x86-AMD-errata-model-shift.patch b/24412-x86-AMD-errata-model-shift.patch new file mode 100644 index 0000000..bef04f3 --- /dev/null +++ b/24412-x86-AMD-errata-model-shift.patch @@ -0,0 +1,23 @@ +# HG changeset patch +# User Jan Beulich +# Date 1323955725 -3600 +# Node ID 99caac2e35df41cbece606f663cb5570a62613c3 +# Parent ca5f588bd203c9207e0988fcc80f43d83eed5420 +x86/AMD: use correct shift count when merging model and stepping + +... for legacy errata matching. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -216,7 +216,7 @@ int cpu_has_amd_erratum(const struct cpu + } + + /* OSVW unavailable or ID unknown, match family-model-stepping range */ +- ms = (cpu->x86_model << 8) | cpu->x86_mask; ++ ms = (cpu->x86_model << 4) | cpu->x86_mask; + while ((range = va_arg(ap, int))) { + if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && + (ms >= AMD_MODEL_RANGE_START(range)) && diff --git a/24417-amd-erratum-573.patch b/24417-amd-erratum-573.patch new file mode 100644 index 0000000..ffdbb22 --- /dev/null +++ b/24417-amd-erratum-573.patch @@ -0,0 +1,85 @@ +# HG changeset patch +# User Jan Beulich +# Date 1324046740 -3600 +# Node ID 1452fb248cd513832cfbbd1100b9b72a0dde7ea6 +# Parent 01c8b27e3d7d4ad2b469be9922bb04b5eb0195e8 +x86/emulator: workaround for AMD erratum 573 + +The only cases where we might end up emulating fsincos (as any other +x87 operations without memory operands) are +- when a HVM guest is in real mode (not applicable on AMD) +- between two half page table updates in PAE mode (unlikely, and not + doing the emulation here does affect only performance, not + correctness) +- when a guest maliciously (or erroneously) modifies an (MMIO or page + table update) instruction under emulation (unspecified behavior) + +Hence, in order to avoid the erratum to 
cause harm to the entire host, +don't emulate fsincos on the affected AMD CPU families. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/tools/tests/x86_emulator/x86_emulate.c ++++ b/tools/tests/x86_emulator/x86_emulate.c +@@ -3,5 +3,7 @@ + #include + #include + ++#define cpu_has_amd_erratum(nr) 0 ++ + #include "x86_emulate/x86_emulate.h" + #include "x86_emulate/x86_emulate.c" +--- a/xen/arch/x86/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate.c +@@ -10,8 +10,15 @@ + */ + + #include ++#include /* current_cpu_info */ ++#include /* cpu_has_amd_erratum() */ + + /* Avoid namespace pollution. */ + #undef cmpxchg ++#undef cpuid ++#undef wbinvd ++ ++#define cpu_has_amd_erratum(nr) \ ++ cpu_has_amd_erratum(&current_cpu_data, AMD_ERRATUM_##nr) + + #include "x86_emulate/x86_emulate.c" +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -2621,6 +2621,9 @@ x86_emulate( + case 0xd9: /* FPU 0xd9 */ + switch ( modrm ) + { ++ case 0xfb: /* fsincos */ ++ fail_if(cpu_has_amd_erratum(573)); ++ /* fall through */ + case 0xc0 ... 0xc7: /* fld %stN */ + case 0xc8 ... 
0xcf: /* fxch %stN */ + case 0xd0: /* fnop */ +@@ -2646,7 +2649,6 @@ x86_emulate( + case 0xf8: /* fprem */ + case 0xf9: /* fyl2xp1 */ + case 0xfa: /* fsqrt */ +- case 0xfb: /* fsincos */ + case 0xfc: /* frndint */ + case 0xfd: /* fscale */ + case 0xfe: /* fsin */ +--- a/xen/include/asm-x86/amd.h ++++ b/xen/include/asm-x86/amd.h +@@ -138,6 +138,12 @@ + AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), \ + AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)) + ++#define AMD_ERRATUM_573 \ ++ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0xff, 0xf), \ ++ AMD_MODEL_RANGE(0x10, 0x0, 0x0, 0xff, 0xf), \ ++ AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \ ++ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf)) ++ + struct cpuinfo_x86; + int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...); + diff --git a/24429-mceinj-tool.patch b/24429-mceinj-tool.patch new file mode 100644 index 0000000..7167052 --- /dev/null +++ b/24429-mceinj-tool.patch @@ -0,0 +1,28 @@ +# HG changeset patch +# User Liu, Jinsong +# Date 1324219200 0 +# Node ID 9587ccc2ae3192fd5625a87fa58e840377471867 +# Parent 5b4b7e565ab82b06940889f2be7e30042b2881fc +X86-MCE: fix a bug of xen-mceinj tool + +Fix a bug of xen-mceinj tool which used to test mce by software way. 
+ +Signed-off-by: Liu, Jinsong +Committed-by: Keir Fraser + +--- a/tools/tests/mce-test/tools/xen-mceinj.c ++++ b/tools/tests/mce-test/tools/xen-mceinj.c +@@ -134,8 +134,12 @@ static int mca_cpuinfo(xc_interface *xc_ + { + struct xen_mc mc; + ++ memset(&mc, 0, sizeof(struct xen_mc)); ++ + mc.cmd = XEN_MC_physcpuinfo; +- if (xc_mca_op(xc_handle, &mc)) ++ mc.interface_version = XEN_MCA_INTERFACE_VERSION; ++ ++ if (!xc_mca_op(xc_handle, &mc)) + return mc.u.mc_physcpuinfo.ncpus; + else + return 0; diff --git a/24447-x86-TXT-INIT-SIPI-delay.patch b/24447-x86-TXT-INIT-SIPI-delay.patch new file mode 100644 index 0000000..6018922 --- /dev/null +++ b/24447-x86-TXT-INIT-SIPI-delay.patch @@ -0,0 +1,46 @@ +# HG changeset patch +# User Gang Wei +# Date 1325153274 0 +# Node ID a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199 +# Parent 2863b2f43a3bc9268885379d6fd55ed325b8c0a2 +X86: Add a delay between INIT & SIPIs for tboot AP bring-up in X2APIC case + +Without this delay, Xen could not bring APs up while working with +TXT/tboot, because tboot needs some time in APs to handle INIT before +becoming ready for receiving SIPIs (this delay was removed as part of +c/s 23724 by Tim Deegan). + +Signed-off-by: Gang Wei +Acked-by: Keir Fraser +Acked-by: Tim Deegan +Committed-by: Tim Deegan + +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -473,6 +474,18 @@ static int wakeup_secondary_cpu(int phys + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while ( send_status && (timeout++ < 1000) ); + } ++ else if ( tboot_in_measured_env() ) ++ { ++ /* ++ * With tboot AP is actually spinning in a mini-guest before ++ * receiving INIT. Upon receiving INIT ipi, AP need time to VMExit, ++ * update VMCS to tracking SIPIs and VMResume. ++ * ++ * While AP is in root mode handling the INIT the CPU will drop ++ * any SIPIs ++ */ ++ udelay(10); ++ } + + /* + * Should we send STARTUP IPIs ? 
diff --git a/24448-x86-pt-irq-leak.patch b/24448-x86-pt-irq-leak.patch new file mode 100644 index 0000000..3694421 --- /dev/null +++ b/24448-x86-pt-irq-leak.patch @@ -0,0 +1,31 @@ +References: bnc#735806 + +# HG changeset patch +# User Jan Beulich +# Date 1325492779 -3600 +# Node ID 3a22ed3ec534799b3cab55b0dc0a7380e701ecbe +# Parent a7b2610b8e5c9a15b1f5de9a3eabf7f19d0b4199 +x86/passthrough: don't leak guest IRQs + +As unmap_domain_pirq_emuirq() fails on a never mapped pIRQ, it must not +be called for the non-emu-IRQ case (to prevent the entire unmap +operation failing). + +Based on a suggestion from Stefano. + +Signed-off-by: Jan Beulich +Tested-by: Yongjie Ren +Acked-by: Stefano Stabellini + +--- a/xen/arch/x86/physdev.c ++++ b/xen/arch/x86/physdev.c +@@ -228,7 +228,8 @@ static int physdev_unmap_pirq(struct phy + if ( is_hvm_domain(d) ) + { + spin_lock(&d->event_lock); +- ret = unmap_domain_pirq_emuirq(d, unmap->pirq); ++ if ( domain_pirq_to_emuirq(d, unmap->pirq) != IRQ_UNBOUND ) ++ ret = unmap_domain_pirq_emuirq(d, unmap->pirq); + spin_unlock(&d->event_lock); + if ( unmap->domid == DOMID_SELF || ret ) + goto free_domain; diff --git a/32on64-extra-mem.patch b/32on64-extra-mem.patch index f022bde..6a38356 100644 --- a/32on64-extra-mem.patch +++ b/32on64-extra-mem.patch @@ -2,7 +2,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -2930,7 +2930,7 @@ class XendDomainInfo: +@@ -2931,7 +2931,7 @@ class XendDomainInfo: self.guest_bitsize = self.image.getBitSize() # Make sure there's enough RAM available for the domain diff --git a/change_home_server.patch b/change_home_server.patch index a7da5cd..c761ee1 100644 --- a/change_home_server.patch +++ b/change_home_server.patch @@ -2,7 +2,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py 
=================================================================== --- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -3130,6 +3130,11 @@ class XendDomainInfo: +@@ -3131,6 +3131,11 @@ class XendDomainInfo: self._cleanup_phantom_devs(paths) self._cleanupVm() diff --git a/ioemu-9868-MSI-X.patch b/ioemu-9868-MSI-X.patch new file mode 100644 index 0000000..9c5e7b8 --- /dev/null +++ b/ioemu-9868-MSI-X.patch @@ -0,0 +1,241 @@ +# HG changeset patch +# User Ian Jackson +# Date 1324565191 0 +# Node ID 11ca857d983420a9f54e4d0e6919f8e6bd5fca48 +# Parent 533ebc61dfef98d55f054c97ec30179660214852 +qemu: clean up MSI-X table handling + +This patch does cleaning up of QEMU MSI handling. The fixes are: +1. Changes made to MSI-X table mapping handling to eliminate the small +windows in which guest could have access to physical MSI-X table. +2. MSI-X table is mapped as read-only to QEMU, as masking of MSI-X is +already in Xen now. +3. For registers that coexists inside the MSI-X table (this could be +only PBA I think), value read from physical page would be returned. + +Signed-off-by: Shan Haitao + +Consolidated duplicate code into _pt_iomem_helper(). Fixed formatting. 
+ +Signed-off-by: Jan Beulich + +Acked-by: Haitao Shan +Acked-by: Stefano Stabellini + +committer: Ian Jackson + +--- a/tools/ioemu-qemu-xen/hw/pass-through.c ++++ b/tools/ioemu-qemu-xen/hw/pass-through.c +@@ -92,6 +92,7 @@ + + #include + #include ++#include + + extern int gfx_passthru; + int igd_passthru = 0; +@@ -1097,6 +1098,44 @@ uint8_t pci_intx(struct pt_dev *ptdev) + return r_val; + } + ++static int _pt_iomem_helper(struct pt_dev *assigned_device, int i, ++ uint32_t e_base, uint32_t e_size, int op) ++{ ++ if ( has_msix_mapping(assigned_device, i) ) ++ { ++ uint32_t msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 + ++ assigned_device->msix->total_entries * 16) >> XC_PAGE_SHIFT; ++ uint32_t bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT; ++ int ret = 0; ++ ++ if ( assigned_device->msix->table_off ) ++ ret = xc_domain_memory_mapping(xc_handle, domid, ++ e_base >> XC_PAGE_SHIFT, ++ assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT, ++ (assigned_device->msix->mmio_base_addr >> XC_PAGE_SHIFT) ++ - (e_base >> XC_PAGE_SHIFT), op); ++ ++ if ( ret == 0 && msix_last_pfn != bar_last_pfn ) ++ { ++ assert(msix_last_pfn < bar_last_pfn); ++ ret = xc_domain_memory_mapping(xc_handle, domid, ++ msix_last_pfn + 1, ++ (assigned_device->bases[i].access.maddr + ++ assigned_device->msix->table_off + ++ assigned_device->msix->total_entries * 16 + ++ XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT, ++ bar_last_pfn - msix_last_pfn, op); ++ } ++ ++ return ret; ++ } ++ ++ return xc_domain_memory_mapping(xc_handle, domid, ++ e_base >> XC_PAGE_SHIFT, ++ assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT, ++ (e_size + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT, op); ++} ++ + /* Being called each time a mmio region has been updated */ + static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size, + int type) +@@ -1118,13 +1157,11 @@ static void pt_iomem_map(PCIDevice *d, i + + if ( !first_map && old_ebase != -1 ) + { +- add_msix_mapping(assigned_device, i); +- /* 
Remove old mapping */ +- ret = xc_domain_memory_mapping(xc_handle, domid, +- old_ebase >> XC_PAGE_SHIFT, +- assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT, +- (e_size+XC_PAGE_SIZE-1) >> XC_PAGE_SHIFT, +- DPCI_REMOVE_MAPPING); ++ if ( has_msix_mapping(assigned_device, i) ) ++ unregister_iomem(assigned_device->msix->mmio_base_addr); ++ ++ ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size, ++ DPCI_REMOVE_MAPPING); + if ( ret != 0 ) + { + PT_LOG("Error: remove old mapping failed!\n"); +@@ -1135,22 +1172,26 @@ static void pt_iomem_map(PCIDevice *d, i + /* map only valid guest address */ + if (e_phys != -1) + { +- /* Create new mapping */ +- ret = xc_domain_memory_mapping(xc_handle, domid, +- assigned_device->bases[i].e_physbase >> XC_PAGE_SHIFT, +- assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT, +- (e_size+XC_PAGE_SIZE-1) >> XC_PAGE_SHIFT, +- DPCI_ADD_MAPPING); ++ if ( has_msix_mapping(assigned_device, i) ) ++ { ++ assigned_device->msix->mmio_base_addr = ++ assigned_device->bases[i].e_physbase ++ + assigned_device->msix->table_off; ++ ++ cpu_register_physical_memory(assigned_device->msix->mmio_base_addr, ++ (assigned_device->msix->total_entries * 16 + XC_PAGE_SIZE - 1) ++ & XC_PAGE_MASK, ++ assigned_device->msix->mmio_index); ++ } + ++ ret = _pt_iomem_helper(assigned_device, i, e_phys, e_size, ++ DPCI_ADD_MAPPING); + if ( ret != 0 ) + { + PT_LOG("Error: create new mapping failed!\n"); ++ return; + } + +- ret = remove_msix_mapping(assigned_device, i); +- if ( ret != 0 ) +- PT_LOG("Error: remove MSI-X mmio mapping failed!\n"); +- + if ( old_ebase != e_phys && old_ebase != -1 ) + pt_msix_update_remap(assigned_device, i); + } +--- a/tools/ioemu-qemu-xen/hw/pt-msi.c ++++ b/tools/ioemu-qemu-xen/hw/pt-msi.c +@@ -284,15 +284,6 @@ void pt_disable_msi_translate(struct pt_ + dev->msi_trans_en = 0; + } + +-/* MSI-X virtulization functions */ +-static void mask_physical_msix_entry(struct pt_dev *dev, int entry_nr, int mask) +-{ +- void *phys_off; +- +- 
phys_off = dev->msix->phys_iomem_base + 16 * entry_nr + 12; +- *(uint32_t *)phys_off = mask; +-} +- + static int pt_msix_update_one(struct pt_dev *dev, int entry_nr) + { + struct msix_entry_info *entry = &dev->msix->msix_entry[entry_nr]; +@@ -486,7 +477,6 @@ static void pci_msix_writel(void *opaque + { + if ( msix->enabled && !(val & 0x1) ) + pt_msix_update_one(dev, entry_nr); +- mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1); + } + } + +@@ -519,7 +509,11 @@ static uint32_t pci_msix_readl(void *opa + entry_nr = (addr - msix->mmio_base_addr) / 16; + offset = ((addr - msix->mmio_base_addr) % 16) / 4; + +- return msix->msix_entry[entry_nr].io_mem[offset]; ++ if ( addr - msix->mmio_base_addr < msix->total_entries * 16 ) ++ return msix->msix_entry[entry_nr].io_mem[offset]; ++ else ++ return *(uint32_t *)(msix->phys_iomem_base + ++ (addr - msix->mmio_base_addr)); + } + + static CPUReadMemoryFunc *pci_msix_read[] = { +@@ -528,39 +522,12 @@ static CPUReadMemoryFunc *pci_msix_read[ + pci_msix_readl + }; + +-int add_msix_mapping(struct pt_dev *dev, int bar_index) ++int has_msix_mapping(struct pt_dev *dev, int bar_index) + { + if ( !(dev->msix && dev->msix->bar_index == bar_index) ) + return 0; + +- return xc_domain_memory_mapping(xc_handle, domid, +- dev->msix->mmio_base_addr >> XC_PAGE_SHIFT, +- (dev->bases[bar_index].access.maddr +- + dev->msix->table_off) >> XC_PAGE_SHIFT, +- (dev->msix->total_entries * 16 +- + XC_PAGE_SIZE -1) >> XC_PAGE_SHIFT, +- DPCI_ADD_MAPPING); +-} +- +-int remove_msix_mapping(struct pt_dev *dev, int bar_index) +-{ +- if ( !(dev->msix && dev->msix->bar_index == bar_index) ) +- return 0; +- +- dev->msix->mmio_base_addr = dev->bases[bar_index].e_physbase +- + dev->msix->table_off; +- +- cpu_register_physical_memory(dev->msix->mmio_base_addr, +- dev->msix->total_entries * 16, +- dev->msix->mmio_index); +- +- return xc_domain_memory_mapping(xc_handle, domid, +- dev->msix->mmio_base_addr >> XC_PAGE_SHIFT, +- 
(dev->bases[bar_index].access.maddr +- + dev->msix->table_off) >> XC_PAGE_SHIFT, +- (dev->msix->total_entries * 16 +- + XC_PAGE_SIZE -1) >> XC_PAGE_SHIFT, +- DPCI_REMOVE_MAPPING); ++ return 1; + } + + int pt_msix_init(struct pt_dev *dev, int pos) +@@ -616,7 +583,7 @@ int pt_msix_init(struct pt_dev *dev, int + PT_LOG("table_off = %x, total_entries = %d\n", table_off, total_entries); + dev->msix->table_offset_adjust = table_off & 0x0fff; + dev->msix->phys_iomem_base = mmap(0, total_entries * 16 + dev->msix->table_offset_adjust, +- PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED, ++ PROT_READ, MAP_SHARED | MAP_LOCKED, + fd, dev->msix->table_base + table_off - dev->msix->table_offset_adjust); + dev->msix->phys_iomem_base = (void *)((char *)dev->msix->phys_iomem_base + + dev->msix->table_offset_adjust); +--- a/tools/ioemu-qemu-xen/hw/pt-msi.h ++++ b/tools/ioemu-qemu-xen/hw/pt-msi.h +@@ -107,10 +107,7 @@ void + pt_msix_disable(struct pt_dev *dev); + + int +-remove_msix_mapping(struct pt_dev *dev, int bar_index); +- +-int +-add_msix_mapping(struct pt_dev *dev, int bar_index); ++has_msix_mapping(struct pt_dev *dev, int bar_index); + + int + pt_msix_init(struct pt_dev *dev, int pos); diff --git a/ipxe-enable-nics.patch b/ipxe-enable-nics.patch new file mode 100644 index 0000000..8eb996e --- /dev/null +++ b/ipxe-enable-nics.patch @@ -0,0 +1,17 @@ +Index: xen-4.1.2-testing/tools/firmware/etherboot/Config +=================================================================== +--- xen-4.1.2-testing.orig/tools/firmware/etherboot/Config ++++ xen-4.1.2-testing/tools/firmware/etherboot/Config +@@ -1,11 +1,8 @@ + +-NICS = rtl8139 8086100e ++NICS = rtl8139 8086100e eepro100 e1000 pcnet32 10ec8029 + + CFLAGS += -UPXE_DHCP_STRICT + CFLAGS += -DPXE_DHCP_STRICT + + CFLAGS += -UNO_POST_PROMPT + CFLAGS += -DNO_POST_PROMPT +- +-CFLAGS += -UCONSOLE_SERIAL +-CFLAGS += -DCONSOLE_SERIAL=1 diff --git a/ipxe-gcc45-warnings.patch b/ipxe-gcc45-warnings.patch new file mode 100644 index 
0000000..811b415 --- /dev/null +++ b/ipxe-gcc45-warnings.patch @@ -0,0 +1,75 @@ +Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-f7c5918b179b +=================================================================== +--- /dev/null ++++ xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-f7c5918b179b +@@ -0,0 +1,61 @@ ++ ++Subject: [drivers] Fix warnings identified by gcc 4.5 ++From: Bruce Rogers brogers@novell.com Fri Apr 2 18:16:38 2010 -0600 ++Date: Fri Apr 16 07:32:49 2010 -0400: ++Git: f7c5918b179be57fc7f352cb33664eb43de02c30 ++ ++In building gpxe for openSUSE Factory (part of kvm package), there were ++a few problems identified by the compiler. This patch addresses them. ++ ++Signed-off-by: Bruce Rogers ++Signed-off-by: Stefan Hajnoczi ++Signed-off-by: Marty Connor ++ ++diff --git a/src/drivers/net/ath5k/ath5k_qcu.c b/src/drivers/net/ath5k/ath5k_qcu.c ++index a674b85..cb25029 100644 ++--- a/src/drivers/net/ath5k/ath5k_qcu.c +++++ b/src/drivers/net/ath5k/ath5k_qcu.c ++@@ -268,7 +268,7 @@ int ath5k_hw_reset_tx_queue(struct ath5k_hw *ah) ++ } ++ ++ if (tq->tqi_ready_time && ++- (tq->tqi_type != AR5K_TX_QUEUE_ID_CAB)) +++ (tq->tqi_type != AR5K_TX_QUEUE_CAB)) ++ ath5k_hw_reg_write(ah, AR5K_REG_SM(tq->tqi_ready_time, ++ AR5K_QCU_RDYTIMECFG_INTVAL) | ++ AR5K_QCU_RDYTIMECFG_ENABLE, ++diff --git a/src/drivers/net/ns83820.c b/src/drivers/net/ns83820.c ++index 44d875f..c5f2153 100644 ++--- a/src/drivers/net/ns83820.c +++++ b/src/drivers/net/ns83820.c ++@@ -687,7 +687,7 @@ static int ns83820_poll(struct nic *nic, int retrieve) ++ // rx_ring[entry].link = 0; ++ rx_ring[entry].cmdsts = cpu_to_le32(CMDSTS_OWN); ++ ++- ns->cur_rx = ++ns->cur_rx % NR_RX_DESC; +++ ns->cur_rx = (ns->cur_rx + 1) % NR_RX_DESC; ++ ++ if (ns->cur_rx == 0) /* We have wrapped the ring */ ++ kick_rx(); ++diff --git a/src/drivers/net/tulip.c b/src/drivers/net/tulip.c ++index e08e0d8..af30ec6 100644 ++--- a/src/drivers/net/tulip.c +++++ b/src/drivers/net/tulip.c ++@@ -1171,7 +1171,7 
@@ static int tulip_poll(struct nic *nic, int retrieve) ++ if (rx_ring[tp->cur_rx].status & 0x00008000) { ++ /* return the descriptor and buffer to receive ring */ ++ rx_ring[tp->cur_rx].status = 0x80000000; ++- tp->cur_rx = (++tp->cur_rx) % RX_RING_SIZE; +++ tp->cur_rx = (tp->cur_rx + 1) % RX_RING_SIZE; ++ return 0; ++ } ++ ++@@ -1180,7 +1180,7 @@ static int tulip_poll(struct nic *nic, int retrieve) ++ ++ /* return the descriptor and buffer to receive ring */ ++ rx_ring[tp->cur_rx].status = 0x80000000; ++- tp->cur_rx = (++tp->cur_rx) % RX_RING_SIZE; +++ tp->cur_rx = (tp->cur_rx + 1) % RX_RING_SIZE; ++ ++ return 1; ++ } +Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/series +=================================================================== +--- xen-4.1.2-testing.orig/tools/firmware/etherboot/patches/series ++++ xen-4.1.2-testing/tools/firmware/etherboot/patches/series +@@ -1,3 +1,4 @@ + boot_prompt_option.patch + gpxe-git-0edf2405b457 + gpxe-git-a803ef3dfeac ++ipxe-git-f7c5918b179b diff --git a/ipxe-ipv4-fragment.patch b/ipxe-ipv4-fragment.patch new file mode 100644 index 0000000..637c6a1 --- /dev/null +++ b/ipxe-ipv4-fragment.patch @@ -0,0 +1,368 @@ +Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-13186b64b6c3 +=================================================================== +--- /dev/null 2010-05-08 03:31:08.000000000 -0600 ++++ xen-4.1.2-testing/tools/firmware/etherboot/patches/ipxe-git-13186b64b6c3 2011-12-19 15:05:32.000000000 -0700 +@@ -0,0 +1,354 @@ ++commit 13186b64b6c3d5cbe9ed13bda1532e79b1afe81d ++Author: Michael Brown ++Date: Sat Jul 16 01:15:53 2011 +0100 ++ ++ [ipv4] Fix fragment reassembly ++ ++ Signed-off-by: Michael Brown ++ Signed-off-by: Michal Kubecek ++ ++diff -up a/src/include/gpxe/ip.h.orig-frag b/src/include/gpxe/ip.h ++--- a/src/include/gpxe/ip.h.orig-frag 2010-02-02 17:12:44.000000000 +0100 +++++ b/src/include/gpxe/ip.h 2011-11-18 15:49:17.202660163 +0100 ++@@ -32,9 +32,6 @@ struct net_protocol; ++ #define 
IP_TOS 0 ++ #define IP_TTL 64 ++ ++-#define IP_FRAG_IOB_SIZE 1500 ++-#define IP_FRAG_TIMEOUT 50 ++- ++ /** An IPv4 packet header */ ++ struct iphdr { ++ uint8_t verhdrlen; ++@@ -74,20 +71,16 @@ struct ipv4_miniroute { ++ struct in_addr gateway; ++ }; ++ ++-/* Fragment reassembly buffer */ ++-struct frag_buffer { ++- /* Identification number */ ++- uint16_t ident; ++- /* Source network address */ ++- struct in_addr src; ++- /* Destination network address */ ++- struct in_addr dest; ++- /* Reassembled I/O buffer */ ++- struct io_buffer *frag_iob; ++- /* Reassembly timer */ ++- struct retry_timer frag_timer; +++/* IPv4 fragment reassembly buffer */ +++struct ipv4_fragment { ++ /* List of fragment reassembly buffers */ ++ struct list_head list; +++ /** Reassembled packet */ +++ struct io_buffer *iobuf; +++ /** Current offset */ +++ size_t offset; +++ /** Reassembly timer */ +++ struct retry_timer timer; ++ }; ++ ++ extern struct list_head ipv4_miniroutes; ++diff -up a/src/include/gpxe/retry.h.orig-frag b/src/include/gpxe/retry.h ++--- a/src/include/gpxe/retry.h.orig-frag 2010-02-02 17:12:44.000000000 +0100 +++++ b/src/include/gpxe/retry.h 2011-11-18 15:59:25.258837891 +0100 ++@@ -51,6 +51,19 @@ struct retry_timer { ++ void ( * expired ) ( struct retry_timer *timer, int over ); ++ }; ++ +++/** +++ * Initialise a timer +++ * +++ * @v timer Retry timer +++ * @v expired Timer expired callback +++ */ +++static inline __attribute__ (( always_inline )) void +++timer_init ( struct retry_timer *timer, +++ void ( * expired ) ( struct retry_timer *timer, int over ) ) +++{ +++ timer->expired = expired; +++} +++ ++ extern void start_timer ( struct retry_timer *timer ); ++ extern void start_timer_fixed ( struct retry_timer *timer, ++ unsigned long timeout ); ++diff -up a/src/net/ipv4.c.orig-frag b/src/net/ipv4.c ++--- a/src/net/ipv4.c.orig-frag 2010-02-02 17:12:44.000000000 +0100 +++++ b/src/net/ipv4.c 2011-11-18 15:49:17.203660142 +0100 ++@@ -14,6 +14,7 @@ ++ #include ++ #include 
++ #include +++#include ++ ++ /** @file ++ * ++@@ -32,7 +33,10 @@ struct net_protocol ipv4_protocol; ++ struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes ); ++ ++ /** List of fragment reassembly buffers */ ++-static LIST_HEAD ( frag_buffers ); +++static LIST_HEAD ( ipv4_fragments ); +++ +++/** Fragment reassembly timeout */ +++#define IP_FRAG_TIMEOUT ( TICKS_PER_SEC / 2 ) ++ ++ /** ++ * Add IPv4 minirouting table entry ++@@ -134,104 +138,126 @@ static struct ipv4_miniroute * ipv4_rout ++ } ++ ++ /** ++- * Fragment reassembly counter timeout +++ * Expire fragment reassembly buffer ++ * ++- * @v timer Retry timer ++- * @v over If asserted, the timer is greater than @c MAX_TIMEOUT +++ * @v timer Retry timer +++ * @v fail Failure indicator ++ */ ++-static void ipv4_frag_expired ( struct retry_timer *timer __unused, ++- int over ) { ++- if ( over ) { ++- DBG ( "Fragment reassembly timeout" ); ++- /* Free the fragment buffer */ ++- } +++static void ipv4_fragment_expired ( struct retry_timer *timer, +++ int fail __unused ) { +++ struct ipv4_fragment *frag = +++ container_of ( timer, struct ipv4_fragment, timer ); +++ struct iphdr *iphdr = frag->iobuf->data; +++ +++ DBG ( "IPv4 fragment %04x expired\n", ntohs ( iphdr->ident ) ); +++ free_iob ( frag->iobuf ); +++ list_del ( &frag->list ); +++ free ( frag ); ++ } ++ ++ /** ++- * Free fragment buffer +++ * Find matching fragment reassembly buffer ++ * ++- * @v fragbug Fragment buffer +++ * @v iphdr IPv4 header +++ * @ret frag Fragment reassembly buffer, or NULL ++ */ ++-static void free_fragbuf ( struct frag_buffer *fragbuf ) { ++- free ( fragbuf ); +++static struct ipv4_fragment * ipv4_fragment ( struct iphdr *iphdr ) { +++ struct ipv4_fragment *frag; +++ struct iphdr *frag_iphdr; +++ +++ list_for_each_entry ( frag, &ipv4_fragments, list ) { +++ frag_iphdr = frag->iobuf->data; +++ +++ if ( ( iphdr->src.s_addr == frag_iphdr->src.s_addr ) && +++ ( iphdr->ident == frag_iphdr->ident ) ) { +++ return frag; +++ } 
+++ } +++ +++ return NULL; ++ } ++ ++ /** ++ * Fragment reassembler ++ * ++- * @v iobuf I/O buffer, fragment of the datagram ++- * @ret frag_iob Reassembled packet, or NULL +++ * @v iobuf I/O buffer +++ * @ret iobuf Reassembled packet, or NULL ++ */ ++-static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) { +++static struct io_buffer * ipv4_reassemble ( struct io_buffer *iobuf ) { ++ struct iphdr *iphdr = iobuf->data; ++- struct frag_buffer *fragbuf; ++- ++- /** ++- * Check if the fragment belongs to any fragment series ++- */ ++- list_for_each_entry ( fragbuf, &frag_buffers, list ) { ++- if ( fragbuf->ident == iphdr->ident && ++- fragbuf->src.s_addr == iphdr->src.s_addr ) { ++- /** ++- * Check if the packet is the expected fragment ++- * ++- * The offset of the new packet must be equal to the ++- * length of the data accumulated so far (the length of ++- * the reassembled I/O buffer ++- */ ++- if ( iob_len ( fragbuf->frag_iob ) == ++- ( iphdr->frags & IP_MASK_OFFSET ) ) { ++- /** ++- * Append the contents of the fragment to the ++- * reassembled I/O buffer ++- */ ++- iob_pull ( iobuf, sizeof ( *iphdr ) ); ++- memcpy ( iob_put ( fragbuf->frag_iob, ++- iob_len ( iobuf ) ), ++- iobuf->data, iob_len ( iobuf ) ); ++- free_iob ( iobuf ); ++- ++- /** Check if the fragment series is over */ ++- if ( ! 
( iphdr->frags & IP_MASK_MOREFRAGS ) ) { ++- iobuf = fragbuf->frag_iob; ++- free_fragbuf ( fragbuf ); ++- return iobuf; ++- } ++- ++- } else { ++- /* Discard the fragment series */ ++- free_fragbuf ( fragbuf ); ++- free_iob ( iobuf ); ++- } ++- return NULL; +++ size_t offset = ( ( ntohs ( iphdr->frags ) & IP_MASK_OFFSET ) << 3 ); +++ unsigned int more_frags = ( iphdr->frags & htons ( IP_MASK_MOREFRAGS )); +++ size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); +++ struct ipv4_fragment *frag; +++ size_t expected_offset; +++ struct io_buffer *new_iobuf; +++ +++ /* Find matching fragment reassembly buffer, if any */ +++ frag = ipv4_fragment ( iphdr ); +++ +++ /* Drop out-of-order fragments */ +++ expected_offset = ( frag ? frag->offset : 0 ); +++ if ( offset != expected_offset ) { +++ DBG ( "IPv4 dropping out-of-sequence fragment %04x (%zd+%zd, " +++ "expected %zd)\n", ntohs ( iphdr->ident ), offset, +++ ( iob_len ( iobuf ) - hdrlen ), expected_offset ); +++ goto drop; +++ } +++ +++ /* Create or extend fragment reassembly buffer as applicable */ +++ if ( frag == NULL ) { +++ +++ /* Create new fragment reassembly buffer */ +++ frag = zalloc ( sizeof ( *frag ) ); +++ if ( ! frag ) +++ goto drop; +++ list_add ( &frag->list, &ipv4_fragments ); +++ frag->iobuf = iobuf; +++ frag->offset = ( iob_len ( iobuf ) - hdrlen ); +++ timer_init ( &frag->timer, ipv4_fragment_expired ); +++ +++ } else { +++ +++ /* Extend reassembly buffer */ +++ iob_pull ( iobuf, hdrlen ); +++ new_iobuf = alloc_iob ( iob_len ( frag->iobuf ) + +++ iob_len ( iobuf ) ); +++ if ( ! 
new_iobuf ) { +++ DBG ( "IPv4 could not extend reassembly buffer to " +++ "%zd bytes\n", +++ ( iob_len ( frag->iobuf ) + iob_len ( iobuf ) ) ); +++ goto drop; ++ } ++- } ++- ++- /** Check if the fragment is the first in the fragment series */ ++- if ( iphdr->frags & IP_MASK_MOREFRAGS && ++- ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) { ++- ++- /** Create a new fragment buffer */ ++- fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) ); ++- fragbuf->ident = iphdr->ident; ++- fragbuf->src = iphdr->src; ++- ++- /* Set up the reassembly I/O buffer */ ++- fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE ); ++- iob_pull ( iobuf, sizeof ( *iphdr ) ); ++- memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ), +++ memcpy ( iob_put ( new_iobuf, iob_len ( frag->iobuf ) ), +++ frag->iobuf->data, iob_len ( frag->iobuf ) ); +++ memcpy ( iob_put ( new_iobuf, iob_len ( iobuf ) ), ++ iobuf->data, iob_len ( iobuf ) ); +++ free_iob ( frag->iobuf ); +++ frag->iobuf = new_iobuf; +++ frag->offset += iob_len ( iobuf ); ++ free_iob ( iobuf ); +++ iphdr = frag->iobuf->data; +++ iphdr->len = ntohs ( iob_len ( frag->iobuf ) ); ++ ++- /* Set the reassembly timer */ ++- fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT; ++- fragbuf->frag_timer.expired = ipv4_frag_expired; ++- start_timer ( &fragbuf->frag_timer ); +++ /* Stop fragment reassembly timer */ +++ stop_timer ( &frag->timer ); ++ ++- /* Add the fragment buffer to the list of fragment buffers */ ++- list_add ( &fragbuf->list, &frag_buffers ); +++ /* If this is the final fragment, return it */ +++ if ( ! 
more_frags ) { +++ iobuf = frag->iobuf; +++ list_del ( &frag->list ); +++ free ( frag ); +++ return iobuf; +++ } ++ } ++- +++ +++ /* (Re)start fragment reassembly timer */ +++ start_timer_fixed ( &frag->timer, IP_FRAG_TIMEOUT ); +++ +++ return NULL; +++ +++ drop: +++ free_iob ( iobuf ); ++ return NULL; ++ } ++ ++@@ -432,37 +458,38 @@ static int ipv4_rx ( struct io_buffer *i ++ goto err; ++ } ++ +++ /* Truncate packet to correct length */ +++ iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) ); +++ ++ /* Print IPv4 header for debugging */ ++ DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) ); ++ DBG ( "%s len %d proto %d id %04x csum %04x\n", ++ inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol, ++ ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) ); ++ ++- /* Truncate packet to correct length, calculate pseudo-header ++- * checksum and then strip off the IPv4 header. ++- */ ++- iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) ); ++- pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM ); ++- iob_pull ( iobuf, hdrlen ); ++- ++- /* Fragment reassembly */ ++- if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) || ++- ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) { ++- /* Pass the fragment to ipv4_reassemble() which either ++- * returns a fully reassembled I/O buffer or NULL. +++ /* Perform fragment reassembly if applicable */ +++ if ( iphdr->frags & htons ( IP_MASK_OFFSET | IP_MASK_MOREFRAGS ) ) { +++ /* Pass the fragment to ipv4_reassemble() which returns +++ * either a fully reassembled I/O buffer or NULL. ++ */ ++ iobuf = ipv4_reassemble ( iobuf ); ++ if ( ! 
iobuf ) ++ return 0; +++ iphdr = iobuf->data; +++ hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); ++ } ++ ++- /* Construct socket addresses and hand off to transport layer */ +++ /* Construct socket addresses, calculate pseudo-header +++ * checksum, and hand off to transport layer +++ */ ++ memset ( &src, 0, sizeof ( src ) ); ++ src.sin.sin_family = AF_INET; ++ src.sin.sin_addr = iphdr->src; ++ memset ( &dest, 0, sizeof ( dest ) ); ++ dest.sin.sin_family = AF_INET; ++ dest.sin.sin_addr = iphdr->dest; +++ pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM ); +++ iob_pull ( iobuf, hdrlen ); ++ if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st, ++ &dest.st, pshdr_csum ) ) != 0 ) { ++ DBG ( "IPv4 received packet rejected by stack: %s\n", +Index: xen-4.1.2-testing/tools/firmware/etherboot/patches/series +=================================================================== +--- xen-4.1.2-testing.orig/tools/firmware/etherboot/patches/series ++++ xen-4.1.2-testing/tools/firmware/etherboot/patches/series +@@ -2,3 +2,4 @@ boot_prompt_option.patch + gpxe-git-0edf2405b457 + gpxe-git-a803ef3dfeac + ipxe-git-f7c5918b179b ++ipxe-git-13186b64b6c3 diff --git a/multi-xvdp.patch b/multi-xvdp.patch index 2f1b485..3d9379d 100644 --- a/multi-xvdp.patch +++ b/multi-xvdp.patch @@ -18,7 +18,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py xc = xen.lowlevel.xc.xc() xoptions = XendOptions.instance() -@@ -3299,33 +3299,38 @@ class XendDomainInfo: +@@ -3300,33 +3300,38 @@ class XendDomainInfo: # This is a file, not a device. pygrub can cope with a # file if it's raw, but if it's QCOW or other such formats # used through blktap, then we need to mount it first. 
diff --git a/snapshot-xend.patch b/snapshot-xend.patch index 56d5ee7..dc77ccd 100644 --- a/snapshot-xend.patch +++ b/snapshot-xend.patch @@ -699,7 +699,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py self._endRestore() except: log.exception('VM resume failed') -@@ -2369,7 +2367,7 @@ class XendDomainInfo: +@@ -2370,7 +2368,7 @@ class XendDomainInfo: return self.getDeviceController(deviceClass).reconfigureDevice( devid, devconfig) @@ -708,7 +708,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py """Create the devices for a vm. @raise: VmError for invalid devices -@@ -2418,7 +2416,7 @@ class XendDomainInfo: +@@ -2419,7 +2417,7 @@ class XendDomainInfo: if self.image: @@ -717,7 +717,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py #if have pass-through devs, need the virtual pci slots info from qemu self.pci_device_configure_boot() -@@ -3044,7 +3042,7 @@ class XendDomainInfo: +@@ -3045,7 +3043,7 @@ class XendDomainInfo: self._introduceDomain() self.image = image.create(self, self.info) if self.image: diff --git a/tools-gdbserver-build.diff b/tools-gdbserver-build.diff deleted file mode 100644 index 70b58d3..0000000 --- a/tools-gdbserver-build.diff +++ /dev/null @@ -1,22 +0,0 @@ -Index: xen-4.0.2-testing/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in -=================================================================== ---- xen-4.0.2-testing.orig/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in -+++ xen-4.0.2-testing/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in -@@ -90,7 +90,7 @@ INCLUDE_CFLAGS = -I. -I${srcdir} -I$(src - GLOBAL_CFLAGS = ${MT_CFLAGS} ${MH_CFLAGS} - #PROFILE_CFLAGS = -pg - --WARN_CFLAGS = -Wall -+WARN_CFLAGS = -Wall -Wno-sequence-point - - # CFLAGS is specifically reserved for setting from the command line - # when running make. I.E. "make CFLAGS=-Wmissing-prototypes". 
-@@ -260,7 +260,7 @@ linux-low.o: linux-low.c $(linux_low_h) - $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@ - - linux-xen-low.o: linux-xen-low.c $(linux_low_h) $(server_h) -- $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@ -+ $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) -I../../../../../include/ $< @USE_THREAD_DB@ - - linux-arm-low.o: linux-arm-low.c $(linux_low_h) $(server_h) - linux-i386-low.o: linux-i386-low.c $(linux_low_h) $(server_h) diff --git a/x86-cpufreq-report.patch b/x86-cpufreq-report.patch index e429726..5b39f74 100644 --- a/x86-cpufreq-report.patch +++ b/x86-cpufreq-report.patch @@ -9,20 +9,18 @@ #include #include #include -@@ -63,6 +63,7 @@ long cpu_down_helper(void *data); - ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) - { - ret_t ret = 0; -+ struct vcpu *v; - struct xen_platform_op curop, *op = &curop; - - if ( !IS_PRIV(current->domain) ) -@@ -529,6 +530,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe +@@ -565,6 +565,42 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe op->u.mem_add.epfn, op->u.mem_add.pxm); break; + + case XENPF_get_cpu_freq: ++ case XENPF_get_cpu_freq_min: ++ case XENPF_get_cpu_freq_max: ++ { ++ struct vcpu *v; ++ const struct cpufreq_policy *policy; ++ + if ( op->u.get_cpu_freq.vcpu >= current->domain->max_vcpus || + !(v = current->domain->vcpu[op->u.get_cpu_freq.vcpu]) ) + { @@ -30,25 +28,39 @@ + break; + } + -+ op->u.get_cpu_freq.freq = per_cpu(cpufreq_cpu_policy, v->processor) -+ ? cpufreq_driver->get -+ ? 
cpufreq_driver->get(v->processor) -+ : per_cpu(cpufreq_cpu_policy, v->processor)->cur -+ : 0; ++ policy = per_cpu(cpufreq_cpu_policy, v->processor); ++ switch ( op->cmd & -!!policy ) ++ { ++ case XENPF_get_cpu_freq: ++ op->u.get_cpu_freq.freq = policy->cur; ++ break; ++ case XENPF_get_cpu_freq_min: ++ op->u.get_cpu_freq.freq = policy->min; ++ break; ++ case XENPF_get_cpu_freq_max: ++ op->u.get_cpu_freq.freq = policy->max; ++ break; ++ default: ++ op->u.get_cpu_freq.freq = 0; ++ break; ++ } + if ( copy_field_to_guest(u_xenpf_op, op, u.get_cpu_freq.freq) ) + ret = -EFAULT; -+ break; ++ } ++ break; + default: ret = -ENOSYS; break; --- a/xen/include/public/platform.h +++ b/xen/include/public/platform.h -@@ -451,6 +451,14 @@ struct xenpf_mem_hotadd +@@ -466,6 +466,16 @@ struct xenpf_mem_hotadd uint32_t flags; }; +#define XENPF_get_cpu_freq ('N' << 24) ++#define XENPF_get_cpu_freq_min (XENPF_get_cpu_freq + 1) ++#define XENPF_get_cpu_freq_max (XENPF_get_cpu_freq_min + 1) +struct xenpf_get_cpu_freq { + /* IN variables */ + uint32_t vcpu; @@ -59,7 +71,7 @@ struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ -@@ -471,6 +479,7 @@ struct xen_platform_op { +@@ -487,6 +497,7 @@ struct xen_platform_op { struct xenpf_cpu_ol cpu_ol; struct xenpf_cpu_hotadd cpu_add; struct xenpf_mem_hotadd mem_add; diff --git a/xen-config.diff b/xen-config.diff index 15f2866..85757cb 100644 --- a/xen-config.diff +++ b/xen-config.diff @@ -1,8 +1,6 @@ -Index: xen-4.1.2-testing/Config.mk -=================================================================== ---- xen-4.1.2-testing.orig/Config.mk -+++ xen-4.1.2-testing/Config.mk -@@ -177,7 +177,7 @@ endif +--- a/Config.mk ++++ b/Config.mk +@@ -178,7 +178,7 @@ endif # Specify which qemu-dm to use. This may be `ioemu' to use the old # Mercurial in-tree version, or a local directory, or a git URL. 
# CONFIG_QEMU ?= `pwd`/$(XEN_ROOT)/../qemu-xen.git @@ -11,7 +9,7 @@ Index: xen-4.1.2-testing/Config.mk QEMU_TAG := xen-4.1.2 #QEMU_TAG ?= e073e69457b4d99b6da0b6536296e3498f7f6599 -@@ -187,7 +187,7 @@ QEMU_TAG := xen-4.1.2 +@@ -188,7 +188,7 @@ QEMU_TAG := xen-4.1.2 # Optional components XENSTAT_XENTOP ?= y VTPM_TOOLS ?= n @@ -20,10 +18,8 @@ Index: xen-4.1.2-testing/Config.mk PYTHON_TOOLS ?= y OCAML_TOOLS ?= y CONFIG_MINITERM ?= n -Index: xen-4.1.2-testing/tools/Makefile -=================================================================== ---- xen-4.1.2-testing.orig/tools/Makefile -+++ xen-4.1.2-testing/tools/Makefile +--- a/tools/Makefile ++++ b/tools/Makefile @@ -79,14 +79,16 @@ IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TAR --interp-prefix=$(CROSS_SYS_ROOT) endif @@ -54,10 +50,8 @@ Index: xen-4.1.2-testing/tools/Makefile .PHONY: ioemu-dir-force-update ioemu-dir-force-update: -Index: xen-4.1.2-testing/tools/libxc/Makefile -=================================================================== ---- xen-4.1.2-testing.orig/tools/libxc/Makefile -+++ xen-4.1.2-testing/tools/libxc/Makefile +--- a/tools/libxc/Makefile ++++ b/tools/libxc/Makefile @@ -195,7 +195,7 @@ xc_dom_bzimageloader.opic: CFLAGS += $(c libxenguest.so.$(MAJOR).$(MINOR): COMPRESSION_LIBS = $(call zlib-options,l) @@ -67,10 +61,8 @@ Index: xen-4.1.2-testing/tools/libxc/Makefile xenctrl_osdep_ENOSYS.so: $(OSDEP_PIC_OBJS) libxenctrl.so $(CC) -g $(CFLAGS) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $(OSDEP_PIC_OBJS) -lxenctrl -Index: xen-4.1.2-testing/tools/firmware/etherboot/Makefile -=================================================================== ---- xen-4.1.2-testing.orig/tools/firmware/etherboot/Makefile -+++ xen-4.1.2-testing/tools/firmware/etherboot/Makefile +--- a/tools/firmware/etherboot/Makefile ++++ b/tools/firmware/etherboot/Makefile @@ -35,11 +35,13 @@ eb-roms.h: Config mv -f $@.new $@ diff --git a/xen-warnings-unused.diff b/xen-warnings-unused.diff index 696a8e0..98e0256 100644 --- a/xen-warnings-unused.diff 
+++ b/xen-warnings-unused.diff @@ -250,17 +250,6 @@ u8 bus, slot, func; dev = entry->dev; ---- a/xen/arch/x86/microcode_amd.c -+++ b/xen/arch/x86/microcode_amd.c -@@ -150,7 +150,7 @@ static int apply_microcode(int cpu) - static int get_next_ucode_from_buffer_amd(void *mc, const void *buf, - size_t size, unsigned long *offset) - { -- struct microcode_header_amd *mc_header; -+ struct microcode_header_amd __attribute__((__unused__)) *mc_header; - size_t total_size; - const uint8_t *bufp = buf; - unsigned long off; --- a/xen/common/cpupool.c +++ b/xen/common/cpupool.c @@ -356,7 +356,7 @@ int cpupool_add_domain(struct domain *d, @@ -296,7 +285,7 @@ --- a/xen/common/kexec.c +++ b/xen/common/kexec.c -@@ -569,7 +569,8 @@ static int kexec_exec(XEN_GUEST_HANDLE(v +@@ -573,7 +573,8 @@ static int kexec_exec(XEN_GUEST_HANDLE(v { xen_kexec_exec_t exec; xen_kexec_image_t *image; @@ -374,7 +363,7 @@ unsigned long long value; --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c -@@ -2338,7 +2338,7 @@ p2m_remove_page(struct p2m_domain *p2m, +@@ -2339,7 +2339,7 @@ p2m_remove_page(struct p2m_domain *p2m, unsigned int page_order) { unsigned long i; @@ -383,7 +372,7 @@ p2m_type_t t; p2m_access_t a; -@@ -2407,7 +2407,7 @@ guest_physmap_mark_populate_on_demand(st +@@ -2408,7 +2408,7 @@ guest_physmap_mark_populate_on_demand(st struct p2m_domain *p2m = p2m_get_hostp2m(d); unsigned long i; p2m_type_t ot; @@ -426,7 +415,7 @@ { --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c -@@ -275,7 +275,7 @@ static void acpi_processor_ffh_cstate_en +@@ -276,7 +276,7 @@ static void acpi_processor_ffh_cstate_en static void acpi_idle_do_entry(struct acpi_processor_cx *cx) { @@ -471,7 +460,7 @@ union hypercall_input { --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c -@@ -4906,7 +4906,7 @@ static int ptwr_emulated_update( +@@ -4914,7 +4914,7 @@ static int ptwr_emulated_update( { unsigned long mfn; unsigned long unaligned_addr = addr; @@ -591,7 +580,7 @@ if ( tmh->persistent_pool == NULL 
) --- a/xen/arch/x86/cpu/mcheck/vmce.c +++ b/xen/arch/x86/cpu/mcheck/vmce.c -@@ -574,7 +574,7 @@ int is_vmce_ready(struct mcinfo_bank *ba +@@ -571,7 +571,7 @@ int is_vmce_ready(struct mcinfo_bank *ba */ int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn) { @@ -634,7 +623,7 @@ case 3: /* x86_32p */ --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c -@@ -1854,7 +1854,11 @@ static int emulate_privileged_op(struct +@@ -1858,7 +1858,11 @@ static int emulate_privileged_op(struct struct vcpu *v = current; unsigned long *reg, eip = regs->eip; u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0; diff --git a/xen.changes b/xen.changes index fd7eb9a..6b89b52 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,140 @@ +------------------------------------------------------------------- +Tue Jan 3 08:26:42 MST 2012 - carnold@novell.com + +- bnc#735806 - VF doesn't work after hot-plug for many times + 24448-x86-pt-irq-leak.patch +- Upstream patches from Jan + 24261-x86-cpuidle-Westmere-EX.patch + 24417-amd-erratum-573.patch + 24429-mceinj-tool.patch + 24447-x86-TXT-INIT-SIPI-delay.patch + ioemu-9868-MSI-X.patch + +------------------------------------------------------------------- +Mon Jan 2 10:05:57 CET 2012 - ohering@suse.de + +- bnc#732884 - remove private runlevel 4 from init scripts + xen.no-default-runlevel-4.patch + +------------------------------------------------------------------- +Mon Dec 19 15:22:13 MST 2011 - carnold@novell.com + +- bnc#727515 - Fragmented packets hang network boot of HVM guest + ipxe-gcc45-warnings.patch + ipxe-ipv4-fragment.patch + ipxe-enable-nics.patch + +------------------------------------------------------------------- +Mon Dec 19 12:43:11 CET 2011 - ohering@suse.de + +- fate#310510 - fix xenpaging + update xenpaging.autostart.patch, make changes with mem-swap-target + permanent + update xenpaging.doc.patch, mention issues with live migration + 
+------------------------------------------------------------------- +Thu Dec 15 17:53:51 CET 2011 - ohering@suse.de + +- fate#310510 - fix xenpaging + add xenpaging.evict_mmap_readonly.patch + update xenpaging.error-handling.patch, reduce debug output + +------------------------------------------------------------------- +Thu Dec 15 08:35:27 MST 2011 - carnold@novell.com + +- bnc#736824 - Microcode patches for AMD's 15h processors panic the + system + 24189-x86-p2m-pod-locking.patch + 24412-x86-AMD-errata-model-shift.patch + 24411-x86-ucode-AMD-Fam15.patch + +------------------------------------------------------------------- +Wed Dec 14 10:08:24 MST 2011 - carnold@novell.com + +- bnc#711219 - SR-IOV VF doesn't work in SLES11 sp2 guest + 24357-firmware-no-_PS0-_PS3.patch +- Upstream patches from Jan + 24153-x86-emul-feature-checks.patch + 24275-x86-emul-lzcnt.patch + 24277-x86-dom0-features.patch + 24278-x86-dom0-no-PCID.patch + 24282-x86-log-dirty-bitmap-leak.patch + 24359-x86-domU-features.patch + 24360-x86-pv-domU-no-PCID.patch + 24389-amd-fam10-gart-tlb-walk-err.patch + 24391-x86-pcpu-version.patch + +------------------------------------------------------------------- +Thu Dec 8 14:19:49 CET 2011 - ohering@suse.de + +- bnc#729208 - xenpaging=-1 doesn't work + xenpaging.doc.patch + +------------------------------------------------------------------- +Thu Dec 8 08:41:36 CET 2011 - ohering@suse.de + +- fate#310510 - fix xenpaging + readd xenpaging.qemu.flush-cache.patch + +------------------------------------------------------------------- +Wed Dec 7 11:01:43 MST 2011 - jfehlig@suse.com + +- bnc#732782 - L3: xm create hangs when maxmem value is enclosed + in "quotes" + xm-create-maxmem.patch + +------------------------------------------------------------------- +Wed Dec 7 10:44:06 MST 2011 - carnold@novell.com + +- Upstream patches / changes from Jan + Added 24358-kexec-compat-overflow.patch + Removed 
24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch + Removed 24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch + +------------------------------------------------------------------- +Wed Dec 7 16:42:44 CET 2011 - ohering@suse.de + +- fate#310510 - fix xenpaging + 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch + Use wait queues for paging, improve foreign mappings. + xenpaging.versioned-interface.patch + xenpaging.mmap-before-nominate.patch + xenpaging.p2m_is_paged.patch + xenpaging.evict_fail_fast_forward.patch + xenpaging.error-handling.patch + xenpaging.mem_event-use-wait_queue.patch + xenpaging.waitqueue-paging.patch + Remove obsolete patch, not needed with wait queue usage + xenpaging.HVMCOPY_gfn_paged_out.patch + +------------------------------------------------------------------- +Wed Dec 7 16:23:49 CET 2011 - ohering@suse.de + +- fate#310510 - fix xenpaging + Fix incorrect backport, remove double memset, use xzalloc + 24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch + +------------------------------------------------------------------- +Wed Dec 7 12:08:31 CET 2011 - ohering@suse.de + +- fate#310510 - fix xenpaging + fix typo in nominate, use lock instead of double unlock + 23905-xenpaging_fix_locking_in_p2m_mem_paging_functions.patch + +------------------------------------------------------------------- +Wed Dec 7 11:07:23 CET 2011 - ohering@suse.de + +- fate#310510 - fix xenpaging + 24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch + 24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch + 24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch + +------------------------------------------------------------------- +Tue Dec 6 11:14:51 MST 2011 - jfehlig@suse.com + +- bnc#734826 - xm rename doesn't work anymore + Updated xend-migration-domname-fix.patch + 
------------------------------------------------------------------- Fri Dec 2 20:35:29 CET 2011 - ohering@suse.de diff --git a/xen.no-default-runlevel-4.patch b/xen.no-default-runlevel-4.patch new file mode 100644 index 0000000..2df4a60 --- /dev/null +++ b/xen.no-default-runlevel-4.patch @@ -0,0 +1,77 @@ +Related to bnc#732884 +Runlevel 4 is for local sysadmin. +He is responsible to create all required symlinks in this private runlevel. + +--- + tools/hotplug/Linux/init.d/xen-watchdog | 2 +- + tools/hotplug/Linux/init.d/xencommons | 2 +- + tools/hotplug/Linux/init.d/xend | 2 +- + tools/hotplug/Linux/init.d/xendomains | 2 +- + tools/xenballoon/xenballoond.init | 2 +- + 5 files changed, 5 insertions(+), 5 deletions(-) + +Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xen-watchdog +=================================================================== +--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xen-watchdog ++++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xen-watchdog +@@ -10,7 +10,7 @@ + # Should-Start: xend + # Required-Stop: $syslog $remote_fs + # Should-Stop: xend +-# Default-Start: 2 3 4 5 ++# Default-Start: 2 3 5 + # Default-Stop: 0 1 6 + # Short-Description: Start/stop xen-watchdog + # Description: Run domain watchdog daemon. 
+Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xencommons +=================================================================== +--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xencommons ++++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xencommons +@@ -12,7 +12,7 @@ + # Should-Start: + # Required-Stop: $syslog $remote_fs + # Should-Stop: +-# Default-Start: 2 3 4 5 ++# Default-Start: 2 3 5 + # Default-Stop: 0 1 6 + # Short-Description: Start/stop xenstored and xenconsoled + # Description: Starts and stops the daemons neeeded for xl/xend +Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xend +=================================================================== +--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xend ++++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xend +@@ -12,7 +12,7 @@ + # Should-Start: + # Required-Stop: $syslog $remote_fs xenstored xenconsoled + # Should-Stop: +-# Default-Start: 2 3 4 5 ++# Default-Start: 2 3 5 + # Default-Stop: 0 1 6 + # Short-Description: Start/stop xend + # Description: Starts and stops the Xen control daemon. 
+Index: xen-4.1.2-testing/tools/hotplug/Linux/init.d/xendomains +=================================================================== +--- xen-4.1.2-testing.orig/tools/hotplug/Linux/init.d/xendomains ++++ xen-4.1.2-testing/tools/hotplug/Linux/init.d/xendomains +@@ -20,7 +20,7 @@ + # Should-Start: xend + # Required-Stop: $syslog $remote_fs xenstored xenconsoled + # Should-Stop: xend +-# Default-Start: 2 3 4 5 ++# Default-Start: 2 3 5 + # Default-Stop: 0 1 6 + # Short-Description: Start/stop secondary xen domains + # Description: Start / stop domains automatically when domain 0 +Index: xen-4.1.2-testing/tools/xenballoon/xenballoond.init +=================================================================== +--- xen-4.1.2-testing.orig/tools/xenballoon/xenballoond.init ++++ xen-4.1.2-testing/tools/xenballoon/xenballoond.init +@@ -14,7 +14,7 @@ + # Should-Start: + # Required-Stop: $syslog $remote_fs + # Should-Stop: +-# Default-Start: 3 4 5 ++# Default-Start: 3 5 + # Default-Stop: 0 1 2 6 + # Short-Description: Start/stop xenballoond + # Description: Starts and stops the Xen ballooning daemon. diff --git a/xen.spec b/xen.spec index 11298fd..24643d6 100644 --- a/xen.spec +++ b/xen.spec @@ -1,7 +1,7 @@ # # spec file for package xen # -# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2012 SUSE LINUX Products GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -15,9 +15,6 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # -# norootforbuild - - Name: xen ExclusiveArch: %ix86 x86_64 %define xvers 4.1 @@ -52,6 +49,7 @@ BuildRequires: curl-devel BuildRequires: dev86 BuildRequires: graphviz BuildRequires: latex2html +BuildRequires: libbz2-devel BuildRequires: libjpeg-devel BuildRequires: libxml2-devel BuildRequires: ncurses-devel @@ -61,7 +59,6 @@ BuildRequires: pciutils-devel BuildRequires: python-devel BuildRequires: texinfo BuildRequires: transfig -BuildRequires: libbz2-devel %if %suse_version >= 1120 BuildRequires: xz-devel %endif @@ -81,9 +78,11 @@ BuildRequires: tetex %ifarch x86_64 %if %{?with_gcc46}0 BuildRequires: gcc46 -BuildRequires: libgcc46 libgcc46-32bit +BuildRequires: libgcc46 +BuildRequires: libgcc46-32bit %endif -BuildRequires: glibc-32bit glibc-devel-32bit +BuildRequires: glibc-32bit +BuildRequires: glibc-devel-32bit BuildRequires: gcc-32bit BuildRequires: gcc43-32bit %define max_cpus 256 @@ -94,15 +93,17 @@ BuildRequires: gcc43-32bit %endif BuildRequires: glibc-devel %if %{?with_kmp}0 -BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11 +BuildRequires: kernel-source +BuildRequires: kernel-syms +BuildRequires: module-init-tools +BuildRequires: xorg-x11 %endif -Version: 4.1.2_09 -Release: 1 -License: GPLv2+ -Group: System/Kernel -AutoReqProv: on +Version: 4.1.2_11 +Release: 0 PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) +License: GPL-2.0+ +Group: System/Kernel Source0: xen-4.1.2-testing-src.tar.bz2 Source1: stubdom.tar.bz2 Source2: xen-utils-0.1.tar.bz2 @@ -275,11 +276,14 @@ Patch24137: 24137-revert-23666.patch Patch24138: 24138-xenpaging_munmap_all_pages_after_page-in.patch Patch24144: 24144-cpufreq-turbo-crash.patch Patch24148: 
24148-shadow-pgt-dying-op-performance.patch +Patch24153: 24153-x86-emul-feature-checks.patch Patch24155: 24155-x86-ioapic-EOI-after-migration.patch Patch24156: 24156-x86-ioapic-shared-vectors.patch Patch24157: 24157-x86-xstate-init.patch Patch24168: 24168-x86-vioapic-clear-remote_irr.patch Patch24171: 24171-x86waitqueue_Allocate_whole_page_for_shadow_stack..patch +Patch24178: 24178-debug_Add_domain-vcpu_pause_count_info_to_d_key..patch +Patch24189: 24189-x86-p2m-pod-locking.patch Patch24190: 24190-hap-log-dirty-disable-rc.patch Patch24193: 24193-hap-track-dirty-vram-rc.patch Patch24195: 24195-waitqueue_Detect_saved-stack_overflow_and_crash_the_guest..patch @@ -308,14 +312,33 @@ Patch24226: 24226-xenpaging_add_debug_to_show_received_watch_event..patch Patch24227: 24227-xenpaging_restrict_pagefile_permissions.patch Patch24231: 24231-waitqueue_Implement_wake_up_nroneall..patch Patch24232: 24232-waitqueue_Hold_a_reference_to_a_domain_on_a_waitqueue..patch +Patch24261: 24261-x86-cpuidle-Westmere-EX.patch Patch24269: 24269-mem_event_move_mem_event_domain_out_of_struct_domain.patch Patch24270: 24270-Free_d-mem_event_on_domain_destruction..patch Patch24272: 24272-xenpaging_Fix_c-s_235070a29c8c3ddf7_update_machine_to_phys_mapping_during_page_deallocation.patch +Patch24275: 24275-x86-emul-lzcnt.patch +Patch24277: 24277-x86-dom0-features.patch +Patch24278: 24278-x86-dom0-no-PCID.patch +Patch24282: 24282-x86-log-dirty-bitmap-leak.patch Patch24318: 24318-x86-mm_Fix_checks_during_foreign_mapping_of_paged_pages.patch -Patch24341: 24341-x86-64-mmcfg_remove___initdata_annotation_overlooked_in_23749e8d1c8f074ba.patch Patch24344: 24344-tools-x86_64_Fix_cpuid_inline_asm_to_not_clobber_stacks_red_zone.patch -Patch24345: 24345-tools-libxc_Fix_x86_32_build_breakage_in_previous_changeset..patch +Patch24327: 24327-After_preparing_a_page_for_page-in_allow_immediate_fill-in_of_the_page_contents.patch +Patch24328: 
24328-Tools_Libxc_wrappers_to_automatically_fill_in_page_oud_page_contents_on_prepare.patch +Patch24329: 24329-Teach_xenpaging_to_use_the_new_and_non-racy_xc_mem_paging_load_interface.patch +Patch24357: 24357-firmware-no-_PS0-_PS3.patch +Patch24358: 24358-kexec-compat-overflow.patch +Patch24359: 24359-x86-domU-features.patch +Patch24360: 24360-x86-pv-domU-no-PCID.patch +Patch24389: 24389-amd-fam10-gart-tlb-walk-err.patch +Patch24391: 24391-x86-pcpu-version.patch +Patch24411: 24411-x86-ucode-AMD-Fam15.patch +Patch24412: 24412-x86-AMD-errata-model-shift.patch +Patch24417: 24417-amd-erratum-573.patch +Patch24429: 24429-mceinj-tool.patch +Patch24447: 24447-x86-TXT-INIT-SIPI-delay.patch +Patch24448: 24448-x86-pt-irq-leak.patch # Upstream qemu patches +Patch100: ioemu-9868-MSI-X.patch # Our patches Patch300: xen-config.diff Patch301: xend-config.diff @@ -355,12 +378,11 @@ Patch351: xend-core-dump-loc.diff Patch352: blktap.patch Patch353: xen-qemu-iscsi-fix.patch Patch354: xen-api-auth.patch -Patch355: tools-gdbserver-build.diff -Patch356: ioemu-vnc-resize.patch -Patch357: ioemu-debuginfo.patch -Patch358: vif-bridge-no-iptables.patch -Patch359: xenconsole-no-multiple-connections.patch -Patch360: disable-xl-when-using-xend.patch +Patch355: ioemu-vnc-resize.patch +Patch356: ioemu-debuginfo.patch +Patch357: vif-bridge-no-iptables.patch +Patch358: xenconsole-no-multiple-connections.patch +Patch359: disable-xl-when-using-xend.patch # Needs to go upstream Patch370: checkpoint-rename.patch Patch371: xm-save-check-file.patch @@ -372,8 +394,9 @@ Patch376: xend-devid-or-name.patch Patch377: suspend_evtchn_lock.patch Patch378: log-guest-console.patch Patch379: xend-migration-domname-fix.patch +Patch380: xm-create-maxmem.patch # Sent upstream and tentatively ACK'ed, but not yet committed -Patch380: 2XXXX-vif-bridge.patch +Patch381: 2XXXX-vif-bridge.patch # Patches for snapshot support Patch400: snapshot-ioemu-save.patch Patch401: snapshot-ioemu-restore.patch @@ -423,6 +446,9 @@ 
Patch456: xend-vcpu-affinity-fix.patch Patch457: xenstored.XS_RESET_WATCHES.patch Patch458: xen-cpupool-xl-config-format.patch Patch459: xl-create-pv-with-qcow2-img.patch +Patch460: ipxe-gcc45-warnings.patch +Patch461: ipxe-ipv4-fragment.patch +Patch462: ipxe-enable-nics.patch # Jim's domain lock patch Patch480: xend-domain-lock.patch Patch481: xend-domain-lock-sfex.patch @@ -444,10 +470,20 @@ Patch650: disable_emulated_device.diff Patch651: ioemu-disable-scsi.patch Patch652: ioemu-disable-emulated-ide-if-pv.patch Patch700: hv_extid_compatibility.patch +Patch701: xen.no-default-runlevel-4.patch # FATE 310510 +Patch1100: xenpaging.versioned-interface.patch +Patch1101: xenpaging.mmap-before-nominate.patch +Patch1102: xenpaging.p2m_is_paged.patch +Patch1103: xenpaging.evict_fail_fast_forward.patch +Patch1104: xenpaging.error-handling.patch +Patch1105: xenpaging.mem_event-use-wait_queue.patch +Patch1106: xenpaging.waitqueue-paging.patch +Patch1107: xenpaging.evict_mmap_readonly.patch Patch1126: xenpaging.guest-memusage.patch Patch1129: xenpaging.autostart.patch -Patch1130: xenpaging.HVMCOPY_gfn_paged_out.patch +Patch1130: xenpaging.doc.patch +Patch1142: xenpaging.qemu.flush-cache.patch # xenalyze Patch20000: xenalyze.gcc46.patch # Build patch @@ -460,7 +496,6 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build %suse_kernel_module_package -n xen um xen -f kmp_filelist %endif - %description Xen is a virtual machine monitor for x86 that supports execution of multiple guest operating systems with unprecedented levels of @@ -516,12 +551,9 @@ Authors: ... 
%package libs -License: GPLv2+ Summary: Xen Virtualization: Libraries Group: System/Kernel #Requires: xen = %{version} -AutoReqProv: on - %description libs Xen is a virtual machine monitor for x86 that supports execution of @@ -568,9 +600,7 @@ Authors: %if %{?with_dom0_support}0 - %package tools -License: GPLv2+ Summary: Xen Virtualization: Control tools for domain 0 Group: System/Kernel Requires: xen-libs = %{version} @@ -578,8 +608,6 @@ Requires: bridge-utils multipath-tools python python-curses python-openssl # subpackage existed in 10.3 Provides: xen-tools-ioemu = 3.2 Obsoletes: xen-tools-ioemu <= 3.2 -AutoReqProv: on - %description tools Xen is a virtual machine monitor for x86 that supports execution of @@ -628,14 +656,10 @@ Authors: Ian Pratt %endif - %package tools-domU -License: GPLv2+ Summary: Xen Virtualization: Control tools for domain U Group: System/Kernel Conflicts: xen-tools -AutoReqProv: on - %description tools-domU Xen is a virtual machine monitor for x86 that supports execution of @@ -652,12 +676,10 @@ Authors: Ian Pratt %package devel -License: GPLv2+ Summary: Xen Virtualization: Headers and libraries for development Group: System/Kernel Requires: xen-libs = %{version} - %description devel Xen is a virtual machine monitor for x86 that supports execution of multiple guest operating systems with unprecedented levels of @@ -703,14 +725,11 @@ Authors: %if %{?with_kmp}0 - %package KMP -License: GPLv2+ -Group: System/Kernel Summary: Xen para-virtual device drivers for fully virtualized guests +Group: System/Kernel Conflicts: xen - %description KMP Xen para-virtual device drivers for fully virtualized guests @@ -756,13 +775,10 @@ Xen, but is not available for release due to license restrictions. 
%if %{?with_dom0_support}0 - %package doc-html -License: GPLv2+ Summary: Xen Virtualization: HTML documentation Group: Documentation/HTML - %description doc-html Xen is a virtual machine monitor for x86 that supports execution of multiple guest operating systems with unprecedented levels of @@ -778,11 +794,9 @@ Authors: Ian Pratt %package doc-pdf -License: GPLv2+ Summary: Xen Virtualization: PDF documentation Group: Documentation/Other - %description doc-pdf Xen is a virtual machine monitor for x86 that supports execution of multiple guest operating systems with unprecedented levels of @@ -799,7 +813,6 @@ Authors: Ian Pratt %endif - %prep %setup -q -n %xen_build_dir -a 1 -a 20000 %patch20000 -p1 @@ -938,11 +951,14 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch24138 -p1 %patch24144 -p1 %patch24148 -p1 +%patch24153 -p1 %patch24155 -p1 %patch24156 -p1 %patch24157 -p1 %patch24168 -p1 %patch24171 -p1 +%patch24178 -p1 +%patch24189 -p1 %patch24190 -p1 %patch24193 -p1 %patch24195 -p1 @@ -971,14 +987,34 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch24227 -p1 %patch24231 -p1 %patch24232 -p1 +%patch24261 -p1 %patch24269 -p1 %patch24270 -p1 %patch24272 -p1 +%patch24275 -p1 +%patch24277 -p1 +%patch24278 -p1 +%patch24282 -p1 %patch24318 -p1 -%patch24341 -p1 %patch24344 -p1 -%patch24345 -p1 -# Upstream patches +%patch24327 -p1 +%patch24328 -p1 +%patch24329 -p1 +%patch24357 -p1 +%patch24358 -p1 +%patch24359 -p1 +%patch24360 -p1 +%patch24389 -p1 +%patch24391 -p1 +%patch24411 -p1 +%patch24412 -p1 +%patch24417 -p1 +%patch24429 -p1 +%patch24447 -p1 +%patch24448 -p1 +# Qemu +%patch100 -p1 +# Our patches %patch300 -p1 %patch301 -p1 %patch302 -p1 @@ -1017,12 +1053,11 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch352 -p1 %patch353 -p1 %patch354 -p1 -###%patch355 -p1 gdbserver +%patch355 -p1 %patch356 -p1 %patch357 -p1 %patch358 -p1 %patch359 -p1 -%patch360 -p1 %patch370 -p1 %patch371 -p1 %patch372 -p1 @@ -1034,6 +1069,7 
@@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch378 -p1 %patch379 -p1 %patch380 -p1 +%patch381 -p1 %patch400 -p1 %patch401 -p1 %patch402 -p1 @@ -1080,6 +1116,9 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch457 -p1 %patch458 -p1 %patch459 -p1 +%patch460 -p1 +%patch461 -p1 +%patch462 -p1 %patch480 -p1 %patch481 -p1 %patch500 -p1 @@ -1099,15 +1138,24 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch651 -p1 %patch652 -p1 %patch700 -p1 +%patch701 -p1 # FATE 310510 +%patch1100 -p1 +%patch1101 -p1 +%patch1102 -p1 +%patch1103 -p1 +%patch1104 -p1 +%patch1105 -p1 +%patch1106 -p1 +%patch1107 -p1 %patch1126 -p1 %patch1129 -p1 %patch1130 -p1 +%patch1142 -p1 # %patch99998 -p1 %patch99999 -p1 - %build XEN_EXTRAVERSION=%version-%release XEN_EXTRAVERSION=${XEN_EXTRAVERSION#%{xvers}} @@ -1143,7 +1191,6 @@ for flavor in %flavors_to_build; do done %endif - %install export CFLAGS="$RPM_OPT_FLAGS" %if %{?with_dom0_support}0 @@ -1346,7 +1393,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons %if %{?with_dom0_support}0 - %files -f xen.files.txt %defattr(-,root,root) /boot/xen-%{version}-%{release}.gz @@ -1363,7 +1409,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons /boot/xen.gz %endif - %files libs %defattr(-,root,root) %{_libdir}/fs/ @@ -1371,7 +1416,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons %if %{?with_dom0_support}0 - %files tools %defattr(-,root,root) /usr/bin/xenalyze @@ -1475,14 +1519,12 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons %config %{_fwdefdir}/xend-relocation-server %endif - %files tools-domU %defattr(-,root,root) /usr/bin/xen-detect /bin/domu-xenstore /bin/xenstore-* - %files devel %defattr(-,root,root) %{_bindir}/serial-split @@ -1492,12 +1534,10 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons %if %{?with_dom0_support}0 - %files doc-html %defattr(-,root,root) %{_defaultdocdir}/xen/html - %files doc-pdf %defattr(-,root,root) %{_defaultdocdir}/xen/pdf @@ -1505,7 +1545,6 @@ rm -f $RPM_BUILD_ROOT/%{_bindir}/xencons %if 
%{?with_dom0_support}0 - %post tools %if %{?with_xend}0 # with_xend @@ -1551,11 +1590,9 @@ if [ -f /usr/bin/qemu-nbd ]; then ln -s /usr/bin/qemu-nbd /usr/bin/qemu-nbd-xen fi - %preun tools %{stop_on_removal xendomains xend xencommons} - %postun tools %if %{?with_xend}0 # with_xend @@ -1570,12 +1607,8 @@ if [ -f /usr/bin/qemu-nbd-xen ]; then fi %endif - %post libs -p /sbin/ldconfig - %postun libs -p /sbin/ldconfig - - %changelog diff --git a/xend-console-port-restore.patch b/xend-console-port-restore.patch index e418684..718ab74 100644 --- a/xend-console-port-restore.patch +++ b/xend-console-port-restore.patch @@ -21,7 +21,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -3053,7 +3053,7 @@ class XendDomainInfo: +@@ -3054,7 +3054,7 @@ class XendDomainInfo: # TODO: recategorise - called from XendCheckpoint # @@ -30,7 +30,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py log.debug("XendDomainInfo.completeRestore") -@@ -3064,6 +3064,7 @@ class XendDomainInfo: +@@ -3065,6 +3065,7 @@ class XendDomainInfo: self.image = image.create(self, self.info) if self.image: self._createDevices(True) diff --git a/xend-domain-lock-sfex.patch b/xend-domain-lock-sfex.patch index ad54d70..49b7c73 100644 --- a/xend-domain-lock-sfex.patch +++ b/xend-domain-lock-sfex.patch @@ -223,7 +223,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -4517,8 +4517,14 @@ class XendDomainInfo: +@@ -4518,8 +4518,14 @@ class XendDomainInfo: # Return name of host contained in lock file. 
def get_lock_host(self, path): @@ -240,7 +240,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py hostname = "unknown" try: -@@ -4540,6 +4546,16 @@ class XendDomainInfo: +@@ -4541,6 +4547,16 @@ class XendDomainInfo: path = xoptions.get_xend_domain_lock_path() path = os.path.join(path, self.get_uuid()) @@ -257,7 +257,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py try: if not os.path.exists(path): mkdir.parents(path, stat.S_IRWXU) -@@ -4547,12 +4563,7 @@ class XendDomainInfo: +@@ -4548,12 +4564,7 @@ class XendDomainInfo: log.exception("%s could not be created." % path) raise XendError("%s could not be created." % path) @@ -271,7 +271,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py if status != 0: log.debug("Failed to aqcuire lock: status = %d" % status) raise XendError("The VM is locked and appears to be running on host %s." % self.get_lock_host(path)) -@@ -4569,12 +4580,18 @@ class XendDomainInfo: +@@ -4570,12 +4581,18 @@ class XendDomainInfo: path = xoptions.get_xend_domain_lock_path() path = os.path.join(path, self.get_uuid()) diff --git a/xend-domain-lock.patch b/xend-domain-lock.patch index cd92b2e..e2f6c09 100644 --- a/xend-domain-lock.patch +++ b/xend-domain-lock.patch @@ -257,7 +257,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py XendTask.log_progress(0, 30, self._constructDomain) XendTask.log_progress(31, 60, self._initDomain) -@@ -3001,6 +3002,11 @@ class XendDomainInfo: +@@ -3002,6 +3003,11 @@ class XendDomainInfo: self._stateSet(DOM_STATE_HALTED) self.domid = None # Do not push into _stateSet()! 
@@ -269,7 +269,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py finally: self.refresh_shutdown_lock.release() -@@ -4509,6 +4515,74 @@ class XendDomainInfo: +@@ -4510,6 +4516,74 @@ class XendDomainInfo: def has_device(self, dev_class, dev_uuid): return (dev_uuid in self.info['%s_refs' % dev_class.lower()]) diff --git a/xend-migration-domname-fix.patch b/xend-migration-domname-fix.patch index cbcf6db..f765e47 100644 --- a/xend-migration-domname-fix.patch +++ b/xend-migration-domname-fix.patch @@ -8,11 +8,12 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -1946,6 +1946,7 @@ class XendDomainInfo: +@@ -1946,6 +1946,8 @@ class XendDomainInfo: self.info['name_label'] = name if to_store: self.storeVm("name", name) -+ self.storeDom("name", name) ++ if self.dompath: ++ self.storeDom("name", name) def getName(self): return self.info['name_label'] diff --git a/xend-vcpu-affinity-fix.patch b/xend-vcpu-affinity-fix.patch index 6fc5c8b..3b57af0 100644 --- a/xend-vcpu-affinity-fix.patch +++ b/xend-vcpu-affinity-fix.patch @@ -2,7 +2,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -2776,7 +2776,10 @@ class XendDomainInfo: +@@ -2777,7 +2777,10 @@ class XendDomainInfo: from xen.xend import XendDomain doms = XendDomain.instance().list('all') for dom in filter (lambda d: d.domid != self.domid, doms): diff --git a/xenpaging.HVMCOPY_gfn_paged_out.patch b/xenpaging.HVMCOPY_gfn_paged_out.patch deleted file mode 100644 index c17655e..0000000 --- a/xenpaging.HVMCOPY_gfn_paged_out.patch +++ /dev/null @@ -1,151 +0,0 @@ - -xenpaging: 
handle HVMCOPY_gfn_paged_out in copy_from/to_user - -copy_from_user_hvm can fail when __hvm_copy returns -HVMCOPY_gfn_paged_out for a referenced gfn, for example during guests -pagetable walk. This has to be handled in some way. - -For the time being, return -EAGAIN for the most common case (xen_balloon -driver crashing in guest) until the recently added waitqueues will be -used. - -Signed-off-by: Olaf Hering - ---- - xen/arch/x86/hvm/hvm.c | 4 ++++ - xen/common/memory.c | 39 ++++++++++++++++++++++++++++++++++----- - 2 files changed, 38 insertions(+), 5 deletions(-) - -Index: xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/arch/x86/hvm/hvm.c -+++ xen-4.1.2-testing/xen/arch/x86/hvm/hvm.c -@@ -2247,6 +2247,8 @@ unsigned long copy_to_user_hvm(void *to, - - rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from, - len, 0); -+ if ( unlikely(rc == HVMCOPY_gfn_paged_out) ) -+ return -EAGAIN; - return rc ? len : 0; /* fake a copy_to_user() return code */ - } - -@@ -2264,6 +2266,8 @@ unsigned long copy_from_user_hvm(void *t - #endif - - rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0); -+ if ( unlikely(rc == HVMCOPY_gfn_paged_out) ) -+ return -EAGAIN; - return rc ? 
len : 0; /* fake a copy_from_user() return code */ - } - -Index: xen-4.1.2-testing/xen/common/memory.c -=================================================================== ---- xen-4.1.2-testing.orig/xen/common/memory.c -+++ xen-4.1.2-testing/xen/common/memory.c -@@ -48,6 +48,7 @@ static void increase_reservation(struct - { - struct page_info *page; - unsigned long i; -+ unsigned long ctg_ret; - xen_pfn_t mfn; - struct domain *d = a->domain; - -@@ -81,8 +82,13 @@ static void increase_reservation(struct - if ( !guest_handle_is_null(a->extent_list) ) - { - mfn = page_to_mfn(page); -- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) -+ ctg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1); -+ if ( unlikely(ctg_ret) ) -+ { -+ if ( (long)ctg_ret == -EAGAIN ) -+ a->preempted = 1; - goto out; -+ } - } - } - -@@ -94,6 +100,7 @@ static void populate_physmap(struct memo - { - struct page_info *page; - unsigned long i, j; -+ unsigned long cftg_ret; - xen_pfn_t gpfn, mfn; - struct domain *d = a->domain; - -@@ -112,8 +119,13 @@ static void populate_physmap(struct memo - goto out; - } - -- if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) -+ cftg_ret = __copy_from_guest_offset(&gpfn, a->extent_list, i, 1); -+ if ( unlikely(cftg_ret) ) -+ { -+ if ( (long)cftg_ret == -EAGAIN ) -+ a->preempted = 1; - goto out; -+ } - - if ( a->memflags & MEMF_populate_on_demand ) - { -@@ -143,8 +155,13 @@ static void populate_physmap(struct memo - set_gpfn_from_mfn(mfn + j, gpfn + j); - - /* Inform the domain of the new page's machine address. 
*/ -- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) -+ cftg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1); -+ if ( unlikely(cftg_ret) ) -+ { -+ if ( (long)cftg_ret == -EAGAIN ) -+ a->preempted = 1; - goto out; -+ } - } - } - } -@@ -213,6 +230,7 @@ int guest_remove_page(struct domain *d, - static void decrease_reservation(struct memop_args *a) - { - unsigned long i, j; -+ unsigned long cfg_ret; - xen_pfn_t gmfn; - - if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, -@@ -227,8 +245,13 @@ static void decrease_reservation(struct - goto out; - } - -- if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) ) -+ cfg_ret = __copy_from_guest_offset(&gmfn, a->extent_list, i, 1); -+ if ( unlikely(cfg_ret) ) -+ { -+ if ( (long)cfg_ret == -EAGAIN ) -+ a->preempted = 1; - goto out; -+ } - - if ( tb_init_done ) - { -@@ -509,6 +532,7 @@ long do_memory_op(unsigned long cmd, XEN - int rc, op; - unsigned int address_bits; - unsigned long start_extent; -+ unsigned long cfg_ret; - struct xen_memory_reservation reservation; - struct memop_args args; - domid_t domid; -@@ -522,8 +546,13 @@ long do_memory_op(unsigned long cmd, XEN - case XENMEM_populate_physmap: - start_extent = cmd >> MEMOP_EXTENT_SHIFT; - -- if ( copy_from_guest(&reservation, arg, 1) ) -+ cfg_ret = copy_from_guest(&reservation, arg, 1); -+ if ( unlikely(cfg_ret) ) -+ { -+ if ( (long)cfg_ret == -EAGAIN ) -+ return hypercall_create_continuation(__HYPERVISOR_memory_op, "lh", cmd, arg); - return start_extent; -+ } - - /* Is size too large for us to encode a continuation? 
*/ - if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) ) diff --git a/xenpaging.autostart.patch b/xenpaging.autostart.patch index e516ef1..58a6880 100644 --- a/xenpaging.autostart.patch +++ b/xenpaging.autostart.patch @@ -38,12 +38,12 @@ v2: tools/python/README.sxpcfg | 3 + tools/python/xen/xend/XendConfig.py | 9 +++ tools/python/xen/xend/XendDomain.py | 15 +++++ - tools/python/xen/xend/XendDomainInfo.py | 22 ++++++++ + tools/python/xen/xend/XendDomainInfo.py | 23 ++++++++ tools/python/xen/xend/image.py | 85 ++++++++++++++++++++++++++++++++ tools/python/xen/xm/create.py | 15 +++++ tools/python/xen/xm/main.py | 14 +++++ tools/python/xen/xm/xenapi_create.py | 3 + - 10 files changed, 178 insertions(+) + 10 files changed, 179 insertions(+) Index: xen-4.1.2-testing/tools/examples/xmexample.hvm =================================================================== @@ -150,7 +150,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.1.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -1503,6 +1503,16 @@ class XendDomainInfo: +@@ -1503,6 +1503,17 @@ class XendDomainInfo: break xen.xend.XendDomain.instance().managed_config_save(self) @@ -163,11 +163,12 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py + + if self.domid > 0: + self.storeDom("memory/target-tot_pages", target * 1024) ++ self.info['platform']['actmem'] = str(target) + def setMemoryTarget(self, target): """Set the memory target of this domain. @param target: In MiB. 
-@@ -2291,6 +2301,8 @@ class XendDomainInfo: +@@ -2292,6 +2303,8 @@ class XendDomainInfo: self.info['name_label'], self.domid, self.info['uuid'], new_name, new_uuid) self._unwatchVm() @@ -176,7 +177,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py self._releaseDevices() # Remove existing vm node in xenstore self._removeVm() -@@ -2965,6 +2977,9 @@ class XendDomainInfo: +@@ -2966,6 +2979,9 @@ class XendDomainInfo: self._createDevices() @@ -186,7 +187,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py self.image.cleanupTmpImages() self.info['start_time'] = time.time() -@@ -2989,6 +3004,8 @@ class XendDomainInfo: +@@ -2990,6 +3006,8 @@ class XendDomainInfo: self.refresh_shutdown_lock.acquire() try: self.unwatchShutdown() @@ -195,7 +196,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py self._releaseDevices() bootloader_tidy(self) -@@ -3073,6 +3090,7 @@ class XendDomainInfo: +@@ -3074,6 +3092,7 @@ class XendDomainInfo: self.image = image.create(self, self.info) if self.image: self._createDevices(True) @@ -203,7 +204,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py self.console_port = console_port self._storeDomDetails() self._registerWatches() -@@ -3214,6 +3232,8 @@ class XendDomainInfo: +@@ -3215,6 +3234,8 @@ class XendDomainInfo: # could also fetch a parsed note from xenstore fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0 if not fast: @@ -212,7 +213,7 @@ Index: xen-4.1.2-testing/tools/python/xen/xend/XendDomainInfo.py self._releaseDevices() self.testDeviceComplete() self.testvifsComplete() -@@ -3229,6 +3249,8 @@ class XendDomainInfo: +@@ -3230,6 +3251,8 @@ class XendDomainInfo: self._storeDomDetails() self._createDevices() diff --git a/xenpaging.doc.patch b/xenpaging.doc.patch new file mode 100644 index 0000000..e304325 --- /dev/null +++ b/xenpaging.doc.patch @@ -0,0 +1,92 @@ +--- + docs/misc/xenpaging.txt | 66 +++++++++++++++++++++++++++++------------------- + 1 file 
changed, 40 insertions(+), 26 deletions(-) + +Index: xen-4.1.2-testing/docs/misc/xenpaging.txt +=================================================================== +--- xen-4.1.2-testing.orig/docs/misc/xenpaging.txt ++++ xen-4.1.2-testing/docs/misc/xenpaging.txt +@@ -1,8 +1,6 @@ + Warning: + + The xenpaging code is new and not fully debugged. +-Usage of xenpaging can crash Xen or cause severe data corruption in the +-guest memory and its filesystems! + + Description: + +@@ -14,34 +12,50 @@ than physically available on the host. + + Usage: + +-Once the guest is running, run xenpaging with the guest_id and the +-number of pages to page-out: ++To enable xenpaging for a guest add the option 'actmem=' to the guests ++config file and run 'xm new ' to make the changes ++active. actmem= takes the amount of memory in MB which a guest is ++allowed to use at a given time. Everything above this limit will be ++paged out. This paging is transparent to the guest. ++ ++Example: ++ memory=4096 ++ actmem=1024 ++In this example a guest gets the impression it has 4GB of memory and ++the guest OS has to configure itself for this amount of memory. But ++xenpaging will page-out 3072MB, leaving only 1024MB active at a time. ++ ++At runtime the configured value of actmem= can be changed with the "xm ++mem-swap-target" command. ++ xm mem-swap-target 512 ++ ++Additional cmdline options for the xenpaging binary can be specified ++with the xenpaging_extra= config file option: ++ ++ xenpaging_extra=[ '-f', '/dev/shm/pagefile-guest_name', '-v' ] ++ ++To get a list of available options, run /usr/lib/xen/bin/xenpaging -h: ++ ++ xenpaging [options] -f -d ++ ++options: ++ -d --domain= numerical domain_id of guest. This option is required. ++ -f --pagefile= pagefile to use. This option is required. ++ -m --max_memkb= maximum amount of memory to handle. ++ -r --mru_size= number of paged-in pages to keep in memory. ++ -v --verbose enable debug output. ++ -h --help this output. 
++ ++ ++Caveats: ++Live migration with a paged guest does currently not work, the guest ++will crash once it starts on the target host. As a workaround stop ++paging before starting the migration: + +- chdir /var/lib/xen/xenpaging +- xenpaging +- +-To obtain the guest_id, run 'xm list'. +-xenpaging will write the pagefile to the current directory. +-Example with 128MB pagefile on guest 1: +- +- xenpaging 1 32768 +- +-Caution: stopping xenpaging manually will cause the guest to stall or +-crash because the paged-out memory is not written back into the guest! +- +-After a reboot of a guest, its guest_id changes, the current xenpaging +-binary has no target anymore. To automate restarting of xenpaging after +-guest reboot, specify the number if pages in the guest configuration +-file /etc/xen/vm/: +- +-xenpaging=32768 +- +-Redo the guest with 'xm create /etc/xen/vm/' to activate the +-changes. ++xm mem-swap-target 0 && xm migrate -l + + + Todo: +-- implement stopping of xenpaging + - implement/test live migration + + diff --git a/xenpaging.error-handling.patch b/xenpaging.error-handling.patch new file mode 100644 index 0000000..0248cf9 --- /dev/null +++ b/xenpaging.error-handling.patch @@ -0,0 +1,183 @@ +# HG changeset patch +# Parent 5a299906312e606553e6dd2acbe44ab692722a75 +xenpaging: improve evict error handling + +Adjust return codes in Xen and handle errors in evict_victim() properly. + +p2m_mem_paging_nominate() returns -EAGAIN, p2m_mem_paging_evict() +returns -EBUSY. Other errors indicate guest failures, which +xenpaging_evict_page() can now catch correctly. Also write() failures +are fatal. + +Without this change, evict_victim() may spin forever if the guest is +killed because this function does not get a signal. 
+ +Signed-off-by: Olaf Hering + +--- + tools/xenpaging/xenpaging.c | 47 ++++++++++++++++++++++++++--------------- + xen/arch/x86/mm/p2m.c | 7 +----- + xen/include/public/mem_event.h | 2 - + 3 files changed, 33 insertions(+), 23 deletions(-) + +--- a/tools/xenpaging/xenpaging.c ++++ b/tools/xenpaging/xenpaging.c +@@ -569,29 +569,35 @@ static int xenpaging_evict_page(xenpagin + xc_interface *xch = paging->xc_handle; + void *page; + unsigned long gfn; +- int ret; ++ int ret = -1; + + DECLARE_DOMCTL; + + /* Map page to get a handle */ + gfn = victim->gfn; +- ret = -EFAULT; + page = xc_map_foreign_pages(xch, paging->mem_event.domain_id, + PROT_READ | PROT_WRITE, &gfn, 1); + if ( page == NULL ) + { +- PERROR("Error mapping page %lx", victim->gfn); ++ if ( errno == EINVAL ) ++ ret = 1; ++ else ++ PERROR("Error mapping page %lx", victim->gfn); + goto out; + } + + /* Nominate the page */ +- ret = xc_mem_paging_nominate(xch, paging->mem_event.domain_id, gfn); +- if ( ret != 0 ) ++ if ( xc_mem_paging_nominate(xch, paging->mem_event.domain_id, gfn) ) ++ { ++ if ( errno == EAGAIN ) ++ ret = 1; ++ else ++ PERROR("Error nominating page %lx", victim->gfn); + goto out; ++ } + + /* Copy page */ +- ret = write_page(fd, page, i); +- if ( ret != 0 ) ++ if ( write_page(fd, page, i) ) + { + PERROR("Error copying page %lx", victim->gfn); + goto out; +@@ -601,10 +607,10 @@ static int xenpaging_evict_page(xenpagin + page = NULL; + + /* Tell Xen to evict page */ +- ret = xc_mem_paging_evict(xch, paging->mem_event.domain_id, +- victim->gfn); +- if ( ret != 0 ) ++ if ( xc_mem_paging_evict(xch, paging->mem_event.domain_id, victim->gfn) ) + { ++ if ( errno == EBUSY ) ++ ret = 1; + PERROR("Error evicting page %lx", victim->gfn); + goto out; + } +@@ -616,6 +622,8 @@ static int xenpaging_evict_page(xenpagin + /* Record number of evicted pages */ + paging->num_paged_out++; + ++ ret = 0; ++ + out: + if (page) + munmap(page, PAGE_SIZE); +@@ -724,7 +732,7 @@ static int evict_victim(xenpaging_t *pag + 
xenpaging_victim_t *victim, int fd, int i) + { + xc_interface *xch = paging->xc_handle; +- int j = 0; ++ int flushed = 0; + int ret; + + do +@@ -732,9 +740,13 @@ static int evict_victim(xenpaging_t *pag + ret = policy_choose_victim(paging, victim); + if ( ret != 0 ) + { +- if ( ret != -ENOSPC ) +- ERROR("Error choosing victim"); +- goto out; ++ if ( !flushed ) { ++ DPRINTF("Flushing qemu cache\n"); ++ xenpaging_mem_paging_flush_ioemu_cache(paging); ++ flushed = 1; ++ continue; ++ } ++ goto out; + } + + if ( interrupted ) +@@ -742,11 +754,12 @@ static int evict_victim(xenpaging_t *pag + ret = -EINTR; + goto out; + } ++ + ret = xenpaging_evict_page(paging, victim, fd, i); +- if ( ret && j++ % 1000 == 0 ) ++ if ( ret < 0 ) + { +- if ( xenpaging_mem_paging_flush_ioemu_cache(paging) ) +- PERROR("Error flushing ioemu cache"); ++ ret = -EINTR; ++ goto out; + } + } + while ( ret ); +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -2863,19 +2863,17 @@ int p2m_mem_paging_nominate(struct p2m_d + p2m_type_t p2mt; + p2m_access_t a; + mfn_t mfn; +- int ret; ++ int ret = -EAGAIN; + + p2m_lock(p2m); + + mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query); + + /* Check if mfn is valid */ +- ret = -EINVAL; + if ( !mfn_valid(mfn) ) + goto out; + + /* Check p2m type */ +- ret = -EAGAIN; + if ( !p2m_is_pageable(p2mt) ) + goto out; + +@@ -2928,7 +2926,7 @@ int p2m_mem_paging_evict(struct p2m_doma + p2m_access_t a; + mfn_t mfn; + struct domain *d = p2m->domain; +- int ret = -EINVAL; ++ int ret = -EBUSY; + + p2m_lock(p2m); + +@@ -2941,7 +2939,6 @@ int p2m_mem_paging_evict(struct p2m_doma + if ( p2mt != p2m_ram_paging_out ) + goto out; + +- ret = -EBUSY; + /* Get the page so it doesn't get modified under Xen's feet */ + page = mfn_to_page(mfn); + if ( unlikely(!get_page(page, d)) ) +--- a/xen/include/public/mem_event.h ++++ b/xen/include/public/mem_event.h +@@ -49,7 +49,7 @@ + #define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */ + #define 
MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */ + +-#define MEM_EVENT_PAGING_AGE 2UL /* Number distinguish the mem_paging <-> pager interface */ ++#define MEM_EVENT_PAGING_AGE 3UL /* Number distinguish the mem_paging <-> pager interface */ + + typedef struct mem_event_shared_page { + uint32_t port; diff --git a/xenpaging.evict_fail_fast_forward.patch b/xenpaging.evict_fail_fast_forward.patch new file mode 100644 index 0000000..0efec1e --- /dev/null +++ b/xenpaging.evict_fail_fast_forward.patch @@ -0,0 +1,57 @@ +# HG changeset patch +# Parent 00989d5f44b59ba7f3a467342a14b9c7621fa926 +xenpaging: restore p2mt if gfn is needed before evict + +In the rare case that a gfn is needed by a guest or a foreign domain +between nominate and evict, restore the p2mt and skip sending a request. +A request is not needed because the pager will notice the evict failure. + +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/mm/p2m.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -3036,6 +3036,7 @@ void p2m_mem_paging_populate(struct p2m_ + p2m_type_t p2mt; + p2m_access_t a; + mfn_t mfn; ++ int restored = 0; + struct domain *d = p2m->domain; + + /* Check that there's space on the ring for this request */ +@@ -3051,23 +3052,25 @@ void p2m_mem_paging_populate(struct p2m_ + /* Allow only nominated or evicted pages to enter page-in path */ + if ( p2m_do_populate(p2mt) ) + { +- /* Evict will fail now, tag this request for pager */ +- if ( p2mt == p2m_ram_paging_out ) +- req.flags |= MEM_EVENT_FLAG_EVICT_FAIL; +- +- set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a); ++ /* Restore page state if gfn was requested before evict */ ++ if ( p2mt == p2m_ram_paging_out && mfn_valid(mfn) ) { ++ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw, a); ++ restored = 1; ++ } else { ++ set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a); ++ } + audit_p2m(p2m, 1); + } + 
p2m_unlock(p2m); + + /* Pause domain if request came from guest and gfn has paging type */ +- if ( p2m_is_paging(p2mt) && v->domain == d ) ++ if ( !restored && p2m_is_paging(p2mt) && v->domain == d ) + { + vcpu_pause_nosync(v); + req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; + } + /* No need to inform pager if the gfn is not in the page-out path */ +- else if ( !p2m_do_populate(p2mt) ) ++ else if ( restored || !p2m_do_populate(p2mt) ) + { + /* gfn is already on its way back and vcpu is not paused */ + mem_event_put_req_producers(&d->mem_event->paging); diff --git a/xenpaging.evict_mmap_readonly.patch b/xenpaging.evict_mmap_readonly.patch new file mode 100644 index 0000000..2a1bbac --- /dev/null +++ b/xenpaging.evict_mmap_readonly.patch @@ -0,0 +1,20 @@ +xenpaging: mmap gfn to evict in readonly mode + +nominate/evict will not modify the page so there is no need to map the page rw. + +--- + tools/xenpaging/xenpaging.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/tools/xenpaging/xenpaging.c ++++ b/tools/xenpaging/xenpaging.c +@@ -575,8 +575,7 @@ static int xenpaging_evict_page(xenpagin + + /* Map page to get a handle */ + gfn = victim->gfn; +- page = xc_map_foreign_pages(xch, paging->mem_event.domain_id, +- PROT_READ | PROT_WRITE, &gfn, 1); ++ page = xc_map_foreign_pages(xch, paging->mem_event.domain_id, PROT_READ, &gfn, 1); + if ( page == NULL ) + { + if ( errno == EINVAL ) diff --git a/xenpaging.mem_event-use-wait_queue.patch b/xenpaging.mem_event-use-wait_queue.patch new file mode 100644 index 0000000..75c2097 --- /dev/null +++ b/xenpaging.mem_event-use-wait_queue.patch @@ -0,0 +1,559 @@ +# HG changeset patch +# Parent aa97fafb53fc95aaec8d9890635d14304f24c362 +mem_event: use wait queue when ring is full + +This change is based on an idea/patch from Adin Scannell. + +If the ring is full, put the current vcpu to sleep if it belongs to the +target domain. The wakeup happens in the p2m_*_resume functions. 
Wakeup +will take the number of free slots into account. + +A request from a foreign domain has to succeed once a slot was claimed +because such vcpus cannot sleep. + +This change also fixes a bug in p2m_mem_paging_drop_page(). Until now a +full ring led to a harmless inconsistency in the pager. + +v6: + - take foreign requests into account before calling wake_up_nr() + - call wake_up_nr() outside of ring lock + - rename ->bit to ->pause_flag + +v5: + - rename mem_event_check_ring() to mem_event_claim_slot() + - rename mem_event_put_req_producers() to mem_event_release_slot() + - add local/foreign request accounting + - keep room for at least one guest request + +v4: + - fix off-by-one bug in _mem_event_put_request + - add mem_event_wake_requesters() and use wake_up_nr() + - rename mem_event_unpause_vcpus() and mem_event_mark_and_pause() functions + - req_producers counts foreign request producers, rename member + +v3: + - rename ->mem_event_bit to ->bit + - remove me_ from new VPF_ defines + +v2: + - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the + vcpu will not wake_up after a wait_event because the pause_count was + increased twice. Fixes guest hangs.
+ - update free space check in _mem_event_put_request() + - simplify mem_event_put_request() + +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/hvm/hvm.c | 4 - + xen/arch/x86/mm/mem_event.c | 147 ++++++++++++++++++++++++++++++++++------ + xen/arch/x86/mm/mem_sharing.c | 46 ++++-------- + xen/arch/x86/mm/p2m.c | 36 ++++----- + xen/include/asm-x86/mem_event.h | 10 +- + xen/include/xen/sched.h | 17 +++- + 6 files changed, 179 insertions(+), 81 deletions(-) + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -3915,8 +3915,8 @@ static int hvm_memory_event_traps(long p + if ( (p & HVMPME_onchangeonly) && (value == old) ) + return 1; + +- rc = mem_event_check_ring(d, &d->mem_event->access); +- if ( rc ) ++ rc = mem_event_claim_slot(d, &d->mem_event->access); ++ if ( rc < 0 ) + return rc; + + memset(&req, 0, sizeof(req)); +--- a/xen/arch/x86/mm/mem_event.c ++++ b/xen/arch/x86/mm/mem_event.c +@@ -23,6 +23,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -39,6 +40,7 @@ + + static int mem_event_enable(struct domain *d, + xen_domctl_mem_event_op_t *mec, ++ int pause_flag, + struct mem_event_domain *med) + { + int rc; +@@ -94,8 +96,12 @@ static int mem_event_enable(struct domai + + mem_event_ring_lock_init(med); + ++ med->pause_flag = pause_flag; ++ ++ init_waitqueue_head(&med->wq); ++ + /* Wake any VCPUs paused for memory events */ +- mem_event_unpause_vcpus(d); ++ mem_event_wake_waiters(d, med); + + return 0; + +@@ -111,6 +117,9 @@ static int mem_event_enable(struct domai + + static int mem_event_disable(struct mem_event_domain *med) + { ++ if (!list_empty(&med->wq.list)) ++ return -EBUSY; ++ + unmap_domain_page(med->ring_page); + med->ring_page = NULL; + +@@ -120,13 +129,24 @@ static int mem_event_disable(struct mem_ + return 0; + } + +-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req) ++static int _mem_event_put_request(struct domain *d, ++ struct mem_event_domain *med, ++ 
mem_event_request_t *req) + { + mem_event_front_ring_t *front_ring; ++ int free_req, claimed_req; + RING_IDX req_prod; + + mem_event_ring_lock(med); + ++ free_req = RING_FREE_REQUESTS(&med->front_ring); ++ /* Foreign requests must succeed because their vcpus can not sleep */ ++ claimed_req = med->foreign_producers; ++ if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) { ++ mem_event_ring_unlock(med); ++ return 0; ++ } ++ + front_ring = &med->front_ring; + req_prod = front_ring->req_prod_pvt; + +@@ -134,14 +154,35 @@ void mem_event_put_request(struct domain + memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req)); + req_prod++; + ++ /* Update accounting */ ++ if ( current->domain == d ) ++ med->target_producers--; ++ else ++ med->foreign_producers--; ++ + /* Update ring */ +- med->req_producers--; + front_ring->req_prod_pvt = req_prod; + RING_PUSH_REQUESTS(front_ring); + + mem_event_ring_unlock(med); + + notify_via_xen_event_channel(d, med->xen_port); ++ ++ return 1; ++} ++ ++void mem_event_put_request(struct domain *d, struct mem_event_domain *med, ++ mem_event_request_t *req) ++{ ++ /* Go to sleep if request came from guest */ ++ if (current->domain == d) { ++ wait_event(med->wq, _mem_event_put_request(d, med, req)); ++ return; ++ } ++ /* Ring was full anyway, unable to sleep in non-guest context */ ++ if (!_mem_event_put_request(d, med, req)) ++ printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id, ++ req->type, req->flags, (unsigned long)req->gfn); + } + + void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp) +@@ -165,32 +206,97 @@ void mem_event_get_response(struct mem_e + mem_event_ring_unlock(med); + } + +-void mem_event_unpause_vcpus(struct domain *d) ++/** ++ * mem_event_wake_requesters - Wake vcpus waiting for room in the ring ++ * @d: guest domain ++ * @med: mem_event ring ++ * ++ * mem_event_wake_requesters() will wakeup vcpus waiting for room in the ++ * ring. 
Only as many as can place another request in the ring will ++ * resume execution. ++ */ ++void mem_event_wake_requesters(struct mem_event_domain *med) ++{ ++ int free_req; ++ ++ mem_event_ring_lock(med); ++ free_req = RING_FREE_REQUESTS(&med->front_ring); ++ free_req -= med->foreign_producers; ++ mem_event_ring_unlock(med); ++ ++ if ( free_req ) ++ wake_up_nr(&med->wq, free_req); ++} ++ ++/** ++ * mem_event_wake_waiters - Wake all vcpus waiting for the ring ++ * @d: guest domain ++ * @med: mem_event ring ++ * ++ * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to ++ * become available. ++ */ ++void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med) + { + struct vcpu *v; + + for_each_vcpu ( d, v ) +- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) ) ++ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) ) + vcpu_wake(v); + } + +-void mem_event_mark_and_pause(struct vcpu *v) ++/** ++ * mem_event_mark_and_sleep - Put vcpu to sleep ++ * @v: guest vcpu ++ * @med: mem_event ring ++ * ++ * mem_event_mark_and_sleep() tags vcpu and put it to sleep. ++ * The vcpu will resume execution in mem_event_wake_waiters(). ++ */ ++void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med) + { +- set_bit(_VPF_mem_event, &v->pause_flags); ++ set_bit(med->pause_flag, &v->pause_flags); + vcpu_sleep_nosync(v); + } + +-void mem_event_put_req_producers(struct mem_event_domain *med) ++/** ++ * mem_event_release_slot - Release a claimed slot ++ * @med: mem_event ring ++ * ++ * mem_event_release_slot() releases a claimed slot in the mem_event ring. 
++ */ ++void mem_event_release_slot(struct domain *d, struct mem_event_domain *med) + { + mem_event_ring_lock(med); +- med->req_producers--; ++ if ( current->domain == d ) ++ med->target_producers--; ++ else ++ med->foreign_producers--; + mem_event_ring_unlock(med); + } + +-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med) ++/** ++ * mem_event_claim_slot - Check state of a mem_event ring ++ * @d: guest domain ++ * @med: mem_event ring ++ * ++ * Return codes: < 0: the ring is not yet configured ++ * 0: the ring has some room ++ * > 0: the ring is full ++ * ++ * mem_event_claim_slot() checks the state of the given mem_event ring. ++ * If the current vcpu belongs to the guest domain, the function assumes that ++ * mem_event_put_request() will sleep until the ring has room again. ++ * A guest can always place at least one request. ++ * ++ * If the current vcpu does not belong to the target domain the caller must try ++ * again until there is room. A slot is claimed and the caller can place a ++ * request. If the caller does not need to send a request, the claimed slot has ++ * to be released with mem_event_release_slot(). 
++ */ ++int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med) + { +- struct vcpu *curr = current; +- int free_requests; ++ int free_req; + int ring_full = 1; + + if ( !med->ring_page ) +@@ -198,16 +304,17 @@ int mem_event_check_ring(struct domain * + + mem_event_ring_lock(med); + +- free_requests = RING_FREE_REQUESTS(&med->front_ring); +- if ( med->req_producers < free_requests ) ++ free_req = RING_FREE_REQUESTS(&med->front_ring); ++ ++ if ( current->domain == d ) { ++ med->target_producers++; ++ ring_full = 0; ++ } else if ( med->foreign_producers + med->target_producers + 1 < free_req ) + { +- med->req_producers++; ++ med->foreign_producers++; + ring_full = 0; + } + +- if ( ring_full && (curr->domain == d) ) +- mem_event_mark_and_pause(curr); +- + mem_event_ring_unlock(med); + + return ring_full; +@@ -283,7 +390,7 @@ int mem_event_domctl(struct domain *d, x + break; + } + +- rc = mem_event_enable(d, mec, med); ++ rc = mem_event_enable(d, mec, _VPF_mem_paging, med); + } + break; + +@@ -322,7 +429,7 @@ int mem_event_domctl(struct domain *d, x + if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) + break; + +- rc = mem_event_enable(d, mec, med); ++ rc = mem_event_enable(d, mec, _VPF_mem_access, med); + } + break; + +--- a/xen/arch/x86/mm/mem_sharing.c ++++ b/xen/arch/x86/mm/mem_sharing.c +@@ -292,44 +292,32 @@ static void mem_sharing_audit(void) + #endif + + +-static struct page_info* mem_sharing_alloc_page(struct domain *d, +- unsigned long gfn, +- int must_succeed) ++static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn) + { +- struct page_info* page; + struct vcpu *v = current; +- mem_event_request_t req; ++ mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED }; + +- page = alloc_domheap_page(d, 0); +- if(page != NULL) return page; +- +- memset(&req, 0, sizeof(req)); +- req.type = MEM_EVENT_TYPE_SHARED; +- +- if(must_succeed) ++ if ( v->domain != d ) + { +- /* We do not support 'must_succeed' any more. 
External operations such +- * as grant table mappings may fail with OOM condition! +- */ +- BUG(); +- } +- else +- { +- /* All foreign attempts to unshare pages should be handled through +- * 'must_succeed' case. */ +- ASSERT(v->domain->domain_id == d->domain_id); +- vcpu_pause_nosync(v); +- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; ++ /* XXX This path needs some attention. For now, just fail foreign ++ * XXX requests to unshare if there's no memory. This replaces ++ * XXX old code that BUG()ed here; the callers now BUG() ++ * XXX elewhere. */ ++ gdprintk(XENLOG_ERR, ++ "Failed alloc on unshare path for foreign (%d) lookup\n", ++ d->domain_id); ++ return; + } + +- if(mem_event_check_ring(d, &d->mem_event->share)) return page; ++ if (mem_event_claim_slot(d, &d->mem_event->share) < 0) ++ return; + ++ req.flags = MEM_EVENT_FLAG_VCPU_PAUSED; + req.gfn = gfn; + req.p2mt = p2m_ram_shared; + req.vcpu_id = v->vcpu_id; + mem_event_put_request(d, &d->mem_event->share, &req); +- +- return page; ++ vcpu_pause_nosync(v); + } + + unsigned int mem_sharing_get_nr_saved_mfns(void) +@@ -692,14 +680,14 @@ gfn_found: + if(ret == 0) goto private_page_found; + + old_page = page; +- page = mem_sharing_alloc_page(d, gfn, flags & MEM_SHARING_MUST_SUCCEED); +- BUG_ON(!page && (flags & MEM_SHARING_MUST_SUCCEED)); ++ page = alloc_domheap_page(d, 0); + if(!page) + { + /* We've failed to obtain memory for private page. 
Need to re-add the + * gfn_info to relevant list */ + list_add(&gfn_info->list, &hash_entry->gfns); + shr_unlock(); ++ mem_sharing_notify_helper(d, gfn); + return -ENOMEM; + } + +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -2988,21 +2988,13 @@ int p2m_mem_paging_evict(struct p2m_doma + */ + void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn) + { +- struct vcpu *v = current; +- mem_event_request_t req; ++ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn }; + struct domain *d = p2m->domain; + +- /* Check that there's space on the ring for this request */ +- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0) +- { +- /* Send release notification to pager */ +- memset(&req, 0, sizeof(req)); +- req.flags |= MEM_EVENT_FLAG_DROP_PAGE; +- req.gfn = gfn; +- req.vcpu_id = v->vcpu_id; ++ /* Send release notification to pager */ ++ req.flags = MEM_EVENT_FLAG_DROP_PAGE; + +- mem_event_put_request(d, &d->mem_event->paging, &req); +- } ++ mem_event_put_request(d, &d->mem_event->paging, &req); + } + + /** +@@ -3037,7 +3029,7 @@ void p2m_mem_paging_populate(struct p2m_ + struct domain *d = p2m->domain; + + /* Check that there's space on the ring for this request */ +- if ( mem_event_check_ring(d, &d->mem_event->paging) ) ++ if ( mem_event_claim_slot(d, &d->mem_event->paging) ) + return; + + memset(&req, 0, sizeof(req)); +@@ -3070,7 +3062,7 @@ void p2m_mem_paging_populate(struct p2m_ + else if ( restored || !p2m_do_populate(p2mt) ) + { + /* gfn is already on its way back and vcpu is not paused */ +- mem_event_put_req_producers(&d->mem_event->paging); ++ mem_event_release_slot(d, &d->mem_event->paging); + return; + } + +@@ -3209,8 +3201,8 @@ void p2m_mem_paging_resume(struct p2m_do + if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) + vcpu_unpause(d->vcpu[rsp.vcpu_id]); + +- /* Unpause any domains that were paused because the ring was full */ +- mem_event_unpause_vcpus(d); ++ /* Wake vcpus waiting for room in the ring */ ++ 
mem_event_wake_requesters(&d->mem_event->paging); + } + + void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, +@@ -3239,7 +3231,7 @@ void p2m_mem_access_check(unsigned long + p2m_unlock(p2m); + + /* Otherwise, check if there is a memory event listener, and send the message along */ +- res = mem_event_check_ring(d, &d->mem_event->access); ++ res = mem_event_claim_slot(d, &d->mem_event->access); + if ( res < 0 ) + { + /* No listener */ +@@ -3249,7 +3241,7 @@ void p2m_mem_access_check(unsigned long + "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n", + v->vcpu_id, d->domain_id); + +- mem_event_mark_and_pause(v); ++ mem_event_mark_and_sleep(v, &d->mem_event->access); + } + else + { +@@ -3299,9 +3291,11 @@ void p2m_mem_access_resume(struct p2m_do + if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) + vcpu_unpause(d->vcpu[rsp.vcpu_id]); + +- /* Unpause any domains that were paused because the ring was full or no listener +- * was available */ +- mem_event_unpause_vcpus(d); ++ /* Wake vcpus waiting for room in the ring */ ++ mem_event_wake_requesters(&d->mem_event->access); ++ ++ /* Unpause all vcpus that were paused because no listener was available */ ++ mem_event_wake_waiters(d, &d->mem_event->access); + } + #endif /* __x86_64__ */ + +--- a/xen/include/asm-x86/mem_event.h ++++ b/xen/include/asm-x86/mem_event.h +@@ -24,13 +24,13 @@ + #ifndef __MEM_EVENT_H__ + #define __MEM_EVENT_H__ + +-/* Pauses VCPU while marking pause flag for mem event */ +-void mem_event_mark_and_pause(struct vcpu *v); +-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med); +-void mem_event_put_req_producers(struct mem_event_domain *med); ++int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med); ++void mem_event_release_slot(struct domain *d, struct mem_event_domain *med); + void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req); + void 
mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp); +-void mem_event_unpause_vcpus(struct domain *d); ++void mem_event_wake_requesters(struct mem_event_domain *med); ++void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med); ++void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med); + + int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, + XEN_GUEST_HANDLE(void) u_domctl); +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_COMPAT + #include +@@ -190,7 +191,8 @@ struct mem_event_domain + { + /* ring lock */ + spinlock_t ring_lock; +- unsigned int req_producers; ++ unsigned short foreign_producers; ++ unsigned short target_producers; + /* shared page */ + mem_event_shared_page_t *shared_page; + /* shared ring page */ +@@ -199,6 +201,10 @@ struct mem_event_domain + mem_event_front_ring_t front_ring; + /* event channel port (vcpu0 only) */ + int xen_port; ++ /* mem_event bit for vcpu->pause_flags */ ++ int pause_flag; ++ /* list of vcpus waiting for room in the ring */ ++ struct waitqueue_head wq; + }; + + struct mem_event_per_domain +@@ -601,9 +607,12 @@ extern struct domain *domain_list; + /* VCPU affinity has changed: migrating to a new CPU. */ + #define _VPF_migrating 3 + #define VPF_migrating (1UL<<_VPF_migrating) +- /* VCPU is blocked on memory-event ring. */ +-#define _VPF_mem_event 4 +-#define VPF_mem_event (1UL<<_VPF_mem_event) ++ /* VCPU is blocked due to missing mem_paging ring. */ ++#define _VPF_mem_paging 4 ++#define VPF_mem_paging (1UL<<_VPF_mem_paging) ++ /* VCPU is blocked due to missing mem_access ring. 
*/ ++#define _VPF_mem_access 5 ++#define VPF_mem_access (1UL<<_VPF_mem_access) + + static inline int vcpu_runnable(struct vcpu *v) + { diff --git a/xenpaging.mmap-before-nominate.patch b/xenpaging.mmap-before-nominate.patch new file mode 100644 index 0000000..c18cbd1 --- /dev/null +++ b/xenpaging.mmap-before-nominate.patch @@ -0,0 +1,114 @@ +# HG changeset patch +# Parent 4019436855ff3d44228c8eb3e78a9133a9caf870 +xenpaging: map gfn before nomination + +If the gfn is mapped before nomination, all special cases in do_mmu_update() +for paged gfns can be removed. If a gfn is actually in any of the paging +states the caller has to try again. + +Bump interface age. + +Signed-off-by: Olaf Hering + +--- a/tools/xenpaging/xenpaging.c ++++ b/tools/xenpaging/xenpaging.c +@@ -573,7 +573,7 @@ static int xenpaging_evict_page(xenpagin + + DECLARE_DOMCTL; + +- /* Map page */ ++ /* Map page to get a handle */ + gfn = victim->gfn; + ret = -EFAULT; + page = xc_map_foreign_pages(xch, paging->mem_event.domain_id, +@@ -584,16 +584,21 @@ static int xenpaging_evict_page(xenpagin + goto out; + } + ++ /* Nominate the page */ ++ ret = xc_mem_paging_nominate(xch, paging->mem_event.domain_id, gfn); ++ if ( ret != 0 ) ++ goto out; ++ + /* Copy page */ + ret = write_page(fd, page, i); + if ( ret != 0 ) + { + PERROR("Error copying page %lx", victim->gfn); +- munmap(page, PAGE_SIZE); + goto out; + } + + munmap(page, PAGE_SIZE); ++ page = NULL; + + /* Tell Xen to evict page */ + ret = xc_mem_paging_evict(xch, paging->mem_event.domain_id, +@@ -612,6 +617,8 @@ static int xenpaging_evict_page(xenpagin + paging->num_paged_out++; + + out: ++ if (page) ++ munmap(page, PAGE_SIZE); + return ret; + } + +@@ -735,14 +742,11 @@ static int evict_victim(xenpaging_t *pag + ret = -EINTR; + goto out; + } +- ret = xc_mem_paging_nominate(xch, paging->mem_event.domain_id, victim->gfn); +- if ( ret == 0 ) +- ret = xenpaging_evict_page(paging, victim, fd, i); +- else ++ ret = xenpaging_evict_page(paging, victim, fd, i); 
++ if ( ret && j++ % 1000 == 0 ) + { +- if ( j++ % 1000 == 0 ) +- if ( xenpaging_mem_paging_flush_ioemu_cache(paging) ) +- PERROR("Error flushing ioemu cache"); ++ if ( xenpaging_mem_paging_flush_ioemu_cache(paging) ) ++ PERROR("Error flushing ioemu cache"); + } + } + while ( ret ); +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -2852,7 +2852,7 @@ set_shared_p2m_entry(struct p2m_domain * + * - the gfn is backed by a mfn + * - the p2mt of the gfn is pageable + * - the mfn is not used for IO +- * - the mfn has exactly one user and has no special meaning ++ * - the mfn has exactly two users (guest+pager) and has no special meaning + * + * Once the p2mt is changed the page is readonly for the guest. On success the + * pager can write the page contents to disk and later evict the page. +@@ -2886,7 +2886,7 @@ int p2m_mem_paging_nominate(struct p2m_d + /* Check page count and type */ + page = mfn_to_page(mfn); + if ( (page->count_info & (PGC_count_mask | PGC_allocated)) != +- (1 | PGC_allocated) ) ++ (2 | PGC_allocated) ) + goto out; + + if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_none ) +@@ -2914,7 +2914,7 @@ int p2m_mem_paging_nominate(struct p2m_d + * freed: + * - the gfn is backed by a mfn + * - the gfn was nominated +- * - the mfn has still exactly one user and has no special meaning ++ * - the mfn has still exactly one user (the guest) and has no special meaning + * + * After successful nomination some other process could have mapped the page. In + * this case eviction can not be done. 
If the gfn was populated before the pager +--- a/xen/include/public/mem_event.h ++++ b/xen/include/public/mem_event.h +@@ -49,7 +49,7 @@ + #define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */ + #define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */ + +-#define MEM_EVENT_PAGING_AGE 1UL /* Number distinguish the mem_paging <-> pager interface */ ++#define MEM_EVENT_PAGING_AGE 2UL /* Number distinguish the mem_paging <-> pager interface */ + + typedef struct mem_event_shared_page { + uint32_t port; diff --git a/xenpaging.p2m_is_paged.patch b/xenpaging.p2m_is_paged.patch new file mode 100644 index 0000000..0998405 --- /dev/null +++ b/xenpaging.p2m_is_paged.patch @@ -0,0 +1,335 @@ +# HG changeset patch +# Parent 4a0a6a1cd56a8f3d242f323fb5161c2d1f52dccb +xenpaging: add need_populate and paged_no_mfn checks + +There is currently a mix of p2mt checks for the various paging types. +Some mean the p2mt needs to be populated, others mean a gfn without mfn. + +Add a new p2m_do_populate() helper which covers the p2m_ram_paged and +p2m_ram_paging_out types. If a gfn is not in these states anymore another +populate request for the pager is not needed. This avoids a call to +p2m_mem_paging_populate() which in turn reduces the pressure on the ring +buffer because no temporary slot needs to be claimed. As such, this helper is +an optimization. + +Modify the existing p2m_is_paged() helper which now covers also +p2m_ram_paging_in_start in addition to the current p2m_ram_paged type. A gfn +in these two states is not backed by a mfn. 
+ +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/hvm/emulate.c | 3 + + xen/arch/x86/hvm/hvm.c | 17 ++++++---- + xen/arch/x86/mm.c | 63 ++++++++++++--------------------------- + xen/arch/x86/mm/guest_walk.c | 3 + + xen/arch/x86/mm/hap/guest_walk.c | 6 ++- + xen/arch/x86/mm/hap/p2m-ept.c | 3 - + xen/arch/x86/mm/p2m.c | 4 +- + xen/common/grant_table.c | 3 + + xen/include/asm-x86/p2m.h | 9 ++++- + 9 files changed, 51 insertions(+), 60 deletions(-) + +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -66,7 +66,8 @@ static int hvmemul_do_io( + ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt, 0); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m, ram_gfn); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m, ram_gfn); + return X86EMUL_RETRY; + } + if ( p2m_is_shared(p2mt) ) +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -360,7 +360,8 @@ static int hvm_set_ioreq_page( + return -EINVAL; + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m, gmfn); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m, gmfn); + return -ENOENT; + } + if ( p2m_is_shared(p2mt) ) +@@ -1174,7 +1175,7 @@ bool_t hvm_hap_nested_page_fault(unsigne + + #ifdef __x86_64__ + /* Check if the page has been paged out */ +- if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) ) ++ if ( p2m_do_populate(p2mt) ) + p2m_mem_paging_populate(p2m, gfn); + + /* Mem sharing: unshare the page and try again */ +@@ -1662,7 +1663,8 @@ static void *__hvm_map_guest_frame(unsig + return NULL; + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m, gfn); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m, gfn); + return NULL; + } + +@@ -2120,7 +2122,8 @@ static enum hvm_copy_result __hvm_copy( + + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m, gfn); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m, gfn); + return HVMCOPY_gfn_paged_out; + } + if ( p2m_is_shared(p2mt) ) +@@ -3497,7 +3500,8 
@@ long do_hvm_op(unsigned long op, XEN_GUE + mfn_t mfn = gfn_to_mfn(p2m, pfn, &t); + if ( p2m_is_paging(t) ) + { +- p2m_mem_paging_populate(p2m, pfn); ++ if ( p2m_do_populate(t) ) ++ p2m_mem_paging_populate(p2m, pfn); + + rc = -EINVAL; + goto param_fail3; +@@ -3594,7 +3598,8 @@ long do_hvm_op(unsigned long op, XEN_GUE + mfn = gfn_to_mfn_unshare(p2m, pfn, &t, 0); + if ( p2m_is_paging(t) ) + { +- p2m_mem_paging_populate(p2m, pfn); ++ if ( p2m_do_populate(t) ) ++ p2m_mem_paging_populate(p2m, pfn); + + rc = -EINVAL; + goto param_fail4; +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -3465,9 +3465,10 @@ int do_mmu_update( + if ( !p2m_is_valid(p2mt) ) + mfn = INVALID_MFN; + +- if ( p2m_is_paged(p2mt) ) ++ if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), gmfn); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), gmfn); + + rc = -ENOENT; + break; +@@ -3492,24 +3493,18 @@ int do_mmu_update( + { + l1_pgentry_t l1e = l1e_from_intpte(req.val); + p2m_type_t l1e_p2mt; +- unsigned long l1emfn = mfn_x( + gfn_to_mfn(p2m_get_hostp2m(pg_owner), +- l1e_get_pfn(l1e), &l1e_p2mt)); ++ l1e_get_pfn(l1e), &l1e_p2mt); + +- if ( p2m_is_paged(l1e_p2mt) ) ++#ifdef __x86_64__ ++ if ( p2m_is_paging(l1e_p2mt) ) + { +- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), ++ if ( p2m_do_populate(l1e_p2mt) ) ++ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l1e_get_pfn(l1e)); + rc = -ENOENT; + break; + } +- else if ( p2m_ram_paging_in_start == l1e_p2mt && +- !mfn_valid(l1emfn) ) +- { +- rc = -ENOENT; +- break; +- } +-#ifdef __x86_64__ + /* XXX: Ugly: pull all the checks into a separate function. 
+ * Don't want to do it now, not to interfere with mem_paging + * patches */ +@@ -3536,22 +3531,16 @@ int do_mmu_update( + { + l2_pgentry_t l2e = l2e_from_intpte(req.val); + p2m_type_t l2e_p2mt; +- unsigned long l2emfn = mfn_x( +- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e), &l2e_p2mt)); ++ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e), &l2e_p2mt); + +- if ( p2m_is_paged(l2e_p2mt) ) ++ if ( p2m_is_paging(l2e_p2mt) ) + { +- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), ++ if ( p2m_do_populate(l2e_p2mt) ) ++ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l2e_get_pfn(l2e)); + rc = -ENOENT; + break; + } +- else if ( p2m_ram_paging_in_start == l2e_p2mt && +- !mfn_valid(l2emfn) ) +- { +- rc = -ENOENT; +- break; +- } + else if ( p2m_ram_shared == l2e_p2mt ) + { + MEM_LOG("Unexpected attempt to map shared page.\n"); +@@ -3567,22 +3556,16 @@ int do_mmu_update( + { + l3_pgentry_t l3e = l3e_from_intpte(req.val); + p2m_type_t l3e_p2mt; +- unsigned long l3emfn = mfn_x( +- gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e), &l3e_p2mt)); ++ gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e), &l3e_p2mt); + +- if ( p2m_is_paged(l3e_p2mt) ) ++ if ( p2m_is_paging(l3e_p2mt) ) + { +- p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), ++ if ( p2m_do_populate(l3e_p2mt) ) ++ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l3e_get_pfn(l3e)); + rc = -ENOENT; + break; + } +- else if ( p2m_ram_paging_in_start == l3e_p2mt && +- !mfn_valid(l3emfn) ) +- { +- rc = -ENOENT; +- break; +- } + else if ( p2m_ram_shared == l3e_p2mt ) + { + MEM_LOG("Unexpected attempt to map shared page.\n"); +@@ -3598,23 +3581,17 @@ int do_mmu_update( + { + l4_pgentry_t l4e = l4e_from_intpte(req.val); + p2m_type_t l4e_p2mt; +- unsigned long l4emfn = mfn_x( + gfn_to_mfn(p2m_get_hostp2m(pg_owner), +- l4e_get_pfn(l4e), &l4e_p2mt)); ++ l4e_get_pfn(l4e), &l4e_p2mt); + +- if ( p2m_is_paged(l4e_p2mt) ) ++ if ( p2m_is_paging(l4e_p2mt) ) + { +- 
p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), ++ if ( p2m_do_populate(l4e_p2mt) ) ++ p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l4e_get_pfn(l4e)); + rc = -ENOENT; + break; + } +- else if ( p2m_ram_paging_in_start == l4e_p2mt && +- !mfn_valid(l4emfn) ) +- { +- rc = -ENOENT; +- break; +- } + else if ( p2m_ram_shared == l4e_p2mt ) + { + MEM_LOG("Unexpected attempt to map shared page.\n"); +--- a/xen/arch/x86/mm/guest_walk.c ++++ b/xen/arch/x86/mm/guest_walk.c +@@ -96,7 +96,8 @@ static inline void *map_domain_gfn(struc + *mfn = gfn_to_mfn_unshare(p2m, gfn_x(gfn), p2mt, 0); + if ( p2m_is_paging(*p2mt) ) + { +- p2m_mem_paging_populate(p2m, gfn_x(gfn)); ++ if ( p2m_do_populate(*p2mt) ) ++ p2m_mem_paging_populate(p2m, gfn_x(gfn)); + + *rc = _PAGE_PAGED; + return NULL; +--- a/xen/arch/x86/mm/hap/guest_walk.c ++++ b/xen/arch/x86/mm/hap/guest_walk.c +@@ -50,7 +50,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN + top_mfn = gfn_to_mfn_unshare(p2m, cr3 >> PAGE_SHIFT, &p2mt, 0); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m, cr3 >> PAGE_SHIFT); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m, cr3 >> PAGE_SHIFT); + + pfec[0] = PFEC_page_paged; + return INVALID_GFN; +@@ -82,7 +83,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN + gfn_to_mfn_unshare(p2m, gfn_x(gfn), &p2mt, 0); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m, gfn_x(gfn)); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m, gfn_x(gfn)); + + pfec[0] = PFEC_page_paged; + return INVALID_GFN; +--- a/xen/arch/x86/mm/hap/p2m-ept.c ++++ b/xen/arch/x86/mm/hap/p2m-ept.c +@@ -377,8 +377,7 @@ ept_set_entry(struct p2m_domain *p2m, un + * the intermediate tables will be freed below after the ept flush */ + old_entry = *ept_entry; + +- if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || +- (p2mt == p2m_ram_paging_in_start) ) ++ if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ) + { + /* Construct the new entry, and then write it 
once */ + new_entry.emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat, +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -3049,7 +3049,7 @@ void p2m_mem_paging_populate(struct p2m_ + p2m_lock(p2m); + mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query); + /* Allow only nominated or evicted pages to enter page-in path */ +- if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged ) ++ if ( p2m_do_populate(p2mt) ) + { + /* Evict will fail now, tag this request for pager */ + if ( p2mt == p2m_ram_paging_out ) +@@ -3067,7 +3067,7 @@ void p2m_mem_paging_populate(struct p2m_ + req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; + } + /* No need to inform pager if the gfn is not in the page-out path */ +- else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) ++ else if ( !p2m_do_populate(p2mt) ) + { + /* gfn is already on its way back and vcpu is not paused */ + mem_event_put_req_producers(&d->mem_event->paging); +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -158,7 +158,8 @@ static int __get_paged_frame(unsigned lo + *frame = mfn_x(mfn); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(p2m, gfn); ++ if ( p2m_do_populate(p2mt) ) ++ p2m_mem_paging_populate(p2m, gfn); + rc = GNTST_eagain; + } + } else { +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -157,7 +157,11 @@ typedef enum { + | p2m_to_mask(p2m_ram_paging_in_start) \ + | p2m_to_mask(p2m_ram_paging_in)) + +-#define P2M_PAGED_TYPES (p2m_to_mask(p2m_ram_paged)) ++#define P2M_POPULATE_TYPES (p2m_to_mask(p2m_ram_paged) \ ++ | p2m_to_mask(p2m_ram_paging_out) ) ++ ++#define P2M_PAGED_NO_MFN_TYPES (p2m_to_mask(p2m_ram_paged) \ ++ | p2m_to_mask(p2m_ram_paging_in_start) ) + + /* Shared types */ + /* XXX: Sharable types could include p2m_ram_ro too, but we would need to +@@ -179,7 +183,8 @@ typedef enum { + #define p2m_has_emt(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | p2m_to_mask(p2m_mmio_direct))) + #define p2m_is_pageable(_t) (p2m_to_mask(_t) & 
P2M_PAGEABLE_TYPES) + #define p2m_is_paging(_t) (p2m_to_mask(_t) & P2M_PAGING_TYPES) +-#define p2m_is_paged(_t) (p2m_to_mask(_t) & P2M_PAGED_TYPES) ++#define p2m_is_paged(_t) (p2m_to_mask(_t) & P2M_PAGED_NO_MFN_TYPES) ++#define p2m_do_populate(_t) (p2m_to_mask(_t) & P2M_POPULATE_TYPES) + #define p2m_is_sharable(_t) (p2m_to_mask(_t) & P2M_SHARABLE_TYPES) + #define p2m_is_shared(_t) (p2m_to_mask(_t) & P2M_SHARED_TYPES) + #define p2m_is_broken(_t) (p2m_to_mask(_t) & P2M_BROKEN_TYPES) diff --git a/xenpaging.qemu.flush-cache.patch b/xenpaging.qemu.flush-cache.patch new file mode 100644 index 0000000..272efa1 --- /dev/null +++ b/xenpaging.qemu.flush-cache.patch @@ -0,0 +1,31 @@ +Subject: xenpaging/qemu-dm: add command to flush buffer cache. + +Add support for a xenstore dm command to flush qemu's buffer cache. + +qemu will just keep mapping pages and not release them, which causes problems +for the memory pager (since the page is mapped, it won't get paged out). When +the pager has trouble finding a page to page out, it asks qemu to flush its +buffer, which releases all the page mappings. This makes it possible to find +pages to swap out again.
+ +Already-Signed-off-by: Patrick Colp +Signed-off-by: Olaf Hering + +--- + tools/ioemu-qemu-xen/xenstore.c | 3 +++ + 1 file changed, 3 insertions(+) + +Index: xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c +=================================================================== +--- xen-4.1.2-testing.orig/tools/ioemu-qemu-xen/xenstore.c ++++ xen-4.1.2-testing/tools/ioemu-qemu-xen/xenstore.c +@@ -1082,6 +1082,9 @@ static void xenstore_process_dm_command_ + do_pci_add(par); + free(par); + #endif ++ } else if (!strncmp(command, "flush-cache", len)) { ++ fprintf(logfile, "dm-command: flush caches\n"); ++ qemu_invalidate_map_cache(); + } else { + fprintf(logfile, "dm-command: unknown command\"%*s\"\n", len, command); + } diff --git a/xenpaging.versioned-interface.patch b/xenpaging.versioned-interface.patch new file mode 100644 index 0000000..51939e3 --- /dev/null +++ b/xenpaging.versioned-interface.patch @@ -0,0 +1,87 @@ +# HG changeset patch +# Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00 +xenpaging: extend xc_mem_paging_enable() to handle interface version + +Since upcoming patches will change how paging works internally, add a +new interface to xc_mem_paging_enable() to make sure the pager is not +out-of-date. This is similar to XEN_DOMCTL_INTERFACE_VERSION in do_domctl() +where the tools have to match the running hypervisor.
+ +Signed-off-by: Olaf Hering + +Index: xen-4.1.2-testing/tools/libxc/xc_mem_paging.c +=================================================================== +--- xen-4.1.2-testing.orig/tools/libxc/xc_mem_paging.c ++++ xen-4.1.2-testing/tools/libxc/xc_mem_paging.c +@@ -25,12 +25,13 @@ + + + int xc_mem_paging_enable(xc_interface *xch, domid_t domain_id, ++ unsigned long interface_age, + void *shared_page, void *ring_page) + { + return xc_mem_event_control(xch, domain_id, + XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE, + XEN_DOMCTL_MEM_EVENT_OP_PAGING, +- shared_page, ring_page, INVALID_MFN); ++ shared_page, ring_page, interface_age); + } + + int xc_mem_paging_disable(xc_interface *xch, domid_t domain_id) +Index: xen-4.1.2-testing/tools/libxc/xenctrl.h +=================================================================== +--- xen-4.1.2-testing.orig/tools/libxc/xenctrl.h ++++ xen-4.1.2-testing/tools/libxc/xenctrl.h +@@ -1736,6 +1736,7 @@ int xc_mem_event_control(xc_interface *x + void *ring_page, unsigned long gfn); + + int xc_mem_paging_enable(xc_interface *xch, domid_t domain_id, ++ unsigned long interface_age, + void *shared_page, void *ring_page); + int xc_mem_paging_disable(xc_interface *xch, domid_t domain_id); + int xc_mem_paging_nominate(xc_interface *xch, domid_t domain_id, +Index: xen-4.1.2-testing/tools/xenpaging/xenpaging.c +=================================================================== +--- xen-4.1.2-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.1.2-testing/tools/xenpaging/xenpaging.c +@@ -366,6 +366,7 @@ static xenpaging_t *xenpaging_init(int a + + /* Initialise Xen */ + rc = xc_mem_paging_enable(xch, paging->mem_event.domain_id, ++ MEM_EVENT_PAGING_AGE, + paging->mem_event.shared_page, + paging->mem_event.ring_page); + if ( rc != 0 ) +Index: xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c +=================================================================== +--- xen-4.1.2-testing.orig/xen/arch/x86/mm/mem_event.c ++++ 
xen-4.1.2-testing/xen/arch/x86/mm/mem_event.c +@@ -274,6 +274,15 @@ int mem_event_domctl(struct domain *d, x + if ( p2m->pod.entry_count ) + break; + ++ rc = -ENOEXEC; ++ /* Disallow paging if the pager's interface age does not match */ ++ if ( mec->gfn != MEM_EVENT_PAGING_AGE ) ++ { ++ gdprintk(XENLOG_INFO, "Expected paging age %lx, got %lx\n", ++ MEM_EVENT_PAGING_AGE, mec->gfn); ++ break; ++ } ++ + rc = mem_event_enable(d, mec, med); + } + break; +Index: xen-4.1.2-testing/xen/include/public/mem_event.h +=================================================================== +--- xen-4.1.2-testing.orig/xen/include/public/mem_event.h ++++ xen-4.1.2-testing/xen/include/public/mem_event.h +@@ -49,6 +49,8 @@ + #define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */ + #define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */ + ++#define MEM_EVENT_PAGING_AGE 1UL /* Number to distinguish the mem_paging <-> pager interface */ ++ + typedef struct mem_event_shared_page { + uint32_t port; + } mem_event_shared_page_t; diff --git a/xenpaging.waitqueue-paging.patch b/xenpaging.waitqueue-paging.patch new file mode 100644 index 0000000..35feaf9 --- /dev/null +++ b/xenpaging.waitqueue-paging.patch @@ -0,0 +1,387 @@ +# HG changeset patch +# Parent 427c10f8e1e28d942886f89ebc79ffa93cb7fce9 +xenpaging: use wait queues + +Use a wait queue to put a guest vcpu to sleep while the requested gfn is +in paging state. This adds missing p2m_mem_paging_populate() calls to +some callers of the new get_gfn* variants, which would crash now +because they get an invalid mfn. It also fixes guest crashes due to +unexpected returns from do_memory_op because copy_to/from_guest ran into +a paged gfn. Now those places will always get a valid mfn. + +Since each gfn could be requested by several guest vcpus at the same +time a queue of paged gfns is maintained. Each vcpu will be attached to +that queue. Once p2m_mem_paging_resume restored the gfn the waiting +vcpus will resume execution.
+ +There is untested code in p2m_mem_paging_init_queue() to allow cpu +hotplug. Since each vcpu may wait on a different gfn there have to be as +many queues as vcpus. But xl vcpu-set does not seem to work right now, +so this code path can't be exercised right now. + +TODO: + - use hash in p2m_mem_paging_queue_head + - rename gfn_lock + - use mm_lock_t for gfn_lock + +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/hvm/hvm.c | 2 + xen/arch/x86/mm/p2m.c | 220 +++++++++++++++++++++++++++++++++------ + xen/common/domctl.c | 3 + xen/include/asm-x86/hvm/domain.h | 3 + xen/include/asm-x86/p2m.h | 7 + + 5 files changed, 205 insertions(+), 30 deletions(-) + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -442,6 +442,8 @@ int hvm_domain_initialise(struct domain + spin_lock_init(&d->arch.hvm_domain.irq_lock); + spin_lock_init(&d->arch.hvm_domain.uc_lock); + ++ spin_lock_init(&d->arch.hvm_domain.gfn_lock); ++ + INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list); + spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock); + +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -30,6 +30,7 @@ + #include + #include /* ept_p2m_init() */ + #include ++#include + #include + #include + #include +@@ -2839,6 +2840,182 @@ set_shared_p2m_entry(struct p2m_domain * + } + + #ifdef __x86_64__ ++struct p2m_mem_paging_queue { ++ struct list_head list; ++ struct waitqueue_head wq; ++ unsigned long gfn; ++ unsigned short waiters; ++ unsigned short woken; ++ unsigned short index; ++}; ++ ++struct p2m_mem_paging_queue_head { ++ struct list_head list; ++ unsigned int max; ++}; ++ ++int p2m_mem_paging_init_queue(struct domain *d, unsigned int max) ++{ ++ struct p2m_mem_paging_queue_head *h; ++ struct p2m_mem_paging_queue *q; ++ unsigned int i, nr; ++ int ret = 0; ++ ++ if (!is_hvm_domain(d)) ++ return 0; ++ ++ spin_lock(&d->arch.hvm_domain.gfn_lock); ++ ++ if (!d->arch.hvm_domain.gfn_queue) { ++ ret = -ENOMEM; ++ h = xzalloc(struct p2m_mem_paging_queue_head); ++ if (!h) { ++
domain_crash(d); ++ goto out; ++ } ++ ++ INIT_LIST_HEAD(&h->list); ++ nr = max; ++ } else { ++ h = d->arch.hvm_domain.gfn_queue; ++ if (max <= h->max) ++ goto out; ++ nr = max - h->max; ++ } ++ ++ ret = -ENOMEM; ++ q = xzalloc_array(struct p2m_mem_paging_queue, nr); ++ if (!q) { ++ if (!d->arch.hvm_domain.gfn_queue) ++ xfree(h); ++ domain_crash(d); ++ goto out; ++ } ++ ++ for (i = 0; i < nr; i++) { ++ init_waitqueue_head(&q[i].wq); ++ INIT_LIST_HEAD(&q[i].list); ++ q[i].index = h->max + i + 1; ++ list_add_tail(&q[i].list, &h->list); ++ } ++ ++ h->max = max; ++ d->arch.hvm_domain.gfn_queue = h; ++ ret = 0; ++ ++out: ++ spin_unlock(&d->arch.hvm_domain.gfn_lock); ++ return ret; ++} ++ ++static struct p2m_mem_paging_queue *p2m_mem_paging_get_queue(struct domain *d, unsigned long gfn) ++{ ++ struct p2m_mem_paging_queue_head *h; ++ struct p2m_mem_paging_queue *q, *q_match, *q_free; ++ ++ h = d->arch.hvm_domain.gfn_queue; ++ q_match = q_free = NULL; ++ ++ spin_lock(&d->arch.hvm_domain.gfn_lock); ++ ++ list_for_each_entry(q, &h->list, list) { ++ if (q->gfn == gfn) { ++ q_match = q; ++ break; ++ } ++ if (!q_free && !q->waiters) ++ q_free = q; ++ } ++ ++ if (!q_match && q_free) ++ q_match = q_free; ++ ++ if (q_match) { ++ if (q_match->woken) ++ printk("wq woken for gfn %u:%u %lx %u %u %u\n", current->domain->domain_id, current->vcpu_id, gfn, q_match->index, q_match->woken, q_match->waiters); ++ q_match->waiters++; ++ q_match->gfn = gfn; ++ } ++ ++ if (!q_match) ++ printk("No wq_get for gfn %u:%u %lx\n", current->domain->domain_id, current->vcpu_id, gfn); ++ ++ spin_unlock(&d->arch.hvm_domain.gfn_lock); ++ return q_match; ++} ++ ++static void p2m_mem_paging_put_queue(struct domain *d, struct p2m_mem_paging_queue *q_match) ++{ ++ spin_lock(&d->arch.hvm_domain.gfn_lock); ++ ++ if (q_match->waiters == 0) ++ printk("wq_put no waiters, gfn %u:%u %lx %u\n", current->domain->domain_id, current->vcpu_id, q_match->gfn, q_match->woken); ++ else if (--q_match->waiters == 0) ++ 
q_match->gfn = q_match->woken = 0;; ++ ++ spin_unlock(&d->arch.hvm_domain.gfn_lock); ++} ++ ++static void p2m_mem_paging_wake_queue(struct domain *d, unsigned long gfn) ++{ ++ struct p2m_mem_paging_queue_head *h; ++ struct p2m_mem_paging_queue *q, *q_match = NULL; ++ ++ spin_lock(&d->arch.hvm_domain.gfn_lock); ++ ++ h = d->arch.hvm_domain.gfn_queue; ++ list_for_each_entry(q, &h->list, list) { ++ if (q->gfn == gfn) { ++ q_match = q; ++ break; ++ } ++ } ++ if (q_match) { ++ if (q_match->woken || q_match->waiters == 0) ++ printk("Wrong wake for gfn %u:%u %p %lx %u %u\n", current->domain->domain_id, current->vcpu_id, q_match, gfn, q_match->woken, q_match->waiters); ++ q_match->woken++; ++ wake_up_all(&q_match->wq); ++ } ++ spin_unlock(&d->arch.hvm_domain.gfn_lock); ++} ++ ++/* Returns 0 if the gfn is still paged */ ++static int p2m_mem_paging_get_entry(mfn_t *mfn, ++ struct p2m_domain *p2m, unsigned long gfn, ++ p2m_type_t *t, p2m_query_t q) ++{ ++ p2m_access_t a = 0; ++ *mfn = p2m->get_entry(p2m, gfn, t, &a, q); ++ ++ return p2m_is_paging(*t) ? 
0 : 1; ++} ++ ++/* Go to sleep in case of guest access */ ++void p2m_mem_paging_wait(mfn_t *mfn, ++ struct p2m_domain *p2m, unsigned long gfn, ++ p2m_type_t *t, p2m_query_t q) ++{ ++ struct p2m_mem_paging_queue *pmpq; ++ ++ /* Return p2mt as is in case of query */ ++ if ( q == p2m_query ) ++ return; ++ /* Foreign domains can not go to sleep */ ++ if ( current->domain != p2m->domain ) ++ return; ++ ++ pmpq = p2m_mem_paging_get_queue(p2m->domain, gfn); ++ if ( !pmpq ) ++ return; ++ ++ /* Populate the page once */ ++ if ( *t == p2m_ram_paging_out || *t == p2m_ram_paged ) ++ p2m_mem_paging_populate(p2m, gfn); ++ ++ wait_event(pmpq->wq, p2m_mem_paging_get_entry(mfn, p2m, gfn, t, q)); ++ p2m_mem_paging_put_queue(p2m->domain, pmpq); ++} ++ + /** + * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out + * @d: guest domain +@@ -3020,21 +3197,17 @@ void p2m_mem_paging_drop_page(struct p2m + */ + void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn) + { +- struct vcpu *v = current; +- mem_event_request_t req; ++ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn }; + p2m_type_t p2mt; + p2m_access_t a; + mfn_t mfn; +- int restored = 0; + struct domain *d = p2m->domain; ++ int put_request = 0; + + /* Check that there's space on the ring for this request */ + if ( mem_event_claim_slot(d, &d->mem_event->paging) ) + return; + +- memset(&req, 0, sizeof(req)); +- req.type = MEM_EVENT_TYPE_PAGING; +- + /* Fix p2m mapping */ + p2m_lock(p2m); + mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query); +@@ -3043,35 +3216,23 @@ void p2m_mem_paging_populate(struct p2m_ + { + /* Restore page state if gfn was requested before evict */ + if ( p2mt == p2m_ram_paging_out && mfn_valid(mfn) ) { ++ /* Restore gfn because it is needed by guest before evict */ + set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw, a); +- restored = 1; + } else { + set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a); ++ put_request = 1; + } ++ /* Evict will fail now, the 
pager has to try another gfn */ ++ + audit_p2m(p2m, 1); + } + p2m_unlock(p2m); + +- /* Pause domain if request came from guest and gfn has paging type */ +- if ( !restored && p2m_is_paging(p2mt) && v->domain == d ) +- { +- vcpu_pause_nosync(v); +- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; +- } +- /* No need to inform pager if the gfn is not in the page-out path */ +- else if ( restored || !p2m_do_populate(p2mt) ) +- { +- /* gfn is already on its way back and vcpu is not paused */ ++ /* One request per gfn, guest vcpus go to sleep, foreigners try again */ ++ if ( put_request ) ++ mem_event_put_request(d, &d->mem_event->paging, &req); ++ else + mem_event_release_slot(d, &d->mem_event->paging); +- return; +- } +- +- /* Send request to pager */ +- req.gfn = gfn; +- req.p2mt = p2mt; +- req.vcpu_id = v->vcpu_id; +- +- mem_event_put_request(d, &d->mem_event->paging, &req); + } + + /** +@@ -3197,12 +3358,11 @@ void p2m_mem_paging_resume(struct p2m_do + p2m_unlock(p2m); + } + +- /* Unpause domain */ +- if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) +- vcpu_unpause(d->vcpu[rsp.vcpu_id]); +- + /* Wake vcpus waiting for room in the ring */ + mem_event_wake_requesters(&d->mem_event->paging); ++ ++ /* Unpause all vcpus that were paused because the gfn was paged */ ++ p2m_mem_paging_wake_queue(d, rsp.gfn); + } + + void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -536,6 +536,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc + goto maxvcpu_out; + } + ++ if ( p2m_mem_paging_init_queue(d, max) ) ++ goto maxvcpu_out; ++ + ret = 0; + + maxvcpu_out: +--- a/xen/include/asm-x86/hvm/domain.h ++++ b/xen/include/asm-x86/hvm/domain.h +@@ -87,6 +87,9 @@ struct hvm_domain { + + struct viridian_domain viridian; + ++ spinlock_t gfn_lock; ++ struct p2m_mem_paging_queue_head *gfn_queue; ++ + bool_t hap_enabled; + bool_t mem_sharing_enabled; + bool_t qemu_mapcache_invalidate; +--- a/xen/include/asm-x86/p2m.h ++++ 
b/xen/include/asm-x86/p2m.h +@@ -343,6 +343,8 @@ gfn_to_mfn_type_p2m(struct p2m_domain *p + } + + ++extern void p2m_mem_paging_wait(mfn_t *mfn, struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_query_t q); ++ + /* General conversion function from gfn to mfn */ + static inline mfn_t _gfn_to_mfn_type(struct p2m_domain *p2m, + unsigned long gfn, p2m_type_t *t, +@@ -364,6 +366,9 @@ static inline mfn_t _gfn_to_mfn_type(str + mfn = gfn_to_mfn_type_p2m(p2m, gfn, t, q); + + #ifdef __x86_64__ ++ if (unlikely(p2m_is_paging(*t)) ) ++ p2m_mem_paging_wait(&mfn, p2m, gfn, t, q); ++ + if (unlikely((p2m_is_broken(*t)))) + { + /* Return invalid_mfn to avoid caller's access */ +@@ -520,6 +525,8 @@ int clear_mmio_p2m_entry(struct p2m_doma + /* Modify p2m table for shared gfn */ + int set_shared_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn); + ++/* Initialize per-gfn wait queue */ ++int p2m_mem_paging_init_queue(struct domain *d, unsigned int max); + /* Check if a nominated gfn is valid to be paged out */ + int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn); + /* Evict a frame */ +@@ -533,6 +540,8 @@ int p2m_mem_paging_prep(struct p2m_domai + /* Resume normal operation (in case a domain was paused) */ + void p2m_mem_paging_resume(struct p2m_domain *p2m); + #else ++static inline int p2m_mem_paging_init_queue(struct domain *d, unsigned int max) ++{ return 0; } + static inline void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn) + { } + static inline void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn) diff --git a/xm-create-maxmem.patch b/xm-create-maxmem.patch new file mode 100644 index 0000000..7b9b645 --- /dev/null +++ b/xm-create-maxmem.patch @@ -0,0 +1,19 @@ +Cast maxmem to int before computation + +Reported in L3 bnc#732782 + + From: Dario Abatianni + +Index: xen-4.1.2-testing/tools/python/xen/xm/xenapi_create.py +=================================================================== +--- 
xen-4.1.2-testing.orig/tools/python/xen/xm/xenapi_create.py ++++ xen-4.1.2-testing/tools/python/xen/xm/xenapi_create.py +@@ -764,7 +764,7 @@ class sxp2xml: + + if get_child_by_name(config, "maxmem"): + memory.attributes["static_max"] = \ +- str(int(get_child_by_name(config, "maxmem")*1024*1024)) ++ str(int(get_child_by_name(config, "maxmem"))*1024*1024) + + vm.appendChild(memory) +