From 646cd8897b5e95bd56d01ca980bf0f998a2a362f10415cafcc270dbcb8fa569c Mon Sep 17 00:00:00 2001 From: Charles Arnold Date: Mon, 19 Nov 2012 13:58:33 +0000 Subject: [PATCH] - bnc#777628 - guest "disappears" after live migration Updated block-dmmd script - fate#310510 - fix xenpaging restore changes to integrate paging into xm/xend xenpaging.autostart.patch xenpaging.doc.patch - bnc#787163 - VUL-0: CVE-2012-4544: xen: Domain builder Out-of- memory due to malicious kernel/ramdisk (XSA 25) CVE-2012-4544-xsa25.patch - bnc#779212 - VUL-0: CVE-2012-4411: XEN / qemu: guest administrator can access qemu monitor console (XSA-19) CVE-2012-4411-xsa19.patch - bnc#786516 - VUL-0: CVE-2012-4535: xen: Timer overflow DoS vulnerability CVE-2012-4535-xsa20.patch - bnc#786518 - VUL-0: CVE-2012-4536: xen: pirq range check DoS vulnerability CVE-2012-4536-xsa21.patch - bnc#786517 - VUL-0: CVE-2012-4537: xen: Memory mapping failure DoS vulnerability CVE-2012-4537-xsa22.patch - bnc#786519 - VUL-0: CVE-2012-4538: xen: Unhooking empty PAE entries DoS vulnerability CVE-2012-4538-xsa23.patch - bnc#786520 - VUL-0: CVE-2012-4539: xen: Grant table hypercall infinite loop DoS vulnerability CVE-2012-4539-xsa24.patch OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=212 --- 25920-x86-APICV-enable.patch | 130 +++++ 25921-x86-APICV-delivery.patch | 505 ++++++++++++++++++ 25922-x86-APICV-x2APIC.patch | 120 +++++ 25957-x86-TSC-adjust-HVM.patch | 123 +++++ 25958-x86-TSC-adjust-sr.patch | 104 ++++ 25959-x86-TSC-adjust-expose.patch | 39 ++ 25975-x86-IvyBridge.patch | 55 ++ 25984-SVM-nested-paging-mode.patch | 41 ++ 26054-x86-AMD-perf-ctr-init.patch | 69 +++ 26055-x86-oprof-hvm-mode.patch | 65 +++ 26056-page-alloc-flush-filter.patch | 84 +++ 26061-x86-oprof-counter-range.patch | 30 ++ 26062-ACPI-ERST-move-data.patch | 93 ++++ 26063-x86-HPET-affinity-lock.patch | 51 ++ ...7-stubdom_fix_compile_errors_in_grub.patch | 76 +++ ...hotplug_support_rely_on_udev_instead.patch | 187 
+++++++ ...inux_close_lockfd_after_lock_attempt.patch | 36 ++ ...int_warning_spurious-executable-perm.patch | 30 ++ ...int_warning_spurious-executable-perm.patch | 30 ++ ...int_warning_spurious-executable-perm.patch | 51 ++ ..._install_hotplugpath.sh_as_data_file.patch | 34 ++ ..._install_stubdompath.sh_as_data_file.patch | 33 ++ ..._correct_sysconfig_tag_in_xendomains.patch | 21 + ...nstall_sysconfig_files_as_data_files.patch | 36 ++ ...x_wrong_condition_check_for_xml_file.patch | 29 + 26093-HVM-PoD-grant-mem-type.patch | 32 ++ ...nd_grant_pages_in_HVMOP_get_mem_type.patch | 31 ++ 26095-SVM-nested-leak.patch | 32 ++ 26096-SVM-nested-vmexit-emul.patch | 61 +++ 26098-perfc-build.patch | 43 ++ 26102-x86-IOAPIC-legacy-not-first.patch | 99 ++++ 26114-pygrub-list-entries.patch | 90 ++++ CVE-2012-4535-xsa20.patch | 42 ++ CVE-2012-4537-xsa22.patch | 43 ++ CVE-2012-4538-xsa23.patch | 35 ++ CVE-2012-4539-xsa24.patch | 29 + CVE-2012-4544-xsa25.patch | 366 +++++++++++++ ...-for-ExtendedKeyEvent-client-message.patch | 157 ++++++ altgr_2.patch | 14 +- block-dmmd | 214 ++++---- capslock_enable.patch | 2 +- change-vnc-passwd.patch | 2 +- ioemu-vnc-resize.patch | 2 +- log-guest-console.patch | 13 +- x86-ioapic-ack-default.patch | 8 +- xen-destdir.diff | 97 +--- xen.changes | 117 ++++ xen.spec | 85 ++- xend-config.diff | 6 - xenpaging.autostart.patch | 413 ++++++++++++++ xenpaging.doc.patch | 65 +++ 51 files changed, 3938 insertions(+), 232 deletions(-) create mode 100644 25920-x86-APICV-enable.patch create mode 100644 25921-x86-APICV-delivery.patch create mode 100644 25922-x86-APICV-x2APIC.patch create mode 100644 25957-x86-TSC-adjust-HVM.patch create mode 100644 25958-x86-TSC-adjust-sr.patch create mode 100644 25959-x86-TSC-adjust-expose.patch create mode 100644 25975-x86-IvyBridge.patch create mode 100644 25984-SVM-nested-paging-mode.patch create mode 100644 26054-x86-AMD-perf-ctr-init.patch create mode 100644 26055-x86-oprof-hvm-mode.patch create mode 100644 
26056-page-alloc-flush-filter.patch create mode 100644 26061-x86-oprof-counter-range.patch create mode 100644 26062-ACPI-ERST-move-data.patch create mode 100644 26063-x86-HPET-affinity-lock.patch create mode 100644 26077-stubdom_fix_compile_errors_in_grub.patch create mode 100644 26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch create mode 100644 26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch create mode 100644 26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch create mode 100644 26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch create mode 100644 26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch create mode 100644 26084-hotplug_install_hotplugpath.sh_as_data_file.patch create mode 100644 26085-stubdom_install_stubdompath.sh_as_data_file.patch create mode 100644 26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch create mode 100644 26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch create mode 100644 26088-tools_xend_fix_wrong_condition_check_for_xml_file.patch create mode 100644 26093-HVM-PoD-grant-mem-type.patch create mode 100644 26093-hvm_handle_PoD_and_grant_pages_in_HVMOP_get_mem_type.patch create mode 100644 26095-SVM-nested-leak.patch create mode 100644 26096-SVM-nested-vmexit-emul.patch create mode 100644 26098-perfc-build.patch create mode 100644 26102-x86-IOAPIC-legacy-not-first.patch create mode 100644 26114-pygrub-list-entries.patch create mode 100644 CVE-2012-4535-xsa20.patch create mode 100644 CVE-2012-4537-xsa22.patch create mode 100644 CVE-2012-4538-xsa23.patch create mode 100644 CVE-2012-4539-xsa24.patch create mode 100644 CVE-2012-4544-xsa25.patch create mode 100644 VNC-Support-for-ExtendedKeyEvent-client-message.patch create mode 100644 xenpaging.autostart.patch create mode 100644 xenpaging.doc.patch diff --git a/25920-x86-APICV-enable.patch b/25920-x86-APICV-enable.patch new file mode 100644 index 0000000..ee7ff85 --- /dev/null +++ 
b/25920-x86-APICV-enable.patch @@ -0,0 +1,130 @@ +References: FATE#313605 + +# HG changeset patch +# User Jiongxi Li +# Date 1347912248 -3600 +# Node ID ec60de627945f17ec2ce5c14e1224b59403875f7 +# Parent 62de66cec48a1716bb700912da451a26296b8d1e +xen: enable APIC-Register Virtualization + +Add APIC register virtualization support + - APIC read doesn't cause VM-Exit + - APIC write becomes trap-like + +Signed-off-by: Gang Wei +Signed-off-by: Yang Zhang +Signed-off-by: Jiongxi Li + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -823,6 +823,14 @@ static int vlapic_write(struct vcpu *v, + return rc; + } + ++int vlapic_apicv_write(struct vcpu *v, unsigned int offset) ++{ ++ uint32_t val = vlapic_get_reg(vcpu_vlapic(v), offset); ++ ++ vlapic_reg_write(v, offset, val); ++ return 0; ++} ++ + int hvm_x2apic_msr_write(struct vcpu *v, unsigned int msr, uint64_t msr_content) + { + struct vlapic *vlapic = vcpu_vlapic(v); +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -89,6 +89,7 @@ static void __init vmx_display_features( + P(cpu_has_vmx_vnmi, "Virtual NMI"); + P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap"); + P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest"); ++ P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization"); + #undef P + + if ( !printed ) +@@ -186,6 +187,14 @@ static int vmx_init_vmcs_config(void) + if ( opt_unrestricted_guest_enabled ) + opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST; + ++ /* ++ * "APIC Register Virtualization" ++ * can be set only when "use TPR shadow" is set ++ */ ++ if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW ) ++ opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT; ++ ++ + _vmx_secondary_exec_control = adjust_vmx_controls( + "Secondary Exec Control", min, opt, + MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch); +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2274,6 +2274,16 @@ static void vmx_idtv_reinject(unsigned l + } + } + ++static int vmx_handle_apic_write(void) 
++{ ++ unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION); ++ unsigned int offset = exit_qualification & 0xfff; ++ ++ ASSERT(cpu_has_vmx_apic_reg_virt); ++ ++ return vlapic_apicv_write(current, offset); ++} ++ + void vmx_vmexit_handler(struct cpu_user_regs *regs) + { + unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0; +@@ -2729,6 +2739,11 @@ void vmx_vmexit_handler(struct cpu_user_ + break; + } + ++ case EXIT_REASON_APIC_WRITE: ++ if ( vmx_handle_apic_write() ) ++ hvm_inject_hw_exception(TRAP_gp_fault, 0); ++ break; ++ + case EXIT_REASON_ACCESS_GDTR_OR_IDTR: + case EXIT_REASON_ACCESS_LDTR_OR_TR: + case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED: +--- a/xen/include/asm-x86/hvm/vlapic.h ++++ b/xen/include/asm-x86/hvm/vlapic.h +@@ -103,6 +103,8 @@ void vlapic_EOI_set(struct vlapic *vlapi + + int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high); + ++int vlapic_apicv_write(struct vcpu *v, unsigned int offset); ++ + struct vlapic *vlapic_lowest_prio( + struct domain *d, struct vlapic *source, + int short_hand, uint8_t dest, uint8_t dest_mode); +--- a/xen/include/asm-x86/hvm/vmx/vmcs.h ++++ b/xen/include/asm-x86/hvm/vmx/vmcs.h +@@ -182,6 +182,7 @@ extern u32 vmx_vmentry_control; + #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 + #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 + #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 ++#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 + #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 + #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 + extern u32 vmx_secondary_exec_control; +@@ -230,6 +231,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr + SECONDARY_EXEC_UNRESTRICTED_GUEST) + #define cpu_has_vmx_ple \ + (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) ++#define cpu_has_vmx_apic_reg_virt \ ++ (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT) + + /* GUEST_INTERRUPTIBILITY_INFO flags. 
*/ + #define VMX_INTR_SHADOW_STI 0x00000001 +--- a/xen/include/asm-x86/hvm/vmx/vmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vmx.h +@@ -129,6 +129,7 @@ void vmx_update_cpu_exec_control(struct + #define EXIT_REASON_INVVPID 53 + #define EXIT_REASON_WBINVD 54 + #define EXIT_REASON_XSETBV 55 ++#define EXIT_REASON_APIC_WRITE 56 + #define EXIT_REASON_INVPCID 58 + + /* diff --git a/25921-x86-APICV-delivery.patch b/25921-x86-APICV-delivery.patch new file mode 100644 index 0000000..0053af6 --- /dev/null +++ b/25921-x86-APICV-delivery.patch @@ -0,0 +1,505 @@ +References: FATE#313605 + +# HG changeset patch +# User Jiongxi Li +# Date 1347912311 -3600 +# Node ID 713b8849b11afa05f1dde157a3f5086fa3aaad08 +# Parent ec60de627945f17ec2ce5c14e1224b59403875f7 +xen: enable Virtual-interrupt delivery + +Virtual interrupt delivery avoids Xen to inject vAPIC interrupts +manually, which is fully taken care of by the hardware. This needs +some special awareness into existing interrupt injection path: +For pending interrupt from vLAPIC, instead of direct injection, we may +need update architecture specific indicators before resuming to guest. +Before returning to guest, RVI should be updated if any pending IRRs +EOI exit bitmap controls whether an EOI write should cause VM-Exit. If +set, a trap-like induced EOI VM-Exit is triggered. The approach here +is to manipulate EOI exit bitmap based on value of TMR. 
Level +triggered irq requires a hook in vLAPIC EOI write, so that vIOAPIC EOI +is triggered and emulated + +Signed-off-by: Gang Wei +Signed-off-by: Yang Zhang +Signed-off-by: Jiongxi Li +Committed-by: Keir Fraser + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -145,6 +145,9 @@ int vlapic_set_irq(struct vlapic *vlapic + if ( trig ) + vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]); + ++ if ( hvm_funcs.update_eoi_exit_bitmap ) ++ hvm_funcs.update_eoi_exit_bitmap(vlapic_vcpu(vlapic), vec ,trig); ++ + /* We may need to wake up target vcpu, besides set pending bit here */ + return !vlapic_test_and_set_irr(vec, vlapic); + } +@@ -410,6 +413,14 @@ void vlapic_EOI_set(struct vlapic *vlapi + hvm_dpci_msi_eoi(current->domain, vector); + } + ++void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector) ++{ ++ if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) ) ++ vioapic_update_EOI(vlapic_domain(vlapic), vector); ++ ++ hvm_dpci_msi_eoi(current->domain, vector); ++} ++ + int vlapic_ipi( + struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high) + { +@@ -1000,6 +1011,14 @@ void vlapic_adjust_i8259_target(struct d + pt_adjust_global_vcpu_target(v); + } + ++int vlapic_virtual_intr_delivery_enabled(void) ++{ ++ if ( hvm_funcs.virtual_intr_delivery_enabled ) ++ return hvm_funcs.virtual_intr_delivery_enabled(); ++ else ++ return 0; ++} ++ + int vlapic_has_pending_irq(struct vcpu *v) + { + struct vlapic *vlapic = vcpu_vlapic(v); +@@ -1012,6 +1031,9 @@ int vlapic_has_pending_irq(struct vcpu * + if ( irr == -1 ) + return -1; + ++ if ( vlapic_virtual_intr_delivery_enabled() ) ++ return irr; ++ + isr = vlapic_find_highest_isr(vlapic); + isr = (isr != -1) ? 
isr : 0; + if ( (isr & 0xf0) >= (irr & 0xf0) ) +@@ -1024,6 +1046,9 @@ int vlapic_ack_pending_irq(struct vcpu * + { + struct vlapic *vlapic = vcpu_vlapic(v); + ++ if ( vlapic_virtual_intr_delivery_enabled() ) ++ return 1; ++ + vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); + vlapic_clear_irr(vector, vlapic); + +--- a/xen/arch/x86/hvm/vmx/intr.c ++++ b/xen/arch/x86/hvm/vmx/intr.c +@@ -206,6 +206,7 @@ void vmx_intr_assist(void) + struct vcpu *v = current; + unsigned int tpr_threshold = 0; + enum hvm_intblk intblk; ++ int pt_vector = -1; + + /* Block event injection when single step with MTF. */ + if ( unlikely(v->arch.hvm_vcpu.single_step) ) +@@ -216,7 +217,7 @@ void vmx_intr_assist(void) + } + + /* Crank the handle on interrupt state. */ +- pt_update_irq(v); ++ pt_vector = pt_update_irq(v); + + do { + intack = hvm_vcpu_has_pending_irq(v); +@@ -227,16 +228,34 @@ void vmx_intr_assist(void) + goto out; + + intblk = hvm_interrupt_blocked(v, intack); +- if ( intblk == hvm_intblk_tpr ) ++ if ( cpu_has_vmx_virtual_intr_delivery ) ++ { ++ /* Set "Interrupt-window exiting" for ExtINT */ ++ if ( (intblk != hvm_intblk_none) && ++ ( (intack.source == hvm_intsrc_pic) || ++ ( intack.source == hvm_intsrc_vector) ) ) ++ { ++ enable_intr_window(v, intack); ++ goto out; ++ } ++ ++ if ( __vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK ) ++ { ++ if ( (intack.source == hvm_intsrc_pic) || ++ (intack.source == hvm_intsrc_nmi) || ++ (intack.source == hvm_intsrc_mce) ) ++ enable_intr_window(v, intack); ++ ++ goto out; ++ } ++ } else if ( intblk == hvm_intblk_tpr ) + { + ASSERT(vlapic_enabled(vcpu_vlapic(v))); + ASSERT(intack.source == hvm_intsrc_lapic); + tpr_threshold = intack.vector >> 4; + goto out; +- } +- +- if ( (intblk != hvm_intblk_none) || +- (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) ) ++ } else if ( (intblk != hvm_intblk_none) || ++ (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) ) + { + enable_intr_window(v, intack); + goto out; +@@ -253,6 +272,44 
@@ void vmx_intr_assist(void) + { + hvm_inject_hw_exception(TRAP_machine_check, HVM_DELIVER_NO_ERROR_CODE); + } ++ else if ( cpu_has_vmx_virtual_intr_delivery && ++ intack.source != hvm_intsrc_pic && ++ intack.source != hvm_intsrc_vector ) ++ { ++ unsigned long status = __vmread(GUEST_INTR_STATUS); ++ ++ /* ++ * Set eoi_exit_bitmap for periodic timer interrup to cause EOI-induced VM ++ * exit, then pending periodic time interrups have the chance to be injected ++ * for compensation ++ */ ++ if (pt_vector != -1) ++ vmx_set_eoi_exit_bitmap(v, pt_vector); ++ ++ /* we need update the RVI field */ ++ status &= ~(unsigned long)0x0FF; ++ status |= (unsigned long)0x0FF & ++ intack.vector; ++ __vmwrite(GUEST_INTR_STATUS, status); ++ if (v->arch.hvm_vmx.eoi_exitmap_changed) { ++#ifdef __i386__ ++#define UPDATE_EOI_EXITMAP(v, e) { \ ++ if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) { \ ++ __vmwrite(EOI_EXIT_BITMAP##e, v->arch.hvm_vmx.eoi_exit_bitmap[e]); \ ++ __vmwrite(EOI_EXIT_BITMAP##e##_HIGH, v->arch.hvm_vmx.eoi_exit_bitmap[e] >> 32);}} ++#else ++#define UPDATE_EOI_EXITMAP(v, e) { \ ++ if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) { \ ++ __vmwrite(EOI_EXIT_BITMAP##e, v->arch.hvm_vmx.eoi_exit_bitmap[e]);}} ++#endif ++ UPDATE_EOI_EXITMAP(v, 0); ++ UPDATE_EOI_EXITMAP(v, 1); ++ UPDATE_EOI_EXITMAP(v, 2); ++ UPDATE_EOI_EXITMAP(v, 3); ++ } ++ ++ pt_intr_post(v, intack); ++ } + else + { + HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0); +@@ -262,11 +319,16 @@ void vmx_intr_assist(void) + + /* Is there another IRQ to queue up behind this one? 
*/ + intack = hvm_vcpu_has_pending_irq(v); +- if ( unlikely(intack.source != hvm_intsrc_none) ) +- enable_intr_window(v, intack); ++ if ( !cpu_has_vmx_virtual_intr_delivery || ++ intack.source == hvm_intsrc_pic || ++ intack.source == hvm_intsrc_vector ) ++ { ++ if ( unlikely(intack.source != hvm_intsrc_none) ) ++ enable_intr_window(v, intack); ++ } + + out: +- if ( cpu_has_vmx_tpr_shadow ) ++ if ( !cpu_has_vmx_virtual_intr_delivery && cpu_has_vmx_tpr_shadow ) + __vmwrite(TPR_THRESHOLD, tpr_threshold); + } + +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -90,6 +90,7 @@ static void __init vmx_display_features( + P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap"); + P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest"); + P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization"); ++ P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery"); + #undef P + + if ( !printed ) +@@ -188,11 +189,12 @@ static int vmx_init_vmcs_config(void) + opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST; + + /* +- * "APIC Register Virtualization" ++ * "APIC Register Virtualization" and "Virtual Interrupt Delivery" + * can be set only when "use TPR shadow" is set + */ + if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW ) +- opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT; ++ opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; + + + _vmx_secondary_exec_control = adjust_vmx_controls( +@@ -787,6 +789,22 @@ static int construct_vmcs(struct vcpu *v + __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0)); + __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE)); + ++ if ( cpu_has_vmx_virtual_intr_delivery ) ++ { ++ /* EOI-exit bitmap */ ++ v->arch.hvm_vmx.eoi_exit_bitmap[0] = (uint64_t)0; ++ __vmwrite(EOI_EXIT_BITMAP0, v->arch.hvm_vmx.eoi_exit_bitmap[0]); ++ v->arch.hvm_vmx.eoi_exit_bitmap[1] = (uint64_t)0; ++ __vmwrite(EOI_EXIT_BITMAP1, v->arch.hvm_vmx.eoi_exit_bitmap[1]); ++ 
v->arch.hvm_vmx.eoi_exit_bitmap[2] = (uint64_t)0; ++ __vmwrite(EOI_EXIT_BITMAP2, v->arch.hvm_vmx.eoi_exit_bitmap[2]); ++ v->arch.hvm_vmx.eoi_exit_bitmap[3] = (uint64_t)0; ++ __vmwrite(EOI_EXIT_BITMAP3, v->arch.hvm_vmx.eoi_exit_bitmap[3]); ++ ++ /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */ ++ __vmwrite(GUEST_INTR_STATUS, 0); ++ } ++ + /* Host data selectors. */ + __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS); + __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS); +@@ -1028,6 +1046,30 @@ int vmx_add_host_load_msr(u32 msr) + return 0; + } + ++void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector) ++{ ++ int index, offset, changed; ++ ++ index = vector >> 6; ++ offset = vector & 63; ++ changed = !test_and_set_bit(offset, ++ (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]); ++ if (changed) ++ set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed); ++} ++ ++void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector) ++{ ++ int index, offset, changed; ++ ++ index = vector >> 6; ++ offset = vector & 63; ++ changed = test_and_clear_bit(offset, ++ (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]); ++ if (changed) ++ set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed); ++} ++ + int vmx_create_vmcs(struct vcpu *v) + { + struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -1502,6 +1502,22 @@ static void vmx_set_info_guest(struct vc + vmx_vmcs_exit(v); + } + ++static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector, u8 trig) ++{ ++ if ( cpu_has_vmx_virtual_intr_delivery ) ++ { ++ if (trig) ++ vmx_set_eoi_exit_bitmap(v, vector); ++ else ++ vmx_clear_eoi_exit_bitmap(v, vector); ++ } ++} ++ ++static int vmx_virtual_intr_delivery_enabled(void) ++{ ++ return cpu_has_vmx_virtual_intr_delivery; ++} ++ + static struct hvm_function_table __read_mostly vmx_function_table = { + .name = "VMX", + .cpu_up_prepare = vmx_cpu_up_prepare, +@@ -1548,7 +1564,9 @@ static struct hvm_function_table __read_ + 
.nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception, + .nhvm_vcpu_vmexit_trap = nvmx_vmexit_trap, + .nhvm_intr_blocked = nvmx_intr_blocked, +- .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources ++ .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources, ++ .update_eoi_exit_bitmap = vmx_update_eoi_exit_bitmap, ++ .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled + }; + + struct hvm_function_table * __init start_vmx(void) +@@ -2284,6 +2302,17 @@ static int vmx_handle_apic_write(void) + return vlapic_apicv_write(current, offset); + } + ++/* ++ * When "Virtual Interrupt Delivery" is enabled, this function is used ++ * to handle EOI-induced VM exit ++ */ ++void vmx_handle_EOI_induced_exit(struct vlapic *vlapic, int vector) ++{ ++ ASSERT(cpu_has_vmx_virtual_intr_delivery); ++ ++ vlapic_handle_EOI_induced_exit(vlapic, vector); ++} ++ + void vmx_vmexit_handler(struct cpu_user_regs *regs) + { + unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0; +@@ -2677,6 +2706,16 @@ void vmx_vmexit_handler(struct cpu_user_ + hvm_inject_hw_exception(TRAP_gp_fault, 0); + break; + ++ case EXIT_REASON_EOI_INDUCED: ++ { ++ int vector; ++ exit_qualification = __vmread(EXIT_QUALIFICATION); ++ vector = exit_qualification & 0xff; ++ ++ vmx_handle_EOI_induced_exit(vcpu_vlapic(current), vector); ++ break; ++ } ++ + case EXIT_REASON_IO_INSTRUCTION: + exit_qualification = __vmread(EXIT_QUALIFICATION); + if ( exit_qualification & 0x10 ) +--- a/xen/arch/x86/hvm/vpt.c ++++ b/xen/arch/x86/hvm/vpt.c +@@ -212,7 +212,7 @@ static void pt_timer_fn(void *data) + pt_unlock(pt); + } + +-void pt_update_irq(struct vcpu *v) ++int pt_update_irq(struct vcpu *v) + { + struct list_head *head = &v->arch.hvm_vcpu.tm_list; + struct periodic_time *pt, *temp, *earliest_pt = NULL; +@@ -245,7 +245,7 @@ void pt_update_irq(struct vcpu *v) + if ( earliest_pt == NULL ) + { + spin_unlock(&v->arch.hvm_vcpu.tm_lock); +- return; ++ return -1; + } + + 
earliest_pt->irq_issued = 1; +@@ -263,6 +263,17 @@ void pt_update_irq(struct vcpu *v) + hvm_isa_irq_deassert(v->domain, irq); + hvm_isa_irq_assert(v->domain, irq); + } ++ ++ /* ++ * If periodic timer interrut is handled by lapic, its vector in ++ * IRR is returned and used to set eoi_exit_bitmap for virtual ++ * interrupt delivery case. Otherwise return -1 to do nothing. ++ */ ++ if ( vlapic_accept_pic_intr(v) && ++ (&v->domain->arch.hvm_domain)->vpic[0].int_output ) ++ return -1; ++ else ++ return pt_irq_vector(earliest_pt, hvm_intsrc_lapic); + } + + static struct periodic_time *is_pt_irq( +--- a/xen/include/asm-x86/hvm/hvm.h ++++ b/xen/include/asm-x86/hvm/hvm.h +@@ -180,6 +180,10 @@ struct hvm_function_table { + + enum hvm_intblk (*nhvm_intr_blocked)(struct vcpu *v); + void (*nhvm_domain_relinquish_resources)(struct domain *d); ++ ++ /* Virtual interrupt delivery */ ++ void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector, u8 trig); ++ int (*virtual_intr_delivery_enabled)(void); + }; + + extern struct hvm_function_table hvm_funcs; +--- a/xen/include/asm-x86/hvm/vlapic.h ++++ b/xen/include/asm-x86/hvm/vlapic.h +@@ -100,6 +100,7 @@ int vlapic_accept_pic_intr(struct vcpu * + void vlapic_adjust_i8259_target(struct domain *d); + + void vlapic_EOI_set(struct vlapic *vlapic); ++void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector); + + int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high); + +--- a/xen/include/asm-x86/hvm/vmx/vmcs.h ++++ b/xen/include/asm-x86/hvm/vmx/vmcs.h +@@ -110,6 +110,9 @@ struct arch_vmx_struct { + unsigned int host_msr_count; + struct vmx_msr_entry *host_msr_area; + ++ uint32_t eoi_exitmap_changed; ++ uint64_t eoi_exit_bitmap[4]; ++ + unsigned long host_cr0; + + /* Is the guest in real mode? 
*/ +@@ -183,6 +186,7 @@ extern u32 vmx_vmentry_control; + #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 + #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 + #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 ++#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 + #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 + #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 + extern u32 vmx_secondary_exec_control; +@@ -233,6 +237,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr + (vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) + #define cpu_has_vmx_apic_reg_virt \ + (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT) ++#define cpu_has_vmx_virtual_intr_delivery \ ++ (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) + + /* GUEST_INTERRUPTIBILITY_INFO flags. */ + #define VMX_INTR_SHADOW_STI 0x00000001 +@@ -251,6 +257,7 @@ enum vmcs_field { + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, ++ GUEST_INTR_STATUS = 0x00000810, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, +@@ -278,6 +285,14 @@ enum vmcs_field { + APIC_ACCESS_ADDR_HIGH = 0x00002015, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, ++ EOI_EXIT_BITMAP0 = 0x0000201c, ++ EOI_EXIT_BITMAP0_HIGH = 0x0000201d, ++ EOI_EXIT_BITMAP1 = 0x0000201e, ++ EOI_EXIT_BITMAP1_HIGH = 0x0000201f, ++ EOI_EXIT_BITMAP2 = 0x00002020, ++ EOI_EXIT_BITMAP2_HIGH = 0x00002021, ++ EOI_EXIT_BITMAP3 = 0x00002022, ++ EOI_EXIT_BITMAP3_HIGH = 0x00002023, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, +@@ -398,6 +413,8 @@ int vmx_write_guest_msr(u32 msr, u64 val + int vmx_add_guest_msr(u32 msr); + int vmx_add_host_load_msr(u32 msr); + void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to); ++void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector); ++void vmx_clear_eoi_exit_bitmap(struct vcpu *v, 
u8 vector); + + #endif /* ASM_X86_HVM_VMX_VMCS_H__ */ + +--- a/xen/include/asm-x86/hvm/vmx/vmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vmx.h +@@ -119,6 +119,7 @@ void vmx_update_cpu_exec_control(struct + #define EXIT_REASON_MCE_DURING_VMENTRY 41 + #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 + #define EXIT_REASON_APIC_ACCESS 44 ++#define EXIT_REASON_EOI_INDUCED 45 + #define EXIT_REASON_ACCESS_GDTR_OR_IDTR 46 + #define EXIT_REASON_ACCESS_LDTR_OR_TR 47 + #define EXIT_REASON_EPT_VIOLATION 48 +--- a/xen/include/asm-x86/hvm/vpt.h ++++ b/xen/include/asm-x86/hvm/vpt.h +@@ -141,7 +141,7 @@ struct pl_time { /* platform time */ + + void pt_save_timer(struct vcpu *v); + void pt_restore_timer(struct vcpu *v); +-void pt_update_irq(struct vcpu *v); ++int pt_update_irq(struct vcpu *v); + void pt_intr_post(struct vcpu *v, struct hvm_intack intack); + void pt_migrate(struct vcpu *v); + diff --git a/25922-x86-APICV-x2APIC.patch b/25922-x86-APICV-x2APIC.patch new file mode 100644 index 0000000..6a4f55e --- /dev/null +++ b/25922-x86-APICV-x2APIC.patch @@ -0,0 +1,120 @@ +References: FATE#313605 + +# HG changeset patch +# User Jiongxi Li +# Date 1347912362 -3600 +# Node ID c2578dd96b8318e108fff0f340411135dedaa47d +# Parent 713b8849b11afa05f1dde157a3f5086fa3aaad08 +xen: add virtual x2apic support for apicv + +basically to benefit from apicv, we need clear MSR bitmap for +corresponding x2apic MSRs: + 0x800 - 0x8ff: no read intercept for apicv register virtualization + TPR,EOI,SELF-IPI: no write intercept for virtual interrupt + delivery + +Signed-off-by: Jiongxi Li +Committed-by: Keir Fraser + +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -658,7 +658,7 @@ static void vmx_set_host_env(struct vcpu + (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code); + } + +-void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr) ++void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type) + { + unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap; + +@@ 
-673,14 +673,18 @@ void vmx_disable_intercept_for_msr(struc + */ + if ( msr <= 0x1fff ) + { +- __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */ +- __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */ ++ if (type & MSR_TYPE_R) ++ __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */ ++ if (type & MSR_TYPE_W) ++ __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */ + } + else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) ) + { + msr &= 0x1fff; +- __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */ +- __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */ ++ if (type & MSR_TYPE_R) ++ __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */ ++ if (type & MSR_TYPE_W) ++ __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */ + } + } + +@@ -776,13 +780,25 @@ static int construct_vmcs(struct vcpu *v + v->arch.hvm_vmx.msr_bitmap = msr_bitmap; + __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap)); + +- vmx_disable_intercept_for_msr(v, MSR_FS_BASE); +- vmx_disable_intercept_for_msr(v, MSR_GS_BASE); +- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS); +- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP); +- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP); ++ vmx_disable_intercept_for_msr(v, MSR_FS_BASE, MSR_TYPE_R | MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(v, MSR_GS_BASE, MSR_TYPE_R | MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W); + if ( cpu_has_vmx_pat && paging_mode_hap(d) ) +- vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT); ++ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W); ++ if ( cpu_has_vmx_apic_reg_virt ) ++ { ++ int msr; ++ for (msr = MSR_IA32_APICBASE_MSR; msr <= 
MSR_IA32_APICBASE_MSR + 0xff; msr++) ++ vmx_disable_intercept_for_msr(v, msr, MSR_TYPE_R); ++ } ++ if ( cpu_has_vmx_virtual_intr_delivery ) ++ { ++ vmx_disable_intercept_for_msr(v, MSR_IA32_APICTPR_MSR, MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(v, MSR_IA32_APICEOI_MSR, MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(v, MSR_IA32_APICSELF_MSR, MSR_TYPE_W); ++ } + } + + /* I/O access bitmap. */ +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2036,7 +2036,7 @@ static int vmx_msr_write_intercept(unsig + for ( ; (rc == 0) && lbr->count; lbr++ ) + for ( i = 0; (rc == 0) && (i < lbr->count); i++ ) + if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 ) +- vmx_disable_intercept_for_msr(v, lbr->base + i); ++ vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W); + } + + if ( (rc < 0) || +--- a/xen/include/asm-x86/hvm/vmx/vmcs.h ++++ b/xen/include/asm-x86/hvm/vmx/vmcs.h +@@ -407,7 +407,9 @@ enum vmcs_field { + + #define VMCS_VPID_WIDTH 16 + +-void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr); ++#define MSR_TYPE_R 1 ++#define MSR_TYPE_W 2 ++void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type); + int vmx_read_guest_msr(u32 msr, u64 *val); + int vmx_write_guest_msr(u32 msr, u64 val); + int vmx_add_guest_msr(u32 msr); +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -291,6 +291,9 @@ + #define MSR_IA32_APICBASE_ENABLE (1<<11) + #define MSR_IA32_APICBASE_BASE (0xfffff<<12) + #define MSR_IA32_APICBASE_MSR 0x800 ++#define MSR_IA32_APICTPR_MSR 0x808 ++#define MSR_IA32_APICEOI_MSR 0x80b ++#define MSR_IA32_APICSELF_MSR 0x83f + + #define MSR_IA32_UCODE_WRITE 0x00000079 + #define MSR_IA32_UCODE_REV 0x0000008b diff --git a/25957-x86-TSC-adjust-HVM.patch b/25957-x86-TSC-adjust-HVM.patch new file mode 100644 index 0000000..451605e --- /dev/null +++ b/25957-x86-TSC-adjust-HVM.patch @@ -0,0 +1,123 @@ +References: FATE#313633 + +# HG changeset patch +# User Liu, Jinsong +# Date 1348654362 
-7200 +# Node ID c47ef9592fb39325e33f8406b4bd736cc84482e5 +# Parent 5d63c633a60b9a1d695594f9c17cf933240bec81 +x86: Implement TSC adjust feature for HVM guest + +IA32_TSC_ADJUST MSR is maintained separately for each logical +processor. A logical processor maintains and uses the IA32_TSC_ADJUST +MSR as follows: +1). On RESET, the value of the IA32_TSC_ADJUST MSR is 0; +2). If an execution of WRMSR to the IA32_TIME_STAMP_COUNTER MSR adds + (or subtracts) value X from the TSC, the logical processor also + adds (or subtracts) value X from the IA32_TSC_ADJUST MSR; +3). If an execution of WRMSR to the IA32_TSC_ADJUST MSR adds (or + subtracts) value X from that MSR, the logical processor also adds + (or subtracts) value X from the TSC. + +This patch provides tsc adjust support for hvm guest, with it guest OS +would be happy when sync tsc. + +Signed-off-by: Liu, Jinsong +Committed-by: Jan Beulich + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -244,6 +244,7 @@ int hvm_set_guest_pat(struct vcpu *v, u6 + void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc) + { + uint64_t tsc; ++ uint64_t delta_tsc; + + if ( v->domain->arch.vtsc ) + { +@@ -255,10 +256,22 @@ void hvm_set_guest_tsc(struct vcpu *v, u + rdtscll(tsc); + } + +- v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - tsc; ++ delta_tsc = guest_tsc - tsc; ++ v->arch.hvm_vcpu.msr_tsc_adjust += delta_tsc ++ - v->arch.hvm_vcpu.cache_tsc_offset; ++ v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc; ++ + hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset); + } + ++void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust) ++{ ++ v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust ++ - v->arch.hvm_vcpu.msr_tsc_adjust; ++ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset); ++ v->arch.hvm_vcpu.msr_tsc_adjust = tsc_adjust; ++} ++ + u64 hvm_get_guest_tsc(struct vcpu *v) + { + uint64_t tsc; +@@ -277,6 +290,11 @@ u64 hvm_get_guest_tsc(struct vcpu *v) + return tsc + v->arch.hvm_vcpu.cache_tsc_offset; + 
} + ++u64 hvm_get_guest_tsc_adjust(struct vcpu *v) ++{ ++ return v->arch.hvm_vcpu.msr_tsc_adjust; ++} ++ + void hvm_migrate_timers(struct vcpu *v) + { + rtc_migrate_timers(v); +@@ -2794,6 +2812,10 @@ int hvm_msr_read_intercept(unsigned int + *msr_content = hvm_get_guest_tsc(v); + break; + ++ case MSR_IA32_TSC_ADJUST: ++ *msr_content = hvm_get_guest_tsc_adjust(v); ++ break; ++ + case MSR_TSC_AUX: + *msr_content = hvm_msr_tsc_aux(v); + break; +@@ -2907,6 +2929,10 @@ int hvm_msr_write_intercept(unsigned int + hvm_set_guest_tsc(v, msr_content); + break; + ++ case MSR_IA32_TSC_ADJUST: ++ hvm_set_guest_tsc_adjust(v, msr_content); ++ break; ++ + case MSR_TSC_AUX: + v->arch.hvm_vcpu.msr_tsc_aux = (uint32_t)msr_content; + if ( cpu_has_rdtscp +@@ -3478,6 +3504,8 @@ void hvm_vcpu_reset_state(struct vcpu *v + v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset; + hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset); + ++ v->arch.hvm_vcpu.msr_tsc_adjust = 0; ++ + paging_update_paging_modes(v); + + v->arch.flags |= TF_kernel_mode; +--- a/xen/include/asm-x86/hvm/vcpu.h ++++ b/xen/include/asm-x86/hvm/vcpu.h +@@ -137,6 +137,7 @@ struct hvm_vcpu { + struct hvm_vcpu_asid n1asid; + + u32 msr_tsc_aux; ++ u64 msr_tsc_adjust; + + /* VPMU */ + struct vpmu_struct vpmu; +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -284,6 +284,7 @@ + #define MSR_IA32_PLATFORM_ID 0x00000017 + #define MSR_IA32_EBL_CR_POWERON 0x0000002a + #define MSR_IA32_EBC_FREQUENCY_ID 0x0000002c ++#define MSR_IA32_TSC_ADJUST 0x0000003b + + #define MSR_IA32_APICBASE 0x0000001b + #define MSR_IA32_APICBASE_BSP (1<<8) diff --git a/25958-x86-TSC-adjust-sr.patch b/25958-x86-TSC-adjust-sr.patch new file mode 100644 index 0000000..92bca2d --- /dev/null +++ b/25958-x86-TSC-adjust-sr.patch @@ -0,0 +1,104 @@ +References: FATE#313633 + +# HG changeset patch +# User Liu, Jinsong +# Date 1348654418 -7200 +# Node ID 56fb977ce6eb4626a02d4a7a34e85009bb8ee3e0 +# Parent 
c47ef9592fb39325e33f8406b4bd736cc84482e5 +x86: Save/restore TSC adjust during HVM guest migration + +Signed-off-by: Liu, Jinsong +Committed-by: Jan Beulich + +--- a/tools/misc/xen-hvmctx.c ++++ b/tools/misc/xen-hvmctx.c +@@ -390,6 +390,13 @@ static void dump_vmce_vcpu(void) + printf(" VMCE_VCPU: caps %" PRIx64 "\n", p.caps); + } + ++static void dump_tsc_adjust(void) ++{ ++ HVM_SAVE_TYPE(TSC_ADJUST) p; ++ READ(p); ++ printf(" TSC_ADJUST: tsc_adjust %" PRIx64 "\n", p.tsc_adjust); ++} ++ + int main(int argc, char **argv) + { + int entry, domid; +@@ -457,6 +464,7 @@ int main(int argc, char **argv) + case HVM_SAVE_CODE(VIRIDIAN_DOMAIN): dump_viridian_domain(); break; + case HVM_SAVE_CODE(VIRIDIAN_VCPU): dump_viridian_vcpu(); break; + case HVM_SAVE_CODE(VMCE_VCPU): dump_vmce_vcpu(); break; ++ case HVM_SAVE_CODE(TSC_ADJUST): dump_tsc_adjust(); break; + case HVM_SAVE_CODE(END): break; + default: + printf(" ** Don't understand type %u: skipping\n", +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -610,6 +610,46 @@ void hvm_domain_destroy(struct domain *d + hvm_destroy_cacheattr_region_list(d); + } + ++static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h) ++{ ++ struct vcpu *v; ++ struct hvm_tsc_adjust ctxt; ++ int err = 0; ++ ++ for_each_vcpu ( d, v ) ++ { ++ ctxt.tsc_adjust = v->arch.hvm_vcpu.msr_tsc_adjust; ++ err = hvm_save_entry(TSC_ADJUST, v->vcpu_id, h, &ctxt); ++ if ( err ) ++ break; ++ } ++ ++ return err; ++} ++ ++static int hvm_load_tsc_adjust(struct domain *d, hvm_domain_context_t *h) ++{ ++ unsigned int vcpuid = hvm_load_instance(h); ++ struct vcpu *v; ++ struct hvm_tsc_adjust ctxt; ++ ++ if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) ++ { ++ dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", ++ d->domain_id, vcpuid); ++ return -EINVAL; ++ } ++ ++ if ( hvm_load_entry(TSC_ADJUST, h, &ctxt) != 0 ) ++ return -EINVAL; ++ ++ v->arch.hvm_vcpu.msr_tsc_adjust = ctxt.tsc_adjust; ++ return 0; ++} ++ 
++HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, ++ hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU); ++ + static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) + { + struct vcpu *v; +--- a/xen/include/public/arch-x86/hvm/save.h ++++ b/xen/include/public/arch-x86/hvm/save.h +@@ -581,9 +581,15 @@ struct hvm_vmce_vcpu { + + DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu); + ++struct hvm_tsc_adjust { ++ uint64_t tsc_adjust; ++}; ++ ++DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); ++ + /* + * Largest type-code in use + */ +-#define HVM_SAVE_CODE_MAX 18 ++#define HVM_SAVE_CODE_MAX 19 + + #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ diff --git a/25959-x86-TSC-adjust-expose.patch b/25959-x86-TSC-adjust-expose.patch new file mode 100644 index 0000000..fc0173a --- /dev/null +++ b/25959-x86-TSC-adjust-expose.patch @@ -0,0 +1,39 @@ +References: FATE#313633 + +# HG changeset patch +# User Liu, Jinsong +# Date 1348654470 -7200 +# Node ID 3aa66543a51ba77cb73e8c874e2416d065426a22 +# Parent 56fb977ce6eb4626a02d4a7a34e85009bb8ee3e0 +x86: Expose TSC adjust to HVM guest + +Intel latest SDM (17.13.3) release a new MSR CPUID.7.0.EBX[1]=1 +indicates TSC_ADJUST MSR 0x3b is supported. + +This patch expose it to hvm guest. 
+ +Signed-off-by: Liu, Jinsong +Committed-by: Jan Beulich + +--- a/tools/libxc/xc_cpufeature.h ++++ b/tools/libxc/xc_cpufeature.h +@@ -128,6 +128,7 @@ + + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */ + #define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */ ++#define X86_FEATURE_TSC_ADJUST 1 /* Tsc thread offset */ + #define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */ + #define X86_FEATURE_HLE 4 /* Hardware Lock Elision */ + #define X86_FEATURE_AVX2 5 /* AVX2 instructions */ +--- a/tools/libxc/xc_cpuid_x86.c ++++ b/tools/libxc/xc_cpuid_x86.c +@@ -362,7 +362,8 @@ static void xc_cpuid_hvm_policy( + + case 0x00000007: /* Intel-defined CPU features */ + if ( input[1] == 0 ) { +- regs[1] &= (bitmaskof(X86_FEATURE_BMI1) | ++ regs[1] &= (bitmaskof(X86_FEATURE_TSC_ADJUST) | ++ bitmaskof(X86_FEATURE_BMI1) | + bitmaskof(X86_FEATURE_HLE) | + bitmaskof(X86_FEATURE_AVX2) | + bitmaskof(X86_FEATURE_SMEP) | diff --git a/25975-x86-IvyBridge.patch b/25975-x86-IvyBridge.patch new file mode 100644 index 0000000..399488a --- /dev/null +++ b/25975-x86-IvyBridge.patch @@ -0,0 +1,55 @@ +# HG changeset patch +# User Jan Beulich +# Date 1349172840 -7200 +# Node ID 87bf99fad7a9f018530d13213f57610621838085 +# Parent 5fbdbf585f5f2ee9a3e3c75a8a9f9f2cc6eda65c +x86/Intel: add further support for Ivy Bridge CPU models + +And some initial Haswell ones at once. 
+ +Signed-off-by: Jan Beulich +Acked-by: "Nakajima, Jun" + +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -105,11 +105,15 @@ static void do_get_hw_residencies(void * + + switch ( c->x86_model ) + { +- /* Ivy bridge */ +- case 0x3A: + /* Sandy bridge */ + case 0x2A: + case 0x2D: ++ /* Ivy bridge */ ++ case 0x3A: ++ case 0x3E: ++ /* Haswell */ ++ case 0x3C: ++ case 0x45: + GET_PC2_RES(hw_res->pc2); + GET_CC7_RES(hw_res->cc7); + /* fall through */ +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -1820,7 +1820,9 @@ static const struct lbr_info *last_branc + /* Sandy Bridge */ + case 42: case 45: + /* Ivy Bridge */ +- case 58: ++ case 58: case 62: ++ /* Haswell */ ++ case 60: case 69: + return nh_lbr; + break; + /* Atom */ +--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c ++++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c +@@ -747,6 +747,7 @@ int vmx_vpmu_initialise(struct vcpu *v, + case 46: + case 47: + case 58: ++ case 62: + ret = core2_vpmu_initialise(v, vpmu_flags); + if ( !ret ) + vpmu->arch_vpmu_ops = &core2_vpmu_ops; diff --git a/25984-SVM-nested-paging-mode.patch b/25984-SVM-nested-paging-mode.patch new file mode 100644 index 0000000..5b510a9 --- /dev/null +++ b/25984-SVM-nested-paging-mode.patch @@ -0,0 +1,41 @@ +# HG changeset patch +# User Tim Deegan +# Date 1349356850 -3600 +# Node ID a9c84069c2489e2c432a5068adc7cf8d51ae3366 +# Parent 72d89cc43c72848be9bf49da9a87729ed8f48433 +x86/nested-svm: Update the paging mode on VMRUN and VMEXIT emulation. + +This allows Xen to walk the l1 hypervisor's shadow pagetable +correctly. Not needed for hap-on-hap guests because they are handled +at lookup time. Problem found with 64bit Win7 and 32bit XPMode where Win7 +switches forth and back between long mode and PAE legacy pagetables. 
+ +Signed-off-by: Christoph Egger +[Adjusted to update in all cases where the l1 vmm uses shadows] +Signed-off-by: Tim Deegan +Committed-by: Tim Deegan + +--- a/xen/arch/x86/hvm/svm/nestedsvm.c ++++ b/xen/arch/x86/hvm/svm/nestedsvm.c +@@ -741,6 +741,10 @@ nsvm_vcpu_vmrun(struct vcpu *v, struct c + return 1; + } + ++ /* If l1 guest uses shadow paging, update the paging mode. */ ++ if (!nestedhvm_paging_mode_hap(v)) ++ paging_update_paging_modes(v); ++ + nv->nv_vmswitch_in_progress = 0; + return 0; + } +@@ -1408,6 +1412,10 @@ nestedsvm_vcpu_vmexit(struct vcpu *v, st + */ + rc = nhvm_vcpu_vmexit(v, regs, exitcode); + ++ /* If l1 guest uses shadow paging, update the paging mode. */ ++ if (!nestedhvm_paging_mode_hap(v)) ++ paging_update_paging_modes(v); ++ + nv->nv_vmswitch_in_progress = 0; + + if (rc) diff --git a/26054-x86-AMD-perf-ctr-init.patch b/26054-x86-AMD-perf-ctr-init.patch new file mode 100644 index 0000000..eca8563 --- /dev/null +++ b/26054-x86-AMD-perf-ctr-init.patch @@ -0,0 +1,69 @@ +# HG changeset patch +# User Wei Wang +# Date 1350306216 -7200 +# Node ID 983108e1b56bf809f3f5eaaebf18c4b613ff0865 +# Parent 137dfbd3190e849b3a498d8b2ea282ebbf12e77d +x86/amd: Fix xen_apic_write warnings in Dom0 + +[ 0.020294] ------------[ cut here ]------------ +[ 0.020311] WARNING: at arch/x86/xen/enlighten.c:730 +xen_apic_write+0x15/0x17() +[ 0.020318] Hardware name: empty +[ 0.020323] Modules linked in: +[ 0.020334] Pid: 1, comm: swapper/0 Not tainted 3.3.8 #7 +[ 0.020340] Call Trace: +[ 0.020354] [] warn_slowpath_common+0x80/0x98 +[ 0.020369] [] warn_slowpath_null+0x15/0x17 +[ 0.020378] [] xen_apic_write+0x15/0x17 +[ 0.020392] [] perf_events_lapic_init+0x2e/0x30 +[ 0.020410] [] init_hw_perf_events+0x250/0x407 +[ 0.020419] [] ? check_bugs+0x2d/0x2d +[ 0.020430] [] do_one_initcall+0x7a/0x131 +[ 0.020444] [] kernel_init+0x91/0x15d +[ 0.020456] [] kernel_thread_helper+0x4/0x10 +[ 0.020471] [] ? retint_restore_args+0x5/0x6 +[ 0.020481] [] ? 
gs_change+0x13/0x13 +[ 0.020500] ---[ end trace a7919e7f17c0a725 ]--- + +Kernel function check_hw_exists() writes 0xabcd to msr 0xc0010201 (Performance Event +Counter 0) and read it again to check if it is running as dom0. Early amd cpus does +not reset perf counters during warm reboot. If the kernel is booted with bare metal +and then as a dom0, the content of msr 0xc0010201 will stay and the checking will +pass and PMU will be enabled unexpectedly. + +Signed-off-by: Wei Wang + +Don't reset the counters when used for the NMI watchdog. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser +Committed-by: Jan Beulich + +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -11,6 +11,7 @@ + #include + #include /* amd_init_cpu */ + #include ++#include + + #include "cpu.h" + +@@ -532,6 +533,17 @@ static void __devinit init_amd(struct cp + if (c->x86 > 0x11) + set_bit(X86_FEATURE_ARAT, c->x86_capability); + ++ /* ++ * Prior to Family 0x14, perf counters are not reset during warm reboot. ++ * We have to reset them manually. ++ */ ++ if (nmi_watchdog != NMI_LOCAL_APIC && c->x86 < 0x14) { ++ wrmsrl(MSR_K7_PERFCTR0, 0); ++ wrmsrl(MSR_K7_PERFCTR1, 0); ++ wrmsrl(MSR_K7_PERFCTR2, 0); ++ wrmsrl(MSR_K7_PERFCTR3, 0); ++ } ++ + if (cpuid_edx(0x80000007) & (1 << 10)) { + rdmsr(MSR_K7_HWCR, l, h); + l |= (1 << 27); /* Enable read-only APERF/MPERF bit */ diff --git a/26055-x86-oprof-hvm-mode.patch b/26055-x86-oprof-hvm-mode.patch new file mode 100644 index 0000000..48c704a --- /dev/null +++ b/26055-x86-oprof-hvm-mode.patch @@ -0,0 +1,65 @@ +# HG changeset patch +# User Jacob Shin +# Date 1350306291 -7200 +# Node ID 14e32621dbaf5b485b134ace4558e67c4c36e1ce +# Parent 983108e1b56bf809f3f5eaaebf18c4b613ff0865 +x86/xenoprof: fix kernel/user mode detection for HVM + +While trying oprofile under Xen, I noticed that HVM passive domain's +kernel addresses were showing up as user application. It turns out +under HVM get_cpu_user_regs()->cs contains 0x0000beef. 
+ +Signed-off-by: Jacob Shin + +Don't cast away const-ness. Use SS instead of CS to determine ring. +Special-case real and protected mode. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser +Committed-by: Jan Beulich + +--- a/xen/arch/x86/oprofile/xenoprof.c ++++ b/xen/arch/x86/oprofile/xenoprof.c +@@ -78,16 +78,26 @@ int compat_oprof_arch_counter(XEN_GUEST_ + } + #endif + +-int xenoprofile_get_mode(const struct vcpu *v, +- const struct cpu_user_regs *regs) ++int xenoprofile_get_mode(struct vcpu *curr, const struct cpu_user_regs *regs) + { + if ( !guest_mode(regs) ) + return 2; + +- if ( is_hvm_vcpu(v) ) +- return ((regs->cs & 3) != 3); ++ if ( !is_hvm_vcpu(curr) ) ++ return guest_kernel_mode(curr, regs); + +- return guest_kernel_mode(v, regs); ++ switch ( hvm_guest_x86_mode(curr) ) ++ { ++ struct segment_register ss; ++ ++ case 0: /* real mode */ ++ return 1; ++ case 1: /* vm86 mode */ ++ return 0; ++ default: ++ hvm_get_segment_register(curr, x86_seg_ss, &ss); ++ return (ss.sel & 3) != 3; ++ } + } + + /* +--- a/xen/include/asm-x86/xenoprof.h ++++ b/xen/include/asm-x86/xenoprof.h +@@ -56,7 +56,7 @@ static inline void ibs_init(void) {} + #define ibs_caps 0 + #endif + +-int xenoprofile_get_mode(const struct vcpu *, const struct cpu_user_regs *); ++int xenoprofile_get_mode(struct vcpu *, const struct cpu_user_regs *); + + static inline int xenoprof_backtrace_supported(void) + { diff --git a/26056-page-alloc-flush-filter.patch b/26056-page-alloc-flush-filter.patch new file mode 100644 index 0000000..71f3c3d --- /dev/null +++ b/26056-page-alloc-flush-filter.patch @@ -0,0 +1,84 @@ +# HG changeset patch +# User Keir Fraser +# Date 1350315491 -3600 +# Node ID 177fdda0be568ccdb62697b64aa64ee20bc55bee +# Parent 14e32621dbaf5b485b134ace4558e67c4c36e1ce +More efficient TLB-flush filtering in alloc_heap_pages(). 
+ +Rather than per-cpu filtering for every page in a super-page +allocation, simply remember the most recent TLB timestamp across all +allocated pages, and filter on that, just once, at the end of the +function. + +For large-CPU systems, doing 2MB allocations during domain creation, +this cuts down the domain creation time *massively*. + +TODO: It may make sense to move the filtering out into some callers, +such as memory.c:populate_physmap() and +memory.c:increase_reservation(), so that the filtering can be moved +outside their loops, too. + +Signed-off-by: Keir Fraser + +--- a/xen/common/page_alloc.c ++++ b/xen/common/page_alloc.c +@@ -414,9 +414,10 @@ static struct page_info *alloc_heap_page + unsigned int first_node, i, j, zone = 0, nodemask_retry = 0; + unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1); + unsigned long request = 1UL << order; +- cpumask_t mask; + struct page_info *pg; + nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map; ++ bool_t need_tlbflush = 0; ++ uint32_t tlbflush_timestamp = 0; + + if ( node == NUMA_NO_NODE ) + { +@@ -530,22 +531,19 @@ static struct page_info *alloc_heap_page + if ( d != NULL ) + d->last_alloc_node = node; + +- cpumask_clear(&mask); +- + for ( i = 0; i < (1 << order); i++ ) + { + /* Reference count must continuously be zero for free pages. */ + BUG_ON(pg[i].count_info != PGC_state_free); + pg[i].count_info = PGC_state_inuse; + +- if ( pg[i].u.free.need_tlbflush ) ++ if ( pg[i].u.free.need_tlbflush && ++ (pg[i].tlbflush_timestamp <= tlbflush_current_time()) && ++ (!need_tlbflush || ++ (pg[i].tlbflush_timestamp > tlbflush_timestamp)) ) + { +- /* Add in extra CPUs that need flushing because of this page. 
*/ +- static cpumask_t extra_cpus_mask; +- +- cpumask_andnot(&extra_cpus_mask, &cpu_online_map, &mask); +- tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp); +- cpumask_or(&mask, &mask, &extra_cpus_mask); ++ need_tlbflush = 1; ++ tlbflush_timestamp = pg[i].tlbflush_timestamp; + } + + /* Initialise fields which have other uses for free pages. */ +@@ -555,10 +553,15 @@ static struct page_info *alloc_heap_page + + spin_unlock(&heap_lock); + +- if ( unlikely(!cpumask_empty(&mask)) ) ++ if ( need_tlbflush ) + { +- perfc_incr(need_flush_tlb_flush); +- flush_tlb_mask(&mask); ++ cpumask_t mask = cpu_online_map; ++ tlbflush_filter(mask, tlbflush_timestamp); ++ if ( !cpumask_empty(&mask) ) ++ { ++ perfc_incr(need_flush_tlb_flush); ++ flush_tlb_mask(&mask); ++ } + } + + return pg; diff --git a/26061-x86-oprof-counter-range.patch b/26061-x86-oprof-counter-range.patch new file mode 100644 index 0000000..035c20d --- /dev/null +++ b/26061-x86-oprof-counter-range.patch @@ -0,0 +1,30 @@ +# HG changeset patch +# User Jan Beulich +# Date 1350465790 -7200 +# Node ID 4b4c0c7a6031820ab521fdd6764cb0df157f44bf +# Parent 4fc87c2f31a02c770655518c9e4d389302564f00 +x86/oprof: adjust off-by-one counter range checks + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/oprofile/xenoprof.c ++++ b/xen/arch/x86/oprofile/xenoprof.c +@@ -26,7 +26,7 @@ int xenoprof_arch_counter(XEN_GUEST_HAND + if ( copy_from_guest(&counter, arg, 1) ) + return -EFAULT; + +- if ( counter.ind > OP_MAX_COUNTER ) ++ if ( counter.ind >= OP_MAX_COUNTER ) + return -E2BIG; + + counter_config[counter.ind].count = counter.count; +@@ -64,7 +64,7 @@ int compat_oprof_arch_counter(XEN_GUEST_ + if ( copy_from_guest(&counter, arg, 1) ) + return -EFAULT; + +- if ( counter.ind > OP_MAX_COUNTER ) ++ if ( counter.ind >= OP_MAX_COUNTER ) + return -E2BIG; + + counter_config[counter.ind].count = counter.count; diff --git a/26062-ACPI-ERST-move-data.patch b/26062-ACPI-ERST-move-data.patch new file mode 100644 
index 0000000..963274d --- /dev/null +++ b/26062-ACPI-ERST-move-data.patch @@ -0,0 +1,93 @@ +# HG changeset patch +# User Huang Ying +# Date 1350475926 -7200 +# Node ID ec8a091efcce717584b00ce76e3cec40a6247ebc +# Parent 4b4c0c7a6031820ab521fdd6764cb0df157f44bf +ACPI/APEI: fix ERST MOVE_DATA instruction implementation + +The src_base and dst_base fields in apei_exec_context are physical +address, so they should be ioremaped before being used in ERST +MOVE_DATA instruction. + +Reported-by: Javier Martinez Canillas +Reported-by: Andrew Morton +Signed-off-by: Huang Ying + +Replace use of ioremap() by __acpi_map_table()/set_fixmap(). Fix error +handling. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser +Committed-by: Jan Beulich + +--- a/xen/drivers/acpi/apei/erst.c ++++ b/xen/drivers/acpi/apei/erst.c +@@ -247,15 +247,64 @@ static int erst_exec_move_data(struct ap + { + int rc; + u64 offset; ++#ifdef CONFIG_X86 ++ enum fixed_addresses idx; ++#endif ++ void *src, *dst; ++ ++ /* ioremap does not work in interrupt context */ ++ if (in_irq()) { ++ printk(KERN_WARNING ++ "MOVE_DATA cannot be used in interrupt context\n"); ++ return -EBUSY; ++ } + + rc = __apei_exec_read_register(entry, &offset); + if (rc) + return rc; +- memmove((void *)(unsigned long)(ctx->dst_base + offset), +- (void *)(unsigned long)(ctx->src_base + offset), +- ctx->var2); + +- return 0; ++#ifdef CONFIG_X86 ++ switch (ctx->var2) { ++ case 0: ++ return 0; ++ case 1 ... 
PAGE_SIZE: ++ break; ++ default: ++ printk(KERN_WARNING ++ "MOVE_DATA cannot be used for %#"PRIx64" bytes of data\n", ++ ctx->var2); ++ return -EOPNOTSUPP; ++ } ++ ++ src = __acpi_map_table(ctx->src_base + offset, ctx->var2); ++#else ++ src = ioremap(ctx->src_base + offset, ctx->var2); ++#endif ++ if (!src) ++ return -ENOMEM; ++ ++#ifdef CONFIG_X86 ++ BUILD_BUG_ON(FIX_ACPI_PAGES < 4); ++ idx = virt_to_fix((unsigned long)src + 2 * PAGE_SIZE); ++ offset += ctx->dst_base; ++ dst = (void *)fix_to_virt(idx) + (offset & ~PAGE_MASK); ++ set_fixmap(idx, offset); ++ if (PFN_DOWN(offset) != PFN_DOWN(offset + ctx->var2 - 1)) { ++ idx = virt_to_fix((unsigned long)dst + PAGE_SIZE); ++ set_fixmap(idx, offset + PAGE_SIZE); ++ } ++#else ++ dst = ioremap(ctx->dst_base + offset, ctx->var2); ++#endif ++ if (dst) { ++ memmove(dst, src, ctx->var2); ++ iounmap(dst); ++ } else ++ rc = -ENOMEM; ++ ++ iounmap(src); ++ ++ return rc; + } + + static struct apei_exec_ins_type erst_ins_type[] = { diff --git a/26063-x86-HPET-affinity-lock.patch b/26063-x86-HPET-affinity-lock.patch new file mode 100644 index 0000000..96433fc --- /dev/null +++ b/26063-x86-HPET-affinity-lock.patch @@ -0,0 +1,51 @@ +# HG changeset patch +# User Jan Beulich +# Date 1350476000 -7200 +# Node ID 1f4be6ee4619c88c273cb457d8e7f1eee49d00dd +# Parent ec8a091efcce717584b00ce76e3cec40a6247ebc +x86/HPET: obtain proper lock for changing IRQ affinity + +The IRQ descriptor lock should be held while adjusting the affinity of +any IRQ; the HPET channel lock isn't sufficient to protect namely +against races with moving the IRQ to a different CPU. 
+ +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/hpet.c ++++ b/xen/arch/x86/hpet.c +@@ -433,6 +433,16 @@ static struct hpet_event_channel *hpet_g + return ch; + } + ++static void set_channel_irq_affinity(const struct hpet_event_channel *ch) ++{ ++ struct irq_desc *desc = irq_to_desc(ch->irq); ++ ++ ASSERT(!local_irq_is_enabled()); ++ spin_lock(&desc->lock); ++ hpet_msi_set_affinity(desc, cpumask_of(ch->cpu)); ++ spin_unlock(&desc->lock); ++} ++ + static void hpet_attach_channel(unsigned int cpu, + struct hpet_event_channel *ch) + { +@@ -447,7 +457,7 @@ static void hpet_attach_channel(unsigned + if ( ch->cpu != cpu ) + return; + +- hpet_msi_set_affinity(irq_to_desc(ch->irq), cpumask_of(ch->cpu)); ++ set_channel_irq_affinity(ch); + } + + static void hpet_detach_channel(unsigned int cpu, +@@ -469,7 +479,7 @@ static void hpet_detach_channel(unsigned + } + + ch->cpu = cpumask_first(ch->cpumask); +- hpet_msi_set_affinity(irq_to_desc(ch->irq), cpumask_of(ch->cpu)); ++ set_channel_irq_affinity(ch); + } + + #include diff --git a/26077-stubdom_fix_compile_errors_in_grub.patch b/26077-stubdom_fix_compile_errors_in_grub.patch new file mode 100644 index 0000000..44fa2d4 --- /dev/null +++ b/26077-stubdom_fix_compile_errors_in_grub.patch @@ -0,0 +1,76 @@ +changeset: 26077:33348baecf37 +user: Olaf Hering +date: Thu Oct 18 09:34:59 2012 +0100 +files: stubdom/grub.patches/70compiler_warnings.diff +description: +stubdom: fix compile errors in grub + +Building xen.rpm in SLES11 started to fail due to these compiler +warnings: + +[ 1436s] ../grub-upstream/netboot/fsys_tftp.c:213: warning: operation on 'block' may be undefined +[ 1437s] ../grub-upstream/netboot/main.c:444: warning: operation on 'block' may be undefined + +[ 1234s] E: xen sequence-point ../grub-upstream/netboot/fsys_tftp.c:213 +[ 1234s] E: xen sequence-point ../grub-upstream/netboot/main.c:444 + +The reason for this is that the assignment is done twice: + tp.u.ack.block = ((uint16_t)( 
(((uint16_t)((block = prevblock)) & (uint16_t)0x00ffU) << 8) | (((uint16_t)((block = prevblock)) & (uint16_t)0xff00U) >> 8))); + +Fix this package build error by adding another patch for grub, which +moves the assignment out of the macro usage. + +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r 8dcab28b8081 -r 33348baecf37 stubdom/grub.patches/70compiler_warnings.diff +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/stubdom/grub.patches/70compiler_warnings.diff Thu Oct 18 09:34:59 2012 +0100 +@@ -0,0 +1,45 @@ ++[ 1436s] ../grub-upstream/netboot/fsys_tftp.c:213: warning: operation on 'block' may be undefined ++[ 1437s] ../grub-upstream/netboot/main.c:444: warning: operation on 'block' may be undefined ++ ++[ 1234s] E: xen sequence-point ../grub-upstream/netboot/fsys_tftp.c:213 ++[ 1234s] E: xen sequence-point ../grub-upstream/netboot/main.c:444 ++ ++--- ++ netboot/fsys_tftp.c | 5 ++++- ++ netboot/main.c | 5 ++++- ++ 2 files changed, 8 insertions(+), 2 deletions(-) ++ ++Index: grub-0.97/netboot/fsys_tftp.c ++=================================================================== ++--- grub-0.97.orig/netboot/fsys_tftp.c +++++ grub-0.97/netboot/fsys_tftp.c ++@@ -209,8 +209,11 @@ buf_fill (int abort) ++ break; ++ ++ if ((block || bcounter) && (block != prevblock + (unsigned short) 1)) +++ { +++ block = prevblock; ++ /* Block order should be continuous */ ++- tp.u.ack.block = htons (block = prevblock); +++ tp.u.ack.block = htons (block); +++ } ++ ++ /* Should be continuous. */ ++ tp.opcode = abort ? 
htons (TFTP_ERROR) : htons (TFTP_ACK); ++Index: grub-0.97/netboot/main.c ++=================================================================== ++--- grub-0.97.orig/netboot/main.c +++++ grub-0.97/netboot/main.c ++@@ -440,8 +440,11 @@ tftp (const char *name, int (*fnc) (unsi ++ break; ++ ++ if ((block || bcounter) && (block != prevblock + 1)) +++ { +++ block = prevblock; ++ /* Block order should be continuous */ ++- tp.u.ack.block = htons (block = prevblock); +++ tp.u.ack.block = htons (block); +++ } ++ ++ /* Should be continuous. */ ++ tp.opcode = htons (TFTP_ACK); diff --git a/26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch b/26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch new file mode 100644 index 0000000..e28461f --- /dev/null +++ b/26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch @@ -0,0 +1,187 @@ +changeset: 26078:019ca95dfa34 +user: Olaf Hering +date: Thu Oct 18 09:35:00 2012 +0100 +files: Makefile README install.sh tools/hotplug/Linux/Makefile tools/hotplug/Linux/xen-backend.agent +description: +hotplug/Linux: remove hotplug support, rely on udev instead + +Hotplug has been replaced by udev since several years. Remove the +hotplug related files and install udev unconditionally. + +This makes it possible to remove udev from rpm BuildRequires which +reduces the buildtime dependency chain. For openSuSE:Factory it was +done just now: +http://lists.opensuse.org/opensuse-buildservice/2012-10/msg00085.html + +The patch by itself will have no practical impact unless someone +attempts to build and run a Xen dom0 on a really old base system. e.g. 
+circa SLES9/2007 or earlier + +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r 33348baecf37 -r 019ca95dfa34 Makefile +--- a/Makefile Thu Oct 18 09:34:59 2012 +0100 ++++ b/Makefile Thu Oct 18 09:35:00 2012 +0100 +@@ -223,7 +223,6 @@ uninstall: + $(MAKE) -C xen uninstall + rm -rf $(D)$(CONFIG_DIR)/init.d/xendomains $(D)$(CONFIG_DIR)/init.d/xend + rm -rf $(D)$(CONFIG_DIR)/init.d/xencommons $(D)$(CONFIG_DIR)/init.d/xen-watchdog +- rm -rf $(D)$(CONFIG_DIR)/hotplug/xen-backend.agent + rm -f $(D)$(CONFIG_DIR)/udev/rules.d/xen-backend.rules + rm -f $(D)$(CONFIG_DIR)/udev/rules.d/xend.rules + rm -f $(D)$(SYSCONFIG_DIR)/xendomains +diff -r 33348baecf37 -r 019ca95dfa34 README +--- a/README Thu Oct 18 09:34:59 2012 +0100 ++++ b/README Thu Oct 18 09:35:00 2012 +0100 +@@ -54,7 +54,7 @@ provided by your OS distributor: + * pkg-config + * bridge-utils package (/sbin/brctl) + * iproute package (/sbin/ip) +- * hotplug or udev ++ * udev + * GNU bison and GNU flex + * GNU gettext + * 16-bit x86 assembler, loader and compiler (dev86 rpm or bin86 & bcc debs) +@@ -120,9 +120,9 @@ 4. To rebuild an existing tree without m + + make install and make dist differ in that make install does the + right things for your local machine (installing the appropriate +- version of hotplug or udev scripts, for example), but make dist +- includes all versions of those scripts, so that you can copy the dist +- directory to another machine and install from that distribution. ++ version of udev scripts, for example), but make dist includes all ++ versions of those scripts, so that you can copy the dist directory ++ to another machine and install from that distribution. 
+ + Python Runtime Libraries + ======================== +diff -r 33348baecf37 -r 019ca95dfa34 install.sh +--- a/install.sh Thu Oct 18 09:34:59 2012 +0100 ++++ b/install.sh Thu Oct 18 09:35:00 2012 +0100 +@@ -27,20 +27,6 @@ echo "Installing Xen from '$src' to '$ds + echo "Installing Xen from '$src' to '$dst'..." + (cd $src; tar -cf - * ) | tar -C "$tmp" -xf - + +-[ -x "$(which udevinfo)" ] && \ +- UDEV_VERSION=$(udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/') +- +-[ -z "$UDEV_VERSION" -a -x /sbin/udevadm ] && \ +- UDEV_VERSION=$(/sbin/udevadm info -V | awk '{print $NF}') +- +-if [ -n "$UDEV_VERSION" ] && [ $UDEV_VERSION -ge 059 ]; then +- echo " - installing for udev-based system" +- rm -rf "$tmp/etc/hotplug" +-else +- echo " - installing for hotplug-based system" +- rm -rf "$tmp/etc/udev" +-fi +- + echo " - modifying permissions" + chmod -R a+rX "$tmp" + +diff -r 33348baecf37 -r 019ca95dfa34 tools/hotplug/Linux/Makefile +--- a/tools/hotplug/Linux/Makefile Thu Oct 18 09:34:59 2012 +0100 ++++ b/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:00 2012 +0100 +@@ -27,31 +27,8 @@ XEN_SCRIPT_DATA += block-common.sh vtpm- + XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh + XEN_SCRIPT_DATA += vtpm-migration.sh vtpm-impl + +-XEN_HOTPLUG_DIR = $(CONFIG_DIR)/hotplug +-XEN_HOTPLUG_SCRIPTS = xen-backend.agent +- +-UDEVVER = 0 +-ifeq ($(shell [ -x /sbin/udevadm ] && echo 1),1) +-UDEVVER = $(shell /sbin/udevadm info -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' ) +-endif +-ifeq ($(shell [ -x /usr/bin/udevinfo ] && echo 1),1) +-UDEVVER = $(shell /usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' ) +-endif +- + UDEV_RULES_DIR = $(CONFIG_DIR)/udev + UDEV_RULES = xen-backend.rules xend.rules +- +-DI = $(if $(DISTDIR),$(shell readlink -f $(DISTDIR)),) +-DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),) +-ifeq ($(findstring $(DI),$(DE)),$(DI)) +-HOTPLUGS=install-hotplug install-udev +-else +-ifeq ($(shell [ 
$(UDEVVER) -ge 059 ] && echo 1),1) +-HOTPLUGS=install-udev +-else +-HOTPLUGS=install-hotplug +-endif +-endif + + .PHONY: all + all: +@@ -60,7 +37,7 @@ build: + build: + + .PHONY: install +-install: all install-initd install-scripts $(HOTPLUGS) ++install: all install-initd install-scripts install-udev + + # See docs/misc/distro_mapping.txt for INITD_DIR location + .PHONY: install-initd +@@ -87,15 +64,6 @@ install-scripts: + $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \ + done + +-.PHONY: install-hotplug +-install-hotplug: +- [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \ +- $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR) +- set -e; for i in $(XEN_HOTPLUG_SCRIPTS); \ +- do \ +- $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \ +- done +- + .PHONY: install-udev + install-udev: + [ -d $(DESTDIR)$(UDEV_RULES_DIR) ] || \ +diff -r 33348baecf37 -r 019ca95dfa34 tools/hotplug/Linux/xen-backend.agent +--- a/tools/hotplug/Linux/xen-backend.agent Thu Oct 18 09:34:59 2012 +0100 ++++ /dev/null Thu Jan 01 00:00:00 1970 +0000 +@@ -1,39 +0,0 @@ +-#! /bin/bash +- +-PATH=/etc/xen/scripts:$PATH +- +-. 
/etc/xen/scripts/locking.sh +- +-claim_lock xenbus_hotplug_global +- +-case "$XENBUS_TYPE" in +- tap) +- /etc/xen/scripts/blktap "$ACTION" +- ;; +- vbd) +- /etc/xen/scripts/block "$ACTION" +- ;; +- vtpm) +- /etc/xen/scripts/vtpm "$ACTION" +- ;; +- vif) +- [ -n "$script" ] && $script "$ACTION" +- ;; +- vscsi) +- /etc/xen/scripts/vscsi "$ACTION" +- ;; +-esac +- +-case "$ACTION" in +- add) +- ;; +- remove) +- /etc/xen/scripts/xen-hotplug-cleanup +- ;; +- online) +- ;; +- offline) +- ;; +-esac +- +-release_lock xenbus_hotplug_global diff --git a/26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch b/26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch new file mode 100644 index 0000000..ac44364 --- /dev/null +++ b/26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch @@ -0,0 +1,36 @@ +changeset: 26079:b3b03536789a +user: Olaf Hering +date: Thu Oct 18 09:35:01 2012 +0100 +files: tools/hotplug/Linux/locking.sh +description: +hotplug/Linux: close lockfd after lock attempt + +When a HVM guest is shutdown some of the 'remove' events can not claim +the lock for some reason. Instead they try to grab the lock in a busy +loop, until udev reaps the xen-hotplug-cleanup helper. +After analyzing the resulting logfile it's not obvious what the cause is. +The only explanation is that bash (?) gets confused if the same lockfd +is opened again and again. Closing it in each iteration seems to fix the +issue. + +This was observed with sles11sp2 (bash 3.2) and 4.2 xend. 
+ +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +[ ijc -- added the comment ] +Committed-by: Ian Campbell + + +diff -r 019ca95dfa34 -r b3b03536789a tools/hotplug/Linux/locking.sh +--- a/tools/hotplug/Linux/locking.sh Thu Oct 18 09:35:00 2012 +0100 ++++ b/tools/hotplug/Linux/locking.sh Thu Oct 18 09:35:01 2012 +0100 +@@ -59,6 +59,9 @@ claim_lock() + print "y\n" if $fd_inum eq $file_inum; + ' "$_lockfile" ) + if [ x$rightfile = xy ]; then break; fi ++ # Some versions of bash appear to be buggy if the same ++ # $_lockfile is opened repeatedly. Close the current fd here. ++ eval "exec $_lockfd<&-" + done + } + diff --git a/26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch b/26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch new file mode 100644 index 0000000..c0d428b --- /dev/null +++ b/26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch @@ -0,0 +1,30 @@ +changeset: 26081:02064298ebcb +user: Olaf Hering +date: Thu Oct 18 09:35:03 2012 +0100 +files: stubdom/Makefile +description: +stubdom: fix rpmlint warning spurious-executable-perm + +[ 1758s] xen-tools.x86_64: E: spurious-executable-perm (Badness: 50) /usr/lib/xen/boot/xenstore-stubdom.gz +[ 1758s] The file is installed with executable permissions, but was identified as one +[ 1758s] that probably should not be executable. Verify if the executable bits are +[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged under +[ 1758s] %docdir/examples, which will avoid this warning. 
+ +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r 25b2f53d2583 -r 02064298ebcb stubdom/Makefile +--- a/stubdom/Makefile Thu Oct 18 09:35:02 2012 +0100 ++++ b/stubdom/Makefile Thu Oct 18 09:35:03 2012 +0100 +@@ -396,7 +396,7 @@ install-grub: pv-grub + + install-xenstore: xenstore-stubdom + $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot" +- $(INSTALL_PROG) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz" ++ $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz" + + ####### + # clean diff --git a/26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch b/26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch new file mode 100644 index 0000000..1def328 --- /dev/null +++ b/26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch @@ -0,0 +1,30 @@ +changeset: 26082:8cf26ace9ca0 +user: Olaf Hering +date: Thu Oct 18 09:35:03 2012 +0100 +files: tools/blktap2/vhd/lib/Makefile +description: +blktap2/libvhd: fix rpmlint warning spurious-executable-perm + +[ 1758s] xen-devel.x86_64: E: spurious-executable-perm (Badness: 50) /usr/lib64/libvhd.a +[ 1758s] The file is installed with executable permissions, but was identified as one +[ 1758s] that probably should not be executable. Verify if the executable bits are +[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged under +[ 1758s] %docdir/examples, which will avoid this warning. 
+ +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r 02064298ebcb -r 8cf26ace9ca0 tools/blktap2/vhd/lib/Makefile +--- a/tools/blktap2/vhd/lib/Makefile Thu Oct 18 09:35:03 2012 +0100 ++++ b/tools/blktap2/vhd/lib/Makefile Thu Oct 18 09:35:03 2012 +0100 +@@ -68,7 +68,7 @@ libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR + + install: all + $(INSTALL_DIR) -p $(DESTDIR)$(INST-DIR) +- $(INSTALL_PROG) libvhd.a $(DESTDIR)$(INST-DIR) ++ $(INSTALL_DATA) libvhd.a $(DESTDIR)$(INST-DIR) + $(INSTALL_PROG) libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR) + ln -sf libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR)/libvhd.so.$(LIBVHD-MAJOR) + ln -sf libvhd.so.$(LIBVHD-MAJOR) $(DESTDIR)$(INST-DIR)/libvhd.so diff --git a/26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch b/26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch new file mode 100644 index 0000000..563b5bd --- /dev/null +++ b/26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch @@ -0,0 +1,51 @@ +changeset: 26083:3fbeb019d522 +user: Olaf Hering +date: Thu Oct 18 09:35:04 2012 +0100 +files: tools/blktap/lib/Makefile +description: +blktap: fix rpmlint warning spurious-executable-perm + +[ 1758s] xen-devel.x86_64: E: spurious-executable-perm (Badness: 50) /usr/lib64/libblktap.a +[ 1758s] The file is installed with executable permissions, but was identified as one +[ 1758s] that probably should not be executable. Verify if the executable bits are +[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged under +[ 1758s] %docdir/examples, which will avoid this warning. 
+ +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r 8cf26ace9ca0 -r 3fbeb019d522 tools/blktap/lib/Makefile +--- a/tools/blktap/lib/Makefile Thu Oct 18 09:35:03 2012 +0100 ++++ b/tools/blktap/lib/Makefile Thu Oct 18 09:35:04 2012 +0100 +@@ -23,23 +23,25 @@ OBJS_PIC = $(SRCS:.c=.opic) + OBJS_PIC = $(SRCS:.c=.opic) + IBINS := + +-LIB = libblktap.a libblktap.so.$(MAJOR).$(MINOR) ++LIB = libblktap.a ++LIB_SO = libblktap.so.$(MAJOR).$(MINOR) + + .PHONY: all +-all: $(LIB) ++all: $(LIB) $(LIB_SO) + + .PHONY: install + install: all + $(INSTALL_DIR) $(DESTDIR)$(LIBDIR) + $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR) +- $(INSTALL_PROG) $(LIB) $(DESTDIR)$(LIBDIR) ++ $(INSTALL_PROG) $(LIB_SO) $(DESTDIR)$(LIBDIR) ++ $(INSTALL_DATA) $(LIB) $(DESTDIR)$(LIBDIR) + ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)/libblktap.so.$(MAJOR) + ln -sf libblktap.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libblktap.so + $(INSTALL_DATA) blktaplib.h $(DESTDIR)$(INCLUDEDIR) + + .PHONY: clean + clean: +- rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS ++ rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) $(LIB_SO) *~ $(DEPS) xen TAGS + + libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC) + $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_LDFLAGS) \ diff --git a/26084-hotplug_install_hotplugpath.sh_as_data_file.patch b/26084-hotplug_install_hotplugpath.sh_as_data_file.patch new file mode 100644 index 0000000..65d8b9c --- /dev/null +++ b/26084-hotplug_install_hotplugpath.sh_as_data_file.patch @@ -0,0 +1,34 @@ +changeset: 26084:fe9a0eb9aaaa +user: Olaf Hering +date: Thu Oct 18 09:35:05 2012 +0100 +files: tools/hotplug/common/Makefile +description: +hotplug: install hotplugpath.sh as data file + +rpmlint complains a script helper which is only sourced: + +[ 1875s] xen-tools.i586: W: script-without-shebang /etc/xen/scripts/hotplugpath.sh +[ 1875s] This text file has executable bits set or is located in a path dedicated for +[ 1875s] executables, but lacks a 
shebang and cannot thus be executed. If the file is +[ 1875s] meant to be an executable script, add the shebang, otherwise remove the +[ 1875s] executable bits or move the file elsewhere. + +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r 3fbeb019d522 -r fe9a0eb9aaaa tools/hotplug/common/Makefile +--- a/tools/hotplug/common/Makefile Thu Oct 18 09:35:04 2012 +0100 ++++ b/tools/hotplug/common/Makefile Thu Oct 18 09:35:05 2012 +0100 +@@ -6,8 +6,8 @@ HOTPLUGPATH="hotplugpath.sh" + # OS-independent hotplug scripts go in this directory + + # Xen scripts to go there. +-XEN_SCRIPTS = $(HOTPLUGPATH) +-XEN_SCRIPT_DATA = ++XEN_SCRIPTS = ++XEN_SCRIPT_DATA = $(HOTPLUGPATH) + + genpath-target = $(call buildmakevars2file,$(HOTPLUGPATH)) + $(eval $(genpath-target)) diff --git a/26085-stubdom_install_stubdompath.sh_as_data_file.patch b/26085-stubdom_install_stubdompath.sh_as_data_file.patch new file mode 100644 index 0000000..7a3a0c0 --- /dev/null +++ b/26085-stubdom_install_stubdompath.sh_as_data_file.patch @@ -0,0 +1,33 @@ +changeset: 26085:e32f4301f384 +user: Olaf Hering +date: Thu Oct 18 09:35:06 2012 +0100 +files: stubdom/Makefile +description: +stubdom: install stubdompath.sh as data file + +rpmlint complains a script helper which is only sourced: + +[ 1875s] xen-tools.i586: W: script-without-shebang /usr/lib/xen/bin/stubdompath.sh +[ 1875s] This text file has executable bits set or is located in a path dedicated for +[ 1875s] executables, but lacks a shebang and cannot thus be executed. If the file is +[ 1875s] meant to be an executable script, add the shebang, otherwise remove the +[ 1875s] executable bits or move the file elsewhere. 
+ +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r fe9a0eb9aaaa -r e32f4301f384 stubdom/Makefile +--- a/stubdom/Makefile Thu Oct 18 09:35:05 2012 +0100 ++++ b/stubdom/Makefile Thu Oct 18 09:35:06 2012 +0100 +@@ -386,7 +386,8 @@ install-readme: + + install-ioemu: ioemu-stubdom + $(INSTALL_DIR) "$(DESTDIR)$(LIBEXEC)" +- $(INSTALL_PROG) stubdompath.sh stubdom-dm "$(DESTDIR)$(LIBEXEC)" ++ $(INSTALL_PROG) stubdom-dm "$(DESTDIR)$(LIBEXEC)" ++ $(INSTALL_DATA) stubdompath.sh "$(DESTDIR)$(LIBEXEC)" + $(INSTALL_DIR) "$(DESTDIR)$(XENFIRMWAREDIR)" + $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-ioemu/mini-os.gz "$(DESTDIR)$(XENFIRMWAREDIR)/ioemu-stubdom.gz" + diff --git a/26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch b/26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch new file mode 100644 index 0000000..9257374 --- /dev/null +++ b/26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch @@ -0,0 +1,21 @@ +changeset: 26086:ba6b1db89ec8 +user: Olaf Hering +date: Thu Oct 18 09:35:07 2012 +0100 +files: tools/hotplug/Linux/init.d/sysconfig.xendomains +description: +hotplug/Linux: correct sysconfig tag in xendomains + +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r e32f4301f384 -r ba6b1db89ec8 tools/hotplug/Linux/init.d/sysconfig.xendomains +--- a/tools/hotplug/Linux/init.d/sysconfig.xendomains Thu Oct 18 09:35:06 2012 +0100 ++++ b/tools/hotplug/Linux/init.d/sysconfig.xendomains Thu Oct 18 09:35:07 2012 +0100 +@@ -1,4 +1,4 @@ +-## Path: System/xen ++## Path: System/Virtualization + ## Description: xen domain start/stop on boot + ## Type: string + ## Default: diff --git a/26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch b/26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch new file mode 100644 index 0000000..3aa0626 --- /dev/null +++ b/26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch @@ -0,0 +1,36 @@ +changeset: 
26087:6239ace16749 +user: Olaf Hering +date: Thu Oct 18 09:35:07 2012 +0100 +files: tools/hotplug/Linux/Makefile +description: +hotplug/Linux: install sysconfig files as data files + +rpmlint complains about wrong permissions of config files: + +[ 455s] xen-tools.i586: W: script-without-shebang /var/adm/fillup-templates/sysconfig.xendomains +[ 455s] xen-tools.i586: W: script-without-shebang /var/adm/fillup-templates/sysconfig.xencommons +[ 455s] This text file has executable bits set or is located in a path dedicated for +[ 455s] executables, but lacks a shebang and cannot thus be executed. If the file is +[ 455s] meant to be an executable script, add the shebang, otherwise remove the +[ 455s] executable bits or move the file elsewhere. + +Signed-off-by: Olaf Hering +Acked-by: Ian Campbell +Committed-by: Ian Campbell + + +diff -r ba6b1db89ec8 -r 6239ace16749 tools/hotplug/Linux/Makefile +--- a/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:07 2012 +0100 ++++ b/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:07 2012 +0100 +@@ -46,9 +46,9 @@ install-initd: + [ -d $(DESTDIR)$(SYSCONFIG_DIR) ] || $(INSTALL_DIR) $(DESTDIR)$(SYSCONFIG_DIR) + $(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)$(INITD_DIR) + $(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)$(INITD_DIR) +- $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains ++ $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains + $(INSTALL_PROG) $(XENCOMMONS_INITD) $(DESTDIR)$(INITD_DIR) +- $(INSTALL_PROG) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons ++ $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons + $(INSTALL_PROG) init.d/xen-watchdog $(DESTDIR)$(INITD_DIR) + + .PHONY: install-scripts diff --git a/26088-tools_xend_fix_wrong_condition_check_for_xml_file.patch b/26088-tools_xend_fix_wrong_condition_check_for_xml_file.patch new file mode 100644 index 0000000..db67bf8 --- /dev/null +++ 
b/26088-tools_xend_fix_wrong_condition_check_for_xml_file.patch @@ -0,0 +1,29 @@ +changeset: 26088:dd64a1bdbe3a +user: Joe Jin +date: Thu Oct 18 09:35:08 2012 +0100 +files: tools/python/xen/xend/XendStateStore.py +description: +tools: xend: fix wrong condition check for xml file + +In commit e8d40584, it intended to check xml file size and when empty will +return, the condition should be "if os.path.getsize(xml_path) == 0" rather +than "if not os.path.getsize(xml_path) == 0". + +Signed-off-by: Chuang Cao +Signed-off-by: Joe Jin +Reviewed-by: Konrad Rzeszutek Wilk +Committed-by: Ian Campbell + + +diff -r 6239ace16749 -r dd64a1bdbe3a tools/python/xen/xend/XendStateStore.py +--- a/tools/python/xen/xend/XendStateStore.py Thu Oct 18 09:35:07 2012 +0100 ++++ b/tools/python/xen/xend/XendStateStore.py Thu Oct 18 09:35:08 2012 +0100 +@@ -101,7 +101,7 @@ class XendStateStore: + if not os.path.exists(xml_path): + return {} + +- if not os.path.getsize(xml_path) == 0: ++ if os.path.getsize(xml_path) == 0: + return {} + + dom = minidom.parse(xml_path) diff --git a/26093-HVM-PoD-grant-mem-type.patch b/26093-HVM-PoD-grant-mem-type.patch new file mode 100644 index 0000000..9b4f7e0 --- /dev/null +++ b/26093-HVM-PoD-grant-mem-type.patch @@ -0,0 +1,32 @@ +# HG changeset patch +# User Olaf Hering +# Date 1350655745 -7200 +# Node ID 4ae08ca5500f68d19a689c06489024157300d7b0 +# Parent 478ba3f146df23d2cfa95fc603d0b4b9d21ba15d +hvm: handle PoD and grant pages in HVMOP_get_mem_type + +During kexec in a ballooned PVonHVM guest the new kernel needs to check +each pfn if its backed by a mfn to find ballooned pages. Currently all +PoD and grant pages will appear as HVMMEM_mmio_dm, so the new kernel has +to assume they are ballooned. This is wrong: PoD pages may turn into +real RAM at runtime, grant pages are also RAM. 
+ +Signed-off-by: Olaf Hering +Acked-by: Tim Deegan +Committed-by: Tim Deegan + +Index: xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c +=================================================================== +--- xen-4.2.0-testing.orig/xen/arch/x86/hvm/hvm.c ++++ xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c +@@ -4135,6 +4135,10 @@ long do_hvm_op(unsigned long op, XEN_GUE + a.mem_type = HVMMEM_ram_ro; + else if ( p2m_is_ram(t) ) + a.mem_type = HVMMEM_ram_rw; ++ else if ( p2m_is_magic(t) ) ++ a.mem_type = HVMMEM_ram_rw; ++ else if ( p2m_is_grant(t) ) ++ a.mem_type = HVMMEM_ram_rw; + else + a.mem_type = HVMMEM_mmio_dm; + rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0; diff --git a/26093-hvm_handle_PoD_and_grant_pages_in_HVMOP_get_mem_type.patch b/26093-hvm_handle_PoD_and_grant_pages_in_HVMOP_get_mem_type.patch new file mode 100644 index 0000000..30b5003 --- /dev/null +++ b/26093-hvm_handle_PoD_and_grant_pages_in_HVMOP_get_mem_type.patch @@ -0,0 +1,31 @@ +changeset: 26093:4ae08ca5500f +user: Olaf Hering +date: Fri Oct 19 16:09:05 2012 +0200 +files: xen/arch/x86/hvm/hvm.c +description: +hvm: handle PoD and grant pages in HVMOP_get_mem_type + +During kexec in a ballooned PVonHVM guest the new kernel needs to check +each pfn if its backed by a mfn to find ballooned pages. Currently all +PoD and grant pages will appear as HVMMEM_mmio_dm, so the new kernel has +to assume they are ballooned. This is wrong: PoD pages may turn into +real RAM at runtime, grant pages are also RAM. + +Signed-off-by: Olaf Hering +Acked-by: Tim Deegan +Committed-by: Tim Deegan + + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -4135,6 +4135,10 @@ long do_hvm_op(unsigned long op, XEN_GUE + a.mem_type = HVMMEM_ram_ro; + else if ( p2m_is_ram(t) ) + a.mem_type = HVMMEM_ram_rw; ++ else if ( p2m_is_magic(t) ) ++ a.mem_type = HVMMEM_ram_rw; ++ else if ( p2m_is_grant(t) ) ++ a.mem_type = HVMMEM_ram_rw; + else + a.mem_type = HVMMEM_mmio_dm; + rc = copy_to_guest(arg, &a, 1) ? 
-EFAULT : 0; diff --git a/26095-SVM-nested-leak.patch b/26095-SVM-nested-leak.patch new file mode 100644 index 0000000..53afd73 --- /dev/null +++ b/26095-SVM-nested-leak.patch @@ -0,0 +1,32 @@ +# HG changeset patch +# User Christoph Egger +# Date 1350976407 -7200 +# Node ID a7503ce27d462056421c6d74737cee08ab4ae31e +# Parent c69bcb24812896dc6d5cf033babb7e79b8a50aec +nestedsvm: fix memory leak on shutdown/crash + +Fix memory leak of l1 vmcb page when destroying a vcpu while l2 guest +is running. + +Signed-off-by: Christoph Egger +Acked-by: Tim Deegan +Committed-by: Jan Beulich + +--- a/xen/arch/x86/hvm/svm/nestedsvm.c ++++ b/xen/arch/x86/hvm/svm/nestedsvm.c +@@ -122,6 +122,15 @@ void nsvm_vcpu_destroy(struct vcpu *v) + struct nestedvcpu *nv = &vcpu_nestedhvm(v); + struct nestedsvm *svm = &vcpu_nestedsvm(v); + ++ /* ++ * When destroying the vcpu, it may be running on behalf of l2 guest. ++ * Therefore we need to switch the VMCB pointer back to the l1 vmcb, ++ * in order to avoid double free of l2 vmcb and the possible memory leak ++ * of l1 vmcb page. ++ */ ++ if (nv->nv_n1vmcx) ++ v->arch.hvm_svm.vmcb = nv->nv_n1vmcx; ++ + if (svm->ns_cached_msrpm) { + free_xenheap_pages(svm->ns_cached_msrpm, + get_order_from_bytes(MSRPM_SIZE)); diff --git a/26096-SVM-nested-vmexit-emul.patch b/26096-SVM-nested-vmexit-emul.patch new file mode 100644 index 0000000..feb8c22 --- /dev/null +++ b/26096-SVM-nested-vmexit-emul.patch @@ -0,0 +1,61 @@ +# HG changeset patch +# User Christoph Egger +# Date 1350976467 -7200 +# Node ID d642720e1ea996ce85203fc9718f64cf2cab0468 +# Parent a7503ce27d462056421c6d74737cee08ab4ae31e +nestedsvm: fix VMEXIT emulation + +Values in regs can be newer than those in the shadow vmcb (e.g. due to +an instruction emulation right before). So use the values from regs. 
+ +Signed-off-by: Christoph Egger +Acked-by: Tim Deegan +Committed-by: Jan Beulich + +--- a/xen/arch/x86/hvm/svm/nestedsvm.c ++++ b/xen/arch/x86/hvm/svm/nestedsvm.c +@@ -995,7 +995,7 @@ nsvm_vmcb_guest_intercepts_trap(struct v + } + + static int +-nsvm_vmcb_prepare4vmexit(struct vcpu *v) ++nsvm_vmcb_prepare4vmexit(struct vcpu *v, struct cpu_user_regs *regs) + { + struct nestedvcpu *nv = &vcpu_nestedhvm(v); + struct nestedsvm *svm = &vcpu_nestedsvm(v); +@@ -1119,17 +1119,22 @@ nsvm_vmcb_prepare4vmexit(struct vcpu *v) + ns_vmcb->_dr7 = n2vmcb->_dr7; + ns_vmcb->_dr6 = n2vmcb->_dr6; + ++ /* Restore registers from regs as those values ++ * can be newer than in n2vmcb (e.g. due to an ++ * instruction emulation right before). ++ */ ++ + /* RFLAGS */ +- ns_vmcb->rflags = n2vmcb->rflags; ++ ns_vmcb->rflags = n2vmcb->rflags = regs->eflags; + + /* RIP */ +- ns_vmcb->rip = n2vmcb->rip; ++ ns_vmcb->rip = n2vmcb->rip = regs->eip; + + /* RSP */ +- ns_vmcb->rsp = n2vmcb->rsp; ++ ns_vmcb->rsp = n2vmcb->rsp = regs->esp; + + /* RAX */ +- ns_vmcb->rax = n2vmcb->rax; ++ ns_vmcb->rax = n2vmcb->rax = regs->eax; + + /* Keep the l2 guest values of the fs, gs, ldtr, tr, kerngsbase, + * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp, +@@ -1363,7 +1368,7 @@ nestedsvm_vmexit_n2n1(struct vcpu *v, st + ASSERT(vcpu_nestedhvm(v).nv_vmswitch_in_progress); + ASSERT(nestedhvm_vcpu_in_guestmode(v)); + +- rc = nsvm_vmcb_prepare4vmexit(v); ++ rc = nsvm_vmcb_prepare4vmexit(v, regs); + if (rc) + ret = NESTEDHVM_VMEXIT_ERROR; + diff --git a/26098-perfc-build.patch b/26098-perfc-build.patch new file mode 100644 index 0000000..b8a0514 --- /dev/null +++ b/26098-perfc-build.patch @@ -0,0 +1,43 @@ +# HG changeset patch +# User Dario Faggioli +# Date 1350990742 25200 +# Node ID 6d54c3338c52ec3a8e671c13284a59a535a3273f +# Parent 67c27013e191598543ccc1b8f8f1d533c7a5164b +xen: fix build when 'perfc=y' + +Which was failing with this: + + viridian.c: In function ‘wrmsr_viridian_regs’: + viridian.c:254:1: 
error: ‘PERFC_mshv_wrmsr_apic_msr’ undeclared + (first use in this function) + viridian.c:254:1: note: each undeclared identifier is reported only + once for each function it appears in + viridian.c: In function ‘rdmsr_viridian_regs’: + viridian.c:305:1: error: ‘PERFC_mshv_rdmsr_apic_msr’ undeclared + (first use in this function) + +as a consequence of 17b754cab7b0 using but not defining +the counters. + +Signed-off-by: Dario Faggioli +Acked-by: George Dunlap +Committed-by: Keir Fraser + +--- a/xen/include/asm-x86/perfc_defn.h ++++ b/xen/include/asm-x86/perfc_defn.h +@@ -121,6 +121,7 @@ PERFCOUNTER(mshv_rdmsr_vp_index, + PERFCOUNTER(mshv_rdmsr_icr, "MS Hv rdmsr icr") + PERFCOUNTER(mshv_rdmsr_tpr, "MS Hv rdmsr tpr") + PERFCOUNTER(mshv_rdmsr_apic_assist, "MS Hv rdmsr APIC assist") ++PERFCOUNTER(mshv_rdmsr_apic_msr, "MS Hv rdmsr APIC msr") + PERFCOUNTER(mshv_wrmsr_osid, "MS Hv wrmsr Guest OS ID") + PERFCOUNTER(mshv_wrmsr_hc_page, "MS Hv wrmsr hypercall page") + PERFCOUNTER(mshv_wrmsr_vp_index, "MS Hv wrmsr vp index") +@@ -128,6 +129,7 @@ PERFCOUNTER(mshv_wrmsr_icr, + PERFCOUNTER(mshv_wrmsr_tpr, "MS Hv wrmsr tpr") + PERFCOUNTER(mshv_wrmsr_eoi, "MS Hv wrmsr eoi") + PERFCOUNTER(mshv_wrmsr_apic_assist, "MS Hv wrmsr APIC assist") ++PERFCOUNTER(mshv_wrmsr_apic_msr, "MS Hv wrmsr APIC msr") + + PERFCOUNTER(realmode_emulations, "realmode instructions emulated") + PERFCOUNTER(realmode_exits, "vmexits from realmode") diff --git a/26102-x86-IOAPIC-legacy-not-first.patch b/26102-x86-IOAPIC-legacy-not-first.patch new file mode 100644 index 0000000..f5d232d --- /dev/null +++ b/26102-x86-IOAPIC-legacy-not-first.patch @@ -0,0 +1,99 @@ +References: bnc#784087 + +# HG changeset patch +# User Jan Beulich +# Date 1351093908 -7200 +# Node ID 22e08c9ac770db07c3c3e7c844aa7153050939f3 +# Parent 07cf00a917cd1d1849f3e40d5b8ecc2cd8964fe8 +x86: don't special case first IO-APIC + +It has always been puzzling me why the first IO-APIC gets special cased +in two places, and finally Xen got run on a 
system where this breaks: + +(XEN) ACPI: IOAPIC (id[0x10] address[0xfecff000] gsi_base[0]) +(XEN) IOAPIC[0]: apic_id 16, version 17, address 0xfecff000, GSI 0-2 +(XEN) ACPI: IOAPIC (id[0x0f] address[0xfec00000] gsi_base[3]) +(XEN) IOAPIC[1]: apic_id 15, version 17, address 0xfec00000, GSI 3-38 +(XEN) ACPI: IOAPIC (id[0x0e] address[0xfec01000] gsi_base[39]) +(XEN) IOAPIC[2]: apic_id 14, version 17, address 0xfec01000, GSI 39-74 +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 1 global_irq 4 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 5 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 3 global_irq 6 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 4 global_irq 7 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 6 global_irq 9 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 10 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 8 global_irq 11 low edge) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 12 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 12 global_irq 15 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 13 global_irq 16 dfl dfl) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 17 low edge) +(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 18 dfl dfl) + +i.e. all legacy IRQs (apart from the timer one, but the firmware passed +data doesn't look right for that case anyway, as both Xen and native +Linux are falling back to use the virtual wire setup for IRQ0, +apparently rather using pin 2 of the first IO-APIC) are being handled +by the second IO-APIC. + +This at once eliminates the possibility of an unmasked RTE getting +written without having got a vector put in place (in +setup_IO_APIC_irqs()). 
+ +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +Index: xen-4.2.0-testing/xen/arch/x86/io_apic.c +=================================================================== +--- xen-4.2.0-testing.orig/xen/arch/x86/io_apic.c ++++ xen-4.2.0-testing/xen/arch/x86/io_apic.c +@@ -999,18 +999,17 @@ static void __init setup_IO_APIC_irqs(vo + else + add_pin_to_irq(irq, apic, pin); + +- if (!apic && !IO_APIC_IRQ(irq)) ++ if (!IO_APIC_IRQ(irq)) + continue; + +- if (IO_APIC_IRQ(irq)) { +- vector = assign_irq_vector(irq, NULL); +- BUG_ON(vector < 0); +- entry.vector = vector; +- ioapic_register_intr(irq, IOAPIC_AUTO); ++ vector = assign_irq_vector(irq, NULL); ++ BUG_ON(vector < 0); ++ entry.vector = vector; ++ ioapic_register_intr(irq, IOAPIC_AUTO); ++ ++ if (platform_legacy_irq(irq)) ++ disable_8259A_irq(irq_to_desc(irq)); + +- if (!apic && platform_legacy_irq(irq)) +- disable_8259A_irq(irq_to_desc(irq)); +- } + desc = irq_to_desc(irq); + SET_DEST(entry.dest.dest32, entry.dest.logical.logical_dest, + cpu_mask_to_apicid(desc->arch.cpu_mask)); +@@ -2257,18 +2256,15 @@ unsigned apic_gsi_base(int apic); + + static int apic_pin_2_gsi_irq(int apic, int pin) + { +- int idx, irq; ++ int idx; + + if (apic < 0) + return -EINVAL; + +- irq = apic_gsi_base(apic) + pin; +- if (apic == 0) { +- idx = find_irq_entry(apic, pin, mp_INT); +- if (idx >= 0) +- irq = pin_2_irq(idx, apic, pin); +- } +- return irq; ++ idx = find_irq_entry(apic, pin, mp_INT); ++ ++ return idx >= 0 ? 
pin_2_irq(idx, apic, pin) ++ : apic_gsi_base(apic) + pin; + } + + int ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval) diff --git a/26114-pygrub-list-entries.patch b/26114-pygrub-list-entries.patch new file mode 100644 index 0000000..3f1444d --- /dev/null +++ b/26114-pygrub-list-entries.patch @@ -0,0 +1,90 @@ +# HG changeset patch +# User Charles Arnold +# Date 1351249508 -3600 +# Node ID 6f9e46917eb8771914041b98f714e8f485fca5ef +# Parent 03af0abd2b72dfab3f2e50dd502108de8603f741 +pygrub: Add option to list grub entries + +The argument to "--entry" allows 2 syntaxes, either directly the entry +number in menu.lst, or the whole string behind the "title" key word. +This poses the following issue: + +From Dom0 there is no way to guess the number and, or the complete +title string because this string contains the kernel version, which +will change with a kernel update. + +This patch adds [-l|--list-entries] as an argument to pygrub. + +Signed-off-by: Charles Arnold +Acked-by: Ian Jackson +Committed-by: Ian Jackson + +diff -r 03af0abd2b72 -r 6f9e46917eb8 tools/pygrub/src/pygrub +--- a/tools/pygrub/src/pygrub Fri Oct 26 12:03:12 2012 +0100 ++++ b/tools/pygrub/src/pygrub Fri Oct 26 12:05:08 2012 +0100 +@@ -595,7 +595,17 @@ def run_grub(file, entry, fs, cfg_args): + sel = g.run() + + g = Grub(file, fs) +- if interactive: ++ ++ if list_entries: ++ for i in range(len(g.cf.images)): ++ img = g.cf.images[i] ++ print "title: %s" % img.title ++ print " root: %s" % img.root ++ print " kernel: %s" % img.kernel[1] ++ print " args: %s" % img.args ++ print " initrd: %s" % img.initrd[1] ++ ++ if interactive and not list_entries: + curses.wrapper(run_main) + else: + sel = g.cf.default +@@ -702,7 +712,7 @@ if __name__ == "__main__": + sel = None + + def usage(): +- print >> sys.stderr, "Usage: %s [-q|--quiet] [-i|--interactive] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] " 
%(sys.argv[0],) ++ print >> sys.stderr, "Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] " %(sys.argv[0],) + + def copy_from_image(fs, file_to_read, file_type, output_directory, + not_really): +@@ -736,8 +746,8 @@ if __name__ == "__main__": + dataoff += len(data) + + try: +- opts, args = getopt.gnu_getopt(sys.argv[1:], 'qinh::', +- ["quiet", "interactive", "not-really", "help", ++ opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::', ++ ["quiet", "interactive", "list-entries", "not-really", "help", + "output=", "output-format=", "output-directory=", + "entry=", "kernel=", + "ramdisk=", "args=", "isconfig", "debug"]) +@@ -753,6 +763,7 @@ if __name__ == "__main__": + output = None + entry = None + interactive = True ++ list_entries = False + isconfig = False + debug = False + not_really = False +@@ -771,6 +782,8 @@ if __name__ == "__main__": + interactive = False + elif o in ("-i", "--interactive"): + interactive = True ++ elif o in ("-l", "--list-entries"): ++ list_entries = True + elif o in ("-n", "--not-really"): + not_really = True + elif o in ("-h", "--help"): +@@ -855,6 +868,9 @@ if __name__ == "__main__": + fs = None + continue + ++ if list_entries: ++ sys.exit(0) ++ + # Did looping through partitions find us a kernel? + if not fs: + raise RuntimeError, "Unable to find partition containing kernel" diff --git a/CVE-2012-4535-xsa20.patch b/CVE-2012-4535-xsa20.patch new file mode 100644 index 0000000..dcc4357 --- /dev/null +++ b/CVE-2012-4535-xsa20.patch @@ -0,0 +1,42 @@ +References: CVE-2012-4535 XSA-20 bnc#786516 + +VCPU/timers: Prevent overflow in calculations, leading to DoS vulnerability + +The timer action for a vcpu periodic timer is to calculate the next +expiry time, and to reinsert itself into the timer queue. If the +deadline ends up in the past, Xen never leaves __do_softirq(). 
The +affected PCPU will stay in an infinite loop until Xen is killed by the +watchdog (if enabled). + +This is a security problem, XSA-20 / CVE-2012-4535. + +Signed-off-by: Andrew Cooper +Acked-by: Ian Campbell + +Index: xen-4.2.0-testing/xen/common/domain.c +=================================================================== +--- xen-4.2.0-testing.orig/xen/common/domain.c ++++ xen-4.2.0-testing/xen/common/domain.c +@@ -882,6 +882,9 @@ long do_vcpu_op(int cmd, int vcpuid, XEN + if ( set.period_ns < MILLISECS(1) ) + return -EINVAL; + ++ if ( set.period_ns > STIME_DELTA_MAX ) ++ return -EINVAL; ++ + v->periodic_period = set.period_ns; + vcpu_force_reschedule(v); + +Index: xen-4.2.0-testing/xen/include/xen/time.h +=================================================================== +--- xen-4.2.0-testing.orig/xen/include/xen/time.h ++++ xen-4.2.0-testing/xen/include/xen/time.h +@@ -55,6 +55,8 @@ struct tm gmtime(unsigned long t); + #define MILLISECS(_ms) ((s_time_t)((_ms) * 1000000ULL)) + #define MICROSECS(_us) ((s_time_t)((_us) * 1000ULL)) + #define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1)) ++/* Chosen so (NOW() + delta) wont overflow without an uptime of 200 years */ ++#define STIME_DELTA_MAX ((s_time_t)((uint64_t)~0ull>>2)) + + extern void update_vcpu_system_time(struct vcpu *v); + extern void update_domain_wallclock_time(struct domain *d); diff --git a/CVE-2012-4537-xsa22.patch b/CVE-2012-4537-xsa22.patch new file mode 100644 index 0000000..93c9b4c --- /dev/null +++ b/CVE-2012-4537-xsa22.patch @@ -0,0 +1,43 @@ +References: CVE-2012-4537 XSA-22 bnc#786517 + +x86/physmap: Prevent incorrect updates of m2p mappings + +In certain conditions, such as low memory, set_p2m_entry() can fail. +Currently, the p2m and m2p tables will get out of sync because we still +update the m2p table after the p2m update has failed. + +If that happens, subsequent guest-invoked memory operations can cause +BUG()s and ASSERT()s to kill Xen. 
+ +This is fixed by only updating the m2p table iff the p2m was +successfully updated. + +This is a security problem, XSA-22 / CVE-2012-4537. + +Signed-off-by: Andrew Cooper +Acked-by: Ian Campbell +Acked-by: Ian Jackson + +Index: xen-4.2.0-testing/xen/arch/x86/mm/p2m.c +=================================================================== +--- xen-4.2.0-testing.orig/xen/arch/x86/mm/p2m.c ++++ xen-4.2.0-testing/xen/arch/x86/mm/p2m.c +@@ -654,7 +654,10 @@ guest_physmap_add_entry(struct domain *d + if ( mfn_valid(_mfn(mfn)) ) + { + if ( !set_p2m_entry(p2m, gfn, _mfn(mfn), page_order, t, p2m->default_access) ) ++ { + rc = -EINVAL; ++ goto out; /* Failed to update p2m, bail without updating m2p. */ ++ } + if ( !p2m_is_grant(t) ) + { + for ( i = 0; i < (1UL << page_order); i++ ) +@@ -677,6 +680,7 @@ guest_physmap_add_entry(struct domain *d + } + } + ++out: + p2m_unlock(p2m); + + return rc; diff --git a/CVE-2012-4538-xsa23.patch b/CVE-2012-4538-xsa23.patch new file mode 100644 index 0000000..cf22858 --- /dev/null +++ b/CVE-2012-4538-xsa23.patch @@ -0,0 +1,35 @@ +References: CVE-2012-4538 XSA-23 bnc#786519 + +xen/mm/shadow: check toplevel pagetables are present before unhooking them. + +If the guest has not fully populated its top-level PAE entries when it calls +HVMOP_pagetable_dying, the shadow code could try to unhook entries from +MFN 0. Add a check to avoid that case. + +This issue was introduced by c/s 21239:b9d2db109cf5. + +This is a security problem, XSA-23 / CVE-2012-4538. 
+ +Signed-off-by: Tim Deegan +Tested-by: Andrew Cooper +Acked-by: Ian Campbell + +Index: xen-4.2.0-testing/xen/arch/x86/mm/shadow/multi.c +=================================================================== +--- xen-4.2.0-testing.orig/xen/arch/x86/mm/shadow/multi.c ++++ xen-4.2.0-testing/xen/arch/x86/mm/shadow/multi.c +@@ -4734,8 +4734,12 @@ static void sh_pagetable_dying(struct vc + unsigned long gfn; + mfn_t smfn, gmfn; + +- if ( fast_path ) +- smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i])); ++ if ( fast_path ) { ++ if ( pagetable_is_null(v->arch.shadow_table[i]) ) ++ smfn = _mfn(INVALID_MFN); ++ else ++ smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i])); ++ } + else + { + /* retrieving the l2s */ diff --git a/CVE-2012-4539-xsa24.patch b/CVE-2012-4539-xsa24.patch new file mode 100644 index 0000000..3851920 --- /dev/null +++ b/CVE-2012-4539-xsa24.patch @@ -0,0 +1,29 @@ +References: CVE-2012-4539 XSA-24 bnc#786520 + +compat/gnttab: Prevent infinite loop in compat code + +c/s 20281:95ea2052b41b, which introduces Grant Table version 2 +hypercalls, introduces a vulnerability whereby the compat hypercall +handler can fall into an infinite loop. + +If the watchdog is enabled, Xen will die after the timeout. + +This is a security problem, XSA-24 / CVE-2012-4539. 
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +Acked-by: Ian Jackson + +Index: xen-4.2.0-testing/xen/common/compat/grant_table.c +=================================================================== +--- xen-4.2.0-testing.orig/xen/common/compat/grant_table.c ++++ xen-4.2.0-testing/xen/common/compat/grant_table.c +@@ -318,6 +318,8 @@ int compat_grant_table_op(unsigned int c + #undef XLAT_gnttab_get_status_frames_HNDL_frame_list + if ( unlikely(__copy_to_guest(cmp_uop, &cmp.get_status, 1)) ) + rc = -EFAULT; ++ else ++ i = 1; + } + break; + } diff --git a/CVE-2012-4544-xsa25.patch b/CVE-2012-4544-xsa25.patch new file mode 100644 index 0000000..fed0ec8 --- /dev/null +++ b/CVE-2012-4544-xsa25.patch @@ -0,0 +1,366 @@ +libxc: builder: limit maximum size of kernel/ramdisk. + +Allowing user supplied kernels of arbitrary sizes, especially during +decompression, can swallow up dom0 memory leading to either virtual +address space exhaustion in the builder process or allocation +failures/OOM killing of both toolstack and unrelated processes. + +We disable these checks when building in a stub domain for pvgrub +since this uses the guest's own memory and is isolated. + +Decompression of gzip compressed kernels and ramdisks has been safe +since 14954:58205257517d (Xen 3.1.0 onwards). + +This is XSA-25 / CVE-2012-4544. + +Also make explicit checks for buffer overflows in various +decompression routines. These were already ruled out due to other +properties of the code but check them as a belt-and-braces measure. 
+ +Signed-off-by: Ian Campbell +Acked-by: Ian Jackson +[ Includes 25589:60f09d1ab1fe for CVE-2012-2625 ] + +Index: xen-4.2.0-testing/stubdom/grub/kexec.c +=================================================================== +--- xen-4.2.0-testing.orig/stubdom/grub/kexec.c ++++ xen-4.2.0-testing/stubdom/grub/kexec.c +@@ -137,6 +137,10 @@ void kexec(void *kernel, long kernel_siz + dom = xc_dom_allocate(xc_handle, cmdline, features); + dom->allocate = kexec_allocate; + ++ /* We are using guest owned memory, therefore no limits. */ ++ xc_dom_kernel_max_size(dom, 0); ++ xc_dom_ramdisk_max_size(dom, 0); ++ + dom->kernel_blob = kernel; + dom->kernel_size = kernel_size; + +Index: xen-4.2.0-testing/tools/libxc/xc_dom.h +=================================================================== +--- xen-4.2.0-testing.orig/tools/libxc/xc_dom.h ++++ xen-4.2.0-testing/tools/libxc/xc_dom.h +@@ -55,6 +55,9 @@ struct xc_dom_image { + void *ramdisk_blob; + size_t ramdisk_size; + ++ size_t max_kernel_size; ++ size_t max_ramdisk_size; ++ + /* arguments and parameters */ + char *cmdline; + uint32_t f_requested[XENFEAT_NR_SUBMAPS]; +@@ -180,6 +183,23 @@ void xc_dom_release_phys(struct xc_dom_i + void xc_dom_release(struct xc_dom_image *dom); + int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb); + ++/* Set this larger if you have enormous ramdisks/kernels. Note that ++ * you should trust all kernels not to be maliciously large (e.g. to ++ * exhaust all dom0 memory) if you do this (see CVE-2012-4544 / ++ * XSA-25). You can also set the default independently for ++ * ramdisks/kernels in xc_dom_allocate() or call ++ * xc_dom_{kernel,ramdisk}_max_size. 
++ */ ++#ifndef XC_DOM_DECOMPRESS_MAX ++#define XC_DOM_DECOMPRESS_MAX (1024*1024*1024) /* 1GB */ ++#endif ++ ++int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz); ++int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz); ++ ++int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz); ++int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz); ++ + size_t xc_dom_check_gzip(xc_interface *xch, + void *blob, size_t ziplen); + int xc_dom_do_gunzip(xc_interface *xch, +@@ -240,7 +260,8 @@ void xc_dom_log_memory_footprint(struct + void *xc_dom_malloc(struct xc_dom_image *dom, size_t size); + void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size); + void *xc_dom_malloc_filemap(struct xc_dom_image *dom, +- const char *filename, size_t * size); ++ const char *filename, size_t * size, ++ const size_t max_size); + char *xc_dom_strdup(struct xc_dom_image *dom, const char *str); + + /* --- alloc memory pool ------------------------------------------- */ +Index: xen-4.2.0-testing/tools/libxc/xc_dom_bzimageloader.c +=================================================================== +--- xen-4.2.0-testing.orig/tools/libxc/xc_dom_bzimageloader.c ++++ xen-4.2.0-testing/tools/libxc/xc_dom_bzimageloader.c +@@ -47,13 +47,19 @@ static int xc_try_bzip2_decode( + char *out_buf; + char *tmp_buf; + int retval = -1; +- int outsize; ++ unsigned int outsize; + uint64_t total; + + stream.bzalloc = NULL; + stream.bzfree = NULL; + stream.opaque = NULL; + ++ if ( dom->kernel_size == 0) ++ { ++ DOMPRINTF("BZIP2: Input is 0 size"); ++ return -1; ++ } ++ + ret = BZ2_bzDecompressInit(&stream, 0, 0); + if ( ret != BZ_OK ) + { +@@ -66,6 +72,17 @@ static int xc_try_bzip2_decode( + * the input buffer to start, and we'll realloc as needed. + */ + outsize = dom->kernel_size; ++ ++ /* ++ * stream.avail_in and outsize are unsigned int, while kernel_size ++ * is a size_t. Check we aren't overflowing. 
++ */ ++ if ( outsize != dom->kernel_size ) ++ { ++ DOMPRINTF("BZIP2: Input too large"); ++ goto bzip2_cleanup; ++ } ++ + out_buf = malloc(outsize); + if ( out_buf == NULL ) + { +@@ -98,13 +115,20 @@ static int xc_try_bzip2_decode( + if ( stream.avail_out == 0 ) + { + /* Protect against output buffer overflow */ +- if ( outsize > INT_MAX / 2 ) ++ if ( outsize > UINT_MAX / 2 ) + { + DOMPRINTF("BZIP2: output buffer overflow"); + free(out_buf); + goto bzip2_cleanup; + } + ++ if ( xc_dom_kernel_check_size(dom, outsize * 2) ) ++ { ++ DOMPRINTF("BZIP2: output too large"); ++ free(out_buf); ++ goto bzip2_cleanup; ++ } ++ + tmp_buf = realloc(out_buf, outsize * 2); + if ( tmp_buf == NULL ) + { +@@ -172,9 +196,15 @@ static int _xc_try_lzma_decode( + unsigned char *out_buf; + unsigned char *tmp_buf; + int retval = -1; +- int outsize; ++ size_t outsize; + const char *msg; + ++ if ( dom->kernel_size == 0) ++ { ++ DOMPRINTF("LZMA: Input is 0 size"); ++ return -1; ++ } ++ + /* sigh. We don't know up-front how much memory we are going to need + * for the output buffer. Allocate the output buffer to be equal + * the input buffer to start, and we'll realloc as needed. +@@ -244,13 +274,20 @@ static int _xc_try_lzma_decode( + if ( stream->avail_out == 0 ) + { + /* Protect against output buffer overflow */ +- if ( outsize > INT_MAX / 2 ) ++ if ( outsize > SIZE_MAX / 2 ) + { + DOMPRINTF("%s: output buffer overflow", what); + free(out_buf); + goto lzma_cleanup; + } + ++ if ( xc_dom_kernel_check_size(dom, outsize * 2) ) ++ { ++ DOMPRINTF("LZMA: output too large"); ++ free(out_buf); ++ goto lzma_cleanup; ++ } ++ + tmp_buf = realloc(out_buf, outsize * 2); + if ( tmp_buf == NULL ) + { +@@ -359,6 +396,12 @@ static int xc_try_lzo1x_decode( + 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a + }; + ++ /* ++ * lzo_uint should match size_t. Check that this is the case to be ++ * sure we won't overflow various lzo_uint fields. 
++ */ ++ XC_BUILD_BUG_ON(sizeof(lzo_uint) != sizeof(size_t)); ++ + ret = lzo_init(); + if ( ret != LZO_E_OK ) + { +@@ -438,6 +481,14 @@ static int xc_try_lzo1x_decode( + if ( src_len <= 0 || src_len > dst_len || src_len > left ) + break; + ++ msg = "Output buffer overflow"; ++ if ( *size > SIZE_MAX - dst_len ) ++ break; ++ ++ msg = "Decompressed image too large"; ++ if ( xc_dom_kernel_check_size(dom, *size + dst_len) ) ++ break; ++ + msg = "Failed to (re)alloc memory"; + tmp_buf = realloc(out_buf, *size + dst_len); + if ( tmp_buf == NULL ) +Index: xen-4.2.0-testing/tools/libxc/xc_dom_core.c +=================================================================== +--- xen-4.2.0-testing.orig/tools/libxc/xc_dom_core.c ++++ xen-4.2.0-testing/tools/libxc/xc_dom_core.c +@@ -159,7 +159,8 @@ void *xc_dom_malloc_page_aligned(struct + } + + void *xc_dom_malloc_filemap(struct xc_dom_image *dom, +- const char *filename, size_t * size) ++ const char *filename, size_t * size, ++ const size_t max_size) + { + struct xc_dom_mem *block = NULL; + int fd = -1; +@@ -171,6 +172,13 @@ void *xc_dom_malloc_filemap(struct xc_do + lseek(fd, 0, SEEK_SET); + *size = lseek(fd, 0, SEEK_END); + ++ if ( max_size && *size > max_size ) ++ { ++ xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY, ++ "tried to map file which is too large"); ++ goto err; ++ } ++ + block = malloc(sizeof(*block)); + if ( block == NULL ) + goto err; +@@ -222,6 +230,40 @@ char *xc_dom_strdup(struct xc_dom_image + } + + /* ------------------------------------------------------------------------ */ ++/* decompression buffer sizing */ ++int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz) ++{ ++ /* No limit */ ++ if ( !dom->max_kernel_size ) ++ return 0; ++ ++ if ( sz > dom->max_kernel_size ) ++ { ++ xc_dom_panic(dom->xch, XC_INVALID_KERNEL, ++ "kernel image too large"); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz) ++{ ++ /* No limit */ ++ if ( 
!dom->max_ramdisk_size ) ++ return 0; ++ ++ if ( sz > dom->max_ramdisk_size ) ++ { ++ xc_dom_panic(dom->xch, XC_INVALID_KERNEL, ++ "ramdisk image too large"); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ------------------------------------------------------------------------ */ + /* read files, copy memory blocks, with transparent gunzip */ + + size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen) +@@ -235,7 +277,7 @@ size_t xc_dom_check_gzip(xc_interface *x + + gzlen = blob + ziplen - 4; + unziplen = gzlen[3] << 24 | gzlen[2] << 16 | gzlen[1] << 8 | gzlen[0]; +- if ( (unziplen < 0) || (unziplen > (1024*1024*1024)) ) /* 1GB limit */ ++ if ( (unziplen < 0) || (unziplen > XC_DOM_DECOMPRESS_MAX) ) + { + xc_dom_printf + (xch, +@@ -288,6 +330,9 @@ int xc_dom_try_gunzip(struct xc_dom_imag + if ( unziplen == 0 ) + return 0; + ++ if ( xc_dom_kernel_check_size(dom, unziplen) ) ++ return 0; ++ + unzip = xc_dom_malloc(dom, unziplen); + if ( unzip == NULL ) + return -1; +@@ -588,6 +633,9 @@ struct xc_dom_image *xc_dom_allocate(xc_ + memset(dom, 0, sizeof(*dom)); + dom->xch = xch; + ++ dom->max_kernel_size = XC_DOM_DECOMPRESS_MAX; ++ dom->max_ramdisk_size = XC_DOM_DECOMPRESS_MAX; ++ + if ( cmdline ) + dom->cmdline = xc_dom_strdup(dom, cmdline); + if ( features ) +@@ -608,10 +656,25 @@ struct xc_dom_image *xc_dom_allocate(xc_ + return NULL; + } + ++int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz) ++{ ++ DOMPRINTF("%s: kernel_max_size=%zx", __FUNCTION__, sz); ++ dom->max_kernel_size = sz; ++ return 0; ++} ++ ++int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz) ++{ ++ DOMPRINTF("%s: ramdisk_max_size=%zx", __FUNCTION__, sz); ++ dom->max_ramdisk_size = sz; ++ return 0; ++} ++ + int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename) + { + DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename); +- dom->kernel_blob = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size); ++ dom->kernel_blob = xc_dom_malloc_filemap(dom, 
filename, &dom->kernel_size, ++ dom->max_kernel_size); + if ( dom->kernel_blob == NULL ) + return -1; + return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size); +@@ -621,7 +684,9 @@ int xc_dom_ramdisk_file(struct xc_dom_im + { + DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename); + dom->ramdisk_blob = +- xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size); ++ xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size, ++ dom->max_ramdisk_size); ++ + if ( dom->ramdisk_blob == NULL ) + return -1; + // return xc_dom_try_gunzip(dom, &dom->ramdisk_blob, &dom->ramdisk_size); +@@ -781,7 +846,11 @@ int xc_dom_build_image(struct xc_dom_ima + void *ramdiskmap; + + unziplen = xc_dom_check_gzip(dom->xch, dom->ramdisk_blob, dom->ramdisk_size); ++ if ( xc_dom_ramdisk_check_size(dom, unziplen) != 0 ) ++ unziplen = 0; ++ + ramdisklen = unziplen ? unziplen : dom->ramdisk_size; ++ + if ( xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", 0, + ramdisklen) != 0 ) + goto err; diff --git a/VNC-Support-for-ExtendedKeyEvent-client-message.patch b/VNC-Support-for-ExtendedKeyEvent-client-message.patch new file mode 100644 index 0000000..3cf1c65 --- /dev/null +++ b/VNC-Support-for-ExtendedKeyEvent-client-message.patch @@ -0,0 +1,157 @@ +From 9ca313aa0824f2d350a7a6c9b1ef6c47e0408f1d Mon Sep 17 00:00:00 2001 +From: aliguori +Date: Sat, 23 Aug 2008 23:27:37 +0000 +Subject: [PATCH] VNC: Support for ExtendedKeyEvent client message + +This patch adds support for the ExtendedKeyEvent client message. This message +allows a client to send raw scan codes directly to the server. If the client +and server are using the same keymap, then it's unnecessary to use the '-k' +option with QEMU when this extension is supported. + +This extension is currently only implemented by gtk-vnc based clients +(gvncviewer, virt-manager, vinagre, etc.). 
+ +Signed-off-by: Anthony Liguori + + + +git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5076 c046a42c-6fe2-441c-8c8c-71466251a162 +--- + vnc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- + 1 files changed, 50 insertions(+), 9 deletions(-) + +Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c +=================================================================== +--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c ++++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c +@@ -1285,35 +1285,22 @@ static void press_key_altgr_down(VncStat + } + } + +-static void do_key_event(VncState *vs, int down, uint32_t sym) ++static void do_key_event(VncState *vs, int down, int keycode, int sym, int shift) + { +- int keycode; + int shift_keys = 0; +- int shift = 0; + int keypad = 0; + int altgr = 0; + int altgr_keys = 0; + + if (is_graphic_console()) { +- if (sym >= 'A' && sym <= 'Z') { +- sym = sym - 'A' + 'a'; +- shift = 1; +- } +- else { ++ if (!shift) + shift = keysym_is_shift(vs->kbd_layout, sym & 0xFFFF); +- } + + altgr = keysym_is_altgr(vs->kbd_layout, sym & 0xFFFF); + } + shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36]; + altgr_keys = vs->modifiers_state[0xb8]; + +- keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF); +- if (keycode == 0) { +- fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym); +- return; +- } +- + /* QEMU console switch */ + switch(keycode) { + case 0x2a: /* Left Shift */ +@@ -1445,7 +1432,25 @@ static void do_key_event(VncState *vs, i + + static void key_event(VncState *vs, int down, uint32_t sym) + { +- do_key_event(vs, down, sym); ++ int keycode; ++ int shift = 0; ++ ++ if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) { ++ sym = sym - 'A' + 'a'; ++ shift = 1; ++ } ++ keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF); ++ do_key_event(vs, down, keycode, sym, shift); ++} ++ ++static void ext_key_event(VncState *vs, int down, ++ uint32_t sym, 
uint16_t keycode) ++{ ++ /* if the user specifies a keyboard layout, always use it */ ++ if (keyboard_layout) ++ key_event(vs, down, sym); ++ else ++ do_key_event(vs, down, keycode, sym, 0); + } + + static void framebuffer_set_updated(VncState *vs, int x, int y, int w, int h) +@@ -1534,6 +1539,15 @@ static void framebuffer_update_request(V + qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock)); + } + ++static void send_ext_key_event_ack(VncState *vs) ++{ ++ vnc_write_u8(vs, 0); ++ vnc_write_u8(vs, 0); ++ vnc_write_u16(vs, 1); ++ vnc_framebuffer_update(vs, 0, 0, ds_get_width(vs->ds), ds_get_height(vs->ds), -258); ++ vnc_flush(vs); ++} ++ + static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings) + { + int i; +@@ -1562,6 +1576,9 @@ static void set_encodings(VncState *vs, + case -257: + vs->has_pointer_type_change = 1; + break; ++ case -258: ++ send_ext_key_event_ack(vs); ++ break; + case 0x574D5669: + vs->has_WMVi = 1; + default: +@@ -1774,6 +1791,24 @@ static int protocol_client_msg(VncState + + client_cut_text(vs, read_u32(data, 4), (char *)(data + 8)); + break; ++ case 255: ++ if (len == 1) ++ return 2; ++ ++ switch (read_u8(data, 1)) { ++ case 0: ++ if (len == 2) ++ return 12; ++ ++ ext_key_event(vs, read_u16(data, 2), ++ read_u32(data, 4), read_u32(data, 8)); ++ break; ++ default: ++ printf("Msg: %d\n", read_u16(data, 0)); ++ vnc_client_error(vs); ++ break; ++ } ++ break; + default: + printf("Msg: %d\n", data[0]); + vnc_client_error(vs); +@@ -2445,10 +2480,11 @@ void vnc_display_init(DisplayState *ds) + + vs->ds = ds; + +- if (!keyboard_layout) +- keyboard_layout = "en-us"; ++ if (keyboard_layout) ++ vs->kbd_layout = init_keyboard_layout(keyboard_layout); ++ else ++ vs->kbd_layout = init_keyboard_layout("en-us"); + +- vs->kbd_layout = init_keyboard_layout(keyboard_layout); + if (!vs->kbd_layout) + exit(1); + vs->modifiers_state[0x45] = 1; /* NumLock on - on boot */ diff --git a/altgr_2.patch b/altgr_2.patch index f7eeabc..f320c37 100644 --- 
a/altgr_2.patch +++ b/altgr_2.patch @@ -33,13 +33,13 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c =================================================================== --- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c +++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c -@@ -1308,6 +1308,9 @@ static void do_key_event(VncState *vs, i - shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36]; - altgr_keys = vs->modifiers_state[0xb8]; +@@ -1440,6 +1440,9 @@ static void key_event(VncState *vs, int + int keycode; + int shift = 0; -+ if ( !strcmp(keyboard_layout,"es") && sym == 0xffea ) ++ if ( sym == 0xffea && keyboard_layout && !strcmp(keyboard_layout,"es") ) + sym = 0xffe9; + - keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF); - if (keycode == 0) { - fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym); + if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) { + sym = sym - 'A' + 'a'; + shift = 1; diff --git a/block-dmmd b/block-dmmd index 752263f..7ce5d83 100644 --- a/block-dmmd +++ b/block-dmmd @@ -26,10 +26,8 @@ dir=$(dirname "$0") . "$dir/block-common.sh" #exec >> /tmp/block-dmmd-`date +%F_%T.%N`.log 2>&1 -echo "" -date -set -x -echo shell-flags: $- +#echo shell-flags: $- + command=$1 # We check for errors ourselves: @@ -37,85 +35,87 @@ set +e function run_mdadm() { - local mdadm_cmd=$1 - local msg - local rc + local mdadm_cmd=$1 + local msg + local rc - msg="`/sbin/mdadm $mdadm_cmd 2>&1`" - rc=$? - case "$msg" in - *"has been started"* | *"already active"* ) - return 0 - ;; - *"is already in use"* ) - : hmm, might be used by another device in this domU - : leave it to upper layers to detect a real error - return 2 - ;; - * ) - return $rc - ;; - esac - - return 1 + msg="`/sbin/mdadm $mdadm_cmd 2>&1`" + rc=$? 
+ case "$msg" in + *"has been started"* | *"already active"* ) + return 0 + ;; + *"is already in use"* ) + # hmm, might be used by another device in this domU + # leave it to upper layers to detect a real error + return 2 + ;; + * ) + return $rc + ;; + esac + return 1 } function activate_md() { - local par=$1 - local already_active=0 cfg dev rc t + local par=$1 + local already_active=0 cfg dev rc t - if [ ${par} = ${par%%(*} ]; then - # No configuration file specified: - dev=$par - cfd= - else - dev=${par%%(*} - t=${par#*(} - cfg="-c ${t%%)*}" - fi - if /sbin/mdadm -Q -D $dev; then - already_active=1 - fi - run_mdadm "-A $dev $cfg" - rc=$? - if [ $already_active -eq 1 ] && [ $rc -eq 2 ]; then - return 0 - fi - return $rc + if [ ${par} = ${par%%(*} ]; then + # No configuration file specified: + dev=$par + cfg= + else + dev=${par%%(*} + t=${par#*(} + cfg="-c ${t%%)*}" + fi + if /sbin/mdadm -Q -D $dev; then + already_active=1 + fi + run_mdadm "-A $dev $cfg" + rc=$? + if [ $already_active -eq 1 ] && [ $rc -eq 2 ]; then + return 0 + fi + return $rc } -function deactivate_md () +function deactivate_md() { - local par=$1 # Make it explicitly local + local par=$1 # Make it explicitly local - ## We need the device name only while deactivating - /sbin/mdadm -S ${par%%(*} - return $? + ## We need the device name only while deactivating + /sbin/mdadm -S ${par%%(*} + return $? } -function activate_lvm () +function activate_lvm() { - # First scan for PVs and VGs; we may then have to activate the VG - # first, but can ignore errors: -# /sbin/pvscan || : -# /sbin/vgscan --mknodes || : -# /sbin/vgchange -ay ${1%/*} || : - /sbin/lvchange -ay $1 - return $? + # First scan for PVs and VGs; we may then have to activate the VG + # first, but can ignore errors: +# /sbin/pvscan || : +# /sbin/vgscan --mknodes || : +# /sbin/vgchange -ay ${1%/*} || : + /sbin/lvchange -ay $1 + if [ $? 
-eq 0 ]; then + return 0 + fi + return 1 } -function deactivate_lvm () +function deactivate_lvm() { - /sbin/lvchange -an $1 - if [ $? -eq 0 ]; then - # We may have to deactivate the VG now, but can ignore errors: -# /sbin/vgchange -an ${1%/*} || : - # Maybe we need to cleanup the LVM cache: -# /sbin/vgscan --mknodes || : - return 0 - fi - return 1 + /sbin/lvchange -an $1 + if [ $? -eq 0 ]; then + # We may have to deactivate the VG now, but can ignore errors: +# /sbin/vgchange -an ${1%/*} || : + # Maybe we need to cleanup the LVM cache: +# /sbin/vgscan --mknodes || : + return 0 + fi + return 1 } BP=100 @@ -123,16 +123,16 @@ SP=$BP VBD= declare -a stack -function push () +function push() { if [ -z "$1" ]; then return fi let "SP -= 1" stack[$SP]="${1}" - return } -function pop () + +function pop() { VBD= @@ -142,11 +142,10 @@ function pop () VBD=${stack[$SP]} let "SP += 1" - return } -function activate_dmmd () + +function activate_dmmd() { -# echo $1 $2 case $1 in md) activate_md $2 @@ -158,6 +157,7 @@ function activate_dmmd () ;; esac } + function deactivate_dmmd() { case "$1" in @@ -171,7 +171,8 @@ function deactivate_dmmd() ;; esac } -function cleanup_stack () + +function cleanup_stack() { while [ 1 ]; do pop @@ -180,17 +181,15 @@ function cleanup_stack () fi deactivate_dmmd $VBD done - return } -function parse_par () +function parse_par() { local ac par rc s t # Make these explicitly local vars ac=$1 par="$2" - echo "parse_paring $1, $2" par="$par;" while [ 1 ]; do t=${par%%;*} @@ -205,7 +204,6 @@ function parse_par () fi par=${par#*;} - echo "type is $t, dev is $s" if [ "$ac" = "activate" ]; then activate_dmmd $t $s rc=$? @@ -213,45 +211,39 @@ function parse_par () return 1 fi fi - echo "push $t $s" push "$t $s" done } -echo $command case "$command" in - add) - p=`xenstore-read $XENBUS_PATH/params` || true - claim_lock "dmmd" - dmmd=$p - echo "before parse_par $dmmd" - parse_par activate "$dmmd" - rc=$? 
- echo "reach here with rc: $rc" - if [ $rc -ne 0 ]; then - cleanup_stack - release_lock "dmmd" - exit 1 - fi - lastparam=${dmmd##*;} - usedevice=${lastparam%(*} - claim_lock "block" - xenstore-write $XENBUS_PATH/node "$usedevice" - write_dev "$usedevice" - release_lock "block" - release_lock "dmmd" - exit 0 - ;; + add) + p=`xenstore-read $XENBUS_PATH/params` || true + claim_lock "dmmd" + dmmd=$p + parse_par activate "$dmmd" + rc=$? + if [ $rc -ne 0 ]; then + cleanup_stack + release_lock "dmmd" + exit 1 + fi + lastparam=${dmmd##*;} + usedevice=${lastparam%(*} + xenstore-write $XENBUS_PATH/node "$usedevice" + write_dev "$usedevice" + release_lock "dmmd" + exit 0 + ;; - remove) - p=`xenstore-read $XENBUS_PATH/params` || true - claim_lock "dmmd" - dmmd=$p - parse_par noactivate "$dmmd" - cleanup_stack - release_lock "dmmd" - exit 0 - ;; + remove) + p=`xenstore-read $XENBUS_PATH/params` || true + claim_lock "dmmd" + dmmd=$p + parse_par noactivate "$dmmd" + cleanup_stack + release_lock "dmmd" + exit 0 + ;; esac diff --git a/capslock_enable.patch b/capslock_enable.patch index 9bc0cba..796c79c 100644 --- a/capslock_enable.patch +++ b/capslock_enable.patch @@ -2,7 +2,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c =================================================================== --- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c +++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c -@@ -1342,6 +1342,11 @@ static void do_key_event(VncState *vs, i +@@ -1329,6 +1329,11 @@ static void do_key_event(VncState *vs, i } break; case 0x3a: /* CapsLock */ diff --git a/change-vnc-passwd.patch b/change-vnc-passwd.patch index 3b6bc76..5d5bbfa 100644 --- a/change-vnc-passwd.patch +++ b/change-vnc-passwd.patch @@ -19,7 +19,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c =================================================================== --- 
xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c +++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c -@@ -2591,6 +2591,7 @@ int vnc_display_password(DisplayState *d +@@ -2627,6 +2627,7 @@ int vnc_display_password(DisplayState *d if (password && password[0]) { if (!(vs->password = qemu_strdup(password))) return -1; diff --git a/ioemu-vnc-resize.patch b/ioemu-vnc-resize.patch index 144e9ea..cf8f327 100644 --- a/ioemu-vnc-resize.patch +++ b/ioemu-vnc-resize.patch @@ -2,7 +2,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c =================================================================== --- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c +++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c -@@ -1734,6 +1734,25 @@ static int protocol_client_msg(VncState +@@ -1751,6 +1751,25 @@ static int protocol_client_msg(VncState } set_encodings(vs, (int32_t *)(data + 4), limit); diff --git a/log-guest-console.patch b/log-guest-console.patch index 0dd986a..c6e0ded 100644 --- a/log-guest-console.patch +++ b/log-guest-console.patch @@ -60,7 +60,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c if (buffer->max_capacity && buffer->size > buffer->max_capacity) { /* Discard the middle of the data. 
*/ -@@ -176,6 +203,36 @@ static void xencons_send(struct XenConso +@@ -176,6 +203,37 @@ static void xencons_send(struct XenConso } } @@ -83,7 +83,8 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c + return -1; + } + -+ asprintf(&logfile, "%s/guest-%s.log", logdir, domname); ++ if (asprintf(&logfile, "%s/guest-%s.log", logdir, domname) < 0) ++ return -1; + qemu_free(domname); + + fd = open(logfile, O_WRONLY|O_CREAT|O_APPEND, 0644); @@ -97,7 +98,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c /* -------------------------------------------------------------------- */ static int con_init(struct XenDevice *xendev) -@@ -183,6 +240,7 @@ static int con_init(struct XenDevice *xe +@@ -183,6 +241,7 @@ static int con_init(struct XenDevice *xe struct XenConsole *con = container_of(xendev, struct XenConsole, xendev); char *type, *dom, label[32]; const char *output; @@ -105,7 +106,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c /* setup */ dom = xs_get_domain_path(xenstore, con->xendev.dom); -@@ -209,6 +267,10 @@ static int con_init(struct XenDevice *xe +@@ -209,6 +268,10 @@ static int con_init(struct XenDevice *xe con->chr = qemu_chr_open(label, output, NULL); xenstore_store_pv_console_info(con->xendev.dev, con->chr, output); @@ -116,7 +117,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c return 0; } -@@ -246,6 +308,9 @@ static int con_initialise(struct XenDevi +@@ -246,6 +309,9 @@ static int con_initialise(struct XenDevi con->xendev.remote_port, con->xendev.local_port, con->buffer.max_capacity); @@ -126,7 +127,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c return 0; } -@@ -266,6 +331,12 @@ static void con_disconnect(struct XenDev +@@ -266,6 +332,12 @@ static void con_disconnect(struct XenDev xc_gnttab_munmap(xendev->gnttabdev, con->sring, 1); con->sring = NULL; } diff --git 
a/x86-ioapic-ack-default.patch b/x86-ioapic-ack-default.patch index 464ac87..56e8e98 100644 --- a/x86-ioapic-ack-default.patch +++ b/x86-ioapic-ack-default.patch @@ -1,8 +1,10 @@ Change default IO-APIC ack mode for single IO-APIC systems to old-style. ---- a/xen/arch/x86/io_apic.c -+++ b/xen/arch/x86/io_apic.c -@@ -2013,7 +2013,10 @@ void __init setup_IO_APIC(void) +Index: xen-4.2.0-testing/xen/arch/x86/io_apic.c +=================================================================== +--- xen-4.2.0-testing.orig/xen/arch/x86/io_apic.c ++++ xen-4.2.0-testing/xen/arch/x86/io_apic.c +@@ -2012,7 +2012,10 @@ void __init setup_IO_APIC(void) io_apic_irqs = ~PIC_IRQS; printk("ENABLING IO-APIC IRQs\n"); diff --git a/xen-destdir.diff b/xen-destdir.diff index f367dac..8a94d02 100644 --- a/xen-destdir.diff +++ b/xen-destdir.diff @@ -60,33 +60,7 @@ Index: xen-4.2.0-testing/tools/hotplug/Linux/Makefile =================================================================== --- xen-4.2.0-testing.orig/tools/hotplug/Linux/Makefile +++ xen-4.2.0-testing/tools/hotplug/Linux/Makefile -@@ -41,18 +41,6 @@ endif - UDEV_RULES_DIR = $(CONFIG_DIR)/udev - UDEV_RULES = xen-backend.rules xend.rules - --DI = $(if $(DISTDIR),$(shell readlink -f $(DISTDIR)),) --DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),) --ifeq ($(findstring $(DI),$(DE)),$(DI)) --HOTPLUGS=install-hotplug install-udev --else --ifeq ($(shell [ $(UDEVVER) -ge 059 ] && echo 1),1) --HOTPLUGS=install-udev --else --HOTPLUGS=install-hotplug --endif --endif -- - .PHONY: all - all: - -@@ -60,18 +48,18 @@ all: - build: - - .PHONY: install --install: all install-initd install-scripts $(HOTPLUGS) -+install: all install-initd install-scripts install-udev - - # See docs/misc/distro_mapping.txt for INITD_DIR location +@@ -43,12 +43,12 @@ install: all install-initd install-scrip .PHONY: install-initd install-initd: [ -d $(DESTDIR)$(INITD_DIR) ] || $(INSTALL_DIR) $(DESTDIR)$(INITD_DIR) @@ -94,11 +68,11 @@ Index: 
xen-4.2.0-testing/tools/hotplug/Linux/Makefile + [ -d $(DESTDIR)/var/adm/fillup-templates ] || $(INSTALL_DIR) $(DESTDIR)/var/adm/fillup-templates/ $(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)$(INITD_DIR) $(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)$(INITD_DIR) -- $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains -+ $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/ +- $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains ++ $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/ $(INSTALL_PROG) $(XENCOMMONS_INITD) $(DESTDIR)$(INITD_DIR) -- $(INSTALL_PROG) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons -+ $(INSTALL_PROG) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/ +- $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons ++ $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/ $(INSTALL_PROG) init.d/xen-watchdog $(DESTDIR)$(INITD_DIR) .PHONY: install-scripts @@ -125,64 +99,3 @@ Index: xen-4.2.0-testing/tools/firmware/etherboot/Makefile mv _$T $T $D/src/arch/i386/Makefile: $T Config -Index: xen-4.2.0-testing/stubdom/Makefile -=================================================================== ---- xen-4.2.0-testing.orig/stubdom/Makefile -+++ xen-4.2.0-testing/stubdom/Makefile -@@ -396,7 +396,7 @@ install-grub: pv-grub - - install-xenstore: xenstore-stubdom - $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot" -- $(INSTALL_PROG) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz" -+ $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz" - - ####### - # clean -Index: xen-4.2.0-testing/tools/blktap2/vhd/lib/Makefile -=================================================================== ---- xen-4.2.0-testing.orig/tools/blktap2/vhd/lib/Makefile -+++ xen-4.2.0-testing/tools/blktap2/vhd/lib/Makefile -@@ -68,7 
+68,7 @@ libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR - - install: all - $(INSTALL_DIR) -p $(DESTDIR)$(INST-DIR) -- $(INSTALL_PROG) libvhd.a $(DESTDIR)$(INST-DIR) -+ $(INSTALL_DATA) libvhd.a $(DESTDIR)$(INST-DIR) - $(INSTALL_PROG) libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR) - ln -sf libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR)/libvhd.so.$(LIBVHD-MAJOR) - ln -sf libvhd.so.$(LIBVHD-MAJOR) $(DESTDIR)$(INST-DIR)/libvhd.so -Index: xen-4.2.0-testing/tools/blktap/lib/Makefile -=================================================================== ---- xen-4.2.0-testing.orig/tools/blktap/lib/Makefile -+++ xen-4.2.0-testing/tools/blktap/lib/Makefile -@@ -23,23 +23,26 @@ OBJS = $(SRCS:.c=.o) - OBJS_PIC = $(SRCS:.c=.opic) - IBINS := - --LIB = libblktap.a libblktap.so.$(MAJOR).$(MINOR) -+LIB = libblktap.a -+LIB_SO = libblktap.so.$(MAJOR).$(MINOR) -+LIB_ALL = $(LIB) $(LIB_SO) - - .PHONY: all --all: $(LIB) -+all: $(LIB_ALL) - - .PHONY: install - install: all - $(INSTALL_DIR) $(DESTDIR)$(LIBDIR) - $(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR) -- $(INSTALL_PROG) $(LIB) $(DESTDIR)$(LIBDIR) -+ $(INSTALL_DATA) $(LIB) $(DESTDIR)$(LIBDIR) -+ $(INSTALL_PROG) $(LIB_SO) $(DESTDIR)$(LIBDIR) - ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)/libblktap.so.$(MAJOR) - ln -sf libblktap.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libblktap.so - $(INSTALL_DATA) blktaplib.h $(DESTDIR)$(INCLUDEDIR) - - .PHONY: clean - clean: -- rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS -+ rm -rf *.a *.so* *.o *.opic *.rpm $(LIB_ALL) *~ $(DEPS) xen TAGS - - libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC) - $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_LDFLAGS) \ diff --git a/xen.changes b/xen.changes index 2cbc3a6..8a38c64 100644 --- a/xen.changes +++ b/xen.changes @@ -4,6 +4,123 @@ Sat Nov 17 10:51:05 UTC 2012 - aj@suse.de - Fix build with glibc 2.17: add patch xen-glibc217.patch, fix configure for librt. 
+------------------------------------------------------------------- +Tue Nov 13 16:35:55 MST 2012 - jfehlig@suse.com + +- bnc#777628 - guest "disappears" after live migration + Updated block-dmmd script + +------------------------------------------------------------------- +Tue Oct 30 15:28:27 CET 2012 - ohering@suse.de + +- fate#310510 - fix xenpaging + restore changes to integrate paging into xm/xend + xenpaging.autostart.patch + xenpaging.doc.patch + +------------------------------------------------------------------- +Mon Oct 29 10:46:50 MDT 2012 - carnold@novell.com + +- bnc#787163 - VUL-0: CVE-2012-4544: xen: Domain builder Out-of- + memory due to malicious kernel/ramdisk (XSA 25) + CVE-2012-4544-xsa25.patch +- bnc#779212 - VUL-0: CVE-2012-4411: XEN / qemu: guest + administrator can access qemu monitor console (XSA-19) + CVE-2012-4411-xsa19.patch + +------------------------------------------------------------------- +Thu Oct 25 07:02:45 MDT 2012 - carnold@novell.com + +- bnc#786516 - VUL-0: CVE-2012-4535: xen: Timer overflow DoS + vulnerability + CVE-2012-4535-xsa20.patch +- bnc#786518 - VUL-0: CVE-2012-4536: xen: pirq range check DoS + vulnerability + CVE-2012-4536-xsa21.patch +- bnc#786517 - VUL-0: CVE-2012-4537: xen: Memory mapping failure + DoS vulnerability + CVE-2012-4537-xsa22.patch +- bnc#786519 - VUL-0: CVE-2012-4538: xen: Unhooking empty PAE + entries DoS vulnerability + CVE-2012-4538-xsa23.patch +- bnc#786520 - VUL-0: CVE-2012-4539: xen: Grant table hypercall + infinite loop DoS vulnerability + CVE-2012-4539-xsa24.patch +- bnc#784087 - L3: Xen BUG at io_apic.c:129 + 26102-x86-IOAPIC-legacy-not-first.patch + +------------------------------------------------------------------- +Wed Oct 24 06:15:58 MDT 2012 - carnold@novell.com + +- Upstream patches from Jan + 25920-x86-APICV-enable.patch + 25921-x86-APICV-delivery.patch + 25922-x86-APICV-x2APIC.patch + 25957-x86-TSC-adjust-HVM.patch + 25958-x86-TSC-adjust-sr.patch + 25959-x86-TSC-adjust-expose.patch 
+ 25975-x86-IvyBridge.patch + 25984-SVM-nested-paging-mode.patch + 26054-x86-AMD-perf-ctr-init.patch + 26055-x86-oprof-hvm-mode.patch + 26056-page-alloc-flush-filter.patch + 26061-x86-oprof-counter-range.patch + 26062-ACPI-ERST-move-data.patch + 26063-x86-HPET-affinity-lock.patch + 26095-SVM-nested-leak.patch + 26096-SVM-nested-vmexit-emul.patch + 26098-perfc-build.patch + +------------------------------------------------------------------- +Mon Oct 22 21:21:28 CEST 2012 - ohering@suse.de + +- handle possible asprintf failures in log-guest-console.patch + +------------------------------------------------------------------- +Mon Oct 22 20:19:09 CEST 2012 - ohering@suse.de + +- bnc#694863 - kexec fails in xen + 26093-hvm_handle_PoD_and_grant_pages_in_HVMOP_get_mem_type.patch + +------------------------------------------------------------------- +Thu Oct 18 16:25:53 MDT 2012 - carnold@novell.com + +- fate#312709: Pygrub needs to know which entry to select + 26114-pygrub-list-entries.patch + +------------------------------------------------------------------- +Thu Oct 18 11:13:32 CEST 2012 - ohering@suse.de + +- merge changes from xen-unstable, obsolete our changes + 26077-stubdom_fix_compile_errors_in_grub.patch + 26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch + 26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch + 26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch + 26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch + 26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch + 26084-hotplug_install_hotplugpath.sh_as_data_file.patch + 26085-stubdom_install_stubdompath.sh_as_data_file.patch + 26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch + 26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch + 26088-tools_xend_fix_wrong_condition_check_for_xml_file.patch + +------------------------------------------------------------------- +Tue Oct 16 15:38:34 MDT 2012 - carnold@novell.com 
+ +- fate#311966: Fix XEN VNC implementation to correctly map keyboard + layouts + VNC-Support-for-ExtendedKeyEvent-client-message.patch + +------------------------------------------------------------------- +Mon Oct 15 17:45:52 CEST 2012 - ohering@suse.de + +- workaround bash bug in locking.sh:claim_lock, close fd + +------------------------------------------------------------------- +Sat Oct 13 15:25:08 CEST 2012 - ohering@suse.de + +- fix incorrect self-provides/obsoletes of xen-tools-ioemu + ------------------------------------------------------------------- Tue Oct 9 06:19:55 MDT 2012 - carnold@novell.com diff --git a/xen.spec b/xen.spec index 90a7e90..808fdfd 100644 --- a/xen.spec +++ b/xen.spec @@ -15,7 +15,6 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # - Name: xen ExclusiveArch: %ix86 x86_64 %define xvers 4.2 @@ -186,15 +185,23 @@ Patch25866: 25866-sercon-ns16550-pci-irq.patch Patch25867: 25867-sercon-ns16550-parse.patch Patch25874: 25874-x86-EFI-chain-cfg.patch Patch25909: 25909-xenpm-consistent.patch +Patch25920: 25920-x86-APICV-enable.patch +Patch25921: 25921-x86-APICV-delivery.patch +Patch25922: 25922-x86-APICV-x2APIC.patch Patch25927: 25927-x86-domctl-ioport-mapping-range.patch Patch25929: 25929-tmem-restore-pool-version.patch Patch25931: 25931-x86-domctl-iomem-mapping-checks.patch Patch25940: 25940-x86-S3-flush-cache.patch Patch25941: 25941-pygrub_always_append_--args.patch Patch25952: 25952-x86-MMIO-remap-permissions.patch +Patch25957: 25957-x86-TSC-adjust-HVM.patch +Patch25958: 25958-x86-TSC-adjust-sr.patch +Patch25959: 25959-x86-TSC-adjust-expose.patch Patch25961: 25961-x86-HPET-interrupts.patch Patch25962: 25962-x86-assign-irq-vector-old.patch Patch25965: 25965-x86-ucode-Intel-resume.patch +Patch25975: 25975-x86-IvyBridge.patch +Patch25984: 25984-SVM-nested-paging-mode.patch Patch26006: 26006-hotplug-Linux_Remove_tracing_bash_-x_from_network-nat_script.patch Patch26007: 
26007-xenballoond.init_remove_4_from_default_runlevel.patch Patch26008: 26008-xend-pvscsi_fix_passing_of_SCSI_control_LUNs.patch @@ -202,7 +209,36 @@ Patch26009: 26009-xend-pvscsi_fix_usage_of_persistant_device_names_for_SCSI_ Patch26010: 26010-xend-pvscsi_update_sysfs_parser_for_Linux_3.0.patch Patch26011: 26011-stubdom_fix_parallel_build_by_expanding_CROSS_MAKE.patch Patch26018: 26018-pygrub_correct_typo_in_--args_assignment.patch +Patch26054: 26054-x86-AMD-perf-ctr-init.patch +Patch26055: 26055-x86-oprof-hvm-mode.patch +Patch26056: 26056-page-alloc-flush-filter.patch +Patch26061: 26061-x86-oprof-counter-range.patch +Patch26062: 26062-ACPI-ERST-move-data.patch +Patch26063: 26063-x86-HPET-affinity-lock.patch +Patch26077: 26077-stubdom_fix_compile_errors_in_grub.patch +Patch26078: 26078-hotplug-Linux_remove_hotplug_support_rely_on_udev_instead.patch +Patch26079: 26079-hotplug-Linux_close_lockfd_after_lock_attempt.patch +Patch26081: 26081-stubdom_fix_rpmlint_warning_spurious-executable-perm.patch +Patch26082: 26082-blktap2-libvhd_fix_rpmlint_warning_spurious-executable-perm.patch +Patch26083: 26083-blktap_fix_rpmlint_warning_spurious-executable-perm.patch +Patch26084: 26084-hotplug_install_hotplugpath.sh_as_data_file.patch +Patch26085: 26085-stubdom_install_stubdompath.sh_as_data_file.patch +Patch26086: 26086-hotplug-Linux_correct_sysconfig_tag_in_xendomains.patch +Patch26087: 26087-hotplug-Linux_install_sysconfig_files_as_data_files.patch +Patch26088: 26088-tools_xend_fix_wrong_condition_check_for_xml_file.patch +Patch26093: 26093-HVM-PoD-grant-mem-type.patch +Patch26095: 26095-SVM-nested-leak.patch +Patch26096: 26096-SVM-nested-vmexit-emul.patch +Patch26098: 26098-perfc-build.patch +Patch26102: 26102-x86-IOAPIC-legacy-not-first.patch +Patch26114: 26114-pygrub-list-entries.patch +Patch20: CVE-2012-4535-xsa20.patch +Patch22: CVE-2012-4537-xsa22.patch +Patch23: CVE-2012-4538-xsa23.patch +Patch24: CVE-2012-4539-xsa24.patch +Patch25: CVE-2012-4544-xsa25.patch # 
Upstream qemu patches +Patch100: VNC-Support-for-ExtendedKeyEvent-client-message.patch # Our patches Patch301: xend-config.diff Patch302: xen-destdir.diff @@ -314,6 +350,8 @@ Patch650: disable_emulated_device.diff Patch651: ioemu-disable-scsi.patch Patch652: ioemu-disable-emulated-ide-if-pv.patch Patch700: hv_extid_compatibility.patch +Patch800: xenpaging.autostart.patch +Patch801: xenpaging.doc.patch # Build patch Patch99998: tmp-initscript-modprobe.patch Patch99999: tmp_build.patch @@ -441,8 +479,8 @@ Requires: python-pam Requires: python-xml Requires: xen-libs = %{version} # subpackage existed in 10.3 -Provides: xen-tools-ioemu = 3.2 -Obsoletes: xen-tools-ioemu <= 3.2 +Provides: xen-tools-ioemu = %{version} +Obsoletes: xen-tools-ioemu < %{version} %description tools Xen is a virtual machine monitor for x86 that supports execution of @@ -643,7 +681,7 @@ xpdf/kpdf/gpdf/gv/... to read the files in -Authors: +Authors -------- Ian Pratt %endif @@ -674,15 +712,23 @@ tar xfj %{SOURCE6} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch25867 -p1 %patch25874 -p1 %patch25909 -p1 +%patch25920 -p1 +%patch25921 -p1 +%patch25922 -p1 %patch25927 -p1 %patch25929 -p1 %patch25931 -p1 %patch25940 -p1 %patch25941 -p1 %patch25952 -p1 +%patch25957 -p1 +%patch25958 -p1 +%patch25959 -p1 %patch25961 -p1 %patch25962 -p1 %patch25965 -p1 +%patch25975 -p1 +%patch25984 -p1 %patch26006 -p1 %patch26007 -p1 %patch26008 -p1 @@ -690,7 +736,36 @@ tar xfj %{SOURCE6} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch26010 -p1 %patch26011 -p1 %patch26018 -p1 +%patch26054 -p1 +%patch26055 -p1 +%patch26056 -p1 +%patch26061 -p1 +%patch26062 -p1 +%patch26063 -p1 +%patch26077 -p1 +%patch26078 -p1 +%patch26079 -p1 +%patch26081 -p1 +%patch26082 -p1 +%patch26083 -p1 +%patch26084 -p1 +%patch26085 -p1 +%patch26086 -p1 +%patch26087 -p1 +%patch26088 -p1 +%patch26093 -p1 +%patch26095 -p1 +%patch26096 -p1 +%patch26098 -p1 +%patch26102 -p1 +%patch26114 -p1 +%patch20 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 
-p1 # Qemu +%patch100 -p1 # Our patches %patch301 -p1 %patch302 -p1 @@ -799,6 +874,8 @@ tar xfj %{SOURCE6} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch651 -p1 %patch652 -p1 %patch700 -p1 +%patch800 -p1 +%patch801 -p1 %patch99998 -p1 %patch99999 -p1 # tools/qemu-xen-traditional-dir-remote/configure ./tools/qemu-xen-dir-remote/configure use diff --git a/xend-config.diff b/xend-config.diff index 3205e44..18bcead 100644 --- a/xend-config.diff +++ b/xend-config.diff @@ -2,12 +2,6 @@ Index: xen-4.2.0-testing/tools/hotplug/Linux/init.d/sysconfig.xendomains =================================================================== --- xen-4.2.0-testing.orig/tools/hotplug/Linux/init.d/sysconfig.xendomains +++ xen-4.2.0-testing/tools/hotplug/Linux/init.d/sysconfig.xendomains -@@ -1,4 +1,4 @@ --## Path: System/xen -+## Path: System/Virtualization - ## Description: xen domain start/stop on boot - ## Type: string - ## Default: @@ -98,7 +98,6 @@ XENDOMAINS_RESTORE=true # Note that the script tries to be clever if both RESTORE and AUTO are # set: It will first restore saved domains and then only start domains diff --git a/xenpaging.autostart.patch b/xenpaging.autostart.patch new file mode 100644 index 0000000..dca0d5b --- /dev/null +++ b/xenpaging.autostart.patch @@ -0,0 +1,413 @@ +# HG changeset patch +# Parent 659ee31faec91ac543578db7c9b2849fb7367419 + +xenpaging: xend: start xenpaging via config option + +Start xenpaging via config option. 
+ +TODO: add libxl support +TODO: parse config values like 42K, 42M, 42G, 42% + +Signed-off-by: Olaf Hering + +--- +v5: + use actmem=, xenpaging_file=, xenpaging_extra= + add xm mem-swap-target + +v4: + add config option for pagefile directory + add config option to enable debug + add config option to set policy mru_size + fail if chdir fails + force self.xenpaging* variables to be strings because a xm new may turn some + of them into type int and later os.execve fails with a TypeError + +v3: + decouple create/destroycreateXenPaging from _create/_removeDevices + init xenpaging variable to 0 if xenpaging is not in config file to + avoid string None coming from sxp file + +v2: + unlink logfile instead of truncating it. + allows hardlinking for further inspection + +--- + tools/examples/xmexample.hvm | 9 +++ + tools/python/README.XendConfig | 3 + + tools/python/README.sxpcfg | 3 + + tools/python/xen/xend/XendConfig.py | 9 +++ + tools/python/xen/xend/XendDomain.py | 15 +++++ + tools/python/xen/xend/XendDomainInfo.py | 23 ++++++++ + tools/python/xen/xend/image.py | 85 ++++++++++++++++++++++++++++++++ + tools/python/xen/xm/create.py | 15 +++++ + tools/python/xen/xm/main.py | 14 +++++ + tools/python/xen/xm/xenapi_create.py | 3 + + 10 files changed, 179 insertions(+) + +Index: xen-4.2.0-testing/tools/examples/xmexample.hvm +=================================================================== +--- xen-4.2.0-testing.orig/tools/examples/xmexample.hvm ++++ xen-4.2.0-testing/tools/examples/xmexample.hvm +@@ -142,6 +142,15 @@ disk = [ 'file:/var/lib/xen/images/disk. 
+ # Device Model to be used + device_model = 'qemu-dm' + ++# the amount of memory in MiB for the guest ++#actmem=42 ++ ++# Optional: guest page file ++#xenpaging_file="/var/lib/xen/xenpaging/..paging" ++ ++# Optional: extra cmdline options for xenpaging ++#xenpaging_extra=[ 'string', 'string' ] ++ + #----------------------------------------------------------------------------- + # boot on floppy (a), hard disk (c), Network (n) or CD-ROM (d) + # default: hard disk, cd-rom, floppy +Index: xen-4.2.0-testing/tools/python/README.XendConfig +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/README.XendConfig ++++ xen-4.2.0-testing/tools/python/README.XendConfig +@@ -120,6 +120,9 @@ otherConfig + image.vncdisplay + image.vncunused + image.hvm.device_model ++ image.hvm.actmem ++ image.hvm.xenpaging_file ++ image.hvm.xenpaging_extra + image.hvm.display + image.hvm.xauthority + image.hvm.vncconsole +Index: xen-4.2.0-testing/tools/python/README.sxpcfg +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/README.sxpcfg ++++ xen-4.2.0-testing/tools/python/README.sxpcfg +@@ -51,6 +51,9 @@ image + - vncunused + (HVM) + - device_model ++ - actmem ++ - xenpaging_file ++ - xenpaging_extra + - display + - xauthority + - vncconsole +Index: xen-4.2.0-testing/tools/python/xen/xend/XendConfig.py +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendConfig.py ++++ xen-4.2.0-testing/tools/python/xen/xend/XendConfig.py +@@ -147,6 +147,9 @@ XENAPI_PLATFORM_CFG_TYPES = { + 'apic': int, + 'boot': str, + 'device_model': str, ++ 'actmem': str, ++ 'xenpaging_file': str, ++ 'xenpaging_extra': str, + 'loader': str, + 'display' : str, + 'fda': str, +@@ -516,6 +519,12 @@ class XendConfig(dict): + self['platform']['nomigrate'] = 0 + + if self.is_hvm(): ++ if 'actmem' not in self['platform']: ++ self['platform']['actmem'] 
= "0" ++ if 'xenpaging_file' not in self['platform']: ++ self['platform']['xenpaging_file'] = "" ++ if 'xenpaging_extra' not in self['platform']: ++ self['platform']['xenpaging_extra'] = [] + if 'timer_mode' not in self['platform']: + self['platform']['timer_mode'] = 1 + if 'extid' in self['platform'] and int(self['platform']['extid']) == 1: +Index: xen-4.2.0-testing/tools/python/xen/xend/XendDomain.py +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendDomain.py ++++ xen-4.2.0-testing/tools/python/xen/xend/XendDomain.py +@@ -1835,6 +1835,21 @@ class XendDomain: + log.exception(ex) + raise XendError(str(ex)) + ++ def domain_swaptarget_set(self, domid, mem): ++ """Set the memory limit for a domain. ++ ++ @param domid: Domain ID or Name ++ @type domid: int or string. ++ @param mem: memory limit (in MiB) ++ @type mem: int ++ @raise XendError: fail to set memory ++ @rtype: 0 ++ """ ++ dominfo = self.domain_lookup_nr(domid) ++ if not dominfo: ++ raise XendInvalidDomain(str(domid)) ++ dominfo.setSwapTarget(mem) ++ + def domain_maxmem_set(self, domid, mem): + """Set the memory limit for a domain. + +Index: xen-4.2.0-testing/tools/python/xen/xend/XendDomainInfo.py +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-4.2.0-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -1505,6 +1505,17 @@ class XendDomainInfo: + break + xen.xend.XendDomain.instance().managed_config_save(self) + ++ def setSwapTarget(self, target): ++ """Set the swap target of this domain. ++ @param target: In MiB. 
++ """ ++ log.debug("Setting swap target of domain %s (%s) to %d MiB.", ++ self.info['name_label'], str(self.domid), target) ++ ++ if self.domid > 0: ++ self.storeDom("memory/target-tot_pages", target * 1024) ++ self.info['platform']['actmem'] = str(target) ++ + def setMemoryTarget(self, target): + """Set the memory target of this domain. + @param target: In MiB. +@@ -2295,6 +2306,8 @@ class XendDomainInfo: + self.info['name_label'], self.domid, self.info['uuid'], + new_name, new_uuid) + self._unwatchVm() ++ if self.image: ++ self.image.destroyXenPaging() + self._releaseDevices() + # Remove existing vm node in xenstore + self._removeVm() +@@ -2974,6 +2987,9 @@ class XendDomainInfo: + + self._createDevices() + ++ if self.image: ++ self.image.createXenPaging() ++ + self.image.cleanupTmpImages() + + self.info['start_time'] = time.time() +@@ -2998,6 +3014,8 @@ class XendDomainInfo: + self.refresh_shutdown_lock.acquire() + try: + self.unwatchShutdown() ++ if self.image: ++ self.image.destroyXenPaging() + self._releaseDevices() + bootloader_tidy(self) + +@@ -3082,6 +3100,7 @@ class XendDomainInfo: + self.image = image.create(self, self.info) + if self.image: + self.image.createDeviceModel(True) ++ self.image.createXenPaging() + self.console_port = console_port + self._storeDomDetails() + self._registerWatches() +@@ -3223,6 +3242,8 @@ class XendDomainInfo: + # could also fetch a parsed note from xenstore + fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0 + if not fast: ++ if self.image: ++ self.image.destroyXenPaging() + self._releaseDevices() + self.testDeviceComplete() + self.testvifsComplete() +@@ -3238,6 +3259,8 @@ class XendDomainInfo: + self._storeDomDetails() + + self._createDevices() ++ if self.image: ++ self.image.createXenPaging() + log.debug("XendDomainInfo.resumeDomain: devices created") + + xc.domain_resume(self.domid, fast) +Index: xen-4.2.0-testing/tools/python/xen/xend/image.py 
+=================================================================== +--- xen-4.2.0-testing.orig/tools/python/xen/xend/image.py ++++ xen-4.2.0-testing/tools/python/xen/xend/image.py +@@ -122,6 +122,10 @@ class ImageHandler: + self.vm.permissionsVm("image/cmdline", { 'dom': self.vm.getDomid(), 'read': True } ) + + self.device_model = vmConfig['platform'].get('device_model') ++ self.actmem = str(vmConfig['platform'].get('actmem')) ++ self.xenpaging_file = str(vmConfig['platform'].get('xenpaging_file')) ++ self.xenpaging_extra = vmConfig['platform'].get('xenpaging_extra') ++ self.xenpaging_pid = None + + self.display = vmConfig['platform'].get('display') + self.xauthority = vmConfig['platform'].get('xauthority') +@@ -392,6 +396,87 @@ class ImageHandler: + sentinel_fifos_inuse[sentinel_path_fifo] = 1 + self.sentinel_path_fifo = sentinel_path_fifo + ++ def createXenPaging(self): ++ if not self.vm.info.is_hvm(): ++ return ++ if self.actmem == "0": ++ return ++ if self.xenpaging_pid: ++ return ++ xenpaging_bin = auxbin.pathTo("xenpaging") ++ args = [xenpaging_bin] ++ args = args + ([ "-f", "/var/lib/xen/xenpaging/%s.%d.paging" % (str(self.vm.info['name_label']), self.vm.getDomid())]) ++ if self.xenpaging_extra: ++ args = args + (self.xenpaging_extra) ++ args = args + ([ "-d", "%d" % self.vm.getDomid()]) ++ self.xenpaging_logfile = "/var/log/xen/xenpaging-%s.log" % str(self.vm.info['name_label']) ++ logfile_mode = os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_TRUNC ++ null = os.open("/dev/null", os.O_RDONLY) ++ try: ++ os.unlink(self.xenpaging_logfile) ++ except: ++ pass ++ logfd = os.open(self.xenpaging_logfile, logfile_mode, 0644) ++ sys.stderr.flush() ++ contract = osdep.prefork("%s:%d" % (self.vm.getName(), self.vm.getDomid())) ++ xenpaging_pid = os.fork() ++ if xenpaging_pid == 0: #child ++ try: ++ osdep.postfork(contract) ++ os.dup2(null, 0) ++ os.dup2(logfd, 1) ++ os.dup2(logfd, 2) ++ try: ++ env = dict(os.environ) ++ log.info("starting %s" % args) ++ 
os.execve(xenpaging_bin, args, env) ++ except Exception, e: ++ log.warn('failed to execute xenpaging: %s' % utils.exception_string(e)) ++ os._exit(126) ++ except: ++ log.warn("starting xenpaging failed") ++ os._exit(127) ++ else: ++ osdep.postfork(contract, abandon=True) ++ self.xenpaging_pid = xenpaging_pid ++ os.close(null) ++ os.close(logfd) ++ self.vm.storeDom("xenpaging/xenpaging-pid", self.xenpaging_pid) ++ self.vm.storeDom("memory/target-tot_pages", int(self.actmem) * 1024) ++ ++ def destroyXenPaging(self): ++ if self.actmem == "0": ++ return ++ if self.xenpaging_pid: ++ try: ++ os.kill(self.xenpaging_pid, signal.SIGHUP) ++ except OSError, exn: ++ log.exception(exn) ++ for i in xrange(100): ++ try: ++ (p, rv) = os.waitpid(self.xenpaging_pid, os.WNOHANG) ++ if p == self.xenpaging_pid: ++ break ++ except OSError: ++ # This is expected if Xend has been restarted within ++ # the life of this domain. In this case, we can kill ++ # the process, but we can't wait for it because it's ++ # not our child. We continue this loop, and after it is ++ # terminated make really sure the process is going away ++ # (SIGKILL). ++ pass ++ time.sleep(0.1) ++ else: ++ log.warning("xenpaging %d took more than 10s " ++ "to terminate: sending SIGKILL" % self.xenpaging_pid) ++ try: ++ os.kill(self.xenpaging_pid, signal.SIGKILL) ++ os.waitpid(self.xenpaging_pid, 0) ++ except OSError: ++ # This happens if the process doesn't exist. 
++ pass ++ self.xenpaging_pid = None ++ + def createDeviceModel(self, restore = False): + if self.device_model is None: + return +Index: xen-4.2.0-testing/tools/python/xen/xm/create.py +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/xen/xm/create.py ++++ xen-4.2.0-testing/tools/python/xen/xm/create.py +@@ -495,6 +495,18 @@ gopts.var('nfs_root', val="PATH", + fn=set_value, default=None, + use="Set the path of the root NFS directory.") + ++gopts.var('actmem', val='NUM', ++ fn=set_value, default='0', ++ use="Number of pages to swap.") ++ ++gopts.var('xenpaging_file', val='PATH', ++ fn=set_value, default=None, ++ use="pagefile to use (optional)") ++ ++gopts.var('xenpaging_extra', val='string1,string2', ++ fn=append_value, default=[], ++ use="additional args for xenpaging (optional)") ++ + gopts.var('device_model', val='FILE', + fn=set_value, default=None, + use="Path to device model program.") +@@ -1100,6 +1112,9 @@ def configure_hvm(config_image, vals): + args = [ 'acpi', 'apic', + 'boot', + 'cpuid', 'cpuid_check', ++ 'actmem', ++ 'xenpaging_file', ++ 'xenpaging_extra', + 'device_model', 'display', + 'fda', 'fdb', + 'gfx_passthru', 'guest_os_type', +Index: xen-4.2.0-testing/tools/python/xen/xm/main.py +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/xen/xm/main.py ++++ xen-4.2.0-testing/tools/python/xen/xm/main.py +@@ -115,6 +115,8 @@ SUBCOMMAND_HELP = { + 'Set the maximum amount reservation for a domain.'), + 'mem-set' : (' ', + 'Set the current memory usage for a domain.'), ++ 'mem-swap-target' : (' ', ++ 'Set the memory usage for a domain.'), + 'migrate' : (' ', + 'Migrate a domain to another machine.'), + 'pause' : ('', 'Pause execution of a domain.'), +@@ -1592,6 +1594,17 @@ def xm_mem_set(args): + mem_target = int_unit(args[1], 'm') + server.xend.domain.setMemoryTarget(dom, mem_target) + ++def xm_mem_swap_target(args): ++ arg_check(args, 
"mem-swap-target", 2) ++ ++ dom = args[0] ++ ++ if serverType == SERVER_XEN_API: ++ err("xenapi not supported") ++ else: ++ swap_target = int_unit(args[1], 'm') ++ server.xend.domain.swaptarget_set(dom, swap_target) ++ + def xm_usb_add(args): + arg_check(args, "usb-add", 2) + server.xend.domain.usb_add(args[0],args[1]) +@@ -3847,6 +3860,7 @@ commands = { + # memory commands + "mem-max": xm_mem_max, + "mem-set": xm_mem_set, ++ "mem-swap-target": xm_mem_swap_target, + # cpu commands + "vcpu-pin": xm_vcpu_pin, + "vcpu-list": xm_vcpu_list, +Index: xen-4.2.0-testing/tools/python/xen/xm/xenapi_create.py +=================================================================== +--- xen-4.2.0-testing.orig/tools/python/xen/xm/xenapi_create.py ++++ xen-4.2.0-testing/tools/python/xen/xm/xenapi_create.py +@@ -1085,6 +1085,9 @@ class sxp2xml: + 'acpi', + 'apic', + 'boot', ++ 'actmem', ++ 'xenpaging_file', ++ 'xenpaging_extra', + 'device_model', + 'loader', + 'fda', diff --git a/xenpaging.doc.patch b/xenpaging.doc.patch new file mode 100644 index 0000000..c5b38dd --- /dev/null +++ b/xenpaging.doc.patch @@ -0,0 +1,65 @@ +--- + docs/misc/xenpaging.txt | 49 +++++++++++++++++++++++++++++++++--------------- + 1 file changed, 34 insertions(+), 15 deletions(-) + +Index: xen-4.2.0-testing/docs/misc/xenpaging.txt +=================================================================== +--- xen-4.2.0-testing.orig/docs/misc/xenpaging.txt ++++ xen-4.2.0-testing/docs/misc/xenpaging.txt +@@ -22,22 +22,41 @@ functionality. + + Usage: + +-Up to now xenpaging is not integrated into libxl/xend, so it has to be +-started manually for each guest. ++Up to now xenpaging is only integrated into xm/xend. + +-Once the guest is running, run xenpaging with the guest_id and the path +-to the pagefile: +- +- /usr/lib/xen/bin/xenpaging -f /path/to/page_file -d dom_id & +- +-Once xenpaging runs it needs a memory target, which is the memory +-footprint of the guest. 
This value (in KiB) must be written manually to +-xenstore. The following example sets the target to 512MB: +- +- xenstore-write /local/domain//memory/target-tot_pages $((1024*512)) +- +-Now xenpaging tries to page-out as many pages to keep the overall memory +-footprint of the guest at 512MB. ++To enable xenpaging for a guest add the option 'actmem=' to the guests ++config file and run 'xm new ' to make the changes ++active. actmem= takes the amount of memory in MB which a guest is ++allowed to use at a given time. Everything above this limit will be ++paged out. This paging is transparent to the guest. ++ ++Example: ++ memory=4096 ++ actmem=1024 ++In this example a guest gets the impression it has 4GB of memory and ++the guest OS has to configure itself for this amount of memory. But ++xenpaging will page-out 3072MB, leaving only 1024MB active at a time. ++ ++At runtime the configured value of actmem= can be changed with the "xm ++mem-swap-target" command. ++ xm mem-swap-target 512 ++ ++Additional cmdline options for the xenpaging binary can be specified ++with the xenpaging_extra= config file option: ++ ++ xenpaging_extra=[ '-f', '/dev/shm/pagefile-guest_name', '-v' ] ++ ++To get a list of available options, run /usr/lib/xen/bin/xenpaging -h: ++ ++ xenpaging [options] -f -d ++ ++options: ++ -d --domain= numerical domain_id of guest. This option is required. ++ -f --pagefile= pagefile to use. This option is required. ++ -m --max_memkb= maximum amount of memory to handle. ++ -r --mru_size= number of paged-in pages to keep in memory. ++ -v --verbose enable debug output. ++ -h --help this output. + + Todo: + - integrate xenpaging into libxl