diff --git a/530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch b/530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch new file mode 100644 index 0000000..bcf837c --- /dev/null +++ b/530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch @@ -0,0 +1,188 @@ +# Commit 60ea3a3ac3d2bcd8e85b250fdbfc46b3b9dc7085 +# Date 2014-02-24 12:07:41 +0100 +# Author Frediano Ziglio +# Committer Jan Beulich +x86/MCE: Fix race condition in mctelem_reserve + +These lines (in mctelem_reserve) + + newhead = oldhead->mcte_next; + if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) { + +are racy. After you read the newhead pointer it can happen that another +flow (thread or recursive invocation) change all the list but set head +with same value. So oldhead is the same as *freelp but you are setting +a new head that could point to whatever element (even already used). + +This patch use instead a bit array and atomic bit operations. + +Signed-off-by: Frediano Ziglio +Reviewed-by: Liu Jinsong + +--- a/xen/arch/x86/cpu/mcheck/mctelem.c ++++ b/xen/arch/x86/cpu/mcheck/mctelem.c +@@ -37,24 +37,19 @@ struct mctelem_ent { + void *mcte_data; /* corresponding data payload */ + }; + +-#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent freelist */ +-#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent freelist */ +-#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent errors */ +-#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent errors */ ++#define MCTE_F_CLASS_URGENT 0x0001U /* in use - urgent errors */ ++#define MCTE_F_CLASS_NONURGENT 0x0002U /* in use - nonurgent errors */ + #define MCTE_F_STATE_FREE 0x0010U /* on a freelist */ + #define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list */ + #define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list */ + #define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list */ + +-#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT) + #define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | 
MCTE_F_CLASS_NONURGENT) + #define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \ + MCTE_F_STATE_UNCOMMITTED | \ + MCTE_F_STATE_COMMITTED | \ + MCTE_F_STATE_PROCESSING) + +-#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME) +- + #define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS) + #define MCTE_SET_CLASS(tep, new) do { \ + (tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \ +@@ -69,6 +64,8 @@ struct mctelem_ent { + #define MC_URGENT_NENT 10 + #define MC_NONURGENT_NENT 20 + ++#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT) ++ + #define MC_NCLASSES (MC_NONURGENT + 1) + + #define COOKIE2MCTE(c) ((struct mctelem_ent *)(c)) +@@ -77,11 +74,9 @@ struct mctelem_ent { + static struct mc_telem_ctl { + /* Linked lists that thread the array members together. + * +- * The free lists are singly-linked via mcte_next, and we allocate +- * from them by atomically unlinking an element from the head. +- * Consumed entries are returned to the head of the free list. +- * When an entry is reserved off the free list it is not linked +- * on any list until it is committed or dismissed. ++ * The free lists is a bit array where bit 1 means free. ++ * This as element number is quite small and is easy to ++ * atomically allocate that way. + * + * The committed list grows at the head and we do not maintain a + * tail pointer; insertions are performed atomically. The head +@@ -101,7 +96,7 @@ static struct mc_telem_ctl { + * we can lock it for updates. The head of the processing list + * always has the oldest telemetry, and we append (as above) + * at the tail of the processing list. 
*/ +- struct mctelem_ent *mctc_free[MC_NCLASSES]; ++ DECLARE_BITMAP(mctc_free, MC_NENT); + struct mctelem_ent *mctc_committed[MC_NCLASSES]; + struct mctelem_ent *mctc_processing_head[MC_NCLASSES]; + struct mctelem_ent *mctc_processing_tail[MC_NCLASSES]; +@@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cp + */ + static void mctelem_free(struct mctelem_ent *tep) + { +- mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ? +- MC_URGENT : MC_NONURGENT; +- + BUG_ON(tep->mcte_refcnt != 0); + BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE); + + tep->mcte_prev = NULL; +- mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep); ++ tep->mcte_next = NULL; ++ ++ /* set free in array */ ++ set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free); + } + + /* Increment the reference count of an entry that is not linked on to +@@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz) + } + + if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent, +- MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL || +- (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) * +- datasz)) == NULL) { ++ MC_NENT)) == NULL || ++ (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) { + if (mctctl.mctc_elems) + xfree(mctctl.mctc_elems); + printk("Allocations for MCA telemetry failed\n"); + return; + } + +- for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) { +- struct mctelem_ent *tep, **tepp; ++ for (i = 0; i < MC_NENT; i++) { ++ struct mctelem_ent *tep; + + tep = mctctl.mctc_elems + i; + tep->mcte_flags = MCTE_F_STATE_FREE; + tep->mcte_refcnt = 0; + tep->mcte_data = datarr + i * datasz; + +- if (i < MC_URGENT_NENT) { +- tepp = &mctctl.mctc_free[MC_URGENT]; +- tep->mcte_flags |= MCTE_F_HOME_URGENT; +- } else { +- tepp = &mctctl.mctc_free[MC_NONURGENT]; +- tep->mcte_flags |= MCTE_F_HOME_NONURGENT; +- } +- +- tep->mcte_next = *tepp; ++ __set_bit(i, mctctl.mctc_free); ++ tep->mcte_next = NULL; + tep->mcte_prev = NULL; +- *tepp = tep; + } + } + +@@ -310,32 +296,25 @@ static 
int mctelem_drop_count; + + /* Reserve a telemetry entry, or return NULL if none available. + * If we return an entry then the caller must subsequently call exactly one of +- * mctelem_unreserve or mctelem_commit for that entry. ++ * mctelem_dismiss or mctelem_commit for that entry. + */ + mctelem_cookie_t mctelem_reserve(mctelem_class_t which) + { +- struct mctelem_ent **freelp; +- struct mctelem_ent *oldhead, *newhead; +- mctelem_class_t target = (which == MC_URGENT) ? +- MC_URGENT : MC_NONURGENT; ++ unsigned bit; ++ unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT; + +- freelp = &mctctl.mctc_free[target]; + for (;;) { +- if ((oldhead = *freelp) == NULL) { +- if (which == MC_URGENT && target == MC_URGENT) { +- /* raid the non-urgent freelist */ +- target = MC_NONURGENT; +- freelp = &mctctl.mctc_free[target]; +- continue; +- } else { +- mctelem_drop_count++; +- return (NULL); +- } ++ bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit); ++ ++ if (bit >= MC_NENT) { ++ mctelem_drop_count++; ++ return (NULL); + } + +- newhead = oldhead->mcte_next; +- if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) { +- struct mctelem_ent *tep = oldhead; ++ /* try to allocate, atomically clear free bit */ ++ if (test_and_clear_bit(bit, mctctl.mctc_free)) { ++ /* return element we got */ ++ struct mctelem_ent *tep = mctctl.mctc_elems + bit; + + mctelem_hold(tep); + MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED); diff --git a/530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch b/530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch new file mode 100644 index 0000000..466ae63 --- /dev/null +++ b/530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch @@ -0,0 +1,29 @@ +# Commit fd1864f48d8914fb8eeb6841cd08c2c09b368909 +# Date 2014-02-24 12:09:52 +0100 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: update nested paging mode on vmexit + +Since SVM and VMX use different mechanism to emulate the virtual-vmentry +and virtual-vmexit, 
it's hard to update the nested paging mode correctly in +common code. So we need to update the nested paging mode in their respective +code path. +SVM already updates the nested paging mode on vmexit. This patch adds the same +logic in VMX side. + +Previous discussion is here: +http://lists.xen.org/archives/html/xen-devel/2013-12/msg01759.html + +Signed-off-by: Yang Zhang +Reviewed-by: Christoph Egger + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2541,6 +2541,7 @@ void vmx_vmexit_handler(struct cpu_user_ + vcpu_nestedhvm(v).nv_vmswitch_in_progress = 0; + if ( nestedhvm_vcpu_in_guestmode(v) ) + { ++ paging_update_nestedmode(v); + if ( nvmx_n2_vmexit_handler(regs, exit_reason) ) + goto out; + } diff --git a/530b28c5-x86-MSI-don-t-risk-division-by-zero.patch b/530b28c5-x86-MSI-don-t-risk-division-by-zero.patch new file mode 100644 index 0000000..d78382f --- /dev/null +++ b/530b28c5-x86-MSI-don-t-risk-division-by-zero.patch @@ -0,0 +1,24 @@ +# Commit 5d160d913e03b581bdddde73535c18ac670cf0a9 +# Date 2014-02-24 12:11:01 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/MSI: don't risk division by zero + +The check in question is redundant with the one in the immediately +following if(), where dividing by zero gets carefully avoided. 
+ +Spotted-by: Boris Ostrovsky +Signed-off-by: Jan Beulich +Reviewed-by: Boris Ostrovsky + +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c +@@ -636,7 +636,7 @@ static u64 read_pci_mem_bar(u16 seg, u8 + return 0; + base = pos + PCI_SRIOV_BAR; + vf -= PCI_BDF(bus, slot, func) + offset; +- if ( vf < 0 || (vf && vf % stride) ) ++ if ( vf < 0 ) + return 0; + if ( stride ) + { diff --git a/530c54c3-x86-mce-Reduce-boot-time-logspam.patch b/530c54c3-x86-mce-Reduce-boot-time-logspam.patch new file mode 100644 index 0000000..4a88c02 --- /dev/null +++ b/530c54c3-x86-mce-Reduce-boot-time-logspam.patch @@ -0,0 +1,27 @@ +# Commit a5ab9c9fa29cda7e1b18dbcaa69a5dbded96de32 +# Date 2014-02-25 09:30:59 +0100 +# Author Andrew Cooper +# Committer Jan Beulich +x86/mce: Reduce boot-time logspam + +When booting with "no-mce", the user does not need to be told that "MCE +support [was] disabled by bootparam" for each cpu. Furthermore, a file:line +reference is unnecessary. + +Signed-off-by: Andrew Cooper + +--- a/xen/arch/x86/cpu/mcheck/mce.c ++++ b/xen/arch/x86/cpu/mcheck/mce.c +@@ -729,8 +729,10 @@ void mcheck_init(struct cpuinfo_x86 *c, + { + enum mcheck_type inited = mcheck_none; + +- if (mce_disabled == 1) { +- dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n"); ++ if ( mce_disabled ) ++ { ++ if ( bsp ) ++ printk(XENLOG_INFO "MCE support disabled by bootparam\n"); + return; + } + diff --git a/5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch b/5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch new file mode 100644 index 0000000..0adf29f --- /dev/null +++ b/5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch @@ -0,0 +1,32 @@ +# Commit 79de2d31f1ff8910231b7ec15519405953e6571a +# Date 2014-02-28 17:35:15 +0100 +# Author Wei Liu +# Committer Jan Beulich +mm: ensure useful progress in decrease_reservation + +During my fun time playing with balloon driver I found that hypervisor's +preemption check kept decrease_reservation from doing 
any useful work +for 32 bit guests, resulting in hanging the guests. + +As Andrew suggested, we can force the check to fail for the first +iteration to ensure progress. We did this in d3a55d7d9 "x86/mm: Ensure +useful progress in alloc_l2_table()" already. + +After this change I cannot see the hang caused by continuation logic +anymore. + +Signed-off-by: Wei Liu +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -268,7 +268,7 @@ static void decrease_reservation(struct + + for ( i = a->nr_done; i < a->nr_extents; i++ ) + { +- if ( hypercall_preempt_check() ) ++ if ( hypercall_preempt_check() && i != a->nr_done ) + { + a->preempted = 1; + goto out; diff --git a/5315a254-IOMMU-generalize-and-correct-softirq-processing.patch b/5315a254-IOMMU-generalize-and-correct-softirq-processing.patch new file mode 100644 index 0000000..4c0cc6d --- /dev/null +++ b/5315a254-IOMMU-generalize-and-correct-softirq-processing.patch @@ -0,0 +1,85 @@ +References: bnc#858178 + +# Commit 9ef5aa944a6a0df7f5938983043c7e46f158bbc6 +# Date 2014-03-04 10:52:20 +0100 +# Author Jan Beulich +# Committer Jan Beulich +IOMMU: generalize and correct softirq processing during Dom0 device setup + +c/s 21039:95f5a4ce8f24 ("VT-d: reduce default verbosity") having put a +call to process_pending_softirqs() in VT-d's domain_context_mapping() +was wrong in two ways: For one we shouldn't be doing this when setting +up a device during DomU assignment. And then - I didn't check whether +that was the case already back then - we shouldn't call that function +with the pcidevs_lock (or in fact any spin lock) held. + +Move the "preemption" into generic code, at once dealing with further +actual (too much output elsewhere - particularly on systems with very +many host bridge like devices - having been observed to still cause the +watchdog to trigger when enabled) and potential (other IOMMU code may +also end up being too verbose) issues. 
+ +Do the "preemption" once per device actually being set up when in +verbose mode, and once per bus otherwise. + +Note that dropping pcidevs_lock around the process_pending_softirqs() +invocation is specifically not a problem here: We're in an __init +function and aren't racing with potential additions/removals of PCI +devices. Not acquiring the lock in setup_dom0_pci_devices() otoh is not +an option, as there are too many places that assert the lock being +held. + +Signed-off-by: Jan Beulich +Acked-by: Xiantao Zhang + +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -922,6 +923,20 @@ static int __init _setup_dom0_pci_device + printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n", + pdev->domain->domain_id, pseg->nr, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); ++ ++ if ( iommu_verbose ) ++ { ++ spin_unlock(&pcidevs_lock); ++ process_pending_softirqs(); ++ spin_lock(&pcidevs_lock); ++ } ++ } ++ ++ if ( !iommu_verbose ) ++ { ++ spin_unlock(&pcidevs_lock); ++ process_pending_softirqs(); ++ spin_lock(&pcidevs_lock); + } + } + +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -31,7 +31,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -1494,9 +1493,6 @@ static int domain_context_mapping( + break; + } + +- if ( iommu_verbose ) +- process_pending_softirqs(); +- + return ret; + } + diff --git a/5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch b/5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch new file mode 100644 index 0000000..02c73f9 --- /dev/null +++ b/5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch @@ -0,0 +1,59 @@ +# Commit 8db22866dbbcabf30ad6e3814489c730b53d1cf4 +# Date 2014-03-04 10:58:19 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86: don't propagate acpi_skip_timer_override do Dom0 + +It's 
unclear why c/s 4850:923dd9975981 added this - Dom0 isn't +controlling the timer interrupt, and hence has no need to know. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/acpi/boot.c ++++ b/xen/arch/x86/acpi/boot.c +@@ -56,7 +56,9 @@ bool_t __initdata acpi_ht = 1; /* enable + bool_t __initdata acpi_lapic; + bool_t __initdata acpi_ioapic; + +-bool_t acpi_skip_timer_override __initdata; ++/* acpi_skip_timer_override: Skip IRQ0 overrides. */ ++static bool_t acpi_skip_timer_override __initdata; ++boolean_param("acpi_skip_timer_override", acpi_skip_timer_override); + + #ifdef CONFIG_X86_LOCAL_APIC + static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -71,10 +71,6 @@ static void parse_acpi_param(char *s); + custom_param("acpi", parse_acpi_param); + + /* **** Linux config option: propagated to domain0. */ +-/* acpi_skip_timer_override: Skip IRQ0 overrides. */ +-boolean_param("acpi_skip_timer_override", acpi_skip_timer_override); +- +-/* **** Linux config option: propagated to domain0. */ + /* noapic: Disable IOAPIC setup. */ + boolean_param("noapic", skip_ioapic_setup); + +@@ -1365,9 +1361,6 @@ void __init __start_xen(unsigned long mb + /* Append any extra parameters. 
*/ + if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") ) + safe_strcat(dom0_cmdline, " noapic"); +- if ( acpi_skip_timer_override && +- !strstr(dom0_cmdline, "acpi_skip_timer_override") ) +- safe_strcat(dom0_cmdline, " acpi_skip_timer_override"); + if ( (strlen(acpi_param) == 0) && acpi_disabled ) + { + printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n"); +--- a/xen/include/asm-x86/acpi.h ++++ b/xen/include/asm-x86/acpi.h +@@ -80,7 +80,6 @@ int __acpi_release_global_lock(unsigned + + extern bool_t acpi_lapic, acpi_ioapic, acpi_noirq; + extern bool_t acpi_force, acpi_ht, acpi_disabled; +-extern bool_t acpi_skip_timer_override; + extern u32 acpi_smi_cmd; + extern u8 acpi_enable_value, acpi_disable_value; + void acpi_pic_sci_set_trigger(unsigned int, u16); diff --git a/5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch b/5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch new file mode 100644 index 0000000..b6b0b7b --- /dev/null +++ b/5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch @@ -0,0 +1,32 @@ +# Commit 7acf827b951b4e8501a777676ddf050d200103a4 +# Date 2014-03-04 11:00:26 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/ACPI: also print address space for PM1x fields + +At least one vendor is in the process of making systems available where +these live in MMIO, not in I/O port space. 
+ +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/acpi/boot.c ++++ b/xen/arch/x86/acpi/boot.c +@@ -404,11 +404,15 @@ acpi_fadt_parse_sleep_info(struct acpi_t + acpi_fadt_copy_address(pm1b_evt, pm1b_event, pm1_event); + + printk(KERN_INFO PREFIX +- "SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], " +- "pm1x_evt[%"PRIx64",%"PRIx64"]\n", ++ "SLEEP INFO: pm1x_cnt[%d:%"PRIx64",%d:%"PRIx64"], " ++ "pm1x_evt[%d:%"PRIx64",%d:%"PRIx64"]\n", ++ acpi_sinfo.pm1a_cnt_blk.space_id, + acpi_sinfo.pm1a_cnt_blk.address, ++ acpi_sinfo.pm1b_cnt_blk.space_id, + acpi_sinfo.pm1b_cnt_blk.address, ++ acpi_sinfo.pm1a_evt_blk.space_id, + acpi_sinfo.pm1a_evt_blk.address, ++ acpi_sinfo.pm1b_evt_blk.space_id, + acpi_sinfo.pm1b_evt_blk.address); + + /* Now FACS... */ diff --git a/531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch b/531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch new file mode 100644 index 0000000..33db8d9 --- /dev/null +++ b/531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch @@ -0,0 +1,56 @@ +# Commit cadfd7bca999c0a795dc27be72d43c92e8943a0b +# Date 2014-03-10 11:02:25 +0100 +# Author Dongxiao Xu +# Committer Jan Beulich +x86/hvm: refine the judgment on IDENT_PT for EMT + +When trying to get the EPT EMT type, the judgment on +HVM_PARAM_IDENT_PT is not correct which always returns WB type if +the parameter is not set. Remove the related code. + +Signed-off-by: Dongxiao Xu + +We can't fully drop the dependency yet, but we should certainly avoid +overriding cases already properly handled. The reason for this is that +the guest setting up its MTRRs happens _after_ the EPT tables got +already constructed, and no code is in place to propagate this to the +EPT code. Without this check we're forcing the guest to run with all of +its memory uncachable until something happens to re-write every single +EPT entry. But of course this has to be just a temporary solution. 
+ +In the same spirit we should defer the "very early" (when the guest is +still being constructed and has no vCPU yet) override to the last +possible point. + +Signed-off-by: Jan Beulich +Reviewed-by: "Xu, Dongxiao" +Acked-by: Keir Fraser + +--- a/xen/arch/x86/hvm/mtrr.c ++++ b/xen/arch/x86/hvm/mtrr.c +@@ -689,13 +689,8 @@ uint8_t epte_get_entry_emt(struct domain + + *ipat = 0; + +- if ( (current->domain != d) && +- ((d->vcpu == NULL) || ((v = d->vcpu[0]) == NULL)) ) +- return MTRR_TYPE_WRBACK; +- +- if ( !is_pvh_vcpu(v) && +- !v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ) +- return MTRR_TYPE_WRBACK; ++ if ( v->domain != d ) ++ v = d->vcpu ? d->vcpu[0] : NULL; + + if ( !mfn_valid(mfn_x(mfn)) ) + return MTRR_TYPE_UNCACHABLE; +@@ -718,7 +713,8 @@ uint8_t epte_get_entry_emt(struct domain + return MTRR_TYPE_WRBACK; + } + +- gmtrr_mtype = is_hvm_vcpu(v) ? ++ gmtrr_mtype = is_hvm_domain(d) && v && ++ d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ? + get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) : + MTRR_TYPE_WRBACK; + diff --git a/531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch b/531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch new file mode 100644 index 0000000..6b04e45 --- /dev/null +++ b/531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch @@ -0,0 +1,54 @@ +# Commit b99113b9d5fac5149de8496f55afa00e285b1ff3 +# Date 2014-03-10 11:03:53 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/HVM: fix memory type merging in epte_get_entry_emt() + +Using the minimum numeric value of guest and host specified memory +types is too simplistic - it works only correctly for a subset of +types. It is in particular the WT/WP combination that needs conversion +to UC if the two types conflict. 
+ +Signed-off-by: Jan Beulich +Reviewed-by: "Xu, Dongxiao" +Acked-by: Keir Fraser + +--- a/xen/arch/x86/hvm/mtrr.c ++++ b/xen/arch/x86/hvm/mtrr.c +@@ -719,5 +719,35 @@ uint8_t epte_get_entry_emt(struct domain + MTRR_TYPE_WRBACK; + + hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT)); +- return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype); ++ ++ /* If both types match we're fine. */ ++ if ( likely(gmtrr_mtype == hmtrr_mtype) ) ++ return hmtrr_mtype; ++ ++ /* If either type is UC, we have to go with that one. */ ++ if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE || ++ hmtrr_mtype == MTRR_TYPE_UNCACHABLE ) ++ return MTRR_TYPE_UNCACHABLE; ++ ++ /* If either type is WB, we have to go with the other one. */ ++ if ( gmtrr_mtype == MTRR_TYPE_WRBACK ) ++ return hmtrr_mtype; ++ if ( hmtrr_mtype == MTRR_TYPE_WRBACK ) ++ return gmtrr_mtype; ++ ++ /* ++ * At this point we have disagreeing WC, WT, or WP types. The only ++ * combination that can be cleanly resolved is WT:WP. The ones involving ++ * WC need to be converted to UC, both due to the memory ordering ++ * differences and because WC disallows reads to be cached (WT and WP ++ * permit this), while WT and WP require writes to go straight to memory ++ * (WC can buffer them). 
++ */ ++ if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH && ++ hmtrr_mtype == MTRR_TYPE_WRPROT) || ++ (gmtrr_mtype == MTRR_TYPE_WRPROT && ++ hmtrr_mtype == MTRR_TYPE_WRTHROUGH) ) ++ return MTRR_TYPE_WRPROT; ++ ++ return MTRR_TYPE_UNCACHABLE; + } diff --git a/531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch b/531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch new file mode 100644 index 0000000..8552bb9 --- /dev/null +++ b/531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch @@ -0,0 +1,59 @@ +# Commit 3089a6d82bdf3112ccb1dd074ce34a8cbdc4ccd8 +# Date 2014-03-10 11:04:36 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/HVM: consolidate passthrough handling in epte_get_entry_emt() + +It is inconsistent to depend on iommu_enabled alone: For a guest +without devices passed through to it, it is of no concern whether the +IOMMU is enabled. + +There's one rather special case to take care of: VMX code marks the +LAPIC access page as MMIO. The added assertion needs to take this into +consideration, and the subsequent handling of the direct MMIO case was +inconsistent too: That page would have been WB in the absence of an +IOMMU, but UC in the presence of it, while in fact the cachability of +this page is entirely unrelated to an IOMMU being in use. 
+ +Signed-off-by: Jan Beulich +Reviewed-by: "Xu, Dongxiao" +Acked-by: Keir Fraser + +--- a/xen/arch/x86/hvm/mtrr.c ++++ b/xen/arch/x86/hvm/mtrr.c +@@ -698,14 +698,20 @@ uint8_t epte_get_entry_emt(struct domain + if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) ) + return type; + +- if ( !iommu_enabled ) ++ if ( !iommu_enabled || ++ (rangeset_is_empty(d->iomem_caps) && ++ rangeset_is_empty(d->arch.ioport_caps) && ++ !has_arch_pdevs(d)) ) + { ++ ASSERT(!direct_mmio || ++ mfn_x(mfn) == d->arch.hvm_domain.vmx.apic_access_mfn); + *ipat = 1; + return MTRR_TYPE_WRBACK; + } + + if ( direct_mmio ) +- return MTRR_TYPE_UNCACHABLE; ++ return mfn_x(mfn) != d->arch.hvm_domain.vmx.apic_access_mfn ++ ? MTRR_TYPE_UNCACHABLE : MTRR_TYPE_WRBACK; + + if ( iommu_snoop ) + { +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2038,9 +2038,9 @@ static int vmx_alloc_vlapic_mapping(stru + if ( apic_va == NULL ) + return -ENOMEM; + share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable); ++ d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va); + set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), + _mfn(virt_to_mfn(apic_va))); +- d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va); + + return 0; + } diff --git a/531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch b/531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch new file mode 100644 index 0000000..548430a --- /dev/null +++ b/531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch @@ -0,0 +1,36 @@ +# Commit 4509ada6ba1f09cc8f4fa23e009e7e5a963b6086 +# Date 2014-03-10 11:11:28 +0100 +# Author Andrew Cooper +# Committer Jan Beulich +kexec: identify which cpu the kexec image is being executed on + +A patch to this effect has been in XenServer for a little while, and has +proved to be a useful debugging point for servers which have different +behaviours depending when crashing on the non-bootstrap processor. 
+ +Moving the printk() from kexec_crash() to one_cpu_only() means that it will +only be printed for the cpu which wins the race along the kexec path. + +Signed-off-by: Andrew Cooper +Acked-by: David Vrabel + +--- a/xen/common/kexec.c ++++ b/xen/common/kexec.c +@@ -265,6 +265,8 @@ static int noinline one_cpu_only(void) + } + + set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags); ++ printk("Executing kexec image on cpu%u\n", cpu); ++ + return 0; + } + +@@ -340,8 +342,6 @@ void kexec_crash(void) + if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) ) + return; + +- printk("Executing crash image\n"); +- + kexecing = TRUE; + + if ( kexec_common_shutdown() != 0 ) diff --git a/531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch b/531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch new file mode 100644 index 0000000..f6beb85 --- /dev/null +++ b/531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch @@ -0,0 +1,29 @@ +# Commit ac2cba2901779f66bbfab298faa15c956e91393a +# Date 2014-03-10 14:40:50 +0100 +# Author Julien Grall +# Committer Jan Beulich +xmalloc: handle correctly page allocation when align > size + +When align is greater than size, we need to retrieve the order from +align during multiple page allocation. I guess it was the goal of the commit +fb034f42 "xmalloc: make close-to-PAGE_SIZE allocations more efficient". 
+ +Signed-off-by: Julien Grall +Acked-by: Keir Fraser + +--- a/xen/common/xmalloc_tlsf.c ++++ b/xen/common/xmalloc_tlsf.c +@@ -527,11 +527,10 @@ static void xmalloc_pool_put(void *p) + + static void *xmalloc_whole_pages(unsigned long size, unsigned long align) + { +- unsigned int i, order = get_order_from_bytes(size); ++ unsigned int i, order; + void *res, *p; + +- if ( align > size ) +- get_order_from_bytes(align); ++ order = get_order_from_bytes(max(align, size)); + + res = alloc_xenheap_pages(order, 0); + if ( res == NULL ) diff --git a/block-npiv b/block-npiv index 7128935..b393bb4 100644 --- a/block-npiv +++ b/block-npiv @@ -13,48 +13,52 @@ case "$command" in add) # Params is one big arg, with fields separated by hyphens: # single path: - # FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN# + # VPWWPN-TGTWWPN-LUN# # multipath: - # {FABRIC1.FABRIC2}-{VPWWPN1.VPWWPN2.VPWWPN3}-VPWWNN-TGTWWPN-LUN# - # arg 2 - Fabric Name - # arg 3 - VPORT's WWPN - # arg 4 - VPORT's WWNN - # arg 5 - Target's WWPN - # arg 6 - LUN # on Target + # {VPWWPN1.VPWWPN2....VPWWPNx}-{TGTWWPN1.TGTWWPN2....TGTWWPNx}-LUN# + # arg 1 - VPORT's WWPN + # arg 2 - Target's WWPN + # arg 3 - LUN # on Target # no wwn contains a leading 0x - it is a 16 character hex value # You may want to optionally pick a specific adapter ? 
par=`xenstore-read $XENBUS_PATH/params` || true - #par=$2 - NPIVARGS=$par; - LUN=${NPIVARGS##*-*-*-*-}; NPIVARGS=${NPIVARGS%-*} - if test $LUN = $NPIVARGS ; then exit 1; fi - TGTWWPN=${NPIVARGS##*-*-*-}; NPIVARGS=${NPIVARGS%-*} - if test $TGTWWPN = $NPIVARGS ; then exit 1; fi - VPORTWWNN=${NPIVARGS##*-*-}; NPIVARGS=${NPIVARGS%-*} - if test $VPORTWWNN = $NPIVARGS ; then exit 1; fi - VPORTWWPNS=${NPIVARGS##*-}; NPIVARGS=${NPIVARGS%-*} - if test $VPORTWWPNS = $NPIVARGS ; then exit 1; fi - FABRICNMS=$NPIVARGS + NPIVARGS=(${par//-/ }) + wc=${#NPIVARGS[@]} + if [ $wc -eq 5 ]; then + # support old syntax + # FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN + VPORTWWPNS=${NPIVARGS[1]} + VPORTWWNNS=${NPIVARGS[2]} + TGTWWPNS=${NPIVARGS[3]} + LUN=${NPIVARGS[4]} + elif [ $wc -eq 3 ]; then + # new syntax + VPORTWWPNS=${NPIVARGS[0]} + TGTWWPNS=${NPIVARGS[1]} + LUN=${NPIVARGS[2]} + else + # wrong syntax + exit 1 + fi # Ensure we compare everything using lower-case hex characters - TGTWWPN=`echo $TGTWWPN | tr A-Z a-z` + TGTWWPNS=`echo $TGTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'` VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'` - VPORTWWNN=`echo $VPORTWWNN | tr A-Z a-z` - FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'` - + # Only one VPWWNN is supported + VPORTWWNN=`echo $VPORTWWNNS | tr A-Z a-z | sed -e 's/\..*//g' -e 's/{//'` claim_lock "npiv" paths=0 - for FABRICNM in $FABRICNMS; do - for VPORTWWPN in $VPORTWWPNS; do - find_vhost $VPORTWWPN $FABRICNM - if test -z "$vhost" ; then - create_vport $FABRICNM $VPORTWWPN $VPORTWWNN - if [ $? -ne 0 ] ; then exit 2; fi - sleep 8 - find_vhost $VPORTWWPN $FABRICNM - if test -z "$vhost" ; then exit 3; fi - fi + for VPORTWWPN in $VPORTWWPNS; do + find_vhost $VPORTWWPN + if test -z "$vhost" ; then + create_vport $VPORTWWPN $VPORTWWNN + if [ $? 
-ne 0 ] ; then exit 2; fi + sleep 8 + find_vhost $VPORTWWPN + if test -z "$vhost" ; then exit 3; fi + fi + for TGTWWPN in $TGTWWPNS; do find_sdev $vhost $TGTWWPN $LUN if test -z "$dev"; then echo "- - -" > /sys/class/scsi_host/$vhost/scan @@ -95,18 +99,22 @@ case "$command" in # terminate the entire vport (all luns) if test $multipath = 1; then par=`xenstore-read $XENBUS_PATH/params` || true - NPIVARGS=$par; - FABRICNMS=${NPIVARGS%%-*}; NPIVARGS=${NPIVARGS#*-} - VPORTWWPNS=${NPIVARGS%%-*} + NPIVARGS=(${par//-/ }) + wc=${#NPIVARGS[@]} + if [ $wc -eq 5 ]; then + # old syntax + # FABRIC-VPWWPN-VPWWNN-TGTWWPN-LUN + VPORTWWPNS=${NPIVARGS[1]} + elif [ $wc -eq 3 ]; then + # new syntax + VPORTWWPNS=${NPIVARGS[0]} + fi VPORTWWPNS=`echo $VPORTWWPNS | tr A-Z a-z |sed 's/[{.}]/ /g'` - FABRICNMS=`echo $FABRICNMS | tr A-Z a-z |sed 's/[{.}]/ /g'` - for FABRICNM in $FABRICNMS; do - for VPORTWWPN in $VPORTWWPNS; do - find_vhost $VPORTWWPN $FABRICNM - if test -z "$vhost" ; then exit 5; fi - flush_nodes_on_vhost $vhost - delete_vhost $vhost - done + for VPORTWWPN in $VPORTWWPNS; do + find_vhost $VPORTWWPN + if test -z "$vhost" ; then exit 5; fi + flush_nodes_on_vhost $vhost + delete_vhost $vhost done else dev=$node; dev=${dev#/dev/} diff --git a/block-npiv-common.sh b/block-npiv-common.sh index 23069b5..cbffdd1 100644 --- a/block-npiv-common.sh +++ b/block-npiv-common.sh @@ -2,7 +2,6 @@ # Look for the NPIV vport with the WWPN # $1 contains the WWPN (assumes it does not contain a leading "0x") -# $2 contains the FABRICNM (assumes it does not contain "0x") find_vhost() { unset vhost @@ -15,10 +14,7 @@ find_vhost() # Note: makes the assumption the vport will always have an scsi_host child vhost=`ls -d $fchost/device/host*` vhost=`basename $vhost` - fname=`cat /sys/class/fc_host/$vhost/fabric_name | sed -e s/^0x//` - if test $fname = $2 ; then - return - fi + return fi fi done @@ -32,23 +28,27 @@ find_vhost() if test $wwpn = $1 ; then # Note: makes the assumption the vport will always 
have an scsi_host child vhost=`basename $fchost` - fname=`cat $fchost/fabric_name | sed -e s/^0x//` - if test $fname = $2 ; then - return - fi + return fi fi done } -# Create a NPIV vport on the fabric w/ FABRICNM, with WWPN,WWNN -# $1 contains FABRICNM -# $2 contains the VPORT WWPN -# $3 contains the VPORT WWNN +# Create a NPIV vport with WWPN +# $1 contains the VPORT WWPN +# $2 may contain the VPORT WWNN # (assumes no name contains a leading "0x") create_vport() { + wwpn=$1 + wwnn=$2 + if [ -z "$wwnn" ]; then + # auto generate wwnn, follow FluidLabUpdateForEmulex.pdf + # Novell specific identifier + # byte 6 = 0 indicates WWNN, = 1 indicates WWPN + wwnn=${wwpn:0:6}"0"${wwpn:7} + fi # find a base adapter with npiv support that is on the right fabric # Look via upstream interfaces @@ -57,14 +57,13 @@ create_vport() # is the link up, w/ NPIV support ? pstate=`cat $fchost/port_state` ptype=`cat $fchost/port_type | cut -c 1-5` - fname=`cat $fchost/fabric_name | sed -e s/^0x//` - if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then + if [ $pstate = "Online" -a $ptype = "NPort" ] ; then vmax=`cat $fchost/max_npiv_vports` vinuse=`cat $fchost/npiv_vports_inuse` avail=`expr $vmax - $vinuse` if [ $avail -gt 0 ] ; then # create the vport - echo $2":"$3 > $fchost/vport_create + echo $wwpn":"$wwnn > $fchost/vport_create if [ $? -eq 0 ] ; then return 0 fi @@ -84,14 +83,13 @@ create_vport() if [ -e $fchost/port_state ] ; then pstate=`cat $fchost/port_state` ptype=`cat $fchost/port_type | cut -c 1-5` - fname=`cat $fchost/fabric_name | sed -e s/^0x//` - if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then + if [ $pstate = "Online" -a $ptype = "NPort" ] ; then vmax=`cat $shost/max_npiv_vports` vinuse=`cat $shost/npiv_vports_inuse` avail=`expr $vmax - $vinuse` if [ $avail -gt 0 ] ; then # create the vport - echo $2":"$3 > $shost/vport_create + echo $wwpn":"$wwnn > $shost/vport_create if [ $? 
-eq 0 ] ; then return 0 fi @@ -110,10 +108,9 @@ create_vport() if [ -e $fchost/port_state ] ; then pstate=`cat $fchost/port_state` ptype=`cat $fchost/port_type | cut -c 1-5` - fname=`cat $fchost/fabric_name | sed -e s/^0x//` - if [ $pstate = "Online" -a $ptype = "NPort" -a $fname = $1 ] ; then + if [ $pstate = "Online" -a $ptype = "NPort" ] ; then # create the vport - echo $2":"$3 > $shost/vport_create + echo $wwpn":"$wwnn > $shost/vport_create if [ $? -eq 0 ] ; then return 0 fi diff --git a/libxl.honor-more-top-level-vfb-options.patch b/libxl.honor-more-top-level-vfb-options.patch new file mode 100644 index 0000000..2a2173e --- /dev/null +++ b/libxl.honor-more-top-level-vfb-options.patch @@ -0,0 +1,77 @@ +From: Wei Liu +Date: Tue, 28 Jan 2014 15:38:01 +0000 +Subject: xl: honor more top level vfb options + +Now that SDL and keymap options for VFB can also be specified in top +level options. Documentation is also updated. + +This fixes bug #31 and further possible problems. + +Signed-off-by: Wei Liu +Cc: Olaf Hering +Cc: Ian Campbell +Cc: Ian Jackson +--- + docs/man/xl.cfg.pod.5 | 4 ++-- + tools/libxl/xl_cmdimpl.c | 17 ++++++++++++++--- + 2 files changed, 16 insertions(+), 5 deletions(-) + +diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5 +index e15a49f..49a9250 100644 +--- a/docs/man/xl.cfg.pod.5 ++++ b/docs/man/xl.cfg.pod.5 +@@ -389,8 +389,8 @@ This options does not control the emulated graphics card presented to + an HVM guest. See L below for how to + configure the emulated device. If L options + are used in a PV guest configuration, xl will pick up B, B, +-B, B and B to construct paravirtual +-framebuffer device for the guest. ++B, B, B, B, B and ++B to construct paravirtual framebuffer device for the guest. 
+ + Each B is a comma-separated list of C + settings, from the following list: +diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c +index 4fc46eb..28212e2 100644 +--- a/tools/libxl/xl_cmdimpl.c ++++ b/tools/libxl/xl_cmdimpl.c +@@ -721,6 +721,15 @@ static void parse_top_level_vnc_options(XLU_Config *config, + xlu_cfg_get_defbool(config, "vncunused", &vnc->findunused, 0); + } + ++static void parse_top_level_sdl_options(XLU_Config *config, ++ libxl_sdl_info *sdl) ++{ ++ xlu_cfg_get_defbool(config, "sdl", &sdl->enable, 0); ++ xlu_cfg_get_defbool(config, "opengl", &sdl->opengl, 0); ++ xlu_cfg_replace_string (config, "display", &sdl->display, 0); ++ xlu_cfg_replace_string (config, "xauthority", &sdl->xauthority, 0); ++} ++ + static void parse_config_data(const char *config_source, + const char *config_data, + int config_len, +@@ -1657,9 +1666,13 @@ skip_vfb: + libxl_device_vkb_init); + + parse_top_level_vnc_options(config, &vfb->vnc); ++ parse_top_level_sdl_options(config, &vfb->sdl); ++ xlu_cfg_replace_string (config, "keymap", &vfb->keymap, 0); + } +- } else ++ } else { + parse_top_level_vnc_options(config, &b_info->u.hvm.vnc); ++ parse_top_level_sdl_options(config, &b_info->u.hvm.sdl); ++ } + + if (c_info->type == LIBXL_DOMAIN_TYPE_HVM) { + if (!xlu_cfg_get_string (config, "vga", &buf, 0)) { +@@ -1676,8 +1689,6 @@ skip_vfb: + LIBXL_VGA_INTERFACE_TYPE_CIRRUS; + + xlu_cfg_replace_string (config, "keymap", &b_info->u.hvm.keymap, 0); +- xlu_cfg_get_defbool(config, "sdl", &b_info->u.hvm.sdl.enable, 0); +- xlu_cfg_get_defbool(config, "opengl", &b_info->u.hvm.sdl.opengl, 0); + xlu_cfg_get_defbool (config, "spice", &b_info->u.hvm.spice.enable, 0); + if (!xlu_cfg_get_long (config, "spiceport", &l, 0)) + b_info->u.hvm.spice.port = l; diff --git a/libxl.set-migration-constraints-from-cmdline.patch b/libxl.set-migration-constraints-from-cmdline.patch index 2ad6174..184d522 100644 --- a/libxl.set-migration-constraints-from-cmdline.patch +++ 
b/libxl.set-migration-constraints-from-cmdline.patch @@ -74,19 +74,19 @@ v2: Signed-off-by: Olaf Hering --- - docs/man/xl.pod.1 | 20 ++++++++++++++++++++ - tools/libxc/xc_domain_save.c | 27 ++++++++++++++++++++++++-- - tools/libxc/xc_nomigrate.c | 10 +++++++++ - tools/libxc/xenguest.h | 7 +++++++ - tools/libxl/libxl.c | 27 +++++++++++++++++++++++--- - tools/libxl/libxl.h | 14 ++++++++++++++ - tools/libxl/libxl_dom.c | 1 + - tools/libxl/libxl_internal.h | 4 ++++ - tools/libxl/libxl_save_callout.c | 4 +++- - tools/libxl/libxl_save_helper.c | 4 +++- - tools/libxl/xl_cmdimpl.c | 41 ++++++++++++++++++++++++++++++++++------ - tools/libxl/xl_cmdtable.c | 23 ++++++++++++++-------- - 12 files changed, 161 insertions(+), 21 deletions(-) + docs/man/xl.pod.1 | 20 +++++++++++++++++++ + tools/libxc/xc_domain_save.c | 27 +++++++++++++++++++++++-- + tools/libxc/xc_nomigrate.c | 10 +++++++++ + tools/libxc/xenguest.h | 7 ++++++ + tools/libxl/libxl.c | 27 ++++++++++++++++++++++--- + tools/libxl/libxl.h | 15 ++++++++++++++ + tools/libxl/libxl_dom.c | 1 + tools/libxl/libxl_internal.h | 4 +++ + tools/libxl/libxl_save_callout.c | 4 ++- + tools/libxl/libxl_save_helper.c | 4 ++- + tools/libxl/xl_cmdimpl.c | 41 +++++++++++++++++++++++++++++++++------ + tools/libxl/xl_cmdtable.c | 23 ++++++++++++++------- + 12 files changed, 162 insertions(+), 21 deletions(-) Index: xen-4.4.0-testing/docs/man/xl.pod.1 =================================================================== @@ -298,7 +298,7 @@ Index: xen-4.4.0-testing/tools/libxl/libxl.h =================================================================== --- xen-4.4.0-testing.orig/tools/libxl/libxl.h +++ xen-4.4.0-testing/tools/libxl/libxl.h -@@ -687,8 +687,22 @@ int libxl_domain_suspend(libxl_ctx *ctx, +@@ -687,8 +687,23 @@ int libxl_domain_suspend(libxl_ctx *ctx, int flags, /* LIBXL_SUSPEND_* */ const libxl_asyncop_how *ao_how) LIBXL_EXTERNAL_CALLERS_ONLY; @@ -310,6 +310,7 @@ Index: xen-4.4.0-testing/tools/libxl/libxl.h + int min_remaining; +} 
libxl_domain_suspend_suse_properties; + ++#define LIBXL_HAVE_DOMAIN_SUSPEND_SUSE +int libxl_domain_suspend_suse(libxl_ctx *ctx, uint32_t domid, int fd, + const libxl_domain_suspend_suse_properties *props, /* optional */ + const libxl_asyncop_how *ao_how) diff --git a/local_attach_support_for_phy.patch b/local_attach_support_for_phy.patch new file mode 100644 index 0000000..fc5877b --- /dev/null +++ b/local_attach_support_for_phy.patch @@ -0,0 +1,58 @@ +commit 3bcf91cbbd9a18db9ae7d594ffde7979774ed512 +Author: Roger Pau Monne +Date: Wed Feb 12 11:15:17 2014 +0100 + + libxl: local attach support for PHY backends using scripts + + Allow disks using the PHY backend to locally attach if using a script. + + Signed-off-by: Roger Pau Monné + Suggested-by: Ian Campbell + + +Index: xen-4.4.0-testing/tools/libxl/libxl.c +=================================================================== +--- xen-4.4.0-testing.orig/tools/libxl/libxl.c ++++ xen-4.4.0-testing/tools/libxl/libxl.c +@@ -2630,6 +2630,16 @@ void libxl__device_disk_local_initiate_a + + switch (disk->backend) { + case LIBXL_DISK_BACKEND_PHY: ++ if (disk->script != NULL) { ++ LOG(DEBUG, "trying to locally attach PHY device %s with script %s", ++ disk->pdev_path, disk->script); ++ libxl__prepare_ao_device(ao, &dls->aodev); ++ dls->aodev.callback = local_device_attach_cb; ++ device_disk_add(egc, LIBXL_TOOLSTACK_DOMID, disk, ++ &dls->aodev, libxl__alloc_vdev, ++ (void *) blkdev_start); ++ return; ++ } + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching PHY disk %s", + disk->pdev_path); + dev = disk->pdev_path; +@@ -2709,7 +2719,7 @@ static void local_device_attach_cb(libxl + } + + dev = GCSPRINTF("/dev/%s", disk->vdev); +- LOG(DEBUG, "locally attaching qdisk %s", dev); ++ LOG(DEBUG, "locally attached disk %s", dev); + + rc = libxl__device_from_disk(gc, LIBXL_TOOLSTACK_DOMID, disk, &device); + if (rc < 0) +@@ -2749,6 +2759,7 @@ void libxl__device_disk_local_initiate_d + if (!dls->diskpath) goto out; + + switch
(disk->backend) { ++ case LIBXL_DISK_BACKEND_PHY: + case LIBXL_DISK_BACKEND_QDISK: + if (disk->vdev != NULL) { + GCNEW(device); +@@ -2766,7 +2777,6 @@ void libxl__device_disk_local_initiate_d + /* disk->vdev == NULL; fall through */ + default: + /* +- * Nothing to do for PHYSTYPE_PHY. + * For other device types assume that the blktap2 process is + * needed by the soon to be started domain and do nothing. + */ diff --git a/qemu-xen-dir-remote.tar.bz2 b/qemu-xen-dir-remote.tar.bz2 index 2d52504..c6076d8 100644 --- a/qemu-xen-dir-remote.tar.bz2 +++ b/qemu-xen-dir-remote.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e62addd6c9481d6b12e951dffb8474b7ccffa50a007caf7d9c9ad17fbdac4ee -size 7571602 +oid sha256:f4c30b6deacd63d74ed498ded6cdddc448c9b91a9fe90fa0490387c52723baac +size 7571312 diff --git a/qemu-xen-traditional-dir-remote.tar.bz2 b/qemu-xen-traditional-dir-remote.tar.bz2 index 9aab035..e6e5038 100644 --- a/qemu-xen-traditional-dir-remote.tar.bz2 +++ b/qemu-xen-traditional-dir-remote.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee8f1852348a9054add13614aa36cd5d0dc06522bad0e442bb7c6a8e1fcd871a -size 3213098 +oid sha256:1193cf071ed647d44dccc73b58574a12efeae28ed6393b9ee6cea35ad86425dc +size 3212848 diff --git a/seabios-dir-remote.tar.bz2 b/seabios-dir-remote.tar.bz2 index c6443e9..c4056e7 100644 --- a/seabios-dir-remote.tar.bz2 +++ b/seabios-dir-remote.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76b7b90cca79d7804cb4234ca1d403b8e4e03f103e3dcd03489541e66e611bf0 -size 366520 +oid sha256:1a64318a51031dfd03b4de2eaaed75696505a627e2eba5dd1703bb1e4100c92a +size 366380 diff --git a/xen-4.4.0-testing-src.tar.bz2 b/xen-4.4.0-testing-src.tar.bz2 index b9d98ca..5dda865 100644 --- a/xen-4.4.0-testing-src.tar.bz2 +++ b/xen-4.4.0-testing-src.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ad02731bbe8fffa1c57d5b10ddf435c01c6ecadf2a8d342660abfda1d6419d2 
-size 4356222 +oid sha256:aa87fcd1134ccb8dde89b06aacb7f922d85f4a6c318af2c71b8af47d5a20c165 +size 4356476 diff --git a/xen.changes b/xen.changes index ba964bd..0979dac 100644 --- a/xen.changes +++ b/xen.changes @@ -1,8 +1,50 @@ ------------------------------------------------------------------- -Tue Feb 17 12:33:42 MST 2014 - carnold@suse.com +Wed Mar 12 08:20:42 MDT 2014 - carnold@suse.com -- Update to Xen 4.4.0 RC4 c/s 28391 - xen-4.4.0-testing-src.tar.bz2 +- Upstream patches from Jan + 530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch + 530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch + 530b28c5-x86-MSI-don-t-risk-division-by-zero.patch + 530c54c3-x86-mce-Reduce-boot-time-logspam.patch + 5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch + 5315a254-IOMMU-generalize-and-correct-softirq-processing.patch + 5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch + 5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch + 531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch + 531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch + 531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch + 531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch + 531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch + +------------------------------------------------------------------- +Tue Mar 11 16:21:11 MDT 2014 - carnold@suse.com + +- Add conversion tool for migrating xend/xm managed VMs to libvirt + xen2libvirt.py (Jim Fehlig) + +------------------------------------------------------------------- +Mon Mar 10 07:17:17 MDT 2014 - carnold@suse.com + +- Update to Xen 4.4.0 FCS + +------------------------------------------------------------------- +Thu Mar 06 13:46:24 MST 2014 - mlatimer@suse.com + +- bnc#865682 - Local attach support for PHY backends using scripts + local_attach_support_for_phy.patch + 
+------------------------------------------------------------------- +Wed Feb 26 00:36:24 UTC 2014 - mlatimer@suse.com + +- bnc#798770 - Improve multipath support for npiv devices + block-npiv + block-npiv-common.sh + +------------------------------------------------------------------- +Wed Feb 19 19:56:31 CET 2014 - ohering@suse.de + +- honor global keymap= option in libxl + libxl.honor-more-top-level-vfb-options.patch ------------------------------------------------------------------- Tue Feb 11 13:27:42 MST 2014 - carnold@suse.com diff --git a/xen.spec b/xen.spec index 2f1c993..ce4feb1 100644 --- a/xen.spec +++ b/xen.spec @@ -20,7 +20,7 @@ Name: xen ExclusiveArch: %ix86 x86_64 %arm aarch64 %define xvers 4.4 %define xvermaj 4 -%define changeset 28391 +%define changeset 28401 %define xen_build_dir xen-4.4.0-testing # %define with_kmp 0 @@ -139,7 +139,7 @@ BuildRequires: xorg-x11-util-devel %endif %endif -Version: 4.4.0_04 +Version: 4.4.0_08 Release: 0 PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) @@ -177,6 +177,7 @@ Source32: xen-updown.sh Source34: init.pciback Source35: sysconfig.pciback Source36: xnloader.py +Source37: xen2libvirt.py # Systemd service files Source41: xencommons.service Source42: xendomains.service @@ -197,6 +198,19 @@ Source99: baselibs.conf # http://xenbits.xensource.com/ext/xenalyze Source20000: xenalyze.hg.tar.bz2 # Upstream patches +Patch1: 530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch +Patch2: 530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch +Patch3: 530b28c5-x86-MSI-don-t-risk-division-by-zero.patch +Patch4: 530c54c3-x86-mce-Reduce-boot-time-logspam.patch +Patch5: 5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch +Patch6: 5315a254-IOMMU-generalize-and-correct-softirq-processing.patch +Patch7: 5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch +Patch8: 5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch 
+Patch9: 531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch +Patch10: 531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch +Patch11: 531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch +Patch12: 531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch +Patch13: 531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch # Upstream qemu Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch @@ -222,6 +236,7 @@ Patch321: udev-rules.patch # Needs to go upstream Patch330: suspend_evtchn_lock.patch Patch331: xenpaging.doc.patch +Patch332: local_attach_support_for_phy.patch # Qemu traditional Patch350: blktap.patch Patch351: cdrom-removable.patch @@ -261,6 +276,7 @@ Patch384: qemu-xen-upstream-blkif-discard.patch Patch385: xen_pvonhvm.xen_emul_unplug.patch Patch386: libxc-pass-errno-to-callers-of-xc_domain_save.patch Patch387: libxl.set-migration-constraints-from-cmdline.patch +Patch388: libxl.honor-more-top-level-vfb-options.patch # Xend Patch400: xend-set-migration-constraints-from-cmdline.patch Patch402: xen.migrate.tools-xend_move_assert_to_exception_block.patch @@ -548,6 +564,19 @@ Authors: %prep %setup -q -n %xen_build_dir -a 1 -a 2 -a 3 -a 4 -a 5 -a 57 -a 20000 # Upstream patches +%patch1 -p1 +%patch2 -p1 +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 # Upstream qemu patches %patch250 -p1 %patch251 -p1 @@ -572,6 +601,7 @@ Authors: %patch321 -p1 %patch330 -p1 %patch331 -p1 +%patch332 -p1 # Qemu traditional %patch350 -p1 %patch351 -p1 @@ -611,6 +641,7 @@ Authors: %patch385 -p1 %patch386 -p1 %patch387 -p1 +%patch388 -p1 # Xend %patch400 -p1 %patch402 -p1 @@ -884,6 +915,7 @@ mkdir -p %{buildroot}%{_unitdir} install -m 644 %{SOURCE56} %{buildroot}%{_unitdir}/xend.service %endif %endif +install -m755 %SOURCE37 
$RPM_BUILD_ROOT/usr/sbin/xen2libvirt # Example config mkdir -p $RPM_BUILD_ROOT/etc/xen/{vm,examples,scripts} @@ -1090,6 +1122,7 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper /usr/sbin/gdbsx %endif /usr/sbin/xl +/usr/sbin/xen2libvirt %ifarch %ix86 x86_64 /usr/sbin/xen-hptool /usr/sbin/xen-hvmcrash diff --git a/xen2libvirt.py b/xen2libvirt.py new file mode 100644 index 0000000..aeeee9b --- /dev/null +++ b/xen2libvirt.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +# +# Copyright (C) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see +# . +# +# Authors: +# Jim Fehlig +# +# Read native Xen configuration format, convert to libvirt domXML, and +# import (virsh define ) into libvirt. + +import sys +import os +import argparse +import re + +try: + import libvirt +except ImportError: + print 'Unable to import the libvirt module. Is libvirt-python installed?' 
+ sys.exit(1) + +parser = argparse.ArgumentParser(description='Import Xen domain configuration into libvirt') +parser.add_argument('-c', '--convert-only', help='Convert Xen domain configuration into libvirt domXML, but do not import into libvirt', action='store_true', dest='convert_only') +parser.add_argument('-r', '--recursive', help='Operate recursively on all Xen domain configuration rooted at path', action='store_true') +parser.add_argument('-f', '--format', help='Format of Xen domain configuration. Supported formats are xm and sexpr', choices=['xm', 'sexpr'], default=None) +parser.add_argument('-v', '--verbose', help='Print information about the import process', action='store_true') +parser.add_argument('path', help='Path to Xen domain configuration') + + +def print_verbose(msg): + if args.verbose: + print msg + + +def check_config(path, config): + isbinary = os.system('file -b ' + path + ' | grep text > /dev/null') + + if isbinary: + print 'File %s is not a text file containing Xen xm or sexpr configuration' % path + sys.exit(1) + + if '(domain' in config: # "(domain" is taken as the sxpr marker + return 'sexpr' + return 'xm' + + +def import_domain(conn, path, format=None, convert_only=False): + + f = open(path, 'r') + config = f.read() + print_verbose('Xen domain configuration read from %s:\n %s' % (path, config)) + if format is None: + format = check_config(path, config) + + if format == 'sexpr': + print_verbose('scrubbing domid from configuration') + config = re.sub("\(domid [0-9]*\)", "", config) + print_verbose('scrubbed sexpr:\n %s' % config) + xml = conn.domainXMLFromNative('xen-sxpr', config, 0) + else: + # if format != sexpr, try xm + xml = conn.domainXMLFromNative('xen-xm', config, 0) + + f.close() + + print_verbose('Successfully converted Xen domain configuration to ' + 'libvirt domXML:\n %s' % xml) + if convert_only: + print xml + else: + print_verbose('Importing converted libvirt domXML into libvirt...') + dom = conn.defineXML(xml) + if dom is None: + print 'Failed to define domain from
converted domXML' + sys.exit(1) + print_verbose('domXML successfully imported into libvirt') + + +args = parser.parse_args() +path = args.path + +# Connect to libvirt +conn = libvirt.open(None) +if conn is None: + print('Failed to open connection to the hypervisor') + sys.exit(1) + +if args.recursive: + try: + for root, dirs, files in os.walk(path): + for name in files: + abs_name = os.path.join(root, name) + print_verbose('Processing file %s' % abs_name) + import_domain(conn, abs_name, args.format, args.convert_only) + except IOError: + print('Failed to open/read path %s' % path) + sys.exit(1) +else: + import_domain(conn, args.path, args.format, args.convert_only) diff --git a/xend-tools-watchdog-support.patch b/xend-tools-watchdog-support.patch index 62cfc5a..106e29a 100644 --- a/xend-tools-watchdog-support.patch +++ b/xend-tools-watchdog-support.patch @@ -129,7 +129,7 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c =================================================================== --- xen-4.4.0-testing.orig/tools/libxl/xl_cmdimpl.c +++ xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c -@@ -1726,6 +1726,8 @@ skip_vfb: +@@ -1737,6 +1737,8 @@ skip_vfb: xlu_cfg_replace_string (config, "soundhw", &b_info->u.hvm.soundhw, 0); xlu_cfg_get_defbool(config, "xen_platform_pci", &b_info->u.hvm.xen_platform_pci, 0);