- Upstream patches from Jan

  530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
  530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
  530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
  530c54c3-x86-mce-Reduce-boot-time-logspam.patch
  5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch
  5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
  5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
  5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
  531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
  531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
  531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
  531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
  531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch

- Add conversion tool for migrating xend/xm managed VMs to libvirt 
  xen2libvirt.py (Jim Fehlig)

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=304
commit e46082b3ea (parent a785b8c089)
Author: Charles Arnold, 2014-03-13 23:46:35 +00:00 (committed by Git OBS Bridge)
16 changed files with 876 additions and 0 deletions

530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch

@@ -0,0 +1,188 @@
# Commit 60ea3a3ac3d2bcd8e85b250fdbfc46b3b9dc7085
# Date 2014-02-24 12:07:41 +0100
# Author Frediano Ziglio <frediano.ziglio@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MCE: Fix race condition in mctelem_reserve
These lines (in mctelem_reserve)

    newhead = oldhead->mcte_next;
    if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {

are racy. After the newhead pointer is read, another flow (a thread or a
recursive invocation) can rewrite the whole list yet end up setting the
head back to the same value. oldhead then still equals *freelp, but the
cmpxchg installs a newhead that may point to an arbitrary element (even
one already in use). This patch instead uses a bit array and atomic bit
operations.
Signed-off-by: Frediano Ziglio <frediano.ziglio@citrix.com>
Reviewed-by: Liu Jinsong <jinsong.liu@intel.com>
--- a/xen/arch/x86/cpu/mcheck/mctelem.c
+++ b/xen/arch/x86/cpu/mcheck/mctelem.c
@@ -37,24 +37,19 @@ struct mctelem_ent {
void *mcte_data; /* corresponding data payload */
};
-#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent freelist */
-#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent freelist */
-#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent errors */
-#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent errors */
+#define MCTE_F_CLASS_URGENT 0x0001U /* in use - urgent errors */
+#define MCTE_F_CLASS_NONURGENT 0x0002U /* in use - nonurgent errors */
#define MCTE_F_STATE_FREE 0x0010U /* on a freelist */
#define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list */
#define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list */
#define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list */
-#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
#define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)
#define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \
MCTE_F_STATE_UNCOMMITTED | \
MCTE_F_STATE_COMMITTED | \
MCTE_F_STATE_PROCESSING)
-#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)
-
#define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
#define MCTE_SET_CLASS(tep, new) do { \
(tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
@@ -69,6 +64,8 @@ struct mctelem_ent {
#define MC_URGENT_NENT 10
#define MC_NONURGENT_NENT 20
+#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT)
+
#define MC_NCLASSES (MC_NONURGENT + 1)
#define COOKIE2MCTE(c) ((struct mctelem_ent *)(c))
@@ -77,11 +74,9 @@ struct mctelem_ent {
static struct mc_telem_ctl {
/* Linked lists that thread the array members together.
*
- * The free lists are singly-linked via mcte_next, and we allocate
- * from them by atomically unlinking an element from the head.
- * Consumed entries are returned to the head of the free list.
- * When an entry is reserved off the free list it is not linked
- * on any list until it is committed or dismissed.
+ * The free list is a bit array where a set bit means free.
+ * As the number of elements is quite small, it is easy to
+ * allocate them atomically that way.
*
* The committed list grows at the head and we do not maintain a
* tail pointer; insertions are performed atomically. The head
@@ -101,7 +96,7 @@ static struct mc_telem_ctl {
* we can lock it for updates. The head of the processing list
* always has the oldest telemetry, and we append (as above)
* at the tail of the processing list. */
- struct mctelem_ent *mctc_free[MC_NCLASSES];
+ DECLARE_BITMAP(mctc_free, MC_NENT);
struct mctelem_ent *mctc_committed[MC_NCLASSES];
struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
@@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cp
*/
static void mctelem_free(struct mctelem_ent *tep)
{
- mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
- MC_URGENT : MC_NONURGENT;
-
BUG_ON(tep->mcte_refcnt != 0);
BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
tep->mcte_prev = NULL;
- mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
+ tep->mcte_next = NULL;
+
+ /* set free in array */
+ set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free);
}
/* Increment the reference count of an entry that is not linked on to
@@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz)
}
if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
- MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
- (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
- datasz)) == NULL) {
+ MC_NENT)) == NULL ||
+ (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) {
if (mctctl.mctc_elems)
xfree(mctctl.mctc_elems);
printk("Allocations for MCA telemetry failed\n");
return;
}
- for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
- struct mctelem_ent *tep, **tepp;
+ for (i = 0; i < MC_NENT; i++) {
+ struct mctelem_ent *tep;
tep = mctctl.mctc_elems + i;
tep->mcte_flags = MCTE_F_STATE_FREE;
tep->mcte_refcnt = 0;
tep->mcte_data = datarr + i * datasz;
- if (i < MC_URGENT_NENT) {
- tepp = &mctctl.mctc_free[MC_URGENT];
- tep->mcte_flags |= MCTE_F_HOME_URGENT;
- } else {
- tepp = &mctctl.mctc_free[MC_NONURGENT];
- tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
- }
-
- tep->mcte_next = *tepp;
+ __set_bit(i, mctctl.mctc_free);
+ tep->mcte_next = NULL;
tep->mcte_prev = NULL;
- *tepp = tep;
}
}
@@ -310,32 +296,25 @@ static int mctelem_drop_count;
/* Reserve a telemetry entry, or return NULL if none available.
* If we return an entry then the caller must subsequently call exactly one of
- * mctelem_unreserve or mctelem_commit for that entry.
+ * mctelem_dismiss or mctelem_commit for that entry.
*/
mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
{
- struct mctelem_ent **freelp;
- struct mctelem_ent *oldhead, *newhead;
- mctelem_class_t target = (which == MC_URGENT) ?
- MC_URGENT : MC_NONURGENT;
+ unsigned bit;
+ unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT;
- freelp = &mctctl.mctc_free[target];
for (;;) {
- if ((oldhead = *freelp) == NULL) {
- if (which == MC_URGENT && target == MC_URGENT) {
- /* raid the non-urgent freelist */
- target = MC_NONURGENT;
- freelp = &mctctl.mctc_free[target];
- continue;
- } else {
- mctelem_drop_count++;
- return (NULL);
- }
+ bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit);
+
+ if (bit >= MC_NENT) {
+ mctelem_drop_count++;
+ return (NULL);
}
- newhead = oldhead->mcte_next;
- if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
- struct mctelem_ent *tep = oldhead;
+ /* try to allocate, atomically clear free bit */
+ if (test_and_clear_bit(bit, mctctl.mctc_free)) {
+ /* return element we got */
+ struct mctelem_ent *tep = mctctl.mctc_elems + bit;
mctelem_hold(tep);
MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
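
The race and the fix are easier to see in a standalone sketch. The
following is illustrative only - plain C with GCC atomic builtins
standing in for Xen's cmpxchgptr(), find_next_bit() and
test_and_clear_bit() - but it shows both the ABA hazard in the old
lock-free pop and why claiming a single bit atomically cannot install a
stale pointer:

    #include <stdbool.h>
    #include <stddef.h>

    struct ent { struct ent *next; };

    /* Old scheme: between reading oldhead->next and the CAS, another
     * flow can pop "oldhead", reuse "oldhead->next", and push "oldhead"
     * back.  The CAS still sees the expected head and succeeds,
     * installing a stale (possibly in-use) element as the new head. */
    static struct ent *racy_pop(struct ent **freelp)
    {
        struct ent *oldhead, *newhead;

        do {
            oldhead = *freelp;
            if (oldhead == NULL)
                return NULL;
            newhead = oldhead->next;  /* may be stale by the time of the CAS */
        } while (!__atomic_compare_exchange_n(freelp, &oldhead, newhead, false,
                                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
        return oldhead;
    }

    /* New scheme, in the spirit of the patch: a set bit means "free".
     * Allocation is one atomic fetch-and-clear per candidate bit; there
     * is no second, separately read value that could go stale. */
    #define NENT 30
    static unsigned long free_bits = (1UL << NENT) - 1;

    static int bitmap_alloc(void)
    {
        for (int bit = 0; bit < NENT; bit++) {
            unsigned long mask = 1UL << bit;

            if (__atomic_fetch_and(&free_bits, ~mask, __ATOMIC_SEQ_CST) & mask)
                return bit;   /* the bit was still set: the entry is ours */
        }
        return -1;            /* nothing free */
    }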

530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch

@@ -0,0 +1,29 @@
# Commit fd1864f48d8914fb8eeb6841cd08c2c09b368909
# Date 2014-02-24 12:09:52 +0100
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: update nested paging mode on vmexit
Since SVM and VMX use different mechanisms to emulate virtual vmentry
and virtual vmexit, it's hard to update the nested paging mode correctly
in common code, so we need to update it in their respective code paths.
SVM already updates the nested paging mode on vmexit. This patch adds the same
logic in VMX side.
Previous discussion is here:
http://lists.xen.org/archives/html/xen-devel/2013-12/msg01759.html
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Reviewed-by: Christoph Egger <chegger@amazon.de>
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2541,6 +2541,7 @@ void vmx_vmexit_handler(struct cpu_user_
vcpu_nestedhvm(v).nv_vmswitch_in_progress = 0;
if ( nestedhvm_vcpu_in_guestmode(v) )
{
+ paging_update_nestedmode(v);
if ( nvmx_n2_vmexit_handler(regs, exit_reason) )
goto out;
}

530b28c5-x86-MSI-don-t-risk-division-by-zero.patch

@@ -0,0 +1,24 @@
# Commit 5d160d913e03b581bdddde73535c18ac670cf0a9
# Date 2014-02-24 12:11:01 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI: don't risk division by zero
The check in question is redundant with the one in the immediately
following if(), where dividing by zero gets carefully avoided.
Spotted-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -636,7 +636,7 @@ static u64 read_pci_mem_bar(u16 seg, u8
return 0;
base = pos + PCI_SRIOV_BAR;
vf -= PCI_BDF(bus, slot, func) + offset;
- if ( vf < 0 || (vf && vf % stride) )
+ if ( vf < 0 )
return 0;
if ( stride )
{
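
The hazard sits in the modulo operation itself; the && does not help
once vf is non-zero. A minimal illustration (hypothetical values, not
the real SR-IOV register reads):

    #include <stdio.h>

    int main(void)
    {
        int vf = 5, stride = 0;     /* stride can legitimately be 0 */

        /* The removed check read:  if ( vf < 0 || (vf && vf % stride) )
         * With vf != 0 the && does not short-circuit, so "vf % stride"
         * is evaluated and stride == 0 raises a division fault. */

        if (vf < 0)                 /* the reduced check from the patch */
            return 1;
        if (stride) {               /* divide/modulo only when stride != 0 */
            if (vf % stride)
                return 1;
            printf("VF index %d\n", vf / stride);
        }
        return 0;
    }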

530c54c3-x86-mce-Reduce-boot-time-logspam.patch

@@ -0,0 +1,27 @@
# Commit a5ab9c9fa29cda7e1b18dbcaa69a5dbded96de32
# Date 2014-02-25 09:30:59 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mce: Reduce boot-time logspam
When booting with "no-mce", the user does not need to be told that "MCE
support [was] disabled by bootparam" for each cpu. Furthermore, a file:line
reference is unnecessary.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -729,8 +729,10 @@ void mcheck_init(struct cpuinfo_x86 *c,
{
enum mcheck_type inited = mcheck_none;
- if (mce_disabled == 1) {
- dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
+ if ( mce_disabled )
+ {
+ if ( bsp )
+ printk(XENLOG_INFO "MCE support disabled by bootparam\n");
return;
}

5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch

@@ -0,0 +1,32 @@
# Commit 79de2d31f1ff8910231b7ec15519405953e6571a
# Date 2014-02-28 17:35:15 +0100
# Author Wei Liu <wei.liu2@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
mm: ensure useful progress in decrease_reservation
During my fun time playing with the balloon driver I found that the
hypervisor's preemption check kept decrease_reservation from doing any
useful work for 32-bit guests, resulting in the guests hanging.
As Andrew suggested, we can force the check to fail for the first
iteration to ensure progress; we already did the same in d3a55d7d9
"x86/mm: Ensure useful progress in alloc_l2_table()".
After this change I cannot see the hang caused by continuation logic
anymore.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -268,7 +268,7 @@ static void decrease_reservation(struct
for ( i = a->nr_done; i < a->nr_extents; i++ )
{
- if ( hypercall_preempt_check() )
+ if ( hypercall_preempt_check() && i != a->nr_done )
{
a->preempted = 1;
goto out;
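
The same first-iteration exemption applies to any preemptible loop that
is restarted via a continuation. A minimal sketch (hypothetical helper
names, not the actual hypercall machinery):

    #include <stdbool.h>
    #include <stdio.h>

    static bool preempt_pending(void) { return true; }   /* worst case */
    static void do_one_extent(unsigned long i) { printf("extent %lu\n", i); }

    /* Returns the index to resume from; done when the result equals nr. */
    static unsigned long process(unsigned long start, unsigned long nr)
    {
        for (unsigned long i = start; i < nr; i++) {
            /* "i != start" guarantees every (re)invocation completes at
             * least one extent; without it, a permanently pending
             * preemption check would stall the loop at "start" forever. */
            if (preempt_pending() && i != start)
                return i;
            do_one_extent(i);
        }
        return nr;
    }

    int main(void)
    {
        unsigned long pos = 0;

        while (pos < 8)
            pos = process(pos, 8);  /* progresses even in the worst case */
        return 0;
    }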

5315a254-IOMMU-generalize-and-correct-softirq-processing.patch

@@ -0,0 +1,85 @@
References: bnc#858178
# Commit 9ef5aa944a6a0df7f5938983043c7e46f158bbc6
# Date 2014-03-04 10:52:20 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
IOMMU: generalize and correct softirq processing during Dom0 device setup
c/s 21039:95f5a4ce8f24 ("VT-d: reduce default verbosity") having put a
call to process_pending_softirqs() in VT-d's domain_context_mapping()
was wrong in two ways: For one we shouldn't be doing this when setting
up a device during DomU assignment. And then - I didn't check whether
that was the case already back then - we shouldn't call that function
with the pcidevs_lock (or in fact any spin lock) held.
Move the "preemption" into generic code, at once dealing with further
actual issues (too much output elsewhere - particularly on systems with
very many host-bridge-like devices - has been observed to still trigger
the watchdog when enabled) and potential ones (other IOMMU code may
also end up being too verbose).
Do the "preemption" once per device actually being set up when in
verbose mode, and once per bus otherwise.
Note that dropping pcidevs_lock around the process_pending_softirqs()
invocation is specifically not a problem here: We're in an __init
function and aren't racing with potential additions/removals of PCI
devices. Not acquiring the lock in setup_dom0_pci_devices() otoh is not
an option, as there are too many places that assert the lock being
held.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -27,6 +27,7 @@
#include <xen/delay.h>
#include <xen/keyhandler.h>
#include <xen/radix-tree.h>
+#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
@@ -922,6 +923,20 @@ static int __init _setup_dom0_pci_device
printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
pdev->domain->domain_id, pseg->nr, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if ( iommu_verbose )
+ {
+ spin_unlock(&pcidevs_lock);
+ process_pending_softirqs();
+ spin_lock(&pcidevs_lock);
+ }
+ }
+
+ if ( !iommu_verbose )
+ {
+ spin_unlock(&pcidevs_lock);
+ process_pending_softirqs();
+ spin_lock(&pcidevs_lock);
}
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -31,7 +31,6 @@
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/keyhandler.h>
-#include <xen/softirq.h>
#include <asm/msi.h>
#include <asm/irq.h>
#include <asm/hvm/vmx/vmx.h>
@@ -1494,9 +1493,6 @@ static int domain_context_mapping(
break;
}
- if ( iommu_verbose )
- process_pending_softirqs();
-
return ret;
}
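
The locking pattern the patch adopts - drop, process softirqs, retake -
is sketched below with a pthread mutex standing in for pcidevs_lock
(illustrative only; the point is that softirq handlers may themselves
take locks, so process_pending_softirqs() must never run under one):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t devs_lock = PTHREAD_MUTEX_INITIALIZER;

    static void setup_one_device(int dev) { printf("setup device %d\n", dev); }

    static void process_pending_softirqs_stub(void)
    {
        /* In Xen this runs pending softirq handlers (keeping e.g. the
         * watchdog alive during long boot-time loops). */
    }

    static void setup_devices(int ndevs)
    {
        pthread_mutex_lock(&devs_lock);
        for (int dev = 0; dev < ndevs; dev++) {
            setup_one_device(dev);

            /* Drop the lock around the "preemption" point, as the patch
             * does.  Safe here only because this models an __init-time,
             * effectively single-threaded phase: nothing can add or
             * remove devices while the lock is released. */
            pthread_mutex_unlock(&devs_lock);
            process_pending_softirqs_stub();
            pthread_mutex_lock(&devs_lock);
        }
        pthread_mutex_unlock(&devs_lock);
    }

    int main(void)
    {
        setup_devices(4);
        return 0;
    }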

5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch

@@ -0,0 +1,59 @@
# Commit 8db22866dbbcabf30ad6e3814489c730b53d1cf4
# Date 2014-03-04 10:58:19 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: don't propagate acpi_skip_timer_override do Dom0
It's unclear why c/s 4850:923dd9975981 added this - Dom0 isn't
controlling the timer interrupt, and hence has no need to know.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -56,7 +56,9 @@ bool_t __initdata acpi_ht = 1; /* enable
bool_t __initdata acpi_lapic;
bool_t __initdata acpi_ioapic;
-bool_t acpi_skip_timer_override __initdata;
+/* acpi_skip_timer_override: Skip IRQ0 overrides. */
+static bool_t acpi_skip_timer_override __initdata;
+boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -71,10 +71,6 @@ static void parse_acpi_param(char *s);
custom_param("acpi", parse_acpi_param);
/* **** Linux config option: propagated to domain0. */
-/* acpi_skip_timer_override: Skip IRQ0 overrides. */
-boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
-
-/* **** Linux config option: propagated to domain0. */
/* noapic: Disable IOAPIC setup. */
boolean_param("noapic", skip_ioapic_setup);
@@ -1365,9 +1361,6 @@ void __init __start_xen(unsigned long mb
/* Append any extra parameters. */
if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
safe_strcat(dom0_cmdline, " noapic");
- if ( acpi_skip_timer_override &&
- !strstr(dom0_cmdline, "acpi_skip_timer_override") )
- safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
if ( (strlen(acpi_param) == 0) && acpi_disabled )
{
printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -80,7 +80,6 @@ int __acpi_release_global_lock(unsigned
extern bool_t acpi_lapic, acpi_ioapic, acpi_noirq;
extern bool_t acpi_force, acpi_ht, acpi_disabled;
-extern bool_t acpi_skip_timer_override;
extern u32 acpi_smi_cmd;
extern u8 acpi_enable_value, acpi_disable_value;
void acpi_pic_sci_set_trigger(unsigned int, u16);

5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch

@@ -0,0 +1,32 @@
# Commit 7acf827b951b4e8501a777676ddf050d200103a4
# Date 2014-03-04 11:00:26 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/ACPI: also print address space for PM1x fields
At least one vendor is in the process of making systems available where
these live in MMIO, not in I/O port space.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -404,11 +404,15 @@ acpi_fadt_parse_sleep_info(struct acpi_t
acpi_fadt_copy_address(pm1b_evt, pm1b_event, pm1_event);
printk(KERN_INFO PREFIX
- "SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], "
- "pm1x_evt[%"PRIx64",%"PRIx64"]\n",
+ "SLEEP INFO: pm1x_cnt[%d:%"PRIx64",%d:%"PRIx64"], "
+ "pm1x_evt[%d:%"PRIx64",%d:%"PRIx64"]\n",
+ acpi_sinfo.pm1a_cnt_blk.space_id,
acpi_sinfo.pm1a_cnt_blk.address,
+ acpi_sinfo.pm1b_cnt_blk.space_id,
acpi_sinfo.pm1b_cnt_blk.address,
+ acpi_sinfo.pm1a_evt_blk.space_id,
acpi_sinfo.pm1a_evt_blk.address,
+ acpi_sinfo.pm1b_evt_blk.space_id,
acpi_sinfo.pm1b_evt_blk.address);
/* Now FACS... */

531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch

@@ -0,0 +1,56 @@
# Commit cadfd7bca999c0a795dc27be72d43c92e8943a0b
# Date 2014-03-10 11:02:25 +0100
# Author Dongxiao Xu <dongxiao.xu@intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/hvm: refine the judgment on IDENT_PT for EMT
When trying to get the EPT EMT type, the judgment on
HVM_PARAM_IDENT_PT is not correct: it always returns the WB type if
the parameter is not set. Remove the related code.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
We can't fully drop the dependency yet, but we should certainly avoid
overriding cases already properly handled. The reason for this is that
the guest setting up its MTRRs happens _after_ the EPT tables got
already constructed, and no code is in place to propagate this to the
EPT code. Without this check we're forcing the guest to run with all of
its memory uncachable until something happens to re-write every single
EPT entry. But of course this has to be just a temporary solution.
In the same spirit we should defer the "very early" (when the guest is
still being constructed and has no vCPU yet) override to the last
possible point.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -689,13 +689,8 @@ uint8_t epte_get_entry_emt(struct domain
*ipat = 0;
- if ( (current->domain != d) &&
- ((d->vcpu == NULL) || ((v = d->vcpu[0]) == NULL)) )
- return MTRR_TYPE_WRBACK;
-
- if ( !is_pvh_vcpu(v) &&
- !v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] )
- return MTRR_TYPE_WRBACK;
+ if ( v->domain != d )
+ v = d->vcpu ? d->vcpu[0] : NULL;
if ( !mfn_valid(mfn_x(mfn)) )
return MTRR_TYPE_UNCACHABLE;
@@ -718,7 +713,8 @@ uint8_t epte_get_entry_emt(struct domain
return MTRR_TYPE_WRBACK;
}
- gmtrr_mtype = is_hvm_vcpu(v) ?
+ gmtrr_mtype = is_hvm_domain(d) && v &&
+ d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
MTRR_TYPE_WRBACK;

531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch

@@ -0,0 +1,54 @@
# Commit b99113b9d5fac5149de8496f55afa00e285b1ff3
# Date 2014-03-10 11:03:53 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: fix memory type merging in epte_get_entry_emt()
Using the minimum numeric value of the guest and host specified memory
types is too simplistic - it works correctly only for a subset of
types. In particular the combinations involving WC need conversion to
UC when the two types conflict, while WT/WP cleanly resolves to WP.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -719,5 +719,35 @@ uint8_t epte_get_entry_emt(struct domain
MTRR_TYPE_WRBACK;
hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT));
- return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype);
+
+ /* If both types match we're fine. */
+ if ( likely(gmtrr_mtype == hmtrr_mtype) )
+ return hmtrr_mtype;
+
+ /* If either type is UC, we have to go with that one. */
+ if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
+ hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
+ return MTRR_TYPE_UNCACHABLE;
+
+ /* If either type is WB, we have to go with the other one. */
+ if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
+ return hmtrr_mtype;
+ if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
+ return gmtrr_mtype;
+
+ /*
+ * At this point we have disagreeing WC, WT, or WP types. The only
+ * combination that can be cleanly resolved is WT:WP. The ones involving
+ * WC need to be converted to UC, both due to the memory ordering
+ * differences and because WC disallows reads to be cached (WT and WP
+ * permit this), while WT and WP require writes to go straight to memory
+ * (WC can buffer them).
+ */
+ if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
+ hmtrr_mtype == MTRR_TYPE_WRPROT) ||
+ (gmtrr_mtype == MTRR_TYPE_WRPROT &&
+ hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
+ return MTRR_TYPE_WRPROT;
+
+ return MTRR_TYPE_UNCACHABLE;
}
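
Why min() is wrong follows directly from the architectural type
encodings (listed here for reference; the merge rules in the comment
restate what the patch's code does):

    /* x86 MTRR memory type encodings: */
    enum mtrr_type {
        MTRR_UC = 0,   /* uncachable */
        MTRR_WC = 1,   /* write-combining */
        MTRR_WT = 4,   /* write-through */
        MTRR_WP = 5,   /* write-protect */
        MTRR_WB = 6,   /* write-back */
    };

    /*
     * min(guest, host) fails exactly where the patch intervenes:
     *  - min(MTRR_WT, MTRR_WP) == MTRR_WT (4), yet the patch resolves a
     *    WT/WP conflict to MTRR_WP (5);
     *  - min(MTRR_WC, MTRR_WT) == MTRR_WC (1), yet WC buffers writes and
     *    caches no reads while WT requires writes to reach memory, so
     *    the only safe result is MTRR_UC (0) - a value min() can never
     *    produce from that pair.
     */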

531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch

@@ -0,0 +1,59 @@
# Commit 3089a6d82bdf3112ccb1dd074ce34a8cbdc4ccd8
# Date 2014-03-10 11:04:36 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: consolidate passthrough handling in epte_get_entry_emt()
It is inconsistent to depend on iommu_enabled alone: For a guest
without devices passed through to it, it is of no concern whether the
IOMMU is enabled.
There's one rather special case to take care of: VMX code marks the
LAPIC access page as MMIO. The added assertion needs to take this into
consideration, and the subsequent handling of the direct MMIO case was
inconsistent too: That page would have been WB in the absence of an
IOMMU, but UC in the presence of one, while in fact the cachability of
this page is entirely unrelated to an IOMMU being in use.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -698,14 +698,20 @@ uint8_t epte_get_entry_emt(struct domain
if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
return type;
- if ( !iommu_enabled )
+ if ( !iommu_enabled ||
+ (rangeset_is_empty(d->iomem_caps) &&
+ rangeset_is_empty(d->arch.ioport_caps) &&
+ !has_arch_pdevs(d)) )
{
+ ASSERT(!direct_mmio ||
+ mfn_x(mfn) == d->arch.hvm_domain.vmx.apic_access_mfn);
*ipat = 1;
return MTRR_TYPE_WRBACK;
}
if ( direct_mmio )
- return MTRR_TYPE_UNCACHABLE;
+ return mfn_x(mfn) != d->arch.hvm_domain.vmx.apic_access_mfn
+ ? MTRR_TYPE_UNCACHABLE : MTRR_TYPE_WRBACK;
if ( iommu_snoop )
{
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2038,9 +2038,9 @@ static int vmx_alloc_vlapic_mapping(stru
if ( apic_va == NULL )
return -ENOMEM;
share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
+ d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
_mfn(virt_to_mfn(apic_va)));
- d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
return 0;
}

531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch

@@ -0,0 +1,36 @@
# Commit 4509ada6ba1f09cc8f4fa23e009e7e5a963b6086
# Date 2014-03-10 11:11:28 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
kexec: identify which cpu the kexec image is being executed on
A patch to this effect has been in XenServer for a little while, and has
proved to be a useful debugging point for servers which behave
differently depending on whether the crash happens on the bootstrap
processor.
Moving the printk() from kexec_crash() to one_cpu_only() means that it
will only be printed for the cpu which wins the race along the kexec
path.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -265,6 +265,8 @@ static int noinline one_cpu_only(void)
}
set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags);
+ printk("Executing kexec image on cpu%u\n", cpu);
+
return 0;
}
@@ -340,8 +342,6 @@ void kexec_crash(void)
if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
return;
- printk("Executing crash image\n");
-
kexecing = TRUE;
if ( kexec_common_shutdown() != 0 )

531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch

@@ -0,0 +1,29 @@
# Commit ac2cba2901779f66bbfab298faa15c956e91393a
# Date 2014-03-10 14:40:50 +0100
# Author Julien Grall <julien.grall@linaro.org>
# Committer Jan Beulich <jbeulich@suse.com>
xmalloc: handle correctly page allocation when align > size
When align is superior to size, we need to retrieve the order from
align during multiple page allocation. I guess it was the goal of the commit
fb034f42 "xmalloc: make close-to-PAGE_SIZE allocations more efficient".
Signed-off-by: Julien Grall <julien.grall@linaro.org>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/xmalloc_tlsf.c
+++ b/xen/common/xmalloc_tlsf.c
@@ -527,11 +527,10 @@ static void xmalloc_pool_put(void *p)
static void *xmalloc_whole_pages(unsigned long size, unsigned long align)
{
- unsigned int i, order = get_order_from_bytes(size);
+ unsigned int i, order;
void *res, *p;
- if ( align > size )
- get_order_from_bytes(align);
+ order = get_order_from_bytes(max(align, size));
res = alloc_xenheap_pages(order, 0);
if ( res == NULL )
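
A worked example of the fixed computation (get_order_from_bytes()
re-implemented here for illustration; it returns the smallest order k
with PAGE_SIZE << k >= size):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    static unsigned int order_from_bytes(unsigned long size)
    {
        unsigned int order = 0;

        while ((PAGE_SIZE << order) < size)
            order++;
        return order;
    }

    int main(void)
    {
        unsigned long size = 5000, align = 16384;

        /* Old code computed the align-derived order but discarded it,
         * so the allocation used order 1 (8 KiB) and the 16 KiB
         * alignment request could be violated. */
        printf("from size only:        order %u\n", order_from_bytes(size));

        /* Fixed code covers max(align, size): order 2 (16 KiB), and an
         * order-k xenheap allocation is naturally (PAGE_SIZE << k)-aligned. */
        unsigned long m = align > size ? align : size;
        printf("from max(align, size): order %u\n", order_from_bytes(m));
        return 0;
    }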

xen.changes

@@ -1,3 +1,27 @@
-------------------------------------------------------------------
Wed Mar 12 08:20:42 MDT 2014 - carnold@suse.com
- Upstream patches from Jan
  530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
  530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
  530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
  530c54c3-x86-mce-Reduce-boot-time-logspam.patch
  5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch
  5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
  5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
  5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
  531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
  531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
  531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
  531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
  531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch
-------------------------------------------------------------------
Tue Mar 11 16:21:11 MDT 2014 - carnold@suse.com
- Add conversion tool for migrating xend/xm managed VMs to libvirt
  xen2libvirt.py (Jim Fehlig)
-------------------------------------------------------------------
Mon Mar 10 07:17:17 MDT 2014 - carnold@suse.com

xen.spec

@@ -177,6 +177,7 @@ Source32: xen-updown.sh
Source34: init.pciback
Source35: sysconfig.pciback
Source36: xnloader.py
Source37: xen2libvirt.py
# Systemd service files
Source41: xencommons.service
Source42: xendomains.service
@@ -197,6 +198,19 @@ Source99: baselibs.conf
# http://xenbits.xensource.com/ext/xenalyze
Source20000: xenalyze.hg.tar.bz2
# Upstream patches
Patch1: 530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
Patch2: 530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
Patch3: 530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
Patch4: 530c54c3-x86-mce-Reduce-boot-time-logspam.patch
Patch5: 5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch
Patch6: 5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
Patch7: 5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
Patch8: 5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
Patch9: 531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
Patch10: 531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
Patch11: 531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
Patch12: 531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
Patch13: 531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch
# Upstream qemu
Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch
Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch
@@ -550,6 +564,19 @@ Authors:
%prep
%setup -q -n %xen_build_dir -a 1 -a 2 -a 3 -a 4 -a 5 -a 57 -a 20000
# Upstream patches
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%patch12 -p1
%patch13 -p1
# Upstream qemu patches
%patch250 -p1
%patch251 -p1
@@ -888,6 +915,7 @@ mkdir -p %{buildroot}%{_unitdir}
install -m 644 %{SOURCE56} %{buildroot}%{_unitdir}/xend.service
%endif
%endif
install -m755 %SOURCE37 $RPM_BUILD_ROOT/usr/sbin/xen2libvirt
# Example config
mkdir -p $RPM_BUILD_ROOT/etc/xen/{vm,examples,scripts}
@@ -1094,6 +1122,7 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper
/usr/sbin/gdbsx
%endif
/usr/sbin/xl
/usr/sbin/xen2libvirt
%ifarch %ix86 x86_64
/usr/sbin/xen-hptool
/usr/sbin/xen-hvmcrash

xen2libvirt.py (new file, 113 lines)

@@ -0,0 +1,113 @@
#!/usr/bin/env python
#
# Copyright (C) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see
# <http://www.gnu.org/licenses/>.
#
# Authors:
# Jim Fehlig <jfehlig@suse.com>
#
# Read native Xen configuration format, convert to libvirt domXML, and
# import (virsh define <xml>) into libvirt.
import sys
import os
import argparse
import re

try:
    import libvirt
except ImportError:
    print 'Unable to import the libvirt module. Is libvirt-python installed?'
    sys.exit(1)

parser = argparse.ArgumentParser(description='Import Xen domain configuration into libvirt')
parser.add_argument('-c', '--convert-only', help='Convert Xen domain configuration into libvirt domXML, but do not import into libvirt', action='store_true', dest='convert_only')
parser.add_argument('-r', '--recursive', help='Operate recursively on all Xen domain configurations rooted at path', action='store_true')
parser.add_argument('-f', '--format', help='Format of Xen domain configuration. Supported formats are xm and sexpr', choices=['xm', 'sexpr'], default=None)
parser.add_argument('-v', '--verbose', help='Print information about the import process', action='store_true')
parser.add_argument('path', help='Path to Xen domain configuration')

def print_verbose(msg):
    if args.verbose:
        print msg

def check_config(path, config):
    isbinary = os.system('file -b ' + path + ' | grep text > /dev/null')
    if isbinary:
        print 'File %s is not a text file containing Xen xm or sexpr configuration' % path
        sys.exit(1)
    # find() returns -1 when the marker is absent and 0 (falsy!) when the
    # config starts with '(domain', so compare against -1 explicitly
    if config.find('(domain') != -1:
        return 'sexpr'
    return 'xm'

def import_domain(conn, path, format=None, convert_only=False):
    f = open(path, 'r')
    config = f.read()
    print_verbose('Xen domain configuration read from %s:\n %s' % (path, config))
    if format is None:
        format = check_config(path, config)
    if format == 'sexpr':
        print_verbose('scrubbing domid from configuration')
        config = re.sub(r"\(domid [0-9]*\)", "", config)
        print_verbose('scrubbed sexpr:\n %s' % config)
        xml = conn.domainXMLFromNative('xen-sxpr', config, 0)
    else:
        # if format != sexpr, try xm
        xml = conn.domainXMLFromNative('xen-xm', config, 0)
    f.close()
    print_verbose('Successfully converted Xen domain configuration to '
                  'libvirt domXML:\n %s' % xml)
    if convert_only:
        print xml
    else:
        print_verbose('Importing converted libvirt domXML into libvirt...')
        dom = conn.defineXML(xml)
        if dom is None:
            print 'Failed to define domain from converted domXML'
            sys.exit(1)
        print_verbose('domXML successfully imported into libvirt')

args = parser.parse_args()
path = args.path

# Connect to libvirt
conn = libvirt.open(None)
if conn is None:
    print('Failed to open connection to the hypervisor')
    sys.exit(1)

if args.recursive:
    try:
        for root, dirs, files in os.walk(path):
            for name in files:
                abs_name = os.path.join(root, name)
                print_verbose('Processing file %s' % abs_name)
                import_domain(conn, abs_name, args.format, args.convert_only)
    except IOError:
        print('Failed to open/read path %s' % path)
        sys.exit(1)
else:
    import_domain(conn, args.path, args.format, args.convert_only)
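
A brief usage note (paths as installed by the spec file above, which
places the script at /usr/sbin/xen2libvirt): "xen2libvirt -c
/etc/xen/vm/myvm" should print the converted domXML without defining it,
"xen2libvirt -r -v /etc/xen/vm" walks the directory and imports every
configuration it can parse, and "-f xm" or "-f sexpr" forces the input
format when the "(domain" auto-detection heuristic guesses wrong
("myvm" is a hypothetical file name).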