- Upstream patches from Jan

  530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
  530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
  530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
  530c54c3-x86-mce-Reduce-boot-time-logspam.patch
  5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch
  5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
  5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
  5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
  531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
  531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
  531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
  531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
  531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch

- Add conversion tool for migrating xend/xm managed VMs to libvirt 
  xen2libvirt.py (Jim Fehlig)

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=304
commit e46082b3ea (parent a785b8c089)
Author: Charles Arnold, 2014-03-13 23:46:35 +00:00 (committed by Git OBS Bridge)
16 changed files with 876 additions and 0 deletions

530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch

@@ -0,0 +1,188 @@
# Commit 60ea3a3ac3d2bcd8e85b250fdbfc46b3b9dc7085
# Date 2014-02-24 12:07:41 +0100
# Author Frediano Ziglio <frediano.ziglio@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MCE: Fix race condition in mctelem_reserve
These lines (in mctelem_reserve)

    newhead = oldhead->mcte_next;
    if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {

are racy. After the newhead pointer is read, another flow (a thread or a
recursive invocation) can rewrite the whole list yet end up setting the
head back to the same value. oldhead then still equals *freelp, but the
cmpxchg installs a newhead that may point to an arbitrary element (even
one already in use). This patch instead uses a bit array and atomic bit
operations.
Signed-off-by: Frediano Ziglio <frediano.ziglio@citrix.com>
Reviewed-by: Liu Jinsong <jinsong.liu@intel.com>
--- a/xen/arch/x86/cpu/mcheck/mctelem.c
+++ b/xen/arch/x86/cpu/mcheck/mctelem.c
@@ -37,24 +37,19 @@ struct mctelem_ent {
void *mcte_data; /* corresponding data payload */
};
-#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent freelist */
-#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent freelist */
-#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent errors */
-#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent errors */
+#define MCTE_F_CLASS_URGENT 0x0001U /* in use - urgent errors */
+#define MCTE_F_CLASS_NONURGENT 0x0002U /* in use - nonurgent errors */
#define MCTE_F_STATE_FREE 0x0010U /* on a freelist */
#define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list */
#define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list */
#define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list */
-#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
#define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)
#define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \
MCTE_F_STATE_UNCOMMITTED | \
MCTE_F_STATE_COMMITTED | \
MCTE_F_STATE_PROCESSING)
-#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)
-
#define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
#define MCTE_SET_CLASS(tep, new) do { \
(tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
@@ -69,6 +64,8 @@ struct mctelem_ent {
#define MC_URGENT_NENT 10
#define MC_NONURGENT_NENT 20
+#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT)
+
#define MC_NCLASSES (MC_NONURGENT + 1)
#define COOKIE2MCTE(c) ((struct mctelem_ent *)(c))
@@ -77,11 +74,9 @@ struct mctelem_ent {
static struct mc_telem_ctl {
/* Linked lists that thread the array members together.
*
- * The free lists are singly-linked via mcte_next, and we allocate
- * from them by atomically unlinking an element from the head.
- * Consumed entries are returned to the head of the free list.
- * When an entry is reserved off the free list it is not linked
- * on any list until it is committed or dismissed.
+ * The free list is a bit array where a set bit means free.
+ * As the number of elements is quite small, it is easy to
+ * allocate them atomically that way.
*
* The committed list grows at the head and we do not maintain a
* tail pointer; insertions are performed atomically. The head
@@ -101,7 +96,7 @@ static struct mc_telem_ctl {
* we can lock it for updates. The head of the processing list
* always has the oldest telemetry, and we append (as above)
* at the tail of the processing list. */
- struct mctelem_ent *mctc_free[MC_NCLASSES];
+ DECLARE_BITMAP(mctc_free, MC_NENT);
struct mctelem_ent *mctc_committed[MC_NCLASSES];
struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
@@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cp
*/
static void mctelem_free(struct mctelem_ent *tep)
{
- mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
- MC_URGENT : MC_NONURGENT;
-
BUG_ON(tep->mcte_refcnt != 0);
BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
tep->mcte_prev = NULL;
- mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
+ tep->mcte_next = NULL;
+
+ /* set free in array */
+ set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free);
}
/* Increment the reference count of an entry that is not linked on to
@@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz)
}
if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
- MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
- (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
- datasz)) == NULL) {
+ MC_NENT)) == NULL ||
+ (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) {
if (mctctl.mctc_elems)
xfree(mctctl.mctc_elems);
printk("Allocations for MCA telemetry failed\n");
return;
}
- for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
- struct mctelem_ent *tep, **tepp;
+ for (i = 0; i < MC_NENT; i++) {
+ struct mctelem_ent *tep;
tep = mctctl.mctc_elems + i;
tep->mcte_flags = MCTE_F_STATE_FREE;
tep->mcte_refcnt = 0;
tep->mcte_data = datarr + i * datasz;
- if (i < MC_URGENT_NENT) {
- tepp = &mctctl.mctc_free[MC_URGENT];
- tep->mcte_flags |= MCTE_F_HOME_URGENT;
- } else {
- tepp = &mctctl.mctc_free[MC_NONURGENT];
- tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
- }
-
- tep->mcte_next = *tepp;
+ __set_bit(i, mctctl.mctc_free);
+ tep->mcte_next = NULL;
tep->mcte_prev = NULL;
- *tepp = tep;
}
}
@@ -310,32 +296,25 @@ static int mctelem_drop_count;
/* Reserve a telemetry entry, or return NULL if none available.
* If we return an entry then the caller must subsequently call exactly one of
- * mctelem_unreserve or mctelem_commit for that entry.
+ * mctelem_dismiss or mctelem_commit for that entry.
*/
mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
{
- struct mctelem_ent **freelp;
- struct mctelem_ent *oldhead, *newhead;
- mctelem_class_t target = (which == MC_URGENT) ?
- MC_URGENT : MC_NONURGENT;
+ unsigned bit;
+ unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT;
- freelp = &mctctl.mctc_free[target];
for (;;) {
- if ((oldhead = *freelp) == NULL) {
- if (which == MC_URGENT && target == MC_URGENT) {
- /* raid the non-urgent freelist */
- target = MC_NONURGENT;
- freelp = &mctctl.mctc_free[target];
- continue;
- } else {
- mctelem_drop_count++;
- return (NULL);
- }
+ bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit);
+
+ if (bit >= MC_NENT) {
+ mctelem_drop_count++;
+ return (NULL);
}
- newhead = oldhead->mcte_next;
- if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
- struct mctelem_ent *tep = oldhead;
+ /* try to allocate, atomically clear free bit */
+ if (test_and_clear_bit(bit, mctctl.mctc_free)) {
+ /* return element we got */
+ struct mctelem_ent *tep = mctctl.mctc_elems + bit;
mctelem_hold(tep);
MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
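
The race and the fix are easier to see in a standalone sketch. The
following is illustrative only - plain C with GCC atomic builtins
standing in for Xen's cmpxchgptr(), find_next_bit() and
test_and_clear_bit() - but it shows both the ABA hazard in the old
lock-free pop and why claiming a single bit atomically cannot install a
stale pointer:

    #include <stdbool.h>
    #include <stddef.h>

    struct ent { struct ent *next; };

    /* Old scheme: between reading oldhead->next and the CAS, another
     * flow can pop "oldhead", reuse "oldhead->next", and push "oldhead"
     * back.  The CAS still sees the expected head and succeeds,
     * installing a stale (possibly in-use) element as the new head. */
    static struct ent *racy_pop(struct ent **freelp)
    {
        struct ent *oldhead, *newhead;

        do {
            oldhead = *freelp;
            if (oldhead == NULL)
                return NULL;
            newhead = oldhead->next;  /* may be stale by the time of the CAS */
        } while (!__atomic_compare_exchange_n(freelp, &oldhead, newhead, false,
                                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
        return oldhead;
    }

    /* New scheme, in the spirit of the patch: a set bit means "free".
     * Allocation is one atomic fetch-and-clear per candidate bit; there
     * is no second, separately read value that could go stale. */
    #define NENT 30
    static unsigned long free_bits = (1UL << NENT) - 1;

    static int bitmap_alloc(void)
    {
        for (int bit = 0; bit < NENT; bit++) {
            unsigned long mask = 1UL << bit;

            if (__atomic_fetch_and(&free_bits, ~mask, __ATOMIC_SEQ_CST) & mask)
                return bit;   /* the bit was still set: the entry is ours */
        }
        return -1;            /* nothing free */
    }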

530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch

@@ -0,0 +1,29 @@
# Commit fd1864f48d8914fb8eeb6841cd08c2c09b368909
# Date 2014-02-24 12:09:52 +0100
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: update nested paging mode on vmexit
Since SVM and VMX use different mechanisms to emulate virtual vmentry
and virtual vmexit, it's hard to update the nested paging mode correctly
in common code, so we need to update it in their respective code paths.
SVM already updates the nested paging mode on vmexit. This patch adds the same
logic in VMX side.
Previous discussion is here:
http://lists.xen.org/archives/html/xen-devel/2013-12/msg01759.html
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Reviewed-by: Christoph Egger <chegger@amazon.de>
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2541,6 +2541,7 @@ void vmx_vmexit_handler(struct cpu_user_
vcpu_nestedhvm(v).nv_vmswitch_in_progress = 0;
if ( nestedhvm_vcpu_in_guestmode(v) )
{
+ paging_update_nestedmode(v);
if ( nvmx_n2_vmexit_handler(regs, exit_reason) )
goto out;
}

530b28c5-x86-MSI-don-t-risk-division-by-zero.patch

@@ -0,0 +1,24 @@
# Commit 5d160d913e03b581bdddde73535c18ac670cf0a9
# Date 2014-02-24 12:11:01 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI: don't risk division by zero
The check in question is redundant with the one in the immediately
following if(), where dividing by zero gets carefully avoided.
Spotted-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -636,7 +636,7 @@ static u64 read_pci_mem_bar(u16 seg, u8
return 0;
base = pos + PCI_SRIOV_BAR;
vf -= PCI_BDF(bus, slot, func) + offset;
- if ( vf < 0 || (vf && vf % stride) )
+ if ( vf < 0 )
return 0;
if ( stride )
{
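
The hazard sits in the modulo operation itself; the && does not help
once vf is non-zero. A minimal illustration (hypothetical values, not
the real SR-IOV register reads):

    #include <stdio.h>

    int main(void)
    {
        int vf = 5, stride = 0;     /* stride can legitimately be 0 */

        /* The removed check read:  if ( vf < 0 || (vf && vf % stride) )
         * With vf != 0 the && does not short-circuit, so "vf % stride"
         * is evaluated and stride == 0 raises a division fault. */

        if (vf < 0)                 /* the reduced check from the patch */
            return 1;
        if (stride) {               /* divide/modulo only when stride != 0 */
            if (vf % stride)
                return 1;
            printf("VF index %d\n", vf / stride);
        }
        return 0;
    }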

530c54c3-x86-mce-Reduce-boot-time-logspam.patch

@@ -0,0 +1,27 @@
# Commit a5ab9c9fa29cda7e1b18dbcaa69a5dbded96de32
# Date 2014-02-25 09:30:59 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mce: Reduce boot-time logspam
When booting with "no-mce", the user does not need to be told that "MCE
support [was] disabled by bootparam" for each cpu. Furthermore, a file:line
reference is unnecessary.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -729,8 +729,10 @@ void mcheck_init(struct cpuinfo_x86 *c,
{
enum mcheck_type inited = mcheck_none;
- if (mce_disabled == 1) {
- dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
+ if ( mce_disabled )
+ {
+ if ( bsp )
+ printk(XENLOG_INFO "MCE support disabled by bootparam\n");
return;
}

5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch

@@ -0,0 +1,32 @@
# Commit 79de2d31f1ff8910231b7ec15519405953e6571a
# Date 2014-02-28 17:35:15 +0100
# Author Wei Liu <wei.liu2@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
mm: ensure useful progress in decrease_reservation
During my fun time playing with the balloon driver I found that the
hypervisor's preemption check kept decrease_reservation from doing any
useful work for 32-bit guests, resulting in the guests hanging.
As Andrew suggested, we can force the check to fail for the first
iteration to ensure progress; we already did the same in d3a55d7d9
"x86/mm: Ensure useful progress in alloc_l2_table()".
After this change I cannot see the hang caused by continuation logic
anymore.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -268,7 +268,7 @@ static void decrease_reservation(struct
for ( i = a->nr_done; i < a->nr_extents; i++ )
{
- if ( hypercall_preempt_check() )
+ if ( hypercall_preempt_check() && i != a->nr_done )
{
a->preempted = 1;
goto out;
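
The same first-iteration exemption applies to any preemptible loop that
is restarted via a continuation. A minimal sketch (hypothetical helper
names, not the actual hypercall machinery):

    #include <stdbool.h>
    #include <stdio.h>

    static bool preempt_pending(void) { return true; }   /* worst case */
    static void do_one_extent(unsigned long i) { printf("extent %lu\n", i); }

    /* Returns the index to resume from; done when the result equals nr. */
    static unsigned long process(unsigned long start, unsigned long nr)
    {
        for (unsigned long i = start; i < nr; i++) {
            /* "i != start" guarantees every (re)invocation completes at
             * least one extent; without it, a permanently pending
             * preemption check would stall the loop at "start" forever. */
            if (preempt_pending() && i != start)
                return i;
            do_one_extent(i);
        }
        return nr;
    }

    int main(void)
    {
        unsigned long pos = 0;

        while (pos < 8)
            pos = process(pos, 8);  /* progresses even in the worst case */
        return 0;
    }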

5315a254-IOMMU-generalize-and-correct-softirq-processing.patch

@@ -0,0 +1,85 @@
References: bnc#858178
# Commit 9ef5aa944a6a0df7f5938983043c7e46f158bbc6
# Date 2014-03-04 10:52:20 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
IOMMU: generalize and correct softirq processing during Dom0 device setup
c/s 21039:95f5a4ce8f24 ("VT-d: reduce default verbosity") having put a
call to process_pending_softirqs() in VT-d's domain_context_mapping()
was wrong in two ways: For one we shouldn't be doing this when setting
up a device during DomU assignment. And then - I didn't check whether
that was the case already back then - we shouldn't call that function
with the pcidevs_lock (or in fact any spin lock) held.
Move the "preemption" into generic code, at once dealing with further
actual issues (too much output elsewhere - particularly on systems with
very many host-bridge-like devices - has been observed to still trigger
the watchdog when enabled) and potential ones (other IOMMU code may
also end up being too verbose).
Do the "preemption" once per device actually being set up when in
verbose mode, and once per bus otherwise.
Note that dropping pcidevs_lock around the process_pending_softirqs()
invocation is specifically not a problem here: We're in an __init
function and aren't racing with potential additions/removals of PCI
devices. Not acquiring the lock in setup_dom0_pci_devices() otoh is not
an option, as there are too many places that assert the lock being
held.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -27,6 +27,7 @@
#include <xen/delay.h>
#include <xen/keyhandler.h>
#include <xen/radix-tree.h>
+#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
@@ -922,6 +923,20 @@ static int __init _setup_dom0_pci_device
printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
pdev->domain->domain_id, pseg->nr, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if ( iommu_verbose )
+ {
+ spin_unlock(&pcidevs_lock);
+ process_pending_softirqs();
+ spin_lock(&pcidevs_lock);
+ }
+ }
+
+ if ( !iommu_verbose )
+ {
+ spin_unlock(&pcidevs_lock);
+ process_pending_softirqs();
+ spin_lock(&pcidevs_lock);
}
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -31,7 +31,6 @@
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/keyhandler.h>
-#include <xen/softirq.h>
#include <asm/msi.h>
#include <asm/irq.h>
#include <asm/hvm/vmx/vmx.h>
@@ -1494,9 +1493,6 @@ static int domain_context_mapping(
break;
}
- if ( iommu_verbose )
- process_pending_softirqs();
-
return ret;
}
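
The locking pattern the patch adopts - drop, process softirqs, retake -
is sketched below with a pthread mutex standing in for pcidevs_lock
(illustrative only; the point is that softirq handlers may themselves
take locks, so process_pending_softirqs() must never run under one):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t devs_lock = PTHREAD_MUTEX_INITIALIZER;

    static void setup_one_device(int dev) { printf("setup device %d\n", dev); }

    static void process_pending_softirqs_stub(void)
    {
        /* In Xen this runs pending softirq handlers (keeping e.g. the
         * watchdog alive during long boot-time loops). */
    }

    static void setup_devices(int ndevs)
    {
        pthread_mutex_lock(&devs_lock);
        for (int dev = 0; dev < ndevs; dev++) {
            setup_one_device(dev);

            /* Drop the lock around the "preemption" point, as the patch
             * does.  Safe here only because this models an __init-time,
             * effectively single-threaded phase: nothing can add or
             * remove devices while the lock is released. */
            pthread_mutex_unlock(&devs_lock);
            process_pending_softirqs_stub();
            pthread_mutex_lock(&devs_lock);
        }
        pthread_mutex_unlock(&devs_lock);
    }

    int main(void)
    {
        setup_devices(4);
        return 0;
    }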

5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch

@@ -0,0 +1,59 @@
# Commit 8db22866dbbcabf30ad6e3814489c730b53d1cf4
# Date 2014-03-04 10:58:19 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: don't propagate acpi_skip_timer_override do Dom0
It's unclear why c/s 4850:923dd9975981 added this - Dom0 isn't
controlling the timer interrupt, and hence has no need to know.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -56,7 +56,9 @@ bool_t __initdata acpi_ht = 1; /* enable
bool_t __initdata acpi_lapic;
bool_t __initdata acpi_ioapic;
-bool_t acpi_skip_timer_override __initdata;
+/* acpi_skip_timer_override: Skip IRQ0 overrides. */
+static bool_t acpi_skip_timer_override __initdata;
+boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -71,10 +71,6 @@ static void parse_acpi_param(char *s);
custom_param("acpi", parse_acpi_param);
/* **** Linux config option: propagated to domain0. */
-/* acpi_skip_timer_override: Skip IRQ0 overrides. */
-boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
-
-/* **** Linux config option: propagated to domain0. */
/* noapic: Disable IOAPIC setup. */
boolean_param("noapic", skip_ioapic_setup);
@@ -1365,9 +1361,6 @@ void __init __start_xen(unsigned long mb
/* Append any extra parameters. */
if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
safe_strcat(dom0_cmdline, " noapic");
- if ( acpi_skip_timer_override &&
- !strstr(dom0_cmdline, "acpi_skip_timer_override") )
- safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
if ( (strlen(acpi_param) == 0) && acpi_disabled )
{
printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -80,7 +80,6 @@ int __acpi_release_global_lock(unsigned
extern bool_t acpi_lapic, acpi_ioapic, acpi_noirq;
extern bool_t acpi_force, acpi_ht, acpi_disabled;
-extern bool_t acpi_skip_timer_override;
extern u32 acpi_smi_cmd;
extern u8 acpi_enable_value, acpi_disable_value;
void acpi_pic_sci_set_trigger(unsigned int, u16);

5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch

@@ -0,0 +1,32 @@
# Commit 7acf827b951b4e8501a777676ddf050d200103a4
# Date 2014-03-04 11:00:26 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/ACPI: also print address space for PM1x fields
At least one vendor is in the process of making systems available where
these live in MMIO, not in I/O port space.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -404,11 +404,15 @@ acpi_fadt_parse_sleep_info(struct acpi_t
acpi_fadt_copy_address(pm1b_evt, pm1b_event, pm1_event);
printk(KERN_INFO PREFIX
- "SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], "
- "pm1x_evt[%"PRIx64",%"PRIx64"]\n",
+ "SLEEP INFO: pm1x_cnt[%d:%"PRIx64",%d:%"PRIx64"], "
+ "pm1x_evt[%d:%"PRIx64",%d:%"PRIx64"]\n",
+ acpi_sinfo.pm1a_cnt_blk.space_id,
acpi_sinfo.pm1a_cnt_blk.address,
+ acpi_sinfo.pm1b_cnt_blk.space_id,
acpi_sinfo.pm1b_cnt_blk.address,
+ acpi_sinfo.pm1a_evt_blk.space_id,
acpi_sinfo.pm1a_evt_blk.address,
+ acpi_sinfo.pm1b_evt_blk.space_id,
acpi_sinfo.pm1b_evt_blk.address);
/* Now FACS... */

531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch

@@ -0,0 +1,56 @@
# Commit cadfd7bca999c0a795dc27be72d43c92e8943a0b
# Date 2014-03-10 11:02:25 +0100
# Author Dongxiao Xu <dongxiao.xu@intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/hvm: refine the judgment on IDENT_PT for EMT
When trying to get the EPT EMT type, the judgment on
HVM_PARAM_IDENT_PT is not correct: it always returns the WB type if
the parameter is not set. Remove the related code.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
We can't fully drop the dependency yet, but we should certainly avoid
overriding cases already properly handled. The reason for this is that
the guest setting up its MTRRs happens _after_ the EPT tables got
already constructed, and no code is in place to propagate this to the
EPT code. Without this check we're forcing the guest to run with all of
its memory uncachable until something happens to re-write every single
EPT entry. But of course this has to be just a temporary solution.
In the same spirit we should defer the "very early" (when the guest is
still being constructed and has no vCPU yet) override to the last
possible point.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -689,13 +689,8 @@ uint8_t epte_get_entry_emt(struct domain
*ipat = 0;
- if ( (current->domain != d) &&
- ((d->vcpu == NULL) || ((v = d->vcpu[0]) == NULL)) )
- return MTRR_TYPE_WRBACK;
-
- if ( !is_pvh_vcpu(v) &&
- !v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] )
- return MTRR_TYPE_WRBACK;
+ if ( v->domain != d )
+ v = d->vcpu ? d->vcpu[0] : NULL;
if ( !mfn_valid(mfn_x(mfn)) )
return MTRR_TYPE_UNCACHABLE;
@@ -718,7 +713,8 @@ uint8_t epte_get_entry_emt(struct domain
return MTRR_TYPE_WRBACK;
}
- gmtrr_mtype = is_hvm_vcpu(v) ?
+ gmtrr_mtype = is_hvm_domain(d) && v &&
+ d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
MTRR_TYPE_WRBACK;

531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch

@@ -0,0 +1,54 @@
# Commit b99113b9d5fac5149de8496f55afa00e285b1ff3
# Date 2014-03-10 11:03:53 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: fix memory type merging in epte_get_entry_emt()
Using the minimum numeric value of the guest and host specified memory
types is too simplistic - it works correctly only for a subset of
types. In particular the combinations involving WC need conversion to
UC when the two types conflict, while WT/WP cleanly resolves to WP.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -719,5 +719,35 @@ uint8_t epte_get_entry_emt(struct domain
MTRR_TYPE_WRBACK;
hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT));
- return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype);
+
+ /* If both types match we're fine. */
+ if ( likely(gmtrr_mtype == hmtrr_mtype) )
+ return hmtrr_mtype;
+
+ /* If either type is UC, we have to go with that one. */
+ if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
+ hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
+ return MTRR_TYPE_UNCACHABLE;
+
+ /* If either type is WB, we have to go with the other one. */
+ if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
+ return hmtrr_mtype;
+ if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
+ return gmtrr_mtype;
+
+ /*
+ * At this point we have disagreeing WC, WT, or WP types. The only
+ * combination that can be cleanly resolved is WT:WP. The ones involving
+ * WC need to be converted to UC, both due to the memory ordering
+ * differences and because WC disallows reads to be cached (WT and WP
+ * permit this), while WT and WP require writes to go straight to memory
+ * (WC can buffer them).
+ */
+ if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
+ hmtrr_mtype == MTRR_TYPE_WRPROT) ||
+ (gmtrr_mtype == MTRR_TYPE_WRPROT &&
+ hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
+ return MTRR_TYPE_WRPROT;
+
+ return MTRR_TYPE_UNCACHABLE;
}
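
Why min() is wrong follows directly from the architectural type
encodings (listed here for reference; the merge rules in the comment
restate what the patch's code does):

    /* x86 MTRR memory type encodings: */
    enum mtrr_type {
        MTRR_UC = 0,   /* uncachable */
        MTRR_WC = 1,   /* write-combining */
        MTRR_WT = 4,   /* write-through */
        MTRR_WP = 5,   /* write-protect */
        MTRR_WB = 6,   /* write-back */
    };

    /*
     * min(guest, host) fails exactly where the patch intervenes:
     *  - min(MTRR_WT, MTRR_WP) == MTRR_WT (4), yet the patch resolves a
     *    WT/WP conflict to MTRR_WP (5);
     *  - min(MTRR_WC, MTRR_WT) == MTRR_WC (1), yet WC buffers writes and
     *    caches no reads while WT requires writes to reach memory, so
     *    the only safe result is MTRR_UC (0) - a value min() can never
     *    produce from that pair.
     */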

531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch

@@ -0,0 +1,59 @@
# Commit 3089a6d82bdf3112ccb1dd074ce34a8cbdc4ccd8
# Date 2014-03-10 11:04:36 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: consolidate passthrough handling in epte_get_entry_emt()
It is inconsistent to depend on iommu_enabled alone: For a guest
without devices passed through to it, it is of no concern whether the
IOMMU is enabled.
There's one rather special case to take care of: VMX code marks the
LAPIC access page as MMIO. The added assertion needs to take this into
consideration, and the subsequent handling of the direct MMIO case was
inconsistent too: That page would have been WB in the absence of an
IOMMU, but UC in the presence of one, while in fact the cachability of
this page is entirely unrelated to an IOMMU being in use.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -698,14 +698,20 @@ uint8_t epte_get_entry_emt(struct domain
if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
return type;
- if ( !iommu_enabled )
+ if ( !iommu_enabled ||
+ (rangeset_is_empty(d->iomem_caps) &&
+ rangeset_is_empty(d->arch.ioport_caps) &&
+ !has_arch_pdevs(d)) )
{
+ ASSERT(!direct_mmio ||
+ mfn_x(mfn) == d->arch.hvm_domain.vmx.apic_access_mfn);
*ipat = 1;
return MTRR_TYPE_WRBACK;
}
if ( direct_mmio )
- return MTRR_TYPE_UNCACHABLE;
+ return mfn_x(mfn) != d->arch.hvm_domain.vmx.apic_access_mfn
+ ? MTRR_TYPE_UNCACHABLE : MTRR_TYPE_WRBACK;
if ( iommu_snoop )
{
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2038,9 +2038,9 @@ static int vmx_alloc_vlapic_mapping(stru
if ( apic_va == NULL )
return -ENOMEM;
share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
+ d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
_mfn(virt_to_mfn(apic_va)));
- d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
return 0;
}

531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch

@@ -0,0 +1,36 @@
# Commit 4509ada6ba1f09cc8f4fa23e009e7e5a963b6086
# Date 2014-03-10 11:11:28 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
kexec: identify which cpu the kexec image is being executed on
A patch to this effect has been in XenServer for a little while, and has
proved to be a useful debugging point for servers which behave
differently depending on whether the crash happens on the bootstrap
processor.
Moving the printk() from kexec_crash() to one_cpu_only() means that it
will only be printed for the cpu which wins the race along the kexec
path.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -265,6 +265,8 @@ static int noinline one_cpu_only(void)
}
set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags);
+ printk("Executing kexec image on cpu%u\n", cpu);
+
return 0;
}
@@ -340,8 +342,6 @@ void kexec_crash(void)
if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
return;
- printk("Executing crash image\n");
-
kexecing = TRUE;
if ( kexec_common_shutdown() != 0 )

531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch

@@ -0,0 +1,29 @@
# Commit ac2cba2901779f66bbfab298faa15c956e91393a
# Date 2014-03-10 14:40:50 +0100
# Author Julien Grall <julien.grall@linaro.org>
# Committer Jan Beulich <jbeulich@suse.com>
xmalloc: handle correctly page allocation when align > size
When align is superior to size, we need to retrieve the order from
align during multiple page allocation. I guess it was the goal of the commit
fb034f42 "xmalloc: make close-to-PAGE_SIZE allocations more efficient".
Signed-off-by: Julien Grall <julien.grall@linaro.org>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/xmalloc_tlsf.c
+++ b/xen/common/xmalloc_tlsf.c
@@ -527,11 +527,10 @@ static void xmalloc_pool_put(void *p)
static void *xmalloc_whole_pages(unsigned long size, unsigned long align)
{
- unsigned int i, order = get_order_from_bytes(size);
+ unsigned int i, order;
void *res, *p;
- if ( align > size )
- get_order_from_bytes(align);
+ order = get_order_from_bytes(max(align, size));
res = alloc_xenheap_pages(order, 0);
if ( res == NULL )
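
A worked example of the fixed computation (get_order_from_bytes()
re-implemented here for illustration; it returns the smallest order k
with PAGE_SIZE << k >= size):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    static unsigned int order_from_bytes(unsigned long size)
    {
        unsigned int order = 0;

        while ((PAGE_SIZE << order) < size)
            order++;
        return order;
    }

    int main(void)
    {
        unsigned long size = 5000, align = 16384;

        /* Old code computed the align-derived order but discarded it,
         * so the allocation used order 1 (8 KiB) and the 16 KiB
         * alignment request could be violated. */
        printf("from size only:        order %u\n", order_from_bytes(size));

        /* Fixed code covers max(align, size): order 2 (16 KiB), and an
         * order-k xenheap allocation is naturally (PAGE_SIZE << k)-aligned. */
        unsigned long m = align > size ? align : size;
        printf("from max(align, size): order %u\n", order_from_bytes(m));
        return 0;
    }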

xen.changes

@@ -1,3 +1,27 @@
-------------------------------------------------------------------
Wed Mar 12 08:20:42 MDT 2014 - carnold@suse.com
- Upstream patches from Jan
  530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
  530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
  530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
  530c54c3-x86-mce-Reduce-boot-time-logspam.patch
  5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch
  5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
  5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
  5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
  531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
  531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
  531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
  531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
  531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch
-------------------------------------------------------------------
Tue Mar 11 16:21:11 MDT 2014 - carnold@suse.com
- Add conversion tool for migrating xend/xm managed VMs to libvirt
  xen2libvirt.py (Jim Fehlig)
-------------------------------------------------------------------
Mon Mar 10 07:17:17 MDT 2014 - carnold@suse.com

xen.spec

@@ -177,6 +177,7 @@ Source32: xen-updown.sh
Source34: init.pciback
Source35: sysconfig.pciback
Source36: xnloader.py
Source37: xen2libvirt.py
# Systemd service files
Source41: xencommons.service
Source42: xendomains.service
@@ -197,6 +198,19 @@ Source99: baselibs.conf
# http://xenbits.xensource.com/ext/xenalyze
Source20000: xenalyze.hg.tar.bz2
# Upstream patches
Patch1: 530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
Patch2: 530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
Patch3: 530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
Patch4: 530c54c3-x86-mce-Reduce-boot-time-logspam.patch
Patch5: 5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch
Patch6: 5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
Patch7: 5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
Patch8: 5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
Patch9: 531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
Patch10: 531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
Patch11: 531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
Patch12: 531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
Patch13: 531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch
# Upstream qemu
Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch
Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch
@@ -550,6 +564,19 @@ Authors:
%prep
%setup -q -n %xen_build_dir -a 1 -a 2 -a 3 -a 4 -a 5 -a 57 -a 20000
# Upstream patches
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%patch12 -p1
%patch13 -p1
# Upstream qemu patches
%patch250 -p1
%patch251 -p1
@@ -888,6 +915,7 @@ mkdir -p %{buildroot}%{_unitdir}
install -m 644 %{SOURCE56} %{buildroot}%{_unitdir}/xend.service
%endif
%endif
install -m755 %SOURCE37 $RPM_BUILD_ROOT/usr/sbin/xen2libvirt
# Example config
mkdir -p $RPM_BUILD_ROOT/etc/xen/{vm,examples,scripts}
@@ -1094,6 +1122,7 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper
/usr/sbin/gdbsx
%endif
/usr/sbin/xl
/usr/sbin/xen2libvirt
%ifarch %ix86 x86_64
/usr/sbin/xen-hptool
/usr/sbin/xen-hvmcrash

xen2libvirt.py (new file, 113 lines)

@@ -0,0 +1,113 @@
#!/usr/bin/env python
#
# Copyright (C) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see
# <http://www.gnu.org/licenses/>.
#
# Authors:
# Jim Fehlig <jfehlig@suse.com>
#
# Read native Xen configuration format, convert to libvirt domXML, and
# import (virsh define <xml>) into libvirt.
import sys
import os
import argparse
import re

try:
    import libvirt
except ImportError:
    print 'Unable to import the libvirt module. Is libvirt-python installed?'
    sys.exit(1)

parser = argparse.ArgumentParser(description='Import Xen domain configuration into libvirt')
parser.add_argument('-c', '--convert-only', help='Convert Xen domain configuration into libvirt domXML, but do not import into libvirt', action='store_true', dest='convert_only')
parser.add_argument('-r', '--recursive', help='Operate recursively on all Xen domain configurations rooted at path', action='store_true')
parser.add_argument('-f', '--format', help='Format of Xen domain configuration. Supported formats are xm and sexpr', choices=['xm', 'sexpr'], default=None)
parser.add_argument('-v', '--verbose', help='Print information about the import process', action='store_true')
parser.add_argument('path', help='Path to Xen domain configuration')

def print_verbose(msg):
    if args.verbose:
        print msg

def check_config(path, config):
    isbinary = os.system('file -b ' + path + ' | grep text > /dev/null')
    if isbinary:
        print 'File %s is not a text file containing Xen xm or sexpr configuration' % path
        sys.exit(1)
    # find() returns -1 when the marker is absent and 0 (falsy!) when the
    # config starts with '(domain', so compare against -1 explicitly
    if config.find('(domain') != -1:
        return 'sexpr'
    return 'xm'

def import_domain(conn, path, format=None, convert_only=False):
    f = open(path, 'r')
    config = f.read()
    print_verbose('Xen domain configuration read from %s:\n %s' % (path, config))
    if format is None:
        format = check_config(path, config)
    if format == 'sexpr':
        print_verbose('scrubbing domid from configuration')
        config = re.sub(r"\(domid [0-9]*\)", "", config)
        print_verbose('scrubbed sexpr:\n %s' % config)
        xml = conn.domainXMLFromNative('xen-sxpr', config, 0)
    else:
        # if format != sexpr, try xm
        xml = conn.domainXMLFromNative('xen-xm', config, 0)
    f.close()
    print_verbose('Successfully converted Xen domain configuration to '
                  'libvirt domXML:\n %s' % xml)
    if convert_only:
        print xml
    else:
        print_verbose('Importing converted libvirt domXML into libvirt...')
        dom = conn.defineXML(xml)
        if dom is None:
            print 'Failed to define domain from converted domXML'
            sys.exit(1)
        print_verbose('domXML successfully imported into libvirt')

args = parser.parse_args()
path = args.path

# Connect to libvirt
conn = libvirt.open(None)
if conn is None:
    print('Failed to open connection to the hypervisor')
    sys.exit(1)

if args.recursive:
    try:
        for root, dirs, files in os.walk(path):
            for name in files:
                abs_name = os.path.join(root, name)
                print_verbose('Processing file %s' % abs_name)
                import_domain(conn, abs_name, args.format, args.convert_only)
    except IOError:
        print('Failed to open/read path %s' % path)
        sys.exit(1)
else:
    import_domain(conn, args.path, args.format, args.convert_only)
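
A brief usage note (paths as installed by the spec file above, which
places the script at /usr/sbin/xen2libvirt): "xen2libvirt -c
/etc/xen/vm/myvm" should print the converted domXML without defining it,
"xen2libvirt -r -v /etc/xen/vm" walks the directory and imports every
configuration it can parse, and "-f xm" or "-f sexpr" forces the input
format when the "(domain" auto-detection heuristic guesses wrong
("myvm" is a hypothetical file name).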