Accepting request 244904 from Virtualization

Bug fixes for openSUSE 13.2

OBS-URL: https://build.opensuse.org/request/show/244904
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/xen?expand=0&rev=192
Authored by Ludwig Nussel on 2014-08-20 15:52:54 +00:00, committed by Git OBS Bridge
commit 4da480390c
84 changed files with 1151 additions and 5115 deletions

@@ -1,188 +0,0 @@
# Commit 60ea3a3ac3d2bcd8e85b250fdbfc46b3b9dc7085
# Date 2014-02-24 12:07:41 +0100
# Author Frediano Ziglio <frediano.ziglio@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MCE: Fix race condition in mctelem_reserve
These lines (in mctelem_reserve)
newhead = oldhead->mcte_next;
if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
are racy. After newhead has been read, another flow (a thread or a
recursive invocation) can rearrange the entire list yet end up setting the
head back to the same value: the classic ABA problem. oldhead then still
equals *freelp, so the cmpxchg succeeds, but the head being installed may
point to an arbitrary element, even one already in use.
This patch instead uses a bit array and atomic bit operations.
Signed-off-by: Frediano Ziglio <frediano.ziglio@citrix.com>
Reviewed-by: Liu Jinsong <jinsong.liu@intel.com>
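To make the ABA hazard concrete, here is a minimal editor's sketch (not the
Xen code; racy_pop() and bitmap_alloc() are illustrative names, and the Xen
primitives are assumed to behave as they are used in the patch below):
/* Racy lock-free pop: between reading oldhead->next and the cmpxchg,
 * another flow may pop oldhead, reuse or free its successor, then push
 * oldhead back.  The cmpxchg still sees a matching head and succeeds,
 * installing a stale next pointer. */
struct ent { struct ent *next; };
static struct ent *racy_pop(struct ent **freelp)
{
    struct ent *oldhead, *newhead;
    do {
        if ( (oldhead = *freelp) == NULL )
            return NULL;
        newhead = oldhead->next;               /* may go stale here */
    } while ( cmpxchgptr(freelp, oldhead, newhead) != oldhead );
    return oldhead;
}
/* A bitmap has no dependent pointer read that can go stale: the single
 * atomic test_and_clear_bit() both checks and claims the slot, and a
 * lost race merely means scanning on for another free bit. */
static int bitmap_alloc(unsigned long *bm, unsigned int nbits)
{
    unsigned int bit;
    for ( bit = find_next_bit(bm, nbits, 0); bit < nbits;
          bit = find_next_bit(bm, nbits, bit + 1) )
        if ( test_and_clear_bit(bit, bm) )
            return bit;
    return -1;
}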
--- a/xen/arch/x86/cpu/mcheck/mctelem.c
+++ b/xen/arch/x86/cpu/mcheck/mctelem.c
@@ -37,24 +37,19 @@ struct mctelem_ent {
void *mcte_data; /* corresponding data payload */
};
-#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent freelist */
-#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent freelist */
-#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent errors */
-#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent errors */
+#define MCTE_F_CLASS_URGENT 0x0001U /* in use - urgent errors */
+#define MCTE_F_CLASS_NONURGENT 0x0002U /* in use - nonurgent errors */
#define MCTE_F_STATE_FREE 0x0010U /* on a freelist */
#define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list */
#define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list */
#define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list */
-#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
#define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)
#define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \
MCTE_F_STATE_UNCOMMITTED | \
MCTE_F_STATE_COMMITTED | \
MCTE_F_STATE_PROCESSING)
-#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)
-
#define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
#define MCTE_SET_CLASS(tep, new) do { \
(tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
@@ -69,6 +64,8 @@ struct mctelem_ent {
#define MC_URGENT_NENT 10
#define MC_NONURGENT_NENT 20
+#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT)
+
#define MC_NCLASSES (MC_NONURGENT + 1)
#define COOKIE2MCTE(c) ((struct mctelem_ent *)(c))
@@ -77,11 +74,9 @@ struct mctelem_ent {
static struct mc_telem_ctl {
/* Linked lists that thread the array members together.
*
- * The free lists are singly-linked via mcte_next, and we allocate
- * from them by atomically unlinking an element from the head.
- * Consumed entries are returned to the head of the free list.
- * When an entry is reserved off the free list it is not linked
- * on any list until it is committed or dismissed.
+ * The free lists is a bit array where bit 1 means free.
+ * This as element number is quite small and is easy to
+ * atomically allocate that way.
*
* The committed list grows at the head and we do not maintain a
* tail pointer; insertions are performed atomically. The head
@@ -101,7 +96,7 @@ static struct mc_telem_ctl {
* we can lock it for updates. The head of the processing list
* always has the oldest telemetry, and we append (as above)
* at the tail of the processing list. */
- struct mctelem_ent *mctc_free[MC_NCLASSES];
+ DECLARE_BITMAP(mctc_free, MC_NENT);
struct mctelem_ent *mctc_committed[MC_NCLASSES];
struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
@@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cp
*/
static void mctelem_free(struct mctelem_ent *tep)
{
- mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
- MC_URGENT : MC_NONURGENT;
-
BUG_ON(tep->mcte_refcnt != 0);
BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
tep->mcte_prev = NULL;
- mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
+ tep->mcte_next = NULL;
+
+ /* set free in array */
+ set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free);
}
/* Increment the reference count of an entry that is not linked on to
@@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz)
}
if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
- MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
- (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
- datasz)) == NULL) {
+ MC_NENT)) == NULL ||
+ (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) {
if (mctctl.mctc_elems)
xfree(mctctl.mctc_elems);
printk("Allocations for MCA telemetry failed\n");
return;
}
- for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
- struct mctelem_ent *tep, **tepp;
+ for (i = 0; i < MC_NENT; i++) {
+ struct mctelem_ent *tep;
tep = mctctl.mctc_elems + i;
tep->mcte_flags = MCTE_F_STATE_FREE;
tep->mcte_refcnt = 0;
tep->mcte_data = datarr + i * datasz;
- if (i < MC_URGENT_NENT) {
- tepp = &mctctl.mctc_free[MC_URGENT];
- tep->mcte_flags |= MCTE_F_HOME_URGENT;
- } else {
- tepp = &mctctl.mctc_free[MC_NONURGENT];
- tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
- }
-
- tep->mcte_next = *tepp;
+ __set_bit(i, mctctl.mctc_free);
+ tep->mcte_next = NULL;
tep->mcte_prev = NULL;
- *tepp = tep;
}
}
@@ -310,32 +296,25 @@ static int mctelem_drop_count;
/* Reserve a telemetry entry, or return NULL if none available.
* If we return an entry then the caller must subsequently call exactly one of
- * mctelem_unreserve or mctelem_commit for that entry.
+ * mctelem_dismiss or mctelem_commit for that entry.
*/
mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
{
- struct mctelem_ent **freelp;
- struct mctelem_ent *oldhead, *newhead;
- mctelem_class_t target = (which == MC_URGENT) ?
- MC_URGENT : MC_NONURGENT;
+ unsigned bit;
+ unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT;
- freelp = &mctctl.mctc_free[target];
for (;;) {
- if ((oldhead = *freelp) == NULL) {
- if (which == MC_URGENT && target == MC_URGENT) {
- /* raid the non-urgent freelist */
- target = MC_NONURGENT;
- freelp = &mctctl.mctc_free[target];
- continue;
- } else {
- mctelem_drop_count++;
- return (NULL);
- }
+ bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit);
+
+ if (bit >= MC_NENT) {
+ mctelem_drop_count++;
+ return (NULL);
}
- newhead = oldhead->mcte_next;
- if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
- struct mctelem_ent *tep = oldhead;
+ /* try to allocate, atomically clear free bit */
+ if (test_and_clear_bit(bit, mctctl.mctc_free)) {
+ /* return element we got */
+ struct mctelem_ent *tep = mctctl.mctc_elems + bit;
mctelem_hold(tep);
MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);

@@ -1,29 +0,0 @@
# Commit fd1864f48d8914fb8eeb6841cd08c2c09b368909
# Date 2014-02-24 12:09:52 +0100
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: update nested paging mode on vmexit
Since SVM and VMX use different mechanisms to emulate virtual vmentry
and virtual vmexit, it's hard to update the nested paging mode correctly
in common code. So we need to update the nested paging mode in their
respective code paths.
SVM already updates the nested paging mode on vmexit. This patch adds the same
logic in VMX side.
Previous discussion is here:
http://lists.xen.org/archives/html/xen-devel/2013-12/msg01759.html
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Reviewed-by: Christoph Egger <chegger@amazon.de>
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2541,6 +2541,7 @@ void vmx_vmexit_handler(struct cpu_user_
vcpu_nestedhvm(v).nv_vmswitch_in_progress = 0;
if ( nestedhvm_vcpu_in_guestmode(v) )
{
+ paging_update_nestedmode(v);
if ( nvmx_n2_vmexit_handler(regs, exit_reason) )
goto out;
}

@@ -1,24 +0,0 @@
# Commit 5d160d913e03b581bdddde73535c18ac670cf0a9
# Date 2014-02-24 12:11:01 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI: don't risk division by zero
The check in question is redundant with the one in the immediately
following if(), where dividing by zero gets carefully avoided. Worse, the
removed vf % stride was guarded only by vf being non-zero, not by stride
being non-zero, so a zero stride could fault right there.
Spotted-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -636,7 +636,7 @@ static u64 read_pci_mem_bar(u16 seg, u8
return 0;
base = pos + PCI_SRIOV_BAR;
vf -= PCI_BDF(bus, slot, func) + offset;
- if ( vf < 0 || (vf && vf % stride) )
+ if ( vf < 0 )
return 0;
if ( stride )
{

@@ -1,27 +0,0 @@
# Commit a5ab9c9fa29cda7e1b18dbcaa69a5dbded96de32
# Date 2014-02-25 09:30:59 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mce: Reduce boot-time logspam
When booting with "no-mce", the user does not need to be told that "MCE
support [was] disabled by bootparam" for each cpu. Furthermore, a file:line
reference is unnecessary.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -729,8 +729,10 @@ void mcheck_init(struct cpuinfo_x86 *c,
{
enum mcheck_type inited = mcheck_none;
- if (mce_disabled == 1) {
- dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
+ if ( mce_disabled )
+ {
+ if ( bsp )
+ printk(XENLOG_INFO "MCE support disabled by bootparam\n");
return;
}

@@ -1,85 +0,0 @@
References: bnc#858178
# Commit 9ef5aa944a6a0df7f5938983043c7e46f158bbc6
# Date 2014-03-04 10:52:20 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
IOMMU: generalize and correct softirq processing during Dom0 device setup
c/s 21039:95f5a4ce8f24 ("VT-d: reduce default verbosity") having put a
call to process_pending_softirqs() in VT-d's domain_context_mapping()
was wrong in two ways: For one we shouldn't be doing this when setting
up a device during DomU assignment. And then - I didn't check whether
that was the case already back then - we shouldn't call that function
with the pcidevs_lock (or in fact any spin lock) held.
Move the "preemption" into generic code, at once dealing with further
actual issues (too much output elsewhere, particularly on systems with
very many host-bridge-like devices, has been observed to still trigger the
watchdog when enabled) and potential ones (other IOMMU code may also end
up being too verbose).
Do the "preemption" once per device actually being set up when in
verbose mode, and once per bus otherwise.
Note that dropping pcidevs_lock around the process_pending_softirqs()
invocation is specifically not a problem here: We're in an __init
function and aren't racing with potential additions/removals of PCI
devices. Not acquiring the lock in setup_dom0_pci_devices() otoh is not
an option, as there are too many places that assert the lock being
held.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
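The "preemption" idiom being added is compact enough to isolate; a hedged
sketch (editor's illustration, the helper name is hypothetical):
/* Softirqs (among them the timer work that keeps the watchdog happy)
 * must not be processed while holding a spin lock, so the lock is
 * dropped around the call and re-acquired afterwards.  This is only
 * safe because the caller is an __init function that cannot race with
 * PCI device additions/removals. */
static void pcidevs_preempt_point(void)
{
    spin_unlock(&pcidevs_lock);
    process_pending_softirqs();
    spin_lock(&pcidevs_lock);
}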
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -27,6 +27,7 @@
#include <xen/delay.h>
#include <xen/keyhandler.h>
#include <xen/radix-tree.h>
+#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
@@ -922,6 +923,20 @@ static int __init _setup_dom0_pci_device
printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
pdev->domain->domain_id, pseg->nr, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if ( iommu_verbose )
+ {
+ spin_unlock(&pcidevs_lock);
+ process_pending_softirqs();
+ spin_lock(&pcidevs_lock);
+ }
+ }
+
+ if ( !iommu_verbose )
+ {
+ spin_unlock(&pcidevs_lock);
+ process_pending_softirqs();
+ spin_lock(&pcidevs_lock);
}
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -31,7 +31,6 @@
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/keyhandler.h>
-#include <xen/softirq.h>
#include <asm/msi.h>
#include <asm/irq.h>
#include <asm/hvm/vmx/vmx.h>
@@ -1494,9 +1493,6 @@ static int domain_context_mapping(
break;
}
- if ( iommu_verbose )
- process_pending_softirqs();
-
return ret;
}

@@ -1,56 +0,0 @@
# Commit cadfd7bca999c0a795dc27be72d43c92e8943a0b
# Date 2014-03-10 11:02:25 +0100
# Author Dongxiao Xu <dongxiao.xu@intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/hvm: refine the judgment on IDENT_PT for EMT
When trying to get the EPT EMT type, the check of HVM_PARAM_IDENT_PT is
not correct: it makes the function always return the WB type if the
parameter is not set. Remove the related code.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
We can't fully drop the dependency yet, but we should certainly avoid
overriding cases already properly handled. The reason is that the guest
sets up its MTRRs _after_ the EPT tables have already been constructed,
and no code is in place to propagate this to the EPT code. Without this
check we would be forcing the guest to run with all of its memory
uncachable until something happens to rewrite every single EPT entry. But
of course this has to be just a temporary solution.
In the same spirit we should defer the "very early" (when the guest is
still being constructed and has no vCPU yet) override to the last
possible point.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -689,13 +689,8 @@ uint8_t epte_get_entry_emt(struct domain
*ipat = 0;
- if ( (current->domain != d) &&
- ((d->vcpu == NULL) || ((v = d->vcpu[0]) == NULL)) )
- return MTRR_TYPE_WRBACK;
-
- if ( !is_pvh_vcpu(v) &&
- !v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] )
- return MTRR_TYPE_WRBACK;
+ if ( v->domain != d )
+ v = d->vcpu ? d->vcpu[0] : NULL;
if ( !mfn_valid(mfn_x(mfn)) )
return MTRR_TYPE_UNCACHABLE;
@@ -718,7 +713,8 @@ uint8_t epte_get_entry_emt(struct domain
return MTRR_TYPE_WRBACK;
}
- gmtrr_mtype = is_hvm_vcpu(v) ?
+ gmtrr_mtype = is_hvm_domain(d) && v &&
+ d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
MTRR_TYPE_WRBACK;

@@ -1,54 +0,0 @@
# Commit b99113b9d5fac5149de8496f55afa00e285b1ff3
# Date 2014-03-10 11:03:53 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: fix memory type merging in epte_get_entry_emt()
Using the minimum numeric value of the guest and host specified memory
types is too simplistic: it works correctly only for a subset of type
pairs. In particular the WT/WP combination must resolve to WP rather than
the numerically smaller WT, and conflicting combinations involving WC
need conversion to UC.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
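The merge rules the hunk below implements can be tabulated (derived
directly from the code; guest type down the side, host type across the
top):
        UC   WC   WT   WP   WB
   UC   UC   UC   UC   UC   UC
   WC   UC   WC   UC   UC   WC
   WT   UC   UC   WT   WP   WT
   WP   UC   UC   WP   WP   WP
   WB   UC   WC   WT   WP   WB
A plain min() over the numeric values (UC=0, WC=1, WT=4, WP=5, WB=6)
would, for example, have yielded WC for the WC/WT pair, where UC is
required.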
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -719,5 +719,35 @@ uint8_t epte_get_entry_emt(struct domain
MTRR_TYPE_WRBACK;
hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT));
- return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype);
+
+ /* If both types match we're fine. */
+ if ( likely(gmtrr_mtype == hmtrr_mtype) )
+ return hmtrr_mtype;
+
+ /* If either type is UC, we have to go with that one. */
+ if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
+ hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
+ return MTRR_TYPE_UNCACHABLE;
+
+ /* If either type is WB, we have to go with the other one. */
+ if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
+ return hmtrr_mtype;
+ if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
+ return gmtrr_mtype;
+
+ /*
+ * At this point we have disagreeing WC, WT, or WP types. The only
+ * combination that can be cleanly resolved is WT:WP. The ones involving
+ * WC need to be converted to UC, both due to the memory ordering
+ * differences and because WC disallows reads to be cached (WT and WP
+ * permit this), while WT and WP require writes to go straight to memory
+ * (WC can buffer them).
+ */
+ if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
+ hmtrr_mtype == MTRR_TYPE_WRPROT) ||
+ (gmtrr_mtype == MTRR_TYPE_WRPROT &&
+ hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
+ return MTRR_TYPE_WRPROT;
+
+ return MTRR_TYPE_UNCACHABLE;
}

@@ -1,75 +0,0 @@
# Commit 3089a6d82bdf3112ccb1dd074ce34a8cbdc4ccd8
# Date 2014-03-10 11:04:36 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: consolidate passthrough handling in epte_get_entry_emt()
It is inconsistent to depend on iommu_enabled alone: For a guest
without devices passed through to it, it is of no concern whether the
IOMMU is enabled.
There's one rather special case to take care of: VMX code marks the
LAPIC access page as MMIO. The added assertion needs to take this into
consideration, and the subsequent handling of the direct MMIO case was
inconsistent too: That page would have been WB in the absence of an
IOMMU, but UC in the presence of it, while in fact the cachability of
this page is entirely unrelated to an IOMMU being in use.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: "Xu, Dongxiao" <dongxiao.xu@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
# Commit 1f8b57779785bf9f55c16312bb1ec679929c314b
# Date 2014-03-28 13:43:25 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/EPT: relax treatment of APIC MFN
There's no point in this being mapped UC by the guest due to using a
respective PAT index - set the ignore-PAT flag to true.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -698,14 +698,24 @@ uint8_t epte_get_entry_emt(struct domain
if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
return type;
- if ( !iommu_enabled )
+ if ( !iommu_enabled ||
+ (rangeset_is_empty(d->iomem_caps) &&
+ rangeset_is_empty(d->arch.ioport_caps) &&
+ !has_arch_pdevs(d)) )
{
+ ASSERT(!direct_mmio ||
+ mfn_x(mfn) == d->arch.hvm_domain.vmx.apic_access_mfn);
*ipat = 1;
return MTRR_TYPE_WRBACK;
}
if ( direct_mmio )
- return MTRR_TYPE_UNCACHABLE;
+ {
+ if ( mfn_x(mfn) != d->arch.hvm_domain.vmx.apic_access_mfn )
+ return MTRR_TYPE_UNCACHABLE;
+ *ipat = 1;
+ return MTRR_TYPE_WRBACK;
+ }
if ( iommu_snoop )
{
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2038,9 +2038,9 @@ static int vmx_alloc_vlapic_mapping(stru
if ( apic_va == NULL )
return -ENOMEM;
share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
+ d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
_mfn(virt_to_mfn(apic_va)));
- d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
return 0;
}

@@ -1,36 +0,0 @@
# Commit 4509ada6ba1f09cc8f4fa23e009e7e5a963b6086
# Date 2014-03-10 11:11:28 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
kexec: identify which cpu the kexec image is being executed on
A patch to this effect has been in XenServer for a little while, and has
proved to be a useful debugging aid for servers which behave differently
when crashing on a non-bootstrap processor.
Moving the printk() from kexec_panic() to one_cpu_only() means that it will
only be printed for the cpu which wins the race along the kexec path.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -265,6 +265,8 @@ static int noinline one_cpu_only(void)
}
set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags);
+ printk("Executing kexec image on cpu%u\n", cpu);
+
return 0;
}
@@ -340,8 +342,6 @@ void kexec_crash(void)
if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
return;
- printk("Executing crash image\n");
-
kexecing = TRUE;
if ( kexec_common_shutdown() != 0 )

@@ -1,29 +0,0 @@
# Commit ac2cba2901779f66bbfab298faa15c956e91393a
# Date 2014-03-10 14:40:50 +0100
# Author Julien Grall <julien.grall@linaro.org>
# Committer Jan Beulich <jbeulich@suse.com>
xmalloc: handle correctly page allocation when align > size
When align is superior to size, we need to retrieve the order from
align during multiple page allocation. I guess it was the goal of the commit
fb034f42 "xmalloc: make close-to-PAGE_SIZE allocations more efficient".
Signed-off-by: Julien Grall <julien.grall@linaro.org>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/xmalloc_tlsf.c
+++ b/xen/common/xmalloc_tlsf.c
@@ -527,11 +527,10 @@ static void xmalloc_pool_put(void *p)
static void *xmalloc_whole_pages(unsigned long size, unsigned long align)
{
- unsigned int i, order = get_order_from_bytes(size);
+ unsigned int i, order;
void *res, *p;
- if ( align > size )
- get_order_from_bytes(align);
+ order = get_order_from_bytes(max(align, size));
res = alloc_xenheap_pages(order, 0);
if ( res == NULL )

@@ -1,165 +0,0 @@
Subject: xen/pygrub: grub2/grub.cfg from RHEL 7 has new commands in menuentry
From: Joby Poriyath <joby.poriyath@citrix.com> Tue Feb 4 18:10:35 2014 +0000
Date: Wed Mar 12 13:51:29 2014 +0000
Git: dd03048708af072374963d6d0721cc6d4c5f52cf
menuentry in grub2/grub.cfg uses the linux16 and initrd16 commands
instead of linux and initrd. Because of this, RHEL 7 (beta) guests failed
to boot after installation.
In addition, RHEL 7 menu entries have two different single-quote
delimited strings on the same line, and the greedy grouping used for
menuentry parsing captured both strings, along with the options in between.
Signed-off-by: Joby Poriyath <joby.poriyath@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Cc: george.dunlap@citrix.com
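An editor's illustration of the greediness problem, using a hypothetical
line of the shape added to the example config below:
menuentry 'RHEL 7' --class red $menuentry_id_option 'gnulinux-xyz' {
With the greedy pattern ^menuentry ["\'](.*)["\'] (.*){ the first group
runs up to the last quote on the line, so the captured title becomes
RHEL 7' --class red $menuentry_id_option 'gnulinux-xyz
i.e. both quoted strings plus the options between them; the non-greedy
(.*?) stops at the first closing quote and captures just RHEL 7.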
diff --git a/tools/pygrub/examples/rhel-7-beta.grub2 b/tools/pygrub/examples/rhel-7-beta.grub2
new file mode 100644
index 0000000..88f0f99
--- /dev/null
+++ b/tools/pygrub/examples/rhel-7-beta.grub2
@@ -0,0 +1,118 @@
+#
+# DO NOT EDIT THIS FILE
+#
+# It is automatically generated by grub2-mkconfig using templates
+# from /etc/grub.d and settings from /etc/default/grub
+#
+
+### BEGIN /etc/grub.d/00_header ###
+set pager=1
+
+if [ -s $prefix/grubenv ]; then
+ load_env
+fi
+if [ "${next_entry}" ] ; then
+ set default="${next_entry}"
+ set next_entry=
+ save_env next_entry
+ set boot_once=true
+else
+ set default="${saved_entry}"
+fi
+
+if [ x"${feature_menuentry_id}" = xy ]; then
+ menuentry_id_option="--id"
+else
+ menuentry_id_option=""
+fi
+
+export menuentry_id_option
+
+if [ "${prev_saved_entry}" ]; then
+ set saved_entry="${prev_saved_entry}"
+ save_env saved_entry
+ set prev_saved_entry=
+ save_env prev_saved_entry
+ set boot_once=true
+fi
+
+function savedefault {
+ if [ -z "${boot_once}" ]; then
+ saved_entry="${chosen}"
+ save_env saved_entry
+ fi
+}
+
+function load_video {
+ if [ x$feature_all_video_module = xy ]; then
+ insmod all_video
+ else
+ insmod efi_gop
+ insmod efi_uga
+ insmod ieee1275_fb
+ insmod vbe
+ insmod vga
+ insmod video_bochs
+ insmod video_cirrus
+ fi
+}
+
+terminal_output console
+set timeout=5
+### END /etc/grub.d/00_header ###
+
+### BEGIN /etc/grub.d/10_linux ###
+menuentry 'Red Hat Enterprise Linux Everything, with Linux 3.10.0-54.0.1.el7.x86_64' --class red --class gnu-linux --class gnu --class os $menuentry_id_option 'gnulinux-3.10.0-54.0.1.el7.x86_64-advanced-d23b8b49-4cfe-4900-8ef1-ec80bc633163' {
+ load_video
+ set gfxpayload=keep
+ insmod gzio
+ insmod part_msdos
+ insmod xfs
+ set root='hd0,msdos1'
+ if [ x$feature_platform_search_hint = xy ]; then
+ search --no-floppy --fs-uuid --set=root --hint='hd0,msdos1' 89ffef78-82b3-457c-bc57-42cccc373851
+ else
+ search --no-floppy --fs-uuid --set=root 89ffef78-82b3-457c-bc57-42cccc373851
+ fi
+ linux16 /vmlinuz-3.10.0-54.0.1.el7.x86_64 root=/dev/mapper/rhel-root ro rd.lvm.lv=rhel/swap vconsole.keymap=uk crashkernel=auto rd.lvm.lv=rhel/root vconsole.font=latarcyrheb-sun16 LANG=en_GB.UTF-8
+ initrd16 /initramfs-3.10.0-54.0.1.el7.x86_64.img
+}
+menuentry 'Red Hat Enterprise Linux Everything, with Linux 0-rescue-af34f0b8cf364cdbbe6d093f8228a37f' --class red --class gnu-linux --class gnu --class os $menuentry_id_option 'gnulinux-0-rescue-af34f0b8cf364cdbbe6d093f8228a37f-advanced-d23b8b49-4cfe-4900-8ef1-ec80bc633163' {
+ load_video
+ insmod gzio
+ insmod part_msdos
+ insmod xfs
+ set root='hd0,msdos1'
+ if [ x$feature_platform_search_hint = xy ]; then
+ search --no-floppy --fs-uuid --set=root --hint='hd0,msdos1' 89ffef78-82b3-457c-bc57-42cccc373851
+ else
+ search --no-floppy --fs-uuid --set=root 89ffef78-82b3-457c-bc57-42cccc373851
+ fi
+ linux16 /vmlinuz-0-rescue-af34f0b8cf364cdbbe6d093f8228a37f root=/dev/mapper/rhel-root ro rd.lvm.lv=rhel/swap vconsole.keymap=uk crashkernel=auto rd.lvm.lv=rhel/root vconsole.font=latarcyrheb-sun16
+ initrd16 /initramfs-0-rescue-af34f0b8cf364cdbbe6d093f8228a37f.img
+}
+
+### END /etc/grub.d/10_linux ###
+
+### BEGIN /etc/grub.d/20_linux_xen ###
+### END /etc/grub.d/20_linux_xen ###
+
+### BEGIN /etc/grub.d/20_ppc_terminfo ###
+### END /etc/grub.d/20_ppc_terminfo ###
+
+### BEGIN /etc/grub.d/30_os-prober ###
+### END /etc/grub.d/30_os-prober ###
+
+### BEGIN /etc/grub.d/40_custom ###
+# This file provides an easy way to add custom menu entries. Simply type the
+# menu entries you want to add after this comment. Be careful not to change
+# the 'exec tail' line above.
+### END /etc/grub.d/40_custom ###
+
+### BEGIN /etc/grub.d/41_custom ###
+if [ -f ${config_directory}/custom.cfg ]; then
+ source ${config_directory}/custom.cfg
+elif [ -z "${config_directory}" -a -f $prefix/custom.cfg ]; then
+ source $prefix/custom.cfg;
+fi
+### END /etc/grub.d/41_custom ###
diff --git a/tools/pygrub/src/GrubConf.py b/tools/pygrub/src/GrubConf.py
index cb853c9..974cded 100644
--- a/tools/pygrub/src/GrubConf.py
+++ b/tools/pygrub/src/GrubConf.py
@@ -348,7 +348,9 @@ class Grub2Image(_GrubImage):
commands = {'set:root': 'root',
'linux': 'kernel',
+ 'linux16': 'kernel',
'initrd': 'initrd',
+ 'initrd16': 'initrd',
'echo': None,
'insmod': None,
'search': None}
@@ -394,7 +396,7 @@ class Grub2ConfigFile(_GrubConfigFile):
continue
# new image
- title_match = re.match('^menuentry ["\'](.*)["\'] (.*){', l)
+ title_match = re.match('^menuentry ["\'](.*?)["\'] (.*){', l)
if title_match:
if img is not None:
raise RuntimeError, "syntax error: cannot nest menuentry (%d %s)" % (len(img),img)

@@ -1,81 +0,0 @@
# Commit 8c0eed2cc8d8a2ccccdffe4c386b625b672dc12a
# Date 2014-03-13 14:26:35 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
common: make hypercall preemption checks consistent
- never preempt on the first iteration (ensure forward progress)
- do cheap checks first
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Acked-by: Keir Fraser <keir@xen.org>
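The shape all of the hunks below converge on can be sketched as follows
(editor's illustration; process_range() and do_one_unit() are
hypothetical):
/* Process [start, end); do the cheap test first and never preempt on
 * the first iteration, so every invocation makes forward progress even
 * when events are perpetually pending. */
static unsigned long process_range(unsigned long start, unsigned long end)
{
    unsigned long i;
    for ( i = start; i < end; i++ )
    {
        if ( i != start && hypercall_preempt_check() )
            break;            /* caller creates a continuation at i */
        do_one_unit(i);       /* per-iteration work */
    }
    return i;                 /* resume point */
}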
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -63,7 +63,7 @@ static void increase_reservation(struct
for ( i = a->nr_done; i < a->nr_extents; i++ )
{
- if ( hypercall_preempt_check() )
+ if ( i != a->nr_done && hypercall_preempt_check() )
{
a->preempted = 1;
goto out;
@@ -109,7 +109,7 @@ static void populate_physmap(struct memo
for ( i = a->nr_done; i < a->nr_extents; i++ )
{
- if ( hypercall_preempt_check() )
+ if ( i != a->nr_done && hypercall_preempt_check() )
{
a->preempted = 1;
goto out;
@@ -268,7 +268,7 @@ static void decrease_reservation(struct
for ( i = a->nr_done; i < a->nr_extents; i++ )
{
- if ( hypercall_preempt_check() )
+ if ( i != a->nr_done && hypercall_preempt_check() )
{
a->preempted = 1;
goto out;
@@ -398,7 +398,8 @@ static long memory_exchange(XEN_GUEST_HA
i < (exch.in.nr_extents >> in_chunk_order);
i++ )
{
- if ( hypercall_preempt_check() )
+ if ( i != (exch.nr_exchanged >> in_chunk_order) &&
+ hypercall_preempt_check() )
{
exch.nr_exchanged = i << in_chunk_order;
rcu_unlock_domain(d);
--- a/xen/common/multicall.c
+++ b/xen/common/multicall.c
@@ -52,7 +52,7 @@ do_multicall(
for ( i = 0; !rc && i < nr_calls; i++ )
{
- if ( hypercall_preempt_check() )
+ if ( i && hypercall_preempt_check() )
goto preempted;
if ( unlikely(__copy_from_guest(&mcs->call, call_list, 1)) )
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -375,12 +375,12 @@ static DECLARE_SOFTIRQ_TASKLET(notify_do
static long guest_console_write(XEN_GUEST_HANDLE_PARAM(char) buffer, int count)
{
char kbuf[128];
- int kcount;
+ int kcount = 0;
struct domain *cd = current->domain;
while ( count > 0 )
{
- if ( hypercall_preempt_check() )
+ if ( kcount && hypercall_preempt_check() )
return hypercall_create_continuation(
__HYPERVISOR_console_io, "iih",
CONSOLEIO_write, count, buffer);

@@ -1,156 +0,0 @@
# Commit fd7bfce0395ace266159760e35dc49f7af3b90ce
# Date 2014-03-13 14:27:51 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: make hypercall preemption checks consistent
- never preempt on the first iteration (ensure forward progress)
- never preempt on the last iteration (pointless/wasteful)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Acked-by: Keir Fraser <keir@xen.org>
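The do_set_trap_table() hunks below show the rule in its purest form: the
preemption check moves from the top of the loop, where it could fire
before a single entry had been processed, to after
guest_handle_add_offset(), so a continuation can only be created once at
least one trap-table entry has been consumed.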
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2934,7 +2934,7 @@ long do_mmuext_op(
for ( i = 0; i < count; i++ )
{
- if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+ if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
{
rc = -EAGAIN;
break;
@@ -3481,7 +3481,7 @@ long do_mmu_update(
for ( i = 0; i < count; i++ )
{
- if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+ if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
{
rc = -EAGAIN;
break;
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -326,7 +326,7 @@ hap_set_allocation(struct domain *d, uns
else
pages -= d->arch.paging.hap.p2m_pages;
- while ( d->arch.paging.hap.total_pages != pages )
+ for ( ; ; )
{
if ( d->arch.paging.hap.total_pages < pages )
{
@@ -355,6 +355,8 @@ hap_set_allocation(struct domain *d, uns
d->arch.paging.hap.total_pages--;
free_domheap_page(pg);
}
+ else
+ break;
/* Check to see if we need to yield and try again */
if ( preempted && hypercall_preempt_check() )
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -242,7 +242,8 @@ p2m_pod_set_cache_target(struct p2m_doma
p2m_pod_cache_add(p2m, page, order);
- if ( hypercall_preempt_check() && preemptible )
+ if ( preemptible && pod_target != p2m->pod.count &&
+ hypercall_preempt_check() )
{
ret = -EAGAIN;
goto out;
@@ -286,7 +287,8 @@ p2m_pod_set_cache_target(struct p2m_doma
put_page(page+i);
- if ( hypercall_preempt_check() && preemptible )
+ if ( preemptible && pod_target != p2m->pod.count &&
+ hypercall_preempt_check() )
{
ret = -EAGAIN;
goto out;
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1674,7 +1674,7 @@ static unsigned int sh_set_allocation(st
SHADOW_PRINTK("current %i target %i\n",
d->arch.paging.shadow.total_pages, pages);
- while ( d->arch.paging.shadow.total_pages != pages )
+ for ( ; ; )
{
if ( d->arch.paging.shadow.total_pages < pages )
{
@@ -1709,6 +1709,8 @@ static unsigned int sh_set_allocation(st
d->arch.paging.shadow.total_pages--;
free_domheap_page(sp);
}
+ else
+ break;
/* Check to see if we need to yield and try again */
if ( preempted && hypercall_preempt_check() )
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3595,13 +3595,6 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
for ( ; ; )
{
- if ( hypercall_preempt_check() )
- {
- rc = hypercall_create_continuation(
- __HYPERVISOR_set_trap_table, "h", traps);
- break;
- }
-
if ( copy_from_guest(&cur, traps, 1) )
{
rc = -EFAULT;
@@ -3622,6 +3615,13 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
init_int80_direct_trap(curr);
guest_handle_add_offset(traps, 1);
+
+ if ( hypercall_preempt_check() )
+ {
+ rc = hypercall_create_continuation(
+ __HYPERVISOR_set_trap_table, "h", traps);
+ break;
+ }
}
return rc;
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -329,13 +329,6 @@ int compat_set_trap_table(XEN_GUEST_HAND
for ( ; ; )
{
- if ( hypercall_preempt_check() )
- {
- rc = hypercall_create_continuation(
- __HYPERVISOR_set_trap_table, "h", traps);
- break;
- }
-
if ( copy_from_guest(&cur, traps, 1) )
{
rc = -EFAULT;
@@ -353,6 +346,13 @@ int compat_set_trap_table(XEN_GUEST_HAND
init_int80_direct_trap(current);
guest_handle_add_offset(traps, 1);
+
+ if ( hypercall_preempt_check() )
+ {
+ rc = hypercall_create_continuation(
+ __HYPERVISOR_set_trap_table, "h", traps);
+ break;
+ }
}
return rc;

@@ -1,255 +0,0 @@
# Commit dd527061770789d8152b1dea68056987b202d87a
# Date 2014-03-17 16:45:04 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: fix RMRR handling
Removing mapped RMRR tracking structures in dma_pte_clear_one() is
wrong for two reasons: First, these regions may cover more than a
single page. And second, multiple devices (and hence multiple devices
assigned to any particular guest) may share a single RMRR (whether
assigning such devices to distinct guests is a safe thing to do is
another question).
Therefore move the removal of the tracking structures into the
counterpart function to the one doing the insertion -
intel_iommu_remove_device(), and add a reference count to the tracking
structure.
Further, for the handling of the mappings of the respective memory
regions to be correct, RMRRs must not overlap. Add a respective check
to acpi_parse_one_rmrr().
And finally, with all of this being VT-d specific, move the cleanup
of the list as well as the structure type definition where it belongs -
in VT-d specific rather than IOMMU generic code.
Note that this doesn't address yet another issue associated with RMRR
handling: The purpose of the RMRRs as well as the way the respective
IOMMU page table mappings get inserted both suggest that these regions
would need to be marked E820_RESERVED in all (HVM?) guests' memory
maps, yet nothing like this is being done in hvmloader. (For PV guests
this would also seem to be necessary, but may conflict with PV guests
possibly assuming there to be just a single E820 entry representing all
of its RAM.)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -412,9 +412,8 @@ static int iommu_populate_page_table(str
void iommu_domain_destroy(struct domain *d)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
- struct list_head *ioport_list, *rmrr_list, *tmp;
+ struct list_head *ioport_list, *tmp;
struct g2m_ioport *ioport;
- struct mapped_rmrr *mrmrr;
if ( !iommu_enabled || !hd->platform_ops )
return;
@@ -428,13 +427,6 @@ void iommu_domain_destroy(struct domain
list_del(&ioport->list);
xfree(ioport);
}
-
- list_for_each_safe ( rmrr_list, tmp, &hd->mapped_rmrrs )
- {
- mrmrr = list_entry(rmrr_list, struct mapped_rmrr, list);
- list_del(&mrmrr->list);
- xfree(mrmrr);
- }
}
int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -580,6 +580,16 @@ acpi_parse_one_rmrr(struct acpi_dmar_hea
if ( (ret = acpi_dmar_check_length(header, sizeof(*rmrr))) != 0 )
return ret;
+ list_for_each_entry(rmrru, &acpi_rmrr_units, list)
+ if ( base_addr <= rmrru->end_address && rmrru->base_address <= end_addr )
+ {
+ printk(XENLOG_ERR VTDPREFIX
+ "Overlapping RMRRs [%"PRIx64",%"PRIx64"] and [%"PRIx64",%"PRIx64"]\n",
+ rmrru->base_address, rmrru->end_address,
+ base_addr, end_addr);
+ return -EEXIST;
+ }
+
/* This check is here simply to detect when RMRR values are
* not properly represented in the system memory map and
* inform the user
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -42,6 +42,12 @@
#include "vtd.h"
#include "../ats.h"
+struct mapped_rmrr {
+ struct list_head list;
+ u64 base, end;
+ unsigned int count;
+};
+
/* Possible unfiltered LAPIC/MSI messages from untrusted sources? */
bool_t __read_mostly untrusted_msi;
@@ -619,7 +625,6 @@ static void dma_pte_clear_one(struct dom
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct dma_pte *page = NULL, *pte = NULL;
u64 pg_maddr;
- struct mapped_rmrr *mrmrr;
spin_lock(&hd->mapping_lock);
/* get last level pte */
@@ -648,21 +653,6 @@ static void dma_pte_clear_one(struct dom
__intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1);
unmap_vtd_domain_page(page);
-
- /* if the cleared address is between mapped RMRR region,
- * remove the mapped RMRR
- */
- spin_lock(&hd->mapping_lock);
- list_for_each_entry ( mrmrr, &hd->mapped_rmrrs, list )
- {
- if ( addr >= mrmrr->base && addr <= mrmrr->end )
- {
- list_del(&mrmrr->list);
- xfree(mrmrr);
- break;
- }
- }
- spin_unlock(&hd->mapping_lock);
}
static void iommu_free_pagetable(u64 pt_maddr, int level)
@@ -1700,10 +1690,17 @@ static int reassign_device_ownership(
void iommu_domain_teardown(struct domain *d)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct mapped_rmrr *mrmrr, *tmp;
if ( list_empty(&acpi_drhd_units) )
return;
+ list_for_each_entry_safe ( mrmrr, tmp, &hd->mapped_rmrrs, list )
+ {
+ list_del(&mrmrr->list);
+ xfree(mrmrr);
+ }
+
if ( iommu_use_hap_pt(d) )
return;
@@ -1848,14 +1845,17 @@ static int rmrr_identity_mapping(struct
ASSERT(rmrr->base_address < rmrr->end_address);
/*
- * No need to acquire hd->mapping_lock, as the only theoretical race is
- * with the insertion below (impossible due to holding pcidevs_lock).
+ * No need to acquire hd->mapping_lock: Both insertion and removal
+ * get done while holding pcidevs_lock.
*/
list_for_each_entry( mrmrr, &hd->mapped_rmrrs, list )
{
if ( mrmrr->base == rmrr->base_address &&
mrmrr->end == rmrr->end_address )
+ {
+ ++mrmrr->count;
return 0;
+ }
}
base = rmrr->base_address & PAGE_MASK_4K;
@@ -1876,9 +1876,8 @@ static int rmrr_identity_mapping(struct
return -ENOMEM;
mrmrr->base = rmrr->base_address;
mrmrr->end = rmrr->end_address;
- spin_lock(&hd->mapping_lock);
+ mrmrr->count = 1;
list_add_tail(&mrmrr->list, &hd->mapped_rmrrs);
- spin_unlock(&hd->mapping_lock);
return 0;
}
@@ -1940,17 +1939,52 @@ static int intel_iommu_remove_device(u8
if ( !pdev->domain )
return -EINVAL;
- /* If the device belongs to dom0, and it has RMRR, don't remove it
- * from dom0, because BIOS may use RMRR at booting time.
- */
- if ( pdev->domain->domain_id == 0 )
+ for_each_rmrr_device ( rmrr, bdf, i )
{
- for_each_rmrr_device ( rmrr, bdf, i )
+ struct hvm_iommu *hd;
+ struct mapped_rmrr *mrmrr, *tmp;
+
+ if ( rmrr->segment != pdev->seg ||
+ PCI_BUS(bdf) != pdev->bus ||
+ PCI_DEVFN2(bdf) != devfn )
+ continue;
+
+ /*
+ * If the device belongs to dom0, and it has RMRR, don't remove
+ * it from dom0, because BIOS may use RMRR at booting time.
+ */
+ if ( is_hardware_domain(pdev->domain) )
+ return 0;
+
+ hd = domain_hvm_iommu(pdev->domain);
+
+ /*
+ * No need to acquire hd->mapping_lock: Both insertion and removal
+ * get done while holding pcidevs_lock.
+ */
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ list_for_each_entry_safe ( mrmrr, tmp, &hd->mapped_rmrrs, list )
{
- if ( rmrr->segment == pdev->seg &&
- PCI_BUS(bdf) == pdev->bus &&
- PCI_DEVFN2(bdf) == devfn )
- return 0;
+ unsigned long base_pfn, end_pfn;
+
+ if ( rmrr->base_address != mrmrr->base ||
+ rmrr->end_address != mrmrr->end )
+ continue;
+
+ if ( --mrmrr->count )
+ break;
+
+ base_pfn = (mrmrr->base & PAGE_MASK_4K) >> PAGE_SHIFT_4K;
+ end_pfn = PAGE_ALIGN_4K(mrmrr->end) >> PAGE_SHIFT_4K;
+ while ( base_pfn < end_pfn )
+ {
+ if ( intel_iommu_unmap_page(pdev->domain, base_pfn) )
+ return -ENXIO;
+ base_pfn++;
+ }
+
+ list_del(&mrmrr->list);
+ xfree(mrmrr);
}
}
--- a/xen/include/xen/hvm/iommu.h
+++ b/xen/include/xen/hvm/iommu.h
@@ -29,12 +29,6 @@ struct g2m_ioport {
unsigned int np;
};
-struct mapped_rmrr {
- struct list_head list;
- u64 base;
- u64 end;
-};
-
struct hvm_iommu {
u64 pgd_maddr; /* io page directory machine address */
spinlock_t mapping_lock; /* io page table lock */

@@ -1,62 +0,0 @@
# Commit 96d1b237ae9b2f2718bb1c59820701f17d3d86e0
# Date 2014-03-17 16:47:22 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/Intel: work around Xeon 7400 series erratum AAI65
Linux commit 40e2d7f9b5dae048789c64672bf3027fbb663ffa ("x86 idle:
Repair large-server 50-watt idle-power regression") tells us that this
applies not just to the named Xeon 7400 series, but also to NHM-EX and
WSM-EX; sadly Intel's documentation is so badly searchable that I
wasn't able to locate the respective errata (and hence can't quote
their numbers here).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -296,6 +296,9 @@ void mwait_idle_with_hints(unsigned int
unsigned int cpu = smp_processor_id();
s_time_t expires = per_cpu(timer_deadline, cpu);
+ if ( boot_cpu_has(X86_FEATURE_CLFLUSH_MONITOR) )
+ clflush((void *)&mwait_wakeup(cpu));
+
__monitor((void *)&mwait_wakeup(cpu), 0, 0);
smp_mb();
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -147,6 +147,9 @@ void __devinit early_intel_workaround(st
/*
* P4 Xeon errata 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
+ *
+ * Xeon 7400 erratum AAI65 (and further newer Xeons)
+ * MONITOR/MWAIT may have excessive false wakeups
*/
static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
@@ -161,6 +164,10 @@ static void __devinit Intel_errata_worka
wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
}
}
+
+ if (c->x86 == 6 && cpu_has_clflush &&
+ (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
+ set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
}
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -71,6 +71,7 @@
#define X86_FEATURE_TSC_RELIABLE (3*32+12) /* TSC is known to be reliable */
#define X86_FEATURE_XTOPOLOGY (3*32+13) /* cpu topology enum extensions */
#define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
+#define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with monitor */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */

@@ -1,57 +0,0 @@
# Commit b3d2f8b2cba9fce5bc8995612d0d13fcefec7769
# Date 2014-03-24 10:48:03 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: fix determination of bit count for struct domain allocations
We can't just add in the hole shift value, as the hole may be at or
above the 44-bit boundary. Instead we need to determine the total bit
count until reaching 32 significant (not squashed out) bits in PFN
representations.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
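A worked example may help (editor's illustration, assuming a hypothetical
hole): with 4k pages, bits starts out as 32 + PAGE_SHIFT = 44. If
pfn_hole_mask squashes out two of the low 32 PFN bits, only sig = 30 of
those are significant, so the loop in the hunk below walks upwards from
PFN bit 32 until it has collected two more significant bits, ending with
bits = 46 (assuming the hole does not extend above bit 31). The old
calculation would instead have added the full hole shift to 44 regardless
of where the hole sits, which goes wrong once the hole lies at or above
the 44-bit boundary.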
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -180,6 +180,28 @@ void dump_pageframe_info(struct domain *
spin_unlock(&d->page_alloc_lock);
}
+/*
+ * The hole may be at or above the 44-bit boundary, so we need to determine
+ * the total bit count until reaching 32 significant (not squashed out) bits
+ * in PFN representations.
+ * Note that the way "bits" gets initialized/updated/bounds-checked guarantees
+ * that the function will never return zero, and hence will never be called
+ * more than once (which is important due to it being deliberately placed in
+ * .init.text).
+ */
+static unsigned int __init noinline _domain_struct_bits(void)
+{
+ unsigned int bits = 32 + PAGE_SHIFT;
+ unsigned int sig = hweight32(~pfn_hole_mask);
+ unsigned int mask = pfn_hole_mask >> 32;
+
+ for ( ; bits < BITS_PER_LONG && sig < 32; ++bits, mask >>= 1 )
+ if ( !(mask & 1) )
+ ++sig;
+
+ return bits;
+}
+
struct domain *alloc_domain_struct(void)
{
struct domain *d;
@@ -187,7 +209,10 @@ struct domain *alloc_domain_struct(void)
* We pack the PDX of the domain structure into a 32-bit field within
* the page_info structure. Hence the MEMF_bits() restriction.
*/
- unsigned int bits = 32 + PAGE_SHIFT + pfn_pdx_hole_shift;
+ static unsigned int __read_mostly bits;
+
+ if ( unlikely(!bits) )
+ bits = _domain_struct_bits();
BUILD_BUG_ON(sizeof(*d) > PAGE_SIZE);
d = alloc_xenheap_pages(0, MEMF_bits(bits));

@@ -1,102 +0,0 @@
References: bnc#867910 CVE-2014-2599 XSA-89
# Commit 0fe53c4f279e1a8ef913e71ed000236d21ce96de
# Date 2014-03-25 15:23:57 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: enforce preemption in HVM_set_mem_access / p2m_set_mem_access()
Processing up to 4G PFNs may take almost arbitrarily long, so
preemption is needed here.
This is CVE-2014-2599 / XSA-89.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
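The continuation protocol implemented below is worth spelling out:
p2m_set_mem_access() now returns the number of PFNs still left when it
stops early; the HVMOP wrapper then advances first_pfn by the amount of
work already done (a.nr - rc), shrinks nr to the remainder, copies the
updated argument block back to the guest, and returns -EAGAIN so that a
hypercall continuation picks up exactly where processing stopped.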
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4465,6 +4465,15 @@ long do_hvm_op(unsigned long op, XEN_GUE
goto param_fail5;
rc = p2m_set_mem_access(d, a.first_pfn, a.nr, a.hvmmem_access);
+ if ( rc > 0 )
+ {
+ a.first_pfn += a.nr - rc;
+ a.nr = rc;
+ if ( __copy_to_guest(arg, &a, 1) )
+ rc = -EFAULT;
+ else
+ rc = -EAGAIN;
+ }
param_fail5:
rcu_unlock_domain(d);
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1366,15 +1366,14 @@ void p2m_mem_access_resume(struct domain
/* Set access type for a region of pfns.
* If start_pfn == -1ul, sets the default access type */
-int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
- uint32_t nr, hvmmem_access_t access)
+long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
+ hvmmem_access_t access)
{
struct p2m_domain *p2m = p2m_get_hostp2m(d);
- unsigned long pfn;
p2m_access_t a, _a;
p2m_type_t t;
mfn_t mfn;
- int rc = 0;
+ long rc;
/* N.B. _not_ static: initializer depends on p2m->default_access */
p2m_access_t memaccess[] = {
@@ -1397,14 +1396,17 @@ int p2m_set_mem_access(struct domain *d,
a = memaccess[access];
/* If request to set default access */
- if ( start_pfn == ~0ull )
+ if ( pfn == ~0ul )
{
p2m->default_access = a;
return 0;
}
+ if ( !nr )
+ return 0;
+
p2m_lock(p2m);
- for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
+ for ( ; ; ++pfn )
{
mfn = p2m->get_entry(p2m, pfn, &t, &_a, 0, NULL);
if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a) == 0 )
@@ -1412,6 +1414,13 @@ int p2m_set_mem_access(struct domain *d,
rc = -ENOMEM;
break;
}
+
+ /* Check for continuation if it's not the last interation. */
+ if ( !--nr || hypercall_preempt_check() )
+ {
+ rc = nr;
+ break;
+ }
}
p2m_unlock(p2m);
return rc;
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -576,8 +576,8 @@ void p2m_mem_access_resume(struct domain
/* Set access type for a region of pfns.
* If start_pfn == -1ul, sets the default access type */
-int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
- uint32_t nr, hvmmem_access_t access);
+long p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
+ uint32_t nr, hvmmem_access_t access);
/* Get access type for a pfn
* If pfn == -1ul, gets the default access type */

@@ -1,141 +0,0 @@
# Commit ef437690af8b75e6758dce77af75a22b63982883
# Date 2014-03-28 13:33:34 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: correct CPUID leaf 80000008 handling
CPUID[80000008].EAX[23:16] has been given the meaning of the guest
physical address restriction (in case it needs to be smaller than the
host's), hence we need to mirror that into vCPUID[80000008].EAX[7:0].
Enforce a lower limit at the same time, as well as a fixed value for
the virtual address bits, and zero for the guest physical address ones.
In order for the vMTRR code to see these overrides we need to make it
call hvm_cpuid() instead of domain_cpuid(), which in turn requires
special casing (and relaxing) the controlling domain.
This additionally should hide an ordering problem in the tools: Both
xend and xl appear to be restoring a guest from its image before
setting up the CPUID policy in the hypervisor, resulting in
domain_cpuid() returning all zeros and hence the check in
mtrr_var_range_msr_set() failing if the guest previously had more than
the minimum 36 physical address bits.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
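For reference, the leaf layout relied upon here: CPUID.80000008h:EAX[7:0]
is the physical address width, EAX[15:8] the linear (virtual) address
width, and EAX[23:16] the guest physical address width (zero meaning
"same as EAX[7:0]"). The 0x3000/0x2000 constants written below therefore
fix the virtual width at 48 bits for guests seeing long mode and at 32
bits otherwise, while zeroing the guest physical field.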
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2885,6 +2885,8 @@ void hvm_cpuid(unsigned int input, unsig
switch ( input )
{
+ unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
+
case 0x1:
/* Fix up VLAPIC details. */
*ebx &= 0x00FFFFFFu;
@@ -2918,8 +2920,6 @@ void hvm_cpuid(unsigned int input, unsig
*edx = v->vcpu_id * 2;
break;
case 0xd:
- {
- unsigned int sub_leaf, _eax, _ebx, _ecx, _edx;
/* EBX value of main leaf 0 depends on enabled xsave features */
if ( count == 0 && v->arch.xcr0 )
{
@@ -2936,7 +2936,7 @@ void hvm_cpuid(unsigned int input, unsig
}
}
break;
- }
+
case 0x80000001:
/* We expose RDTSCP feature to guest only when
tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */
@@ -2950,6 +2950,23 @@ void hvm_cpuid(unsigned int input, unsig
if ( !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
*edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
break;
+
+ case 0x80000008:
+ count = cpuid_eax(0x80000008);
+ count = (count >> 16) & 0xff ?: count & 0xff;
+ if ( (*eax & 0xff) > count )
+ *eax = (*eax & ~0xff) | count;
+
+ hvm_cpuid(1, NULL, NULL, NULL, &_edx);
+ count = _edx & (cpufeat_mask(X86_FEATURE_PAE) |
+ cpufeat_mask(X86_FEATURE_PSE36)) ? 36 : 32;
+ if ( (*eax & 0xff) < count )
+ *eax = (*eax & ~0xff) | count;
+
+ hvm_cpuid(0x80000001, NULL, NULL, NULL, &_edx);
+ *eax = (*eax & ~0xffff00) | (_edx & cpufeat_mask(X86_FEATURE_LM)
+ ? 0x3000 : 0x2000);
+ break;
}
}
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -145,7 +145,7 @@ bool_t is_var_mtrr_overlapped(struct mtr
static int hvm_mtrr_pat_init(void)
{
- unsigned int i, j, phys_addr;
+ unsigned int i, j;
memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
for ( i = 0; i < MTRR_NUM_TYPES; i++ )
@@ -172,11 +172,7 @@ static int hvm_mtrr_pat_init(void)
}
}
- phys_addr = 36;
- if ( cpuid_eax(0x80000000) >= 0x80000008 )
- phys_addr = (uint8_t)cpuid_eax(0x80000008);
-
- size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+ size_or_mask = ~((1 << (paddr_bits - PAGE_SHIFT)) - 1);
return 0;
}
@@ -455,7 +451,7 @@ bool_t mtrr_fix_range_msr_set(struct mtr
bool_t mtrr_var_range_msr_set(
struct domain *d, struct mtrr_state *m, uint32_t msr, uint64_t msr_content)
{
- uint32_t index, type, phys_addr, eax, ebx, ecx, edx;
+ uint32_t index, type, phys_addr, eax;
uint64_t msr_mask;
uint64_t *var_range_base = (uint64_t*)m->var_ranges;
@@ -468,16 +464,21 @@ bool_t mtrr_var_range_msr_set(
type == 4 || type == 5 || type == 6)) )
return 0;
- phys_addr = 36;
- domain_cpuid(d, 0x80000000, 0, &eax, &ebx, &ecx, &edx);
- if ( eax >= 0x80000008 )
+ if ( d == current->domain )
{
- domain_cpuid(d, 0x80000008, 0, &eax, &ebx, &ecx, &edx);
- phys_addr = (uint8_t)eax;
+ phys_addr = 36;
+ hvm_cpuid(0x80000000, &eax, NULL, NULL, NULL);
+ if ( eax >= 0x80000008 )
+ {
+ hvm_cpuid(0x80000008, &eax, NULL, NULL, NULL);
+ phys_addr = (uint8_t)eax;
+ }
}
+ else
+ phys_addr = paddr_bits;
msr_mask = ~((((uint64_t)1) << phys_addr) - 1);
msr_mask |= (index & 1) ? 0x7ffUL : 0xf00UL;
- if ( unlikely(msr_content && (msr_content & msr_mask)) )
+ if ( unlikely(msr_content & msr_mask) )
{
HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
msr_content);

@@ -1,34 +0,0 @@
# Commit fce79f8ce91dc45f3a4d699ee67c49e6cbeb1197
# Date 2014-04-01 16:49:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VMX: fix PAT value seen by guest
The XSA-60 fixes introduced a window during which the guest PAT gets
forced to all zeros. This shouldn't be visible to the guest. Therefore
we need to intercept PAT MSR accesses during that time period.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Liu Jinsong <jinsong.liu@intel.com>
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -984,6 +984,8 @@ static void vmx_handle_cd(struct vcpu *v
vmx_get_guest_pat(v, pat);
vmx_set_guest_pat(v, uc_pat);
+ vmx_enable_intercept_for_msr(v, MSR_IA32_CR_PAT,
+ MSR_TYPE_R | MSR_TYPE_W);
wbinvd(); /* flush possibly polluted cache */
hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
@@ -993,6 +995,9 @@ static void vmx_handle_cd(struct vcpu *v
{
v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
vmx_set_guest_pat(v, *pat);
+ if ( !iommu_enabled || iommu_snoop )
+ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT,
+ MSR_TYPE_R | MSR_TYPE_W);
hvm_asid_flush_vcpu(v); /* no need to flush cache */
}
}

@@ -1,38 +0,0 @@
# Commit 088ee1d47b65d6bb92de61b404805f4ca92e3240
# Date 2014-04-03 12:08:43 +0100
# Author Jan Beulich <JBeulich@suse.com>
# Committer Tim Deegan <tim@xen.org>
x86/mm: fix checks against max_mapped_pfn
This value is an inclusive one, i.e. this fixes an off-by-one in memory
sharing and an off-by-two in shadow code.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1268,8 +1268,8 @@ int relinquish_shared_pages(struct domai
return 0;
p2m_lock(p2m);
- for (gfn = p2m->next_shared_gfn_to_relinquish;
- gfn < p2m->max_mapped_pfn; gfn++ )
+ for ( gfn = p2m->next_shared_gfn_to_relinquish;
+ gfn <= p2m->max_mapped_pfn; gfn++ )
{
p2m_access_t a;
p2m_type_t t;
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3489,9 +3489,7 @@ int shadow_track_dirty_vram(struct domai
struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
struct p2m_domain *p2m = p2m_get_hostp2m(d);
- if (end_pfn < begin_pfn
- || begin_pfn > p2m->max_mapped_pfn
- || end_pfn >= p2m->max_mapped_pfn)
+ if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
return -EINVAL;
/* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */

@@ -1,31 +0,0 @@
# Commit 70e79fad6dc6f533ff83ee23b8d13de5a696d896
# Date 2014-04-09 16:13:25 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/AMD: feature masking is unavailable on Fam11
Reported-by: Aravind Gopalakrishnan<aravind.gopalakrishnan@amd.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -107,6 +107,10 @@ static void __devinit set_cpuidmask(cons
ASSERT((status == not_parsed) && (smp_processor_id() == 0));
status = no_mask;
+ /* Fam11 doesn't support masking at all. */
+ if (c->x86 == 0x11)
+ return;
+
if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx &
opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx)) {
feat_ecx = opt_cpuid_mask_ecx;
@@ -176,7 +180,6 @@ static void __devinit set_cpuidmask(cons
extfeat_ecx, extfeat_edx);
setmask:
- /* FIXME check if processor supports CPUID masking */
/* AMD processors prior to family 10h required a 32-bit password */
if (c->x86 >= 0x10) {
wrmsr(MSR_K8_FEATURE_MASK, feat_edx, feat_ecx);

@@ -1,59 +0,0 @@
# Commit 8f7f6ab879a9ad9d2bf66b8c6b46a0653086b79f
# Date 2014-04-11 11:25:56 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: eliminate 64k ports limitation
The introduction of FIFO event channels claimed to support over 100k
ports, but failed to widen a number of 16-bit variables/operations.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
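To make the failure mode concrete: evtchn_port_t is a u32, so once a
FIFO-capable guest binds a port above 65535, storing it in one of the u16
fields below silently truncates it (port 65536 becomes 0), corrupting the
interdomain/pirq/virq bookkeeping; hence every port-carrying field is
widened.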
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -275,12 +275,12 @@ static long evtchn_bind_interdomain(evtc
goto out;
lchn->u.interdomain.remote_dom = rd;
- lchn->u.interdomain.remote_port = (u16)rport;
+ lchn->u.interdomain.remote_port = rport;
lchn->state = ECS_INTERDOMAIN;
evtchn_port_init(ld, lchn);
rchn->u.interdomain.remote_dom = ld;
- rchn->u.interdomain.remote_port = (u16)lport;
+ rchn->u.interdomain.remote_port = lport;
rchn->state = ECS_INTERDOMAIN;
/*
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -86,13 +86,13 @@ struct evtchn
domid_t remote_domid;
} unbound; /* state == ECS_UNBOUND */
struct {
- u16 remote_port;
+ evtchn_port_t remote_port;
struct domain *remote_dom;
} interdomain; /* state == ECS_INTERDOMAIN */
struct {
- u16 irq;
- u16 next_port;
- u16 prev_port;
+ u32 irq;
+ evtchn_port_t next_port;
+ evtchn_port_t prev_port;
} pirq; /* state == ECS_PIRQ */
u16 virq; /* state == ECS_VIRQ */
} u;
@@ -190,7 +190,7 @@ struct vcpu
atomic_t pause_count;
/* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
- u16 virq_to_evtchn[NR_VIRQS];
+ evtchn_port_t virq_to_evtchn[NR_VIRQS];
spinlock_t virq_lock;
/* Bitmask of CPUs on which this VCPU may run. */

@@ -1,24 +0,0 @@
# Commit 1ca73aaf51eba14256794bf045c2eb01e88e1324
# Date 2014-04-14 12:50:56 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/nested HAP: don't BUG() on legitimate error
p2m_set_entry() can fail without there being a bug in the code - crash
the domain rather than the host in that case.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/hap/nested_hap.c
+++ b/xen/arch/x86/mm/hap/nested_hap.c
@@ -133,7 +133,7 @@ nestedhap_fix_p2m(struct vcpu *v, struct
gdprintk(XENLOG_ERR,
"failed to set entry for %#"PRIx64" -> %#"PRIx64"\n",
L2_gpa, L0_gpa);
- BUG();
+ domain_crash(p2m->domain);
}
}

@@ -1,23 +0,0 @@
# Commit c82fbfe6ec8be597218eb943641d1f7a81c4c01e
# Date 2014-04-14 15:14:47 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HAP: also flush TLB when altering a present 1G or intermediate entry
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -711,9 +711,8 @@ hap_write_p2m_entry(struct vcpu *v, unsi
}
safe_write_pte(p, new);
- if ( (old_flags & _PAGE_PRESENT)
- && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) )
- flush_tlb_mask(d->domain_dirty_cpumask);
+ if ( old_flags & _PAGE_PRESENT )
+ flush_tlb_mask(d->domain_dirty_cpumask);
paging_unlock(d);

@@ -1,21 +0,0 @@
# Commit 815dc9f1dba5782dcef77d8a002a11f5b1e5cc37
# Date 2014-04-23 15:07:11 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: add missing break in dom0_pit_access()
Coverity ID 1203045
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
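For illustration only, a hypothetical switch modeled on the shape of dom0_pit_access(), showing how the missing break lets one case fall into the next:

    #include <stdio.h>

    static int classify(int port, int is_read)
    {
        switch ( port )
        {
        case 0x43:
            if ( is_read )
                return 1;
            break;         /* without this, a write to 0x43 falls into 0x61 */
        case 0x61:
            return 2;
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", classify(0x43, 0));   /* 0 with the break, 2 without */
        return 0;
    }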
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1632,6 +1632,7 @@ int dom0_pit_access(struct ioreq *ioreq)
outb(ioreq->data, PIT_MODE);
return 1;
}
+ break;
case 0x61:
if ( ioreq->dir == IOREQ_READ )

@@ -1,215 +0,0 @@
References: bnc#826717 CVE-2013-3495 XSA-59
# Commit d061d200eb92bcb1d86f9b55c6de73e35ce63fdf
# Date 2014-04-25 12:11:55 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: suppress UR signaling for server chipsets
Unsupported Requests can be signaled for malformed writes to the MSI
address region, e.g. due to buggy or malicious DMA set up to that
region. These should normally result in IOMMU faults, but don't on
the server chipsets dealt with here.
IDs 0xe00, 0xe01, and 0xe04 ... 0xe0b (Ivytown) aren't needed here -
Intel confirmed the issue to be fixed in hardware there.
This is CVE-2013-3495 / XSA-59.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Don Dugger <donald.d.dugger@intel.com>
Acked-by: Tim Deegan <tim@xen.org>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -27,6 +27,7 @@
#include <xen/softirq.h>
#include <xen/time.h>
#include <xen/pci.h>
+#include <xen/pci_ids.h>
#include <xen/pci_regs.h>
#include <xen/keyhandler.h>
#include <asm/msi.h>
@@ -390,12 +391,68 @@ void __init pci_vtd_quirk(struct pci_dev
int bus = pdev->bus;
int dev = PCI_SLOT(pdev->devfn);
int func = PCI_FUNC(pdev->devfn);
- int id, val;
+ int pos;
+ u32 val;
- id = pci_conf_read32(seg, bus, dev, func, 0);
- if ( id == 0x342e8086 || id == 0x3c288086 )
+ if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
+ PCI_VENDOR_ID_INTEL )
+ return;
+
+ switch ( pci_conf_read16(seg, bus, dev, func, PCI_DEVICE_ID) )
{
+ case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */
+ case 0x3c28: /* Sandybridge */
val = pci_conf_read32(seg, bus, dev, func, 0x1AC);
pci_conf_write32(seg, bus, dev, func, 0x1AC, val | (1 << 31));
+ break;
+
+ /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */
+ case 0x3400 ... 0x3407: /* host bridges */
+ case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */
+ /* JasperForest (Intel Xeon Processor C5500/C3500 */
+ case 0x3700 ... 0x370f: /* host bridges */
+ case 0x3720 ... 0x3724: /* root ports */
+ /* Sandybridge-EP (Romley) */
+ case 0x3c00: /* host bridge */
+ case 0x3c01 ... 0x3c0b: /* root ports */
+ pos = pci_find_ext_capability(seg, bus, pdev->devfn,
+ PCI_EXT_CAP_ID_ERR);
+ if ( !pos )
+ {
+ pos = pci_find_ext_capability(seg, bus, pdev->devfn,
+ PCI_EXT_CAP_ID_VNDR);
+ while ( pos )
+ {
+ val = pci_conf_read32(seg, bus, dev, func, pos + PCI_VNDR_HEADER);
+ if ( PCI_VNDR_HEADER_ID(val) == 4 && PCI_VNDR_HEADER_REV(val) == 1 )
+ {
+ pos += PCI_VNDR_HEADER;
+ break;
+ }
+ pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos,
+ PCI_EXT_CAP_ID_VNDR);
+ }
+ }
+ if ( !pos )
+ {
+ printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n",
+ seg, bus, dev, func);
+ break;
+ }
+
+ val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK);
+ pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
+ val | PCI_ERR_UNC_UNSUP);
+ val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
+ pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
+ val | PCI_ERR_COR_ADV_NFAT);
+
+ /* XPUNCERRMSK Send Completion with Unsupported Request */
+ val = pci_conf_read32(seg, bus, dev, func, 0x20c);
+ pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4));
+
+ printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
+ seg, bus, dev, func);
+ break;
}
}
--- a/xen/drivers/pci/pci.c
+++ b/xen/drivers/pci/pci.c
@@ -66,23 +66,33 @@ int pci_find_next_cap(u16 seg, u8 bus, u
/**
* pci_find_ext_capability - Find an extended capability
- * @dev: PCI device to query
+ * @seg/@bus/@devfn: PCI device to query
* @cap: capability code
*
* Returns the address of the requested extended capability structure
* within the device's PCI configuration space or 0 if the device does
- * not support it. Possible values for @cap:
- *
- * %PCI_EXT_CAP_ID_ERR Advanced Error Reporting
- * %PCI_EXT_CAP_ID_VC Virtual Channel
- * %PCI_EXT_CAP_ID_DSN Device Serial Number
- * %PCI_EXT_CAP_ID_PWR Power Budgeting
+ * not support it.
*/
int pci_find_ext_capability(int seg, int bus, int devfn, int cap)
{
+ return pci_find_next_ext_capability(seg, bus, devfn, 0, cap);
+}
+
+/**
+ * pci_find_next_ext_capability - Find another extended capability
+ * @seg/@bus/@devfn: PCI device to query
+ * @pos: starting position
+ * @cap: capability code
+ *
+ * Returns the address of the requested extended capability structure
+ * within the device's PCI configuration space or 0 if the device does
+ * not support it.
+ */
+int pci_find_next_ext_capability(int seg, int bus, int devfn, int start, int cap)
+{
u32 header;
int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */
- int pos = 0x100;
+ int pos = max(start, 0x100);
header = pci_conf_read32(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
@@ -92,9 +102,10 @@ int pci_find_ext_capability(int seg, int
*/
if ( (header == 0) || (header == -1) )
return 0;
+ ASSERT(start != pos || PCI_EXT_CAP_ID(header) == cap);
while ( ttl-- > 0 ) {
- if ( PCI_EXT_CAP_ID(header) == cap )
+ if ( PCI_EXT_CAP_ID(header) == cap && pos != start )
return pos;
pos = PCI_EXT_CAP_NEXT(header);
if ( pos < 0x100 )
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -140,6 +140,7 @@ int pci_mmcfg_write(unsigned int seg, un
int pci_find_cap_offset(u16 seg, u8 bus, u8 dev, u8 func, u8 cap);
int pci_find_next_cap(u16 seg, u8 bus, unsigned int devfn, u8 pos, int cap);
int pci_find_ext_capability(int seg, int bus, int devfn, int cap);
+int pci_find_next_ext_capability(int seg, int bus, int devfn, int pos, int cap);
const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus,
unsigned int *dev, unsigned int *func);
--- /dev/null
+++ b/xen/include/xen/pci_ids.h
@@ -0,0 +1,9 @@
+#define PCI_VENDOR_ID_AMD 0x1022
+
+#define PCI_VENDOR_ID_NVIDIA 0x10de
+
+#define PCI_VENDOR_ID_OXSEMI 0x1415
+
+#define PCI_VENDOR_ID_BROADCOM 0x14e4
+
+#define PCI_VENDOR_ID_INTEL 0x8086
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -431,6 +431,7 @@
#define PCI_EXT_CAP_ID_VC 2
#define PCI_EXT_CAP_ID_DSN 3
#define PCI_EXT_CAP_ID_PWR 4
+#define PCI_EXT_CAP_ID_VNDR 11
#define PCI_EXT_CAP_ID_ACS 13
#define PCI_EXT_CAP_ID_ARI 14
#define PCI_EXT_CAP_ID_ATS 15
@@ -459,6 +460,7 @@
#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */
#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */
#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */
+#define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */
#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */
/* Same bits as above */
#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */
@@ -510,6 +512,12 @@
#define PCI_PWR_CAP 12 /* Capability */
#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */
+/* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */
+#define PCI_VNDR_HEADER 4 /* Vendor-Specific Header */
+#define PCI_VNDR_HEADER_ID(x) ((x) & 0xffff)
+#define PCI_VNDR_HEADER_REV(x) (((x) >> 16) & 0xf)
+#define PCI_VNDR_HEADER_LEN(x) (((x) >> 20) & 0xfff)
+
/*
* Hypertransport sub capability types
*

@@ -1,66 +0,0 @@
References: bnc#826717 CVE-2013-3495 XSA-59
# Commit d6cb14b34ffc2a830022d059f1aa22bf19dcf55f
# Date 2014-04-25 12:12:38 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: suppress UR signaling for desktop chipsets
Unsupported Requests can be signaled for malformed writes to the MSI
address region, e.g. due to buggy or malicious DMA set up to that
region. These should normally result in IOMMU faults, but don't on
the desktop chipsets dealt with here.
This is CVE-2013-3495 / XSA-59.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Don Dugger <donald.d.dugger@intel.com>
Acked-by: Tim Deegan <tim@xen.org>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -393,6 +393,8 @@ void __init pci_vtd_quirk(struct pci_dev
int func = PCI_FUNC(pdev->devfn);
int pos;
u32 val;
+ u64 bar;
+ paddr_t pa;
if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
PCI_VENDOR_ID_INTEL )
@@ -454,5 +456,33 @@ void __init pci_vtd_quirk(struct pci_dev
printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
seg, bus, dev, func);
break;
+
+ case 0x100: case 0x104: case 0x108: /* Sandybridge */
+ case 0x150: case 0x154: case 0x158: /* Ivybridge */
+ case 0xa04: /* Haswell ULT */
+ case 0xc00: case 0xc04: case 0xc08: /* Haswell */
+ bar = pci_conf_read32(seg, bus, dev, func, 0x6c);
+ bar = (bar << 32) | pci_conf_read32(seg, bus, dev, func, 0x68);
+ pa = bar & 0x7fffff000; /* bits 12...38 */
+ if ( (bar & 1) && pa &&
+ page_is_ram_type(paddr_to_pfn(pa), RAM_TYPE_RESERVED) )
+ {
+ u32 __iomem *va = ioremap(pa, PAGE_SIZE);
+
+ if ( va )
+ {
+ __set_bit(0x1c8 * 8 + 20, va);
+ iounmap(va);
+ printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
+ seg, bus, dev, func);
+ }
+ else
+ printk(XENLOG_ERR "Could not map %"PRIpaddr" for %04x:%02x:%02x.%u\n",
+ pa, seg, bus, dev, func);
+ }
+ else
+ printk(XENLOG_WARNING "Bogus DMIBAR %#"PRIx64" on %04x:%02x:%02x.%u\n",
+ bar, seg, bus, dev, func);
+ break;
}
}

@@ -1,187 +0,0 @@
# Commit 1a2a390a560e8319a6be98c7ab6cfaebd230f67e
# Date 2014-04-25 12:13:31 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
passthrough: allow to suppress SERR and PERR signaling altogether
This is just to have a workaround at hand in case other chipsets (not
covered by the previous two patches) also have similar issues.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Don Dugger <donald.d.dugger@intel.com>
Acked-by: Tim Deegan <tim@xen.org>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -772,6 +772,14 @@ Defaults to booting secondary processors
Default: `on`
+### pci
+> `= {no-}serr | {no-}perr`
+
+Disable signaling of SERR (system errors) and/or PERR (parity errors)
+on all PCI devices.
+
+Default: Signaling left as set by firmware.
+
### pci-phantom
> `=[<seg>:]<bus>:<device>,<stride>`
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -154,6 +154,115 @@ static void __init parse_phantom_dev(cha
}
custom_param("pci-phantom", parse_phantom_dev);
+static u16 __read_mostly command_mask;
+static u16 __read_mostly bridge_ctl_mask;
+
+/*
+ * The 'pci' parameter controls certain PCI device aspects.
+ * Optional comma separated value may contain:
+ *
+ * serr don't suppress system errors (default)
+ * no-serr suppress system errors
+ * perr don't suppress parity errors (default)
+ * no-perr suppress parity errors
+ */
+static void __init parse_pci_param(char *s)
+{
+ char *ss;
+
+ do {
+ bool_t on = !!strncmp(s, "no-", 3);
+ u16 cmd_mask = 0, brctl_mask = 0;
+
+ if ( !on )
+ s += 3;
+
+ ss = strchr(s, ',');
+ if ( ss )
+ *ss = '\0';
+
+ if ( !strcmp(s, "serr") )
+ {
+ cmd_mask = PCI_COMMAND_SERR;
+ brctl_mask = PCI_BRIDGE_CTL_SERR | PCI_BRIDGE_CTL_DTMR_SERR;
+ }
+ else if ( !strcmp(s, "perr") )
+ {
+ cmd_mask = PCI_COMMAND_PARITY;
+ brctl_mask = PCI_BRIDGE_CTL_PARITY;
+ }
+
+ if ( on )
+ {
+ command_mask &= ~cmd_mask;
+ bridge_ctl_mask &= ~brctl_mask;
+ }
+ else
+ {
+ command_mask |= cmd_mask;
+ bridge_ctl_mask |= brctl_mask;
+ }
+
+ s = ss + 1;
+ } while ( ss );
+}
+custom_param("pci", parse_pci_param);
+
+static void check_pdev(const struct pci_dev *pdev)
+{
+#define PCI_STATUS_CHECK \
+ (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | \
+ PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | \
+ PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY)
+ u16 seg = pdev->seg;
+ u8 bus = pdev->bus;
+ u8 dev = PCI_SLOT(pdev->devfn);
+ u8 func = PCI_FUNC(pdev->devfn);
+ u16 val;
+
+ if ( command_mask )
+ {
+ val = pci_conf_read16(seg, bus, dev, func, PCI_COMMAND);
+ if ( val & command_mask )
+ pci_conf_write16(seg, bus, dev, func, PCI_COMMAND,
+ val & ~command_mask);
+ val = pci_conf_read16(seg, bus, dev, func, PCI_STATUS);
+ if ( val & PCI_STATUS_CHECK )
+ {
+ printk(XENLOG_INFO "%04x:%02x:%02x.%u status %04x -> %04x\n",
+ seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
+ pci_conf_write16(seg, bus, dev, func, PCI_STATUS,
+ val & PCI_STATUS_CHECK);
+ }
+ }
+
+ switch ( pci_conf_read8(seg, bus, dev, func, PCI_HEADER_TYPE) & 0x7f )
+ {
+ case PCI_HEADER_TYPE_BRIDGE:
+ if ( !bridge_ctl_mask )
+ break;
+ val = pci_conf_read16(seg, bus, dev, func, PCI_BRIDGE_CONTROL);
+ if ( val & bridge_ctl_mask )
+ pci_conf_write16(seg, bus, dev, func, PCI_BRIDGE_CONTROL,
+ val & ~bridge_ctl_mask);
+ val = pci_conf_read16(seg, bus, dev, func, PCI_SEC_STATUS);
+ if ( val & PCI_STATUS_CHECK )
+ {
+ printk(XENLOG_INFO
+ "%04x:%02x:%02x.%u secondary status %04x -> %04x\n",
+ seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
+ pci_conf_write16(seg, bus, dev, func, PCI_SEC_STATUS,
+ val & PCI_STATUS_CHECK);
+ }
+ break;
+
+ case PCI_HEADER_TYPE_CARDBUS:
+ /* TODO */
+ break;
+ }
+#undef PCI_STATUS_CHECK
+}
+
static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
struct pci_dev *pdev;
@@ -252,6 +361,8 @@ static struct pci_dev *alloc_pdev(struct
break;
}
+ check_pdev(pdev);
+
return pdev;
}
@@ -566,6 +677,8 @@ int pci_add_device(u16 seg, u8 bus, u8 d
seg, bus, slot, func, ctrl);
}
+ check_pdev(pdev);
+
ret = 0;
if ( !pdev->domain )
{
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -125,7 +125,7 @@
#define PCI_IO_RANGE_TYPE_16 0x00
#define PCI_IO_RANGE_TYPE_32 0x01
#define PCI_IO_RANGE_MASK (~0x0fUL)
-#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */
+#define PCI_SEC_STATUS 0x1e /* Secondary status register */
#define PCI_MEMORY_BASE 0x20 /* Memory range behind */
#define PCI_MEMORY_LIMIT 0x22
#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
@@ -152,6 +152,7 @@
#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+#define PCI_BRIDGE_CTL_DTMR_SERR 0x800 /* SERR upon discard timer expiry */
/* Header type 2 (CardBus bridges) */
#define PCI_CB_CAPABILITY_LIST 0x14

@@ -1,43 +0,0 @@
# Commit 31ee951a3bee6e7cc21f94f900fe989e3701a79a
# Date 2014-04-28 12:47:24 +0200
# Author Feng Wu <feng.wu@intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: correct the SMEP logic for HVM_CR0_GUEST_RESERVED_BITS
When checking the SMEP feature for HVM guests, we should check the
VCPU instead of the host CPU.
Signed-off-by: Feng Wu <feng.wu@intel.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
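For reference, SMEP is advertised in CPUID leaf 7 (sub-leaf 0), EBX bit 7. The sketch below reads the host's view of that bit using GCC/clang's <cpuid.h> helper (not a Xen interface); the point of the patch is that for a guest this leaf must come from the vCPU's filtered CPUID view instead:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( !__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) )
            return 1;                         /* leaf 7 not available */
        printf("SMEP: %u\n", (ebx >> 7) & 1); /* host view, for comparison */
        return 0;
    }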
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -347,6 +347,19 @@ static inline int hvm_event_pending(stru
return hvm_funcs.event_pending(v);
}
+static inline bool_t hvm_vcpu_has_smep(void)
+{
+ unsigned int eax, ebx;
+
+ hvm_cpuid(0, &eax, NULL, NULL, NULL);
+
+ if ( eax < 7 )
+ return 0;
+
+ hvm_cpuid(7, NULL, &ebx, NULL, NULL);
+ return !!(ebx & cpufeat_mask(X86_FEATURE_SMEP));
+}
+
/* These reserved bits in lower 32 remain 0 after any load of CR0 */
#define HVM_CR0_GUEST_RESERVED_BITS \
(~((unsigned long) \
@@ -366,7 +379,7 @@ static inline int hvm_event_pending(stru
X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT | \
- (cpu_has_smep ? X86_CR4_SMEP : 0) | \
+ (hvm_vcpu_has_smep() ? X86_CR4_SMEP : 0) | \
(cpu_has_fsgsbase ? X86_CR4_FSGSBASE : 0) | \
((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\
? X86_CR4_VMXE : 0) | \

@@ -1,42 +0,0 @@
References: bnc#875668 CVE-2014-3124 XSA-92
# Commit 83bb5eb4d340acebf27b34108fb1dae062146a68
# Date 2014-04-29 15:11:31 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: restrict HVMOP_set_mem_type
Permitting arbitrary type changes here has the potential of creating
present P2M (and hence EPT/NPT/IOMMU) entries pointing to an invalid
MFN (INVALID_MFN truncated to the respective hardware structure field's
width). This would become a problem at the latest when something real sat
at the end of the physical address space; I suspect, though, that other
things might break with such bogus entries.
Along with that drop a bogus (and otherwise becoming stale) log
message.
Afaict the similar operation in p2m_set_mem_access() is safe.
This is CVE-2014-3124 / XSA-92.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4410,12 +4410,10 @@ long do_hvm_op(unsigned long op, XEN_GUE
rc = -EINVAL;
goto param_fail4;
}
- if ( p2m_is_grant(t) )
+ if ( !p2m_is_ram(t) &&
+ (!p2m_is_hole(t) || a.hvmmem_type != HVMMEM_mmio_dm) )
{
put_gfn(d, pfn);
- gdprintk(XENLOG_WARNING,
- "type for pfn %#lx changed to grant while "
- "we were working?\n", pfn);
goto param_fail4;
}
else

@@ -1,27 +0,0 @@
# Commit 16e2a7596e9fc86881c73cef57602b2c88155528
# Date 2014-05-02 11:46:32 +0200
# Author Paul Durrant <paul.durrant@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
hvm_set_ioreq_page() releases wrong page in error path
The function calls prepare_ring_for_helper() to acquire a mapping for the
given gmfn, then checks (under lock) to see if the ioreq page is already
set up; if it is, however, the function releases the in-use ioreq page
mapping on the error path rather than the one it just acquired. This
patch fixes that.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
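A reduced, self-contained model of the acquire/release pairing; the acquire()/release() helpers are hypothetical stand-ins for prepare_ring_for_helper()/destroy_ring_for_helper():

    #include <stdio.h>
    #include <stdlib.h>

    static void *current_page;                /* the mapping already in use */

    static void *acquire(void) { return malloc(16); }
    static void release(void **p) { free(*p); *p = NULL; }

    static int set_page(void)
    {
        void *page = acquire();               /* the mapping we just created */

        if ( !page )
            return -1;
        if ( current_page )                   /* already set up */
        {
            release(&page);                   /* the fix: drop OUR mapping... */
            /* ...releasing &current_page here would free the live one */
            return -1;
        }
        current_page = page;
        return 0;
    }

    int main(void)
    {
        int first = set_page();
        int second = set_page();

        printf("%d %d\n", first, second);     /* 0 -1 */
        release(&current_page);
        return 0;
    }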
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -478,7 +478,7 @@ static int hvm_set_ioreq_page(
if ( (iorp->va != NULL) || d->is_dying )
{
- destroy_ring_for_helper(&iorp->va, iorp->page);
+ destroy_ring_for_helper(&va, page);
spin_unlock(&iorp->lock);
return -EINVAL;
}

@@ -1,81 +0,0 @@
# Commit 4c0ff6bd54b5a67f8f820f9ed0a89a79f1a26a1c
# Date 2014-05-02 12:09:03 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: fix guest CPUID handling
The way XEN_DOMCTL_set_cpuid got handled so far allowed for surprises
to the caller. With this set of operations
- set leaf A (using array index 0)
- set leaf B (using array index 1)
- clear leaf A (clearing array index 0)
- set leaf B (using array index 0)
- clear leaf B (clearing array index 0)
the entry for leaf B at array index 1 would still be in place, while
the caller would expect it to be cleared.
While looking at the use sites of d->arch.cpuid[] I also noticed that
the allocation of the array needlessly uses the zeroing form - the
relevant fields of the array elements get set in a loop immediately
following the allocation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
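The fixed lookup can be modeled outside Xen as follows (a sketch with a made-up slot array, where 0 stands in for XEN_CPUID_INPUT_UNUSED): scan the whole array for a match while remembering the first unused slot, and fall back to that slot only when no match exists:

    #include <stdio.h>

    #define UNUSED 0u      /* stands in for XEN_CPUID_INPUT_UNUSED */
    #define NSLOTS 4

    static unsigned int slots[NSLOTS] = { 1, UNUSED, 2, UNUSED };

    static void set_slot(unsigned int leaf)
    {
        unsigned int i, *unused = NULL;

        for ( i = 0; i < NSLOTS; i++ )
        {
            if ( slots[i] == UNUSED )
            {
                if ( !unused )
                    unused = &slots[i];
                continue;              /* keep scanning for an exact match */
            }
            if ( slots[i] == leaf )
                break;
        }
        if ( i < NSLOTS )
            slots[i] = leaf;           /* update the matching entry in place */
        else if ( unused )
            *unused = leaf;            /* otherwise take the first free slot */
    }

    int main(void)
    {
        set_slot(2);                   /* must hit index 2, not free index 1 */
        printf("%u %u %u %u\n", slots[0], slots[1], slots[2], slots[3]);
        return 0;                      /* prints: 1 0 2 0 */
    }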
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -553,7 +553,7 @@ int arch_domain_create(struct domain *d,
if ( !is_idle_domain(d) )
{
- d->arch.cpuids = xzalloc_array(cpuid_input_t, MAX_CPUID_INPUT);
+ d->arch.cpuids = xmalloc_array(cpuid_input_t, MAX_CPUID_INPUT);
rc = -ENOMEM;
if ( d->arch.cpuids == NULL )
goto fail;
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -920,7 +920,7 @@ long arch_do_domctl(
case XEN_DOMCTL_set_cpuid:
{
xen_domctl_cpuid_t *ctl = &domctl->u.cpuid;
- cpuid_input_t *cpuid = NULL;
+ cpuid_input_t *cpuid, *unused = NULL;
int i;
for ( i = 0; i < MAX_CPUID_INPUT; i++ )
@@ -928,7 +928,11 @@ long arch_do_domctl(
cpuid = &d->arch.cpuids[i];
if ( cpuid->input[0] == XEN_CPUID_INPUT_UNUSED )
- break;
+ {
+ if ( !unused )
+ unused = cpuid;
+ continue;
+ }
if ( (cpuid->input[0] == ctl->input[0]) &&
((cpuid->input[1] == XEN_CPUID_INPUT_UNUSED) ||
@@ -936,15 +940,12 @@ long arch_do_domctl(
break;
}
- if ( i == MAX_CPUID_INPUT )
- {
- ret = -ENOENT;
- }
+ if ( i < MAX_CPUID_INPUT )
+ *cpuid = *ctl;
+ else if ( unused )
+ *unused = *ctl;
else
- {
- memcpy(cpuid, ctl, sizeof(cpuid_input_t));
- ret = 0;
- }
+ ret = -ENOENT;
}
break;

@@ -1,31 +0,0 @@
# Commit 99c03bc6a1f8c6722926d2db781ece045f9d09ae
# Date 2014-05-12 11:59:19 +0200
# Author Edmund H White <edmund.h.white@intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: load current_vmcs only when it exists
There may not be a valid VMCS on the current CPU, so only load it when it exists.
The original fix is from Edmund <edmund.h.white@intel.com>.
Signed-off-by: Edmund H White <edmund.h.white@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -828,8 +828,12 @@ void virtual_vmcs_enter(void *vvmcs)
void virtual_vmcs_exit(void *vvmcs)
{
+ struct vmcs_struct *cur = this_cpu(current_vmcs);
+
__vmpclear(pfn_to_paddr(domain_page_map_to_mfn(vvmcs)));
- __vmptrld(virt_to_maddr(this_cpu(current_vmcs)));
+ if ( cur )
+ __vmptrld(virt_to_maddr(cur));
+
}
u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding)

@@ -1,40 +0,0 @@
Subject: tools/pygrub: Fix error handling if no valid partitions are found
From: Andrew Cooper <andrew.cooper3@citrix.com> Sat May 10 02:18:33 2014 +0100
Date: Mon May 12 15:52:43 2014 +0100
Git: d75215805ce6ed20b3807955fab6a7f7a3368bee
If no partitions at all are found, pygrub never creates the name 'fs',
resulting in a NameError indicating the lack of fs, rather than a
RuntimeError explaining that no partitions were found.
Set fs to None right at the start, and use the pythonic idiom "if fs is None:"
to protect against otherwise valid values for fs which compare equal to
0/False.
Reported-by: Sven Köhler <sven.koehler@gmail.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Index: xen-4.4.0-testing/tools/pygrub/src/pygrub
===================================================================
--- xen-4.4.0-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.4.0-testing/tools/pygrub/src/pygrub
@@ -760,7 +760,7 @@ if __name__ == "__main__":
usage()
sys.exit(1)
file = args[0]
-
+ fs = None
output = None
entry = None
interactive = True
@@ -882,7 +882,7 @@ if __name__ == "__main__":
sys.exit(0)
# Did looping through partitions find us a kernel?
- if not fs:
+ if fs is None:
raise RuntimeError, "Unable to find partition containing kernel"
bootcfg["kernel"] = copy_from_image(fs, chosencfg["kernel"], "kernel",

@@ -1,26 +0,0 @@
# Commit a07084525c126c596326dc1442dd218f522f51b4
# Date 2014-05-14 10:54:39 +0200
# Author Kai Huang <kai.huang@linux.intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MCE: bypass uninitialized vcpu in vMCE injection
Dom0 may bring up fewer vCPUs than the Xen hypervisor actually created for
it. In that case, on Intel platforms, vMCE injection to dom0 will fail
because the vMCE is injected into an uninitialized vCPU, crashing dom0.
Signed-off-by: Kai Huang <kai.huang@linux.intel.com>
Acked-by: Christoph Egger <chegger@amazon.de>
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -357,6 +357,10 @@ int inject_vmce(struct domain *d, int vc
if ( vcpu != VMCE_INJECT_BROADCAST && vcpu != v->vcpu_id )
continue;
+ /* Don't inject to uninitialized VCPU. */
+ if ( !v->is_initialised )
+ continue;
+
if ( (has_hvm_container_domain(d) ||
guest_has_trap_callback(d, v->vcpu_id, TRAP_machine_check)) &&
!test_and_set_bool(v->mce_pending) )

@@ -1,83 +0,0 @@
# Commit 5786718fbaafbe47d72cc1512cd93de79b8fc2fa
# Date 2014-05-20 15:53:20 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: apply quirks at device setup time rather than only at boot
Accessing extended config space may not be possible at boot time, e.g.
when the memory space used by MMCFG is reserved only via ACPI tables,
but not in the E820/UEFI memory maps (which we need Dom0 to tell us
about). Consequently the change here still leaves the issue unaddressed
for systems where the extended config space remains inaccessible (due
to firmware bugs, i.e. not properly reserving the address space of
those regions).
With the respective messages now potentially getting logged more than
once, we ought to consider whether we should issue them only if we in
fact were required to do any masking (i.e. if the relevant mask bits
weren't already set).
This is CVE-2013-3495 / XSA-59.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Acked-by: Yang Zhang <yang.z.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -99,7 +99,7 @@ void platform_quirks_init(void);
void vtd_ops_preamble_quirk(struct iommu* iommu);
void vtd_ops_postamble_quirk(struct iommu* iommu);
void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map);
-void pci_vtd_quirk(struct pci_dev *pdev);
+void pci_vtd_quirk(const struct pci_dev *);
int platform_supports_intremap(void);
int platform_supports_x2apic(void);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1483,6 +1483,9 @@ static int domain_context_mapping(
break;
}
+ if ( !ret && devfn == pdev->devfn )
+ pci_vtd_quirk(pdev);
+
return ret;
}
@@ -1922,6 +1925,8 @@ static int intel_iommu_enable_device(str
struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
int ret = drhd ? ats_device(pdev, drhd) : -ENODEV;
+ pci_vtd_quirk(pdev);
+
if ( ret <= 0 )
return ret;
@@ -1993,12 +1998,7 @@ static int intel_iommu_remove_device(u8
static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
- int err;
-
- err = domain_context_mapping(pdev->domain, devfn, pdev);
- if ( !err && devfn == pdev->devfn )
- pci_vtd_quirk(pdev);
- return err;
+ return domain_context_mapping(pdev->domain, devfn, pdev);
}
void clear_fault_bits(struct iommu *iommu)
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -385,7 +385,7 @@ void me_wifi_quirk(struct domain *domain
* - This can cause system failure upon non-fatal VT-d faults
* - Potential security issue if malicious guest trigger VT-d faults
*/
-void __init pci_vtd_quirk(struct pci_dev *pdev)
+void pci_vtd_quirk(const struct pci_dev *pdev)
{
int seg = pdev->seg;
int bus = pdev->bus;

@@ -1,57 +0,0 @@
# Commit 04734664eb20c3bf239e473af182bb7ab901d779
# Date 2014-05-20 15:54:01 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: extend error report masking workaround to newer chipsets
Add two more PCI IDs to the set that has been taken care of with a
different workaround long before XSA-59, and (for consistency with the
newer workarounds) log a message here too.
Also move the function-wide comment to the cases it applies to; this
should really have been done by d061d200 ("VT-d: suppress UR signaling
for server chipsets").
This is CVE-2013-3495 / XSA-59.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Acked-by: Yang Zhang <yang.z.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -379,12 +379,6 @@ void me_wifi_quirk(struct domain *domain
}
}
-/*
- * Mask reporting Intel VT-d faults to IOH core logic:
- * - Some platform escalates VT-d faults to platform errors
- * - This can cause system failure upon non-fatal VT-d faults
- * - Potential security issue if malicious guest trigger VT-d faults
- */
void pci_vtd_quirk(const struct pci_dev *pdev)
{
int seg = pdev->seg;
@@ -402,10 +396,20 @@ void pci_vtd_quirk(const struct pci_dev
switch ( pci_conf_read16(seg, bus, dev, func, PCI_DEVICE_ID) )
{
+ /*
+ * Mask reporting Intel VT-d faults to IOH core logic:
+ * - Some platform escalates VT-d faults to platform errors.
+ * - This can cause system failure upon non-fatal VT-d faults.
+ * - Potential security issue if malicious guest trigger VT-d faults.
+ */
+ case 0x0e28: /* Xeon-E5v2 (IvyBridge) */
case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */
+ case 0x3728: /* Xeon C5500/C3500 (JasperForest) */
case 0x3c28: /* Sandybridge */
val = pci_conf_read32(seg, bus, dev, func, 0x1AC);
pci_conf_write32(seg, bus, dev, func, 0x1AC, val | (1 << 31));
+ printk(XENLOG_INFO "Masked VT-d error signaling on %04x:%02x:%02x.%u\n",
+ seg, bus, dev, func);
break;
/* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */

@@ -1,53 +0,0 @@
# Commit bac6334b51d9bcfe57ecf4a4cb5288348fcf044a
# Date 2014-05-20 15:55:42 +0200
# Author Juergen Gross <juergen.gross@ts.fujitsu.com>
# Committer Jan Beulich <jbeulich@suse.com>
move domain to cpupool0 before destroying it
Currently when a domain is destroyed it is removed from the domain_list
before all of its resources, including the cpupool membership, are freed.
This can lead to a situation where the domain is still a member of a cpupool
without for_each_domain_in_cpupool() (or even for_each_domain()) being
able to find it any more. This in turn can result in rejection of removing
the last cpu from a cpupool, because there still seems to be a domain in
the cpupool, even though it can't be found by scanning through all domains.
This situation can be avoided by moving the domain to be destroyed to
cpupool0 first and then removing it from this cpupool BEFORE deleting it from
the domain_list. As cpupool0 is always active and a domain without any cpupool
membership is implicitly regarded as belonging to cpupool0, this poses no
problem.
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -539,6 +539,8 @@ int domain_kill(struct domain *d)
BUG_ON(rc != -EAGAIN);
break;
}
+ if ( sched_move_domain(d, cpupool0) )
+ return -EAGAIN;
for_each_vcpu ( d, v )
unmap_vcpu_info(v);
d->is_dying = DOMDYING_dead;
@@ -721,8 +723,6 @@ static void complete_domain_destroy(stru
sched_destroy_domain(d);
- cpupool_rm_domain(d);
-
/* Free page used by xen oprofile buffer. */
#ifdef CONFIG_XENOPROF
free_xenoprof_pages(d);
@@ -770,6 +770,8 @@ void domain_destroy(struct domain *d)
if ( _atomic_read(old) != 0 )
return;
+ cpupool_rm_domain(d);
+
/* Delete from task list and task hashtable. */
TRACE_1D(TRC_SCHED_DOM_REM, d->domain_id);
spin_lock(&domlist_update_lock);

@@ -1,46 +0,0 @@
# Commit fca69b1fc606ece62430076ca4a157e4bed749a8
# Date 2014-05-26 12:25:01 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
ACPI/ERST: fix table mapping
acpi_get_table(), when executed before reaching SYS_STATE_active, will
return a mapping valid only until the next invocation of that function.
Consequently storing the returned pointer for later use is incorrect.
Copy the logic used in VT-d's DMAR handling.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/acpi/apei/erst.c
+++ b/xen/drivers/acpi/apei/erst.c
@@ -760,21 +760,27 @@ int __init erst_init(void)
{
int rc = 0;
acpi_status status;
+ acpi_physical_address erst_addr;
+ acpi_native_uint erst_len;
struct apei_exec_context ctx;
if (acpi_disabled)
return -ENODEV;
- status = acpi_get_table(ACPI_SIG_ERST, 0,
- (struct acpi_table_header **)&erst_tab);
+ status = acpi_get_table_phys(ACPI_SIG_ERST, 0, &erst_addr, &erst_len);
if (status == AE_NOT_FOUND) {
printk(KERN_INFO "ERST table was not found\n");
return -ENODEV;
- } else if (ACPI_FAILURE(status)) {
+ }
+ if (ACPI_FAILURE(status)) {
const char *msg = acpi_format_exception(status);
printk(KERN_WARNING "Failed to get ERST table: %s\n", msg);
return -EINVAL;
}
+ map_pages_to_xen((unsigned long)__va(erst_addr), PFN_DOWN(erst_addr),
+ PFN_UP(erst_addr + erst_len) - PFN_DOWN(erst_addr),
+ PAGE_HYPERVISOR);
+ erst_tab = __va(erst_addr);
rc = erst_check_table(erst_tab);
if (rc) {

@@ -1,30 +0,0 @@
# Commit f8ecf31c31906552522c2a1b0d1cada07d78876e
# Date 2014-05-26 12:28:46 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: fix mask applied to DMIBAR in desktop chipset XSA-59 workaround
In commit ("VT-d: suppress UR signaling for desktop chipsets")
the mask applied to the value read from DMIBAR is too narrow; only the
comment accompanying it was correct. Fix that, and tag the literal
number as "long" at once to avoid potential compiler warnings.
The widest possible value so far is 39 bits; all chipsets covered here
but having less than this number of bits have the remaining bits marked
reserved (zero), and hence there's no need for making the mask chipset
specific.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Yang Zhang <yang.z.zhang@intel.com>
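The corrected constant can be derived from the bit positions instead of being written by hand; a standalone check:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Mask selecting bits 12...38 of a 64-bit value. */
        uint64_t mask = ((1ULL << 39) - 1) & ~((1ULL << 12) - 1);

        printf("%#llx\n", (unsigned long long)mask);  /* 0x7ffffff000 */
        /* The buggy literal 0x7fffff000 covers only bits 12...34. */
        return 0;
    }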
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -467,7 +467,7 @@ void pci_vtd_quirk(const struct pci_dev
case 0xc00: case 0xc04: case 0xc08: /* Haswell */
bar = pci_conf_read32(seg, bus, dev, func, 0x6c);
bar = (bar << 32) | pci_conf_read32(seg, bus, dev, func, 0x68);
- pa = bar & 0x7fffff000; /* bits 12...38 */
+ pa = bar & 0x7ffffff000UL; /* bits 12...38 */
if ( (bar & 1) && pa &&
page_is_ram_type(paddr_to_pfn(pa), RAM_TYPE_RESERVED) )
{

View File

@ -1,38 +0,0 @@
# Commit 6b4d71d028f445cba7426a144751fddc8bfdd67b
# Date 2014-05-28 09:50:33 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD IOMMU: don't free page table prematurely
iommu_merge_pages() still wants to look at the next level page table,
the TLB flush necessary before freeing also happens in that function,
and if it fails no free should happen at all. Hence the freeing must
be done after that function has returned successfully, not before it
is called.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -691,8 +691,6 @@ int amd_iommu_map_page(struct domain *d,
if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
gfn, mfn, merge_level) )
break;
- /* Deallocate lower level page table */
- free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
if ( iommu_merge_pages(d, pt_mfn[merge_level], gfn,
flags, merge_level) )
@@ -703,6 +701,9 @@ int amd_iommu_map_page(struct domain *d,
domain_crash(d);
return -EFAULT;
}
+
+ /* Deallocate lower level page table */
+ free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
}
out:

View File

@ -1,35 +0,0 @@
# Commit 50df6f7429f73364bbddb0970a3a34faa01a7790
# Date 2014-05-28 09:51:07 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: don't use VA for cache flush when also flushing TLB
Doing both flushes at once is a strong indication that the address
mapping has either been dropped (in which case the cache flush, when
done via INVLPG, would fault) or that its physical address has changed
(in which case the cache flush would end up being done on the
wrong address range). There is no adverse effect (other than the
obvious performance one) using WBINVD in this case regardless of the
range's size; only map_pages_to_xen() uses combined flushes at present.
This problem was observed with the 2nd try backport of d6cb14b3 ("VT-d:
suppress UR signaling for desktop chipsets") to 4.2 (where ioremap()
needs to be replaced with set_fixmap_nocache(); the now commented out
__set_fixmap(, 0, 0) there to undo the mapping resulted in the first of
the above two scenarios).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -152,7 +152,8 @@ void flush_area_local(const void *va, un
if ( order < (BITS_PER_LONG - PAGE_SHIFT) )
sz = 1UL << (order + PAGE_SHIFT);
- if ( c->x86_clflush_size && c->x86_cache_size && sz &&
+ if ( !(flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL)) &&
+ c->x86_clflush_size && c->x86_cache_size && sz &&
((sz >> 10) < c->x86_cache_size) )
{
va = (const void *)((unsigned long)va & ~(sz - 1));

@@ -1,29 +0,0 @@
# Commit 054b6dfb61eab00d86ddd5d0ac508f5302da0d52
# Date 2014-05-28 10:07:50 +0200
# Author Ross Lagerwall <ross.lagerwall@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
timers: set the deadline more accurately
Program the timer to the deadline of the closest timer if it is further
than 50us ahead, otherwise set it 50us ahead. This way a single event
fires on time rather than 50us late (as it would have previously) while
still preventing too many timer wakeups in the case of having many
timers scheduled close together.
(where 50us is the timer_slop)
Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
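In isolation, the new deadline computation is just a lower clamp; a sketch with made-up numbers, where timer_slop is 50us expressed in nanoseconds:

    #include <stdio.h>

    typedef long long s_time_t;                /* nanoseconds, as in Xen */
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        const s_time_t timer_slop = 50000;     /* 50us */
        s_time_t now = 1000000;
        s_time_t deadline = 1010000;           /* closer than the slop */

        /* Old: deadline + timer_slop == 1060000, i.e. always 50us late.
           New: clamp to now + timer_slop only when the deadline is nearer. */
        printf("%lld\n", MAX(deadline, now + timer_slop));   /* 1050000 */
        return 0;
    }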
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -492,8 +492,9 @@ static void timer_softirq_action(void)
deadline = heap[1]->expires;
if ( (ts->list != NULL) && (ts->list->expires < deadline) )
deadline = ts->list->expires;
+ now = NOW();
this_cpu(timer_deadline) =
- (deadline == STIME_MAX) ? 0 : deadline + timer_slop;
+ (deadline == STIME_MAX) ? 0 : MAX(deadline, now + timer_slop);
if ( !reprogram_timer(this_cpu(timer_deadline)) )
raise_softirq(TIMER_SOFTIRQ);

@@ -1,24 +0,0 @@
# Commit 071a4c70a634f7d4f74cde4086ff3202968538c9
# Date 2014-06-02 10:19:27 +0200
# Author Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86, amd_ucode: flip revision numbers in printk
A failure would result in a log message like this:
(XEN) microcode: CPU0 update from revision 0x6000637 to 0x6000626 failed
^^^^^^^^^^^^^^^^^^^^^^
The above message has the revision numbers inverted. Fix this.
Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -164,7 +164,7 @@ static int apply_microcode(int cpu)
if ( rev != hdr->patch_id )
{
printk(KERN_ERR "microcode: CPU%d update from revision "
- "%#x to %#x failed\n", cpu, hdr->patch_id, rev);
+ "%#x to %#x failed\n", cpu, rev, hdr->patch_id);
return -EIO;
}

@@ -1,44 +0,0 @@
References: bnc#878841 CVE-2014-3967 CVE-2014-3968 XSA-96
# Commit 6f4cc0ac41625a054861b417ea1fc3ab88e2e40a
# Date 2014-06-03 15:17:14 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: eliminate vulnerabilities from hvm_inject_msi()
- pirq_info() returns NULL for a non-allocated pIRQ, and hence we
mustn't unconditionally de-reference it, and we need to invoke it
another time after having called map_domain_emuirq_pirq()
- don't use printk(), namely without XENLOG_GUEST, for error reporting
This is XSA-96.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
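The pattern generalizes to any lazily-allocated lookup table; a self-contained sketch with hypothetical names, not Xen's pirq API:

    #include <stdio.h>
    #include <stdlib.h>

    struct entry { int bound; };

    static struct entry *table[8];           /* lazily populated */

    static struct entry *lookup(unsigned int i) { return table[i]; }

    static void map_entry(unsigned int i)    /* allocation may fail silently */
    {
        if ( !table[i] )
            table[i] = calloc(1, sizeof(struct entry));
    }

    static void inject(unsigned int i)
    {
        struct entry *e = lookup(i);

        if ( !e )                  /* never dereference before this check */
        {
            map_entry(i);
            e = lookup(i);         /* look up again after allocating... */
            if ( !e )              /* ...which is not guaranteed to succeed */
                return;
        }
        printf("deliver %u\n", i);
    }

    int main(void)
    {
        inject(3);                 /* allocates on first use, then delivers */
        free(table[3]);
        return 0;
    }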
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -289,20 +289,18 @@ void hvm_inject_msi(struct domain *d, ui
struct pirq *info = pirq_info(d, pirq);
/* if it is the first time, allocate the pirq */
- if (info->arch.hvm.emuirq == IRQ_UNBOUND)
+ if ( !info || info->arch.hvm.emuirq == IRQ_UNBOUND )
{
spin_lock(&d->event_lock);
map_domain_emuirq_pirq(d, pirq, IRQ_MSI_EMU);
spin_unlock(&d->event_lock);
+ info = pirq_info(d, pirq);
+ if ( !info )
+ return;
} else if (info->arch.hvm.emuirq != IRQ_MSI_EMU)
- {
- printk("%s: pirq %d does not correspond to an emulated MSI\n", __func__, pirq);
return;
- }
send_guest_pirq(d, info);
return;
- } else {
- printk("%s: error getting pirq from MSI: pirq = %d\n", __func__, pirq);
}
}

@@ -1,35 +0,0 @@
# Commit 9c1e8cae657bc13e8b1ddeede17603d77f3ad341
# Date 2014-06-04 11:26:15 +0200
# Author Malcolm Crossley <malcolm.crossley@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
ACPI: Prevent acpi_table_entries from falling into an infinite loop
If a buggy BIOS programs an ACPI table with too small an entry length
then acpi_table_entries gets stuck in an infinite loop.
To aid debugging, report the error and exit the loop.
Based on Linux kernel commit 369d913b242cae2205471b11b6e33ac368ed33ec
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Use < instead of <= (which I wrongly suggested), return -ENODATA
instead of -EINVAL, and make description match code.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
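The guard generalizes to any length-prefixed subtable walk; a self-contained sketch using a simplified header rather than the real acpi_subtable_header:

    #include <stdio.h>

    struct subtable_header { unsigned char type, length; };

    static int walk(const unsigned char *p, const unsigned char *end)
    {
        while ( p + sizeof(struct subtable_header) < end )
        {
            const struct subtable_header *h = (const void *)p;

            if ( h->length < sizeof(*h) )  /* a zero length would spin forever */
                return -1;
            p += h->length;                /* guaranteed forward progress now */
        }
        return 0;
    }

    int main(void)
    {
        unsigned char bogus[4] = { 1, 0, 1, 0 };  /* entries claiming length 0 */

        printf("%d\n", walk(bogus, bogus + sizeof(bogus)));   /* -1 */
        return 0;
    }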
--- a/xen/drivers/acpi/tables.c
+++ b/xen/drivers/acpi/tables.c
@@ -233,6 +233,12 @@ acpi_table_parse_entries(char *id,
while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
table_end) {
+ if (entry->length < sizeof(*entry)) {
+ printk(KERN_ERR PREFIX "[%4.4s:%#x] Invalid length\n",
+ id, entry_id);
+ return -ENODATA;
+ }
+
if (entry->type == entry_id
&& (!max_entries || count++ < max_entries))
if (handler(entry, table_end))

@@ -1,467 +0,0 @@
# Commit 1cc37ba8dbd89fb86dad3f6c78c3fba06019fe21
# Date 2014-06-05 17:49:14 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: honor APEI firmware-first mode in XSA-59 workaround code
When firmware-first mode is being indicated by firmware, we shouldn't
be modifying AER registers - these are considered to be owned by
firmware in that case. Violating this is being reported to result in
SMI storms. While circumventing the workaround means re-exposing
affected hosts to the XSA-59 issues, this in any event seems better
than not booting at all. Respective messages are being issued to the
log, so the situation can be diagnosed.
The basic building blocks were taken from Linux 3.15-rc. Note that
this includes a block of code enclosed in #ifdef CONFIG_X86_MCE - we
don't define that symbol, and that code also wouldn't build without
suitable machine check side code added; that should happen eventually,
but isn't subject of this change.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reported-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Yang Zhang <yang.z.zhang@intel.com>
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -754,6 +754,8 @@ int __init acpi_boot_init(void)
erst_init();
+ acpi_hest_init();
+
acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt);
return 0;
--- a/xen/drivers/acpi/apei/Makefile
+++ b/xen/drivers/acpi/apei/Makefile
@@ -1,3 +1,4 @@
obj-y += erst.o
+obj-y += hest.o
obj-y += apei-base.o
obj-y += apei-io.o
--- /dev/null
+++ b/xen/drivers/acpi/apei/hest.c
@@ -0,0 +1,200 @@
+/*
+ * APEI Hardware Error Source Table support
+ *
+ * HEST describes error sources in detail; communicates operational
+ * parameters (i.e. severity levels, masking bits, and threshold
+ * values) to Linux as necessary. It also allows the BIOS to report
+ * non-standard error sources to Linux (for example, chipset-specific
+ * error registers).
+ *
+ * For more information about HEST, please refer to ACPI Specification
+ * version 4.0, section 17.3.2.
+ *
+ * Copyright 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <xen/kernel.h>
+#include <xen/mm.h>
+#include <xen/pfn.h>
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#define HEST_PFX "HEST: "
+
+static bool_t hest_disable;
+boolean_param("hest_disable", hest_disable);
+
+/* HEST table parsing */
+
+static struct acpi_table_hest *__read_mostly hest_tab;
+
+static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+ [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
+ [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
+ [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
+ [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
+ [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
+ [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
+ [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+};
+
+static int hest_esrc_len(const struct acpi_hest_header *hest_hdr)
+{
+ u16 hest_type = hest_hdr->type;
+ int len;
+
+ if (hest_type >= ACPI_HEST_TYPE_RESERVED)
+ return 0;
+
+ len = hest_esrc_len_tab[hest_type];
+
+ if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
+ const struct acpi_hest_ia_corrected *cmc =
+ container_of(hest_hdr,
+ const struct acpi_hest_ia_corrected,
+ header);
+
+ len = sizeof(*cmc) + cmc->num_hardware_banks *
+ sizeof(struct acpi_hest_ia_error_bank);
+ } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
+ const struct acpi_hest_ia_machine_check *mc =
+ container_of(hest_hdr,
+ const struct acpi_hest_ia_machine_check,
+ header);
+
+ len = sizeof(*mc) + mc->num_hardware_banks *
+ sizeof(struct acpi_hest_ia_error_bank);
+ }
+ BUG_ON(len == -1);
+
+ return len;
+};
+
+int apei_hest_parse(apei_hest_func_t func, void *data)
+{
+ struct acpi_hest_header *hest_hdr;
+ int i, rc, len;
+
+ if (hest_disable || !hest_tab)
+ return -EINVAL;
+
+ hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
+ for (i = 0; i < hest_tab->error_source_count; i++) {
+ len = hest_esrc_len(hest_hdr);
+ if (!len) {
+ printk(XENLOG_WARNING HEST_PFX
+ "Unknown or unused hardware error source "
+ "type: %d for hardware error source: %d\n",
+ hest_hdr->type, hest_hdr->source_id);
+ return -EINVAL;
+ }
+ if ((void *)hest_hdr + len >
+ (void *)hest_tab + hest_tab->header.length) {
+ printk(XENLOG_WARNING HEST_PFX
+ "Table contents overflow for hardware error source: %d\n",
+ hest_hdr->source_id);
+ return -EINVAL;
+ }
+
+ rc = func(hest_hdr, data);
+ if (rc)
+ return rc;
+
+ hest_hdr = (void *)hest_hdr + len;
+ }
+
+ return 0;
+}
+
+/*
+ * Check if firmware advertises firmware first mode. We need FF bit to be set
+ * along with a set of MC banks which work in FF mode.
+ */
+static int __init hest_parse_cmc(const struct acpi_hest_header *hest_hdr,
+ void *data)
+{
+#ifdef CONFIG_X86_MCE
+ unsigned int i;
+ const struct acpi_hest_ia_corrected *cmc;
+ const struct acpi_hest_ia_error_bank *mc_bank;
+
+ if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
+ return 0;
+
+ cmc = container_of(hest_hdr, const struct acpi_hest_ia_corrected, header);
+ if (!cmc->enabled)
+ return 0;
+
+ /*
+ * We expect HEST to provide a list of MC banks that report errors
+ * in firmware first mode. Otherwise, return non-zero value to
+ * indicate that we are done parsing HEST.
+ */
+ if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
+ return 1;
+
+ printk(XENLOG_INFO HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
+
+ mc_bank = (const struct acpi_hest_ia_error_bank *)(cmc + 1);
+ for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
+ mce_disable_bank(mc_bank->bank_number);
+#else
+# define acpi_disable_cmcff 1
+#endif
+
+ return 1;
+}
+
+void __init acpi_hest_init(void)
+{
+ acpi_status status;
+ acpi_physical_address hest_addr;
+ acpi_native_uint hest_len;
+
+ if (acpi_disabled)
+ return;
+
+ if (hest_disable) {
+ printk(XENLOG_INFO HEST_PFX "Table parsing disabled.\n");
+ return;
+ }
+
+ status = acpi_get_table_phys(ACPI_SIG_HEST, 0, &hest_addr, &hest_len);
+ if (status == AE_NOT_FOUND)
+ goto err;
+ if (ACPI_FAILURE(status)) {
+ printk(XENLOG_ERR HEST_PFX "Failed to get table, %s\n",
+ acpi_format_exception(status));
+ goto err;
+ }
+ map_pages_to_xen((unsigned long)__va(hest_addr), PFN_DOWN(hest_addr),
+ PFN_UP(hest_addr + hest_len) - PFN_DOWN(hest_addr),
+ PAGE_HYPERVISOR);
+ hest_tab = __va(hest_addr);
+
+ if (!acpi_disable_cmcff)
+ apei_hest_parse(hest_parse_cmc, NULL);
+
+ printk(XENLOG_INFO HEST_PFX "Table parsing has been initialized\n");
+ return;
+err:
+ hest_disable = 1;
+}
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1066,6 +1066,106 @@ void __init setup_dom0_pci_devices(
spin_unlock(&pcidevs_lock);
}
+#ifdef CONFIG_ACPI
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+static int hest_match_pci(const struct acpi_hest_aer_common *p,
+ const struct pci_dev *pdev)
+{
+ return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
+ ACPI_HEST_BUS(p->bus) == pdev->bus &&
+ p->device == PCI_SLOT(pdev->devfn) &&
+ p->function == PCI_FUNC(pdev->devfn);
+}
+
+static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
+ const struct pci_dev *pdev)
+{
+ unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn),
+ PCI_CAP_ID_EXP);
+ u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn),
+ pos + PCI_EXP_FLAGS),
+ PCI_EXP_FLAGS_TYPE);
+
+ switch ( hest_hdr->type )
+ {
+ case ACPI_HEST_TYPE_AER_ROOT_PORT:
+ return pcie == PCI_EXP_TYPE_ROOT_PORT;
+ case ACPI_HEST_TYPE_AER_ENDPOINT:
+ return pcie == PCI_EXP_TYPE_ENDPOINT;
+ case ACPI_HEST_TYPE_AER_BRIDGE:
+ return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) ==
+ PCI_CLASS_BRIDGE_PCI;
+ }
+
+ return 0;
+}
+
+struct aer_hest_parse_info {
+ const struct pci_dev *pdev;
+ bool_t firmware_first;
+};
+
+static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
+{
+ if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
+ hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
+ hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
+ return 1;
+ return 0;
+}
+
+static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
+{
+ struct aer_hest_parse_info *info = data;
+ const struct acpi_hest_aer_common *p;
+ bool_t ff;
+
+ if ( !hest_source_is_pcie_aer(hest_hdr) )
+ return 0;
+
+ p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
+ ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
+
+ /*
+ * If no specific device is supplied, determine whether
+ * FIRMWARE_FIRST is set for *any* PCIe device.
+ */
+ if ( !info->pdev )
+ {
+ info->firmware_first |= ff;
+ return 0;
+ }
+
+ /* Otherwise, check the specific device */
+ if ( p->flags & ACPI_HEST_GLOBAL ?
+ hest_match_type(hest_hdr, info->pdev) :
+ hest_match_pci(p, info->pdev) )
+ {
+ info->firmware_first = ff;
+ return 1;
+ }
+
+ return 0;
+}
+
+bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
+{
+ struct aer_hest_parse_info info = { .pdev = pdev };
+
+ return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
+ apei_hest_parse(aer_hest_parse, &info) >= 0 &&
+ info.firmware_first;
+}
+#endif
+
static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
{
struct pci_dev *pdev;
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -386,9 +386,11 @@ void pci_vtd_quirk(const struct pci_dev
int dev = PCI_SLOT(pdev->devfn);
int func = PCI_FUNC(pdev->devfn);
int pos;
- u32 val;
+ bool_t ff;
+ u32 val, val2;
u64 bar;
paddr_t pa;
+ const char *action;
if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
PCI_VENDOR_ID_INTEL )
@@ -438,7 +440,10 @@ void pci_vtd_quirk(const struct pci_dev
pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos,
PCI_EXT_CAP_ID_VNDR);
}
+ ff = 0;
}
+ else
+ ff = pcie_aer_get_firmware_first(pdev);
if ( !pos )
{
printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n",
@@ -447,18 +452,26 @@ void pci_vtd_quirk(const struct pci_dev
}
val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK);
- pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
- val | PCI_ERR_UNC_UNSUP);
- val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
- pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
- val | PCI_ERR_COR_ADV_NFAT);
+ val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
+ if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) )
+ action = "Found masked";
+ else if ( !ff )
+ {
+ pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
+ val | PCI_ERR_UNC_UNSUP);
+ pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
+ val2 | PCI_ERR_COR_ADV_NFAT);
+ action = "Masked";
+ }
+ else
+ action = "Must not mask";
/* XPUNCERRMSK Send Completion with Unsupported Request */
val = pci_conf_read32(seg, bus, dev, func, 0x20c);
pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4));
- printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
- seg, bus, dev, func);
+ printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n",
+ action, seg, bus, dev, func);
break;
case 0x100: case 0x104: case 0x108: /* Sandybridge */
--- a/xen/include/acpi/actbl1.h
+++ b/xen/include/acpi/actbl1.h
@@ -445,6 +445,14 @@ struct acpi_hest_aer_common {
#define ACPI_HEST_FIRMWARE_FIRST (1)
#define ACPI_HEST_GLOBAL (1<<1)
+/*
+ * Macros to access the bus/segment numbers in Bus field above:
+ * Bus number is encoded in bits 7:0
+ * Segment number is encoded in bits 23:8
+ */
+#define ACPI_HEST_BUS(bus) ((bus) & 0xFF)
+#define ACPI_HEST_SEGMENT(bus) (((bus) >> 8) & 0xFFFF)
+
/* Hardware Error Notification */
struct acpi_hest_notify {
--- a/xen/include/acpi/apei.h
+++ b/xen/include/acpi/apei.h
@@ -12,6 +12,9 @@
#define FIX_APEI_RANGE_MAX 64
+typedef int (*apei_hest_func_t)(const struct acpi_hest_header *, void *);
+int apei_hest_parse(apei_hest_func_t, void *);
+
int erst_write(const struct cper_record_header *record);
size_t erst_get_record_count(void);
int erst_get_next_record_id(u64 *record_id);
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -61,6 +61,7 @@ int acpi_boot_init (void);
int acpi_boot_table_init (void);
int acpi_numa_init (void);
int erst_init(void);
+void acpi_hest_init(void);
int acpi_table_init (void);
int acpi_table_parse(char *id, acpi_table_handler handler);
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -144,6 +144,8 @@ int pci_find_next_ext_capability(int seg
const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus,
unsigned int *dev, unsigned int *func);
+bool_t pcie_aer_get_firmware_first(const struct pci_dev *);
+
struct pirq;
int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
void msixtbl_pt_unregister(struct domain *, struct pirq *);
@ -1,133 +0,0 @@
# Commit 090ca8c155b7321404ea7713a28aaedb7ac4fffd
# Date 2014-06-05 17:52:57 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/domctl: two functional fixes to XEN_DOMCTL_[gs]etvcpuextstate
Interacting with the vcpu itself should be protected by vcpu_pause().
Buggy/naive toolstacks might encounter adverse interaction with a vcpu context
switch, or an increase of xcr0_accum. There are no real problems with the
current in-tree code.
Explicitly permit a NULL guest handle as being a request for size. It is the
prevailing Xen style, and without it, valgrind's ioctl handler is unable to
determine whether evc->buffer actually got written to.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
# Commit 895661ae98f0249f50280b4acfb9dda70b76d7e9
# Date 2014-06-10 12:03:16 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/domctl: further fix to XEN_DOMCTL_[gs]etvcpuextstate
Do not clobber errors from certain codepaths. Clobbering of -EINVAL from
failing "evc->size <= PV_XSAVE_SIZE(_xcr0_accum)" was a pre-existing bug.
However, clobbering -EINVAL/-EFAULT from the get codepath was a bug
unintentionally introduced by 090ca8c1 "x86/domctl: two functional fixes to
XEN_DOMCTL_[gs]etvcpuextstate".
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
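A minimal sketch of the error handling pattern the two fixes converge on
(names taken from the hunks below; an illustration, not the patched code
itself): each guest copy runs only when no earlier step failed, so the
first error is the one reported, and the vcpu is unpaused on every path.

    int ret = 0;

    vcpu_pause(v);

    if ( evc->size != size || evc->xfeature_mask != xfeature_mask )
        ret = -EINVAL;

    if ( !ret && copy_to_guest_offset(evc->buffer, offset,
                                      (void *)&v->arch.xcr0,
                                      sizeof(v->arch.xcr0)) )
        ret = -EFAULT;
    offset += sizeof(v->arch.xcr0);

    /* ... the remaining copies, each guarded by "!ret" ... */

    vcpu_unpause(v);    /* reached regardless of which step failed */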
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1089,45 +1089,48 @@ long arch_do_domctl(
((v = d->vcpu[evc->vcpu]) == NULL) )
goto vcpuextstate_out;
+ ret = -EINVAL;
+ if ( v == current ) /* no vcpu_pause() */
+ goto vcpuextstate_out;
+
if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
{
- unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
+ unsigned int size;
+
+ ret = 0;
+ vcpu_pause(v);
- if ( !evc->size && !evc->xfeature_mask )
+ size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
+ if ( (!evc->size && !evc->xfeature_mask) ||
+ guest_handle_is_null(evc->buffer) )
{
evc->xfeature_mask = xfeature_mask;
evc->size = size;
- ret = 0;
+ vcpu_unpause(v);
goto vcpuextstate_out;
}
+
if ( evc->size != size || evc->xfeature_mask != xfeature_mask )
- {
ret = -EINVAL;
- goto vcpuextstate_out;
- }
- if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
- offset, (void *)&v->arch.xcr0,
- sizeof(v->arch.xcr0)) )
- {
+
+ if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+ (void *)&v->arch.xcr0,
+ sizeof(v->arch.xcr0)) )
ret = -EFAULT;
- goto vcpuextstate_out;
- }
+
offset += sizeof(v->arch.xcr0);
- if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
- offset, (void *)&v->arch.xcr0_accum,
- sizeof(v->arch.xcr0_accum)) )
- {
+ if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+ (void *)&v->arch.xcr0_accum,
+ sizeof(v->arch.xcr0_accum)) )
ret = -EFAULT;
- goto vcpuextstate_out;
- }
+
offset += sizeof(v->arch.xcr0_accum);
- if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
- offset, (void *)v->arch.xsave_area,
- size - 2 * sizeof(uint64_t)) )
- {
+ if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+ (void *)v->arch.xsave_area,
+ size - 2 * sizeof(uint64_t)) )
ret = -EFAULT;
- goto vcpuextstate_out;
- }
+
+ vcpu_unpause(v);
}
else
{
@@ -1176,12 +1179,14 @@ long arch_do_domctl(
if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) )
{
+ vcpu_pause(v);
v->arch.xcr0 = _xcr0;
v->arch.xcr0_accum = _xcr0_accum;
if ( _xcr0_accum & XSTATE_NONLAZY )
v->arch.nonlazy_xstate_used = 1;
memcpy(v->arch.xsave_area, _xsave_area,
evc->size - 2 * sizeof(uint64_t));
+ vcpu_unpause(v);
}
else
ret = -EINVAL;
@@ -1189,8 +1194,6 @@ long arch_do_domctl(
xfree(receive_buf);
}
- ret = 0;
-
vcpuextstate_out:
if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
copyback = 1;
@ -1,108 +0,0 @@
# Commit 39ede234d1fd683430ffb1784d6d35b096f16457
# Date 2014-06-05 17:53:35 +0200
# Author Roger Pau Monné <roger.pau@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: fix reboot/shutdown with running HVM guests
If there's a guest using VMX/SVM when the hypervisor shuts down, this
can lead to the following crash due to VMX/SVM functions being called
after hvm_cpu_down has been called. In order to prevent that, check in
{svm/vmx}_ctxt_switch_from that the cpu virtualization extensions are
still enabled.
(XEN) Domain 0 shutdown: rebooting machine.
(XEN) Assertion 'read_cr0() & X86_CR0_TS' failed at vmx.c:644
(XEN) ----[ Xen-4.5-unstable x86_64 debug=y Tainted: C ]----
(XEN) CPU: 0
(XEN) RIP: e008:[<ffff82d0801d90ce>] vmx_ctxt_switch_from+0x1e/0x14c
...
(XEN) Xen call trace:
(XEN) [<ffff82d0801d90ce>] vmx_ctxt_switch_from+0x1e/0x14c
(XEN) [<ffff82d08015d129>] __context_switch+0x127/0x462
(XEN) [<ffff82d080160acf>] __sync_local_execstate+0x6a/0x8b
(XEN) [<ffff82d080160af9>] sync_local_execstate+0x9/0xb
(XEN) [<ffff82d080161728>] map_domain_page+0x88/0x4de
(XEN) [<ffff82d08014e721>] map_vtd_domain_page+0xd/0xf
(XEN) [<ffff82d08014cda2>] io_apic_read_remap_rte+0x158/0x29f
(XEN) [<ffff82d0801448a8>] iommu_read_apic_from_ire+0x27/0x29
(XEN) [<ffff82d080165625>] io_apic_read+0x17/0x65
(XEN) [<ffff82d080166143>] __ioapic_read_entry+0x38/0x61
(XEN) [<ffff82d080166aa8>] clear_IO_APIC_pin+0x1a/0xf3
(XEN) [<ffff82d080166bae>] clear_IO_APIC+0x2d/0x60
(XEN) [<ffff82d080166f63>] disable_IO_APIC+0xd/0x81
(XEN) [<ffff82d08018228b>] smp_send_stop+0x58/0x68
(XEN) [<ffff82d080181aa7>] machine_restart+0x80/0x20a
(XEN) [<ffff82d080181c3c>] __machine_restart+0xb/0xf
(XEN) [<ffff82d080128fb9>] smp_call_function_interrupt+0x99/0xc0
(XEN) [<ffff82d080182330>] call_function_interrupt+0x33/0x43
(XEN) [<ffff82d08016bd89>] do_IRQ+0x9e/0x63a
(XEN) [<ffff82d08016406f>] common_interrupt+0x5f/0x70
(XEN) [<ffff82d0801a8600>] mwait_idle+0x29c/0x2f7
(XEN) [<ffff82d08015cf67>] idle_loop+0x58/0x76
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) Assertion 'read_cr0() & X86_CR0_TS' failed at vmx.c:644
(XEN) ****************************************
Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
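The vmxon change follows Xen's standard per-CPU variable pattern; a
condensed sketch of the three pieces involved (reduced from the hunks
below):

    /* vmcs.c: the definition loses its "static" so other files can see it */
    DEFINE_PER_CPU(bool_t, vmxon);

    /* vmcs.h: matching declaration exported to users */
    DECLARE_PER_CPU(bool_t, vmxon);

    /* vmx.c: consult the current CPU's instance before touching VMX state */
    static void vmx_ctxt_switch_from(struct vcpu *v)
    {
        if ( unlikely(!this_cpu(vmxon)) )
            return;                 /* VMX already torn down on this CPU */
        /* ... normal save path ... */
    }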
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -861,6 +861,14 @@ static void svm_ctxt_switch_from(struct
{
int cpu = smp_processor_id();
+ /*
+ * Return early if trying to do a context switch without SVM enabled,
+ * this can happen when the hypervisor shuts down with HVM guests
+ * still running.
+ */
+ if ( unlikely((read_efer() & EFER_SVME) == 0) )
+ return;
+
svm_fpu_leave(v);
svm_save_dr(v);
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -74,7 +74,7 @@ u64 vmx_ept_vpid_cap __read_mostly;
static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, vmxon_region);
static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
-static DEFINE_PER_CPU(bool_t, vmxon);
+DEFINE_PER_CPU(bool_t, vmxon);
static u32 vmcs_revision_id __read_mostly;
u64 __read_mostly vmx_basic_msr;
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -617,6 +617,14 @@ static void vmx_fpu_leave(struct vcpu *v
static void vmx_ctxt_switch_from(struct vcpu *v)
{
+ /*
+ * Return early if trying to do a context switch without VMX enabled,
+ * this can happen when the hypervisor shuts down with HVM guests
+ * still running.
+ */
+ if ( unlikely(!this_cpu(vmxon)) )
+ return;
+
vmx_fpu_leave(v);
vmx_save_guest_msrs(v);
vmx_restore_host_msrs();
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -483,6 +483,8 @@ void virtual_vmcs_exit(void *vvmcs);
u64 virtual_vmcs_vmread(void *vvmcs, u32 vmcs_encoding);
void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val);
+DECLARE_PER_CPU(bool_t, vmxon);
+
#endif /* ASM_X86_HVM_VMX_VMCS_H__ */
/*
@ -1,26 +0,0 @@
# Commit b9ae60907e6dbc686403e52a7e61a6f856401a1b
# Date 2014-06-10 12:04:08 +0200
# Author Juergen Gross <jgross@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
avoid crash on HVM domain destroy with PCI passthrough
c/s bac6334b5 "move domain to cpupool0 before destroying it" introduced a
problem when destroying an HVM domain with PCI passthrough enabled. The
moving of the domain to cpupool0 includes moving the pirqs to the cpupool0
cpus, but the event channel infrastructure is already unusable for the
domain. So just avoid moving pirqs for dying domains.
Signed-off-by: Juergen Gross <jgross@suse.com>
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -297,7 +297,8 @@ int sched_move_domain(struct domain *d,
spin_unlock_irq(lock);
v->sched_priv = vcpu_priv[v->vcpu_id];
- evtchn_move_pirqs(v);
+ if ( !d->is_dying )
+ evtchn_move_pirqs(v);
new_p = cpumask_cycle(new_p, c->cpu_valid);
@ -1,79 +0,0 @@
# Commit 584287380baf81e5acdd9dc7dfc7ffccd1e9a856
# Date 2014-06-10 13:12:05 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: refine SMEP test in HVM_CR4_GUEST_RESERVED_BITS()
Andrew validly points out that the use of the macro on the restore path
can't rely on the CPUID bits for the guest already being in place (as
their setting by the tool stack in turn requires the other restore
operations already having taken place). And even worse, using
hvm_cpuid() is invalid here because that function assumes to be used in
the context of the vCPU in question.
Reverting to the behavior prior to the change from checking
cpu_has_sm?p to hvm_vcpu_has_sm?p() would break the other (non-restore)
use of the macro. So let's revert to the prior behavior only for the
restore path, by adding a respective second parameter to the macro.
Obviously the two cpu_has_* uses in the macro should really also be
converted to hvm_cpuid() based checks at least for the non-restore
path.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -858,7 +858,7 @@ static int hvm_load_cpu_ctxt(struct doma
return -EINVAL;
}
- if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS(v) )
+ if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS(v, 1) )
{
printk(XENLOG_G_ERR "HVM%d restore: bad CR4 %#" PRIx64 "\n",
d->domain_id, ctxt.cr4);
@@ -1977,7 +1977,7 @@ int hvm_set_cr4(unsigned long value)
struct vcpu *v = current;
unsigned long old_cr;
- if ( value & HVM_CR4_GUEST_RESERVED_BITS(v) )
+ if ( value & HVM_CR4_GUEST_RESERVED_BITS(v, 0) )
{
HVM_DBG_LOG(DBG_LEVEL_1,
"Guest attempts to set reserved bit in CR4: %lx",
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -373,18 +373,24 @@ static inline bool_t hvm_vcpu_has_smep(v
(X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
/* These bits in CR4 cannot be set by the guest. */
-#define HVM_CR4_GUEST_RESERVED_BITS(_v) \
+#define HVM_CR4_GUEST_RESERVED_BITS(v, restore) ({ \
+ const struct vcpu *_v = (v); \
+ bool_t _restore = !!(restore); \
+ ASSERT((_restore) || _v == current); \
(~((unsigned long) \
(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT | \
- (hvm_vcpu_has_smep() ? X86_CR4_SMEP : 0) | \
+ (((_restore) ? cpu_has_smep : \
+ hvm_vcpu_has_smep()) ? \
+ X86_CR4_SMEP : 0) | \
(cpu_has_fsgsbase ? X86_CR4_FSGSBASE : 0) | \
- ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\
- ? X86_CR4_VMXE : 0) | \
- (cpu_has_pcid ? X86_CR4_PCIDE : 0) | \
- (cpu_has_xsave ? X86_CR4_OSXSAVE : 0))))
+ ((nestedhvm_enabled(_v->domain) && cpu_has_vmx) \
+ ? X86_CR4_VMXE : 0) | \
+ (cpu_has_pcid ? X86_CR4_PCIDE : 0) | \
+ (cpu_has_xsave ? X86_CR4_OSXSAVE : 0)))); \
+})
/* These exceptions must always be intercepted. */
#define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op))
@ -1,87 +0,0 @@
# Commit 323338f86fb6cd6f6dba4f59a84eed71b3552d21
# Date 2014-06-16 11:59:32 +0200
# Author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mce: don't spam the console with "CPUx: Temperature z"
If the machine has been quite busy it ends up with these messages
printed on the hypervisor console:
(XEN) CPU3: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature above threshold
(XEN) CPU0: Running in modulated clock mode
(XEN) CPU1: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal
While the state changes are important, the non-altered state
information is not needed. As such add a latch mechanism to only print
the information if it has changed since the last update (and the
hardware doesn't properly suppress redundant notifications).
This was observed on Intel DQ67SW,
BIOS SWQ6710H.86A.0066.2012.1105.1504 11/05/2012
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Christoph Egger <chegger@amazon.de>
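The latch boils down to a per-CPU "last reported state" compare; a
condensed sketch of the mechanism (using the MCE_RING bit definition
from the patch below):

    static DEFINE_PER_CPU(int, last_state);   /* -1 = unknown: report once */

    static void maybe_report(unsigned int cpu, uint64_t msr_content)
    {
        int *last = &per_cpu(last_state, cpu);
        int cur = msr_content & MCE_RING;      /* bit 0: above threshold */

        if ( *last == cur )
            return;                            /* unchanged: stay quiet */
        *last = cur;
        printk("CPU%u: %s\n", cpu,
               cur ? "Temperature above threshold"
                   : "Temperature/speed normal");
    }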
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -49,11 +49,15 @@ static int __read_mostly nr_intel_ext_ms
#define INTEL_SRAR_INSTR_FETCH 0x150
#ifdef CONFIG_X86_MCE_THERMAL
+#define MCE_RING 0x1
+static DEFINE_PER_CPU(int, last_state);
+
static void intel_thermal_interrupt(struct cpu_user_regs *regs)
{
uint64_t msr_content;
unsigned int cpu = smp_processor_id();
static DEFINE_PER_CPU(s_time_t, next);
+ int *this_last_state;
ack_APIC_irq();
@@ -62,13 +66,17 @@ static void intel_thermal_interrupt(stru
per_cpu(next, cpu) = NOW() + MILLISECS(5000);
rdmsrl(MSR_IA32_THERM_STATUS, msr_content);
- if (msr_content & 0x1) {
- printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
- printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
- cpu);
+ this_last_state = &per_cpu(last_state, cpu);
+ if ( *this_last_state == (msr_content & MCE_RING) )
+ return;
+ *this_last_state = msr_content & MCE_RING;
+ if ( msr_content & MCE_RING )
+ {
+ printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu);
+ printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu);
add_taint(TAINT_MACHINE_CHECK);
} else {
- printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu);
}
}
@@ -802,6 +810,7 @@ static int cpu_mcabank_alloc(unsigned in
per_cpu(no_cmci_banks, cpu) = cmci;
per_cpu(mce_banks_owned, cpu) = owned;
+ per_cpu(last_state, cpu) = -1;
return 0;
out:
@ -1,39 +0,0 @@
References: bnc#880751 CVE-2014-4021 XSA-100
# Commit 4bd78937ec324bcef4e29ef951e0ff9815770de1
# Date 2014-06-17 15:21:10 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
page-alloc: scrub pages used by hypervisor upon freeing
... unless they're part of a fully separate pool (and hence can't ever
be used for guest allocations).
This is CVE-2014-4021 / XSA-100.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
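scrub_one_page() simply overwrites the page before it can be handed out
again; conceptually it amounts to the following (a sketch, not the actual
implementation, which also has to map the page first):

    static void scrub_page(void *p)
    {
    #ifndef NDEBUG
        memset(p, 0xc2, PAGE_SIZE);   /* poison pattern in debug builds */
    #else
        memset(p, 0, PAGE_SIZE);      /* plain zeroing in release builds */
    #endif
    }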
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1409,7 +1409,10 @@ void free_xenheap_pages(void *v, unsigne
pg = virt_to_page(v);
for ( i = 0; i < (1u << order); i++ )
+ {
+ scrub_one_page(&pg[i]);
pg[i].count_info &= ~PGC_xen_heap;
+ }
free_heap_pages(pg, order);
}
@@ -1579,6 +1582,8 @@ void free_domheap_pages(struct page_info
else
{
/* Freeing anonymous domain-heap pages. */
+ for ( i = 0; i < (1 << order); i++ )
+ scrub_one_page(&pg[i]);
free_heap_pages(pg, order);
drop_dom_ref = 0;
}
@ -1,109 +0,0 @@
# Commit 84c340ba4c3eb99278b6ba885616bb183b88ad67
# Date 2014-06-18 15:50:02 +0200
# Author Malcolm Crossley <malcolm.crossley@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
IOMMU: prevent VT-d device IOTLB operations on wrong IOMMU
PCIe ATS allows devices to contain IOTLBs, but the VT-d code was iterating
over all ATS-capable devices and issuing IOTLB operations for all IOMMUs,
even though each ATS device is only accessible via one particular IOMMU.
Issuing an IOMMU operation to a device not accessible via that IOMMU results
in an IOMMU timeout because the device does not reply. VT-d IOMMU timeouts
result in a Xen panic.
Therefore this bug prevents any Intel system with 2 or more ATS enabled IOMMUs,
each with an ATS device connected to them, from booting Xen.
The patch adds an IOMMU pointer to the ATS device struct so the VT-d code can
ensure it does not issue IOMMU ATS operations on the wrong IOMMU. A void
pointer has to be used because AMD and Intel IOMMU implementations do not have
a common IOMMU structure or indexing mechanism.
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -163,7 +163,7 @@ static void amd_iommu_setup_domain_devic
!pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
{
if ( devfn == pdev->devfn )
- enable_ats_device(iommu->seg, bus, devfn);
+ enable_ats_device(iommu->seg, bus, devfn, iommu);
amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
--- a/xen/drivers/passthrough/ats.h
+++ b/xen/drivers/passthrough/ats.h
@@ -24,6 +24,7 @@ struct pci_ats_dev {
u8 bus;
u8 devfn;
u16 ats_queue_depth; /* ATS device invalidation queue depth */
+ const void *iommu; /* No common IOMMU struct so use void pointer */
};
#define ATS_REG_CAP 4
@@ -34,7 +35,7 @@ struct pci_ats_dev {
extern struct list_head ats_devices;
extern bool_t ats_enabled;
-int enable_ats_device(int seg, int bus, int devfn);
+int enable_ats_device(int seg, int bus, int devfn, const void *iommu);
void disable_ats_device(int seg, int bus, int devfn);
struct pci_ats_dev *get_ats_device(int seg, int bus, int devfn);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1442,7 +1442,7 @@ static int domain_context_mapping(
ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
pdev);
if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
- enable_ats_device(seg, bus, devfn);
+ enable_ats_device(seg, bus, devfn, drhd->iommu);
break;
@@ -1930,7 +1930,7 @@ static int intel_iommu_enable_device(str
if ( ret <= 0 )
return ret;
- ret = enable_ats_device(pdev->seg, pdev->bus, pdev->devfn);
+ ret = enable_ats_device(pdev->seg, pdev->bus, pdev->devfn, drhd->iommu);
return ret >= 0 ? 0 : ret;
}
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -120,6 +120,10 @@ int dev_invalidate_iotlb(struct iommu *i
{
sid = (pdev->bus << 8) | pdev->devfn;
+ /* Only invalidate devices that belong to this IOMMU */
+ if ( pdev->iommu != iommu )
+ continue;
+
switch ( type ) {
case DMA_TLB_DSI_FLUSH:
if ( !device_in_domain(iommu, pdev, did) )
--- a/xen/drivers/passthrough/x86/ats.c
+++ b/xen/drivers/passthrough/x86/ats.c
@@ -23,7 +23,7 @@ LIST_HEAD(ats_devices);
bool_t __read_mostly ats_enabled = 1;
boolean_param("ats", ats_enabled);
-int enable_ats_device(int seg, int bus, int devfn)
+int enable_ats_device(int seg, int bus, int devfn, const void *iommu)
{
struct pci_ats_dev *pdev = NULL;
u32 value;
@@ -66,6 +66,7 @@ int enable_ats_device(int seg, int bus,
pdev->seg = seg;
pdev->bus = bus;
pdev->devfn = devfn;
+ pdev->iommu = iommu;
value = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
PCI_FUNC(devfn), pos + ATS_REG_CAP);
pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK ?:
@ -1,105 +0,0 @@
References: bnc#882127
# Commit e0fe297dabc96d8161d568f19a99722c4739b9f9
# Date 2014-06-18 15:53:27 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/EFI: allow FPU/XMM use in runtime service functions
UEFI spec update 2.4B developed a requirement to enter runtime service
functions with CR0.TS (and CR0.EM) clear, thus making feasible the
already previously stated permission for these functions to use some of
the XMM registers. Enforce this requirement (along with the connected
ones on FPU control word and MXCSR) by going through a full FPU save
cycle (if the FPU was dirty) in efi_rs_enter() (along with loading the
specified values into the other two registers).
Note that the UEFI spec mandates that extension registers other than
XMM ones (for our purposes all that get restored eagerly) are preserved
across runtime function calls, hence there's nothing we need to restore
in efi_rs_leave() (they do get saved, but just for simplicity's sake).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/efi/runtime.c
+++ b/xen/arch/x86/efi/runtime.c
@@ -10,6 +10,8 @@ DEFINE_XEN_GUEST_HANDLE(CHAR16);
#ifndef COMPAT
+# include <asm/i387.h>
+# include <asm/xstate.h>
# include <public/platform.h>
const bool_t efi_enabled = 1;
@@ -45,8 +47,14 @@ const struct efi_pci_rom *__read_mostly
unsigned long efi_rs_enter(void)
{
+ static const u16 fcw = FCW_DEFAULT;
+ static const u32 mxcsr = MXCSR_DEFAULT;
unsigned long cr3 = read_cr3();
+ save_fpu_enable();
+ asm volatile ( "fldcw %0" :: "m" (fcw) );
+ asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) );
+
spin_lock(&efi_rs_lock);
/* prevent fixup_page_fault() from doing anything */
@@ -82,6 +90,7 @@ void efi_rs_leave(unsigned long cr3)
}
irq_exit();
spin_unlock(&efi_rs_lock);
+ stts();
}
unsigned long efi_get_time(void)
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -266,10 +266,10 @@ void vcpu_restore_fpu_lazy(struct vcpu *
* On each context switch, save the necessary FPU info of VCPU being switch
* out. It dispatches saving operation based on CPU's capability.
*/
-void vcpu_save_fpu(struct vcpu *v)
+static bool_t _vcpu_save_fpu(struct vcpu *v)
{
if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used )
- return;
+ return 0;
ASSERT(!is_idle_vcpu(v));
@@ -284,9 +284,22 @@ void vcpu_save_fpu(struct vcpu *v)
fpu_fsave(v);
v->fpu_dirtied = 0;
+
+ return 1;
+}
+
+void vcpu_save_fpu(struct vcpu *v)
+{
+ _vcpu_save_fpu(v);
stts();
}
+void save_fpu_enable(void)
+{
+ if ( !_vcpu_save_fpu(current) )
+ clts();
+}
+
/* Initialize FPU's context save area */
int vcpu_init_fpu(struct vcpu *v)
{
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -38,6 +38,7 @@ struct ix87_state {
void vcpu_restore_fpu_eager(struct vcpu *v);
void vcpu_restore_fpu_lazy(struct vcpu *v);
void vcpu_save_fpu(struct vcpu *v);
+void save_fpu_enable(void);
int vcpu_init_fpu(struct vcpu *v);
void vcpu_destroy_fpu(struct vcpu *v);
@ -15,9 +15,11 @@ features the guest is shown rather than the host's.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -828,6 +828,73 @@ static bool_t hvm_efer_valid(struct doma
Index: xen-4.4.1-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.4.1-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.4.1-testing/xen/arch/x86/hvm/hvm.c
@@ -827,6 +827,73 @@ static bool_t hvm_efer_valid(struct doma
((value & (EFER_LME|EFER_LMA)) == EFER_LMA));
}
@ -91,7 +93,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
int vcpuid;
@@ -858,7 +925,7 @@ static int hvm_load_cpu_ctxt(struct doma
@@ -857,7 +924,7 @@ static int hvm_load_cpu_ctxt(struct doma
return -EINVAL;
}
@ -100,7 +102,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
{
printk(XENLOG_G_ERR "HVM%d restore: bad CR4 %#" PRIx64 "\n",
d->domain_id, ctxt.cr4);
@@ -1977,7 +2044,7 @@ int hvm_set_cr4(unsigned long value)
@@ -1981,7 +2048,7 @@ int hvm_set_cr4(unsigned long value)
struct vcpu *v = current;
unsigned long old_cr;
@ -109,8 +111,10 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
{
HVM_DBG_LOG(DBG_LEVEL_1,
"Guest attempts to set reserved bit in CR4: %lx",
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
Index: xen-4.4.1-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-4.4.1-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-4.4.1-testing/xen/include/asm-x86/hvm/hvm.h
@@ -347,51 +347,10 @@ static inline int hvm_event_pending(stru
return hvm_funcs.event_pending(v);
}
@ -1,103 +0,0 @@
# Commit fd33987ba27607c3cc7da258cf1d86d21beeb735
# Date 2014-06-30 15:57:40 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d/ATS: correct and clean up dev_invalidate_iotlb()
While this was intended to be cleanup only (replacing the two bogus
"ret |= " constructs, plus a simple formatting correction), it now also
also
- fixes the bit manipulations for size_order > 0
a) correct an off-by-one in the use of size_order for shifting (till
now double the requested size got invalidated)
b) in fact setting bit 12 and up if necessary (without which too
small a region might have got invalidated)
c) making them capable of dealing with regions of 4Gb size and up
- corrects the return value handling, such that a later iteration's
success won't clear an earlier iteration's error indication
- uses PCI_BDF2() instead of open coding it
- bail immediately on a bad passed-in invalidation type, rather than
repeatedly printing the same message for each ATS-capable device, at
once also no longer hiding that failure from the caller
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Yang Zhang <yang.z.zhang@intel.com>
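A worked example of the corrected address encoding for the PSI flush case
(my arithmetic, derived from the hunk below): invalidating four 4K pages
(size_order = 2) starting at 0x12344000.

    u64 addr = 0x12344000ULL;                   /* 16KiB-aligned base */
    unsigned int size_order = 2;                /* 2^2 = 4 pages */

    addr &= ~0ULL << PAGE_SHIFT_4K;             /* clear bits 11:0 */
    /* sbit = 1 since size_order > 0 */
    addr &= ~((u64)PAGE_SIZE_4K << (size_order - 1)); /* zero PFN bit 1 */
    addr |= (((u64)1 << (size_order - 1)) - 1)
            << PAGE_SHIFT_4K;                   /* set PFN bit 0 */
    /* addr is now 0x12345000: the lowest clear PFN bit (bit 1) tells the
     * device to mask two PFN bits, so the invalidation covers exactly
     * 2^2 = 4 pages -- not double that, as before the fix. */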
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -110,21 +110,23 @@ int dev_invalidate_iotlb(struct iommu *i
u64 addr, unsigned int size_order, u64 type)
{
struct pci_ats_dev *pdev;
- int sbit, ret = 0;
- u16 sid;
+ int ret = 0;
if ( !ecap_dev_iotlb(iommu->ecap) )
return ret;
list_for_each_entry( pdev, &ats_devices, list )
{
- sid = (pdev->bus << 8) | pdev->devfn;
+ u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
+ bool_t sbit;
+ int rc = 0;
/* Only invalidate devices that belong to this IOMMU */
if ( pdev->iommu != iommu )
continue;
- switch ( type ) {
+ switch ( type )
+ {
case DMA_TLB_DSI_FLUSH:
if ( !device_in_domain(iommu, pdev, did) )
break;
@@ -133,32 +135,37 @@ int dev_invalidate_iotlb(struct iommu *i
/* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
sbit = 1;
addr = (~0 << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
- ret |= qinval_device_iotlb(iommu, pdev->ats_queue_depth,
- sid, sbit, addr);
+ rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
+ sid, sbit, addr);
break;
case DMA_TLB_PSI_FLUSH:
if ( !device_in_domain(iommu, pdev, did) )
break;
- addr &= ~0 << (PAGE_SHIFT + size_order);
-
/* if size <= 4K, set sbit = 0, else set sbit = 1 */
sbit = size_order ? 1 : 0;
/* clear lower bits */
- addr &= (~0 << (PAGE_SHIFT + size_order));
+ addr &= ~0 << PAGE_SHIFT_4K;
/* if sbit == 1, zero out size_order bit and set lower bits to 1 */
if ( sbit )
- addr &= (~0 & ~(1 << (PAGE_SHIFT + size_order)));
+ {
+ addr &= ~((u64)PAGE_SIZE_4K << (size_order - 1));
+ addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
+ }
- ret |= qinval_device_iotlb(iommu, pdev->ats_queue_depth,
- sid, sbit, addr);
+ rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
+ sid, sbit, addr);
break;
default:
dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
- break;
+ return -EOPNOTSUPP;
}
+
+ if ( !ret )
+ ret = rc;
}
+
return ret;
}
@ -1,218 +0,0 @@
# Commit 3eb1c708ab0fe1067a436498a684907afa14dacf
# Date 2014-07-03 16:51:13 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
properly reference count DOMCTL_{,un}pausedomain hypercalls
For safety reasons, c/s 6ae2df93c27 "mem_access: Add helper API to setup
ring and enable mem_access" has to pause the domain while it performs a set of
operations.
However, without properly reference-counted hypercalls, xc_mem_event_enable()
now unconditionally unpauses a previously paused domain.
To prevent toolstack software running wild, there is an arbitrary limit of 255
on the toolstack pause count. This is high enough for several components of
the toolstack to safely use, but prevents over/underflow of d->pause_count.
The previous domain_{,un}pause_by_systemcontroller() functions are updated to
return an error code. domain_pause_by_systemcontroller() is modified to have
a common stub and take a pause_fn pointer, allowing for both sync and nosync
domain pauses. domain_pause_for_debugger() has a hand-rolled nosync pause
replaced with the new domain_pause_by_systemcontroller_nosync(), and has its
variables shuffled slightly to avoid rereading current multiple times.
Suggested-by: Don Slutz <dslutz@verizon.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
With a couple of formatting adjustments:
Reviewed-by: Jan Beulich <jbeulich@suse.com>
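Both new helpers are instances of the same lock-free bounded-counter
idiom, sketched here in isolation (illustrative only; the real code is in
the hunks below):

    /* Atomically add "delta" to *ctr, refusing to leave [0, limit]. */
    static int bounded_adjust(int *ctr, int delta, int limit)
    {
        int old, new, prev = *ctr;

        do {
            old = prev;
            new = old + delta;
            if ( new < 0 || new > limit )
                return -1;           /* would under-/overflow: refuse */
            prev = cmpxchg(ctr, old, new);
        } while ( prev != old );     /* lost a race: retry with new value */

        return 0;
    }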
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1019,7 +1019,7 @@ long arch_do_domctl(
struct vcpu *v;
ret = -EBUSY;
- if ( !d->is_paused_by_controller )
+ if ( !d->controller_pause_count )
break;
ret = -EINVAL;
if ( domctl->u.gdbsx_pauseunp_vcpu.vcpu >= MAX_VIRT_CPUS ||
@@ -1035,7 +1035,7 @@ long arch_do_domctl(
struct vcpu *v;
ret = -EBUSY;
- if ( !d->is_paused_by_controller )
+ if ( !d->controller_pause_count )
break;
ret = -EINVAL;
if ( domctl->u.gdbsx_pauseunp_vcpu.vcpu >= MAX_VIRT_CPUS ||
@@ -1053,7 +1053,7 @@ long arch_do_domctl(
struct vcpu *v;
domctl->u.gdbsx_domstatus.vcpu_id = -1;
- domctl->u.gdbsx_domstatus.paused = d->is_paused_by_controller;
+ domctl->u.gdbsx_domstatus.paused = d->controller_pause_count > 0;
if ( domctl->u.gdbsx_domstatus.paused )
{
for_each_vcpu ( d, v )
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -264,7 +264,7 @@ struct domain *domain_create(
if ( (err = xsm_domain_create(XSM_HOOK, d, ssidref)) != 0 )
goto fail;
- d->is_paused_by_controller = 1;
+ d->controller_pause_count = 1;
atomic_inc(&d->pause_count);
if ( domid )
@@ -680,18 +680,13 @@ void vcpu_end_shutdown_deferral(struct v
#ifdef HAS_GDBSX
void domain_pause_for_debugger(void)
{
- struct domain *d = current->domain;
- struct vcpu *v;
-
- atomic_inc(&d->pause_count);
- if ( test_and_set_bool(d->is_paused_by_controller) )
- domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */
+ struct vcpu *curr = current;
+ struct domain *d = curr->domain;
- for_each_vcpu ( d, v )
- vcpu_sleep_nosync(v);
+ domain_pause_by_systemcontroller_nosync(d);
/* if gdbsx active, we just need to pause the domain */
- if (current->arch.gdbsx_vcpu_event == 0)
+ if ( curr->arch.gdbsx_vcpu_event == 0 )
send_global_virq(VIRQ_DEBUGGER);
}
#endif
@@ -839,17 +834,49 @@ void domain_unpause(struct domain *d)
vcpu_wake(v);
}
-void domain_pause_by_systemcontroller(struct domain *d)
+int __domain_pause_by_systemcontroller(struct domain *d,
+ void (*pause_fn)(struct domain *d))
{
- domain_pause(d);
- if ( test_and_set_bool(d->is_paused_by_controller) )
- domain_unpause(d);
+ int old, new, prev = d->controller_pause_count;
+
+ do
+ {
+ old = prev;
+ new = old + 1;
+
+ /*
+ * Limit the toolstack pause count to an arbitrary 255 to prevent the
+ * toolstack overflowing d->pause_count with many repeated hypercalls.
+ */
+ if ( new > 255 )
+ return -EUSERS;
+
+ prev = cmpxchg(&d->controller_pause_count, old, new);
+ } while ( prev != old );
+
+ pause_fn(d);
+
+ return 0;
}
-void domain_unpause_by_systemcontroller(struct domain *d)
+int domain_unpause_by_systemcontroller(struct domain *d)
{
- if ( test_and_clear_bool(d->is_paused_by_controller) )
- domain_unpause(d);
+ int old, new, prev = d->controller_pause_count;
+
+ do
+ {
+ old = prev;
+ new = old - 1;
+
+ if ( new < 0 )
+ return -EINVAL;
+
+ prev = cmpxchg(&d->controller_pause_count, old, new);
+ } while ( prev != old );
+
+ domain_unpause(d);
+
+ return 0;
}
int vcpu_reset(struct vcpu *v)
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -181,7 +181,7 @@ void getdomaininfo(struct domain *d, str
info->flags = (info->nr_online_vcpus ? flags : 0) |
((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying : 0) |
(d->is_shut_down ? XEN_DOMINF_shutdown : 0) |
- (d->is_paused_by_controller ? XEN_DOMINF_paused : 0) |
+ (d->controller_pause_count > 0 ? XEN_DOMINF_paused : 0) |
(d->debugger_attached ? XEN_DOMINF_debugged : 0) |
d->shutdown_code << XEN_DOMINF_shutdownshift;
@@ -384,22 +384,14 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
break;
case XEN_DOMCTL_pausedomain:
- {
ret = -EINVAL;
if ( d != current->domain )
- {
- domain_pause_by_systemcontroller(d);
- ret = 0;
- }
- }
- break;
+ ret = domain_pause_by_systemcontroller(d);
+ break;
case XEN_DOMCTL_unpausedomain:
- {
- domain_unpause_by_systemcontroller(d);
- ret = 0;
- }
- break;
+ ret = domain_unpause_by_systemcontroller(d);
+ break;
case XEN_DOMCTL_resumedomain:
{
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -338,7 +338,7 @@ struct domain
/* Is this guest dying (i.e., a zombie)? */
enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
/* Domain is paused by controller software? */
- bool_t is_paused_by_controller;
+ int controller_pause_count;
/* Domain's VCPUs are pinned 1:1 to physical CPUs? */
bool_t is_pinned;
@@ -742,8 +742,17 @@ void domain_pause(struct domain *d);
void domain_pause_nosync(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
-void domain_pause_by_systemcontroller(struct domain *d);
-void domain_unpause_by_systemcontroller(struct domain *d);
+int domain_unpause_by_systemcontroller(struct domain *d);
+int __domain_pause_by_systemcontroller(struct domain *d,
+ void (*pause_fn)(struct domain *d));
+static inline int domain_pause_by_systemcontroller(struct domain *d)
+{
+ return __domain_pause_by_systemcontroller(d, domain_pause);
+}
+static inline int domain_pause_by_systemcontroller_nosync(struct domain *d)
+{
+ return __domain_pause_by_systemcontroller(d, domain_pause_nosync);
+}
void cpu_init(void);
struct scheduler;
@ -1,46 +0,0 @@
# Commit 05377dede434c746e6708f055858378d20f619db
# Date 2014-07-23 18:03:19 +0200
# Author Juergen Gross <jgross@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
avoid crash when doing shutdown with active cpupools
When shutting down the machine while there are cpus in a cpupool other than
Pool-0, a crash is triggered due to cpupool handling rejecting offlining the
non-boot cpus in other cpupools.
It is easy to detect this case and allow offlining those cpus.
Reported-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: Stefan Bader <stefan.bader@canonical.com>
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -471,12 +471,24 @@ static void cpupool_cpu_add(unsigned int
*/
static int cpupool_cpu_remove(unsigned int cpu)
{
- int ret = 0;
+ int ret = -EBUSY;
+ struct cpupool **c;
spin_lock(&cpupool_lock);
- if ( !cpumask_test_cpu(cpu, cpupool0->cpu_valid))
- ret = -EBUSY;
+ if ( cpumask_test_cpu(cpu, cpupool0->cpu_valid) )
+ ret = 0;
else
+ {
+ for_each_cpupool(c)
+ {
+ if ( cpumask_test_cpu(cpu, (*c)->cpu_suspended ) )
+ {
+ ret = 0;
+ break;
+ }
+ }
+ }
+ if ( !ret )
cpumask_set_cpu(cpu, &cpupool_locked_cpus);
spin_unlock(&cpupool_lock);
@ -1,86 +0,0 @@
# Commit ee75480b3c8856db9ef1aa45418f35ec0d78989d
# Date 2014-07-23 18:07:11 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mem_event: validate the response vcpu_id before acting on it
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Reviewed-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Tested-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -596,11 +596,20 @@ int mem_sharing_sharing_resume(struct do
/* Get all requests off the ring */
while ( mem_event_get_response(d, &d->mem_event->share, &rsp) )
{
+ struct vcpu *v;
+
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
+
+ /* Validate the vcpu_id in the response. */
+ if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
+ continue;
+
+ v = d->vcpu[rsp.vcpu_id];
+
/* Unpause domain/vcpu */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
- vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+ vcpu_unpause(v);
}
return 0;
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1228,8 +1228,17 @@ void p2m_mem_paging_resume(struct domain
/* Pull all responses off the ring */
while( mem_event_get_response(d, &d->mem_event->paging, &rsp) )
{
+ struct vcpu *v;
+
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
+
+ /* Validate the vcpu_id in the response. */
+ if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
+ continue;
+
+ v = d->vcpu[rsp.vcpu_id];
+
/* Fix p2m entry if the page was not dropped */
if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
{
@@ -1248,7 +1257,7 @@ void p2m_mem_paging_resume(struct domain
}
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
- vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+ vcpu_unpause(v);
}
}
@@ -1356,11 +1365,20 @@ void p2m_mem_access_resume(struct domain
/* Pull all responses off the ring */
while( mem_event_get_response(d, &d->mem_event->access, &rsp) )
{
+ struct vcpu *v;
+
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
+
+ /* Validate the vcpu_id in the response. */
+ if ( (rsp.vcpu_id >= d->max_vcpus) || !d->vcpu[rsp.vcpu_id] )
+ continue;
+
+ v = d->vcpu[rsp.vcpu_id];
+
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
- vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+ vcpu_unpause(v);
}
}
@ -1,145 +0,0 @@
# Commit 868d9b99b39c53dc1f6ae9bfd7b148c206fd7240
# Date 2014-07-23 18:08:04 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mem_event: prevent underflow of vcpu pause counts
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Reviewed-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Tested-by: Aravindh Puthiyaparambil <aravindp@cisco.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4762,7 +4762,7 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_MODE_MASK) == HVMPME_mode_sync )
{
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
- vcpu_pause_nosync(v);
+ mem_event_vcpu_pause(v);
}
req.gfn = value;
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -655,6 +655,38 @@ int mem_event_domctl(struct domain *d, x
return rc;
}
+void mem_event_vcpu_pause(struct vcpu *v)
+{
+ ASSERT(v == current);
+
+ atomic_inc(&v->mem_event_pause_count);
+ vcpu_pause_nosync(v);
+}
+
+void mem_event_vcpu_unpause(struct vcpu *v)
+{
+ int old, new, prev = v->mem_event_pause_count.counter;
+
+ /* All unpause requests as a result of toolstack responses. Prevent
+ * underflow of the vcpu pause count. */
+ do
+ {
+ old = prev;
+ new = old - 1;
+
+ if ( new < 0 )
+ {
+ printk(XENLOG_G_WARNING
+ "d%d:v%d mem_event: Too many unpause attempts\n",
+ v->domain->domain_id, v->vcpu_id);
+ return;
+ }
+
+ prev = cmpxchg(&v->mem_event_pause_count.counter, old, new);
+ } while ( prev != old );
+
+ vcpu_unpause(v);
+}
/*
* Local variables:
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -568,7 +568,7 @@ int mem_sharing_notify_enomem(struct dom
if ( v->domain == d )
{
req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
- vcpu_pause_nosync(v);
+ mem_event_vcpu_pause(v);
}
req.p2mt = p2m_ram_shared;
@@ -609,7 +609,7 @@ int mem_sharing_sharing_resume(struct do
/* Unpause domain/vcpu */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
- vcpu_unpause(v);
+ mem_event_vcpu_unpause(v);
}
return 0;
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1094,7 +1094,7 @@ void p2m_mem_paging_populate(struct doma
/* Pause domain if request came from guest and gfn has paging type */
if ( p2m_is_paging(p2mt) && v->domain == d )
{
- vcpu_pause_nosync(v);
+ mem_event_vcpu_pause(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
/* No need to inform pager if the gfn is not in the page-out path */
@@ -1257,7 +1257,7 @@ void p2m_mem_paging_resume(struct domain
}
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
- vcpu_unpause(v);
+ mem_event_vcpu_unpause(v);
}
}
@@ -1352,7 +1352,7 @@ bool_t p2m_mem_access_check(paddr_t gpa,
/* Pause the current VCPU */
if ( p2ma != p2m_access_n2rwx )
- vcpu_pause_nosync(v);
+ mem_event_vcpu_pause(v);
/* VCPU may be paused, return whether we promoted automatically */
return (p2ma == p2m_access_n2rwx);
@@ -1378,7 +1378,7 @@ void p2m_mem_access_resume(struct domain
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
- vcpu_unpause(v);
+ mem_event_vcpu_unpause(v);
}
}
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -66,6 +66,9 @@ int do_mem_event_op(int op, uint32_t dom
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
XEN_GUEST_HANDLE_PARAM(void) u_domctl);
+void mem_event_vcpu_pause(struct vcpu *v);
+void mem_event_vcpu_unpause(struct vcpu *v);
+
#endif /* __MEM_EVENT_H__ */
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -189,6 +189,9 @@ struct vcpu
unsigned long pause_flags;
atomic_t pause_count;
+ /* VCPU paused for mem_event replies. */
+ atomic_t mem_event_pause_count;
+
/* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
evtchn_port_t virq_to_evtchn[NR_VIRQS];
spinlock_t virq_lock;
@ -0,0 +1,43 @@
From ef8cf0d4902c90595a0b0766e2a9a6ecbcbc4581 Mon Sep 17 00:00:00 2001
From: Chunyan Liu <cyliu@suse.com>
Date: Wed, 23 Jul 2014 17:42:09 +0800
Subject: [PATCH] fix list_domain_details: check config data length=0
If domain is created through virsh, then in xl, one could see it
with 'xl list', but with 'xl list --long domU', it reports:
"Domain name must be specified."
The reason is xl config data does not exist but it still tries
to parse_config_data in current code.
Improve list_domain_details:
If len of config data is 0, just pass, do not go forward to
parse_config_data, otherwise, it will meet error like
"Domain name not specified" and exit. This error is not expected,
since if code enters list_domain_details, domain name validness
is already checked and domain does exist.
Length of config data is 0 may means: config data does not exist due
to some reason, like: domain is created by libvirt, or in destroying
domain process config data is cleared but domain fails to clean up.
No matter in which case, list_domain_details could just show empty
info, but not error like "Domain name not specified".
Signed-off-by: Chunyan Liu <cyliu@suse.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
tools/libxl/xl_cmdimpl.c | 2 ++
1 file changed, 2 insertions(+)
Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
@@ -3198,6 +3198,8 @@ static void list_domains_details(const l
rc = libxl_userdata_retrieve(ctx, info[i].domid, "xl", &data, &len);
if (rc)
continue;
+ if (len == 0)
+ continue;
CHK_SYSCALL(asprintf(&config_source, "<domid %d data>", info[i].domid));
libxl_domain_config_init(&d_config);
parse_config_data(config_source, (char *)data, len, &d_config, NULL);
@ -0,0 +1,46 @@
# Commit a1ac4cf52e38386bac7ac3440c7da0099662ca5c
# Date 2014-07-29 17:02:25 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/cpu: undo BIOS CPUID max_leaf limit before querying for features
If IA32_MISC_ENABLE[22] is set by the BIOS, CPUID.0.EAX will be limited to 3.
Lift this limit before considering whether to query CPUID.7[ECX=0].EBX for
features.
Without this change, dom0 is able to see this feature leaf (as the limit was
subsequently lifted), and will set features appropriately in HVM domain cpuid
policies.
The specific bug XenServer observed was the advertisement of the FSGSBASE
feature, but an inability to set CR4.FSGSBASE, as Xen considered the bit to
be reserved because cpu_has_fsgsbase incorrectly evaluated to false.
This is a regression introduced by c/s 44e24f8567 "x86: don't call
generic_identify() redundantly" where the redundant call actually resampled
CPUID.7[ECX=0] properly to obtain the feature flags.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
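For reference, the limit lift performed inside early_intel_workaround()
amounts to something like the following (the MSR name and bit number are
my reading of the SDM, not part of this patch):

    uint64_t misc_enable;

    rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
    if ( misc_enable & (1ULL << 22) )   /* "Limit CPUID MAXVAL" set by BIOS */
    {
        wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~(1ULL << 22));
        c->cpuid_level = cpuid_eax(0);  /* resample max leaf, now unclipped */
    }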
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -234,6 +234,9 @@ static void __cpuinit generic_identify(s
paddr_bits = cpuid_eax(0x80000008) & 0xff;
}
+ /* Might lift BIOS max_leaf=3 limit. */
+ early_intel_workaround(c);
+
/* Intel-defined flags: level 0x00000007 */
if ( c->cpuid_level >= 0x00000007 ) {
u32 dummy;
@@ -241,8 +244,6 @@ static void __cpuinit generic_identify(s
c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
}
- early_intel_workaround(c);
-
#ifdef CONFIG_X86_HT
c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
@ -0,0 +1,166 @@
References: bnc#883112
# Commit e9425f05b90811458a08355a55a0b0d608c440cf
# Date 2014-08-01 16:29:27 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/ACPI: allow CMOS RTC use even when ACPI says there is none
HP is setting the ACPI_FADT_NO_CMOS_RTC flag on newer systems,
regardless of whether they're being booted from UEFI. Add a command
line option to allow probing for a working RTC in that case.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
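On an affected HP system booted in legacy/BIOS mode, the new option would
be passed on Xen's command line, e.g. (an illustrative GRUB entry, not
taken from the patch):

    multiboot /boot/xen.gz ... cmos-rtc-probe=true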
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -207,6 +207,14 @@ If set, override Xen's calculation of th
If set, override Xen's default choice for the platform timer.
+### cmos-rtc-probe
+> `= <boolean>`
+
+> Default: `false`
+
+Flag to indicate whether to probe for a CMOS Real Time Clock irrespective of
+ACPI indicating none to be there.
+
### com1,com2
> `= <baud>[/<clock_hz>][,[DPS][,[<io-base>|pci|amt][,[<irq>][,[<port-bdf>][,[<bridge-bdf>]]]]]]`
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -651,37 +651,40 @@ mktime (unsigned int year, unsigned int
)*60 + sec; /* finally seconds */
}
-static unsigned long __get_cmos_time(void)
-{
+struct rtc_time {
unsigned int year, mon, day, hour, min, sec;
+};
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
+static void __get_cmos_time(struct rtc_time *rtc)
+{
+ rtc->sec = CMOS_READ(RTC_SECONDS);
+ rtc->min = CMOS_READ(RTC_MINUTES);
+ rtc->hour = CMOS_READ(RTC_HOURS);
+ rtc->day = CMOS_READ(RTC_DAY_OF_MONTH);
+ rtc->mon = CMOS_READ(RTC_MONTH);
+ rtc->year = CMOS_READ(RTC_YEAR);
if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
{
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
+ BCD_TO_BIN(rtc->sec);
+ BCD_TO_BIN(rtc->min);
+ BCD_TO_BIN(rtc->hour);
+ BCD_TO_BIN(rtc->day);
+ BCD_TO_BIN(rtc->mon);
+ BCD_TO_BIN(rtc->year);
}
- if ( (year += 1900) < 1970 )
- year += 100;
-
- return mktime(year, mon, day, hour, min, sec);
+ if ( (rtc->year += 1900) < 1970 )
+ rtc->year += 100;
}
static unsigned long get_cmos_time(void)
{
unsigned long res, flags;
- int i;
+ struct rtc_time rtc;
+ unsigned int seconds = 60;
+ static bool_t __read_mostly cmos_rtc_probe;
+ boolean_param("cmos-rtc-probe", cmos_rtc_probe);
if ( efi_enabled )
{
@@ -690,23 +693,58 @@ static unsigned long get_cmos_time(void)
return res;
}
- if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) )
- panic("System without CMOS RTC must be booted from EFI");
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- /* read RTC exactly on falling edge of update flag */
- for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... */
- if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
+ if ( likely(!(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC)) )
+ cmos_rtc_probe = 0;
+ else if ( system_state < SYS_STATE_active && !cmos_rtc_probe )
+ panic("System with no CMOS RTC advertised must be booted from EFI"
+ " (or with command line option \"cmos-rtc-probe\")");
+
+ for ( ; ; )
+ {
+ s_time_t start, t1, t2;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+
+ /* read RTC exactly on falling edge of update flag */
+ start = NOW();
+ do { /* may take up to 1 second... */
+ t1 = NOW() - start;
+ } while ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) &&
+ t1 <= SECONDS(1) );
+
+ start = NOW();
+ do { /* must try at least 2.228 ms */
+ t2 = NOW() - start;
+ } while ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) &&
+ t2 < MILLISECS(3) );
+
+ __get_cmos_time(&rtc);
+
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ if ( likely(!cmos_rtc_probe) ||
+ t1 > SECONDS(1) || t2 >= MILLISECS(3) ||
+ rtc.sec >= 60 || rtc.min >= 60 || rtc.hour >= 24 ||
+ !rtc.day || rtc.day > 31 ||
+ !rtc.mon || rtc.mon > 12 )
break;
- for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */
- if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
+
+ if ( seconds < 60 )
+ {
+ if ( rtc.sec != seconds )
+ cmos_rtc_probe = 0;
break;
+ }
+
+ process_pending_softirqs();
+
+ seconds = rtc.sec;
+ }
- res = __get_cmos_time();
+ if ( unlikely(cmos_rtc_probe) )
+ panic("No CMOS RTC found - system must be booted from EFI");
- spin_unlock_irqrestore(&rtc_lock, flags);
- return res;
+ return mktime(rtc.year, rtc.mon, rtc.day, rtc.hour, rtc.min, rtc.sec);
}
/***************************************************************************
@ -0,0 +1,54 @@
# Commit 9143a6c55ef7e8f630857cb08c03844d372c2345
# Date 2014-08-04 13:43:03 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
lz4: check for underruns
While overruns are already being taken care of, underruns (resulting
from overflows in the respective "op + length" (or similar) operations)
weren't.
This is CVE-2014-4611.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
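The underrun being caught is plain pointer wrap-around; a self-contained
sketch of the check (mirroring the pattern added below, where the copy
end "cpy = op + length" can wrap when length is attacker controlled):

    #include <stddef.h>

    /* Nonzero if op + length wrapped past the end of the address space,
     * i.e. the copy destination end is numerically below its start. */
    static int copy_end_underruns(const unsigned char *op, size_t length)
    {
        const unsigned char *cpy = op + length; /* may wrap for huge length */

        return (unsigned long)cpy < (unsigned long)op;
    }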
--- a/xen/common/lz4/decompress.c
+++ b/xen/common/lz4/decompress.c
@@ -84,6 +84,8 @@ static int INIT lz4_uncompress(const uns
ip += length;
break; /* EOF */
}
+ if (unlikely((unsigned long)cpy < (unsigned long)op))
+ goto _output_error;
LZ4_WILDCOPY(ip, op, cpy);
ip -= (op - cpy);
op = cpy;
@@ -142,6 +144,8 @@ static int INIT lz4_uncompress(const uns
goto _output_error;
continue;
}
+ if (unlikely((unsigned long)cpy < (unsigned long)op))
+ goto _output_error;
LZ4_SECURECOPY(ref, op, cpy);
op = cpy; /* correction */
}
@@ -207,6 +211,8 @@ static int lz4_uncompress_unknownoutputs
op += length;
break;/* Necessarily EOF, due to parsing restrictions */
}
+ if (unlikely((unsigned long)cpy < (unsigned long)op))
+ goto _output_error;
LZ4_WILDCOPY(ip, op, cpy);
ip -= (op - cpy);
op = cpy;
@@ -270,6 +276,8 @@ static int lz4_uncompress_unknownoutputs
goto _output_error;
continue;
}
+ if (unlikely((unsigned long)cpy < (unsigned long)op))
+ goto _output_error;
LZ4_SECURECOPY(ref, op, cpy);
op = cpy; /* correction */
}
@ -0,0 +1,110 @@
# Commit fd1863847af15c3676348447755e1a1801f9d394
# Date 2014-08-04 13:46:03 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: extend LAPIC shortcuts around P2M lookups
... to all internally handled MMIO regions. It is in particular the
HPET page that, e.g. on Windows Server 2012 R2, can get heavily
accessed, and hence avoiding the unnecessary lookups is rather
beneficial (in the reported case a 40+-vCPU guest would previously not
have booted at all while with hvm_hap_nested_page_fault() shortcut
alone it was able to boot up in 18 minutes [i.e. still room for
improvement]).
Note the apparently unrelated addition of an is_hvm_vcpu() check to the
__hvm_copy() code: Afaict for PVH this shortcut should never have taken
effect (since there's no LAPIC in that case).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1521,11 +1521,14 @@ int hvm_hap_nested_page_fault(paddr_t gp
}
}
- /* For the benefit of 32-bit WinXP (& older Windows) on AMD CPUs,
- * a fast path for LAPIC accesses, skipping the p2m lookup. */
+ /*
+ * No need to do the P2M lookup for internally handled MMIO, benefiting
+ * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses,
+ * - newer Windows (like Server 2012) for HPET accesses.
+ */
if ( !nestedhvm_vcpu_in_guestmode(v)
&& is_hvm_vcpu(v)
- && gfn == PFN_DOWN(vlapic_base_address(vcpu_vlapic(v))) )
+ && hvm_mmio_internal(gpa) )
{
if ( !handle_mmio() )
hvm_inject_hw_exception(TRAP_gp_fault, 0);
@@ -2644,7 +2647,9 @@ static enum hvm_copy_result __hvm_copy(
while ( todo > 0 )
{
- count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
+ paddr_t gpa = addr & ~PAGE_MASK;
+
+ count = min_t(int, PAGE_SIZE - gpa, todo);
if ( flags & HVMCOPY_virt )
{
@@ -2659,16 +2664,22 @@ static enum hvm_copy_result __hvm_copy(
hvm_inject_page_fault(pfec, addr);
return HVMCOPY_bad_gva_to_gfn;
}
+ gpa |= (paddr_t)gfn << PAGE_SHIFT;
}
else
{
gfn = addr >> PAGE_SHIFT;
+ gpa = addr;
}
- /* For the benefit of 32-bit WinXP (& older Windows) on AMD CPUs,
- * a fast path for LAPIC accesses, skipping the p2m lookup. */
+ /*
+ * No need to do the P2M lookup for internally handled MMIO, benefiting
+ * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses,
+ * - newer Windows (like Server 2012) for HPET accesses.
+ */
if ( !nestedhvm_vcpu_in_guestmode(curr)
- && gfn == PFN_DOWN(vlapic_base_address(vcpu_vlapic(curr))) )
+ && is_hvm_vcpu(curr)
+ && hvm_mmio_internal(gpa) )
return HVMCOPY_bad_gfn_to_mfn;
page = get_page_from_gfn(curr->domain, gfn, &p2mt, P2M_UNSHARE);
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -163,6 +163,18 @@ static int hvm_mmio_access(struct vcpu *
return rc;
}
+bool_t hvm_mmio_internal(paddr_t gpa)
+{
+ struct vcpu *curr = current;
+ unsigned int i;
+
+ for ( i = 0; i < HVM_MMIO_HANDLER_NR; ++i )
+ if ( hvm_mmio_handlers[i]->check_handler(curr, gpa) )
+ return 1;
+
+ return 0;
+}
+
int hvm_mmio_intercept(ioreq_t *p)
{
struct vcpu *v = current;
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -91,6 +91,7 @@ static inline int hvm_buffered_io_interc
return hvm_io_intercept(p, HVM_BUFFERED_IO);
}
+bool_t hvm_mmio_internal(paddr_t gpa);
int hvm_mmio_intercept(ioreq_t *p);
int hvm_buffered_io_send(ioreq_t *p);
@ -0,0 +1,378 @@
# Commit 3af450fd2d9403f208d3ac6459716f027b8597ad
# Date 2014-08-08 09:34:03 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86_emulate: properly do IP updates and other side effects on success
The two MMX/SSE/AVX code blocks failed to update IP properly, and these
as well as get_rep_prefix(), which "manually" updated IP so far, failed
to do the TF and RF processing needed at the end of successfully
emulated instructions.
Fix the test utility at once to check IP is properly getting updated,
and while at it macroize the respective code quite a bit, hopefully
making it easier to add further tests when the need arises.
Reported-by: Andrei LUTAS <vlutas@bitdefender.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Reviewed-by: Andrew Cooper <andrew.cooper@citrix.com>
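With the macros in place, adding a new test reduces to the following
shape (a hypothetical test; the instruction and register choice are mine):

    decl_insn(movd_to_mem);

    asm volatile ( "pcmpeqb %%mm1, %%mm1\n"
                   put_insn(movd_to_mem, "movd %%mm1, (%0)")
                   :: "c" (NULL) );

    set_insn(movd_to_mem);              /* regs.eip -> fresh copy in instr[] */
    regs.ecx = (unsigned long)res;
    rc = x86_emulate(&ctxt, &emulops);
    if ( rc != X86EMUL_OKAY || !check_eip(movd_to_mem) )
        goto fail;                      /* must succeed AND advance IP */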
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -597,23 +597,32 @@ int main(int argc, char **argv)
printf("skipped\n");
#endif
+#define decl_insn(which) extern const unsigned char which[], which##_len[]
+#define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \
+ #which ": " insn "\n" \
+ ".equ " #which "_len, .-" #which "\n" \
+ ".popsection"
+#define set_insn(which) (regs.eip = (unsigned long)memcpy(instr, which, \
+ (unsigned long)which##_len))
+#define check_eip(which) (regs.eip == (unsigned long)instr + \
+ (unsigned long)which##_len)
+
printf("%-40s", "Testing movq %mm3,(%ecx)...");
if ( stack_exec && cpu_has_mmx )
{
- extern const unsigned char movq_to_mem[];
+ decl_insn(movq_to_mem);
asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
- ".pushsection .test, \"a\", @progbits\n"
- "movq_to_mem: movq %%mm3, (%0)\n"
- ".popsection" :: "c" (NULL) );
+ put_insn(movq_to_mem, "movq %%mm3, (%0)")
+ :: "c" (NULL) );
- memcpy(instr, movq_to_mem, 15);
+ set_insn(movq_to_mem);
memset(res, 0x33, 64);
memset(res + 8, 0xff, 8);
- regs.eip = (unsigned long)&instr[0];
regs.ecx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+ !check_eip(movq_to_mem) )
goto fail;
printf("okay\n");
}
@@ -623,19 +632,17 @@ int main(int argc, char **argv)
printf("%-40s", "Testing movq (%edx),%mm5...");
if ( stack_exec && cpu_has_mmx )
{
- extern const unsigned char movq_from_mem[];
+ decl_insn(movq_from_mem);
asm volatile ( "pcmpgtb %%mm5, %%mm5\n"
- ".pushsection .test, \"a\", @progbits\n"
- "movq_from_mem: movq (%0), %%mm5\n"
- ".popsection" :: "d" (NULL) );
+ put_insn(movq_from_mem, "movq (%0), %%mm5")
+ :: "d" (NULL) );
- memcpy(instr, movq_from_mem, 15);
- regs.eip = (unsigned long)&instr[0];
+ set_insn(movq_from_mem);
regs.ecx = 0;
regs.edx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( rc != X86EMUL_OKAY )
+ if ( rc != X86EMUL_OKAY || !check_eip(movq_from_mem) )
goto fail;
asm ( "pcmpeqb %%mm3, %%mm3\n\t"
"pcmpeqb %%mm5, %%mm3\n\t"
@@ -650,20 +657,19 @@ int main(int argc, char **argv)
printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
if ( stack_exec && cpu_has_sse2 )
{
- extern const unsigned char movdqu_to_mem[];
+ decl_insn(movdqu_to_mem);
asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
- ".pushsection .test, \"a\", @progbits\n"
- "movdqu_to_mem: movdqu %%xmm2, (%0)\n"
- ".popsection" :: "c" (NULL) );
+ put_insn(movdqu_to_mem, "movdqu %%xmm2, (%0)")
+ :: "c" (NULL) );
- memcpy(instr, movdqu_to_mem, 15);
+ set_insn(movdqu_to_mem);
memset(res, 0x55, 64);
memset(res + 8, 0xff, 16);
- regs.eip = (unsigned long)&instr[0];
regs.ecx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+ !check_eip(movdqu_to_mem) )
goto fail;
printf("okay\n");
}
@@ -673,19 +679,17 @@ int main(int argc, char **argv)
printf("%-40s", "Testing movdqu (%edx),%xmm4...");
if ( stack_exec && cpu_has_sse2 )
{
- extern const unsigned char movdqu_from_mem[];
+ decl_insn(movdqu_from_mem);
asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
- ".pushsection .test, \"a\", @progbits\n"
- "movdqu_from_mem: movdqu (%0), %%xmm4\n"
- ".popsection" :: "d" (NULL) );
+ put_insn(movdqu_from_mem, "movdqu (%0), %%xmm4")
+ :: "d" (NULL) );
- memcpy(instr, movdqu_from_mem, 15);
- regs.eip = (unsigned long)&instr[0];
+ set_insn(movdqu_from_mem);
regs.ecx = 0;
regs.edx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( rc != X86EMUL_OKAY )
+ if ( rc != X86EMUL_OKAY || !check_eip(movdqu_from_mem) )
goto fail;
asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
"pcmpeqb %%xmm4, %%xmm2\n\t"
@@ -700,21 +704,20 @@ int main(int argc, char **argv)
printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)...");
if ( stack_exec && cpu_has_avx )
{
- extern const unsigned char vmovdqu_to_mem[];
+ decl_insn(vmovdqu_to_mem);
asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n"
- ".pushsection .test, \"a\", @progbits\n"
- "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n"
- ".popsection" :: "c" (NULL) );
+ put_insn(vmovdqu_to_mem, "vmovdqu %%ymm2, (%0)")
+ :: "c" (NULL) );
- memcpy(instr, vmovdqu_to_mem, 15);
+ set_insn(vmovdqu_to_mem);
memset(res, 0x55, 128);
memset(res + 16, 0xff, 16);
memset(res + 20, 0x00, 16);
- regs.eip = (unsigned long)&instr[0];
regs.ecx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) )
+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ||
+ !check_eip(vmovdqu_to_mem) )
goto fail;
printf("okay\n");
}
@@ -724,7 +727,7 @@ int main(int argc, char **argv)
printf("%-40s", "Testing vmovdqu (%edx),%ymm4...");
if ( stack_exec && cpu_has_avx )
{
- extern const unsigned char vmovdqu_from_mem[];
+ decl_insn(vmovdqu_from_mem);
#if 0 /* Don't use AVX2 instructions for now */
asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
@@ -732,17 +735,15 @@ int main(int argc, char **argv)
asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
"vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
#endif
- ".pushsection .test, \"a\", @progbits\n"
- "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n"
- ".popsection" :: "d" (NULL) );
+ put_insn(vmovdqu_from_mem, "vmovdqu (%0), %%ymm4")
+ :: "d" (NULL) );
- memcpy(instr, vmovdqu_from_mem, 15);
+ set_insn(vmovdqu_from_mem);
memset(res + 4, 0xff, 16);
- regs.eip = (unsigned long)&instr[0];
regs.ecx = 0;
regs.edx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( rc != X86EMUL_OKAY )
+ if ( rc != X86EMUL_OKAY || !check_eip(vmovdqu_from_mem) )
goto fail;
#if 0 /* Don't use AVX2 instructions for now */
asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
@@ -769,20 +770,19 @@ int main(int argc, char **argv)
memset(res + 10, 0x66, 8);
if ( stack_exec && cpu_has_sse2 )
{
- extern const unsigned char movsd_to_mem[];
+ decl_insn(movsd_to_mem);
asm volatile ( "movlpd %0, %%xmm5\n\t"
"movhpd %0, %%xmm5\n"
- ".pushsection .test, \"a\", @progbits\n"
- "movsd_to_mem: movsd %%xmm5, (%1)\n"
- ".popsection" :: "m" (res[10]), "c" (NULL) );
+ put_insn(movsd_to_mem, "movsd %%xmm5, (%1)")
+ :: "m" (res[10]), "c" (NULL) );
- memcpy(instr, movsd_to_mem, 15);
- regs.eip = (unsigned long)&instr[0];
+ set_insn(movsd_to_mem);
regs.ecx = (unsigned long)(res + 2);
regs.edx = 0;
rc = x86_emulate(&ctxt, &emulops);
- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+ !check_eip(movsd_to_mem) )
goto fail;
printf("okay\n");
}
@@ -795,19 +795,17 @@ int main(int argc, char **argv)
printf("%-40s", "Testing movaps (%edx),%xmm7...");
if ( stack_exec && cpu_has_sse )
{
- extern const unsigned char movaps_from_mem[];
+ decl_insn(movaps_from_mem);
asm volatile ( "xorps %%xmm7, %%xmm7\n"
- ".pushsection .test, \"a\", @progbits\n"
- "movaps_from_mem: movaps (%0), %%xmm7\n"
- ".popsection" :: "d" (NULL) );
+ put_insn(movaps_from_mem, "movaps (%0), %%xmm7")
+ :: "d" (NULL) );
- memcpy(instr, movaps_from_mem, 15);
- regs.eip = (unsigned long)&instr[0];
+ set_insn(movaps_from_mem);
regs.ecx = 0;
regs.edx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( rc != X86EMUL_OKAY )
+ if ( rc != X86EMUL_OKAY || !check_eip(movaps_from_mem) )
goto fail;
asm ( "cmpeqps %1, %%xmm7\n\t"
"movmskps %%xmm7, %0" : "=r" (rc) : "m" (res[8]) );
@@ -823,19 +821,18 @@ int main(int argc, char **argv)
memset(res + 10, 0x77, 8);
if ( stack_exec && cpu_has_avx )
{
- extern const unsigned char vmovsd_to_mem[];
+ decl_insn(vmovsd_to_mem);
asm volatile ( "vbroadcastsd %0, %%ymm5\n"
- ".pushsection .test, \"a\", @progbits\n"
- "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n"
- ".popsection" :: "m" (res[10]), "c" (NULL) );
+ put_insn(vmovsd_to_mem, "vmovsd %%xmm5, (%1)")
+ :: "m" (res[10]), "c" (NULL) );
- memcpy(instr, vmovsd_to_mem, 15);
- regs.eip = (unsigned long)&instr[0];
+ set_insn(vmovsd_to_mem);
regs.ecx = (unsigned long)(res + 2);
regs.edx = 0;
rc = x86_emulate(&ctxt, &emulops);
- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ||
+ !check_eip(vmovsd_to_mem) )
goto fail;
printf("okay\n");
}
@@ -848,19 +845,17 @@ int main(int argc, char **argv)
printf("%-40s", "Testing vmovaps (%edx),%ymm7...");
if ( stack_exec && cpu_has_avx )
{
- extern const unsigned char vmovaps_from_mem[];
+ decl_insn(vmovaps_from_mem);
asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n"
- ".pushsection .test, \"a\", @progbits\n"
- "vmovaps_from_mem: vmovaps (%0), %%ymm7\n"
- ".popsection" :: "d" (NULL) );
+ put_insn(vmovaps_from_mem, "vmovaps (%0), %%ymm7")
+ :: "d" (NULL) );
- memcpy(instr, vmovaps_from_mem, 15);
- regs.eip = (unsigned long)&instr[0];
+ set_insn(vmovaps_from_mem);
regs.ecx = 0;
regs.edx = (unsigned long)res;
rc = x86_emulate(&ctxt, &emulops);
- if ( rc != X86EMUL_OKAY )
+ if ( rc != X86EMUL_OKAY || !check_eip(vmovaps_from_mem) )
goto fail;
asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t"
"vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) );
@@ -871,6 +866,11 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+#undef decl_insn
+#undef put_insn
+#undef set_insn
+#undef check_eip
+
for ( j = 1; j <= 2; j++ )
{
#if defined(__i386__)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -720,29 +720,26 @@ do{ uint8_t stub[] = { _bytes, 0xc3 };
put_fpu(&fic); \
} while (0)
-static unsigned long __get_rep_prefix(
- struct cpu_user_regs *int_regs,
- struct cpu_user_regs *ext_regs,
+static unsigned long _get_rep_prefix(
+ const struct cpu_user_regs *int_regs,
int ad_bytes)
{
- unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx :
- (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
- int_regs->ecx);
-
- /* Skip the instruction if no repetitions are required. */
- if ( ecx == 0 )
- ext_regs->eip = int_regs->eip;
-
- return ecx;
+ return (ad_bytes == 2) ? (uint16_t)int_regs->ecx :
+ (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
+ int_regs->ecx;
}
#define get_rep_prefix() ({ \
unsigned long max_reps = 1; \
if ( rep_prefix() ) \
- max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes); \
+ max_reps = _get_rep_prefix(&_regs, ad_bytes); \
if ( max_reps == 0 ) \
- goto done; \
- max_reps; \
+ { \
+ /* Skip the instruction if no repetitions are required. */ \
+ dst.type = OP_NONE; \
+ goto writeback; \
+ } \
+ max_reps; \
})
static void __put_rep_prefix(
@@ -3921,7 +3918,8 @@ x86_emulate(
if ( !rc && (b & 1) && (ea.type == OP_MEM) )
rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
ea.bytes, ctxt);
- goto done;
+ dst.type = OP_NONE;
+ break;
}
case 0x20: /* mov cr,reg */
@@ -4188,7 +4186,8 @@ x86_emulate(
if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) )
rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
ea.bytes, ctxt);
- goto done;
+ dst.type = OP_NONE;
+ break;
}
case 0x80 ... 0x8f: /* jcc (near) */ {

View File

@ -120,17 +120,13 @@ Normally, xen.gz requires no parameters. However, in special cases (such as
debugging or a dedicated VM server) you may wish to pass it parameters.
Adding parameters to xen.gz can be done by editing the /etc/default/grub file.
Add the following line to this file; GRUB_CMDLINE_XEN="<parameters>". The
Add the following line to this file; GRUB_CMDLINE_XEN_DEFAULT="<parameters>". The
parameters may be valid options passed to xen.gz (the hypervisor). After
editing this file, you must run 'grub2-mkconfig -o /boot/grub2/grub.cfg' for
the changes to take effect and then reboot.
editing this file, you must first run 'grub2-mkconfig -o /boot/grub2/grub.cfg'
and then reboot for the changes to take effect.
In particular in case of problems you may want to attach a serial terminal and
direct Xen to send its output not only to the screen, but also to that
terminal. In order to do so, add "console=vga,com<n> com<n>=<baud>" (without
the quotes and with <n> replaced by the serial port number - generally 1 or 2 -
and with <baud> replaced by the baud rate the serial terminal is configured
for) to the GRUB_CMDLINE_XEN line.
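For example, assuming serial port 1 configured for 115200 baud, the line might
read (a sketch; adjust the port and speed to your hardware):
GRUB_CMDLINE_XEN="console=vga,com1 com1=115200"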
For more information on how to add options to the hypervisor, see the sections
below called; "Dom0 Memory Ballooning" and "Troubleshooting".
For a more complete discussion of possible parameters, see the user
documentation in the xen-doc-* packages.
@ -504,18 +500,28 @@ to take effect.
Dom0 Memory Ballooning
----------------------
It is recommended that you dedicate a fixed amount of RAM to dom0 rather than
relying on dom0 ballooning. The amount of RAM dedicated to dom0 should never
be less than the recommended minimum amount for running your SUSE distribution
in native mode. The following example shows the xen.gz syntax for doing this.
This would be added to your grub1 or grub2 configuration;
For some server deployments it may be best to dedicate a fixed amount of RAM
rather than relying on dom0 ballooning. The amount of RAM dedicated to dom0
should never be less than the recommended minimum amount for running your SUSE
distribution in native mode. The actual amount of RAM needed for dom0 will
depend on how much physical RAM your host contains and the number of VMs you
plan on running simultaneously. The following example shows the syntax for
doing this. This would be added to your grub1 or grub2 configuration;
dom0_mem=1024M,max:1024M
Grub2 Example:
Edit /etc/default/grub and add,
GRUB_CMDLINE_XEN_DEFAULT="dom0_mem=1024M,max:1024M"
and then run
grub2-mkconfig -o /boot/grub2/grub.cfg
Grub1 Example:
Edit /boot/grub/menu.lst and edit the line containing xen.gz
kernel /boot/xen.gz dom0_mem=1024M,max:1024M
After modifying your grub configuration, you will need to edit /etc/xen/xl.conf
and set autoballoon=0. This will prevent xl from automatically adjusting the
amount of memory assigned to dom0. Reboot the host for these changes to take
effect.
and set autoballoon="off". This will prevent xl from automatically adjusting
the amount of memory assigned to dom0. Reboot the host for these changes to
take effect.
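Once the host is back up, the setting can be sanity-checked; the Mem column
that 'xl list' reports for Domain-0 should match the dom0_mem= value. For
example:
xl list 0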
Troubleshooting
@ -527,34 +533,94 @@ The hypervisor and domain 0 kernel are a matched set, and usually must be
upgraded together. Consult the online documentation for a matrix of supported
32- and 64-bit combinations.
On certain machines with 2GB or less of RAM, domain 0 Linux may fail to boot,
printing the following messages:
PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
...
Kernel panic - not syncing: PCI-DMA: Memory would be corrupted
Fix this by adding "swiotlb=16" to the Linux kernel command line, which
reserves additional memory for the swiotlb (the actual number to be used here
of course depends on the system configuration).
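With grub2 this could look like the following (a sketch; keep whatever options
are already present):
GRUB_CMDLINE_LINUX_DEFAULT="<pre-existing flags> swiotlb=16"
followed by 'grub2-mkconfig -o /boot/grub2/grub.cfg' and a reboot.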
If you have trouble early in the boot, try passing pnpacpi=off to the Linux
kernel. If you have trouble with interrupts or timers, passing lapic to Xen
may help. Xen and Linux understand similar ACPI boot parameters. Try the
options acpi=off,force,strict,ht,noirq or acpi_skip_timer_override.
options acpi=off,force,ht,noirq or acpi_skip_timer_override.
Other useful debugging options to Xen may be nosmp, noreboot, mem=1024M,
Other useful debugging options to Xen may be nosmp, noreboot, mem=4096M,
sync_console, noirqbalance (Dell). For a complete list of Xen boot options,
consult chapter 11.3 of the Xen users' manual.
consult the "Xen Hypervisor Command Line Options" documentation.
If domain 0 Linux crashes on X11 startup, please try to boot into runlevel 3.
To debug Xen or domain 0 Linux crashes or hangs, it may be useful to use the
debug-enabled hypervisor, and/or to prevent automatic rebooting. Change your
Grub configuration from something like this:
1) As a first step in debugging Xen you should add the following hypervisor
options to the xen.gz line in your grub configuration file. After rebooting,
the 'xl dmesg' command will produce more output, making problems easier to analyze.
Grub2 Example:
Edit /etc/default/grub and add,
GRUB_CMDLINE_XEN_DEFAULT="loglvl=all guest_loglvl=all"
and then run,
grub2-mkconfig -o /boot/grub2/grub.cfg
Grub1 Example:
Edit /boot/grub/menu.lst and edit the line containing xen.gz
kernel /boot/xen.gz loglvl=all guest_loglvl=all
2) With the log levels specified above and the host rebooted, more useful
information about domain 0 and running VMs can be obtained using the
'xl dmesg' and 'xl debug-keys' commands. For example, from the command line
run:
xl debug-keys h
and then run:
xl dmesg
Note that the end of the output from 'xl dmesg' includes help on the
series of commands that may be passed to 'xl debug-keys'. For example,
passing the letter 'q' to 'xl debug-keys' will "dump domain (and guest
debug) info":
xl debug-keys q
Now you can again run 'xl dmesg' to see the domain and guest debug info.
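For instance, to capture that dump in a file for a bug report (the file name
is just an example):
xl debug-keys q
xl dmesg > xl-dmesg-q.txt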
3) Sometimes it is useful to attach a serial terminal and direct Xen to send
its output not only to the screen, but also to that terminal. First you need
to attach a serial cable from the serial port on the server to a second
machine's serial port. That second machine could be running minicom (or some
other program that can be set up to read from the serial port). Do the
following to prepare Xen to send its output over this serial line.
Grub2 Example:
Edit /etc/default/grub and add,
GRUB_CMDLINE_XEN_DEFAULT="loglvl=all guest_loglvl=all console=com1 com1=115200,8n1"
Also append serial console flags to the Linux command line option so that it
appears as,
GRUB_CMDLINE_LINUX_DEFAULT="<pre-existing flags> console=ttyS0,115200"
where <pre-existing flags> are the options already present, and then run,
grub2-mkconfig -o /boot/grub2/grub.cfg
Grub1 Example:
Edit the /boot/grub/menu.lst file and add the following to the Xen entry,
kernel /boot/xen.gz loglvl=all guest_loglvl=all console=com1 com1=115200,8n1
module /boot/vmlinuz-xen <pre-existing flags> console=ttyS0,115200
Once the hardware and software are configured correctly, reboot the server;
its output should appear on the other terminal as the server boots.
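For example, minicom on the second machine could be pointed at its serial
device with something like (device name and speed depend on your setup):
minicom -D /dev/ttyS0 -b 115200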
4) To further debug Xen or domain 0 Linux crashes or hangs, it may be useful to
use the debug-enabled hypervisor, and/or to prevent automatic rebooting.
Grub2 Example:
Edit /etc/default/grub and add,
GRUB_CMDLINE_XEN_DEFAULT="noreboot loglvl=all guest_loglvl=all"
Edit the /etc/grub.d/20_linux_xen file. Look for this line:
while [ "x${xen_list}" != "x" ] ; do
and add *before* the above line something like this:
xen_dbg_list=`readlink -f /boot/xen-dbg.gz`
xen_list="$xen_list $xen_dbg_list"
and finally run:
grub2-mkconfig -o /boot/grub2/grub.cfg
On reboot, select "Advanced options for SUSE Linux Enterprise Server 12
(with Xen hypervisor)" from the grub menu and the Xen hypervisor debug entry
in the submenu.
Grub1 Example:
Edit your menu.lst configuration from something like this:
kernel (hd0,5)/xen.gz
To something like this:
kernel (hd0,5)/xen-dbg.gz noreboot
After rebooting, the Xen hypervisor will write any error messages to the log
file (viewable with the "xl dmesg" command).
To something like this:
kernel (hd0,5)/xen-dbg.gz noreboot loglvl=all guest_loglvl=all
All hypervisor options require a reboot to take effect. After rebooting, the
Xen hypervisor will write any error messages to the log file (viewable with
the "xl dmesg" command).
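Whether the debug-enabled hypervisor actually booted can also be checked in
that same log; debug builds should mark the version banner with debug=y, e.g.:
xl dmesg | grep debug=y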
If problems persist, check if a newer version is available. Well-tested
versions will be shipped with SUSE and via YaST Online Update. More frequent

View File

@ -1,6 +1,6 @@
#! /bin/sh
#
# Copyright (c) 1996 SuSE GmbH Nuernberg, Germany. All rights reserved.
# Copyright (c) 2014 SUSE GmbH Nuernberg, Germany. All rights reserved.
#
# Author: Werner Fink <werner@suse.de>, 1996
# Burchard Steinbild <bs@suse.de>, 1996

View File

@ -1,7 +1,7 @@
Index: xen-4.4.0-testing/tools/configure
Index: xen-4.4.1-testing/tools/configure
===================================================================
--- xen-4.4.0-testing.orig/tools/configure
+++ xen-4.4.0-testing/tools/configure
--- xen-4.4.1-testing.orig/tools/configure
+++ xen-4.4.1-testing/tools/configure
@@ -633,9 +633,6 @@ libgcrypt
EXTFS_LIBS
system_aio
@ -12,7 +12,7 @@ Index: xen-4.4.0-testing/tools/configure
glib_LIBS
glib_CFLAGS
PKG_CONFIG_LIBDIR
@@ -7202,104 +7199,104 @@ $as_echo "yes" >&6; }
@@ -7375,104 +7372,104 @@ $as_echo "yes" >&6; }
fi
# Extract the first word of "wget", so it can be a program name with args.
@ -215,10 +215,10 @@ Index: xen-4.4.0-testing/tools/configure
Index: xen-4.4.0-testing/stubdom/configure
Index: xen-4.4.1-testing/stubdom/configure
===================================================================
--- xen-4.4.0-testing.orig/stubdom/configure
+++ xen-4.4.0-testing/stubdom/configure
--- xen-4.4.1-testing.orig/stubdom/configure
+++ xen-4.4.1-testing/stubdom/configure
@@ -623,8 +623,6 @@ LDFLAGS
CFLAGS
CC

View File

@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright (c) 2001 SuSE GmbH Nuernberg, Germany. All rights reserved.
# Copyright (c) 2014 SUSE GmbH Nuernberg, Germany. All rights reserved.
#
# /etc/init.d/pciback
#

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71ec2c199f41fe74e647f4065df60d99cc62ed684ef37f4267a457a2f65b1aff
size 2877265
oid sha256:736479fcbc2a8679a67ecd06a60e9c6af9bb7f0b582b31db6e9fad1679eeea1b
size 2877494

View File

@ -31,10 +31,10 @@ ee2e7e5 Merge pull request #1 from aaannz/pvscsi
7de6f49 support character devices too
c84381b allow /dev/sda as scsi devspec
f11e3a2 pvscsi
Index: xen-4.4.0-testing/docs/man/xl.cfg.pod.5
Index: xen-4.4.1-testing/docs/man/xl.cfg.pod.5
===================================================================
--- xen-4.4.0-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.4.0-testing/docs/man/xl.cfg.pod.5
--- xen-4.4.1-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.4.1-testing/docs/man/xl.cfg.pod.5
@@ -380,6 +380,36 @@ value is optional if this is a guest dom
=back
@ -72,10 +72,10 @@ Index: xen-4.4.0-testing/docs/man/xl.cfg.pod.5
=item B<vfb=[ "VFB_SPEC_STRING", "VFB_SPEC_STRING", ...]>
Specifies the paravirtual framebuffer devices which should be supplied
Index: xen-4.4.0-testing/docs/man/xl.pod.1
Index: xen-4.4.1-testing/docs/man/xl.pod.1
===================================================================
--- xen-4.4.0-testing.orig/docs/man/xl.pod.1
+++ xen-4.4.0-testing/docs/man/xl.pod.1
--- xen-4.4.1-testing.orig/docs/man/xl.pod.1
+++ xen-4.4.1-testing/docs/man/xl.pod.1
@@ -1208,6 +1208,26 @@ List virtual trusted platform modules fo
=back
@ -103,10 +103,10 @@ Index: xen-4.4.0-testing/docs/man/xl.pod.1
=head1 PCI PASS-THROUGH
=over 4
Index: xen-4.4.0-testing/tools/libxl/libxl.c
Index: xen-4.4.1-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl.c
+++ xen-4.4.0-testing/tools/libxl/libxl.c
--- xen-4.4.1-testing.orig/tools/libxl/libxl.c
+++ xen-4.4.1-testing/tools/libxl/libxl.c
@@ -2021,6 +2021,273 @@ int libxl_devid_to_device_vtpm(libxl_ctx
return rc;
}
@ -419,7 +419,7 @@ Index: xen-4.4.0-testing/tools/libxl/libxl.c
#undef DEFINE_DEVICE_ADD
/******************************************************************************/
@@ -5682,6 +5959,20 @@ int libxl_fd_set_cloexec(libxl_ctx *ctx,
@@ -5683,6 +5960,20 @@ int libxl_fd_set_cloexec(libxl_ctx *ctx,
int libxl_fd_set_nonblock(libxl_ctx *ctx, int fd, int nonblock)
{ return fd_set_flags(ctx,fd, F_GETFL,F_SETFL,"FL", O_NONBLOCK, nonblock); }
@ -440,10 +440,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl.c
/*
* Local variables:
* mode: C
Index: xen-4.4.0-testing/tools/libxl/libxl.h
Index: xen-4.4.1-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl.h
+++ xen-4.4.0-testing/tools/libxl/libxl.h
--- xen-4.4.1-testing.orig/tools/libxl/libxl.h
+++ xen-4.4.1-testing/tools/libxl/libxl.h
@@ -951,6 +951,26 @@ libxl_device_vtpm *libxl_device_vtpm_lis
int libxl_device_vtpm_getinfo(libxl_ctx *ctx, uint32_t domid,
libxl_device_vtpm *vtpm, libxl_vtpminfo *vtpminfo);
@ -499,10 +499,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl.h
#endif /* LIBXL_H */
/*
Index: xen-4.4.0-testing/tools/libxl/libxl_create.c
Index: xen-4.4.1-testing/tools/libxl/libxl_create.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.4.0-testing/tools/libxl/libxl_create.c
--- xen-4.4.1-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.4.1-testing/tools/libxl/libxl_create.c
@@ -1029,6 +1029,7 @@ static void domcreate_rebuild_done(libxl
libxl__multidev_begin(ao, &dcs->multidev);
dcs->multidev.callback = domcreate_launch_dm;
@ -511,10 +511,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_create.c
libxl__multidev_prepared(egc, &dcs->multidev, 0);
return;
Index: xen-4.4.0-testing/tools/libxl/libxl_device.c
Index: xen-4.4.1-testing/tools/libxl/libxl_device.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_device.c
+++ xen-4.4.0-testing/tools/libxl/libxl_device.c
--- xen-4.4.1-testing.orig/tools/libxl/libxl_device.c
+++ xen-4.4.1-testing/tools/libxl/libxl_device.c
@@ -523,6 +523,7 @@ void libxl__multidev_prepared(libxl__egc
* The following functions are defined:
* libxl__add_disks
@ -556,10 +556,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_device.c
/******************************************************************************/
int libxl__device_destroy(libxl__gc *gc, libxl__device *dev)
Index: xen-4.4.0-testing/tools/libxl/libxl_internal.h
Index: xen-4.4.1-testing/tools/libxl/libxl_internal.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.4.0-testing/tools/libxl/libxl_internal.h
--- xen-4.4.1-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.4.1-testing/tools/libxl/libxl_internal.h
@@ -982,6 +982,7 @@ _hidden int libxl__device_disk_setdefaul
_hidden int libxl__device_nic_setdefault(libxl__gc *gc, libxl_device_nic *nic,
uint32_t domid);
@ -590,10 +590,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_internal.h
/*----- device model creation -----*/
/* First layer; wraps libxl__spawn_spawn. */
Index: xen-4.4.0-testing/tools/libxl/libxl_types.idl
Index: xen-4.4.1-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.4.0-testing/tools/libxl/libxl_types.idl
--- xen-4.4.1-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.4.1-testing/tools/libxl/libxl_types.idl
@@ -453,6 +453,26 @@ libxl_device_vtpm = Struct("device_vtpm"
("uuid", libxl_uuid),
])
@ -659,10 +659,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_types.idl
libxl_vcpuinfo = Struct("vcpuinfo", [
("vcpuid", uint32),
("cpu", uint32),
Index: xen-4.4.0-testing/tools/libxl/libxl_types_internal.idl
Index: xen-4.4.1-testing/tools/libxl/libxl_types_internal.idl
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_types_internal.idl
+++ xen-4.4.0-testing/tools/libxl/libxl_types_internal.idl
--- xen-4.4.1-testing.orig/tools/libxl/libxl_types_internal.idl
+++ xen-4.4.1-testing/tools/libxl/libxl_types_internal.idl
@@ -20,6 +20,7 @@ libxl__device_kind = Enumeration("device
(6, "VKBD"),
(7, "CONSOLE"),
@ -671,10 +671,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_types_internal.idl
])
libxl__console_backend = Enumeration("console_backend", [
Index: xen-4.4.0-testing/tools/libxl/xl.h
Index: xen-4.4.1-testing/tools/libxl/xl.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/xl.h
+++ xen-4.4.0-testing/tools/libxl/xl.h
--- xen-4.4.1-testing.orig/tools/libxl/xl.h
+++ xen-4.4.1-testing/tools/libxl/xl.h
@@ -81,6 +81,9 @@ int main_networkdetach(int argc, char **
int main_blockattach(int argc, char **argv);
int main_blocklist(int argc, char **argv);
@ -685,10 +685,10 @@ Index: xen-4.4.0-testing/tools/libxl/xl.h
int main_vtpmattach(int argc, char **argv);
int main_vtpmlist(int argc, char **argv);
int main_vtpmdetach(int argc, char **argv);
Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
Index: xen-4.4.1-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
--- xen-4.4.1-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.4.1-testing/tools/libxl/xl_cmdimpl.c
@@ -17,6 +17,7 @@
#include "libxl_osdeps.h"
@ -904,7 +904,7 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
if (!xlu_cfg_get_list(config, "vtpm", &vtpms, 0, 0)) {
d_config->num_vtpms = 0;
d_config->vtpms = NULL;
@@ -6041,6 +6219,256 @@ int main_blockdetach(int argc, char **ar
@@ -6043,6 +6221,256 @@ int main_blockdetach(int argc, char **ar
return rc;
}
@ -1161,10 +1161,10 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
int main_vtpmattach(int argc, char **argv)
{
int opt;
Index: xen-4.4.0-testing/tools/libxl/xl_cmdtable.c
Index: xen-4.4.1-testing/tools/libxl/xl_cmdtable.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.4.0-testing/tools/libxl/xl_cmdtable.c
--- xen-4.4.1-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.4.1-testing/tools/libxl/xl_cmdtable.c
@@ -354,6 +354,21 @@ struct cmd_spec cmd_table[] = {
"Destroy a domain's virtual block device",
"<Domain> <DevId>",

View File

@ -88,10 +88,10 @@ Signed-off-by: Olaf Hering <olaf@aepfle.de>
tools/libxl/xl_cmdtable.c | 23 ++++++++++++++-------
12 files changed, 162 insertions(+), 21 deletions(-)
Index: xen-4.4.0-testing/docs/man/xl.pod.1
Index: xen-4.4.1-testing/docs/man/xl.pod.1
===================================================================
--- xen-4.4.0-testing.orig/docs/man/xl.pod.1
+++ xen-4.4.0-testing/docs/man/xl.pod.1
--- xen-4.4.1-testing.orig/docs/man/xl.pod.1
+++ xen-4.4.1-testing/docs/man/xl.pod.1
@@ -392,6 +392,26 @@ Send <config> instead of config file fro
Print huge (!) amount of debug during the migration process.
@ -119,10 +119,10 @@ Index: xen-4.4.0-testing/docs/man/xl.pod.1
=back
=item B<remus> [I<OPTIONS>] I<domain-id> I<host>
Index: xen-4.4.0-testing/tools/libxc/xc_domain_save.c
Index: xen-4.4.1-testing/tools/libxc/xc_domain_save.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.4.0-testing/tools/libxc/xc_domain_save.c
--- xen-4.4.1-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.4.1-testing/tools/libxc/xc_domain_save.c
@@ -43,6 +43,7 @@
*/
#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
@ -196,10 +196,10 @@ Index: xen-4.4.0-testing/tools/libxc/xc_domain_save.c
/*
* Local variables:
Index: xen-4.4.0-testing/tools/libxc/xc_nomigrate.c
Index: xen-4.4.1-testing/tools/libxc/xc_nomigrate.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxc/xc_nomigrate.c
+++ xen-4.4.0-testing/tools/libxc/xc_nomigrate.c
--- xen-4.4.1-testing.orig/tools/libxc/xc_nomigrate.c
+++ xen-4.4.1-testing/tools/libxc/xc_nomigrate.c
@@ -21,6 +21,16 @@
#include <xenctrl.h>
#include <xenguest.h>
@ -217,10 +217,10 @@ Index: xen-4.4.0-testing/tools/libxc/xc_nomigrate.c
int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags,
struct save_callbacks* callbacks, int hvm,
Index: xen-4.4.0-testing/tools/libxc/xenguest.h
Index: xen-4.4.1-testing/tools/libxc/xenguest.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxc/xenguest.h
+++ xen-4.4.0-testing/tools/libxc/xenguest.h
--- xen-4.4.1-testing.orig/tools/libxc/xenguest.h
+++ xen-4.4.1-testing/tools/libxc/xenguest.h
@@ -28,6 +28,7 @@
#define XCFLAGS_HVM (1 << 2)
#define XCFLAGS_STDVGA (1 << 3)
@ -242,10 +242,10 @@ Index: xen-4.4.0-testing/tools/libxc/xenguest.h
/* callbacks provided by xc_domain_restore */
struct restore_callbacks {
Index: xen-4.4.0-testing/tools/libxl/libxl.c
Index: xen-4.4.1-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl.c
+++ xen-4.4.0-testing/tools/libxl/libxl.c
--- xen-4.4.1-testing.orig/tools/libxl/libxl.c
+++ xen-4.4.1-testing/tools/libxl/libxl.c
@@ -763,7 +763,8 @@ static void domain_suspend_cb(libxl__egc
}
@ -294,10 +294,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl.c
int libxl_domain_pause(libxl_ctx *ctx, uint32_t domid)
{
int ret;
Index: xen-4.4.0-testing/tools/libxl/libxl.h
Index: xen-4.4.1-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl.h
+++ xen-4.4.0-testing/tools/libxl/libxl.h
--- xen-4.4.1-testing.orig/tools/libxl/libxl.h
+++ xen-4.4.1-testing/tools/libxl/libxl.h
@@ -687,8 +687,23 @@ int libxl_domain_suspend(libxl_ctx *ctx,
int flags, /* LIBXL_SUSPEND_* */
const libxl_asyncop_how *ao_how)
@ -322,11 +322,11 @@ Index: xen-4.4.0-testing/tools/libxl/libxl.h
/* @param suspend_cancel [from xenctrl.h:xc_domain_resume( @param fast )]
* If this parameter is true, use co-operative resume. The guest
Index: xen-4.4.0-testing/tools/libxl/libxl_dom.c
Index: xen-4.4.1-testing/tools/libxl/libxl_dom.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_dom.c
+++ xen-4.4.0-testing/tools/libxl/libxl_dom.c
@@ -1337,6 +1337,7 @@ void libxl__domain_suspend(libxl__egc *e
--- xen-4.4.1-testing.orig/tools/libxl/libxl_dom.c
+++ xen-4.4.1-testing/tools/libxl/libxl_dom.c
@@ -1341,6 +1341,7 @@ void libxl__domain_suspend(libxl__egc *e
dss->xcflags = (live ? XCFLAGS_LIVE : 0)
| (debug ? XCFLAGS_DEBUG : 0)
@ -334,10 +334,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_dom.c
| (dss->hvm ? XCFLAGS_HVM : 0);
dss->suspend_eventchn = -1;
Index: xen-4.4.0-testing/tools/libxl/libxl_internal.h
Index: xen-4.4.1-testing/tools/libxl/libxl_internal.h
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.4.0-testing/tools/libxl/libxl_internal.h
--- xen-4.4.1-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.4.1-testing/tools/libxl/libxl_internal.h
@@ -2319,6 +2319,10 @@ struct libxl__domain_suspend_state {
xc_evtchn *xce; /* event channel handle */
int suspend_eventchn;
@ -349,10 +349,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_internal.h
int xcflags;
int guest_responded;
const char *dm_savefile;
Index: xen-4.4.0-testing/tools/libxl/libxl_save_callout.c
Index: xen-4.4.1-testing/tools/libxl/libxl_save_callout.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_save_callout.c
+++ xen-4.4.0-testing/tools/libxl/libxl_save_callout.c
--- xen-4.4.1-testing.orig/tools/libxl/libxl_save_callout.c
+++ xen-4.4.1-testing/tools/libxl/libxl_save_callout.c
@@ -108,7 +108,9 @@ void libxl__xc_domain_save(libxl__egc *e
}
@ -364,10 +364,10 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_save_callout.c
toolstack_data_fd, toolstack_data_len,
cbflags,
};
Index: xen-4.4.0-testing/tools/libxl/libxl_save_helper.c
Index: xen-4.4.1-testing/tools/libxl/libxl_save_helper.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/libxl_save_helper.c
+++ xen-4.4.0-testing/tools/libxl/libxl_save_helper.c
--- xen-4.4.1-testing.orig/tools/libxl/libxl_save_helper.c
+++ xen-4.4.1-testing/tools/libxl/libxl_save_helper.c
@@ -221,6 +221,7 @@ int main(int argc, char **argv)
uint32_t dom = strtoul(NEXTARG,0,10);
uint32_t max_iters = strtoul(NEXTARG,0,10);
@ -386,11 +386,11 @@ Index: xen-4.4.0-testing/tools/libxl/libxl_save_helper.c
&helper_save_callbacks, hvm, genidad);
complete(r);
Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
Index: xen-4.4.1-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
@@ -3645,6 +3645,8 @@ static void migrate_do_preamble(int send
--- xen-4.4.1-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.4.1-testing/tools/libxl/xl_cmdimpl.c
@@ -3647,6 +3647,8 @@ static void migrate_do_preamble(int send
}
static void migrate_domain(uint32_t domid, const char *rune, int debug,
@ -399,7 +399,7 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
const char *override_config_file)
{
pid_t child = -1;
@@ -3653,7 +3655,13 @@ static void migrate_domain(uint32_t domi
@@ -3655,7 +3657,13 @@ static void migrate_domain(uint32_t domi
char *away_domname;
char rc_buf;
uint8_t *config_data;
@ -414,7 +414,7 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
save_domain_core_begin(domid, override_config_file,
&config_data, &config_len);
@@ -3672,10 +3680,13 @@ static void migrate_domain(uint32_t domi
@@ -3674,10 +3682,13 @@ static void migrate_domain(uint32_t domi
xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
if (debug)
@ -431,7 +431,7 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
" (rc=%d)\n", rc);
if (rc == ERROR_GUEST_TIMEDOUT)
goto failed_suspend;
@@ -4062,13 +4073,18 @@ int main_migrate(int argc, char **argv)
@@ -4064,13 +4075,18 @@ int main_migrate(int argc, char **argv)
char *rune = NULL;
char *host;
int opt, daemonize = 1, monitor = 1, debug = 0;
@ -451,7 +451,7 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
case 'C':
config_filename = optarg;
break;
@@ -4085,6 +4101,18 @@ int main_migrate(int argc, char **argv)
@@ -4087,6 +4103,18 @@ int main_migrate(int argc, char **argv)
case 0x100:
debug = 1;
break;
@ -470,7 +470,7 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
}
domid = find_domain(argv[optind]);
@@ -4100,7 +4128,8 @@ int main_migrate(int argc, char **argv)
@@ -4102,7 +4130,8 @@ int main_migrate(int argc, char **argv)
return 1;
}
@ -480,10 +480,10 @@ Index: xen-4.4.0-testing/tools/libxl/xl_cmdimpl.c
return 0;
}
#endif
Index: xen-4.4.0-testing/tools/libxl/xl_cmdtable.c
Index: xen-4.4.1-testing/tools/libxl/xl_cmdtable.c
===================================================================
--- xen-4.4.0-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.4.0-testing/tools/libxl/xl_cmdtable.c
--- xen-4.4.1-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.4.1-testing/tools/libxl/xl_cmdtable.c
@@ -150,14 +150,21 @@ struct cmd_spec cmd_table[] = {
&main_migrate, 0, 1,
"Migrate a domain to another host",

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4c30b6deacd63d74ed498ded6cdddc448c9b91a9fe90fa0490387c52723baac
size 7571312
oid sha256:a4d6448804cd43cc4c942dd031c17d1b46a1f68948b60d385e0c962ab4d4bdfd
size 7597461

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1193cf071ed647d44dccc73b58574a12efeae28ed6393b9ee6cea35ad86425dc
size 3212848
oid sha256:f9015611676155eea1abfbe65dda13bdc7195bfb87d3da7a6d27f0df77376014
size 3235746

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1a64318a51031dfd03b4de2eaaed75696505a627e2eba5dd1703bb1e4100c92a
size 366380
oid sha256:cf7450369a8a4693ab95adcdb79317d0b524fe5e5c5f79bd6a630efcf376db25
size 378198

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:96339e810d23134d84ed306a42c24d8d648a6e6815808b9a5f418de073f56e01
size 17477089
oid sha256:90784022712efebb69d602810d07c2b86b572eb7bbc12dac58566c42dd5353c1
size 17477566

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa87fcd1134ccb8dde89b06aacb7f922d85f4a6c318af2c71b8af47d5a20c165
size 4356476

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f277f2bd5714773978d9a3688fd1f65577178aaac1add3b7d66c01e3648267b1
size 4432926

View File

@ -1,3 +1,109 @@
-------------------------------------------------------------------
Fri Aug 15 15:09:27 MDT 2014 - carnold@suse.com
- Update README.SUSE with additional debug help
-------------------------------------------------------------------
Fri Aug 8 07:34:38 MDT 2014 - carnold@suse.com
- bnc#883112 - Xen Panic during boot "System without CMOS RTC must
be booted from EFI"
53dba447-x86-ACPI-allow-CMOS-RTC-use-even-when-ACPI-says-there-is-none.patch
- Upstream patches from Jan
53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch
53df71c7-lz4-check-for-underruns.patch
53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch
53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch
-------------------------------------------------------------------
Thu Aug 7 11:26:15 UTC 2014 - carnold@suse.com
- Update to Xen Version 4.4.1-rc2
xen-4.4.1-testing-src.tar.bz2
- Dropped the following upstream patches and xen-4.4.0-testing-src.tar.bz2
537b5ede-move-domain-to-cpupool0-before-destroying-it.patch
5327190a-x86-Intel-work-around-Xeon-7400-series-erratum-AAI65.patch
534bdf47-x86-HAP-also-flush-TLB-when-altering-a-present-1G-or-intermediate-entry.patch
535a354b-passthrough-allow-to-suppress-SERR-and-PERR-signaling.patch
53636ebf-x86-fix-guest-CPUID-handling.patch
5347b524-evtchn-eliminate-64k-ports-limitation.patch
53a040c6-page-alloc-scrub-pages-used-by-hypervisor-upon-freeing.patch
53a1990a-IOMMU-prevent-VT-d-device-IOTLB-operations-on-wrong-IOMMU.patch
53732f4f-x86-MCE-bypass-uninitialized-vcpu-in-vMCE-injection.patch
531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch
5331917d-x86-enforce-preemption-in-HVM_set_mem_access-p2m_set_mem_access.patch
531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
538ee637-ACPI-Prevent-acpi_table_entries-from-falling-into-a-infinite-loop.patch
535a34eb-VT-d-suppress-UR-signaling-for-server-chipsets.patch
535e31bc-x86-HVM-correct-the-SMEP-logic-for-HVM_CR0_GUEST_RESERVED_BITS.patch
53859956-timers-set-the-deadline-more-accurately.patch
53636978-hvm_set_ioreq_page-releases-wrong-page-in-error-path.patch
535a3516-VT-d-suppress-UR-signaling-for-desktop-chipsets.patch
53cfdcc7-avoid-crash-when-doing-shutdown-with-active-cpupools.patch
5383175e-VT-d-fix-mask-applied-to-DMIBAR-in-desktop-chipset-XSA-59-workaround.patch
531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
532fff53-x86-fix-determination-of-bit-count-for-struct-domain-allocations.patch
5357baff-x86-add-missing-break-in-dom0_pit_access.patch
530c54c3-x86-mce-Reduce-boot-time-logspam.patch
5383167d-ACPI-ERST-fix-table-mapping.patch
5390927f-x86-fix-reboot-shutdown-with-running-HVM-guests.patch
530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
53709b77-Nested-VMX-load-current_vmcs-only-when-it-exists.patch
5396d818-avoid-crash-on-HVM-domain-destroy-with-PCI-passthrough.patch
531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
5385956b-x86-don-t-use-VA-for-cache-flush-when-also-flushing-TLB.patch
539ec004-x86-mce-don-t-spam-the-console-with-CPUx-Temperature-z.patch
53909259-x86-domctl-two-functional-fixes-to-XEN_DOMCTL_-gs-etvcpuextstate.patch
53859549-AMD-IOMMU-don-t-free-page-table-prematurely.patch
533d413b-x86-mm-fix-checks-against-max_mapped_pfn.patch
535fa503-x86-HVM-restrict-HVMOP_set_mem_type.patch
53271880-VT-d-fix-RMRR-handling.patch
5390917a-VT-d-honor-APEI-firmware-first-mode-in-XSA-59-workaround-code.patch
538dcada-x86-HVM-eliminate-vulnerabilities-from-hvm_inject_msi.patch
53455585-x86-AMD-feature-masking-is-unavailable-on-Fam11.patch
537b5e50-VT-d-apply-quirks-at-device-setup-time-rather-than-only-at-boot.patch
53a199d7-x86-EFI-allow-FPU-XMM-use-in-runtime-service-functions.patch
53cfddaf-x86-mem_event-validate-the-response-vcpu_id-before-acting-on-it.patch
53b16cd4-VT-d-ATS-correct-and-clean-up-dev_invalidate_iotlb.patch
53cfdde4-x86-mem_event-prevent-underflow-of-vcpu-pause-counts.patch
53356c1e-x86-HVM-correct-CPUID-leaf-80000008-handling.patch
534bbd90-x86-nested-HAP-don-t-BUG-on-legitimate-error.patch
530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
5396e805-x86-HVM-refine-SMEP-test-in-HVM_CR4_GUEST_RESERVED_BITS.patch
5370e03b-pygrub-fix-error-handling-if-no-valid-partitions-are-found.patch
5321b257-x86-make-hypercall-preemption-checks-consistent.patch
5321b20b-common-make-hypercall-preemption-checks-consistent.patch
538c338f-x86-amd_ucode-flip-revision-numbers-in-printk.patch
537b5e79-VT-d-extend-error-report-masking-workaround-to-newer-chipsets.patch
531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
53b56de1-properly-reference-count-DOMCTL_-un-pausedomain-hypercalls.patch
530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
533ad1ee-VMX-fix-PAT-value-seen-by-guest.patch
53206661-pygrub-support-linux16-and-initrd16.patch
5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
-------------------------------------------------------------------
Fri Aug 1 08:25:12 UTC 2014 - cyliu@suse.com
- bnc#820873 - The "long" option doesn't work with "xl list"
53d124e7-fix-list_domain_details-check-config-data-length-0.patch
-------------------------------------------------------------------
Thu Jul 30 09:31:52 MDT 2014 - carnold@suse.com
- bnc#888996 - Package 'xen-tool' contains 'SuSE' spelling in a
filename and/or SPEC file
Renamed README.SuSE -> README.SUSE
Modified files: xen.spec, boot.local.xenU, init.pciback
xend-config.patch, xend-vif-route-ifup.patch
-------------------------------------------------------------------
Wed Jul 29 16:44:48 MDT 2014 - carnold@suse.com
- bnc#882673 - Dom0 memory should enforce a minimum memory size
(e.g. dom0_mem=min:512M)
xen.spec (Mike Latimer)
-------------------------------------------------------------------
Thu Jul 24 07:54:34 MDT 2014 - carnold@suse.com

xen.spec
View File

@ -21,8 +21,8 @@ Name: xen
ExclusiveArch: %ix86 x86_64 %arm aarch64
%define xvers 4.4
%define xvermaj 4
%define changeset 28401
%define xen_build_dir xen-4.4.0-testing
%define changeset 28531
%define xen_build_dir xen-4.4.1-testing
#
%define with_kmp 0
%define with_xend 0
@ -153,19 +153,19 @@ BuildRequires: xorg-x11-util-devel
%endif
%endif
Version: 4.4.0_26
Version: 4.4.1_02
Release: 0
PreReq: %insserv_prereq %fillup_prereq
Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
License: GPL-2.0
Group: System/Kernel
Source0: xen-4.4.0-testing-src.tar.bz2
Source0: xen-4.4.1-testing-src.tar.bz2
Source1: stubdom.tar.bz2
Source2: qemu-xen-traditional-dir-remote.tar.bz2
Source3: qemu-xen-dir-remote.tar.bz2
Source4: seabios-dir-remote.tar.bz2
Source5: ipxe.tar.bz2
Source10: README.SuSE
Source10: README.SUSE
Source11: boot.xen
Source12: boot.local.xenU
Source14: init.xendomains
@ -212,77 +212,23 @@ Source99: baselibs.conf
# http://xenbits.xensource.com/ext/xenalyze
Source20000: xenalyze.hg.tar.bz2
# Upstream patches
Patch1: 530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
Patch2: 530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
Patch3: 530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
Patch4: 530c54c3-x86-mce-Reduce-boot-time-logspam.patch
Patch5: 5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
Patch6: 5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
Patch7: 5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
Patch8: 531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
Patch9: 531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
Patch10: 531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
Patch11: 531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
Patch12: 531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch
Patch13: 53206661-pygrub-support-linux16-and-initrd16.patch
Patch14: 5321b20b-common-make-hypercall-preemption-checks-consistent.patch
Patch15: 5321b257-x86-make-hypercall-preemption-checks-consistent.patch
Patch16: 53271880-VT-d-fix-RMRR-handling.patch
Patch17: 5327190a-x86-Intel-work-around-Xeon-7400-series-erratum-AAI65.patch
Patch18: 53299d8f-xenconsole-reset-tty-on-failure.patch
Patch19: 53299d8f-xenconsole-tolerate-tty-errors.patch
Patch20: 532fff53-x86-fix-determination-of-bit-count-for-struct-domain-allocations.patch
Patch21: 5331917d-x86-enforce-preemption-in-HVM_set_mem_access-p2m_set_mem_access.patch
Patch22: 53356c1e-x86-HVM-correct-CPUID-leaf-80000008-handling.patch
Patch23: 533ad1ee-VMX-fix-PAT-value-seen-by-guest.patch
Patch24: 533d413b-x86-mm-fix-checks-against-max_mapped_pfn.patch
Patch25: 53455585-x86-AMD-feature-masking-is-unavailable-on-Fam11.patch
Patch26: 5346a7a0-x86-AMD-support-further-feature-masking-MSRs.patch
Patch27: 5347b524-evtchn-eliminate-64k-ports-limitation.patch
Patch28: 534bbd90-x86-nested-HAP-don-t-BUG-on-legitimate-error.patch
Patch29: 534bdf47-x86-HAP-also-flush-TLB-when-altering-a-present-1G-or-intermediate-entry.patch
Patch30: 53563ea4-x86-MSI-drop-workaround-for-insecure-Dom0-kernels.patch
Patch31: 5357baff-x86-add-missing-break-in-dom0_pit_access.patch
Patch32: 535a34eb-VT-d-suppress-UR-signaling-for-server-chipsets.patch
Patch33: 535a3516-VT-d-suppress-UR-signaling-for-desktop-chipsets.patch
Patch34: 535a354b-passthrough-allow-to-suppress-SERR-and-PERR-signaling.patch
Patch35: 535e31bc-x86-HVM-correct-the-SMEP-logic-for-HVM_CR0_GUEST_RESERVED_BITS.patch
Patch36: 535fa503-x86-HVM-restrict-HVMOP_set_mem_type.patch
Patch37: 53636978-hvm_set_ioreq_page-releases-wrong-page-in-error-path.patch
Patch38: 53636ebf-x86-fix-guest-CPUID-handling.patch
Patch39: 53709b77-Nested-VMX-load-current_vmcs-only-when-it-exists.patch
Patch40: 5370e03b-pygrub-fix-error-handling-if-no-valid-partitions-are-found.patch
Patch41: 53732f4f-x86-MCE-bypass-uninitialized-vcpu-in-vMCE-injection.patch
Patch42: 537b5e50-VT-d-apply-quirks-at-device-setup-time-rather-than-only-at-boot.patch
Patch43: 537b5e79-VT-d-extend-error-report-masking-workaround-to-newer-chipsets.patch
Patch44: 537b5ede-move-domain-to-cpupool0-before-destroying-it.patch
Patch45: 537cd0b0-hvmloader-also-cover-PCI-MMIO-ranges-above-4G-with-UC-MTRR-ranges.patch
Patch46: 537cd0cc-hvmloader-PA-range-0xfc000000-0xffffffff-should-be-UC.patch
Patch47: 5383167d-ACPI-ERST-fix-table-mapping.patch
Patch48: 5383175e-VT-d-fix-mask-applied-to-DMIBAR-in-desktop-chipset-XSA-59-workaround.patch
Patch49: 53859549-AMD-IOMMU-don-t-free-page-table-prematurely.patch
Patch50: 5385956b-x86-don-t-use-VA-for-cache-flush-when-also-flushing-TLB.patch
Patch51: 53859956-timers-set-the-deadline-more-accurately.patch
Patch52: 538c338f-x86-amd_ucode-flip-revision-numbers-in-printk.patch
Patch53: 538dcada-x86-HVM-eliminate-vulnerabilities-from-hvm_inject_msi.patch
Patch54: 538ee637-ACPI-Prevent-acpi_table_entries-from-falling-into-a-infinite-loop.patch
Patch55: 5390917a-VT-d-honor-APEI-firmware-first-mode-in-XSA-59-workaround-code.patch
Patch56: 53909259-x86-domctl-two-functional-fixes-to-XEN_DOMCTL_-gs-etvcpuextstate.patch
Patch57: 5390927f-x86-fix-reboot-shutdown-with-running-HVM-guests.patch
Patch58: 5396d818-avoid-crash-on-HVM-domain-destroy-with-PCI-passthrough.patch
Patch59: 5396e805-x86-HVM-refine-SMEP-test-in-HVM_CR4_GUEST_RESERVED_BITS.patch
Patch60: 539ebe62-x86-EFI-improve-boot-time-diagnostics.patch
Patch61: 539ec004-x86-mce-don-t-spam-the-console-with-CPUx-Temperature-z.patch
Patch62: 53a040c6-page-alloc-scrub-pages-used-by-hypervisor-upon-freeing.patch
Patch63: 53a1990a-IOMMU-prevent-VT-d-device-IOTLB-operations-on-wrong-IOMMU.patch
Patch64: 53a199d7-x86-EFI-allow-FPU-XMM-use-in-runtime-service-functions.patch
Patch65: 53aac342-x86-HVM-consolidate-and-sanitize-CR4-guest-reserved-bit-determination.patch
Patch66: 53b16cd4-VT-d-ATS-correct-and-clean-up-dev_invalidate_iotlb.patch
Patch67: 53b56de1-properly-reference-count-DOMCTL_-un-pausedomain-hypercalls.patch
Patch68: 53c9151b-Fix-xl-vncviewer-accesses-port-0-by-any-invalid-domid.patch
Patch69: 53cfdcc7-avoid-crash-when-doing-shutdown-with-active-cpupools.patch
Patch70: 53cfddaf-x86-mem_event-validate-the-response-vcpu_id-before-acting-on-it.patch
Patch71: 53cfdde4-x86-mem_event-prevent-underflow-of-vcpu-pause-counts.patch
Patch1: 5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
Patch2: 5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
Patch3: 53299d8f-xenconsole-reset-tty-on-failure.patch
Patch4: 53299d8f-xenconsole-tolerate-tty-errors.patch
Patch5: 5346a7a0-x86-AMD-support-further-feature-masking-MSRs.patch
Patch6: 53563ea4-x86-MSI-drop-workaround-for-insecure-Dom0-kernels.patch
Patch7: 537cd0b0-hvmloader-also-cover-PCI-MMIO-ranges-above-4G-with-UC-MTRR-ranges.patch
Patch8: 537cd0cc-hvmloader-PA-range-0xfc000000-0xffffffff-should-be-UC.patch
Patch9: 539ebe62-x86-EFI-improve-boot-time-diagnostics.patch
Patch10: 53aac342-x86-HVM-consolidate-and-sanitize-CR4-guest-reserved-bit-determination.patch
Patch11: 53c9151b-Fix-xl-vncviewer-accesses-port-0-by-any-invalid-domid.patch
Patch12: 53d124e7-fix-list_domain_details-check-config-data-length-0.patch
Patch13: 53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch
Patch14: 53dba447-x86-ACPI-allow-CMOS-RTC-use-even-when-ACPI-says-there-is-none.patch
Patch15: 53df71c7-lz4-check-for-underruns.patch
Patch16: 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch
Patch17: 53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch
# Upstream qemu
Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch
Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch
@ -665,60 +611,6 @@ Authors:
%patch15 -p1
%patch16 -p1
%patch17 -p1
%patch18 -p1
%patch19 -p1
%patch20 -p1
%patch21 -p1
%patch22 -p1
%patch23 -p1
%patch24 -p1
%patch25 -p1
%patch26 -p1
%patch27 -p1
%patch28 -p1
%patch29 -p1
%patch30 -p1
%patch31 -p1
%patch32 -p1
%patch33 -p1
%patch34 -p1
%patch35 -p1
%patch36 -p1
%patch37 -p1
%patch38 -p1
%patch39 -p1
%patch40 -p1
%patch41 -p1
%patch42 -p1
%patch43 -p1
%patch44 -p1
%patch45 -p1
%patch46 -p1
%patch47 -p1
%patch48 -p1
%patch49 -p1
%patch50 -p1
%patch51 -p1
%patch52 -p1
%patch53 -p1
%patch54 -p1
%patch55 -p1
%patch56 -p1
%patch57 -p1
%patch58 -p1
%patch59 -p1
%patch60 -p1
%patch61 -p1
%patch62 -p1
%patch63 -p1
%patch64 -p1
%patch65 -p1
%patch66 -p1
%patch67 -p1
%patch68 -p1
%patch69 -p1
%patch70 -p1
%patch71 -p1
# Upstream qemu patches
%patch250 -p1
%patch251 -p1
@ -1361,7 +1253,7 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper
%{_libdir}/python%{pyver}/site-packages/xnloader.py
%dir %{_defaultdocdir}/xen
%{_defaultdocdir}/xen/COPYING
%{_defaultdocdir}/xen/README.SuSE
%{_defaultdocdir}/xen/README.SUSE
%{_defaultdocdir}/xen/boot.local.xenU
%{_defaultdocdir}/xen/boot.xen
%{_defaultdocdir}/xen/misc
@ -1504,6 +1396,12 @@ if [ -f /usr/bin/qemu-io ]; then
rm -f /usr/lib/xen/bin/qemu-io-xen
ln -s /usr/bin/qemu-io /usr/lib/xen/bin/qemu-io-xen
fi
if [ -f /etc/default/grub ] && ! (/usr/bin/grep GRUB_CMDLINE_XEN /etc/default/grub >/dev/null); then
echo '# Xen boot parameters for all Xen boots' >> /etc/default/grub
echo 'GRUB_CMDLINE_XEN=""' >> /etc/default/grub
echo '# Xen boot parameters for non-recovery Xen boots (in addition to GRUB_CMDLINE_XEN)' >> /etc/default/grub
echo 'GRUB_CMDLINE_XEN_DEFAULT=""' >> /etc/default/grub
fi
%if %{?with_xend}0
%if %{?with_xend_tools_pkg}0

View File

@ -34,7 +34,7 @@ Index: xen-4.2.0-testing/tools/examples/xend-config.sxp
# yourself a wrapper script, and call network-bridge from it, as appropriate.
#
-(network-script network-bridge)
+# SuSE users note:
+# SUSE users note:
+# On openSUSE >= 11.1 and SLES >= 11, networks should be configured using
+# native platform tool - YaST. vif-bridge and qemu-ifup can be used to
+# connect vifs to the YaST-managed networks.

View File

@ -12,7 +12,7 @@ Index: xen-4.4.0-testing/tools/examples/xend-config.sxp
#(network-script network-route)
#(vif-script vif-route)
+# SuSE users note:
+# SUSE users note:
+# If using a routed network configuration it is advised to NOT use
+# network-route and vif-route scripts but instead use sysconfig scripts
+# in dom0 and vif-route-ifup script to "connect" the domU vif to dom0.