xen/530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
Charles Arnold e46082b3ea - Upstream patches from Jan
  530b27fd-x86-MCE-Fix-race-condition-in-mctelem_reserve.patch
  530b2880-Nested-VMX-update-nested-paging-mode-on-vmexit.patch
  530b28c5-x86-MSI-don-t-risk-division-by-zero.patch
  530c54c3-x86-mce-Reduce-boot-time-logspam.patch
  5310bac3-mm-ensure-useful-progress-in-decrease_reservation.patch
  5315a254-IOMMU-generalize-and-correct-softirq-processing.patch
  5315a3bb-x86-don-t-propagate-acpi_skip_timer_override-do-Dom0.patch
  5315a43a-x86-ACPI-also-print-address-space-for-PM1x-fields.patch
  531d8db1-x86-hvm-refine-the-judgment-on-IDENT_PT-for-EMT.patch
  531d8e09-x86-HVM-fix-memory-type-merging-in-epte_get_entry_emt.patch
  531d8e34-x86-HVM-consolidate-passthrough-handling-in-epte_get_entry_emt.patch
  531d8fd0-kexec-identify-which-cpu-the-kexec-image-is-being-executed-on.patch
  531dc0e2-xmalloc-handle-correctly-page-allocation-when-align-size.patch

- Add conversion tool for migrating xend/xm managed VMs to libvirt 
  xen2libvirt.py (Jim Fehlig)

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=304
2014-03-13 23:46:35 +00:00

# Commit 60ea3a3ac3d2bcd8e85b250fdbfc46b3b9dc7085
# Date 2014-02-24 12:07:41 +0100
# Author Frediano Ziglio <frediano.ziglio@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MCE: Fix race condition in mctelem_reserve
These lines (in mctelem_reserve)

        newhead = oldhead->mcte_next;
        if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {

are racy. After the newhead pointer has been read, another flow (a thread
or a recursive invocation) can rearrange the whole list yet leave the head
with the same value. oldhead then still matches *freelp, but the new head
being installed may point to an arbitrary element, possibly one already in
use. This patch instead uses a bit array and atomic bit operations.
Signed-off-by: Frediano Ziglio <frediano.ziglio@citrix.com>
Reviewed-by: Liu Jinsong <jinsong.liu@intel.com>
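
For illustration only, below is a minimal, self-contained sketch of the allocation
scheme the patch switches to: scan a free-bitmap for a set bit and claim the slot by
atomically clearing that bit. It uses C11 atomics and a GCC builtin rather than Xen's
find_next_bit()/test_and_clear_bit() helpers, and the names (free_map, reserve_slot,
release_slot, NENT) are invented for this example; it also ignores the urgent vs.
non-urgent split of the real pool.

/* Illustrative only -- not Xen code.  Small fixed pool whose free slots
 * are tracked in one atomic word; a set bit means the slot is free. */
#include <stdatomic.h>
#include <stdio.h>

#define NENT 30                        /* pool size, cf. MC_NENT */

static _Atomic unsigned long free_map = (1UL << NENT) - 1;

/* Claim one free slot; return its index, or -1 if the pool is empty. */
static int reserve_slot(void)
{
        for (;;) {
                unsigned long map = atomic_load(&free_map);

                if (map == 0)
                        return -1;     /* exhausted, cf. mctelem_drop_count++ */

                int bit = __builtin_ctzl(map);  /* lowest set bit, cf. find_next_bit() */

                /* Atomically clear the bit; the old value tells us who won. */
                unsigned long old = atomic_fetch_and(&free_map, ~(1UL << bit));

                if (old & (1UL << bit))
                        return bit;    /* we cleared it, the slot is ours */
                /* Raced with another reserver; rescan the bitmap. */
        }
}

/* Return a slot to the pool. */
static void release_slot(int bit)
{
        atomic_fetch_or(&free_map, 1UL << bit);
}

int main(void)
{
        int a = reserve_slot();
        int b = reserve_slot();

        printf("reserved slots %d and %d\n", a, b);
        release_slot(b);
        release_slot(a);
        return 0;
}

Unlike the old freelist, where the cmpxchg compares only the head pointer and can
therefore succeed against a stale mcte_next (the classic ABA problem), here the
per-slot bit is both the thing tested and the thing claimed, so a stale snapshot of
the bitmap can at worst cause a retry, never the hand-out of an in-use element.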
--- a/xen/arch/x86/cpu/mcheck/mctelem.c
+++ b/xen/arch/x86/cpu/mcheck/mctelem.c
@@ -37,24 +37,19 @@ struct mctelem_ent {
void *mcte_data; /* corresponding data payload */
};
-#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent freelist */
-#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent freelist */
-#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent errors */
-#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent errors */
+#define MCTE_F_CLASS_URGENT 0x0001U /* in use - urgent errors */
+#define MCTE_F_CLASS_NONURGENT 0x0002U /* in use - nonurgent errors */
#define MCTE_F_STATE_FREE 0x0010U /* on a freelist */
#define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list */
#define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list */
#define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list */
-#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
#define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)
#define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \
MCTE_F_STATE_UNCOMMITTED | \
MCTE_F_STATE_COMMITTED | \
MCTE_F_STATE_PROCESSING)
-#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)
-
#define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
#define MCTE_SET_CLASS(tep, new) do { \
(tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
@@ -69,6 +64,8 @@ struct mctelem_ent {
#define MC_URGENT_NENT 10
#define MC_NONURGENT_NENT 20
+#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT)
+
#define MC_NCLASSES (MC_NONURGENT + 1)
#define COOKIE2MCTE(c) ((struct mctelem_ent *)(c))
@@ -77,11 +74,9 @@ struct mctelem_ent {
static struct mc_telem_ctl {
/* Linked lists that thread the array members together.
*
- * The free lists are singly-linked via mcte_next, and we allocate
- * from them by atomically unlinking an element from the head.
- * Consumed entries are returned to the head of the free list.
- * When an entry is reserved off the free list it is not linked
- * on any list until it is committed or dismissed.
+ * The free lists is a bit array where bit 1 means free.
+ * This as element number is quite small and is easy to
+ * atomically allocate that way.
*
* The committed list grows at the head and we do not maintain a
* tail pointer; insertions are performed atomically. The head
@@ -101,7 +96,7 @@ static struct mc_telem_ctl {
* we can lock it for updates. The head of the processing list
* always has the oldest telemetry, and we append (as above)
* at the tail of the processing list. */
- struct mctelem_ent *mctc_free[MC_NCLASSES];
+ DECLARE_BITMAP(mctc_free, MC_NENT);
struct mctelem_ent *mctc_committed[MC_NCLASSES];
struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
@@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cp
*/
static void mctelem_free(struct mctelem_ent *tep)
{
- mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
- MC_URGENT : MC_NONURGENT;
-
BUG_ON(tep->mcte_refcnt != 0);
BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
tep->mcte_prev = NULL;
- mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
+ tep->mcte_next = NULL;
+
+ /* set free in array */
+ set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free);
}
/* Increment the reference count of an entry that is not linked on to
@@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz)
}
if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
- MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
- (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
- datasz)) == NULL) {
+ MC_NENT)) == NULL ||
+ (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) {
if (mctctl.mctc_elems)
xfree(mctctl.mctc_elems);
printk("Allocations for MCA telemetry failed\n");
return;
}
- for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
- struct mctelem_ent *tep, **tepp;
+ for (i = 0; i < MC_NENT; i++) {
+ struct mctelem_ent *tep;
tep = mctctl.mctc_elems + i;
tep->mcte_flags = MCTE_F_STATE_FREE;
tep->mcte_refcnt = 0;
tep->mcte_data = datarr + i * datasz;
- if (i < MC_URGENT_NENT) {
- tepp = &mctctl.mctc_free[MC_URGENT];
- tep->mcte_flags |= MCTE_F_HOME_URGENT;
- } else {
- tepp = &mctctl.mctc_free[MC_NONURGENT];
- tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
- }
-
- tep->mcte_next = *tepp;
+ __set_bit(i, mctctl.mctc_free);
+ tep->mcte_next = NULL;
tep->mcte_prev = NULL;
- *tepp = tep;
}
}
@@ -310,32 +296,25 @@ static int mctelem_drop_count;
/* Reserve a telemetry entry, or return NULL if none available.
* If we return an entry then the caller must subsequently call exactly one of
- * mctelem_unreserve or mctelem_commit for that entry.
+ * mctelem_dismiss or mctelem_commit for that entry.
*/
mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
{
- struct mctelem_ent **freelp;
- struct mctelem_ent *oldhead, *newhead;
- mctelem_class_t target = (which == MC_URGENT) ?
- MC_URGENT : MC_NONURGENT;
+ unsigned bit;
+ unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT;
- freelp = &mctctl.mctc_free[target];
for (;;) {
- if ((oldhead = *freelp) == NULL) {
- if (which == MC_URGENT && target == MC_URGENT) {
- /* raid the non-urgent freelist */
- target = MC_NONURGENT;
- freelp = &mctctl.mctc_free[target];
- continue;
- } else {
- mctelem_drop_count++;
- return (NULL);
- }
+ bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit);
+
+ if (bit >= MC_NENT) {
+ mctelem_drop_count++;
+ return (NULL);
}
- newhead = oldhead->mcte_next;
- if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
- struct mctelem_ent *tep = oldhead;
+ /* try to allocate, atomically clear free bit */
+ if (test_and_clear_bit(bit, mctctl.mctc_free)) {
+ /* return element we got */
+ struct mctelem_ent *tep = mctctl.mctc_elems + bit;
mctelem_hold(tep);
MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);