xen/557eb55f-gnttab-per-active-entry-locking.patch
Charles Arnold 4a5ee0f11d - bsc#945164 - Xl destroy show error with kernel of SLES 12 sp1
5537a4d8-libxl-use-DEBUG-log-level-instead-of-INFO.patch

- Upstream patches from Jan
  55dc78e9-x86-amd_ucode-skip-updates-for-final-levels.patch
  55dc7937-x86-IO-APIC-don-t-create-pIRQ-mapping-from-masked-RTE.patch
  55df2f76-IOMMU-skip-domains-without-page-tables-when-dumping.patch
  55e43fd8-x86-NUMA-fix-setup_node.patch
  55e43ff8-x86-NUMA-don-t-account-hotplug-regions.patch
  55e593f1-x86-NUMA-make-init_node_heap-respect-Xen-heap-limit.patch
  54c2553c-grant-table-use-uint16_t-consistently-for-offset-and-length.patch
  54ca33bc-grant-table-refactor-grant-copy-to-reduce-duplicate-code.patch
  54ca340e-grant-table-defer-releasing-pages-acquired-in-a-grant-copy.patch

- bsc#944463 - VUL-0: CVE-2015-5239: qemu-kvm: Integer overflow in
  vnc_client_read() and protocol_client_msg()
  CVE-2015-5239-qemuu-limit-client_cut_text-msg-payload-size.patch
  CVE-2015-5239-qemut-limit-client_cut_text-msg-payload-size.patch
- bsc#944697 - VUL-1: CVE-2015-6815: qemu: net: e1000: infinite
  loop issue
  CVE-2015-6815-qemuu-e1000-fix-infinite-loop.patch
  CVE-2015-6815-qemut-e1000-fix-infinite-loop.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=375
2015-09-16 16:29:39 +00:00

552 lines
18 KiB
Diff

# Commit b4650e9a96d78b87ccf7deb4f74733ccfcc64db5
# Date 2015-06-15 13:22:07 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: per-active entry locking
Introduce a per-active entry spin lock to protect active entry state
The grant table lock must be locked before acquiring (locking) an
active entry.
This is a step in reducing contention on the grant table lock, but
will only do so once the grant table lock is turned into a read-write
lock.
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -63,6 +63,7 @@ is complete.
act->domid : remote domain being granted rights
act->frame : machine frame being granted
act->pin : used to hold reference counts
+ act->lock : spinlock used to serialize access to active entry state
Map tracking
~~~~~~~~~~~~
@@ -74,7 +75,46 @@ is complete.
matching map track entry is then removed, as if unmap had been invoked.
These are not used by the transfer mechanism.
map->domid : owner of the mapped frame
- map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+ map->ref : grant reference
+ map->flags : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+ grant_table->lock : lock used to prevent readers from accessing
+ inconsistent grant table state such as current
+ version, partially initialized active table pages,
+ etc.
+ active_grant_entry->lock : spinlock used to serialize modifications to
+ active entries
+
+ The primary lock for the grant table is a spinlock. All functions
+ that access members of struct grant_table must acquire the lock
+ around critical sections.
+
+ Active entries are obtained by calling active_entry_acquire(gt, ref).
+ This function returns a pointer to the active entry after locking its
+ spinlock. The caller must hold the grant table lock for the gt in
+ question before calling active_entry_acquire(). This is because the
+ grant table can be dynamically extended via gnttab_grow_table() while
+ a domain is running and must be fully initialized. Once all access to
+ the active entry is complete, release the lock by calling
+ active_entry_release(act).
+
+ Summary of rules for locking:
+ active_entry_acquire() and active_entry_release() can only be
+ called when holding the relevant grant table's lock. I.e.:
+ spin_lock(&gt->lock);
+ act = active_entry_acquire(gt, ref);
+ ...
+ active_entry_release(act);
+ spin_unlock(&gt->lock);
+
+ Active entries cannot be acquired while holding the maptrack lock.
+ Multiple active entries can be acquired while holding the grant table
+ lock.
********************************************************************************
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -157,10 +157,13 @@ struct active_grant_entry {
in the page. */
unsigned length:16; /* For sub-page grants, the length of the
grant. */
+ spinlock_t lock; /* lock to protect access of this entry.
+ see docs/misc/grant-tables.txt for
+ locking protocol */
};
#define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
-#define active_entry(t, e) \
+#define _active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
static inline void gnttab_flush_tlb(const struct domain *d)
@@ -188,6 +191,24 @@ nr_active_grant_frames(struct grant_tabl
return num_act_frames_from_sha_frames(nr_grant_frames(gt));
}
+static inline struct active_grant_entry *
+active_entry_acquire(struct grant_table *t, grant_ref_t e)
+{
+ struct active_grant_entry *act;
+
+ ASSERT(spin_is_locked(&t->lock));
+
+ act = &_active_entry(t, e);
+ spin_lock(&act->lock);
+
+ return act;
+}
+
+static inline void active_entry_release(struct active_grant_entry *act)
+{
+ spin_unlock(&act->lock);
+}
+
/* Check if the page has been paged out, or needs unsharing.
If rc == GNTST_okay, *page contains the page struct with a ref taken.
Caller must do put_page(*page).
@@ -505,7 +526,6 @@ static int grant_map_exists(const struct
unsigned long mfn,
unsigned int *ref_count)
{
- const struct active_grant_entry *act;
unsigned int ref, max_iter;
ASSERT(spin_is_locked(&rgt->lock));
@@ -514,18 +534,19 @@ static int grant_map_exists(const struct
nr_grant_entries(rgt));
for ( ref = *ref_count; ref < max_iter; ref++ )
{
- act = &active_entry(rgt, ref);
+ struct active_grant_entry *act;
+ bool_t exists;
- if ( !act->pin )
- continue;
+ act = active_entry_acquire(rgt, ref);
- if ( act->domid != ld->domain_id )
- continue;
+ exists = act->pin
+ && act->domid == ld->domain_id
+ && act->frame == mfn;
- if ( act->frame != mfn )
- continue;
+ active_entry_release(act);
- return 0;
+ if ( exists )
+ return 0;
}
if ( ref < nr_grant_entries(rgt) )
@@ -546,13 +567,24 @@ static void mapcount(
*wrc = *rdc = 0;
+ /*
+ * Must have the local domain's grant table lock when iterating
+ * over its maptrack entries.
+ */
+ ASSERT(spin_is_locked(&lgt->lock));
+ /*
+ * Must have the remote domain's grant table lock while counting
+ * its active entries.
+ */
+ ASSERT(spin_is_locked(&rd->grant_table->lock));
+
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
map = &maptrack_entry(lgt, handle);
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
map->domid != rd->domain_id )
continue;
- if ( active_entry(rd->grant_table, map->ref).frame == mfn )
+ if ( _active_entry(rd->grant_table, map->ref).frame == mfn )
(map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++;
}
}
@@ -639,7 +671,7 @@ __gnttab_map_grant_ref(
if ( unlikely(op->ref >= nr_grant_entries(rgt)))
PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
shah = shared_entry_header(rgt, op->ref);
if (rgt->gt_version == 1) {
sha1 = &shared_entry_v1(rgt, op->ref);
@@ -656,7 +688,7 @@ __gnttab_map_grant_ref(
((act->domid != ld->domain_id) ||
(act->pin & 0x80808080U) != 0 ||
(act->is_sub_page)) )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
act->domid, ld->domain_id, act->pin, act->is_sub_page);
@@ -667,7 +699,7 @@ __gnttab_map_grant_ref(
if ( (rc = _set_status(rgt->gt_version, ld->domain_id,
op->flags & GNTMAP_readonly,
1, shah, act, status) ) != GNTST_okay )
- goto unlock_out;
+ goto act_release_out;
if ( !act->pin )
{
@@ -702,6 +734,7 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
+ active_entry_release(act);
spin_unlock(&rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
@@ -839,7 +872,7 @@ __gnttab_map_grant_ref(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
if ( op->flags & GNTMAP_device_map )
act->pin -= (op->flags & GNTMAP_readonly) ?
@@ -856,6 +889,9 @@ __gnttab_map_grant_ref(
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
+ act_release_out:
+ active_entry_release(act);
+
unlock_out:
spin_unlock(&rgt->lock);
op->status = rc;
@@ -950,7 +986,7 @@ __gnttab_unmap_common(
}
op->rd = rd;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
if ( op->frame == 0 )
{
@@ -959,7 +995,7 @@ __gnttab_unmap_common(
else
{
if ( unlikely(op->frame != act->frame) )
- PIN_FAIL(unmap_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad frame number doesn't match gntref. (%lx != %lx)\n",
op->frame, act->frame);
if ( op->flags & GNTMAP_device_map )
@@ -978,7 +1014,7 @@ __gnttab_unmap_common(
if ( (rc = replace_grant_host_mapping(op->host_addr,
op->frame, op->new_addr,
op->flags)) < 0 )
- goto unmap_out;
+ goto act_release_out;
ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
op->map->flags &= ~GNTMAP_host_map;
@@ -1000,7 +1036,7 @@ __gnttab_unmap_common(
if ( err )
{
rc = GNTST_general_error;
- goto unmap_out;
+ goto act_release_out;
}
}
@@ -1008,8 +1044,11 @@ __gnttab_unmap_common(
if ( !(op->flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
+ act_release_out:
+ active_entry_release(act);
unmap_out:
double_gt_unlock(lgt, rgt);
+
op->status = rc;
rcu_unlock_domain(rd);
}
@@ -1042,9 +1081,9 @@ __gnttab_unmap_common_complete(struct gn
spin_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- goto unmap_out;
+ goto unlock_out;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
sha = shared_entry_header(rgt, op->map->ref);
if ( rgt->gt_version == 1 )
@@ -1058,7 +1097,7 @@ __gnttab_unmap_common_complete(struct gn
* Suggests that __gntab_unmap_common failed early and so
* nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
pg = mfn_to_page(op->frame);
@@ -1082,7 +1121,7 @@ __gnttab_unmap_common_complete(struct gn
* Suggests that __gntab_unmap_common failed in
* replace_grant_host_mapping() so nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
if ( !is_iomem_page(op->frame) )
@@ -1103,8 +1142,11 @@ __gnttab_unmap_common_complete(struct gn
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
- unmap_out:
+ act_release_out:
+ active_entry_release(act);
+ unlock_out:
spin_unlock(&rgt->lock);
+
if ( put_handle )
{
op->map->flags = 0;
@@ -1296,7 +1338,7 @@ gnttab_grow_table(struct domain *d, unsi
/* d's grant table lock must be held by the caller */
struct grant_table *gt = d->grant_table;
- unsigned int i;
+ unsigned int i, j;
ASSERT(req_nr_frames <= max_grant_frames);
@@ -1311,6 +1353,8 @@ gnttab_grow_table(struct domain *d, unsi
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
clear_page(gt->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&gt->active[i][j].lock);
}
/* Shared */
@@ -1805,7 +1849,7 @@ __release_grant_for_copy(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
r_frame = act->frame;
@@ -1844,6 +1888,7 @@ __release_grant_for_copy(
released_read = 1;
}
+ active_entry_release(act);
spin_unlock(&rgt->lock);
if ( td != rd )
@@ -1905,14 +1950,14 @@ __acquire_grant_for_copy(
spin_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(gt_unlock_out, GNTST_general_error,
"remote grant table not ready\n");
if ( unlikely(gref >= nr_grant_entries(rgt)) )
- PIN_FAIL(unlock_out, GNTST_bad_gntref,
+ PIN_FAIL(gt_unlock_out, GNTST_bad_gntref,
"Bad grant reference %ld\n", gref);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
shah = shared_entry_header(rgt, gref);
if ( rgt->gt_version == 1 )
{
@@ -1971,6 +2016,13 @@ __acquire_grant_for_copy(
PIN_FAIL(unlock_out_clear, GNTST_general_error,
"transitive grant referenced bad domain %d\n",
trans_domid);
+
+ /*
+ * __acquire_grant_for_copy() could take the lock on the
+ * remote table (if rd == td), so we have to drop the lock
+ * here and reacquire
+ */
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
@@ -1978,9 +2030,12 @@ __acquire_grant_for_copy(
&trans_page_off, &trans_length, 0);
spin_lock(&rgt->lock);
+ act = active_entry_acquire(rgt, gref);
+
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
}
@@ -1993,6 +2048,7 @@ __acquire_grant_for_copy(
{
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
@@ -2061,6 +2117,7 @@ __acquire_grant_for_copy(
*length = act->length;
*frame = act->frame;
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
@@ -2073,7 +2130,11 @@ __acquire_grant_for_copy(
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
+ active_entry_release(act);
+
+ gt_unlock_out:
spin_unlock(&rgt->lock);
+
return rc;
}
@@ -2373,7 +2434,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
gnttab_set_version_t op;
struct domain *d = current->domain;
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES];
long res;
int i;
@@ -2398,8 +2458,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
{
for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ )
{
- act = &active_entry(gt, i);
- if ( act->pin != 0 )
+ if ( read_atomic(&_active_entry(gt, i).pin) != 0 )
{
gdprintk(XENLOG_WARNING,
"tried to change grant table version from %d to %d, but some grant entries still in use\n",
@@ -2586,7 +2645,8 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
{
struct domain *d = rcu_lock_current_domain();
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
+ struct active_grant_entry *act_a = NULL;
+ struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
spin_lock(&gt->lock);
@@ -2600,12 +2660,16 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
if ( unlikely(ref_b >= nr_grant_entries(d->grant_table)))
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b);
- act = &active_entry(gt, ref_a);
- if ( act->pin )
+ /* Swapping the same ref is a no-op. */
+ if ( ref_a == ref_b )
+ goto out;
+
+ act_a = active_entry_acquire(gt, ref_a);
+ if ( act_a->pin )
PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a);
- act = &active_entry(gt, ref_b);
- if ( act->pin )
+ act_b = active_entry_acquire(gt, ref_b);
+ if ( act_b->pin )
PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b);
if ( gt->gt_version == 1 )
@@ -2632,6 +2696,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
}
out:
+ if ( act_b != NULL )
+ active_entry_release(act_b);
+ if ( act_a != NULL )
+ active_entry_release(act_a);
spin_unlock(&gt->lock);
rcu_unlock_domain(d);
@@ -2941,7 +3009,7 @@ grant_table_create(
struct domain *d)
{
struct grant_table *t;
- int i;
+ unsigned int i, j;
if ( (t = xzalloc(struct grant_table)) == NULL )
goto no_mem_0;
@@ -2960,6 +3028,8 @@ grant_table_create(
if ( (t->active[i] = alloc_xenheap_page()) == NULL )
goto no_mem_2;
clear_page(t->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&t->active[i][j].lock);
}
/* Tracking of mapped foreign frames table */
@@ -3056,7 +3126,7 @@ gnttab_release_mappings(
rgt = rd->grant_table;
spin_lock(&rgt->lock);
- act = &active_entry(rgt, ref);
+ act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
if (rgt->gt_version == 1)
status = &sha->flags;
@@ -3114,6 +3184,7 @@ gnttab_release_mappings(
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rcu_unlock_domain(rd);
@@ -3176,9 +3247,12 @@ static void gnttab_usage_print(struct do
uint16_t status;
uint64_t frame;
- act = &active_entry(gt, ref);
+ act = active_entry_acquire(gt, ref);
if ( !act->pin )
+ {
+ active_entry_release(act);
continue;
+ }
sha = shared_entry_header(gt, ref);
@@ -3208,6 +3282,7 @@ static void gnttab_usage_print(struct do
printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n",
ref, act->domid, act->frame, act->pin,
sha->domid, frame, status);
+ active_entry_release(act);
}
out: