- Update to Xen Version 4.6.0

xen-4.6.0-testing-src.tar.bz2
  mini-os.tar.bz2
  blktap2-no-uninit.patch
  stubdom-have-iovec.patch
- Renamed
  xsa149.patch to CVE-2015-7969-xsa149.patch
- Dropped patches now contained in tarball or unnecessary
  xen-4.5.2-testing-src.tar.bz2
  54c2553c-grant-table-use-uint16_t-consistently-for-offset-and-length.patch
  54ca33bc-grant-table-refactor-grant-copy-to-reduce-duplicate-code.patch
  54ca340e-grant-table-defer-releasing-pages-acquired-in-a-grant-copy.patch
  54f4985f-libxl-fix-libvirtd-double-free.patch
  55103616-vm-assist-prepare-for-discontiguous-used-bit-numbers.patch
  551ac326-xentop-add-support-for-qdisk.patch
  552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch
  552d0fe8-x86-mtrr-include-asm-atomic.h.patch
  552d293b-x86-vMSI-X-honor-all-mask-requests.patch
  552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
  5537a4d8-libxl-use-DEBUG-log-level-instead-of-INFO.patch
  5548e903-domctl-don-t-truncate-XEN_DOMCTL_max_mem-requests.patch
  5548e95d-x86-allow-to-suppress-M2P-user-mode-exposure.patch
  554c7aee-x86-provide-arch_fetch_and_add.patch
  554c7b00-arm-provide-arch_fetch_and_add.patch
  554cc211-libxl-add-qxl.patch
  55534b0a-x86-provide-add_sized.patch
  55534b25-arm-provide-add_sized.patch
  5555a4f8-use-ticket-locks-for-spin-locks.patch
  5555a5b9-x86-arm-remove-asm-spinlock-h.patch
  5555a8ec-introduce-non-contiguous-allocation.patch
  556d973f-unmodified-drivers-tolerate-IRQF_DISABLED-being-undefined.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=387
This commit is contained in:
Charles Arnold 2015-11-11 17:04:52 +00:00 committed by Git OBS Bridge
parent 95031aabfd
commit 047483513a
114 changed files with 500 additions and 10098 deletions

View File

@ -1,49 +0,0 @@
# Commit b7f74a19fe099e373ad52e4218c466f3e91b5f43
# Date 2015-01-23 15:05:48 +0100
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
grant-table: use uint16_t consistently for grant copy offset and length
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1882,7 +1882,7 @@ static int
__acquire_grant_for_copy(
struct domain *rd, unsigned long gref, domid_t ldom, int readonly,
unsigned long *frame, struct page_info **page,
- unsigned *page_off, unsigned *length, unsigned allow_transitive)
+ uint16_t *page_off, uint16_t *length, unsigned allow_transitive)
{
struct grant_table *rgt = rd->grant_table;
grant_entry_v1_t *sha1;
@@ -1895,8 +1895,8 @@ __acquire_grant_for_copy(
grant_ref_t trans_gref;
struct domain *td;
unsigned long grant_frame;
- unsigned trans_page_off;
- unsigned trans_length;
+ uint16_t trans_page_off;
+ uint16_t trans_length;
int is_sub_page;
s16 rc = GNTST_okay;
@@ -2122,7 +2122,7 @@ __gnttab_copy(
if ( src_is_gref )
{
- unsigned source_off, source_len;
+ uint16_t source_off, source_len;
rc = __acquire_grant_for_copy(sd, op->source.u.ref,
current->domain->domain_id, 1,
&s_frame, &s_pg,
@@ -2147,7 +2147,7 @@ __gnttab_copy(
if ( dest_is_gref )
{
- unsigned dest_off, dest_len;
+ uint16_t dest_off, dest_len;
rc = __acquire_grant_for_copy(dd, op->dest.u.ref,
current->domain->domain_id, 0,
&d_frame, &d_pg, &dest_off, &dest_len, 1);

View File

@ -1,373 +0,0 @@
# Commit 3c72f8c2cf19f735d813081c836f03e3078ee5c1
# Date 2015-01-29 14:21:00 +0100
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
grant-table: refactor grant copy to reduce duplicate code
Much of the grant copy operation is identical for the source and
destination buffers. Refactor the code into per-buffer functions.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2077,139 +2077,230 @@ __acquire_grant_for_copy(
return rc;
}
-static void
-__gnttab_copy(
- struct gnttab_copy *op)
-{
- struct domain *sd = NULL, *dd = NULL;
- unsigned long s_frame, d_frame;
- struct page_info *s_pg = NULL, *d_pg = NULL;
- char *sp, *dp;
- s16 rc = GNTST_okay;
- int have_d_grant = 0, have_s_grant = 0;
- int src_is_gref, dest_is_gref;
-
- if ( ((op->source.offset + op->len) > PAGE_SIZE) ||
- ((op->dest.offset + op->len) > PAGE_SIZE) )
- PIN_FAIL(error_out, GNTST_bad_copy_arg, "copy beyond page area.\n");
+struct gnttab_copy_buf {
+ /* Guest provided. */
+ struct gnttab_copy_ptr ptr;
+ uint16_t len;
+
+ /* Mapped etc. */
+ struct domain *domain;
+ unsigned long frame;
+ struct page_info *page;
+ void *virt;
+ bool_t read_only;
+ bool_t have_grant;
+ bool_t have_type;
+};
- src_is_gref = op->flags & GNTCOPY_source_gref;
- dest_is_gref = op->flags & GNTCOPY_dest_gref;
+static int gnttab_copy_lock_domain(domid_t domid, unsigned int gref_flag,
+ struct gnttab_copy_buf *buf)
+{
+ int rc;
- if ( (op->source.domid != DOMID_SELF && !src_is_gref ) ||
- (op->dest.domid != DOMID_SELF && !dest_is_gref) )
- PIN_FAIL(error_out, GNTST_permission_denied,
+ if ( domid != DOMID_SELF && !gref_flag )
+ PIN_FAIL(out, GNTST_permission_denied,
"only allow copy-by-mfn for DOMID_SELF.\n");
- if ( op->source.domid == DOMID_SELF )
- sd = rcu_lock_current_domain();
- else if ( (sd = rcu_lock_domain_by_id(op->source.domid)) == NULL )
- PIN_FAIL(error_out, GNTST_bad_domain,
- "couldn't find %d\n", op->source.domid);
-
- if ( op->dest.domid == DOMID_SELF )
- dd = rcu_lock_current_domain();
- else if ( (dd = rcu_lock_domain_by_id(op->dest.domid)) == NULL )
- PIN_FAIL(error_out, GNTST_bad_domain,
- "couldn't find %d\n", op->dest.domid);
+ if ( domid == DOMID_SELF )
+ buf->domain = rcu_lock_current_domain();
+ else
+ {
+ buf->domain = rcu_lock_domain_by_id(domid);
+ if ( buf->domain == NULL )
+ PIN_FAIL(out, GNTST_bad_domain, "couldn't find %d\n", domid);
+ }
- rc = xsm_grant_copy(XSM_HOOK, sd, dd);
- if ( rc )
+ buf->ptr.domid = domid;
+ rc = GNTST_okay;
+ out:
+ return rc;
+}
+
+static void gnttab_copy_unlock_domains(struct gnttab_copy_buf *src,
+ struct gnttab_copy_buf *dest)
+{
+ if ( src->domain )
+ {
+ rcu_unlock_domain(src->domain);
+ src->domain = NULL;
+ }
+ if ( dest->domain )
+ {
+ rcu_unlock_domain(dest->domain);
+ dest->domain = NULL;
+ }
+}
+
+static int gnttab_copy_lock_domains(const struct gnttab_copy *op,
+ struct gnttab_copy_buf *src,
+ struct gnttab_copy_buf *dest)
+{
+ int rc;
+
+ rc = gnttab_copy_lock_domain(op->source.domid,
+ op->flags & GNTCOPY_source_gref, src);
+ if ( rc < 0 )
+ goto error;
+ rc = gnttab_copy_lock_domain(op->dest.domid,
+ op->flags & GNTCOPY_dest_gref, dest);
+ if ( rc < 0 )
+ goto error;
+
+ rc = xsm_grant_copy(XSM_HOOK, src->domain, dest->domain);
+ if ( rc < 0 )
{
rc = GNTST_permission_denied;
- goto error_out;
+ goto error;
}
+ return 0;
+
+ error:
+ gnttab_copy_unlock_domains(src, dest);
+ return rc;
+}
- if ( src_is_gref )
+static void gnttab_copy_release_buf(struct gnttab_copy_buf *buf)
+{
+ if ( buf->virt )
{
- uint16_t source_off, source_len;
- rc = __acquire_grant_for_copy(sd, op->source.u.ref,
- current->domain->domain_id, 1,
- &s_frame, &s_pg,
- &source_off, &source_len, 1);
- if ( rc != GNTST_okay )
- goto error_out;
- have_s_grant = 1;
- if ( op->source.offset < source_off ||
- op->len > source_len )
- PIN_FAIL(error_out, GNTST_general_error,
- "copy source out of bounds: %d < %d || %d > %d\n",
- op->source.offset, source_off,
- op->len, source_len);
+ unmap_domain_page(buf->virt);
+ buf->virt = NULL;
}
- else
+ if ( buf->have_type )
{
- rc = __get_paged_frame(op->source.u.gmfn, &s_frame, &s_pg, 1, sd);
- if ( rc != GNTST_okay )
- PIN_FAIL(error_out, rc,
- "source frame %lx invalid.\n", s_frame);
+ put_page_type(buf->page);
+ buf->have_type = 0;
+ }
+ if ( buf->page )
+ {
+ put_page(buf->page);
+ buf->page = NULL;
+ }
+ if ( buf->have_grant )
+ {
+ __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only);
+ buf->have_grant = 0;
}
+}
+
+static int gnttab_copy_claim_buf(const struct gnttab_copy *op,
+ const struct gnttab_copy_ptr *ptr,
+ struct gnttab_copy_buf *buf,
+ unsigned int gref_flag)
+{
+ int rc;
+
+ buf->read_only = gref_flag == GNTCOPY_source_gref;
- if ( dest_is_gref )
+ if ( op->flags & gref_flag )
{
- uint16_t dest_off, dest_len;
- rc = __acquire_grant_for_copy(dd, op->dest.u.ref,
- current->domain->domain_id, 0,
- &d_frame, &d_pg, &dest_off, &dest_len, 1);
+ rc = __acquire_grant_for_copy(buf->domain, ptr->u.ref,
+ current->domain->domain_id,
+ buf->read_only,
+ &buf->frame, &buf->page,
+ &buf->ptr.offset, &buf->len, 1);
if ( rc != GNTST_okay )
- goto error_out;
- have_d_grant = 1;
- if ( op->dest.offset < dest_off ||
- op->len > dest_len )
- PIN_FAIL(error_out, GNTST_general_error,
- "copy dest out of bounds: %d < %d || %d > %d\n",
- op->dest.offset, dest_off,
- op->len, dest_len);
+ goto out;
+ buf->ptr.u.ref = ptr->u.ref;
+ buf->have_grant = 1;
}
else
{
- rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, &d_pg, 0, dd);
+ rc = __get_paged_frame(ptr->u.gmfn, &buf->frame, &buf->page,
+ buf->read_only, buf->domain);
if ( rc != GNTST_okay )
- PIN_FAIL(error_out, rc,
- "destination frame %lx invalid.\n", d_frame);
+ PIN_FAIL(out, rc,
+ "source frame %lx invalid.\n", ptr->u.gmfn);
+
+ buf->ptr.u.gmfn = ptr->u.gmfn;
+ buf->ptr.offset = 0;
+ buf->len = PAGE_SIZE;
}
- if ( !get_page_type(d_pg, PGT_writable_page) )
+ if ( !buf->read_only )
{
- if ( !dd->is_dying )
- gdprintk(XENLOG_WARNING, "Could not get dst frame %lx\n", d_frame);
- rc = GNTST_general_error;
- goto error_out;
- }
-
- sp = map_domain_page(s_frame);
- dp = map_domain_page(d_frame);
-
- memcpy(dp + op->dest.offset, sp + op->source.offset, op->len);
-
- unmap_domain_page(dp);
- unmap_domain_page(sp);
-
- gnttab_mark_dirty(dd, d_frame);
-
- put_page_type(d_pg);
- error_out:
- if ( d_pg )
- put_page(d_pg);
- if ( s_pg )
- put_page(s_pg);
- if ( have_s_grant )
- __release_grant_for_copy(sd, op->source.u.ref, 1);
- if ( have_d_grant )
- __release_grant_for_copy(dd, op->dest.u.ref, 0);
- if ( sd )
- rcu_unlock_domain(sd);
- if ( dd )
- rcu_unlock_domain(dd);
- op->status = rc;
+ if ( !get_page_type(buf->page, PGT_writable_page) )
+ {
+ if ( !buf->domain->is_dying )
+ gdprintk(XENLOG_WARNING, "Could not get writable frame %lx\n", buf->frame);
+ rc = GNTST_general_error;
+ goto out;
+ }
+ buf->have_type = 1;
+ }
+
+ buf->virt = map_domain_page(buf->frame);
+ rc = GNTST_okay;
+
+ out:
+ return rc;
}
-static long
-gnttab_copy(
+static int gnttab_copy_buf(const struct gnttab_copy *op,
+ struct gnttab_copy_buf *dest,
+ const struct gnttab_copy_buf *src)
+{
+ int rc;
+
+ if ( ((op->source.offset + op->len) > PAGE_SIZE) ||
+ ((op->dest.offset + op->len) > PAGE_SIZE) )
+ PIN_FAIL(out, GNTST_bad_copy_arg, "copy beyond page area.\n");
+
+ if ( op->source.offset < src->ptr.offset ||
+ op->source.offset + op->len > src->ptr.offset + src->len )
+ PIN_FAIL(out, GNTST_general_error,
+ "copy source out of bounds: %d < %d || %d > %d\n",
+ op->source.offset, src->ptr.offset,
+ op->len, src->len);
+
+ if ( op->dest.offset < dest->ptr.offset ||
+ op->dest.offset + op->len > dest->ptr.offset + dest->len )
+ PIN_FAIL(out, GNTST_general_error,
+ "copy dest out of bounds: %d < %d || %d > %d\n",
+ op->dest.offset, dest->ptr.offset,
+ op->len, dest->len);
+
+ memcpy(dest->virt + op->dest.offset, src->virt + op->source.offset,
+ op->len);
+ gnttab_mark_dirty(dest->domain, dest->frame);
+ rc = GNTST_okay;
+ out:
+ return rc;
+}
+
+static int gnttab_copy_one(const struct gnttab_copy *op,
+ struct gnttab_copy_buf *dest,
+ struct gnttab_copy_buf *src)
+{
+ int rc;
+
+ rc = gnttab_copy_lock_domains(op, src, dest);
+ if ( rc < 0 )
+ goto out;
+
+ rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
+ if ( rc < 0 )
+ goto out;
+
+ rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
+ if ( rc < 0 )
+ goto out;
+
+ rc = gnttab_copy_buf(op, dest, src);
+ out:
+ gnttab_copy_release_buf(src);
+ gnttab_copy_release_buf(dest);
+ gnttab_copy_unlock_domains(src, dest);
+ return rc;
+}
+
+static long gnttab_copy(
XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
{
- int i;
+ unsigned int i;
struct gnttab_copy op;
+ struct gnttab_copy_buf src = {};
+ struct gnttab_copy_buf dest = {};
for ( i = 0; i < count; i++ )
{
@@ -2217,7 +2308,9 @@ gnttab_copy(
return i;
if ( unlikely(__copy_from_guest(&op, uop, 1)) )
return -EFAULT;
- __gnttab_copy(&op);
+
+ op.status = gnttab_copy_one(&op, &dest, &src);
+
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
return -EFAULT;
guest_handle_add_offset(uop, 1);
--- a/xen/include/public/grant_table.h
+++ b/xen/include/public/grant_table.h
@@ -453,7 +453,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_
struct gnttab_copy {
/* IN parameters. */
- struct {
+ struct gnttab_copy_ptr {
union {
grant_ref_t ref;
xen_pfn_t gmfn;

View File

@ -1,155 +0,0 @@
# Commit d28f42f2703e483116bafd2b0b76a32af67d83ad
# Date 2015-01-29 14:22:22 +0100
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
grant-table: defer releasing pages acquired in a grant copy
Acquiring a page for the source or destination of a grant copy is an
expensive operation. A common use case is for two adjacent grant copy
ops to operate on either the same source or the same destination page.
Instead of always acquiring and releasing destination and source pages
for each operation, release the page once it is no longer valid for
the next op.
If either the source or destination domains changes both pages are
released as it is unlikely that either will still be valid.
XenServer's performance benchmarks show modest improvements in network
receive throughput (netback uses grant copy in the guest Rx path) and
no regressions in disk performance (using tapdisk3 which grant copies
as the backend).
Baseline Deferred Release
Interhost receive to VM 7.2 Gb/s ~9 Gbit/s
Interhost aggregate 24 Gb/s 28 Gb/s
Intrahost single stream 14 Gb/s 14 Gb/s
Intrahost aggregate 34 Gb/s 36 Gb/s
Aggregate disk write 900 MB/s 900 MB/s
Aggregate disk read 890 MB/s 890 MB/s
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2236,6 +2236,17 @@ static int gnttab_copy_claim_buf(const s
return rc;
}
+static bool_t gnttab_copy_buf_valid(const struct gnttab_copy_ptr *p,
+ const struct gnttab_copy_buf *b,
+ bool_t has_gref)
+{
+ if ( !b->virt )
+ return 0;
+ if ( has_gref )
+ return b->have_grant && p->u.ref == b->ptr.u.ref;
+ return p->u.gmfn == b->ptr.u.gmfn;
+}
+
static int gnttab_copy_buf(const struct gnttab_copy *op,
struct gnttab_copy_buf *dest,
const struct gnttab_copy_buf *src)
@@ -2274,23 +2285,40 @@ static int gnttab_copy_one(const struct
{
int rc;
- rc = gnttab_copy_lock_domains(op, src, dest);
- if ( rc < 0 )
- goto out;
+ if ( !src->domain || op->source.domid != src->ptr.domid ||
+ !dest->domain || op->dest.domid != dest->ptr.domid )
+ {
+ gnttab_copy_release_buf(src);
+ gnttab_copy_release_buf(dest);
+ gnttab_copy_unlock_domains(src, dest);
- rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
- if ( rc < 0 )
- goto out;
+ rc = gnttab_copy_lock_domains(op, src, dest);
+ if ( rc < 0 )
+ goto out;
+ }
- rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
- if ( rc < 0 )
- goto out;
+ /* Different source? */
+ if ( !gnttab_copy_buf_valid(&op->source, src,
+ op->flags & GNTCOPY_source_gref) )
+ {
+ gnttab_copy_release_buf(src);
+ rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
+ if ( rc < 0 )
+ goto out;
+ }
+
+ /* Different dest? */
+ if ( !gnttab_copy_buf_valid(&op->dest, dest,
+ op->flags & GNTCOPY_dest_gref) )
+ {
+ gnttab_copy_release_buf(dest);
+ rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
+ if ( rc < 0 )
+ goto out;
+ }
rc = gnttab_copy_buf(op, dest, src);
out:
- gnttab_copy_release_buf(src);
- gnttab_copy_release_buf(dest);
- gnttab_copy_unlock_domains(src, dest);
return rc;
}
@@ -2301,21 +2329,42 @@ static long gnttab_copy(
struct gnttab_copy op;
struct gnttab_copy_buf src = {};
struct gnttab_copy_buf dest = {};
+ long rc = 0;
for ( i = 0; i < count; i++ )
{
- if (i && hypercall_preempt_check())
- return i;
+ if ( i && hypercall_preempt_check() )
+ {
+ rc = i;
+ break;
+ }
+
if ( unlikely(__copy_from_guest(&op, uop, 1)) )
- return -EFAULT;
+ {
+ rc = -EFAULT;
+ break;
+ }
op.status = gnttab_copy_one(&op, &dest, &src);
+ if ( op.status != GNTST_okay )
+ {
+ gnttab_copy_release_buf(&src);
+ gnttab_copy_release_buf(&dest);
+ }
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
- return -EFAULT;
+ {
+ rc = -EFAULT;
+ break;
+ }
guest_handle_add_offset(uop, 1);
}
- return 0;
+
+ gnttab_copy_release_buf(&src);
+ gnttab_copy_release_buf(&dest);
+ gnttab_copy_unlock_domains(&src, &dest);
+
+ return rc;
}
static long

View File

@ -1,90 +0,0 @@
References: bsc#949138
Subject: libxl: make some _dispose functions idempotent and tolerate NULL
From: Wei Liu wei.liu2@citrix.com Wed Feb 25 14:56:02 2015 +0000
Date: Mon Mar 2 17:05:35 2015 +0000:
Git: 1ea68f1a82ef94b3cc644fa70307c5151f356baf
These functions are not generated, so we need to do it by hand.
Functions list:
libxl_bitmap_dispose
libxl_string_list_dispose
libxl_key_value_list_dipose
libxl_cpuid_dispose
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Index: xen-4.5.2-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.c
+++ xen-4.5.2-testing/tools/libxl/libxl.c
@@ -218,9 +218,12 @@ void libxl_string_list_dispose(libxl_str
if (!sl)
return;
- for (i = 0; sl[i] != NULL; i++)
+ for (i = 0; sl[i] != NULL; i++) {
free(sl[i]);
+ sl[i] = NULL;
+ }
free(sl);
+ *psl = NULL;
}
void libxl_string_list_copy(libxl_ctx *ctx,
@@ -280,10 +283,14 @@ void libxl_key_value_list_dispose(libxl_
for (i = 0; kvl[i] != NULL; i += 2) {
free(kvl[i]);
- if (kvl[i + 1])
+ kvl[i] = NULL;
+ if (kvl[i + 1]) {
free(kvl[i + 1]);
+ kvl[i+1] = NULL;
+ }
}
free(kvl);
+ *pkvl = NULL;
}
void libxl_key_value_list_copy(libxl_ctx *ctx,
Index: xen-4.5.2-testing/tools/libxl/libxl_cpuid.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_cpuid.c
+++ xen-4.5.2-testing/tools/libxl/libxl_cpuid.c
@@ -28,10 +28,13 @@ void libxl_cpuid_dispose(libxl_cpuid_pol
return;
for (i = 0; cpuid_list[i].input[0] != XEN_CPUID_INPUT_UNUSED; i++) {
for (j = 0; j < 4; j++)
- if (cpuid_list[i].policy[j] != NULL)
+ if (cpuid_list[i].policy[j] != NULL) {
free(cpuid_list[i].policy[j]);
+ cpuid_list[i].policy[j] = NULL;
+ }
}
free(cpuid_list);
+ *p_cpuid_list = NULL;
return;
}
Index: xen-4.5.2-testing/tools/libxl/libxl_utils.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_utils.c
+++ xen-4.5.2-testing/tools/libxl/libxl_utils.c
@@ -604,7 +604,12 @@ void libxl_bitmap_init(libxl_bitmap *map
void libxl_bitmap_dispose(libxl_bitmap *map)
{
+ if (!map)
+ return;
+
free(map->map);
+ map->map = NULL;
+ map->size = 0;
}
void libxl_bitmap_copy(libxl_ctx *ctx, libxl_bitmap *dptr,

View File

@ -1,125 +0,0 @@
# Commit 88a2372c6ba44dd42b915a95a823cf9d4d260e25
# Date 2015-03-23 16:49:42 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
vm-assist: prepare for discontiguous used bit numbers
Since the a flag will get assigned a value discontiguous to the
existing ones (in order to preserve the low bits, as only those are
currently accessible to 32-bit guests), this requires a little bit of
rework of the VM assist code in general: An architecture specific
VM_ASSIST_VALID definition gets introduced (with an optional compat
mode counterpart), and compilation of the respective code becomes
conditional upon this being defined (ARM doesn't wire these up and
hence doesn't need that code).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Index: xen-4.5.2-testing/xen/common/compat/kernel.c
===================================================================
--- xen-4.5.2-testing.orig/xen/common/compat/kernel.c
+++ xen-4.5.2-testing/xen/common/compat/kernel.c
@@ -41,6 +41,11 @@ CHECK_TYPE(domain_handle);
#define xennmi_callback compat_nmi_callback
#define xennmi_callback_t compat_nmi_callback_t
+#ifdef COMPAT_VM_ASSIST_VALID
+#undef VM_ASSIST_VALID
+#define VM_ASSIST_VALID COMPAT_VM_ASSIST_VALID
+#endif
+
#define DO(fn) int compat_##fn
#define COMPAT
Index: xen-4.5.2-testing/xen/common/domain.c
===================================================================
--- xen-4.5.2-testing.orig/xen/common/domain.c
+++ xen-4.5.2-testing/xen/common/domain.c
@@ -1326,9 +1326,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN
return rc;
}
-long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
+#ifdef VM_ASSIST_VALID
+long vm_assist(struct domain *p, unsigned int cmd, unsigned int type,
+ unsigned long valid)
{
- if ( type > MAX_VMASST_TYPE )
+ if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
return -EINVAL;
switch ( cmd )
@@ -1343,6 +1345,7 @@ long vm_assist(struct domain *p, unsigne
return -ENOSYS;
}
+#endif
struct pirq *pirq_get_info(struct domain *d, int pirq)
{
Index: xen-4.5.2-testing/xen/common/kernel.c
===================================================================
--- xen-4.5.2-testing.orig/xen/common/kernel.c
+++ xen-4.5.2-testing/xen/common/kernel.c
@@ -396,10 +396,12 @@ DO(nmi_op)(unsigned int cmd, XEN_GUEST_H
return rc;
}
+#ifdef VM_ASSIST_VALID
DO(vm_assist)(unsigned int cmd, unsigned int type)
{
- return vm_assist(current->domain, cmd, type);
+ return vm_assist(current->domain, cmd, type, VM_ASSIST_VALID);
}
+#endif
DO(ni_hypercall)(void)
{
Index: xen-4.5.2-testing/xen/include/asm-x86/config.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-x86/config.h
+++ xen-4.5.2-testing/xen/include/asm-x86/config.h
@@ -327,6 +327,14 @@ extern unsigned long xen_phys_start;
#define ARG_XLAT_START(v) \
(ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
+#define NATIVE_VM_ASSIST_VALID ((1UL << VMASST_TYPE_4gb_segments) | \
+ (1UL << VMASST_TYPE_4gb_segments_notify) | \
+ (1UL << VMASST_TYPE_writable_pagetables) | \
+ (1UL << VMASST_TYPE_pae_extended_cr3))
+#define VM_ASSIST_VALID NATIVE_VM_ASSIST_VALID
+#define COMPAT_VM_ASSIST_VALID (NATIVE_VM_ASSIST_VALID & \
+ ((1UL << COMPAT_BITS_PER_LONG) - 1))
+
#define ELFSIZE 64
#define ARCH_CRASH_SAVE_VMCOREINFO
Index: xen-4.5.2-testing/xen/include/public/xen.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/public/xen.h
+++ xen-4.5.2-testing/xen/include/public/xen.h
@@ -486,7 +486,9 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
/* x86/PAE guests: support PDPTs above 4GB. */
#define VMASST_TYPE_pae_extended_cr3 3
+#if __XEN_INTERFACE_VERSION__ < 0x00040600
#define MAX_VMASST_TYPE 3
+#endif
#ifndef __ASSEMBLY__
Index: xen-4.5.2-testing/xen/include/xen/lib.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/xen/lib.h
+++ xen-4.5.2-testing/xen/include/xen/lib.h
@@ -92,7 +92,8 @@ extern void guest_printk(const struct do
__attribute__ ((format (printf, 2, 3)));
extern void noreturn panic(const char *format, ...)
__attribute__ ((format (printf, 1, 2)));
-extern long vm_assist(struct domain *, unsigned int, unsigned int);
+extern long vm_assist(struct domain *, unsigned int cmd, unsigned int type,
+ unsigned long valid);
extern int __printk_ratelimit(int ratelimit_ms, int ratelimit_burst);
extern int printk_ratelimit(void);

View File

@ -1,609 +0,0 @@
Index: xen-4.5.2-testing/tools/libxl/libxl_dm.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_dm.c
+++ xen-4.5.2-testing/tools/libxl/libxl_dm.c
@@ -447,6 +447,15 @@ static char ** libxl__build_device_model
flexarray_append(dm_args, "-mon");
flexarray_append(dm_args, "chardev=libxl-cmd,mode=control");
+ flexarray_append(dm_args, "-chardev");
+ flexarray_append(dm_args,
+ libxl__sprintf(gc, "socket,id=libxenstat-cmd,"
+ "path=%s/qmp-libxenstat-%d,server,nowait",
+ libxl__run_dir_path(), guest_domid));
+
+ flexarray_append(dm_args, "-mon");
+ flexarray_append(dm_args, "chardev=libxenstat-cmd,mode=control");
+
for (i = 0; i < guest_config->num_channels; i++) {
connection = guest_config->channels[i].connection;
devid = guest_config->channels[i].devid;
Index: xen-4.5.2-testing/tools/libxl/libxl_qmp.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_qmp.c
+++ xen-4.5.2-testing/tools/libxl/libxl_qmp.c
@@ -723,6 +723,13 @@ void libxl__qmp_cleanup(libxl__gc *gc, u
LOGE(ERROR, "Failed to remove QMP socket file %s", qmp_socket);
}
}
+
+ qmp_socket = GCSPRINTF("%s/qmp-libxenstat-%d", libxl__run_dir_path(), domid);
+ if (unlink(qmp_socket) == -1) {
+ if (errno != ENOENT) {
+ LOGE(ERROR, "Failed to remove QMP socket file %s", qmp_socket);
+ }
+ }
}
int libxl__qmp_query_serial(libxl__qmp_handler *qmp)
Index: xen-4.5.2-testing/tools/xenstat/libxenstat/Makefile
===================================================================
--- xen-4.5.2-testing.orig/tools/xenstat/libxenstat/Makefile
+++ xen-4.5.2-testing/tools/xenstat/libxenstat/Makefile
@@ -24,7 +24,7 @@ MINOR=0
LIB=src/libxenstat.a
SHLIB=src/libxenstat.so.$(MAJOR).$(MINOR)
SHLIB_LINKS=src/libxenstat.so.$(MAJOR) src/libxenstat.so
-OBJECTS-y=src/xenstat.o
+OBJECTS-y=src/xenstat.o src/xenstat_qmp.o
OBJECTS-$(CONFIG_Linux) += src/xenstat_linux.o
OBJECTS-$(CONFIG_SunOS) += src/xenstat_solaris.o
OBJECTS-$(CONFIG_NetBSD) += src/xenstat_netbsd.o
@@ -32,7 +32,7 @@ OBJECTS-$(CONFIG_FreeBSD) += src/xenstat
SONAME_FLAGS=-Wl,$(SONAME_LDFLAG) -Wl,libxenstat.so.$(MAJOR)
CFLAGS+=-fPIC
-CFLAGS+=-Isrc $(CFLAGS_libxenctrl) $(CFLAGS_libxenstore) $(CFLAGS_xeninclude)
+CFLAGS+=-Isrc $(CFLAGS_libxenctrl) $(CFLAGS_libxenstore) $(CFLAGS_xeninclude) -include $(XEN_ROOT)/tools/config.h
LDLIBS-y = $(LDLIBS_libxenstore) $(LDLIBS_libxenctrl)
LDLIBS-$(CONFIG_SunOS) += -lkstat
Index: xen-4.5.2-testing/tools/xenstat/xentop/Makefile
===================================================================
--- xen-4.5.2-testing.orig/tools/xenstat/xentop/Makefile
+++ xen-4.5.2-testing/tools/xenstat/xentop/Makefile
@@ -19,7 +19,7 @@ all install xentop:
else
CFLAGS += -DGCC_PRINTF -Werror $(CFLAGS_libxenstat)
-LDLIBS += $(LDLIBS_libxenstat) $(CURSES_LIBS) $(SOCKET_LIBS) -lm
+LDLIBS += $(LDLIBS_libxenstat) $(CURSES_LIBS) $(SOCKET_LIBS) -lm -lyajl
CFLAGS += -DHOST_$(XEN_OS)
# Include configure output (config.h) to headers search path
Index: xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat_priv.h
===================================================================
--- xen-4.5.2-testing.orig/tools/xenstat/libxenstat/src/xenstat_priv.h
+++ xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat_priv.h
@@ -109,5 +109,7 @@ extern int xenstat_collect_networks(xens
extern void xenstat_uninit_networks(xenstat_handle * handle);
extern int xenstat_collect_vbds(xenstat_node * node);
extern void xenstat_uninit_vbds(xenstat_handle * handle);
+extern void read_attributes_qdisk(xenstat_node * node);
+extern xenstat_vbd *xenstat_save_vbd(xenstat_domain * domain, xenstat_vbd * vbd);
#endif /* XENSTAT_PRIV_H */
Index: xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat.c
===================================================================
--- xen-4.5.2-testing.orig/tools/xenstat/libxenstat/src/xenstat.c
+++ xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat.c
@@ -657,6 +657,27 @@ static void xenstat_uninit_xen_version(x
* VBD functions
*/
+/* Save VBD information */
+xenstat_vbd *xenstat_save_vbd(xenstat_domain *domain, xenstat_vbd *vbd)
+{
+ xenstat_vbd *vbds = domain->vbds;
+
+ domain->num_vbds++;
+ domain->vbds = realloc(domain->vbds,
+ domain->num_vbds *
+ sizeof(xenstat_vbd));
+
+ if (domain->vbds == NULL) {
+ domain->num_vbds = 0;
+ free(vbds);
+ }
+ else {
+ domain->vbds[domain->num_vbds - 1] = *vbd;
+ }
+
+ return domain->vbds;
+}
+
/* Free VBD information */
static void xenstat_free_vbds(xenstat_node * node)
{
Index: xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat_linux.c
===================================================================
--- xen-4.5.2-testing.orig/tools/xenstat/libxenstat/src/xenstat_linux.c
+++ xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat_linux.c
@@ -417,6 +417,9 @@ int xenstat_collect_vbds(xenstat_node *
}
}
+ /* Get qdisk statistics */
+ read_attributes_qdisk(node);
+
rewinddir(priv->sysfsvbd);
for(dp = readdir(priv->sysfsvbd); dp != NULL ;
@@ -477,18 +480,10 @@ int xenstat_collect_vbds(xenstat_node *
continue;
}
- if (domain->vbds == NULL) {
- domain->num_vbds = 1;
- domain->vbds = malloc(sizeof(xenstat_vbd));
- } else {
- domain->num_vbds++;
- domain->vbds = realloc(domain->vbds,
- domain->num_vbds *
- sizeof(xenstat_vbd));
- }
- if (domain->vbds == NULL)
+ if ((xenstat_save_vbd(domain, &vbd)) == NULL) {
+ perror("Allocation error");
return 0;
- domain->vbds[domain->num_vbds - 1] = vbd;
+ }
}
return 1;
Index: xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat_qmp.c
===================================================================
--- /dev/null
+++ xen-4.5.2-testing/tools/xenstat/libxenstat/src/xenstat_qmp.c
@@ -0,0 +1,451 @@
+/* libxenstat: statistics-collection library for Xen
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <sys/un.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <xenctrl.h>
+
+#include "xenstat_priv.h"
+
+#ifdef HAVE_YAJL_YAJL_VERSION_H
+# include <yajl/yajl_version.h>
+#endif
+
+/* YAJL version check */
+#if defined(YAJL_MAJOR) && (YAJL_MAJOR > 1)
+# define HAVE_YAJL_V2 1
+#endif
+
+#ifdef HAVE_YAJL_V2
+
+#include <yajl/yajl_tree.h>
+
+static unsigned char *qmp_query(int, char *);
+
+enum query_blockstats {
+ QMP_STATS_RETURN = 0,
+ QMP_STATS_DEVICE = 1,
+ QMP_STATS = 2,
+ QMP_RD_BYTES = 3,
+ QMP_WR_BYTES = 4,
+ QMP_RD_OPERATIONS = 5,
+ QMP_WR_OPERATIONS = 6,
+};
+
+enum query_block {
+ QMP_BLOCK_RETURN = 0,
+ QMP_BLOCK_DEVICE = 1,
+ QMP_INSERTED = 2,
+ QMP_FILE = 3,
+};
+
+
+/* Given the qmp device name, get the image filename associated with it
+ QMP Syntax for querying block infomation:
+ In: { "execute": "query-block" }
+ Out: {"return": [{
+ "device": 'str, "locked": 'bool', "removable": bool,
+ "inserted": {
+ "iops_rd": 'int',
+ "image": {
+ "virtual-size": 'int', "filename": 'str', "cluster-size": 'int',
+ "format": 'str', "actual-size": 'int', "dirty-flag": 'bool'
+ },
+ "iops_wr": 'int', "ro": 'bool', "backing_file_depth": 'int',
+ "drv": 'str', "iops": 'int', "bps_wr": 'int', "encrypted": 'bool',
+ "bps": 'int', "bps_rd": 'int',
+ "file": 'str', "encryption_key_missing": 'bool'
+ },
+ "type": 'str'
+ }]}
+*/
+static char *qmp_get_block_image(xenstat_node *node, char *qmp_devname, int qfd)
+{
+ char *tmp, *file = NULL;
+ char *query_block_cmd = "{ \"execute\": \"query-block\" }";
+ static const char *const qblock[] = {
+ [ QMP_BLOCK_RETURN ] = "return",
+ [ QMP_BLOCK_DEVICE ] = "device",
+ [ QMP_INSERTED ] = "inserted",
+ [ QMP_FILE ] = "file",
+ };
+ const char *ptr[] = {0, 0};
+ unsigned char *qmp_stats;
+ yajl_val info, ret_obj, dev_obj, n;
+ int i;
+
+ if ((qmp_stats = qmp_query(qfd, query_block_cmd)) == NULL)
+ return NULL;
+
+ /* Use libyajl version 2.0.3 or newer for the tree parser feature with bug fixes */
+ if ((info = yajl_tree_parse((char *)qmp_stats, NULL, 0)) == NULL) {
+ free(qmp_stats);
+ return NULL;
+ }
+
+ ptr[0] = qblock[QMP_BLOCK_RETURN]; /* "return" */
+ if ((ret_obj = yajl_tree_get(info, ptr, yajl_t_array)) == NULL)
+ goto done;
+
+ for (i=0; i<YAJL_GET_ARRAY(ret_obj)->len; i++) {
+ n = YAJL_GET_ARRAY(ret_obj)->values[i];
+
+ ptr[0] = qblock[QMP_BLOCK_DEVICE]; /* "device" */
+ if ((dev_obj = yajl_tree_get(n, ptr, yajl_t_any)) != NULL) {
+ tmp = YAJL_GET_STRING(dev_obj);
+ if (strcmp(qmp_devname, tmp))
+ continue;
+ }
+ else
+ continue;
+
+ ptr[0] = qblock[QMP_INSERTED]; /* "inserted" */
+ n = yajl_tree_get(n, ptr, yajl_t_any);
+ if (n) {
+ ptr[0] = qblock[QMP_FILE]; /* "file" */
+ n = yajl_tree_get(n, ptr, yajl_t_any);
+ if (n && YAJL_IS_STRING(n)) {
+ tmp = YAJL_GET_STRING(n);
+ file = malloc(strlen(tmp)+1);
+ if (file != NULL)
+ strcpy(file, tmp);
+ goto done;
+ }
+ }
+ }
+done:
+ yajl_tree_free(info);
+ return file;
+}
+
+
+/* Given a QMP device name, lookup the associated xenstore qdisk device id */
+static void lookup_xenstore_devid(xenstat_node * node, unsigned int domid, char *qmp_devname,
+ int qfd, unsigned int *dev, unsigned int *sector_size)
+{
+ char **dev_ids, *tmp, *ptr, *image, path[80];
+ unsigned int num_dev_ids;
+ int i, devid;
+
+ /* Get all the qdisk dev IDs associated with the this VM */
+ snprintf(path, sizeof(path),"/local/domain/0/backend/qdisk/%i", domid);
+ dev_ids = xs_directory(node->handle->xshandle, XBT_NULL, path, &num_dev_ids);
+ if (dev_ids == NULL) {
+ return;
+ }
+
+ /* Get the filename of the image associated with this QMP device */
+ image = qmp_get_block_image(node, qmp_devname, qfd);
+ if (image == NULL) {
+ free(dev_ids);
+ return;
+ }
+
+ /* Look for a matching image in xenstore */
+ for (i=0; i<num_dev_ids; i++) {
+ devid = atoi(dev_ids[i]);
+ /* Get the xenstore name of the image */
+ snprintf(path, sizeof(path),"/local/domain/0/backend/qdisk/%i/%i/params", domid, devid);
+ if ((ptr = xs_read(node->handle->xshandle, XBT_NULL, path, NULL)) == NULL)
+ continue;
+
+ /* Get to actual path in string */
+ if ((tmp = strchr(ptr, '/')) == NULL)
+ tmp = ptr;
+ if (!strcmp(tmp,image)) {
+ *dev = devid;
+ free(ptr);
+
+ /* Get the xenstore sector size of the image while we're here */
+ snprintf(path, sizeof(path),"/local/domain/0/backend/qdisk/%i/%i/sector-size", domid, devid);
+ if ((ptr = xs_read(node->handle->xshandle, XBT_NULL, path, NULL)) != NULL) {
+ *sector_size = atoi((char *)ptr);
+ free(ptr);
+ }
+ break;
+ }
+ free(ptr);
+ }
+
+ free(image);
+ free(dev_ids);
+}
+
+/* Parse the stats buffer which contains I/O data for all the disks belonging to domid */
+static void qmp_parse_stats(xenstat_node *node, unsigned int domid, unsigned char *stats_buf, int qfd)
+{
+ char *qmp_devname;
+ static const char *const qstats[] = {
+ [ QMP_STATS_RETURN ] = "return",
+ [ QMP_STATS_DEVICE ] = "device",
+ [ QMP_STATS ] = "stats",
+ [ QMP_RD_BYTES ] = "rd_bytes",
+ [ QMP_WR_BYTES ] = "wr_bytes",
+ [ QMP_RD_OPERATIONS ] = "rd_operations",
+ [ QMP_WR_OPERATIONS ] = "wr_operations",
+ };
+ const char *ptr[] = {0, 0};
+ yajl_val info, ret_obj, stats_obj, n;
+ xenstat_vbd vbd;
+ xenstat_domain *domain;
+ unsigned int sector_size = 512;
+ int i, j;
+
+ /* Use libyajl version 2.0.3 or newer for the tree parser feature */
+ if ((info = yajl_tree_parse((char *)stats_buf, NULL, 0)) == NULL)
+ return;
+
+ ptr[0] = qstats[QMP_STATS_RETURN]; /* "return" */
+ if ((ret_obj = yajl_tree_get(info, ptr, yajl_t_array)) == NULL)
+ goto done;
+
+ /* Array of devices */
+ for (i=0; i<YAJL_GET_ARRAY(ret_obj)->len; i++) {
+ memset(&vbd, 0, sizeof(xenstat_vbd));
+ qmp_devname = NULL;
+ stats_obj = YAJL_GET_ARRAY(ret_obj)->values[i];
+
+ ptr[0] = qstats[QMP_STATS_DEVICE]; /* "device" */
+ if ((n = yajl_tree_get(stats_obj, ptr, yajl_t_any)) != NULL)
+ qmp_devname = YAJL_GET_STRING(n);
+
+ ptr[0] = qstats[QMP_STATS]; /* "stats" */
+ stats_obj = yajl_tree_get(stats_obj, ptr, yajl_t_object);
+ if (stats_obj && YAJL_IS_OBJECT(stats_obj)) {
+ for (j=3; j<7; j++) {
+ ptr[0] = qstats[j];
+ n = yajl_tree_get(stats_obj, ptr, yajl_t_number);
+ if (n && YAJL_IS_NUMBER(n)) {
+ switch(j) {
+ case QMP_RD_BYTES: /* "rd_bytes" */
+ vbd.rd_sects = YAJL_GET_INTEGER(n) / sector_size;
+ break;
+ case QMP_WR_BYTES: /* "wr_bytes" */
+ vbd.wr_sects = YAJL_GET_INTEGER(n) / sector_size;
+ break;
+ case QMP_RD_OPERATIONS: /* "rd_operations" */
+ vbd.rd_reqs = YAJL_GET_INTEGER(n);
+ break;
+ case QMP_WR_OPERATIONS: /* "wr_operations" */
+ vbd.wr_reqs = YAJL_GET_INTEGER(n);
+ break;
+ }
+ }
+ }
+ /* With the QMP device name, lookup the xenstore qdisk device ID and set vdb.dev */
+ if (qmp_devname)
+ lookup_xenstore_devid(node, domid, qmp_devname, qfd, &vbd.dev, &sector_size);
+ if ((domain = xenstat_node_domain(node, domid)) == NULL)
+ continue;
+ if ((xenstat_save_vbd(domain, &vbd)) == NULL)
+ goto done;
+ }
+ }
+done:
+ yajl_tree_free(info);
+}
+
+/* Write a command via the QMP. Returns number of bytes written */
+static size_t qmp_write(int qfd, char *cmd, size_t cmd_len)
+{
+ size_t pos = 0;
+ ssize_t res;
+
+ while (cmd_len > pos) {
+ res = write(qfd, cmd + pos, cmd_len - pos);
+ switch (res) {
+ case -1:
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
+ return 0;
+ case 0:
+ errno = EPIPE;
+ return pos;
+ default:
+ pos += (size_t)res;
+ }
+ }
+ return pos;
+}
+
+/* Read the data sent in response to a QMP execute query. Returns 1 for success */
+static int qmp_read(int qfd, unsigned char **qstats)
+{
+ unsigned char buf[1024], *ptr;
+ struct pollfd pfd[2];
+ int n, qsize = 0;
+
+ *qstats = NULL;
+ pfd[0].fd = qfd;
+ pfd[0].events = POLLIN;
+ while ((n = poll(pfd, POLLIN, 10)) > 0) {
+ if (pfd[0].revents & POLLIN) {
+ if ((n = read(qfd, buf, sizeof(buf))) < 0) {
+ free(*qstats);
+ return 0;
+ }
+ ptr = realloc(*qstats, qsize+n+1);
+ if (ptr == NULL) {
+ free(*qstats);
+ return 0;
+ }
+ memcpy(&ptr[qsize], buf, n);
+ qsize += n;
+ ptr[qsize] = 0;
+ *qstats = ptr;
+ }
+ }
+ return 1;
+}
+
+/* With the given cmd, query QMP for requested data. Returns allocated buffer containing data or NULL */
+static unsigned char *qmp_query(int qfd, char *cmd)
+{
+ unsigned char *qstats = NULL;
+ int n;
+
+ n = strlen(cmd);
+ if (qmp_write(qfd, cmd, n) != n)
+ return NULL;
+ if (!qmp_read(qfd, &qstats))
+ return NULL;
+ return qstats;
+}
+
+/* Returns a socket connected to the QMP socket. Returns -1 on failure. */
+static int qmp_connect(char *path)
+{
+ struct sockaddr_un sun;
+ int s;
+
+ if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return -1;
+ (void)fcntl(s, F_SETFD, 1);
+
+ memset(&sun, 0, sizeof(struct sockaddr_un));
+ sun.sun_family = AF_UNIX;
+
+ if (strlen(path) >= sizeof(sun.sun_path)) {
+ close(s);
+ return -1;
+ }
+
+ strcpy(sun.sun_path, path);
+ if (connect(s, (struct sockaddr *)&sun, SUN_LEN(&sun)) < 0) {
+ close(s);
+ return -1;
+ }
+
+ return s;
+}
+
+/* Get up to 1024 active domains */
+static xc_domaininfo_t *get_domain_ids(int *num_doms)
+{
+ xc_domaininfo_t *dominfo;
+ xc_interface *xc_handle;
+
+ dominfo = calloc(1024, sizeof(xc_domaininfo_t));
+ if (dominfo == NULL)
+ return NULL;
+ xc_handle = xc_interface_open(0,0,0);
+ *num_doms = xc_domain_getinfolist(xc_handle, 0, 1024, dominfo);
+ xc_interface_close(xc_handle);
+ return dominfo;
+}
+
+/* Gather the qdisk statistics by querying QMP
+ Resources: http://wiki.qemu.org/QMP and qmp-commands.hx from the qemu code
+ QMP Syntax for entering command mode. This command must be issued before
+ issuing any other command:
+ In: {"execute": "qmp_capabilities"}
+ Out: {"return": {}}
+ QMP Syntax for querying block statistics:
+ In: { "execute": "query-blockstats" }
+ Out: {"return": [{
+ "device": 'str',
+ "parent": {
+ "stats": {
+ "flush_total_time_ns": 'int', "wr_highest_offset": 'int',
+ "wr_total_time_ns": 'int', "wr_bytes": 'int',
+ "rd_total_time_ns": 'int', "flush_operations": 'int',
+ "wr_operations": 'int', "rd_bytes": 'int', "rd_operations": 'int'
+ }
+ },
+ "stats": {
+ "flush_total_time_ns": 'int', "wr_highest_offset": 'int',
+ "wr_total_time_ns": 'int', "wr_bytes": 'int',
+ "rd_total_time_ns": 'int', "flush_operations": 'int',
+ "wr_operations": 'int', "rd_bytes": 'int', "rd_operations": 'int'
+ }
+ }]}
+*/
+void read_attributes_qdisk(xenstat_node * node)
+{
+ char *cmd_mode = "{ \"execute\": \"qmp_capabilities\" }";
+ char *query_blockstats_cmd = "{ \"execute\": \"query-blockstats\" }";
+ xc_domaininfo_t *dominfo = NULL;
+ unsigned char *qmp_stats, *val;
+ char path[80];
+ int i, qfd, num_doms;
+
+ dominfo = get_domain_ids(&num_doms);
+ if (dominfo == NULL)
+ return;
+
+ for (i=0; i<num_doms; i++) {
+ if (dominfo[i].domain <= 0)
+ continue;
+
+ /* Verify that qdisk disks are used with this VM */
+ snprintf(path, sizeof(path),"/local/domain/0/backend/qdisk/%i", dominfo[i].domain);
+ if ((val = xs_read(node->handle->xshandle, XBT_NULL, path, NULL)) == NULL)
+ continue;
+ free(val);
+
+ /* Connect to this VMs QMP socket */
+ snprintf(path, sizeof(path), "/var/run/xen/qmp-libxenstat-%i", dominfo[i].domain);
+ if ((qfd = qmp_connect(path)) < 0) {
+ continue;
+ }
+
+ /* First enable QMP capabilities so that we can query for data */
+ if ((qmp_stats = qmp_query(qfd, cmd_mode)) != NULL) {
+ free(qmp_stats);
+ /* Query QMP for this VMs blockstats */
+ if ((qmp_stats = qmp_query(qfd, query_blockstats_cmd)) != NULL) {
+ qmp_parse_stats(node, dominfo[i].domain, qmp_stats, qfd);
+ free(qmp_stats);
+ }
+ }
+ close(qfd);
+ }
+
+ free(dominfo);
+}
+
+#else /* !HAVE_YAJL_V2 */
+
+/* Statistics gathering for qdisks requires at least yajl v2 */
+void read_attributes_qdisk(xenstat_node * node)
+{
+}
+
+#endif /* !HAVE_YAJL_V2 */

View File

@ -1,41 +0,0 @@
# Commit 63dcef9fe5b880007075b5eb53f9950a826519ce
# Date 2015-04-14 15:02:10 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/hvm: don't include asm/spinlock.h
asm/spinlock.h should not be included directly.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -52,7 +52,6 @@
#include <asm/xstate.h>
#include <asm/traps.h>
#include <asm/mc146818rtc.h>
-#include <asm/spinlock.h>
#include <asm/mce.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -41,7 +41,6 @@
#include <asm/msr.h>
#include <asm/i387.h>
#include <asm/iocap.h>
-#include <asm/spinlock.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -35,7 +35,6 @@
#include <asm/types.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
-#include <asm/spinlock.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mem_sharing.h>

View File

@ -1,22 +0,0 @@
# Commit f70df9ec1ab72b6bbebad72d81109c1b214007e1
# Date 2015-04-14 15:02:32 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mtrr: include asm/atomic.h
asm/atomic.h is needed but only included indirectly via
asm/spinlock.h.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/cpu/mtrr/main.c
+++ b/xen/arch/x86/cpu/mtrr/main.c
@@ -36,6 +36,7 @@
#include <xen/lib.h>
#include <xen/smp.h>
#include <xen/spinlock.h>
+#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>

View File

@ -1,46 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 70a3cbb8c9cb17a61fa25c48ba3d7b44fd059c90
# Date 2015-04-14 16:50:35 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/vMSI-X: honor all mask requests
Commit 74fd0036de ("x86: properly handle MSI-X unmask operation from
guests") didn't go far enough: it fixed an issue with unmasking, but
left an issue with masking in place: Due to the (late) point in time
when qemu requests the hypervisor to set up MSI-X interrupts (which is
where the MMIO intercept gets put in place), the hypervisor doesn't
see all guest writes, and hence shouldn't make assumptions on the state
the virtual MSI-X resources are in. Bypassing the rest of the logic on
a guest mask operation leads to
[00:04.0] pci_msix_write: Error: Can't update msix entry 1 since MSI-X is already enabled.
which surprisingly enough doesn't lead to the device not working
anymore (I didn't dig in deep enough to figure out why that is). But it
does prevent the IRQ to be migrated inside the guest, i.e. all
interrupts will always arrive in vCPU 0.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -286,11 +286,11 @@ static int msixtbl_write(struct vcpu *v,
goto out;
}
- /* exit to device model if address/data has been modified */
- if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
+ /* Exit to device model when unmasking and address/data got modified. */
+ if ( !(val & PCI_MSIX_VECTOR_BITMASK) &&
+ test_and_clear_bit(nr_entry, &entry->table_flags) )
{
- if ( !(val & PCI_MSIX_VECTOR_BITMASK) )
- v->arch.hvm_vcpu.hvm_io.msix_unmask_address = address;
+ v->arch.hvm_vcpu.hvm_io.msix_unmask_address = address;
goto out;
}

View File

@ -1,58 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit df9f5676b3711c95127d44e871ad7ca38d6ed28a
# Date 2015-04-14 16:51:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/vMSI-X: add valid bits for read acceleration
Again because Xen doesn't get to see all guest writes, it shouldn't
serve reads from its cache before having seen a write to the respective
address.
Also use DECLARE_BITMAP() in a related field declaration instead of
open coding it.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -154,11 +154,14 @@ struct msixtbl_entry
struct pci_dev *pdev;
unsigned long gtable; /* gpa of msix table */
unsigned long table_len;
- unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
+ DECLARE_BITMAP(table_flags, MAX_MSIX_TABLE_ENTRIES);
#define MAX_MSIX_ACC_ENTRIES 3
struct {
uint32_t msi_ad[3]; /* Shadow of address low, high and data */
} gentries[MAX_MSIX_ACC_ENTRIES];
+ DECLARE_BITMAP(acc_valid, 3 * MAX_MSIX_ACC_ENTRIES);
+#define acc_bit(what, ent, slot, idx) \
+ what##_bit((slot) * 3 + (idx), (ent)->acc_valid)
struct rcu_head rcu;
};
@@ -233,9 +236,10 @@ static int msixtbl_read(
if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
- if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
- goto out;
index = offset / sizeof(uint32_t);
+ if ( nr_entry >= MAX_MSIX_ACC_ENTRIES ||
+ !acc_bit(test, entry, nr_entry, index) )
+ goto out;
*pval = entry->gentries[nr_entry].msi_ad[index];
}
else
@@ -281,6 +285,7 @@ static int msixtbl_write(struct vcpu *v,
{
index = offset / sizeof(uint32_t);
entry->gentries[nr_entry].msi_ad[index] = val;
+ acc_bit(set, entry, nr_entry, index);
}
set_bit(nr_entry, &entry->table_flags);
goto out;

View File

@ -1,27 +0,0 @@
References: bsc#945164
Subject: libxl: use DEBUG log level instead of INFO
From: Wei Liu wei.liu2@citrix.com Fri Apr 17 12:31:29 2015 +0100
Date: Wed Apr 22 14:40:40 2015 +0100:
Git: ddc17f311099c1f0f37a771a2f5f904d848102f7
Make libxl less noisy when destroying a domain.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Index: xen-4.5.2-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.c
+++ xen-4.5.2-testing/tools/libxl/libxl.c
@@ -1702,7 +1702,7 @@ static void devices_destroy_cb(libxl__eg
_exit(-1);
}
}
- LOG(INFO, "forked pid %ld for destroy of domain %d", (long)rc, domid);
+ LOG(DEBUG, "forked pid %ld for destroy of domain %d", (long)rc, domid);
return;

View File

@ -1,33 +0,0 @@
# Commit 017e667c433a1040306db6265b05e104568c70c8
# Date 2015-05-05 18:00:03 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
domctl: don't truncate XEN_DOMCTL_max_mem requests
Instead saturate the value if the input can't be represented in the
respective struct domain field.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -943,7 +943,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
case XEN_DOMCTL_max_mem:
{
- unsigned long new_max;
+ uint64_t new_max;
ret = -EINVAL;
new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT-10);
@@ -954,7 +954,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
* that the domain will now be allowed to "ratchet" down to new_max. In
* the meantime, while tot > max, all new allocations are disallowed.
*/
- d->max_pages = new_max;
+ d->max_pages = min(new_max, (uint64_t)(typeof(d->max_pages))-1);
ret = 0;
spin_unlock(&d->page_alloc_lock);
}

View File

@ -1,266 +0,0 @@
# Commit d72a4605e18d3a61c4469ff092dbbbfa4ac919f7
# Date 2015-05-05 18:01:33 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
Xen L4 entries being uniformly installed into any L4 table and 64-bit
PV kernels running in ring 3 means that user mode was able to see the
read-only M2P presented by Xen to the guests. While apparently not
really representing an exploitable information leak, this still very
certainly was never meant to be that way.
Building on the fact that these guests already have separate kernel and
user mode page tables we can allow guest kernels to tell Xen that they
don't want user mode to see this table. We can't, however, do this by
default: There is no ABI requirement that kernel and user mode page
tables be separate. Therefore introduce a new VM-assist flag allowing
the guest to control respective hypervisor behavior:
- when not set, L4 tables get created with the respective slot blank,
and whenever the L4 table gets used as a kernel one the missing
mapping gets inserted,
- when set, L4 tables get created with the respective slot initialized
as before, and whenever the L4 table gets used as a user one the
mapping gets zapped.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Index: xen-4.5.2-testing/xen/arch/x86/domain.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/domain.c
+++ xen-4.5.2-testing/xen/arch/x86/domain.c
@@ -338,7 +338,7 @@ static int setup_compat_l4(struct vcpu *
l4tab = __map_domain_page(pg);
clear_page(l4tab);
- init_guest_l4_table(l4tab, v->domain);
+ init_guest_l4_table(l4tab, v->domain, 1);
unmap_domain_page(l4tab);
v->arch.guest_table = pagetable_from_page(pg);
@@ -977,7 +977,11 @@ int arch_set_info_guest(
case -EINTR:
rc = -ERESTART;
case -ERESTART:
+ break;
case 0:
+ if ( !compat && !VM_ASSIST(d, VMASST_TYPE_m2p_strict) &&
+ !paging_mode_refcounts(d) )
+ fill_ro_mpt(cr3_gfn);
break;
default:
if ( cr3_page == current->arch.old_guest_table )
@@ -1012,7 +1016,10 @@ int arch_set_info_guest(
default:
if ( cr3_page == current->arch.old_guest_table )
cr3_page = NULL;
+ break;
case 0:
+ if ( VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+ zap_ro_mpt(cr3_gfn);
break;
}
}
Index: xen-4.5.2-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/domain_build.c
+++ xen-4.5.2-testing/xen/arch/x86/domain_build.c
@@ -1096,7 +1096,7 @@ int __init construct_dom0(
l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
}
clear_page(l4tab);
- init_guest_l4_table(l4tab, d);
+ init_guest_l4_table(l4tab, d, 0);
v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
if ( is_pv_32on64_domain(d) )
v->arch.guest_table_user = v->arch.guest_table;
Index: xen-4.5.2-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.5.2-testing/xen/arch/x86/mm.c
@@ -1383,7 +1383,8 @@ static int alloc_l3_table(struct page_in
return rc > 0 ? 0 : rc;
}
-void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d)
+void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d,
+ bool_t zap_ro_mpt)
{
/* Xen private mappings. */
memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
@@ -1398,6 +1399,25 @@ void init_guest_l4_table(l4_pgentry_t l4
l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR);
+ if ( zap_ro_mpt || is_pv_32on64_domain(d) || paging_mode_refcounts(d) )
+ l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+}
+
+void fill_ro_mpt(unsigned long mfn)
+{
+ l4_pgentry_t *l4tab = map_domain_page(mfn);
+
+ l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
+ idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+ unmap_domain_page(l4tab);
+}
+
+void zap_ro_mpt(unsigned long mfn)
+{
+ l4_pgentry_t *l4tab = map_domain_page(mfn);
+
+ l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+ unmap_domain_page(l4tab);
}
static int alloc_l4_table(struct page_info *page)
@@ -1447,7 +1467,7 @@ static int alloc_l4_table(struct page_in
adjust_guest_l4e(pl4e[i], d);
}
- init_guest_l4_table(pl4e, d);
+ init_guest_l4_table(pl4e, d, !VM_ASSIST(d, VMASST_TYPE_m2p_strict));
unmap_domain_page(pl4e);
return rc > 0 ? 0 : rc;
@@ -2761,6 +2781,8 @@ int new_guest_cr3(unsigned long mfn)
invalidate_shadow_ldt(curr, 0);
+ if ( !VM_ASSIST(d, VMASST_TYPE_m2p_strict) && !paging_mode_refcounts(d) )
+ fill_ro_mpt(mfn);
curr->arch.guest_table = pagetable_from_pfn(mfn);
update_cr3(curr);
@@ -3117,6 +3139,9 @@ long do_mmuext_op(
op.arg1.mfn);
break;
}
+ if ( VM_ASSIST(d, VMASST_TYPE_m2p_strict) &&
+ !paging_mode_refcounts(d) )
+ zap_ro_mpt(op.arg1.mfn);
}
curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
Index: xen-4.5.2-testing/xen/arch/x86/mm/shadow/multi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/mm/shadow/multi.c
+++ xen-4.5.2-testing/xen/arch/x86/mm/shadow/multi.c
@@ -1438,6 +1438,13 @@ void sh_install_xen_entries_in_l4(struct
shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
__PAGE_HYPERVISOR);
+ if ( !shadow_mode_external(d) && !is_pv_32on64_domain(d) &&
+ !VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+ {
+ /* open coded zap_ro_mpt(mfn_x(sl4mfn)): */
+ sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
+ }
+
/* Shadow linear mapping for 4-level shadows. N.B. for 3-level
* shadows on 64-bit xen, this linear mapping is later replaced by the
* monitor pagetable structure, which is built in make_monitor_table
@@ -4062,6 +4069,17 @@ sh_update_cr3(struct vcpu *v, int do_loc
if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 )
flush_tlb_mask(d->domain_dirty_cpumask);
sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
+ if ( !shadow_mode_external(d) && !is_pv_32on64_domain(d) )
+ {
+ mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[0]);
+
+ if ( !(v->arch.flags & TF_kernel_mode) &&
+ VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+ zap_ro_mpt(mfn_x(smfn));
+ else if ( (v->arch.flags & TF_kernel_mode) &&
+ !VM_ASSIST(d, VMASST_TYPE_m2p_strict) )
+ fill_ro_mpt(mfn_x(smfn));
+ }
#else
#error This should never happen
#endif
Index: xen-4.5.2-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.5.2-testing/xen/arch/x86/x86_64/mm.c
@@ -480,7 +480,7 @@ static int setup_m2p_table(struct mem_ho
l2_ro_mpt += l2_table_offset(va);
}
- /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+ /* NB. Cannot be GLOBAL: guest user mode should not see it. */
l2e_write(l2_ro_mpt, l2e_from_pfn(mfn,
/*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
}
@@ -583,7 +583,7 @@ void __init paging_init(void)
0x77, 1UL << L3_PAGETABLE_SHIFT);
ASSERT(!l2_table_offset(va));
- /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+ /* NB. Cannot be GLOBAL: guest user mode should not see it. */
l3e_write(&l3_ro_mpt[l3_table_offset(va)],
l3e_from_page(l1_pg,
/*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
@@ -621,7 +621,7 @@ void __init paging_init(void)
l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
ASSERT(!l2_table_offset(va));
}
- /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+ /* NB. Cannot be GLOBAL: guest user mode should not see it. */
if ( l1_pg )
l2e_write(l2_ro_mpt, l2e_from_page(
l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
Index: xen-4.5.2-testing/xen/include/asm-x86/config.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-x86/config.h
+++ xen-4.5.2-testing/xen/include/asm-x86/config.h
@@ -330,7 +330,8 @@ extern unsigned long xen_phys_start;
#define NATIVE_VM_ASSIST_VALID ((1UL << VMASST_TYPE_4gb_segments) | \
(1UL << VMASST_TYPE_4gb_segments_notify) | \
(1UL << VMASST_TYPE_writable_pagetables) | \
- (1UL << VMASST_TYPE_pae_extended_cr3))
+ (1UL << VMASST_TYPE_pae_extended_cr3) | \
+ (1UL << VMASST_TYPE_m2p_strict))
#define VM_ASSIST_VALID NATIVE_VM_ASSIST_VALID
#define COMPAT_VM_ASSIST_VALID (NATIVE_VM_ASSIST_VALID & \
((1UL << COMPAT_BITS_PER_LONG) - 1))
Index: xen-4.5.2-testing/xen/include/asm-x86/mm.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-x86/mm.h
+++ xen-4.5.2-testing/xen/include/asm-x86/mm.h
@@ -314,7 +314,10 @@ static inline void *__page_to_virt(const
int free_page_type(struct page_info *page, unsigned long type,
int preemptible);
-void init_guest_l4_table(l4_pgentry_t[], const struct domain *);
+void init_guest_l4_table(l4_pgentry_t[], const struct domain *,
+ bool_t zap_ro_mpt);
+void fill_ro_mpt(unsigned long mfn);
+void zap_ro_mpt(unsigned long mfn);
int is_iomem_page(unsigned long mfn);
Index: xen-4.5.2-testing/xen/include/public/xen.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/public/xen.h
+++ xen-4.5.2-testing/xen/include/public/xen.h
@@ -486,6 +486,18 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
/* x86/PAE guests: support PDPTs above 4GB. */
#define VMASST_TYPE_pae_extended_cr3 3
+/*
+ * x86/64 guests: strictly hide M2P from user mode.
+ * This allows the guest to control respective hypervisor behavior:
+ * - when not set, L4 tables get created with the respective slot blank,
+ * and whenever the L4 table gets used as a kernel one the missing
+ * mapping gets inserted,
+ * - when set, L4 tables get created with the respective slot initialized
+ * as before, and whenever the L4 table gets used as a user one the
+ * mapping gets zapped.
+ */
+#define VMASST_TYPE_m2p_strict 32
+
#if __XEN_INTERFACE_VERSION__ < 0x00040600
#define MAX_VMASST_TYPE 3
#endif

View File

@ -1,68 +0,0 @@
# Commit 2bfc9fc52ce8485fa43e79bbdc32360c74e12fe8
# Date 2015-05-08 10:59:26 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: provide arch_fetch_and_add()
arch_fetch_and_add() atomically adds a value and returns the previous
value.
This is needed to implement ticket locks.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -118,6 +118,52 @@ static always_inline unsigned long __cmp
})
/*
+ * Undefined symbol to cause link failure if a wrong size is used with
+ * arch_fetch_and_add().
+ */
+extern unsigned long __bad_fetch_and_add_size(void);
+
+static always_inline unsigned long __xadd(
+ volatile void *ptr, unsigned long v, int size)
+{
+ switch ( size )
+ {
+ case 1:
+ asm volatile ( "lock; xaddb %b0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+ return v;
+ case 2:
+ asm volatile ( "lock; xaddw %w0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+ return v;
+ case 4:
+ asm volatile ( "lock; xaddl %k0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+ return v;
+ case 8:
+ asm volatile ( "lock; xaddq %q0,%1"
+ : "+r" (v), "+m" (*__xg(ptr))
+ :: "memory");
+
+ return v;
+ default:
+ return __bad_fetch_and_add_size();
+ }
+}
+
+/*
+ * Atomically add @v to the 1, 2, 4, or 8 byte value at @ptr. Returns
+ * the previous value.
+ *
+ * This is a full memory barrier.
+ */
+#define arch_fetch_and_add(ptr, v) \
+ ((typeof(*(ptr)))__xadd(ptr, (typeof(*(ptr)))(v), sizeof(*(ptr))))
+
+/*
* Both Intel and AMD agree that, from a programmer's viewpoint:
* Loads cannot be reordered relative to other loads.
* Stores cannot be reordered relative to other stores.

View File

@ -1,29 +0,0 @@
# Commit f9cc3cd9b4de58cf032c8624406384c172937e57
# Date 2015-05-08 10:59:44 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
arm: provide arch_fetch_and_add()
arch_fetch_and_add() atomically adds a value and returns the previous
value.
This generic arm implementation uses the GCC __sync_fetch_and_add()
builtin. This builtin resulted in suitable inlined asm for GCC 4.8.3
(arm64) and GCC 4.6.3 (arm32).
This is needed to implement ticket locks.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/xen/include/asm-arm/system.h
+++ b/xen/include/asm-arm/system.h
@@ -51,6 +51,8 @@
# error "unknown ARM variant"
#endif
+#define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v)
+
extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next);
#endif

View File

@ -1,155 +0,0 @@
commit 161212ef02312c0681d2d809c8ff1e1f0ea6f6f9
Author: Fabio Fantoni <fabio.fantoni@m2r.biz>
Date: Wed Apr 29 11:20:28 2015 +0200
libxl: Add qxl vga interface support for upstream qemu
Usage:
vga="qxl"
Qxl vga support many resolutions that not supported by stdvga,
mainly the 16:9 ones and other high up to 2560x1600.
With QXL you can get improved performance and smooth video also
with high resolutions and high quality.
Require their drivers installed in the domU and spice used
otherwise act as a simple stdvga.
Signed-off-by: Fabio Fantoni <fabio.fantoni@m2r.biz>
Signed-off-by: Zhou Peng <zpengxen@gmail.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Index: xen-4.5.2-testing/docs/man/xl.cfg.pod.5
===================================================================
--- xen-4.5.2-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.5.2-testing/docs/man/xl.cfg.pod.5
@@ -1294,6 +1294,9 @@ qemu-xen-traditional device-model, the a
which is sufficient for 1024x768 at 32 bpp. For the upstream qemu-xen
device-model, the default and minimum is 8 MB.
+For B<qxl> vga, the default is both default and minimal 128MB.
+If B<videoram> is set less than 128MB, an error will be triggered.
+
=item B<stdvga=BOOLEAN>
Select a standard VGA card with VBE (VESA BIOS Extensions) as the
@@ -1305,9 +1308,14 @@ This option is deprecated, use vga="stdv
=item B<vga="STRING">
-Selects the emulated video card (none|stdvga|cirrus).
+Selects the emulated video card (none|stdvga|cirrus|qxl).
The default is cirrus.
+In general, QXL should work with the Spice remote display protocol
+for acceleration, and QXL driver is necessary in guest in this case.
+QXL can also work with the VNC protocol, but it will be like a standard
+VGA without acceleration.
+
=item B<vnc=BOOLEAN>
Allow access to the display via the VNC protocol. This enables the
Index: xen-4.5.2-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.h
+++ xen-4.5.2-testing/tools/libxl/libxl.h
@@ -506,6 +506,16 @@ typedef struct libxl__ctx libxl_ctx;
#define LIBXL_HAVE_DOMINFO_OUTSTANDING_MEMKB 1
/*
+ * LIBXL_HAVE_QXL
+ *
+ * If defined, then the libxl_vga_interface_type will contain another value:
+ * "QXL". This value define if qxl vga is supported.
+ *
+ * If this is not defined, the qxl vga support is missed.
+ */
+#define LIBXL_HAVE_QXL 1
+
+/*
* LIBXL_HAVE_SPICE_VDAGENT
*
* If defined, then the libxl_spice_info structure will contain a boolean type:
Index: xen-4.5.2-testing/tools/libxl/libxl_create.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.5.2-testing/tools/libxl/libxl_create.c
@@ -240,6 +240,10 @@ int libxl__domain_build_info_setdefault(
if (b_info->video_memkb == LIBXL_MEMKB_DEFAULT)
b_info->video_memkb = 0;
break;
+ case LIBXL_VGA_INTERFACE_TYPE_QXL:
+ LOG(ERROR,"qemu upstream required for qxl vga");
+ return ERROR_INVAL;
+ break;
case LIBXL_VGA_INTERFACE_TYPE_STD:
if (b_info->video_memkb == LIBXL_MEMKB_DEFAULT)
b_info->video_memkb = 8 * 1024;
@@ -264,6 +268,15 @@ int libxl__domain_build_info_setdefault(
if (b_info->video_memkb == LIBXL_MEMKB_DEFAULT)
b_info->video_memkb = 0;
break;
+ case LIBXL_VGA_INTERFACE_TYPE_QXL:
+ if (b_info->video_memkb == LIBXL_MEMKB_DEFAULT) {
+ b_info->video_memkb = (128 * 1024);
+ } else if (b_info->video_memkb < (128 * 1024)) {
+ LOG(ERROR,
+ "128 Mib videoram is the minimum for qxl default");
+ return ERROR_INVAL;
+ }
+ break;
case LIBXL_VGA_INTERFACE_TYPE_STD:
if (b_info->video_memkb == LIBXL_MEMKB_DEFAULT)
b_info->video_memkb = 16 * 1024;
Index: xen-4.5.2-testing/tools/libxl/libxl_dm.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_dm.c
+++ xen-4.5.2-testing/tools/libxl/libxl_dm.c
@@ -253,6 +253,8 @@ static char ** libxl__build_device_model
case LIBXL_VGA_INTERFACE_TYPE_NONE:
flexarray_append_pair(dm_args, "-vga", "none");
break;
+ case LIBXL_VGA_INTERFACE_TYPE_QXL:
+ break;
}
if (b_info->u.hvm.boot) {
@@ -618,6 +620,12 @@ static char ** libxl__build_device_model
break;
case LIBXL_VGA_INTERFACE_TYPE_NONE:
break;
+ case LIBXL_VGA_INTERFACE_TYPE_QXL:
+ /* QXL have 2 ram regions, ram and vram */
+ flexarray_append_pair(dm_args, "-device",
+ GCSPRINTF("qxl-vga,vram_size_mb=%"PRIu64",ram_size_mb=%"PRIu64,
+ (b_info->video_memkb/2/1024), (b_info->video_memkb/2/1024) ) );
+ break;
}
if (b_info->u.hvm.boot) {
Index: xen-4.5.2-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.5.2-testing/tools/libxl/libxl_types.idl
@@ -181,6 +181,7 @@ libxl_vga_interface_type = Enumeration("
(1, "CIRRUS"),
(2, "STD"),
(3, "NONE"),
+ (4, "QXL"),
], init_val = "LIBXL_VGA_INTERFACE_TYPE_CIRRUS")
libxl_vendor_device = Enumeration("vendor_device", [
Index: xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
@@ -1910,6 +1910,8 @@ skip_vfb:
b_info->u.hvm.vga.kind = LIBXL_VGA_INTERFACE_TYPE_CIRRUS;
} else if (!strcmp(buf, "none")) {
b_info->u.hvm.vga.kind = LIBXL_VGA_INTERFACE_TYPE_NONE;
+ } else if (!strcmp(buf, "qxl")) {
+ b_info->u.hvm.vga.kind = LIBXL_VGA_INTERFACE_TYPE_QXL;
} else {
fprintf(stderr, "Unknown vga \"%s\" specified\n", buf);
exit(1);

View File

@ -1,65 +0,0 @@
# Commit 3c694aec08dda782d9c866e599b848dff86f474f
# Date 2015-05-13 15:00:58 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: provide add_sized()
add_sized(ptr, inc) adds inc to the value at ptr using only the correct
size of loads and stores for the type of *ptr. The add is /not/ atomic.
This is needed for ticket locks to ensure the increment of the head ticket
does not affect the tail ticket.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/include/asm-x86/atomic.h
+++ b/xen/include/asm-x86/atomic.h
@@ -14,6 +14,14 @@ static inline void name(volatile type *a
{ asm volatile("mov" size " %1,%0": "=m" (*(volatile type *)addr) \
:reg (val) barrier); }
+#define build_add_sized(name, size, type, reg) \
+ static inline void name(volatile type *addr, type val) \
+ { \
+ asm volatile("add" size " %1,%0" \
+ : "=m" (*addr) \
+ : reg (val)); \
+ }
+
build_read_atomic(read_u8_atomic, "b", uint8_t, "=q", )
build_read_atomic(read_u16_atomic, "w", uint16_t, "=r", )
build_read_atomic(read_u32_atomic, "l", uint32_t, "=r", )
@@ -25,8 +33,14 @@ build_write_atomic(write_u32_atomic, "l"
build_read_atomic(read_u64_atomic, "q", uint64_t, "=r", )
build_write_atomic(write_u64_atomic, "q", uint64_t, "r", )
+build_add_sized(add_u8_sized, "b", uint8_t, "qi")
+build_add_sized(add_u16_sized, "w", uint16_t, "ri")
+build_add_sized(add_u32_sized, "l", uint32_t, "ri")
+build_add_sized(add_u64_sized, "q", uint64_t, "ri")
+
#undef build_read_atomic
#undef build_write_atomic
+#undef build_add_sized
void __bad_atomic_size(void);
@@ -54,6 +68,18 @@ void __bad_atomic_size(void);
__x; \
})
+#define add_sized(p, x) ({ \
+ typeof(*(p)) x_ = (x); \
+ switch ( sizeof(*(p)) ) \
+ { \
+ case 1: add_u8_sized((uint8_t *)(p), x_); break; \
+ case 2: add_u16_sized((uint16_t *)(p), x_); break; \
+ case 4: add_u32_sized((uint32_t *)(p), x_); break; \
+ case 8: add_u64_sized((uint64_t *)(p), x_); break; \
+ default: __bad_atomic_size(); break; \
+ } \
+})
+
/*
* NB. I've pushed the volatile qualifier into the operations. This allows
* fast accessors such as _atomic_read() and _atomic_set() which don't give

View File

@ -1,64 +0,0 @@
# Commit 890674d13feb4a270aa112ca452dcf62fdd53f34
# Date 2015-05-13 15:01:25 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
arm: provide add_sized()
add_sized(ptr, inc) adds inc to the value at ptr using only the correct
size of loads and stores for the type of *ptr. The add is /not/ atomic.
This is needed for ticket locks to ensure the increment of the head ticket
does not affect the tail ticket.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/xen/include/asm-arm/atomic.h
+++ b/xen/include/asm-arm/atomic.h
@@ -23,6 +23,17 @@ static inline void name(volatile type *a
: reg (val)); \
}
+#define build_add_sized(name, size, width, type, reg) \
+static inline void name(volatile type *addr, type val) \
+{ \
+ type t; \
+ asm volatile("ldr" size " %"width"1,%0\n" \
+ "add %"width"1,%"width"1,%"width"2\n" \
+ "str" size " %"width"1,%0" \
+ : "=m" (*(volatile type *)addr), "=r" (t) \
+ : reg (val)); \
+}
+
#if defined (CONFIG_ARM_32)
#define BYTE ""
#define WORD ""
@@ -46,6 +57,10 @@ build_atomic_read(read_u64_atomic, "x",
build_atomic_write(write_u64_atomic, "x", uint64_t, "r")
#endif
+build_add_sized(add_u8_sized, "b", BYTE, uint8_t, "ri")
+build_add_sized(add_u16_sized, "h", WORD, uint16_t, "ri")
+build_add_sized(add_u32_sized, "", WORD, uint32_t, "ri")
+
void __bad_atomic_size(void);
#define read_atomic(p) ({ \
@@ -70,6 +85,17 @@ void __bad_atomic_size(void);
__x; \
})
+#define add_sized(p, x) ({ \
+ typeof(*(p)) __x = (x); \
+ switch ( sizeof(*(p)) ) \
+ { \
+ case 1: add_u8_sized((uint8_t *)(p), __x); break; \
+ case 2: add_u16_sized((uint16_t *)(p), __x); break; \
+ case 4: add_u32_sized((uint32_t *)(p), __x); break; \
+ default: __bad_atomic_size(); break; \
+ } \
+})
+
/*
* NB. I've pushed the volatile qualifier into the operations. This allows
* fast accessors such as _atomic_read() and _atomic_set() which don't give

View File

@ -1,305 +0,0 @@
# Commit 45fcc4568c5162b00fb3907fb158af82dd484a3d
# Date 2015-05-15 09:49:12 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
use ticket locks for spin locks
Replace the byte locks with ticket locks. Ticket locks are: a) fair;
and b) peform better when contented since they spin without an atomic
operation.
The lock is split into two ticket values: head and tail. A locker
acquires a ticket by (atomically) increasing tail and using the
previous tail value. A CPU holds the lock if its ticket == head. The
lock is released by increasing head.
spin_lock_irq() and spin_lock_irqsave() now spin with irqs disabled
(previously, they would spin with irqs enabled if possible). This is
required to prevent deadlocks when the irq handler tries to take the
same lock with a higher ticket.
Architectures need only provide arch_fetch_and_add() and two barriers:
arch_lock_acquire_barrier() and arch_lock_release_barrier().
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -115,125 +115,134 @@ void spin_debug_disable(void)
#endif
+static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t)
+{
+ spinlock_tickets_t v;
+
+ smp_rmb();
+ v.head_tail = read_atomic(&t->head_tail);
+ return v;
+}
+
+static always_inline u16 observe_head(spinlock_tickets_t *t)
+{
+ smp_rmb();
+ return read_atomic(&t->head);
+}
+
void _spin_lock(spinlock_t *lock)
{
+ spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
LOCK_PROFILE_VAR;
check_lock(&lock->debug);
- while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
+ tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
+ tickets.head_tail);
+ while ( tickets.tail != observe_head(&lock->tickets) )
{
LOCK_PROFILE_BLOCK;
- while ( likely(_raw_spin_is_locked(&lock->raw)) )
- cpu_relax();
+ cpu_relax();
}
LOCK_PROFILE_GOT;
preempt_disable();
+ arch_lock_acquire_barrier();
}
void _spin_lock_irq(spinlock_t *lock)
{
- LOCK_PROFILE_VAR;
-
ASSERT(local_irq_is_enabled());
local_irq_disable();
- check_lock(&lock->debug);
- while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
- {
- LOCK_PROFILE_BLOCK;
- local_irq_enable();
- while ( likely(_raw_spin_is_locked(&lock->raw)) )
- cpu_relax();
- local_irq_disable();
- }
- LOCK_PROFILE_GOT;
- preempt_disable();
+ _spin_lock(lock);
}
unsigned long _spin_lock_irqsave(spinlock_t *lock)
{
unsigned long flags;
- LOCK_PROFILE_VAR;
local_irq_save(flags);
- check_lock(&lock->debug);
- while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
- {
- LOCK_PROFILE_BLOCK;
- local_irq_restore(flags);
- while ( likely(_raw_spin_is_locked(&lock->raw)) )
- cpu_relax();
- local_irq_save(flags);
- }
- LOCK_PROFILE_GOT;
- preempt_disable();
+ _spin_lock(lock);
return flags;
}
void _spin_unlock(spinlock_t *lock)
{
+ arch_lock_release_barrier();
preempt_enable();
LOCK_PROFILE_REL;
- _raw_spin_unlock(&lock->raw);
+ add_sized(&lock->tickets.head, 1);
}
void _spin_unlock_irq(spinlock_t *lock)
{
- preempt_enable();
- LOCK_PROFILE_REL;
- _raw_spin_unlock(&lock->raw);
+ _spin_unlock(lock);
local_irq_enable();
}
void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
- preempt_enable();
- LOCK_PROFILE_REL;
- _raw_spin_unlock(&lock->raw);
+ _spin_unlock(lock);
local_irq_restore(flags);
}
int _spin_is_locked(spinlock_t *lock)
{
check_lock(&lock->debug);
- return _raw_spin_is_locked(&lock->raw);
+ return lock->tickets.head != lock->tickets.tail;
}
int _spin_trylock(spinlock_t *lock)
{
+ spinlock_tickets_t old, new;
+
check_lock(&lock->debug);
- if ( !_raw_spin_trylock(&lock->raw) )
+ old = observe_lock(&lock->tickets);
+ if ( old.head != old.tail )
+ return 0;
+ new = old;
+ new.tail++;
+ if ( cmpxchg(&lock->tickets.head_tail,
+ old.head_tail, new.head_tail) != old.head_tail )
return 0;
#ifdef LOCK_PROFILE
if (lock->profile)
lock->profile->time_locked = NOW();
#endif
preempt_disable();
+ /*
+ * cmpxchg() is a full barrier so no need for an
+ * arch_lock_acquire_barrier().
+ */
return 1;
}
void _spin_barrier(spinlock_t *lock)
{
+ spinlock_tickets_t sample;
#ifdef LOCK_PROFILE
s_time_t block = NOW();
- u64 loop = 0;
+#endif
check_barrier(&lock->debug);
- do { smp_mb(); loop++;} while ( _raw_spin_is_locked(&lock->raw) );
- if ((loop > 1) && lock->profile)
+ smp_mb();
+ sample = observe_lock(&lock->tickets);
+ if ( sample.head != sample.tail )
{
- lock->profile->time_block += NOW() - block;
- lock->profile->block_cnt++;
- }
-#else
- check_barrier(&lock->debug);
- do { smp_mb(); } while ( _raw_spin_is_locked(&lock->raw) );
+ while ( observe_head(&lock->tickets) == sample.head )
+ cpu_relax();
+#ifdef LOCK_PROFILE
+ if ( lock->profile )
+ {
+ lock->profile->time_block += NOW() - block;
+ lock->profile->block_cnt++;
+ }
#endif
+ }
smp_mb();
}
int _spin_trylock_recursive(spinlock_t *lock)
{
- int cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
/* Don't allow overflow of recurse_cpu field. */
BUILD_BUG_ON(NR_CPUS > 0xfffu);
@@ -256,8 +265,17 @@ int _spin_trylock_recursive(spinlock_t *
void _spin_lock_recursive(spinlock_t *lock)
{
- while ( !spin_trylock_recursive(lock) )
- cpu_relax();
+ unsigned int cpu = smp_processor_id();
+
+ if ( likely(lock->recurse_cpu != cpu) )
+ {
+ _spin_lock(lock);
+ lock->recurse_cpu = cpu;
+ }
+
+ /* We support only fairly shallow recursion, else the counter overflows. */
+ ASSERT(lock->recurse_cnt < 0xfu);
+ lock->recurse_cnt++;
}
void _spin_unlock_recursive(spinlock_t *lock)
--- a/xen/include/asm-arm/system.h
+++ b/xen/include/asm-arm/system.h
@@ -53,6 +53,9 @@
#define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v)
+#define arch_lock_acquire_barrier() smp_mb()
+#define arch_lock_release_barrier() smp_mb()
+
extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next);
#endif
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -185,6 +185,17 @@ static always_inline unsigned long __xad
#define set_mb(var, value) do { xchg(&var, value); } while (0)
#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+/*
+ * On x86 the only reordering is of reads with older writes. In the
+ * lock case, the read in observe_head() can only be reordered with
+ * writes that precede it, and moving a write _into_ a locked section
+ * is OK. In the release case, the write in add_sized() can only be
+ * reordered with reads that follow it, and hoisting a read _into_ a
+ * locked region is OK.
+ */
+#define arch_lock_acquire_barrier() barrier()
+#define arch_lock_release_barrier() barrier()
+
#define local_irq_disable() asm volatile ( "cli" : : : "memory" )
#define local_irq_enable() asm volatile ( "sti" : : : "memory" )
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -80,8 +80,7 @@ struct lock_profile_qhead {
static struct lock_profile *__lock_profile_##name \
__used_section(".lockprofile.data") = \
&__lock_profile_data_##name
-#define _SPIN_LOCK_UNLOCKED(x) { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, \
- _LOCK_DEBUG, x }
+#define _SPIN_LOCK_UNLOCKED(x) { { 0 }, 0xfffu, 0, _LOCK_DEBUG, x }
#define SPIN_LOCK_UNLOCKED _SPIN_LOCK_UNLOCKED(NULL)
#define DEFINE_SPINLOCK(l) \
spinlock_t l = _SPIN_LOCK_UNLOCKED(NULL); \
@@ -117,8 +116,7 @@ extern void spinlock_profile_reset(unsig
struct lock_profile_qhead { };
-#define SPIN_LOCK_UNLOCKED \
- { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, _LOCK_DEBUG }
+#define SPIN_LOCK_UNLOCKED { { 0 }, 0xfffu, 0, _LOCK_DEBUG }
#define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
#define spin_lock_init_prof(s, l) spin_lock_init(&((s)->l))
@@ -127,8 +125,18 @@ struct lock_profile_qhead { };
#endif
+typedef union {
+ u32 head_tail;
+ struct {
+ u16 head;
+ u16 tail;
+ };
+} spinlock_tickets_t;
+
+#define SPINLOCK_TICKET_INC { .head_tail = 0x10000, }
+
typedef struct spinlock {
- raw_spinlock_t raw;
+ spinlock_tickets_t tickets;
u16 recurse_cpu:12;
u16 recurse_cnt:4;
struct lock_debug debug;

View File

@ -1,266 +0,0 @@
# Commit e62e49e6d5d4e8d22f3df0b75443ede65a812435
# Date 2015-05-15 09:52:25 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86,arm: remove asm/spinlock.h from all architectures
Now that all architecture use a common ticket lock implementation for
spinlocks, remove the architecture specific byte lock implementations.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/xen/arch/arm/README.LinuxPrimitives
+++ b/xen/arch/arm/README.LinuxPrimitives
@@ -25,16 +25,6 @@ linux/arch/arm64/include/asm/atomic.h
---------------------------------------------------------------------
-spinlocks: last sync @ v3.16-rc6 (last commit: 95c4189689f9)
-
-linux/arch/arm64/include/asm/spinlock.h xen/include/asm-arm/arm64/spinlock.h
-
-Skipped:
- 5686b06 arm64: lockref: add support for lockless lockrefs using cmpxchg
- 52ea2a5 arm64: locks: introduce ticket-based spinlock implementation
-
----------------------------------------------------------------------
-
mem*: last sync @ v3.16-rc6 (last commit: d875c9b37240)
linux/arch/arm64/lib/memchr.S xen/arch/arm/arm64/lib/memchr.S
@@ -103,24 +93,6 @@ linux/arch/arm/include/asm/atomic.h
---------------------------------------------------------------------
-spinlocks: last sync: 15e7e5c1ebf5
-
-linux/arch/arm/include/asm/spinlock.h xen/include/asm-arm/arm32/spinlock.h
-
-*** Linux has switched to ticket locks but we still use bitlocks.
-
-resync to v3.14-rc7:
-
- 7c8746a ARM: 7955/1: spinlock: ensure we have a compiler barrier before sev
- 0cbad9c ARM: 7854/1: lockref: add support for lockless lockrefs using cmpxchg64
- 9bb17be ARM: locks: prefetch the destination word for write prior to strex
- 27a8479 ARM: smp_on_up: move inline asm ALT_SMP patching macro out of spinlock.
- 00efaa0 ARM: 7812/1: rwlocks: retry trylock operation if strex fails on free lo
- afa31d8 ARM: 7811/1: locks: use early clobber in arch_spin_trylock
- 73a6fdc ARM: spinlock: use inner-shareable dsb variant prior to sev instruction
-
----------------------------------------------------------------------
-
mem*: last sync @ v3.16-rc6 (last commit: d98b90ea22b0)
linux/arch/arm/lib/copy_template.S xen/arch/arm/arm32/lib/copy_template.S
--- a/xen/include/asm-arm/arm32/spinlock.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef __ASM_ARM32_SPINLOCK_H
-#define __ASM_ARM32_SPINLOCK_H
-
-static inline void dsb_sev(void)
-{
- __asm__ __volatile__ (
- "dsb\n"
- "sev\n"
- );
-}
-
-typedef struct {
- volatile unsigned int lock;
-} raw_spinlock_t;
-
-#define _RAW_SPIN_LOCK_UNLOCKED { 0 }
-
-#define _raw_spin_is_locked(x) ((x)->lock != 0)
-
-static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
-{
- ASSERT(_raw_spin_is_locked(lock));
-
- smp_mb();
-
- __asm__ __volatile__(
-" str %1, [%0]\n"
- :
- : "r" (&lock->lock), "r" (0)
- : "cc");
-
- dsb_sev();
-}
-
-static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
-{
- unsigned long contended, res;
-
- do {
- __asm__ __volatile__(
- " ldrex %0, [%2]\n"
- " teq %0, #0\n"
- " strexeq %1, %3, [%2]\n"
- " movne %1, #0\n"
- : "=&r" (contended), "=r" (res)
- : "r" (&lock->lock), "r" (1)
- : "cc");
- } while (res);
-
- if (!contended) {
- smp_mb();
- return 1;
- } else {
- return 0;
- }
-}
-
-#endif /* __ASM_SPINLOCK_H */
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
--- a/xen/include/asm-arm/arm64/spinlock.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Derived from Linux arch64 spinlock.h which is:
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ASM_ARM64_SPINLOCK_H
-#define __ASM_ARM64_SPINLOCK_H
-
-typedef struct {
- volatile unsigned int lock;
-} raw_spinlock_t;
-
-#define _RAW_SPIN_LOCK_UNLOCKED { 0 }
-
-#define _raw_spin_is_locked(x) ((x)->lock != 0)
-
-static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
-{
- ASSERT(_raw_spin_is_locked(lock));
-
- asm volatile(
- " stlr %w1, %0\n"
- : "=Q" (lock->lock) : "r" (0) : "memory");
-}
-
-static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
-{
- unsigned int tmp;
-
- asm volatile(
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1f\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- "1:\n"
- : "=&r" (tmp), "+Q" (lock->lock)
- : "r" (1)
- : "cc", "memory");
-
- return !tmp;
-}
-
-#endif /* __ASM_SPINLOCK_H */
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
--- a/xen/include/asm-arm/spinlock.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <xen/config.h>
-#include <xen/lib.h>
-
-#if defined(CONFIG_ARM_32)
-# include <asm/arm32/spinlock.h>
-#elif defined(CONFIG_ARM_64)
-# include <asm/arm64/spinlock.h>
-#else
-# error "unknown ARM variant"
-#endif
-
-#endif /* __ASM_SPINLOCK_H */
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
--- a/xen/include/asm-x86/spinlock.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <asm/atomic.h>
-
-typedef struct {
- volatile s16 lock;
-} raw_spinlock_t;
-
-#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 1 }
-
-#define _raw_spin_is_locked(x) ((x)->lock <= 0)
-
-static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
-{
- ASSERT(_raw_spin_is_locked(lock));
- asm volatile (
- "movw $1,%0"
- : "=m" (lock->lock) : : "memory" );
-}
-
-static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
-{
- s16 oldval;
- asm volatile (
- "xchgw %w0,%1"
- :"=r" (oldval), "=m" (lock->lock)
- :"0" ((s16)0) : "memory" );
- return (oldval > 0);
-}
-
-#endif /* __ASM_SPINLOCK_H */
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -2,7 +2,6 @@
#define __SPINLOCK_H__
#include <asm/system.h>
-#include <asm/spinlock.h>
#ifndef NDEBUG
struct lock_debug {

View File

@ -1,141 +0,0 @@
# Commit f278fcf19ce15f7b7ee69181560b5884a5e12b66
# Date 2015-05-15 10:06:04 +0200
# Author Roger Pau Monné <roger.pau@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
introduce a helper to allocate non-contiguous memory
The allocator uses independent calls to alloc_domheap_pages in order to get
the desired amount of memory and then maps all the independent physical
addresses into a contiguous virtual address space.
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Tested-by: Julien Grall <julien.grall@citrix.com> (ARM)
Reviewed-by: Tim Deegan <tim@xen.org>
# Commit 640f891eb258563bb155e577389e8c5e6541a59a
# Date 2015-05-21 08:57:19 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
vmap: avoid hitting an ASSERT with vfree(NULL)
and unconditionally defer the vm_size() call, as it doesn't have a NULL
short circuit.
Reported-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
--- a/xen/common/vmap.c
+++ b/xen/common/vmap.c
@@ -215,4 +215,75 @@ void vunmap(const void *va)
#endif
vm_free(va);
}
+
+void *vmalloc(size_t size)
+{
+ unsigned long *mfn;
+ size_t pages, i;
+ struct page_info *pg;
+ void *va;
+
+ ASSERT(size);
+
+ pages = PFN_UP(size);
+ mfn = xmalloc_array(unsigned long, pages);
+ if ( mfn == NULL )
+ return NULL;
+
+ for ( i = 0; i < pages; i++ )
+ {
+ pg = alloc_domheap_page(NULL, 0);
+ if ( pg == NULL )
+ goto error;
+ mfn[i] = page_to_mfn(pg);
+ }
+
+ va = vmap(mfn, pages);
+ if ( va == NULL )
+ goto error;
+
+ xfree(mfn);
+ return va;
+
+ error:
+ while ( i-- )
+ free_domheap_page(mfn_to_page(mfn[i]));
+ xfree(mfn);
+ return NULL;
+}
+
+void *vzalloc(size_t size)
+{
+ void *p = vmalloc(size);
+ int i;
+
+ if ( p == NULL )
+ return NULL;
+
+ for ( i = 0; i < size; i += PAGE_SIZE )
+ clear_page(p + i);
+
+ return p;
+}
+
+void vfree(void *va)
+{
+ unsigned int i, pages;
+ struct page_info *pg;
+ PAGE_LIST_HEAD(pg_list);
+
+ if ( !va )
+ return;
+
+ pages = vm_size(va);
+ ASSERT(pages);
+
+ for ( i = 0; i < pages; i++ )
+ page_list_add(vmap_to_page(va + i * PAGE_SIZE), &pg_list);
+
+ vunmap(va);
+
+ while ( (pg = page_list_remove_head(&pg_list)) != NULL )
+ free_domheap_page(pg);
+}
#endif
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -208,6 +208,8 @@ static inline void __iomem *ioremap_wc(p
#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
#define paddr_to_pdx(pa) pfn_to_pdx(paddr_to_pfn(pa))
+#define vmap_to_mfn(va) paddr_to_pfn(virt_to_maddr((vaddr_t)va))
+#define vmap_to_page(va) mfn_to_page(vmap_to_mfn(va))
/* Page-align address and convert to frame number format */
#define paddr_to_pfn_aligned(paddr) paddr_to_pfn(PAGE_ALIGN(paddr))
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -262,6 +262,8 @@ void copy_page_sse2(void *, const void *
#define pfn_to_paddr(pfn) __pfn_to_paddr(pfn)
#define paddr_to_pfn(pa) __paddr_to_pfn(pa)
#define paddr_to_pdx(pa) pfn_to_pdx(paddr_to_pfn(pa))
+#define vmap_to_mfn(va) l1e_get_pfn(*virt_to_xen_l1e((unsigned long)(va)))
+#define vmap_to_page(va) mfn_to_page(vmap_to_mfn(va))
#endif /* !defined(__ASSEMBLY__) */
--- a/xen/include/xen/vmap.h
+++ b/xen/include/xen/vmap.h
@@ -11,6 +11,9 @@ void *__vmap(const unsigned long *mfn, u
unsigned int nr, unsigned int align, unsigned int flags);
void *vmap(const unsigned long *mfn, unsigned int nr);
void vunmap(const void *);
+void *vmalloc(size_t size);
+void *vzalloc(size_t size);
+void vfree(void *va);
void __iomem *ioremap(paddr_t, size_t);

View File

@ -1,29 +0,0 @@
# Commit fed56ba0e69b251d0222ef0785cd1c1838f9e51d
# Date 2015-06-02 13:45:03 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
unmodified-drivers: tolerate IRQF_DISABLED being undefined
It's being removed in Linux 4.1.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
--- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c
@@ -350,11 +350,13 @@ int xen_irq_init(struct pci_dev *pdev)
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
SA_SHIRQ | SA_SAMPLE_RANDOM | SA_INTERRUPT,
#else
- IRQF_SHARED |
#ifdef IRQF_SAMPLE_RANDOM
IRQF_SAMPLE_RANDOM |
#endif
- IRQF_DISABLED,
+#ifdef IRQF_DISABLED
+ IRQF_DISABLED |
+#endif
+ IRQF_SHARED,
#endif
"xen-platform-pci", pdev);
}

View File

@ -1,164 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 85baced14dec2fafa9fe560969dba2ae28e8bebb
# Date 2015-06-09 15:59:31 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: adjust PV I/O emulation functions' types
admin_io_okay(), guest_io_read(), and guest_io_write() all don't need
their current "regs" parameter at all, and they don't use the vCPU
passed to them for other than obtaining its domain. Drop the former and
replace the latter by a struct domain pointer.
pci_cfg_okay() returns a boolean type, and its "write" parameter is of
boolean kind too.
All of them get called for the current vCPU (and hence current domain)
only, so name the domain parameters accordingly except in the
admin_io_okay() case, which a subsequent patch will use for simplifying
setup_io_bitmap().
Latch current->domain into a local variable in emulate_privileged_op().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
# Commit 2d67a7a4d37a4759bcd7f2ee2d740497ad669c7d
# Date 2015-06-18 15:07:10 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: synchronize PCI config space access decoding
Both PV and HVM logic have similar but not similar enough code here.
Synchronize the two so that
- in the HVM case we don't unconditionally try to access extended
config space
- in the PV case we pass a correct range to the XSM hook
- in the PV case we don't needlessly deny access when the operation
isn't really on PCI config space
All this along with sharing the macros HVM already had here.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Backport stripped down to just the pci_cfg_ok() adjustments.
Index: xen-4.5.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.5.2-testing/xen/arch/x86/traps.c
@@ -1709,14 +1709,18 @@ static int admin_io_okay(
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
-static int pci_cfg_ok(struct domain *d, int write, int size)
+static bool_t pci_cfg_ok(struct domain *currd, bool_t write,
+ unsigned int start, unsigned int size)
{
uint32_t machine_bdf;
- uint16_t start, end;
- if (!is_hardware_domain(d))
+
+ if ( !is_hardware_domain(currd) )
return 0;
- machine_bdf = (d->arch.pci_cf8 >> 8) & 0xFFFF;
+ if ( !CF8_ENABLED(currd->arch.pci_cf8) )
+ return 1;
+
+ machine_bdf = CF8_BDF(currd->arch.pci_cf8);
if ( write )
{
const unsigned long *ro_map = pci_get_ro_map(0);
@@ -1724,9 +1728,9 @@ static int pci_cfg_ok(struct domain *d,
if ( ro_map && test_bit(machine_bdf, ro_map) )
return 0;
}
- start = d->arch.pci_cf8 & 0xFF;
+ start |= CF8_ADDR_LO(currd->arch.pci_cf8);
/* AMD extended configuration space access? */
- if ( (d->arch.pci_cf8 & 0x0F000000) &&
+ if ( CF8_ADDR_HI(currd->arch.pci_cf8) &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86 >= 0x10 && boot_cpu_data.x86 <= 0x17 )
{
@@ -1735,12 +1739,11 @@ static int pci_cfg_ok(struct domain *d,
if ( rdmsr_safe(MSR_AMD64_NB_CFG, msr_val) )
return 0;
if ( msr_val & (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT) )
- start |= (d->arch.pci_cf8 >> 16) & 0xF00;
+ start |= CF8_ADDR_HI(currd->arch.pci_cf8);
}
- end = start + size - 1;
- if (xsm_pci_config_permission(XSM_HOOK, d, machine_bdf, start, end, write))
- return 0;
- return 1;
+
+ return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
+ start, start + size - 1, write);
}
uint32_t guest_io_read(
@@ -1794,7 +1797,7 @@ uint32_t guest_io_read(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 0, size) )
+ if ( pci_cfg_ok(v->domain, 0, port & 3, size) )
sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
}
@@ -1867,7 +1870,7 @@ void guest_io_write(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 1, size) )
+ if ( pci_cfg_ok(v->domain, 1, port & 3, size) )
pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
}
Index: xen-4.5.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.5.2-testing/xen/arch/x86/hvm/hvm.c
@@ -2357,11 +2357,6 @@ void hvm_vcpu_down(struct vcpu *v)
static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
ioreq_t *p)
{
-#define CF8_BDF(cf8) (((cf8) & 0x00ffff00) >> 8)
-#define CF8_ADDR_LO(cf8) ((cf8) & 0x000000fc)
-#define CF8_ADDR_HI(cf8) (((cf8) & 0x0f000000) >> 16)
-#define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))
-
struct hvm_ioreq_server *s;
uint32_t cf8;
uint8_t type;
@@ -2446,11 +2441,6 @@ static struct hvm_ioreq_server *hvm_sele
}
return d->arch.hvm_domain.default_ioreq_server;
-
-#undef CF8_ADDR_ENABLED
-#undef CF8_ADDR_HI
-#undef CF8_ADDR_LO
-#undef CF8_BDF
}
int hvm_buffered_io_send(ioreq_t *p)
Index: xen-4.5.2-testing/xen/include/asm-x86/pci.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-x86/pci.h
+++ xen-4.5.2-testing/xen/include/asm-x86/pci.h
@@ -1,6 +1,11 @@
#ifndef __X86_PCI_H__
#define __X86_PCI_H__
+#define CF8_BDF(cf8) ( ((cf8) & 0x00ffff00) >> 8)
+#define CF8_ADDR_LO(cf8) ( (cf8) & 0x000000fc)
+#define CF8_ADDR_HI(cf8) ( ((cf8) & 0x0f000000) >> 16)
+#define CF8_ENABLED(cf8) (!!((cf8) & 0x80000000))
+
#define IS_SNB_GFX(id) (id == 0x01068086 || id == 0x01168086 \
|| id == 0x01268086 || id == 0x01028086 \
|| id == 0x01128086 || id == 0x01228086 \

View File

@ -1,99 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 284ffb4f9b0d5c3a33c4c5bd87645d0cc342ca96
# Date 2015-06-11 11:52:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/vMSI-X: support qword MMIO access
The specification explicitly provides for this, so we should have
supported this from the beginning.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -223,7 +223,7 @@ static int msixtbl_read(
unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
- if ( len != 4 || (address & 3) )
+ if ( (len != 4 && len != 8) || (address & (len - 1)) )
return r;
rcu_read_lock(&msixtbl_rcu_lock);
@@ -241,13 +241,25 @@ static int msixtbl_read(
!acc_bit(test, entry, nr_entry, index) )
goto out;
*pval = entry->gentries[nr_entry].msi_ad[index];
+ if ( len == 8 )
+ {
+ if ( index )
+ offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+ else if ( acc_bit(test, entry, nr_entry, 1) )
+ *pval |= (u64)entry->gentries[nr_entry].msi_ad[1] << 32;
+ else
+ goto out;
+ }
}
- else
+ if ( offset == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
virt = msixtbl_addr_to_virt(entry, address);
if ( !virt )
goto out;
- *pval = readl(virt);
+ if ( len == 4 )
+ *pval = readl(virt);
+ else
+ *pval |= (u64)readl(virt) << 32;
}
r = X86EMUL_OKAY;
@@ -268,7 +280,7 @@ static int msixtbl_write(struct vcpu *v,
unsigned long flags, orig;
struct irq_desc *desc;
- if ( len != 4 || (address & 3) )
+ if ( (len != 4 && len != 8) || (address & (len - 1)) )
return r;
rcu_read_lock(&msixtbl_rcu_lock);
@@ -279,16 +291,23 @@ static int msixtbl_write(struct vcpu *v,
nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
- if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
+ if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
+ index = offset / sizeof(uint32_t);
if ( nr_entry < MAX_MSIX_ACC_ENTRIES )
{
- index = offset / sizeof(uint32_t);
entry->gentries[nr_entry].msi_ad[index] = val;
acc_bit(set, entry, nr_entry, index);
+ if ( len == 8 && !index )
+ {
+ entry->gentries[nr_entry].msi_ad[1] = val >> 32;
+ acc_bit(set, entry, nr_entry, 1);
+ }
}
set_bit(nr_entry, &entry->table_flags);
- goto out;
+ if ( len != 8 || !index )
+ goto out;
+ val >>= 32;
}
/* Exit to device model when unmasking and address/data got modified. */
@@ -352,7 +371,8 @@ static int msixtbl_write(struct vcpu *v,
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
- r = X86EMUL_OKAY;
+ if ( len == 4 )
+ r = X86EMUL_OKAY;
out:
rcu_read_unlock(&msixtbl_rcu_lock);

View File

@ -1,551 +0,0 @@
# Commit b4650e9a96d78b87ccf7deb4f74733ccfcc64db5
# Date 2015-06-15 13:22:07 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: per-active entry locking
Introduce a per-active entry spin lock to protect active entry state
The grant table lock must be locked before acquiring (locking) an
active entry.
This is a step in reducing contention on the grant table lock, but
will only do so once the grant table lock is turned into a read-write
lock.
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -63,6 +63,7 @@ is complete.
act->domid : remote domain being granted rights
act->frame : machine frame being granted
act->pin : used to hold reference counts
+ act->lock : spinlock used to serialize access to active entry state
Map tracking
~~~~~~~~~~~~
@@ -74,7 +75,46 @@ is complete.
matching map track entry is then removed, as if unmap had been invoked.
These are not used by the transfer mechanism.
map->domid : owner of the mapped frame
- map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+ map->ref : grant reference
+ map->flags : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+ grant_table->lock : lock used to prevent readers from accessing
+ inconsistent grant table state such as current
+ version, partially initialized active table pages,
+ etc.
+ active_grant_entry->lock : spinlock used to serialize modifications to
+ active entries
+
+ The primary lock for the grant table is a spinlock. All functions
+ that access members of struct grant_table must acquire the lock
+ around critical sections.
+
+ Active entries are obtained by calling active_entry_acquire(gt, ref).
+ This function returns a pointer to the active entry after locking its
+ spinlock. The caller must hold the grant table lock for the gt in
+ question before calling active_entry_acquire(). This is because the
+ grant table can be dynamically extended via gnttab_grow_table() while
+ a domain is running and must be fully initialized. Once all access to
+ the active entry is complete, release the lock by calling
+ active_entry_release(act).
+
+ Summary of rules for locking:
+ active_entry_acquire() and active_entry_release() can only be
+ called when holding the relevant grant table's lock. I.e.:
+ spin_lock(&gt->lock);
+ act = active_entry_acquire(gt, ref);
+ ...
+ active_entry_release(act);
+ spin_unlock(&gt->lock);
+
+ Active entries cannot be acquired while holding the maptrack lock.
+ Multiple active entries can be acquired while holding the grant table
+ lock.
********************************************************************************
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -157,10 +157,13 @@ struct active_grant_entry {
in the page. */
unsigned length:16; /* For sub-page grants, the length of the
grant. */
+ spinlock_t lock; /* lock to protect access of this entry.
+ see docs/misc/grant-tables.txt for
+ locking protocol */
};
#define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
-#define active_entry(t, e) \
+#define _active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
static inline void gnttab_flush_tlb(const struct domain *d)
@@ -188,6 +191,24 @@ nr_active_grant_frames(struct grant_tabl
return num_act_frames_from_sha_frames(nr_grant_frames(gt));
}
+static inline struct active_grant_entry *
+active_entry_acquire(struct grant_table *t, grant_ref_t e)
+{
+ struct active_grant_entry *act;
+
+ ASSERT(spin_is_locked(&t->lock));
+
+ act = &_active_entry(t, e);
+ spin_lock(&act->lock);
+
+ return act;
+}
+
+static inline void active_entry_release(struct active_grant_entry *act)
+{
+ spin_unlock(&act->lock);
+}
+
/* Check if the page has been paged out, or needs unsharing.
If rc == GNTST_okay, *page contains the page struct with a ref taken.
Caller must do put_page(*page).
@@ -505,7 +526,6 @@ static int grant_map_exists(const struct
unsigned long mfn,
unsigned int *ref_count)
{
- const struct active_grant_entry *act;
unsigned int ref, max_iter;
ASSERT(spin_is_locked(&rgt->lock));
@@ -514,18 +534,19 @@ static int grant_map_exists(const struct
nr_grant_entries(rgt));
for ( ref = *ref_count; ref < max_iter; ref++ )
{
- act = &active_entry(rgt, ref);
+ struct active_grant_entry *act;
+ bool_t exists;
- if ( !act->pin )
- continue;
+ act = active_entry_acquire(rgt, ref);
- if ( act->domid != ld->domain_id )
- continue;
+ exists = act->pin
+ && act->domid == ld->domain_id
+ && act->frame == mfn;
- if ( act->frame != mfn )
- continue;
+ active_entry_release(act);
- return 0;
+ if ( exists )
+ return 0;
}
if ( ref < nr_grant_entries(rgt) )
@@ -546,13 +567,24 @@ static void mapcount(
*wrc = *rdc = 0;
+ /*
+ * Must have the local domain's grant table lock when iterating
+ * over its maptrack entries.
+ */
+ ASSERT(spin_is_locked(&lgt->lock));
+ /*
+ * Must have the remote domain's grant table lock while counting
+ * its active entries.
+ */
+ ASSERT(spin_is_locked(&rd->grant_table->lock));
+
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
map = &maptrack_entry(lgt, handle);
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
map->domid != rd->domain_id )
continue;
- if ( active_entry(rd->grant_table, map->ref).frame == mfn )
+ if ( _active_entry(rd->grant_table, map->ref).frame == mfn )
(map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++;
}
}
@@ -639,7 +671,7 @@ __gnttab_map_grant_ref(
if ( unlikely(op->ref >= nr_grant_entries(rgt)))
PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
shah = shared_entry_header(rgt, op->ref);
if (rgt->gt_version == 1) {
sha1 = &shared_entry_v1(rgt, op->ref);
@@ -656,7 +688,7 @@ __gnttab_map_grant_ref(
((act->domid != ld->domain_id) ||
(act->pin & 0x80808080U) != 0 ||
(act->is_sub_page)) )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
act->domid, ld->domain_id, act->pin, act->is_sub_page);
@@ -667,7 +699,7 @@ __gnttab_map_grant_ref(
if ( (rc = _set_status(rgt->gt_version, ld->domain_id,
op->flags & GNTMAP_readonly,
1, shah, act, status) ) != GNTST_okay )
- goto unlock_out;
+ goto act_release_out;
if ( !act->pin )
{
@@ -702,6 +734,7 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
+ active_entry_release(act);
spin_unlock(&rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
@@ -839,7 +872,7 @@ __gnttab_map_grant_ref(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
if ( op->flags & GNTMAP_device_map )
act->pin -= (op->flags & GNTMAP_readonly) ?
@@ -856,6 +889,9 @@ __gnttab_map_grant_ref(
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
+ act_release_out:
+ active_entry_release(act);
+
unlock_out:
spin_unlock(&rgt->lock);
op->status = rc;
@@ -950,7 +986,7 @@ __gnttab_unmap_common(
}
op->rd = rd;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
if ( op->frame == 0 )
{
@@ -959,7 +995,7 @@ __gnttab_unmap_common(
else
{
if ( unlikely(op->frame != act->frame) )
- PIN_FAIL(unmap_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad frame number doesn't match gntref. (%lx != %lx)\n",
op->frame, act->frame);
if ( op->flags & GNTMAP_device_map )
@@ -978,7 +1014,7 @@ __gnttab_unmap_common(
if ( (rc = replace_grant_host_mapping(op->host_addr,
op->frame, op->new_addr,
op->flags)) < 0 )
- goto unmap_out;
+ goto act_release_out;
ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
op->map->flags &= ~GNTMAP_host_map;
@@ -1000,7 +1036,7 @@ __gnttab_unmap_common(
if ( err )
{
rc = GNTST_general_error;
- goto unmap_out;
+ goto act_release_out;
}
}
@@ -1008,8 +1044,11 @@ __gnttab_unmap_common(
if ( !(op->flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
+ act_release_out:
+ active_entry_release(act);
unmap_out:
double_gt_unlock(lgt, rgt);
+
op->status = rc;
rcu_unlock_domain(rd);
}
@@ -1042,9 +1081,9 @@ __gnttab_unmap_common_complete(struct gn
spin_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- goto unmap_out;
+ goto unlock_out;
- act = &active_entry(rgt, op->map->ref);
+ act = active_entry_acquire(rgt, op->map->ref);
sha = shared_entry_header(rgt, op->map->ref);
if ( rgt->gt_version == 1 )
@@ -1058,7 +1097,7 @@ __gnttab_unmap_common_complete(struct gn
* Suggests that __gntab_unmap_common failed early and so
* nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
pg = mfn_to_page(op->frame);
@@ -1082,7 +1121,7 @@ __gnttab_unmap_common_complete(struct gn
* Suggests that __gntab_unmap_common failed in
* replace_grant_host_mapping() so nothing further to do
*/
- goto unmap_out;
+ goto act_release_out;
}
if ( !is_iomem_page(op->frame) )
@@ -1103,8 +1142,11 @@ __gnttab_unmap_common_complete(struct gn
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
- unmap_out:
+ act_release_out:
+ active_entry_release(act);
+ unlock_out:
spin_unlock(&rgt->lock);
+
if ( put_handle )
{
op->map->flags = 0;
@@ -1296,7 +1338,7 @@ gnttab_grow_table(struct domain *d, unsi
/* d's grant table lock must be held by the caller */
struct grant_table *gt = d->grant_table;
- unsigned int i;
+ unsigned int i, j;
ASSERT(req_nr_frames <= max_grant_frames);
@@ -1311,6 +1353,8 @@ gnttab_grow_table(struct domain *d, unsi
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
clear_page(gt->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&gt->active[i][j].lock);
}
/* Shared */
@@ -1805,7 +1849,7 @@ __release_grant_for_copy(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
r_frame = act->frame;
@@ -1844,6 +1888,7 @@ __release_grant_for_copy(
released_read = 1;
}
+ active_entry_release(act);
spin_unlock(&rgt->lock);
if ( td != rd )
@@ -1905,14 +1950,14 @@ __acquire_grant_for_copy(
spin_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(gt_unlock_out, GNTST_general_error,
"remote grant table not ready\n");
if ( unlikely(gref >= nr_grant_entries(rgt)) )
- PIN_FAIL(unlock_out, GNTST_bad_gntref,
+ PIN_FAIL(gt_unlock_out, GNTST_bad_gntref,
"Bad grant reference %ld\n", gref);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
shah = shared_entry_header(rgt, gref);
if ( rgt->gt_version == 1 )
{
@@ -1971,6 +2016,13 @@ __acquire_grant_for_copy(
PIN_FAIL(unlock_out_clear, GNTST_general_error,
"transitive grant referenced bad domain %d\n",
trans_domid);
+
+ /*
+ * __acquire_grant_for_copy() could take the lock on the
+ * remote table (if rd == td), so we have to drop the lock
+ * here and reacquire
+ */
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
@@ -1978,9 +2030,12 @@ __acquire_grant_for_copy(
&trans_page_off, &trans_length, 0);
spin_lock(&rgt->lock);
+ act = active_entry_acquire(rgt, gref);
+
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
}
@@ -1993,6 +2048,7 @@ __acquire_grant_for_copy(
{
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
@@ -2061,6 +2117,7 @@ __acquire_grant_for_copy(
*length = act->length;
*frame = act->frame;
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
@@ -2073,7 +2130,11 @@ __acquire_grant_for_copy(
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
+ active_entry_release(act);
+
+ gt_unlock_out:
spin_unlock(&rgt->lock);
+
return rc;
}
@@ -2373,7 +2434,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
gnttab_set_version_t op;
struct domain *d = current->domain;
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES];
long res;
int i;
@@ -2398,8 +2458,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
{
for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ )
{
- act = &active_entry(gt, i);
- if ( act->pin != 0 )
+ if ( read_atomic(&_active_entry(gt, i).pin) != 0 )
{
gdprintk(XENLOG_WARNING,
"tried to change grant table version from %d to %d, but some grant entries still in use\n",
@@ -2586,7 +2645,8 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
{
struct domain *d = rcu_lock_current_domain();
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
+ struct active_grant_entry *act_a = NULL;
+ struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
spin_lock(&gt->lock);
@@ -2600,12 +2660,16 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
if ( unlikely(ref_b >= nr_grant_entries(d->grant_table)))
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b);
- act = &active_entry(gt, ref_a);
- if ( act->pin )
+ /* Swapping the same ref is a no-op. */
+ if ( ref_a == ref_b )
+ goto out;
+
+ act_a = active_entry_acquire(gt, ref_a);
+ if ( act_a->pin )
PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a);
- act = &active_entry(gt, ref_b);
- if ( act->pin )
+ act_b = active_entry_acquire(gt, ref_b);
+ if ( act_b->pin )
PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b);
if ( gt->gt_version == 1 )
@@ -2632,6 +2696,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
}
out:
+ if ( act_b != NULL )
+ active_entry_release(act_b);
+ if ( act_a != NULL )
+ active_entry_release(act_a);
spin_unlock(&gt->lock);
rcu_unlock_domain(d);
@@ -2941,7 +3009,7 @@ grant_table_create(
struct domain *d)
{
struct grant_table *t;
- int i;
+ unsigned int i, j;
if ( (t = xzalloc(struct grant_table)) == NULL )
goto no_mem_0;
@@ -2960,6 +3028,8 @@ grant_table_create(
if ( (t->active[i] = alloc_xenheap_page()) == NULL )
goto no_mem_2;
clear_page(t->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&t->active[i][j].lock);
}
/* Tracking of mapped foreign frames table */
@@ -3056,7 +3126,7 @@ gnttab_release_mappings(
rgt = rd->grant_table;
spin_lock(&rgt->lock);
- act = &active_entry(rgt, ref);
+ act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
if (rgt->gt_version == 1)
status = &sha->flags;
@@ -3114,6 +3184,7 @@ gnttab_release_mappings(
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rcu_unlock_domain(rd);
@@ -3176,9 +3247,12 @@ static void gnttab_usage_print(struct do
uint16_t status;
uint64_t frame;
- act = &active_entry(gt, ref);
+ act = active_entry_acquire(gt, ref);
if ( !act->pin )
+ {
+ active_entry_release(act);
continue;
+ }
sha = shared_entry_header(gt, ref);
@@ -3208,6 +3282,7 @@ static void gnttab_usage_print(struct do
printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n",
ref, act->domid, act->frame, act->pin,
sha->domid, frame, status);
+ active_entry_release(act);
}
out:

View File

@ -1,86 +0,0 @@
# Commit 5a9899ddc42040e139233a6b1f0f65f3b65eda6d
# Date 2015-06-15 13:23:34 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: introduce maptrack lock
Split grant table lock into two separate locks. One to protect
maptrack free list (maptrack_lock) and one for everything else (lock).
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -87,6 +87,7 @@ is complete.
inconsistent grant table state such as current
version, partially initialized active table pages,
etc.
+ grant_table->maptrack_lock : spinlock used to protect the maptrack free list
active_grant_entry->lock : spinlock used to serialize modifications to
active entries
@@ -94,6 +95,9 @@ is complete.
that access members of struct grant_table must acquire the lock
around critical sections.
+ The maptrack free list is protected by its own spinlock. The maptrack
+ lock may be locked while holding the grant table lock.
+
Active entries are obtained by calling active_entry_acquire(gt, ref).
This function returns a pointer to the active entry after locking its
spinlock. The caller must hold the grant table lock for the gt in
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -288,10 +288,10 @@ static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
{
- spin_lock(&t->lock);
+ spin_lock(&t->maptrack_lock);
maptrack_entry(t, handle).ref = t->maptrack_head;
t->maptrack_head = handle;
- spin_unlock(&t->lock);
+ spin_unlock(&t->maptrack_lock);
}
static inline int
@@ -303,7 +303,7 @@ get_maptrack_handle(
struct grant_mapping *new_mt;
unsigned int new_mt_limit, nr_frames;
- spin_lock(&lgt->lock);
+ spin_lock(&lgt->maptrack_lock);
while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
{
@@ -332,7 +332,7 @@ get_maptrack_handle(
nr_frames + 1);
}
- spin_unlock(&lgt->lock);
+ spin_unlock(&lgt->maptrack_lock);
return handle;
}
@@ -3016,6 +3016,7 @@ grant_table_create(
/* Simple stuff. */
spin_lock_init(&t->lock);
+ spin_lock_init(&t->maptrack_lock);
t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
/* Active grant table. */
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -82,6 +82,8 @@ struct grant_table {
struct grant_mapping **maptrack;
unsigned int maptrack_head;
unsigned int maptrack_limit;
+ /* Lock protecting the maptrack page list, head, and limit */
+ spinlock_t maptrack_lock;
/* Lock protecting updates to active and shared grant tables. */
spinlock_t lock;
/* The defined versions are 1 and 2. Set to 0 if we don't know

View File

@ -1,733 +0,0 @@
# Commit 40de9fffb4cc0b0485aa3391d72e2220b8e1ce12
# Date 2015-06-15 13:25:20 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: make the grant table lock a read-write lock
In combination with the per-active entry locks, the grant table lock
can be made a read-write lock since the majority of cases only the
read lock is required. The grant table read lock protects against
changes to the table version or size (which are done with the write
lock held).
The write lock is also required when two active entries must be
acquired.
The double lock is still required when updating IOMMU page tables.
With the lock contention being only on the maptrack lock (unless IOMMU
updates are required), performance and scalability is improved.
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -83,7 +83,7 @@ is complete.
~~~~~~~
Xen uses several locks to serialize access to the internal grant table state.
- grant_table->lock : lock used to prevent readers from accessing
+ grant_table->lock : rwlock used to prevent readers from accessing
inconsistent grant table state such as current
version, partially initialized active table pages,
etc.
@@ -91,34 +91,43 @@ is complete.
active_grant_entry->lock : spinlock used to serialize modifications to
active entries
- The primary lock for the grant table is a spinlock. All functions
- that access members of struct grant_table must acquire the lock
- around critical sections.
+ The primary lock for the grant table is a read/write spinlock. All
+ functions that access members of struct grant_table must acquire a
+ read lock around critical sections. Any modification to the members
+ of struct grant_table (e.g., nr_status_frames, nr_grant_frames,
+ active frames, etc.) must only be made if the write lock is
+ held. These elements are read-mostly, and read critical sections can
+ be large, which makes a rwlock a good choice.
The maptrack free list is protected by its own spinlock. The maptrack
lock may be locked while holding the grant table lock.
Active entries are obtained by calling active_entry_acquire(gt, ref).
This function returns a pointer to the active entry after locking its
- spinlock. The caller must hold the grant table lock for the gt in
- question before calling active_entry_acquire(). This is because the
- grant table can be dynamically extended via gnttab_grow_table() while
- a domain is running and must be fully initialized. Once all access to
- the active entry is complete, release the lock by calling
- active_entry_release(act).
+ spinlock. The caller must hold the grant table read lock before
+ calling active_entry_acquire(). This is because the grant table can
+ be dynamically extended via gnttab_grow_table() while a domain is
+ running and must be fully initialized. Once all access to the active
+ entry is complete, release the lock by calling active_entry_release(act).
Summary of rules for locking:
active_entry_acquire() and active_entry_release() can only be
- called when holding the relevant grant table's lock. I.e.:
- spin_lock(&gt->lock);
+ called when holding the relevant grant table's read lock. I.e.:
+ read_lock(&gt->lock);
act = active_entry_acquire(gt, ref);
...
active_entry_release(act);
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
Active entries cannot be acquired while holding the maptrack lock.
Multiple active entries can be acquired while holding the grant table
- lock.
+ _write_ lock.
+
+ Maptrack entries are protected by the corresponding active entry
+ lock. As an exception, new maptrack entries may be populated without
+ holding the lock, provided the flags field is written last. This
+ requires any maptrack entry user validates the flags field as
+ non-zero first.
********************************************************************************
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1037,7 +1037,7 @@ int xenmem_add_to_physmap_one(
switch ( space )
{
case XENMAPSPACE_grant_table:
- spin_lock(&d->grant_table->lock);
+ write_lock(&d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -1067,7 +1067,7 @@ int xenmem_add_to_physmap_one(
t = p2m_ram_rw;
- spin_unlock(&d->grant_table->lock);
+ write_unlock(&d->grant_table->lock);
break;
case XENMAPSPACE_shared_info:
if ( idx != 0 )
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4595,7 +4595,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->shared_info);
break;
case XENMAPSPACE_grant_table:
- spin_lock(&d->grant_table->lock);
+ write_lock(&d->grant_table->lock);
if ( d->grant_table->gt_version == 0 )
d->grant_table->gt_version = 1;
@@ -4617,7 +4617,7 @@ int xenmem_add_to_physmap_one(
mfn = virt_to_mfn(d->grant_table->shared_raw[idx]);
}
- spin_unlock(&d->grant_table->lock);
+ write_unlock(&d->grant_table->lock);
break;
case XENMAPSPACE_gmfn_range:
case XENMAPSPACE_gmfn:
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -196,7 +196,7 @@ active_entry_acquire(struct grant_table
{
struct active_grant_entry *act;
- ASSERT(spin_is_locked(&t->lock));
+ ASSERT(rw_is_locked(&t->lock));
act = &_active_entry(t, e);
spin_lock(&act->lock);
@@ -252,25 +252,29 @@ static int __get_paged_frame(unsigned lo
static inline void
double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
{
+ /*
+ * See mapcount() for why the write lock is also required for the
+ * remote domain.
+ */
if ( lgt < rgt )
{
- spin_lock(&lgt->lock);
- spin_lock(&rgt->lock);
+ write_lock(&lgt->lock);
+ write_lock(&rgt->lock);
}
else
{
if ( lgt != rgt )
- spin_lock(&rgt->lock);
- spin_lock(&lgt->lock);
+ write_lock(&rgt->lock);
+ write_lock(&lgt->lock);
}
}
static inline void
double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
{
- spin_unlock(&lgt->lock);
+ write_unlock(&lgt->lock);
if ( lgt != rgt )
- spin_unlock(&rgt->lock);
+ write_unlock(&rgt->lock);
}
static inline int
@@ -528,7 +532,7 @@ static int grant_map_exists(const struct
{
unsigned int ref, max_iter;
- ASSERT(spin_is_locked(&rgt->lock));
+ ASSERT(rw_is_locked(&rgt->lock));
max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
nr_grant_entries(rgt));
@@ -568,15 +572,15 @@ static void mapcount(
*wrc = *rdc = 0;
/*
- * Must have the local domain's grant table lock when iterating
- * over its maptrack entries.
+ * Must have the local domain's grant table write lock when
+ * iterating over its maptrack entries.
*/
- ASSERT(spin_is_locked(&lgt->lock));
+ ASSERT(rw_is_write_locked(&lgt->lock));
/*
- * Must have the remote domain's grant table lock while counting
- * its active entries.
+ * Must have the remote domain's grant table write lock while
+ * counting its active entries.
*/
- ASSERT(spin_is_locked(&rd->grant_table->lock));
+ ASSERT(rw_is_write_locked(&rd->grant_table->lock));
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
@@ -616,6 +620,7 @@ __gnttab_map_grant_ref(
grant_entry_v2_t *sha2;
grant_entry_header_t *shah;
uint16_t *status;
+ bool_t need_iommu;
led = current;
ld = led->domain;
@@ -661,7 +666,7 @@ __gnttab_map_grant_ref(
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(unlock_out, GNTST_general_error,
@@ -735,7 +740,7 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
if ( !pg )
@@ -811,12 +816,14 @@ __gnttab_map_grant_ref(
goto undo_out;
}
- double_gt_lock(lgt, rgt);
-
- if ( gnttab_need_iommu_mapping(ld) )
+ need_iommu = gnttab_need_iommu_mapping(ld);
+ if ( need_iommu )
{
unsigned int wrc, rdc;
int err = 0;
+
+ double_gt_lock(lgt, rgt);
+
/* We're not translated, so we know that gmfns and mfns are
the same things, so the IOMMU entry is always 1-to-1. */
mapcount(lgt, rd, frame, &wrc, &rdc);
@@ -842,12 +849,22 @@ __gnttab_map_grant_ref(
TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
+ /*
+ * All maptrack entry users check mt->flags first before using the
+ * other fields so just ensure the flags field is stored last.
+ *
+ * However, if gnttab_need_iommu_mapping() then this would race
+ * with a concurrent mapcount() call (on an unmap, for example)
+ * and a lock is required.
+ */
mt = &maptrack_entry(lgt, handle);
mt->domid = op->dom;
mt->ref = op->ref;
- mt->flags = op->flags;
+ wmb();
+ write_atomic(&mt->flags, op->flags);
- double_gt_unlock(lgt, rgt);
+ if ( need_iommu )
+ double_gt_unlock(lgt, rgt);
op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
op->handle = handle;
@@ -870,7 +887,7 @@ __gnttab_map_grant_ref(
put_page(pg);
}
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, op->ref);
@@ -893,7 +910,7 @@ __gnttab_map_grant_ref(
active_entry_release(act);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
op->status = rc;
put_maptrack_handle(lgt, handle);
rcu_unlock_domain(rd);
@@ -943,18 +960,19 @@ __gnttab_unmap_common(
}
op->map = &maptrack_entry(lgt, op->handle);
- spin_lock(&lgt->lock);
- if ( unlikely(!op->map->flags) )
+ read_lock(&lgt->lock);
+
+ if ( unlikely(!read_atomic(&op->map->flags)) )
{
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
op->status = GNTST_bad_handle;
return;
}
dom = op->map->domid;
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
{
@@ -975,9 +993,10 @@ __gnttab_unmap_common(
TRACE_1D(TRC_MEM_PAGE_GRANT_UNMAP, dom);
rgt = rd->grant_table;
- double_gt_lock(lgt, rgt);
- op->flags = op->map->flags;
+ read_lock(&rgt->lock);
+
+ op->flags = read_atomic(&op->map->flags);
if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) )
{
gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle);
@@ -1024,31 +1043,34 @@ __gnttab_unmap_common(
act->pin -= GNTPIN_hstw_inc;
}
- if ( gnttab_need_iommu_mapping(ld) )
+ act_release_out:
+ active_entry_release(act);
+ unmap_out:
+ read_unlock(&rgt->lock);
+
+ if ( rc == GNTST_okay && gnttab_need_iommu_mapping(ld) )
{
unsigned int wrc, rdc;
int err = 0;
+
+ double_gt_lock(lgt, rgt);
+
mapcount(lgt, rd, op->frame, &wrc, &rdc);
if ( (wrc + rdc) == 0 )
err = iommu_unmap_page(ld, op->frame);
else if ( wrc == 0 )
err = iommu_map_page(ld, op->frame, op->frame, IOMMUF_readable);
+
+ double_gt_unlock(lgt, rgt);
+
if ( err )
- {
rc = GNTST_general_error;
- goto act_release_out;
- }
}
/* If just unmapped a writable mapping, mark as dirtied */
- if ( !(op->flags & GNTMAP_readonly) )
+ if ( rc == GNTST_okay && !(op->flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
- act_release_out:
- active_entry_release(act);
- unmap_out:
- double_gt_unlock(lgt, rgt);
-
op->status = rc;
rcu_unlock_domain(rd);
}
@@ -1078,8 +1100,8 @@ __gnttab_unmap_common_complete(struct gn
rcu_lock_domain(rd);
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
goto unlock_out;
@@ -1145,7 +1167,7 @@ __gnttab_unmap_common_complete(struct gn
act_release_out:
active_entry_release(act);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( put_handle )
{
@@ -1332,11 +1354,13 @@ gnttab_unpopulate_status_frames(struct d
gt->nr_status_frames = 0;
}
+/*
+ * Grow the grant table. The caller must hold the grant table's
+ * write lock before calling this function.
+ */
int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
{
- /* d's grant table lock must be held by the caller */
-
struct grant_table *gt = d->grant_table;
unsigned int i, j;
@@ -1442,7 +1466,7 @@ gnttab_setup_table(
}
gt = d->grant_table;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
if ( gt->gt_version == 0 )
gt->gt_version = 1;
@@ -1470,7 +1494,7 @@ gnttab_setup_table(
}
out3:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -1512,13 +1536,13 @@ gnttab_query_size(
goto query_out_unlock;
}
- spin_lock(&d->grant_table->lock);
+ read_lock(&d->grant_table->lock);
op.nr_frames = nr_grant_frames(d->grant_table);
op.max_nr_frames = max_grant_frames;
op.status = GNTST_okay;
- spin_unlock(&d->grant_table->lock);
+ read_unlock(&d->grant_table->lock);
query_out_unlock:
@@ -1544,7 +1568,7 @@ gnttab_prepare_for_transfer(
union grant_combo scombo, prev_scombo, new_scombo;
int retries = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
{
@@ -1595,11 +1619,11 @@ gnttab_prepare_for_transfer(
scombo = prev_scombo;
}
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 1;
fail:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 0;
}
@@ -1614,6 +1638,7 @@ gnttab_transfer(
struct gnttab_transfer gop;
unsigned long mfn;
unsigned int max_bitsize;
+ struct active_grant_entry *act;
for ( i = 0; i < count; i++ )
{
@@ -1791,7 +1816,8 @@ gnttab_transfer(
TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
/* Tell the guest about its new page frame. */
- spin_lock(&e->grant_table->lock);
+ read_lock(&e->grant_table->lock);
+ act = active_entry_acquire(e->grant_table, gop.ref);
if ( e->grant_table->gt_version == 1 )
{
@@ -1809,7 +1835,8 @@ gnttab_transfer(
shared_entry_header(e->grant_table, gop.ref)->flags |=
GTF_transfer_completed;
- spin_unlock(&e->grant_table->lock);
+ active_entry_release(act);
+ read_unlock(&e->grant_table->lock);
rcu_unlock_domain(e);
@@ -1847,7 +1874,7 @@ __release_grant_for_copy(
released_read = 0;
released_write = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
@@ -1889,7 +1916,7 @@ __release_grant_for_copy(
}
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( td != rd )
{
@@ -1947,7 +1974,7 @@ __acquire_grant_for_copy(
*page = NULL;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(gt_unlock_out, GNTST_general_error,
@@ -2023,20 +2050,20 @@ __acquire_grant_for_copy(
* here and reacquire
*/
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
readonly, &grant_frame, page,
&trans_page_off, &trans_length, 0);
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, gref);
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
}
@@ -2049,7 +2076,7 @@ __acquire_grant_for_copy(
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
frame, page, page_off, length,
@@ -2118,7 +2145,7 @@ __acquire_grant_for_copy(
*frame = act->frame;
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
unlock_out_clear:
@@ -2133,7 +2160,7 @@ __acquire_grant_for_copy(
active_entry_release(act);
gt_unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
}
@@ -2449,7 +2476,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
if ( gt->gt_version == op.version )
goto out;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
/* Make sure that the grant table isn't currently in use when we
change the version number, except for the first 8 entries which
are allowed to be in use (xenstore/xenconsole keeps them mapped).
@@ -2534,7 +2561,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
gt->gt_version = op.version;
out_unlock:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out:
op.version = gt->gt_version;
@@ -2590,7 +2617,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
op.status = GNTST_okay;
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
for ( i = 0; i < op.nr_frames; i++ )
{
@@ -2599,7 +2626,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
op.status = GNTST_bad_virt_addr;
}
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
@@ -2649,7 +2676,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
if ( gt->gt_version == 0 )
PIN_FAIL(out, GNTST_general_error, "grant table not yet set up\n");
@@ -2700,7 +2727,7 @@ out:
active_entry_release(act_b);
if ( act_a != NULL )
active_entry_release(act_a);
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
rcu_unlock_domain(d);
@@ -2771,12 +2798,12 @@ static int __gnttab_cache_flush(gnttab_c
if ( d != owner )
{
- spin_lock(&owner->grant_table->lock);
+ read_lock(&owner->grant_table->lock);
ret = grant_map_exists(d, owner->grant_table, mfn, ref_count);
if ( ret != 0 )
{
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
rcu_unlock_domain(d);
put_page(page);
return ret;
@@ -2796,7 +2823,7 @@ static int __gnttab_cache_flush(gnttab_c
ret = 0;
if ( d != owner )
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
unmap_domain_page(v);
put_page(page);
@@ -3015,7 +3042,7 @@ grant_table_create(
goto no_mem_0;
/* Simple stuff. */
- spin_lock_init(&t->lock);
+ rwlock_init(&t->lock);
spin_lock_init(&t->maptrack_lock);
t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
@@ -3125,7 +3152,7 @@ gnttab_release_mappings(
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
@@ -3186,7 +3213,7 @@ gnttab_release_mappings(
gnttab_clear_flag(_GTF_reading, status);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
rcu_unlock_domain(rd);
@@ -3234,7 +3261,7 @@ static void gnttab_usage_print(struct do
printk(" -------- active -------- -------- shared --------\n");
printk("[ref] localdom mfn pin localdom gmfn flags\n");
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
if ( gt->gt_version == 0 )
goto out;
@@ -3287,7 +3314,7 @@ static void gnttab_usage_print(struct do
}
out:
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
if ( first )
printk("grant-table for remote domain:%5d ... "
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -64,6 +64,11 @@ struct grant_mapping {
/* Per-domain grant information. */
struct grant_table {
+ /*
+ * Lock protecting updates to grant table state (version, active
+ * entry list, etc.)
+ */
+ rwlock_t lock;
/* Table size. Number of frames shared with guest */
unsigned int nr_grant_frames;
/* Shared grant table (see include/public/grant_table.h). */
@@ -84,8 +89,6 @@ struct grant_table {
unsigned int maptrack_limit;
/* Lock protecting the maptrack page list, head, and limit */
spinlock_t maptrack_lock;
- /* Lock protecting updates to active and shared grant tables. */
- spinlock_t lock;
/* The defined versions are 1 and 2. Set to 0 if we don't know
what version to use yet. */
unsigned gt_version;
@@ -103,7 +106,7 @@ gnttab_release_mappings(
struct domain *d);
/* Increase the size of a domain's grant table.
- * Caller must hold d's grant table lock.
+ * Caller must hold d's grant table write lock.
*/
int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames);

View File

@ -1,47 +0,0 @@
# Commit a622b5ade2bdf79ad95e6088a4041e75253c43f3
# Date 2015-06-16 12:30:16 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: factor out freeing an event channel
We're going to want to free an event channel from two places. Factor out
the code into a free_evtchn() function.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -194,6 +194,17 @@ static int get_free_port(struct domain *
return port;
}
+static void free_evtchn(struct domain *d, struct evtchn *chn)
+{
+ /* Clear pending event to avoid unexpected behavior on re-bind. */
+ evtchn_port_clear_pending(d, chn);
+
+ /* Reset binding to vcpu0 when the channel is freed. */
+ chn->state = ECS_FREE;
+ chn->notify_vcpu_id = 0;
+
+ xsm_evtchn_close_post(chn);
+}
static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
{
@@ -568,14 +579,7 @@ static long __evtchn_close(struct domain
BUG();
}
- /* Clear pending event to avoid unexpected behavior on re-bind. */
- evtchn_port_clear_pending(d1, chn1);
-
- /* Reset binding to vcpu0 when the channel is freed. */
- chn1->state = ECS_FREE;
- chn1->notify_vcpu_id = 0;
-
- xsm_evtchn_close_post(chn1);
+ free_evtchn(d1, chn1);
out:
if ( d2 != NULL )

View File

@ -1,63 +0,0 @@
# Commit 01280dc19cf3da089f98faf4f524b54b5a191df0
# Date 2015-06-18 14:53:23 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: simplify port_is_valid()
By keeping a count of the number of currently valid event channels,
port_is_valid() can be simplified.
d->valid_evtchns is only increased (while holding d->event_lock), so
port_is_valid() may be safely called without taking the lock (this
will be useful later).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -191,6 +191,8 @@ static int get_free_port(struct domain *
return -ENOMEM;
bucket_from_port(d, port) = chn;
+ write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET);
+
return port;
}
@@ -1264,6 +1266,7 @@ int evtchn_init(struct domain *d)
d->evtchn = alloc_evtchn_bucket(d, 0);
if ( !d->evtchn )
return -ENOMEM;
+ d->valid_evtchns = EVTCHNS_PER_BUCKET;
spin_lock_init(&d->event_lock);
if ( get_free_port(d) != 0 )
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -90,11 +90,7 @@ static inline bool_t port_is_valid(struc
{
if ( p >= d->max_evtchns )
return 0;
- if ( !d->evtchn )
- return 0;
- if ( p < EVTCHNS_PER_BUCKET )
- return 1;
- return group_from_port(d, p) != NULL && bucket_from_port(d, p) != NULL;
+ return p < read_atomic(&d->valid_evtchns);
}
static inline struct evtchn *evtchn_from_port(struct domain *d, unsigned int p)
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -339,8 +339,9 @@ struct domain
/* Event channel information. */
struct evtchn *evtchn; /* first bucket only */
struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
- unsigned int max_evtchns;
- unsigned int max_evtchn_port;
+ unsigned int max_evtchns; /* number supported by ABI */
+ unsigned int max_evtchn_port; /* max permitted port number */
+ unsigned int valid_evtchns; /* number of allocated event channels */
spinlock_t event_lock;
const struct evtchn_port_ops *evtchn_port_ops;
struct evtchn_fifo_domain *evtchn_fifo;

View File

@ -1,32 +0,0 @@
# Commit e156654d4eb2fdeb524e6b40838767a5dc918966
# Date 2015-06-18 14:54:25 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: remove the locking when unmasking an event channel
The event channel lock is no longer required to check if the port is
valid.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -931,8 +931,6 @@ int evtchn_unmask(unsigned int port)
struct domain *d = current->domain;
struct evtchn *evtchn;
- ASSERT(spin_is_locked(&d->event_lock));
-
if ( unlikely(!port_is_valid(d, port)) )
return -EINVAL;
@@ -1099,9 +1097,7 @@ long do_event_channel_op(int cmd, XEN_GU
struct evtchn_unmask unmask;
if ( copy_from_guest(&unmask, arg, 1) != 0 )
return -EFAULT;
- spin_lock(&current->domain->event_lock);
rc = evtchn_unmask(unmask.port);
- spin_unlock(&current->domain->event_lock);
break;
}

View File

@ -1,289 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 236e13ce60e1c0eb0535ad258e74a3789bc0d074
# Date 2015-06-19 10:58:45 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI-X: cleanup
- __pci_enable_msix() now checks that an MSI-X capability was actually
found
- pass "pos" to msix_capability_init() as both callers already know it
(and hence there's no need to re-obtain it)
- call __pci_disable_msi{,x}() directly instead of via
pci_disable_msi() from __pci_enable_msi{x,}() state validation paths
- use msix_control_reg() instead of open coding it
- log message adjustments
- coding style corrections
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Index: xen-4.5.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.5.2-testing/xen/arch/x86/msi.c
@@ -35,6 +35,8 @@
static s8 __read_mostly use_msi = -1;
boolean_param("msi", use_msi);
+static void __pci_disable_msix(struct msi_desc *);
+
/* bitmap indicate which fixed map is free */
static DEFINE_SPINLOCK(msix_fixmap_lock);
static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
@@ -129,12 +131,14 @@ void msi_compose_msg(unsigned vector, co
unsigned dest;
memset(msg, 0, sizeof(*msg));
- if ( !cpumask_intersects(cpu_mask, &cpu_online_map) ) {
+ if ( !cpumask_intersects(cpu_mask, &cpu_online_map) )
+ {
dprintk(XENLOG_ERR,"%s, compose msi message error!!\n", __func__);
return;
}
- if ( vector ) {
+ if ( vector )
+ {
cpumask_t *mask = this_cpu(scratch_mask);
cpumask_and(mask, cpu_mask, &cpu_online_map);
@@ -195,8 +199,7 @@ static void read_msi_msg(struct msi_desc
}
case PCI_CAP_ID_MSIX:
{
- void __iomem *base;
- base = entry->mask_base;
+ void __iomem *base = entry->mask_base;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -257,8 +260,7 @@ static int write_msi_msg(struct msi_desc
}
case PCI_CAP_ID_MSIX:
{
- void __iomem *base;
- base = entry->mask_base;
+ void __iomem *base = entry->mask_base;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -281,7 +283,7 @@ void set_msi_affinity(struct irq_desc *d
struct msi_desc *msi_desc = desc->msi_desc;
dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID || !msi_desc)
+ if ( dest == BAD_APICID || !msi_desc )
return;
ASSERT(spin_is_locked(&desc->lock));
@@ -332,11 +334,11 @@ static void msix_set_enable(struct pci_d
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
if ( pos )
{
- control = pci_conf_read16(seg, bus, slot, func, pos + PCI_MSIX_FLAGS);
+ control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
control &= ~PCI_MSIX_FLAGS_ENABLE;
if ( enable )
control |= PCI_MSIX_FLAGS_ENABLE;
- pci_conf_write16(seg, bus, slot, func, pos + PCI_MSIX_FLAGS, control);
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
}
}
@@ -353,9 +355,11 @@ static void msi_set_mask_bit(struct irq_
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
- switch (entry->msi_attrib.type) {
+ switch ( entry->msi_attrib.type )
+ {
case PCI_CAP_ID_MSI:
- if (entry->msi_attrib.maskbit) {
+ if ( entry->msi_attrib.maskbit )
+ {
u32 mask_bits;
u16 seg = entry->dev->seg;
u8 bus = entry->dev->bus;
@@ -703,13 +707,14 @@ static u64 read_pci_mem_bar(u16 seg, u8
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
**/
static int msix_capability_init(struct pci_dev *dev,
+ unsigned int pos,
struct msi_info *msi,
struct msi_desc **desc,
unsigned int nr_entries)
{
struct arch_msix *msix = dev->msix;
struct msi_desc *entry = NULL;
- int pos, vf;
+ int vf;
u16 control;
u64 table_paddr;
u32 table_offset;
@@ -721,7 +726,6 @@ static int msix_capability_init(struct p
ASSERT(spin_is_locked(&pcidevs_lock));
- pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
@@ -886,10 +890,9 @@ static int __pci_enable_msi(struct msi_i
old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "irq %d has already mapped to MSI on "
- "device %04x:%02x:%02x.%01x\n",
- msi->irq, msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
+ msi->irq, msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
*desc = old_desc;
return 0;
}
@@ -897,10 +900,10 @@ static int __pci_enable_msi(struct msi_i
old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "MSI-X is already in use on "
- "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
- pci_disable_msi(old_desc);
+ printk(XENLOG_WARNING "MSI-X already in use on %04x:%02x:%02x.%u\n",
+ msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ __pci_disable_msix(old_desc);
}
return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
@@ -914,7 +917,6 @@ static void __pci_disable_msi(struct msi
msi_set_enable(dev, 0);
BUG_ON(list_empty(&dev->msi_list));
-
}
/**
@@ -934,7 +936,7 @@ static void __pci_disable_msi(struct msi
**/
static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
{
- int status, pos, nr_entries;
+ int pos, nr_entries;
struct pci_dev *pdev;
u16 control;
u8 slot = PCI_SLOT(msi->devfn);
@@ -943,23 +945,22 @@ static int __pci_enable_msix(struct msi_
ASSERT(spin_is_locked(&pcidevs_lock));
pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
- if ( !pdev )
+ pos = pci_find_cap_offset(msi->seg, msi->bus, slot, func, PCI_CAP_ID_MSIX);
+ if ( !pdev || !pos )
return -ENODEV;
- pos = pci_find_cap_offset(msi->seg, msi->bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(msi->seg, msi->bus, slot, func,
msix_control_reg(pos));
nr_entries = multi_msix_capable(control);
- if (msi->entry_nr >= nr_entries)
+ if ( msi->entry_nr >= nr_entries )
return -EINVAL;
old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "irq %d has already mapped to MSIX on "
- "device %04x:%02x:%02x.%01x\n",
- msi->irq, msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
+ msi->irq, msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
*desc = old_desc;
return 0;
}
@@ -967,15 +968,13 @@ static int __pci_enable_msix(struct msi_
old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
if ( old_desc )
{
- dprintk(XENLOG_WARNING, "MSI is already in use on "
- "device %04x:%02x:%02x.%01x\n", msi->seg, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
- pci_disable_msi(old_desc);
-
+ printk(XENLOG_WARNING "MSI already in use on %04x:%02x:%02x.%u\n",
+ msi->seg, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ __pci_disable_msi(old_desc);
}
- status = msix_capability_init(pdev, msi, desc, nr_entries);
- return status;
+ return msix_capability_init(pdev, pos, msi, desc, nr_entries);
}
static void _pci_cleanup_msix(struct arch_msix *msix)
@@ -993,19 +992,16 @@ static void _pci_cleanup_msix(struct arc
static void __pci_disable_msix(struct msi_desc *entry)
{
- struct pci_dev *dev;
- int pos;
- u16 control, seg;
- u8 bus, slot, func;
-
- dev = entry->dev;
- seg = dev->seg;
- bus = dev->bus;
- slot = PCI_SLOT(dev->devfn);
- func = PCI_FUNC(dev->devfn);
+ struct pci_dev *dev = entry->dev;
+ u16 seg = dev->seg;
+ u8 bus = dev->bus;
+ u8 slot = PCI_SLOT(dev->devfn);
+ u8 func = PCI_FUNC(dev->devfn);
+ unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
+ PCI_CAP_ID_MSIX);
+ u16 control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
- pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
- control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);
BUG_ON(list_empty(&dev->msi_list));
@@ -1047,7 +1043,7 @@ int pci_prepare_msix(u16 seg, u8 bus, u8
u16 control = pci_conf_read16(seg, bus, slot, func,
msix_control_reg(pos));
- rc = msix_capability_init(pdev, NULL, NULL,
+ rc = msix_capability_init(pdev, pos, NULL, NULL,
multi_msix_capable(control));
}
spin_unlock(&pcidevs_lock);
@@ -1066,8 +1062,8 @@ int pci_enable_msi(struct msi_info *msi,
if ( !use_msi )
return -EPERM;
- return msi->table_base ? __pci_enable_msix(msi, desc) :
- __pci_enable_msi(msi, desc);
+ return msi->table_base ? __pci_enable_msix(msi, desc) :
+ __pci_enable_msi(msi, desc);
}
/*
@@ -1117,7 +1113,9 @@ int pci_restore_msi_state(struct pci_dev
if ( !pdev )
return -EINVAL;
- ret = xsm_resource_setup_pci(XSM_PRIV, (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn);
+ ret = xsm_resource_setup_pci(XSM_PRIV,
+ (pdev->seg << 16) | (pdev->bus << 8) |
+ pdev->devfn);
if ( ret )
return ret;

View File

@ -1,408 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit ad28e42bd1d28d746988ed71654e8aa670629753
# Date 2015-06-19 10:59:53 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI: track host and guest masking separately
In particular we want to avoid losing track of our own intention to
have an entry masked. Physical unmasking now happens only when both
host and guest requested so.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
# Commit 84d6add5593d865736831d150da7c38588f669f6
# Date 2015-07-10 12:36:24 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI: fix guest unmasking when handling IRQ via event channel
Rather than assuming only PV guests need special treatment (and
dealing with that directly when an IRQ gets set up), keep all guest MSI
IRQs masked until either the (HVM) guest unmasks them via vMSI or the
(PV, PVHVM, or PVH) guest sets up an event channel for it.
To not further clutter the common evtchn_bind_pirq() with x86-specific
code, introduce an arch_evtchn_bind_pirq() hook instead.
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Index: xen-4.5.2-testing/xen/arch/x86/hpet.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/hpet.c
+++ xen-4.5.2-testing/xen/arch/x86/hpet.c
@@ -240,7 +240,7 @@ static void hpet_msi_unmask(struct irq_d
cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
cfg |= HPET_TN_ENABLE;
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
- ch->msi.msi_attrib.masked = 0;
+ ch->msi.msi_attrib.host_masked = 0;
}
static void hpet_msi_mask(struct irq_desc *desc)
@@ -251,7 +251,7 @@ static void hpet_msi_mask(struct irq_des
cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
cfg &= ~HPET_TN_ENABLE;
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
- ch->msi.msi_attrib.masked = 1;
+ ch->msi.msi_attrib.host_masked = 1;
}
static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
Index: xen-4.5.2-testing/xen/arch/x86/hvm/vmsi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/hvm/vmsi.c
+++ xen-4.5.2-testing/xen/arch/x86/hvm/vmsi.c
@@ -219,7 +219,6 @@ static int msixtbl_read(
{
unsigned long offset;
struct msixtbl_entry *entry;
- void *virt;
unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
@@ -253,13 +252,20 @@ static int msixtbl_read(
}
if ( offset == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
- virt = msixtbl_addr_to_virt(entry, address);
+ const struct msi_desc *msi_desc;
+ void *virt = msixtbl_addr_to_virt(entry, address);
+
if ( !virt )
goto out;
+ msi_desc = virt_to_msi_desc(entry->pdev, virt);
+ if ( !msi_desc )
+ goto out;
if ( len == 4 )
- *pval = readl(virt);
+ *pval = MASK_INSR(msi_desc->msi_attrib.guest_masked,
+ PCI_MSIX_VECTOR_BITMASK);
else
- *pval |= (u64)readl(virt) << 32;
+ *pval |= (u64)MASK_INSR(msi_desc->msi_attrib.guest_masked,
+ PCI_MSIX_VECTOR_BITMASK) << 32;
}
r = X86EMUL_OKAY;
@@ -277,7 +283,7 @@ static int msixtbl_write(struct vcpu *v,
void *virt;
unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
- unsigned long flags, orig;
+ unsigned long flags;
struct irq_desc *desc;
if ( (len != 4 && len != 8) || (address & (len - 1)) )
@@ -337,37 +343,7 @@ static int msixtbl_write(struct vcpu *v,
ASSERT(msi_desc == desc->msi_desc);
- orig = readl(virt);
-
- /*
- * Do not allow guest to modify MSI-X control bit if it is masked
- * by Xen. We'll only handle the case where Xen thinks that
- * bit is unmasked, but hardware has silently masked the bit
- * (in case of SR-IOV VF reset, etc). On the other hand, if Xen
- * thinks that the bit is masked, but it's really not,
- * we log a warning.
- */
- if ( msi_desc->msi_attrib.masked )
- {
- if ( !(orig & PCI_MSIX_VECTOR_BITMASK) )
- printk(XENLOG_WARNING "MSI-X control bit is unmasked when"
- " it is expected to be masked [%04x:%02x:%02x.%u]\n",
- entry->pdev->seg, entry->pdev->bus,
- PCI_SLOT(entry->pdev->devfn),
- PCI_FUNC(entry->pdev->devfn));
-
- goto unlock;
- }
-
- /*
- * The mask bit is the only defined bit in the word. But we
- * ought to preserve the reserved bits. Clearing the reserved
- * bits can result in undefined behaviour (see PCI Local Bus
- * Specification revision 2.3).
- */
- val &= PCI_MSIX_VECTOR_BITMASK;
- val |= (orig & ~PCI_MSIX_VECTOR_BITMASK);
- writel(val, virt);
+ guest_mask_msi_irq(desc, !!(val & PCI_MSIX_VECTOR_BITMASK));
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
Index: xen-4.5.2-testing/xen/arch/x86/irq.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/irq.c
+++ xen-4.5.2-testing/xen/arch/x86/irq.c
@@ -2503,6 +2503,25 @@ int unmap_domain_pirq_emuirq(struct doma
return ret;
}
+void arch_evtchn_bind_pirq(struct domain *d, int pirq)
+{
+ int irq = domain_pirq_to_irq(d, pirq);
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ if ( irq <= 0 )
+ return;
+
+ if ( is_hvm_domain(d) )
+ map_domain_emuirq_pirq(d, pirq, IRQ_PT);
+
+ desc = irq_to_desc(irq);
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( desc->msi_desc )
+ guest_mask_msi_irq(desc, 0);
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
bool_t hvm_domain_use_pirq(const struct domain *d, const struct pirq *pirq)
{
return is_hvm_domain(d) && pirq &&
Index: xen-4.5.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.5.2-testing/xen/arch/x86/msi.c
@@ -349,9 +349,10 @@ int msi_maskable_irq(const struct msi_de
|| entry->msi_attrib.maskbit;
}
-static void msi_set_mask_bit(struct irq_desc *desc, int flag)
+static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
{
struct msi_desc *entry = desc->msi_desc;
+ bool_t flag = host || guest;
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
@@ -383,7 +384,8 @@ static void msi_set_mask_bit(struct irq_
BUG();
break;
}
- entry->msi_attrib.masked = !!flag;
+ entry->msi_attrib.host_masked = host;
+ entry->msi_attrib.guest_masked = guest;
}
static int msi_get_mask_bit(const struct msi_desc *entry)
@@ -405,20 +407,30 @@ static int msi_get_mask_bit(const struct
void mask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 1);
+ msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
}
void unmask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 0);
+ msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
+}
+
+void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask)
+{
+ msi_set_mask_bit(desc, desc->msi_desc->msi_attrib.host_masked, mask);
}
static unsigned int startup_msi_irq(struct irq_desc *desc)
{
- unmask_msi_irq(desc);
+ msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
return 0;
}
+static void shutdown_msi_irq(struct irq_desc *desc)
+{
+ msi_set_mask_bit(desc, 1, 1);
+}
+
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
{
irq_complete_move(desc);
@@ -443,7 +455,7 @@ void end_nonmaskable_msi_irq(struct irq_
static hw_irq_controller pci_msi_maskable = {
.typename = "PCI-MSI/-X",
.startup = startup_msi_irq,
- .shutdown = mask_msi_irq,
+ .shutdown = shutdown_msi_irq,
.enable = unmask_msi_irq,
.disable = mask_msi_irq,
.ack = ack_maskable_msi_irq,
@@ -593,7 +605,8 @@ static int msi_capability_init(struct pc
entry[i].msi_attrib.is_64 = is_64bit_address(control);
entry[i].msi_attrib.entry_nr = i;
entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
- entry[i].msi_attrib.masked = 1;
+ entry[i].msi_attrib.host_masked = 1;
+ entry[i].msi_attrib.guest_masked = 0;
entry[i].msi_attrib.pos = pos;
if ( entry[i].msi_attrib.maskbit )
entry[i].msi.mpos = mpos;
@@ -819,7 +832,8 @@ static int msix_capability_init(struct p
entry->msi_attrib.is_64 = 1;
entry->msi_attrib.entry_nr = msi->entry_nr;
entry->msi_attrib.maskbit = 1;
- entry->msi_attrib.masked = 1;
+ entry->msi_attrib.host_masked = 1;
+ entry->msi_attrib.guest_masked = 1;
entry->msi_attrib.pos = pos;
entry->irq = msi->irq;
entry->dev = dev;
@@ -1154,7 +1168,8 @@ int pci_restore_msi_state(struct pci_dev
for ( i = 0; ; )
{
- msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+ msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked,
+ entry[i].msi_attrib.guest_masked);
if ( !--nr )
break;
@@ -1306,7 +1321,7 @@ static void dump_msi(unsigned char key)
else
mask = '?';
printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
- " dest=%08x mask=%d/%d/%c\n",
+ " dest=%08x mask=%d/%c%c/%c\n",
type, irq,
(data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
@@ -1314,7 +1329,10 @@ static void dump_msi(unsigned char key)
data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
- dest32, attr.maskbit, attr.masked, mask);
+ dest32, attr.maskbit,
+ attr.host_masked ? 'H' : ' ',
+ attr.guest_masked ? 'G' : ' ',
+ mask);
}
}
Index: xen-4.5.2-testing/xen/common/event_channel.c
===================================================================
--- xen-4.5.2-testing.orig/xen/common/event_channel.c
+++ xen-4.5.2-testing/xen/common/event_channel.c
@@ -445,10 +445,7 @@ static long evtchn_bind_pirq(evtchn_bind
bind->port = port;
-#ifdef CONFIG_X86
- if ( is_hvm_domain(d) && domain_pirq_to_irq(d, pirq) > 0 )
- map_domain_emuirq_pirq(d, pirq, IRQ_PT);
-#endif
+ arch_evtchn_bind_pirq(d, pirq);
out:
spin_unlock(&d->event_lock);
Index: xen-4.5.2-testing/xen/drivers/passthrough/amd/iommu_init.c
===================================================================
--- xen-4.5.2-testing.orig/xen/drivers/passthrough/amd/iommu_init.c
+++ xen-4.5.2-testing/xen/drivers/passthrough/amd/iommu_init.c
@@ -451,7 +451,7 @@ static void iommu_msi_unmask(struct irq_
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
spin_unlock_irqrestore(&iommu->lock, flags);
- iommu->msi.msi_attrib.masked = 0;
+ iommu->msi.msi_attrib.host_masked = 0;
}
static void iommu_msi_mask(struct irq_desc *desc)
@@ -464,7 +464,7 @@ static void iommu_msi_mask(struct irq_de
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
spin_unlock_irqrestore(&iommu->lock, flags);
- iommu->msi.msi_attrib.masked = 1;
+ iommu->msi.msi_attrib.host_masked = 1;
}
static unsigned int iommu_msi_startup(struct irq_desc *desc)
Index: xen-4.5.2-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-4.5.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-4.5.2-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -999,7 +999,7 @@ static void dma_msi_unmask(struct irq_de
sts &= ~DMA_FECTL_IM;
dmar_writel(iommu->reg, DMAR_FECTL_REG, sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- iommu->msi.msi_attrib.masked = 0;
+ iommu->msi.msi_attrib.host_masked = 0;
}
static void dma_msi_mask(struct irq_desc *desc)
@@ -1014,7 +1014,7 @@ static void dma_msi_mask(struct irq_desc
sts |= DMA_FECTL_IM;
dmar_writel(iommu->reg, DMAR_FECTL_REG, sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- iommu->msi.msi_attrib.masked = 1;
+ iommu->msi.msi_attrib.host_masked = 1;
}
static unsigned int dma_msi_startup(struct irq_desc *desc)
Index: xen-4.5.2-testing/xen/include/asm-arm/irq.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-arm/irq.h
+++ xen-4.5.2-testing/xen/include/asm-arm/irq.h
@@ -44,6 +44,8 @@ int route_irq_to_guest(struct domain *d,
const char *devname);
void arch_move_irqs(struct vcpu *v);
+#define arch_evtchn_bind_pirq(d, pirq) ((void)((d) + (pirq)))
+
/* Set IRQ type for an SPI */
int irq_set_spi_type(unsigned int spi, unsigned int type);
Index: xen-4.5.2-testing/xen/include/asm-x86/msi.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-x86/msi.h
+++ xen-4.5.2-testing/xen/include/asm-x86/msi.h
@@ -90,12 +90,13 @@ extern unsigned int pci_msix_get_table_l
struct msi_desc {
struct msi_attrib {
- __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */
- __u8 maskbit : 1; /* mask-pending bit supported ? */
- __u8 masked : 1;
+ __u8 type; /* {0: unused, 5h:MSI, 11h:MSI-X} */
+ __u8 pos; /* Location of the MSI capability */
+ __u8 maskbit : 1; /* mask/pending bit supported ? */
__u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */
- __u8 pos; /* Location of the msi capability */
- __u16 entry_nr; /* specific enabled entry */
+ __u8 host_masked : 1;
+ __u8 guest_masked : 1;
+ __u16 entry_nr; /* specific enabled entry */
} msi_attrib;
struct list_head list;
@@ -236,6 +237,7 @@ void msi_compose_msg(unsigned vector, co
void __msi_set_enable(u16 seg, u8 bus, u8 slot, u8 func, int pos, int enable);
void mask_msi_irq(struct irq_desc *);
void unmask_msi_irq(struct irq_desc *);
+void guest_mask_msi_irq(struct irq_desc *, bool_t mask);
void ack_nonmaskable_msi_irq(struct irq_desc *);
void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector);
void set_msi_affinity(struct irq_desc *, const cpumask_t *);
Index: xen-4.5.2-testing/xen/include/xen/irq.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/xen/irq.h
+++ xen-4.5.2-testing/xen/include/xen/irq.h
@@ -172,4 +172,8 @@ unsigned int set_desc_affinity(struct ir
unsigned int arch_hwdom_irqs(domid_t);
#endif
+#ifndef arch_evtchn_bind_pirq
+void arch_evtchn_bind_pirq(struct domain *, int pirq);
+#endif
+
#endif /* __XEN_IRQ_H__ */

View File

@ -1,284 +0,0 @@
# Commit dff515dfeac4c1c13422a128c558ac21ddc6c8db
# Date 2015-06-19 11:01:24 +0200
# Author Malcolm Crossley <malcolm.crossley@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: use per-VCPU maptrack free lists
Performance analysis of aggregate network throughput with many VMs
shows that performance is signficantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.
Instead of a single free list use a per-VCPU list. This avoids any
contention when obtaining a handle. Handles must be released back to
their original list and since this may occur on a different VCPU there
is some contention on the destination VCPU's free list tail pointer
(but this is much better than a per-domain lock).
Increase the default maximum number of maptrack frames by 4 times
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
b) a guest may not evenly distribute all the grant map operations
across the VCPUs (meaning some VCPUs need more maptrack entries than
others).
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -126,6 +126,8 @@ struct vcpu *alloc_vcpu(
tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
+ grant_table_init_vcpu(v);
+
if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
!zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
!zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -37,6 +37,7 @@
#include <xen/iommu.h>
#include <xen/paging.h>
#include <xen/keyhandler.h>
+#include <xen/vmap.h>
#include <xsm/xsm.h>
#include <asm/flushtlb.h>
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_g
* New options allow to set max_maptrack_frames and
* map_grant_table_frames independently.
*/
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
static unsigned int __read_mostly max_maptrack_frames;
integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
@@ -279,62 +280,103 @@ double_gt_unlock(struct grant_table *lgt
static inline int
__get_maptrack_handle(
- struct grant_table *t)
+ struct grant_table *t,
+ struct vcpu *v)
{
- unsigned int h;
- if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+ unsigned int head, next;
+
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = v->maptrack_head;
+ if ( unlikely(head == MAPTRACK_TAIL) )
return -1;
- t->maptrack_head = maptrack_entry(t, h).ref;
- return h;
+
+ /*
+ * Always keep one entry in the free list to make it easier to add
+ * free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
+ return -1;
+
+ v->maptrack_head = next;
+
+ return head;
}
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
{
- spin_lock(&t->maptrack_lock);
- maptrack_entry(t, handle).ref = t->maptrack_head;
- t->maptrack_head = handle;
- spin_unlock(&t->maptrack_lock);
+ struct domain *currd = current->domain;
+ struct vcpu *v;
+ unsigned int prev_tail, cur_tail;
+
+ /* 1. Set entry to be a tail. */
+ maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+
+ /* 2. Add entry to the tail of the list on the original VCPU. */
+ v = currd->vcpu[maptrack_entry(t, handle).vcpu];
+
+ cur_tail = read_atomic(&v->maptrack_tail);
+ do {
+ prev_tail = cur_tail;
+ cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
+ } while ( cur_tail != prev_tail );
+
+ /* 3. Update the old tail entry to point to the new entry. */
+ write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
}
static inline int
get_maptrack_handle(
struct grant_table *lgt)
{
+ struct vcpu *curr = current;
int i;
grant_handle_t handle;
struct grant_mapping *new_mt;
- unsigned int new_mt_limit, nr_frames;
+
+ handle = __get_maptrack_handle(lgt, curr);
+ if ( likely(handle != -1) )
+ return handle;
spin_lock(&lgt->maptrack_lock);
- while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
+ if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
- nr_frames = nr_maptrack_frames(lgt);
- if ( nr_frames >= max_maptrack_frames )
- break;
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
- new_mt = alloc_xenheap_page();
- if ( !new_mt )
- break;
+ new_mt = alloc_xenheap_page();
+ if ( !new_mt )
+ {
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
+ clear_page(new_mt);
- clear_page(new_mt);
+ /*
+ * Use the first new entry and add the remaining entries to the
+ * head of the free list.
+ */
+ handle = lgt->maptrack_limit;
- new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+ for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
+ {
+ new_mt[i].ref = handle + i + 1;
+ new_mt[i].vcpu = curr->vcpu_id;
+ }
+ new_mt[i - 1].ref = curr->maptrack_head;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- new_mt[i - 1].ref = lgt->maptrack_limit + i;
- new_mt[i - 1].ref = lgt->maptrack_head;
- lgt->maptrack_head = lgt->maptrack_limit;
+ /* Set tail directly if this is the first page for this VCPU. */
+ if ( curr->maptrack_tail == MAPTRACK_TAIL )
+ curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
- lgt->maptrack[nr_frames] = new_mt;
- smp_wmb();
- lgt->maptrack_limit = new_mt_limit;
+ curr->maptrack_head = handle + 1;
- gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
- nr_frames + 1);
- }
+ lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
+ lgt->maptrack_limit += MAPTRACK_PER_PAGE;
spin_unlock(&lgt->maptrack_lock);
@@ -3061,16 +3103,9 @@ grant_table_create(
}
/* Tracking of mapped foreign frames table */
- if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
- max_maptrack_frames)) == NULL )
+ t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
+ if ( t->maptrack == NULL )
goto no_mem_2;
- if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
- goto no_mem_3;
- clear_page(t->maptrack[0]);
- t->maptrack_limit = MAPTRACK_PER_PAGE;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- t->maptrack[0][i - 1].ref = i;
- t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
/* Shared grant table. */
if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
@@ -3102,8 +3137,7 @@ grant_table_create(
free_xenheap_page(t->shared_raw[i]);
xfree(t->shared_raw);
no_mem_3:
- free_xenheap_page(t->maptrack[0]);
- xfree(t->maptrack);
+ vfree(t->maptrack);
no_mem_2:
for ( i = 0;
i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
@@ -3238,7 +3272,7 @@ grant_table_destroy(
for ( i = 0; i < nr_maptrack_frames(t); i++ )
free_xenheap_page(t->maptrack[i]);
- xfree(t->maptrack);
+ vfree(t->maptrack);
for ( i = 0; i < nr_active_grant_frames(t); i++ )
free_xenheap_page(t->active[i]);
@@ -3252,6 +3286,12 @@ grant_table_destroy(
d->grant_table = NULL;
}
+void grant_table_init_vcpu(struct vcpu *v)
+{
+ v->maptrack_head = MAPTRACK_TAIL;
+ v->maptrack_tail = MAPTRACK_TAIL;
+}
+
static void gnttab_usage_print(struct domain *rd)
{
int first = 1;
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -60,6 +60,8 @@ struct grant_mapping {
u32 ref; /* grant ref */
u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */
domid_t domid; /* granting domain */
+ u32 vcpu; /* vcpu which created the grant mapping */
+ u32 pad; /* round size to a power of 2 */
};
/* Per-domain grant information. */
@@ -83,9 +85,8 @@ struct grant_table {
grant_status_t **status;
/* Active grant table. */
struct active_grant_entry **active;
- /* Mapping tracking table. */
+ /* Mapping tracking table per vcpu. */
struct grant_mapping **maptrack;
- unsigned int maptrack_head;
unsigned int maptrack_limit;
/* Lock protecting the maptrack page list, head, and limit */
spinlock_t maptrack_lock;
@@ -99,6 +100,7 @@ int grant_table_create(
struct domain *d);
void grant_table_destroy(
struct domain *d);
+void grant_table_init_vcpu(struct vcpu *v);
/* Domain death release of granted mappings of other domains' memory. */
void
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -219,6 +219,10 @@ struct vcpu
/* VCPU paused by system controller. */
int controller_pause_count;
+ /* Maptrack */
+ unsigned int maptrack_head;
+ unsigned int maptrack_tail;
+
/* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
evtchn_port_t virq_to_evtchn[NR_VIRQS];
spinlock_t virq_lock;

View File

@ -1,153 +0,0 @@
# Commit e76ff6c156906b515c2a4300a81c95886ece5d5f
# Date 2015-06-19 11:02:04 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: steal maptrack entries from other VCPUs
If a guest is not evenly grant mapping across its VCPUs one of the
VCPUs may run out of free maptrack entries even though other VCPUs
have many free.
If this happens, "steal" free entries from other VCPUs. We want to
steal entries such that:
a) We avoid ping-ponging stolen entries between VCPUs.
b) The number of free entries owned by each VCPUs tends (over time) to
the number it uses.
So when stealing, we select a VCPU at random (reducing (a)) and we
transfer the stolen entries to the thief VCPU (aiming for (b)).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -283,26 +283,70 @@ __get_maptrack_handle(
struct grant_table *t,
struct vcpu *v)
{
- unsigned int head, next;
+ unsigned int head, next, prev_head;
- /* No maptrack pages allocated for this VCPU yet? */
- head = v->maptrack_head;
- if ( unlikely(head == MAPTRACK_TAIL) )
- return -1;
-
- /*
- * Always keep one entry in the free list to make it easier to add
- * free entries to the tail.
- */
- next = read_atomic(&maptrack_entry(t, head).ref);
- if ( unlikely(next == MAPTRACK_TAIL) )
- return -1;
+ do {
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = read_atomic(&v->maptrack_head);
+ if ( unlikely(head == MAPTRACK_TAIL) )
+ return -1;
- v->maptrack_head = next;
+ /*
+ * Always keep one entry in the free list to make it easier to
+ * add free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
+ return -1;
+
+ prev_head = head;
+ head = cmpxchg(&v->maptrack_head, prev_head, next);
+ } while ( head != prev_head );
return head;
}
+/*
+ * Try to "steal" a free maptrack entry from another VCPU.
+ *
+ * A stolen entry is transferred to the thief, so the number of
+ * entries for each VCPU should tend to the usage pattern.
+ *
+ * To avoid having to atomically count the number of free entries on
+ * each VCPU and to avoid two VCPU repeatedly stealing entries from
+ * each other, the initial victim VCPU is selected randomly.
+ */
+static int steal_maptrack_handle(struct grant_table *t,
+ const struct vcpu *curr)
+{
+ const struct domain *currd = curr->domain;
+ unsigned int first, i;
+
+ /* Find an initial victim. */
+ first = i = get_random() % currd->max_vcpus;
+
+ do {
+ if ( currd->vcpu[i] )
+ {
+ int handle;
+
+ handle = __get_maptrack_handle(t, currd->vcpu[i]);
+ if ( handle != -1 )
+ {
+ maptrack_entry(t, handle).vcpu = curr->vcpu_id;
+ return handle;
+ }
+ }
+
+ i++;
+ if ( i == currd->max_vcpus )
+ i = 0;
+ } while ( i != first );
+
+ /* No free handles on any VCPU. */
+ return -1;
+}
+
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
@@ -342,10 +386,31 @@ get_maptrack_handle(
spin_lock(&lgt->maptrack_lock);
+ /*
+ * If we've run out of frames, try stealing an entry from another
+ * VCPU (in case the guest isn't mapping across its VCPUs evenly).
+ */
if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
+ /*
+ * Can drop the lock since no other VCPU can be adding a new
+ * frame once they've run out.
+ */
spin_unlock(&lgt->maptrack_lock);
- return -1;
+
+ /*
+ * Uninitialized free list? Steal an extra entry for the tail
+ * sentinel.
+ */
+ if ( curr->maptrack_tail == MAPTRACK_TAIL )
+ {
+ handle = steal_maptrack_handle(lgt, curr);
+ if ( handle == -1 )
+ return -1;
+ curr->maptrack_tail = handle;
+ write_atomic(&curr->maptrack_head, handle);
+ }
+ return steal_maptrack_handle(lgt, curr);
}
new_mt = alloc_xenheap_page();
@@ -373,7 +438,7 @@ get_maptrack_handle(
if ( curr->maptrack_tail == MAPTRACK_TAIL )
curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
- curr->maptrack_head = handle + 1;
+ write_atomic(&curr->maptrack_head, handle + 1);
lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
lgt->maptrack_limit += MAPTRACK_PER_PAGE;

View File

@ -1,105 +0,0 @@
# Commit b399386bcdb9d458f5647476a06fe86f5968d87e
# Date 2015-06-22 11:36:17 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: clear xen_consumer when clearing state
Freeing a xen event channel would clear xen_consumer before clearing
the channel state, leaving a window where the channel is in a funny
state (still bound but no consumer).
Move the clear of xen_consumer into free_evtchn() where the state is
also cleared.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Ditch the pointless evtchn_close() wrapper around __evtchn_close()
(renaming the latter) as well as some bogus casts of function results
to void.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -204,6 +204,7 @@ static void free_evtchn(struct domain *d
/* Reset binding to vcpu0 when the channel is freed. */
chn->state = ECS_FREE;
chn->notify_vcpu_id = 0;
+ chn->xen_consumer = 0;
xsm_evtchn_close_post(chn);
}
@@ -467,7 +468,7 @@ static long evtchn_bind_pirq(evtchn_bind
}
-static long __evtchn_close(struct domain *d1, int port1)
+static long evtchn_close(struct domain *d1, int port1, bool_t guest)
{
struct domain *d2 = NULL;
struct vcpu *v;
@@ -487,7 +488,7 @@ static long __evtchn_close(struct domain
chn1 = evtchn_from_port(d1, port1);
/* Guest cannot close a Xen-attached event channel. */
- if ( unlikely(consumer_is_xen(chn1)) )
+ if ( unlikely(consumer_is_xen(chn1)) && guest )
{
rc = -EINVAL;
goto out;
@@ -596,12 +597,6 @@ static long __evtchn_close(struct domain
return rc;
}
-
-static long evtchn_close(evtchn_close_t *close)
-{
- return __evtchn_close(current->domain, close->port);
-}
-
int evtchn_send(struct domain *d, unsigned int lport)
{
struct evtchn *lchn, *rchn;
@@ -956,7 +951,7 @@ static long evtchn_reset(evtchn_reset_t
goto out;
for ( i = 0; port_is_valid(d, i); i++ )
- (void)__evtchn_close(d, i);
+ evtchn_close(d, i, 1);
spin_lock(&d->event_lock);
@@ -1063,7 +1058,7 @@ long do_event_channel_op(int cmd, XEN_GU
struct evtchn_close close;
if ( copy_from_guest(&close, arg, 1) != 0 )
return -EFAULT;
- rc = evtchn_close(&close);
+ rc = evtchn_close(current->domain, close.port, 1);
break;
}
@@ -1193,11 +1188,10 @@ void free_xen_event_channel(
BUG_ON(!port_is_valid(d, port));
chn = evtchn_from_port(d, port);
BUG_ON(!consumer_is_xen(chn));
- chn->xen_consumer = 0;
spin_unlock(&d->event_lock);
- (void)__evtchn_close(d, port);
+ evtchn_close(d, port, 0);
}
@@ -1296,10 +1290,7 @@ void evtchn_destroy(struct domain *d)
/* Close all existing event channels. */
for ( i = 0; port_is_valid(d, i); i++ )
- {
- evtchn_from_port(d, i)->xen_consumer = 0;
- (void)__evtchn_close(d, i);
- }
+ evtchn_close(d, i, 0);
/* Free all event-channel buckets. */
spin_lock(&d->event_lock);

View File

@ -1,110 +0,0 @@
# Commit a753f0e53ff973a8a066e86c1cb3d6dd5c68d59f
# Date 2015-06-22 11:38:01 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: defer freeing struct evtchn's until evtchn_destroy_final()
notify_via_xen_event_channel() and free_xen_event_channel() had to
check if the domain was dying because they may be called while the
domain is being destroyed and the struct evtchn's are being freed.
By deferring the freeing of the struct evtchn's until all references
to the domain are dropped, these functions can rely on the channel
state being present and valid.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -1174,22 +1174,8 @@ int alloc_unbound_xen_event_channel(
void free_xen_event_channel(
struct vcpu *local_vcpu, int port)
{
- struct evtchn *chn;
struct domain *d = local_vcpu->domain;
-
- spin_lock(&d->event_lock);
-
- if ( unlikely(d->is_dying) )
- {
- spin_unlock(&d->event_lock);
- return;
- }
-
BUG_ON(!port_is_valid(d, port));
- chn = evtchn_from_port(d, port);
- BUG_ON(!consumer_is_xen(chn));
-
- spin_unlock(&d->event_lock);
evtchn_close(d, port, 0);
}
@@ -1203,18 +1189,12 @@ void notify_via_xen_event_channel(struct
spin_lock(&ld->event_lock);
- if ( unlikely(ld->is_dying) )
- {
- spin_unlock(&ld->event_lock);
- return;
- }
-
ASSERT(port_is_valid(ld, lport));
lchn = evtchn_from_port(ld, lport);
- ASSERT(consumer_is_xen(lchn));
if ( likely(lchn->state == ECS_INTERDOMAIN) )
{
+ ASSERT(consumer_is_xen(lchn));
rd = lchn->u.interdomain.remote_dom;
rport = lchn->u.interdomain.remote_port;
rchn = evtchn_from_port(rd, rport);
@@ -1282,7 +1262,7 @@ int evtchn_init(struct domain *d)
void evtchn_destroy(struct domain *d)
{
- unsigned int i, j;
+ unsigned int i;
/* After this barrier no new event-channel allocations can occur. */
BUG_ON(!d->is_dying);
@@ -1292,8 +1272,17 @@ void evtchn_destroy(struct domain *d)
for ( i = 0; port_is_valid(d, i); i++ )
evtchn_close(d, i, 0);
+ clear_global_virq_handlers(d);
+
+ evtchn_fifo_destroy(d);
+}
+
+
+void evtchn_destroy_final(struct domain *d)
+{
+ unsigned int i, j;
+
/* Free all event-channel buckets. */
- spin_lock(&d->event_lock);
for ( i = 0; i < NR_EVTCHN_GROUPS; i++ )
{
if ( !d->evtchn_group[i] )
@@ -1301,20 +1290,9 @@ void evtchn_destroy(struct domain *d)
for ( j = 0; j < BUCKETS_PER_GROUP; j++ )
free_evtchn_bucket(d, d->evtchn_group[i][j]);
xfree(d->evtchn_group[i]);
- d->evtchn_group[i] = NULL;
}
free_evtchn_bucket(d, d->evtchn);
- d->evtchn = NULL;
- spin_unlock(&d->event_lock);
- clear_global_virq_handlers(d);
-
- evtchn_fifo_destroy(d);
-}
-
-
-void evtchn_destroy_final(struct domain *d)
-{
#if MAX_VIRT_CPUS > BITS_PER_LONG
xfree(d->poll_mask);
d->poll_mask = NULL;

View File

@ -1,257 +0,0 @@
# Commit de6acb78bf0e137cbe5b72cee4a35ca018d759cc
# Date 2015-06-22 11:39:03 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: use a per-event channel lock for sending events
When sending an event, use a new per-event channel lock to safely
validate the event channel state.
This new lock must be held when changing event channel state. Note
that the event channel lock must also be held when changing state from
ECS_FREE or it will race with a concurrent get_free_port() call.
To avoid having to take the remote event channel locks when sending to
an interdomain event channel, the local and remote channel locks are
both held when binding or closing an interdomain event channel.
This significantly increases the number of events that can be sent
from multiple VCPUs. But struct evtchn increases in size, reducing
the number that fit into a single page to 64 (instead of 128).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -141,6 +141,7 @@ static struct evtchn *alloc_evtchn_bucke
return NULL;
}
chn[i].port = port + i;
+ spin_lock_init(&chn[i].lock);
}
return chn;
}
@@ -231,11 +232,15 @@ static long evtchn_alloc_unbound(evtchn_
if ( rc )
goto out;
+ spin_lock(&chn->lock);
+
chn->state = ECS_UNBOUND;
if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF )
chn->u.unbound.remote_domid = current->domain->domain_id;
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
alloc->port = port;
out:
@@ -246,6 +251,28 @@ static long evtchn_alloc_unbound(evtchn_
}
+static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
+{
+ if ( lchn < rchn )
+ {
+ spin_lock(&lchn->lock);
+ spin_lock(&rchn->lock);
+ }
+ else
+ {
+ if ( lchn != rchn )
+ spin_lock(&rchn->lock);
+ spin_lock(&lchn->lock);
+ }
+}
+
+static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn)
+{
+ spin_unlock(&lchn->lock);
+ if ( lchn != rchn )
+ spin_unlock(&rchn->lock);
+}
+
static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
{
struct evtchn *lchn, *rchn;
@@ -288,6 +315,8 @@ static long evtchn_bind_interdomain(evtc
if ( rc )
goto out;
+ double_evtchn_lock(lchn, rchn);
+
lchn->u.interdomain.remote_dom = rd;
lchn->u.interdomain.remote_port = rport;
lchn->state = ECS_INTERDOMAIN;
@@ -303,6 +332,8 @@ static long evtchn_bind_interdomain(evtc
*/
evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
+ double_evtchn_unlock(lchn, rchn);
+
bind->local_port = lport;
out:
@@ -343,11 +374,16 @@ static long evtchn_bind_virq(evtchn_bind
ERROR_EXIT(port);
chn = evtchn_from_port(d, port);
+
+ spin_lock(&chn->lock);
+
chn->state = ECS_VIRQ;
chn->notify_vcpu_id = vcpu;
chn->u.virq = virq;
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
v->virq_to_evtchn[virq] = bind->port = port;
out:
@@ -374,10 +410,15 @@ static long evtchn_bind_ipi(evtchn_bind_
ERROR_EXIT(port);
chn = evtchn_from_port(d, port);
+
+ spin_lock(&chn->lock);
+
chn->state = ECS_IPI;
chn->notify_vcpu_id = vcpu;
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
bind->port = port;
out:
@@ -452,11 +493,15 @@ static long evtchn_bind_pirq(evtchn_bind
goto out;
}
+ spin_lock(&chn->lock);
+
chn->state = ECS_PIRQ;
chn->u.pirq.irq = pirq;
link_pirq_port(port, chn, v);
evtchn_port_init(d, chn);
+ spin_unlock(&chn->lock);
+
bind->port = port;
arch_evtchn_bind_pirq(d, pirq);
@@ -574,15 +619,24 @@ static long evtchn_close(struct domain *
BUG_ON(chn2->state != ECS_INTERDOMAIN);
BUG_ON(chn2->u.interdomain.remote_dom != d1);
+ double_evtchn_lock(chn1, chn2);
+
+ free_evtchn(d1, chn1);
+
chn2->state = ECS_UNBOUND;
chn2->u.unbound.remote_domid = d1->domain_id;
- break;
+
+ double_evtchn_unlock(chn1, chn2);
+
+ goto out;
default:
BUG();
}
+ spin_lock(&chn1->lock);
free_evtchn(d1, chn1);
+ spin_unlock(&chn1->lock);
out:
if ( d2 != NULL )
@@ -604,21 +658,18 @@ int evtchn_send(struct domain *d, unsign
struct vcpu *rvcpu;
int rport, ret = 0;
- spin_lock(&ld->event_lock);
-
- if ( unlikely(!port_is_valid(ld, lport)) )
- {
- spin_unlock(&ld->event_lock);
+ if ( !port_is_valid(ld, lport) )
return -EINVAL;
- }
lchn = evtchn_from_port(ld, lport);
+ spin_lock(&lchn->lock);
+
/* Guest cannot send via a Xen-attached event channel. */
if ( unlikely(consumer_is_xen(lchn)) )
{
- spin_unlock(&ld->event_lock);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
ret = xsm_evtchn_send(XSM_HOOK, ld, lchn);
@@ -648,7 +699,7 @@ int evtchn_send(struct domain *d, unsign
}
out:
- spin_unlock(&ld->event_lock);
+ spin_unlock(&lchn->lock);
return ret;
}
@@ -1159,11 +1210,15 @@ int alloc_unbound_xen_event_channel(
if ( rc )
goto out;
+ spin_lock(&chn->lock);
+
chn->state = ECS_UNBOUND;
chn->xen_consumer = get_xen_consumer(notification_fn);
chn->notify_vcpu_id = local_vcpu->vcpu_id;
chn->u.unbound.remote_domid = remote_domid;
+ spin_unlock(&chn->lock);
+
out:
spin_unlock(&d->event_lock);
@@ -1187,11 +1242,11 @@ void notify_via_xen_event_channel(struct
struct domain *rd;
int rport;
- spin_lock(&ld->event_lock);
-
ASSERT(port_is_valid(ld, lport));
lchn = evtchn_from_port(ld, lport);
+ spin_lock(&lchn->lock);
+
if ( likely(lchn->state == ECS_INTERDOMAIN) )
{
ASSERT(consumer_is_xen(lchn));
@@ -1201,7 +1256,7 @@ void notify_via_xen_event_channel(struct
evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
}
- spin_unlock(&ld->event_lock);
+ spin_unlock(&lchn->lock);
}
void evtchn_check_pollers(struct domain *d, unsigned int port)
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -79,6 +79,7 @@ extern domid_t hardware_domid;
struct evtchn
{
+ spinlock_t lock;
#define ECS_FREE 0 /* Channel is available for use. */
#define ECS_RESERVED 1 /* Channel is reserved. */
#define ECS_UNBOUND 2 /* Channel is waiting to bind to a remote domain. */

View File

@ -1,27 +0,0 @@
# Commit b58214a24231a1f2a7e09ae9cc3014eff752918b
# Date 2015-06-22 11:39:46 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
evtchn: pad struct evtchn to 64 bytes
The number of struct evtchn in a page must be a power of two. Under
some workloads performance is improved slightly by padding struct
evtchn to 64 bytes (a typical cache line size), thus putting the fewer
per-channel locks into each cache line.
This does not decrease the number of struct evtchn's per-page.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -129,7 +129,7 @@ struct evtchn
#endif
} ssid;
#endif
-};
+} __attribute__((aligned(64)));
int evtchn_init(struct domain *d); /* from domain_create */
void evtchn_destroy(struct domain *d); /* from domain_kill */

View File

@ -1,128 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit a88b72fddd046a0978242411276861039ec99ad0
# Date 2015-07-23 10:13:12 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/PCI: add config space abstract write intercept logic
This is to be used by MSI code, and later to also be hooked up to
MMCFG accesses by Dom0.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Index: xen-4.5.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.5.2-testing/xen/arch/x86/msi.c
@@ -1110,6 +1110,12 @@ void pci_cleanup_msi(struct pci_dev *pde
msi_free_irqs(pdev);
}
+int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
+ unsigned int size, uint32_t *data)
+{
+ return 0;
+}
+
int pci_restore_msi_state(struct pci_dev *pdev)
{
unsigned long flags;
Index: xen-4.5.2-testing/xen/arch/x86/pci.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/pci.c
+++ xen-4.5.2-testing/xen/arch/x86/pci.c
@@ -67,3 +67,28 @@ void pci_conf_write(uint32_t cf8, uint8_
spin_unlock_irqrestore(&pci_config_lock, flags);
}
+
+int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
+ unsigned int reg, unsigned int size,
+ uint32_t *data)
+{
+ struct pci_dev *pdev;
+ int rc = 0;
+
+ /*
+ * Avoid expensive operations when no hook is going to do anything
+ * for the access anyway.
+ */
+ if ( reg < 64 || reg >= 256 )
+ return 0;
+
+ spin_lock(&pcidevs_lock);
+
+ pdev = pci_get_pdev(seg, PCI_BUS(bdf), PCI_DEVFN2(bdf));
+ if ( pdev )
+ rc = pci_msi_conf_write_intercept(pdev, reg, size, data);
+
+ spin_unlock(&pcidevs_lock);
+
+ return rc;
+}
Index: xen-4.5.2-testing/xen/arch/x86/traps.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/traps.c
+++ xen-4.5.2-testing/xen/arch/x86/traps.c
@@ -1709,8 +1709,8 @@ static int admin_io_okay(
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
-static bool_t pci_cfg_ok(struct domain *currd, bool_t write,
- unsigned int start, unsigned int size)
+static bool_t pci_cfg_ok(struct domain *currd, unsigned int start,
+ unsigned int size, uint32_t *write)
{
uint32_t machine_bdf;
@@ -1742,8 +1742,12 @@ static bool_t pci_cfg_ok(struct domain *
start |= CF8_ADDR_HI(currd->arch.pci_cf8);
}
- return !xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
- start, start + size - 1, write);
+ if ( xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
+ start, start + size - 1, !!write) != 0 )
+ return 0;
+
+ return !write ||
+ pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0;
}
uint32_t guest_io_read(
@@ -1797,7 +1801,7 @@ uint32_t guest_io_read(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 0, port & 3, size) )
+ if ( pci_cfg_ok(v->domain, port & 3, size, NULL) )
sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
}
@@ -1870,7 +1874,7 @@ void guest_io_write(
size = min(bytes, 4 - (port & 3));
if ( size == 3 )
size = 2;
- if ( pci_cfg_ok(v->domain, 1, port & 3, size) )
+ if ( pci_cfg_ok(v->domain, port & 3, size, &data) )
pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
}
Index: xen-4.5.2-testing/xen/include/asm-x86/pci.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-x86/pci.h
+++ xen-4.5.2-testing/xen/include/asm-x86/pci.h
@@ -15,4 +15,11 @@ struct arch_pci_dev {
vmask_t used_vectors;
};
+struct pci_dev;
+int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
+ unsigned int reg, unsigned int size,
+ uint32_t *data);
+int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg,
+ unsigned int size, uint32_t *data);
+
#endif /* __X86_PCI_H__ */

View File

@ -1,79 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 484d7c852e4ff79c945406ed28b5db63a5a0b7f3
# Date 2015-07-23 10:14:13 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI-X: track host and guest mask-all requests separately
Host uses of the bits will be added subsequently, and must not be
overridden by guests (including Dom0, namely when acting on behalf of
a guest).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Index: xen-4.5.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.5.2-testing/xen/arch/x86/msi.c
@@ -845,6 +845,12 @@ static int msix_capability_init(struct p
if ( !msix->used_entries )
{
+ msix->host_maskall = 0;
+ if ( !msix->guest_maskall )
+ control &= ~PCI_MSIX_FLAGS_MASKALL;
+ else
+ control |= PCI_MSIX_FLAGS_MASKALL;
+
if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
msix->table.last) )
WARN();
@@ -1113,6 +1119,34 @@ void pci_cleanup_msi(struct pci_dev *pde
int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
unsigned int size, uint32_t *data)
{
+ u16 seg = pdev->seg;
+ u8 bus = pdev->bus;
+ u8 slot = PCI_SLOT(pdev->devfn);
+ u8 func = PCI_FUNC(pdev->devfn);
+ struct msi_desc *entry;
+ unsigned int pos;
+
+ if ( pdev->msix )
+ {
+ entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
+ pos = entry ? entry->msi_attrib.pos
+ : pci_find_cap_offset(seg, bus, slot, func,
+ PCI_CAP_ID_MSIX);
+ ASSERT(pos);
+
+ if ( reg < pos || reg >= msix_pba_offset_reg(pos) + 4 )
+ return 0;
+
+ if ( reg != msix_control_reg(pos) || size != 2 )
+ return -EACCES;
+
+ pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
+ if ( pdev->msix->host_maskall )
+ *data |= PCI_MSIX_FLAGS_MASKALL;
+
+ return 1;
+ }
+
return 0;
}
Index: xen-4.5.2-testing/xen/include/asm-x86/msi.h
===================================================================
--- xen-4.5.2-testing.orig/xen/include/asm-x86/msi.h
+++ xen-4.5.2-testing/xen/include/asm-x86/msi.h
@@ -228,6 +228,7 @@ struct arch_msix {
int table_refcnt[MAX_MSIX_TABLE_PAGES];
int table_idx[MAX_MSIX_TABLE_PAGES];
spinlock_t table_lock;
+ bool_t host_maskall, guest_maskall;
domid_t warned;
};

View File

@ -1,355 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 082fdc6ce85e5b603f8fb24553cf200e3b67889f
# Date 2015-07-23 10:14:59 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI-X: be more careful during teardown
When a device gets detached from a guest, pciback will clear its
command register, thus disabling both memory and I/O decoding. The
disabled memory decoding, however, has an effect on the MSI-X table
accesses the hypervisor does: These won't have the intended effect
anymore. Even worse, for PCIe devices (but not SR-IOV virtual
functions) such accesses may (will?) be treated as Unsupported
Requests, causing respective errors to be surfaced, potentially in the
form of NMIs that may be fatal to the hypervisor or Dom0 is different
ways. Hence rather than carrying out these accesses, we should avoid
them where we can, and use alternative (e.g. PCI config space based)
mechanisms to achieve at least the same effect.
At this time it continues to be unclear whether this is fixing an
actual bug or is rather just working around bogus (but apparently
common) system behavior.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
Backporting note (largely to myself):
Depends on (not yet backported to 4.4 and earlier) commit 061eebe0e
"x86/MSI: drop workaround for insecure Dom0 kernels" (due to re-use
of struct arch_msix's warned field).
Index: xen-4.5.2-testing/xen/arch/x86/irq.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/irq.c
+++ xen-4.5.2-testing/xen/arch/x86/irq.c
@@ -217,9 +217,9 @@ void destroy_irq(unsigned int irq)
}
spin_lock_irqsave(&desc->lock, flags);
- desc->status |= IRQ_DISABLED;
desc->status &= ~IRQ_GUEST;
desc->handler->shutdown(desc);
+ desc->status |= IRQ_DISABLED;
action = desc->action;
desc->action = NULL;
desc->msi_desc = NULL;
@@ -995,8 +995,8 @@ void __init release_irq(unsigned int irq
spin_lock_irqsave(&desc->lock,flags);
action = desc->action;
desc->action = NULL;
- desc->status |= IRQ_DISABLED;
desc->handler->shutdown(desc);
+ desc->status |= IRQ_DISABLED;
spin_unlock_irqrestore(&desc->lock,flags);
/* Wait to make sure it's not being used on another CPU */
@@ -1732,8 +1732,8 @@ static irq_guest_action_t *__pirq_guest_
BUG_ON(action->in_flight != 0);
/* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
- desc->status |= IRQ_DISABLED;
desc->handler->disable(desc);
+ desc->status |= IRQ_DISABLED;
/*
* Mark any remaining pending EOIs as ready to flush.
Index: xen-4.5.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.5.2-testing/xen/arch/x86/msi.c
@@ -123,6 +123,27 @@ static void msix_put_fixmap(struct arch_
spin_unlock(&msix->table_lock);
}
+static bool_t memory_decoded(const struct pci_dev *dev)
+{
+ u8 bus, slot, func;
+
+ if ( !dev->info.is_virtfn )
+ {
+ bus = dev->bus;
+ slot = PCI_SLOT(dev->devfn);
+ func = PCI_FUNC(dev->devfn);
+ }
+ else
+ {
+ bus = dev->info.physfn.bus;
+ slot = PCI_SLOT(dev->info.physfn.devfn);
+ func = PCI_FUNC(dev->info.physfn.devfn);
+ }
+
+ return !!(pci_conf_read16(dev->seg, bus, slot, func, PCI_COMMAND) &
+ PCI_COMMAND_MEMORY);
+}
+
/*
* MSI message composition
*/
@@ -166,7 +187,7 @@ void msi_compose_msg(unsigned vector, co
}
}
-static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+static bool_t read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
switch ( entry->msi_attrib.type )
{
@@ -201,6 +222,8 @@ static void read_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ return 0;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
@@ -212,6 +235,8 @@ static void read_msi_msg(struct msi_desc
if ( iommu_intremap )
iommu_read_msi_from_ire(entry, msg);
+
+ return 1;
}
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
@@ -262,6 +287,8 @@ static int write_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ return -ENXIO;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
@@ -289,7 +316,8 @@ void set_msi_affinity(struct irq_desc *d
ASSERT(spin_is_locked(&desc->lock));
memset(&msg, 0, sizeof(msg));
- read_msi_msg(msi_desc, &msg);
+ if ( !read_msi_msg(msi_desc, &msg) )
+ return;
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
@@ -349,23 +377,27 @@ int msi_maskable_irq(const struct msi_de
|| entry->msi_attrib.maskbit;
}
-static void msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
+static bool_t msi_set_mask_bit(struct irq_desc *desc, bool_t host, bool_t guest)
{
struct msi_desc *entry = desc->msi_desc;
+ struct pci_dev *pdev;
+ u16 seg;
+ u8 bus, slot, func;
bool_t flag = host || guest;
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
+ pdev = entry->dev;
+ seg = pdev->seg;
+ bus = pdev->bus;
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
switch ( entry->msi_attrib.type )
{
case PCI_CAP_ID_MSI:
if ( entry->msi_attrib.maskbit )
{
u32 mask_bits;
- u16 seg = entry->dev->seg;
- u8 bus = entry->dev->bus;
- u8 slot = PCI_SLOT(entry->dev->devfn);
- u8 func = PCI_FUNC(entry->dev->devfn);
mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
@@ -374,25 +406,54 @@ static void msi_set_mask_bit(struct irq_
}
break;
case PCI_CAP_ID_MSIX:
- {
- int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
- writel(flag, entry->mask_base + offset);
- readl(entry->mask_base + offset);
- break;
- }
+ if ( likely(memory_decoded(pdev)) )
+ {
+ writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ break;
+ }
+ if ( flag )
+ {
+ u16 control;
+ domid_t domid = pdev->domain->domain_id;
+
+ pdev->msix->host_maskall = 1;
+ control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
+ if ( control & PCI_MSIX_FLAGS_MASKALL )
+ break;
+ pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | PCI_MSIX_FLAGS_MASKALL);
+ if ( pdev->msix->warned != domid )
+ {
+ pdev->msix->warned = domid;
+ printk(XENLOG_G_WARNING
+ "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
+ desc->irq, domid, pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ }
+ break;
+ }
+ /* fall through */
default:
- BUG();
- break;
+ return 0;
}
entry->msi_attrib.host_masked = host;
entry->msi_attrib.guest_masked = guest;
+
+ return 1;
}
static int msi_get_mask_bit(const struct msi_desc *entry)
{
- switch (entry->msi_attrib.type) {
+ if ( !entry->dev )
+ return -1;
+
+ switch ( entry->msi_attrib.type )
+ {
case PCI_CAP_ID_MSI:
- if (!entry->dev || !entry->msi_attrib.maskbit)
+ if ( !entry->msi_attrib.maskbit )
break;
return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
PCI_SLOT(entry->dev->devfn),
@@ -400,6 +461,8 @@ static int msi_get_mask_bit(const struct
entry->msi.mpos) >>
entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
+ if ( unlikely(!memory_decoded(entry->dev)) )
+ break;
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
}
return -1;
@@ -407,12 +470,16 @@ static int msi_get_mask_bit(const struct
void mask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc, 1,
+ desc->msi_desc->msi_attrib.guest_masked)) )
+ BUG_ON(!(desc->status & IRQ_DISABLED));
}
void unmask_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc, 0,
+ desc->msi_desc->msi_attrib.guest_masked)) )
+ WARN();
}
void guest_mask_msi_irq(struct irq_desc *desc, bool_t mask)
@@ -422,13 +489,15 @@ void guest_mask_msi_irq(struct irq_desc
static unsigned int startup_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
+ if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
+ WARN();
return 0;
}
static void shutdown_msi_irq(struct irq_desc *desc)
{
- msi_set_mask_bit(desc, 1, 1);
+ if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
+ BUG_ON(!(desc->status & IRQ_DISABLED));
}
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
@@ -742,6 +811,9 @@ static int msix_capability_init(struct p
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
+ if ( unlikely(!memory_decoded(dev)) )
+ return -ENXIO;
+
if ( desc )
{
entry = alloc_msi_entry(1);
@@ -881,7 +953,8 @@ static int msix_capability_init(struct p
++msix->used_entries;
/* Restore MSI-X enabled bits */
- pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control & ~PCI_MSIX_FLAGS_MASKALL);
return 0;
}
@@ -1026,8 +1099,16 @@ static void __pci_disable_msix(struct ms
BUG_ON(list_empty(&dev->msi_list));
- writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-
+ if ( likely(memory_decoded(dev)) )
+ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
+ {
+ printk(XENLOG_WARNING
+ "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
+ entry->irq, dev->seg, dev->bus,
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ control |= PCI_MSIX_FLAGS_MASKALL;
+ }
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
_pci_cleanup_msix(dev->msix);
@@ -1201,15 +1282,24 @@ int pci_restore_msi_state(struct pci_dev
nr = entry->msi.nvec;
}
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+ {
msix_set_enable(pdev, 0);
+ if ( unlikely(!memory_decoded(pdev)) )
+ {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return -ENXIO;
+ }
+ }
msg = entry->msg;
write_msi_msg(entry, &msg);
for ( i = 0; ; )
{
- msi_set_mask_bit(desc, entry[i].msi_attrib.host_masked,
- entry[i].msi_attrib.guest_masked);
+ if ( unlikely(!msi_set_mask_bit(desc,
+ entry[i].msi_attrib.host_masked,
+ entry[i].msi_attrib.guest_masked)) )
+ BUG();
if ( !--nr )
break;

View File

@ -1,337 +0,0 @@
References: bsc#907514 bsc#910258 bsc#918984 bsc#923967
# Commit 0dba393db07331e9cff42df10e95b67547dfdb3e
# Date 2015-07-23 10:15:39 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/MSI-X: access MSI-X table only after having enabled MSI-X
As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
better way") and its broken predecessor, make sure we don't access the
MSI-X table without having enabled MSI-X first, using the mask-all flag
instead to prevent interrupts from occurring.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Index: xen-4.5.2-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.5.2-testing/xen/arch/x86/msi.c
@@ -144,6 +144,17 @@ static bool_t memory_decoded(const struc
PCI_COMMAND_MEMORY);
}
+static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
+{
+ u16 control = pci_conf_read16(dev->seg, dev->bus, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), msix_control_reg(pos));
+
+ if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+ return 0;
+
+ return memory_decoded(dev);
+}
+
/*
* MSI message composition
*/
@@ -222,7 +233,8 @@ static bool_t read_msi_msg(struct msi_de
{
void __iomem *base = entry->mask_base;
- if ( unlikely(!memory_decoded(entry->dev)) )
+ if ( unlikely(!msix_memory_decoded(entry->dev,
+ entry->msi_attrib.pos)) )
return 0;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -287,7 +299,8 @@ static int write_msi_msg(struct msi_desc
{
void __iomem *base = entry->mask_base;
- if ( unlikely(!memory_decoded(entry->dev)) )
+ if ( unlikely(!msix_memory_decoded(entry->dev,
+ entry->msi_attrib.pos)) )
return -ENXIO;
writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -381,9 +394,9 @@ static bool_t msi_set_mask_bit(struct ir
{
struct msi_desc *entry = desc->msi_desc;
struct pci_dev *pdev;
- u16 seg;
+ u16 seg, control;
u8 bus, slot, func;
- bool_t flag = host || guest;
+ bool_t flag = host || guest, maskall;
ASSERT(spin_is_locked(&desc->lock));
BUG_ON(!entry || !entry->dev);
@@ -406,36 +419,45 @@ static bool_t msi_set_mask_bit(struct ir
}
break;
case PCI_CAP_ID_MSIX:
+ maskall = pdev->msix->host_maskall;
+ control = pci_conf_read16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
+ if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+ {
+ pdev->msix->host_maskall = 1;
+ pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
+ }
if ( likely(memory_decoded(pdev)) )
{
writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
- break;
+ if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
+ break;
+ flag = 1;
}
- if ( flag )
+ else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
{
- u16 control;
domid_t domid = pdev->domain->domain_id;
- pdev->msix->host_maskall = 1;
- control = pci_conf_read16(seg, bus, slot, func,
- msix_control_reg(entry->msi_attrib.pos));
- if ( control & PCI_MSIX_FLAGS_MASKALL )
- break;
- pci_conf_write16(seg, bus, slot, func,
- msix_control_reg(entry->msi_attrib.pos),
- control | PCI_MSIX_FLAGS_MASKALL);
+ maskall = 1;
if ( pdev->msix->warned != domid )
{
pdev->msix->warned = domid;
printk(XENLOG_G_WARNING
- "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
+ "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
desc->irq, domid, pdev->seg, pdev->bus,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}
- break;
}
- /* fall through */
+ pdev->msix->host_maskall = maskall;
+ if ( maskall || pdev->msix->guest_maskall )
+ control |= PCI_MSIX_FLAGS_MASKALL;
+ pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos), control);
+ return flag;
default:
return 0;
}
@@ -461,7 +483,8 @@ static int msi_get_mask_bit(const struct
entry->msi.mpos) >>
entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
- if ( unlikely(!memory_decoded(entry->dev)) )
+ if ( unlikely(!msix_memory_decoded(entry->dev,
+ entry->msi_attrib.pos)) )
break;
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
}
@@ -564,9 +587,31 @@ static struct msi_desc *alloc_msi_entry(
int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
{
- return __setup_msi_irq(desc, msidesc,
- msi_maskable_irq(msidesc) ? &pci_msi_maskable
- : &pci_msi_nonmaskable);
+ const struct pci_dev *pdev = msidesc->dev;
+ unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos);
+ u16 control = ~0;
+ int rc;
+
+ if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX )
+ {
+ control = pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos);
+ if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+ pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos,
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
+ }
+
+ rc = __setup_msi_irq(desc, msidesc,
+ msi_maskable_irq(msidesc) ? &pci_msi_maskable
+ : &pci_msi_nonmaskable);
+
+ if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+ pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos, control);
+
+ return rc;
}
int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
@@ -805,20 +850,38 @@ static int msix_capability_init(struct p
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ bool_t maskall = msix->host_maskall;
ASSERT(spin_is_locked(&pcidevs_lock));
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
- msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
+ /*
+ * Ensure MSI-X interrupts are masked during setup. Some devices require
+ * MSI-X to be enabled before we can touch the MSI-X registers. We need
+ * to mask all the vectors to prevent interrupts coming in before they're
+ * fully set up.
+ */
+ msix->host_maskall = 1;
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
if ( unlikely(!memory_decoded(dev)) )
+ {
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control & ~PCI_MSIX_FLAGS_ENABLE);
return -ENXIO;
+ }
if ( desc )
{
entry = alloc_msi_entry(1);
if ( !entry )
+ {
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control & ~PCI_MSIX_FLAGS_ENABLE);
return -ENOMEM;
+ }
ASSERT(msi);
}
@@ -849,6 +912,8 @@ static int msix_capability_init(struct p
{
if ( !msi || !msi->table_base )
{
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control & ~PCI_MSIX_FLAGS_ENABLE);
xfree(entry);
return -ENXIO;
}
@@ -891,6 +956,8 @@ static int msix_capability_init(struct p
if ( idx < 0 )
{
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control & ~PCI_MSIX_FLAGS_ENABLE);
xfree(entry);
return idx;
}
@@ -917,7 +984,7 @@ static int msix_capability_init(struct p
if ( !msix->used_entries )
{
- msix->host_maskall = 0;
+ maskall = 0;
if ( !msix->guest_maskall )
control &= ~PCI_MSIX_FLAGS_MASKALL;
else
@@ -953,8 +1020,8 @@ static int msix_capability_init(struct p
++msix->used_entries;
/* Restore MSI-X enabled bits */
- pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
- control & ~PCI_MSIX_FLAGS_MASKALL);
+ msix->host_maskall = maskall;
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
return 0;
}
@@ -1094,8 +1161,15 @@ static void __pci_disable_msix(struct ms
PCI_CAP_ID_MSIX);
u16 control = pci_conf_read16(seg, bus, slot, func,
msix_control_reg(entry->msi_attrib.pos));
+ bool_t maskall = dev->msix->host_maskall;
- msix_set_enable(dev, 0);
+ if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+ {
+ dev->msix->host_maskall = 1;
+ pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
+ }
BUG_ON(list_empty(&dev->msi_list));
@@ -1107,8 +1181,11 @@ static void __pci_disable_msix(struct ms
"cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
entry->irq, dev->seg, dev->bus,
PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
- control |= PCI_MSIX_FLAGS_MASKALL;
+ maskall = 1;
}
+ dev->msix->host_maskall = maskall;
+ if ( maskall || dev->msix->guest_maskall )
+ control |= PCI_MSIX_FLAGS_MASKALL;
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
_pci_cleanup_msix(dev->msix);
@@ -1257,6 +1334,8 @@ int pci_restore_msi_state(struct pci_dev
list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
{
unsigned int i = 0, nr = 1;
+ u16 control = 0;
+ u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
irq = entry->irq;
desc = &irq_desc[irq];
@@ -1283,10 +1362,18 @@ int pci_restore_msi_state(struct pci_dev
}
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
{
- msix_set_enable(pdev, 0);
+ control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos));
+ pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+ PCI_MSIX_FLAGS_MASKALL));
if ( unlikely(!memory_decoded(pdev)) )
{
spin_unlock_irqrestore(&desc->lock, flags);
+ pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control & ~PCI_MSIX_FLAGS_ENABLE);
return -ENXIO;
}
}
@@ -1316,11 +1403,9 @@ int pci_restore_msi_state(struct pci_dev
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
{
unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
- u16 control = pci_conf_read16(pdev->seg, pdev->bus,
- PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), cpos);
- control &= ~PCI_MSI_FLAGS_QSIZE;
+ control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) &
+ ~PCI_MSI_FLAGS_QSIZE;
multi_msi_enable(control, entry->msi.nvec);
pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn), cpos, control);
@@ -1328,7 +1413,9 @@ int pci_restore_msi_state(struct pci_dev
msi_set_enable(pdev, 1);
}
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
- msix_set_enable(pdev, 1);
+ pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | PCI_MSIX_FLAGS_ENABLE);
}
return 0;

View File

@ -13,11 +13,11 @@ This allows reverting the main effect of the XSA-129 patches in qemu.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Index: xen-4.5.2-testing/xen/arch/x86/msi.c
Index: xen-4.6.0-testing/xen/arch/x86/msi.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/msi.c
+++ xen-4.5.2-testing/xen/arch/x86/msi.c
@@ -1305,6 +1305,37 @@ int pci_msi_conf_write_intercept(struct
--- xen-4.6.0-testing.orig/xen/arch/x86/msi.c
+++ xen-4.6.0-testing/xen/arch/x86/msi.c
@@ -1336,6 +1336,37 @@ int pci_msi_conf_write_intercept(struct
return 1;
}

View File

@ -34,11 +34,11 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Index: xen-4.5.2-testing/xen/arch/x86/mm.c
Index: xen-4.6.0-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.5.2-testing.orig/xen/arch/x86/mm.c
+++ xen-4.5.2-testing/xen/arch/x86/mm.c
@@ -508,12 +508,12 @@ void update_cr3(struct vcpu *v)
--- xen-4.6.0-testing.orig/xen/arch/x86/mm.c
+++ xen-4.6.0-testing/xen/arch/x86/mm.c
@@ -502,12 +502,12 @@ void update_cr3(struct vcpu *v)
make_cr3(v, cr3_mfn);
}
@ -53,7 +53,7 @@ Index: xen-4.5.2-testing/xen/arch/x86/mm.c
struct page_info *page;
BUG_ON(unlikely(in_irq()));
@@ -528,10 +528,10 @@ static void invalidate_shadow_ldt(struct
@@ -522,10 +522,10 @@ static void invalidate_shadow_ldt(struct
for ( i = 16; i < 32; i++ )
{
@ -67,7 +67,7 @@ Index: xen-4.5.2-testing/xen/arch/x86/mm.c
ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
ASSERT_PAGE_IS_DOMAIN(page, v->domain);
put_page_and_type(page);
@@ -4366,16 +4366,18 @@ long do_update_va_mapping_otherdomain(un
@@ -4420,16 +4420,18 @@ long do_update_va_mapping_otherdomain(un
void destroy_gdt(struct vcpu *v)
{
l1_pgentry_t *pl1e;
@ -90,7 +90,7 @@ Index: xen-4.5.2-testing/xen/arch/x86/mm.c
v->arch.pv_vcpu.gdt_frames[i] = 0;
}
}
@@ -4388,7 +4390,7 @@ long set_gdt(struct vcpu *v,
@@ -4442,7 +4444,7 @@ long set_gdt(struct vcpu *v,
struct domain *d = v->domain;
l1_pgentry_t *pl1e;
/* NB. There are 512 8-byte entries per GDT page. */

View File

@ -17,9 +17,11 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Yang Zhang <yang.z.zhang@intel.com>
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -946,8 +946,18 @@ void __init x2apic_bsp_setup(void)
Index: xen-4.6.0-testing/xen/arch/x86/apic.c
===================================================================
--- xen-4.6.0-testing.orig/xen/arch/x86/apic.c
+++ xen-4.6.0-testing/xen/arch/x86/apic.c
@@ -943,8 +943,18 @@ void __init x2apic_bsp_setup(void)
mask_8259A();
mask_IO_APIC_setup(ioapic_entries);
@ -39,9 +41,11 @@ Acked-by: Yang Zhang <yang.z.zhang@intel.com>
if ( x2apic_enabled )
panic("Interrupt remapping could not be enabled while "
"x2APIC is already enabled by BIOS");
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -144,10 +144,10 @@ static void set_hpet_source_id(unsigned
Index: xen-4.6.0-testing/xen/drivers/passthrough/vtd/intremap.c
===================================================================
--- xen-4.6.0-testing.orig/xen/drivers/passthrough/vtd/intremap.c
+++ xen-4.6.0-testing/xen/drivers/passthrough/vtd/intremap.c
@@ -143,10 +143,10 @@ static void set_hpet_source_id(unsigned
set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, hpetid_to_bdf(id));
}
@ -54,7 +58,7 @@ Acked-by: Yang Zhang <yang.z.zhang@intel.com>
if ( !iommu_qinval || !iommu_intremap || list_empty(&acpi_drhd_units) )
return 0;
@@ -155,12 +155,12 @@ int iommu_supports_eim(void)
@@ -154,12 +154,12 @@ int iommu_supports_eim(void)
/* We MUST have a DRHD unit for each IOAPIC. */
for ( apic = 0; apic < nr_ioapics; apic++ )
if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
@ -69,7 +73,7 @@ Acked-by: Yang Zhang <yang.z.zhang@intel.com>
for_each_drhd_unit ( drhd )
if ( !ecap_queued_inval(drhd->iommu->ecap) ||
@@ -834,10 +834,10 @@ int iommu_enable_x2apic_IR(void)
@@ -833,10 +833,10 @@ int iommu_enable_x2apic_IR(void)
struct iommu *iommu;
if ( !iommu_supports_eim() )
@ -82,7 +86,7 @@ Acked-by: Yang Zhang <yang.z.zhang@intel.com>
for_each_drhd_unit ( drhd )
{
@@ -862,7 +862,7 @@ int iommu_enable_x2apic_IR(void)
@@ -861,7 +861,7 @@ int iommu_enable_x2apic_IR(void)
{
dprintk(XENLOG_INFO VTDPREFIX,
"Failed to enable Queued Invalidation!\n");
@ -91,7 +95,7 @@ Acked-by: Yang Zhang <yang.z.zhang@intel.com>
}
}
@@ -874,7 +874,7 @@ int iommu_enable_x2apic_IR(void)
@@ -873,7 +873,7 @@ int iommu_enable_x2apic_IR(void)
{
dprintk(XENLOG_INFO VTDPREFIX,
"Failed to enable Interrupt Remapping!\n");
@ -100,9 +104,11 @@ Acked-by: Yang Zhang <yang.z.zhang@intel.com>
}
}
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -28,7 +28,7 @@ int iommu_setup_hpet_msi(struct msi_desc
Index: xen-4.6.0-testing/xen/include/asm-x86/iommu.h
===================================================================
--- xen-4.6.0-testing.orig/xen/include/asm-x86/iommu.h
+++ xen-4.6.0-testing/xen/include/asm-x86/iommu.h
@@ -27,7 +27,7 @@ int iommu_setup_hpet_msi(struct msi_desc
/* While VT-d specific, this must get declared in a generic header. */
int adjust_vtd_irq_affinities(void);
void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present);

View File

@ -21,11 +21,11 @@ Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
Index: xen-4.5.1-testing/tools/qemu-xen-dir-remote/block/qcow.c
Index: xen-4.6.0-testing/tools/qemu-xen-dir-remote/block/qcow.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-dir-remote/block/qcow.c
+++ xen-4.5.1-testing/tools/qemu-xen-dir-remote/block/qcow.c
@@ -147,6 +147,14 @@ static int qcow_open(BlockDriverState *b
--- xen-4.6.0-testing.orig/tools/qemu-xen-dir-remote/block/qcow.c
+++ xen-4.6.0-testing/tools/qemu-xen-dir-remote/block/qcow.c
@@ -148,6 +148,14 @@ static int qcow_open(BlockDriverState *b
goto fail;
}

View File

@ -26,9 +26,11 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1045,10 +1045,11 @@ static void noreturn svm_do_resume(struc
Index: xen-4.6.0-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.6.0-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.6.0-testing/xen/arch/x86/hvm/svm/svm.c
@@ -1043,10 +1043,11 @@ static void noreturn svm_do_resume(struc
unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
{
uint32_t intercepts = vmcb_get_exception_intercepts(vmcb);
@ -42,7 +44,7 @@ Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
}
if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
@@ -2435,8 +2436,9 @@ void svm_vmexit_handler(struct cpu_user_
@@ -2434,8 +2435,9 @@ void svm_vmexit_handler(struct cpu_user_
case VMEXIT_EXCEPTION_DB:
if ( !v->domain->debugger_attached )
@ -54,7 +56,7 @@ Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
break;
case VMEXIT_EXCEPTION_BP:
@@ -2484,6 +2486,11 @@ void svm_vmexit_handler(struct cpu_user_
@@ -2483,6 +2485,11 @@ void svm_vmexit_handler(struct cpu_user_
break;
}
@ -66,9 +68,11 @@ Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
case VMEXIT_EXCEPTION_UD:
svm_vmexit_ud_intercept(regs);
break;
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1186,16 +1186,10 @@ static void vmx_update_host_cr3(struct v
Index: xen-4.6.0-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-4.6.0-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-4.6.0-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -1224,16 +1224,10 @@ static void vmx_update_host_cr3(struct v
void vmx_update_debug_state(struct vcpu *v)
{
@ -87,10 +91,10 @@ Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
vmx_vmcs_enter(v);
vmx_update_exception_bitmap(v);
@@ -2801,9 +2795,10 @@ void vmx_vmexit_handler(struct cpu_user_
@@ -3041,9 +3035,10 @@ void vmx_vmexit_handler(struct cpu_user_
__vmread(EXIT_QUALIFICATION, &exit_qualification);
HVMTRACE_1D(TRAP_DEBUG, exit_qualification);
write_debugreg(6, exit_qualification | 0xffff0ff0);
write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE);
- if ( !v->domain->debugger_attached || cpu_has_monitor_trap_flag )
- goto exit_and_crash;
- domain_pause_for_debugger();
@ -101,7 +105,7 @@ Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
break;
case TRAP_int3:
{
@@ -2868,6 +2863,11 @@ void vmx_vmexit_handler(struct cpu_user_
@@ -3108,6 +3103,11 @@ void vmx_vmexit_handler(struct cpu_user_
hvm_inject_page_fault(regs->error_code, exit_qualification);
break;
@ -111,11 +115,13 @@ Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+ hvm_inject_hw_exception(vector, ecode);
+ break;
case TRAP_nmi:
if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
(X86_EVENTTYPE_NMI << 8) )
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -378,7 +378,10 @@ static inline int hvm_event_pending(stru
if ( MASK_EXTR(intr_info, INTR_INFO_INTR_TYPE_MASK) !=
X86_EVENTTYPE_NMI )
Index: xen-4.6.0-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-4.6.0-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-4.6.0-testing/xen/include/asm-x86/hvm/hvm.h
@@ -384,7 +384,10 @@ static inline int hvm_event_pending(stru
(X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
/* These exceptions must always be intercepted. */

View File

@ -8,11 +8,11 @@ This is XSA-149.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
Index: xen-4.5.2-testing/xen/common/domain.c
Index: xen-4.6.0-testing/xen/common/domain.c
===================================================================
--- xen-4.5.2-testing.orig/xen/common/domain.c
+++ xen-4.5.2-testing/xen/common/domain.c
@@ -406,6 +406,7 @@ struct domain *domain_create(
--- xen-4.6.0-testing.orig/xen/common/domain.c
+++ xen-4.6.0-testing/xen/common/domain.c
@@ -412,6 +412,7 @@ struct domain *domain_create(domid_t dom
if ( init_status & INIT_xsm )
xsm_free_security_domain(d);
free_cpumask_var(d->domain_dirty_cpumask);

View File

@ -20,10 +20,10 @@ git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5076 c046a42c-6fe2-441c-8c8
vnc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 50 insertions(+), 9 deletions(-)
Index: xen-4.5.2-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.5.2-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.5.2-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1285,35 +1285,22 @@ static void press_key_altgr_down(VncStat
}
}

View File

@ -10,11 +10,11 @@ Signed-off-by: Olaf Hering <olaf@aepfle.de>
xen/include/public/arch-arm.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
index 7496556..95f2a7c 100644
--- a/xen/include/public/arch-arm.h
+++ b/xen/include/public/arch-arm.h
@@ -342,13 +342,13 @@ typedef uint64_t xen_callback_t;
Index: xen-4.6.0-testing/xen/include/public/arch-arm.h
===================================================================
--- xen-4.6.0-testing.orig/xen/include/public/arch-arm.h
+++ xen-4.6.0-testing/xen/include/public/arch-arm.h
@@ -365,13 +365,13 @@ typedef uint64_t xen_callback_t;
/* 64 bit modes */
#define PSR_MODE_BIT 0x10 /* Set iff AArch32 */

View File

@ -1,24 +1,3 @@
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -225,6 +225,7 @@ static int open_disk(struct td_state *s,
BlockDriver* drv;
char* devname;
static int devnumber = 0;
+ int flags = readonly ? BDRV_O_RDONLY : BDRV_O_RDWR;
int i;
DPRINTF("Opening %s as blktap%d\n", path, devnumber);
@@ -247,7 +248,7 @@ static int open_disk(struct td_state *s,
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
/* Open the image */
- if (bdrv_open2(bs, path, 0, drv) != 0) {
+ if (bdrv_open2(bs, path, flags, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c

View File

@ -1,22 +0,0 @@
--- xen-4.5.0-testing/tools/blktap/drivers/Makefile.orig 2015-04-08 09:20:08.817373085 +0000
+++ xen-4.5.0-testing/tools/blktap/drivers/Makefile 2015-04-08 09:20:55.738897365 +0000
@@ -6,6 +6,9 @@
CFLAGS += -Werror
CFLAGS += -Wno-unused
+ifeq ($(call cc-ver,$(CC),0x040900),y)
+$(call cc-option-add,CFLAGS,CC,-Wno-error=maybe-uninitialized)
+endif
CFLAGS += -I../lib
CFLAGS += $(CFLAGS_libxenctrl)
CFLAGS += $(CFLAGS_libxenstore)
--- xen-4.5.0-testing/tools/blktap2/drivers/Makefile.orig 2015-04-08 11:25:54.974241326 +0200
+++ xen-4.5.0-testing/tools/blktap2/drivers/Makefile 2015-04-08 11:26:10.150411238 +0200
@@ -11,6 +11,7 @@
CFLAGS += -Werror
CFLAGS += -Wno-unused
+$(call cc-option-add,CFLAGS,CC,-Wno-error=array-bounds)
CFLAGS += -fno-strict-aliasing
CFLAGS += -I$(BLKTAP_ROOT)/include -I$(BLKTAP_ROOT)/drivers
CFLAGS += $(CFLAGS_libxenctrl)

View File

@ -1,834 +0,0 @@
---
tools/blktap/drivers/Makefile | 6
tools/blktap/drivers/block-cdrom.c | 565 +++++++++++++++++++++++
tools/blktap/drivers/tapdisk.c | 16
tools/blktap/drivers/tapdisk.h | 16
tools/blktap/lib/blktaplib.h | 1
xen/include/public/io/blkif.h | 2
xen/include/public/io/cdromif.h | 122 ++++
7 files changed, 726 insertions(+), 3 deletions(-)
Index: xen-4.5.1-testing/tools/blktap/drivers/Makefile
===================================================================
--- xen-4.5.1-testing.orig/tools/blktap/drivers/Makefile
+++ xen-4.5.1-testing/tools/blktap/drivers/Makefile
@@ -35,8 +35,9 @@ AIOLIBS := -laio
CFLAGS += $(PTHREAD_CFLAGS)
LDFLAGS += $(PTHREAD_LDFLAGS)
-LDLIBS_blktapctrl := $(MEMSHRLIBS) $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) -L../lib -lblktap -lrt -lm $(PTHREAD_LIBS)
-LDLIBS_img := $(AIOLIBS) $(CRYPT_LIB) $(PTHREAD_LIBS) -lz
+LDLIBS_xen := $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore)
+LDLIBS_blktapctrl := $(MEMSHRLIBS) $(LDLIBS_xen) -L../lib -lblktap -lrt -lm $(PTHREAD_LIBS)
+LDLIBS_img := $(AIOLIBS) $(CRYPT_LIB) $(PTHREAD_LIBS) -lz $(LDLIBS_xen)
BLK-OBJS-y := block-aio.o
BLK-OBJS-y += block-sync.o
@@ -44,6 +45,7 @@ BLK-OBJS-y += block-vmdk.o
BLK-OBJS-y += block-ram.o
BLK-OBJS-y += block-qcow.o
BLK-OBJS-y += block-qcow2.o
+BLK-OBJS-y += block-cdrom.o
BLK-OBJS-y += aes.o
BLK-OBJS-y += tapaio.o
BLK-OBJS-$(CONFIG_Linux) += blk_linux.o
Index: xen-4.5.1-testing/tools/blktap/drivers/block-cdrom.c
===================================================================
--- /dev/null
+++ xen-4.5.1-testing/tools/blktap/drivers/block-cdrom.c
@@ -0,0 +1,568 @@
+/* block-cdrom.c
+ *
+ * simple slow synchronous cdrom disk implementation. Based off
+ * of block-sync.c
+ *
+ * (c) 2006 Andrew Warfield and Julian Chesterfield
+ * (c) 2008 Novell Inc. <plc@novell.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+
+#include "tapdisk.h"
+#include <xen/io/cdromif.h>
+
+struct tdcdrom_state {
+ int fd;
+ int xs_fd; /* for xen event polling */
+ int media_present;
+ int media_changed;
+ struct xs_handle *xs_handle;
+ char *dev_name;
+ int dev_type;
+ td_flag_t flags;
+};
+
+#define BLOCK_DEVICE 0
+#define FILE_DEVICE 1
+#define CDROM_DEFAULT_SECTOR_SIZE 2048
+#define CDROM_DEFAULT_SIZE 2000000000
+
+/*Get Image size, secsize*/
+static void get_image_info(struct disk_driver *dd)
+{
+ int ret;
+ long size;
+ unsigned long total_size;
+ struct statvfs statBuf;
+ struct stat stat;
+ struct td_state *s = dd->td_state;
+ struct tdcdrom_state *prv = dd->private;
+
+ s->size = 0;
+ s->sector_size = CDROM_DEFAULT_SECTOR_SIZE;
+ s->info = (VDISK_CDROM | VDISK_REMOVABLE | VDISK_READONLY);
+ prv->media_present = 0;
+
+ ret = fstat(prv->fd, &stat);
+ if (ret != 0) {
+ DPRINTF("ERROR: fstat failed, Couldn't stat image");
+ return;
+ }
+
+ if (S_ISBLK(stat.st_mode)) {
+ /*Accessing block device directly*/
+ int status;
+
+ prv->dev_type = BLOCK_DEVICE;
+ status = ioctl(prv->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
+ if (status == CDS_DISC_OK) {
+ prv->media_present = 1;
+ if ((ret =ioctl(prv->fd,BLKGETSIZE,&s->size))!=0) {
+ DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
+ s->size = CDROM_DEFAULT_SIZE;
+ }
+ }
+ else {
+ s->size = CDROM_DEFAULT_SIZE;
+ }
+ /*Get the sector size*/
+#if defined(BLKSSZGET)
+ {
+ int arg;
+ s->sector_size = CDROM_DEFAULT_SECTOR_SIZE;
+ ioctl(prv->fd, BLKSSZGET, &s->sector_size);
+
+ if (s->sector_size != CDROM_DEFAULT_SECTOR_SIZE)
+ DPRINTF("Note: sector size is %llu (not %d)\n",
+ (long long unsigned)s->sector_size,
+ CDROM_DEFAULT_SECTOR_SIZE);
+ }
+#else
+ s->sector_size = CDROM_DEFAULT_SECTOR_SIZE;
+#endif
+ DPRINTF("Block Device: Image size: %llu"
+ " media_present: %d sector_size: %llu\n",
+ (long long unsigned)s->size, prv->media_present,
+ (long long unsigned)s->sector_size);
+ } else {
+ /*Local file? try fstat instead*/
+ prv->dev_type = FILE_DEVICE;
+ prv->media_present = 1;
+ s->size = (stat.st_size >> SECTOR_SHIFT);
+ s->sector_size = DEFAULT_SECTOR_SIZE;
+ DPRINTF("Local File: Image size: %llu\n",
+ (long long unsigned)s->size);
+ }
+ return;
+}
+
+static inline void init_fds(struct disk_driver *dd)
+{
+ int i;
+ struct tdcdrom_state *prv = dd->private;
+
+ for(i = 0; i < MAX_IOFD; i++)
+ dd->io_fd[i] = 0;
+
+ prv->xs_handle = xs_daemon_open();
+ prv->xs_fd = xs_fileno(prv->xs_handle);
+ dd->io_fd[0] = prv->xs_fd;
+}
+
+void open_device (struct disk_driver *dd)
+{
+ struct tdcdrom_state *prv = dd->private;
+ int o_flags;
+
+ o_flags = O_NONBLOCK | O_LARGEFILE |
+ ((prv->flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
+
+ if (prv->fd < 0) {
+ prv->fd = open(prv->dev_name, o_flags);
+ if (prv->fd == -1) {
+ DPRINTF("Unable tp open: (%s)\n", prv->dev_name);
+ return;
+ }
+ }
+
+ if (prv->fd != -1) {
+
+ get_image_info(dd);
+
+ if (prv->dev_type == BLOCK_DEVICE) {
+ int status;
+ status = ioctl(prv->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
+ switch (status) {
+ case CDS_DISC_OK:
+ prv->media_present = 1;
+ break;
+ default:
+ prv->media_present = 0;
+ }
+ }
+ else
+ prv->media_present = 1;
+ }
+}
+
+/*
+ * Main entry point, called when first loaded
+ */
+int tdcdrom_open (struct disk_driver *dd, const char *name, td_flag_t flags)
+{
+ int ret;
+ struct tdcdrom_state *prv = dd->private;
+
+ ret = asprintf(&prv->dev_name, "%s", name);
+ if (ret < 0) {
+ prv->dev_name = NULL;
+ goto out;
+ }
+ prv->fd = -1;
+ prv->media_changed = 0;
+ prv->media_present = 0;
+ prv->flags = flags;
+ init_fds(dd);
+
+ open_device(dd);
+
+out:
+ return ret;
+}
+
+int tdcdrom_queue_read(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
+{
+ struct td_state *s = dd->td_state;
+ struct tdcdrom_state *prv = dd->private;
+ int size = nb_sectors * s->sector_size;
+ uint64_t offset = sector * (uint64_t)s->sector_size;
+ int ret;
+
+ if (prv->fd == -1 || prv->media_present == 0) {
+ ret = 0 - ENOMEDIUM;
+ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
+ }
+ size = nb_sectors * 512;
+ offset = sector * (uint64_t)512;
+ ret = lseek(prv->fd, offset, SEEK_SET);
+ if (ret != (off_t)-1) {
+ ret = read(prv->fd, buf, size);
+ if (ret != size) {
+ ret = 0 - errno;
+ } else {
+ ret = 1;
+ }
+ } else ret = 0 - errno;
+
+ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
+}
+
+int tdcdrom_queue_write(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
+{
+ struct td_state *s = dd->td_state;
+ struct tdcdrom_state *prv = dd->private;
+ int size = nb_sectors * s->sector_size;
+ uint64_t offset = sector * (uint64_t)s->sector_size;
+ int ret = 0;
+
+ if (prv->fd == -1 || prv->media_present == 0) {
+ ret = 0 - ENOMEDIUM;
+ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
+ }
+ ret = lseek(prv->fd, offset, SEEK_SET);
+ if (ret != (off_t)-1) {
+ ret = write(prv->fd, buf, size);
+ if (ret != size) {
+ ret = 0 - errno;
+ } else {
+ ret = 1;
+ }
+ } else ret = 0 - errno;
+
+ return cb(dd, (ret < 0) ? ret : 0, sector, nb_sectors, id, private);
+}
+
+int tdcdrom_queue_packet(struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *private)
+{
+ struct td_state *s = dd->td_state;
+ struct tdcdrom_state *prv = dd->private;
+ int size = nb_sectors * s->sector_size;
+ uint64_t offset = sector * (uint64_t)s->sector_size;
+ int ret = 0;
+
+ union xen_block_packet *sp;
+ struct xen_cdrom_packet *xcp;
+ struct xen_cdrom_support *xcs;
+ struct xen_cdrom_open *xco;
+ struct xen_cdrom_media_info *xcmi;
+ struct xen_cdrom_media_changed *xcmc;
+ struct cdrom_generic_command cgc;
+ struct vcd_generic_command * vgc;
+ struct request_sense sense;
+
+ sp = (union xen_block_packet *)buf;
+ switch(sp->type) {
+ case XEN_TYPE_CDROM_SUPPORT:
+ xcs = &(sp->xcs);
+ xcs->err = 0;
+ xcs->ret = 0;
+ xcs->supported = 1;
+ break;
+ case XEN_TYPE_CDROM_PACKET:
+ xcp = &(sp->xcp);
+ xcp->err = 0;
+ xcp->ret = 0;
+ vgc = (struct vcd_generic_command *)(buf + PACKET_PAYLOAD_OFFSET);
+
+ memset( &cgc, 0, sizeof(struct cdrom_generic_command));
+ memcpy(cgc.cmd, vgc->cmd, CDROM_PACKET_SIZE);
+ cgc.stat = vgc->stat;
+ cgc.data_direction = vgc->data_direction;
+ cgc.quiet = vgc->quiet;
+ cgc.timeout = vgc->timeout;
+
+ if (prv->fd == -1) {
+ xcp = &(sp->xcp);
+ xcp->ret = -1;
+ xcp->err = 0 - ENODEV;
+ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
+ }
+ if (prv->dev_type == FILE_DEVICE) {
+ DPRINTF("%s() FILE_DEVICE inappropriate packetcmd \n",__func__);
+ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
+ }
+ switch ( cgc.cmd[0]) {
+ case GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL:
+ {
+ int lock;
+ lock = cgc.cmd[4] & 1;
+ if (ioctl (prv->fd, CDROM_LOCKDOOR, lock) < 0) {
+ xcp->err = -(errno);
+ xcp->ret = -1;
+ }
+ }
+ break;
+ case GPCMD_START_STOP_UNIT:
+ {
+ int start, eject;
+ start = cgc.cmd[4] & 1;
+ eject = (cgc.cmd[4] >> 1) & 1;
+ if (eject && !start) {
+ if (ioctl (prv->fd, CDROMEJECT, NULL) < 0) {
+ xcp->err = -(errno);
+ xcp->ret = -1;
+ }
+ } else if (eject && start) {
+ if (ioctl (prv->fd, CDROMCLOSETRAY, NULL) < 0) {
+ xcp->err = -(errno);
+ xcp->ret = -1;
+ }
+ }
+ }
+ break;
+ default:
+ {
+ if (vgc->sense_offset) {
+ cgc.sense = &sense;
+ }
+ if (vgc->buffer_offset) {
+ cgc.buffer = malloc(vgc->buflen);
+ memcpy(cgc.buffer, (char *)sp + PACKET_BUFFER_OFFSET, vgc->buflen);
+ cgc.buflen = vgc->buflen;
+ }
+ if (ioctl (prv->fd, CDROM_SEND_PACKET, &cgc) < 0 ) {
+ xcp->err = -(errno);
+ xcp->ret = -1;
+ }
+ if (cgc.sense) {
+ memcpy((char *)sp + PACKET_SENSE_OFFSET, cgc.sense, sizeof(struct request_sense));
+ }
+ if (cgc.buffer) {
+ vgc->buflen = cgc.buflen;
+ memcpy((char *)sp + PACKET_BUFFER_OFFSET, cgc.buffer, cgc.buflen);
+ free(cgc.buffer);
+ }
+ break;
+ }
+ }
+ break;
+ case XEN_TYPE_CDROM_OPEN:
+ {
+ unsigned int len;
+ struct stat statbuf;
+ int major = 0;
+ int minor = 0;
+
+ if (stat (prv->dev_name, &statbuf) == 0) {
+ major = major (statbuf.st_rdev);
+ minor = minor (statbuf.st_rdev);
+ }
+ xco = &(sp->xco);
+ xco->err = 0;
+ xco->ret = 0;
+ if (xco->payload_offset) {
+ char *present;
+ char *buf;
+ char *num;
+ char *nodename;
+ char media_present[2];
+ nodename = (char *)sp + xco->payload_offset;
+ if (asprintf(&buf, "%s/media-present", nodename) < 0)
+ goto out_payload_offset;
+ present = xs_read(prv->xs_handle, XBT_NULL, buf, &len);
+ if (present) {
+ free(buf);
+ goto out_payload_offset_free;
+ }
+
+ sprintf(media_present, "%d", prv->media_present);
+ xs_write(prv->xs_handle, XBT_NULL, buf, media_present, strlen(media_present));
+ xs_watch(prv->xs_handle, buf, "media-present");
+ free(buf);
+
+ if (asprintf(&buf, "%s/params", nodename) < 0)
+ goto out_payload_offset_free;
+ xs_watch(prv->xs_handle, buf, "params");
+ free(buf);
+
+ if (asprintf(&num, "%x:%x", major, minor) < 0)
+ goto out_payload_offset_free;
+ if (asprintf(&buf, "%s/physical-device", nodename) < 0) {
+ free(num);
+ goto out_payload_offset_free;
+ }
+ xs_write(prv->xs_handle, XBT_NULL, buf, num, strlen(num));
+ free(buf);
+ free(num);
+out_payload_offset_free:
+ free(present);
+out_payload_offset:
+ ;
+ }
+
+ xco->media_present = prv->media_present;
+ xco->sectors = 0;
+ xco->sector_size = 2048;
+ if (prv->media_present && prv->fd != -1 ) {
+ get_image_info(dd);
+ xco->sectors = s->size;
+ xco->sector_size = s->sector_size;
+ }
+ }
+ break;
+ case XEN_TYPE_CDROM_MEDIA_CHANGED:
+ xcmc = &(sp->xcmc);
+ xcmc->err = 0;
+ xcmc->ret = 0;
+ xcmc->media_changed = prv->media_changed;
+ prv->media_changed = 0;
+ break;
+ default:
+ xcp = &(sp->xcp);
+ xcp->err = -EINVAL;
+ xcp->ret = -1;
+ break;
+ }
+
+ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
+}
+
+int tdcdrom_submit(struct disk_driver *dd)
+{
+ return 0;
+}
+
+int tdcdrom_close(struct disk_driver *dd)
+{
+ struct tdcdrom_state *prv = dd->private;
+
+ if (prv->fd != -1) {
+ close(prv->fd);
+ prv->fd = -1;
+ }
+ prv->xs_fd = -1;
+ xs_daemon_close(prv->xs_handle);
+ free(prv->dev_name);
+
+ return 0;
+}
+
+void tdcdrom_process_media_change_event(struct disk_driver *dd, char **vec)
+{
+ struct tdcdrom_state *prv = dd->private;
+ char *media_present;
+ unsigned int len;
+
+ media_present = xs_read(prv->xs_handle, XBT_NULL, vec[XS_WATCH_PATH], &len);
+ if (media_present == NULL)
+ return;
+
+ if (strcmp(media_present, "0") == 0) {
+ close(prv->fd);
+ prv->fd = -1;
+ prv->media_present = 0;
+ }
+ else {
+ open_device(dd);
+ prv->media_changed = 1;
+ }
+ free(media_present);
+}
+
+void tdcrom_process_params_event(struct disk_driver *dd, char **vec)
+{
+ struct tdcdrom_state *prv = dd->private;
+ char *params;
+ unsigned int len;
+
+ params = xs_read(prv->xs_handle, XBT_NULL, vec[XS_WATCH_PATH], &len);
+ if (params) {
+ char *cp = strchr(params, ':');
+ if (cp) {
+ cp++;
+ if (prv->dev_name)
+ free(prv->dev_name);
+ if (asprintf(&prv->dev_name, "%s", cp) < 0) {
+ prv->dev_name = NULL;
+ return;
+ }
+ if (prv->fd != -1) {
+ close(prv->fd);
+ prv->fd = -1;
+ }
+ open_device(dd);
+ prv->media_changed = 1;
+ }
+ free(params);
+ }
+}
+
+int tdcdrom_do_callbacks(struct disk_driver *dd, int sid)
+{
+ struct tdcdrom_state *prv = dd->private;
+ char **vec;
+ unsigned int num;
+
+ vec = xs_read_watch(prv->xs_handle, &num);
+ if (!vec)
+ return 1;
+
+ if (!strcmp(vec[XS_WATCH_TOKEN], "media-present")) {
+ tdcdrom_process_media_change_event(dd, vec);
+ goto out;
+ }
+
+ if (!strcmp(vec[XS_WATCH_TOKEN], "params")) {
+ tdcrom_process_params_event(dd, vec);
+ goto out;
+ }
+
+ out:
+ free(vec);
+ return 1;
+}
+
+int tdcdrom_get_parent_id(struct disk_driver *dd, struct disk_id *id)
+{
+ return TD_NO_PARENT;
+}
+
+int tdcdrom_validate_parent(struct disk_driver *dd,
+ struct disk_driver *parent, td_flag_t flags)
+{
+ return -EINVAL;
+}
+
+struct tap_disk tapdisk_cdrom = {
+ .disk_type = "tapdisk_cdrom",
+ .private_data_size = sizeof(struct tdcdrom_state),
+ .td_open = tdcdrom_open,
+ .td_queue_read = tdcdrom_queue_read,
+ .td_queue_packet = tdcdrom_queue_packet,
+ .td_queue_write = tdcdrom_queue_write,
+ .td_submit = tdcdrom_submit,
+ .td_close = tdcdrom_close,
+ .td_do_callbacks = tdcdrom_do_callbacks,
+ .td_get_parent_id = tdcdrom_get_parent_id,
+ .td_validate_parent = tdcdrom_validate_parent
+};
Index: xen-4.5.1-testing/tools/blktap/drivers/tapdisk.c
===================================================================
--- xen-4.5.1-testing.orig/tools/blktap/drivers/tapdisk.c
+++ xen-4.5.1-testing/tools/blktap/drivers/tapdisk.c
@@ -735,6 +735,22 @@ static void get_io_request(struct td_sta
goto out;
}
break;
+ case BLKIF_OP_PACKET:
+ ret = 0;
+ if (drv->td_queue_packet)
+ ret = drv->td_queue_packet(dd, sector_nr,
+ nsects, page,
+ send_responses,
+ idx, (void *)(long)i);
+ if (ret > 0) dd->early += ret;
+ else if (ret == -EBUSY) {
+ /* put req back on queue */
+ --info->fe_ring.req_cons;
+ info->busy.req = req;
+ info->busy.seg_idx = i;
+ goto out;
+ }
+ break;
default:
DPRINTF("Unknown block operation\n");
break;
Index: xen-4.5.1-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-4.5.1-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-4.5.1-testing/tools/blktap/drivers/tapdisk.h
@@ -137,6 +137,9 @@ struct tap_disk {
int (*td_get_parent_id) (struct disk_driver *dd, struct disk_id *id);
int (*td_validate_parent)(struct disk_driver *dd,
struct disk_driver *p, td_flag_t flags);
+ int (*td_queue_packet) (struct disk_driver *dd, uint64_t sector,
+ int nb_sectors, char *buf, td_callback_t cb,
+ int id, void *prv);
};
typedef struct disk_info {
@@ -160,6 +163,7 @@ extern struct tap_disk tapdisk_vmdk;
extern struct tap_disk tapdisk_ram;
extern struct tap_disk tapdisk_qcow;
extern struct tap_disk tapdisk_qcow2;
+extern struct tap_disk tapdisk_cdrom;
/*Define Individual Disk Parameters here */
@@ -229,6 +233,17 @@ static disk_info_t qcow2_disk = {
#endif
};
+static disk_info_t cdrom_disk = {
+ DISK_TYPE_CDROM,
+ "raw image (cdrom)",
+ "cdrom",
+ 0,
+ 0,
+#ifdef TAPDISK
+ &tapdisk_cdrom,
+#endif
+};
+
/*Main disk info array */
static disk_info_t *dtypes[] = {
&aio_disk,
@@ -237,6 +252,7 @@ static disk_info_t *dtypes[] = {
&ram_disk,
&qcow_disk,
&qcow2_disk,
+ &cdrom_disk,
};
typedef struct driver_list_entry {
Index: xen-4.5.1-testing/tools/blktap/lib/blktaplib.h
===================================================================
--- xen-4.5.1-testing.orig/tools/blktap/lib/blktaplib.h
+++ xen-4.5.1-testing/tools/blktap/lib/blktaplib.h
@@ -219,6 +219,7 @@ typedef struct msg_pid {
#define DISK_TYPE_RAM 3
#define DISK_TYPE_QCOW 4
#define DISK_TYPE_QCOW2 5
+#define DISK_TYPE_CDROM 6
/* xenstore/xenbus: */
#define DOMNAME "Domain-0"
Index: xen-4.5.1-testing/xen/include/public/io/blkif.h
===================================================================
--- xen-4.5.1-testing.orig/xen/include/public/io/blkif.h
+++ xen-4.5.1-testing/xen/include/public/io/blkif.h
@@ -485,7 +485,7 @@
* Used in SLES sources for device specific command packet
* contained within the request. Reserved for that purpose.
*/
-#define BLKIF_OP_RESERVED_1 4
+#define BLKIF_OP_PACKET 4
/*
* Indicate to the backend device that a region of storage is no longer in
* use, and may be discarded at any time without impact to the client. If
Index: xen-4.5.1-testing/xen/include/public/io/cdromif.h
===================================================================
--- /dev/null
+++ xen-4.5.1-testing/xen/include/public/io/cdromif.h
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * cdromif.h
+ *
+ * Shared definitions between backend driver and Xen guest Virtual CDROM
+ * block device.
+ *
+ * Copyright (c) 2008, Pat Campell plc@novell.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_IO_CDROMIF_H__
+#define __XEN_PUBLIC_IO_CDROMIF_H__
+
+#include <linux/cdrom.h>
+
+/*
+ * Queries backend for CDROM support
+ */
+#define XEN_TYPE_CDROM_SUPPORT _IO('c', 1)
+
+struct xen_cdrom_support
+{
+ uint32_t type;
+ int8_t ret; /* returned, 0 succeded, -1 error */
+ int8_t err; /* returned, backend errno */
+ int8_t supported; /* returned, 1 supported */
+};
+
+/*
+ * Opens backend device, returns drive geometry or
+ * any encountered errors
+ */
+#define XEN_TYPE_CDROM_OPEN _IO('c', 2)
+
+struct xen_cdrom_open
+{
+ uint32_t type;
+ int8_t ret;
+ int8_t err;
+ int8_t pad;
+ int8_t media_present; /* returned */
+ uint32_t sectors; /* returned */
+ uint32_t sector_size; /* returned */
+ int32_t payload_offset; /* offset to backend node name payload */
+};
+
+/*
+ * Queries backend for media changed status
+ */
+#define XEN_TYPE_CDROM_MEDIA_CHANGED _IO('c', 3)
+
+struct xen_cdrom_media_changed
+{
+ uint32_t type;
+ int8_t ret;
+ int8_t err;
+ int8_t media_changed; /* returned */
+};
+
+/*
+ * Sends vcd generic CDROM packet to backend, followed
+ * immediately by the vcd_generic_command payload
+ */
+#define XEN_TYPE_CDROM_PACKET _IO('c', 4)
+
+struct xen_cdrom_packet
+{
+ uint32_t type;
+ int8_t ret;
+ int8_t err;
+ int8_t pad[2];
+ int32_t payload_offset; /* offset to struct vcd_generic_command payload */
+};
+
+/* CDROM_PACKET_COMMAND, payload for XEN_TYPE_CDROM_PACKET */
+struct vcd_generic_command
+{
+ uint8_t cmd[CDROM_PACKET_SIZE];
+ uint8_t pad[4];
+ uint32_t buffer_offset;
+ uint32_t buflen;
+ int32_t stat;
+ uint32_t sense_offset;
+ uint8_t data_direction;
+ uint8_t pad1[3];
+ int32_t quiet;
+ int32_t timeout;
+};
+
+union xen_block_packet
+{
+ uint32_t type;
+ struct xen_cdrom_support xcs;
+ struct xen_cdrom_open xco;
+ struct xen_cdrom_media_changed xcmc;
+ struct xen_cdrom_packet xcp;
+};
+
+#define PACKET_PAYLOAD_OFFSET (sizeof(struct xen_cdrom_packet))
+#define PACKET_SENSE_OFFSET (PACKET_PAYLOAD_OFFSET + sizeof(struct vcd_generic_command))
+#define PACKET_BUFFER_OFFSET (PACKET_SENSE_OFFSET + sizeof(struct request_sense))
+#define MAX_PACKET_DATA (PAGE_SIZE - sizeof(struct xen_cdrom_packet) - \
+ sizeof(struct vcd_generic_command) - sizeof(struct request_sense))
+
+#endif

View File

@ -1,10 +1,10 @@
bug #239173
bug #242953
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -447,9 +447,9 @@ void xenstore_parse_domain_config(int hv
{
char **e_danger = NULL;
@ -32,7 +32,7 @@ Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -762,6 +770,7 @@ void xenstore_parse_domain_config(int hv
@@ -760,6 +768,7 @@ void xenstore_parse_domain_config(int hv
free(mode);
free(params);
free(dev);

10
blktap2-no-uninit.patch Normal file
View File

@ -0,0 +1,10 @@
--- xen-4.5.0-testing/tools/blktap2/drivers/Makefile.orig 2015-04-08 11:25:54.974241326 +0200
+++ xen-4.5.0-testing/tools/blktap2/drivers/Makefile 2015-04-08 11:26:10.150411238 +0200
@@ -11,6 +11,7 @@
CFLAGS += -Werror
CFLAGS += -Wno-unused
+CFLAGS += -Wno-error=array-bounds
CFLAGS += -fno-strict-aliasing
CFLAGS += -I$(BLKTAP_ROOT)/include -I$(BLKTAP_ROOT)/drivers
CFLAGS += $(CFLAGS_libxenctrl)

View File

@ -1,13 +0,0 @@
Index: xen-4.4.0-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-4.4.0-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.4.0-testing/tools/blktap/drivers/blktapctrl.c
@@ -282,7 +282,7 @@ static int del_disktype(blkif_t *blkif)
* qemu-dm instance. We may close the file handle only if there is
* no other disk left for this domain.
*/
- if (dtypes[type]->use_ioemu)
+ if (dtypes[type]->use_ioemu && dtypes[type]->idnum != DISK_TYPE_AIO)
return !qemu_instance_has_disks(blkif->tappid);
/* Caller should close() if no single controller, or list is empty. */

View File

@ -1,49 +0,0 @@
Index: xen-4.2.0-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-4.2.0-testing/tools/blktap/drivers/tapdisk.h
@@ -168,7 +168,7 @@ static disk_info_t aio_disk = {
"raw image (aio)",
"aio",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_aio,
#endif
@@ -179,7 +179,7 @@ static disk_info_t sync_disk = {
"raw image (sync)",
"sync",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_sync,
#endif
@@ -190,7 +190,7 @@ static disk_info_t vmdk_disk = {
"vmware image (vmdk)",
"vmdk",
1,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_vmdk,
#endif
@@ -212,7 +212,7 @@ static disk_info_t qcow_disk = {
"qcow disk (qcow)",
"qcow",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow,
#endif
@@ -223,7 +223,7 @@ static disk_info_t qcow2_disk = {
"qcow2 disk (qcow2)",
"qcow2",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow2,
#endif

View File

@ -1,27 +0,0 @@
Index: xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-4.2.0-testing/tools/blktap/drivers/blktapctrl.c
@@ -61,6 +61,9 @@
#include "list.h"
#include "xs_api.h" /* for xs_fire_next_watch() */
+#undef DPRINTF
+#define DPRINTF(_f, _a...) ((void)0)
+
#define PIDFILE "/var/run/blktapctrl.pid"
#define NUM_POLL_FDS 2
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -46,7 +46,7 @@
#define BLKTAP_CTRL_DIR "/var/run/tap"
/* If enabled, print debug messages to stderr */
-#if 1
+#if 0
#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a)
#else
#define DPRINTF(_f, _a...) ((void)0)

View File

@ -1,143 +0,0 @@
From f1ebeae7802a5775422004f62630c42e46dcf664 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:32:40 +0100
Subject: [PATCH 3/6] ioemu: Build tapdisk-ioemu binary
When changing away from the old ioemu, changes in the Makefiles
resulted in tapdisk-ioemu appearing there, but actually not
being built. This patch re-enables the build of tapdisk-ioemu.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
@@ -1,3 +1,4 @@
+CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc
CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/include
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/Makefile
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/Makefile
@@ -46,14 +46,6 @@ $(filter %-user,$(SUBDIR_RULES)): libqem
recurse-all: $(SUBDIR_RULES)
-CPPFLAGS += -I$(XEN_ROOT)/tools/libxc/include
-CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
-CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore/include
-CPPFLAGS += -I$(XEN_ROOT)/tools/include
-
-tapdisk-ioemu: tapdisk-ioemu.c cutils.c block.c block-raw.c block-cow.c block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c block-vpc.c block-vvfat.c block-qcow2.c hw/xen_blktap.c osdep.c
- $(CC) -DQEMU_TOOL $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) $(LDFLAGS) $(BASE_LDFLAGS) -o $@ $^ -lz $(LIBS)
-
#######################################################################
# BLOCK_OBJS is code used by both qemu system emulation and qemu-img
@@ -72,6 +64,21 @@ endif
BLOCK_OBJS += block-raw-posix.o
endif
+#######################################################################
+# tapdisk-ioemu
+
+hw/tapdisk-xen_blktap.o: hw/xen_blktap.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+tapdisk-ioemu.o: tapdisk-ioemu.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $<
+
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/libxc/include
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore/include
+tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/include
+tapdisk-ioemu: tapdisk-ioemu.o $(BLOCK_OBJS) qemu-tool.o hw/tapdisk-xen_blktap.o
+ $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS)
+
######################################################################
# libqemu_common.a: Target independent part of system emulation. The
# long term path is to suppress *all* target specific code in case of
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/configure
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/configure
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/configure
@@ -1512,7 +1512,7 @@ bsd)
;;
esac
-tools=
+tools="tapdisk-ioemu"
if test `expr "$target_list" : ".*softmmu.*"` != 0 ; then
tools="qemu-img\$(EXESUF) $tools"
if [ "$linux" = "yes" ] ; then
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-tool.c
@@ -68,7 +68,7 @@ void qemu_bh_delete(QEMUBH *bh)
qemu_free(bh);
}
-int qemu_set_fd_handler2(int fd,
+int __attribute__((weak)) qemu_set_fd_handler2(int fd,
IOCanRWHandler *fd_read_poll,
IOHandler *fd_read,
IOHandler *fd_write,
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -12,34 +12,12 @@
extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);
-extern void bdrv_init(void);
-
-extern void *qemu_mallocz(size_t size);
-extern void qemu_free(void *ptr);
extern void *fd_start;
int domid = 0;
FILE* logfile;
-void term_printf(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-void term_print_filename(const char *filename)
-{
- term_printf(filename);
-}
-
-
-typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
-typedef int IOCanRWHandler(void *opaque);
-typedef void IOHandler(void *opaque);
-
typedef struct IOHandlerRecord {
int fd;
IOCanRWHandler *fd_read_poll;
@@ -103,7 +81,6 @@ int main(void)
logfile = stderr;
bdrv_init();
- qemu_aio_init();
init_blktap();
/* Daemonize */
@@ -115,8 +92,6 @@ int main(void)
* completed aio operations.
*/
while (1) {
- qemu_aio_poll();
-
max_fd = -1;
FD_ZERO(&rfds);
for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next)

View File

@ -1,7 +1,7 @@
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -18,6 +18,7 @@
#include "exec-all.h"
#include "sysemu.h"
@ -32,7 +32,7 @@ Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
/* check if it is a cdrom */
if (danger_type && !strcmp(danger_type, "cdrom")) {
bdrv_set_type_hint(bs, BDRV_TYPE_CDROM);
@@ -1095,6 +1111,50 @@ static void xenstore_process_vcpu_set_ev
@@ -1083,6 +1099,50 @@ static void xenstore_process_vcpu_set_ev
return;
}
@ -83,7 +83,7 @@ Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
void xenstore_process_event(void *opaque)
{
char **vec, *offset, *bpath = NULL, *buf = NULL, *drv = NULL, *image = NULL;
@@ -1130,6 +1190,11 @@ void xenstore_process_event(void *opaque
@@ -1118,6 +1178,11 @@ void xenstore_process_event(void *opaque
xenstore_watch_callbacks[i].cb(vec[XS_WATCH_TOKEN],
xenstore_watch_callbacks[i].opaque);

View File

@ -1,8 +1,8 @@
Index: xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
Index: xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
===================================================================
--- xen-4.2.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s3.asl
+++ xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
@@ -20,13 +20,9 @@
--- xen-4.6.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s3.asl
+++ xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
@@ -19,13 +19,9 @@
DefinitionBlock ("SSDT_S3.aml", "SSDT", 2, "Xen", "HVM", 0)
{
@ -20,11 +20,11 @@ Index: xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
+ */
}
Index: xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
Index: xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
===================================================================
--- xen-4.2.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s4.asl
+++ xen-4.2.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
@@ -20,13 +20,9 @@
--- xen-4.6.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s4.asl
+++ xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
@@ -19,13 +19,9 @@
DefinitionBlock ("SSDT_S4.aml", "SSDT", 2, "Xen", "HVM", 0)
{

View File

@ -24,10 +24,10 @@ Signed-off-by: Mike Latimer <mlatimer@xxxxxxxx>
tools/hotplug/Linux/block | 89 ++++++++++++++++++++++++++++++-----------------
1 file changed, 57 insertions(+), 32 deletions(-)
Index: xen-4.4.3-testing/tools/hotplug/Linux/block
Index: xen-4.6.0-testing/tools/hotplug/Linux/block
===================================================================
--- xen-4.4.3-testing.orig/tools/hotplug/Linux/block
+++ xen-4.4.3-testing/tools/hotplug/Linux/block
--- xen-4.6.0-testing.orig/tools/hotplug/Linux/block
+++ xen-4.6.0-testing/tools/hotplug/Linux/block
@@ -38,7 +38,7 @@ find_free_loopback_dev() {
}
@ -185,7 +185,7 @@ Index: xen-4.4.3-testing/tools/hotplug/Linux/block
fi
}
@@ -279,15 +312,7 @@ mount it read-write in a guest domain."
@@ -281,15 +314,7 @@ mount it read-write in a guest domain."
fatal "Unable to lookup $file: dev: $dev inode: $inode"
fi

View File

@ -1,18 +0,0 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -247,8 +247,11 @@ static int open_disk(struct td_state *s,
drv = blktap_drivers[i].drv;
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
- /* Open the image */
- if (bdrv_open2(bs, path, flags, drv) != 0) {
+ /* Open the image
+ * Use BDRV_O_CACHE_WB for write-through caching,
+ * no flags for write-back caching
+ */
+ if (bdrv_open2(bs, path, flags|BDRV_O_CACHE_WB, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}

View File

@ -1,73 +0,0 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -360,6 +360,15 @@ static void qemu_send_responses(void* op
}
/**
+ * Callback function for AIO flush
+ */
+static void qemu_flush_response(void* opaque, int ret) {
+ if (ret != 0) {
+ DPRINTF("aio_flush: ret = %d (%s)\n", ret, strerror(-ret));
+ }
+}
+
+/**
* Callback function for the IO message pipe. Reads requests from the ring
* and processes them (call qemu read/write functions).
*
@@ -378,6 +387,7 @@ static void handle_blktap_iomsg(void* pr
blkif_t *blkif = s->blkif;
tapdev_info_t *info = s->ring_info;
int page_size = getpagesize();
+ int sync;
struct aiocb_info *aiocb_info;
@@ -410,7 +420,7 @@ static void handle_blktap_iomsg(void* pr
/* Don't allow writes on readonly devices */
if ((s->flags & TD_RDONLY) &&
- (req->operation == BLKIF_OP_WRITE)) {
+ (req->operation != BLKIF_OP_READ)) {
blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
goto send_response;
}
@@ -431,7 +441,7 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("Sector request failed:\n");
DPRINTF("%s request, idx [%d,%d] size [%llu], "
"sector [%llu,%llu]\n",
- (req->operation == BLKIF_OP_WRITE ?
+ (req->operation != BLKIF_OP_READ ?
"WRITE" : "READ"),
idx,i,
(long long unsigned)
@@ -444,8 +454,14 @@ static void handle_blktap_iomsg(void* pr
blkif->pending_list[idx].secs_pending += nsects;
- switch (req->operation)
+ sync = 0;
+ switch (req->operation)
{
+ case BLKIF_OP_WRITE_BARRIER:
+ sync = 1;
+ bdrv_aio_flush(s->bs, qemu_flush_response, NULL);
+ /* fall through */
+
case BLKIF_OP_WRITE:
aiocb_info = malloc(sizeof(*aiocb_info));
@@ -465,6 +481,10 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("ERROR: bdrv_write() == NULL\n");
goto send_response;
}
+
+ if (sync)
+ bdrv_aio_flush(s->bs, qemu_flush_response, NULL);
+
break;
case BLKIF_OP_READ:

View File

@ -1,22 +0,0 @@
--- a/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/xen_machine_fv.c
@@ -270,6 +270,7 @@ void qemu_invalidate_entry(uint8_t *buff
#endif /* defined(MAPCACHE) */
+extern void init_blktap(void);
static void xen_init_fv(ram_addr_t ram_size, int vga_ram_size,
const char *boot_device,
@@ -295,6 +296,11 @@ static void xen_init_fv(ram_addr_t ram_s
}
#endif
+#if defined(CONFIG_BLKTAP1) && !defined(CONFIG_STUBDOM) && !defined(__NetBSD__)
+ /* Initialize tapdisk client */
+ init_blktap();
+#endif
+
#ifdef CONFIG_STUBDOM /* the hvmop is not supported on older hypervisors */
xc_set_hvm_param(xc_handle, domid, HVM_PARAM_DM_DOMAIN, DOMID_SELF);
#endif

View File

@ -1,89 +0,0 @@
From 5ac882a6d7499e4a36103db071203bf4d1ddfe1f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:26:45 +0100
Subject: [PATCH 2/6] ioemu: Use the image format sent by blktapctrl
Currently the blktap backend in ioemu lets qemu guess which format an
image is in. This was a security problem and the blktap backend
doesn't work any more since this was fixed in qemu.
This patch changes ioemu to respect the format it gets from blktapctrl.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
hw/xen_blktap.c | 22 +++++++++++++++++++---
hw/xen_blktap.h | 14 ++++++++++++++
2 files changed, 33 insertions(+), 3 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -218,9 +218,10 @@ static int map_new_dev(struct td_state *
return -1;
}
-static int open_disk(struct td_state *s, char *path, int readonly)
+static int open_disk(struct td_state *s, char *path, int driver, int readonly)
{
BlockDriverState* bs;
+ BlockDriver* drv;
char* devname;
static int devnumber = 0;
int i;
@@ -230,7 +231,22 @@ static int open_disk(struct td_state *s,
bs = bdrv_new(devname);
free(devname);
- if (bdrv_open(bs, path, 0) != 0) {
+ /* Search for disk driver */
+ for (i = 0; blktap_drivers[i].idnum >= 0; i++) {
+ if (blktap_drivers[i].idnum == driver)
+ break;
+ }
+
+ if (blktap_drivers[i].idnum < 0) {
+ fprintf(stderr, "Could not find image format id %d\n", driver);
+ return -ENOMEM;
+ }
+
+ drv = blktap_drivers[i].drv;
+ DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
+
+ /* Open the image */
+ if (bdrv_open2(bs, path, 0, drv) != 0) {
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
@@ -521,7 +537,7 @@ static void handle_blktap_ctrlmsg(void*
s = state_init();
/*Open file*/
- if (s == NULL || open_disk(s, path, msg->readonly)) {
+ if (s == NULL || open_disk(s, path, msg->drivertype, msg->readonly)) {
msglen = sizeof(msg_hdr_t);
msg->type = CTLMSG_IMG_FAIL;
msg->len = msglen;
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.h
@@ -52,4 +52,18 @@ typedef struct fd_list_entry {
int init_blktap(void);
+typedef struct disk_info {
+ int idnum;
+ struct BlockDriver *drv;
+} disk_info_t;
+
+static disk_info_t blktap_drivers[] = {
+ { DISK_TYPE_AIO, &bdrv_raw },
+ { DISK_TYPE_SYNC, &bdrv_raw },
+ { DISK_TYPE_VMDK, &bdrv_vmdk },
+ { DISK_TYPE_QCOW, &bdrv_qcow },
+ { DISK_TYPE_QCOW2, &bdrv_qcow2 },
+ { -1, NULL }
+};
+
#endif /*XEN_BLKTAP_H_*/

View File

@ -1,31 +0,0 @@
From cb982fd919a52ff86f01025d0f92225bc7b2a956 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:44:31 +0100
Subject: [PATCH 5/6] ioemu: Fail on too small blktap disks
The blktap infrastructure doesn't seems to be able to cope with images
that are smaller than a sector, it produced hangs for me. Such an
image isn't really useful anyway, so just fail gracefully.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
hw/xen_blktap.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -256,6 +256,12 @@ static int open_disk(struct td_state *s,
s->size = bs->total_sectors;
s->sector_size = 512;
+ if (s->size == 0) {
+ fprintf(stderr, "Error: Disk image %s is too small\n",
+ path);
+ return -ENOMEM;
+ }
+
s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);
#ifndef QEMU_TOOL

View File

@ -2,6 +2,15 @@
tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c | 46 ++++++++++++++++
1 file changed, 46 insertions(+)
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
@@ -1,3 +1,4 @@
+CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc
CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/include
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c

View File

@ -1,5 +1,7 @@
--- /dev/null 2015-03-12 10:40:51.327307686 +0000
+++ xen-4.5.0-testing/tools/firmware/etherboot/patches/ipxe-no-error-logical-not-parentheses.patch 2015-03-12 12:21:37.394804667 +0000
Index: xen-4.6.0-testing/tools/firmware/etherboot/patches/ipxe-no-error-logical-not-parentheses.patch
===================================================================
--- /dev/null
+++ xen-4.6.0-testing/tools/firmware/etherboot/patches/ipxe-no-error-logical-not-parentheses.patch
@@ -0,0 +1,11 @@
+--- ipxe/src/Makefile.housekeeping.orig 2015-03-12 12:15:50.054891858 +0000
++++ ipxe/src/Makefile.housekeeping 2015-03-12 12:16:05.978071221 +0000
@ -11,11 +13,13 @@
++CFLAGS += -Werror -Wno-logical-not-parentheses
+ ASFLAGS += --fatal-warnings
+ endif
+
--- xen-4.5.0-testing/tools/firmware/etherboot/patches/series.orig 2015-03-12 12:24:54.895029501 +0000
+++ xen-4.5.0-testing/tools/firmware/etherboot/patches/series 2015-03-12 12:25:42.367564285 +0000
@@ -3,3 +3,4 @@
build_fix_2.patch
+
Index: xen-4.6.0-testing/tools/firmware/etherboot/patches/series
===================================================================
--- xen-4.6.0-testing.orig/tools/firmware/etherboot/patches/series
+++ xen-4.6.0-testing/tools/firmware/etherboot/patches/series
@@ -4,3 +4,4 @@ build_fix_2.patch
build_fix_3.patch
build-compare.patch
build_fix_4.patch
+ipxe-no-error-logical-not-parentheses.patch

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a7b3bed4f4132e9b65970b89a23e7d234728b44ae9c7a3c068ff33ea86fa48f5
size 2877798
oid sha256:6e448144cdd7d1b12a08094b6f955e2c75c167d05bf8da40ec5b9c085d920eef
size 2877217

View File

@ -7,11 +7,11 @@ https://bugzilla.novell.com/show_bug.cgi?id=879425
tools/libxl/libxlu_disk_l.l | 1 +
5 files changed, 18 insertions(+), 1 deletion(-)
Index: xen-4.5.2-testing/tools/libxl/libxl.c
Index: xen-4.6.0-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.c
+++ xen-4.5.2-testing/tools/libxl/libxl.c
@@ -2832,6 +2832,8 @@ static void device_disk_add(libxl__egc *
--- xen-4.6.0-testing.orig/tools/libxl/libxl.c
+++ xen-4.6.0-testing/tools/libxl/libxl.c
@@ -2829,6 +2829,8 @@ static void device_disk_add(libxl__egc *
flexarray_append_pair(back, "discard-enable",
libxl_defbool_val(disk->discard_enable) ?
"1" : "0");
@ -20,12 +20,12 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.c
flexarray_append(front, "backend-id");
flexarray_append(front, libxl__sprintf(gc, "%d", disk->backend_domid));
Index: xen-4.5.2-testing/tools/libxl/libxl.h
Index: xen-4.6.0-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.h
+++ xen-4.5.2-testing/tools/libxl/libxl.h
@@ -163,6 +163,18 @@
#define LIBXL_HAVE_BUILDINFO_HVM_MMIO_HOLE_MEMKB 1
--- xen-4.6.0-testing.orig/tools/libxl/libxl.h
+++ xen-4.6.0-testing/tools/libxl/libxl.h
@@ -205,6 +205,18 @@
#define LIBXL_HAVE_BUILDINFO_ARM_GIC_VERSION 1
/*
+ * The libxl_device_disk has no way to indicate that cache=unsafe is
@ -43,10 +43,10 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.h
* libxl ABI compatibility
*
* The only guarantee which libxl makes regarding ABI compatibility
Index: xen-4.5.2-testing/tools/libxl/libxlu_disk.c
Index: xen-4.6.0-testing/tools/libxl/libxlu_disk.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxlu_disk.c
+++ xen-4.5.2-testing/tools/libxl/libxlu_disk.c
--- xen-4.6.0-testing.orig/tools/libxl/libxlu_disk.c
+++ xen-4.6.0-testing/tools/libxl/libxlu_disk.c
@@ -79,6 +79,8 @@ int xlu_disk_parse(XLU_Config *cfg,
if (!disk->pdev_path || !strcmp(disk->pdev_path, ""))
disk->format = LIBXL_DISK_FORMAT_EMPTY;
@ -56,10 +56,10 @@ Index: xen-4.5.2-testing/tools/libxl/libxlu_disk.c
if (!disk->vdev) {
xlu__disk_err(&dpc,0, "no vdev specified");
Index: xen-4.5.2-testing/tools/libxl/libxlu_disk_i.h
Index: xen-4.6.0-testing/tools/libxl/libxlu_disk_i.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxlu_disk_i.h
+++ xen-4.5.2-testing/tools/libxl/libxlu_disk_i.h
--- xen-4.6.0-testing.orig/tools/libxl/libxlu_disk_i.h
+++ xen-4.6.0-testing/tools/libxl/libxlu_disk_i.h
@@ -10,7 +10,7 @@ typedef struct {
void *scanner;
YY_BUFFER_STATE buf;
@ -69,10 +69,10 @@ Index: xen-4.5.2-testing/tools/libxl/libxlu_disk_i.h
const char *spec;
} DiskParseContext;
Index: xen-4.5.2-testing/tools/libxl/libxlu_disk_l.l
Index: xen-4.6.0-testing/tools/libxl/libxlu_disk_l.l
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxlu_disk_l.l
+++ xen-4.5.2-testing/tools/libxl/libxlu_disk_l.l
--- xen-4.6.0-testing.orig/tools/libxl/libxlu_disk_l.l
+++ xen-4.6.0-testing/tools/libxl/libxlu_disk_l.l
@@ -176,6 +176,7 @@ script=[^,]*,? { STRIP(','); SAVESTRING(
direct-io-safe,? { DPC->disk->direct_io_safe = 1; }
discard,? { libxl_defbool_set(&DPC->disk->discard_enable, true); }

View File

@ -31,11 +31,11 @@ ee2e7e5 Merge pull request #1 from aaannz/pvscsi
7de6f49 support character devices too
c84381b allow /dev/sda as scsi devspec
f11e3a2 pvscsi
Index: xen-4.5.2-testing/docs/man/xl.cfg.pod.5
Index: xen-4.6.0-testing/docs/man/xl.cfg.pod.5
===================================================================
--- xen-4.5.2-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.5.2-testing/docs/man/xl.cfg.pod.5
@@ -448,6 +448,36 @@ value is optional if this is a guest dom
--- xen-4.6.0-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.6.0-testing/docs/man/xl.cfg.pod.5
@@ -506,6 +506,36 @@ value is optional if this is a guest dom
=back
@ -72,11 +72,11 @@ Index: xen-4.5.2-testing/docs/man/xl.cfg.pod.5
=item B<vfb=[ "VFB_SPEC_STRING", "VFB_SPEC_STRING", ...]>
Specifies the paravirtual framebuffer devices which should be supplied
Index: xen-4.5.2-testing/docs/man/xl.pod.1
Index: xen-4.6.0-testing/docs/man/xl.pod.1
===================================================================
--- xen-4.5.2-testing.orig/docs/man/xl.pod.1
+++ xen-4.5.2-testing/docs/man/xl.pod.1
@@ -1323,6 +1323,26 @@ List virtual trusted platform modules fo
--- xen-4.6.0-testing.orig/docs/man/xl.pod.1
+++ xen-4.6.0-testing/docs/man/xl.pod.1
@@ -1293,6 +1293,26 @@ List virtual trusted platform modules fo
=back
@ -103,11 +103,11 @@ Index: xen-4.5.2-testing/docs/man/xl.pod.1
=head1 PCI PASS-THROUGH
=over 4
Index: xen-4.5.2-testing/tools/libxl/libxl.c
Index: xen-4.6.0-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.c
+++ xen-4.5.2-testing/tools/libxl/libxl.c
@@ -2324,6 +2324,273 @@ int libxl_devid_to_device_vtpm(libxl_ctx
--- xen-4.6.0-testing.orig/tools/libxl/libxl.c
+++ xen-4.6.0-testing/tools/libxl/libxl.c
@@ -2319,6 +2319,273 @@ int libxl_devid_to_device_vtpm(libxl_ctx
return rc;
}
@ -381,7 +381,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.c
/******************************************************************************/
@@ -4199,6 +4466,8 @@ out:
@@ -4133,6 +4400,8 @@ out:
* libxl_device_vkb_destroy
* libxl_device_vfb_remove
* libxl_device_vfb_destroy
@ -390,7 +390,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.c
*/
#define DEFINE_DEVICE_REMOVE(type, removedestroy, f) \
int libxl_device_##type##_##removedestroy(libxl_ctx *ctx, \
@@ -4254,6 +4523,10 @@ DEFINE_DEVICE_REMOVE(vtpm, destroy, 1)
@@ -4188,6 +4457,10 @@ DEFINE_DEVICE_REMOVE(vtpm, destroy, 1)
* 1. add support for secondary consoles to xenconsoled
* 2. dynamically add/remove qemu chardevs via qmp messages. */
@ -401,7 +401,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.c
#undef DEFINE_DEVICE_REMOVE
/******************************************************************************/
@@ -4263,6 +4536,7 @@ DEFINE_DEVICE_REMOVE(vtpm, destroy, 1)
@@ -4197,6 +4470,7 @@ DEFINE_DEVICE_REMOVE(vtpm, destroy, 1)
* libxl_device_disk_add
* libxl_device_nic_add
* libxl_device_vtpm_add
@ -409,7 +409,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.c
*/
#define DEFINE_DEVICE_ADD(type) \
@@ -4294,6 +4568,9 @@ DEFINE_DEVICE_ADD(nic)
@@ -4228,6 +4502,9 @@ DEFINE_DEVICE_ADD(nic)
/* vtpm */
DEFINE_DEVICE_ADD(vtpm)
@ -419,7 +419,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.c
#undef DEFINE_DEVICE_ADD
/******************************************************************************/
@@ -6836,6 +7113,20 @@ out:
@@ -6780,6 +7057,20 @@ out:
return rc;
}
@ -440,11 +440,11 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.c
/*
* Local variables:
* mode: C
Index: xen-4.5.2-testing/tools/libxl/libxl.h
Index: xen-4.6.0-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.h
+++ xen-4.5.2-testing/tools/libxl/libxl.h
@@ -1238,6 +1238,26 @@ libxl_device_vtpm *libxl_device_vtpm_lis
--- xen-4.6.0-testing.orig/tools/libxl/libxl.h
+++ xen-4.6.0-testing/tools/libxl/libxl.h
@@ -1435,6 +1435,26 @@ libxl_device_vtpm *libxl_device_vtpm_lis
int libxl_device_vtpm_getinfo(libxl_ctx *ctx, uint32_t domid,
libxl_device_vtpm *vtpm, libxl_vtpminfo *vtpminfo);
@ -471,7 +471,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.h
/* Keyboard */
int libxl_device_vkb_add(libxl_ctx *ctx, uint32_t domid, libxl_device_vkb *vkb,
const libxl_asyncop_how *ao_how)
@@ -1495,6 +1515,27 @@ int libxl_fd_set_nonblock(libxl_ctx *ctx
@@ -1740,6 +1760,27 @@ int libxl_fd_set_nonblock(libxl_ctx *ctx
#include <libxl_event.h>
@ -499,11 +499,11 @@ Index: xen-4.5.2-testing/tools/libxl/libxl.h
#endif /* LIBXL_H */
/*
Index: xen-4.5.2-testing/tools/libxl/libxl_create.c
Index: xen-4.6.0-testing/tools/libxl/libxl_create.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.5.2-testing/tools/libxl/libxl_create.c
@@ -1141,6 +1141,7 @@ static void domcreate_rebuild_done(libxl
--- xen-4.6.0-testing.orig/tools/libxl/libxl_create.c
+++ xen-4.6.0-testing/tools/libxl/libxl_create.c
@@ -1149,6 +1149,7 @@ static void domcreate_rebuild_done(libxl
libxl__multidev_begin(ao, &dcs->multidev);
dcs->multidev.callback = domcreate_launch_dm;
libxl__add_disks(egc, ao, domid, d_config, &dcs->multidev);
@ -511,11 +511,11 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_create.c
libxl__multidev_prepared(egc, &dcs->multidev, 0);
return;
Index: xen-4.5.2-testing/tools/libxl/libxl_device.c
Index: xen-4.6.0-testing/tools/libxl/libxl_device.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_device.c
+++ xen-4.5.2-testing/tools/libxl/libxl_device.c
@@ -541,6 +541,7 @@ void libxl__multidev_prepared(libxl__egc
--- xen-4.6.0-testing.orig/tools/libxl/libxl_device.c
+++ xen-4.6.0-testing/tools/libxl/libxl_device.c
@@ -543,6 +543,7 @@ void libxl__multidev_prepared(libxl__egc
* The following functions are defined:
* libxl__add_disks
* libxl__add_nics
@ -523,7 +523,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_device.c
* libxl__add_vtpms
*/
@@ -560,10 +561,32 @@ void libxl__multidev_prepared(libxl__egc
@@ -562,10 +563,32 @@ void libxl__multidev_prepared(libxl__egc
DEFINE_DEVICES_ADD(disk)
DEFINE_DEVICES_ADD(nic)
@ -556,11 +556,11 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_device.c
/******************************************************************************/
int libxl__device_destroy(libxl__gc *gc, libxl__device *dev)
Index: xen-4.5.2-testing/tools/libxl/libxl_internal.h
Index: xen-4.6.0-testing/tools/libxl/libxl_internal.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.5.2-testing/tools/libxl/libxl_internal.h
@@ -1094,6 +1094,7 @@ _hidden int libxl__device_disk_setdefaul
--- xen-4.6.0-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.6.0-testing/tools/libxl/libxl_internal.h
@@ -1185,6 +1185,7 @@ _hidden int libxl__device_disk_setdefaul
_hidden int libxl__device_nic_setdefault(libxl__gc *gc, libxl_device_nic *nic,
uint32_t domid);
_hidden int libxl__device_vtpm_setdefault(libxl__gc *gc, libxl_device_vtpm *vtpm);
@ -568,7 +568,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_internal.h
_hidden int libxl__device_vfb_setdefault(libxl__gc *gc, libxl_device_vfb *vfb);
_hidden int libxl__device_vkb_setdefault(libxl__gc *gc, libxl_device_vkb *vkb);
_hidden int libxl__device_pci_setdefault(libxl__gc *gc, libxl_device_pci *pci);
@@ -2405,6 +2406,10 @@ _hidden void libxl__device_vtpm_add(libx
@@ -2561,6 +2562,10 @@ _hidden void libxl__device_vtpm_add(libx
libxl_device_vtpm *vtpm,
libxl__ao_device *aodev);
@ -579,7 +579,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_internal.h
/* Internal function to connect a vkb device */
_hidden int libxl__device_vkb_add(libxl__gc *gc, uint32_t domid,
libxl_device_vkb *vkb);
@@ -3029,6 +3034,10 @@ _hidden void libxl__add_vtpms(libxl__egc
@@ -3277,6 +3282,10 @@ _hidden void libxl__add_vtpms(libxl__egc
libxl_domain_config *d_config,
libxl__multidev *multidev);
@ -590,11 +590,11 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_internal.h
/*----- device model creation -----*/
/* First layer; wraps libxl__spawn_spawn. */
Index: xen-4.5.2-testing/tools/libxl/libxl_types.idl
Index: xen-4.6.0-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.5.2-testing/tools/libxl/libxl_types.idl
@@ -540,6 +540,26 @@ libxl_device_channel = Struct("device_ch
--- xen-4.6.0-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.6.0-testing/tools/libxl/libxl_types.idl
@@ -617,6 +617,26 @@ libxl_device_channel = Struct("device_ch
])),
])
@ -621,7 +621,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_types.idl
libxl_domain_config = Struct("domain_config", [
("c_info", libxl_domain_create_info),
("b_info", libxl_domain_build_info),
@@ -553,6 +573,8 @@ libxl_domain_config = Struct("domain_con
@@ -632,6 +652,8 @@ libxl_domain_config = Struct("domain_con
# a channel manifests as a console with a name,
# see docs/misc/channels.txt
("channels", Array(libxl_device_channel, "num_channels")),
@ -630,7 +630,7 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_types.idl
("on_poweroff", libxl_action_on_shutdown),
("on_reboot", libxl_action_on_shutdown),
@@ -595,6 +617,28 @@ libxl_vtpminfo = Struct("vtpminfo", [
@@ -674,6 +696,28 @@ libxl_vtpminfo = Struct("vtpminfo", [
("uuid", libxl_uuid),
], dir=DIR_OUT)
@ -659,10 +659,10 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_types.idl
libxl_vcpuinfo = Struct("vcpuinfo", [
("vcpuid", uint32),
("cpu", uint32),
Index: xen-4.5.2-testing/tools/libxl/libxl_types_internal.idl
Index: xen-4.6.0-testing/tools/libxl/libxl_types_internal.idl
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_types_internal.idl
+++ xen-4.5.2-testing/tools/libxl/libxl_types_internal.idl
--- xen-4.6.0-testing.orig/tools/libxl/libxl_types_internal.idl
+++ xen-4.6.0-testing/tools/libxl/libxl_types_internal.idl
@@ -22,6 +22,7 @@ libxl__device_kind = Enumeration("device
(6, "VKBD"),
(7, "CONSOLE"),
@ -671,11 +671,11 @@ Index: xen-4.5.2-testing/tools/libxl/libxl_types_internal.idl
])
libxl__console_backend = Enumeration("console_backend", [
Index: xen-4.5.2-testing/tools/libxl/xl.h
Index: xen-4.6.0-testing/tools/libxl/xl.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/xl.h
+++ xen-4.5.2-testing/tools/libxl/xl.h
@@ -83,6 +83,9 @@ int main_channellist(int argc, char **ar
--- xen-4.6.0-testing.orig/tools/libxl/xl.h
+++ xen-4.6.0-testing/tools/libxl/xl.h
@@ -82,6 +82,9 @@ int main_channellist(int argc, char **ar
int main_blockattach(int argc, char **argv);
int main_blocklist(int argc, char **argv);
int main_blockdetach(int argc, char **argv);
@ -685,10 +685,10 @@ Index: xen-4.5.2-testing/tools/libxl/xl.h
int main_vtpmattach(int argc, char **argv);
int main_vtpmlist(int argc, char **argv);
int main_vtpmdetach(int argc, char **argv);
Index: xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
Index: xen-4.6.0-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
--- xen-4.6.0-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.6.0-testing/tools/libxl/xl_cmdimpl.c
@@ -17,6 +17,7 @@
#include "libxl_osdeps.h"
@ -705,7 +705,7 @@ Index: xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
#include <xen/hvm/e820.h>
#include "libxl.h"
@@ -549,6 +551,122 @@ static void set_default_nic_values(libxl
@@ -626,6 +628,122 @@ static void set_default_nic_values(libxl
}
}
@ -828,16 +828,16 @@ Index: xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
static void split_string_into_string_list(const char *str,
const char *delim,
libxl_string_list *psl)
@@ -918,7 +1036,7 @@ static void parse_config_data(const char
@@ -1261,7 +1379,7 @@ static void parse_config_data(const char
const char *buf;
long l;
long l, vcpus = 0;
XLU_Config *config;
- XLU_ConfigList *cpus, *vbds, *nics, *pcis, *cvfbs, *cpuids, *vtpms;
+ XLU_ConfigList *cpus, *vbds, *nics, *pcis, *cvfbs, *cpuids, *vtpms, *vscsis;
XLU_ConfigList *channels, *ioports, *irqs, *iomem, *viridian;
XLU_ConfigList *channels, *ioports, *irqs, *iomem, *viridian, *dtdevs;
int num_ioports, num_irqs, num_iomem, num_cpus, num_viridian;
int pci_power_mgmt = 0;
@@ -1421,6 +1539,66 @@ static void parse_config_data(const char
@@ -1782,6 +1900,66 @@ static void parse_config_data(const char
}
}
@ -904,7 +904,7 @@ Index: xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
if (!xlu_cfg_get_list(config, "vtpm", &vtpms, 0, 0)) {
d_config->num_vtpms = 0;
d_config->vtpms = NULL;
@@ -6511,6 +6689,256 @@ int main_blockdetach(int argc, char **ar
@@ -6702,6 +6880,256 @@ int main_blockdetach(int argc, char **ar
return rc;
}
@ -1161,11 +1161,11 @@ Index: xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
int main_vtpmattach(int argc, char **argv)
{
int opt;
Index: xen-4.5.2-testing/tools/libxl/xl_cmdtable.c
Index: xen-4.6.0-testing/tools/libxl/xl_cmdtable.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.5.2-testing/tools/libxl/xl_cmdtable.c
@@ -372,6 +372,21 @@ struct cmd_spec cmd_table[] = {
--- xen-4.6.0-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.6.0-testing/tools/libxl/xl_cmdtable.c
@@ -351,6 +351,21 @@ struct cmd_spec cmd_table[] = {
"Destroy a domain's virtual block device",
"<Domain> <DevId>",
},

View File

@ -1,513 +0,0 @@
From 77deb80879859ed279e24a790ec08e9c5d37dd0e Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Wed, 5 Feb 2014 14:37:53 +0100
Subject: libxl: set migration constraints from cmdline
Add new options to xl migrate to control the process of migration.
The intention is to optionally abort the migration if it takes too long
to migrate a busy guest due to the high number of new dirty pages.
Currently the guest is suspended to transfer the remaining dirty pages.
The suspend/resume cycle will cause a time jump. This transfer can take
a long time, which can confuse the guest if the time jump is too far.
The new options allow to override the built-in default values, which are
not changed by this patch.
--max_iters <number> Number of iterations before final suspend (default: 30)
--max_factor <factor> Max amount of memory to transfer before final suspend (default: 3*RAM)
--min_remaing <pages> Number of dirty pages before stop&copy (default: 50)
--abort_if_busy Abort migration instead of doing final suspend.
The changes to libxl change the API, handle LIBXL_API_VERSION == 0x040200.
v8:
- merge --min_remaing changes
- tools/libxc: print stats if migration is aborted
- use special _suse version of lib calls to preserve ABI
v7:
- remove short options
- update description of --abort_if_busy in xl.1
- extend description of --abort_if_busy in xl help
- add comment to libxl_domain_suspend declaration, props is optional
v6:
- update the LIBXL_API_VERSION handling for libxl_domain_suspend
change it to an inline function if LIBXL_API_VERSION is defined to 4.2.0
- rename libxl_save_properties to libxl_domain_suspend_properties
- rename ->xlflags to ->flags within that struct
v5:
- adjust libxl_domain_suspend prototype, move flags, max_iters,
max_factor into a new, optional struct libxl_save_properties
- rename XCFLAGS_DOMSAVE_NOSUSPEND to XCFLAGS_DOMSAVE_ABORT_IF_BUSY
- rename LIBXL_SUSPEND_NO_FINAL_SUSPEND to LIBXL_SUSPEND_ABORT_IF_BUSY
- rename variables no_suspend to abort_if_busy
- rename option -N/--no_suspend to -A/--abort_if_busy
- update xl.1, extend description of -A option
v4:
- update default for no_suspend from None to 0 in XendCheckpoint.py:save
- update logoutput in setMigrateConstraints
- change xm migrate defaults from None to 0
- add new options to xl.1
- fix syntax error in XendDomain.py:domain_migrate_constraints_set
- fix xm migrate -N option name to match xl migrate
v3:
- move logic errors in libxl__domain_suspend and fixed help text in
cmd_table to separate patches
- fix syntax error in XendCheckpoint.py
- really pass max_iters and max_factor in libxl__xc_domain_save
- make libxl_domain_suspend_0x040200 declaration globally visible
- bump libxenlight.so SONAME from 2.0 to 2.1 due to changed
libxl_domain_suspend
v2:
- use LIBXL_API_VERSION and define libxl_domain_suspend_0x040200
- fix logic error in min_reached check in xc_domain_save
- add longopts
- update --help text
- correct description of migrate --help text
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
docs/man/xl.pod.1 | 20 +++++++++++++++++++
tools/libxc/include/xenguest.h | 6 +++++
tools/libxc/xc_domain_save.c | 26 ++++++++++++++++++++++--
tools/libxc/xc_nomigrate.c | 9 ++++++++
tools/libxl/libxl.c | 27 ++++++++++++++++++++++---
tools/libxl/libxl.h | 15 ++++++++++++++
tools/libxl/libxl_dom.c | 1
tools/libxl/libxl_internal.h | 4 +++
tools/libxl/libxl_save_callout.c | 4 ++-
tools/libxl/libxl_save_helper.c | 4 ++-
tools/libxl/xl_cmdimpl.c | 41 +++++++++++++++++++++++++++++++++------
tools/libxl/xl_cmdtable.c | 23 ++++++++++++++-------
12 files changed, 159 insertions(+), 21 deletions(-)
Index: xen-4.5.2-testing/docs/man/xl.pod.1
===================================================================
--- xen-4.5.2-testing.orig/docs/man/xl.pod.1
+++ xen-4.5.2-testing/docs/man/xl.pod.1
@@ -428,6 +428,26 @@ Send <config> instead of config file fro
Print huge (!) amount of debug during the migration process.
+=item B<--max_iters> I<number>
+
+Number of iterations before final suspend (default: 30)
+
+=item B<--max_factor> I<factor>
+
+Max amount of memory to transfer before final suspend (default: 3*RAM)
+
+=item B<--min_remaining>
+
+Number of remaining dirty pages. If the number of dirty pages drops that
+low the guest is suspended and the remaing pages are transfered to <host>.
+
+=item B<--abort_if_busy>
+
+Abort migration instead of doing final suspend/transfer/resume if the
+guest has still dirty pages after the number of iterations and/or the
+amount of RAM transferred. This avoids long periods of time where the
+guest is suspended.
+
=back
=item B<remus> [I<OPTIONS>] I<domain-id> I<host>
Index: xen-4.5.2-testing/tools/libxc/include/xenguest.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxc/include/xenguest.h
+++ xen-4.5.2-testing/tools/libxc/include/xenguest.h
@@ -28,6 +28,7 @@
#define XCFLAGS_HVM (1 << 2)
#define XCFLAGS_STDVGA (1 << 3)
#define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4)
+#define XCFLAGS_DOMSAVE_ABORT_IF_BUSY (1 << 5)
#define X86_64_B_SIZE 64
#define X86_32_B_SIZE 32
@@ -88,6 +89,11 @@ int xc_domain_save(xc_interface *xch, in
uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
struct save_callbacks* callbacks, int hvm);
+int xc_domain_save_suse(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+ uint32_t min_remaining,
+ struct save_callbacks* callbacks, int hvm);
+
/* callbacks provided by xc_domain_restore */
struct restore_callbacks {
Index: xen-4.5.2-testing/tools/libxc/xc_domain_save.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxc/xc_domain_save.c
+++ xen-4.5.2-testing/tools/libxc/xc_domain_save.c
@@ -44,6 +44,7 @@
*/
#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */
+#define DEF_MIN_REMAINING 50 /* low water mark of dirty pages */
struct save_ctx {
unsigned long hvirt_start; /* virtual starting address of the hypervisor */
@@ -800,8 +801,9 @@ static int save_tsc_info(xc_interface *x
return 0;
}
-int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
+int xc_domain_save_suse(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags,
+ uint32_t min_remaining,
struct save_callbacks* callbacks, int hvm)
{
xc_dominfo_t info;
@@ -810,6 +812,7 @@ int xc_domain_save(xc_interface *xch, in
int rc, frc, i, j, last_iter = 0, iter = 0;
int live = (flags & XCFLAGS_LIVE);
int debug = (flags & XCFLAGS_DEBUG);
+ int abort_if_busy = (flags & XCFLAGS_DOMSAVE_ABORT_IF_BUSY);
int superpages = !!hvm;
int race = 0, sent_last_iter, skip_this_iter = 0;
unsigned int sent_this_iter = 0;
@@ -910,6 +913,7 @@ int xc_domain_save(xc_interface *xch, in
/* If no explicit control parameters given, use defaults */
max_iters = max_iters ? : DEF_MAX_ITERS;
max_factor = max_factor ? : DEF_MAX_FACTOR;
+ min_remaining = min_remaining ? : DEF_MIN_REMAINING;
if ( !get_platform_info(xch, dom,
&ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, &dinfo->guest_width) )
@@ -1536,10 +1540,21 @@ int xc_domain_save(xc_interface *xch, in
if ( live )
{
+ int min_reached = sent_this_iter + skip_this_iter < min_remaining;
if ( (iter >= max_iters) ||
- (sent_this_iter+skip_this_iter < 50) ||
+ min_reached ||
(total_sent > dinfo->p2m_size*max_factor) )
{
+ if ( !min_reached && abort_if_busy )
+ {
+ DPRINTF("Live migration aborted, as requested. (guest too busy?)");
+ DPRINTF(" total_sent %lu iter %d, max_iters %u max_factor %u",
+ total_sent, iter, max_iters, max_factor);
+ print_stats(xch, dom, sent_this_iter, &time_stats, &shadow_stats, 1);
+ rc = 1;
+ goto out;
+ }
+
DPRINTF("Start last iteration\n");
last_iter = 1;
@@ -2181,6 +2196,13 @@ exit:
return !!errno;
}
+int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags,
+ struct save_callbacks* callbacks, int hvm)
+{
+ return xc_domain_save_suse(xch, io_fd, dom, max_iters, max_factor, flags, 0, callbacks, hvm);
+}
+
/*
* Local variables:
* mode: C
Index: xen-4.5.2-testing/tools/libxc/xc_nomigrate.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxc/xc_nomigrate.c
+++ xen-4.5.2-testing/tools/libxc/xc_nomigrate.c
@@ -21,6 +21,15 @@
#include <xenctrl.h>
#include <xenguest.h>
+int xc_domain_save_suse(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags,
+ uint32_t min_remaining,
+ struct save_callbacks* callbacks, int hvm)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags,
struct save_callbacks* callbacks, int hvm)
Index: xen-4.5.2-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.c
+++ xen-4.5.2-testing/tools/libxl/libxl.c
@@ -958,7 +958,8 @@ static void domain_suspend_cb(libxl__egc
}
-int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
+static int do_libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd,
+ const libxl_domain_suspend_suse_properties *props,
const libxl_asyncop_how *ao_how)
{
AO_CREATE(ctx, domid, ao_how);
@@ -979,8 +980,14 @@ int libxl_domain_suspend(libxl_ctx *ctx,
dss->domid = domid;
dss->fd = fd;
dss->type = type;
- dss->live = flags & LIBXL_SUSPEND_LIVE;
- dss->debug = flags & LIBXL_SUSPEND_DEBUG;
+ if (props) {
+ dss->live = props->flags & LIBXL_SUSPEND_LIVE;
+ dss->debug = props->flags & LIBXL_SUSPEND_DEBUG;
+ dss->max_iters = props->max_iters;
+ dss->max_factor = props->max_factor;
+ dss->min_remaining = props->min_remaining;
+ dss->xlflags = props->flags;
+ }
libxl__domain_suspend(egc, dss);
return AO_INPROGRESS;
@@ -989,6 +996,20 @@ int libxl_domain_suspend(libxl_ctx *ctx,
return AO_ABORT(rc);
}
+int libxl_domain_suspend_suse(libxl_ctx *ctx, uint32_t domid, int fd,
+ const libxl_domain_suspend_suse_properties *props,
+ const libxl_asyncop_how *ao_how)
+{
+ return do_libxl_domain_suspend(ctx, domid, fd, props, ao_how);
+}
+
+int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
+ const libxl_asyncop_how *ao_how)
+{
+ libxl_domain_suspend_suse_properties props = { .flags = flags };
+ return do_libxl_domain_suspend(ctx, domid, fd, &props, ao_how);
+}
+
int libxl_domain_pause(libxl_ctx *ctx, uint32_t domid)
{
int ret;
Index: xen-4.5.2-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.h
+++ xen-4.5.2-testing/tools/libxl/libxl.h
@@ -959,8 +959,23 @@ int libxl_domain_suspend(libxl_ctx *ctx,
int flags, /* LIBXL_SUSPEND_* */
const libxl_asyncop_how *ao_how)
LIBXL_EXTERNAL_CALLERS_ONLY;
+
+typedef struct {
+ int flags; /* LIBXL_SUSPEND_* */
+ int max_iters;
+ int max_factor;
+ int min_remaining;
+} libxl_domain_suspend_suse_properties;
+
+#define LIBXL_HAVE_DOMAIN_SUSPEND_SUSE
+int libxl_domain_suspend_suse(libxl_ctx *ctx, uint32_t domid, int fd,
+ const libxl_domain_suspend_suse_properties *props, /* optional */
+ const libxl_asyncop_how *ao_how)
+ LIBXL_EXTERNAL_CALLERS_ONLY;
+
#define LIBXL_SUSPEND_DEBUG 1
#define LIBXL_SUSPEND_LIVE 2
+#define LIBXL_SUSPEND_ABORT_IF_BUSY 4
/* @param suspend_cancel [from xenctrl.h:xc_domain_resume( @param fast )]
* If this parameter is true, use co-operative resume. The guest
Index: xen-4.5.2-testing/tools/libxl/libxl_dom.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_dom.c
+++ xen-4.5.2-testing/tools/libxl/libxl_dom.c
@@ -1815,6 +1815,7 @@ void libxl__domain_suspend(libxl__egc *e
dss->xcflags = (live ? XCFLAGS_LIVE : 0)
| (debug ? XCFLAGS_DEBUG : 0)
+ | (dss->xlflags & LIBXL_SUSPEND_ABORT_IF_BUSY ? XCFLAGS_DOMSAVE_ABORT_IF_BUSY : 0)
| (dss->hvm ? XCFLAGS_HVM : 0);
dss->guest_evtchn.port = -1;
Index: xen-4.5.2-testing/tools/libxl/libxl_internal.h
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.5.2-testing/tools/libxl/libxl_internal.h
@@ -2818,6 +2818,10 @@ struct libxl__domain_suspend_state {
libxl__ev_evtchn guest_evtchn;
int guest_evtchn_lockfd;
int hvm;
+ int max_iters;
+ int max_factor;
+ int min_remaining;
+ int xlflags;
int xcflags;
int guest_responded;
libxl__xswait_state pvcontrol;
Index: xen-4.5.2-testing/tools/libxl/libxl_save_callout.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_save_callout.c
+++ xen-4.5.2-testing/tools/libxl/libxl_save_callout.c
@@ -110,7 +110,9 @@ void libxl__xc_domain_save(libxl__egc *e
}
const unsigned long argnums[] = {
- dss->domid, 0, 0, dss->xcflags, dss->hvm,
+ dss->domid,
+ dss->max_iters, dss->max_factor, dss->min_remaining,
+ dss->xcflags, dss->hvm,
toolstack_data_fd, toolstack_data_len,
cbflags,
};
Index: xen-4.5.2-testing/tools/libxl/libxl_save_helper.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl_save_helper.c
+++ xen-4.5.2-testing/tools/libxl/libxl_save_helper.c
@@ -215,6 +215,7 @@ int main(int argc, char **argv)
uint32_t dom = strtoul(NEXTARG,0,10);
uint32_t max_iters = strtoul(NEXTARG,0,10);
uint32_t max_factor = strtoul(NEXTARG,0,10);
+ uint32_t min_remaining = strtoul(NEXTARG,0,10);
uint32_t flags = strtoul(NEXTARG,0,10);
int hvm = atoi(NEXTARG);
toolstack_save_fd = atoi(NEXTARG);
@@ -228,7 +229,8 @@ int main(int argc, char **argv)
helper_setcallbacks_save(&helper_save_callbacks, cbflags);
startup("save");
- r = xc_domain_save(xch, io_fd, dom, max_iters, max_factor, flags,
+ r = xc_domain_save_suse(xch, io_fd, dom, max_iters, max_factor, flags,
+ min_remaining,
&helper_save_callbacks, hvm);
complete(r);
Index: xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.5.2-testing/tools/libxl/xl_cmdimpl.c
@@ -3880,6 +3880,8 @@ static void migrate_do_preamble(int send
}
static void migrate_domain(uint32_t domid, const char *rune, int debug,
+ int max_iters, int max_factor,
+ int min_remaining, int abort_if_busy,
const char *override_config_file)
{
pid_t child = -1;
@@ -3888,7 +3890,13 @@ static void migrate_domain(uint32_t domi
char *away_domname;
char rc_buf;
uint8_t *config_data;
- int config_len, flags = LIBXL_SUSPEND_LIVE;
+ int config_len;
+ libxl_domain_suspend_suse_properties props = {
+ .flags = LIBXL_SUSPEND_LIVE,
+ .max_iters = max_iters,
+ .max_factor = max_factor,
+ .min_remaining = min_remaining,
+ };
save_domain_core_begin(domid, override_config_file,
&config_data, &config_len);
@@ -3907,10 +3915,13 @@ static void migrate_domain(uint32_t domi
xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
if (debug)
- flags |= LIBXL_SUSPEND_DEBUG;
- rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL);
+ props.flags |= LIBXL_SUSPEND_DEBUG;
+ if (abort_if_busy)
+ props.flags |= LIBXL_SUSPEND_ABORT_IF_BUSY;
+
+ rc = libxl_domain_suspend_suse(ctx, domid, send_fd, &props, NULL);
if (rc) {
- fprintf(stderr, "migration sender: libxl_domain_suspend failed"
+ fprintf(stderr, "migration sender: libxl_domain_suspend_suse failed"
" (rc=%d)\n", rc);
if (rc == ERROR_GUEST_TIMEDOUT)
goto failed_suspend;
@@ -4297,13 +4308,18 @@ int main_migrate(int argc, char **argv)
char *rune = NULL;
char *host;
int opt, daemonize = 1, monitor = 1, debug = 0;
+ int max_iters = 0, max_factor = 0, min_remaining = 0, abort_if_busy = 0;
static struct option opts[] = {
{"debug", 0, 0, 0x100},
+ {"max_iters", 1, 0, 0x101},
+ {"max_factor", 1, 0, 0x102},
+ {"min_remaining", 1, 0, 0x103},
+ {"abort_if_busy", 0, 0, 0x104},
COMMON_LONG_OPTS,
{0, 0, 0, 0}
};
- SWITCH_FOREACH_OPT(opt, "FC:s:e", opts, "migrate", 2) {
+ SWITCH_FOREACH_OPT(opt, "FC:s:eM:m:A", opts, "migrate", 2) {
case 'C':
config_filename = optarg;
break;
@@ -4320,6 +4336,18 @@ int main_migrate(int argc, char **argv)
case 0x100:
debug = 1;
break;
+ case 0x101:
+ max_iters = atoi(optarg);
+ break;
+ case 0x102:
+ max_factor = atoi(optarg);
+ break;
+ case 0x103:
+ min_remaining = atoi(optarg);
+ break;
+ case 0x104:
+ abort_if_busy = 1;
+ break;
}
domid = find_domain(argv[optind]);
@@ -4350,7 +4378,8 @@ int main_migrate(int argc, char **argv)
return 1;
}
- migrate_domain(domid, rune, debug, config_filename);
+ migrate_domain(domid, rune, debug, max_iters, max_factor, min_remaining,
+ abort_if_busy, config_filename);
return 0;
}
#endif
Index: xen-4.5.2-testing/tools/libxl/xl_cmdtable.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/xl_cmdtable.c
+++ xen-4.5.2-testing/tools/libxl/xl_cmdtable.c
@@ -155,14 +155,21 @@ struct cmd_spec cmd_table[] = {
&main_migrate, 0, 1,
"Migrate a domain to another host",
"[options] <Domain> <host>",
- "-h Print this help.\n"
- "-C <config> Send <config> instead of config file from creation.\n"
- "-s <sshcommand> Use <sshcommand> instead of ssh. String will be passed\n"
- " to sh. If empty, run <host> instead of ssh <host> xl\n"
- " migrate-receive [-d -e]\n"
- "-e Do not wait in the background (on <host>) for the death\n"
- " of the domain.\n"
- "--debug Print huge (!) amount of debug during the migration process."
+ "-h Print this help.\n"
+ "-C <config> Send <config> instead of config file from creation.\n"
+ "-s <sshcommand> Use <sshcommand> instead of ssh. String will be passed\n"
+ " to sh. If empty, run <host> instead of ssh <host> xl\n"
+ " migrate-receive [-d -e]\n"
+ "-e Do not wait in the background (on <host>) for the death\n"
+ " of the domain.\n"
+ "--debug Print huge (!) amount of debug during the migration process.\n"
+ "\n"
+ "SUSE Linux specific options:\n"
+ "--max_iters <number> Number of iterations before final suspend (default: 30)\n"
+ "--max_factor <factor> Max amount of memory to transfer before final suspend (default: 3*RAM).\n"
+ "--min_remaining <pages> Number of remaining dirty pages before final suspend (default: 50).\n"
+ "--abort_if_busy Abort migration instead of doing final suspend, if number\n"
+ " of iterations or amount of transfered memory is exceeded."
},
{ "restore",
&main_restore, 0, 1,

View File

@ -1,58 +0,0 @@
commit 3bcf91cbbd9a18db9ae7d594ffde7979774ed512
Author: Roger Pau Monne <roger.pau@xxxxxxxxxx>
Date: Wed Feb 12 11:15:17 2014 +0100
libxl: local attach support for PHY backends using scripts
Allow disks using the PHY backend to locally attach if using a script.
Signed-off-by: Roger Pau Monnà <roger.pau@xxxxxxxxxx>
Suggested-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Index: xen-4.5.2-testing/tools/libxl/libxl.c
===================================================================
--- xen-4.5.2-testing.orig/tools/libxl/libxl.c
+++ xen-4.5.2-testing/tools/libxl/libxl.c
@@ -3067,6 +3067,16 @@ void libxl__device_disk_local_initiate_a
switch (disk->backend) {
case LIBXL_DISK_BACKEND_PHY:
+ if (disk->script != NULL) {
+ LOG(DEBUG, "trying to locally attach PHY device %s with script %s",
+ disk->pdev_path, disk->script);
+ libxl__prepare_ao_device(ao, &dls->aodev);
+ dls->aodev.callback = local_device_attach_cb;
+ device_disk_add(egc, LIBXL_TOOLSTACK_DOMID, disk,
+ &dls->aodev, libxl__alloc_vdev,
+ (void *) blkdev_start);
+ return;
+ }
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching PHY disk %s",
disk->pdev_path);
dev = disk->pdev_path;
@@ -3146,7 +3156,7 @@ static void local_device_attach_cb(libxl
}
dev = GCSPRINTF("/dev/%s", disk->vdev);
- LOG(DEBUG, "locally attaching qdisk %s", dev);
+ LOG(DEBUG, "locally attached disk %s", dev);
rc = libxl__device_from_disk(gc, LIBXL_TOOLSTACK_DOMID, disk, &device);
if (rc < 0)
@@ -3186,6 +3196,7 @@ void libxl__device_disk_local_initiate_d
if (!dls->diskpath) goto out;
switch (disk->backend) {
+ case LIBXL_DISK_BACKEND_PHY:
case LIBXL_DISK_BACKEND_QDISK:
if (disk->vdev != NULL) {
GCNEW(device);
@@ -3203,7 +3214,6 @@ void libxl__device_disk_local_initiate_d
/* disk->vdev == NULL; fall through */
default:
/*
- * Nothing to do for PHYSTYPE_PHY.
* For other device types assume that the blktap2 process is
* needed by the soon to be started domain and do nothing.
*/

View File

@ -2,11 +2,11 @@ Make our PV drivers work with older hosts that do not recognize the new PV driv
Signed-off-by: K. Y. Srinivasan <ksrinivasan@novell.com>
Index: xen-4.4.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
Index: xen-4.6.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
===================================================================
--- xen-4.4.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.4.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -337,7 +337,10 @@ static int check_platform_magic(struct d
--- xen-4.6.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
+++ xen-4.6.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
@@ -336,7 +336,10 @@ static int check_platform_magic(struct d
if (magic != XEN_IOPORT_MAGIC_VAL) {
err = "unrecognised magic value";

3
mini-os.tar.bz2 Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:108d025e6b5068a817b79df33a0fd3b94704f8af94f4199188835d4f5eea14c0
size 250896

View File

@ -1,33 +0,0 @@
From 9bfb923a855388bb38f7f57b4881bc888a04f9b5 Mon Sep 17 00:00:00 2001
From: Chunyan Liu <cyliu@suse.com>
Date: Mon, 14 Sep 2015 14:45:37 +0800
Subject: [PATCH] pci-attach: fix assertation
run "xl pci-attach <domain> <pci_device>", the 2nd time fails:
xl: libxl_xshelp.c:209: libxl__xs_transaction_start: Assertion `!*t' failed.
Aborted
To fix that, initialize xs_transaction to avoid libxl__xs_transaction_start
assertion error.
Signed-off-by: Chunyan Liu <cyliu@suse.com>
---
tools/libxl/libxl_pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
index 1ebdce7..19c597e 100644
--- a/tools/libxl/libxl_pci.c
+++ b/tools/libxl/libxl_pci.c
@@ -123,7 +123,7 @@ static int libxl__device_pci_add_xenstore(libxl__gc *gc, uint32_t domid, libxl_d
flexarray_t *back;
char *num_devs, *be_path;
int num = 0;
- xs_transaction_t t;
+ xs_transaction_t t = XBT_NULL;
libxl__device *device;
int rc;
libxl_domain_config d_config;
--
2.1.4

View File

@ -1,8 +1,8 @@
Index: xen-4.5.0-testing/tools/pygrub/src/pygrub
Index: xen-4.6.0-testing/tools/pygrub/src/pygrub
===================================================================
--- xen-4.5.0-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.5.0-testing/tools/pygrub/src/pygrub
@@ -450,7 +450,7 @@ class Grub:
--- xen-4.6.0-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.6.0-testing/tools/pygrub/src/pygrub
@@ -449,7 +449,7 @@ class Grub:
self.cf.filename = f
break
if self.__dict__.get('cf', None) is None:
@ -11,7 +11,7 @@ Index: xen-4.5.0-testing/tools/pygrub/src/pygrub
f = fs.open_file(self.cf.filename)
# limit read size to avoid pathological cases
buf = f.read(FS_READ_MAX)
@@ -622,6 +622,20 @@ def run_grub(file, entry, fs, cfg_args):
@@ -621,6 +621,20 @@ def run_grub(file, entry, fs, cfg_args):
g = Grub(file, fs)
@ -32,7 +32,7 @@ Index: xen-4.5.0-testing/tools/pygrub/src/pygrub
if list_entries:
for i in range(len(g.cf.images)):
img = g.cf.images[i]
@@ -717,6 +731,19 @@ def sniff_netware(fs, cfg):
@@ -716,6 +730,19 @@ def sniff_netware(fs, cfg):
return cfg
@ -52,7 +52,7 @@ Index: xen-4.5.0-testing/tools/pygrub/src/pygrub
def format_sxp(kernel, ramdisk, args):
s = "linux (kernel %s)" % kernel
if ramdisk:
@@ -797,7 +824,7 @@ if __name__ == "__main__":
@@ -796,7 +823,7 @@ if __name__ == "__main__":
debug = False
not_really = False
output_format = "sxp"

View File

@ -1,8 +1,8 @@
Index: xen-4.5.0-testing/tools/pygrub/src/pygrub
Index: xen-4.6.0-testing/tools/pygrub/src/pygrub
===================================================================
--- xen-4.5.0-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.5.0-testing/tools/pygrub/src/pygrub
@@ -26,6 +26,7 @@ import fsimage
--- xen-4.6.0-testing.orig/tools/pygrub/src/pygrub
+++ xen-4.6.0-testing/tools/pygrub/src/pygrub
@@ -25,6 +25,7 @@ import fsimage
import grub.GrubConf
import grub.LiloConf
import grub.ExtLinuxConf
@ -10,7 +10,7 @@ Index: xen-4.5.0-testing/tools/pygrub/src/pygrub
PYGRUB_VER = 0.6
FS_READ_MAX = 1024 * 1024
@@ -759,6 +760,8 @@ if __name__ == "__main__":
@@ -758,6 +759,8 @@ if __name__ == "__main__":
if len(data) == 0:
os.close(tfd)
del datafile

View File

@ -1,7 +1,7 @@
Index: xen-4.5.2-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
===================================================================
--- xen-4.5.2-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ide.c
+++ xen-4.5.2-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ide.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
@@ -935,8 +935,9 @@ static inline void ide_dma_submit_check(
static inline void ide_set_irq(IDEState *s)

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:22d2fccd2c9f323897279d5adefaaf21e8c3eb61670f4bb4937a5c993b012643
size 8167861
oid sha256:a62686ff9b94dda2777a5b1b37b75ae0cbc861dff7bdcbd8789785551e351f45
size 8995267

View File

@ -1,8 +1,8 @@
Index: xen-4.5.0-testing/tools/Makefile
Index: xen-4.6.0-testing/tools/Makefile
===================================================================
--- xen-4.5.0-testing.orig/tools/Makefile
+++ xen-4.5.0-testing/tools/Makefile
@@ -222,6 +222,7 @@ subdir-all-qemu-xen-dir: qemu-xen-dir-fi
--- xen-4.6.0-testing.orig/tools/Makefile
+++ xen-4.6.0-testing/tools/Makefile
@@ -259,6 +259,7 @@ subdir-all-qemu-xen-dir: qemu-xen-dir-fi
--datadir=$(SHAREDIR)/qemu-xen \
--localstatedir=$(localstatedir) \
--disable-kvm \

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d08a4031b593048672772d438366f2242ca09a792949935293de5d663042f587
size 3230082
oid sha256:c039f105aaa84cb17dd3c6efc65316e55dae6de47b19c3400bb469ee017cecd6
size 3214075

View File

@ -1,21 +0,0 @@
Causes rebuilds.
Says rpmlint.
---
tools/qemu-xen-dir-remote/hw/scsi/megasas.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: xen-4.5.0-testing/tools/qemu-xen-dir-remote/hw/scsi/megasas.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-dir-remote/hw/scsi/megasas.c
+++ xen-4.5.0-testing/tools/qemu-xen-dir-remote/hw/scsi/megasas.c
@@ -717,8 +717,8 @@ static int megasas_ctrl_get_info(Megasas
snprintf(info.package_version, 0x60, "%s-QEMU", QEMU_VERSION);
memcpy(info.image_component[0].name, "APP", 3);
memcpy(info.image_component[0].version, MEGASAS_VERSION "-QEMU", 9);
- memcpy(info.image_component[0].build_date, __DATE__, 11);
- memcpy(info.image_component[0].build_time, __TIME__, 8);
+ memcpy(info.image_component[0].build_date, "Apr 1 2014", 11);
+ memcpy(info.image_component[0].build_time, "12:34:56", 8);
info.image_component_count = 1;
if (pci_dev->has_rom) {
uint8_t biosver[32];

View File

@ -3,11 +3,11 @@ https://bugzilla.novell.com/show_bug.cgi?id=879425
tools/qemu-xen-dir-remote/hw/block/xen_disk.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
Index: xen-4.5.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
Index: xen-4.6.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
+++ xen-4.5.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
@@ -120,6 +120,7 @@ struct XenBlkDev {
--- xen-4.6.0-testing.orig/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
+++ xen-4.6.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
@@ -121,6 +121,7 @@ struct XenBlkDev {
int requests_inflight;
int requests_finished;
@ -15,7 +15,7 @@ Index: xen-4.5.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
/* Persistent grants extension */
gboolean feature_discard;
gboolean feature_persistent;
@@ -780,6 +781,16 @@ static void blk_parse_discard(struct Xen
@@ -784,6 +785,16 @@ static void blk_parse_discard(struct Xen
}
}
@ -32,7 +32,7 @@ Index: xen-4.5.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
static int blk_init(struct XenDevice *xendev)
{
struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
@@ -848,6 +859,7 @@ static int blk_init(struct XenDevice *xe
@@ -852,6 +863,7 @@ static int blk_init(struct XenDevice *xe
xenstore_write_be_int(&blkdev->xendev, "info", info);
blk_parse_discard(blkdev);
@ -40,7 +40,7 @@ Index: xen-4.5.0-testing/tools/qemu-xen-dir-remote/hw/block/xen_disk.c
g_free(directiosafe);
return 0;
@@ -888,6 +900,9 @@ static int blk_connect(struct XenDevice
@@ -892,6 +904,9 @@ static int blk_connect(struct XenDevice
qflags |= BDRV_O_UNMAP;
}

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:772e5efd44072d44438d7e0b93ce9dec70823d6affc516249e3aabe65ebd607d
size 444597
oid sha256:f763331c9616e2fb8e5ed815974a7c4bf142b0c1e5ad8c97b75ad5930f712c3d
size 445943

26
stubdom-have-iovec.patch Normal file
View File

@ -0,0 +1,26 @@
Because of commit 76eb7cef6b84ca804f4db340e23ad9c501767c32
xc_private.h now contains a definition of iovec. This conflicts
when building qemu traditional xen_platform.c which includes
hw.h which includes qemu-common.h which already has a definition
of iovec
Index: xen-4.6.0-testing/tools/libxc/xc_private.h
===================================================================
--- xen-4.6.0-testing.orig/tools/libxc/xc_private.h
+++ xen-4.6.0-testing/tools/libxc/xc_private.h
@@ -42,6 +42,8 @@
#endif
#if defined(__MINIOS__)
+#ifndef HAVE_IOVEC
+#define HAVE_IOVEC
/*
* MiniOS's libc doesn't know about sys/uio.h or writev().
* Declare enough of sys/uio.h to compile.
@@ -50,6 +52,7 @@ struct iovec {
void *iov_base;
size_t iov_len;
};
+#endif
#else
#include <sys/uio.h>
#endif

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:990c3470aa76d9106da860b0e67b1fb36c33281a3e26e58ec89df6f44a0be037
size 17477301
oid sha256:84630b41c8066eddb78755762e7a8d1261ed9e12fd8733604f8a0ab9d32eac86
size 17477041

View File

@ -6,11 +6,11 @@ http://xen.1045712.n5.nabble.com/Re-PATCH-improve-suspend-evtchn-lock-processing
Signed-off-by: Chunyan Liu <cyliu@suse.com>
Index: xen-4.5.0-testing/tools/libxc/xc_suspend.c
Index: xen-4.6.0-testing/tools/libxc/xc_suspend.c
===================================================================
--- xen-4.5.0-testing.orig/tools/libxc/xc_suspend.c
+++ xen-4.5.0-testing/tools/libxc/xc_suspend.c
@@ -19,6 +19,10 @@
--- xen-4.6.0-testing.orig/tools/libxc/xc_suspend.c
+++ xen-4.6.0-testing/tools/libxc/xc_suspend.c
@@ -18,6 +18,10 @@
#include "xc_private.h"
#include "xenguest.h"
@ -21,7 +21,7 @@ Index: xen-4.5.0-testing/tools/libxc/xc_suspend.c
#define SUSPEND_LOCK_FILE XEN_RUN_DIR "/suspend-evtchn-%d.lock"
@@ -34,6 +38,37 @@
@@ -33,6 +37,37 @@
#define SUSPEND_FILE_BUFLEN (sizeof(SUSPEND_LOCK_FILE) + 10)
@ -59,7 +59,7 @@ Index: xen-4.5.0-testing/tools/libxc/xc_suspend.c
static void get_suspend_file(char buf[], int domid)
{
snprintf(buf, SUSPEND_FILE_BUFLEN, SUSPEND_LOCK_FILE, domid);
@@ -47,6 +82,7 @@ static int lock_suspend_event(xc_interfa
@@ -46,6 +81,7 @@ static int lock_suspend_event(xc_interfa
struct flock fl;
get_suspend_file(suspend_file, domid);
@ -67,7 +67,7 @@ Index: xen-4.5.0-testing/tools/libxc/xc_suspend.c
*lockfd = -1;
@@ -96,6 +132,8 @@ static int lock_suspend_event(xc_interfa
@@ -95,6 +131,8 @@ static int lock_suspend_event(xc_interfa
if (fd >= 0)
close(fd);

View File

@ -1,46 +0,0 @@
From 903a145f3eace5e3ae914f0335ab6c4e33635d2f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:36:23 +0100
Subject: [PATCH 4/6] tapdisk-ioemu: Write messages to a logfile
Typically, tapdisk-ioemu runs as a daemon and messages to stderr are
simply lost. Write them to a logfile instead.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
tapdisk-ioemu.c | 19 +++++++++++++------
1 files changed, 13 insertions(+), 6 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -78,15 +78,22 @@ int main(void)
struct timeval tv;
void *old_fd_start = NULL;
- logfile = stderr;
-
+ /* Daemonize */
+ if (fork() != 0)
+ exit(0);
+
bdrv_init();
init_blktap();
- /* Daemonize */
- if (fork() != 0)
- exit(0);
-
+ logfile = fopen("/var/log/xen/tapdisk-ioemu.log", "a");
+ if (logfile) {
+ setbuf(logfile, NULL);
+ fclose(stderr);
+ stderr = logfile;
+ } else {
+ logfile = stderr;
+ }
+
/*
* Main loop: Pass events to the corrsponding handlers and check for
* completed aio operations.

View File

@ -1,89 +0,0 @@
From 9062564d79cb45029403cc998b48410e42ead924 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@suse.de>
Date: Tue, 10 Mar 2009 16:45:44 +0100
Subject: [PATCH 6/6] tapdisk-ioemu: Fix shutdown condition
Even when opening the only image a tapdisk-ioemu instance is
responsible for fails, it can't immediately shut down. blktapctrl
still wants to communicate with tapdisk-ioemu and close the disk.
This patch changes tapdisk-ioemu to count the connections to
blktapctrl rather than the number of opened disk images.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
---
hw/xen_blktap.c | 5 ++++-
tapdisk-ioemu.c | 13 ++++++++++---
2 files changed, 14 insertions(+), 4 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_blktap.c
@@ -65,6 +65,7 @@ int read_fd;
int write_fd;
static pid_t process;
+int connected_disks = 0;
fd_list_entry_t *fd_start = NULL;
static void handle_blktap_iomsg(void* private);
@@ -541,6 +542,7 @@ static void handle_blktap_ctrlmsg(void*
/* Allocate the disk structs */
s = state_init();
+ connected_disks++;
/*Open file*/
if (s == NULL || open_disk(s, path, msg->drivertype, msg->readonly)) {
@@ -591,7 +593,8 @@ static void handle_blktap_ctrlmsg(void*
case CTLMSG_CLOSE:
s = get_state(msg->cookie);
if (s) unmap_disk(s);
- break;
+ connected_disks--;
+ break;
case CTLMSG_PID:
memset(buf, 0x00, MSG_SIZE);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/tapdisk-ioemu.c
@@ -14,6 +14,7 @@ extern void qemu_aio_init(void);
extern void qemu_aio_poll(void);
extern void *fd_start;
+extern int connected_disks;
int domid = 0;
FILE* logfile;
@@ -76,7 +77,7 @@ int main(void)
int max_fd;
fd_set rfds;
struct timeval tv;
- void *old_fd_start = NULL;
+ int old_connected_disks = 0;
/* Daemonize */
if (fork() != 0)
@@ -128,11 +129,17 @@ int main(void)
pioh = &ioh->next;
}
+ if (old_connected_disks != connected_disks)
+ fprintf(stderr, "connected disks: %d => %d\n",
+ old_connected_disks, connected_disks);
+
/* Exit when the last image has been closed */
- if (old_fd_start != NULL && fd_start == NULL)
+ if (old_connected_disks != 0 && connected_disks == 0) {
+ fprintf(stderr, "Last image is closed, exiting.\n");
exit(0);
+ }
- old_fd_start = fd_start;
+ old_connected_disks = connected_disks;
}
return 0;
}

View File

@ -1,8 +1,8 @@
Index: xen-4.5.0-testing/tools/qemu-xen-dir-remote/ui/vnc.c
Index: xen-4.6.0-testing/tools/qemu-xen-dir-remote/ui/vnc.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-dir-remote/ui/vnc.c
+++ xen-4.5.0-testing/tools/qemu-xen-dir-remote/ui/vnc.c
@@ -1645,6 +1645,25 @@ static void do_key_event(VncState *vs, i
--- xen-4.6.0-testing.orig/tools/qemu-xen-dir-remote/ui/vnc.c
+++ xen-4.6.0-testing/tools/qemu-xen-dir-remote/ui/vnc.c
@@ -1659,6 +1659,25 @@ static void do_key_event(VncState *vs, i
if (down)
vs->modifiers_state[keycode] ^= 1;
break;

View File

@ -2,11 +2,11 @@
tools/xenstore/Makefile | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
Index: xen-4.5.1-testing/tools/xenstore/Makefile
Index: xen-4.6.0-testing/tools/xenstore/Makefile
===================================================================
--- xen-4.5.1-testing.orig/tools/xenstore/Makefile
+++ xen-4.5.1-testing/tools/xenstore/Makefile
@@ -91,6 +91,7 @@ $(CLIENTS_DOMU): xenstore
--- xen-4.6.0-testing.orig/tools/xenstore/Makefile
+++ xen-4.6.0-testing/tools/xenstore/Makefile
@@ -92,6 +92,7 @@ $(CLIENTS_DOMU): xenstore
xenstore: xenstore_client.o $(LIBXENSTORE)
$(CC) $< $(LDFLAGS) $(LDLIBS_libxenstore) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS)
@ -14,22 +14,20 @@ Index: xen-4.5.1-testing/tools/xenstore/Makefile
xenstore-control: xenstore_control.o $(LIBXENSTORE)
$(CC) $< $(LDFLAGS) $(LDLIBS_libxenstore) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS)
@@ -136,14 +137,16 @@ ifeq ($(XENSTORE_XENSTORED),y)
$(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
$(INSTALL_DIR) $(DESTDIR)$(XEN_LIB_STORED)
$(INSTALL_PROG) xenstored $(DESTDIR)$(SBINDIR)
+ $(INSTALL_DIR) $(DESTDIR)/bin
endif
$(INSTALL_PROG) xenstore-control $(DESTDIR)$(BINDIR)
$(INSTALL_PROG) xenstore $(DESTDIR)$(BINDIR)
@@ -145,12 +146,13 @@ endif
$(INSTALL_PROG) xenstore-control $(DESTDIR)$(bindir)
$(INSTALL_PROG) xenstore $(DESTDIR)$(bindir)
set -e ; for c in $(CLIENTS) ; do \
- ln -f $(DESTDIR)$(BINDIR)/xenstore $(DESTDIR)$(BINDIR)/$${c} ; \
- ln -f $(DESTDIR)$(bindir)/xenstore $(DESTDIR)$(bindir)/$${c} ; \
+ ln -fs xenstore $(DESTDIR)/usr/bin/$${c} ; \
done
+ $(INSTALL_PROG) domu-xenstore $(DESTDIR)/bin
for client in $(CLIENTS_DOMU); do \
- $(INSTALL_PROG) $$client $(DESTDIR)/$(BINDIR)/$${client/domu-}; \
- $(INSTALL_PROG) $$client $(DESTDIR)/bin/$${client/domu-}; \
+ ln -fs domu-xenstore $(DESTDIR)/bin/$${client/domu-}; \
done
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_SHLIB) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)
- $(INSTALL_DIR) $(DESTDIR)$(libdir)
+ $(INSTALL_DIR) $(DESTDIR)$(libdir)
$(INSTALL_SHLIB) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)
ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)/libxenstore.so.$(MAJOR)
ln -sf libxenstore.so.$(MAJOR) $(DESTDIR)$(libdir)/libxenstore.so

View File

@ -1,11 +0,0 @@
Index: xen-4.5.0-testing/tools/hotplug/Linux/xen-backend.rules.in
===================================================================
--- xen-4.5.0-testing.orig/tools/hotplug/Linux/xen-backend.rules.in
+++ xen-4.5.0-testing/tools/hotplug/Linux/xen-backend.rules.in
@@ -12,4 +12,5 @@ KERNEL=="blktap-control", NAME="xen/blkt
KERNEL=="gntdev", NAME="xen/%k", MODE="0600"
KERNEL=="pci_iomul", NAME="xen/%k", MODE="0600"
KERNEL=="tapdev[a-z]*", NAME="xen/blktap-2/tapdev%m", MODE="0600"
-SUBSYSTEM=="net", KERNEL=="vif*-emu", ACTION=="add", ENV{UDEV_CALL}="1", RUN+="@XEN_SCRIPT_DIR@/vif-setup $env{ACTION} type_if=tap"
+SUBSYSTEM=="net", KERNEL=="vif*-emu", ACTION=="add", ENV{UDEV_CALL}="1", TEST=="/proc/xen" RUN+="/etc/xen/scripts/vif-setup $env{ACTION} type_if=tap"
+KERNELS=="xen", KERNEL=="xvd*", SUBSYSTEM=="block", OPTIONS+="last_rule"

View File

@ -1,9 +1,9 @@
Index: xen-4.5.0-testing/xen/arch/x86/platform_hypercall.c
Index: xen-4.6.0-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-4.5.0-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.5.0-testing/xen/arch/x86/platform_hypercall.c
@@ -25,7 +25,7 @@
#include <xen/irq.h>
--- xen-4.6.0-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.6.0-testing/xen/arch/x86/platform_hypercall.c
@@ -26,7 +26,7 @@
#include <xen/symbols.h>
#include <asm/current.h>
#include <public/platform.h>
-#include <acpi/cpufreq/processor_perf.h>
@ -11,10 +11,11 @@ Index: xen-4.5.0-testing/xen/arch/x86/platform_hypercall.c
#include <asm/edd.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
@@ -760,6 +760,41 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PA
@@ -825,6 +825,41 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PA
ret = -EFAULT;
}
break;
+
+ case XENPF_get_cpu_freq:
+ case XENPF_get_cpu_freq_min:
+ case XENPF_get_cpu_freq_max:
@ -49,15 +50,14 @@ Index: xen-4.5.0-testing/xen/arch/x86/platform_hypercall.c
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = -ENOSYS;
break;
Index: xen-4.5.0-testing/xen/include/public/platform.h
Index: xen-4.6.0-testing/xen/include/public/platform.h
===================================================================
--- xen-4.5.0-testing.orig/xen/include/public/platform.h
+++ xen-4.5.0-testing/xen/include/public/platform.h
@@ -527,6 +527,16 @@ struct xenpf_core_parking {
--- xen-4.6.0-testing.orig/xen/include/public/platform.h
+++ xen-4.6.0-testing/xen/include/public/platform.h
@@ -547,6 +547,16 @@ struct xenpf_core_parking {
typedef struct xenpf_core_parking xenpf_core_parking_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t);
@ -74,10 +74,10 @@ Index: xen-4.5.0-testing/xen/include/public/platform.h
/*
* Access generic platform resources(e.g., accessing MSR, port I/O, etc)
* in unified way. Batch resource operations in one call are supported and
@@ -587,6 +597,7 @@ struct xen_platform_op {
struct xenpf_mem_hotadd mem_add;
@@ -638,6 +648,7 @@ struct xen_platform_op {
struct xenpf_core_parking core_parking;
struct xenpf_resource_op resource_op;
struct xenpf_symdata symdata;
+ struct xenpf_get_cpu_freq get_cpu_freq;
uint8_t pad[128];
} u;

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef9016f97076f85298500a01a3d4b4f6a4a3d608780233ef8bc78bd80ee71734
size 4124919

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3930c6a8177865093ee27cc75e9d29f7ba0bf1d7084ea6886d1b6747bc60f0bf
size 4085311

View File

@ -1,6 +1,8 @@
--- a/tools/xenstore/Makefile
+++ b/tools/xenstore/Makefile
@@ -19,6 +19,7 @@ LDFLAGS += $(LDFLAGS-y)
Index: xen-4.6.0-testing/tools/xenstore/Makefile
===================================================================
--- xen-4.6.0-testing.orig/tools/xenstore/Makefile
+++ xen-4.6.0-testing/tools/xenstore/Makefile
@@ -20,6 +20,7 @@ LDFLAGS += $(LDFLAGS-y)
CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod
CLIENTS += xenstore-write xenstore-ls xenstore-watch
@ -8,7 +10,7 @@
XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
@@ -57,7 +58,7 @@ endif
@@ -58,7 +59,7 @@ endif
all: $(ALL_TARGETS)
.PHONY: clients
@ -17,7 +19,7 @@
ifeq ($(CONFIG_SunOS),y)
xenstored_probes.h: xenstored_probes.d
@@ -85,6 +86,9 @@ xenstored.a: $(XENSTORED_OBJS)
@@ -86,6 +87,9 @@ xenstored.a: $(XENSTORED_OBJS)
$(CLIENTS): xenstore
ln -f xenstore $@
@ -27,7 +29,7 @@
xenstore: xenstore_client.o $(LIBXENSTORE)
$(CC) $< $(LDFLAGS) $(LDLIBS_libxenstore) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS)
@@ -112,7 +116,7 @@ clean:
@@ -113,7 +117,7 @@ clean:
rm -f *.a *.o *.opic *.so* xenstored_probes.h
rm -f xenstored xs_random xs_stress xs_crashme
rm -f xs_tdb_dump xenstore-control init-xenstore-domain
@ -35,14 +37,23 @@
+ rm -f xenstore $(CLIENTS) $(CLIENTS_DOMU)
$(RM) $(DEPS)
.PHONY: TAGS
@@ -138,6 +142,9 @@ ifeq ($(XENSTORE_XENSTORED),y)
.PHONY: distclean
@@ -136,13 +140,17 @@ ifeq ($(XENSTORE_XENSTORED),y)
$(INSTALL_DIR) $(DESTDIR)$(sbindir)
$(INSTALL_DIR) $(DESTDIR)$(XEN_LIB_STORED)
$(INSTALL_PROG) xenstored $(DESTDIR)$(sbindir)
+ $(INSTALL_DIR) $(DESTDIR)/bin
endif
$(INSTALL_PROG) xenstore-control $(DESTDIR)$(bindir)
$(INSTALL_PROG) xenstore $(DESTDIR)$(bindir)
set -e ; for c in $(CLIENTS) ; do \
ln -f $(DESTDIR)$(BINDIR)/xenstore $(DESTDIR)$(BINDIR)/$${c} ; \
ln -f $(DESTDIR)$(bindir)/xenstore $(DESTDIR)$(bindir)/$${c} ; \
done
- $(INSTALL_DIR) $(DESTDIR)$(libdir)
+ for client in $(CLIENTS_DOMU); do \
+ $(INSTALL_PROG) $$client $(DESTDIR)/$(BINDIR)/$${client/domu-}; \
+ $(INSTALL_PROG) $$client $(DESTDIR)/bin/$${client/domu-}; \
+ done
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_SHLIB) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)
ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)/libxenstore.so.$(MAJOR)
+ $(INSTALL_DIR) $(DESTDIR)$(libdir)
$(INSTALL_SHLIB) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)
ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)/libxenstore.so.$(MAJOR)
ln -sf libxenstore.so.$(MAJOR) $(DESTDIR)$(libdir)/libxenstore.so

View File

@ -2,7 +2,6 @@
Description=Load dom0 backend drivers
ConditionPathExists=/proc/xen
Before=proc-xen.mount
DefaultDependencies=no
[Install]
WantedBy=multi-user.target

View File

@ -1,7 +1,7 @@
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.h
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/net.h
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/net.h
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.h
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/net.h
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/net.h
@@ -107,8 +107,8 @@ void net_host_device_add(const char *dev
void net_host_device_remove(int vlan_id, const char *device);
@ -13,10 +13,10 @@ Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.h
#endif
#ifdef __sun__
#define SMBD_COMMAND "/usr/sfw/sbin/smbd"
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.c
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/net.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/net.c
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.c
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/net.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/net.c
@@ -1765,9 +1765,10 @@ int net_client_init(const char *device,
}
if (get_param_value(script_arg, sizeof(script_arg), "scriptarg", p) == 0 &&
@ -30,10 +30,10 @@ Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.c
}
} else
#endif
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/i386-dm/qemu-ifup-Linux
@@ -1,36 +1,22 @@
#!/bin/sh

Some files were not shown because too many files have changed in this diff Show More