Add/Remove package files

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=221
Charles Arnold 2013-01-14 17:53:52 +00:00 committed by Git OBS Bridge
parent 898ade9d0e
commit b48642e0b8
84 changed files with 2416 additions and 4512 deletions


@@ -1,61 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347022899 -7200
# Node ID bb85bbccb1c9d802c92dd3fe00841368966ff623
# Parent e3b51948114ec1e2b2eece415b32e26ff857acde
x86/32-on-64: adjust Dom0 initial page table layout
Drop the unnecessary reservation of the L4 page for 32on64 Dom0, and
allocate its L3 first (to match behavior when running identical bit-
width hypervisor and Dom0 kernel).
Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
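
For reference, a minimal standalone sketch of what the NR() macro in the hunk below computes (39/30/21 are x86-64's L4/L3/L2 page-table shifts; the addresses are hypothetical):

    #include <stdio.h>

    /* NR(l, h, s): number of page tables whose entries each span 2^s
     * bytes needed to cover [l, h): round h up and l down to a 2^s
     * boundary, then divide by 2^s. Same macro as in construct_dom0(). */
    #define NR(_l,_h,_s) \
        (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
          ((_l) & ~((1UL<<(_s))-1))) >> (_s))

    int main(void)
    {
        unsigned long v_start = 0xffff830000000000UL; /* hypothetical */
        unsigned long v_end   = v_start + (64UL << 20); /* 64MB image */

        printf("L3 tables: %lu\n", NR(v_start, v_end, 39));
        printf("L2 tables: %lu\n", NR(v_start, v_end, 30));
        printf("L1 tables: %lu\n", NR(v_start, v_end, 21));
        return 0;
    }
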
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -510,7 +510,7 @@ int __init construct_dom0(
#define NR(_l,_h,_s) \
(((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
((_l) & ~((1UL<<(_s))-1))) >> (_s))
- if ( (1 + /* # L4 */
+ if ( (!is_pv_32on64_domain(d) + /* # L4 */
NR(v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
(!is_pv_32on64_domain(d) ?
NR(v_start, v_end, L3_PAGETABLE_SHIFT) : /* # L2 */
@@ -756,6 +756,8 @@ int __init construct_dom0(
panic("Not enough RAM for domain 0 PML4.\n");
page->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
l4start = l4tab = page_to_virt(page);
+ maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l3_page_table;
+ l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
}
copy_page(l4tab, idle_pg_table);
l4tab[0] = l4e_empty(); /* zap trampoline mapping */
@@ -787,9 +789,13 @@ int __init construct_dom0(
l2tab += l2_table_offset(v_start);
if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
{
- maddr_to_page(mpt_alloc)->u.inuse.type_info =
- PGT_l3_page_table;
- l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ if ( count || !l3start )
+ {
+ maddr_to_page(mpt_alloc)->u.inuse.type_info =
+ PGT_l3_page_table;
+ l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ }
+ l3tab = l3start;
clear_page(l3tab);
if ( count == 0 )
l3tab += l3_table_offset(v_start);
@@ -938,7 +944,7 @@ int __init construct_dom0(
if ( !vinitrd_start && initrd_len )
si->flags |= SIF_MOD_START_PFN;
si->flags |= (xen_processor_pmbits << 8) & SIF_PM_MASK;
- si->pt_base = vpt_start + 2 * PAGE_SIZE * !!is_pv_32on64_domain(d);
+ si->pt_base = vpt_start;
si->nr_pt_frames = nr_pt_pages;
si->mfn_list = vphysmap_start;
snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s",


@@ -1,274 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347033492 -7200
# Node ID c70d70d85306b3e4a0538353be131100c5ee38d5
# Parent 0376c85caaf34fe3cc8c3327f7a1d9ecd6f070b4
adjust a few RCU domain locking calls
x86's do_physdev_op() had a case where the locking was entirely
superfluous. Its physdev_map_pirq() further had a case where the lock
was being obtained too early, needlessly complicating early exit paths.
Grant table code had two open coded instances of
rcu_lock_target_domain_by_id(), and a third code section could be
consolidated by using the newly introduced helper function.
The memory hypercall code had two more instances of open coding
rcu_lock_target_domain_by_id(), but note that here this is not just
cleanup, but also fixes an error return path in memory_exchange() to
actually return an error.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
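
For context, the semantics of rcu_lock_target_domain_by_id(), as a sketch reconstructed from the open-coded instances this patch removes (the real implementation lives elsewhere in Xen; the error values match the helper's use in gt_lock_target_domain_by_id() below):

    static int rcu_lock_target_domain_by_id(domid_t dom, struct domain **d)
    {
        if ( dom == DOMID_SELF )
        {
            *d = rcu_lock_current_domain();
            return 0;
        }

        if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
            return -ESRCH;      /* no such domain */

        if ( !IS_PRIV_FOR(current->domain, *d) )
        {
            rcu_unlock_domain(*d);
            return -EPERM;      /* caller not privileged for target */
        }

        return 0;
    }
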
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -90,14 +90,10 @@ static int physdev_hvm_map_pirq(
int physdev_map_pirq(domid_t domid, int type, int *index, int *pirq_p,
struct msi_info *msi)
{
- struct domain *d;
+ struct domain *d = current->domain;
int pirq, irq, ret = 0;
void *map_data = NULL;
- ret = rcu_lock_target_domain_by_id(domid, &d);
- if ( ret )
- return ret;
-
if ( domid == DOMID_SELF && is_hvm_domain(d) )
{
/*
@@ -105,14 +101,15 @@ int physdev_map_pirq(domid_t domid, int
* calls back into itself and deadlocks on hvm_domain.irq_lock.
*/
if ( !is_hvm_pv_evtchn_domain(d) )
- {
- ret = -EINVAL;
- goto free_domain;
- }
- ret = physdev_hvm_map_pirq(d, type, index, pirq_p);
- goto free_domain;
+ return -EINVAL;
+
+ return physdev_hvm_map_pirq(d, type, index, pirq_p);
}
+ ret = rcu_lock_target_domain_by_id(domid, &d);
+ if ( ret )
+ return ret;
+
if ( !IS_PRIV_FOR(current->domain, d) )
{
ret = -EPERM;
@@ -696,13 +693,12 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
}
case PHYSDEVOP_get_free_pirq: {
struct physdev_get_free_pirq out;
- struct domain *d;
+ struct domain *d = v->domain;
ret = -EFAULT;
if ( copy_from_guest(&out, arg, 1) != 0 )
break;
- d = rcu_lock_current_domain();
spin_lock(&d->event_lock);
ret = get_free_pirq(d, out.type);
@@ -717,7 +713,6 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
}
spin_unlock(&d->event_lock);
- rcu_unlock_domain(d);
if ( ret >= 0 )
{
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -24,7 +24,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <xen/config.h>
+#include <xen/err.h>
#include <xen/iocap.h>
#include <xen/lib.h>
#include <xen/sched.h>
@@ -195,6 +195,30 @@ double_gt_unlock(struct grant_table *lgt
spin_unlock(&rgt->lock);
}
+static struct domain *gt_lock_target_domain_by_id(domid_t dom)
+{
+ struct domain *d;
+ int rc = GNTST_general_error;
+
+ switch ( rcu_lock_target_domain_by_id(dom, &d) )
+ {
+ case 0:
+ return d;
+
+ case -ESRCH:
+ gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
+ rc = GNTST_bad_domain;
+ break;
+
+ case -EPERM:
+ rc = GNTST_permission_denied;
+ break;
+ }
+
+ ASSERT(rc < 0 && -rc <= MAX_ERRNO);
+ return ERR_PTR(rc);
+}
+
static inline int
__get_maptrack_handle(
struct grant_table *t)
@@ -1261,7 +1285,6 @@ gnttab_setup_table(
struct grant_table *gt;
int i;
unsigned long gmfn;
- domid_t dom;
if ( count != 1 )
return -EINVAL;
@@ -1281,25 +1304,11 @@ gnttab_setup_table(
goto out1;
}
- dom = op.dom;
- if ( dom == DOMID_SELF )
+ d = gt_lock_target_domain_by_id(op.dom);
+ if ( IS_ERR(d) )
{
- d = rcu_lock_current_domain();
- }
- else
- {
- if ( unlikely((d = rcu_lock_domain_by_id(dom)) == NULL) )
- {
- gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
- op.status = GNTST_bad_domain;
- goto out1;
- }
-
- if ( unlikely(!IS_PRIV_FOR(current->domain, d)) )
- {
- op.status = GNTST_permission_denied;
- goto out2;
- }
+ op.status = PTR_ERR(d);
+ goto out1;
}
if ( xsm_grant_setup(current->domain, d) )
@@ -1352,7 +1361,6 @@ gnttab_query_size(
{
struct gnttab_query_size op;
struct domain *d;
- domid_t dom;
int rc;
if ( count != 1 )
@@ -1364,25 +1372,11 @@ gnttab_query_size(
return -EFAULT;
}
- dom = op.dom;
- if ( dom == DOMID_SELF )
- {
- d = rcu_lock_current_domain();
- }
- else
+ d = gt_lock_target_domain_by_id(op.dom);
+ if ( IS_ERR(d) )
{
- if ( unlikely((d = rcu_lock_domain_by_id(dom)) == NULL) )
- {
- gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
- op.status = GNTST_bad_domain;
- goto query_out;
- }
-
- if ( unlikely(!IS_PRIV_FOR(current->domain, d)) )
- {
- op.status = GNTST_permission_denied;
- goto query_out_unlock;
- }
+ op.status = PTR_ERR(d);
+ goto query_out;
}
rc = xsm_grant_query_size(current->domain, d);
@@ -2251,15 +2245,10 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
return -EFAULT;
}
- rc = rcu_lock_target_domain_by_id(op.dom, &d);
- if ( rc < 0 )
+ d = gt_lock_target_domain_by_id(op.dom);
+ if ( IS_ERR(d) )
{
- if ( rc == -ESRCH )
- op.status = GNTST_bad_domain;
- else if ( rc == -EPERM )
- op.status = GNTST_permission_denied;
- else
- op.status = GNTST_general_error;
+ op.status = PTR_ERR(d);
goto out1;
}
rc = xsm_grant_setup(current->domain, d);
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -329,22 +329,9 @@ static long memory_exchange(XEN_GUEST_HA
out_chunk_order = exch.in.extent_order - exch.out.extent_order;
}
- if ( likely(exch.in.domid == DOMID_SELF) )
- {
- d = rcu_lock_current_domain();
- }
- else
- {
- if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL )
- goto fail_early;
-
- if ( !IS_PRIV_FOR(current->domain, d) )
- {
- rcu_unlock_domain(d);
- rc = -EPERM;
- goto fail_early;
- }
- }
+ rc = rcu_lock_target_domain_by_id(exch.in.domid, &d);
+ if ( rc )
+ goto fail_early;
memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
d,
@@ -583,20 +570,8 @@ long do_memory_op(unsigned long cmd, XEN
&& (reservation.mem_flags & XENMEMF_populate_on_demand) )
args.memflags |= MEMF_populate_on_demand;
- if ( likely(reservation.domid == DOMID_SELF) )
- {
- d = rcu_lock_current_domain();
- }
- else
- {
- if ( (d = rcu_lock_domain_by_id(reservation.domid)) == NULL )
- return start_extent;
- if ( !IS_PRIV_FOR(current->domain, d) )
- {
- rcu_unlock_domain(d);
- return start_extent;
- }
- }
+ if ( unlikely(rcu_lock_target_domain_by_id(reservation.domid, &d)) )
+ return start_extent;
args.domain = d;
rc = xsm_memory_adjust_reservation(current->domain, d);


@@ -1,52 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347263130 -7200
# Node ID 7d216f026f71022186196a75244e97cf3864f50b
# Parent c70d70d85306b3e4a0538353be131100c5ee38d5
VT-d: split .ack and .disable DMA-MSI actors
Calling irq_complete_move() from .disable is wrong, breaking S3 resume.
Comparing with all other .ack actors, it was also missing a call to
move_{native,masked}_irq(). As the actor is masking its interrupt
anyway (albeit it's not immediately obvious why), the latter is the
better choice.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1040,8 +1040,6 @@ static void dma_msi_mask(struct irq_desc
unsigned long flags;
struct iommu *iommu = desc->action->dev_id;
- irq_complete_move(desc);
-
/* mask it */
spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
@@ -1054,6 +1052,13 @@ static unsigned int dma_msi_startup(stru
return 0;
}
+static void dma_msi_ack(struct irq_desc *desc)
+{
+ irq_complete_move(desc);
+ dma_msi_mask(desc);
+ move_masked_irq(desc);
+}
+
static void dma_msi_end(struct irq_desc *desc, u8 vector)
{
dma_msi_unmask(desc);
@@ -1115,7 +1120,7 @@ static hw_irq_controller dma_msi_type =
.shutdown = dma_msi_mask,
.enable = dma_msi_unmask,
.disable = dma_msi_mask,
- .ack = dma_msi_mask,
+ .ack = dma_msi_ack,
.end = dma_msi_end,
.set_affinity = dma_msi_set_affinity,
};


@@ -1,27 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365190 -7200
# Node ID 0dba5a8886556f1b92e59eb19c570ad1704037f6
# Parent 90533f3b6babfda56edbbefda47c46b391204132
tmem: only allow tmem control operations from privileged domains
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2541,10 +2541,8 @@ static NOINLINE int do_tmem_control(stru
OID *oidp = (OID *)(&op->u.ctrl.oid[0]);
if (!tmh_current_is_privileged())
- {
- /* don't fail... mystery: sometimes dom0 fails here */
- /* return -EPERM; */
- }
+ return -EPERM;
+
switch(subop)
{
case TMEMC_THAW:


@@ -1,45 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365203 -7200
# Node ID fcf567acc92ae57f4adfbe967b01a2ba0390c08f
# Parent 0dba5a8886556f1b92e59eb19c570ad1704037f6
tmem: consistently make pool_id a uint32_t
Treating it as an int could allow a malicious guest to provide a
negative pool_id, bypassing the MAX_POOLS_PER_DOMAIN limit check and
allowing access to negative offsets of the pool array.
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
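
A standalone illustration of the signedness problem (the limit value here is illustrative; in tmem the comparison guards an index into the pool array):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_POOLS_PER_DOMAIN 16

    int main(void)
    {
        int pool_id = -1; /* guest-supplied, signed: the bug */

        /* -1 < 16, so the upper-bound check alone does not reject it,
         * and pools[pool_id] would read before the array's start. */
        printf("signed:   rejected=%d\n", pool_id >= MAX_POOLS_PER_DOMAIN);

        /* As uint32_t, -1 becomes 0xffffffff and is caught. */
        printf("unsigned: rejected=%d\n",
               (uint32_t)pool_id >= MAX_POOLS_PER_DOMAIN);
        return 0;
    }
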
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2417,7 +2417,7 @@ static NOINLINE int tmemc_save_subop(int
return rc;
}
-static NOINLINE int tmemc_save_get_next_page(int cli_id, int pool_id,
+static NOINLINE int tmemc_save_get_next_page(int cli_id, uint32_t pool_id,
tmem_cli_va_t buf, uint32_t bufsize)
{
client_t *client = tmh_client_from_cli_id(cli_id);
@@ -2509,7 +2509,7 @@ out:
return ret;
}
-static int tmemc_restore_put_page(int cli_id, int pool_id, OID *oidp,
+static int tmemc_restore_put_page(int cli_id, uint32_t pool_id, OID *oidp,
uint32_t index, tmem_cli_va_t buf, uint32_t bufsize)
{
client_t *client = tmh_client_from_cli_id(cli_id);
@@ -2521,7 +2521,7 @@ static int tmemc_restore_put_page(int cl
return do_tmem_put(pool,oidp,index,0,0,0,bufsize,buf.p);
}
-static int tmemc_restore_flush_page(int cli_id, int pool_id, OID *oidp,
+static int tmemc_restore_flush_page(int cli_id, uint32_t pool_id, OID *oidp,
uint32_t index)
{
client_t *client = tmh_client_from_cli_id(cli_id);


@@ -1,23 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365214 -7200
# Node ID d189d99ef00c1e197321593d13282e1b57eb4a38
# Parent fcf567acc92ae57f4adfbe967b01a2ba0390c08f
tmem: check the pool_id is valid when destroying a tmem pool
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1870,6 +1870,8 @@ static NOINLINE int do_tmem_destroy_pool
if ( client->pools == NULL )
return 0;
+ if ( pool_id >= MAX_POOLS_PER_DOMAIN )
+ return 0;
if ( (pool = client->pools[pool_id]) == NULL )
return 0;
client->pools[pool_id] = NULL;


@@ -1,44 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365847 -7200
# Node ID f53c5aadbba9d389f4a7d83f308499e22d1d1eda
# Parent d189d99ef00c1e197321593d13282e1b57eb4a38
tmem: check for a valid client ("domain") in the save subops
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2379,12 +2379,18 @@ static NOINLINE int tmemc_save_subop(int
rc = MAX_POOLS_PER_DOMAIN;
break;
case TMEMC_SAVE_GET_CLIENT_WEIGHT:
+ if ( client == NULL )
+ break;
rc = client->weight == -1 ? -2 : client->weight;
break;
case TMEMC_SAVE_GET_CLIENT_CAP:
+ if ( client == NULL )
+ break;
rc = client->cap == -1 ? -2 : client->cap;
break;
case TMEMC_SAVE_GET_CLIENT_FLAGS:
+ if ( client == NULL )
+ break;
rc = (client->compress ? TMEM_CLIENT_COMPRESS : 0 ) |
(client->was_frozen ? TMEM_CLIENT_FROZEN : 0 );
break;
@@ -2408,6 +2414,8 @@ static NOINLINE int tmemc_save_subop(int
*uuid = pool->uuid[1];
rc = 0;
case TMEMC_SAVE_END:
+ if ( client == NULL )
+ break;
client->live_migrating = 0;
if ( !list_empty(&client->persistent_invalidated_list) )
list_for_each_entry_safe(pgp,pgp2,


@@ -1,556 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365869 -7200
# Node ID ccd60ed6c555e1816cac448fcb20a84533977b43
# Parent f53c5aadbba9d389f4a7d83f308499e22d1d1eda
tmem: don't access guest memory without using the accessors intended for this
This is not permitted, not even for buffers coming from Dom0 (and it
would also break the moment Dom0 runs in HVM mode). An implication from
the changes here is that tmh_copy_page() can't be used anymore for
control operations calling tmh_copy_{from,to}_client() (as those pass
the buffer by virtual address rather than MFN).
Note that tmemc_save_get_next_page() previously didn't set the returned
handle's pool_id field, while the new code does. It needs to be
confirmed that this is not a problem (otherwise the copy-out operation
will require further tmh_...() abstractions to be added).
Further note that the patch removes (rather than adjusts) an invalid
call to unmap_domain_page() (no matching map_domain_page()) from
tmh_compress_from_client() and adds a missing one to an error return
path in tmh_copy_from_client().
Finally note that the patch adds a previously missing return statement
to cli_get_page() (without which that function could de-reference a
NULL pointer, triggerable from guest mode).
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
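
In sketch form, the buffer-handling change for control operations (guest-handle API as used in the diff; surrounding declarations elided, so this is illustrative rather than compilable in isolation):

    /* Before: the control-op buffer was dereferenced as a hypervisor
     * pointer, which is invalid for guest-provided memory. */
    struct tmem_handle *h = (struct tmem_handle *)buf.p;
    h->index = pgp->index;                  /* WRONG: raw dereference */

    /* After: fill a local copy, then move it out through the accessor,
     * which validates and maps the guest buffer. */
    struct tmem_handle hdl;
    hdl.pool_id = pool_id;
    hdl.index = pgp->index;
    tmh_copy_to_client_buf(buf, &hdl, 1);   /* wraps copy_to_guest() */
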
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -388,11 +388,13 @@ static NOINLINE int pcd_copy_to_client(t
pcd = pgp->pcd;
if ( pgp->size < PAGE_SIZE && pgp->size != 0 &&
pcd->size < PAGE_SIZE && pcd->size != 0 )
- ret = tmh_decompress_to_client(cmfn, pcd->cdata, pcd->size, NULL);
+ ret = tmh_decompress_to_client(cmfn, pcd->cdata, pcd->size,
+ tmh_cli_buf_null);
else if ( tmh_tze_enabled() && pcd->size < PAGE_SIZE )
ret = tmh_copy_tze_to_client(cmfn, pcd->tze, pcd->size);
else
- ret = tmh_copy_to_client(cmfn, pcd->pfp, 0, 0, PAGE_SIZE, NULL);
+ ret = tmh_copy_to_client(cmfn, pcd->pfp, 0, 0, PAGE_SIZE,
+ tmh_cli_buf_null);
tmem_read_unlock(&pcd_tree_rwlocks[firstbyte]);
return ret;
}
@@ -1444,7 +1446,7 @@ static inline void tmem_ensure_avail_pag
/************ TMEM CORE OPERATIONS ************************************/
static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn,
- void *cva)
+ tmem_cli_va_t clibuf)
{
void *dst, *p;
size_t size;
@@ -1463,7 +1465,7 @@ static NOINLINE int do_tmem_put_compress
if ( pgp->pfp != NULL )
pgp_free_data(pgp, pgp->us.obj->pool);
START_CYC_COUNTER(compress);
- ret = tmh_compress_from_client(cmfn, &dst, &size, cva);
+ ret = tmh_compress_from_client(cmfn, &dst, &size, clibuf);
if ( (ret == -EFAULT) || (ret == 0) )
goto out;
else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) {
@@ -1490,7 +1492,8 @@ out:
}
static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn,
- pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cva)
+ pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len,
+ tmem_cli_va_t clibuf)
{
pool_t *pool;
obj_t *obj;
@@ -1512,7 +1515,7 @@ static NOINLINE int do_tmem_dup_put(pgp_
/* can we successfully manipulate pgp to change out the data? */
if ( len != 0 && client->compress && pgp->size != 0 )
{
- ret = do_tmem_put_compress(pgp,cmfn,cva);
+ ret = do_tmem_put_compress(pgp, cmfn, clibuf);
if ( ret == 1 )
goto done;
else if ( ret == 0 )
@@ -1530,7 +1533,8 @@ copy_uncompressed:
goto failed_dup;
pgp->size = 0;
/* tmh_copy_from_client properly handles len==0 and offsets != 0 */
- ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,0);
+ ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
+ tmh_cli_buf_null);
if ( ret == -EFAULT )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
@@ -1582,7 +1586,7 @@ cleanup:
static NOINLINE int do_tmem_put(pool_t *pool,
OID *oidp, uint32_t index,
tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cva)
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t clibuf)
{
obj_t *obj = NULL, *objfound = NULL, *objnew = NULL;
pgp_t *pgp = NULL, *pgpdel = NULL;
@@ -1596,7 +1600,8 @@ static NOINLINE int do_tmem_put(pool_t *
{
ASSERT_SPINLOCK(&objfound->obj_spinlock);
if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL)
- return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len,cva);
+ return do_tmem_dup_put(pgp, cmfn, tmem_offset, pfn_offset, len,
+ clibuf);
}
/* no puts allowed into a frozen pool (except dup puts) */
@@ -1631,7 +1636,7 @@ static NOINLINE int do_tmem_put(pool_t *
if ( len != 0 && client->compress )
{
ASSERT(pgp->pfp == NULL);
- ret = do_tmem_put_compress(pgp,cmfn,cva);
+ ret = do_tmem_put_compress(pgp, cmfn, clibuf);
if ( ret == 1 )
goto insert_page;
if ( ret == -ENOMEM )
@@ -1655,7 +1660,8 @@ copy_uncompressed:
goto delete_and_free;
}
/* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */
- ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,cva);
+ ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
+ clibuf);
if ( ret == -EFAULT )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
@@ -1725,12 +1731,13 @@ free:
static NOINLINE int do_tmem_get(pool_t *pool, OID *oidp, uint32_t index,
tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cva)
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t clibuf)
{
obj_t *obj;
pgp_t *pgp;
client_t *client = pool->client;
DECL_LOCAL_CYC_COUNTER(decompress);
+ int rc = -EFAULT;
if ( !_atomic_read(pool->pgp_count) )
return -EEMPTY;
@@ -1755,16 +1762,18 @@ static NOINLINE int do_tmem_get(pool_t *
if ( tmh_dedup_enabled() && !is_persistent(pool) &&
pgp->firstbyte != NOT_SHAREABLE )
{
- if ( pcd_copy_to_client(cmfn, pgp) == -EFAULT )
+ rc = pcd_copy_to_client(cmfn, pgp);
+ if ( rc <= 0 )
goto bad_copy;
} else if ( pgp->size != 0 ) {
START_CYC_COUNTER(decompress);
- if ( tmh_decompress_to_client(cmfn, pgp->cdata,
- pgp->size, cva) == -EFAULT )
+ rc = tmh_decompress_to_client(cmfn, pgp->cdata,
+ pgp->size, clibuf);
+ if ( rc <= 0 )
goto bad_copy;
END_CYC_COUNTER(decompress);
} else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
- pfn_offset, len, cva) == -EFAULT)
+ pfn_offset, len, clibuf) == -EFAULT)
goto bad_copy;
if ( is_ephemeral(pool) )
{
@@ -1804,8 +1813,7 @@ static NOINLINE int do_tmem_get(pool_t *
bad_copy:
/* this should only happen if the client passed a bad mfn */
failed_copies++;
- return -EFAULT;
-
+ return rc;
}
static NOINLINE int do_tmem_flush_page(pool_t *pool, OID *oidp, uint32_t index)
@@ -2345,7 +2353,6 @@ static NOINLINE int tmemc_save_subop(int
pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN)
? NULL : client->pools[pool_id];
uint32_t p;
- uint64_t *uuid;
pgp_t *pgp, *pgp2;
int rc = -1;
@@ -2409,9 +2416,7 @@ static NOINLINE int tmemc_save_subop(int
case TMEMC_SAVE_GET_POOL_UUID:
if ( pool == NULL )
break;
- uuid = (uint64_t *)buf.p;
- *uuid++ = pool->uuid[0];
- *uuid = pool->uuid[1];
+ tmh_copy_to_client_buf(buf, pool->uuid, 2);
rc = 0;
case TMEMC_SAVE_END:
if ( client == NULL )
@@ -2436,7 +2441,7 @@ static NOINLINE int tmemc_save_get_next_
pgp_t *pgp;
OID oid;
int ret = 0;
- struct tmem_handle *h;
+ struct tmem_handle h;
unsigned int pagesize = 1 << (pool->pageshift+12);
if ( pool == NULL || is_ephemeral(pool) )
@@ -2467,11 +2472,13 @@ static NOINLINE int tmemc_save_get_next_
pgp_t,us.pool_pers_pages);
pool->cur_pgp = pgp;
oid = pgp->us.obj->oid;
- h = (struct tmem_handle *)buf.p;
- *(OID *)&h->oid[0] = oid;
- h->index = pgp->index;
- buf.p = (void *)(h+1);
- ret = do_tmem_get(pool, &oid, h->index,0,0,0,pagesize,buf.p);
+ h.pool_id = pool_id;
+ BUILD_BUG_ON(sizeof(h.oid) != sizeof(oid));
+ memcpy(h.oid, oid.oid, sizeof(h.oid));
+ h.index = pgp->index;
+ tmh_copy_to_client_buf(buf, &h, 1);
+ tmh_client_buf_add(buf, sizeof(h));
+ ret = do_tmem_get(pool, &oid, pgp->index, 0, 0, 0, pagesize, buf);
out:
tmem_spin_unlock(&pers_lists_spinlock);
@@ -2483,7 +2490,7 @@ static NOINLINE int tmemc_save_get_next_
{
client_t *client = tmh_client_from_cli_id(cli_id);
pgp_t *pgp;
- struct tmem_handle *h;
+ struct tmem_handle h;
int ret = 0;
if ( client == NULL )
@@ -2509,10 +2516,11 @@ static NOINLINE int tmemc_save_get_next_
pgp_t,client_inv_pages);
client->cur_pgp = pgp;
}
- h = (struct tmem_handle *)buf.p;
- h->pool_id = pgp->pool_id;
- *(OID *)&h->oid = pgp->inv_oid;
- h->index = pgp->index;
+ h.pool_id = pgp->pool_id;
+ BUILD_BUG_ON(sizeof(h.oid) != sizeof(pgp->inv_oid));
+ memcpy(h.oid, pgp->inv_oid.oid, sizeof(h.oid));
+ h.index = pgp->index;
+ tmh_copy_to_client_buf(buf, &h, 1);
ret = 1;
out:
tmem_spin_unlock(&pers_lists_spinlock);
@@ -2528,7 +2536,7 @@ static int tmemc_restore_put_page(int cl
if ( pool == NULL )
return -1;
- return do_tmem_put(pool,oidp,index,0,0,0,bufsize,buf.p);
+ return do_tmem_put(pool, oidp, index, 0, 0, 0, bufsize, buf);
}
static int tmemc_restore_flush_page(int cli_id, uint32_t pool_id, OID *oidp,
@@ -2732,19 +2740,19 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
break;
case TMEM_NEW_PAGE:
tmem_ensure_avail_pages();
- rc = do_tmem_put(pool, oidp,
- op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, NULL);
+ rc = do_tmem_put(pool, oidp, op.u.gen.index, op.u.gen.cmfn, 0, 0, 0,
+ tmh_cli_buf_null);
break;
case TMEM_PUT_PAGE:
tmem_ensure_avail_pages();
- rc = do_tmem_put(pool, oidp,
- op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, NULL);
+ rc = do_tmem_put(pool, oidp, op.u.gen.index, op.u.gen.cmfn, 0, 0,
+ PAGE_SIZE, tmh_cli_buf_null);
if (rc == 1) succ_put = 1;
else non_succ_put = 1;
break;
case TMEM_GET_PAGE:
rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
- 0, 0, PAGE_SIZE, 0);
+ 0, 0, PAGE_SIZE, tmh_cli_buf_null);
if (rc == 1) succ_get = 1;
else non_succ_get = 1;
break;
@@ -2763,13 +2771,13 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
case TMEM_READ:
rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
op.u.gen.tmem_offset, op.u.gen.pfn_offset,
- op.u.gen.len,0);
+ op.u.gen.len, tmh_cli_buf_null);
break;
case TMEM_WRITE:
rc = do_tmem_put(pool, oidp,
op.u.gen.index, op.u.gen.cmfn,
op.u.gen.tmem_offset, op.u.gen.pfn_offset,
- op.u.gen.len, NULL);
+ op.u.gen.len, tmh_cli_buf_null);
break;
case TMEM_XCHG:
/* need to hold global lock to ensure xchg is atomic */
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -51,6 +51,7 @@ DECL_CYC_COUNTER(pg_copy);
#define LZO_DSTMEM_PAGES 2
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, workmem);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, dstmem);
+static DEFINE_PER_CPU_READ_MOSTLY(void *, scratch_page);
#ifdef COMPARE_COPY_PAGE_SSE2
#include <asm/flushtlb.h> /* REMOVE ME AFTER TEST */
@@ -115,6 +116,7 @@ static inline void *cli_get_page(tmem_cl
{
if ( page )
put_page(page);
+ return NULL;
}
if ( cli_write && !get_page_type(page, PGT_writable_page) )
@@ -144,12 +146,12 @@ static inline void cli_put_page(tmem_cli
EXPORT int tmh_copy_from_client(pfp_t *pfp,
tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cli_va)
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t clibuf)
{
unsigned long tmem_mfn, cli_mfn = 0;
- void *tmem_va;
+ char *tmem_va, *cli_va = NULL;
pfp_t *cli_pfp = NULL;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
+ int rc = 1;
ASSERT(pfp != NULL);
tmem_mfn = page_to_mfn(pfp);
@@ -160,62 +162,76 @@ EXPORT int tmh_copy_from_client(pfp_t *p
unmap_domain_page(tmem_va);
return 1;
}
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
if ( cli_va == NULL )
+ {
+ unmap_domain_page(tmem_va);
return -EFAULT;
+ }
}
mb();
- if (len == PAGE_SIZE && !tmem_offset && !pfn_offset)
+ if ( len == PAGE_SIZE && !tmem_offset && !pfn_offset && cli_va )
tmh_copy_page(tmem_va, cli_va);
else if ( (tmem_offset+len <= PAGE_SIZE) &&
(pfn_offset+len <= PAGE_SIZE) )
- memcpy((char *)tmem_va+tmem_offset,(char *)cli_va+pfn_offset,len);
- if ( !tmemc )
+ {
+ if ( cli_va )
+ memcpy(tmem_va + tmem_offset, cli_va + pfn_offset, len);
+ else if ( copy_from_guest_offset(tmem_va + tmem_offset, clibuf,
+ pfn_offset, len) )
+ rc = -EFAULT;
+ }
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(tmem_va);
- return 1;
+ return rc;
}
EXPORT int tmh_compress_from_client(tmem_cli_mfn_t cmfn,
- void **out_va, size_t *out_len, void *cli_va)
+ void **out_va, size_t *out_len, tmem_cli_va_t clibuf)
{
int ret = 0;
unsigned char *dmem = this_cpu(dstmem);
unsigned char *wmem = this_cpu(workmem);
+ char *scratch = this_cpu(scratch_page);
pfp_t *cli_pfp = NULL;
unsigned long cli_mfn = 0;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
+ void *cli_va = NULL;
if ( dmem == NULL || wmem == NULL )
return 0; /* no buffer, so can't compress */
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
if ( cli_va == NULL )
return -EFAULT;
}
+ else if ( !scratch )
+ return 0;
+ else if ( copy_from_guest(scratch, clibuf, PAGE_SIZE) )
+ return -EFAULT;
mb();
- ret = lzo1x_1_compress(cli_va, PAGE_SIZE, dmem, out_len, wmem);
+ ret = lzo1x_1_compress(cli_va ?: scratch, PAGE_SIZE, dmem, out_len, wmem);
ASSERT(ret == LZO_E_OK);
*out_va = dmem;
- if ( !tmemc )
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
- unmap_domain_page(cli_va);
return 1;
}
EXPORT int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
- pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cli_va)
+ pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len,
+ tmem_cli_va_t clibuf)
{
unsigned long tmem_mfn, cli_mfn = 0;
- void *tmem_va;
+ char *tmem_va, *cli_va = NULL;
pfp_t *cli_pfp = NULL;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
+ int rc = 1;
ASSERT(pfp != NULL);
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
if ( cli_va == NULL )
@@ -223,37 +239,48 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
}
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
- if (len == PAGE_SIZE && !tmem_offset && !pfn_offset)
+ if ( len == PAGE_SIZE && !tmem_offset && !pfn_offset && cli_va )
tmh_copy_page(cli_va, tmem_va);
else if ( (tmem_offset+len <= PAGE_SIZE) && (pfn_offset+len <= PAGE_SIZE) )
- memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len);
+ {
+ if ( cli_va )
+ memcpy(cli_va + pfn_offset, tmem_va + tmem_offset, len);
+ else if ( copy_to_guest_offset(clibuf, pfn_offset,
+ tmem_va + tmem_offset, len) )
+ rc = -EFAULT;
+ }
unmap_domain_page(tmem_va);
- if ( !tmemc )
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
mb();
- return 1;
+ return rc;
}
EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va,
- size_t size, void *cli_va)
+ size_t size, tmem_cli_va_t clibuf)
{
unsigned long cli_mfn = 0;
pfp_t *cli_pfp = NULL;
+ void *cli_va = NULL;
+ char *scratch = this_cpu(scratch_page);
size_t out_len = PAGE_SIZE;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
int ret;
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
if ( cli_va == NULL )
return -EFAULT;
}
- ret = lzo1x_decompress_safe(tmem_va, size, cli_va, &out_len);
+ else if ( !scratch )
+ return 0;
+ ret = lzo1x_decompress_safe(tmem_va, size, cli_va ?: scratch, &out_len);
ASSERT(ret == LZO_E_OK);
ASSERT(out_len == PAGE_SIZE);
- if ( !tmemc )
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ else if ( copy_to_guest(clibuf, scratch, PAGE_SIZE) )
+ return -EFAULT;
mb();
return 1;
}
@@ -423,6 +450,11 @@ static int cpu_callback(
struct page_info *p = alloc_domheap_pages(0, workmem_order, 0);
per_cpu(workmem, cpu) = p ? page_to_virt(p) : NULL;
}
+ if ( per_cpu(scratch_page, cpu) == NULL )
+ {
+ struct page_info *p = alloc_domheap_page(NULL, 0);
+ per_cpu(scratch_page, cpu) = p ? page_to_virt(p) : NULL;
+ }
break;
}
case CPU_DEAD:
@@ -439,6 +471,11 @@ static int cpu_callback(
free_domheap_pages(p, workmem_order);
per_cpu(workmem, cpu) = NULL;
}
+ if ( per_cpu(scratch_page, cpu) != NULL )
+ {
+ free_domheap_page(virt_to_page(per_cpu(scratch_page, cpu)));
+ per_cpu(scratch_page, cpu) = NULL;
+ }
break;
}
default:
--- a/xen/include/xen/tmem_xen.h
+++ b/xen/include/xen/tmem_xen.h
@@ -482,27 +482,33 @@ static inline int tmh_get_tmemop_from_cl
return copy_from_guest(op, uops, 1);
}
+#define tmh_cli_buf_null guest_handle_from_ptr(NULL, char)
+
static inline void tmh_copy_to_client_buf_offset(tmem_cli_va_t clibuf, int off,
char *tmembuf, int len)
{
copy_to_guest_offset(clibuf,off,tmembuf,len);
}
+#define tmh_copy_to_client_buf(clibuf, tmembuf, cnt) \
+ copy_to_guest(guest_handle_cast(clibuf, void), tmembuf, cnt)
+
+#define tmh_client_buf_add guest_handle_add_offset
+
#define TMH_CLI_ID_NULL ((cli_id_t)((domid_t)-1L))
#define tmh_cli_id_str "domid"
#define tmh_client_str "domain"
-extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t,void*);
+int tmh_decompress_to_client(tmem_cli_mfn_t, void *, size_t, tmem_cli_va_t);
-extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *,void*);
+int tmh_compress_from_client(tmem_cli_mfn_t, void **, size_t *, tmem_cli_va_t);
-extern int tmh_copy_from_client(pfp_t *pfp,
- tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cva);
+int tmh_copy_from_client(pfp_t *, tmem_cli_mfn_t, pagesize_t tmem_offset,
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t);
-extern int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
- pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cva);
+int tmh_copy_to_client(tmem_cli_mfn_t, pfp_t *, pagesize_t tmem_offset,
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t);
extern int tmh_copy_tze_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, pagesize_t len);


@@ -1,137 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365879 -7200
# Node ID 33b8c42a87ec2fa6e6533dd9ee7603f732b168f5
# Parent ccd60ed6c555e1816cac448fcb20a84533977b43
tmem: detect arithmetic overflow in tmh_copy_{from,to}_client()
This implies adjusting callers to deal with errors other than -EFAULT
and removing some comments which would otherwise become stale.
Reported-by: Tim Deegan <tim@xen.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
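
A standalone demonstration of the wrap being guarded against (pagesize_t modelled here as uint32_t for illustration):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t pagesize_t;
    #define PAGE_SIZE 4096u

    int main(void)
    {
        pagesize_t tmem_offset = 0xfffffff0u, len = 0x20u;

        /* 0xfffffff0 + 0x20 wraps to 0x10, which is <= PAGE_SIZE. */
        if ( tmem_offset + len <= PAGE_SIZE )
            printf("old bound check passes despite a ~4GB offset\n");

        /* The fix bounds each operand before any sum is formed. */
        if ( tmem_offset > PAGE_SIZE || len > PAGE_SIZE )
            printf("new pre-check rejects it with -EINVAL\n");
        return 0;
    }
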
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1535,7 +1535,7 @@ copy_uncompressed:
/* tmh_copy_from_client properly handles len==0 and offsets != 0 */
ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
tmh_cli_buf_null);
- if ( ret == -EFAULT )
+ if ( ret < 0 )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
{
@@ -1556,9 +1556,7 @@ done:
return 1;
bad_copy:
- /* this should only happen if the client passed a bad mfn */
failed_copies++;
- ret = -EFAULT;
goto cleanup;
failed_dup:
@@ -1662,7 +1660,7 @@ copy_uncompressed:
/* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */
ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
clibuf);
- if ( ret == -EFAULT )
+ if ( ret < 0 )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
{
@@ -1702,8 +1700,6 @@ insert_page:
return 1;
bad_copy:
- /* this should only happen if the client passed a bad mfn */
- ret = -EFAULT;
failed_copies++;
delete_and_free:
@@ -1737,7 +1733,7 @@ static NOINLINE int do_tmem_get(pool_t *
pgp_t *pgp;
client_t *client = pool->client;
DECL_LOCAL_CYC_COUNTER(decompress);
- int rc = -EFAULT;
+ int rc;
if ( !_atomic_read(pool->pgp_count) )
return -EEMPTY;
@@ -1761,20 +1757,20 @@ static NOINLINE int do_tmem_get(pool_t *
ASSERT(pgp->size != -1);
if ( tmh_dedup_enabled() && !is_persistent(pool) &&
pgp->firstbyte != NOT_SHAREABLE )
- {
rc = pcd_copy_to_client(cmfn, pgp);
- if ( rc <= 0 )
- goto bad_copy;
- } else if ( pgp->size != 0 ) {
+ else if ( pgp->size != 0 )
+ {
START_CYC_COUNTER(decompress);
rc = tmh_decompress_to_client(cmfn, pgp->cdata,
pgp->size, clibuf);
- if ( rc <= 0 )
- goto bad_copy;
END_CYC_COUNTER(decompress);
- } else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
- pfn_offset, len, clibuf) == -EFAULT)
+ }
+ else
+ rc = tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
+ pfn_offset, len, clibuf);
+ if ( rc <= 0 )
goto bad_copy;
+
if ( is_ephemeral(pool) )
{
if ( is_private(pool) )
@@ -1811,7 +1807,6 @@ static NOINLINE int do_tmem_get(pool_t *
return 1;
bad_copy:
- /* this should only happen if the client passed a bad mfn */
failed_copies++;
return rc;
}
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -153,6 +153,8 @@ EXPORT int tmh_copy_from_client(pfp_t *p
pfp_t *cli_pfp = NULL;
int rc = 1;
+ if ( tmem_offset > PAGE_SIZE || pfn_offset > PAGE_SIZE || len > PAGE_SIZE )
+ return -EINVAL;
ASSERT(pfp != NULL);
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
@@ -183,6 +185,8 @@ EXPORT int tmh_copy_from_client(pfp_t *p
pfn_offset, len) )
rc = -EFAULT;
}
+ else if ( len )
+ rc = -EINVAL;
if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(tmem_va);
@@ -230,6 +234,8 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
pfp_t *cli_pfp = NULL;
int rc = 1;
+ if ( tmem_offset > PAGE_SIZE || pfn_offset > PAGE_SIZE || len > PAGE_SIZE )
+ return -EINVAL;
ASSERT(pfp != NULL);
if ( guest_handle_is_null(clibuf) )
{
@@ -249,6 +255,8 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
tmem_va + tmem_offset, len) )
rc = -EFAULT;
}
+ else if ( len )
+ rc = -EINVAL;
unmap_domain_page(tmem_va);
if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);


@@ -1,32 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365888 -7200
# Node ID 83b97a59888b6d2d0f984b8403bd5764dd55c10c
# Parent 33b8c42a87ec2fa6e6533dd9ee7603f732b168f5
tmem: properly drop lock on error path in do_tmem_get()
Also remove a bogus assertion.
Reported-by: Tim Deegan <tim@xen.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1790,7 +1790,6 @@ static NOINLINE int do_tmem_get(pool_t *
list_del(&pgp->us.client_eph_pages);
list_add_tail(&pgp->us.client_eph_pages,&client->ephemeral_page_list);
tmem_spin_unlock(&eph_lists_spinlock);
- ASSERT(obj != NULL);
obj->last_client = tmh_get_cli_id_from_current();
}
}
@@ -1807,6 +1806,8 @@ static NOINLINE int do_tmem_get(pool_t *
return 1;
bad_copy:
+ obj->no_evict = 0;
+ tmem_spin_unlock(&obj->obj_spinlock);
failed_copies++;
return rc;
}


@@ -1,34 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365906 -7200
# Node ID 109ea6a0c23aa0c5df79e3f5658162aed959ffcf
# Parent 83b97a59888b6d2d0f984b8403bd5764dd55c10c
tmem: properly drop lock on error path in do_tmem_op()
Reported-by: Tim Deegan <tim@xen.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2659,13 +2659,19 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
if ( client != NULL && tmh_client_is_dying(client) )
{
rc = -ENODEV;
- goto out;
+ if ( tmh_lock_all )
+ goto out;
+ simple_error:
+ errored_tmem_ops++;
+ return rc;
}
if ( unlikely(tmh_get_tmemop_from_client(&op, uops) != 0) )
{
printk("tmem: can't get tmem struct from %s\n",client_str);
rc = -EFAULT;
+ if ( !tmh_lock_all )
+ goto simple_error;
goto out;
}


@@ -1,305 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365916 -7200
# Node ID 0520982a602a3ac06dd5bc573ddaff5edc9c6987
# Parent 109ea6a0c23aa0c5df79e3f5658162aed959ffcf
tmem: reduce severity of log messages
Otherwise they can be used by a guest to spam the hypervisor log with
all settings at their defaults.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1107,7 +1107,7 @@ static int shared_pool_join(pool_t *pool
sl->client = new_client;
list_add_tail(&sl->share_list, &pool->share_list);
if ( new_client->cli_id != pool->client->cli_id )
- printk("adding new %s %d to shared pool owned by %s %d\n",
+ tmh_client_info("adding new %s %d to shared pool owned by %s %d\n",
client_str, new_client->cli_id, client_str, pool->client->cli_id);
return ++pool->shared_count;
}
@@ -1137,7 +1137,7 @@ static NOINLINE void shared_pool_reassig
old_client->eph_count -= _atomic_read(pool->pgp_count);
list_splice_init(&old_client->ephemeral_page_list,
&new_client->ephemeral_page_list);
- printk("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
+ tmh_client_info("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
cli_id_str, old_client->cli_id, cli_id_str, new_client->cli_id, poolid);
pool->pool_id = poolid;
}
@@ -1173,7 +1173,7 @@ static NOINLINE int shared_pool_quit(poo
}
return 0;
}
- printk("tmem: no match unsharing pool, %s=%d\n",
+ tmh_client_warn("tmem: no match unsharing pool, %s=%d\n",
cli_id_str,pool->client->cli_id);
return -1;
}
@@ -1184,17 +1184,18 @@ static void pool_flush(pool_t *pool, cli
ASSERT(pool != NULL);
if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) )
{
- printk("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
+ tmh_client_warn("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
cli_id_str, cli_id, pool->pool_id, cli_id_str,pool->client->cli_id);
return;
}
- printk("%s %s-%s tmem pool ",destroy?"destroying":"flushing",
- is_persistent(pool) ? "persistent" : "ephemeral" ,
- is_shared(pool) ? "shared" : "private");
- printk("%s=%d pool_id=%d\n", cli_id_str,pool->client->cli_id,pool->pool_id);
+ tmh_client_info("%s %s-%s tmem pool %s=%d pool_id=%d\n",
+ destroy ? "destroying" : "flushing",
+ is_persistent(pool) ? "persistent" : "ephemeral" ,
+ is_shared(pool) ? "shared" : "private",
+ cli_id_str, pool->client->cli_id, pool->pool_id);
if ( pool->client->live_migrating )
{
- printk("can't %s pool while %s is live-migrating\n",
+ tmh_client_warn("can't %s pool while %s is live-migrating\n",
destroy?"destroy":"flush", client_str);
return;
}
@@ -1213,21 +1214,22 @@ static client_t *client_create(cli_id_t
client_t *client = tmh_alloc_infra(sizeof(client_t),__alignof__(client_t));
int i;
- printk("tmem: initializing tmem capability for %s=%d...",cli_id_str,cli_id);
+ tmh_client_info("tmem: initializing tmem capability for %s=%d...",
+ cli_id_str, cli_id);
if ( client == NULL )
{
- printk("failed... out of memory\n");
+ tmh_client_err("failed... out of memory\n");
goto fail;
}
memset(client,0,sizeof(client_t));
if ( (client->tmh = tmh_client_init(cli_id)) == NULL )
{
- printk("failed... can't allocate host-dependent part of client\n");
+ tmh_client_err("failed... can't allocate host-dependent part of client\n");
goto fail;
}
if ( !tmh_set_client_from_id(client, client->tmh, cli_id) )
{
- printk("failed... can't set client\n");
+ tmh_client_err("failed... can't set client\n");
goto fail;
}
client->cli_id = cli_id;
@@ -1249,7 +1251,7 @@ static client_t *client_create(cli_id_t
client->eph_count = client->eph_count_max = 0;
client->total_cycles = 0; client->succ_pers_puts = 0;
client->succ_eph_gets = 0; client->succ_pers_gets = 0;
- printk("ok\n");
+ tmh_client_info("ok\n");
return client;
fail:
@@ -1903,32 +1905,33 @@ static NOINLINE int do_tmem_new_pool(cli
cli_id = tmh_get_cli_id_from_current();
else
cli_id = this_cli_id;
- printk("tmem: allocating %s-%s tmem pool for %s=%d...",
+ tmh_client_info("tmem: allocating %s-%s tmem pool for %s=%d...",
persistent ? "persistent" : "ephemeral" ,
shared ? "shared" : "private", cli_id_str, cli_id);
if ( specversion != TMEM_SPEC_VERSION )
{
- printk("failed... unsupported spec version\n");
+ tmh_client_err("failed... unsupported spec version\n");
return -EPERM;
}
if ( pagebits != (PAGE_SHIFT - 12) )
{
- printk("failed... unsupported pagesize %d\n",1<<(pagebits+12));
+ tmh_client_err("failed... unsupported pagesize %d\n",
+ 1 << (pagebits + 12));
return -EPERM;
}
if ( flags & TMEM_POOL_PRECOMPRESSED )
{
- printk("failed... precompression flag set but unsupported\n");
+ tmh_client_err("failed... precompression flag set but unsupported\n");
return -EPERM;
}
if ( flags & TMEM_POOL_RESERVED_BITS )
{
- printk("failed... reserved bits must be zero\n");
+ tmh_client_err("failed... reserved bits must be zero\n");
return -EPERM;
}
if ( (pool = pool_alloc()) == NULL )
{
- printk("failed... out of memory\n");
+ tmh_client_err("failed... out of memory\n");
return -ENOMEM;
}
if ( this_cli_id != CLI_ID_NULL )
@@ -1947,7 +1950,7 @@ static NOINLINE int do_tmem_new_pool(cli
break;
if ( d_poolid >= MAX_POOLS_PER_DOMAIN )
{
- printk("failed... no more pool slots available for this %s\n",
+ tmh_client_err("failed... no more pool slots available for this %s\n",
client_str);
goto fail;
}
@@ -1977,9 +1980,8 @@ static NOINLINE int do_tmem_new_pool(cli
{
if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi )
{
- printk("(matches shared pool uuid=%"PRIx64".%"PRIx64") ",
- uuid_hi, uuid_lo);
- printk("pool_id=%d\n",d_poolid);
+ tmh_client_info("(matches shared pool uuid=%"PRIx64".%"PRIx64") pool_id=%d\n",
+ uuid_hi, uuid_lo, d_poolid);
client->pools[d_poolid] = global_shared_pools[s_poolid];
shared_pool_join(global_shared_pools[s_poolid], client);
pool_free(pool);
@@ -1991,7 +1993,7 @@ static NOINLINE int do_tmem_new_pool(cli
}
if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS )
{
- printk("tmem: failed... no global shared pool slots available\n");
+ tmh_client_warn("tmem: failed... no global shared pool slots available\n");
goto fail;
}
else
@@ -2007,7 +2009,7 @@ static NOINLINE int do_tmem_new_pool(cli
pool->pool_id = d_poolid;
pool->persistent = persistent;
pool->uuid[0] = uuid_lo; pool->uuid[1] = uuid_hi;
- printk("pool_id=%d\n",d_poolid);
+ tmh_client_info("pool_id=%d\n", d_poolid);
return d_poolid;
fail:
@@ -2030,14 +2032,15 @@ static int tmemc_freeze_pools(cli_id_t c
{
list_for_each_entry(client,&global_client_list,client_list)
client_freeze(client,freeze);
- printk("tmem: all pools %s for all %ss\n",s,client_str);
+ tmh_client_info("tmem: all pools %s for all %ss\n", s, client_str);
}
else
{
if ( (client = tmh_client_from_cli_id(cli_id)) == NULL)
return -1;
client_freeze(client,freeze);
- printk("tmem: all pools %s for %s=%d\n",s,cli_id_str,cli_id);
+ tmh_client_info("tmem: all pools %s for %s=%d\n",
+ s, cli_id_str, cli_id);
}
return 0;
}
@@ -2048,7 +2051,7 @@ static int tmemc_flush_mem(cli_id_t cli_
if ( cli_id != CLI_ID_NULL )
{
- printk("tmem: %s-specific flush not supported yet, use --all\n",
+ tmh_client_warn("tmem: %s-specific flush not supported yet, use --all\n",
client_str);
return -1;
}
@@ -2261,13 +2264,15 @@ static int tmemc_set_var_one(client_t *c
case TMEMC_SET_WEIGHT:
old_weight = client->weight;
client->weight = arg1;
- printk("tmem: weight set to %d for %s=%d\n",arg1,cli_id_str,cli_id);
+ tmh_client_info("tmem: weight set to %d for %s=%d\n",
+ arg1, cli_id_str, cli_id);
atomic_sub(old_weight,&client_weight_total);
atomic_add(client->weight,&client_weight_total);
break;
case TMEMC_SET_CAP:
client->cap = arg1;
- printk("tmem: cap set to %d for %s=%d\n",arg1,cli_id_str,cli_id);
+ tmh_client_info("tmem: cap set to %d for %s=%d\n",
+ arg1, cli_id_str, cli_id);
break;
case TMEMC_SET_COMPRESS:
#ifdef __i386__
@@ -2275,17 +2280,17 @@ static int tmemc_set_var_one(client_t *c
#endif
if ( tmh_dedup_enabled() )
{
- printk("tmem: compression %s for all %ss, cannot be changed "
- "when tmem_dedup is enabled\n",
- tmh_compression_enabled() ? "enabled" : "disabled",client_str);
+ tmh_client_warn("tmem: compression %s for all %ss, cannot be changed when tmem_dedup is enabled\n",
+ tmh_compression_enabled() ? "enabled" : "disabled",
+ client_str);
return -1;
}
client->compress = arg1 ? 1 : 0;
- printk("tmem: compression %s for %s=%d\n",
+ tmh_client_info("tmem: compression %s for %s=%d\n",
arg1 ? "enabled" : "disabled",cli_id_str,cli_id);
break;
default:
- printk("tmem: unknown subop %d for tmemc_set_var\n",subop);
+ tmh_client_warn("tmem: unknown subop %d for tmemc_set_var\n", subop);
return -1;
}
return 0;
@@ -2668,7 +2673,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
if ( unlikely(tmh_get_tmemop_from_client(&op, uops) != 0) )
{
- printk("tmem: can't get tmem struct from %s\n",client_str);
+ tmh_client_err("tmem: can't get tmem struct from %s\n", client_str);
rc = -EFAULT;
if ( !tmh_lock_all )
goto simple_error;
@@ -2702,7 +2707,8 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
tmem_write_lock_set = 1;
if ( (client = client_create(tmh_get_cli_id_from_current())) == NULL )
{
- printk("tmem: can't create tmem structure for %s\n",client_str);
+ tmh_client_err("tmem: can't create tmem structure for %s\n",
+ client_str);
rc = -ENOMEM;
goto out;
}
@@ -2726,8 +2732,8 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
if ( ((uint32_t)op.pool_id >= MAX_POOLS_PER_DOMAIN) ||
((pool = client->pools[op.pool_id]) == NULL) )
{
+ tmh_client_err("tmem: operation requested on uncreated pool\n");
rc = -ENODEV;
- printk("tmem: operation requested on uncreated pool\n");
goto out;
}
ASSERT_SENTINEL(pool,POOL);
@@ -2783,11 +2789,11 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
break;
case TMEM_XCHG:
/* need to hold global lock to ensure xchg is atomic */
- printk("tmem_xchg op not implemented yet\n");
+ tmh_client_warn("tmem_xchg op not implemented yet\n");
rc = 0;
break;
default:
- printk("tmem: op %d not implemented\n", op.cmd);
+ tmh_client_warn("tmem: op %d not implemented\n", op.cmd);
rc = 0;
break;
}
--- a/xen/include/xen/tmem_xen.h
+++ b/xen/include/xen/tmem_xen.h
@@ -512,6 +512,9 @@ int tmh_copy_to_client(tmem_cli_mfn_t, p
extern int tmh_copy_tze_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, pagesize_t len);
+#define tmh_client_err(fmt, args...) printk(XENLOG_G_ERR fmt, ##args)
+#define tmh_client_warn(fmt, args...) printk(XENLOG_G_WARNING fmt, ##args)
+#define tmh_client_info(fmt, args...) printk(XENLOG_G_INFO fmt, ##args)
#define TMEM_PERF
#ifdef TMEM_PERF


@@ -1,41 +0,0 @@
# HG changeset patch
# User Dan Magenheimer <dan.magenheimer@oracle.com>
# Date 1347365943 -7200
# Node ID 16e0392c6594b1757bbaa82076630a73d843229b
# Parent 0520982a602a3ac06dd5bc573ddaff5edc9c6987
tmem: fixup 2010 cleanup patch that breaks tmem save/restore
20918:a3fa6d444b25 "Fix domain reference leaks" (in Feb 2010, by Jan)
does some cleanup in addition to the leak fixes. Unfortunately, that
cleanup inadvertently resulted in an incorrect fallthrough in a switch
statement which breaks tmem save/restore.
That broken patch was apparently applied to 4.0-testing and 4.1-testing
so those are broken as well.
What is the process now for requesting back-patches to 4.0 and 4.1?
(Side note: This does not by itself entirely fix save/restore in 4.2.)
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
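
A minimal reproduction of the fallthrough (names abbreviated; the real code is the tmemc_save_subop() switch patched below):

    #include <stdio.h>

    enum { SAVE_GET_POOL_UUID, SAVE_END };

    static int save_subop(int subop)
    {
        int rc = -1;

        switch ( subop )
        {
        case SAVE_GET_POOL_UUID:
            rc = 0;
            /* missing break: control falls into SAVE_END, ending the
             * live migration as a side effect of a mere UUID query */
        case SAVE_END:
            printf("live_migrating cleared\n");
            rc = 0;
            break;
        }
        return rc;
    }

    int main(void)
    {
        save_subop(SAVE_GET_POOL_UUID); /* prints, despite asking only for the UUID */
        return 0;
    }
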
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2419,6 +2419,7 @@ static NOINLINE int tmemc_save_subop(int
break;
tmh_copy_to_client_buf(buf, pool->uuid, 2);
rc = 0;
+ break;
case TMEMC_SAVE_END:
if ( client == NULL )
break;
@@ -2429,6 +2430,7 @@ static NOINLINE int tmemc_save_subop(int
pgp_free_from_inv_list(client,pgp);
client->frozen = client->was_frozen;
rc = 0;
+ break;
}
return rc;
}


@@ -1,106 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365969 -7200
# Node ID e4cb8411161043c726f699252cc761e77853e820
# Parent 16e0392c6594b1757bbaa82076630a73d843229b
tmem: cleanup
- one more case of checking for a specific rather than any error
- drop no longer needed first parameter from cli_put_page()
- drop a redundant cast
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1468,7 +1468,7 @@ static NOINLINE int do_tmem_put_compress
pgp_free_data(pgp, pgp->us.obj->pool);
START_CYC_COUNTER(compress);
ret = tmh_compress_from_client(cmfn, &dst, &size, clibuf);
- if ( (ret == -EFAULT) || (ret == 0) )
+ if ( ret <= 0 )
goto out;
else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) {
ret = 0;
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -97,7 +97,7 @@ static inline void *cli_get_page(tmem_cl
return NULL;
}
-static inline void cli_put_page(tmem_cli_mfn_t cmfn, void *cli_va, pfp_t *cli_pfp,
+static inline void cli_put_page(void *cli_va, pfp_t *cli_pfp,
unsigned long cli_mfn, bool_t mark_dirty)
{
ASSERT(0);
@@ -126,20 +126,20 @@ static inline void *cli_get_page(tmem_cl
}
*pcli_mfn = page_to_mfn(page);
- *pcli_pfp = (pfp_t *)page;
+ *pcli_pfp = page;
return map_domain_page(*pcli_mfn);
}
-static inline void cli_put_page(tmem_cli_mfn_t cmfn, void *cli_va, pfp_t *cli_pfp,
+static inline void cli_put_page(void *cli_va, pfp_t *cli_pfp,
unsigned long cli_mfn, bool_t mark_dirty)
{
if ( mark_dirty )
{
- put_page_and_type((struct page_info *)cli_pfp);
+ put_page_and_type(cli_pfp);
paging_mark_dirty(current->domain,cli_mfn);
}
else
- put_page((struct page_info *)cli_pfp);
+ put_page(cli_pfp);
unmap_domain_page(cli_va);
}
#endif
@@ -188,7 +188,7 @@ EXPORT int tmh_copy_from_client(pfp_t *p
else if ( len )
rc = -EINVAL;
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(tmem_va);
return rc;
}
@@ -221,7 +221,7 @@ EXPORT int tmh_compress_from_client(tmem
ASSERT(ret == LZO_E_OK);
*out_va = dmem;
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
return 1;
}
@@ -259,7 +259,7 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
rc = -EINVAL;
unmap_domain_page(tmem_va);
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
mb();
return rc;
}
@@ -286,7 +286,7 @@ EXPORT int tmh_decompress_to_client(tmem
ASSERT(ret == LZO_E_OK);
ASSERT(out_len == PAGE_SIZE);
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
else if ( copy_to_guest(clibuf, scratch, PAGE_SIZE) )
return -EFAULT;
mb();
@@ -310,7 +310,7 @@ EXPORT int tmh_copy_tze_to_client(tmem_c
memcpy((char *)cli_va,(char *)tmem_va,len);
if ( len < PAGE_SIZE )
memset((char *)cli_va+len,0,PAGE_SIZE-len);
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
mb();
return 1;
}


@@ -1,28 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348039675 -7200
# Node ID 3e3959413b2fbef584993beb434285d0691d5c67
# Parent 4a0438fe1e6afe01e46023bcb2c828c5aaeefb1d
x86: properly check XEN_DOMCTL_ioport_mapping arguments for invalid range
In particular, the case of "np" being a very large value wasn't handled
correctly. The range start checks also were off by one (except that in
practice, when "np" is properly range checked, this would still have
been caught by the range end checks).
Also, is a GFN wrap in XEN_DOMCTL_memory_mapping really okay?
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
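
A standalone sketch of why the new combined check suffices (MAX_IOPORTS is 0x10000 in Xen; all operands unsigned):

    #include <stdio.h>

    #define MAX_IOPORTS 0x10000u

    static int ok(unsigned int fgp, unsigned int fmp, unsigned int np)
    {
        /* np == 0 makes np - 1 wrap to ~0u and fail the OR-ed bound;
         * a huge np is rejected the same way, before the sums below
         * get a chance to wrap. */
        if ( ((fgp | fmp | (np - 1)) >= MAX_IOPORTS) ||
             ((fgp + np) > MAX_IOPORTS) || ((fmp + np) > MAX_IOPORTS) )
            return 0;
        return 1;
    }

    int main(void)
    {
        /* Under the old check, fgp + np wrapped (2 + 0xffffffff == 1)
         * and a near-4G-port request slipped through. */
        printf("%d\n", ok(2, 2, 0xffffffffu)); /* 0: rejected now */
        printf("%d\n", ok(0x70, 0x70, 0));     /* 0: np == 0 rejected */
        printf("%d\n", ok(0x70, 0x70, 4));     /* 1: sane request */
        return 0;
    }
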
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -888,7 +888,7 @@ long arch_do_domctl(
int found = 0;
ret = -EINVAL;
- if ( (np == 0) || (fgp > MAX_IOPORTS) || (fmp > MAX_IOPORTS) ||
+ if ( ((fgp | fmp | (np - 1)) >= MAX_IOPORTS) ||
((fgp + np) > MAX_IOPORTS) || ((fmp + np) > MAX_IOPORTS) )
{
printk(XENLOG_G_ERR


@@ -1,26 +0,0 @@
# HG changeset patch
# User Zhenzhong Duan <zhenzhong.duan@oracle.com>
# Date 1348069127 -7200
# Node ID fee83ac77d8c7339abf5185690603ea5b0c548cf
# Parent 7b045d43e59dcb42340097058502bf456e151180
tmem: bump pool version to 1 to fix restore issue when tmem enabled
Restore fails when tmem is enabled both in hypervisor and guest. This
is due to spec version mismatch when restoring a pool.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
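
For orientation, the layout of the flags word returned for TMEMC_SAVE_GET_POOL_FLAGS, with bit positions assumed from Xen's public tmem interface:

    /*   bit  0       TMEM_POOL_PERSIST
     *   bit  1       TMEM_POOL_SHARED
     *   bits 4..7    pageshift (TMEM_POOL_PAGESIZE_SHIFT)
     *   bits 24..31  spec version (TMEM_POOL_VERSION_SHIFT)
     *
     * Before this patch the version field stayed 0, so a restoring
     * hypervisor expecting TMEM_SPEC_VERSION rejected the pool. */
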
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2407,7 +2407,8 @@ static NOINLINE int tmemc_save_subop(int
break;
rc = (pool->persistent ? TMEM_POOL_PERSIST : 0) |
(pool->shared ? TMEM_POOL_SHARED : 0) |
- (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT);
+ (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT) |
+ (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT);
break;
case TMEMC_SAVE_GET_POOL_NPAGES:
if ( pool == NULL )


@@ -1,257 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348125713 -7200
# Node ID 14980591956954584f3cd87bf4e8d306360e7f27
# Parent 4e93cbeac98b3e6be98e5ec0881b44b68ee95974
x86: tighten checks in XEN_DOMCTL_memory_mapping handler
Properly checking the MFN implies knowing the physical address width
supported by the platform, so to obtain this consistently the
respective code gets moved out of the MTRR subdir.
Btw., the model specific workaround in that code is likely unnecessary
- I believe those CPU models don't support 64-bit mode. But I wasn't
able to formally verify this, so I preferred to retain that code for
now.
But the domctl code here was also lacking other error checks (as was,
looking at it again from that angle, the XEN_DOMCTL_ioport_mapping one).
Besides adding the missing checks, printing is also added for the case
where revoking access permissions didn't work (as that may have
implications for the host operator, e.g. wanting to not pass through
affected devices to another guest until the one previously using them
did actually die).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
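
A standalone sketch of the MFN width check the patch adds (paddr_bits as probed from CPUID leaf 0x80000008 in the hunks below; PAGE_SHIFT is 12):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned int paddr_bits = 36;          /* hypothetical platform */
        unsigned long mfn = 1UL << 30, nr_mfns = 16;

        /* A frame number is representable only if it fits within
         * paddr_bits - PAGE_SHIFT bits, so OR-ing the first and last
         * frame of the range and shifting must yield zero. */
        if ( (mfn + nr_mfns - 1) < mfn ||      /* wrap? */
             ((mfn | (mfn + nr_mfns - 1)) >> (paddr_bits - PAGE_SHIFT)) )
            printf("rejected: beyond a %u-bit physical address space\n",
                   paddr_bits);
        else
            printf("accepted\n");
        return 0;
    }
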
--- a/xen/arch/x86/cpu/centaur.c
+++ b/xen/arch/x86/cpu/centaur.c
@@ -56,6 +56,9 @@ static void __init init_c3(struct cpuinf
if (c->x86_model >=6 && c->x86_model <9)
set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+ if (cpuid_eax(0x80000000) < 0x80000008)
+ paddr_bits = 32;
+
get_model_name(c);
display_cacheinfo(c);
}
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -36,6 +36,8 @@ integer_param("cpuid_mask_ext_edx", opt_
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+unsigned int paddr_bits __read_mostly = 36;
+
/*
* Default host IA32_CR_PAT value to cover all memory types.
* BIOS usually sets it to 0x07040600070406.
@@ -265,6 +267,8 @@ static void __cpuinit generic_identify(s
}
if ( xlvl >= 0x80000004 )
get_model_name(c); /* Default name */
+ if ( xlvl >= 0x80000008 )
+ paddr_bits = cpuid_eax(0x80000008) & 0xff;
}
/* Intel-defined flags: level 0x00000007 */
--- a/xen/arch/x86/cpu/cyrix.c
+++ b/xen/arch/x86/cpu/cyrix.c
@@ -255,7 +255,9 @@ static void __init init_cyrix(struct cpu
}
safe_strcpy(c->x86_model_id, Cx86_model[dir0_msn & 7]);
if (p) safe_strcat(c->x86_model_id, p);
- return;
+
+ if (cpu_has_cyrix_arr)
+ paddr_bits = 32;
}
/*
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -144,6 +144,11 @@ void __devinit early_intel_workaround(st
c->cpuid_level);
}
}
+
+ /* CPUID workaround for Intel 0F33/0F34 CPU */
+ if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 &&
+ (boot_cpu_data.x86_mask == 3 || boot_cpu_data.x86_mask == 4))
+ paddr_bits = 36;
}
/*
--- a/xen/arch/x86/cpu/mtrr/main.c
+++ b/xen/arch/x86/cpu/mtrr/main.c
@@ -587,8 +587,6 @@ struct mtrr_value {
unsigned long lsize;
};
-unsigned int paddr_bits __read_mostly = 36;
-
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -602,48 +600,12 @@ void __init mtrr_bp_init(void)
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
- size_and_mask = 0x00f00000;
-
- /* This is an AMD specific MSR, but we assume(hope?) that
- Intel will implement it to when they extend the address
- bus of the Xeon. */
- if (cpuid_eax(0x80000000) >= 0x80000008) {
- paddr_bits = cpuid_eax(0x80000008) & 0xff;
- /* CPUID workaround for Intel 0F33/0F34 CPU */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
- boot_cpu_data.x86 == 0xF &&
- boot_cpu_data.x86_model == 0x3 &&
- (boot_cpu_data.x86_mask == 0x3 ||
- boot_cpu_data.x86_mask == 0x4))
- paddr_bits = 36;
-
- size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
- size_and_mask = ~size_or_mask & 0xfffff00000ULL;
- } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
- boot_cpu_data.x86 == 6) {
- /* VIA C* family have Intel style MTRRs, but
- don't support PAE */
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
} else {
#ifndef CONFIG_X86_64
switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (cpu_has_k6_mtrr) {
- /* Pre-Athlon (K6) AMD CPU MTRRs */
- mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
case X86_VENDOR_CYRIX:
- if (cpu_has_cyrix_arr) {
+ if (cpu_has_cyrix_arr)
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
break;
default:
break;
@@ -652,6 +614,8 @@ void __init mtrr_bp_init(void)
}
if (mtrr_if) {
+ size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
+ size_and_mask = ~size_or_mask & 0xfffff00000ULL;
set_num_var_ranges();
init_table();
if (use_intel())
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -829,10 +829,12 @@ long arch_do_domctl(
unsigned long mfn = domctl->u.memory_mapping.first_mfn;
unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns;
int add = domctl->u.memory_mapping.add_mapping;
- int i;
+ unsigned long i;
ret = -EINVAL;
- if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
+ if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */
+ ((mfn | (mfn + nr_mfns - 1)) >> (paddr_bits - PAGE_SHIFT)) ||
+ (gfn + nr_mfns - 1) < gfn ) /* wrap? */
break;
ret = -EPERM;
@@ -857,8 +859,25 @@ long arch_do_domctl(
d->domain_id, gfn, mfn, nr_mfns);
ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
- for ( i = 0; i < nr_mfns; i++ )
- set_mmio_p2m_entry(d, gfn+i, _mfn(mfn+i));
+ if ( !ret && paging_mode_translate(d) )
+ {
+ for ( i = 0; !ret && i < nr_mfns; i++ )
+ if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) )
+ ret = -EIO;
+ if ( ret )
+ {
+ printk(XENLOG_G_WARNING
+ "memory_map:fail: dom%d gfn=%lx mfn=%lx\n",
+ d->domain_id, gfn + i, mfn + i);
+ while ( i-- )
+ clear_mmio_p2m_entry(d, gfn + i);
+ if ( iomem_deny_access(d, mfn, mfn + nr_mfns - 1) &&
+ IS_PRIV(current->domain) )
+ printk(XENLOG_ERR
+ "memory_map: failed to deny dom%d access to [%lx,%lx]\n",
+ d->domain_id, mfn, mfn + nr_mfns - 1);
+ }
+ }
}
else
{
@@ -866,9 +885,17 @@ long arch_do_domctl(
"memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n",
d->domain_id, gfn, mfn, nr_mfns);
- for ( i = 0; i < nr_mfns; i++ )
- clear_mmio_p2m_entry(d, gfn+i);
+ if ( paging_mode_translate(d) )
+ for ( i = 0; i < nr_mfns; i++ )
+ add |= !clear_mmio_p2m_entry(d, gfn + i);
ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
+ if ( !ret && add )
+ ret = -EIO;
+ if ( ret && IS_PRIV(current->domain) )
+ printk(XENLOG_ERR
+ "memory_map: error %ld %s dom%d access to [%lx,%lx]\n",
+ ret, add ? "removing" : "denying", d->domain_id,
+ mfn, mfn + nr_mfns - 1);
}
rcu_unlock_domain(d);
@@ -930,12 +957,23 @@ long arch_do_domctl(
if ( !found )
{
g2m_ioport = xmalloc(struct g2m_ioport);
+ if ( !g2m_ioport )
+ ret = -ENOMEM;
+ }
+ if ( !found && !ret )
+ {
g2m_ioport->gport = fgp;
g2m_ioport->mport = fmp;
g2m_ioport->np = np;
list_add_tail(&g2m_ioport->list, &hd->g2m_ioport_list);
}
- ret = ioports_permit_access(d, fmp, fmp + np - 1);
+ if ( !ret )
+ ret = ioports_permit_access(d, fmp, fmp + np - 1);
+ if ( ret && !found && g2m_ioport )
+ {
+ list_del(&g2m_ioport->list);
+ xfree(g2m_ioport);
+ }
}
else
{
@@ -950,6 +988,10 @@ long arch_do_domctl(
break;
}
ret = ioports_deny_access(d, fmp, fmp + np - 1);
+ if ( ret && IS_PRIV(current->domain) )
+ printk(XENLOG_ERR
+ "ioport_map: error %ld denying dom%d access to [%x,%x]\n",
+ ret, d->domain_id, fmp, fmp + np - 1);
}
rcu_unlock_domain(d);
}

View File

@ -1,53 +0,0 @@
# HG changeset patch
# User Ben Guthro <ben@guthro.net>
# Date 1348555094 -7200
# Node ID c8d65d91a6f20fa7fae905bbf172e59b335d6371
# Parent b49f7bf52fa92626517386cba89350243b808871
x86/S3: add cache flush on secondary CPUs before going to sleep
Secondary CPUs, between doing their final memory writes (particularly
updating cpu_initialized) and getting a subsequent INIT, may not write
back all modified data. The INIT itself then causes those modifications
to be lost, so in the cpu_initialized case the CPU would find itself
already initialized, (intentionally) entering an infinite loop instead
of actually coming online.
Signed-off-by: Ben Guthro <ben@guthro.net>
Make acpi_dead_idle() call default_dead_idle() rather than duplicating
the logic there.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -647,6 +647,12 @@ static void acpi_dead_idle(void)
}
default_halt:
+ /*
+ * When going into S3, without flushing caches modified data may be
+ * held by the CPUs spinning here indefinitely, and get discarded by
+ * a subsequent INIT.
+ */
+ wbinvd();
for ( ; ; )
halt();
}
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -86,6 +86,12 @@ static void default_idle(void)
static void default_dead_idle(void)
{
+ /*
+ * When going into S3, without flushing caches modified data may be
+ * held by the CPUs spinning here indefinitely, and get discarded by
+ * a subsequent INIT.
+ */
+ wbinvd();
for ( ; ; )
halt();
}

View File

@ -1,44 +0,0 @@
changeset: 25941:795c493fe561
user: Olaf Hering <olaf@aepfle.de>
date: Tue Sep 25 11:03:51 2012 +0100
files: tools/pygrub/src/pygrub
description:
pygrub: always append --args
If a bootloader entry in menu.lst has no additional kernel command line
options listed and the domU.cfg has 'bootargs="--args=something"' the
additional arguments from the config file are not passed to the kernel.
The reason for that incorrect behaviour is that run_grub appends arg
only if the parsed config file has arguments listed.
Fix this by appending the args from the image section and from the config file separately.
To avoid adding to a NoneType initialize grubcfg['args'] to an empty string.
This does not change behaviour but simplifies the code which appends the
string.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r c8d65d91a6f2 -r 795c493fe561 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Tue Sep 25 08:38:14 2012 +0200
+++ b/tools/pygrub/src/pygrub Tue Sep 25 11:03:51 2012 +0100
@@ -615,13 +615,15 @@ def run_grub(file, entry, fs, arg):
except IndexError:
img = g.cf.images[0]
- grubcfg = { "kernel": None, "ramdisk": None, "args": None }
+ grubcfg = { "kernel": None, "ramdisk": None, "args": "" }
grubcfg["kernel"] = img.kernel[1]
if img.initrd:
grubcfg["ramdisk"] = img.initrd[1]
if img.args:
- grubcfg["args"] = img.args + " " + arg
+ grubcfg["args"] += img.args
+ if arg:
+ grubcfg["args"] += " " + args
return grubcfg

View File

@ -1,36 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348816934 -7200
# Node ID 6a581212909478bba0c7b4dfc6c370270dee825c
# Parent 6bf8b882df8f66ab5500e4d9cc0c3338ae5a6cb9
x86/HPET: don't disable interrupt delivery right after setting it up
We shouldn't clear HPET_TN_FSB right after we (indirectly, via
request_irq()) enabled it for the channels we intend to use for
broadcasts.
This fixes a regression introduced by c/s 25103:0b0e42dc4f0a.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -533,7 +533,7 @@ void __init hpet_broadcast_init(void)
{
/* set HPET Tn as oneshot */
cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
- cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC | HPET_TN_FSB);
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
@@ -590,7 +590,7 @@ void hpet_broadcast_resume(void)
/* set HPET Tn as oneshot */
cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
- cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC | HPET_TN_FSB);
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));

View File

@ -1,35 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348817014 -7200
# Node ID 41f523f1b5e5af9cf8e85160f2412456da83050f
# Parent 6a581212909478bba0c7b4dfc6c370270dee825c
x86/IRQ: fix valid-old-vector checks in __assign_irq_vector()
There are two greater-than-zero checks for the old vector retrieved,
which don't work when a negative value got stashed into the respective
arch_irq_desc field. The effect of this was that for interrupts that
are intended to get their affinity adjusted the first time before the
first interrupt occurs, the affinity change would fail, because the
original vector assignment would have caused the move_in_progress flag
to get set (which causes subsequent re-assignments to fail until it
gets cleared, which only happens from the ->ack() actor, i.e. when an
interrupt actually occurred).
This addresses a problem introduced in c/s 23816:7f357e1ef60a (by
changing IRQ_VECTOR_UNASSIGNED from 0 to -1).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -430,8 +430,7 @@ static int __assign_irq_vector(
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
- unsigned int old_vector;
- int cpu, err;
+ int cpu, err, old_vector;
cpumask_t tmp_mask;
vmask_t *irq_used_vectors = NULL;
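The underlying pitfall can be reproduced in isolation: stashing -1 into an unsigned variable makes a "> 0" validity test succeed. A compilable toy example:

    #include <stdio.h>

    #define IRQ_VECTOR_UNASSIGNED (-1)

    int main(void)
    {
        unsigned int uv = IRQ_VECTOR_UNASSIGNED;  /* wraps to 0xffffffff */
        int sv = IRQ_VECTOR_UNASSIGNED;

        /* Prints "unsigned: 1 signed: 0" - only the signed variable
         * correctly fails the valid-old-vector test. */
        printf("unsigned: %d signed: %d\n", uv > 0, sv > 0);
        return 0;
    }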

View File

@ -1,35 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348817291 -7200
# Node ID 4496d56c68a0e57ed9f03b482028093f1e7fdf6c
# Parent 00c05b9d76247d063a8ebc75050246e488323f50
x86/ucode: fix Intel case of resume handling on boot CPU
Checking the stored version doesn't tell us anything about the need to
apply the update (during resume, what is stored doesn't necessarily
match what is loaded).
Note that the check can be removed altogether because once switched to
use what was read from the CPU (uci->cpu_sig.rev, as used in the
subsequent pr_debug()), it would become redundant with the checks that
lead to microcode_update_match() returning the indication that an
update should be applied.
Note further that this was not an issue on APs since they start with
uci->mc.mc_intel being NULL.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Ben Guthro <ben@guthro.net>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/microcode_intel.c
+++ b/xen/arch/x86/microcode_intel.c
@@ -261,8 +261,6 @@ static int get_matching_microcode(const
}
return 0;
find:
- if ( uci->mc.mc_intel && uci->mc.mc_intel->hdr.rev >= mc_header->rev )
- return 0;
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n",
cpu, mc_header->rev, uci->cpu_sig.rev);

View File

@ -1,41 +0,0 @@
# HG changeset patch
# User Tim Deegan <tim@xen.org>
# Date 1349356850 -3600
# Node ID a9c84069c2489e2c432a5068adc7cf8d51ae3366
# Parent 72d89cc43c72848be9bf49da9a87729ed8f48433
x86/nested-svm: Update the paging mode on VMRUN and VMEXIT emulation.
This allows Xen to walk the l1 hypervisor's shadow pagetable
correctly. Not needed for hap-on-hap guests because they are handled
at lookup time. Problem found with 64bit Win7 and 32bit XPMode where Win7
switches back and forth between long mode and PAE legacy pagetables.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
[Adjusted to update in all cases where the l1 vmm uses shadows]
Signed-off-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -741,6 +741,10 @@ nsvm_vcpu_vmrun(struct vcpu *v, struct c
return 1;
}
+ /* If l1 guest uses shadow paging, update the paging mode. */
+ if (!nestedhvm_paging_mode_hap(v))
+ paging_update_paging_modes(v);
+
nv->nv_vmswitch_in_progress = 0;
return 0;
}
@@ -1408,6 +1412,10 @@ nestedsvm_vcpu_vmexit(struct vcpu *v, st
*/
rc = nhvm_vcpu_vmexit(v, regs, exitcode);
+ /* If l1 guest uses shadow paging, update the paging mode. */
+ if (!nestedhvm_paging_mode_hap(v))
+ paging_update_paging_modes(v);
+
nv->nv_vmswitch_in_progress = 0;
if (rc)

View File

@ -1,21 +0,0 @@
changeset: 26006:8b6870d686d6
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:30 2012 +0100
files: tools/hotplug/Linux/network-nat
description:
hotplug/Linux: Remove tracing (bash -x) from network-nat script
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r cdb48f1742f3 -r 8b6870d686d6 tools/hotplug/Linux/network-nat
--- a/tools/hotplug/Linux/network-nat Mon Oct 08 12:18:29 2012 +0100
+++ b/tools/hotplug/Linux/network-nat Mon Oct 08 12:18:30 2012 +0100
@@ -1,4 +1,4 @@
-#!/bin/bash -x
+#!/bin/bash
#============================================================================
# Default Xen network start/stop script when using NAT.
# Xend calls a network script when it starts.

View File

@ -1,31 +0,0 @@
changeset: 26007:fe756682cc7f
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:31 2012 +0100
files: tools/xenballoon/xenballoond.init
description:
xenballoond.init: remove 4 from default runlevel
Remove 4 from default runlevel in xenballoond.init.
Similar to what changeset 24847:0900b1c905f1 does in xencommons, remove
runlevel 4 from the other runlevel scripts. LSB defines runlevel 4 as
reserved for local use; the local sysadmin is responsible for symlink
creation in rc4.d.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 8b6870d686d6 -r fe756682cc7f tools/xenballoon/xenballoond.init
--- a/tools/xenballoon/xenballoond.init Mon Oct 08 12:18:30 2012 +0100
+++ b/tools/xenballoon/xenballoond.init Mon Oct 08 12:18:31 2012 +0100
@@ -14,7 +14,7 @@
# Should-Start:
# Required-Stop: $syslog $remote_fs
# Should-Stop:
-# Default-Start: 3 4 5
+# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Start/stop xenballoond
# Description: Starts and stops the Xen ballooning daemon.

View File

@ -1,41 +0,0 @@
changeset: 26008:eecb528583d7
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:31 2012 +0100
files: tools/python/xen/util/vscsi_util.py
description:
xend/pvscsi: fix passing of SCSI control LUNs
Currently pvscsi cannot pass through SCSI devices that have just a scsi_generic node.
In the following example sg3 is a control LUN for the disk sdd.
But vscsi=['4:0:2:0,0:0:0:0'] does not work because the internal 'devname'
variable remains None. Later writing p-devname to xenstore fails because None
is not a valid string variable.
Since devname is used for just informational purpose use sg also as devname.
carron:~ $ lsscsi -g
[0:0:0:0] disk ATA FK0032CAAZP HPF2 /dev/sda /dev/sg0
[4:0:0:0] disk HP P2000G3 FC/iSCSI T100 /dev/sdb /dev/sg1
[4:0:1:0] disk HP P2000G3 FC/iSCSI T100 /dev/sdc /dev/sg2
[4:0:2:0] storage HP HSV400 0950 - /dev/sg3
[4:0:2:1] disk HP HSV400 0950 /dev/sdd /dev/sg4
[4:0:3:0] storage HP HSV400 0950 - /dev/sg5
[4:0:3:1] disk HP HSV400 0950 /dev/sde /dev/sg6
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r fe756682cc7f -r eecb528583d7 tools/python/xen/util/vscsi_util.py
--- a/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:31 2012 +0100
+++ b/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:31 2012 +0100
@@ -105,6 +105,8 @@ def _vscsi_get_scsidevices_by_lsscsi(opt
devname = None
try:
sg = s[-1].split('/dev/')[1]
+ if devname is None:
+ devname = sg
scsi_id = _vscsi_get_scsiid(sg)
except IndexError:
sg = None

View File

@ -1,47 +0,0 @@
changeset: 26009:2dbfa4d2e107
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:32 2012 +0100
files: tools/python/xen/util/vscsi_util.py
description:
xend/pvscsi: fix usage of persistent device names for SCSI devices
Currently the callers of vscsi_get_scsidevices() do not pass a mask
string. This will call "lsscsi -g '[]'", which causes a lsscsi syntax
error. As a result the sysfs parser _vscsi_get_scsidevices() is used.
But this parser is broken and the specified names in the config file are
not found.
Using a mask '*' if no mask was given will call lsscsi correctly and the
following config is parsed correctly:
vscsi=[
'/dev/sg3, 0:0:0:0',
'/dev/disk/by-id/wwn-0x600508b4000cf1c30000800000410000, 0:0:0:1'
]
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r eecb528583d7 -r 2dbfa4d2e107 tools/python/xen/util/vscsi_util.py
--- a/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:31 2012 +0100
+++ b/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:32 2012 +0100
@@ -150,7 +150,7 @@ def _vscsi_get_scsidevices_by_sysfs():
return devices
-def vscsi_get_scsidevices(mask=""):
+def vscsi_get_scsidevices(mask="*"):
""" get all scsi devices information """
devices = _vscsi_get_scsidevices_by_lsscsi("[%s]" % mask)
@@ -279,7 +279,7 @@ def get_scsi_device(pHCTL):
return _make_scsi_record(scsi_info)
return None
-def get_all_scsi_devices(mask=""):
+def get_all_scsi_devices(mask="*"):
scsi_records = []
for scsi_info in vscsi_get_scsidevices(mask):
scsi_record = _make_scsi_record(scsi_info)

View File

@ -1,84 +0,0 @@
changeset: 26010:cff10030c6ea
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:33 2012 +0100
files: tools/python/xen/util/vscsi_util.py
description:
xend/pvscsi: update sysfs parser for Linux 3.0
The sysfs parser for /sys/bus/scsi/devices understands only the layout
of kernel version 2.6.16. This looks as follows:
/sys/bus/scsi/devices/1:0:0:0/block:sda is a symlink to /sys/block/sda/
/sys/bus/scsi/devices/1:0:0:0/scsi_generic:sg1 is a symlink to /sys/class/scsi_generic/sg1
Both directories contain a 'dev' file with the major:minor information.
This patch updates the used regex strings to match also the colon to
make it more robust against possible future changes.
In kernel version 3.0 the layout changed:
/sys/bus/scsi/devices/ contains now additional symlinks to directories
such as host1 and target1:0:0. This patch ignores these as they do not
point to the desired scsi devices. They just clutter the devices array.
The directory layout in '1:0:0:0' changed as well, the 'type:name'
notation was replaced with 'type/name' directories:
/sys/bus/scsi/devices/1:0:0:0/block/sda/
/sys/bus/scsi/devices/1:0:0:0/scsi_generic/sg1/
Both directories contain a 'dev' file with the major:minor information.
This patch adds additional code to walk the subdir to find the 'dev'
file to make sure the given subdirectory is really the kernel name.
In addition this patch makes sure devname is not None.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
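The probing strategy described above (treat the subdirectory that contains a 'dev' file as the kernel device name) can also be sketched in C; the patch itself is Python, and the paths here are illustrative:

    #include <dirent.h>
    #include <stdio.h>
    #include <sys/stat.h>

    /* Scan e.g. /sys/bus/scsi/devices/1:0:0:0/block for a subdirectory
     * containing a 'dev' file; that subdirectory is the kernel name. */
    static int find_devname(const char *base, char *out, size_t outlen)
    {
        DIR *d = opendir(base);
        struct dirent *e;

        if ( !d )
            return -1;
        while ( (e = readdir(d)) != NULL )
        {
            char path[512];
            struct stat st;

            if ( e->d_name[0] == '.' )
                continue;
            snprintf(path, sizeof(path), "%s/%s/dev", base, e->d_name);
            if ( stat(path, &st) == 0 )
            {
                snprintf(out, outlen, "%s", e->d_name);
                closedir(d);
                return 0;
            }
        }
        closedir(d);
        return -1;
    }

    int main(void)
    {
        char name[64];
        if ( find_devname("/sys/bus/scsi/devices/1:0:0:0/block",
                          name, sizeof(name)) == 0 )
            printf("kernel name: %s\n", name);
        return 0;
    }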
diff -r 2dbfa4d2e107 -r cff10030c6ea tools/python/xen/util/vscsi_util.py
--- a/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:32 2012 +0100
+++ b/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:33 2012 +0100
@@ -130,20 +130,36 @@ def _vscsi_get_scsidevices_by_sysfs():
for dirpath, dirnames, files in os.walk(sysfs_mnt + SYSFS_SCSI_PATH):
for hctl in dirnames:
+ if len(hctl.split(':')) != 4:
+ continue
paths = os.path.join(dirpath, hctl)
devname = None
sg = None
scsi_id = None
for f in os.listdir(paths):
realpath = os.path.realpath(os.path.join(paths, f))
- if re.match('^block', f) or \
- re.match('^tape', f) or \
- re.match('^scsi_changer', f) or \
- re.match('^onstream_tape', f):
+ if re.match('^block:', f) or \
+ re.match('^tape:', f) or \
+ re.match('^scsi_changer:', f) or \
+ re.match('^onstream_tape:', f):
devname = os.path.basename(realpath)
+ elif f == "block" or \
+ f == "tape" or \
+ f == "scsi_changer" or \
+ f == "onstream_tape":
+ for dir in os.listdir(os.path.join(paths, f)):
+ if os.path.exists(os.path.join(paths, f, dir, "dev")):
+ devname = os.path.basename(dir)
- if re.match('^scsi_generic', f):
+ if re.match('^scsi_generic:', f):
sg = os.path.basename(realpath)
+ elif f == "scsi_generic":
+ for dir in os.listdir(os.path.join(paths, f)):
+ if os.path.exists(os.path.join(paths, f, dir, "dev")):
+ sg = os.path.basename(dir)
+ if sg:
+ if devname is None:
+ devname = sg
scsi_id = _vscsi_get_scsiid(sg)
devices.append([hctl, devname, sg, scsi_id])

View File

@ -1,224 +0,0 @@
changeset: 26011:b6fb4e63b946
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:34 2012 +0100
files: stubdom/Makefile
description:
stubdom: fix parallel build by expanding CROSS_MAKE
Recently I changed my rpm xen.spec file from doing
'make -C tools -j N && make stubdom' to 'make -j N stubdom' because
stubdom depends on tools, so both get built.
The result was the failure below.
....
mkdir -p grub-x86_64
CPPFLAGS="-isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include -D__MINIOS__ -DHAVE_LIBC -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/posix -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../tools/xenstore -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/x86 -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/x86/x86_64 -U __linux__ -U __FreeBSD__ -U __sun__ -nostdinc -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/posix -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/cross-root-x86_64/x86_64-xen-elf/include -isystem /usr/lib64/gcc/x86_64-suse-linux/4.7/include -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/lwip-x86_64/src/include -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/lwip-x86_64/src/include/ipv4 -I/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/include -I/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../xen/include" CFLAGS="-mno-red-zone -O1 -fno-omit-frame-pointer -m64 -mno-red-zone -fno-reorder-blocks -fno-asynchronous-unwind-tables -m64 -g -fno-strict-aliasing -std=gnu99 -Wall -Wstrict-prototypes -Wdeclaration-after-statement -Wno-unused-but-set-variable -fno-stack-protector -fno-exceptions" make DESTDIR= -C grub OBJ_DIR=/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/grub-x86_64
make[2]: Entering directory `/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/grub'
make[2]: warning: jobserver unavailable: using -j1. Add `+' to parent make rule.
make[2]: *** INTERNAL: readdir: Bad file descriptor
. Stop.
make[2]: Makefile: Field 'stem' not cached: Makefile
make[2]: Leaving directory `/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/grub'
make[1]: *** [grub] Error 2
[ -d mini-os-x86_64-xenstore ] || \
for i in $(cd /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os ; find . -type d) ; do \
mkdir -p mini-os-x86_64-xenstore/$i ; \
done
....
Expanding every occurrence of CROSS_MAKE avoids this error. It also has
the nice side effect of actually enabling parallel build for stubdom.
According to the GNU make documentation $(MAKE) gets its special meaning
only if it appears directly in the recipe:
http://www.gnu.org/software/make/manual/html_node/MAKE-Variable.html
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r cff10030c6ea -r b6fb4e63b946 stubdom/Makefile
--- a/stubdom/Makefile Mon Oct 08 12:18:33 2012 +0100
+++ b/stubdom/Makefile Mon Oct 08 12:18:34 2012 +0100
@@ -76,8 +76,6 @@ TARGET_LDFLAGS += -nostdlib -L$(CROSS_PR
TARGETS=ioemu c caml grub xenstore
-CROSS_MAKE := $(MAKE) DESTDIR=
-
.PHONY: all
all: build
ifeq ($(STUBDOM_SUPPORTED),1)
@@ -113,8 +111,8 @@ cross-newlib: $(NEWLIB_STAMPFILE)
mkdir -p newlib-$(XEN_TARGET_ARCH)
( cd newlib-$(XEN_TARGET_ARCH) && \
CC_FOR_TARGET="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) $(NEWLIB_CFLAGS)" AR_FOR_TARGET=$(AR) LD_FOR_TARGET=$(LD) RANLIB_FOR_TARGET=$(RANLIB) ../newlib-$(NEWLIB_VERSION)/configure --prefix=$(CROSS_PREFIX) --verbose --target=$(GNU_TARGET_ARCH)-xen-elf --enable-newlib-io-long-long --disable-multilib && \
- $(CROSS_MAKE) && \
- $(CROSS_MAKE) install )
+ $(MAKE) DESTDIR= && \
+ $(MAKE) DESTDIR= install )
############
# Cross-zlib
@@ -133,8 +131,8 @@ cross-zlib: $(ZLIB_STAMPFILE)
$(ZLIB_STAMPFILE): zlib-$(XEN_TARGET_ARCH) $(NEWLIB_STAMPFILE)
( cd $< && \
CFLAGS="$(TARGET_CPPFLAGS) $(TARGET_CFLAGS)" CC=$(CC) ./configure --prefix=$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf && \
- $(CROSS_MAKE) libz.a && \
- $(CROSS_MAKE) install )
+ $(MAKE) DESTDIR= libz.a && \
+ $(MAKE) DESTDIR= install )
##############
# Cross-libpci
@@ -158,7 +156,7 @@ cross-libpci: $(LIBPCI_STAMPFILE)
chmod u+w lib/config.h && \
echo '#define PCILIB_VERSION "$(LIBPCI_VERSION)"' >> lib/config.h && \
ln -sf ../../libpci.config.mak lib/config.mk && \
- $(CROSS_MAKE) CC="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -I$(call realpath,$(MINI_OS)/include)" lib/libpci.a && \
+ $(MAKE) DESTDIR= CC="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -I$(call realpath,$(MINI_OS)/include)" lib/libpci.a && \
$(INSTALL_DATA) lib/libpci.a $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/lib/ && \
$(INSTALL_DIR) $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci && \
$(INSTALL_DATA) lib/config.h lib/header.h lib/pci.h lib/types.h $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \
@@ -203,8 +201,8 @@ cross-ocaml: $(OCAML_STAMPFILE)
-no-pthread -no-shared-libs -no-tk -no-curses \
-cc "$(CC) -U_FORTIFY_SOURCE -fno-stack-protector -mno-red-zone"
$(foreach i,$(MINIOS_HASNOT),sed -i 's,^\(#define HAS_$(i)\),//\1,' ocaml-$(XEN_TARGET_ARCH)/config/s.h ; )
- $(CROSS_MAKE) -C ocaml-$(XEN_TARGET_ARCH) world
- $(CROSS_MAKE) -C ocaml-$(XEN_TARGET_ARCH) opt
+ $(MAKE) DESTDIR= -C ocaml-$(XEN_TARGET_ARCH) world
+ $(MAKE) DESTDIR= -C ocaml-$(XEN_TARGET_ARCH) opt
$(MAKE) -C ocaml-$(XEN_TARGET_ARCH) install
touch $@
@@ -219,7 +217,7 @@ QEMU_ROOT := $(shell if [ -d "$(CONFIG_Q
ifeq ($(QEMU_ROOT),.)
$(XEN_ROOT)/tools/qemu-xen-traditional-dir:
- $(CROSS_MAKE) -C $(XEN_ROOT)/tools qemu-xen-traditional-dir-find
+ $(MAKE) DESTDIR= -C $(XEN_ROOT)/tools qemu-xen-traditional-dir-find
ioemu/linkfarm.stamp: $(XEN_ROOT)/tools/qemu-xen-traditional-dir
mkdir -p ioemu
@@ -250,7 +248,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin
( [ -h include/xen/libelf ] || ln -sf $(XEN_ROOT)/tools/include/xen/libelf include/xen/libelf ) && \
mkdir -p include/xen-foreign && \
ln -sf $(wildcard $(XEN_ROOT)/tools/include/xen-foreign/*) include/xen-foreign/ && \
- $(CROSS_MAKE) -C include/xen-foreign/ && \
+ $(MAKE) DESTDIR= -C include/xen-foreign/ && \
( [ -h include/xen/foreign ] || ln -sf ../xen-foreign include/xen/foreign )
mkdir -p libxc-$(XEN_TARGET_ARCH)
[ -h libxc-$(XEN_TARGET_ARCH)/Makefile ] || ( cd libxc-$(XEN_TARGET_ARCH) && \
@@ -267,7 +265,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin
ln -sf $(XEN_ROOT)/tools/xenstore/*.c . && \
ln -sf $(XEN_ROOT)/tools/xenstore/*.h . && \
ln -sf $(XEN_ROOT)/tools/xenstore/Makefile . )
- $(CROSS_MAKE) -C $(MINI_OS) links
+ $(MAKE) DESTDIR= -C $(MINI_OS) links
touch mk-headers-$(XEN_TARGET_ARCH)
TARGETS_MINIOS=$(addprefix mini-os-$(XEN_TARGET_ARCH)-,$(TARGETS))
@@ -284,7 +282,7 @@ TARGETS_MINIOS=$(addprefix mini-os-$(XEN
.PHONY: libxc
libxc: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a libxc-$(XEN_TARGET_ARCH)/libxenguest.a
libxc-$(XEN_TARGET_ARCH)/libxenctrl.a: cross-zlib
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C libxc-$(XEN_TARGET_ARCH)
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C libxc-$(XEN_TARGET_ARCH)
libxc-$(XEN_TARGET_ARCH)/libxenguest.a: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a
@@ -302,7 +300,7 @@ ioemu: cross-zlib cross-libpci libxc
TARGET_CFLAGS="$(TARGET_CFLAGS)" \
TARGET_LDFLAGS="$(TARGET_LDFLAGS)" \
$(QEMU_ROOT)/xen-setup-stubdom )
- $(CROSS_MAKE) -C ioemu -f $(QEMU_ROOT)/Makefile
+ $(MAKE) DESTDIR= -C ioemu -f $(QEMU_ROOT)/Makefile
######
# caml
@@ -310,7 +308,7 @@ ioemu: cross-zlib cross-libpci libxc
.PHONY: caml
caml: $(CROSS_ROOT)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) OCAMLC_CROSS_PREFIX=$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/bin/
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) OCAMLC_CROSS_PREFIX=$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/bin/
###
# C
@@ -318,7 +316,7 @@ caml: $(CROSS_ROOT)
.PHONY: c
c: $(CROSS_ROOT)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH)
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH)
######
# Grub
@@ -337,7 +335,7 @@ grub-upstream: grub-$(GRUB_VERSION).tar.
.PHONY: grub
grub: grub-upstream $(CROSS_ROOT)
mkdir -p grub-$(XEN_TARGET_ARCH)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ OBJ_DIR=$(CURDIR)/grub-$(XEN_TARGET_ARCH)
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ OBJ_DIR=$(CURDIR)/grub-$(XEN_TARGET_ARCH)
##########
# xenstore
@@ -345,7 +343,7 @@ grub: grub-upstream $(CROSS_ROOT)
.PHONY: xenstore
xenstore: $(CROSS_ROOT)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ xenstored.a CONFIG_STUBDOM=y
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ xenstored.a CONFIG_STUBDOM=y
########
# minios
@@ -354,23 +352,23 @@ xenstore: $(CROSS_ROOT)
.PHONY: ioemu-stubdom
ioemu-stubdom: APP_OBJS=$(CURDIR)/ioemu/i386-stubdom/qemu.a $(CURDIR)/ioemu/i386-stubdom/libqemu.a $(CURDIR)/ioemu/libqemu_common.a
ioemu-stubdom: mini-os-$(XEN_TARGET_ARCH)-ioemu lwip-$(XEN_TARGET_ARCH) libxc ioemu
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/ioemu-minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(APP_OBJS)"
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/ioemu-minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(APP_OBJS)"
.PHONY: caml-stubdom
caml-stubdom: mini-os-$(XEN_TARGET_ARCH)-caml lwip-$(XEN_TARGET_ARCH) libxc cross-ocaml caml
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/caml/minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/caml/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
.PHONY: c-stubdom
c-stubdom: mini-os-$(XEN_TARGET_ARCH)-c lwip-$(XEN_TARGET_ARCH) libxc c
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/c/minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS=$(CURDIR)/c/main.a
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/c/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS=$(CURDIR)/c/main.a
.PHONY: pv-grub
pv-grub: mini-os-$(XEN_TARGET_ARCH)-grub libxc grub
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/grub/minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/grub-$(XEN_TARGET_ARCH)/main.a
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/grub/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/grub-$(XEN_TARGET_ARCH)/main.a
.PHONY: xenstore-stubdom
xenstore-stubdom: mini-os-$(XEN_TARGET_ARCH)-xenstore libxc xenstore
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/xenstore-minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/xenstore/xenstored.a
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/xenstore-minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/xenstore/xenstored.a
#########
# install
@@ -412,13 +410,13 @@ clean:
rm -fr mini-os-$(XEN_TARGET_ARCH)-caml
rm -fr mini-os-$(XEN_TARGET_ARCH)-grub
rm -fr mini-os-$(XEN_TARGET_ARCH)-xenstore
- $(CROSS_MAKE) -C caml clean
- $(CROSS_MAKE) -C c clean
+ $(MAKE) DESTDIR= -C caml clean
+ $(MAKE) DESTDIR= -C c clean
rm -fr grub-$(XEN_TARGET_ARCH)
rm -f $(STUBDOMPATH)
- [ ! -d libxc-$(XEN_TARGET_ARCH) ] || $(CROSS_MAKE) -C libxc-$(XEN_TARGET_ARCH) clean
- -[ ! -d ioemu ] || $(CROSS_MAKE) -C ioemu clean
- -[ ! -d xenstore ] || $(CROSS_MAKE) -C xenstore clean
+ [ ! -d libxc-$(XEN_TARGET_ARCH) ] || $(MAKE) DESTDIR= -C libxc-$(XEN_TARGET_ARCH) clean
+ -[ ! -d ioemu ] || $(MAKE) DESTDIR= -C ioemu clean
+ -[ ! -d xenstore ] || $(MAKE) DESTDIR= -C xenstore clean
# clean the cross-compilation result
.PHONY: crossclean

View File

@ -1,43 +0,0 @@
changeset: 26018:ecc7627ca6d7
tag: tip
user: Olaf Hering <olaf@aepfle.de>
date: Tue Oct 09 09:18:42 2012 +0100
files: tools/pygrub/src/pygrub
description:
pygrub: correct typo in --args assignment
If pygrub was called with --args="some thing", then this string should
be appended to the kernel command line. But the last changeset
25941:795c493fe561 contained a typo: it assigned 'args' instead of 'arg'.
Rename the local variable which holds the string from the domain config
file to avoid further confusion.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r c9f621893a05 -r ecc7627ca6d7 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Mon Oct 08 14:36:31 2012 +0100
+++ b/tools/pygrub/src/pygrub Tue Oct 09 09:18:42 2012 +0100
@@ -585,7 +585,7 @@ def get_entry_idx(cf, entry):
return None
-def run_grub(file, entry, fs, arg):
+def run_grub(file, entry, fs, cfg_args):
global g
global sel
@@ -622,8 +622,8 @@ def run_grub(file, entry, fs, arg):
grubcfg["ramdisk"] = img.initrd[1]
if img.args:
grubcfg["args"] += img.args
- if arg:
- grubcfg["args"] += " " + args
+ if cfg_args:
+ grubcfg["args"] += " " + cfg_args
return grubcfg

View File

@ -1,69 +0,0 @@
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1350306216 -7200
# Node ID 983108e1b56bf809f3f5eaaebf18c4b613ff0865
# Parent 137dfbd3190e849b3a498d8b2ea282ebbf12e77d
x86/amd: Fix xen_apic_write warnings in Dom0
[ 0.020294] ------------[ cut here ]------------
[ 0.020311] WARNING: at arch/x86/xen/enlighten.c:730
xen_apic_write+0x15/0x17()
[ 0.020318] Hardware name: empty
[ 0.020323] Modules linked in:
[ 0.020334] Pid: 1, comm: swapper/0 Not tainted 3.3.8 #7
[ 0.020340] Call Trace:
[ 0.020354] [<ffffffff81050379>] warn_slowpath_common+0x80/0x98
[ 0.020369] [<ffffffff810503a6>] warn_slowpath_null+0x15/0x17
[ 0.020378] [<ffffffff810034df>] xen_apic_write+0x15/0x17
[ 0.020392] [<ffffffff8101cb2b>] perf_events_lapic_init+0x2e/0x30
[ 0.020410] [<ffffffff81ee4dd0>] init_hw_perf_events+0x250/0x407
[ 0.020419] [<ffffffff81ee4b80>] ? check_bugs+0x2d/0x2d
[ 0.020430] [<ffffffff81002181>] do_one_initcall+0x7a/0x131
[ 0.020444] [<ffffffff81edbbf9>] kernel_init+0x91/0x15d
[ 0.020456] [<ffffffff817caaa4>] kernel_thread_helper+0x4/0x10
[ 0.020471] [<ffffffff817c347c>] ? retint_restore_args+0x5/0x6
[ 0.020481] [<ffffffff817caaa0>] ? gs_change+0x13/0x13
[ 0.020500] ---[ end trace a7919e7f17c0a725 ]---
Kernel function check_hw_exists() writes 0xabcd to MSR 0xc0010201 (Performance Event
Counter 0) and reads it back to check whether it is running as dom0. Early AMD CPUs do
not reset perf counters during a warm reboot. If the kernel is booted on bare metal
and then as a dom0, the content of MSR 0xc0010201 survives, so the check
passes and the PMU is enabled unexpectedly.
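A hedged sketch of the probe the message describes (the real check_hw_exists() lives in the Linux kernel; the MSR access is stubbed out here and the constants are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MSR_PERFCTR 0xc0010201u          /* illustrative */

    static uint64_t fake_msr = 0xabcd;       /* stale value left by a bare-metal boot */

    static uint64_t rdmsr_stub(uint32_t msr) { (void)msr; return fake_msr; }
    static void wrmsr_stub(uint32_t msr, uint64_t v) { (void)msr; (void)v; /* write dropped */ }

    static bool counters_seem_writable(void)
    {
        wrmsr_stub(MSR_PERFCTR, 0xabcd);           /* write is lost */
        return rdmsr_stub(MSR_PERFCTR) == 0xabcd;  /* stale 0xabcd still matches */
    }

    int main(void)
    {
        /* Prints 1: the probe passes although the PMU isn't usable,
         * which is why Xen now zeroes the counters on these CPUs. */
        printf("probe passes: %d\n", counters_seem_writable());
        return 0;
    }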
Signed-off-by: Wei Wang <wei.wang2@amd.com>
Don't reset the counters when used for the NMI watchdog.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -11,6 +11,7 @@
#include <asm/hvm/support.h>
#include <asm/setup.h> /* amd_init_cpu */
#include <asm/acpi.h>
+#include <asm/apic.h>
#include "cpu.h"
@@ -532,6 +533,17 @@ static void __devinit init_amd(struct cp
if (c->x86 > 0x11)
set_bit(X86_FEATURE_ARAT, c->x86_capability);
+ /*
+ * Prior to Family 0x14, perf counters are not reset during warm reboot.
+ * We have to reset them manually.
+ */
+ if (nmi_watchdog != NMI_LOCAL_APIC && c->x86 < 0x14) {
+ wrmsrl(MSR_K7_PERFCTR0, 0);
+ wrmsrl(MSR_K7_PERFCTR1, 0);
+ wrmsrl(MSR_K7_PERFCTR2, 0);
+ wrmsrl(MSR_K7_PERFCTR3, 0);
+ }
+
if (cpuid_edx(0x80000007) & (1 << 10)) {
rdmsr(MSR_K7_HWCR, l, h);
l |= (1 << 27); /* Enable read-only APERF/MPERF bit */

View File

@ -1,65 +0,0 @@
# HG changeset patch
# User Jacob Shin <jacob.shin@amd.com>
# Date 1350306291 -7200
# Node ID 14e32621dbaf5b485b134ace4558e67c4c36e1ce
# Parent 983108e1b56bf809f3f5eaaebf18c4b613ff0865
x86/xenoprof: fix kernel/user mode detection for HVM
While trying oprofile under Xen, I noticed that HVM passive domain's
kernel addresses were showing up as user-application samples. It turns out
under HVM get_cpu_user_regs()->cs contains 0x0000beef.
Signed-off-by: Jacob Shin <jacob.shin@amd.com>
Don't cast away const-ness. Use SS instead of CS to determine ring.
Special-case real and protected mode.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
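Why SS rather than CS matters can be seen numerically: 0xbeef has its low two bits set, so the old "(regs->cs & 3) != 3" test classified every sample as ring 3. A tiny standalone check:

    #include <stdint.h>
    #include <stdio.h>

    /* The requested privilege level lives in a selector's low two bits. */
    static int selector_ring(uint16_t sel)
    {
        return sel & 3;
    }

    int main(void)
    {
        /* Prints "ring 3": the stale HVM value looks like user mode,
         * so kernel samples were misattributed to applications. */
        printf("ring %d\n", selector_ring(0xbeef));
        return 0;
    }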
--- a/xen/arch/x86/oprofile/xenoprof.c
+++ b/xen/arch/x86/oprofile/xenoprof.c
@@ -78,16 +78,26 @@ int compat_oprof_arch_counter(XEN_GUEST_
}
#endif
-int xenoprofile_get_mode(const struct vcpu *v,
- const struct cpu_user_regs *regs)
+int xenoprofile_get_mode(struct vcpu *curr, const struct cpu_user_regs *regs)
{
if ( !guest_mode(regs) )
return 2;
- if ( is_hvm_vcpu(v) )
- return ((regs->cs & 3) != 3);
+ if ( !is_hvm_vcpu(curr) )
+ return guest_kernel_mode(curr, regs);
- return guest_kernel_mode(v, regs);
+ switch ( hvm_guest_x86_mode(curr) )
+ {
+ struct segment_register ss;
+
+ case 0: /* real mode */
+ return 1;
+ case 1: /* vm86 mode */
+ return 0;
+ default:
+ hvm_get_segment_register(curr, x86_seg_ss, &ss);
+ return (ss.sel & 3) != 3;
+ }
}
/*
--- a/xen/include/asm-x86/xenoprof.h
+++ b/xen/include/asm-x86/xenoprof.h
@@ -56,7 +56,7 @@ static inline void ibs_init(void) {}
#define ibs_caps 0
#endif
-int xenoprofile_get_mode(const struct vcpu *, const struct cpu_user_regs *);
+int xenoprofile_get_mode(struct vcpu *, const struct cpu_user_regs *);
static inline int xenoprof_backtrace_supported(void)
{

View File

@ -1,84 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1350315491 -3600
# Node ID 177fdda0be568ccdb62697b64aa64ee20bc55bee
# Parent 14e32621dbaf5b485b134ace4558e67c4c36e1ce
More efficient TLB-flush filtering in alloc_heap_pages().
Rather than per-cpu filtering for every page in a super-page
allocation, simply remember the most recent TLB timestamp across all
allocated pages, and filter on that, just once, at the end of the
function.
For large-CPU systems, doing 2MB allocations during domain creation,
this cuts down the domain creation time *massively*.
TODO: It may make sense to move the filtering out into some callers,
such as memory.c:populate_physmap() and
memory.c:increase_reservation(), so that the filtering can be moved
outside their loops, too.
Signed-off-by: Keir Fraser <keir@xen.org>
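A simplified, self-contained sketch of the new scheme, with CPU masks reduced to a 64-bit word and the helper semantics assumed from context: track only the newest page timestamp, then filter the online mask once.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS 64
    static uint32_t last_flush[NR_CPUS];   /* per-CPU time of last TLB flush */
    static uint64_t cpu_online_map = ~0ull;

    struct page { bool need_tlbflush; uint32_t tlbflush_timestamp; };

    /* Drop CPUs that already flushed after the given timestamp. */
    static uint64_t tlbflush_filter(uint64_t mask, uint32_t stamp)
    {
        for ( unsigned int cpu = 0; cpu < NR_CPUS; cpu++ )
            if ( ((mask >> cpu) & 1) && last_flush[cpu] > stamp )
                mask &= ~(1ull << cpu);
        return mask;
    }

    static uint64_t flush_mask_for(const struct page *pg, unsigned int n)
    {
        uint32_t newest = 0;
        bool need = false;

        for ( unsigned int i = 0; i < n; i++ )
            if ( pg[i].need_tlbflush && pg[i].tlbflush_timestamp >= newest )
            {
                newest = pg[i].tlbflush_timestamp;
                need = true;
            }
        /* One filtering pass instead of one per allocated page. */
        return need ? tlbflush_filter(cpu_online_map, newest) : 0;
    }

    int main(void)
    {
        struct page pg[2] = { { true, 5 }, { true, 9 } };

        last_flush[3] = 12;   /* CPU3 flushed after stamp 9: gets filtered */
        printf("mask = %#llx\n", (unsigned long long)flush_mask_for(pg, 2));
        return 0;
    }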
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -414,9 +414,10 @@ static struct page_info *alloc_heap_page
unsigned int first_node, i, j, zone = 0, nodemask_retry = 0;
unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1);
unsigned long request = 1UL << order;
- cpumask_t mask;
struct page_info *pg;
nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
+ bool_t need_tlbflush = 0;
+ uint32_t tlbflush_timestamp = 0;
if ( node == NUMA_NO_NODE )
{
@@ -530,22 +531,19 @@ static struct page_info *alloc_heap_page
if ( d != NULL )
d->last_alloc_node = node;
- cpumask_clear(&mask);
-
for ( i = 0; i < (1 << order); i++ )
{
/* Reference count must continuously be zero for free pages. */
BUG_ON(pg[i].count_info != PGC_state_free);
pg[i].count_info = PGC_state_inuse;
- if ( pg[i].u.free.need_tlbflush )
+ if ( pg[i].u.free.need_tlbflush &&
+ (pg[i].tlbflush_timestamp <= tlbflush_current_time()) &&
+ (!need_tlbflush ||
+ (pg[i].tlbflush_timestamp > tlbflush_timestamp)) )
{
- /* Add in extra CPUs that need flushing because of this page. */
- static cpumask_t extra_cpus_mask;
-
- cpumask_andnot(&extra_cpus_mask, &cpu_online_map, &mask);
- tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
- cpumask_or(&mask, &mask, &extra_cpus_mask);
+ need_tlbflush = 1;
+ tlbflush_timestamp = pg[i].tlbflush_timestamp;
}
/* Initialise fields which have other uses for free pages. */
@@ -555,10 +553,15 @@ static struct page_info *alloc_heap_page
spin_unlock(&heap_lock);
- if ( unlikely(!cpumask_empty(&mask)) )
+ if ( need_tlbflush )
{
- perfc_incr(need_flush_tlb_flush);
- flush_tlb_mask(&mask);
+ cpumask_t mask = cpu_online_map;
+ tlbflush_filter(mask, tlbflush_timestamp);
+ if ( !cpumask_empty(&mask) )
+ {
+ perfc_incr(need_flush_tlb_flush);
+ flush_tlb_mask(&mask);
+ }
}
return pg;

View File

@ -1,30 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1350465790 -7200
# Node ID 4b4c0c7a6031820ab521fdd6764cb0df157f44bf
# Parent 4fc87c2f31a02c770655518c9e4d389302564f00
x86/oprof: adjust off-by-one counter range checks
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/oprofile/xenoprof.c
+++ b/xen/arch/x86/oprofile/xenoprof.c
@@ -26,7 +26,7 @@ int xenoprof_arch_counter(XEN_GUEST_HAND
if ( copy_from_guest(&counter, arg, 1) )
return -EFAULT;
- if ( counter.ind > OP_MAX_COUNTER )
+ if ( counter.ind >= OP_MAX_COUNTER )
return -E2BIG;
counter_config[counter.ind].count = counter.count;
@@ -64,7 +64,7 @@ int compat_oprof_arch_counter(XEN_GUEST_
if ( copy_from_guest(&counter, arg, 1) )
return -EFAULT;
- if ( counter.ind > OP_MAX_COUNTER )
+ if ( counter.ind >= OP_MAX_COUNTER )
return -E2BIG;
counter_config[counter.ind].count = counter.count;
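The pattern in isolation: for an array of OP_MAX_COUNTER entries the valid indices are 0 through OP_MAX_COUNTER - 1, so the rejection test must use ">=", not ">". A minimal sketch (array size hypothetical):

    #define OP_MAX_COUNTER 8

    static long counter_config[OP_MAX_COUNTER];

    static int set_count(unsigned int ind, long count)
    {
        if ( ind >= OP_MAX_COUNTER )  /* ">" would let ind == OP_MAX_COUNTER
                                         write one element past the array */
            return -1;
        counter_config[ind] = count;
        return 0;
    }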

View File

@ -1,51 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1350476000 -7200
# Node ID 1f4be6ee4619c88c273cb457d8e7f1eee49d00dd
# Parent ec8a091efcce717584b00ce76e3cec40a6247ebc
x86/HPET: obtain proper lock for changing IRQ affinity
The IRQ descriptor lock should be held while adjusting the affinity of
any IRQ; the HPET channel lock isn't sufficient to protect against,
in particular, races with moving the IRQ to a different CPU.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -433,6 +433,16 @@ static struct hpet_event_channel *hpet_g
return ch;
}
+static void set_channel_irq_affinity(const struct hpet_event_channel *ch)
+{
+ struct irq_desc *desc = irq_to_desc(ch->irq);
+
+ ASSERT(!local_irq_is_enabled());
+ spin_lock(&desc->lock);
+ hpet_msi_set_affinity(desc, cpumask_of(ch->cpu));
+ spin_unlock(&desc->lock);
+}
+
static void hpet_attach_channel(unsigned int cpu,
struct hpet_event_channel *ch)
{
@@ -447,7 +457,7 @@ static void hpet_attach_channel(unsigned
if ( ch->cpu != cpu )
return;
- hpet_msi_set_affinity(irq_to_desc(ch->irq), cpumask_of(ch->cpu));
+ set_channel_irq_affinity(ch);
}
static void hpet_detach_channel(unsigned int cpu,
@@ -469,7 +479,7 @@ static void hpet_detach_channel(unsigned
}
ch->cpu = cpumask_first(ch->cpumask);
- hpet_msi_set_affinity(irq_to_desc(ch->irq), cpumask_of(ch->cpu));
+ set_channel_irq_affinity(ch);
}
#include <asm/mc146818rtc.h>

View File

@ -1,29 +0,0 @@
changeset: 26088:dd64a1bdbe3a
user: Joe Jin <joe.jin@oracle.com>
date: Thu Oct 18 09:35:08 2012 +0100
files: tools/python/xen/xend/XendStateStore.py
description:
tools: xend: fix wrong condition check for xml file
Commit e8d40584 intended to check the xml file size and return early when
the file is empty; the condition should be "if os.path.getsize(xml_path) == 0"
rather than "if not os.path.getsize(xml_path) == 0".
Signed-off-by: Chuang Cao <chuang.cao@oracle.com>
Signed-off-by: Joe Jin <joe.jin@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 6239ace16749 -r dd64a1bdbe3a tools/python/xen/xend/XendStateStore.py
--- a/tools/python/xen/xend/XendStateStore.py Thu Oct 18 09:35:07 2012 +0100
+++ b/tools/python/xen/xend/XendStateStore.py Thu Oct 18 09:35:08 2012 +0100
@@ -101,7 +101,7 @@ class XendStateStore:
if not os.path.exists(xml_path):
return {}
- if not os.path.getsize(xml_path) == 0:
+ if os.path.getsize(xml_path) == 0:
return {}
dom = minidom.parse(xml_path)

View File

@ -1,32 +0,0 @@
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1350655745 -7200
# Node ID 4ae08ca5500f68d19a689c06489024157300d7b0
# Parent 478ba3f146df23d2cfa95fc603d0b4b9d21ba15d
hvm: handle PoD and grant pages in HVMOP_get_mem_type
During kexec in a ballooned PVonHVM guest the new kernel needs to check
each pfn to see whether it is backed by an mfn, in order to find ballooned
pages. Currently all PoD and grant pages appear as HVMMEM_mmio_dm, so the
new kernel has to assume they are ballooned. This is wrong: PoD pages may
turn into real RAM at runtime, and grant pages are also RAM.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
Index: xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
@@ -4135,6 +4135,10 @@ long do_hvm_op(unsigned long op, XEN_GUE
a.mem_type = HVMMEM_ram_ro;
else if ( p2m_is_ram(t) )
a.mem_type = HVMMEM_ram_rw;
+ else if ( p2m_is_magic(t) )
+ a.mem_type = HVMMEM_ram_rw;
+ else if ( p2m_is_grant(t) )
+ a.mem_type = HVMMEM_ram_rw;
else
a.mem_type = HVMMEM_mmio_dm;
rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;

View File

@ -1,32 +0,0 @@
# HG changeset patch
# User Christoph Egger <Christoph.Egger@amd.com>
# Date 1350976407 -7200
# Node ID a7503ce27d462056421c6d74737cee08ab4ae31e
# Parent c69bcb24812896dc6d5cf033babb7e79b8a50aec
nestedsvm: fix memory leak on shutdown/crash
Fix memory leak of l1 vmcb page when destroying a vcpu while l2 guest
is running.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -122,6 +122,15 @@ void nsvm_vcpu_destroy(struct vcpu *v)
struct nestedvcpu *nv = &vcpu_nestedhvm(v);
struct nestedsvm *svm = &vcpu_nestedsvm(v);
+ /*
+ * When destroying the vcpu, it may be running on behalf of l2 guest.
+ * Therefore we need to switch the VMCB pointer back to the l1 vmcb,
+ * in order to avoid double free of l2 vmcb and the possible memory leak
+ * of l1 vmcb page.
+ */
+ if (nv->nv_n1vmcx)
+ v->arch.hvm_svm.vmcb = nv->nv_n1vmcx;
+
if (svm->ns_cached_msrpm) {
free_xenheap_pages(svm->ns_cached_msrpm,
get_order_from_bytes(MSRPM_SIZE));

View File

@ -1,61 +0,0 @@
# HG changeset patch
# User Christoph Egger <Christoph.Egger@amd.com>
# Date 1350976467 -7200
# Node ID d642720e1ea996ce85203fc9718f64cf2cab0468
# Parent a7503ce27d462056421c6d74737cee08ab4ae31e
nestedsvm: fix VMEXIT emulation
Values in regs can be newer than those in the shadow vmcb (e.g. due to
an instruction emulation right before). So use the values from regs.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -995,7 +995,7 @@ nsvm_vmcb_guest_intercepts_trap(struct v
}
static int
-nsvm_vmcb_prepare4vmexit(struct vcpu *v)
+nsvm_vmcb_prepare4vmexit(struct vcpu *v, struct cpu_user_regs *regs)
{
struct nestedvcpu *nv = &vcpu_nestedhvm(v);
struct nestedsvm *svm = &vcpu_nestedsvm(v);
@@ -1119,17 +1119,22 @@ nsvm_vmcb_prepare4vmexit(struct vcpu *v)
ns_vmcb->_dr7 = n2vmcb->_dr7;
ns_vmcb->_dr6 = n2vmcb->_dr6;
+ /* Restore registers from regs as those values
+ * can be newer than in n2vmcb (e.g. due to an
+ * instruction emulation right before).
+ */
+
/* RFLAGS */
- ns_vmcb->rflags = n2vmcb->rflags;
+ ns_vmcb->rflags = n2vmcb->rflags = regs->eflags;
/* RIP */
- ns_vmcb->rip = n2vmcb->rip;
+ ns_vmcb->rip = n2vmcb->rip = regs->eip;
/* RSP */
- ns_vmcb->rsp = n2vmcb->rsp;
+ ns_vmcb->rsp = n2vmcb->rsp = regs->esp;
/* RAX */
- ns_vmcb->rax = n2vmcb->rax;
+ ns_vmcb->rax = n2vmcb->rax = regs->eax;
/* Keep the l2 guest values of the fs, gs, ldtr, tr, kerngsbase,
* star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
@@ -1363,7 +1368,7 @@ nestedsvm_vmexit_n2n1(struct vcpu *v, st
ASSERT(vcpu_nestedhvm(v).nv_vmswitch_in_progress);
ASSERT(nestedhvm_vcpu_in_guestmode(v));
- rc = nsvm_vmcb_prepare4vmexit(v);
+ rc = nsvm_vmcb_prepare4vmexit(v, regs);
if (rc)
ret = NESTEDHVM_VMEXIT_ERROR;

View File

@ -1,43 +0,0 @@
# HG changeset patch
# User Dario Faggioli <dario.faggioli@citrix.com>
# Date 1350990742 25200
# Node ID 6d54c3338c52ec3a8e671c13284a59a535a3273f
# Parent 67c27013e191598543ccc1b8f8f1d533c7a5164b
xen: fix build when 'perfc=y'
Which was failing with this:
viridian.c: In function wrmsr_viridian_regs:
viridian.c:254:1: error: PERFC_mshv_wrmsr_apic_msr undeclared
(first use in this function)
viridian.c:254:1: note: each undeclared identifier is reported only
once for each function it appears in
viridian.c: In function rdmsr_viridian_regs:
viridian.c:305:1: error: PERFC_mshv_rdmsr_apic_msr undeclared
(first use in this function)
as a consequence of 17b754cab7b0 using but not defining
the counters.
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/include/asm-x86/perfc_defn.h
+++ b/xen/include/asm-x86/perfc_defn.h
@@ -121,6 +121,7 @@ PERFCOUNTER(mshv_rdmsr_vp_index,
PERFCOUNTER(mshv_rdmsr_icr, "MS Hv rdmsr icr")
PERFCOUNTER(mshv_rdmsr_tpr, "MS Hv rdmsr tpr")
PERFCOUNTER(mshv_rdmsr_apic_assist, "MS Hv rdmsr APIC assist")
+PERFCOUNTER(mshv_rdmsr_apic_msr, "MS Hv rdmsr APIC msr")
PERFCOUNTER(mshv_wrmsr_osid, "MS Hv wrmsr Guest OS ID")
PERFCOUNTER(mshv_wrmsr_hc_page, "MS Hv wrmsr hypercall page")
PERFCOUNTER(mshv_wrmsr_vp_index, "MS Hv wrmsr vp index")
@@ -128,6 +129,7 @@ PERFCOUNTER(mshv_wrmsr_icr,
PERFCOUNTER(mshv_wrmsr_tpr, "MS Hv wrmsr tpr")
PERFCOUNTER(mshv_wrmsr_eoi, "MS Hv wrmsr eoi")
PERFCOUNTER(mshv_wrmsr_apic_assist, "MS Hv wrmsr APIC assist")
+PERFCOUNTER(mshv_wrmsr_apic_msr, "MS Hv wrmsr APIC msr")
PERFCOUNTER(realmode_emulations, "realmode instructions emulated")
PERFCOUNTER(realmode_exits, "vmexits from realmode")

View File

@ -1,99 +0,0 @@
References: bnc#784087
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1351093908 -7200
# Node ID 22e08c9ac770db07c3c3e7c844aa7153050939f3
# Parent 07cf00a917cd1d1849f3e40d5b8ecc2cd8964fe8
x86: don't special case first IO-APIC
It has always puzzled me why the first IO-APIC gets special-cased
in two places, and finally Xen got run on a system where this breaks:
(XEN) ACPI: IOAPIC (id[0x10] address[0xfecff000] gsi_base[0])
(XEN) IOAPIC[0]: apic_id 16, version 17, address 0xfecff000, GSI 0-2
(XEN) ACPI: IOAPIC (id[0x0f] address[0xfec00000] gsi_base[3])
(XEN) IOAPIC[1]: apic_id 15, version 17, address 0xfec00000, GSI 3-38
(XEN) ACPI: IOAPIC (id[0x0e] address[0xfec01000] gsi_base[39])
(XEN) IOAPIC[2]: apic_id 14, version 17, address 0xfec01000, GSI 39-74
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 1 global_irq 4 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 5 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 3 global_irq 6 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 4 global_irq 7 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 6 global_irq 9 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 10 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 8 global_irq 11 low edge)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 12 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 12 global_irq 15 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 13 global_irq 16 dfl dfl)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 17 low edge)
(XEN) ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 18 dfl dfl)
i.e. all legacy IRQs (apart from the timer one, but the firmware passed
data doesn't look right for that case anyway, as both Xen and native
Linux are falling back to use the virtual wire setup for IRQ0,
apparently rather than using pin 2 of the first IO-APIC) are being handled
by the second IO-APIC.
This at once eliminates the possibility of an unmasked RTE getting
written without having got a vector put in place (in
setup_IO_APIC_irqs()).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Index: xen-4.2.0-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/io_apic.c
+++ xen-4.2.0-testing/xen/arch/x86/io_apic.c
@@ -999,18 +999,17 @@ static void __init setup_IO_APIC_irqs(vo
else
add_pin_to_irq(irq, apic, pin);
- if (!apic && !IO_APIC_IRQ(irq))
+ if (!IO_APIC_IRQ(irq))
continue;
- if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq, NULL);
- BUG_ON(vector < 0);
- entry.vector = vector;
- ioapic_register_intr(irq, IOAPIC_AUTO);
+ vector = assign_irq_vector(irq, NULL);
+ BUG_ON(vector < 0);
+ entry.vector = vector;
+ ioapic_register_intr(irq, IOAPIC_AUTO);
+
+ if (platform_legacy_irq(irq))
+ disable_8259A_irq(irq_to_desc(irq));
- if (!apic && platform_legacy_irq(irq))
- disable_8259A_irq(irq_to_desc(irq));
- }
desc = irq_to_desc(irq);
SET_DEST(entry.dest.dest32, entry.dest.logical.logical_dest,
cpu_mask_to_apicid(desc->arch.cpu_mask));
@@ -2257,18 +2256,15 @@ unsigned apic_gsi_base(int apic);
static int apic_pin_2_gsi_irq(int apic, int pin)
{
- int idx, irq;
+ int idx;
if (apic < 0)
return -EINVAL;
- irq = apic_gsi_base(apic) + pin;
- if (apic == 0) {
- idx = find_irq_entry(apic, pin, mp_INT);
- if (idx >= 0)
- irq = pin_2_irq(idx, apic, pin);
- }
- return irq;
+ idx = find_irq_entry(apic, pin, mp_INT);
+
+ return idx >= 0 ? pin_2_irq(idx, apic, pin)
+ : apic_gsi_base(apic) + pin;
}
int ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
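
As a standalone illustration of the now-uniform lookup: every (apic, pin)
pair is first matched against the interrupt source overrides, with no
apic == 0 special case, and unmatched pins fall back to gsi_base + pin.
The table contents below are reconstructed from the boot log quoted above;
the helper and table names are illustrative, not Xen's.

#include <stdio.h>

struct override { int apic, pin, bus_irq; };

/* bus_irq 1 -> global_irq 4 = IOAPIC[1] pin 1; bus_irq 0 -> GSI 5 = pin 2 */
static const struct override overrides[] = {
    { 1, 1, 1 },
    { 1, 2, 0 },
};
static const int gsi_base[] = { 0, 3, 39 };     /* from the MADT above */

static int apic_pin_2_irq(int apic, int pin)
{
    for (unsigned int i = 0; i < sizeof(overrides)/sizeof(*overrides); i++)
        if (overrides[i].apic == apic && overrides[i].pin == pin)
            return overrides[i].bus_irq;        /* legacy ISA IRQ */
    return gsi_base[apic] + pin;                /* default 1:1 GSI mapping */
}

int main(void)
{
    printf("IOAPIC[1] pin 1 -> IRQ %d\n", apic_pin_2_irq(1, 1)); /* 1 */
    printf("IOAPIC[2] pin 0 -> IRQ %d\n", apic_pin_2_irq(2, 0)); /* 39 */
    return 0;
}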

View File

@ -1,31 +0,0 @@
# HG changeset patch
# User Matthew Daley <mattjd@gmail.com>
# Date 1352709297 -3600
# Node ID 286ef4ced2164f4e9bf52fd0c52248182e69a6e6
# Parent 62885b3c34c84354ead017703a86f0465cb58cf7
tmem: Prevent NULL dereference on error case
If the client / pool IDs given to tmemc_save_get_next_page are invalid,
the calculation of pagesize will dereference NULL.
Fix this by moving the calculation below the appropriate NULL check.
Signed-off-by: Matthew Daley <mattjd@gmail.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2446,10 +2446,12 @@ static NOINLINE int tmemc_save_get_next_
OID oid;
int ret = 0;
struct tmem_handle h;
- unsigned int pagesize = 1 << (pool->pageshift+12);
+ unsigned int pagesize;
if ( pool == NULL || is_ephemeral(pool) )
return -1;
+
+ pagesize = 1 << (pool->pageshift + 12);
if ( bufsize < pagesize + sizeof(struct tmem_handle) )
return -ENOMEM;
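
The bug class here is simply deriving a value from a pointer before
validating it. A minimal sketch of the broken and fixed shapes (struct
and function names invented for illustration):

#include <assert.h>
#include <stddef.h>

struct tmem_pool { unsigned int pageshift; };

/* Buggy shape: pool->pageshift is read before pool is checked, so a
 * NULL pool faults on the very first line. */
static int buggy(struct tmem_pool *pool)
{
    unsigned int pagesize = 1u << (pool->pageshift + 12);
    if (pool == NULL)
        return -1;
    return (int)pagesize;
}

/* Fixed shape, as in the patch: validate first, derive second. */
static int fixed(struct tmem_pool *pool)
{
    unsigned int pagesize;
    if (pool == NULL)
        return -1;
    pagesize = 1u << (pool->pageshift + 12);
    return (int)pagesize;
}

int main(void)
{
    struct tmem_pool p = { .pageshift = 0 };
    assert(fixed(NULL) == -1);          /* buggy(NULL) would crash here */
    assert(fixed(&p) == 4096);
    (void)buggy;
    return 0;
}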

View File

@ -0,0 +1,168 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1352709367 -3600
# Node ID fdb69dd527cd01a46f87efb380050559dcf12d37
# Parent 286ef4ced2164f4e9bf52fd0c52248182e69a6e6
IOMMU: don't immediately disable bus mastering on faults
Instead, give the owning domain at least a small opportunity of fixing
things up, and allow for rare faults to not bring down the device at
all.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -564,7 +564,7 @@ static hw_irq_controller iommu_msi_type
static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
{
- u16 domain_id, device_id, bdf, cword;
+ u16 domain_id, device_id, bdf;
u32 code;
u64 *addr;
int count = 0;
@@ -615,18 +615,10 @@ static void parse_event_log_entry(struct
"fault address = 0x%"PRIx64"\n",
event_str[code-1], domain_id, device_id, *addr);
- /* Tell the device to stop DMAing; we can't rely on the guest to
- * control it for us. */
for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
- {
- cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
- PCI_SLOT(bdf), PCI_FUNC(bdf),
- PCI_COMMAND);
- pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
- PCI_FUNC(bdf), PCI_COMMAND,
- cword & ~PCI_COMMAND_MASTER);
- }
+ pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
+ PCI_DEVFN2(bdf));
}
else
{
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -214,6 +214,7 @@ static int device_assigned(u16 seg, u8 b
static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev;
int rc = 0;
if ( !iommu_enabled || !hd->platform_ops )
@@ -227,6 +228,10 @@ static int assign_device(struct domain *
return -EXDEV;
spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( pdev )
+ pdev->fault.count = 0;
+
if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
goto done;
@@ -378,6 +383,8 @@ int deassign_device(struct domain *d, u1
return ret;
}
+ pdev->fault.count = 0;
+
if ( !has_arch_pdevs(d) && need_iommu(d) )
{
d->need_iommu = 0;
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -637,6 +637,36 @@ int __init pci_device_detect(u16 seg, u8
return 1;
}
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
+{
+ struct pci_dev *pdev;
+ s_time_t now = NOW();
+ u16 cword;
+
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( pdev )
+ {
+ if ( now < pdev->fault.time ||
+ now - pdev->fault.time > MILLISECS(10) )
+ pdev->fault.count >>= 1;
+ pdev->fault.time = now;
+ if ( ++pdev->fault.count < PT_FAULT_THRESHOLD )
+ pdev = NULL;
+ }
+ spin_unlock(&pcidevs_lock);
+
+ if ( !pdev )
+ return;
+
+ /* Tell the device to stop DMAing; we can't rely on the guest to
+ * control it for us. */
+ cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_COMMAND);
+ pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+}
+
/*
* scan pci devices to add all existed PCI devices to alldevs_list,
* and setup pci hierarchy in array bus2bridge.
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -936,7 +936,7 @@ static void __do_iommu_page_fault(struct
while (1)
{
u8 fault_reason;
- u16 source_id, cword;
+ u16 source_id;
u32 data;
u64 guest_addr;
int type;
@@ -969,14 +969,8 @@ static void __do_iommu_page_fault(struct
iommu_page_fault_do_one(iommu, type, fault_reason,
source_id, guest_addr);
- /* Tell the device to stop DMAing; we can't rely on the guest to
- * control it for us. */
- cword = pci_conf_read16(iommu->intel->drhd->segment,
- PCI_BUS(source_id), PCI_SLOT(source_id),
- PCI_FUNC(source_id), PCI_COMMAND);
- pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
- PCI_SLOT(source_id), PCI_FUNC(source_id),
- PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+ pci_check_disable_device(iommu->intel->drhd->segment,
+ PCI_BUS(source_id), PCI_DEVFN2(source_id));
fault_index++;
if ( fault_index > cap_num_fault_regs(iommu->cap) )
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -64,6 +64,11 @@ struct pci_dev {
const u8 devfn;
struct pci_dev_info info;
struct arch_pci_dev arch;
+ struct {
+ s_time_t time;
+ unsigned int count;
+#define PT_FAULT_THRESHOLD 10
+ } fault;
u64 vf_rlen[6];
};
@@ -106,6 +111,7 @@ void arch_pci_ro_device(int seg, int bdf
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(
struct domain *, int seg, int bus, int devfn);
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
uint8_t pci_conf_read8(
unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func,
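
The resulting rate limiter is worth spelling out: the counter halves
whenever consecutive faults are more than 10ms apart (or time goes
backwards), so only a dense burst of faults reaches the threshold and
disables bus mastering. A self-contained sketch of that logic, mirroring
the hunk above:

#include <stdint.h>
#include <stdbool.h>

#define MILLISECS(ms)      ((int64_t)(ms) * 1000000) /* ns, like s_time_t */
#define PT_FAULT_THRESHOLD 10

struct fault_state {
    int64_t time;        /* timestamp of the last fault */
    unsigned int count;  /* decaying fault counter */
};

/* Returns true once the device has faulted "too often". */
static bool fault_over_threshold(struct fault_state *f, int64_t now)
{
    if (now < f->time || now - f->time > MILLISECS(10))
        f->count >>= 1;
    f->time = now;
    return ++f->count >= PT_FAULT_THRESHOLD;
}

int main(void)
{
    struct fault_state f = { 0, 0 };
    int64_t t = MILLISECS(1000);
    int trips = 0;

    /* A burst of faults only 1ms apart trips the threshold. */
    for (int i = 0; i < 16; i++, t += MILLISECS(1))
        trips += fault_over_threshold(&f, t);
    return trips ? 0 : 1;
}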

View File

@ -1,22 +0,0 @@
# HG changeset patch
# User Matthew Daley <mattjd@gmail.com>
# Date 1352715420 0
# Node ID 279bbf2a0b485fce18af26473eca5e60d794c17b
# Parent fdb69dd527cd01a46f87efb380050559dcf12d37
x86/mm x86 shadow: Fix typo in sh_invlpg sl3 page presence check
Signed-off-by: Matthew Daley <mattjd@gmail.com>
Acked-by: Tim Deegan <tim@xen.org>
Committed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3665,7 +3665,7 @@ sh_invlpg(struct vcpu *v, unsigned long
perfc_incr(shadow_invlpg_fault);
return 0;
}
- if ( (!shadow_l3e_get_flags(sl3e) & _PAGE_PRESENT) )
+ if ( !(shadow_l3e_get_flags(sl3e) & _PAGE_PRESENT) )
return 0;
}
#else /* SHADOW_PAGING_LEVELS == 3 */
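
The underlying pitfall is operator precedence: '!' binds tighter than
'&', so the buggy expression negates the whole flags word before
masking. A two-line demonstration:

#include <assert.h>

#define _PAGE_PRESENT 0x1u

int main(void)
{
    unsigned int flags = 0x2;   /* some flag set, _PAGE_PRESENT clear */

    /* Buggy test: !flags is 0 whenever any flag at all is set, so the
     * expression is 0 and the not-present entry slips through. */
    assert((!flags & _PAGE_PRESENT) == 0);

    /* Intended test: mask first, then negate. */
    assert(!(flags & _PAGE_PRESENT) == 1);
    return 0;
}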

View File

@ -1,39 +0,0 @@
# HG changeset patch
# User Matthew Daley <mattjd@gmail.com>
# Date 1352802490 -3600
# Node ID 56400658f0962099988678487e525d12f869a96a
# Parent a3cde70320ada4a5424c37f65b8fe3753fc95205
fix xenctl_cpumap_to_cpumask() buffer size check
xenctl_cpumap_to_cpumask incorrectly uses sizeof when checking whether
bits should be masked off from the input cpumap bitmap or not.
Fix by using the correct cpumask buffer size in place of sizeof.
Signed-off-by: Matthew Daley <mattjd@gmail.com>
Compare against copy_bytes instead, and use equality rather than less-
or-equal.
Further, this issue (introduced with c/s 23991:a7ccbc79fc17) is not
security relevant (i.e. the bug could not cause memory corruption):
_xmalloc() never returns chunks of data smaller than the size of a
pointer, i.e. even if sizeof(void*) > guest_bytes > copy_bytes, the
piece of memory erroneously written to would still be inside the
allocation done at the top of the function.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -78,7 +78,7 @@ int xenctl_cpumap_to_cpumask(
{
if ( copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes) )
err = -EFAULT;
- if ( (xenctl_cpumap->nr_cpus & 7) && (guest_bytes <= sizeof(bytemap)) )
+ if ( (xenctl_cpumap->nr_cpus & 7) && (guest_bytes == copy_bytes) )
bytemap[guest_bytes-1] &= ~(0xff << (xenctl_cpumap->nr_cpus & 7));
}
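
The corrected tail-masking logic, pulled out into a standalone sketch
(the helper name is invented): the final byte is masked only when
nr_cpus is not a multiple of 8 and that byte was actually part of the
copy.

#include <stdint.h>
#include <string.h>
#include <assert.h>

static void mask_tail(uint8_t *bytemap, unsigned int nr_cpus,
                      unsigned int guest_bytes, unsigned int copy_bytes)
{
    if ((nr_cpus & 7) && guest_bytes == copy_bytes)
        bytemap[guest_bytes - 1] &= ~(0xff << (nr_cpus & 7));
}

int main(void)
{
    uint8_t map[2];

    memset(map, 0xff, sizeof(map));
    mask_tail(map, 10, 2, 2);   /* 10 CPUs -> keep 2 bits of byte 1 */
    assert(map[1] == 0x03);
    return 0;
}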

View File

@ -1,44 +0,0 @@
References: CVE-2012-4535 XSA-20 bnc#786516
# HG changeset patch
# User Ian Jackson <Ian.Jackson@eu.citrix.com>
# Date 1352892634 0
# Node ID bf58b94b3cef4db8d9ad9c8686bf10910ccc0644
# Parent 3186c04af5829a242059ffe4f6427853b7bfc408
VCPU/timers: Prevent overflow in calculations, leading to DoS vulnerability
The timer action for a vcpu periodic timer is to calculate the next
expiry time, and to reinsert itself into the timer queue. If the
deadline ends up in the past, Xen never leaves __do_softirq(). The
affected PCPU will stay in an infinite loop until Xen is killed by the
watchdog (if enabled).
This is a security problem, XSA-20 / CVE-2012-4535.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -882,6 +882,9 @@ long do_vcpu_op(int cmd, int vcpuid, XEN
if ( set.period_ns < MILLISECS(1) )
return -EINVAL;
+ if ( set.period_ns > STIME_DELTA_MAX )
+ return -EINVAL;
+
v->periodic_period = set.period_ns;
vcpu_force_reschedule(v);
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -55,6 +55,8 @@ struct tm gmtime(unsigned long t);
#define MILLISECS(_ms) ((s_time_t)((_ms) * 1000000ULL))
#define MICROSECS(_us) ((s_time_t)((_us) * 1000ULL))
#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
+/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
+#define STIME_DELTA_MAX ((s_time_t)((uint64_t)~0ull>>2))
extern void update_vcpu_system_time(struct vcpu *v);
extern void update_domain_wallclock_time(struct domain *d);
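
A sketch of the added validation (error value and helper name
illustrative): with the period capped at 2^62-1 ns, NOW() + period
cannot wrap until roughly 146 years of uptime, in the same ballpark as
the comment's 200-year figure.

#include <stdint.h>
#include <assert.h>

typedef int64_t s_time_t;

#define STIME_MAX       ((s_time_t)((uint64_t)~0ull >> 1))
#define STIME_DELTA_MAX ((s_time_t)((uint64_t)~0ull >> 2))

/* Reject periods that could leave the timer deadline in the past. */
static int check_period_ns(s_time_t period_ns)
{
    if (period_ns > STIME_DELTA_MAX)
        return -22;             /* -EINVAL, as in the hypercall */
    return 0;
}

int main(void)
{
    assert(check_period_ns(STIME_DELTA_MAX) == 0);
    assert(check_period_ns(STIME_MAX) == -22);
    return 0;
}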

View File

@ -1,47 +0,0 @@
References: CVE-2012-4537 XSA-22 bnc#786517
# HG changeset patch
# User Ian Jackson <Ian.Jackson@eu.citrix.com>
# Date 1352892962 0
# Node ID 6b6a4007a6091610a29b71cc32908c74113b852b
# Parent bf58b94b3cef4db8d9ad9c8686bf10910ccc0644
x86/physmap: Prevent incorrect updates of m2p mappings
In certain conditions, such as low memory, set_p2m_entry() can fail.
Currently, the p2m and m2p tables will get out of sync because we still
update the m2p table after the p2m update has failed.
If that happens, subsequent guest-invoked memory operations can cause
BUG()s and ASSERT()s to kill Xen.
This is fixed by only updating the m2p table iff the p2m was
successfully updated.
This is a security problem, XSA-22 / CVE-2012-4537.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -654,7 +654,10 @@ guest_physmap_add_entry(struct domain *d
if ( mfn_valid(_mfn(mfn)) )
{
if ( !set_p2m_entry(p2m, gfn, _mfn(mfn), page_order, t, p2m->default_access) )
+ {
rc = -EINVAL;
+ goto out; /* Failed to update p2m, bail without updating m2p. */
+ }
if ( !p2m_is_grant(t) )
{
for ( i = 0; i < (1UL << page_order); i++ )
@@ -677,6 +680,7 @@ guest_physmap_add_entry(struct domain *d
}
}
+out:
p2m_unlock(p2m);
return rc;
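
The general shape of the fix — commit the secondary mapping only after
the primary one succeeded — in a self-contained toy (tables and sizes
invented):

#include <assert.h>
#include <stdbool.h>

static unsigned long p2m[16], m2p[16];  /* toy stand-ins */

static bool set_p2m(unsigned long gfn, unsigned long mfn)
{
    if (gfn >= 16)              /* model a set_p2m_entry() failure */
        return false;
    p2m[gfn] = mfn;
    return true;
}

static int add_entry(unsigned long gfn, unsigned long mfn)
{
    if (!set_p2m(gfn, mfn))
        return -22;             /* -EINVAL: bail before touching m2p */
    m2p[mfn] = gfn;             /* reached only on success */
    return 0;
}

int main(void)
{
    assert(add_entry(99, 5) == -22 && m2p[5] == 0); /* m2p untouched */
    assert(add_entry(3, 5) == 0 && m2p[5] == 3);
    return 0;
}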

View File

@ -1,39 +0,0 @@
References: CVE-2012-4538 XSA-23 bnc#786519
# HG changeset patch
# User Ian Jackson <Ian.Jackson@eu.citrix.com>
# Date 1352893341 0
# Node ID c7a01b6450e483ca839228bf1e1e44de692e3458
# Parent 6b6a4007a6091610a29b71cc32908c74113b852b
xen/mm/shadow: check toplevel pagetables are present before unhooking them.
If the guest has not fully populated its top-level PAE entries when it calls
HVMOP_pagetable_dying, the shadow code could try to unhook entries from
MFN 0. Add a check to avoid that case.
This issue was introduced by c/s 21239:b9d2db109cf5.
This is a security problem, XSA-23 / CVE-2012-4538.
Signed-off-by: Tim Deegan <tim@xen.org>
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4734,8 +4734,12 @@ static void sh_pagetable_dying(struct vc
unsigned long gfn;
mfn_t smfn, gmfn;
- if ( fast_path )
- smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i]));
+ if ( fast_path ) {
+ if ( pagetable_is_null(v->arch.shadow_table[i]) )
+ smfn = _mfn(INVALID_MFN);
+ else
+ smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i]));
+ }
else
{
/* retrieving the l2s */

View File

@ -1,33 +0,0 @@
References: CVE-2012-4539 XSA-24 bnc#786520
# HG changeset patch
# User Ian Jackson <Ian.Jackson@eu.citrix.com>
# Date 1352893537 0
# Node ID b64a7d868f06c730a444e990da1a4d816ce3f5dc
# Parent c7a01b6450e483ca839228bf1e1e44de692e3458
compat/gnttab: Prevent infinite loop in compat code
c/s 20281:95ea2052b41b, which introduces Grant Table version 2
hypercalls, introduces a vulnerability whereby the compat hypercall
handler can fall into an infinite loop.
If the watchdog is enabled, Xen will die after the timeout.
This is a security problem, XSA-24 / CVE-2012-4539.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
--- a/xen/common/compat/grant_table.c
+++ b/xen/common/compat/grant_table.c
@@ -318,6 +318,8 @@ int compat_grant_table_op(unsigned int c
#undef XLAT_gnttab_get_status_frames_HNDL_frame_list
if ( unlikely(__copy_to_guest(cmp_uop, &cmp.get_status, 1)) )
rc = -EFAULT;
+ else
+ i = 1;
}
break;
}

View File

@ -1,59 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1353398311 -3600
# Node ID ae6fb202b233af815466055d9f1a635802a50855
# Parent 02b9d9a25feecd76d56143afa9bbb853fd01f602
passthrough/PCI: replace improper uses of pci_find_next_cap()
Using pci_find_next_cap() without prior pci_find_cap_offset() is bogus
(and possibly wrong, given that the latter doesn't check the
PCI_STATUS_CAP_LIST flag, which so far was checked in an open-coded way
only for the non-bridge case).
Once at it, fold the two calls into one, as we need its result in any
case.
The question is whether, without any caller left, pci_find_next_cap()
should be purged as well.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -565,16 +565,13 @@ void pci_release_devices(struct domain *
int pdev_type(u16 seg, u8 bus, u8 devfn)
{
- u16 class_device;
- u16 status, creg;
- int pos;
+ u16 class_device, creg;
u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
+ int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);
class_device = pci_conf_read16(seg, bus, d, f, PCI_CLASS_DEVICE);
if ( class_device == PCI_CLASS_BRIDGE_PCI )
{
- pos = pci_find_next_cap(seg, bus, devfn,
- PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP);
if ( !pos )
return DEV_TYPE_LEGACY_PCI_BRIDGE;
creg = pci_conf_read16(seg, bus, d, f, pos + PCI_EXP_FLAGS);
@@ -582,15 +579,7 @@ int pdev_type(u16 seg, u8 bus, u8 devfn)
DEV_TYPE_PCIe2PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE;
}
- status = pci_conf_read16(seg, bus, d, f, PCI_STATUS);
- if ( !(status & PCI_STATUS_CAP_LIST) )
- return DEV_TYPE_PCI;
-
- if ( pci_find_next_cap(seg, bus, devfn, PCI_CAPABILITY_LIST,
- PCI_CAP_ID_EXP) )
- return DEV_TYPE_PCIe_ENDPOINT;
-
- return DEV_TYPE_PCI;
+ return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
}
/*

View File

@ -1,34 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1353946839 -3600
# Node ID 16bf7f3069a77c8a15b785cfdb5f2f634661d7fc
# Parent 0049de3827bcfe3488dc6c7b7a60327a562aee5c
x86/time: fix scale_delta() inline assembly
The way it was coded, it clobbered %rdx without telling the compiler.
This generally didn't cause any problems except when there are two back
to back invocations (as in plt_overflow()), as in that case the
compiler may validly assume that it can re-use for the second instance
the value loaded into %rdx before the first one.
Once at it, also properly relax the second operand of "mul" (there's no
need for it to be in %rdx, or a register at all), and switch away from
using explicit register names in the instruction operands.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -142,8 +142,9 @@ static inline u64 scale_delta(u64 delta,
: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
#else
asm (
- "mul %%rdx ; shrd $32,%%rdx,%%rax"
- : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
+ "mul %2 ; shrd $32,%1,%0"
+ : "=a" (product), "=d" (delta)
+ : "rm" (delta), "0" ((u64)scale->mul_frac) );
#endif
return product;
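
What the fixed mul/shrd pair computes is a 64x32-bit multiply keeping
the middle 64 bits, i.e. multiplication by a 32.32 fixed-point
fraction; declaring delta as the "=d" output is what tells the compiler
that mul clobbers %rdx. A portable model using GCC's unsigned __int128
(not the hypervisor's code):

#include <stdint.h>
#include <assert.h>

static uint64_t scale_delta_model(uint64_t delta, uint32_t mul_frac)
{
    /* (delta * mul_frac) >> 32, without losing the high bits */
    return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
}

int main(void)
{
    /* mul_frac == 2^31 means "multiply by 0.5" in 32.32 fixed point. */
    assert(scale_delta_model(1000, 1u << 31) == 500);
    return 0;
}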

View File

@ -0,0 +1,28 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1354118456 -3600
# Node ID 836697b197462f89a4d296da9482d1719dcc0836
# Parent 1fce7522daa6bab9fce93b95adf592193c904097
IOMMU: imply "verbose" from "debug"
I think that generally enabling debugging code without also enabling
verbose output is rather pointless; if someone really wants this, they
can always pass e.g. "iommu=debug,no-verbose".
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -91,7 +91,11 @@ static void __init parse_iommu_param(cha
else if ( !strcmp(s, "intremap") )
iommu_intremap = val;
else if ( !strcmp(s, "debug") )
+ {
iommu_debug = val;
+ if ( val )
+ iommu_verbose = 1;
+ }
else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
amd_iommu_perdev_intremap = val;
else if ( !strcmp(s, "dom0-passthrough") )

View File

@ -0,0 +1,52 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1354697534 -3600
# Node ID 670b07e8d7382229639af0d1df30071e6c1ebb19
# Parent bc624b00d6d601f00a53c2f7502a82dcef60f882
IOMMU/ATS: fix maximum queue depth calculation
The capabilities register field is a 5-bit value, and the 5 bits all
being zero actually means 32 entries.
Under the assumption that amd_iommu_flush_iotlb() really just tried
to correct for the miscalculation above when adding 32 to the value,
that adjustment is also being removed.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Acked-by: Wei Huang <wei.huang2@amd.com>
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -321,7 +321,7 @@ void amd_iommu_flush_iotlb(struct pci_de
req_id = get_dma_requestor_id(iommu->seg, bdf);
queueid = req_id;
- maxpend = (ats_pdev->ats_queue_depth + 32) & 0xff;
+ maxpend = ats_pdev->ats_queue_depth & 0xff;
/* send INVALIDATE_IOTLB_PAGES command */
spin_lock_irqsave(&iommu->lock, flags);
--- a/xen/drivers/passthrough/ats.h
+++ b/xen/drivers/passthrough/ats.h
@@ -30,7 +30,7 @@ struct pci_ats_dev {
#define ATS_REG_CAP 4
#define ATS_REG_CTL 6
-#define ATS_QUEUE_DEPTH_MASK 0xF
+#define ATS_QUEUE_DEPTH_MASK 0x1f
#define ATS_ENABLE (1<<15)
extern struct list_head ats_devices;
--- a/xen/drivers/passthrough/x86/ats.c
+++ b/xen/drivers/passthrough/x86/ats.c
@@ -93,7 +93,8 @@ int enable_ats_device(int seg, int bus,
pdev->devfn = devfn;
value = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
PCI_FUNC(devfn), pos + ATS_REG_CAP);
- pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK;
+ pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK ?:
+ ATS_QUEUE_DEPTH_MASK + 1;
list_add(&pdev->list, &ats_devices);
}
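
Spelled out without the GNU ?: extension used in the hunk above, the
decode is: the 5-bit field encodes depths 1..31 directly and uses 0 for
the maximum of 32. A standalone sketch:

#include <assert.h>

#define ATS_QUEUE_DEPTH_MASK 0x1f

static unsigned int ats_queue_depth(unsigned int cap_reg)
{
    unsigned int depth = cap_reg & ATS_QUEUE_DEPTH_MASK;

    return depth ? depth : ATS_QUEUE_DEPTH_MASK + 1;
}

int main(void)
{
    assert(ats_queue_depth(0x07) == 7);
    assert(ats_queue_depth(0x20) == 32);  /* low 5 bits zero -> 32 */
    return 0;
}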

View File

@ -0,0 +1,28 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354812866 0
# Node ID 312f0713dfc98635fd9ed4b42481581489faa28f
# Parent bfd8e96fa3f157630f9698401a1f040ca1776c8e
nested vmx: fix rflags status in virtual vmexit
As stated in the SDM, all bits in RFLAGS (except those reserved as 1)
are set to 0 on VM exit. Therefore we need to follow this logic in
virtual_vmexit.
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -990,7 +990,8 @@ static void virtual_vmexit(struct cpu_us
regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
- regs->eflags = __vmread(GUEST_RFLAGS);
+ /* VM exit clears all bits except bit 1 */
+ regs->eflags = 0x2;
/* updating host cr0 to sync TS bit */
__vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);

View File

@ -0,0 +1,46 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354812981 0
# Node ID a09150b57ace2fa786dcaefa958f0b197b1b6d4c
# Parent 312f0713dfc98635fd9ed4b42481581489faa28f
nested vmx: fix handling of RDTSC
If L0 is to handle the TSC access, then we need to update guest EIP by
calling update_guest_eip().
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1613,7 +1613,7 @@ static int get_instruction_length(void)
return len;
}
-static void update_guest_eip(void)
+void update_guest_eip(void)
{
struct cpu_user_regs *regs = guest_cpu_user_regs();
unsigned long x;
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1558,6 +1558,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
regs->eax = (uint32_t)tsc;
regs->edx = (uint32_t)(tsc >> 32);
+ update_guest_eip();
return 1;
}
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -396,6 +396,8 @@ void ept_p2m_init(struct p2m_domain *p2m
void ept_walk_table(struct domain *d, unsigned long gfn);
void setup_ept_dump(void);
+void update_guest_eip(void);
+
/* EPT violation qualifications definitions */
#define _EPT_READ_VIOLATION 0
#define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION)

26254-VMX-nested-dr.patch
View File

@ -0,0 +1,27 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354813009 0
# Node ID e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
# Parent a09150b57ace2fa786dcaefa958f0b197b1b6d4c
nested vmx: fix DR access VM exit
For DR registers, we use a lazy restore mechanism on access.
Therefore, when receiving such a VM exit, L0 should be responsible for
switching to the right DR values before injecting the exit into the L1
hypervisor.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1585,7 +1585,8 @@ int nvmx_n2_vmexit_handler(struct cpu_us
break;
case EXIT_REASON_DR_ACCESS:
ctrl = __n2_exec_control(v);
- if ( ctrl & CPU_BASED_MOV_DR_EXITING )
+ if ( (ctrl & CPU_BASED_MOV_DR_EXITING) &&
+ v->arch.hvm_vcpu.flag_dr_dirty )
nvcpu->nv_vmexit_pending = 1;
break;
case EXIT_REASON_INVLPG:

View File

@ -0,0 +1,30 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354813046 0
# Node ID 1ed1507fa0407f1da715d04fe1b510e81ca4fb31
# Parent e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
nested vmx: enable IA32E mode while do VM entry
Some VMMs may check the platform capability to judge whether long-mode
guests are supported. Therefore we need to expose this bit to the
guest VMM.
Xen-on-Xen works fine with the current solution because Xen doesn't
check this capability but directly sets it in the VMCS if the guest
supports long mode.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1351,7 +1351,7 @@ int nvmx_msr_read_intercept(unsigned int
case MSR_IA32_VMX_ENTRY_CTLS:
/* bit 0-8, and 12 must be 1 (refer G5 of SDM) */
data = 0x11ff;
- data = (data << 32) | data;
+ data = ((data | VM_ENTRY_IA32E_MODE) << 32) | data;
break;
case IA32_FEATURE_CONTROL_MSR:
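
For context: VMX capability MSRs such as MSR_IA32_VMX_ENTRY_CTLS encode
the allowed-0 settings (bits that must be 1) in the low 32 bits and the
allowed-1 settings (bits that may be 1) in the high 32 bits, per the
SDM; VM_ENTRY_IA32E_MODE is bit 9. The fix thus advertises IA32E mode
as permitted but not required:

#include <stdint.h>
#include <assert.h>

#define VM_ENTRY_IA32E_MODE (1u << 9)

int main(void)
{
    uint64_t data = 0x11ff;             /* bits 0-8 and 12, as above */

    data = ((data | VM_ENTRY_IA32E_MODE) << 32) | data;

    assert((data >> 32) & VM_ENTRY_IA32E_MODE);       /* may be 1 */
    assert(!((uint32_t)data & VM_ENTRY_IA32E_MODE));  /* need not be 1 */
    return 0;
}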

View File

@ -0,0 +1,45 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354813139 0
# Node ID 90831c29bfde6aac013b7e5ec98934a4953c31c9
# Parent 25dd352265ca23750f1a1a983124b36f518c4384
nested vmx: fix interrupt delivery to L2 guest
While delivering an interrupt into the L2 guest, the L0 hypervisor
needs to check whether the L1 hypervisor wants to own the interrupt;
if not, it directly injects the interrupt into the L2 guest.
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,7 +163,7 @@ enum hvm_intblk nvmx_intr_blocked(struct
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
- u32 exit_ctrl;
+ u32 ctrl;
if ( nvmx_intr_blocked(v) != hvm_intblk_none )
{
@@ -176,11 +176,14 @@ static int nvmx_intr_intercept(struct vc
if ( intack.source == hvm_intsrc_pic ||
intack.source == hvm_intsrc_lapic )
{
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+ if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
+ return 0;
+
vmx_inject_extint(intack.vector);
- exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
- VM_EXIT_CONTROLS);
- if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+ if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
{
/* for now, duplicate the ack path in vmx_intr_assist */
hvm_vcpu_ack_pending_irq(v, intack);

View File

@ -0,0 +1,62 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1354884272 -3600
# Node ID b62bd62b26836fafe19cf41fec194bcf33e2ead6
# Parent cb542e58da25211843eb79998ea8568ebe9c8056
x86/EFI: add code interfacing with the secure boot shim
... to validate the kernel image (which is required to be in PE
format, as is e.g. the case for the Linux bzImage when built with
CONFIG_EFI_STUB).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -24,6 +24,18 @@
#include <asm/msr.h>
#include <asm/processor.h>
+#define SHIM_LOCK_PROTOCOL_GUID \
+ { 0x605dab50, 0xe046, 0x4300, {0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23} }
+
+typedef EFI_STATUS
+(/* _not_ EFIAPI */ *EFI_SHIM_LOCK_VERIFY) (
+ IN VOID *Buffer,
+ IN UINT32 Size);
+
+typedef struct {
+ EFI_SHIM_LOCK_VERIFY Verify;
+} EFI_SHIM_LOCK_PROTOCOL;
+
extern char start[];
extern u32 cpuid_ext_features;
@@ -628,12 +640,14 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
static EFI_GUID __initdata gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
static EFI_GUID __initdata bio_guid = BLOCK_IO_PROTOCOL;
static EFI_GUID __initdata devp_guid = DEVICE_PATH_PROTOCOL;
+ static EFI_GUID __initdata shim_lock_guid = SHIM_LOCK_PROTOCOL_GUID;
EFI_LOADED_IMAGE *loaded_image;
EFI_STATUS status;
unsigned int i, argc;
CHAR16 **argv, *file_name, *cfg_file_name = NULL;
UINTN cols, rows, depth, size, map_key, info_size, gop_mode = ~0;
EFI_HANDLE *handles = NULL;
+ EFI_SHIM_LOCK_PROTOCOL *shim_lock;
EFI_GRAPHICS_OUTPUT_PROTOCOL *gop = NULL;
EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info;
EFI_FILE_HANDLE dir_handle;
@@ -823,6 +837,11 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
read_file(dir_handle, s2w(&name), &kernel);
efi_bs->FreePool(name.w);
+ if ( !EFI_ERROR(efi_bs->LocateProtocol(&shim_lock_guid, NULL,
+ (void **)&shim_lock)) &&
+ shim_lock->Verify(kernel.ptr, kernel.size) != EFI_SUCCESS )
+ blexit(L"Dom0 kernel image could not be verified\r\n");
+
name.s = get_value(&cfg, section.s, "ramdisk");
if ( name.s )
{

View File

@ -0,0 +1,70 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1355134467 -3600
# Node ID 8d209624ea83b272e1ebd713a928c38d4782f4f1
# Parent f96a0cda12160f497981a37f6922a1ed7db9a462
scheduler: fix rate limit range checking
For one, neither of the two checks permitted the documented value of
zero (which disables the functionality altogether).
Second, the range checking of the command line parameter was done by
the credit scheduler's initialization code, despite it being a generic
scheduler option.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -846,8 +846,9 @@ csched_sys_cntl(const struct scheduler *
case XEN_SYSCTL_SCHEDOP_putinfo:
if (params->tslice_ms > XEN_SYSCTL_CSCHED_TSLICE_MAX
|| params->tslice_ms < XEN_SYSCTL_CSCHED_TSLICE_MIN
- || params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
- || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN
+ || (params->ratelimit_us
+ && (params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
+ || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN))
|| MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) )
goto out;
prv->tslice_ms = params->tslice_ms;
@@ -1607,17 +1608,6 @@ csched_init(struct scheduler *ops)
sched_credit_tslice_ms = CSCHED_DEFAULT_TSLICE_MS;
}
- if ( sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
- || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN )
- {
- printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
- " Resetting to default %u\n",
- XEN_SYSCTL_SCHED_RATELIMIT_MIN,
- XEN_SYSCTL_SCHED_RATELIMIT_MAX,
- SCHED_DEFAULT_RATELIMIT_US);
- sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
- }
-
prv->tslice_ms = sched_credit_tslice_ms;
prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE;
if ( prv->tslice_ms < prv->ticks_per_tslice )
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1322,6 +1322,18 @@ void __init scheduler_init(void)
if ( SCHED_OP(&ops, init) )
panic("scheduler returned error on init\n");
+ if ( sched_ratelimit_us &&
+ (sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
+ || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN) )
+ {
+ printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
+ " Resetting to default %u\n",
+ XEN_SYSCTL_SCHED_RATELIMIT_MIN,
+ XEN_SYSCTL_SCHED_RATELIMIT_MAX,
+ SCHED_DEFAULT_RATELIMIT_US);
+ sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
+ }
+
idle_domain = domain_create(DOMID_IDLE, 0, 0);
BUG_ON(IS_ERR(idle_domain));
idle_domain->vcpu = idle_vcpu;
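
The corrected check in a standalone form (bounds are illustrative, not
necessarily Xen's values): zero is the documented "disabled" setting
and must bypass the range check entirely.

#include <assert.h>
#include <stdbool.h>

#define RATELIMIT_MIN 100       /* illustrative bounds */
#define RATELIMIT_MAX 500000

static bool ratelimit_valid(unsigned int us)
{
    return !us || (us >= RATELIMIT_MIN && us <= RATELIMIT_MAX);
}

int main(void)
{
    assert(ratelimit_valid(0));     /* 0 = disabled, must be accepted */
    assert(ratelimit_valid(1000));  /* in range */
    assert(!ratelimit_valid(50));   /* below MIN: rejected */
    return 0;
}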

View File

@ -0,0 +1,71 @@
# HG changeset patch
# User Andre Przywara <osp@andrep.de>
# Date 1355913729 -3600
# Node ID 5fb0b8b838dab0b331abfa675fd2b2214ac90760
# Parent b04de677de31f26ba4b8f2f382ca4dfffcff9a79
x86, amd: Disable way access filter on Piledriver CPUs
The Way Access Filter in recent AMD CPUs may hurt the performance of
some workloads, caused by aliasing issues in the L1 cache.
This patch disables it on the affected CPUs.
The issue is similar to the one from last year:
http://lkml.indiana.edu/hypermail/linux/kernel/1107.3/00041.html
This new patch does not replace the old one, we just need another
quirk for newer CPUs.
The performance penalty without the patch depends on the
circumstances, but is a bit less than last year's 3%.
The workloads affected would be those that access code from the same
physical page under different virtual addresses, so different
processes using the same libraries with ASLR or multiple instances of
PIE-binaries. The code needs to be accessed simultaneously from both
cores of the same compute unit.
More details can be found here:
http://developer.amd.com/Assets/SharedL1InstructionCacheonAMD15hCPU.pdf
CPUs affected are anything with the core known as Piledriver.
That includes the new parts of the AMD A-Series (aka Trinity) and the
just released new CPUs of the FX-Series (aka Vishera).
The model numbering is a bit odd here: FX CPUs have model 2,
A-Series has model 10h, with possible extensions to 1Fh. Hence the
range of model ids.
Signed-off-by: Andre Przywara <osp@andrep.de>
Add and use MSR_AMD64_IC_CFG. Update the value whenever it is found to
not have all bits set, rather than just when it's zero.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -493,6 +493,14 @@ static void __devinit init_amd(struct cp
}
}
+ /*
+ * The way access filter has a performance penalty on some workloads.
+ * Disable it on the affected CPUs.
+ */
+ if (c->x86 == 0x15 && c->x86_model >= 0x02 && c->x86_model < 0x20 &&
+ !rdmsr_safe(MSR_AMD64_IC_CFG, value) && (value & 0x1e) != 0x1e)
+ wrmsr_safe(MSR_AMD64_IC_CFG, value | 0x1e);
+
amd_get_topology(c);
/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -206,6 +206,7 @@
/* AMD64 MSRs */
#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_IC_CFG 0xc0011021
#define MSR_AMD64_DC_CFG 0xc0011022
#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46
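
The shape of the quirk in a self-contained sketch: read the config MSR
and widen it only when the disable bits (0x1e) aren't all already set.
The MSR accessors are modelled with a fake register; the real code uses
rdmsr_safe()/wrmsr_safe() on MSR_AMD64_IC_CFG (0xc0011021).

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

static uint64_t fake_ic_cfg = 0x4;      /* one disable bit already set */

static bool rdmsr(uint64_t *val) { *val = fake_ic_cfg; return true; }
static void wrmsr(uint64_t val)  { fake_ic_cfg = val; }

static void piledriver_quirk(unsigned int family, unsigned int model)
{
    uint64_t value;

    /* Family 15h, models 02h..1Fh: the Piledriver core. */
    if (family == 0x15 && model >= 0x02 && model < 0x20 &&
        rdmsr(&value) && (value & 0x1e) != 0x1e)
        wrmsr(value | 0x1e);
}

int main(void)
{
    piledriver_quirk(0x15, 0x10);       /* Trinity-class model */
    assert((fake_ic_cfg & 0x1e) == 0x1e);
    return 0;
}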

View File

@ -0,0 +1,45 @@
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@citrix.com>
# Date 1357290407 -3600
# Node ID 8fd5635f451b073ddc99e928c975e8a7743d1321
# Parent c4114a042410d3bdec3a77c30b2e85366d7fbe1d
passthrough/domctl: use correct struct in union
This appears to be a copy-paste error from c/s 23861:ec7c81fbe0de.
It is safe, functionally speaking, as both the xen_domctl_assign_device
and xen_domctl_get_device_group structures start with a 'uint32_t
machine_sbdf'. We should however use the correct union member.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -592,7 +592,7 @@ int iommu_do_domctl(
if ( ret )
break;
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;
@@ -621,7 +621,7 @@ int iommu_do_domctl(
if ( ret )
goto assign_device_out;
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;
@@ -649,7 +649,7 @@ int iommu_do_domctl(
if ( ret )
goto deassign_device_out;
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;

View File

@ -0,0 +1,267 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559364 -3600
# Node ID 62dd78a4e3fc9d190840549f13b4d613f2d19c41
# Parent 64b36dde26bc3c4fc80312cc9eeb0e511f0cf94b
IOMMU: adjust (re)assign operation parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -328,34 +328,31 @@ void amd_iommu_disable_domain_device(str
disable_ats_device(iommu->seg, bus, devfn);
}
-static int reassign_device( struct domain *source, struct domain *target,
- u16 seg, u8 bus, u8 devfn)
+static int reassign_device(struct domain *source, struct domain *target,
+ u8 devfn, struct pci_dev *pdev)
{
- struct pci_dev *pdev;
struct amd_iommu *iommu;
int bdf;
struct hvm_iommu *t = domain_hvm_iommu(target);
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev_by_domain(source, seg, bus, devfn);
- if ( !pdev )
- return -ENODEV;
-
- bdf = PCI_BDF2(bus, devfn);
- iommu = find_iommu_for_device(seg, bdf);
+ bdf = PCI_BDF2(pdev->bus, pdev->devfn);
+ iommu = find_iommu_for_device(pdev->seg, bdf);
if ( !iommu )
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%x02.%x cannot be assigned to dom%d\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
target->domain_id);
return -ENODEV;
}
amd_iommu_disable_domain_device(source, iommu, bdf);
- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
+ if ( devfn == pdev->devfn )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }
/* IO page tables might be destroyed after pci-detach the last device
* In this case, we have to re-allocate root table for next pci-attach.*/
@@ -364,17 +361,18 @@ static int reassign_device( struct domai
amd_iommu_setup_domain_device(target, iommu, bdf);
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
return 0;
}
-static int amd_iommu_assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+static int amd_iommu_assign_device(struct domain *d, u8 devfn,
+ struct pci_dev *pdev)
{
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
- int bdf = (bus << 8) | devfn;
- int req_id = get_dma_requestor_id(seg, bdf);
+ struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
+ int bdf = PCI_BDF2(pdev->bus, devfn);
+ int req_id = get_dma_requestor_id(pdev->seg, bdf);
if ( ivrs_mappings[req_id].unity_map_enable )
{
@@ -386,7 +384,7 @@ static int amd_iommu_assign_device(struc
ivrs_mappings[req_id].read_permission);
}
- return reassign_device(dom0, d, seg, bus, devfn);
+ return reassign_device(dom0, d, devfn, pdev);
}
static void deallocate_next_page_table(struct page_info* pg, int level)
@@ -451,12 +449,6 @@ static void amd_iommu_domain_destroy(str
amd_iommu_flush_all_pages(d);
}
-static int amd_iommu_return_device(
- struct domain *s, struct domain *t, u16 seg, u8 bus, u8 devfn)
-{
- return reassign_device(s, t, seg, bus, devfn);
-}
-
static int amd_iommu_add_device(struct pci_dev *pdev)
{
struct amd_iommu *iommu;
@@ -596,7 +588,7 @@ const struct iommu_ops amd_iommu_ops = {
.teardown = amd_iommu_domain_destroy,
.map_page = amd_iommu_map_page,
.unmap_page = amd_iommu_unmap_page,
- .reassign_device = amd_iommu_return_device,
+ .reassign_device = reassign_device,
.get_device_group_id = amd_iommu_group_id,
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -232,11 +232,16 @@ static int assign_device(struct domain *
return -EXDEV;
spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(seg, bus, devfn);
- if ( pdev )
- pdev->fault.count = 0;
+ pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
+ if ( !pdev )
+ {
+ rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
+ goto done;
+ }
+
+ pdev->fault.count = 0;
- if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
+ if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
goto done;
if ( has_arch_pdevs(d) && !need_iommu(d) )
@@ -367,18 +372,11 @@ int deassign_device(struct domain *d, u1
return -EINVAL;
ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(seg, bus, devfn);
+ pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
if ( !pdev )
return -ENODEV;
- if ( pdev->domain != d )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "d%d: deassign a device not owned\n", d->domain_id);
- return -EINVAL;
- }
-
- ret = hd->platform_ops->reassign_device(d, dom0, seg, bus, devfn);
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1689,17 +1689,10 @@ out:
static int reassign_device_ownership(
struct domain *source,
struct domain *target,
- u16 seg, u8 bus, u8 devfn)
+ u8 devfn, struct pci_dev *pdev)
{
- struct pci_dev *pdev;
int ret;
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev_by_domain(source, seg, bus, devfn);
-
- if (!pdev)
- return -ENODEV;
-
/*
* Devices assigned to untrusted domains (here assumed to be any domU)
* can attempt to send arbitrary LAPIC/MSI messages. We are unprotected
@@ -1708,16 +1701,19 @@ static int reassign_device_ownership(
if ( (target != dom0) && !iommu_intremap )
untrusted_msi = 1;
- ret = domain_context_unmap(source, seg, bus, devfn);
+ ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn);
if ( ret )
return ret;
- ret = domain_context_mapping(target, seg, bus, devfn);
+ ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn);
if ( ret )
return ret;
- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
+ if ( devfn == pdev->devfn )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }
return ret;
}
@@ -2207,36 +2203,26 @@ int __init intel_vtd_setup(void)
}
static int intel_iommu_assign_device(
- struct domain *d, u16 seg, u8 bus, u8 devfn)
+ struct domain *d, u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
int ret = 0, i;
- struct pci_dev *pdev;
- u16 bdf;
+ u16 bdf, seg;
+ u8 bus;
if ( list_empty(&acpi_drhd_units) )
return -ENODEV;
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(seg, bus, devfn);
- if (!pdev)
- return -ENODEV;
-
- if (pdev->domain != dom0)
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "IOMMU: assign a assigned device\n");
- return -EBUSY;
- }
-
- ret = reassign_device_ownership(dom0, d, seg, bus, devfn);
+ ret = reassign_device_ownership(dom0, d, devfn, pdev);
if ( ret )
goto done;
/* FIXME: Because USB RMRR conflicts with guest bios region,
* ignore USB RMRR temporarily.
*/
- if ( is_usb_device(seg, bus, devfn) )
+ seg = pdev->seg;
+ bus = pdev->bus;
+ if ( is_usb_device(seg, bus, pdev->devfn) )
{
ret = 0;
goto done;
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -123,13 +123,13 @@ struct iommu_ops {
int (*add_device)(struct pci_dev *pdev);
int (*enable_device)(struct pci_dev *pdev);
int (*remove_device)(struct pci_dev *pdev);
- int (*assign_device)(struct domain *d, u16 seg, u8 bus, u8 devfn);
+ int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
unsigned int flags);
int (*unmap_page)(struct domain *d, unsigned long gfn);
int (*reassign_device)(struct domain *s, struct domain *t,
- u16 seg, u8 bus, u8 devfn);
+ u8 devfn, struct pci_dev *);
int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);

View File

@ -0,0 +1,354 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559482 -3600
# Node ID 75cc4943b1ff509c4074800a23ff51d773233b8a
# Parent 62dd78a4e3fc9d190840549f13b4d613f2d19c41
IOMMU: adjust add/remove operation parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -83,14 +83,14 @@ static void disable_translation(u32 *dte
}
static void amd_iommu_setup_domain_device(
- struct domain *domain, struct amd_iommu *iommu, int bdf)
+ struct domain *domain, struct amd_iommu *iommu,
+ u8 devfn, struct pci_dev *pdev)
{
void *dte;
unsigned long flags;
int req_id, valid = 1;
int dte_i = 0;
- u8 bus = PCI_BUS(bdf);
- u8 devfn = PCI_DEVFN2(bdf);
+ u8 bus = pdev->bus;
struct hvm_iommu *hd = domain_hvm_iommu(domain);
@@ -103,7 +103,7 @@ static void amd_iommu_setup_domain_devic
dte_i = 1;
/* get device-table entry */
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
spin_lock_irqsave(&iommu->lock, flags);
@@ -115,7 +115,7 @@ static void amd_iommu_setup_domain_devic
(u32 *)dte, page_to_maddr(hd->root_table), hd->domain_id,
hd->paging_mode, valid);
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
iommu_dte_set_iotlb((u32 *)dte, dte_i);
@@ -132,32 +132,31 @@ static void amd_iommu_setup_domain_devic
ASSERT(spin_is_locked(&pcidevs_lock));
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
- !pci_ats_enabled(iommu->seg, bus, devfn) )
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
+ !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
{
- struct pci_dev *pdev;
+ if ( devfn == pdev->devfn )
+ enable_ats_device(iommu->seg, bus, devfn);
- enable_ats_device(iommu->seg, bus, devfn);
-
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(iommu->seg, bus, devfn);
-
- ASSERT( pdev != NULL );
amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
}
-static void __init amd_iommu_setup_dom0_device(struct pci_dev *pdev)
+static int __init amd_iommu_setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
int bdf = PCI_BDF2(pdev->bus, pdev->devfn);
struct amd_iommu *iommu = find_iommu_for_device(pdev->seg, bdf);
- if ( likely(iommu != NULL) )
- amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
- else
+ if ( unlikely(!iommu) )
+ {
AMD_IOMMU_DEBUG("No iommu for device %04x:%02x:%02x.%u\n",
pdev->seg, pdev->bus,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return -ENODEV;
+ }
+
+ amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
+ return 0;
}
int __init amd_iov_detect(void)
@@ -291,16 +290,16 @@ static void __init amd_iommu_dom0_init(s
}
void amd_iommu_disable_domain_device(struct domain *domain,
- struct amd_iommu *iommu, int bdf)
+ struct amd_iommu *iommu,
+ u8 devfn, struct pci_dev *pdev)
{
void *dte;
unsigned long flags;
int req_id;
- u8 bus = PCI_BUS(bdf);
- u8 devfn = PCI_DEVFN2(bdf);
+ u8 bus = pdev->bus;
BUG_ON ( iommu->dev_table.buffer == NULL );
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
spin_lock_irqsave(&iommu->lock, flags);
@@ -308,7 +307,7 @@ void amd_iommu_disable_domain_device(str
{
disable_translation((u32 *)dte);
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
iommu_dte_set_iotlb((u32 *)dte, 0);
@@ -323,7 +322,8 @@ void amd_iommu_disable_domain_device(str
ASSERT(spin_is_locked(&pcidevs_lock));
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( devfn == pdev->devfn &&
+ pci_ats_device(iommu->seg, bus, devfn) &&
pci_ats_enabled(iommu->seg, bus, devfn) )
disable_ats_device(iommu->seg, bus, devfn);
}
@@ -346,7 +346,7 @@ static int reassign_device(struct domain
return -ENODEV;
}
- amd_iommu_disable_domain_device(source, iommu, bdf);
+ amd_iommu_disable_domain_device(source, iommu, devfn, pdev);
if ( devfn == pdev->devfn )
{
@@ -359,7 +359,7 @@ static int reassign_device(struct domain
if ( t->root_table == NULL )
allocate_domain_resources(t);
- amd_iommu_setup_domain_device(target, iommu, bdf);
+ amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
@@ -449,7 +449,7 @@ static void amd_iommu_domain_destroy(str
amd_iommu_flush_all_pages(d);
}
-static int amd_iommu_add_device(struct pci_dev *pdev)
+static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
{
struct amd_iommu *iommu;
u16 bdf;
@@ -462,16 +462,16 @@ static int amd_iommu_add_device(struct p
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%02x.%u cannot be assigned to dom%d\n",
- pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->domain->domain_id);
return -ENODEV;
}
- amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
+ amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
return 0;
}
-static int amd_iommu_remove_device(struct pci_dev *pdev)
+static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
struct amd_iommu *iommu;
u16 bdf;
@@ -484,12 +484,12 @@ static int amd_iommu_remove_device(struc
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%02x.%u cannot be removed from dom%d\n",
- pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->domain->domain_id);
return -ENODEV;
}
- amd_iommu_disable_domain_device(pdev->domain, iommu, bdf);
+ amd_iommu_disable_domain_device(pdev->domain, iommu, devfn, pdev);
return 0;
}
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -167,7 +167,7 @@ int iommu_add_device(struct pci_dev *pde
if ( !iommu_enabled || !hd->platform_ops )
return 0;
- return hd->platform_ops->add_device(pdev);
+ return hd->platform_ops->add_device(pdev->devfn, pdev);
}
int iommu_enable_device(struct pci_dev *pdev)
@@ -197,7 +197,7 @@ int iommu_remove_device(struct pci_dev *
if ( !iommu_enabled || !hd->platform_ops )
return 0;
- return hd->platform_ops->remove_device(pdev);
+ return hd->platform_ops->remove_device(pdev->devfn, pdev);
}
/*
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -715,7 +715,7 @@ int __init scan_pci_devices(void)
struct setup_dom0 {
struct domain *d;
- void (*handler)(struct pci_dev *);
+ int (*handler)(u8 devfn, struct pci_dev *);
};
static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
@@ -734,7 +734,7 @@ static int __init _setup_dom0_pci_device
pdev->domain = ctxt->d;
list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
- ctxt->handler(pdev);
+ ctxt->handler(devfn, pdev);
}
}
@@ -742,7 +742,7 @@ static int __init _setup_dom0_pci_device
}
void __init setup_dom0_pci_devices(
- struct domain *d, void (*handler)(struct pci_dev *))
+ struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
{
struct setup_dom0 ctxt = { .d = d, .handler = handler };
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -52,7 +52,7 @@ int nr_iommus;
static struct tasklet vtd_fault_tasklet;
-static void setup_dom0_device(struct pci_dev *);
+static int setup_dom0_device(u8 devfn, struct pci_dev *);
static void setup_dom0_rmrr(struct domain *d);
static int domain_iommu_domid(struct domain *d,
@@ -1904,7 +1904,7 @@ static int rmrr_identity_mapping(struct
return 0;
}
-static int intel_iommu_add_device(struct pci_dev *pdev)
+static int intel_iommu_add_device(u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
u16 bdf;
@@ -1915,8 +1915,7 @@ static int intel_iommu_add_device(struct
if ( !pdev->domain )
return -EINVAL;
- ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus,
- pdev->devfn);
+ ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
@@ -1928,7 +1927,7 @@ static int intel_iommu_add_device(struct
{
if ( rmrr->segment == pdev->seg &&
PCI_BUS(bdf) == pdev->bus &&
- PCI_DEVFN2(bdf) == pdev->devfn )
+ PCI_DEVFN2(bdf) == devfn )
{
ret = rmrr_identity_mapping(pdev->domain, rmrr);
if ( ret )
@@ -1953,7 +1952,7 @@ static int intel_iommu_enable_device(str
return ret >= 0 ? 0 : ret;
}
-static int intel_iommu_remove_device(struct pci_dev *pdev)
+static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
u16 bdf;
@@ -1971,19 +1970,22 @@ static int intel_iommu_remove_device(str
{
if ( rmrr->segment == pdev->seg &&
PCI_BUS(bdf) == pdev->bus &&
- PCI_DEVFN2(bdf) == pdev->devfn )
+ PCI_DEVFN2(bdf) == devfn )
return 0;
}
}
- return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus,
- pdev->devfn);
+ return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn);
}
-static void __init setup_dom0_device(struct pci_dev *pdev)
+static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
- domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, pdev->devfn);
- pci_vtd_quirk(pdev);
+ int err;
+
+ err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ if ( !err && devfn == pdev->devfn )
+ pci_vtd_quirk(pdev);
+ return err;
}
void clear_fault_bits(struct iommu *iommu)
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -120,9 +120,9 @@ bool_t pt_irq_need_timer(uint32_t flags)
struct iommu_ops {
int (*init)(struct domain *d);
void (*dom0_init)(struct domain *d);
- int (*add_device)(struct pci_dev *pdev);
+ int (*add_device)(u8 devfn, struct pci_dev *);
int (*enable_device)(struct pci_dev *pdev);
- int (*remove_device)(struct pci_dev *pdev);
+ int (*remove_device)(u8 devfn, struct pci_dev *);
int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -100,7 +100,8 @@ struct pci_dev *pci_lock_pdev(int seg, i
struct pci_dev *pci_lock_domain_pdev(
struct domain *, int seg, int bus, int devfn);
-void setup_dom0_pci_devices(struct domain *, void (*)(struct pci_dev *));
+void setup_dom0_pci_devices(struct domain *,
+ int (*)(u8 devfn, struct pci_dev *));
void pci_release_devices(struct domain *d);
int pci_add_segment(u16 seg);
const unsigned long *pci_get_ro_map(u16 seg);

View File

@ -0,0 +1,187 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559549 -3600
# Node ID afb598bd0f5436bea15b7ef842e8ad5c6adefa1a
# Parent 75cc4943b1ff509c4074800a23ff51d773233b8a
VT-d: adjust context map/unmap parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -95,7 +95,7 @@ void free_pgtable_maddr(u64 maddr);
void *map_vtd_domain_page(u64 maddr);
void unmap_vtd_domain_page(void *va);
int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
- u8 bus, u8 devfn);
+ u8 bus, u8 devfn, const struct pci_dev *);
int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
u8 bus, u8 devfn);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1308,7 +1308,7 @@ static void __init intel_iommu_dom0_init
int domain_context_mapping_one(
struct domain *domain,
struct iommu *iommu,
- u8 bus, u8 devfn)
+ u8 bus, u8 devfn, const struct pci_dev *pdev)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct context_entry *context, *context_entries;
@@ -1325,11 +1325,9 @@ int domain_context_mapping_one(
if ( context_present(*context) )
{
int res = 0;
- struct pci_dev *pdev = NULL;
- /* First try to get domain ownership from device structure. If that's
+ /* Try to get domain ownership from device structure. If that's
* not available, try to read it from the context itself. */
- pdev = pci_get_pdev(seg, bus, devfn);
if ( pdev )
{
if ( pdev->domain != domain )
@@ -1448,13 +1446,12 @@ int domain_context_mapping_one(
}
static int domain_context_mapping(
- struct domain *domain, u16 seg, u8 bus, u8 devfn)
+ struct domain *domain, u8 devfn, const struct pci_dev *pdev)
{
struct acpi_drhd_unit *drhd;
int ret = 0;
u32 type;
- u8 secbus;
- struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn);
+ u8 seg = pdev->seg, bus = pdev->bus, secbus;
drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
@@ -1475,8 +1472,9 @@ static int domain_context_mapping(
dprintk(VTDPREFIX, "d%d:PCIe: map %04x:%02x:%02x.%u\n",
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
- if ( !ret && ats_device(pdev, drhd) > 0 )
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pdev);
+ if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
enable_ats_device(seg, bus, devfn);
break;
@@ -1487,14 +1485,16 @@ static int domain_context_mapping(
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pdev);
if ( ret )
break;
if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 1 )
break;
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pci_get_pdev(seg, bus, devfn));
/*
* Devices behind PCIe-to-PCI/PCIx bridge may generate different
@@ -1503,7 +1503,8 @@ static int domain_context_mapping(
*/
if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
(secbus != pdev->bus || pdev->devfn != 0) )
- ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
+ ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0,
+ pci_get_pdev(seg, secbus, 0));
break;
@@ -1576,18 +1577,15 @@ int domain_context_unmap_one(
}
static int domain_context_unmap(
- struct domain *domain, u16 seg, u8 bus, u8 devfn)
+ struct domain *domain, u8 devfn, const struct pci_dev *pdev)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int ret = 0;
u32 type;
- u8 tmp_bus, tmp_devfn, secbus;
- struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn);
+ u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
int found = 0;
- BUG_ON(!pdev);
-
drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
return -ENODEV;
@@ -1607,7 +1605,7 @@ static int domain_context_unmap(
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_unmap_one(domain, iommu, bus, devfn);
- if ( !ret && ats_device(pdev, drhd) > 0 )
+ if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
disable_ats_device(seg, bus, devfn);
break;
@@ -1701,11 +1699,11 @@ static int reassign_device_ownership(
if ( (target != dom0) && !iommu_intremap )
untrusted_msi = 1;
- ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_unmap(source, devfn, pdev);
if ( ret )
return ret;
- ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_mapping(target, devfn, pdev);
if ( ret )
return ret;
@@ -1915,7 +1913,7 @@ static int intel_iommu_add_device(u8 dev
if ( !pdev->domain )
return -EINVAL;
- ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_mapping(pdev->domain, devfn, pdev);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
@@ -1975,14 +1973,14 @@ static int intel_iommu_remove_device(u8
}
}
- return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn);
+ return domain_context_unmap(pdev->domain, devfn, pdev);
}
static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
int err;
- err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ err = domain_context_mapping(pdev->domain, devfn, pdev);
if ( !err && devfn == pdev->devfn )
pci_vtd_quirk(pdev);
return err;
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -292,7 +292,7 @@ static void map_me_phantom_function(stru
/* map or unmap ME phantom function */
if ( map )
domain_context_mapping_one(domain, drhd->iommu, 0,
- PCI_DEVFN(dev, 7));
+ PCI_DEVFN(dev, 7), NULL);
else
domain_context_unmap_one(domain, drhd->iommu, 0,
PCI_DEVFN(dev, 7));

View File

@ -0,0 +1,84 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559599 -3600
# Node ID 2a2c63f641ee3bda4ad552eb0b3ea479d37590cc
# Parent afb598bd0f5436bea15b7ef842e8ad5c6adefa1a
AMD IOMMU: adjust flush function parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -287,12 +287,12 @@ void invalidate_iommu_all(struct amd_iom
send_iommu_command(iommu, cmd);
}
-void amd_iommu_flush_iotlb(struct pci_dev *pdev,
+void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
uint64_t gaddr, unsigned int order)
{
unsigned long flags;
struct amd_iommu *iommu;
- unsigned int bdf, req_id, queueid, maxpend;
+ unsigned int req_id, queueid, maxpend;
struct pci_ats_dev *ats_pdev;
if ( !ats_enabled )
@@ -305,8 +305,8 @@ void amd_iommu_flush_iotlb(struct pci_de
if ( !pci_ats_enabled(ats_pdev->seg, ats_pdev->bus, ats_pdev->devfn) )
return;
- bdf = PCI_BDF2(ats_pdev->bus, ats_pdev->devfn);
- iommu = find_iommu_for_device(ats_pdev->seg, bdf);
+ iommu = find_iommu_for_device(ats_pdev->seg,
+ PCI_BDF2(ats_pdev->bus, ats_pdev->devfn));
if ( !iommu )
{
@@ -319,7 +319,7 @@ void amd_iommu_flush_iotlb(struct pci_de
if ( !iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
return;
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(ats_pdev->bus, devfn));
queueid = req_id;
maxpend = ats_pdev->ats_queue_depth & 0xff;
@@ -339,7 +339,7 @@ static void amd_iommu_flush_all_iotlbs(s
return;
for_each_pdev( d, pdev )
- amd_iommu_flush_iotlb(pdev, gaddr, order);
+ amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
}
/* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -138,7 +138,7 @@ static void amd_iommu_setup_domain_devic
if ( devfn == pdev->devfn )
enable_ats_device(iommu->seg, bus, devfn);
- amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
+ amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
}
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -78,8 +78,8 @@ void iommu_dte_set_guest_cr3(u32 *dte, u
void amd_iommu_flush_all_pages(struct domain *d);
void amd_iommu_flush_pages(struct domain *d, unsigned long gfn,
unsigned int order);
-void amd_iommu_flush_iotlb(struct pci_dev *pdev, uint64_t gaddr,
- unsigned int order);
+void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
+ uint64_t gaddr, unsigned int order);
void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf);
void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf);
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);

26328-IOMMU-pdev-type.patch Normal file
View File

@ -0,0 +1,222 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559679 -3600
# Node ID 11fa145c880ee814aaf56a7f47f47ee3e5560c7c
# Parent 2a2c63f641ee3bda4ad552eb0b3ea479d37590cc
IOMMU/PCI: consolidate pdev_type() and cache its result for a given device
Add an "unknown" device types as well as one for PCI-to-PCIe bridges
(the latter of which other IOMMU code with or without this patch
doesn't appear to handle properly).
Make sure we don't mistake a device whose config space we can't access
for a legacy PCI device (after all, we in fact don't know how to deal
with such a device, and hence shouldn't try to).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -144,7 +144,7 @@ static struct pci_dev *alloc_pdev(struct
spin_lock_init(&pdev->msix_table_lock);
/* update bus2bridge */
- switch ( pdev_type(pseg->nr, bus, devfn) )
+ switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
{
u8 sec_bus, sub_bus;
@@ -184,7 +184,7 @@ static struct pci_dev *alloc_pdev(struct
static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
{
/* update bus2bridge */
- switch ( pdev_type(pseg->nr, pdev->bus, pdev->devfn) )
+ switch ( pdev->type )
{
u8 dev, func, sec_bus, sub_bus;
@@ -202,6 +202,9 @@ static void free_pdev(struct pci_seg *ps
pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus];
spin_unlock(&pseg->bus2bridge_lock);
break;
+
+ default:
+ break;
}
list_del(&pdev->alldevs_list);
@@ -563,20 +566,30 @@ void pci_release_devices(struct domain *
#define PCI_CLASS_BRIDGE_PCI 0x0604
-int pdev_type(u16 seg, u8 bus, u8 devfn)
+enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
{
u16 class_device, creg;
u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);
class_device = pci_conf_read16(seg, bus, d, f, PCI_CLASS_DEVICE);
- if ( class_device == PCI_CLASS_BRIDGE_PCI )
+ switch ( class_device )
{
+ case PCI_CLASS_BRIDGE_PCI:
if ( !pos )
return DEV_TYPE_LEGACY_PCI_BRIDGE;
creg = pci_conf_read16(seg, bus, d, f, pos + PCI_EXP_FLAGS);
- return ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == PCI_EXP_TYPE_PCI_BRIDGE ?
- DEV_TYPE_PCIe2PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE;
+ switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 )
+ {
+ case PCI_EXP_TYPE_PCI_BRIDGE:
+ return DEV_TYPE_PCIe2PCI_BRIDGE;
+ case PCI_EXP_TYPE_PCIE_BRIDGE:
+ return DEV_TYPE_PCI2PCIe_BRIDGE;
+ }
+ return DEV_TYPE_PCIe_BRIDGE;
+
+ case 0x0000: case 0xffff:
+ return DEV_TYPE_PCI_UNKNOWN;
}
return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -426,7 +426,6 @@ void io_apic_write_remap_rte(
static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
{
- int type;
u16 seg;
u8 bus, devfn, secbus;
int ret;
@@ -437,8 +436,7 @@ static void set_msi_source_id(struct pci
seg = pdev->seg;
bus = pdev->bus;
devfn = pdev->devfn;
- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -470,7 +468,7 @@ static void set_msi_source_id(struct pci
default:
dprintk(XENLOG_WARNING VTDPREFIX,
"d%d: unknown(%u): %04x:%02x:%02x.%u\n",
- pdev->domain->domain_id, type,
+ pdev->domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
break;
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1450,7 +1450,6 @@ static int domain_context_mapping(
{
struct acpi_drhd_unit *drhd;
int ret = 0;
- u32 type;
u8 seg = pdev->seg, bus = pdev->bus, secbus;
drhd = acpi_find_matched_drhd_unit(pdev);
@@ -1459,8 +1458,7 @@ static int domain_context_mapping(
ASSERT(spin_is_locked(&pcidevs_lock));
- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -1510,7 +1508,7 @@ static int domain_context_mapping(
default:
dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
- domain->domain_id, type,
+ domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = -EINVAL;
break;
@@ -1582,7 +1580,6 @@ static int domain_context_unmap(
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int ret = 0;
- u32 type;
u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
int found = 0;
@@ -1591,8 +1588,7 @@ static int domain_context_unmap(
return -ENODEV;
iommu = drhd->iommu;
- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -1639,7 +1635,7 @@ static int domain_context_unmap(
default:
dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
- domain->domain_id, type,
+ domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = -EINVAL;
goto out;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -62,6 +62,17 @@ struct pci_dev {
const u16 seg;
const u8 bus;
const u8 devfn;
+
+ enum pdev_type {
+ DEV_TYPE_PCI_UNKNOWN,
+ DEV_TYPE_PCIe_ENDPOINT,
+ DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
+ DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge
+ DEV_TYPE_PCI2PCIe_BRIDGE, // PCI/PCIx-to-PCIe bridge
+ DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge
+ DEV_TYPE_PCI,
+ } type;
+
struct pci_dev_info info;
struct arch_pci_dev arch;
struct {
@@ -83,18 +94,10 @@ struct pci_dev {
extern spinlock_t pcidevs_lock;
-enum {
- DEV_TYPE_PCIe_ENDPOINT,
- DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
- DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge
- DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge
- DEV_TYPE_PCI,
-};
-
bool_t pci_known_segment(u16 seg);
int pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func);
int scan_pci_devices(void);
-int pdev_type(u16 seg, u8 bus, u8 devfn);
+enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn);
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
struct pci_dev *pci_lock_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_lock_domain_pdev(
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -371,6 +371,9 @@
#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */
+#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */
+#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */
+#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */
#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
#define PCI_EXP_DEVCAP 4 /* Device capabilities */

View File

@ -0,0 +1,365 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559742 -3600
# Node ID c9a01b396cb4eaedef30e9a6ed615115a9f8bfc5
# Parent 11fa145c880ee814aaf56a7f47f47ee3e5560c7c
IOMMU: add phantom function support
Apart from the device context entry generated for the base function,
context entries also need to be generated for all of its phantom
functions.
In order to distinguish different use cases, a variant of
pci_get_pdev() is being introduced that, even when passed a phantom
function number, would return the underlying actual device.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s
return;
for_each_pdev( d, pdev )
- amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
+ {
+ u8 devfn = pdev->devfn;
+
+ do {
+ amd_iommu_flush_iotlb(devfn, pdev, gaddr, order);
+ devfn += pdev->phantom_stride;
+ } while ( devfn != pdev->devfn &&
+ PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+ }
}
/* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -692,7 +692,7 @@ void parse_ppr_log_entry(struct amd_iomm
devfn = PCI_DEVFN2(device_id);
spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(iommu->seg, bus, devfn);
+ pdev = pci_get_real_pdev(iommu->seg, bus, devfn);
spin_unlock(&pcidevs_lock);
if ( pdev )
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -612,7 +612,6 @@ static int update_paging_mode(struct dom
for_each_pdev( d, pdev )
{
bdf = (pdev->bus << 8) | pdev->devfn;
- req_id = get_dma_requestor_id(pdev->seg, bdf);
iommu = find_iommu_for_device(pdev->seg, bdf);
if ( !iommu )
{
@@ -621,16 +620,21 @@ static int update_paging_mode(struct dom
}
spin_lock_irqsave(&iommu->lock, flags);
- device_entry = iommu->dev_table.buffer +
- (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
-
- /* valid = 0 only works for dom0 passthrough mode */
- amd_iommu_set_root_page_table((u32 *)device_entry,
- page_to_maddr(hd->root_table),
- hd->domain_id,
- hd->paging_mode, 1);
-
- amd_iommu_flush_device(iommu, req_id);
+ do {
+ req_id = get_dma_requestor_id(pdev->seg, bdf);
+ device_entry = iommu->dev_table.buffer +
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+
+ /* valid = 0 only works for dom0 passthrough mode */
+ amd_iommu_set_root_page_table((u32 *)device_entry,
+ page_to_maddr(hd->root_table),
+ hd->domain_id,
+ hd->paging_mode, 1);
+
+ amd_iommu_flush_device(iommu, req_id);
+ bdf += pdev->phantom_stride;
+ } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
+ PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
spin_unlock_irqrestore(&iommu->lock, flags);
}
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -157,6 +157,8 @@ void __init iommu_dom0_init(struct domai
int iommu_add_device(struct pci_dev *pdev)
{
struct hvm_iommu *hd;
+ int rc;
+ u8 devfn;
if ( !pdev->domain )
return -EINVAL;
@@ -167,7 +169,20 @@ int iommu_add_device(struct pci_dev *pde
if ( !iommu_enabled || !hd->platform_ops )
return 0;
- return hd->platform_ops->add_device(pdev->devfn, pdev);
+ rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+ if ( rc || !pdev->phantom_stride )
+ return rc;
+
+ for ( devfn = pdev->devfn ; ; )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ return 0;
+ rc = hd->platform_ops->add_device(devfn, pdev);
+ if ( rc )
+ printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ }
}
int iommu_enable_device(struct pci_dev *pdev)
@@ -190,6 +205,8 @@ int iommu_enable_device(struct pci_dev *
int iommu_remove_device(struct pci_dev *pdev)
{
struct hvm_iommu *hd;
+ u8 devfn;
+
if ( !pdev->domain )
return -EINVAL;
@@ -197,6 +214,22 @@ int iommu_remove_device(struct pci_dev *
if ( !iommu_enabled || !hd->platform_ops )
return 0;
+ for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+ {
+ int rc;
+
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->remove_device(devfn, pdev);
+ if ( !rc )
+ continue;
+
+ printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ return rc;
+ }
+
return hd->platform_ops->remove_device(pdev->devfn, pdev);
}
@@ -244,6 +277,18 @@ static int assign_device(struct domain *
if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
goto done;
+ for ( ; pdev->phantom_stride; rc = 0 )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->assign_device(d, devfn, pdev);
+ if ( rc )
+ printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ rc);
+ }
+
if ( has_arch_pdevs(d) && !need_iommu(d) )
{
d->need_iommu = 1;
@@ -376,6 +421,21 @@ int deassign_device(struct domain *d, u1
if ( !pdev )
return -ENODEV;
+ while ( pdev->phantom_stride )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+ if ( !ret )
+ continue;
+
+ printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+ return ret;
+ }
+
+ devfn = pdev->devfn;
ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
if ( ret )
{
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -146,6 +146,8 @@ static struct pci_dev *alloc_pdev(struct
/* update bus2bridge */
switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
{
+ int pos;
+ u16 cap;
u8 sec_bus, sub_bus;
case DEV_TYPE_PCIe_BRIDGE:
@@ -169,6 +171,20 @@ static struct pci_dev *alloc_pdev(struct
break;
case DEV_TYPE_PCIe_ENDPOINT:
+ pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
+ PCI_FUNC(devfn), PCI_CAP_ID_EXP);
+ BUG_ON(!pos);
+ cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
+ PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
+ if ( cap & PCI_EXP_DEVCAP_PHANTOM )
+ {
+ pdev->phantom_stride = 8 >> MASK_EXTR(cap,
+ PCI_EXP_DEVCAP_PHANTOM);
+ if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
+ pdev->phantom_stride = 0;
+ }
+ break;
+
case DEV_TYPE_PCI:
break;
@@ -266,6 +282,27 @@ struct pci_dev *pci_get_pdev(int seg, in
return NULL;
}
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
+{
+ struct pci_dev *pdev;
+ int stride;
+
+ if ( seg < 0 || bus < 0 || devfn < 0 )
+ return NULL;
+
+ for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
+ !pdev && stride; stride >>= 1 )
+ {
+ if ( !(devfn & (8 - stride)) )
+ continue;
+ pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
+ if ( pdev && stride != pdev->phantom_stride )
+ pdev = NULL;
+ }
+
+ return pdev;
+}
+
struct pci_dev *pci_get_pdev_by_domain(
struct domain *d, int seg, int bus, int devfn)
{
@@ -464,8 +501,19 @@ int pci_add_device(u16 seg, u8 bus, u8 d
out:
spin_unlock(&pcidevs_lock);
- printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
- seg, bus, slot, func);
+ if ( !ret )
+ {
+ printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
+ seg, bus, slot, func);
+ while ( pdev->phantom_stride )
+ {
+ func += pdev->phantom_stride;
+ if ( PCI_SLOT(func) )
+ break;
+ printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
+ seg, bus, slot, func);
+ }
+ }
return ret;
}
@@ -657,7 +705,7 @@ void pci_check_disable_device(u16 seg, u
u16 cword;
spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(seg, bus, devfn);
+ pdev = pci_get_real_pdev(seg, bus, devfn);
if ( pdev )
{
if ( now < pdev->fault.time ||
@@ -674,6 +722,7 @@ void pci_check_disable_device(u16 seg, u
/* Tell the device to stop DMAing; we can't rely on the guest to
* control it for us. */
+ devfn = pdev->devfn;
cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
PCI_COMMAND);
pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
@@ -731,6 +780,27 @@ struct setup_dom0 {
int (*handler)(u8 devfn, struct pci_dev *);
};
+static void setup_one_dom0_device(const struct setup_dom0 *ctxt,
+ struct pci_dev *pdev)
+{
+ u8 devfn = pdev->devfn;
+
+ do {
+ int err = ctxt->handler(devfn, pdev);
+
+ if ( err )
+ {
+ printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ ctxt->d->domain_id, err);
+ if ( devfn == pdev->devfn )
+ return;
+ }
+ devfn += pdev->phantom_stride;
+ } while ( devfn != pdev->devfn &&
+ PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+}
+
static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
{
struct setup_dom0 *ctxt = arg;
@@ -747,7 +817,7 @@ static int __init _setup_dom0_pci_device
pdev->domain = ctxt->d;
list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
- ctxt->handler(devfn, pdev);
+ setup_one_dom0_device(ctxt, pdev);
}
}
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -58,6 +58,9 @@ do {
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x))
+#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
+#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+
#define reserve_bootmem(_p,_l) ((void)0)
struct domain;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -63,6 +63,8 @@ struct pci_dev {
const u8 bus;
const u8 devfn;
+ u8 phantom_stride;
+
enum pdev_type {
DEV_TYPE_PCI_UNKNOWN,
DEV_TYPE_PCIe_ENDPOINT,
@@ -113,6 +115,7 @@ int pci_remove_device(u16 seg, u8 bus, u
int pci_ro_device(int seg, int bus, int devfn);
void arch_pci_ro_device(int seg, int bdf);
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(
struct domain *, int seg, int bus, int devfn);
void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);

View File

@ -0,0 +1,42 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559812 -3600
# Node ID b514b7118958327605e33dd387944832bc8d734a
# Parent c9a01b396cb4eaedef30e9a6ed615115a9f8bfc5
VT-d: relax source qualifier for MSI of phantom functions
With ordinary requests allowed to come from phantom functions, the
remapping tables ought to be set up to allow for MSI triggers to come
from other than the "real" device too.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -438,13 +438,22 @@ static void set_msi_source_id(struct pci
devfn = pdev->devfn;
switch ( pdev->type )
{
+ unsigned int sq;
+
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
case DEV_TYPE_LEGACY_PCI_BRIDGE:
break;
case DEV_TYPE_PCIe_ENDPOINT:
- set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16, PCI_BDF2(bus, devfn));
+ switch ( pdev->phantom_stride )
+ {
+ case 1: sq = SQ_13_IGNORE_3; break;
+ case 2: sq = SQ_13_IGNORE_2; break;
+ case 4: sq = SQ_13_IGNORE_1; break;
+ default: sq = SQ_ALL_16; break;
+ }
+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn));
break;
case DEV_TYPE_PCI:

View File

@ -0,0 +1,108 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559889 -3600
# Node ID 23c4bbc0111dd807561b2c62cbc5798220943a0d
# Parent b514b7118958327605e33dd387944832bc8d734a
IOMMU: add option to specify devices behaving like ones using phantom functions
At least certain Marvell SATA controllers are known to issue bus master
requests with a non-zero function as origin, despite themselves being
single-function devices.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -672,6 +672,16 @@ Defaults to booting secondary processors
Default: `on`
+### pci-phantom
+> `=[<seg>:]<bus>:<device>,<stride>`
+
+Mark a group of PCI devices as using phantom functions without actually
+advertising this, so that the IOMMU can create translation contexts for them.
+
+All numbers specified must be hexadecimal ones.
+
+This option can be specified more than once (up to 8 times at present).
+
### ple\_gap
> `= <integer>`
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -123,6 +123,49 @@ const unsigned long *pci_get_ro_map(u16
return pseg ? pseg->ro_map : NULL;
}
+static struct phantom_dev {
+ u16 seg;
+ u8 bus, slot, stride;
+} phantom_devs[8];
+static unsigned int nr_phantom_devs;
+
+static void __init parse_phantom_dev(char *str) {
+ const char *s = str;
+ struct phantom_dev phantom;
+
+ if ( !s || !*s || nr_phantom_devs >= ARRAY_SIZE(phantom_devs) )
+ return;
+
+ phantom.seg = simple_strtol(s, &s, 16);
+ if ( *s != ':' )
+ return;
+
+ phantom.bus = simple_strtol(s + 1, &s, 16);
+ if ( *s == ',' )
+ {
+ phantom.slot = phantom.bus;
+ phantom.bus = phantom.seg;
+ phantom.seg = 0;
+ }
+ else if ( *s == ':' )
+ phantom.slot = simple_strtol(s + 1, &s, 16);
+ else
+ return;
+
+ if ( *s != ',' )
+ return;
+ switch ( phantom.stride = simple_strtol(s + 1, &s, 0) )
+ {
+ case 1: case 2: case 4:
+ if ( *s )
+ default:
+ return;
+ }
+
+ phantom_devs[nr_phantom_devs++] = phantom;
+}
+custom_param("pci-phantom", parse_phantom_dev);
+
static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
struct pci_dev *pdev;
@@ -183,6 +226,20 @@ static struct pci_dev *alloc_pdev(struct
if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
pdev->phantom_stride = 0;
}
+ else
+ {
+ unsigned int i;
+
+ for ( i = 0; i < nr_phantom_devs; ++i )
+ if ( phantom_devs[i].seg == pseg->nr &&
+ phantom_devs[i].bus == bus &&
+ phantom_devs[i].slot == PCI_SLOT(devfn) &&
+ phantom_devs[i].stride > PCI_FUNC(devfn) )
+ {
+ pdev->phantom_stride = phantom_devs[i].stride;
+ break;
+ }
+ }
break;
case DEV_TYPE_PCI:

View File

@ -0,0 +1,30 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357561709 -3600
# Node ID 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
# Parent 23c4bbc0111dd807561b2c62cbc5798220943a0d
x86: compat_show_guest_stack() should not truncate MFN
Re-using "addr" here was a mistake, as it is a 32-bit quantity.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
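The bug class is worth a self-contained illustration (plain C, not Xen
code): assigning a frame number to a 32-bit variable silently drops the
high bits, so later comparisons can match the wrong frame:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t mfn  = 0x123456789ULL; /* an MFN needing more than 32 bits */
        uint32_t addr = (uint32_t)mfn;  /* silently truncated to 0x23456789 */

        /* The comparison that looked exact now uses only the low 32 bits: */
        printf("%d\n", mfn == addr);    /* prints 0 */
        return 0;
    }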
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -20,11 +20,12 @@ void compat_show_guest_stack(struct vcpu
if ( v != current )
{
struct vcpu *vcpu;
+ unsigned long mfn;
ASSERT(guest_kernel_mode(v, regs));
- addr = read_cr3() >> PAGE_SHIFT;
+ mfn = read_cr3() >> PAGE_SHIFT;
for_each_vcpu( v->domain, vcpu )
- if ( pagetable_get_pfn(vcpu->arch.guest_table) == addr )
+ if ( pagetable_get_pfn(vcpu->arch.guest_table) == mfn )
break;
if ( !vcpu )
{

View File

@ -0,0 +1,30 @@
References: CVE-2013-0154 XSA-37 bnc#797031
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357564826 -3600
# Node ID e1facbde56ff4e5e85f9a4935abc99eb24367cd0
# Parent 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
x86: fix assertion in get_page_type()
c/s 22998:e9fab50d7b61 (and immediately following ones) made it
possible for __get_page_type() to return something other than -EINVAL,
in particular -EBUSY. Consequently, the assertion in get_page_type()
should check only for the return values we absolutely don't expect to
see there.
This is XSA-37 / CVE-2013-0154.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2603,7 +2603,7 @@ int get_page_type(struct page_info *page
int rc = __get_page_type(page, type, 0);
if ( likely(rc == 0) )
return 1;
- ASSERT(rc == -EINVAL);
+ ASSERT(rc != -EINTR && rc != -EAGAIN);
return 0;
}

View File

@ -1,366 +0,0 @@
libxc: builder: limit maximum size of kernel/ramdisk.
Allowing user-supplied kernels of arbitrary sizes, especially during
decompression, can swallow up dom0 memory leading to either virtual
address space exhaustion in the builder process or allocation
failures/OOM killing of both toolstack and unrelated processes.
We disable these checks when building in a stub domain for pvgrub
since this uses the guest's own memory and is isolated.
Decompression of gzip compressed kernels and ramdisks has been safe
since 14954:58205257517d (Xen 3.1.0 onwards).
This is XSA-25 / CVE-2012-4544.
Also make explicit checks for buffer overflows in various
decompression routines. These were already ruled out due to other
properties of the code, but check them anyway as a belt-and-braces measure.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
[ Includes 25589:60f09d1ab1fe for CVE-2012-2625 ]
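A caller that does trust its images (as the pvgrub stub-domain hunk
below does, since decompression there consumes the guest's own memory)
can lift the limits through the new setters; a minimal usage sketch,
with xc_handle, cmdline and features as in that hunk:

    struct xc_dom_image *dom = xc_dom_allocate(xc_handle, cmdline, features);

    /* Both limits default to XC_DOM_DECOMPRESS_MAX (1GB); 0 disables them. */
    xc_dom_kernel_max_size(dom, 0);
    xc_dom_ramdisk_max_size(dom, 0);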
Index: xen-4.2.0-testing/stubdom/grub/kexec.c
===================================================================
--- xen-4.2.0-testing.orig/stubdom/grub/kexec.c
+++ xen-4.2.0-testing/stubdom/grub/kexec.c
@@ -137,6 +137,10 @@ void kexec(void *kernel, long kernel_siz
dom = xc_dom_allocate(xc_handle, cmdline, features);
dom->allocate = kexec_allocate;
+ /* We are using guest owned memory, therefore no limits. */
+ xc_dom_kernel_max_size(dom, 0);
+ xc_dom_ramdisk_max_size(dom, 0);
+
dom->kernel_blob = kernel;
dom->kernel_size = kernel_size;
Index: xen-4.2.0-testing/tools/libxc/xc_dom.h
===================================================================
--- xen-4.2.0-testing.orig/tools/libxc/xc_dom.h
+++ xen-4.2.0-testing/tools/libxc/xc_dom.h
@@ -55,6 +55,9 @@ struct xc_dom_image {
void *ramdisk_blob;
size_t ramdisk_size;
+ size_t max_kernel_size;
+ size_t max_ramdisk_size;
+
/* arguments and parameters */
char *cmdline;
uint32_t f_requested[XENFEAT_NR_SUBMAPS];
@@ -180,6 +183,23 @@ void xc_dom_release_phys(struct xc_dom_i
void xc_dom_release(struct xc_dom_image *dom);
int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
+/* Set this larger if you have enormous ramdisks/kernels. Note that
+ * you should trust all kernels not to be maliciously large (e.g. to
+ * exhaust all dom0 memory) if you do this (see CVE-2012-4544 /
+ * XSA-25). You can also set the default independently for
+ * ramdisks/kernels in xc_dom_allocate() or call
+ * xc_dom_{kernel,ramdisk}_max_size.
+ */
+#ifndef XC_DOM_DECOMPRESS_MAX
+#define XC_DOM_DECOMPRESS_MAX (1024*1024*1024) /* 1GB */
+#endif
+
+int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz);
+int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz);
+
+int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz);
+int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz);
+
size_t xc_dom_check_gzip(xc_interface *xch,
void *blob, size_t ziplen);
int xc_dom_do_gunzip(xc_interface *xch,
@@ -240,7 +260,8 @@ void xc_dom_log_memory_footprint(struct
void *xc_dom_malloc(struct xc_dom_image *dom, size_t size);
void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size);
void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
- const char *filename, size_t * size);
+ const char *filename, size_t * size,
+ const size_t max_size);
char *xc_dom_strdup(struct xc_dom_image *dom, const char *str);
/* --- alloc memory pool ------------------------------------------- */
Index: xen-4.2.0-testing/tools/libxc/xc_dom_bzimageloader.c
===================================================================
--- xen-4.2.0-testing.orig/tools/libxc/xc_dom_bzimageloader.c
+++ xen-4.2.0-testing/tools/libxc/xc_dom_bzimageloader.c
@@ -47,13 +47,19 @@ static int xc_try_bzip2_decode(
char *out_buf;
char *tmp_buf;
int retval = -1;
- int outsize;
+ unsigned int outsize;
uint64_t total;
stream.bzalloc = NULL;
stream.bzfree = NULL;
stream.opaque = NULL;
+ if ( dom->kernel_size == 0)
+ {
+ DOMPRINTF("BZIP2: Input is 0 size");
+ return -1;
+ }
+
ret = BZ2_bzDecompressInit(&stream, 0, 0);
if ( ret != BZ_OK )
{
@@ -66,6 +72,17 @@ static int xc_try_bzip2_decode(
* the input buffer to start, and we'll realloc as needed.
*/
outsize = dom->kernel_size;
+
+ /*
+ * stream.avail_in and outsize are unsigned int, while kernel_size
+ * is a size_t. Check we aren't overflowing.
+ */
+ if ( outsize != dom->kernel_size )
+ {
+ DOMPRINTF("BZIP2: Input too large");
+ goto bzip2_cleanup;
+ }
+
out_buf = malloc(outsize);
if ( out_buf == NULL )
{
@@ -98,13 +115,20 @@ static int xc_try_bzip2_decode(
if ( stream.avail_out == 0 )
{
/* Protect against output buffer overflow */
- if ( outsize > INT_MAX / 2 )
+ if ( outsize > UINT_MAX / 2 )
{
DOMPRINTF("BZIP2: output buffer overflow");
free(out_buf);
goto bzip2_cleanup;
}
+ if ( xc_dom_kernel_check_size(dom, outsize * 2) )
+ {
+ DOMPRINTF("BZIP2: output too large");
+ free(out_buf);
+ goto bzip2_cleanup;
+ }
+
tmp_buf = realloc(out_buf, outsize * 2);
if ( tmp_buf == NULL )
{
@@ -172,9 +196,15 @@ static int _xc_try_lzma_decode(
unsigned char *out_buf;
unsigned char *tmp_buf;
int retval = -1;
- int outsize;
+ size_t outsize;
const char *msg;
+ if ( dom->kernel_size == 0)
+ {
+ DOMPRINTF("LZMA: Input is 0 size");
+ return -1;
+ }
+
/* sigh. We don't know up-front how much memory we are going to need
* for the output buffer. Allocate the output buffer to be equal
* the input buffer to start, and we'll realloc as needed.
@@ -244,13 +274,20 @@ static int _xc_try_lzma_decode(
if ( stream->avail_out == 0 )
{
/* Protect against output buffer overflow */
- if ( outsize > INT_MAX / 2 )
+ if ( outsize > SIZE_MAX / 2 )
{
DOMPRINTF("%s: output buffer overflow", what);
free(out_buf);
goto lzma_cleanup;
}
+ if ( xc_dom_kernel_check_size(dom, outsize * 2) )
+ {
+ DOMPRINTF("LZMA: output too large");
+ free(out_buf);
+ goto lzma_cleanup;
+ }
+
tmp_buf = realloc(out_buf, outsize * 2);
if ( tmp_buf == NULL )
{
@@ -359,6 +396,12 @@ static int xc_try_lzo1x_decode(
0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a
};
+ /*
+ * lzo_uint should match size_t. Check that this is the case to be
+ * sure we won't overflow various lzo_uint fields.
+ */
+ XC_BUILD_BUG_ON(sizeof(lzo_uint) != sizeof(size_t));
+
ret = lzo_init();
if ( ret != LZO_E_OK )
{
@@ -438,6 +481,14 @@ static int xc_try_lzo1x_decode(
if ( src_len <= 0 || src_len > dst_len || src_len > left )
break;
+ msg = "Output buffer overflow";
+ if ( *size > SIZE_MAX - dst_len )
+ break;
+
+ msg = "Decompressed image too large";
+ if ( xc_dom_kernel_check_size(dom, *size + dst_len) )
+ break;
+
msg = "Failed to (re)alloc memory";
tmp_buf = realloc(out_buf, *size + dst_len);
if ( tmp_buf == NULL )
Index: xen-4.2.0-testing/tools/libxc/xc_dom_core.c
===================================================================
--- xen-4.2.0-testing.orig/tools/libxc/xc_dom_core.c
+++ xen-4.2.0-testing/tools/libxc/xc_dom_core.c
@@ -159,7 +159,8 @@ void *xc_dom_malloc_page_aligned(struct
}
void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
- const char *filename, size_t * size)
+ const char *filename, size_t * size,
+ const size_t max_size)
{
struct xc_dom_mem *block = NULL;
int fd = -1;
@@ -171,6 +172,13 @@ void *xc_dom_malloc_filemap(struct xc_do
lseek(fd, 0, SEEK_SET);
*size = lseek(fd, 0, SEEK_END);
+ if ( max_size && *size > max_size )
+ {
+ xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
+ "tried to map file which is too large");
+ goto err;
+ }
+
block = malloc(sizeof(*block));
if ( block == NULL )
goto err;
@@ -222,6 +230,40 @@ char *xc_dom_strdup(struct xc_dom_image
}
/* ------------------------------------------------------------------------ */
+/* decompression buffer sizing */
+int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz)
+{
+ /* No limit */
+ if ( !dom->max_kernel_size )
+ return 0;
+
+ if ( sz > dom->max_kernel_size )
+ {
+ xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+ "kernel image too large");
+ return 1;
+ }
+
+ return 0;
+}
+
+int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz)
+{
+ /* No limit */
+ if ( !dom->max_ramdisk_size )
+ return 0;
+
+ if ( sz > dom->max_ramdisk_size )
+ {
+ xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+ "ramdisk image too large");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
/* read files, copy memory blocks, with transparent gunzip */
size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen)
@@ -235,7 +277,7 @@ size_t xc_dom_check_gzip(xc_interface *x
gzlen = blob + ziplen - 4;
unziplen = gzlen[3] << 24 | gzlen[2] << 16 | gzlen[1] << 8 | gzlen[0];
- if ( (unziplen < 0) || (unziplen > (1024*1024*1024)) ) /* 1GB limit */
+ if ( (unziplen < 0) || (unziplen > XC_DOM_DECOMPRESS_MAX) )
{
xc_dom_printf
(xch,
@@ -288,6 +330,9 @@ int xc_dom_try_gunzip(struct xc_dom_imag
if ( unziplen == 0 )
return 0;
+ if ( xc_dom_kernel_check_size(dom, unziplen) )
+ return 0;
+
unzip = xc_dom_malloc(dom, unziplen);
if ( unzip == NULL )
return -1;
@@ -588,6 +633,9 @@ struct xc_dom_image *xc_dom_allocate(xc_
memset(dom, 0, sizeof(*dom));
dom->xch = xch;
+ dom->max_kernel_size = XC_DOM_DECOMPRESS_MAX;
+ dom->max_ramdisk_size = XC_DOM_DECOMPRESS_MAX;
+
if ( cmdline )
dom->cmdline = xc_dom_strdup(dom, cmdline);
if ( features )
@@ -608,10 +656,25 @@ struct xc_dom_image *xc_dom_allocate(xc_
return NULL;
}
+int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz)
+{
+ DOMPRINTF("%s: kernel_max_size=%zx", __FUNCTION__, sz);
+ dom->max_kernel_size = sz;
+ return 0;
+}
+
+int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz)
+{
+ DOMPRINTF("%s: ramdisk_max_size=%zx", __FUNCTION__, sz);
+ dom->max_ramdisk_size = sz;
+ return 0;
+}
+
int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename)
{
DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
- dom->kernel_blob = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size);
+ dom->kernel_blob = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size,
+ dom->max_kernel_size);
if ( dom->kernel_blob == NULL )
return -1;
return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
@@ -621,7 +684,9 @@ int xc_dom_ramdisk_file(struct xc_dom_im
{
DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
dom->ramdisk_blob =
- xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size);
+ xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size,
+ dom->max_ramdisk_size);
+
if ( dom->ramdisk_blob == NULL )
return -1;
// return xc_dom_try_gunzip(dom, &dom->ramdisk_blob, &dom->ramdisk_size);
@@ -781,7 +846,11 @@ int xc_dom_build_image(struct xc_dom_ima
void *ramdiskmap;
unziplen = xc_dom_check_gzip(dom->xch, dom->ramdisk_blob, dom->ramdisk_size);
+ if ( xc_dom_ramdisk_check_size(dom, unziplen) != 0 )
+ unziplen = 0;
+
ramdisklen = unziplen ? unziplen : dom->ramdisk_size;
+
if ( xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", 0,
ramdisklen) != 0 )
goto err;

View File

@ -1,105 +0,0 @@
References: CVE-2012-5510 XSA-26 bnc#789945
gnttab: fix releasing of memory upon switches between versions
gnttab_unpopulate_status_frames() incompletely freed the pages
previously used as status frames, in that they did not get removed from
the domain's xenpage_list, thus causing subsequent list corruption
when those pages did get allocated again for the same or another purpose.
Similarly, grant_table_create() and gnttab_grow_table() both improperly
clean up in the event of an error - pages already shared with the guest
can't be freed by just passing them to free_xenheap_page(). Fix this by
sharing the pages only after all allocations succeeded.
This is CVE-2012-5510 / XSA-26.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
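Both creation-path fixes apply the same ordering rule: finish every
fallible allocation before sharing anything with the guest, so the
error paths can still use plain free_xenheap_page(). Roughly (nframes
and frames[] are hypothetical stand-ins):

    /* 1. All allocations first - a failure here is trivially undone. */
    for ( i = 0; i < nframes; i++ )
        if ( (frames[i] = alloc_xenheap_page()) == NULL )
            goto cleanup;

    /* 2. Only now publish the pages to the guest - nothing past this
     *    point can fail, so no "unshare" logic is needed on error. */
    for ( i = 0; i < nframes; i++ )
        gnttab_create_shared_page(d, t, i);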
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1170,12 +1170,13 @@ fault:
}
static int
-gnttab_populate_status_frames(struct domain *d, struct grant_table *gt)
+gnttab_populate_status_frames(struct domain *d, struct grant_table *gt,
+ unsigned int req_nr_frames)
{
unsigned i;
unsigned req_status_frames;
- req_status_frames = grant_to_status_frames(gt->nr_grant_frames);
+ req_status_frames = grant_to_status_frames(req_nr_frames);
for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
{
if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
@@ -1206,7 +1207,12 @@ gnttab_unpopulate_status_frames(struct d
for ( i = 0; i < nr_status_frames(gt); i++ )
{
- page_set_owner(virt_to_page(gt->status[i]), dom_xen);
+ struct page_info *pg = virt_to_page(gt->status[i]);
+
+ BUG_ON(page_get_owner(pg) != d);
+ if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) )
+ put_page(pg);
+ BUG_ON(pg->count_info & ~PGC_xen_heap);
free_xenheap_page(gt->status[i]);
gt->status[i] = NULL;
}
@@ -1244,19 +1250,18 @@ gnttab_grow_table(struct domain *d, unsi
clear_page(gt->shared_raw[i]);
}
- /* Share the new shared frames with the recipient domain */
- for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ )
- gnttab_create_shared_page(d, gt, i);
-
- gt->nr_grant_frames = req_nr_frames;
-
/* Status pages - version 2 */
if (gt->gt_version > 1)
{
- if ( gnttab_populate_status_frames(d, gt) )
+ if ( gnttab_populate_status_frames(d, gt, req_nr_frames) )
goto shared_alloc_failed;
}
+ /* Share the new shared frames with the recipient domain */
+ for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ )
+ gnttab_create_shared_page(d, gt, i);
+ gt->nr_grant_frames = req_nr_frames;
+
return 1;
shared_alloc_failed:
@@ -2154,7 +2159,7 @@ gnttab_set_version(XEN_GUEST_HANDLE(gntt
if ( op.version == 2 && gt->gt_version < 2 )
{
- res = gnttab_populate_status_frames(d, gt);
+ res = gnttab_populate_status_frames(d, gt, nr_grant_frames(gt));
if ( res < 0)
goto out_unlock;
}
@@ -2597,14 +2602,15 @@ grant_table_create(
clear_page(t->shared_raw[i]);
}
- for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
- gnttab_create_shared_page(d, t, i);
-
/* Status pages for grant table - for version 2 */
t->status = xzalloc_array(grant_status_t *,
grant_to_status_frames(max_nr_grant_frames));
if ( t->status == NULL )
goto no_mem_4;
+
+ for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
+ gnttab_create_shared_page(d, t, i);
+
t->nr_status_frames = 0;
/* Okay, install the structure. */

View File

@ -1,136 +0,0 @@
References: CVE-2012-5511 XSA-27 bnc#789944
hvm: Limit the size of large HVM op batches
Doing large p2m updates for HVMOP_track_dirty_vram without preemption
ties up the physical processor. Integrating preemption into the p2m
updates is hard, so simply limit it to 1GB, which is sufficient for a
15000 * 15000 * 32bpp framebuffer.
For HVMOP_modified_memory and HVMOP_set_mem_type, add the necessary
machinery to make them preemptible.
This is CVE-2012-5511 / XSA-27.
Signed-off-by: Tim Deegan <tim@xen.org>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
v2: Provide definition of GB to fix x86-32 compile.
Signed-off-by: Jan Beulich <JBeulich@suse.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
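The machinery added below is the usual hypercall-continuation idiom:
process one page per iteration, and when preemption is due, store the
shrunken request back into the guest's argument block and return
-EAGAIN so the call is re-issued. In outline (process_one_pfn is a
hypothetical stand-in for the per-page work):

    while ( a.nr > 0 )
    {
        process_one_pfn(d, a.first_pfn);

        a.first_pfn++;
        a.nr--;

        /* Not the last iteration and preemption pending: save the
         * remaining work and bail out; -EAGAIN restarts the hypercall. */
        if ( a.nr > 0 && hypercall_preempt_check() )
        {
            rc = copy_to_guest(arg, &a, 1) ? -EFAULT : -EAGAIN;
            break;
        }
    }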
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4033,6 +4033,9 @@ long do_hvm_op(unsigned long op, XEN_GUE
if ( !is_hvm_domain(d) )
goto param_fail2;
+ if ( a.nr > GB(1) >> PAGE_SHIFT )
+ goto param_fail2;
+
rc = xsm_hvm_param(d, op);
if ( rc )
goto param_fail2;
@@ -4059,7 +4062,6 @@ long do_hvm_op(unsigned long op, XEN_GUE
{
struct xen_hvm_modified_memory a;
struct domain *d;
- unsigned long pfn;
if ( copy_from_guest(&a, arg, 1) )
return -EFAULT;
@@ -4086,9 +4088,11 @@ long do_hvm_op(unsigned long op, XEN_GUE
if ( !paging_mode_log_dirty(d) )
goto param_fail3;
- for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
+ while ( a.nr > 0 )
{
+ unsigned long pfn = a.first_pfn;
struct page_info *page;
+
page = get_page_from_gfn(d, pfn, NULL, P2M_UNSHARE);
if ( page )
{
@@ -4098,6 +4102,19 @@ long do_hvm_op(unsigned long op, XEN_GUE
sh_remove_shadows(d->vcpu[0], _mfn(page_to_mfn(page)), 1, 0);
put_page(page);
}
+
+ a.first_pfn++;
+ a.nr--;
+
+ /* Check for continuation if it's not the last iteration */
+ if ( a.nr > 0 && hypercall_preempt_check() )
+ {
+ if ( copy_to_guest(arg, &a, 1) )
+ rc = -EFAULT;
+ else
+ rc = -EAGAIN;
+ break;
+ }
}
param_fail3:
@@ -4153,7 +4170,6 @@ long do_hvm_op(unsigned long op, XEN_GUE
{
struct xen_hvm_set_mem_type a;
struct domain *d;
- unsigned long pfn;
/* Interface types to internal p2m types */
p2m_type_t memtype[] = {
@@ -4186,8 +4202,9 @@ long do_hvm_op(unsigned long op, XEN_GUE
if ( a.hvmmem_type >= ARRAY_SIZE(memtype) )
goto param_fail4;
- for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
+ while ( a.nr )
{
+ unsigned long pfn = a.first_pfn;
p2m_type_t t;
p2m_type_t nt;
mfn_t mfn;
@@ -4227,6 +4244,19 @@ long do_hvm_op(unsigned long op, XEN_GUE
}
}
put_gfn(d, pfn);
+
+ a.first_pfn++;
+ a.nr--;
+
+ /* Check for continuation if it's not the last iteration */
+ if ( a.nr > 0 && hypercall_preempt_check() )
+ {
+ if ( copy_to_guest(arg, &a, 1) )
+ rc = -EFAULT;
+ else
+ rc = -EAGAIN;
+ goto param_fail4;
+ }
}
rc = 0;
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -119,6 +119,9 @@ extern char wakeup_start[];
extern unsigned int video_mode, video_flags;
extern unsigned short boot_edid_caps;
extern unsigned char boot_edid_info[128];
+
+#define GB(_gb) (_gb ## UL << 30)
+
#endif
#define asmlinkage
@@ -134,7 +137,6 @@ extern unsigned char boot_edid_info[128]
#define PML4_ADDR(_slot) \
((((_slot ## UL) >> 8) * 0xffff000000000000UL) | \
(_slot ## UL << PML4_ENTRY_BITS))
-#define GB(_gb) (_gb ## UL << 30)
#else
#define PML4_ENTRY_BYTES (1 << PML4_ENTRY_BITS)
#define PML4_ADDR(_slot) \

View File

@ -1,47 +0,0 @@
References: CVE-2012-5513 XSA-29 bnc#789951
xen: add missing guest address range checks to XENMEM_exchange handlers
Ever since its introduction (3.0.3, iirc) the handler for this has been
using guest memory accessors that skip address range checking (i.e.
the ones prefixed with two underscores) without first range-checking
the accessed space (via guest_handle_okay()), allowing
a guest to access and overwrite hypervisor memory.
This is XSA-29 / CVE-2012-5513.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
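The rule the fix restores: the double-underscore accessors skip the
range check for speed, so they may only follow a successful
guest_handle_okay() covering the whole range. Sketched, with exch and
i as in the memory_exchange() hunk below:

    xen_pfn_t gmfn;

    /* Validate the complete extent list once, up front... */
    if ( !guest_handle_okay(exch.in.extent_start, exch.in.nr_extents) )
        return -EFAULT;

    /* ...and only then is the unchecked accessor legitimate. */
    if ( __copy_from_guest_offset(&gmfn, exch.in.extent_start, i, 1) )
        return -EFAULT;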
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -115,6 +115,12 @@ int compat_memory_op(unsigned int cmd, X
(cmp.xchg.out.nr_extents << cmp.xchg.out.extent_order)) )
return -EINVAL;
+ if ( !compat_handle_okay(cmp.xchg.in.extent_start,
+ cmp.xchg.in.nr_extents) ||
+ !compat_handle_okay(cmp.xchg.out.extent_start,
+ cmp.xchg.out.nr_extents) )
+ return -EFAULT;
+
start_extent = cmp.xchg.nr_exchanged;
end_extent = (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xchg)) /
(((1U << ABS(order_delta)) + 1) *
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -308,6 +308,13 @@ static long memory_exchange(XEN_GUEST_HA
goto fail_early;
}
+ if ( !guest_handle_okay(exch.in.extent_start, exch.in.nr_extents) ||
+ !guest_handle_okay(exch.out.extent_start, exch.out.nr_extents) )
+ {
+ rc = -EFAULT;
+ goto fail_early;
+ }
+
/* Only privileged guests can allocate multi-page contiguous extents. */
if ( !multipage_allocation_permitted(current->domain,
exch.in.extent_order) ||

View File

@ -1,57 +0,0 @@
References: CVE-2012-5514 XSA-30 bnc#789948
fix error handling of guest_physmap_mark_populate_on_demand()
The only user of the "out" label bypasses a necessary unlock, thus
enabling the caller to lock up Xen.
Also, the function was never meant to be called by a guest for itself,
so rather than inspecting the code paths in depth for potential other
problems this might cause, and adjusting e.g. the non-guest printk()
in the above error path, just disallow guest access to it.
Finally, the printk() (which has the potential of spamming the log,
all the more since it's not using XENLOG_GUEST) is being converted to
P2M_DEBUG(), as debugging is apparently what it was added for in the
first place.
This is XSA-30 / CVE-2012-5514.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
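The first half of the fix is the classic lock-versus-error-path rule:
every exit taken after gfn_lock() must pass through gfn_unlock(), so
the "out" label has to live inside the locked region. Schematically:

    gfn_lock(p2m, gfn, order);

    if ( p2m_is_ram(ot) )
    {
        rc = -EBUSY;
        goto out;                      /* error exit still holds the lock */
    }
    /* ... normal processing ... */

 out:
    gfn_unlock(p2m, gfn, order);       /* now reached on every path */
    return rc;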
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -1117,6 +1117,9 @@ guest_physmap_mark_populate_on_demand(st
mfn_t omfn;
int rc = 0;
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ return -EPERM;
+
if ( !paging_mode_translate(d) )
return -EINVAL;
@@ -1135,8 +1138,7 @@ guest_physmap_mark_populate_on_demand(st
omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
if ( p2m_is_ram(ot) )
{
- printk("%s: gfn_to_mfn returned type %d!\n",
- __func__, ot);
+ P2M_DEBUG("gfn_to_mfn returned type %d!\n", ot);
rc = -EBUSY;
goto out;
}
@@ -1160,9 +1162,9 @@ guest_physmap_mark_populate_on_demand(st
pod_unlock(p2m);
}
+out:
gfn_unlock(p2m, gfn, order);
-out:
return rc;
}

View File

@ -1,50 +0,0 @@
References: CVE-2012-5515 XSA-31 bnc#789950
memop: limit guest-specified extent order
Allowing unbounded order values here causes almost unbounded loops
and/or partially incomplete requests, particularly in PoD code.
The added range checks in populate_physmap(), decrease_reservation(),
and the "in" one in memory_exchange() architecturally all could use
PADDR_BITS - PAGE_SHIFT, and are being artificially constrained to
MAX_ORDER.
This is XSA-31 / CVE-2012-5515.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
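The added checks bound two distinct hazards: a huge guest-chosen
extent_order makes per-extent loops run for up to 2^order iterations
without preemption, and it can also overflow size computations of the
form nr_extents << order. Condensed (error handling simplified
relative to the real handlers):

    /* 1. Clamp the order itself; MAX_ORDER is stricter than the
     *    architectural PADDR_BITS - PAGE_SHIFT bound mentioned above. */
    if ( exch.in.extent_order > MAX_ORDER ||
         exch.out.extent_order > MAX_ORDER )
        return -EINVAL;

    /* 2. nr_extents << extent_order must not wrap an unsigned long. */
    if ( (~0UL >> exch.in.extent_order) < exch.in.nr_extents )
        return -EINVAL;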
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -115,7 +115,8 @@ static void populate_physmap(struct memo
if ( a->memflags & MEMF_populate_on_demand )
{
- if ( guest_physmap_mark_populate_on_demand(d, gpfn,
+ if ( a->extent_order > MAX_ORDER ||
+ guest_physmap_mark_populate_on_demand(d, gpfn,
a->extent_order) < 0 )
goto out;
}
@@ -235,7 +236,8 @@ static void decrease_reservation(struct
xen_pfn_t gmfn;
if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
- a->nr_extents-1) )
+ a->nr_extents-1) ||
+ a->extent_order > MAX_ORDER )
return;
for ( i = a->nr_done; i < a->nr_extents; i++ )
@@ -297,6 +299,9 @@ static long memory_exchange(XEN_GUEST_HA
if ( (exch.nr_exchanged > exch.in.nr_extents) ||
/* Input and output domain identifiers match? */
(exch.in.domid != exch.out.domid) ||
+ /* Extent orders are sensible? */
+ (exch.in.extent_order > MAX_ORDER) ||
+ (exch.out.extent_order > MAX_ORDER) ||
/* Sizes of input and output lists do not overflow a long? */
((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||

View File

@ -1,22 +0,0 @@
References: CVE-2012-5525 XSA-32 bnc#789952
x86: get_page_from_gfn() must return NULL for invalid GFNs
... also in the non-translated case.
This is XSA-32 / CVE-2012-5525.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -400,7 +400,7 @@ static inline struct page_info *get_page
if (t)
*t = p2m_ram_rw;
page = __mfn_to_page(gfn);
- return get_page(page, d) ? page : NULL;
+ return mfn_valid(gfn) && get_page(page, d) ? page : NULL;
}

CVE-2012-5634-xsa33.patch Normal file
View File

@ -0,0 +1,22 @@
References: CVE-2012-5634 XSA-33 bnc#794316
VT-d: fix interrupt remapping source validation for devices behind legacy bridges
Using SVT_VERIFY_BUS here doesn't make sense; native Linux uses
SVT_VERIFY_SID_SQ here as well.
This is XSA-33 / CVE-2012-5634.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
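For context (per the VT-d spec's SVT encoding): with SVT_VERIFY_BUS the
hardware interprets the 16-bit source-id field as a (start bus, end
bus) range, so handing it a full BDF yields a meaningless bus-range
check, whereas SVT_VERIFY_SID_SQ requests an exact source-id
comparison - which is what the bridge-owned requester ID of a device
behind a legacy PCI bridge calls for:

    /* Exact source-id match against the bridge's BDF, rather than a
     * bogus "bus range" built from a BDF value: */
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16, PCI_BDF2(bus, devfn));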
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -469,7 +469,7 @@ static void set_msi_source_id(struct pci
set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
(bus << 8) | pdev->bus);
else if ( pdev_type(seg, bus, devfn) == DEV_TYPE_LEGACY_PCI_BRIDGE )
- set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
PCI_BDF2(bus, devfn));
}
break;

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eb9bedd095a2d08ef53a0828ef801e9293cfbfca0a6204bf01a38727e83e92cf
size 4862049

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9819306f1cc5efdc0e97c442c627ab95de251228713ab06efa27e5d0c8cabacd
size 4862820

View File

@ -0,0 +1,29 @@
bnc#757525
Index: xen-4.2.0-testing/tools/python/xen/xend/server/netif.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/netif.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/netif.py
@@ -23,6 +23,7 @@
import os
import random
import re
+import commands
from xen.xend import XendOptions, sxp
from xen.xend.server.DevController import DevController
@@ -101,6 +102,14 @@ class NetifController(DevController):
def __init__(self, vm):
DevController.__init__(self, vm)
+ def createDevice(self, config):
+ bridge = config.get('bridge')
+ if bridge is not None:
+ bridge_result = commands.getstatusoutput("/sbin/ifconfig %s" % bridge)
+ if bridge_result[0] != 0:
+ raise VmError('Network bridge does not exist: %s' % bridge)
+ DevController.createDevice(self, config)
+
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""

View File

@ -15,6 +15,7 @@
# Please submit bugfixes or comments via http://bugs.opensuse.org/
#
Name: xen
ExclusiveArch: %ix86 x86_64
%define xvers 4.2