Accepting request 148616 from Virtualization

Update to version 4.2.1 and bug fixes

OBS-URL: https://build.opensuse.org/request/show/148616
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/xen?expand=0&rev=165
This commit is contained in:
Stephan Kulow 2013-01-17 09:59:08 +00:00 committed by Git OBS Bridge
commit 669528d347
107 changed files with 6370 additions and 3099 deletions

View File

@ -1,61 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347022899 -7200
# Node ID bb85bbccb1c9d802c92dd3fe00841368966ff623
# Parent e3b51948114ec1e2b2eece415b32e26ff857acde
x86/32-on-64: adjust Dom0 initial page table layout
Drop the unnecessary reservation of the L4 page for 32on64 Dom0, and
allocate its L3 first (to match behavior when running identical bit-
width hypervisor and Dom0 kernel).
Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -510,7 +510,7 @@ int __init construct_dom0(
#define NR(_l,_h,_s) \
(((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
((_l) & ~((1UL<<(_s))-1))) >> (_s))
- if ( (1 + /* # L4 */
+ if ( (!is_pv_32on64_domain(d) + /* # L4 */
NR(v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
(!is_pv_32on64_domain(d) ?
NR(v_start, v_end, L3_PAGETABLE_SHIFT) : /* # L2 */
@@ -756,6 +756,8 @@ int __init construct_dom0(
panic("Not enough RAM for domain 0 PML4.\n");
page->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
l4start = l4tab = page_to_virt(page);
+ maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l3_page_table;
+ l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
}
copy_page(l4tab, idle_pg_table);
l4tab[0] = l4e_empty(); /* zap trampoline mapping */
@@ -787,9 +789,13 @@ int __init construct_dom0(
l2tab += l2_table_offset(v_start);
if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
{
- maddr_to_page(mpt_alloc)->u.inuse.type_info =
- PGT_l3_page_table;
- l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ if ( count || !l3start )
+ {
+ maddr_to_page(mpt_alloc)->u.inuse.type_info =
+ PGT_l3_page_table;
+ l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ }
+ l3tab = l3start;
clear_page(l3tab);
if ( count == 0 )
l3tab += l3_table_offset(v_start);
@@ -938,7 +944,7 @@ int __init construct_dom0(
if ( !vinitrd_start && initrd_len )
si->flags |= SIF_MOD_START_PFN;
si->flags |= (xen_processor_pmbits << 8) & SIF_PM_MASK;
- si->pt_base = vpt_start + 2 * PAGE_SIZE * !!is_pv_32on64_domain(d);
+ si->pt_base = vpt_start;
si->nr_pt_frames = nr_pt_pages;
si->mfn_list = vphysmap_start;
snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s",

View File

@ -1,274 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347033492 -7200
# Node ID c70d70d85306b3e4a0538353be131100c5ee38d5
# Parent 0376c85caaf34fe3cc8c3327f7a1d9ecd6f070b4
adjust a few RCU domain locking calls
x86's do_physdev_op() had a case where the locking was entirely
superfluous. Its physdev_map_pirq() further had a case where the lock
was being obtained too early, needlessly complicating early exit paths.
Grant table code had two open coded instances of
rcu_lock_target_domain_by_id(), and a third code section could be
consolidated by using the newly introduced helper function.
The memory hypercall code had two more instances of open coding
rcu_lock_target_domain_by_id(), but note that here this is not just
cleanup, but also fixes an error return path in memory_exchange() to
actually return an error.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -90,14 +90,10 @@ static int physdev_hvm_map_pirq(
int physdev_map_pirq(domid_t domid, int type, int *index, int *pirq_p,
struct msi_info *msi)
{
- struct domain *d;
+ struct domain *d = current->domain;
int pirq, irq, ret = 0;
void *map_data = NULL;
- ret = rcu_lock_target_domain_by_id(domid, &d);
- if ( ret )
- return ret;
-
if ( domid == DOMID_SELF && is_hvm_domain(d) )
{
/*
@@ -105,14 +101,15 @@ int physdev_map_pirq(domid_t domid, int
* calls back into itself and deadlocks on hvm_domain.irq_lock.
*/
if ( !is_hvm_pv_evtchn_domain(d) )
- {
- ret = -EINVAL;
- goto free_domain;
- }
- ret = physdev_hvm_map_pirq(d, type, index, pirq_p);
- goto free_domain;
+ return -EINVAL;
+
+ return physdev_hvm_map_pirq(d, type, index, pirq_p);
}
+ ret = rcu_lock_target_domain_by_id(domid, &d);
+ if ( ret )
+ return ret;
+
if ( !IS_PRIV_FOR(current->domain, d) )
{
ret = -EPERM;
@@ -696,13 +693,12 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
}
case PHYSDEVOP_get_free_pirq: {
struct physdev_get_free_pirq out;
- struct domain *d;
+ struct domain *d = v->domain;
ret = -EFAULT;
if ( copy_from_guest(&out, arg, 1) != 0 )
break;
- d = rcu_lock_current_domain();
spin_lock(&d->event_lock);
ret = get_free_pirq(d, out.type);
@@ -717,7 +713,6 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
}
spin_unlock(&d->event_lock);
- rcu_unlock_domain(d);
if ( ret >= 0 )
{
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -24,7 +24,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <xen/config.h>
+#include <xen/err.h>
#include <xen/iocap.h>
#include <xen/lib.h>
#include <xen/sched.h>
@@ -195,6 +195,30 @@ double_gt_unlock(struct grant_table *lgt
spin_unlock(&rgt->lock);
}
+static struct domain *gt_lock_target_domain_by_id(domid_t dom)
+{
+ struct domain *d;
+ int rc = GNTST_general_error;
+
+ switch ( rcu_lock_target_domain_by_id(dom, &d) )
+ {
+ case 0:
+ return d;
+
+ case -ESRCH:
+ gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
+ rc = GNTST_bad_domain;
+ break;
+
+ case -EPERM:
+ rc = GNTST_permission_denied;
+ break;
+ }
+
+ ASSERT(rc < 0 && -rc <= MAX_ERRNO);
+ return ERR_PTR(rc);
+}
+
static inline int
__get_maptrack_handle(
struct grant_table *t)
@@ -1261,7 +1285,6 @@ gnttab_setup_table(
struct grant_table *gt;
int i;
unsigned long gmfn;
- domid_t dom;
if ( count != 1 )
return -EINVAL;
@@ -1281,25 +1304,11 @@ gnttab_setup_table(
goto out1;
}
- dom = op.dom;
- if ( dom == DOMID_SELF )
+ d = gt_lock_target_domain_by_id(op.dom);
+ if ( IS_ERR(d) )
{
- d = rcu_lock_current_domain();
- }
- else
- {
- if ( unlikely((d = rcu_lock_domain_by_id(dom)) == NULL) )
- {
- gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
- op.status = GNTST_bad_domain;
- goto out1;
- }
-
- if ( unlikely(!IS_PRIV_FOR(current->domain, d)) )
- {
- op.status = GNTST_permission_denied;
- goto out2;
- }
+ op.status = PTR_ERR(d);
+ goto out1;
}
if ( xsm_grant_setup(current->domain, d) )
@@ -1352,7 +1361,6 @@ gnttab_query_size(
{
struct gnttab_query_size op;
struct domain *d;
- domid_t dom;
int rc;
if ( count != 1 )
@@ -1364,25 +1372,11 @@ gnttab_query_size(
return -EFAULT;
}
- dom = op.dom;
- if ( dom == DOMID_SELF )
- {
- d = rcu_lock_current_domain();
- }
- else
+ d = gt_lock_target_domain_by_id(op.dom);
+ if ( IS_ERR(d) )
{
- if ( unlikely((d = rcu_lock_domain_by_id(dom)) == NULL) )
- {
- gdprintk(XENLOG_INFO, "Bad domid %d.\n", dom);
- op.status = GNTST_bad_domain;
- goto query_out;
- }
-
- if ( unlikely(!IS_PRIV_FOR(current->domain, d)) )
- {
- op.status = GNTST_permission_denied;
- goto query_out_unlock;
- }
+ op.status = PTR_ERR(d);
+ goto query_out;
}
rc = xsm_grant_query_size(current->domain, d);
@@ -2251,15 +2245,10 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
return -EFAULT;
}
- rc = rcu_lock_target_domain_by_id(op.dom, &d);
- if ( rc < 0 )
+ d = gt_lock_target_domain_by_id(op.dom);
+ if ( IS_ERR(d) )
{
- if ( rc == -ESRCH )
- op.status = GNTST_bad_domain;
- else if ( rc == -EPERM )
- op.status = GNTST_permission_denied;
- else
- op.status = GNTST_general_error;
+ op.status = PTR_ERR(d);
goto out1;
}
rc = xsm_grant_setup(current->domain, d);
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -329,22 +329,9 @@ static long memory_exchange(XEN_GUEST_HA
out_chunk_order = exch.in.extent_order - exch.out.extent_order;
}
- if ( likely(exch.in.domid == DOMID_SELF) )
- {
- d = rcu_lock_current_domain();
- }
- else
- {
- if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL )
- goto fail_early;
-
- if ( !IS_PRIV_FOR(current->domain, d) )
- {
- rcu_unlock_domain(d);
- rc = -EPERM;
- goto fail_early;
- }
- }
+ rc = rcu_lock_target_domain_by_id(exch.in.domid, &d);
+ if ( rc )
+ goto fail_early;
memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
d,
@@ -583,20 +570,8 @@ long do_memory_op(unsigned long cmd, XEN
&& (reservation.mem_flags & XENMEMF_populate_on_demand) )
args.memflags |= MEMF_populate_on_demand;
- if ( likely(reservation.domid == DOMID_SELF) )
- {
- d = rcu_lock_current_domain();
- }
- else
- {
- if ( (d = rcu_lock_domain_by_id(reservation.domid)) == NULL )
- return start_extent;
- if ( !IS_PRIV_FOR(current->domain, d) )
- {
- rcu_unlock_domain(d);
- return start_extent;
- }
- }
+ if ( unlikely(rcu_lock_target_domain_by_id(reservation.domid, &d)) )
+ return start_extent;
args.domain = d;
rc = xsm_memory_adjust_reservation(current->domain, d);

View File

@ -1,52 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347263130 -7200
# Node ID 7d216f026f71022186196a75244e97cf3864f50b
# Parent c70d70d85306b3e4a0538353be131100c5ee38d5
VT-d: split .ack and .disable DMA-MSI actors
Calling irq_complete_move() from .disable is wrong, breaking S3 resume.
Comparing with all other .ack actors, it was also missing a call to
move_{native,masked}_irq(). As the actor is masking its interrupt
anyway (albeit it's not immediately obvious why), the latter is the
better choice.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Acked-by Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1040,8 +1040,6 @@ static void dma_msi_mask(struct irq_desc
unsigned long flags;
struct iommu *iommu = desc->action->dev_id;
- irq_complete_move(desc);
-
/* mask it */
spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
@@ -1054,6 +1052,13 @@ static unsigned int dma_msi_startup(stru
return 0;
}
+static void dma_msi_ack(struct irq_desc *desc)
+{
+ irq_complete_move(desc);
+ dma_msi_mask(desc);
+ move_masked_irq(desc);
+}
+
static void dma_msi_end(struct irq_desc *desc, u8 vector)
{
dma_msi_unmask(desc);
@@ -1115,7 +1120,7 @@ static hw_irq_controller dma_msi_type =
.shutdown = dma_msi_mask,
.enable = dma_msi_unmask,
.disable = dma_msi_mask,
- .ack = dma_msi_mask,
+ .ack = dma_msi_ack,
.end = dma_msi_end,
.set_affinity = dma_msi_set_affinity,
};

View File

@ -1,27 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365190 -7200
# Node ID 0dba5a8886556f1b92e59eb19c570ad1704037f6
# Parent 90533f3b6babfda56edbbefda47c46b391204132
tmem: only allow tmem control operations from privileged domains
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2541,10 +2541,8 @@ static NOINLINE int do_tmem_control(stru
OID *oidp = (OID *)(&op->u.ctrl.oid[0]);
if (!tmh_current_is_privileged())
- {
- /* don't fail... mystery: sometimes dom0 fails here */
- /* return -EPERM; */
- }
+ return -EPERM;
+
switch(subop)
{
case TMEMC_THAW:

View File

@ -1,45 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365203 -7200
# Node ID fcf567acc92ae57f4adfbe967b01a2ba0390c08f
# Parent 0dba5a8886556f1b92e59eb19c570ad1704037f6
tmem: consistently make pool_id a uint32_t
Treating it as an int could allow a malicious guest to provide a
negative pool_Id, by passing the MAX_POOLS_PER_DOMAIN limit check and
allowing access to the negative offsets of the pool array.
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2417,7 +2417,7 @@ static NOINLINE int tmemc_save_subop(int
return rc;
}
-static NOINLINE int tmemc_save_get_next_page(int cli_id, int pool_id,
+static NOINLINE int tmemc_save_get_next_page(int cli_id, uint32_t pool_id,
tmem_cli_va_t buf, uint32_t bufsize)
{
client_t *client = tmh_client_from_cli_id(cli_id);
@@ -2509,7 +2509,7 @@ out:
return ret;
}
-static int tmemc_restore_put_page(int cli_id, int pool_id, OID *oidp,
+static int tmemc_restore_put_page(int cli_id, uint32_t pool_id, OID *oidp,
uint32_t index, tmem_cli_va_t buf, uint32_t bufsize)
{
client_t *client = tmh_client_from_cli_id(cli_id);
@@ -2521,7 +2521,7 @@ static int tmemc_restore_put_page(int cl
return do_tmem_put(pool,oidp,index,0,0,0,bufsize,buf.p);
}
-static int tmemc_restore_flush_page(int cli_id, int pool_id, OID *oidp,
+static int tmemc_restore_flush_page(int cli_id, uint32_t pool_id, OID *oidp,
uint32_t index)
{
client_t *client = tmh_client_from_cli_id(cli_id);

View File

@ -1,23 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365214 -7200
# Node ID d189d99ef00c1e197321593d13282e1b57eb4a38
# Parent fcf567acc92ae57f4adfbe967b01a2ba0390c08f
tmem: check the pool_id is valid when destroying a tmem pool
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1870,6 +1870,8 @@ static NOINLINE int do_tmem_destroy_pool
if ( client->pools == NULL )
return 0;
+ if ( pool_id >= MAX_POOLS_PER_DOMAIN )
+ return 0;
if ( (pool = client->pools[pool_id]) == NULL )
return 0;
client->pools[pool_id] = NULL;

View File

@ -1,44 +0,0 @@
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1347365847 -7200
# Node ID f53c5aadbba9d389f4a7d83f308499e22d1d1eda
# Parent d189d99ef00c1e197321593d13282e1b57eb4a38
tmem: check for a valid client ("domain") in the save subops
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2379,12 +2379,18 @@ static NOINLINE int tmemc_save_subop(int
rc = MAX_POOLS_PER_DOMAIN;
break;
case TMEMC_SAVE_GET_CLIENT_WEIGHT:
+ if ( client == NULL )
+ break;
rc = client->weight == -1 ? -2 : client->weight;
break;
case TMEMC_SAVE_GET_CLIENT_CAP:
+ if ( client == NULL )
+ break;
rc = client->cap == -1 ? -2 : client->cap;
break;
case TMEMC_SAVE_GET_CLIENT_FLAGS:
+ if ( client == NULL )
+ break;
rc = (client->compress ? TMEM_CLIENT_COMPRESS : 0 ) |
(client->was_frozen ? TMEM_CLIENT_FROZEN : 0 );
break;
@@ -2408,6 +2414,8 @@ static NOINLINE int tmemc_save_subop(int
*uuid = pool->uuid[1];
rc = 0;
case TMEMC_SAVE_END:
+ if ( client == NULL )
+ break;
client->live_migrating = 0;
if ( !list_empty(&client->persistent_invalidated_list) )
list_for_each_entry_safe(pgp,pgp2,

View File

@ -1,556 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365869 -7200
# Node ID ccd60ed6c555e1816cac448fcb20a84533977b43
# Parent f53c5aadbba9d389f4a7d83f308499e22d1d1eda
tmem: don't access guest memory without using the accessors intended for this
This is not permitted, not even for buffers coming from Dom0 (and it
would also break the moment Dom0 runs in HVM mode). An implication from
the changes here is that tmh_copy_page() can't be used anymore for
control operations calling tmh_copy_{from,to}_client() (as those pass
the buffer by virtual address rather than MFN).
Note that tmemc_save_get_next_page() previously didn't set the returned
handle's pool_id field, while the new code does. It need to be
confirmed that this is not a problem (otherwise the copy-out operation
will require further tmh_...() abstractions to be added).
Further note that the patch removes (rather than adjusts) an invalid
call to unmap_domain_page() (no matching map_domain_page()) from
tmh_compress_from_client() and adds a missing one to an error return
path in tmh_copy_from_client().
Finally note that the patch adds a previously missing return statement
to cli_get_page() (without which that function could de-reference a
NULL pointer, triggerable from guest mode).
This is part of XSA-15 / CVE-2012-3497.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -388,11 +388,13 @@ static NOINLINE int pcd_copy_to_client(t
pcd = pgp->pcd;
if ( pgp->size < PAGE_SIZE && pgp->size != 0 &&
pcd->size < PAGE_SIZE && pcd->size != 0 )
- ret = tmh_decompress_to_client(cmfn, pcd->cdata, pcd->size, NULL);
+ ret = tmh_decompress_to_client(cmfn, pcd->cdata, pcd->size,
+ tmh_cli_buf_null);
else if ( tmh_tze_enabled() && pcd->size < PAGE_SIZE )
ret = tmh_copy_tze_to_client(cmfn, pcd->tze, pcd->size);
else
- ret = tmh_copy_to_client(cmfn, pcd->pfp, 0, 0, PAGE_SIZE, NULL);
+ ret = tmh_copy_to_client(cmfn, pcd->pfp, 0, 0, PAGE_SIZE,
+ tmh_cli_buf_null);
tmem_read_unlock(&pcd_tree_rwlocks[firstbyte]);
return ret;
}
@@ -1444,7 +1446,7 @@ static inline void tmem_ensure_avail_pag
/************ TMEM CORE OPERATIONS ************************************/
static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn,
- void *cva)
+ tmem_cli_va_t clibuf)
{
void *dst, *p;
size_t size;
@@ -1463,7 +1465,7 @@ static NOINLINE int do_tmem_put_compress
if ( pgp->pfp != NULL )
pgp_free_data(pgp, pgp->us.obj->pool);
START_CYC_COUNTER(compress);
- ret = tmh_compress_from_client(cmfn, &dst, &size, cva);
+ ret = tmh_compress_from_client(cmfn, &dst, &size, clibuf);
if ( (ret == -EFAULT) || (ret == 0) )
goto out;
else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) {
@@ -1490,7 +1492,8 @@ out:
}
static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn,
- pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cva)
+ pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len,
+ tmem_cli_va_t clibuf)
{
pool_t *pool;
obj_t *obj;
@@ -1512,7 +1515,7 @@ static NOINLINE int do_tmem_dup_put(pgp_
/* can we successfully manipulate pgp to change out the data? */
if ( len != 0 && client->compress && pgp->size != 0 )
{
- ret = do_tmem_put_compress(pgp,cmfn,cva);
+ ret = do_tmem_put_compress(pgp, cmfn, clibuf);
if ( ret == 1 )
goto done;
else if ( ret == 0 )
@@ -1530,7 +1533,8 @@ copy_uncompressed:
goto failed_dup;
pgp->size = 0;
/* tmh_copy_from_client properly handles len==0 and offsets != 0 */
- ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,0);
+ ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
+ tmh_cli_buf_null);
if ( ret == -EFAULT )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
@@ -1582,7 +1586,7 @@ cleanup:
static NOINLINE int do_tmem_put(pool_t *pool,
OID *oidp, uint32_t index,
tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cva)
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t clibuf)
{
obj_t *obj = NULL, *objfound = NULL, *objnew = NULL;
pgp_t *pgp = NULL, *pgpdel = NULL;
@@ -1596,7 +1600,8 @@ static NOINLINE int do_tmem_put(pool_t *
{
ASSERT_SPINLOCK(&objfound->obj_spinlock);
if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL)
- return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len,cva);
+ return do_tmem_dup_put(pgp, cmfn, tmem_offset, pfn_offset, len,
+ clibuf);
}
/* no puts allowed into a frozen pool (except dup puts) */
@@ -1631,7 +1636,7 @@ static NOINLINE int do_tmem_put(pool_t *
if ( len != 0 && client->compress )
{
ASSERT(pgp->pfp == NULL);
- ret = do_tmem_put_compress(pgp,cmfn,cva);
+ ret = do_tmem_put_compress(pgp, cmfn, clibuf);
if ( ret == 1 )
goto insert_page;
if ( ret == -ENOMEM )
@@ -1655,7 +1660,8 @@ copy_uncompressed:
goto delete_and_free;
}
/* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */
- ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,cva);
+ ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
+ clibuf);
if ( ret == -EFAULT )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
@@ -1725,12 +1731,13 @@ free:
static NOINLINE int do_tmem_get(pool_t *pool, OID *oidp, uint32_t index,
tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cva)
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t clibuf)
{
obj_t *obj;
pgp_t *pgp;
client_t *client = pool->client;
DECL_LOCAL_CYC_COUNTER(decompress);
+ int rc = -EFAULT;
if ( !_atomic_read(pool->pgp_count) )
return -EEMPTY;
@@ -1755,16 +1762,18 @@ static NOINLINE int do_tmem_get(pool_t *
if ( tmh_dedup_enabled() && !is_persistent(pool) &&
pgp->firstbyte != NOT_SHAREABLE )
{
- if ( pcd_copy_to_client(cmfn, pgp) == -EFAULT )
+ rc = pcd_copy_to_client(cmfn, pgp);
+ if ( rc <= 0 )
goto bad_copy;
} else if ( pgp->size != 0 ) {
START_CYC_COUNTER(decompress);
- if ( tmh_decompress_to_client(cmfn, pgp->cdata,
- pgp->size, cva) == -EFAULT )
+ rc = tmh_decompress_to_client(cmfn, pgp->cdata,
+ pgp->size, clibuf);
+ if ( rc <= 0 )
goto bad_copy;
END_CYC_COUNTER(decompress);
} else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
- pfn_offset, len, cva) == -EFAULT)
+ pfn_offset, len, clibuf) == -EFAULT)
goto bad_copy;
if ( is_ephemeral(pool) )
{
@@ -1804,8 +1813,7 @@ static NOINLINE int do_tmem_get(pool_t *
bad_copy:
/* this should only happen if the client passed a bad mfn */
failed_copies++;
- return -EFAULT;
-
+ return rc;
}
static NOINLINE int do_tmem_flush_page(pool_t *pool, OID *oidp, uint32_t index)
@@ -2345,7 +2353,6 @@ static NOINLINE int tmemc_save_subop(int
pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN)
? NULL : client->pools[pool_id];
uint32_t p;
- uint64_t *uuid;
pgp_t *pgp, *pgp2;
int rc = -1;
@@ -2409,9 +2416,7 @@ static NOINLINE int tmemc_save_subop(int
case TMEMC_SAVE_GET_POOL_UUID:
if ( pool == NULL )
break;
- uuid = (uint64_t *)buf.p;
- *uuid++ = pool->uuid[0];
- *uuid = pool->uuid[1];
+ tmh_copy_to_client_buf(buf, pool->uuid, 2);
rc = 0;
case TMEMC_SAVE_END:
if ( client == NULL )
@@ -2436,7 +2441,7 @@ static NOINLINE int tmemc_save_get_next_
pgp_t *pgp;
OID oid;
int ret = 0;
- struct tmem_handle *h;
+ struct tmem_handle h;
unsigned int pagesize = 1 << (pool->pageshift+12);
if ( pool == NULL || is_ephemeral(pool) )
@@ -2467,11 +2472,13 @@ static NOINLINE int tmemc_save_get_next_
pgp_t,us.pool_pers_pages);
pool->cur_pgp = pgp;
oid = pgp->us.obj->oid;
- h = (struct tmem_handle *)buf.p;
- *(OID *)&h->oid[0] = oid;
- h->index = pgp->index;
- buf.p = (void *)(h+1);
- ret = do_tmem_get(pool, &oid, h->index,0,0,0,pagesize,buf.p);
+ h.pool_id = pool_id;
+ BUILD_BUG_ON(sizeof(h.oid) != sizeof(oid));
+ memcpy(h.oid, oid.oid, sizeof(h.oid));
+ h.index = pgp->index;
+ tmh_copy_to_client_buf(buf, &h, 1);
+ tmh_client_buf_add(buf, sizeof(h));
+ ret = do_tmem_get(pool, &oid, pgp->index, 0, 0, 0, pagesize, buf);
out:
tmem_spin_unlock(&pers_lists_spinlock);
@@ -2483,7 +2490,7 @@ static NOINLINE int tmemc_save_get_next_
{
client_t *client = tmh_client_from_cli_id(cli_id);
pgp_t *pgp;
- struct tmem_handle *h;
+ struct tmem_handle h;
int ret = 0;
if ( client == NULL )
@@ -2509,10 +2516,11 @@ static NOINLINE int tmemc_save_get_next_
pgp_t,client_inv_pages);
client->cur_pgp = pgp;
}
- h = (struct tmem_handle *)buf.p;
- h->pool_id = pgp->pool_id;
- *(OID *)&h->oid = pgp->inv_oid;
- h->index = pgp->index;
+ h.pool_id = pgp->pool_id;
+ BUILD_BUG_ON(sizeof(h.oid) != sizeof(pgp->inv_oid));
+ memcpy(h.oid, pgp->inv_oid.oid, sizeof(h.oid));
+ h.index = pgp->index;
+ tmh_copy_to_client_buf(buf, &h, 1);
ret = 1;
out:
tmem_spin_unlock(&pers_lists_spinlock);
@@ -2528,7 +2536,7 @@ static int tmemc_restore_put_page(int cl
if ( pool == NULL )
return -1;
- return do_tmem_put(pool,oidp,index,0,0,0,bufsize,buf.p);
+ return do_tmem_put(pool, oidp, index, 0, 0, 0, bufsize, buf);
}
static int tmemc_restore_flush_page(int cli_id, uint32_t pool_id, OID *oidp,
@@ -2732,19 +2740,19 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
break;
case TMEM_NEW_PAGE:
tmem_ensure_avail_pages();
- rc = do_tmem_put(pool, oidp,
- op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, NULL);
+ rc = do_tmem_put(pool, oidp, op.u.gen.index, op.u.gen.cmfn, 0, 0, 0,
+ tmh_cli_buf_null);
break;
case TMEM_PUT_PAGE:
tmem_ensure_avail_pages();
- rc = do_tmem_put(pool, oidp,
- op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, NULL);
+ rc = do_tmem_put(pool, oidp, op.u.gen.index, op.u.gen.cmfn, 0, 0,
+ PAGE_SIZE, tmh_cli_buf_null);
if (rc == 1) succ_put = 1;
else non_succ_put = 1;
break;
case TMEM_GET_PAGE:
rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
- 0, 0, PAGE_SIZE, 0);
+ 0, 0, PAGE_SIZE, tmh_cli_buf_null);
if (rc == 1) succ_get = 1;
else non_succ_get = 1;
break;
@@ -2763,13 +2771,13 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
case TMEM_READ:
rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
op.u.gen.tmem_offset, op.u.gen.pfn_offset,
- op.u.gen.len,0);
+ op.u.gen.len, tmh_cli_buf_null);
break;
case TMEM_WRITE:
rc = do_tmem_put(pool, oidp,
op.u.gen.index, op.u.gen.cmfn,
op.u.gen.tmem_offset, op.u.gen.pfn_offset,
- op.u.gen.len, NULL);
+ op.u.gen.len, tmh_cli_buf_null);
break;
case TMEM_XCHG:
/* need to hold global lock to ensure xchg is atomic */
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -51,6 +51,7 @@ DECL_CYC_COUNTER(pg_copy);
#define LZO_DSTMEM_PAGES 2
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, workmem);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, dstmem);
+static DEFINE_PER_CPU_READ_MOSTLY(void *, scratch_page);
#ifdef COMPARE_COPY_PAGE_SSE2
#include <asm/flushtlb.h> /* REMOVE ME AFTER TEST */
@@ -115,6 +116,7 @@ static inline void *cli_get_page(tmem_cl
{
if ( page )
put_page(page);
+ return NULL;
}
if ( cli_write && !get_page_type(page, PGT_writable_page) )
@@ -144,12 +146,12 @@ static inline void cli_put_page(tmem_cli
EXPORT int tmh_copy_from_client(pfp_t *pfp,
tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cli_va)
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t clibuf)
{
unsigned long tmem_mfn, cli_mfn = 0;
- void *tmem_va;
+ char *tmem_va, *cli_va = NULL;
pfp_t *cli_pfp = NULL;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
+ int rc = 1;
ASSERT(pfp != NULL);
tmem_mfn = page_to_mfn(pfp);
@@ -160,62 +162,76 @@ EXPORT int tmh_copy_from_client(pfp_t *p
unmap_domain_page(tmem_va);
return 1;
}
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
if ( cli_va == NULL )
+ {
+ unmap_domain_page(tmem_va);
return -EFAULT;
+ }
}
mb();
- if (len == PAGE_SIZE && !tmem_offset && !pfn_offset)
+ if ( len == PAGE_SIZE && !tmem_offset && !pfn_offset && cli_va )
tmh_copy_page(tmem_va, cli_va);
else if ( (tmem_offset+len <= PAGE_SIZE) &&
(pfn_offset+len <= PAGE_SIZE) )
- memcpy((char *)tmem_va+tmem_offset,(char *)cli_va+pfn_offset,len);
- if ( !tmemc )
+ {
+ if ( cli_va )
+ memcpy(tmem_va + tmem_offset, cli_va + pfn_offset, len);
+ else if ( copy_from_guest_offset(tmem_va + tmem_offset, clibuf,
+ pfn_offset, len) )
+ rc = -EFAULT;
+ }
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(tmem_va);
- return 1;
+ return rc;
}
EXPORT int tmh_compress_from_client(tmem_cli_mfn_t cmfn,
- void **out_va, size_t *out_len, void *cli_va)
+ void **out_va, size_t *out_len, tmem_cli_va_t clibuf)
{
int ret = 0;
unsigned char *dmem = this_cpu(dstmem);
unsigned char *wmem = this_cpu(workmem);
+ char *scratch = this_cpu(scratch_page);
pfp_t *cli_pfp = NULL;
unsigned long cli_mfn = 0;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
+ void *cli_va = NULL;
if ( dmem == NULL || wmem == NULL )
return 0; /* no buffer, so can't compress */
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
if ( cli_va == NULL )
return -EFAULT;
}
+ else if ( !scratch )
+ return 0;
+ else if ( copy_from_guest(scratch, clibuf, PAGE_SIZE) )
+ return -EFAULT;
mb();
- ret = lzo1x_1_compress(cli_va, PAGE_SIZE, dmem, out_len, wmem);
+ ret = lzo1x_1_compress(cli_va ?: scratch, PAGE_SIZE, dmem, out_len, wmem);
ASSERT(ret == LZO_E_OK);
*out_va = dmem;
- if ( !tmemc )
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
- unmap_domain_page(cli_va);
return 1;
}
EXPORT int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
- pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cli_va)
+ pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len,
+ tmem_cli_va_t clibuf)
{
unsigned long tmem_mfn, cli_mfn = 0;
- void *tmem_va;
+ char *tmem_va, *cli_va = NULL;
pfp_t *cli_pfp = NULL;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
+ int rc = 1;
ASSERT(pfp != NULL);
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
if ( cli_va == NULL )
@@ -223,37 +239,48 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
}
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
- if (len == PAGE_SIZE && !tmem_offset && !pfn_offset)
+ if ( len == PAGE_SIZE && !tmem_offset && !pfn_offset && cli_va )
tmh_copy_page(cli_va, tmem_va);
else if ( (tmem_offset+len <= PAGE_SIZE) && (pfn_offset+len <= PAGE_SIZE) )
- memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len);
+ {
+ if ( cli_va )
+ memcpy(cli_va + pfn_offset, tmem_va + tmem_offset, len);
+ else if ( copy_to_guest_offset(clibuf, pfn_offset,
+ tmem_va + tmem_offset, len) )
+ rc = -EFAULT;
+ }
unmap_domain_page(tmem_va);
- if ( !tmemc )
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
mb();
- return 1;
+ return rc;
}
EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va,
- size_t size, void *cli_va)
+ size_t size, tmem_cli_va_t clibuf)
{
unsigned long cli_mfn = 0;
pfp_t *cli_pfp = NULL;
+ void *cli_va = NULL;
+ char *scratch = this_cpu(scratch_page);
size_t out_len = PAGE_SIZE;
- bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
int ret;
- if ( !tmemc )
+ if ( guest_handle_is_null(clibuf) )
{
cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
if ( cli_va == NULL )
return -EFAULT;
}
- ret = lzo1x_decompress_safe(tmem_va, size, cli_va, &out_len);
+ else if ( !scratch )
+ return 0;
+ ret = lzo1x_decompress_safe(tmem_va, size, cli_va ?: scratch, &out_len);
ASSERT(ret == LZO_E_OK);
ASSERT(out_len == PAGE_SIZE);
- if ( !tmemc )
+ if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ else if ( copy_to_guest(clibuf, scratch, PAGE_SIZE) )
+ return -EFAULT;
mb();
return 1;
}
@@ -423,6 +450,11 @@ static int cpu_callback(
struct page_info *p = alloc_domheap_pages(0, workmem_order, 0);
per_cpu(workmem, cpu) = p ? page_to_virt(p) : NULL;
}
+ if ( per_cpu(scratch_page, cpu) == NULL )
+ {
+ struct page_info *p = alloc_domheap_page(NULL, 0);
+ per_cpu(scratch_page, cpu) = p ? page_to_virt(p) : NULL;
+ }
break;
}
case CPU_DEAD:
@@ -439,6 +471,11 @@ static int cpu_callback(
free_domheap_pages(p, workmem_order);
per_cpu(workmem, cpu) = NULL;
}
+ if ( per_cpu(scratch_page, cpu) != NULL )
+ {
+ free_domheap_page(virt_to_page(per_cpu(scratch_page, cpu)));
+ per_cpu(scratch_page, cpu) = NULL;
+ }
break;
}
default:
--- a/xen/include/xen/tmem_xen.h
+++ b/xen/include/xen/tmem_xen.h
@@ -482,27 +482,33 @@ static inline int tmh_get_tmemop_from_cl
return copy_from_guest(op, uops, 1);
}
+#define tmh_cli_buf_null guest_handle_from_ptr(NULL, char)
+
static inline void tmh_copy_to_client_buf_offset(tmem_cli_va_t clibuf, int off,
char *tmembuf, int len)
{
copy_to_guest_offset(clibuf,off,tmembuf,len);
}
+#define tmh_copy_to_client_buf(clibuf, tmembuf, cnt) \
+ copy_to_guest(guest_handle_cast(clibuf, void), tmembuf, cnt)
+
+#define tmh_client_buf_add guest_handle_add_offset
+
#define TMH_CLI_ID_NULL ((cli_id_t)((domid_t)-1L))
#define tmh_cli_id_str "domid"
#define tmh_client_str "domain"
-extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t,void*);
+int tmh_decompress_to_client(tmem_cli_mfn_t, void *, size_t, tmem_cli_va_t);
-extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *,void*);
+int tmh_compress_from_client(tmem_cli_mfn_t, void **, size_t *, tmem_cli_va_t);
-extern int tmh_copy_from_client(pfp_t *pfp,
- tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
- pagesize_t pfn_offset, pagesize_t len, void *cva);
+int tmh_copy_from_client(pfp_t *, tmem_cli_mfn_t, pagesize_t tmem_offset,
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t);
-extern int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
- pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cva);
+int tmh_copy_to_client(tmem_cli_mfn_t, pfp_t *, pagesize_t tmem_offset,
+ pagesize_t pfn_offset, pagesize_t len, tmem_cli_va_t);
extern int tmh_copy_tze_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, pagesize_t len);

View File

@ -1,137 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365879 -7200
# Node ID 33b8c42a87ec2fa6e6533dd9ee7603f732b168f5
# Parent ccd60ed6c555e1816cac448fcb20a84533977b43
tmem: detect arithmetic overflow in tmh_copy_{from,to}_client()
This implies adjusting callers to deal with errors other than -EFAULT
and removing some comments which would otherwise become stale.
Reported-by: Tim Deegan <tim@xen.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1535,7 +1535,7 @@ copy_uncompressed:
/* tmh_copy_from_client properly handles len==0 and offsets != 0 */
ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
tmh_cli_buf_null);
- if ( ret == -EFAULT )
+ if ( ret < 0 )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
{
@@ -1556,9 +1556,7 @@ done:
return 1;
bad_copy:
- /* this should only happen if the client passed a bad mfn */
failed_copies++;
- ret = -EFAULT;
goto cleanup;
failed_dup:
@@ -1662,7 +1660,7 @@ copy_uncompressed:
/* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */
ret = tmh_copy_from_client(pgp->pfp, cmfn, tmem_offset, pfn_offset, len,
clibuf);
- if ( ret == -EFAULT )
+ if ( ret < 0 )
goto bad_copy;
if ( tmh_dedup_enabled() && !is_persistent(pool) )
{
@@ -1702,8 +1700,6 @@ insert_page:
return 1;
bad_copy:
- /* this should only happen if the client passed a bad mfn */
- ret = -EFAULT;
failed_copies++;
delete_and_free:
@@ -1737,7 +1733,7 @@ static NOINLINE int do_tmem_get(pool_t *
pgp_t *pgp;
client_t *client = pool->client;
DECL_LOCAL_CYC_COUNTER(decompress);
- int rc = -EFAULT;
+ int rc;
if ( !_atomic_read(pool->pgp_count) )
return -EEMPTY;
@@ -1761,20 +1757,20 @@ static NOINLINE int do_tmem_get(pool_t *
ASSERT(pgp->size != -1);
if ( tmh_dedup_enabled() && !is_persistent(pool) &&
pgp->firstbyte != NOT_SHAREABLE )
- {
rc = pcd_copy_to_client(cmfn, pgp);
- if ( rc <= 0 )
- goto bad_copy;
- } else if ( pgp->size != 0 ) {
+ else if ( pgp->size != 0 )
+ {
START_CYC_COUNTER(decompress);
rc = tmh_decompress_to_client(cmfn, pgp->cdata,
pgp->size, clibuf);
- if ( rc <= 0 )
- goto bad_copy;
END_CYC_COUNTER(decompress);
- } else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
- pfn_offset, len, clibuf) == -EFAULT)
+ }
+ else
+ rc = tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset,
+ pfn_offset, len, clibuf);
+ if ( rc <= 0 )
goto bad_copy;
+
if ( is_ephemeral(pool) )
{
if ( is_private(pool) )
@@ -1811,7 +1807,6 @@ static NOINLINE int do_tmem_get(pool_t *
return 1;
bad_copy:
- /* this should only happen if the client passed a bad mfn */
failed_copies++;
return rc;
}
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -153,6 +153,8 @@ EXPORT int tmh_copy_from_client(pfp_t *p
pfp_t *cli_pfp = NULL;
int rc = 1;
+ if ( tmem_offset > PAGE_SIZE || pfn_offset > PAGE_SIZE || len > PAGE_SIZE )
+ return -EINVAL;
ASSERT(pfp != NULL);
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
@@ -183,6 +185,8 @@ EXPORT int tmh_copy_from_client(pfp_t *p
pfn_offset, len) )
rc = -EFAULT;
}
+ else if ( len )
+ rc = -EINVAL;
if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(tmem_va);
@@ -230,6 +234,8 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
pfp_t *cli_pfp = NULL;
int rc = 1;
+ if ( tmem_offset > PAGE_SIZE || pfn_offset > PAGE_SIZE || len > PAGE_SIZE )
+ return -EINVAL;
ASSERT(pfp != NULL);
if ( guest_handle_is_null(clibuf) )
{
@@ -249,6 +255,8 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
tmem_va + tmem_offset, len) )
rc = -EFAULT;
}
+ else if ( len )
+ rc = -EINVAL;
unmap_domain_page(tmem_va);
if ( cli_va )
cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);

View File

@ -1,32 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365888 -7200
# Node ID 83b97a59888b6d2d0f984b8403bd5764dd55c10c
# Parent 33b8c42a87ec2fa6e6533dd9ee7603f732b168f5
tmem: properly drop lock on error path in do_tmem_get()
Also remove a bogus assertion.
Reported-by: Tim Deegan <tim@xen.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1790,7 +1790,6 @@ static NOINLINE int do_tmem_get(pool_t *
list_del(&pgp->us.client_eph_pages);
list_add_tail(&pgp->us.client_eph_pages,&client->ephemeral_page_list);
tmem_spin_unlock(&eph_lists_spinlock);
- ASSERT(obj != NULL);
obj->last_client = tmh_get_cli_id_from_current();
}
}
@@ -1807,6 +1806,8 @@ static NOINLINE int do_tmem_get(pool_t *
return 1;
bad_copy:
+ obj->no_evict = 0;
+ tmem_spin_unlock(&obj->obj_spinlock);
failed_copies++;
return rc;
}

View File

@ -1,34 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365906 -7200
# Node ID 109ea6a0c23aa0c5df79e3f5658162aed959ffcf
# Parent 83b97a59888b6d2d0f984b8403bd5764dd55c10c
tmem: properly drop lock on error path in do_tmem_op()
Reported-by: Tim Deegan <tim@xen.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2659,13 +2659,19 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
if ( client != NULL && tmh_client_is_dying(client) )
{
rc = -ENODEV;
- goto out;
+ if ( tmh_lock_all )
+ goto out;
+ simple_error:
+ errored_tmem_ops++;
+ return rc;
}
if ( unlikely(tmh_get_tmemop_from_client(&op, uops) != 0) )
{
printk("tmem: can't get tmem struct from %s\n",client_str);
rc = -EFAULT;
+ if ( !tmh_lock_all )
+ goto simple_error;
goto out;
}

View File

@ -1,305 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365916 -7200
# Node ID 0520982a602a3ac06dd5bc573ddaff5edc9c6987
# Parent 109ea6a0c23aa0c5df79e3f5658162aed959ffcf
tmem: reduce severity of log messages
Otherwise they can be used by a guest to spam the hypervisor log with
all settings at their defaults.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1107,7 +1107,7 @@ static int shared_pool_join(pool_t *pool
sl->client = new_client;
list_add_tail(&sl->share_list, &pool->share_list);
if ( new_client->cli_id != pool->client->cli_id )
- printk("adding new %s %d to shared pool owned by %s %d\n",
+ tmh_client_info("adding new %s %d to shared pool owned by %s %d\n",
client_str, new_client->cli_id, client_str, pool->client->cli_id);
return ++pool->shared_count;
}
@@ -1137,7 +1137,7 @@ static NOINLINE void shared_pool_reassig
old_client->eph_count -= _atomic_read(pool->pgp_count);
list_splice_init(&old_client->ephemeral_page_list,
&new_client->ephemeral_page_list);
- printk("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
+ tmh_client_info("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
cli_id_str, old_client->cli_id, cli_id_str, new_client->cli_id, poolid);
pool->pool_id = poolid;
}
@@ -1173,7 +1173,7 @@ static NOINLINE int shared_pool_quit(poo
}
return 0;
}
- printk("tmem: no match unsharing pool, %s=%d\n",
+ tmh_client_warn("tmem: no match unsharing pool, %s=%d\n",
cli_id_str,pool->client->cli_id);
return -1;
}
@@ -1184,17 +1184,18 @@ static void pool_flush(pool_t *pool, cli
ASSERT(pool != NULL);
if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) )
{
- printk("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
+ tmh_client_warn("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
cli_id_str, cli_id, pool->pool_id, cli_id_str,pool->client->cli_id);
return;
}
- printk("%s %s-%s tmem pool ",destroy?"destroying":"flushing",
- is_persistent(pool) ? "persistent" : "ephemeral" ,
- is_shared(pool) ? "shared" : "private");
- printk("%s=%d pool_id=%d\n", cli_id_str,pool->client->cli_id,pool->pool_id);
+ tmh_client_info("%s %s-%s tmem pool %s=%d pool_id=%d\n",
+ destroy ? "destroying" : "flushing",
+ is_persistent(pool) ? "persistent" : "ephemeral" ,
+ is_shared(pool) ? "shared" : "private",
+ cli_id_str, pool->client->cli_id, pool->pool_id);
if ( pool->client->live_migrating )
{
- printk("can't %s pool while %s is live-migrating\n",
+ tmh_client_warn("can't %s pool while %s is live-migrating\n",
destroy?"destroy":"flush", client_str);
return;
}
@@ -1213,21 +1214,22 @@ static client_t *client_create(cli_id_t
client_t *client = tmh_alloc_infra(sizeof(client_t),__alignof__(client_t));
int i;
- printk("tmem: initializing tmem capability for %s=%d...",cli_id_str,cli_id);
+ tmh_client_info("tmem: initializing tmem capability for %s=%d...",
+ cli_id_str, cli_id);
if ( client == NULL )
{
- printk("failed... out of memory\n");
+ tmh_client_err("failed... out of memory\n");
goto fail;
}
memset(client,0,sizeof(client_t));
if ( (client->tmh = tmh_client_init(cli_id)) == NULL )
{
- printk("failed... can't allocate host-dependent part of client\n");
+ tmh_client_err("failed... can't allocate host-dependent part of client\n");
goto fail;
}
if ( !tmh_set_client_from_id(client, client->tmh, cli_id) )
{
- printk("failed... can't set client\n");
+ tmh_client_err("failed... can't set client\n");
goto fail;
}
client->cli_id = cli_id;
@@ -1249,7 +1251,7 @@ static client_t *client_create(cli_id_t
client->eph_count = client->eph_count_max = 0;
client->total_cycles = 0; client->succ_pers_puts = 0;
client->succ_eph_gets = 0; client->succ_pers_gets = 0;
- printk("ok\n");
+ tmh_client_info("ok\n");
return client;
fail:
@@ -1903,32 +1905,33 @@ static NOINLINE int do_tmem_new_pool(cli
cli_id = tmh_get_cli_id_from_current();
else
cli_id = this_cli_id;
- printk("tmem: allocating %s-%s tmem pool for %s=%d...",
+ tmh_client_info("tmem: allocating %s-%s tmem pool for %s=%d...",
persistent ? "persistent" : "ephemeral" ,
shared ? "shared" : "private", cli_id_str, cli_id);
if ( specversion != TMEM_SPEC_VERSION )
{
- printk("failed... unsupported spec version\n");
+ tmh_client_err("failed... unsupported spec version\n");
return -EPERM;
}
if ( pagebits != (PAGE_SHIFT - 12) )
{
- printk("failed... unsupported pagesize %d\n",1<<(pagebits+12));
+ tmh_client_err("failed... unsupported pagesize %d\n",
+ 1 << (pagebits + 12));
return -EPERM;
}
if ( flags & TMEM_POOL_PRECOMPRESSED )
{
- printk("failed... precompression flag set but unsupported\n");
+ tmh_client_err("failed... precompression flag set but unsupported\n");
return -EPERM;
}
if ( flags & TMEM_POOL_RESERVED_BITS )
{
- printk("failed... reserved bits must be zero\n");
+ tmh_client_err("failed... reserved bits must be zero\n");
return -EPERM;
}
if ( (pool = pool_alloc()) == NULL )
{
- printk("failed... out of memory\n");
+ tmh_client_err("failed... out of memory\n");
return -ENOMEM;
}
if ( this_cli_id != CLI_ID_NULL )
@@ -1947,7 +1950,7 @@ static NOINLINE int do_tmem_new_pool(cli
break;
if ( d_poolid >= MAX_POOLS_PER_DOMAIN )
{
- printk("failed... no more pool slots available for this %s\n",
+ tmh_client_err("failed... no more pool slots available for this %s\n",
client_str);
goto fail;
}
@@ -1977,9 +1980,8 @@ static NOINLINE int do_tmem_new_pool(cli
{
if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi )
{
- printk("(matches shared pool uuid=%"PRIx64".%"PRIx64") ",
- uuid_hi, uuid_lo);
- printk("pool_id=%d\n",d_poolid);
+ tmh_client_info("(matches shared pool uuid=%"PRIx64".%"PRIx64") pool_id=%d\n",
+ uuid_hi, uuid_lo, d_poolid);
client->pools[d_poolid] = global_shared_pools[s_poolid];
shared_pool_join(global_shared_pools[s_poolid], client);
pool_free(pool);
@@ -1991,7 +1993,7 @@ static NOINLINE int do_tmem_new_pool(cli
}
if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS )
{
- printk("tmem: failed... no global shared pool slots available\n");
+ tmh_client_warn("tmem: failed... no global shared pool slots available\n");
goto fail;
}
else
@@ -2007,7 +2009,7 @@ static NOINLINE int do_tmem_new_pool(cli
pool->pool_id = d_poolid;
pool->persistent = persistent;
pool->uuid[0] = uuid_lo; pool->uuid[1] = uuid_hi;
- printk("pool_id=%d\n",d_poolid);
+ tmh_client_info("pool_id=%d\n", d_poolid);
return d_poolid;
fail:
@@ -2030,14 +2032,15 @@ static int tmemc_freeze_pools(cli_id_t c
{
list_for_each_entry(client,&global_client_list,client_list)
client_freeze(client,freeze);
- printk("tmem: all pools %s for all %ss\n",s,client_str);
+ tmh_client_info("tmem: all pools %s for all %ss\n", s, client_str);
}
else
{
if ( (client = tmh_client_from_cli_id(cli_id)) == NULL)
return -1;
client_freeze(client,freeze);
- printk("tmem: all pools %s for %s=%d\n",s,cli_id_str,cli_id);
+ tmh_client_info("tmem: all pools %s for %s=%d\n",
+ s, cli_id_str, cli_id);
}
return 0;
}
@@ -2048,7 +2051,7 @@ static int tmemc_flush_mem(cli_id_t cli_
if ( cli_id != CLI_ID_NULL )
{
- printk("tmem: %s-specific flush not supported yet, use --all\n",
+ tmh_client_warn("tmem: %s-specific flush not supported yet, use --all\n",
client_str);
return -1;
}
@@ -2261,13 +2264,15 @@ static int tmemc_set_var_one(client_t *c
case TMEMC_SET_WEIGHT:
old_weight = client->weight;
client->weight = arg1;
- printk("tmem: weight set to %d for %s=%d\n",arg1,cli_id_str,cli_id);
+ tmh_client_info("tmem: weight set to %d for %s=%d\n",
+ arg1, cli_id_str, cli_id);
atomic_sub(old_weight,&client_weight_total);
atomic_add(client->weight,&client_weight_total);
break;
case TMEMC_SET_CAP:
client->cap = arg1;
- printk("tmem: cap set to %d for %s=%d\n",arg1,cli_id_str,cli_id);
+ tmh_client_info("tmem: cap set to %d for %s=%d\n",
+ arg1, cli_id_str, cli_id);
break;
case TMEMC_SET_COMPRESS:
#ifdef __i386__
@@ -2275,17 +2280,17 @@ static int tmemc_set_var_one(client_t *c
#endif
if ( tmh_dedup_enabled() )
{
- printk("tmem: compression %s for all %ss, cannot be changed "
- "when tmem_dedup is enabled\n",
- tmh_compression_enabled() ? "enabled" : "disabled",client_str);
+ tmh_client_warn("tmem: compression %s for all %ss, cannot be changed when tmem_dedup is enabled\n",
+ tmh_compression_enabled() ? "enabled" : "disabled",
+ client_str);
return -1;
}
client->compress = arg1 ? 1 : 0;
- printk("tmem: compression %s for %s=%d\n",
+ tmh_client_info("tmem: compression %s for %s=%d\n",
arg1 ? "enabled" : "disabled",cli_id_str,cli_id);
break;
default:
- printk("tmem: unknown subop %d for tmemc_set_var\n",subop);
+ tmh_client_warn("tmem: unknown subop %d for tmemc_set_var\n", subop);
return -1;
}
return 0;
@@ -2668,7 +2673,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
if ( unlikely(tmh_get_tmemop_from_client(&op, uops) != 0) )
{
- printk("tmem: can't get tmem struct from %s\n",client_str);
+ tmh_client_err("tmem: can't get tmem struct from %s\n", client_str);
rc = -EFAULT;
if ( !tmh_lock_all )
goto simple_error;
@@ -2702,7 +2707,8 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
tmem_write_lock_set = 1;
if ( (client = client_create(tmh_get_cli_id_from_current())) == NULL )
{
- printk("tmem: can't create tmem structure for %s\n",client_str);
+ tmh_client_err("tmem: can't create tmem structure for %s\n",
+ client_str);
rc = -ENOMEM;
goto out;
}
@@ -2726,8 +2732,8 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
if ( ((uint32_t)op.pool_id >= MAX_POOLS_PER_DOMAIN) ||
((pool = client->pools[op.pool_id]) == NULL) )
{
+ tmh_client_err("tmem: operation requested on uncreated pool\n");
rc = -ENODEV;
- printk("tmem: operation requested on uncreated pool\n");
goto out;
}
ASSERT_SENTINEL(pool,POOL);
@@ -2783,11 +2789,11 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
break;
case TMEM_XCHG:
/* need to hold global lock to ensure xchg is atomic */
- printk("tmem_xchg op not implemented yet\n");
+ tmh_client_warn("tmem_xchg op not implemented yet\n");
rc = 0;
break;
default:
- printk("tmem: op %d not implemented\n", op.cmd);
+ tmh_client_warn("tmem: op %d not implemented\n", op.cmd);
rc = 0;
break;
}
--- a/xen/include/xen/tmem_xen.h
+++ b/xen/include/xen/tmem_xen.h
@@ -512,6 +512,9 @@ int tmh_copy_to_client(tmem_cli_mfn_t, p
extern int tmh_copy_tze_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, pagesize_t len);
+#define tmh_client_err(fmt, args...) printk(XENLOG_G_ERR fmt, ##args)
+#define tmh_client_warn(fmt, args...) printk(XENLOG_G_WARNING fmt, ##args)
+#define tmh_client_info(fmt, args...) printk(XENLOG_G_INFO fmt, ##args)
#define TMEM_PERF
#ifdef TMEM_PERF

View File

@ -1,41 +0,0 @@
# HG changeset patch
# User Dan Magenheimer <dan.magenheimer@oracle.com>
# Date 1347365943 -7200
# Node ID 16e0392c6594b1757bbaa82076630a73d843229b
# Parent 0520982a602a3ac06dd5bc573ddaff5edc9c6987
tmem: fixup 2010 cleanup patch that breaks tmem save/restore
20918:a3fa6d444b25 "Fix domain reference leaks" (in Feb 2010, by Jan)
does some cleanup in addition to the leak fixes. Unfortunately, that
cleanup inadvertently resulted in an incorrect fallthrough in a switch
statement which breaks tmem save/restore.
That broken patch was apparently applied to 4.0-testing and 4.1-testing
so those are broken as well.
What is the process now for requesting back-patches to 4.0 and 4.1?
(Side note: This does not by itself entirely fix save/restore in 4.2.)
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2419,6 +2419,7 @@ static NOINLINE int tmemc_save_subop(int
break;
tmh_copy_to_client_buf(buf, pool->uuid, 2);
rc = 0;
+ break;
case TMEMC_SAVE_END:
if ( client == NULL )
break;
@@ -2429,6 +2430,7 @@ static NOINLINE int tmemc_save_subop(int
pgp_free_from_inv_list(client,pgp);
client->frozen = client->was_frozen;
rc = 0;
+ break;
}
return rc;
}

View File

@ -1,106 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1347365969 -7200
# Node ID e4cb8411161043c726f699252cc761e77853e820
# Parent 16e0392c6594b1757bbaa82076630a73d843229b
tmem: cleanup
- one more case of checking for a specific rather than any error
- drop no longer needed first parameter from cli_put_page()
- drop a redundant cast
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -1468,7 +1468,7 @@ static NOINLINE int do_tmem_put_compress
pgp_free_data(pgp, pgp->us.obj->pool);
START_CYC_COUNTER(compress);
ret = tmh_compress_from_client(cmfn, &dst, &size, clibuf);
- if ( (ret == -EFAULT) || (ret == 0) )
+ if ( ret <= 0 )
goto out;
else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) {
ret = 0;
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -97,7 +97,7 @@ static inline void *cli_get_page(tmem_cl
return NULL;
}
-static inline void cli_put_page(tmem_cli_mfn_t cmfn, void *cli_va, pfp_t *cli_pfp,
+static inline void cli_put_page(void *cli_va, pfp_t *cli_pfp,
unsigned long cli_mfn, bool_t mark_dirty)
{
ASSERT(0);
@@ -126,20 +126,20 @@ static inline void *cli_get_page(tmem_cl
}
*pcli_mfn = page_to_mfn(page);
- *pcli_pfp = (pfp_t *)page;
+ *pcli_pfp = page;
return map_domain_page(*pcli_mfn);
}
-static inline void cli_put_page(tmem_cli_mfn_t cmfn, void *cli_va, pfp_t *cli_pfp,
+static inline void cli_put_page(void *cli_va, pfp_t *cli_pfp,
unsigned long cli_mfn, bool_t mark_dirty)
{
if ( mark_dirty )
{
- put_page_and_type((struct page_info *)cli_pfp);
+ put_page_and_type(cli_pfp);
paging_mark_dirty(current->domain,cli_mfn);
}
else
- put_page((struct page_info *)cli_pfp);
+ put_page(cli_pfp);
unmap_domain_page(cli_va);
}
#endif
@@ -188,7 +188,7 @@ EXPORT int tmh_copy_from_client(pfp_t *p
else if ( len )
rc = -EINVAL;
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(tmem_va);
return rc;
}
@@ -221,7 +221,7 @@ EXPORT int tmh_compress_from_client(tmem
ASSERT(ret == LZO_E_OK);
*out_va = dmem;
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
return 1;
}
@@ -259,7 +259,7 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
rc = -EINVAL;
unmap_domain_page(tmem_va);
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
mb();
return rc;
}
@@ -286,7 +286,7 @@ EXPORT int tmh_decompress_to_client(tmem
ASSERT(ret == LZO_E_OK);
ASSERT(out_len == PAGE_SIZE);
if ( cli_va )
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
else if ( copy_to_guest(clibuf, scratch, PAGE_SIZE) )
return -EFAULT;
mb();
@@ -310,7 +310,7 @@ EXPORT int tmh_copy_tze_to_client(tmem_c
memcpy((char *)cli_va,(char *)tmem_va,len);
if ( len < PAGE_SIZE )
memset((char *)cli_va+len,0,PAGE_SIZE-len);
- cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
mb();
return 1;
}

View File

@ -12,8 +12,10 @@ the fixmaps together with other boot time page table construction.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
Index: xen-4.2.0-testing/xen/arch/x86/boot/head.S
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/boot/head.S
+++ xen-4.2.0-testing/xen/arch/x86/boot/head.S
@@ -3,6 +3,7 @@
#include <public/xen.h>
#include <asm/asm_defns.h>
@ -55,8 +57,10 @@ Acked-by: Keir Fraser <keir@xen.org>
#endif
/* Initialize 4kB mappings of first 2MB or 4MB of memory. */
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
Index: xen-4.2.0-testing/xen/arch/x86/efi/boot.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/efi/boot.c
+++ xen-4.2.0-testing/xen/arch/x86/efi/boot.c
@@ -17,6 +17,9 @@
#include <xen/vga.h>
#include <asm/e820.h>
@ -88,8 +92,10 @@ Acked-by: Keir Fraser <keir@xen.org>
/* Initialise L3 boot-map page directory entries. */
l3_bootmap[l3_table_offset(xen_phys_start)] =
l3e_from_paddr((UINTN)l2_bootmap, __PAGE_HYPERVISOR);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
Index: xen-4.2.0-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/mm.c
+++ xen-4.2.0-testing/xen/arch/x86/mm.c
@@ -130,6 +130,10 @@
l1_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l1_identmap[L1_PAGETABLE_ENTRIES];
@ -101,8 +107,10 @@ Acked-by: Keir Fraser <keir@xen.org>
#define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
/*
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
Index: xen-4.2.0-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-4.2.0-testing/xen/arch/x86/x86_64/mm.c
@@ -65,6 +65,10 @@ l3_pgentry_t __attribute__ ((__section__
l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l2_xenmap[L2_PAGETABLE_ENTRIES];
@ -114,9 +122,11 @@ Acked-by: Keir Fraser <keir@xen.org>
/* Enough page directories to map into the bottom 1GB. */
l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
l3_bootmap[L3_PAGETABLE_ENTRIES];
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -315,7 +315,7 @@ extern unsigned char boot_edid_info[128]
Index: xen-4.2.0-testing/xen/include/asm-x86/config.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/config.h
+++ xen-4.2.0-testing/xen/include/asm-x86/config.h
@@ -317,7 +317,7 @@ extern unsigned char boot_edid_info[128]
#define MACHPHYS_MBYTES 16 /* 1 MB needed per 1 GB memory */
#define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6)
@ -125,8 +135,10 @@ Acked-by: Keir Fraser <keir@xen.org>
#define IOREMAP_VIRT_START (IOREMAP_VIRT_END - (IOREMAP_MBYTES<<20))
#define DIRECTMAP_VIRT_END IOREMAP_VIRT_START
#define DIRECTMAP_VIRT_START (DIRECTMAP_VIRT_END - (DIRECTMAP_MBYTES<<20))
--- a/xen/include/asm-x86/fixmap.h
+++ b/xen/include/asm-x86/fixmap.h
Index: xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/fixmap.h
+++ xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
@@ -13,12 +13,17 @@
#define _ASM_FIXMAP_H
@ -161,8 +173,10 @@ Acked-by: Keir Fraser <keir@xen.org>
+#endif /* __ASSEMBLY__ */
+
#endif
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
Index: xen-4.2.0-testing/xen/include/asm-x86/page.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/page.h
+++ xen-4.2.0-testing/xen/include/asm-x86/page.h
@@ -1,6 +1,8 @@
#ifndef __X86_PAGE_H__
#define __X86_PAGE_H__
@ -189,8 +203,10 @@ Acked-by: Keir Fraser <keir@xen.org>
void paging_init(void);
void setup_idle_pagetable(void);
#endif /* !defined(__ASSEMBLY__) */
Index: xen-4.2.0-testing/xen/include/xen/const.h
===================================================================
--- /dev/null
+++ b/xen/include/xen/const.h
+++ xen-4.2.0-testing/xen/include/xen/const.h
@@ -0,0 +1,24 @@
+/* const.h: Macros for dealing with constants. */
+

View File

@ -0,0 +1,130 @@
References: FATE#313605
# HG changeset patch
# User Jiongxi Li <jiongxi.li@intel.com>
# Date 1347912248 -3600
# Node ID ec60de627945f17ec2ce5c14e1224b59403875f7
# Parent 62de66cec48a1716bb700912da451a26296b8d1e
xen: enable APIC-Register Virtualization
Add APIC register virtualization support
- APIC read doesn't cause VM-Exit
- APIC write becomes trap-like
Signed-off-by: Gang Wei <gang.wei@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -823,6 +823,14 @@ static int vlapic_write(struct vcpu *v,
return rc;
}
+int vlapic_apicv_write(struct vcpu *v, unsigned int offset)
+{
+ uint32_t val = vlapic_get_reg(vcpu_vlapic(v), offset);
+
+ vlapic_reg_write(v, offset, val);
+ return 0;
+}
+
int hvm_x2apic_msr_write(struct vcpu *v, unsigned int msr, uint64_t msr_content)
{
struct vlapic *vlapic = vcpu_vlapic(v);
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -89,6 +89,7 @@ static void __init vmx_display_features(
P(cpu_has_vmx_vnmi, "Virtual NMI");
P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
+ P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
#undef P
if ( !printed )
@@ -186,6 +187,14 @@ static int vmx_init_vmcs_config(void)
if ( opt_unrestricted_guest_enabled )
opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ /*
+ * "APIC Register Virtualization"
+ * can be set only when "use TPR shadow" is set
+ */
+ if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
+ opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT;
+
+
_vmx_secondary_exec_control = adjust_vmx_controls(
"Secondary Exec Control", min, opt,
MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2274,6 +2274,16 @@ static void vmx_idtv_reinject(unsigned l
}
}
+static int vmx_handle_apic_write(void)
+{
+ unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
+ unsigned int offset = exit_qualification & 0xfff;
+
+ ASSERT(cpu_has_vmx_apic_reg_virt);
+
+ return vlapic_apicv_write(current, offset);
+}
+
void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
@@ -2729,6 +2739,11 @@ void vmx_vmexit_handler(struct cpu_user_
break;
}
+ case EXIT_REASON_APIC_WRITE:
+ if ( vmx_handle_apic_write() )
+ hvm_inject_hw_exception(TRAP_gp_fault, 0);
+ break;
+
case EXIT_REASON_ACCESS_GDTR_OR_IDTR:
case EXIT_REASON_ACCESS_LDTR_OR_TR:
case EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED:
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -103,6 +103,8 @@ void vlapic_EOI_set(struct vlapic *vlapi
int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high);
+int vlapic_apicv_write(struct vcpu *v, unsigned int offset);
+
struct vlapic *vlapic_lowest_prio(
struct domain *d, struct vlapic *source,
int short_hand, uint8_t dest, uint8_t dest_mode);
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -182,6 +182,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
extern u32 vmx_secondary_exec_control;
@@ -230,6 +231,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
SECONDARY_EXEC_UNRESTRICTED_GUEST)
#define cpu_has_vmx_ple \
(vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+#define cpu_has_vmx_apic_reg_virt \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -129,6 +129,7 @@ void vmx_update_cpu_exec_control(struct
#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
#define EXIT_REASON_INVPCID 58
/*

View File

@ -0,0 +1,505 @@
References: FATE#313605
# HG changeset patch
# User Jiongxi Li <jiongxi.li@intel.com>
# Date 1347912311 -3600
# Node ID 713b8849b11afa05f1dde157a3f5086fa3aaad08
# Parent ec60de627945f17ec2ce5c14e1224b59403875f7
xen: enable Virtual-interrupt delivery
Virtual interrupt delivery avoids Xen to inject vAPIC interrupts
manually, which is fully taken care of by the hardware. This needs
some special awareness into existing interrupr injection path:
For pending interrupt from vLAPIC, instead of direct injection, we may
need update architecture specific indicators before resuming to guest.
Before returning to guest, RVI should be updated if any pending IRRs
EOI exit bitmap controls whether an EOI write should cause VM-Exit. If
set, a trap-like induced EOI VM-Exit is triggered. The approach here
is to manipulate EOI exit bitmap based on value of TMR. Level
triggered irq requires a hook in vLAPIC EOI write, so that vIOAPIC EOI
is triggered and emulated
Signed-off-by: Gang Wei <gang.wei@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -145,6 +145,9 @@ int vlapic_set_irq(struct vlapic *vlapic
if ( trig )
vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]);
+ if ( hvm_funcs.update_eoi_exit_bitmap )
+ hvm_funcs.update_eoi_exit_bitmap(vlapic_vcpu(vlapic), vec ,trig);
+
/* We may need to wake up target vcpu, besides set pending bit here */
return !vlapic_test_and_set_irr(vec, vlapic);
}
@@ -410,6 +413,14 @@ void vlapic_EOI_set(struct vlapic *vlapi
hvm_dpci_msi_eoi(current->domain, vector);
}
+void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector)
+{
+ if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) )
+ vioapic_update_EOI(vlapic_domain(vlapic), vector);
+
+ hvm_dpci_msi_eoi(current->domain, vector);
+}
+
int vlapic_ipi(
struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
{
@@ -1000,6 +1011,14 @@ void vlapic_adjust_i8259_target(struct d
pt_adjust_global_vcpu_target(v);
}
+int vlapic_virtual_intr_delivery_enabled(void)
+{
+ if ( hvm_funcs.virtual_intr_delivery_enabled )
+ return hvm_funcs.virtual_intr_delivery_enabled();
+ else
+ return 0;
+}
+
int vlapic_has_pending_irq(struct vcpu *v)
{
struct vlapic *vlapic = vcpu_vlapic(v);
@@ -1012,6 +1031,9 @@ int vlapic_has_pending_irq(struct vcpu *
if ( irr == -1 )
return -1;
+ if ( vlapic_virtual_intr_delivery_enabled() )
+ return irr;
+
isr = vlapic_find_highest_isr(vlapic);
isr = (isr != -1) ? isr : 0;
if ( (isr & 0xf0) >= (irr & 0xf0) )
@@ -1024,6 +1046,9 @@ int vlapic_ack_pending_irq(struct vcpu *
{
struct vlapic *vlapic = vcpu_vlapic(v);
+ if ( vlapic_virtual_intr_delivery_enabled() )
+ return 1;
+
vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
vlapic_clear_irr(vector, vlapic);
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -206,6 +206,7 @@ void vmx_intr_assist(void)
struct vcpu *v = current;
unsigned int tpr_threshold = 0;
enum hvm_intblk intblk;
+ int pt_vector = -1;
/* Block event injection when single step with MTF. */
if ( unlikely(v->arch.hvm_vcpu.single_step) )
@@ -216,7 +217,7 @@ void vmx_intr_assist(void)
}
/* Crank the handle on interrupt state. */
- pt_update_irq(v);
+ pt_vector = pt_update_irq(v);
do {
intack = hvm_vcpu_has_pending_irq(v);
@@ -227,16 +228,34 @@ void vmx_intr_assist(void)
goto out;
intblk = hvm_interrupt_blocked(v, intack);
- if ( intblk == hvm_intblk_tpr )
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ /* Set "Interrupt-window exiting" for ExtINT */
+ if ( (intblk != hvm_intblk_none) &&
+ ( (intack.source == hvm_intsrc_pic) ||
+ ( intack.source == hvm_intsrc_vector) ) )
+ {
+ enable_intr_window(v, intack);
+ goto out;
+ }
+
+ if ( __vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK )
+ {
+ if ( (intack.source == hvm_intsrc_pic) ||
+ (intack.source == hvm_intsrc_nmi) ||
+ (intack.source == hvm_intsrc_mce) )
+ enable_intr_window(v, intack);
+
+ goto out;
+ }
+ } else if ( intblk == hvm_intblk_tpr )
{
ASSERT(vlapic_enabled(vcpu_vlapic(v)));
ASSERT(intack.source == hvm_intsrc_lapic);
tpr_threshold = intack.vector >> 4;
goto out;
- }
-
- if ( (intblk != hvm_intblk_none) ||
- (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+ } else if ( (intblk != hvm_intblk_none) ||
+ (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
{
enable_intr_window(v, intack);
goto out;
@@ -253,6 +272,44 @@ void vmx_intr_assist(void)
{
hvm_inject_hw_exception(TRAP_machine_check, HVM_DELIVER_NO_ERROR_CODE);
}
+ else if ( cpu_has_vmx_virtual_intr_delivery &&
+ intack.source != hvm_intsrc_pic &&
+ intack.source != hvm_intsrc_vector )
+ {
+ unsigned long status = __vmread(GUEST_INTR_STATUS);
+
+ /*
+ * Set eoi_exit_bitmap for periodic timer interrup to cause EOI-induced VM
+ * exit, then pending periodic time interrups have the chance to be injected
+ * for compensation
+ */
+ if (pt_vector != -1)
+ vmx_set_eoi_exit_bitmap(v, pt_vector);
+
+ /* we need update the RVI field */
+ status &= ~(unsigned long)0x0FF;
+ status |= (unsigned long)0x0FF &
+ intack.vector;
+ __vmwrite(GUEST_INTR_STATUS, status);
+ if (v->arch.hvm_vmx.eoi_exitmap_changed) {
+#ifdef __i386__
+#define UPDATE_EOI_EXITMAP(v, e) { \
+ if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) { \
+ __vmwrite(EOI_EXIT_BITMAP##e, v->arch.hvm_vmx.eoi_exit_bitmap[e]); \
+ __vmwrite(EOI_EXIT_BITMAP##e##_HIGH, v->arch.hvm_vmx.eoi_exit_bitmap[e] >> 32);}}
+#else
+#define UPDATE_EOI_EXITMAP(v, e) { \
+ if (test_and_clear_bit(e, &v->arch.hvm_vmx.eoi_exitmap_changed)) { \
+ __vmwrite(EOI_EXIT_BITMAP##e, v->arch.hvm_vmx.eoi_exit_bitmap[e]);}}
+#endif
+ UPDATE_EOI_EXITMAP(v, 0);
+ UPDATE_EOI_EXITMAP(v, 1);
+ UPDATE_EOI_EXITMAP(v, 2);
+ UPDATE_EOI_EXITMAP(v, 3);
+ }
+
+ pt_intr_post(v, intack);
+ }
else
{
HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
@@ -262,11 +319,16 @@ void vmx_intr_assist(void)
/* Is there another IRQ to queue up behind this one? */
intack = hvm_vcpu_has_pending_irq(v);
- if ( unlikely(intack.source != hvm_intsrc_none) )
- enable_intr_window(v, intack);
+ if ( !cpu_has_vmx_virtual_intr_delivery ||
+ intack.source == hvm_intsrc_pic ||
+ intack.source == hvm_intsrc_vector )
+ {
+ if ( unlikely(intack.source != hvm_intsrc_none) )
+ enable_intr_window(v, intack);
+ }
out:
- if ( cpu_has_vmx_tpr_shadow )
+ if ( !cpu_has_vmx_virtual_intr_delivery && cpu_has_vmx_tpr_shadow )
__vmwrite(TPR_THRESHOLD, tpr_threshold);
}
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -90,6 +90,7 @@ static void __init vmx_display_features(
P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
+ P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
#undef P
if ( !printed )
@@ -188,11 +189,12 @@ static int vmx_init_vmcs_config(void)
opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;
/*
- * "APIC Register Virtualization"
+ * "APIC Register Virtualization" and "Virtual Interrupt Delivery"
* can be set only when "use TPR shadow" is set
*/
if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
- opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT;
+ opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
_vmx_secondary_exec_control = adjust_vmx_controls(
@@ -787,6 +789,22 @@ static int construct_vmcs(struct vcpu *v
__vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
__vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ /* EOI-exit bitmap */
+ v->arch.hvm_vmx.eoi_exit_bitmap[0] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP0, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
+ v->arch.hvm_vmx.eoi_exit_bitmap[1] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP1, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
+ v->arch.hvm_vmx.eoi_exit_bitmap[2] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP2, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
+ v->arch.hvm_vmx.eoi_exit_bitmap[3] = (uint64_t)0;
+ __vmwrite(EOI_EXIT_BITMAP3, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
+
+ /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */
+ __vmwrite(GUEST_INTR_STATUS, 0);
+ }
+
/* Host data selectors. */
__vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
__vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
@@ -1028,6 +1046,30 @@ int vmx_add_host_load_msr(u32 msr)
return 0;
}
+void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
+{
+ int index, offset, changed;
+
+ index = vector >> 6;
+ offset = vector & 63;
+ changed = !test_and_set_bit(offset,
+ (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]);
+ if (changed)
+ set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed);
+}
+
+void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector)
+{
+ int index, offset, changed;
+
+ index = vector >> 6;
+ offset = vector & 63;
+ changed = test_and_clear_bit(offset,
+ (uint64_t *)&v->arch.hvm_vmx.eoi_exit_bitmap[index]);
+ if (changed)
+ set_bit(index, &v->arch.hvm_vmx.eoi_exitmap_changed);
+}
+
int vmx_create_vmcs(struct vcpu *v)
{
struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1502,6 +1502,22 @@ static void vmx_set_info_guest(struct vc
vmx_vmcs_exit(v);
}
+static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector, u8 trig)
+{
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ if (trig)
+ vmx_set_eoi_exit_bitmap(v, vector);
+ else
+ vmx_clear_eoi_exit_bitmap(v, vector);
+ }
+}
+
+static int vmx_virtual_intr_delivery_enabled(void)
+{
+ return cpu_has_vmx_virtual_intr_delivery;
+}
+
static struct hvm_function_table __read_mostly vmx_function_table = {
.name = "VMX",
.cpu_up_prepare = vmx_cpu_up_prepare,
@@ -1548,7 +1564,9 @@ static struct hvm_function_table __read_
.nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
.nhvm_vcpu_vmexit_trap = nvmx_vmexit_trap,
.nhvm_intr_blocked = nvmx_intr_blocked,
- .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources
+ .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
+ .update_eoi_exit_bitmap = vmx_update_eoi_exit_bitmap,
+ .virtual_intr_delivery_enabled = vmx_virtual_intr_delivery_enabled
};
struct hvm_function_table * __init start_vmx(void)
@@ -2284,6 +2302,17 @@ static int vmx_handle_apic_write(void)
return vlapic_apicv_write(current, offset);
}
+/*
+ * When "Virtual Interrupt Delivery" is enabled, this function is used
+ * to handle EOI-induced VM exit
+ */
+void vmx_handle_EOI_induced_exit(struct vlapic *vlapic, int vector)
+{
+ ASSERT(cpu_has_vmx_virtual_intr_delivery);
+
+ vlapic_handle_EOI_induced_exit(vlapic, vector);
+}
+
void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
@@ -2677,6 +2706,16 @@ void vmx_vmexit_handler(struct cpu_user_
hvm_inject_hw_exception(TRAP_gp_fault, 0);
break;
+ case EXIT_REASON_EOI_INDUCED:
+ {
+ int vector;
+ exit_qualification = __vmread(EXIT_QUALIFICATION);
+ vector = exit_qualification & 0xff;
+
+ vmx_handle_EOI_induced_exit(vcpu_vlapic(current), vector);
+ break;
+ }
+
case EXIT_REASON_IO_INSTRUCTION:
exit_qualification = __vmread(EXIT_QUALIFICATION);
if ( exit_qualification & 0x10 )
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -212,7 +212,7 @@ static void pt_timer_fn(void *data)
pt_unlock(pt);
}
-void pt_update_irq(struct vcpu *v)
+int pt_update_irq(struct vcpu *v)
{
struct list_head *head = &v->arch.hvm_vcpu.tm_list;
struct periodic_time *pt, *temp, *earliest_pt = NULL;
@@ -245,7 +245,7 @@ void pt_update_irq(struct vcpu *v)
if ( earliest_pt == NULL )
{
spin_unlock(&v->arch.hvm_vcpu.tm_lock);
- return;
+ return -1;
}
earliest_pt->irq_issued = 1;
@@ -263,6 +263,17 @@ void pt_update_irq(struct vcpu *v)
hvm_isa_irq_deassert(v->domain, irq);
hvm_isa_irq_assert(v->domain, irq);
}
+
+ /*
+ * If periodic timer interrut is handled by lapic, its vector in
+ * IRR is returned and used to set eoi_exit_bitmap for virtual
+ * interrupt delivery case. Otherwise return -1 to do nothing.
+ */
+ if ( vlapic_accept_pic_intr(v) &&
+ (&v->domain->arch.hvm_domain)->vpic[0].int_output )
+ return -1;
+ else
+ return pt_irq_vector(earliest_pt, hvm_intsrc_lapic);
}
static struct periodic_time *is_pt_irq(
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -180,6 +180,10 @@ struct hvm_function_table {
enum hvm_intblk (*nhvm_intr_blocked)(struct vcpu *v);
void (*nhvm_domain_relinquish_resources)(struct domain *d);
+
+ /* Virtual interrupt delivery */
+ void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector, u8 trig);
+ int (*virtual_intr_delivery_enabled)(void);
};
extern struct hvm_function_table hvm_funcs;
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -100,6 +100,7 @@ int vlapic_accept_pic_intr(struct vcpu *
void vlapic_adjust_i8259_target(struct domain *d);
void vlapic_EOI_set(struct vlapic *vlapic);
+void vlapic_handle_EOI_induced_exit(struct vlapic *vlapic, int vector);
int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high);
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -110,6 +110,9 @@ struct arch_vmx_struct {
unsigned int host_msr_count;
struct vmx_msr_entry *host_msr_area;
+ uint32_t eoi_exitmap_changed;
+ uint64_t eoi_exit_bitmap[4];
+
unsigned long host_cr0;
/* Is the guest in real mode? */
@@ -183,6 +186,7 @@ extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
extern u32 vmx_secondary_exec_control;
@@ -233,6 +237,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
(vmx_secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
#define cpu_has_vmx_apic_reg_virt \
(vmx_secondary_exec_control & SECONDARY_EXEC_APIC_REGISTER_VIRT)
+#define cpu_has_vmx_virtual_intr_delivery \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -251,6 +257,7 @@ enum vmcs_field {
GUEST_GS_SELECTOR = 0x0000080a,
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,
@@ -278,6 +285,14 @@ enum vmcs_field {
APIC_ACCESS_ADDR_HIGH = 0x00002015,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
@@ -398,6 +413,8 @@ int vmx_write_guest_msr(u32 msr, u64 val
int vmx_add_guest_msr(u32 msr);
int vmx_add_host_load_msr(u32 msr);
void vmx_vmcs_switch(struct vmcs_struct *from, struct vmcs_struct *to);
+void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
+void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
#endif /* ASM_X86_HVM_VMX_VMCS_H__ */
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -119,6 +119,7 @@ void vmx_update_cpu_exec_control(struct
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
#define EXIT_REASON_ACCESS_GDTR_OR_IDTR 46
#define EXIT_REASON_ACCESS_LDTR_OR_TR 47
#define EXIT_REASON_EPT_VIOLATION 48
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -141,7 +141,7 @@ struct pl_time { /* platform time */
void pt_save_timer(struct vcpu *v);
void pt_restore_timer(struct vcpu *v);
-void pt_update_irq(struct vcpu *v);
+int pt_update_irq(struct vcpu *v);
void pt_intr_post(struct vcpu *v, struct hvm_intack intack);
void pt_migrate(struct vcpu *v);

View File

@ -0,0 +1,120 @@
References: FATE#313605
# HG changeset patch
# User Jiongxi Li <jiongxi.li@intel.com>
# Date 1347912362 -3600
# Node ID c2578dd96b8318e108fff0f340411135dedaa47d
# Parent 713b8849b11afa05f1dde157a3f5086fa3aaad08
xen: add virtual x2apic support for apicv
basically to benefit from apicv, we need clear MSR bitmap for
corresponding x2apic MSRs:
0x800 - 0x8ff: no read intercept for apicv register virtualization
TPR,EOI,SELF-IPI: no write intercept for virtual interrupt
delivery
Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -658,7 +658,7 @@ static void vmx_set_host_env(struct vcpu
(unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
}
-void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
{
unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
@@ -673,14 +673,18 @@ void vmx_disable_intercept_for_msr(struc
*/
if ( msr <= 0x1fff )
{
- __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
- __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */
+ if (type & MSR_TYPE_R)
+ __clear_bit(msr, msr_bitmap + 0x000/BYTES_PER_LONG); /* read-low */
+ if (type & MSR_TYPE_W)
+ __clear_bit(msr, msr_bitmap + 0x800/BYTES_PER_LONG); /* write-low */
}
else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
{
msr &= 0x1fff;
- __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */
- __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */
+ if (type & MSR_TYPE_R)
+ __clear_bit(msr, msr_bitmap + 0x400/BYTES_PER_LONG); /* read-high */
+ if (type & MSR_TYPE_W)
+ __clear_bit(msr, msr_bitmap + 0xc00/BYTES_PER_LONG); /* write-high */
}
}
@@ -776,13 +780,25 @@ static int construct_vmcs(struct vcpu *v
v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
__vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
- vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
- vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
- vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+ vmx_disable_intercept_for_msr(v, MSR_FS_BASE, MSR_TYPE_R | MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_GS_BASE, MSR_TYPE_R | MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W);
if ( cpu_has_vmx_pat && paging_mode_hap(d) )
- vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
+ if ( cpu_has_vmx_apic_reg_virt )
+ {
+ int msr;
+ for (msr = MSR_IA32_APICBASE_MSR; msr <= MSR_IA32_APICBASE_MSR + 0xff; msr++)
+ vmx_disable_intercept_for_msr(v, msr, MSR_TYPE_R);
+ }
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ {
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICTPR_MSR, MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICEOI_MSR, MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_APICSELF_MSR, MSR_TYPE_W);
+ }
}
/* I/O access bitmap. */
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2036,7 +2036,7 @@ static int vmx_msr_write_intercept(unsig
for ( ; (rc == 0) && lbr->count; lbr++ )
for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
- vmx_disable_intercept_for_msr(v, lbr->base + i);
+ vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
}
if ( (rc < 0) ||
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -407,7 +407,9 @@ enum vmcs_field {
#define VMCS_VPID_WIDTH 16
-void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr);
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type);
int vmx_read_guest_msr(u32 msr, u64 *val);
int vmx_write_guest_msr(u32 msr, u64 val);
int vmx_add_guest_msr(u32 msr);
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -291,6 +291,9 @@
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define MSR_IA32_APICBASE_MSR 0x800
+#define MSR_IA32_APICTPR_MSR 0x808
+#define MSR_IA32_APICEOI_MSR 0x80b
+#define MSR_IA32_APICSELF_MSR 0x83f
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b

View File

@ -1,28 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348039675 -7200
# Node ID 3e3959413b2fbef584993beb434285d0691d5c67
# Parent 4a0438fe1e6afe01e46023bcb2c828c5aaeefb1d
x86: properly check XEN_DOMCTL_ioport_mapping arguments for invalid range
In particular, the case of "np" being a very large value wasn't handled
correctly. The range start checks also were off by one (except that in
practice, when "np" is properly range checked, this would still have
been caught by the range end checks).
Also, is a GFN wrap in XEN_DOMCTL_memory_mapping really okay?
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -888,7 +888,7 @@ long arch_do_domctl(
int found = 0;
ret = -EINVAL;
- if ( (np == 0) || (fgp > MAX_IOPORTS) || (fmp > MAX_IOPORTS) ||
+ if ( ((fgp | fmp | (np - 1)) >= MAX_IOPORTS) ||
((fgp + np) > MAX_IOPORTS) || ((fmp + np) > MAX_IOPORTS) )
{
printk(XENLOG_G_ERR

View File

@ -1,26 +0,0 @@
# HG changeset patch
# User Zhenzhong Duan <zhenzhong.duan@oracle.com>
# Date 1348069127 -7200
# Node ID fee83ac77d8c7339abf5185690603ea5b0c548cf
# Parent 7b045d43e59dcb42340097058502bf456e151180
tmem: bump pool version to 1 to fix restore issue when tmem enabled
Restore fails when tmem is enabled both in hypervisor and guest. This
is due to spec version mismatch when restoring a pool.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/tmem.c
+++ b/xen/common/tmem.c
@@ -2407,7 +2407,8 @@ static NOINLINE int tmemc_save_subop(int
break;
rc = (pool->persistent ? TMEM_POOL_PERSIST : 0) |
(pool->shared ? TMEM_POOL_SHARED : 0) |
- (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT);
+ (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT) |
+ (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT);
break;
case TMEMC_SAVE_GET_POOL_NPAGES:
if ( pool == NULL )

View File

@ -1,257 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348125713 -7200
# Node ID 14980591956954584f3cd87bf4e8d306360e7f27
# Parent 4e93cbeac98b3e6be98e5ec0881b44b68ee95974
x86: tighten checks in XEN_DOMCTL_memory_mapping handler
Properly checking the MFN implies knowing the physical address width
supported by the platform, so to obtain this consistently the
respective code gets moved out of the MTRR subdir.
Btw., the model specific workaround in that code is likely unnecessary
- I believe those CPU models don't support 64-bit mode. But I wasn't
able to formally verify this, so I preferred to retain that code for
now.
But domctl code here also was lacking other error checks (as was,
looking at it again from that angle) the XEN_DOMCTL_ioport_mapping one.
Besides adding the missing checks, printing is also added for the case
where revoking access permissions didn't work (as that may have
implications for the host operator, e.g. wanting to not pass through
affected devices to another guest until the one previously using them
did actually die).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/cpu/centaur.c
+++ b/xen/arch/x86/cpu/centaur.c
@@ -56,6 +56,9 @@ static void __init init_c3(struct cpuinf
if (c->x86_model >=6 && c->x86_model <9)
set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+ if (cpuid_eax(0x80000000) < 0x80000008)
+ paddr_bits = 32;
+
get_model_name(c);
display_cacheinfo(c);
}
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -36,6 +36,8 @@ integer_param("cpuid_mask_ext_edx", opt_
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+unsigned int paddr_bits __read_mostly = 36;
+
/*
* Default host IA32_CR_PAT value to cover all memory types.
* BIOS usually sets it to 0x07040600070406.
@@ -265,6 +267,8 @@ static void __cpuinit generic_identify(s
}
if ( xlvl >= 0x80000004 )
get_model_name(c); /* Default name */
+ if ( xlvl >= 0x80000008 )
+ paddr_bits = cpuid_eax(0x80000008) & 0xff;
}
/* Intel-defined flags: level 0x00000007 */
--- a/xen/arch/x86/cpu/cyrix.c
+++ b/xen/arch/x86/cpu/cyrix.c
@@ -255,7 +255,9 @@ static void __init init_cyrix(struct cpu
}
safe_strcpy(c->x86_model_id, Cx86_model[dir0_msn & 7]);
if (p) safe_strcat(c->x86_model_id, p);
- return;
+
+ if (cpu_has_cyrix_arr)
+ paddr_bits = 32;
}
/*
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -144,6 +144,11 @@ void __devinit early_intel_workaround(st
c->cpuid_level);
}
}
+
+ /* CPUID workaround for Intel 0F33/0F34 CPU */
+ if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 &&
+ (boot_cpu_data.x86_mask == 3 || boot_cpu_data.x86_mask == 4))
+ paddr_bits = 36;
}
/*
--- a/xen/arch/x86/cpu/mtrr/main.c
+++ b/xen/arch/x86/cpu/mtrr/main.c
@@ -587,8 +587,6 @@ struct mtrr_value {
unsigned long lsize;
};
-unsigned int paddr_bits __read_mostly = 36;
-
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -602,48 +600,12 @@ void __init mtrr_bp_init(void)
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
- size_and_mask = 0x00f00000;
-
- /* This is an AMD specific MSR, but we assume(hope?) that
- Intel will implement it to when they extend the address
- bus of the Xeon. */
- if (cpuid_eax(0x80000000) >= 0x80000008) {
- paddr_bits = cpuid_eax(0x80000008) & 0xff;
- /* CPUID workaround for Intel 0F33/0F34 CPU */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
- boot_cpu_data.x86 == 0xF &&
- boot_cpu_data.x86_model == 0x3 &&
- (boot_cpu_data.x86_mask == 0x3 ||
- boot_cpu_data.x86_mask == 0x4))
- paddr_bits = 36;
-
- size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
- size_and_mask = ~size_or_mask & 0xfffff00000ULL;
- } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
- boot_cpu_data.x86 == 6) {
- /* VIA C* family have Intel style MTRRs, but
- don't support PAE */
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
} else {
#ifndef CONFIG_X86_64
switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (cpu_has_k6_mtrr) {
- /* Pre-Athlon (K6) AMD CPU MTRRs */
- mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
case X86_VENDOR_CYRIX:
- if (cpu_has_cyrix_arr) {
+ if (cpu_has_cyrix_arr)
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
break;
default:
break;
@@ -652,6 +614,8 @@ void __init mtrr_bp_init(void)
}
if (mtrr_if) {
+ size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
+ size_and_mask = ~size_or_mask & 0xfffff00000ULL;
set_num_var_ranges();
init_table();
if (use_intel())
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -829,10 +829,12 @@ long arch_do_domctl(
unsigned long mfn = domctl->u.memory_mapping.first_mfn;
unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns;
int add = domctl->u.memory_mapping.add_mapping;
- int i;
+ unsigned long i;
ret = -EINVAL;
- if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
+ if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */
+ ((mfn | (mfn + nr_mfns - 1)) >> (paddr_bits - PAGE_SHIFT)) ||
+ (gfn + nr_mfns - 1) < gfn ) /* wrap? */
break;
ret = -EPERM;
@@ -857,8 +859,25 @@ long arch_do_domctl(
d->domain_id, gfn, mfn, nr_mfns);
ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
- for ( i = 0; i < nr_mfns; i++ )
- set_mmio_p2m_entry(d, gfn+i, _mfn(mfn+i));
+ if ( !ret && paging_mode_translate(d) )
+ {
+ for ( i = 0; !ret && i < nr_mfns; i++ )
+ if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) )
+ ret = -EIO;
+ if ( ret )
+ {
+ printk(XENLOG_G_WARNING
+ "memory_map:fail: dom%d gfn=%lx mfn=%lx\n",
+ d->domain_id, gfn + i, mfn + i);
+ while ( i-- )
+ clear_mmio_p2m_entry(d, gfn + i);
+ if ( iomem_deny_access(d, mfn, mfn + nr_mfns - 1) &&
+ IS_PRIV(current->domain) )
+ printk(XENLOG_ERR
+ "memory_map: failed to deny dom%d access to [%lx,%lx]\n",
+ d->domain_id, mfn, mfn + nr_mfns - 1);
+ }
+ }
}
else
{
@@ -866,9 +885,17 @@ long arch_do_domctl(
"memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n",
d->domain_id, gfn, mfn, nr_mfns);
- for ( i = 0; i < nr_mfns; i++ )
- clear_mmio_p2m_entry(d, gfn+i);
+ if ( paging_mode_translate(d) )
+ for ( i = 0; i < nr_mfns; i++ )
+ add |= !clear_mmio_p2m_entry(d, gfn + i);
ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
+ if ( !ret && add )
+ ret = -EIO;
+ if ( ret && IS_PRIV(current->domain) )
+ printk(XENLOG_ERR
+ "memory_map: error %ld %s dom%d access to [%lx,%lx]\n",
+ ret, add ? "removing" : "denying", d->domain_id,
+ mfn, mfn + nr_mfns - 1);
}
rcu_unlock_domain(d);
@@ -930,12 +957,23 @@ long arch_do_domctl(
if ( !found )
{
g2m_ioport = xmalloc(struct g2m_ioport);
+ if ( !g2m_ioport )
+ ret = -ENOMEM;
+ }
+ if ( !found && !ret )
+ {
g2m_ioport->gport = fgp;
g2m_ioport->mport = fmp;
g2m_ioport->np = np;
list_add_tail(&g2m_ioport->list, &hd->g2m_ioport_list);
}
- ret = ioports_permit_access(d, fmp, fmp + np - 1);
+ if ( !ret )
+ ret = ioports_permit_access(d, fmp, fmp + np - 1);
+ if ( ret && !found && g2m_ioport )
+ {
+ list_del(&g2m_ioport->list);
+ xfree(g2m_ioport);
+ }
}
else
{
@@ -950,6 +988,10 @@ long arch_do_domctl(
break;
}
ret = ioports_deny_access(d, fmp, fmp + np - 1);
+ if ( ret && IS_PRIV(current->domain) )
+ printk(XENLOG_ERR
+ "ioport_map: error %ld denying dom%d access to [%x,%x]\n",
+ ret, d->domain_id, fmp, fmp + np - 1);
}
rcu_unlock_domain(d);
}

View File

@ -1,53 +0,0 @@
# HG changeset patch
# User Ben Guthro <ben@guthro.net>
# Date 1348555094 -7200
# Node ID c8d65d91a6f20fa7fae905bbf172e59b335d6371
# Parent b49f7bf52fa92626517386cba89350243b808871
x86/S3: add cache flush on secondary CPUs before going to sleep
Secondary CPUs, between doing their final memory writes (particularly
updating cpu_initialized) and getting a subsequent INIT, may not write
back all modified data. The INIT itself then causes those modifications
to be lost, so in the cpu_initialized case the CPU would find itself
already initialized, (intentionally) entering an infinite loop instead
of actually coming online.
Signed-off-by: Ben Guthro <ben@guthro.net>
Make acpi_dead_idle() call default_dead_idle() rather than duplicating
the logic there.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -647,6 +647,12 @@ static void acpi_dead_idle(void)
}
default_halt:
+ /*
+ * When going into S3, without flushing caches modified data may be
+ * held by the CPUs spinning here indefinitely, and get discarded by
+ * a subsequent INIT.
+ */
+ wbinvd();
for ( ; ; )
halt();
}
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -86,6 +86,12 @@ static void default_idle(void)
static void default_dead_idle(void)
{
+ /*
+ * When going into S3, without flushing caches modified data may be
+ * held by the CPUs spinning here indefinitely, and get discarded by
+ * a subsequent INIT.
+ */
+ wbinvd();
for ( ; ; )
halt();
}

View File

@ -1,44 +0,0 @@
changeset: 25941:795c493fe561
user: Olaf Hering <olaf@aepfle.de>
date: Tue Sep 25 11:03:51 2012 +0100
files: tools/pygrub/src/pygrub
description:
pygrub: always append --args
If a bootloader entry in menu.lst has no additional kernel command line
options listed and the domU.cfg has 'bootargs="--args=something"' the
additional arguments from the config file are not passed to the kernel.
The reason for that incorrect behaviour is that run_grub appends arg
only if the parsed config file has arguments listed.
Fix this by appending args from image section and the config file separatly.
To avoid adding to a NoneType initialize grubcfg['args'] to an empty string.
This does not change behaviour but simplifies the code which appends the
string.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r c8d65d91a6f2 -r 795c493fe561 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Tue Sep 25 08:38:14 2012 +0200
+++ b/tools/pygrub/src/pygrub Tue Sep 25 11:03:51 2012 +0100
@@ -615,13 +615,15 @@ def run_grub(file, entry, fs, arg):
except IndexError:
img = g.cf.images[0]
- grubcfg = { "kernel": None, "ramdisk": None, "args": None }
+ grubcfg = { "kernel": None, "ramdisk": None, "args": "" }
grubcfg["kernel"] = img.kernel[1]
if img.initrd:
grubcfg["ramdisk"] = img.initrd[1]
if img.args:
- grubcfg["args"] = img.args + " " + arg
+ grubcfg["args"] += img.args
+ if arg:
+ grubcfg["args"] += " " + args
return grubcfg

View File

@ -17,9 +17,11 @@ domain's permission is sufficient.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -870,6 +870,19 @@ get_page_from_l1e(
Index: xen-4.2.0-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/mm.c
+++ xen-4.2.0-testing/xen/arch/x86/mm.c
@@ -883,6 +883,19 @@ get_page_from_l1e(
return -EINVAL;
}

View File

@ -0,0 +1,129 @@
References: FATE#313633
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1348654362 -7200
# Node ID c47ef9592fb39325e33f8406b4bd736cc84482e5
# Parent 5d63c633a60b9a1d695594f9c17cf933240bec81
x86: Implement TSC adjust feature for HVM guest
IA32_TSC_ADJUST MSR is maintained separately for each logical
processor. A logical processor maintains and uses the IA32_TSC_ADJUST
MSR as follows:
1). On RESET, the value of the IA32_TSC_ADJUST MSR is 0;
2). If an execution of WRMSR to the IA32_TIME_STAMP_COUNTER MSR adds
(or subtracts) value X from the TSC, the logical processor also
adds (or subtracts) value X from the IA32_TSC_ADJUST MSR;
3). If an execution of WRMSR to the IA32_TSC_ADJUST MSR adds (or
subtracts) value X from that MSR, the logical processor also adds
(or subtracts) value X from the TSC.
This patch provides tsc adjust support for hvm guest, with it guest OS
would be happy when sync tsc.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
Index: xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
@@ -244,6 +244,7 @@ int hvm_set_guest_pat(struct vcpu *v, u6
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
{
uint64_t tsc;
+ uint64_t delta_tsc;
if ( v->domain->arch.vtsc )
{
@@ -255,10 +256,22 @@ void hvm_set_guest_tsc(struct vcpu *v, u
rdtscll(tsc);
}
- v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - tsc;
+ delta_tsc = guest_tsc - tsc;
+ v->arch.hvm_vcpu.msr_tsc_adjust += delta_tsc
+ - v->arch.hvm_vcpu.cache_tsc_offset;
+ v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc;
+
hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
}
+void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust)
+{
+ v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust
+ - v->arch.hvm_vcpu.msr_tsc_adjust;
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ v->arch.hvm_vcpu.msr_tsc_adjust = tsc_adjust;
+}
+
u64 hvm_get_guest_tsc(struct vcpu *v)
{
uint64_t tsc;
@@ -277,6 +290,11 @@ u64 hvm_get_guest_tsc(struct vcpu *v)
return tsc + v->arch.hvm_vcpu.cache_tsc_offset;
}
+u64 hvm_get_guest_tsc_adjust(struct vcpu *v)
+{
+ return v->arch.hvm_vcpu.msr_tsc_adjust;
+}
+
void hvm_migrate_timers(struct vcpu *v)
{
rtc_migrate_timers(v);
@@ -2798,6 +2816,10 @@ int hvm_msr_read_intercept(unsigned int
*msr_content = hvm_get_guest_tsc(v);
break;
+ case MSR_IA32_TSC_ADJUST:
+ *msr_content = hvm_get_guest_tsc_adjust(v);
+ break;
+
case MSR_TSC_AUX:
*msr_content = hvm_msr_tsc_aux(v);
break;
@@ -2911,6 +2933,10 @@ int hvm_msr_write_intercept(unsigned int
hvm_set_guest_tsc(v, msr_content);
break;
+ case MSR_IA32_TSC_ADJUST:
+ hvm_set_guest_tsc_adjust(v, msr_content);
+ break;
+
case MSR_TSC_AUX:
v->arch.hvm_vcpu.msr_tsc_aux = (uint32_t)msr_content;
if ( cpu_has_rdtscp
@@ -3482,6 +3508,8 @@ void hvm_vcpu_reset_state(struct vcpu *v
v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ v->arch.hvm_vcpu.msr_tsc_adjust = 0;
+
paging_update_paging_modes(v);
v->arch.flags |= TF_kernel_mode;
Index: xen-4.2.0-testing/xen/include/asm-x86/hvm/vcpu.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/hvm/vcpu.h
+++ xen-4.2.0-testing/xen/include/asm-x86/hvm/vcpu.h
@@ -137,6 +137,7 @@ struct hvm_vcpu {
struct hvm_vcpu_asid n1asid;
u32 msr_tsc_aux;
+ u64 msr_tsc_adjust;
/* VPMU */
struct vpmu_struct vpmu;
Index: xen-4.2.0-testing/xen/include/asm-x86/msr-index.h
===================================================================
--- xen-4.2.0-testing.orig/xen/include/asm-x86/msr-index.h
+++ xen-4.2.0-testing/xen/include/asm-x86/msr-index.h
@@ -284,6 +284,7 @@
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_IA32_EBC_FREQUENCY_ID 0x0000002c
+#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)

View File

@ -0,0 +1,104 @@
References: FATE#313633
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1348654418 -7200
# Node ID 56fb977ce6eb4626a02d4a7a34e85009bb8ee3e0
# Parent c47ef9592fb39325e33f8406b4bd736cc84482e5
x86: Save/restore TSC adjust during HVM guest migration
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/misc/xen-hvmctx.c
+++ b/tools/misc/xen-hvmctx.c
@@ -390,6 +390,13 @@ static void dump_vmce_vcpu(void)
printf(" VMCE_VCPU: caps %" PRIx64 "\n", p.caps);
}
+static void dump_tsc_adjust(void)
+{
+ HVM_SAVE_TYPE(TSC_ADJUST) p;
+ READ(p);
+ printf(" TSC_ADJUST: tsc_adjust %" PRIx64 "\n", p.tsc_adjust);
+}
+
int main(int argc, char **argv)
{
int entry, domid;
@@ -457,6 +464,7 @@ int main(int argc, char **argv)
case HVM_SAVE_CODE(VIRIDIAN_DOMAIN): dump_viridian_domain(); break;
case HVM_SAVE_CODE(VIRIDIAN_VCPU): dump_viridian_vcpu(); break;
case HVM_SAVE_CODE(VMCE_VCPU): dump_vmce_vcpu(); break;
+ case HVM_SAVE_CODE(TSC_ADJUST): dump_tsc_adjust(); break;
case HVM_SAVE_CODE(END): break;
default:
printf(" ** Don't understand type %u: skipping\n",
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -610,6 +610,46 @@ void hvm_domain_destroy(struct domain *d
hvm_destroy_cacheattr_region_list(d);
}
+static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
+{
+ struct vcpu *v;
+ struct hvm_tsc_adjust ctxt;
+ int err = 0;
+
+ for_each_vcpu ( d, v )
+ {
+ ctxt.tsc_adjust = v->arch.hvm_vcpu.msr_tsc_adjust;
+ err = hvm_save_entry(TSC_ADJUST, v->vcpu_id, h, &ctxt);
+ if ( err )
+ break;
+ }
+
+ return err;
+}
+
+static int hvm_load_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
+{
+ unsigned int vcpuid = hvm_load_instance(h);
+ struct vcpu *v;
+ struct hvm_tsc_adjust ctxt;
+
+ if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
+ {
+ dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
+ d->domain_id, vcpuid);
+ return -EINVAL;
+ }
+
+ if ( hvm_load_entry(TSC_ADJUST, h, &ctxt) != 0 )
+ return -EINVAL;
+
+ v->arch.hvm_vcpu.msr_tsc_adjust = ctxt.tsc_adjust;
+ return 0;
+}
+
+HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust,
+ hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU);
+
static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
struct vcpu *v;
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -581,9 +581,15 @@ struct hvm_vmce_vcpu {
DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu);
+struct hvm_tsc_adjust {
+ uint64_t tsc_adjust;
+};
+
+DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
+
/*
* Largest type-code in use
*/
-#define HVM_SAVE_CODE_MAX 18
+#define HVM_SAVE_CODE_MAX 19
#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */

View File

@ -0,0 +1,39 @@
References: FATE#313633
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@intel.com>
# Date 1348654470 -7200
# Node ID 3aa66543a51ba77cb73e8c874e2416d065426a22
# Parent 56fb977ce6eb4626a02d4a7a34e85009bb8ee3e0
x86: Expose TSC adjust to HVM guest
Intel latest SDM (17.13.3) release a new MSR CPUID.7.0.EBX[1]=1
indicates TSC_ADJUST MSR 0x3b is supported.
This patch expose it to hvm guest.
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -128,6 +128,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_TSC_ADJUST 1 /* Tsc thread offset */
#define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE 4 /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 5 /* AVX2 instructions */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -362,7 +362,8 @@ static void xc_cpuid_hvm_policy(
case 0x00000007: /* Intel-defined CPU features */
if ( input[1] == 0 ) {
- regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ regs[1] &= (bitmaskof(X86_FEATURE_TSC_ADJUST) |
+ bitmaskof(X86_FEATURE_BMI1) |
bitmaskof(X86_FEATURE_HLE) |
bitmaskof(X86_FEATURE_AVX2) |
bitmaskof(X86_FEATURE_SMEP) |

View File

@ -1,36 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348816934 -7200
# Node ID 6a581212909478bba0c7b4dfc6c370270dee825c
# Parent 6bf8b882df8f66ab5500e4d9cc0c3338ae5a6cb9
x86/HPET: don't disable interrupt delivery right after setting it up
We shouldn't clear HPET_TN_FSB right after we (indirectly, via
request_irq()) enabled it for the channels we intend to use for
broadcasts.
This fixes a regression introduced by c/s 25103:0b0e42dc4f0a.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -533,7 +533,7 @@ void __init hpet_broadcast_init(void)
{
/* set HPET Tn as oneshot */
cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
- cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC | HPET_TN_FSB);
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
@@ -590,7 +590,7 @@ void hpet_broadcast_resume(void)
/* set HPET Tn as oneshot */
cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
- cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC | HPET_TN_FSB);
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));

View File

@ -1,35 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348817014 -7200
# Node ID 41f523f1b5e5af9cf8e85160f2412456da83050f
# Parent 6a581212909478bba0c7b4dfc6c370270dee825c
x86/IRQ: fix valid-old-vector checks in __assign_irq_vector()
There are two greater-than-zero checks for the old vector retrieved,
which don't work when a negative value got stashed into the respective
arch_irq_desc field. The effect of this was that for interrupts that
are intended to get their affinity adjusted the first time before the
first interrupt occurs, the affinity change would fail, because the
original vector assignment would have caused the move_in_progress flag
to get set (which causes subsequent re-assignments to fail until it
gets cleared, which only happens from the ->ack() actor, i.e. when an
interrupt actually occurred).
This addresses a problem introduced in c/s 23816:7f357e1ef60a (by
changing IRQ_VECTOR_UNASSIGNED from 0 to -1).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -430,8 +430,7 @@ static int __assign_irq_vector(
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
- unsigned int old_vector;
- int cpu, err;
+ int cpu, err, old_vector;
cpumask_t tmp_mask;
vmask_t *irq_used_vectors = NULL;

View File

@ -1,35 +0,0 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1348817291 -7200
# Node ID 4496d56c68a0e57ed9f03b482028093f1e7fdf6c
# Parent 00c05b9d76247d063a8ebc75050246e488323f50
x86/ucode: fix Intel case of resume handling on boot CPU
Checking the stored version doesn't tell us anything about the need to
apply the update (during resume, what is stored doesn't necessarily
match what is loaded).
Note that the check can be removed altogether because once switched to
use what was read from the CPU (uci->cpu_sig.rev, as used in the
subsequent pr_debug()), it would become redundant with the checks that
lead to microcode_update_match() returning the indication that an
update should be applied.
Note further that this was not an issue on APs since they start with
uci->mc.mc_intel being NULL.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Ben Guthro <ben@guthro.net>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/microcode_intel.c
+++ b/xen/arch/x86/microcode_intel.c
@@ -261,8 +261,6 @@ static int get_matching_microcode(const
}
return 0;
find:
- if ( uci->mc.mc_intel && uci->mc.mc_intel->hdr.rev >= mc_header->rev )
- return 0;
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n",
cpu, mc_header->rev, uci->cpu_sig.rev);

55
25975-x86-IvyBridge.patch Normal file
View File

@ -0,0 +1,55 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1349172840 -7200
# Node ID 87bf99fad7a9f018530d13213f57610621838085
# Parent 5fbdbf585f5f2ee9a3e3c75a8a9f9f2cc6eda65c
x86/Intel: add further support for Ivy Bridge CPU models
And some initial Haswell ones at once.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Nakajima, Jun" <jun.nakajima@intel.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -105,11 +105,15 @@ static void do_get_hw_residencies(void *
switch ( c->x86_model )
{
- /* Ivy bridge */
- case 0x3A:
/* Sandy bridge */
case 0x2A:
case 0x2D:
+ /* Ivy bridge */
+ case 0x3A:
+ case 0x3E:
+ /* Haswell */
+ case 0x3C:
+ case 0x45:
GET_PC2_RES(hw_res->pc2);
GET_CC7_RES(hw_res->cc7);
/* fall through */
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1820,7 +1820,9 @@ static const struct lbr_info *last_branc
/* Sandy Bridge */
case 42: case 45:
/* Ivy Bridge */
- case 58:
+ case 58: case 62:
+ /* Haswell */
+ case 60: case 69:
return nh_lbr;
break;
/* Atom */
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -747,6 +747,7 @@ int vmx_vpmu_initialise(struct vcpu *v,
case 46:
case 47:
case 58:
+ case 62:
ret = core2_vpmu_initialise(v, vpmu_flags);
if ( !ret )
vpmu->arch_vpmu_ops = &core2_vpmu_ops;

View File

@ -1,21 +0,0 @@
changeset: 26006:8b6870d686d6
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:30 2012 +0100
files: tools/hotplug/Linux/network-nat
description:
hotplug/Linux: Remove tracing (bash -x) from network-nat script
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r cdb48f1742f3 -r 8b6870d686d6 tools/hotplug/Linux/network-nat
--- a/tools/hotplug/Linux/network-nat Mon Oct 08 12:18:29 2012 +0100
+++ b/tools/hotplug/Linux/network-nat Mon Oct 08 12:18:30 2012 +0100
@@ -1,4 +1,4 @@
-#!/bin/bash -x
+#!/bin/bash
#============================================================================
# Default Xen network start/stop script when using NAT.
# Xend calls a network script when it starts.

View File

@ -1,31 +0,0 @@
changeset: 26007:fe756682cc7f
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:31 2012 +0100
files: tools/xenballoon/xenballoond.init
description:
xenballoond.init: remove 4 from default runlevel
Remove 4 from default runlevel in xenballoond.init.
Similar to what changeset 24847:0900b1c905f1 does in xencommons, remove
runlevel 4 from the other runlevel scripts. LSB defines runlevel 4 as
reserved for local use, the local sysadmin is responsible for symlink
creation in rc4.d.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 8b6870d686d6 -r fe756682cc7f tools/xenballoon/xenballoond.init
--- a/tools/xenballoon/xenballoond.init Mon Oct 08 12:18:30 2012 +0100
+++ b/tools/xenballoon/xenballoond.init Mon Oct 08 12:18:31 2012 +0100
@@ -14,7 +14,7 @@
# Should-Start:
# Required-Stop: $syslog $remote_fs
# Should-Stop:
-# Default-Start: 3 4 5
+# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Start/stop xenballoond
# Description: Starts and stops the Xen ballooning daemon.

View File

@ -1,41 +0,0 @@
changeset: 26008:eecb528583d7
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:31 2012 +0100
files: tools/python/xen/util/vscsi_util.py
description:
xend/pvscsi: fix passing of SCSI control LUNs
Currently pvscsi can not pass SCSI devices that have just a scsi_generic node.
In the following example sg3 is a control LUN for the disk sdd.
But vscsi=['4:0:2:0,0:0:0:0'] does not work because the internal 'devname'
variable remains None. Later writing p-devname to xenstore fails because None
is not a valid string variable.
Since devname is used for just informational purpose use sg also as devname.
carron:~ $ lsscsi -g
[0:0:0:0] disk ATA FK0032CAAZP HPF2 /dev/sda /dev/sg0
[4:0:0:0] disk HP P2000G3 FC/iSCSI T100 /dev/sdb /dev/sg1
[4:0:1:0] disk HP P2000G3 FC/iSCSI T100 /dev/sdc /dev/sg2
[4:0:2:0] storage HP HSV400 0950 - /dev/sg3
[4:0:2:1] disk HP HSV400 0950 /dev/sdd /dev/sg4
[4:0:3:0] storage HP HSV400 0950 - /dev/sg5
[4:0:3:1] disk HP HSV400 0950 /dev/sde /dev/sg6
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r fe756682cc7f -r eecb528583d7 tools/python/xen/util/vscsi_util.py
--- a/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:31 2012 +0100
+++ b/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:31 2012 +0100
@@ -105,6 +105,8 @@ def _vscsi_get_scsidevices_by_lsscsi(opt
devname = None
try:
sg = s[-1].split('/dev/')[1]
+ if devname is None:
+ devname = sg
scsi_id = _vscsi_get_scsiid(sg)
except IndexError:
sg = None

View File

@ -1,47 +0,0 @@
changeset: 26009:2dbfa4d2e107
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:32 2012 +0100
files: tools/python/xen/util/vscsi_util.py
description:
xend/pvscsi: fix usage of persistant device names for SCSI devices
Currently the callers of vscsi_get_scsidevices() do not pass a mask
string. This will call "lsscsi -g '[]'", which causes a lsscsi syntax
error. As a result the sysfs parser _vscsi_get_scsidevices() is used.
But this parser is broken and the specified names in the config file are
not found.
Using a mask '*' if no mask was given will call lsscsi correctly and the
following config is parsed correctly:
vscsi=[
'/dev/sg3, 0:0:0:0',
'/dev/disk/by-id/wwn-0x600508b4000cf1c30000800000410000, 0:0:0:1'
]
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r eecb528583d7 -r 2dbfa4d2e107 tools/python/xen/util/vscsi_util.py
--- a/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:31 2012 +0100
+++ b/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:32 2012 +0100
@@ -150,7 +150,7 @@ def _vscsi_get_scsidevices_by_sysfs():
return devices
-def vscsi_get_scsidevices(mask=""):
+def vscsi_get_scsidevices(mask="*"):
""" get all scsi devices information """
devices = _vscsi_get_scsidevices_by_lsscsi("[%s]" % mask)
@@ -279,7 +279,7 @@ def get_scsi_device(pHCTL):
return _make_scsi_record(scsi_info)
return None
-def get_all_scsi_devices(mask=""):
+def get_all_scsi_devices(mask="*"):
scsi_records = []
for scsi_info in vscsi_get_scsidevices(mask):
scsi_record = _make_scsi_record(scsi_info)

View File

@ -1,84 +0,0 @@
changeset: 26010:cff10030c6ea
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:33 2012 +0100
files: tools/python/xen/util/vscsi_util.py
description:
xend/pvscsi: update sysfs parser for Linux 3.0
The sysfs parser for /sys/bus/scsi/devices understands only the layout
of kernel version 2.6.16. This looks as follows:
/sys/bus/scsi/devices/1:0:0:0/block:sda is a symlink to /sys/block/sda/
/sys/bus/scsi/devices/1:0:0:0/scsi_generic:sg1 is a symlink to /sys/class/scsi_generic/sg1
Both directories contain a 'dev' file with the major:minor information.
This patch updates the used regex strings to match also the colon to
make it more robust against possible future changes.
In kernel version 3.0 the layout changed:
/sys/bus/scsi/devices/ contains now additional symlinks to directories
such as host1 and target1:0:0. This patch ignores these as they do not
point to the desired scsi devices. They just clutter the devices array.
The directory layout in '1:0:0:0' changed as well, the 'type:name'
notation was replaced with 'type/name' directories:
/sys/bus/scsi/devices/1:0:0:0/block/sda/
/sys/bus/scsi/devices/1:0:0:0/scsi_generic/sg1/
Both directories contain a 'dev' file with the major:minor information.
This patch adds additional code to walk the subdir to find the 'dev'
file to make sure the given subdirectory is really the kernel name.
In addition this patch makes sure devname is not None.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 2dbfa4d2e107 -r cff10030c6ea tools/python/xen/util/vscsi_util.py
--- a/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:32 2012 +0100
+++ b/tools/python/xen/util/vscsi_util.py Mon Oct 08 12:18:33 2012 +0100
@@ -130,20 +130,36 @@ def _vscsi_get_scsidevices_by_sysfs():
for dirpath, dirnames, files in os.walk(sysfs_mnt + SYSFS_SCSI_PATH):
for hctl in dirnames:
+ if len(hctl.split(':')) != 4:
+ continue
paths = os.path.join(dirpath, hctl)
devname = None
sg = None
scsi_id = None
for f in os.listdir(paths):
realpath = os.path.realpath(os.path.join(paths, f))
- if re.match('^block', f) or \
- re.match('^tape', f) or \
- re.match('^scsi_changer', f) or \
- re.match('^onstream_tape', f):
+ if re.match('^block:', f) or \
+ re.match('^tape:', f) or \
+ re.match('^scsi_changer:', f) or \
+ re.match('^onstream_tape:', f):
devname = os.path.basename(realpath)
+ elif f == "block" or \
+ f == "tape" or \
+ f == "scsi_changer" or \
+ f == "onstream_tape":
+ for dir in os.listdir(os.path.join(paths, f)):
+ if os.path.exists(os.path.join(paths, f, dir, "dev")):
+ devname = os.path.basename(dir)
- if re.match('^scsi_generic', f):
+ if re.match('^scsi_generic:', f):
sg = os.path.basename(realpath)
+ elif f == "scsi_generic":
+ for dir in os.listdir(os.path.join(paths, f)):
+ if os.path.exists(os.path.join(paths, f, dir, "dev")):
+ sg = os.path.basename(dir)
+ if sg:
+ if devname is None:
+ devname = sg
scsi_id = _vscsi_get_scsiid(sg)
devices.append([hctl, devname, sg, scsi_id])

View File

@ -1,224 +0,0 @@
changeset: 26011:b6fb4e63b946
user: Olaf Hering <olaf@aepfle.de>
date: Mon Oct 08 12:18:34 2012 +0100
files: stubdom/Makefile
description:
stubdom: fix parallel build by expanding CROSS_MAKE
Recently I changed my rpm xen.spec file from doing
'make -C tools -j N && make stubdom' to 'make -j N stubdom' because
stubdom depends on tools, so both get built.
The result was the failure below.
....
mkdir -p grub-x86_64
CPPFLAGS="-isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include -D__MINIOS__ -DHAVE_LIBC -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/posix -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../tools/xenstore -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/x86 -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/x86/x86_64 -U __linux__ -U __FreeBSD__ -U __sun__ -nostdinc -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os/include/posix -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/cross-root-x86_64/x86_64-xen-elf/include -isystem /usr/lib64/gcc/x86_64-suse-linux/4.7/include -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/lwip-x86_64/src/include -isystem /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/lwip-x86_64/src/include/ipv4 -I/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/include -I/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../xen/include" CFLAGS="-mno-red-zone -O1 -fno-omit-frame-pointer -m64 -mno-red-zone -fno-reorder-blocks -fno-asynchronous-unwind-tables -m64 -g -fno-strict-aliasing -std=gnu99 -Wall -Wstrict-prototypes -Wdeclaration-after-statement -Wno-unused-but-set-variable -fno-stack-protector -fno-exceptions" make DESTDIR= -C grub OBJ_DIR=/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/grub-x86_64
make[2]: Entering directory `/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/grub'
make[2]: warning: jobserver unavailable: using -j1. Add `+' to parent make rule.
make[2]: *** INTERNAL: readdir: Bad file descriptor
. Stop.
make[2]: Makefile: Field 'stem' not cached: Makefile
make[2]: Leaving directory `/home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/grub'
make[1]: *** [grub] Error 2
[ -d mini-os-x86_64-xenstore ] || \
for i in $(cd /home/abuild/rpmbuild/BUILD/xen-4.2.25602/non-dbg/stubdom/../extras/mini-os ; find . -type d) ; do \
mkdir -p mini-os-x86_64-xenstore/$i ; \
done
....
Expanding every occurrence of CROSS_MAKE avoids this error. It also has
the nice side effect of actually enabling parallel build for stubdom.
According to the GNU make documentation $(MAKE) gets its special meaning
only if it appears directly in the recipe:
http://www.gnu.org/software/make/manual/html_node/MAKE-Variable.html
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r cff10030c6ea -r b6fb4e63b946 stubdom/Makefile
--- a/stubdom/Makefile Mon Oct 08 12:18:33 2012 +0100
+++ b/stubdom/Makefile Mon Oct 08 12:18:34 2012 +0100
@@ -76,8 +76,6 @@ TARGET_LDFLAGS += -nostdlib -L$(CROSS_PR
TARGETS=ioemu c caml grub xenstore
-CROSS_MAKE := $(MAKE) DESTDIR=
-
.PHONY: all
all: build
ifeq ($(STUBDOM_SUPPORTED),1)
@@ -113,8 +111,8 @@ cross-newlib: $(NEWLIB_STAMPFILE)
mkdir -p newlib-$(XEN_TARGET_ARCH)
( cd newlib-$(XEN_TARGET_ARCH) && \
CC_FOR_TARGET="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) $(NEWLIB_CFLAGS)" AR_FOR_TARGET=$(AR) LD_FOR_TARGET=$(LD) RANLIB_FOR_TARGET=$(RANLIB) ../newlib-$(NEWLIB_VERSION)/configure --prefix=$(CROSS_PREFIX) --verbose --target=$(GNU_TARGET_ARCH)-xen-elf --enable-newlib-io-long-long --disable-multilib && \
- $(CROSS_MAKE) && \
- $(CROSS_MAKE) install )
+ $(MAKE) DESTDIR= && \
+ $(MAKE) DESTDIR= install )
############
# Cross-zlib
@@ -133,8 +131,8 @@ cross-zlib: $(ZLIB_STAMPFILE)
$(ZLIB_STAMPFILE): zlib-$(XEN_TARGET_ARCH) $(NEWLIB_STAMPFILE)
( cd $< && \
CFLAGS="$(TARGET_CPPFLAGS) $(TARGET_CFLAGS)" CC=$(CC) ./configure --prefix=$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf && \
- $(CROSS_MAKE) libz.a && \
- $(CROSS_MAKE) install )
+ $(MAKE) DESTDIR= libz.a && \
+ $(MAKE) DESTDIR= install )
##############
# Cross-libpci
@@ -158,7 +156,7 @@ cross-libpci: $(LIBPCI_STAMPFILE)
chmod u+w lib/config.h && \
echo '#define PCILIB_VERSION "$(LIBPCI_VERSION)"' >> lib/config.h && \
ln -sf ../../libpci.config.mak lib/config.mk && \
- $(CROSS_MAKE) CC="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -I$(call realpath,$(MINI_OS)/include)" lib/libpci.a && \
+ $(MAKE) DESTDIR= CC="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -I$(call realpath,$(MINI_OS)/include)" lib/libpci.a && \
$(INSTALL_DATA) lib/libpci.a $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/lib/ && \
$(INSTALL_DIR) $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci && \
$(INSTALL_DATA) lib/config.h lib/header.h lib/pci.h lib/types.h $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \
@@ -203,8 +201,8 @@ cross-ocaml: $(OCAML_STAMPFILE)
-no-pthread -no-shared-libs -no-tk -no-curses \
-cc "$(CC) -U_FORTIFY_SOURCE -fno-stack-protector -mno-red-zone"
$(foreach i,$(MINIOS_HASNOT),sed -i 's,^\(#define HAS_$(i)\),//\1,' ocaml-$(XEN_TARGET_ARCH)/config/s.h ; )
- $(CROSS_MAKE) -C ocaml-$(XEN_TARGET_ARCH) world
- $(CROSS_MAKE) -C ocaml-$(XEN_TARGET_ARCH) opt
+ $(MAKE) DESTDIR= -C ocaml-$(XEN_TARGET_ARCH) world
+ $(MAKE) DESTDIR= -C ocaml-$(XEN_TARGET_ARCH) opt
$(MAKE) -C ocaml-$(XEN_TARGET_ARCH) install
touch $@
@@ -219,7 +217,7 @@ QEMU_ROOT := $(shell if [ -d "$(CONFIG_Q
ifeq ($(QEMU_ROOT),.)
$(XEN_ROOT)/tools/qemu-xen-traditional-dir:
- $(CROSS_MAKE) -C $(XEN_ROOT)/tools qemu-xen-traditional-dir-find
+ $(MAKE) DESTDIR= -C $(XEN_ROOT)/tools qemu-xen-traditional-dir-find
ioemu/linkfarm.stamp: $(XEN_ROOT)/tools/qemu-xen-traditional-dir
mkdir -p ioemu
@@ -250,7 +248,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin
( [ -h include/xen/libelf ] || ln -sf $(XEN_ROOT)/tools/include/xen/libelf include/xen/libelf ) && \
mkdir -p include/xen-foreign && \
ln -sf $(wildcard $(XEN_ROOT)/tools/include/xen-foreign/*) include/xen-foreign/ && \
- $(CROSS_MAKE) -C include/xen-foreign/ && \
+ $(MAKE) DESTDIR= -C include/xen-foreign/ && \
( [ -h include/xen/foreign ] || ln -sf ../xen-foreign include/xen/foreign )
mkdir -p libxc-$(XEN_TARGET_ARCH)
[ -h libxc-$(XEN_TARGET_ARCH)/Makefile ] || ( cd libxc-$(XEN_TARGET_ARCH) && \
@@ -267,7 +265,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin
ln -sf $(XEN_ROOT)/tools/xenstore/*.c . && \
ln -sf $(XEN_ROOT)/tools/xenstore/*.h . && \
ln -sf $(XEN_ROOT)/tools/xenstore/Makefile . )
- $(CROSS_MAKE) -C $(MINI_OS) links
+ $(MAKE) DESTDIR= -C $(MINI_OS) links
touch mk-headers-$(XEN_TARGET_ARCH)
TARGETS_MINIOS=$(addprefix mini-os-$(XEN_TARGET_ARCH)-,$(TARGETS))
@@ -284,7 +282,7 @@ TARGETS_MINIOS=$(addprefix mini-os-$(XEN
.PHONY: libxc
libxc: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a libxc-$(XEN_TARGET_ARCH)/libxenguest.a
libxc-$(XEN_TARGET_ARCH)/libxenctrl.a: cross-zlib
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C libxc-$(XEN_TARGET_ARCH)
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C libxc-$(XEN_TARGET_ARCH)
libxc-$(XEN_TARGET_ARCH)/libxenguest.a: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a
@@ -302,7 +300,7 @@ ioemu: cross-zlib cross-libpci libxc
TARGET_CFLAGS="$(TARGET_CFLAGS)" \
TARGET_LDFLAGS="$(TARGET_LDFLAGS)" \
$(QEMU_ROOT)/xen-setup-stubdom )
- $(CROSS_MAKE) -C ioemu -f $(QEMU_ROOT)/Makefile
+ $(MAKE) DESTDIR= -C ioemu -f $(QEMU_ROOT)/Makefile
######
# caml
@@ -310,7 +308,7 @@ ioemu: cross-zlib cross-libpci libxc
.PHONY: caml
caml: $(CROSS_ROOT)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) OCAMLC_CROSS_PREFIX=$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/bin/
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) OCAMLC_CROSS_PREFIX=$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/bin/
###
# C
@@ -318,7 +316,7 @@ caml: $(CROSS_ROOT)
.PHONY: c
c: $(CROSS_ROOT)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH)
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH)
######
# Grub
@@ -337,7 +335,7 @@ grub-upstream: grub-$(GRUB_VERSION).tar.
.PHONY: grub
grub: grub-upstream $(CROSS_ROOT)
mkdir -p grub-$(XEN_TARGET_ARCH)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ OBJ_DIR=$(CURDIR)/grub-$(XEN_TARGET_ARCH)
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ OBJ_DIR=$(CURDIR)/grub-$(XEN_TARGET_ARCH)
##########
# xenstore
@@ -345,7 +343,7 @@ grub: grub-upstream $(CROSS_ROOT)
.PHONY: xenstore
xenstore: $(CROSS_ROOT)
- CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(CROSS_MAKE) -C $@ xenstored.a CONFIG_STUBDOM=y
+ CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= -C $@ xenstored.a CONFIG_STUBDOM=y
########
# minios
@@ -354,23 +352,23 @@ xenstore: $(CROSS_ROOT)
.PHONY: ioemu-stubdom
ioemu-stubdom: APP_OBJS=$(CURDIR)/ioemu/i386-stubdom/qemu.a $(CURDIR)/ioemu/i386-stubdom/libqemu.a $(CURDIR)/ioemu/libqemu_common.a
ioemu-stubdom: mini-os-$(XEN_TARGET_ARCH)-ioemu lwip-$(XEN_TARGET_ARCH) libxc ioemu
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/ioemu-minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(APP_OBJS)"
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/ioemu-minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(APP_OBJS)"
.PHONY: caml-stubdom
caml-stubdom: mini-os-$(XEN_TARGET_ARCH)-caml lwip-$(XEN_TARGET_ARCH) libxc cross-ocaml caml
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/caml/minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/caml/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
.PHONY: c-stubdom
c-stubdom: mini-os-$(XEN_TARGET_ARCH)-c lwip-$(XEN_TARGET_ARCH) libxc c
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/c/minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS=$(CURDIR)/c/main.a
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/c/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS=$(CURDIR)/c/main.a
.PHONY: pv-grub
pv-grub: mini-os-$(XEN_TARGET_ARCH)-grub libxc grub
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/grub/minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/grub-$(XEN_TARGET_ARCH)/main.a
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/grub/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/grub-$(XEN_TARGET_ARCH)/main.a
.PHONY: xenstore-stubdom
xenstore-stubdom: mini-os-$(XEN_TARGET_ARCH)-xenstore libxc xenstore
- DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/xenstore-minios.cfg" $(CROSS_MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/xenstore/xenstored.a
+ DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/xenstore-minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/xenstore/xenstored.a
#########
# install
@@ -412,13 +410,13 @@ clean:
rm -fr mini-os-$(XEN_TARGET_ARCH)-caml
rm -fr mini-os-$(XEN_TARGET_ARCH)-grub
rm -fr mini-os-$(XEN_TARGET_ARCH)-xenstore
- $(CROSS_MAKE) -C caml clean
- $(CROSS_MAKE) -C c clean
+ $(MAKE) DESTDIR= -C caml clean
+ $(MAKE) DESTDIR= -C c clean
rm -fr grub-$(XEN_TARGET_ARCH)
rm -f $(STUBDOMPATH)
- [ ! -d libxc-$(XEN_TARGET_ARCH) ] || $(CROSS_MAKE) -C libxc-$(XEN_TARGET_ARCH) clean
- -[ ! -d ioemu ] || $(CROSS_MAKE) -C ioemu clean
- -[ ! -d xenstore ] || $(CROSS_MAKE) -C xenstore clean
+ [ ! -d libxc-$(XEN_TARGET_ARCH) ] || $(MAKE) DESTDIR= -C libxc-$(XEN_TARGET_ARCH) clean
+ -[ ! -d ioemu ] || $(MAKE) DESTDIR= -C ioemu clean
+ -[ ! -d xenstore ] || $(MAKE) DESTDIR= -C xenstore clean
# clean the cross-compilation result
.PHONY: crossclean

View File

@ -1,43 +0,0 @@
changeset: 26018:ecc7627ca6d7
tag: tip
user: Olaf Hering <olaf@aepfle.de>
date: Tue Oct 09 09:18:42 2012 +0100
files: tools/pygrub/src/pygrub
description:
pygrub: correct typo in --args assignment
If pygrub was called with --args="some thing", then this string should
be append to the kernel command line. But the last changeset
25941:795c493fe561 contained a typo, it assigns 'args' instead of 'arg'.
Rename the local variable which holds the string from the domain config
file to avoid further confusion.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r c9f621893a05 -r ecc7627ca6d7 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Mon Oct 08 14:36:31 2012 +0100
+++ b/tools/pygrub/src/pygrub Tue Oct 09 09:18:42 2012 +0100
@@ -585,7 +585,7 @@ def get_entry_idx(cf, entry):
return None
-def run_grub(file, entry, fs, arg):
+def run_grub(file, entry, fs, cfg_args):
global g
global sel
@@ -622,8 +622,8 @@ def run_grub(file, entry, fs, arg):
grubcfg["ramdisk"] = img.initrd[1]
if img.args:
grubcfg["args"] += img.args
- if arg:
- grubcfg["args"] += " " + args
+ if cfg_args:
+ grubcfg["args"] += " " + cfg_args
return grubcfg

View File

@ -0,0 +1,93 @@
# HG changeset patch
# User Huang Ying <ying.huang@intel.com>
# Date 1350475926 -7200
# Node ID ec8a091efcce717584b00ce76e3cec40a6247ebc
# Parent 4b4c0c7a6031820ab521fdd6764cb0df157f44bf
ACPI/APEI: fix ERST MOVE_DATA instruction implementation
The src_base and dst_base fields in apei_exec_context are physical
address, so they should be ioremaped before being used in ERST
MOVE_DATA instruction.
Reported-by: Javier Martinez Canillas <martinez.javier@gmail.com>
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Replace use of ioremap() by __acpi_map_table()/set_fixmap(). Fix error
handling.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/acpi/apei/erst.c
+++ b/xen/drivers/acpi/apei/erst.c
@@ -247,15 +247,64 @@ static int erst_exec_move_data(struct ap
{
int rc;
u64 offset;
+#ifdef CONFIG_X86
+ enum fixed_addresses idx;
+#endif
+ void *src, *dst;
+
+ /* ioremap does not work in interrupt context */
+ if (in_irq()) {
+ printk(KERN_WARNING
+ "MOVE_DATA cannot be used in interrupt context\n");
+ return -EBUSY;
+ }
rc = __apei_exec_read_register(entry, &offset);
if (rc)
return rc;
- memmove((void *)(unsigned long)(ctx->dst_base + offset),
- (void *)(unsigned long)(ctx->src_base + offset),
- ctx->var2);
- return 0;
+#ifdef CONFIG_X86
+ switch (ctx->var2) {
+ case 0:
+ return 0;
+ case 1 ... PAGE_SIZE:
+ break;
+ default:
+ printk(KERN_WARNING
+ "MOVE_DATA cannot be used for %#"PRIx64" bytes of data\n",
+ ctx->var2);
+ return -EOPNOTSUPP;
+ }
+
+ src = __acpi_map_table(ctx->src_base + offset, ctx->var2);
+#else
+ src = ioremap(ctx->src_base + offset, ctx->var2);
+#endif
+ if (!src)
+ return -ENOMEM;
+
+#ifdef CONFIG_X86
+ BUILD_BUG_ON(FIX_ACPI_PAGES < 4);
+ idx = virt_to_fix((unsigned long)src + 2 * PAGE_SIZE);
+ offset += ctx->dst_base;
+ dst = (void *)fix_to_virt(idx) + (offset & ~PAGE_MASK);
+ set_fixmap(idx, offset);
+ if (PFN_DOWN(offset) != PFN_DOWN(offset + ctx->var2 - 1)) {
+ idx = virt_to_fix((unsigned long)dst + PAGE_SIZE);
+ set_fixmap(idx, offset + PAGE_SIZE);
+ }
+#else
+ dst = ioremap(ctx->dst_base + offset, ctx->var2);
+#endif
+ if (dst) {
+ memmove(dst, src, ctx->var2);
+ iounmap(dst);
+ } else
+ rc = -ENOMEM;
+
+ iounmap(src);
+
+ return rc;
}
static struct apei_exec_ins_type erst_ins_type[] = {

View File

@ -0,0 +1,76 @@
changeset: 26077:33348baecf37
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:34:59 2012 +0100
files: stubdom/grub.patches/70compiler_warnings.diff
description:
stubdom: fix compile errors in grub
Building xen.rpm in SLES11 started to fail due to these compiler
warnings:
[ 1436s] ../grub-upstream/netboot/fsys_tftp.c:213: warning: operation on 'block' may be undefined
[ 1437s] ../grub-upstream/netboot/main.c:444: warning: operation on 'block' may be undefined
[ 1234s] E: xen sequence-point ../grub-upstream/netboot/fsys_tftp.c:213
[ 1234s] E: xen sequence-point ../grub-upstream/netboot/main.c:444
The reason for this is that the assignment is done twice:
tp.u.ack.block = ((uint16_t)( (((uint16_t)((block = prevblock)) & (uint16_t)0x00ffU) << 8) | (((uint16_t)((block = prevblock)) & (uint16_t)0xff00U) >> 8)));
Fix this package build error by adding another patch for grub, which
moves the assignment out of the macro usage.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 8dcab28b8081 -r 33348baecf37 stubdom/grub.patches/70compiler_warnings.diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/stubdom/grub.patches/70compiler_warnings.diff Thu Oct 18 09:34:59 2012 +0100
@@ -0,0 +1,45 @@
+[ 1436s] ../grub-upstream/netboot/fsys_tftp.c:213: warning: operation on 'block' may be undefined
+[ 1437s] ../grub-upstream/netboot/main.c:444: warning: operation on 'block' may be undefined
+
+[ 1234s] E: xen sequence-point ../grub-upstream/netboot/fsys_tftp.c:213
+[ 1234s] E: xen sequence-point ../grub-upstream/netboot/main.c:444
+
+---
+ netboot/fsys_tftp.c | 5 ++++-
+ netboot/main.c | 5 ++++-
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+Index: grub-0.97/netboot/fsys_tftp.c
+===================================================================
+--- grub-0.97.orig/netboot/fsys_tftp.c
++++ grub-0.97/netboot/fsys_tftp.c
+@@ -209,8 +209,11 @@ buf_fill (int abort)
+ break;
+
+ if ((block || bcounter) && (block != prevblock + (unsigned short) 1))
++ {
++ block = prevblock;
+ /* Block order should be continuous */
+- tp.u.ack.block = htons (block = prevblock);
++ tp.u.ack.block = htons (block);
++ }
+
+ /* Should be continuous. */
+ tp.opcode = abort ? htons (TFTP_ERROR) : htons (TFTP_ACK);
+Index: grub-0.97/netboot/main.c
+===================================================================
+--- grub-0.97.orig/netboot/main.c
++++ grub-0.97/netboot/main.c
+@@ -440,8 +440,11 @@ tftp (const char *name, int (*fnc) (unsi
+ break;
+
+ if ((block || bcounter) && (block != prevblock + 1))
++ {
++ block = prevblock;
+ /* Block order should be continuous */
+- tp.u.ack.block = htons (block = prevblock);
++ tp.u.ack.block = htons (block);
++ }
+
+ /* Should be continuous. */
+ tp.opcode = htons (TFTP_ACK);

View File

@ -0,0 +1,187 @@
changeset: 26078:019ca95dfa34
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:00 2012 +0100
files: Makefile README install.sh tools/hotplug/Linux/Makefile tools/hotplug/Linux/xen-backend.agent
description:
hotplug/Linux: remove hotplug support, rely on udev instead
Hotplug has been replaced by udev since several years. Remove the
hotplug related files and install udev unconditionally.
This makes it possible to remove udev from rpm BuildRequires which
reduces the buildtime dependency chain. For openSuSE:Factory it was
done just now:
http://lists.opensuse.org/opensuse-buildservice/2012-10/msg00085.html
The patch by itself will have no practical impact unless someone
attempts to build and run a Xen dom0 on a really old base system. e.g.
circa SLES9/2007 or earlier
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 33348baecf37 -r 019ca95dfa34 Makefile
--- a/Makefile Thu Oct 18 09:34:59 2012 +0100
+++ b/Makefile Thu Oct 18 09:35:00 2012 +0100
@@ -223,7 +223,6 @@ uninstall:
$(MAKE) -C xen uninstall
rm -rf $(D)$(CONFIG_DIR)/init.d/xendomains $(D)$(CONFIG_DIR)/init.d/xend
rm -rf $(D)$(CONFIG_DIR)/init.d/xencommons $(D)$(CONFIG_DIR)/init.d/xen-watchdog
- rm -rf $(D)$(CONFIG_DIR)/hotplug/xen-backend.agent
rm -f $(D)$(CONFIG_DIR)/udev/rules.d/xen-backend.rules
rm -f $(D)$(CONFIG_DIR)/udev/rules.d/xend.rules
rm -f $(D)$(SYSCONFIG_DIR)/xendomains
diff -r 33348baecf37 -r 019ca95dfa34 README
--- a/README Thu Oct 18 09:34:59 2012 +0100
+++ b/README Thu Oct 18 09:35:00 2012 +0100
@@ -54,7 +54,7 @@ provided by your OS distributor:
* pkg-config
* bridge-utils package (/sbin/brctl)
* iproute package (/sbin/ip)
- * hotplug or udev
+ * udev
* GNU bison and GNU flex
* GNU gettext
* 16-bit x86 assembler, loader and compiler (dev86 rpm or bin86 & bcc debs)
@@ -120,9 +120,9 @@ 4. To rebuild an existing tree without m
make install and make dist differ in that make install does the
right things for your local machine (installing the appropriate
- version of hotplug or udev scripts, for example), but make dist
- includes all versions of those scripts, so that you can copy the dist
- directory to another machine and install from that distribution.
+ version of udev scripts, for example), but make dist includes all
+ versions of those scripts, so that you can copy the dist directory
+ to another machine and install from that distribution.
Python Runtime Libraries
========================
diff -r 33348baecf37 -r 019ca95dfa34 install.sh
--- a/install.sh Thu Oct 18 09:34:59 2012 +0100
+++ b/install.sh Thu Oct 18 09:35:00 2012 +0100
@@ -27,20 +27,6 @@ echo "Installing Xen from '$src' to '$ds
echo "Installing Xen from '$src' to '$dst'..."
(cd $src; tar -cf - * ) | tar -C "$tmp" -xf -
-[ -x "$(which udevinfo)" ] && \
- UDEV_VERSION=$(udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/')
-
-[ -z "$UDEV_VERSION" -a -x /sbin/udevadm ] && \
- UDEV_VERSION=$(/sbin/udevadm info -V | awk '{print $NF}')
-
-if [ -n "$UDEV_VERSION" ] && [ $UDEV_VERSION -ge 059 ]; then
- echo " - installing for udev-based system"
- rm -rf "$tmp/etc/hotplug"
-else
- echo " - installing for hotplug-based system"
- rm -rf "$tmp/etc/udev"
-fi
-
echo " - modifying permissions"
chmod -R a+rX "$tmp"
diff -r 33348baecf37 -r 019ca95dfa34 tools/hotplug/Linux/Makefile
--- a/tools/hotplug/Linux/Makefile Thu Oct 18 09:34:59 2012 +0100
+++ b/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:00 2012 +0100
@@ -27,31 +27,8 @@ XEN_SCRIPT_DATA += block-common.sh vtpm-
XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
XEN_SCRIPT_DATA += vtpm-migration.sh vtpm-impl
-XEN_HOTPLUG_DIR = $(CONFIG_DIR)/hotplug
-XEN_HOTPLUG_SCRIPTS = xen-backend.agent
-
-UDEVVER = 0
-ifeq ($(shell [ -x /sbin/udevadm ] && echo 1),1)
-UDEVVER = $(shell /sbin/udevadm info -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' )
-endif
-ifeq ($(shell [ -x /usr/bin/udevinfo ] && echo 1),1)
-UDEVVER = $(shell /usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' )
-endif
-
UDEV_RULES_DIR = $(CONFIG_DIR)/udev
UDEV_RULES = xen-backend.rules xend.rules
-
-DI = $(if $(DISTDIR),$(shell readlink -f $(DISTDIR)),)
-DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),)
-ifeq ($(findstring $(DI),$(DE)),$(DI))
-HOTPLUGS=install-hotplug install-udev
-else
-ifeq ($(shell [ $(UDEVVER) -ge 059 ] && echo 1),1)
-HOTPLUGS=install-udev
-else
-HOTPLUGS=install-hotplug
-endif
-endif
.PHONY: all
all:
@@ -60,7 +37,7 @@ build:
build:
.PHONY: install
-install: all install-initd install-scripts $(HOTPLUGS)
+install: all install-initd install-scripts install-udev
# See docs/misc/distro_mapping.txt for INITD_DIR location
.PHONY: install-initd
@@ -87,15 +64,6 @@ install-scripts:
$(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
done
-.PHONY: install-hotplug
-install-hotplug:
- [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \
- $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR)
- set -e; for i in $(XEN_HOTPLUG_SCRIPTS); \
- do \
- $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \
- done
-
.PHONY: install-udev
install-udev:
[ -d $(DESTDIR)$(UDEV_RULES_DIR) ] || \
diff -r 33348baecf37 -r 019ca95dfa34 tools/hotplug/Linux/xen-backend.agent
--- a/tools/hotplug/Linux/xen-backend.agent Thu Oct 18 09:34:59 2012 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#! /bin/bash
-
-PATH=/etc/xen/scripts:$PATH
-
-. /etc/xen/scripts/locking.sh
-
-claim_lock xenbus_hotplug_global
-
-case "$XENBUS_TYPE" in
- tap)
- /etc/xen/scripts/blktap "$ACTION"
- ;;
- vbd)
- /etc/xen/scripts/block "$ACTION"
- ;;
- vtpm)
- /etc/xen/scripts/vtpm "$ACTION"
- ;;
- vif)
- [ -n "$script" ] && $script "$ACTION"
- ;;
- vscsi)
- /etc/xen/scripts/vscsi "$ACTION"
- ;;
-esac
-
-case "$ACTION" in
- add)
- ;;
- remove)
- /etc/xen/scripts/xen-hotplug-cleanup
- ;;
- online)
- ;;
- offline)
- ;;
-esac
-
-release_lock xenbus_hotplug_global

View File

@ -0,0 +1,36 @@
changeset: 26079:b3b03536789a
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:01 2012 +0100
files: tools/hotplug/Linux/locking.sh
description:
hotplug/Linux: close lockfd after lock attempt
When a HVM guest is shutdown some of the 'remove' events can not claim
the lock for some reason. Instead they try to grab the lock in a busy
loop, until udev reaps the xen-hotplug-cleanup helper.
After analyzing the resulting logfile its not obvious what the cause is.
The only explanation is that bash (?) gets confused if the same lockfd
is opened again and again. Closing it in each iteration seem to fix the
issue.
This was observed with sles11sp2 (bash 3.2) and 4.2 xend.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <Ian.campbell@citrix.com>
[ ijc -- added the comment ]
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 019ca95dfa34 -r b3b03536789a tools/hotplug/Linux/locking.sh
--- a/tools/hotplug/Linux/locking.sh Thu Oct 18 09:35:00 2012 +0100
+++ b/tools/hotplug/Linux/locking.sh Thu Oct 18 09:35:01 2012 +0100
@@ -59,6 +59,9 @@ claim_lock()
print "y\n" if $fd_inum eq $file_inum;
' "$_lockfile" )
if [ x$rightfile = xy ]; then break; fi
+ # Some versions of bash appear to be buggy if the same
+ # $_lockfile is opened repeatedly. Close the current fd here.
+ eval "exec $_lockfd<&-"
done
}

View File

@ -0,0 +1,30 @@
changeset: 26081:02064298ebcb
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:03 2012 +0100
files: stubdom/Makefile
description:
stubdom: fix rpmlint warning spurious-executable-perm
[ 1758s] xen-tools.x86_64: E: spurious-executable-perm (Badness: 50) /usr/lib/xen/boot/xenstore-stubdom.gz
[ 1758s] The file is installed with executable permissions, but was identified as one
[ 1758s] that probably should not be executable. Verify if the executable bits are
[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged under
[ 1758s] %docdir/examples, which will avoid this warning.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 25b2f53d2583 -r 02064298ebcb stubdom/Makefile
--- a/stubdom/Makefile Thu Oct 18 09:35:02 2012 +0100
+++ b/stubdom/Makefile Thu Oct 18 09:35:03 2012 +0100
@@ -396,7 +396,7 @@ install-grub: pv-grub
install-xenstore: xenstore-stubdom
$(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
- $(INSTALL_PROG) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz"
+ $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz"
#######
# clean

View File

@ -0,0 +1,30 @@
changeset: 26082:8cf26ace9ca0
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:03 2012 +0100
files: tools/blktap2/vhd/lib/Makefile
description:
blktap2/libvhd: fix rpmlint warning spurious-executable-perm
[ 1758s] xen-devel.x86_64: E: spurious-executable-perm (Badness: 50) /usr/lib64/libvhd.a
[ 1758s] The file is installed with executable permissions, but was identified as one
[ 1758s] that probably should not be executable. Verify if the executable bits are
[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged under
[ 1758s] %docdir/examples, which will avoid this warning.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 02064298ebcb -r 8cf26ace9ca0 tools/blktap2/vhd/lib/Makefile
--- a/tools/blktap2/vhd/lib/Makefile Thu Oct 18 09:35:03 2012 +0100
+++ b/tools/blktap2/vhd/lib/Makefile Thu Oct 18 09:35:03 2012 +0100
@@ -68,7 +68,7 @@ libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR
install: all
$(INSTALL_DIR) -p $(DESTDIR)$(INST-DIR)
- $(INSTALL_PROG) libvhd.a $(DESTDIR)$(INST-DIR)
+ $(INSTALL_DATA) libvhd.a $(DESTDIR)$(INST-DIR)
$(INSTALL_PROG) libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR)
ln -sf libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR)/libvhd.so.$(LIBVHD-MAJOR)
ln -sf libvhd.so.$(LIBVHD-MAJOR) $(DESTDIR)$(INST-DIR)/libvhd.so

View File

@ -0,0 +1,51 @@
changeset: 26083:3fbeb019d522
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:04 2012 +0100
files: tools/blktap/lib/Makefile
description:
blktap: fix rpmlint warning spurious-executable-perm
[ 1758s] xen-devel.x86_64: E: spurious-executable-perm (Badness: 50) /usr/lib64/libblktap.a
[ 1758s] The file is installed with executable permissions, but was identified as one
[ 1758s] that probably should not be executable. Verify if the executable bits are
[ 1758s] desired, and remove if not. NOTE: example scripts should be packaged under
[ 1758s] %docdir/examples, which will avoid this warning.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 8cf26ace9ca0 -r 3fbeb019d522 tools/blktap/lib/Makefile
--- a/tools/blktap/lib/Makefile Thu Oct 18 09:35:03 2012 +0100
+++ b/tools/blktap/lib/Makefile Thu Oct 18 09:35:04 2012 +0100
@@ -23,23 +23,25 @@ OBJS_PIC = $(SRCS:.c=.opic)
OBJS_PIC = $(SRCS:.c=.opic)
IBINS :=
-LIB = libblktap.a libblktap.so.$(MAJOR).$(MINOR)
+LIB = libblktap.a
+LIB_SO = libblktap.so.$(MAJOR).$(MINOR)
.PHONY: all
-all: $(LIB)
+all: $(LIB) $(LIB_SO)
.PHONY: install
install: all
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)
- $(INSTALL_PROG) $(LIB) $(DESTDIR)$(LIBDIR)
+ $(INSTALL_PROG) $(LIB_SO) $(DESTDIR)$(LIBDIR)
+ $(INSTALL_DATA) $(LIB) $(DESTDIR)$(LIBDIR)
ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)/libblktap.so.$(MAJOR)
ln -sf libblktap.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libblktap.so
$(INSTALL_DATA) blktaplib.h $(DESTDIR)$(INCLUDEDIR)
.PHONY: clean
clean:
- rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS
+ rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) $(LIB_SO) *~ $(DEPS) xen TAGS
libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC)
$(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_LDFLAGS) \

View File

@ -0,0 +1,34 @@
changeset: 26084:fe9a0eb9aaaa
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:05 2012 +0100
files: tools/hotplug/common/Makefile
description:
hotplug: install hotplugpath.sh as data file
rpmlint complains a script helper which is only sourced:
[ 1875s] xen-tools.i586: W: script-without-shebang /etc/xen/scripts/hotplugpath.sh
[ 1875s] This text file has executable bits set or is located in a path dedicated for
[ 1875s] executables, but lacks a shebang and cannot thus be executed. If the file is
[ 1875s] meant to be an executable script, add the shebang, otherwise remove the
[ 1875s] executable bits or move the file elsewhere.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 3fbeb019d522 -r fe9a0eb9aaaa tools/hotplug/common/Makefile
--- a/tools/hotplug/common/Makefile Thu Oct 18 09:35:04 2012 +0100
+++ b/tools/hotplug/common/Makefile Thu Oct 18 09:35:05 2012 +0100
@@ -6,8 +6,8 @@ HOTPLUGPATH="hotplugpath.sh"
# OS-independent hotplug scripts go in this directory
# Xen scripts to go there.
-XEN_SCRIPTS = $(HOTPLUGPATH)
-XEN_SCRIPT_DATA =
+XEN_SCRIPTS =
+XEN_SCRIPT_DATA = $(HOTPLUGPATH)
genpath-target = $(call buildmakevars2file,$(HOTPLUGPATH))
$(eval $(genpath-target))

View File

@ -0,0 +1,33 @@
changeset: 26085:e32f4301f384
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:06 2012 +0100
files: stubdom/Makefile
description:
stubdom: install stubdompath.sh as data file
rpmlint complains a script helper which is only sourced:
[ 1875s] xen-tools.i586: W: script-without-shebang /usr/lib/xen/bin/stubdompath.sh
[ 1875s] This text file has executable bits set or is located in a path dedicated for
[ 1875s] executables, but lacks a shebang and cannot thus be executed. If the file is
[ 1875s] meant to be an executable script, add the shebang, otherwise remove the
[ 1875s] executable bits or move the file elsewhere.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r fe9a0eb9aaaa -r e32f4301f384 stubdom/Makefile
--- a/stubdom/Makefile Thu Oct 18 09:35:05 2012 +0100
+++ b/stubdom/Makefile Thu Oct 18 09:35:06 2012 +0100
@@ -386,7 +386,8 @@ install-readme:
install-ioemu: ioemu-stubdom
$(INSTALL_DIR) "$(DESTDIR)$(LIBEXEC)"
- $(INSTALL_PROG) stubdompath.sh stubdom-dm "$(DESTDIR)$(LIBEXEC)"
+ $(INSTALL_PROG) stubdom-dm "$(DESTDIR)$(LIBEXEC)"
+ $(INSTALL_DATA) stubdompath.sh "$(DESTDIR)$(LIBEXEC)"
$(INSTALL_DIR) "$(DESTDIR)$(XENFIRMWAREDIR)"
$(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-ioemu/mini-os.gz "$(DESTDIR)$(XENFIRMWAREDIR)/ioemu-stubdom.gz"

View File

@ -0,0 +1,21 @@
changeset: 26086:ba6b1db89ec8
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:07 2012 +0100
files: tools/hotplug/Linux/init.d/sysconfig.xendomains
description:
hotplug/Linux: correct sysconfig tag in xendomains
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r e32f4301f384 -r ba6b1db89ec8 tools/hotplug/Linux/init.d/sysconfig.xendomains
--- a/tools/hotplug/Linux/init.d/sysconfig.xendomains Thu Oct 18 09:35:06 2012 +0100
+++ b/tools/hotplug/Linux/init.d/sysconfig.xendomains Thu Oct 18 09:35:07 2012 +0100
@@ -1,4 +1,4 @@
-## Path: System/xen
+## Path: System/Virtualization
## Description: xen domain start/stop on boot
## Type: string
## Default:

View File

@ -0,0 +1,36 @@
changeset: 26087:6239ace16749
user: Olaf Hering <olaf@aepfle.de>
date: Thu Oct 18 09:35:07 2012 +0100
files: tools/hotplug/Linux/Makefile
description:
hotplug/Linux: install sysconfig files as data files
rpmlint complains about wrong permissions of config files:
[ 455s] xen-tools.i586: W: script-without-shebang /var/adm/fillup-templates/sysconfig.xendomains
[ 455s] xen-tools.i586: W: script-without-shebang /var/adm/fillup-templates/sysconfig.xencommons
[ 455s] This text file has executable bits set or is located in a path dedicated for
[ 455s] executables, but lacks a shebang and cannot thus be executed. If the file is
[ 455s] meant to be an executable script, add the shebang, otherwise remove the
[ 455s] executable bits or move the file elsewhere.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Committed-by: Ian Campbell <ian.campbell@citrix.com>
diff -r ba6b1db89ec8 -r 6239ace16749 tools/hotplug/Linux/Makefile
--- a/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:07 2012 +0100
+++ b/tools/hotplug/Linux/Makefile Thu Oct 18 09:35:07 2012 +0100
@@ -46,9 +46,9 @@ install-initd:
[ -d $(DESTDIR)$(SYSCONFIG_DIR) ] || $(INSTALL_DIR) $(DESTDIR)$(SYSCONFIG_DIR)
$(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)$(INITD_DIR)
$(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains
+ $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains
$(INSTALL_PROG) $(XENCOMMONS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_PROG) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons
+ $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons
$(INSTALL_PROG) init.d/xen-watchdog $(DESTDIR)$(INITD_DIR)
.PHONY: install-scripts

View File

@ -0,0 +1,90 @@
# HG changeset patch
# User Charles Arnold <carnold@suse.com>
# Date 1351249508 -3600
# Node ID 6f9e46917eb8771914041b98f714e8f485fca5ef
# Parent 03af0abd2b72dfab3f2e50dd502108de8603f741
pygrub: Add option to list grub entries
The argument to "--entry" allows 2 syntaxes, either directly the entry
number in menu.lst, or the whole string behind the "title" key word.
This poses the following issue:
From Dom0 there is no way to guess the number and, or the complete
title string because this string contains the kernel version, which
will change with a kernel update.
This patch adds [-l|--list-entries] as an argument to pygrub.
Signed-off-by: Charles Arnold <carnold@suse.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
diff -r 03af0abd2b72 -r 6f9e46917eb8 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub Fri Oct 26 12:03:12 2012 +0100
+++ b/tools/pygrub/src/pygrub Fri Oct 26 12:05:08 2012 +0100
@@ -595,7 +595,17 @@ def run_grub(file, entry, fs, cfg_args):
sel = g.run()
g = Grub(file, fs)
- if interactive:
+
+ if list_entries:
+ for i in range(len(g.cf.images)):
+ img = g.cf.images[i]
+ print "title: %s" % img.title
+ print " root: %s" % img.root
+ print " kernel: %s" % img.kernel[1]
+ print " args: %s" % img.args
+ print " initrd: %s" % img.initrd[1]
+
+ if interactive and not list_entries:
curses.wrapper(run_main)
else:
sel = g.cf.default
@@ -702,7 +712,7 @@ if __name__ == "__main__":
sel = None
def usage():
- print >> sys.stderr, "Usage: %s [-q|--quiet] [-i|--interactive] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] <image>" %(sys.argv[0],)
+ print >> sys.stderr, "Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] <image>" %(sys.argv[0],)
def copy_from_image(fs, file_to_read, file_type, output_directory,
not_really):
@@ -736,8 +746,8 @@ if __name__ == "__main__":
dataoff += len(data)
try:
- opts, args = getopt.gnu_getopt(sys.argv[1:], 'qinh::',
- ["quiet", "interactive", "not-really", "help",
+ opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::',
+ ["quiet", "interactive", "list-entries", "not-really", "help",
"output=", "output-format=", "output-directory=",
"entry=", "kernel=",
"ramdisk=", "args=", "isconfig", "debug"])
@@ -753,6 +763,7 @@ if __name__ == "__main__":
output = None
entry = None
interactive = True
+ list_entries = False
isconfig = False
debug = False
not_really = False
@@ -771,6 +782,8 @@ if __name__ == "__main__":
interactive = False
elif o in ("-i", "--interactive"):
interactive = True
+ elif o in ("-l", "--list-entries"):
+ list_entries = True
elif o in ("-n", "--not-really"):
not_really = True
elif o in ("-h", "--help"):
@@ -855,6 +868,9 @@ if __name__ == "__main__":
fs = None
continue
+ if list_entries:
+ sys.exit(0)
+
# Did looping through partitions find us a kernel?
if not fs:
raise RuntimeError, "Unable to find partition containing kernel"

View File

@ -0,0 +1,643 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1352368421 -3600
# Node ID 92163b114076029842d0f2d1dbfaa445976c71a3
# Parent aa2074529eb0183257b6f5f29821b0cd6dfd991a
x86/ACPI: invalidate BGRT if necessary
Since the image pointed to may live in boot services memory (which we
add to the global memory pool long before ACPI tables get looked at),
we should prevent Dom0 from trying to retrieve the image data in that
case.
The alternatives would be to
- not add boot services memory to the global pool at all, or
- defer adding boot services memory until Dom0 indicates it is safe to
do so, or
- find and parse the BGRT table in xen/arch/x86/efi/boot.c, and avoid
adding that specific region to the E820 table.
None of these are really attractive, and as Xen commonly prints to the
video console anyway (without trying to avoid any regions on the
screen), the invalidation would need to be done conditionally anyway.
(xen/include/acpi/actbl3.h is a verbatim copy from Linux 3.7-rc4)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -28,6 +28,7 @@
#include <xen/init.h>
#include <xen/acpi.h>
#include <xen/irq.h>
+#include <xen/mm.h>
#include <xen/dmi.h>
#include <asm/fixmap.h>
#include <asm/page.h>
@@ -285,6 +286,27 @@ static int __init acpi_parse_hpet(struct
#define acpi_parse_hpet NULL
#endif
+static int __init acpi_invalidate_bgrt(struct acpi_table_header *table)
+{
+ struct acpi_table_bgrt *bgrt_tbl =
+ container_of(table, struct acpi_table_bgrt, header);
+
+ if (table->length < sizeof(*bgrt_tbl))
+ return -1;
+
+ if (bgrt_tbl->version == 1 && bgrt_tbl->image_address
+ && !page_is_ram_type(PFN_DOWN(bgrt_tbl->image_address),
+ RAM_TYPE_CONVENTIONAL))
+ return 0;
+
+ printk(KERN_INFO PREFIX "BGRT: invalidating v%d image at %#"PRIx64"\n",
+ bgrt_tbl->version, bgrt_tbl->image_address);
+ bgrt_tbl->image_address = 0;
+ bgrt_tbl->status &= ~1;
+
+ return 0;
+}
+
#ifdef CONFIG_ACPI_SLEEP
#define acpi_fadt_copy_address(dst, src, len) do { \
if (fadt->header.revision >= FADT2_REVISION_ID) \
@@ -833,5 +855,7 @@ int __init acpi_boot_init(void)
erst_init();
+ acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt);
+
return 0;
}
--- a/xen/include/acpi/actbl.h
+++ b/xen/include/acpi/actbl.h
@@ -314,6 +314,7 @@ enum acpi_prefered_pm_profiles {
#include <acpi/actbl1.h>
#include <acpi/actbl2.h>
+#include <acpi/actbl3.h>
/*
* Sizes of the various flavors of FADT. We need to look closely
--- /dev/null
+++ b/xen/include/acpi/actbl3.h
@@ -0,0 +1,557 @@
+/******************************************************************************
+ *
+ * Name: actbl3.h - ACPI Table Definitions
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef __ACTBL3_H__
+#define __ACTBL3_H__
+
+/*******************************************************************************
+ *
+ * Additional ACPI Tables (3)
+ *
+ * These tables are not consumed directly by the ACPICA subsystem, but are
+ * included here to support device drivers and the AML disassembler.
+ *
+ * The tables in this file are fully defined within the ACPI specification.
+ *
+ ******************************************************************************/
+
+/*
+ * Values for description table header signatures for tables defined in this
+ * file. Useful because they make it more difficult to inadvertently type in
+ * the wrong signature.
+ */
+#define ACPI_SIG_BGRT "BGRT" /* Boot Graphics Resource Table */
+#define ACPI_SIG_DRTM "DRTM" /* Dynamic Root of Trust for Measurement table */
+#define ACPI_SIG_FPDT "FPDT" /* Firmware Performance Data Table */
+#define ACPI_SIG_GTDT "GTDT" /* Generic Timer Description Table */
+#define ACPI_SIG_MPST "MPST" /* Memory Power State Table */
+#define ACPI_SIG_PCCT "PCCT" /* Platform Communications Channel Table */
+#define ACPI_SIG_PMTT "PMTT" /* Platform Memory Topology Table */
+#define ACPI_SIG_RASF "RASF" /* RAS Feature table */
+
+#define ACPI_SIG_S3PT "S3PT" /* S3 Performance (sub)Table */
+#define ACPI_SIG_PCCS "PCC" /* PCC Shared Memory Region */
+
+/* Reserved table signatures */
+
+#define ACPI_SIG_CSRT "CSRT" /* Core System Resources Table */
+#define ACPI_SIG_MATR "MATR" /* Memory Address Translation Table */
+#define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */
+#define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */
+
+/*
+ * All tables must be byte-packed to match the ACPI specification, since
+ * the tables are provided by the system BIOS.
+ */
+#pragma pack(1)
+
+/*
+ * Note: C bitfields are not used for this reason:
+ *
+ * "Bitfields are great and easy to read, but unfortunately the C language
+ * does not specify the layout of bitfields in memory, which means they are
+ * essentially useless for dealing with packed data in on-disk formats or
+ * binary wire protocols." (Or ACPI tables and buffers.) "If you ask me,
+ * this decision was a design error in C. Ritchie could have picked an order
+ * and stuck with it." Norman Ramsey.
+ * See http://stackoverflow.com/a/1053662/41661
+ */
+
+/*******************************************************************************
+ *
+ * BGRT - Boot Graphics Resource Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_bgrt {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u16 version;
+ u8 status;
+ u8 image_type;
+ u64 image_address;
+ u32 image_offset_x;
+ u32 image_offset_y;
+};
+
+/*******************************************************************************
+ *
+ * DRTM - Dynamic Root of Trust for Measurement table
+ *
+ ******************************************************************************/
+
+struct acpi_table_drtm {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u64 entry_base_address;
+ u64 entry_length;
+ u32 entry_address32;
+ u64 entry_address64;
+ u64 exit_address;
+ u64 log_area_address;
+ u32 log_area_length;
+ u64 arch_dependent_address;
+ u32 flags;
+};
+
+/* 1) Validated Tables List */
+
+struct acpi_drtm_vtl_list {
+ u32 validated_table_list_count;
+};
+
+/* 2) Resources List */
+
+struct acpi_drtm_resource_list {
+ u32 resource_list_count;
+};
+
+/* 3) Platform-specific Identifiers List */
+
+struct acpi_drtm_id_list {
+ u32 id_list_count;
+};
+
+/*******************************************************************************
+ *
+ * FPDT - Firmware Performance Data Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_fpdt {
+ struct acpi_table_header header; /* Common ACPI table header */
+};
+
+/* FPDT subtable header */
+
+struct acpi_fpdt_header {
+ u16 type;
+ u8 length;
+ u8 revision;
+};
+
+/* Values for Type field above */
+
+enum acpi_fpdt_type {
+ ACPI_FPDT_TYPE_BOOT = 0,
+ ACPI_FPDT_TYPE_S3PERF = 1,
+};
+
+/*
+ * FPDT subtables
+ */
+
+/* 0: Firmware Basic Boot Performance Record */
+
+struct acpi_fpdt_boot {
+ struct acpi_fpdt_header header;
+ u8 reserved[4];
+ u64 reset_end;
+ u64 load_start;
+ u64 startup_start;
+ u64 exit_services_entry;
+ u64 exit_services_exit;
+};
+
+/* 1: S3 Performance Table Pointer Record */
+
+struct acpi_fpdt_s3pt_ptr {
+ struct acpi_fpdt_header header;
+ u8 reserved[4];
+ u64 address;
+};
+
+/*
+ * S3PT - S3 Performance Table. This table is pointed to by the
+ * FPDT S3 Pointer Record above.
+ */
+struct acpi_table_s3pt {
+ u8 signature[4]; /* "S3PT" */
+ u32 length;
+};
+
+/*
+ * S3PT Subtables
+ */
+struct acpi_s3pt_header {
+ u16 type;
+ u8 length;
+ u8 revision;
+};
+
+/* Values for Type field above */
+
+enum acpi_s3pt_type {
+ ACPI_S3PT_TYPE_RESUME = 0,
+ ACPI_S3PT_TYPE_SUSPEND = 1,
+};
+
+struct acpi_s3pt_resume {
+ struct acpi_s3pt_header header;
+ u32 resume_count;
+ u64 full_resume;
+ u64 average_resume;
+};
+
+struct acpi_s3pt_suspend {
+ struct acpi_s3pt_header header;
+ u64 suspend_start;
+ u64 suspend_end;
+};
+
+/*******************************************************************************
+ *
+ * GTDT - Generic Timer Description Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_gtdt {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u64 address;
+ u32 flags;
+ u32 secure_pl1_interrupt;
+ u32 secure_pl1_flags;
+ u32 non_secure_pl1_interrupt;
+ u32 non_secure_pl1_flags;
+ u32 virtual_timer_interrupt;
+ u32 virtual_timer_flags;
+ u32 non_secure_pl2_interrupt;
+ u32 non_secure_pl2_flags;
+};
+
+/* Values for Flags field above */
+
+#define ACPI_GTDT_MAPPED_BLOCK_PRESENT 1
+
+/* Values for all "TimerFlags" fields above */
+
+#define ACPI_GTDT_INTERRUPT_MODE 1
+#define ACPI_GTDT_INTERRUPT_POLARITY 2
+
+/*******************************************************************************
+ *
+ * MPST - Memory Power State Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+#define ACPI_MPST_CHANNEL_INFO \
+ u16 reserved1; \
+ u8 channel_id; \
+ u8 reserved2; \
+ u16 power_node_count;
+
+/* Main table */
+
+struct acpi_table_mpst {
+ struct acpi_table_header header; /* Common ACPI table header */
+ ACPI_MPST_CHANNEL_INFO /* Platform Communication Channel */
+};
+
+/* Memory Platform Communication Channel Info */
+
+struct acpi_mpst_channel {
+ ACPI_MPST_CHANNEL_INFO /* Platform Communication Channel */
+};
+
+/* Memory Power Node Structure */
+
+struct acpi_mpst_power_node {
+ u8 flags;
+ u8 reserved1;
+ u16 node_id;
+ u32 length;
+ u64 range_address;
+ u64 range_length;
+ u8 num_power_states;
+ u8 num_physical_components;
+ u16 reserved2;
+};
+
+/* Values for Flags field above */
+
+#define ACPI_MPST_ENABLED 1
+#define ACPI_MPST_POWER_MANAGED 2
+#define ACPI_MPST_HOT_PLUG_CAPABLE 4
+
+/* Memory Power State Structure (follows POWER_NODE above) */
+
+struct acpi_mpst_power_state {
+ u8 power_state;
+ u8 info_index;
+};
+
+/* Physical Component ID Structure (follows POWER_STATE above) */
+
+struct acpi_mpst_component {
+ u16 component_id;
+};
+
+/* Memory Power State Characteristics Structure (follows all POWER_NODEs) */
+
+struct acpi_mpst_data_hdr {
+ u16 characteristics_count;
+};
+
+struct acpi_mpst_power_data {
+ u8 revision;
+ u8 flags;
+ u16 reserved1;
+ u32 average_power;
+ u32 power_saving;
+ u64 exit_latency;
+ u64 reserved2;
+};
+
+/* Values for Flags field above */
+
+#define ACPI_MPST_PRESERVE 1
+#define ACPI_MPST_AUTOENTRY 2
+#define ACPI_MPST_AUTOEXIT 4
+
+/* Shared Memory Region (not part of an ACPI table) */
+
+struct acpi_mpst_shared {
+ u32 signature;
+ u16 pcc_command;
+ u16 pcc_status;
+ u16 command_register;
+ u16 status_register;
+ u16 power_state_id;
+ u16 power_node_id;
+ u64 energy_consumed;
+ u64 average_power;
+};
+
+/*******************************************************************************
+ *
+ * PCCT - Platform Communications Channel Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_pcct {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u32 flags;
+ u32 latency;
+ u32 reserved;
+};
+
+/* Values for Flags field above */
+
+#define ACPI_PCCT_DOORBELL 1
+
+/*
+ * PCCT subtables
+ */
+
+/* 0: Generic Communications Subspace */
+
+struct acpi_pcct_subspace {
+ struct acpi_subtable_header header;
+ u8 reserved[6];
+ u64 base_address;
+ u64 length;
+ struct acpi_generic_address doorbell_register;
+ u64 preserve_mask;
+ u64 write_mask;
+};
+
+/*
+ * PCC memory structures (not part of the ACPI table)
+ */
+
+/* Shared Memory Region */
+
+struct acpi_pcct_shared_memory {
+ u32 signature;
+ u16 command;
+ u16 status;
+};
+
+/*******************************************************************************
+ *
+ * PMTT - Platform Memory Topology Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_pmtt {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u32 reserved;
+};
+
+/* Common header for PMTT subtables that follow main table */
+
+struct acpi_pmtt_header {
+ u8 type;
+ u8 reserved1;
+ u16 length;
+ u16 flags;
+ u16 reserved2;
+};
+
+/* Values for Type field above */
+
+#define ACPI_PMTT_TYPE_SOCKET 0
+#define ACPI_PMTT_TYPE_CONTROLLER 1
+#define ACPI_PMTT_TYPE_DIMM 2
+#define ACPI_PMTT_TYPE_RESERVED 3 /* 0x03-0xFF are reserved */
+
+/* Values for Flags field above */
+
+#define ACPI_PMTT_TOP_LEVEL 0x0001
+#define ACPI_PMTT_PHYSICAL 0x0002
+#define ACPI_PMTT_MEMORY_TYPE 0x000C
+
+/*
+ * PMTT subtables, correspond to Type in struct acpi_pmtt_header
+ */
+
+/* 0: Socket Structure */
+
+struct acpi_pmtt_socket {
+ struct acpi_pmtt_header header;
+ u16 socket_id;
+ u16 reserved;
+};
+
+/* 1: Memory Controller subtable */
+
+struct acpi_pmtt_controller {
+ struct acpi_pmtt_header header;
+ u32 read_latency;
+ u32 write_latency;
+ u32 read_bandwidth;
+ u32 write_bandwidth;
+ u16 access_width;
+ u16 alignment;
+ u16 reserved;
+ u16 domain_count;
+};
+
+/* 1a: Proximity Domain substructure */
+
+struct acpi_pmtt_domain {
+ u32 proximity_domain;
+};
+
+/* 2: Physical Component Identifier (DIMM) */
+
+struct acpi_pmtt_physical_component {
+ struct acpi_pmtt_header header;
+ u16 component_id;
+ u16 reserved;
+ u32 memory_size;
+ u32 bios_handle;
+};
+
+/*******************************************************************************
+ *
+ * RASF - RAS Feature Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_rasf {
+ struct acpi_table_header header; /* Common ACPI table header */
+ u8 channel_id[12];
+};
+
+/* RASF Platform Communication Channel Shared Memory Region */
+
+struct acpi_rasf_shared_memory {
+ u32 signature;
+ u16 command;
+ u16 status;
+ u64 requested_address;
+ u64 requested_length;
+ u64 actual_address;
+ u64 actual_length;
+ u16 flags;
+ u8 speed;
+};
+
+/* Masks for Flags and Speed fields above */
+
+#define ACPI_RASF_SCRUBBER_RUNNING 1
+#define ACPI_RASF_SPEED (7<<1)
+
+/* Channel Commands */
+
+enum acpi_rasf_commands {
+ ACPI_RASF_GET_RAS_CAPABILITIES = 1,
+ ACPI_RASF_GET_PATROL_PARAMETERS = 2,
+ ACPI_RASF_START_PATROL_SCRUBBER = 3,
+ ACPI_RASF_STOP_PATROL_SCRUBBER = 4
+};
+
+/* Channel Command flags */
+
+#define ACPI_RASF_GENERATE_SCI (1<<15)
+
+/* Status values */
+
+enum acpi_rasf_status {
+ ACPI_RASF_SUCCESS = 0,
+ ACPI_RASF_NOT_VALID = 1,
+ ACPI_RASF_NOT_SUPPORTED = 2,
+ ACPI_RASF_BUSY = 3,
+ ACPI_RASF_FAILED = 4,
+ ACPI_RASF_ABORTED = 5,
+ ACPI_RASF_INVALID_DATA = 6
+};
+
+/* Status flags */
+
+#define ACPI_RASF_COMMAND_COMPLETE (1)
+#define ACPI_RASF_SCI_DOORBELL (1<<1)
+#define ACPI_RASF_ERROR (1<<2)
+#define ACPI_RASF_STATUS (0x1F<<3)
+
+/* Reset to default packing */
+
+#pragma pack()
+
+#endif /* __ACTBL3_H__ */

View File

@ -0,0 +1,168 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1352709367 -3600
# Node ID fdb69dd527cd01a46f87efb380050559dcf12d37
# Parent 286ef4ced2164f4e9bf52fd0c52248182e69a6e6
IOMMU: don't immediately disable bus mastering on faults
Instead, give the owning domain at least a small opportunity of fixing
things up, and allow for rare faults to not bring down the device at
all.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -564,7 +564,7 @@ static hw_irq_controller iommu_msi_type
static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
{
- u16 domain_id, device_id, bdf, cword;
+ u16 domain_id, device_id, bdf;
u32 code;
u64 *addr;
int count = 0;
@@ -615,18 +615,10 @@ static void parse_event_log_entry(struct
"fault address = 0x%"PRIx64"\n",
event_str[code-1], domain_id, device_id, *addr);
- /* Tell the device to stop DMAing; we can't rely on the guest to
- * control it for us. */
for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
- {
- cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
- PCI_SLOT(bdf), PCI_FUNC(bdf),
- PCI_COMMAND);
- pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
- PCI_FUNC(bdf), PCI_COMMAND,
- cword & ~PCI_COMMAND_MASTER);
- }
+ pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
+ PCI_DEVFN2(bdf));
}
else
{
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -214,6 +214,7 @@ static int device_assigned(u16 seg, u8 b
static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev;
int rc = 0;
if ( !iommu_enabled || !hd->platform_ops )
@@ -227,6 +228,10 @@ static int assign_device(struct domain *
return -EXDEV;
spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( pdev )
+ pdev->fault.count = 0;
+
if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
goto done;
@@ -378,6 +383,8 @@ int deassign_device(struct domain *d, u1
return ret;
}
+ pdev->fault.count = 0;
+
if ( !has_arch_pdevs(d) && need_iommu(d) )
{
d->need_iommu = 0;
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -637,6 +637,36 @@ int __init pci_device_detect(u16 seg, u8
return 1;
}
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
+{
+ struct pci_dev *pdev;
+ s_time_t now = NOW();
+ u16 cword;
+
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(seg, bus, devfn);
+ if ( pdev )
+ {
+ if ( now < pdev->fault.time ||
+ now - pdev->fault.time > MILLISECS(10) )
+ pdev->fault.count >>= 1;
+ pdev->fault.time = now;
+ if ( ++pdev->fault.count < PT_FAULT_THRESHOLD )
+ pdev = NULL;
+ }
+ spin_unlock(&pcidevs_lock);
+
+ if ( !pdev )
+ return;
+
+ /* Tell the device to stop DMAing; we can't rely on the guest to
+ * control it for us. */
+ cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_COMMAND);
+ pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+}
+
/*
* scan pci devices to add all existed PCI devices to alldevs_list,
* and setup pci hierarchy in array bus2bridge.
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -936,7 +936,7 @@ static void __do_iommu_page_fault(struct
while (1)
{
u8 fault_reason;
- u16 source_id, cword;
+ u16 source_id;
u32 data;
u64 guest_addr;
int type;
@@ -969,14 +969,8 @@ static void __do_iommu_page_fault(struct
iommu_page_fault_do_one(iommu, type, fault_reason,
source_id, guest_addr);
- /* Tell the device to stop DMAing; we can't rely on the guest to
- * control it for us. */
- cword = pci_conf_read16(iommu->intel->drhd->segment,
- PCI_BUS(source_id), PCI_SLOT(source_id),
- PCI_FUNC(source_id), PCI_COMMAND);
- pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
- PCI_SLOT(source_id), PCI_FUNC(source_id),
- PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+ pci_check_disable_device(iommu->intel->drhd->segment,
+ PCI_BUS(source_id), PCI_DEVFN2(source_id));
fault_index++;
if ( fault_index > cap_num_fault_regs(iommu->cap) )
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -64,6 +64,11 @@ struct pci_dev {
const u8 devfn;
struct pci_dev_info info;
struct arch_pci_dev arch;
+ struct {
+ s_time_t time;
+ unsigned int count;
+#define PT_FAULT_THRESHOLD 10
+ } fault;
u64 vf_rlen[6];
};
@@ -106,6 +111,7 @@ void arch_pci_ro_device(int seg, int bdf
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(
struct domain *, int seg, int bus, int devfn);
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
uint8_t pci_conf_read8(
unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func,

View File

@ -0,0 +1,88 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1353575003 -3600
# Node ID c139ca92edca2fab8ec95deb7fd9e4246c3fe28d
# Parent af6b72a224e99a4a516fbc2eecc06ada569304e8
x86/HPET: fix FSB interrupt masking
HPET_TN_FSB is not really suitable for masking interrupts - it merely
switches between the two delivery methods. The right way of masking is
through the HPET_TN_ENABLE bit (which really is an interrupt enable,
not a counter enable or some such). This is even more so with certain
chip sets not even allowing HPET_TN_FSB to be cleared on some of the
channels.
Further, all the setup of the channel should happen before actually
enabling the interrupt, which requires splitting legacy and FSB logic.
Finally this also fixes an S3 resume problem (HPET_TN_FSB did not get
set in hpet_broadcast_resume(), and hpet_msi_unmask() doesn't get
called from the general resume code either afaict).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -236,7 +236,7 @@ static void hpet_msi_unmask(struct irq_d
struct hpet_event_channel *ch = desc->action->dev_id;
cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
- cfg |= HPET_TN_FSB;
+ cfg |= HPET_TN_ENABLE;
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
}
@@ -246,7 +246,7 @@ static void hpet_msi_mask(struct irq_des
struct hpet_event_channel *ch = desc->action->dev_id;
cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
- cfg &= ~HPET_TN_FSB;
+ cfg &= ~HPET_TN_ENABLE;
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
}
@@ -319,8 +319,14 @@ static void __hpet_setup_msi_irq(struct
static int __init hpet_setup_msi_irq(unsigned int irq, struct hpet_event_channel *ch)
{
int ret;
+ u32 cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
irq_desc_t *desc = irq_to_desc(irq);
+ /* set HPET Tn as oneshot */
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+ cfg |= HPET_TN_FSB | HPET_TN_32BIT;
+ hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+
desc->handler = &hpet_msi_type;
ret = request_irq(irq, hpet_interrupt_handler, 0, "HPET", ch);
if ( ret < 0 )
@@ -541,11 +547,14 @@ void __init hpet_broadcast_init(void)
for ( i = 0; i < n; i++ )
{
- /* set HPET Tn as oneshot */
- cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
- cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
+ if ( i == 0 && (cfg & HPET_CFG_LEGACY) )
+ {
+ /* set HPET T0 as oneshot */
+ cfg = hpet_read32(HPET_Tn_CFG(0));
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+ cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ hpet_write32(cfg, HPET_Tn_CFG(0));
+ }
/*
* The period is a femto seconds value. We need to calculate the scaled
@@ -602,6 +611,8 @@ void hpet_broadcast_resume(void)
cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ if ( !(hpet_events[i].flags & HPET_EVT_LEGACY) )
+ cfg |= HPET_TN_FSB;
hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
hpet_events[i].next_event = STIME_MAX;

View File

@ -0,0 +1,85 @@
# HG changeset patch
# Parent 8b93ac0c93f3fb8a140b4688ba71841ac927d4e3
xenstore-chmod: handle arbitrary number of perms rather than MAX_PERMS constant
Constant MAX_PERMS 16 is too small to use in some occasions, e.g. if
there are more than 16 domU(s) on one hypervisor (it's easy to
achieve) and one wants to do xenstore-chmod PATH to all domU(s). So,
remove MAX_PERMS limitation and make it as arbitrary number of perms.
Signed-off-by: Chunyan Liu <cyliu@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 8b93ac0c93f3 tools/xenstore/xenstore_client.c
--- a/tools/xenstore/xenstore_client.c Tue Nov 13 11:19:17 2012 +0000
+++ b/tools/xenstore/xenstore_client.c Mon Nov 26 11:33:38 2012 +0800
@@ -25,7 +25,6 @@
#define PATH_SEP '/'
#define MAX_PATH_LEN 256
-#define MAX_PERMS 16
enum mode {
MODE_unknown,
@@ -407,44 +406,41 @@ perform(enum mode mode, int optind, int
output("%s\n", list[i]);
}
free(list);
- optind++;
- break;
- }
- case MODE_ls: {
- do_ls(xsh, argv[optind], 0, prefix);
- optind++;
- break;
+ optind++;
+ break;
+ }
+ case MODE_ls: {
+ do_ls(xsh, argv[optind], 0, prefix);
+ optind++;
+ break;
}
case MODE_chmod: {
- struct xs_permissions perms[MAX_PERMS];
- int nperms = 0;
/* save path pointer: */
char *path = argv[optind++];
- for (; argv[optind]; optind++, nperms++)
+ int nperms = argc - optind;
+ struct xs_permissions perms[nperms];
+ int i;
+ for (i = 0; argv[optind]; optind++, i++)
{
- if (MAX_PERMS <= nperms)
- errx(1, "Too many permissions specified. "
- "Maximum per invocation is %d.", MAX_PERMS);
-
- perms[nperms].id = atoi(argv[optind]+1);
+ perms[i].id = atoi(argv[optind]+1);
switch (argv[optind][0])
{
case 'n':
- perms[nperms].perms = XS_PERM_NONE;
+ perms[i].perms = XS_PERM_NONE;
break;
case 'r':
- perms[nperms].perms = XS_PERM_READ;
+ perms[i].perms = XS_PERM_READ;
break;
case 'w':
- perms[nperms].perms = XS_PERM_WRITE;
+ perms[i].perms = XS_PERM_WRITE;
break;
case 'b':
- perms[nperms].perms = XS_PERM_READ | XS_PERM_WRITE;
+ perms[i].perms = XS_PERM_READ | XS_PERM_WRITE;
break;
default:
errx(1, "Invalid permission specification: '%c'",
- argv[optind][0]);
+ argv[optind][0]);
}
}

View File

@ -0,0 +1,28 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1354118456 -3600
# Node ID 836697b197462f89a4d296da9482d1719dcc0836
# Parent 1fce7522daa6bab9fce93b95adf592193c904097
IOMMU: imply "verbose" from "debug"
I think that generally enabling debugging code without also enabling
verbose output is rather pointless; if someone really wants this, they
can always pass e.g. "iommu=debug,no-verbose".
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -91,7 +91,11 @@ static void __init parse_iommu_param(cha
else if ( !strcmp(s, "intremap") )
iommu_intremap = val;
else if ( !strcmp(s, "debug") )
+ {
iommu_debug = val;
+ if ( val )
+ iommu_verbose = 1;
+ }
else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
amd_iommu_perdev_intremap = val;
else if ( !strcmp(s, "dom0-passthrough") )

View File

@ -0,0 +1,52 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1354697534 -3600
# Node ID 670b07e8d7382229639af0d1df30071e6c1ebb19
# Parent bc624b00d6d601f00a53c2f7502a82dcef60f882
IOMMU/ATS: fix maximum queue depth calculation
The capabilities register field is a 5-bit value, and the 5 bits all
being zero actually means 32 entries.
Under the assumption that amd_iommu_flush_iotlb() really just tried
to correct for the miscalculation above when adding 32 to the value,
that adjustment is also being removed.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by Xiantao Zhang <xiantao.zhang@intel.com>
Acked-by: Wei Huang <wei.huang2@amd.com>
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -321,7 +321,7 @@ void amd_iommu_flush_iotlb(struct pci_de
req_id = get_dma_requestor_id(iommu->seg, bdf);
queueid = req_id;
- maxpend = (ats_pdev->ats_queue_depth + 32) & 0xff;
+ maxpend = ats_pdev->ats_queue_depth & 0xff;
/* send INVALIDATE_IOTLB_PAGES command */
spin_lock_irqsave(&iommu->lock, flags);
--- a/xen/drivers/passthrough/ats.h
+++ b/xen/drivers/passthrough/ats.h
@@ -30,7 +30,7 @@ struct pci_ats_dev {
#define ATS_REG_CAP 4
#define ATS_REG_CTL 6
-#define ATS_QUEUE_DEPTH_MASK 0xF
+#define ATS_QUEUE_DEPTH_MASK 0x1f
#define ATS_ENABLE (1<<15)
extern struct list_head ats_devices;
--- a/xen/drivers/passthrough/x86/ats.c
+++ b/xen/drivers/passthrough/x86/ats.c
@@ -93,7 +93,8 @@ int enable_ats_device(int seg, int bus,
pdev->devfn = devfn;
value = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
PCI_FUNC(devfn), pos + ATS_REG_CAP);
- pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK;
+ pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK ?:
+ ATS_QUEUE_DEPTH_MASK + 1;
list_add(&pdev->list, &ats_devices);
}

View File

@ -0,0 +1,28 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354812866 0
# Node ID 312f0713dfc98635fd9ed4b42481581489faa28f
# Parent bfd8e96fa3f157630f9698401a1f040ca1776c8e
nested vmx: fix rflags status in virtual vmexit
As stated in SDM, all bits (except for those 1-reserved) in rflags
would be set to 0 in VM exit. Therefore we need to follow this logic
in virtual_vmexit.
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -990,7 +990,8 @@ static void virtual_vmexit(struct cpu_us
regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
- regs->eflags = __vmread(GUEST_RFLAGS);
+ /* VM exit clears all bits except bit 1 */
+ regs->eflags = 0x2;
/* updating host cr0 to sync TS bit */
__vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);

View File

@ -0,0 +1,46 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354812981 0
# Node ID a09150b57ace2fa786dcaefa958f0b197b1b6d4c
# Parent 312f0713dfc98635fd9ed4b42481581489faa28f
nested vmx: fix handling of RDTSC
If L0 is to handle the TSC access, then we need to update guest EIP by
calling update_guest_eip().
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1613,7 +1613,7 @@ static int get_instruction_length(void)
return len;
}
-static void update_guest_eip(void)
+void update_guest_eip(void)
{
struct cpu_user_regs *regs = guest_cpu_user_regs();
unsigned long x;
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1558,6 +1558,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
regs->eax = (uint32_t)tsc;
regs->edx = (uint32_t)(tsc >> 32);
+ update_guest_eip();
return 1;
}
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -396,6 +396,8 @@ void ept_p2m_init(struct p2m_domain *p2m
void ept_walk_table(struct domain *d, unsigned long gfn);
void setup_ept_dump(void);
+void update_guest_eip(void);
+
/* EPT violation qualifications definitions */
#define _EPT_READ_VIOLATION 0
#define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION)

27
26254-VMX-nested-dr.patch Normal file
View File

@ -0,0 +1,27 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354813009 0
# Node ID e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
# Parent a09150b57ace2fa786dcaefa958f0b197b1b6d4c
nested vmx: fix DR access VM exit
For DR register, we use lazy restore mechanism when access
it. Therefore when receiving such VM exit, L0 should be responsible to
switch to the right DR values, then inject to L1 hypervisor.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1585,7 +1585,8 @@ int nvmx_n2_vmexit_handler(struct cpu_us
break;
case EXIT_REASON_DR_ACCESS:
ctrl = __n2_exec_control(v);
- if ( ctrl & CPU_BASED_MOV_DR_EXITING )
+ if ( (ctrl & CPU_BASED_MOV_DR_EXITING) &&
+ v->arch.hvm_vcpu.flag_dr_dirty )
nvcpu->nv_vmexit_pending = 1;
break;
case EXIT_REASON_INVLPG:

View File

@ -0,0 +1,30 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354813046 0
# Node ID 1ed1507fa0407f1da715d04fe1b510e81ca4fb31
# Parent e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
nested vmx: enable IA32E mode while do VM entry
Some VMMs may check the platform capability to judge whether long
mode guest is supported. Therefore we need to expose this bit to
guest VMM.
Xen on Xen works fine in current solution because Xen doesn't
check this capability but directly set it in VMCS if guest
supports long mode.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1351,7 +1351,7 @@ int nvmx_msr_read_intercept(unsigned int
case MSR_IA32_VMX_ENTRY_CTLS:
/* bit 0-8, and 12 must be 1 (refer G5 of SDM) */
data = 0x11ff;
- data = (data << 32) | data;
+ data = ((data | VM_ENTRY_IA32E_MODE) << 32) | data;
break;
case IA32_FEATURE_CONTROL_MSR:

View File

@ -0,0 +1,45 @@
# HG changeset patch
# User Dongxiao Xu <dongxiao.xu@intel.com>
# Date 1354813139 0
# Node ID 90831c29bfde6aac013b7e5ec98934a4953c31c9
# Parent 25dd352265ca23750f1a1a983124b36f518c4384
nested vmx: fix interrupt delivery to L2 guest
While delivering interrupt into L2 guest, L0 hypervisor need to check
whether L1 hypervisor wants to own the interrupt, if not, directly
inject the interrupt into L2 guest.
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Committed-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,7 +163,7 @@ enum hvm_intblk nvmx_intr_blocked(struct
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
- u32 exit_ctrl;
+ u32 ctrl;
if ( nvmx_intr_blocked(v) != hvm_intblk_none )
{
@@ -176,11 +176,14 @@ static int nvmx_intr_intercept(struct vc
if ( intack.source == hvm_intsrc_pic ||
intack.source == hvm_intsrc_lapic )
{
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+ if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
+ return 0;
+
vmx_inject_extint(intack.vector);
- exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
- VM_EXIT_CONTROLS);
- if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
+ if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
{
/* for now, duplicate the ack path in vmx_intr_assist */
hvm_vcpu_ack_pending_irq(v, intack);

View File

@ -0,0 +1,62 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1354884272 -3600
# Node ID b62bd62b26836fafe19cf41fec194bcf33e2ead6
# Parent cb542e58da25211843eb79998ea8568ebe9c8056
x86/EFI: add code interfacing with the secure boot shim
... to validate the kernel image (which is required to be in PE
format, as is e.g. the case for the Linux bzImage when built with
CONFIG_EFI_STUB).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -24,6 +24,18 @@
#include <asm/msr.h>
#include <asm/processor.h>
+#define SHIM_LOCK_PROTOCOL_GUID \
+ { 0x605dab50, 0xe046, 0x4300, {0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23} }
+
+typedef EFI_STATUS
+(/* _not_ EFIAPI */ *EFI_SHIM_LOCK_VERIFY) (
+ IN VOID *Buffer,
+ IN UINT32 Size);
+
+typedef struct {
+ EFI_SHIM_LOCK_VERIFY Verify;
+} EFI_SHIM_LOCK_PROTOCOL;
+
extern char start[];
extern u32 cpuid_ext_features;
@@ -628,12 +640,14 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
static EFI_GUID __initdata gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
static EFI_GUID __initdata bio_guid = BLOCK_IO_PROTOCOL;
static EFI_GUID __initdata devp_guid = DEVICE_PATH_PROTOCOL;
+ static EFI_GUID __initdata shim_lock_guid = SHIM_LOCK_PROTOCOL_GUID;
EFI_LOADED_IMAGE *loaded_image;
EFI_STATUS status;
unsigned int i, argc;
CHAR16 **argv, *file_name, *cfg_file_name = NULL;
UINTN cols, rows, depth, size, map_key, info_size, gop_mode = ~0;
EFI_HANDLE *handles = NULL;
+ EFI_SHIM_LOCK_PROTOCOL *shim_lock;
EFI_GRAPHICS_OUTPUT_PROTOCOL *gop = NULL;
EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info;
EFI_FILE_HANDLE dir_handle;
@@ -823,6 +837,11 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
read_file(dir_handle, s2w(&name), &kernel);
efi_bs->FreePool(name.w);
+ if ( !EFI_ERROR(efi_bs->LocateProtocol(&shim_lock_guid, NULL,
+ (void **)&shim_lock)) &&
+ shim_lock->Verify(kernel.ptr, kernel.size) != EFI_SUCCESS )
+ blexit(L"Dom0 kernel image could not be verified\r\n");
+
name.s = get_value(&cfg, section.s, "ramdisk");
if ( name.s )
{

View File

@ -0,0 +1,70 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1355134467 -3600
# Node ID 8d209624ea83b272e1ebd713a928c38d4782f4f1
# Parent f96a0cda12160f497981a37f6922a1ed7db9a462
scheduler: fix rate limit range checking
For one, neither of the two checks permitted for the documented value
of zero (disabling the functionality altogether).
Second, the range checking of the command line parameter was done by
the credit scheduler's initialization code, despite it being a generic
scheduler option.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -846,8 +846,9 @@ csched_sys_cntl(const struct scheduler *
case XEN_SYSCTL_SCHEDOP_putinfo:
if (params->tslice_ms > XEN_SYSCTL_CSCHED_TSLICE_MAX
|| params->tslice_ms < XEN_SYSCTL_CSCHED_TSLICE_MIN
- || params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
- || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN
+ || (params->ratelimit_us
+ && (params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
+ || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN))
|| MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) )
goto out;
prv->tslice_ms = params->tslice_ms;
@@ -1607,17 +1608,6 @@ csched_init(struct scheduler *ops)
sched_credit_tslice_ms = CSCHED_DEFAULT_TSLICE_MS;
}
- if ( sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
- || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN )
- {
- printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
- " Resetting to default %u\n",
- XEN_SYSCTL_SCHED_RATELIMIT_MIN,
- XEN_SYSCTL_SCHED_RATELIMIT_MAX,
- SCHED_DEFAULT_RATELIMIT_US);
- sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
- }
-
prv->tslice_ms = sched_credit_tslice_ms;
prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE;
if ( prv->tslice_ms < prv->ticks_per_tslice )
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1322,6 +1322,18 @@ void __init scheduler_init(void)
if ( SCHED_OP(&ops, init) )
panic("scheduler returned error on init\n");
+ if ( sched_ratelimit_us &&
+ (sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
+ || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN) )
+ {
+ printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
+ " Resetting to default %u\n",
+ XEN_SYSCTL_SCHED_RATELIMIT_MIN,
+ XEN_SYSCTL_SCHED_RATELIMIT_MAX,
+ SCHED_DEFAULT_RATELIMIT_US);
+ sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
+ }
+
idle_domain = domain_create(DOMID_IDLE, 0, 0);
BUG_ON(IS_ERR(idle_domain));
idle_domain->vcpu = idle_vcpu;

View File

@ -0,0 +1,71 @@
# HG changeset patch
# User Andre Przywara <osp@andrep.de>
# Date 1355913729 -3600
# Node ID 5fb0b8b838dab0b331abfa675fd2b2214ac90760
# Parent b04de677de31f26ba4b8f2f382ca4dfffcff9a79
x86, amd: Disable way access filter on Piledriver CPUs
The Way Access Filter in recent AMD CPUs may hurt the performance of
some workloads, caused by aliasing issues in the L1 cache.
This patch disables it on the affected CPUs.
The issue is similar to that one of last year:
http://lkml.indiana.edu/hypermail/linux/kernel/1107.3/00041.html
This new patch does not replace the old one, we just need another
quirk for newer CPUs.
The performance penalty without the patch depends on the
circumstances, but is a bit less than the last year's 3%.
The workloads affected would be those that access code from the same
physical page under different virtual addresses, so different
processes using the same libraries with ASLR or multiple instances of
PIE-binaries. The code needs to be accessed simultaneously from both
cores of the same compute unit.
More details can be found here:
http://developer.amd.com/Assets/SharedL1InstructionCacheonAMD15hCPU.pdf
CPUs affected are anything with the core known as Piledriver.
That includes the new parts of the AMD A-Series (aka Trinity) and the
just released new CPUs of the FX-Series (aka Vishera).
The model numbering is a bit odd here: FX CPUs have model 2,
A-Series has model 10h, with possible extensions to 1Fh. Hence the
range of model ids.
Signed-off-by: Andre Przywara <osp@andrep.de>
Add and use MSR_AMD64_IC_CFG. Update the value whenever it is found to
not have all bits set, rather than just when it's zero.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -493,6 +493,14 @@ static void __devinit init_amd(struct cp
}
}
+ /*
+ * The way access filter has a performance penalty on some workloads.
+ * Disable it on the affected CPUs.
+ */
+ if (c->x86 == 0x15 && c->x86_model >= 0x02 && c->x86_model < 0x20 &&
+ !rdmsr_safe(MSR_AMD64_IC_CFG, value) && (value & 0x1e) != 0x1e)
+ wrmsr_safe(MSR_AMD64_IC_CFG, value | 0x1e);
+
amd_get_topology(c);
/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -206,6 +206,7 @@
/* AMD64 MSRs */
#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_IC_CFG 0xc0011021
#define MSR_AMD64_DC_CFG 0xc0011022
#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46

View File

@ -0,0 +1,45 @@
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@citrix.com>
# Date 1357290407 -3600
# Node ID 8fd5635f451b073ddc99e928c975e8a7743d1321
# Parent c4114a042410d3bdec3a77c30b2e85366d7fbe1d
passthrough/domctl: use correct struct in union
This appears to be a copy paste error from c/s 23861:ec7c81fbe0de.
It is safe, functionally speaking, as both the xen_domctl_assign_device
and xen_domctl_get_device_group structure start with a 'uint32_t
machine_sbdf'. We should however use the correct union structure.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Committed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -592,7 +592,7 @@ int iommu_do_domctl(
if ( ret )
break;
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;
@@ -621,7 +621,7 @@ int iommu_do_domctl(
if ( ret )
goto assign_device_out;
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;
@@ -649,7 +649,7 @@ int iommu_do_domctl(
if ( ret )
goto deassign_device_out;
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
devfn = domctl->u.assign_device.machine_sbdf & 0xff;

View File

@ -0,0 +1,267 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559364 -3600
# Node ID 62dd78a4e3fc9d190840549f13b4d613f2d19c41
# Parent 64b36dde26bc3c4fc80312cc9eeb0e511f0cf94b
IOMMU: adjust (re)assign operation parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -328,34 +328,31 @@ void amd_iommu_disable_domain_device(str
disable_ats_device(iommu->seg, bus, devfn);
}
-static int reassign_device( struct domain *source, struct domain *target,
- u16 seg, u8 bus, u8 devfn)
+static int reassign_device(struct domain *source, struct domain *target,
+ u8 devfn, struct pci_dev *pdev)
{
- struct pci_dev *pdev;
struct amd_iommu *iommu;
int bdf;
struct hvm_iommu *t = domain_hvm_iommu(target);
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev_by_domain(source, seg, bus, devfn);
- if ( !pdev )
- return -ENODEV;
-
- bdf = PCI_BDF2(bus, devfn);
- iommu = find_iommu_for_device(seg, bdf);
+ bdf = PCI_BDF2(pdev->bus, pdev->devfn);
+ iommu = find_iommu_for_device(pdev->seg, bdf);
if ( !iommu )
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%x02.%x cannot be assigned to dom%d\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
target->domain_id);
return -ENODEV;
}
amd_iommu_disable_domain_device(source, iommu, bdf);
- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
+ if ( devfn == pdev->devfn )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }
/* IO page tables might be destroyed after pci-detach the last device
* In this case, we have to re-allocate root table for next pci-attach.*/
@@ -364,17 +361,18 @@ static int reassign_device( struct domai
amd_iommu_setup_domain_device(target, iommu, bdf);
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
- seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
return 0;
}
-static int amd_iommu_assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+static int amd_iommu_assign_device(struct domain *d, u8 devfn,
+ struct pci_dev *pdev)
{
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
- int bdf = (bus << 8) | devfn;
- int req_id = get_dma_requestor_id(seg, bdf);
+ struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
+ int bdf = PCI_BDF2(pdev->bus, devfn);
+ int req_id = get_dma_requestor_id(pdev->seg, bdf);
if ( ivrs_mappings[req_id].unity_map_enable )
{
@@ -386,7 +384,7 @@ static int amd_iommu_assign_device(struc
ivrs_mappings[req_id].read_permission);
}
- return reassign_device(dom0, d, seg, bus, devfn);
+ return reassign_device(dom0, d, devfn, pdev);
}
static void deallocate_next_page_table(struct page_info* pg, int level)
@@ -451,12 +449,6 @@ static void amd_iommu_domain_destroy(str
amd_iommu_flush_all_pages(d);
}
-static int amd_iommu_return_device(
- struct domain *s, struct domain *t, u16 seg, u8 bus, u8 devfn)
-{
- return reassign_device(s, t, seg, bus, devfn);
-}
-
static int amd_iommu_add_device(struct pci_dev *pdev)
{
struct amd_iommu *iommu;
@@ -596,7 +588,7 @@ const struct iommu_ops amd_iommu_ops = {
.teardown = amd_iommu_domain_destroy,
.map_page = amd_iommu_map_page,
.unmap_page = amd_iommu_unmap_page,
- .reassign_device = amd_iommu_return_device,
+ .reassign_device = reassign_device,
.get_device_group_id = amd_iommu_group_id,
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -232,11 +232,16 @@ static int assign_device(struct domain *
return -EXDEV;
spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(seg, bus, devfn);
- if ( pdev )
- pdev->fault.count = 0;
+ pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn);
+ if ( !pdev )
+ {
+ rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
+ goto done;
+ }
+
+ pdev->fault.count = 0;
- if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
+ if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
goto done;
if ( has_arch_pdevs(d) && !need_iommu(d) )
@@ -367,18 +372,11 @@ int deassign_device(struct domain *d, u1
return -EINVAL;
ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(seg, bus, devfn);
+ pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
if ( !pdev )
return -ENODEV;
- if ( pdev->domain != d )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "d%d: deassign a device not owned\n", d->domain_id);
- return -EINVAL;
- }
-
- ret = hd->platform_ops->reassign_device(d, dom0, seg, bus, devfn);
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1689,17 +1689,10 @@ out:
static int reassign_device_ownership(
struct domain *source,
struct domain *target,
- u16 seg, u8 bus, u8 devfn)
+ u8 devfn, struct pci_dev *pdev)
{
- struct pci_dev *pdev;
int ret;
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev_by_domain(source, seg, bus, devfn);
-
- if (!pdev)
- return -ENODEV;
-
/*
* Devices assigned to untrusted domains (here assumed to be any domU)
* can attempt to send arbitrary LAPIC/MSI messages. We are unprotected
@@ -1708,16 +1701,19 @@ static int reassign_device_ownership(
if ( (target != dom0) && !iommu_intremap )
untrusted_msi = 1;
- ret = domain_context_unmap(source, seg, bus, devfn);
+ ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn);
if ( ret )
return ret;
- ret = domain_context_mapping(target, seg, bus, devfn);
+ ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn);
if ( ret )
return ret;
- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
+ if ( devfn == pdev->devfn )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }
return ret;
}
@@ -2207,36 +2203,26 @@ int __init intel_vtd_setup(void)
}
static int intel_iommu_assign_device(
- struct domain *d, u16 seg, u8 bus, u8 devfn)
+ struct domain *d, u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
int ret = 0, i;
- struct pci_dev *pdev;
- u16 bdf;
+ u16 bdf, seg;
+ u8 bus;
if ( list_empty(&acpi_drhd_units) )
return -ENODEV;
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(seg, bus, devfn);
- if (!pdev)
- return -ENODEV;
-
- if (pdev->domain != dom0)
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "IOMMU: assign a assigned device\n");
- return -EBUSY;
- }
-
- ret = reassign_device_ownership(dom0, d, seg, bus, devfn);
+ ret = reassign_device_ownership(dom0, d, devfn, pdev);
if ( ret )
goto done;
/* FIXME: Because USB RMRR conflicts with guest bios region,
* ignore USB RMRR temporarily.
*/
- if ( is_usb_device(seg, bus, devfn) )
+ seg = pdev->seg;
+ bus = pdev->bus;
+ if ( is_usb_device(seg, bus, pdev->devfn) )
{
ret = 0;
goto done;
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -123,13 +123,13 @@ struct iommu_ops {
int (*add_device)(struct pci_dev *pdev);
int (*enable_device)(struct pci_dev *pdev);
int (*remove_device)(struct pci_dev *pdev);
- int (*assign_device)(struct domain *d, u16 seg, u8 bus, u8 devfn);
+ int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
unsigned int flags);
int (*unmap_page)(struct domain *d, unsigned long gfn);
int (*reassign_device)(struct domain *s, struct domain *t,
- u16 seg, u8 bus, u8 devfn);
+ u8 devfn, struct pci_dev *);
int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);

View File

@ -0,0 +1,354 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559482 -3600
# Node ID 75cc4943b1ff509c4074800a23ff51d773233b8a
# Parent 62dd78a4e3fc9d190840549f13b4d613f2d19c41
IOMMU: adjust add/remove operation parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -83,14 +83,14 @@ static void disable_translation(u32 *dte
}
static void amd_iommu_setup_domain_device(
- struct domain *domain, struct amd_iommu *iommu, int bdf)
+ struct domain *domain, struct amd_iommu *iommu,
+ u8 devfn, struct pci_dev *pdev)
{
void *dte;
unsigned long flags;
int req_id, valid = 1;
int dte_i = 0;
- u8 bus = PCI_BUS(bdf);
- u8 devfn = PCI_DEVFN2(bdf);
+ u8 bus = pdev->bus;
struct hvm_iommu *hd = domain_hvm_iommu(domain);
@@ -103,7 +103,7 @@ static void amd_iommu_setup_domain_devic
dte_i = 1;
/* get device-table entry */
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
spin_lock_irqsave(&iommu->lock, flags);
@@ -115,7 +115,7 @@ static void amd_iommu_setup_domain_devic
(u32 *)dte, page_to_maddr(hd->root_table), hd->domain_id,
hd->paging_mode, valid);
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
iommu_dte_set_iotlb((u32 *)dte, dte_i);
@@ -132,32 +132,31 @@ static void amd_iommu_setup_domain_devic
ASSERT(spin_is_locked(&pcidevs_lock));
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
- !pci_ats_enabled(iommu->seg, bus, devfn) )
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
+ !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
{
- struct pci_dev *pdev;
+ if ( devfn == pdev->devfn )
+ enable_ats_device(iommu->seg, bus, devfn);
- enable_ats_device(iommu->seg, bus, devfn);
-
- ASSERT(spin_is_locked(&pcidevs_lock));
- pdev = pci_get_pdev(iommu->seg, bus, devfn);
-
- ASSERT( pdev != NULL );
amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
}
-static void __init amd_iommu_setup_dom0_device(struct pci_dev *pdev)
+static int __init amd_iommu_setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
int bdf = PCI_BDF2(pdev->bus, pdev->devfn);
struct amd_iommu *iommu = find_iommu_for_device(pdev->seg, bdf);
- if ( likely(iommu != NULL) )
- amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
- else
+ if ( unlikely(!iommu) )
+ {
AMD_IOMMU_DEBUG("No iommu for device %04x:%02x:%02x.%u\n",
pdev->seg, pdev->bus,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return -ENODEV;
+ }
+
+ amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
+ return 0;
}
int __init amd_iov_detect(void)
@@ -291,16 +290,16 @@ static void __init amd_iommu_dom0_init(s
}
void amd_iommu_disable_domain_device(struct domain *domain,
- struct amd_iommu *iommu, int bdf)
+ struct amd_iommu *iommu,
+ u8 devfn, struct pci_dev *pdev)
{
void *dte;
unsigned long flags;
int req_id;
- u8 bus = PCI_BUS(bdf);
- u8 devfn = PCI_DEVFN2(bdf);
+ u8 bus = pdev->bus;
BUG_ON ( iommu->dev_table.buffer == NULL );
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
spin_lock_irqsave(&iommu->lock, flags);
@@ -308,7 +307,7 @@ void amd_iommu_disable_domain_device(str
{
disable_translation((u32 *)dte);
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
iommu_dte_set_iotlb((u32 *)dte, 0);
@@ -323,7 +322,8 @@ void amd_iommu_disable_domain_device(str
ASSERT(spin_is_locked(&pcidevs_lock));
- if ( pci_ats_device(iommu->seg, bus, devfn) &&
+ if ( devfn == pdev->devfn &&
+ pci_ats_device(iommu->seg, bus, devfn) &&
pci_ats_enabled(iommu->seg, bus, devfn) )
disable_ats_device(iommu->seg, bus, devfn);
}
@@ -346,7 +346,7 @@ static int reassign_device(struct domain
return -ENODEV;
}
- amd_iommu_disable_domain_device(source, iommu, bdf);
+ amd_iommu_disable_domain_device(source, iommu, devfn, pdev);
if ( devfn == pdev->devfn )
{
@@ -359,7 +359,7 @@ static int reassign_device(struct domain
if ( t->root_table == NULL )
allocate_domain_resources(t);
- amd_iommu_setup_domain_device(target, iommu, bdf);
+ amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
@@ -449,7 +449,7 @@ static void amd_iommu_domain_destroy(str
amd_iommu_flush_all_pages(d);
}
-static int amd_iommu_add_device(struct pci_dev *pdev)
+static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
{
struct amd_iommu *iommu;
u16 bdf;
@@ -462,16 +462,16 @@ static int amd_iommu_add_device(struct p
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%02x.%u cannot be assigned to dom%d\n",
- pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->domain->domain_id);
return -ENODEV;
}
- amd_iommu_setup_domain_device(pdev->domain, iommu, bdf);
+ amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
return 0;
}
-static int amd_iommu_remove_device(struct pci_dev *pdev)
+static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
struct amd_iommu *iommu;
u16 bdf;
@@ -484,12 +484,12 @@ static int amd_iommu_remove_device(struc
{
AMD_IOMMU_DEBUG("Fail to find iommu."
" %04x:%02x:%02x.%u cannot be removed from dom%d\n",
- pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), pdev->domain->domain_id);
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->domain->domain_id);
return -ENODEV;
}
- amd_iommu_disable_domain_device(pdev->domain, iommu, bdf);
+ amd_iommu_disable_domain_device(pdev->domain, iommu, devfn, pdev);
return 0;
}
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -167,7 +167,7 @@ int iommu_add_device(struct pci_dev *pde
if ( !iommu_enabled || !hd->platform_ops )
return 0;
- return hd->platform_ops->add_device(pdev);
+ return hd->platform_ops->add_device(pdev->devfn, pdev);
}
int iommu_enable_device(struct pci_dev *pdev)
@@ -197,7 +197,7 @@ int iommu_remove_device(struct pci_dev *
if ( !iommu_enabled || !hd->platform_ops )
return 0;
- return hd->platform_ops->remove_device(pdev);
+ return hd->platform_ops->remove_device(pdev->devfn, pdev);
}
/*
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -715,7 +715,7 @@ int __init scan_pci_devices(void)
struct setup_dom0 {
struct domain *d;
- void (*handler)(struct pci_dev *);
+ int (*handler)(u8 devfn, struct pci_dev *);
};
static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
@@ -734,7 +734,7 @@ static int __init _setup_dom0_pci_device
pdev->domain = ctxt->d;
list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
- ctxt->handler(pdev);
+ ctxt->handler(devfn, pdev);
}
}
@@ -742,7 +742,7 @@ static int __init _setup_dom0_pci_device
}
void __init setup_dom0_pci_devices(
- struct domain *d, void (*handler)(struct pci_dev *))
+ struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
{
struct setup_dom0 ctxt = { .d = d, .handler = handler };
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -52,7 +52,7 @@ int nr_iommus;
static struct tasklet vtd_fault_tasklet;
-static void setup_dom0_device(struct pci_dev *);
+static int setup_dom0_device(u8 devfn, struct pci_dev *);
static void setup_dom0_rmrr(struct domain *d);
static int domain_iommu_domid(struct domain *d,
@@ -1904,7 +1904,7 @@ static int rmrr_identity_mapping(struct
return 0;
}
-static int intel_iommu_add_device(struct pci_dev *pdev)
+static int intel_iommu_add_device(u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
u16 bdf;
@@ -1915,8 +1915,7 @@ static int intel_iommu_add_device(struct
if ( !pdev->domain )
return -EINVAL;
- ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus,
- pdev->devfn);
+ ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
@@ -1928,7 +1927,7 @@ static int intel_iommu_add_device(struct
{
if ( rmrr->segment == pdev->seg &&
PCI_BUS(bdf) == pdev->bus &&
- PCI_DEVFN2(bdf) == pdev->devfn )
+ PCI_DEVFN2(bdf) == devfn )
{
ret = rmrr_identity_mapping(pdev->domain, rmrr);
if ( ret )
@@ -1953,7 +1952,7 @@ static int intel_iommu_enable_device(str
return ret >= 0 ? 0 : ret;
}
-static int intel_iommu_remove_device(struct pci_dev *pdev)
+static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
struct acpi_rmrr_unit *rmrr;
u16 bdf;
@@ -1971,19 +1970,22 @@ static int intel_iommu_remove_device(str
{
if ( rmrr->segment == pdev->seg &&
PCI_BUS(bdf) == pdev->bus &&
- PCI_DEVFN2(bdf) == pdev->devfn )
+ PCI_DEVFN2(bdf) == devfn )
return 0;
}
}
- return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus,
- pdev->devfn);
+ return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn);
}
-static void __init setup_dom0_device(struct pci_dev *pdev)
+static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
- domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, pdev->devfn);
- pci_vtd_quirk(pdev);
+ int err;
+
+ err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ if ( !err && devfn == pdev->devfn )
+ pci_vtd_quirk(pdev);
+ return err;
}
void clear_fault_bits(struct iommu *iommu)
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -120,9 +120,9 @@ bool_t pt_irq_need_timer(uint32_t flags)
struct iommu_ops {
int (*init)(struct domain *d);
void (*dom0_init)(struct domain *d);
- int (*add_device)(struct pci_dev *pdev);
+ int (*add_device)(u8 devfn, struct pci_dev *);
int (*enable_device)(struct pci_dev *pdev);
- int (*remove_device)(struct pci_dev *pdev);
+ int (*remove_device)(u8 devfn, struct pci_dev *);
int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
void (*teardown)(struct domain *d);
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -100,7 +100,8 @@ struct pci_dev *pci_lock_pdev(int seg, i
struct pci_dev *pci_lock_domain_pdev(
struct domain *, int seg, int bus, int devfn);
-void setup_dom0_pci_devices(struct domain *, void (*)(struct pci_dev *));
+void setup_dom0_pci_devices(struct domain *,
+ int (*)(u8 devfn, struct pci_dev *));
void pci_release_devices(struct domain *d);
int pci_add_segment(u16 seg);
const unsigned long *pci_get_ro_map(u16 seg);

View File

@ -0,0 +1,187 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559549 -3600
# Node ID afb598bd0f5436bea15b7ef842e8ad5c6adefa1a
# Parent 75cc4943b1ff509c4074800a23ff51d773233b8a
VT-d: adjust context map/unmap parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -95,7 +95,7 @@ void free_pgtable_maddr(u64 maddr);
void *map_vtd_domain_page(u64 maddr);
void unmap_vtd_domain_page(void *va);
int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
- u8 bus, u8 devfn);
+ u8 bus, u8 devfn, const struct pci_dev *);
int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
u8 bus, u8 devfn);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1308,7 +1308,7 @@ static void __init intel_iommu_dom0_init
int domain_context_mapping_one(
struct domain *domain,
struct iommu *iommu,
- u8 bus, u8 devfn)
+ u8 bus, u8 devfn, const struct pci_dev *pdev)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct context_entry *context, *context_entries;
@@ -1325,11 +1325,9 @@ int domain_context_mapping_one(
if ( context_present(*context) )
{
int res = 0;
- struct pci_dev *pdev = NULL;
- /* First try to get domain ownership from device structure. If that's
+ /* Try to get domain ownership from device structure. If that's
* not available, try to read it from the context itself. */
- pdev = pci_get_pdev(seg, bus, devfn);
if ( pdev )
{
if ( pdev->domain != domain )
@@ -1448,13 +1446,12 @@ int domain_context_mapping_one(
}
static int domain_context_mapping(
- struct domain *domain, u16 seg, u8 bus, u8 devfn)
+ struct domain *domain, u8 devfn, const struct pci_dev *pdev)
{
struct acpi_drhd_unit *drhd;
int ret = 0;
u32 type;
- u8 secbus;
- struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn);
+ u8 seg = pdev->seg, bus = pdev->bus, secbus;
drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
@@ -1475,8 +1472,9 @@ static int domain_context_mapping(
dprintk(VTDPREFIX, "d%d:PCIe: map %04x:%02x:%02x.%u\n",
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
- if ( !ret && ats_device(pdev, drhd) > 0 )
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pdev);
+ if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
enable_ats_device(seg, bus, devfn);
break;
@@ -1487,14 +1485,16 @@ static int domain_context_mapping(
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pdev);
if ( ret )
break;
if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 1 )
break;
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ pci_get_pdev(seg, bus, devfn));
/*
* Devices behind PCIe-to-PCI/PCIx bridge may generate different
@@ -1503,7 +1503,8 @@ static int domain_context_mapping(
*/
if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
(secbus != pdev->bus || pdev->devfn != 0) )
- ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
+ ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0,
+ pci_get_pdev(seg, secbus, 0));
break;
@@ -1576,18 +1577,15 @@ int domain_context_unmap_one(
}
static int domain_context_unmap(
- struct domain *domain, u16 seg, u8 bus, u8 devfn)
+ struct domain *domain, u8 devfn, const struct pci_dev *pdev)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int ret = 0;
u32 type;
- u8 tmp_bus, tmp_devfn, secbus;
- struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn);
+ u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
int found = 0;
- BUG_ON(!pdev);
-
drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
return -ENODEV;
@@ -1607,7 +1605,7 @@ static int domain_context_unmap(
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_unmap_one(domain, iommu, bus, devfn);
- if ( !ret && ats_device(pdev, drhd) > 0 )
+ if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
disable_ats_device(seg, bus, devfn);
break;
@@ -1701,11 +1699,11 @@ static int reassign_device_ownership(
if ( (target != dom0) && !iommu_intremap )
untrusted_msi = 1;
- ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_unmap(source, devfn, pdev);
if ( ret )
return ret;
- ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_mapping(target, devfn, pdev);
if ( ret )
return ret;
@@ -1915,7 +1913,7 @@ static int intel_iommu_add_device(u8 dev
if ( !pdev->domain )
return -EINVAL;
- ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ ret = domain_context_mapping(pdev->domain, devfn, pdev);
if ( ret )
{
dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
@@ -1975,14 +1973,14 @@ static int intel_iommu_remove_device(u8
}
}
- return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn);
+ return domain_context_unmap(pdev->domain, devfn, pdev);
}
static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev)
{
int err;
- err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn);
+ err = domain_context_mapping(pdev->domain, devfn, pdev);
if ( !err && devfn == pdev->devfn )
pci_vtd_quirk(pdev);
return err;
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -292,7 +292,7 @@ static void map_me_phantom_function(stru
/* map or unmap ME phantom function */
if ( map )
domain_context_mapping_one(domain, drhd->iommu, 0,
- PCI_DEVFN(dev, 7));
+ PCI_DEVFN(dev, 7), NULL);
else
domain_context_unmap_one(domain, drhd->iommu, 0,
PCI_DEVFN(dev, 7));

View File

@ -0,0 +1,84 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559599 -3600
# Node ID 2a2c63f641ee3bda4ad552eb0b3ea479d37590cc
# Parent afb598bd0f5436bea15b7ef842e8ad5c6adefa1a
AMD IOMMU: adjust flush function parameters
... to use a (struct pci_dev *, devfn) pair.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -287,12 +287,12 @@ void invalidate_iommu_all(struct amd_iom
send_iommu_command(iommu, cmd);
}
-void amd_iommu_flush_iotlb(struct pci_dev *pdev,
+void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
uint64_t gaddr, unsigned int order)
{
unsigned long flags;
struct amd_iommu *iommu;
- unsigned int bdf, req_id, queueid, maxpend;
+ unsigned int req_id, queueid, maxpend;
struct pci_ats_dev *ats_pdev;
if ( !ats_enabled )
@@ -305,8 +305,8 @@ void amd_iommu_flush_iotlb(struct pci_de
if ( !pci_ats_enabled(ats_pdev->seg, ats_pdev->bus, ats_pdev->devfn) )
return;
- bdf = PCI_BDF2(ats_pdev->bus, ats_pdev->devfn);
- iommu = find_iommu_for_device(ats_pdev->seg, bdf);
+ iommu = find_iommu_for_device(ats_pdev->seg,
+ PCI_BDF2(ats_pdev->bus, ats_pdev->devfn));
if ( !iommu )
{
@@ -319,7 +319,7 @@ void amd_iommu_flush_iotlb(struct pci_de
if ( !iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
return;
- req_id = get_dma_requestor_id(iommu->seg, bdf);
+ req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(ats_pdev->bus, devfn));
queueid = req_id;
maxpend = ats_pdev->ats_queue_depth & 0xff;
@@ -339,7 +339,7 @@ static void amd_iommu_flush_all_iotlbs(s
return;
for_each_pdev( d, pdev )
- amd_iommu_flush_iotlb(pdev, gaddr, order);
+ amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
}
/* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -138,7 +138,7 @@ static void amd_iommu_setup_domain_devic
if ( devfn == pdev->devfn )
enable_ats_device(iommu->seg, bus, devfn);
- amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
+ amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
}
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -78,8 +78,8 @@ void iommu_dte_set_guest_cr3(u32 *dte, u
void amd_iommu_flush_all_pages(struct domain *d);
void amd_iommu_flush_pages(struct domain *d, unsigned long gfn,
unsigned int order);
-void amd_iommu_flush_iotlb(struct pci_dev *pdev, uint64_t gaddr,
- unsigned int order);
+void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
+ uint64_t gaddr, unsigned int order);
void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf);
void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf);
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);

222
26328-IOMMU-pdev-type.patch Normal file
View File

@ -0,0 +1,222 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559679 -3600
# Node ID 11fa145c880ee814aaf56a7f47f47ee3e5560c7c
# Parent 2a2c63f641ee3bda4ad552eb0b3ea479d37590cc
IOMMU/PCI: consolidate pdev_type() and cache its result for a given device
Add an "unknown" device types as well as one for PCI-to-PCIe bridges
(the latter of which other IOMMU code with or without this patch
doesn't appear to handle properly).
Make sure we don't mistake a device for which we can't access its
config space as a legacy PCI device (after all we in fact don't know
how to deal with such a device, and hence shouldn't try to).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -144,7 +144,7 @@ static struct pci_dev *alloc_pdev(struct
spin_lock_init(&pdev->msix_table_lock);
/* update bus2bridge */
- switch ( pdev_type(pseg->nr, bus, devfn) )
+ switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
{
u8 sec_bus, sub_bus;
@@ -184,7 +184,7 @@ static struct pci_dev *alloc_pdev(struct
static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
{
/* update bus2bridge */
- switch ( pdev_type(pseg->nr, pdev->bus, pdev->devfn) )
+ switch ( pdev->type )
{
u8 dev, func, sec_bus, sub_bus;
@@ -202,6 +202,9 @@ static void free_pdev(struct pci_seg *ps
pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus];
spin_unlock(&pseg->bus2bridge_lock);
break;
+
+ default:
+ break;
}
list_del(&pdev->alldevs_list);
@@ -563,20 +566,30 @@ void pci_release_devices(struct domain *
#define PCI_CLASS_BRIDGE_PCI 0x0604
-int pdev_type(u16 seg, u8 bus, u8 devfn)
+enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
{
u16 class_device, creg;
u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);
class_device = pci_conf_read16(seg, bus, d, f, PCI_CLASS_DEVICE);
- if ( class_device == PCI_CLASS_BRIDGE_PCI )
+ switch ( class_device )
{
+ case PCI_CLASS_BRIDGE_PCI:
if ( !pos )
return DEV_TYPE_LEGACY_PCI_BRIDGE;
creg = pci_conf_read16(seg, bus, d, f, pos + PCI_EXP_FLAGS);
- return ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == PCI_EXP_TYPE_PCI_BRIDGE ?
- DEV_TYPE_PCIe2PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE;
+ switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 )
+ {
+ case PCI_EXP_TYPE_PCI_BRIDGE:
+ return DEV_TYPE_PCIe2PCI_BRIDGE;
+ case PCI_EXP_TYPE_PCIE_BRIDGE:
+ return DEV_TYPE_PCI2PCIe_BRIDGE;
+ }
+ return DEV_TYPE_PCIe_BRIDGE;
+
+ case 0x0000: case 0xffff:
+ return DEV_TYPE_PCI_UNKNOWN;
}
return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -426,7 +426,6 @@ void io_apic_write_remap_rte(
static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
{
- int type;
u16 seg;
u8 bus, devfn, secbus;
int ret;
@@ -437,8 +436,7 @@ static void set_msi_source_id(struct pci
seg = pdev->seg;
bus = pdev->bus;
devfn = pdev->devfn;
- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -470,7 +468,7 @@ static void set_msi_source_id(struct pci
default:
dprintk(XENLOG_WARNING VTDPREFIX,
"d%d: unknown(%u): %04x:%02x:%02x.%u\n",
- pdev->domain->domain_id, type,
+ pdev->domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
break;
}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1450,7 +1450,6 @@ static int domain_context_mapping(
{
struct acpi_drhd_unit *drhd;
int ret = 0;
- u32 type;
u8 seg = pdev->seg, bus = pdev->bus, secbus;
drhd = acpi_find_matched_drhd_unit(pdev);
@@ -1459,8 +1458,7 @@ static int domain_context_mapping(
ASSERT(spin_is_locked(&pcidevs_lock));
- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -1510,7 +1508,7 @@ static int domain_context_mapping(
default:
dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
- domain->domain_id, type,
+ domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = -EINVAL;
break;
@@ -1582,7 +1580,6 @@ static int domain_context_unmap(
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int ret = 0;
- u32 type;
u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus;
int found = 0;
@@ -1591,8 +1588,7 @@ static int domain_context_unmap(
return -ENODEV;
iommu = drhd->iommu;
- type = pdev_type(seg, bus, devfn);
- switch ( type )
+ switch ( pdev->type )
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
@@ -1639,7 +1635,7 @@ static int domain_context_unmap(
default:
dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n",
- domain->domain_id, type,
+ domain->domain_id, pdev->type,
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = -EINVAL;
goto out;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -62,6 +62,17 @@ struct pci_dev {
const u16 seg;
const u8 bus;
const u8 devfn;
+
+ enum pdev_type {
+ DEV_TYPE_PCI_UNKNOWN,
+ DEV_TYPE_PCIe_ENDPOINT,
+ DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
+ DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge
+ DEV_TYPE_PCI2PCIe_BRIDGE, // PCI/PCIx-to-PCIe bridge
+ DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge
+ DEV_TYPE_PCI,
+ } type;
+
struct pci_dev_info info;
struct arch_pci_dev arch;
struct {
@@ -83,18 +94,10 @@ struct pci_dev {
extern spinlock_t pcidevs_lock;
-enum {
- DEV_TYPE_PCIe_ENDPOINT,
- DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
- DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge
- DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge
- DEV_TYPE_PCI,
-};
-
bool_t pci_known_segment(u16 seg);
int pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func);
int scan_pci_devices(void);
-int pdev_type(u16 seg, u8 bus, u8 devfn);
+enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn);
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
struct pci_dev *pci_lock_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_lock_domain_pdev(
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -371,6 +371,9 @@
#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */
+#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */
+#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */
+#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */
#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
#define PCI_EXP_DEVCAP 4 /* Device capabilities */

View File

@ -0,0 +1,365 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559742 -3600
# Node ID c9a01b396cb4eaedef30e9a6ed615115a9f8bfc5
# Parent 11fa145c880ee814aaf56a7f47f47ee3e5560c7c
IOMMU: add phantom function support
Apart from generating device context entries for the base function,
all phantom functions also need context entries to be generated for
them.
In order to distinguish different use cases, a variant of
pci_get_pdev() is being introduced that, even when passed a phantom
function number, would return the underlying actual device.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s
return;
for_each_pdev( d, pdev )
- amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
+ {
+ u8 devfn = pdev->devfn;
+
+ do {
+ amd_iommu_flush_iotlb(devfn, pdev, gaddr, order);
+ devfn += pdev->phantom_stride;
+ } while ( devfn != pdev->devfn &&
+ PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+ }
}
/* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -692,7 +692,7 @@ void parse_ppr_log_entry(struct amd_iomm
devfn = PCI_DEVFN2(device_id);
spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(iommu->seg, bus, devfn);
+ pdev = pci_get_real_pdev(iommu->seg, bus, devfn);
spin_unlock(&pcidevs_lock);
if ( pdev )
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -612,7 +612,6 @@ static int update_paging_mode(struct dom
for_each_pdev( d, pdev )
{
bdf = (pdev->bus << 8) | pdev->devfn;
- req_id = get_dma_requestor_id(pdev->seg, bdf);
iommu = find_iommu_for_device(pdev->seg, bdf);
if ( !iommu )
{
@@ -621,16 +620,21 @@ static int update_paging_mode(struct dom
}
spin_lock_irqsave(&iommu->lock, flags);
- device_entry = iommu->dev_table.buffer +
- (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
-
- /* valid = 0 only works for dom0 passthrough mode */
- amd_iommu_set_root_page_table((u32 *)device_entry,
- page_to_maddr(hd->root_table),
- hd->domain_id,
- hd->paging_mode, 1);
-
- amd_iommu_flush_device(iommu, req_id);
+ do {
+ req_id = get_dma_requestor_id(pdev->seg, bdf);
+ device_entry = iommu->dev_table.buffer +
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+
+ /* valid = 0 only works for dom0 passthrough mode */
+ amd_iommu_set_root_page_table((u32 *)device_entry,
+ page_to_maddr(hd->root_table),
+ hd->domain_id,
+ hd->paging_mode, 1);
+
+ amd_iommu_flush_device(iommu, req_id);
+ bdf += pdev->phantom_stride;
+ } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
+ PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
spin_unlock_irqrestore(&iommu->lock, flags);
}
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -157,6 +157,8 @@ void __init iommu_dom0_init(struct domai
int iommu_add_device(struct pci_dev *pdev)
{
struct hvm_iommu *hd;
+ int rc;
+ u8 devfn;
if ( !pdev->domain )
return -EINVAL;
@@ -167,7 +169,20 @@ int iommu_add_device(struct pci_dev *pde
if ( !iommu_enabled || !hd->platform_ops )
return 0;
- return hd->platform_ops->add_device(pdev->devfn, pdev);
+ rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+ if ( rc || !pdev->phantom_stride )
+ return rc;
+
+ for ( devfn = pdev->devfn ; ; )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ return 0;
+ rc = hd->platform_ops->add_device(devfn, pdev);
+ if ( rc )
+ printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ }
}
int iommu_enable_device(struct pci_dev *pdev)
@@ -190,6 +205,8 @@ int iommu_enable_device(struct pci_dev *
int iommu_remove_device(struct pci_dev *pdev)
{
struct hvm_iommu *hd;
+ u8 devfn;
+
if ( !pdev->domain )
return -EINVAL;
@@ -197,6 +214,22 @@ int iommu_remove_device(struct pci_dev *
if ( !iommu_enabled || !hd->platform_ops )
return 0;
+ for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+ {
+ int rc;
+
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->remove_device(devfn, pdev);
+ if ( !rc )
+ continue;
+
+ printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+ return rc;
+ }
+
return hd->platform_ops->remove_device(pdev->devfn, pdev);
}
@@ -244,6 +277,18 @@ static int assign_device(struct domain *
if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
goto done;
+ for ( ; pdev->phantom_stride; rc = 0 )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ rc = hd->platform_ops->assign_device(d, devfn, pdev);
+ if ( rc )
+ printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ rc);
+ }
+
if ( has_arch_pdevs(d) && !need_iommu(d) )
{
d->need_iommu = 1;
@@ -376,6 +421,21 @@ int deassign_device(struct domain *d, u1
if ( !pdev )
return -ENODEV;
+ while ( pdev->phantom_stride )
+ {
+ devfn += pdev->phantom_stride;
+ if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+ break;
+ ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+ if ( !ret )
+ continue;
+
+ printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+ d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+ return ret;
+ }
+
+ devfn = pdev->devfn;
ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
if ( ret )
{
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -146,6 +146,8 @@ static struct pci_dev *alloc_pdev(struct
/* update bus2bridge */
switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
{
+ int pos;
+ u16 cap;
u8 sec_bus, sub_bus;
case DEV_TYPE_PCIe_BRIDGE:
@@ -169,6 +171,20 @@ static struct pci_dev *alloc_pdev(struct
break;
case DEV_TYPE_PCIe_ENDPOINT:
+ pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
+ PCI_FUNC(devfn), PCI_CAP_ID_EXP);
+ BUG_ON(!pos);
+ cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
+ PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
+ if ( cap & PCI_EXP_DEVCAP_PHANTOM )
+ {
+ pdev->phantom_stride = 8 >> MASK_EXTR(cap,
+ PCI_EXP_DEVCAP_PHANTOM);
+ if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
+ pdev->phantom_stride = 0;
+ }
+ break;
+
case DEV_TYPE_PCI:
break;
@@ -266,6 +282,27 @@ struct pci_dev *pci_get_pdev(int seg, in
return NULL;
}
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
+{
+ struct pci_dev *pdev;
+ int stride;
+
+ if ( seg < 0 || bus < 0 || devfn < 0 )
+ return NULL;
+
+ for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
+ !pdev && stride; stride >>= 1 )
+ {
+ if ( !(devfn & (8 - stride)) )
+ continue;
+ pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
+ if ( pdev && stride != pdev->phantom_stride )
+ pdev = NULL;
+ }
+
+ return pdev;
+}
+
struct pci_dev *pci_get_pdev_by_domain(
struct domain *d, int seg, int bus, int devfn)
{
@@ -464,8 +501,19 @@ int pci_add_device(u16 seg, u8 bus, u8 d
out:
spin_unlock(&pcidevs_lock);
- printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
- seg, bus, slot, func);
+ if ( !ret )
+ {
+ printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
+ seg, bus, slot, func);
+ while ( pdev->phantom_stride )
+ {
+ func += pdev->phantom_stride;
+ if ( PCI_SLOT(func) )
+ break;
+ printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
+ seg, bus, slot, func);
+ }
+ }
return ret;
}
@@ -657,7 +705,7 @@ void pci_check_disable_device(u16 seg, u
u16 cword;
spin_lock(&pcidevs_lock);
- pdev = pci_get_pdev(seg, bus, devfn);
+ pdev = pci_get_real_pdev(seg, bus, devfn);
if ( pdev )
{
if ( now < pdev->fault.time ||
@@ -674,6 +722,7 @@ void pci_check_disable_device(u16 seg, u
/* Tell the device to stop DMAing; we can't rely on the guest to
* control it for us. */
+ devfn = pdev->devfn;
cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
PCI_COMMAND);
pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
@@ -731,6 +780,27 @@ struct setup_dom0 {
int (*handler)(u8 devfn, struct pci_dev *);
};
+static void setup_one_dom0_device(const struct setup_dom0 *ctxt,
+ struct pci_dev *pdev)
+{
+ u8 devfn = pdev->devfn;
+
+ do {
+ int err = ctxt->handler(devfn, pdev);
+
+ if ( err )
+ {
+ printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
+ pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ ctxt->d->domain_id, err);
+ if ( devfn == pdev->devfn )
+ return;
+ }
+ devfn += pdev->phantom_stride;
+ } while ( devfn != pdev->devfn &&
+ PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+}
+
static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
{
struct setup_dom0 *ctxt = arg;
@@ -747,7 +817,7 @@ static int __init _setup_dom0_pci_device
pdev->domain = ctxt->d;
list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
- ctxt->handler(devfn, pdev);
+ setup_one_dom0_device(ctxt, pdev);
}
}
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -58,6 +58,9 @@ do {
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x))
+#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
+#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+
#define reserve_bootmem(_p,_l) ((void)0)
struct domain;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -63,6 +63,8 @@ struct pci_dev {
const u8 bus;
const u8 devfn;
+ u8 phantom_stride;
+
enum pdev_type {
DEV_TYPE_PCI_UNKNOWN,
DEV_TYPE_PCIe_ENDPOINT,
@@ -113,6 +115,7 @@ int pci_remove_device(u16 seg, u8 bus, u
int pci_ro_device(int seg, int bus, int devfn);
void arch_pci_ro_device(int seg, int bdf);
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(
struct domain *, int seg, int bus, int devfn);
void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);

View File

@ -0,0 +1,42 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559812 -3600
# Node ID b514b7118958327605e33dd387944832bc8d734a
# Parent c9a01b396cb4eaedef30e9a6ed615115a9f8bfc5
VT-d: relax source qualifier for MSI of phantom functions
With ordinary requests allowed to come from phantom functions, the
remapping tables ought to be set up to allow for MSI triggers to come
from other than the "real" device too.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -438,13 +438,22 @@ static void set_msi_source_id(struct pci
devfn = pdev->devfn;
switch ( pdev->type )
{
+ unsigned int sq;
+
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
case DEV_TYPE_LEGACY_PCI_BRIDGE:
break;
case DEV_TYPE_PCIe_ENDPOINT:
- set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16, PCI_BDF2(bus, devfn));
+ switch ( pdev->phantom_stride )
+ {
+ case 1: sq = SQ_13_IGNORE_3; break;
+ case 2: sq = SQ_13_IGNORE_2; break;
+ case 4: sq = SQ_13_IGNORE_1; break;
+ default: sq = SQ_ALL_16; break;
+ }
+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn));
break;
case DEV_TYPE_PCI:

View File

@ -0,0 +1,108 @@
References: bnc#787169
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357559889 -3600
# Node ID 23c4bbc0111dd807561b2c62cbc5798220943a0d
# Parent b514b7118958327605e33dd387944832bc8d734a
IOMMU: add option to specify devices behaving like ones using phantom functions
At least certain Marvell SATA controllers are known to issue bus master
requests with a non-zero function as origin, despite themselves being
single function devices.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -672,6 +672,16 @@ Defaults to booting secondary processors
Default: `on`
+### pci-phantom
+> `=[<seg>:]<bus>:<device>,<stride>`
+
+Mark a group of PCI devices as using phantom functions without actually
+advertising so, so the IOMMU can create translation contexts for them.
+
+All numbers specified must be hexadecimal ones.
+
+This option can be specified more than once (up to 8 times at present).
+
### ple\_gap
> `= <integer>`
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -123,6 +123,49 @@ const unsigned long *pci_get_ro_map(u16
return pseg ? pseg->ro_map : NULL;
}
+static struct phantom_dev {
+ u16 seg;
+ u8 bus, slot, stride;
+} phantom_devs[8];
+static unsigned int nr_phantom_devs;
+
+static void __init parse_phantom_dev(char *str) {
+ const char *s = str;
+ struct phantom_dev phantom;
+
+ if ( !s || !*s || nr_phantom_devs >= ARRAY_SIZE(phantom_devs) )
+ return;
+
+ phantom.seg = simple_strtol(s, &s, 16);
+ if ( *s != ':' )
+ return;
+
+ phantom.bus = simple_strtol(s + 1, &s, 16);
+ if ( *s == ',' )
+ {
+ phantom.slot = phantom.bus;
+ phantom.bus = phantom.seg;
+ phantom.seg = 0;
+ }
+ else if ( *s == ':' )
+ phantom.slot = simple_strtol(s + 1, &s, 16);
+ else
+ return;
+
+ if ( *s != ',' )
+ return;
+ switch ( phantom.stride = simple_strtol(s + 1, &s, 0) )
+ {
+ case 1: case 2: case 4:
+ if ( *s )
+ default:
+ return;
+ }
+
+ phantom_devs[nr_phantom_devs++] = phantom;
+}
+custom_param("pci-phantom", parse_phantom_dev);
+
static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
struct pci_dev *pdev;
@@ -183,6 +226,20 @@ static struct pci_dev *alloc_pdev(struct
if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
pdev->phantom_stride = 0;
}
+ else
+ {
+ unsigned int i;
+
+ for ( i = 0; i < nr_phantom_devs; ++i )
+ if ( phantom_devs[i].seg == pseg->nr &&
+ phantom_devs[i].bus == bus &&
+ phantom_devs[i].slot == PCI_SLOT(devfn) &&
+ phantom_devs[i].stride > PCI_FUNC(devfn) )
+ {
+ pdev->phantom_stride = phantom_devs[i].stride;
+ break;
+ }
+ }
break;
case DEV_TYPE_PCI:

View File

@ -0,0 +1,30 @@
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357561709 -3600
# Node ID 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
# Parent 23c4bbc0111dd807561b2c62cbc5798220943a0d
x86: compat_show_guest_stack() should not truncate MFN
Re-using "addr" here was a mistake, as it is a 32-bit quantity.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -20,11 +20,12 @@ void compat_show_guest_stack(struct vcpu
if ( v != current )
{
struct vcpu *vcpu;
+ unsigned long mfn;
ASSERT(guest_kernel_mode(v, regs));
- addr = read_cr3() >> PAGE_SHIFT;
+ mfn = read_cr3() >> PAGE_SHIFT;
for_each_vcpu( v->domain, vcpu )
- if ( pagetable_get_pfn(vcpu->arch.guest_table) == addr )
+ if ( pagetable_get_pfn(vcpu->arch.guest_table) == mfn )
break;
if ( !vcpu )
{

View File

@ -0,0 +1,30 @@
References: CVE-2013-0154 XSA-37 bnc#797031
# HG changeset patch
# User Jan Beulich <jbeulich@suse.com>
# Date 1357564826 -3600
# Node ID e1facbde56ff4e5e85f9a4935abc99eb24367cd0
# Parent 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
x86: fix assertion in get_page_type()
c/s 22998:e9fab50d7b61 (and immediately following ones) made it
possible that __get_page_type() returns other than -EINVAL, in
particular -EBUSY. Consequently, the assertion in get_page_type()
should check for only the return values we absolutely don't expect to
see there.
This is XSA-37 / CVE-2013-0154.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2603,7 +2603,7 @@ int get_page_type(struct page_info *page
int rc = __get_page_type(page, type, 0);
if ( likely(rc == 0) )
return 1;
- ASSERT(rc == -EINVAL);
+ ASSERT(rc != -EINTR && rc != -EAGAIN);
return 0;
}

22
CVE-2012-5634-xsa33.patch Normal file
View File

@ -0,0 +1,22 @@
References: CVE-2012-5634 XSA-33 bnc#794316
VT-d: fix interrupt remapping source validation for devices behind legacy bridges
Using SVT_VERIFY_BUS here doesn't make sense; native Linux also
uses SVT_VERIFY_SID_SQ here instead.
This is XSA-33 / CVE-2012-5634.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -469,7 +469,7 @@ static void set_msi_source_id(struct pci
set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
(bus << 8) | pdev->bus);
else if ( pdev_type(seg, bus, devfn) == DEV_TYPE_LEGACY_PCI_BRIDGE )
- set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
PCI_BDF2(bus, devfn));
}
break;

View File

@ -0,0 +1,157 @@
From 9ca313aa0824f2d350a7a6c9b1ef6c47e0408f1d Mon Sep 17 00:00:00 2001
From: aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Date: Sat, 23 Aug 2008 23:27:37 +0000
Subject: [PATCH] VNC: Support for ExtendedKeyEvent client message
This patch adds support for the ExtendedKeyEvent client message. This message
allows a client to send raw scan codes directly to the server. If the client
and server are using the same keymap, then it's unnecessary to use the '-k'
option with QEMU when this extension is supported.
This is extension is currently only implemented by gtk-vnc based clients
(gvncviewer, virt-manager, vinagre, etc.).
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5076 c046a42c-6fe2-441c-8c8c-71466251a162
---
vnc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 50 insertions(+), 9 deletions(-)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1285,35 +1285,22 @@ static void press_key_altgr_down(VncStat
}
}
-static void do_key_event(VncState *vs, int down, uint32_t sym)
+static void do_key_event(VncState *vs, int down, int keycode, int sym, int shift)
{
- int keycode;
int shift_keys = 0;
- int shift = 0;
int keypad = 0;
int altgr = 0;
int altgr_keys = 0;
if (is_graphic_console()) {
- if (sym >= 'A' && sym <= 'Z') {
- sym = sym - 'A' + 'a';
- shift = 1;
- }
- else {
+ if (!shift)
shift = keysym_is_shift(vs->kbd_layout, sym & 0xFFFF);
- }
altgr = keysym_is_altgr(vs->kbd_layout, sym & 0xFFFF);
}
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];
- keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
- if (keycode == 0) {
- fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);
- return;
- }
-
/* QEMU console switch */
switch(keycode) {
case 0x2a: /* Left Shift */
@@ -1445,7 +1432,25 @@ static void do_key_event(VncState *vs, i
static void key_event(VncState *vs, int down, uint32_t sym)
{
- do_key_event(vs, down, sym);
+ int keycode;
+ int shift = 0;
+
+ if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
+ sym = sym - 'A' + 'a';
+ shift = 1;
+ }
+ keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
+ do_key_event(vs, down, keycode, sym, shift);
+}
+
+static void ext_key_event(VncState *vs, int down,
+ uint32_t sym, uint16_t keycode)
+{
+ /* if the user specifies a keyboard layout, always use it */
+ if (keyboard_layout)
+ key_event(vs, down, sym);
+ else
+ do_key_event(vs, down, keycode, sym, 0);
}
static void framebuffer_set_updated(VncState *vs, int x, int y, int w, int h)
@@ -1534,6 +1539,15 @@ static void framebuffer_update_request(V
qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock));
}
+static void send_ext_key_event_ack(VncState *vs)
+{
+ vnc_write_u8(vs, 0);
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1);
+ vnc_framebuffer_update(vs, 0, 0, ds_get_width(vs->ds), ds_get_height(vs->ds), -258);
+ vnc_flush(vs);
+}
+
static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings)
{
int i;
@@ -1562,6 +1576,9 @@ static void set_encodings(VncState *vs,
case -257:
vs->has_pointer_type_change = 1;
break;
+ case -258:
+ send_ext_key_event_ack(vs);
+ break;
case 0x574D5669:
vs->has_WMVi = 1;
default:
@@ -1774,6 +1791,24 @@ static int protocol_client_msg(VncState
client_cut_text(vs, read_u32(data, 4), (char *)(data + 8));
break;
+ case 255:
+ if (len == 1)
+ return 2;
+
+ switch (read_u8(data, 1)) {
+ case 0:
+ if (len == 2)
+ return 12;
+
+ ext_key_event(vs, read_u16(data, 2),
+ read_u32(data, 4), read_u32(data, 8));
+ break;
+ default:
+ printf("Msg: %d\n", read_u16(data, 0));
+ vnc_client_error(vs);
+ break;
+ }
+ break;
default:
printf("Msg: %d\n", data[0]);
vnc_client_error(vs);
@@ -2445,10 +2480,11 @@ void vnc_display_init(DisplayState *ds)
vs->ds = ds;
- if (!keyboard_layout)
- keyboard_layout = "en-us";
+ if (keyboard_layout)
+ vs->kbd_layout = init_keyboard_layout(keyboard_layout);
+ else
+ vs->kbd_layout = init_keyboard_layout("en-us");
- vs->kbd_layout = init_keyboard_layout(keyboard_layout);
if (!vs->kbd_layout)
exit(1);
vs->modifiers_state[0x45] = 1; /* NumLock on - on boot */

View File

@ -33,13 +33,13 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1308,6 +1308,9 @@ static void do_key_event(VncState *vs, i
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];
@@ -1440,6 +1440,9 @@ static void key_event(VncState *vs, int
int keycode;
int shift = 0;
+ if ( !strcmp(keyboard_layout,"es") && sym == 0xffea )
+ if ( sym == 0xffea && keyboard_layout && !strcmp(keyboard_layout,"es") )
+ sym = 0xffe9;
+
keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
if (keycode == 0) {
fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);
if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
sym = sym - 'A' + 'a';
shift = 1;

View File

@ -67,6 +67,6 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
continue;
- if (bdrv_open2(bs, danger_buf, BDRV_O_CACHE_WB /* snapshot and write-back */, &bdrv_raw) == 0) {
+ if (bdrv_open2(bs, danger_buf, flags|BDRV_O_CACHE_WB /* snapshot and write-back */, &bdrv_raw) == 0) {
pstrcpy(bs->filename, sizeof(bs->filename), params);
}
#else
if (pasprintf(&buf, "%s/params", bpath) == -1)
continue;
free(params);

View File

@ -45,7 +45,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -775,6 +783,7 @@ void xenstore_parse_domain_config(int hv
@@ -777,6 +785,7 @@ void xenstore_parse_domain_config(int hv
free(mode);
free(params);
free(dev);

View File

@ -26,10 +26,8 @@ dir=$(dirname "$0")
. "$dir/block-common.sh"
#exec >> /tmp/block-dmmd-`date +%F_%T.%N`.log 2>&1
echo ""
date
set -x
echo shell-flags: $-
#echo shell-flags: $-
command=$1
# We check for errors ourselves:
@ -37,85 +35,87 @@ set +e
function run_mdadm()
{
local mdadm_cmd=$1
local msg
local rc
local mdadm_cmd=$1
local msg
local rc
msg="`/sbin/mdadm $mdadm_cmd 2>&1`"
rc=$?
case "$msg" in
*"has been started"* | *"already active"* )
return 0
;;
*"is already in use"* )
: hmm, might be used by another device in this domU
: leave it to upper layers to detect a real error
return 2
;;
* )
return $rc
;;
esac
return 1
msg="`/sbin/mdadm $mdadm_cmd 2>&1`"
rc=$?
case "$msg" in
*"has been started"* | *"already active"* )
return 0
;;
*"is already in use"* )
# hmm, might be used by another device in this domU
# leave it to upper layers to detect a real error
return 2
;;
* )
return $rc
;;
esac
return 1
}
function activate_md()
{
local par=$1
local already_active=0 cfg dev rc t
local par=$1
local already_active=0 cfg dev rc t
if [ ${par} = ${par%%(*} ]; then
# No configuration file specified:
dev=$par
cfd=
else
dev=${par%%(*}
t=${par#*(}
cfg="-c ${t%%)*}"
fi
if /sbin/mdadm -Q -D $dev; then
already_active=1
fi
run_mdadm "-A $dev $cfg"
rc=$?
if [ $already_active -eq 1 ] && [ $rc -eq 2 ]; then
return 0
fi
return $rc
if [ ${par} = ${par%%(*} ]; then
# No configuration file specified:
dev=$par
cfg=
else
dev=${par%%(*}
t=${par#*(}
cfg="-c ${t%%)*}"
fi
if /sbin/mdadm -Q -D $dev; then
already_active=1
fi
run_mdadm "-A $dev $cfg"
rc=$?
if [ $already_active -eq 1 ] && [ $rc -eq 2 ]; then
return 0
fi
return $rc
}
function deactivate_md ()
function deactivate_md()
{
local par=$1 # Make it explicitly local
local par=$1 # Make it explicitly local
## We need the device name only while deactivating
/sbin/mdadm -S ${par%%(*}
return $?
## We need the device name only while deactivating
/sbin/mdadm -S ${par%%(*}
return $?
}
function activate_lvm ()
function activate_lvm()
{
# First scan for PVs and VGs; we may then have to activate the VG
# first, but can ignore errors:
# /sbin/pvscan || :
# /sbin/vgscan --mknodes || :
# /sbin/vgchange -ay ${1%/*} || :
/sbin/lvchange -ay $1
return $?
# First scan for PVs and VGs; we may then have to activate the VG
# first, but can ignore errors:
# /sbin/pvscan || :
# /sbin/vgscan --mknodes || :
# /sbin/vgchange -ay ${1%/*} || :
/sbin/lvchange -ay $1
if [ $? -eq 0 ]; then
return 0
fi
return 1
}
function deactivate_lvm ()
function deactivate_lvm()
{
/sbin/lvchange -an $1
if [ $? -eq 0 ]; then
# We may have to deactivate the VG now, but can ignore errors:
# /sbin/vgchange -an ${1%/*} || :
# Maybe we need to cleanup the LVM cache:
# /sbin/vgscan --mknodes || :
return 0
fi
return 1
/sbin/lvchange -an $1
if [ $? -eq 0 ]; then
# We may have to deactivate the VG now, but can ignore errors:
# /sbin/vgchange -an ${1%/*} || :
# Maybe we need to cleanup the LVM cache:
# /sbin/vgscan --mknodes || :
return 0
fi
return 1
}
BP=100
@ -123,16 +123,16 @@ SP=$BP
VBD=
declare -a stack
function push ()
function push()
{
if [ -z "$1" ]; then
return
fi
let "SP -= 1"
stack[$SP]="${1}"
return
}
function pop ()
function pop()
{
VBD=
@ -142,11 +142,10 @@ function pop ()
VBD=${stack[$SP]}
let "SP += 1"
return
}
function activate_dmmd ()
function activate_dmmd()
{
# echo $1 $2
case $1 in
md)
activate_md $2
@ -158,6 +157,7 @@ function activate_dmmd ()
;;
esac
}
function deactivate_dmmd()
{
case "$1" in
@ -171,7 +171,8 @@ function deactivate_dmmd()
;;
esac
}
function cleanup_stack ()
function cleanup_stack()
{
while [ 1 ]; do
pop
@ -180,17 +181,15 @@ function cleanup_stack ()
fi
deactivate_dmmd $VBD
done
return
}
function parse_par ()
function parse_par()
{
local ac par rc s t # Make these explicitly local vars
ac=$1
par="$2"
echo "parse_paring $1, $2"
par="$par;"
while [ 1 ]; do
t=${par%%;*}
@ -205,7 +204,6 @@ function parse_par ()
fi
par=${par#*;}
echo "type is $t, dev is $s"
if [ "$ac" = "activate" ]; then
activate_dmmd $t $s
rc=$?
@ -213,45 +211,39 @@ function parse_par ()
return 1
fi
fi
echo "push $t $s"
push "$t $s"
done
}
echo $command
case "$command" in
add)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
echo "before parse_par $dmmd"
parse_par activate "$dmmd"
rc=$?
echo "reach here with rc: $rc"
if [ $rc -ne 0 ]; then
cleanup_stack
release_lock "dmmd"
exit 1
fi
lastparam=${dmmd##*;}
usedevice=${lastparam%(*}
claim_lock "block"
xenstore-write $XENBUS_PATH/node "$usedevice"
write_dev "$usedevice"
release_lock "block"
release_lock "dmmd"
exit 0
;;
add)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
parse_par activate "$dmmd"
rc=$?
if [ $rc -ne 0 ]; then
cleanup_stack
release_lock "dmmd"
exit 1
fi
lastparam=${dmmd##*;}
usedevice=${lastparam%(*}
xenstore-write $XENBUS_PATH/node "$usedevice"
write_dev "$usedevice"
release_lock "dmmd"
exit 0
;;
remove)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
parse_par noactivate "$dmmd"
cleanup_stack
release_lock "dmmd"
exit 0
;;
remove)
p=`xenstore-read $XENBUS_PATH/params` || true
claim_lock "dmmd"
dmmd=$p
parse_par noactivate "$dmmd"
cleanup_stack
release_lock "dmmd"
exit 0
;;
esac

View File

@ -2,7 +2,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1342,6 +1342,11 @@ static void do_key_event(VncState *vs, i
@@ -1329,6 +1329,11 @@ static void do_key_event(VncState *vs, i
}
break;
case 0x3a: /* CapsLock */

View File

@ -431,7 +431,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
/* check if it is a cdrom */
if (danger_type && !strcmp(danger_type, "cdrom")) {
bdrv_set_type_hint(bs, BDRV_TYPE_CDROM);
@@ -1028,6 +1044,50 @@ static void xenstore_process_vcpu_set_ev
@@ -1030,6 +1046,50 @@ static void xenstore_process_vcpu_set_ev
return;
}
@ -482,7 +482,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
void xenstore_process_event(void *opaque)
{
char **vec, *offset, *bpath = NULL, *buf = NULL, *drv = NULL, *image = NULL;
@@ -1063,6 +1123,11 @@ void xenstore_process_event(void *opaque
@@ -1065,6 +1125,11 @@ void xenstore_process_event(void *opaque
xenstore_watch_callbacks[i].cb(vec[XS_WATCH_TOKEN],
xenstore_watch_callbacks[i].opaque);

View File

@ -19,7 +19,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -2591,6 +2591,7 @@ int vnc_display_password(DisplayState *d
@@ -2627,6 +2627,7 @@ int vnc_display_password(DisplayState *d
if (password && password[0]) {
if (!(vs->password = qemu_strdup(password)))
return -1;
@ -39,7 +39,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
struct xs_handle *xsh = NULL;
static char *media_filename[MAX_DRIVES+1];
static QEMUTimer *insert_timer = NULL;
@@ -937,6 +938,19 @@ static void xenstore_process_dm_command_
@@ -939,6 +940,19 @@ static void xenstore_process_dm_command_
} else if (!strncmp(command, "continue", len)) {
fprintf(logfile, "dm-command: continue after state save\n");
xen_pause_requested = 0;

View File

@ -49,24 +49,23 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
{
char **e_danger = NULL;
char *buf = NULL;
@@ -733,15 +733,19 @@ void xenstore_parse_domain_config(int hv
@@ -739,11 +739,19 @@ void xenstore_parse_domain_config(int hv
#endif
- drives_table[nb_drives].bdrv = bs;
- drives_table[nb_drives].used = 1;
- media_filename[nb_drives] = strdup(bs->filename);
- nb_drives++;
-
+ if (machine == &xenfv_machine) {
+ drives_table[nb_drives].bdrv = bs;
+ drives_table[nb_drives].used = 1;
#ifdef CONFIG_STUBDOM
- media_filename[nb_drives] = strdup(danger_buf);
+#ifdef CONFIG_STUBDOM
+ media_filename[nb_drives] = strdup(danger_buf);
#else
- media_filename[nb_drives] = strdup(bs->filename);
+#else
+ media_filename[nb_drives] = strdup(bs->filename);
#endif
- nb_drives++;
-
+#endif
+ nb_drives++;
+ } else {
+ qemu_aio_flush();

View File

@ -2,7 +2,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1734,6 +1734,25 @@ static int protocol_client_msg(VncState
@@ -1751,6 +1751,25 @@ static int protocol_client_msg(VncState
}
set_encodings(vs, (int32_t *)(data + 4), limit);

View File

@ -60,7 +60,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
if (buffer->max_capacity &&
buffer->size > buffer->max_capacity) {
/* Discard the middle of the data. */
@@ -176,6 +203,36 @@ static void xencons_send(struct XenConso
@@ -176,6 +203,37 @@ static void xencons_send(struct XenConso
}
}
@ -83,7 +83,8 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
+ return -1;
+ }
+
+ asprintf(&logfile, "%s/guest-%s.log", logdir, domname);
+ if (asprintf(&logfile, "%s/guest-%s.log", logdir, domname) < 0)
+ return -1;
+ qemu_free(domname);
+
+ fd = open(logfile, O_WRONLY|O_CREAT|O_APPEND, 0644);
@ -97,7 +98,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
/* -------------------------------------------------------------------- */
static int con_init(struct XenDevice *xendev)
@@ -183,6 +240,7 @@ static int con_init(struct XenDevice *xe
@@ -183,6 +241,7 @@ static int con_init(struct XenDevice *xe
struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
char *type, *dom, label[32];
const char *output;
@ -105,7 +106,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
/* setup */
dom = xs_get_domain_path(xenstore, con->xendev.dom);
@@ -209,6 +267,10 @@ static int con_init(struct XenDevice *xe
@@ -209,6 +268,10 @@ static int con_init(struct XenDevice *xe
con->chr = qemu_chr_open(label, output, NULL);
xenstore_store_pv_console_info(con->xendev.dev, con->chr, output);
@ -116,7 +117,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
return 0;
}
@@ -246,6 +308,9 @@ static int con_initialise(struct XenDevi
@@ -246,6 +309,9 @@ static int con_initialise(struct XenDevi
con->xendev.remote_port,
con->xendev.local_port,
con->buffer.max_capacity);
@ -126,7 +127,7 @@ Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_console.c
return 0;
}
@@ -266,6 +331,12 @@ static void con_disconnect(struct XenDev
@@ -266,6 +332,12 @@ static void con_disconnect(struct XenDev
xc_gnttab_munmap(xendev->gnttabdev, con->sring, 1);
con->sring = NULL;
}

View File

@ -1,9 +0,0 @@
Index: xen-4.2.0-testing/unmodified_drivers/linux-2.6/blkfront/Kbuild
===================================================================
--- xen-4.2.0-testing.orig/unmodified_drivers/linux-2.6/blkfront/Kbuild
+++ xen-4.2.0-testing/unmodified_drivers/linux-2.6/blkfront/Kbuild
@@ -3,3 +3,4 @@ include $(M)/overrides.mk
obj-m += xen-vbd.o
xen-vbd-objs := blkfront.o vbd.o
+xen-vbd-objs += $(patsubst %.c,%.o,$(notdir $(wildcard $(src)/vcd.c)))

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6fe1ff40b8f0305a2911bbf8701096facb6a47f23d2deffb19b22aea9739c81d
size 5111353
oid sha256:e5ba8bcd20390c3773e1e0a3a82c46896af5fb73ec235d27c250ef028212aa7a
size 5111823

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79d607307935499aca9fa7ddaea37dd9f7f767695b9feb3f25ee9a4a181ccbd9
size 3212153
oid sha256:dfe4a381b86b68e85ea70f6306914cd7e3f9debb7df3797a611f339054042528
size 3212536

View File

@ -0,0 +1,145 @@
Reverse c/s 24757. Breaks booting NetWare.
# HG changeset patch
# User Alex Zeffertt <alex.zeffertt@eu.citrix.com>
# Date 1328812412 0
# Node ID aae516b78fce679f9c367231b7a3891814fcfdbb
# Parent 585caf50a9260d3fc6a4ece0450d10d34e73489f
xenstored: use grant references instead of map_foreign_range
make xenstored use grantref rather than map_foreign_range (which can
only be used by privileged domains)
This patch modifies the xenstore daemon to use xc_gnttab_map_grant_ref
instead of xc_map_foreign_range where available.
Previous versions of this patch have been sent to xen-devel. See
http://lists.xensource.com/archives/html/xen-devel/2008-07/msg00610.html
http://lists.xensource.com/archives/html/xen-devel/2009-03/msg01492.html
Signed-off-by: Diego Ongaro <diego.ongaro@citrix.com>
Signed-off-by: Alex Zeffertt <alex.zeffertt@eu.citrix.com>
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Committed-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
Index: xen-4.2.0-testing/tools/xenstore/xenstored_domain.c
===================================================================
--- xen-4.2.0-testing.orig/tools/xenstore/xenstored_domain.c
+++ xen-4.2.0-testing/tools/xenstore/xenstored_domain.c
@@ -32,10 +32,8 @@
#include "xenstored_watch.h"
#include <xenctrl.h>
-#include <xen/grant_table.h>
static xc_interface **xc_handle;
-xc_gnttab **xcg_handle;
static evtchn_port_t virq_port;
xc_evtchn *xce_handle = NULL;
@@ -165,26 +163,6 @@ static int readchn(struct connection *co
return len;
}
-static void *map_interface(domid_t domid, unsigned long mfn)
-{
- if (*xcg_handle != NULL) {
- /* this is the preferred method */
- return xc_gnttab_map_grant_ref(*xcg_handle, domid,
- GNTTAB_RESERVED_XENSTORE, PROT_READ|PROT_WRITE);
- } else {
- return xc_map_foreign_range(*xc_handle, domid,
- getpagesize(), PROT_READ|PROT_WRITE, mfn);
- }
-}
-
-static void unmap_interface(void *interface)
-{
- if (*xcg_handle != NULL)
- xc_gnttab_munmap(*xcg_handle, interface, 1);
- else
- munmap(interface, getpagesize());
-}
-
static int destroy_domain(void *_domain)
{
struct domain *domain = _domain;
@@ -196,14 +174,8 @@ static int destroy_domain(void *_domain)
eprintf("> Unbinding port %i failed!\n", domain->port);
}
- if (domain->interface) {
- /* Domain 0 was mapped by dom0_init, so it must be unmapped
- using munmap() and not the grant unmap call. */
- if (domain->domid == 0)
- unmap_xenbus(domain->interface);
- else
- unmap_interface(domain->interface);
- }
+ if (domain->interface)
+ munmap(domain->interface, getpagesize());
fire_watches(NULL, "@releaseDomain", false);
@@ -372,7 +344,9 @@ void do_introduce(struct connection *con
domain = find_domain_by_domid(domid);
if (domain == NULL) {
- interface = map_interface(domid, mfn);
+ interface = xc_map_foreign_range(
+ *xc_handle, domid,
+ getpagesize(), PROT_READ|PROT_WRITE, mfn);
if (!interface) {
send_error(conn, errno);
return;
@@ -380,7 +354,7 @@ void do_introduce(struct connection *con
/* Hang domain off "in" until we're finished. */
domain = new_domain(in, domid, port);
if (!domain) {
- unmap_interface(interface);
+ munmap(interface, getpagesize());
send_error(conn, errno);
return;
}
@@ -572,18 +546,6 @@ void do_reset_watches(struct connection
send_ack(conn, XS_RESET_WATCHES);
}
-static int close_xc_handle(void *_handle)
-{
- xc_interface_close(*(xc_interface**)_handle);
- return 0;
-}
-
-static int close_xcg_handle(void *_handle)
-{
- xc_gnttab_close(*(xc_gnttab **)_handle);
- return 0;
-}
-
/* Returns the implicit path of a connection (only domains have this) */
const char *get_implicit_path(const struct connection *conn)
{
@@ -633,18 +595,6 @@ void domain_init(void)
if (!*xc_handle)
barf_perror("Failed to open connection to hypervisor");
- talloc_set_destructor(xc_handle, close_xc_handle);
-
- xcg_handle = talloc(talloc_autofree_context(), xc_gnttab*);
- if (!xcg_handle)
- barf_perror("Failed to allocate domain gnttab handle");
-
- *xcg_handle = xc_gnttab_open(NULL, 0);
- if (*xcg_handle < 0)
- xprintf("WARNING: Failed to open connection to gnttab\n");
- else
- talloc_set_destructor(xcg_handle, close_xcg_handle);
-
xce_handle = xc_evtchn_open(NULL, 0);
if (xce_handle == NULL)

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9569ad2cae31a65caeb17e9b517220f0c18d7165bed1b1412217fcff15c59aef
size 356387
oid sha256:880675bae5305039066c25101305a7854d6d336980ba00dbbbe005a4c23e7126
size 356438

View File

@ -116,7 +116,7 @@ Index: xen-4.2.0-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.2.0-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.2.0-testing/tools/libxl/libxl_types.idl
@@ -319,6 +319,8 @@ libxl_domain_build_info = Struct("domain
@@ -320,6 +320,8 @@ libxl_domain_build_info = Struct("domain
("usbdevice", string),
("soundhw", string),
("xen_platform_pci", libxl_defbool),
@ -129,7 +129,7 @@ Index: xen-4.2.0-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.2.0-testing/tools/libxl/xl_cmdimpl.c
@@ -1405,6 +1405,8 @@ skip_vfb:
@@ -1412,6 +1412,8 @@ skip_vfb:
xlu_cfg_replace_string (config, "soundhw", &b_info->u.hvm.soundhw, 0);
xlu_cfg_get_defbool(config, "xen_platform_pci",
&b_info->u.hvm.xen_platform_pci, 0);

View File

@ -1,8 +1,10 @@
Change default IO-APIC ack mode for single IO-APIC systems to old-style.
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -2013,7 +2013,10 @@ void __init setup_IO_APIC(void)
Index: xen-4.2.0-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-4.2.0-testing.orig/xen/arch/x86/io_apic.c
+++ xen-4.2.0-testing/xen/arch/x86/io_apic.c
@@ -2012,7 +2012,10 @@ void __init setup_IO_APIC(void)
io_apic_irqs = ~PIC_IRQS;
printk("ENABLING IO-APIC IRQs\n");

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eb9bedd095a2d08ef53a0828ef801e9293cfbfca0a6204bf01a38727e83e92cf
size 4862049

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9819306f1cc5efdc0e97c442c627ab95de251228713ab06efa27e5d0c8cabacd
size 4862820

View File

@ -60,33 +60,7 @@ Index: xen-4.2.0-testing/tools/hotplug/Linux/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/hotplug/Linux/Makefile
+++ xen-4.2.0-testing/tools/hotplug/Linux/Makefile
@@ -41,18 +41,6 @@ endif
UDEV_RULES_DIR = $(CONFIG_DIR)/udev
UDEV_RULES = xen-backend.rules xend.rules
-DI = $(if $(DISTDIR),$(shell readlink -f $(DISTDIR)),)
-DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),)
-ifeq ($(findstring $(DI),$(DE)),$(DI))
-HOTPLUGS=install-hotplug install-udev
-else
-ifeq ($(shell [ $(UDEVVER) -ge 059 ] && echo 1),1)
-HOTPLUGS=install-udev
-else
-HOTPLUGS=install-hotplug
-endif
-endif
-
.PHONY: all
all:
@@ -60,18 +48,18 @@ all:
build:
.PHONY: install
-install: all install-initd install-scripts $(HOTPLUGS)
+install: all install-initd install-scripts install-udev
# See docs/misc/distro_mapping.txt for INITD_DIR location
@@ -43,12 +43,12 @@ install: all install-initd install-scrip
.PHONY: install-initd
install-initd:
[ -d $(DESTDIR)$(INITD_DIR) ] || $(INSTALL_DIR) $(DESTDIR)$(INITD_DIR)
@ -94,11 +68,11 @@ Index: xen-4.2.0-testing/tools/hotplug/Linux/Makefile
+ [ -d $(DESTDIR)/var/adm/fillup-templates ] || $(INSTALL_DIR) $(DESTDIR)/var/adm/fillup-templates/
$(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)$(INITD_DIR)
$(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains
+ $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/
- $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xendomains
+ $(INSTALL_DATA) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/
$(INSTALL_PROG) $(XENCOMMONS_INITD) $(DESTDIR)$(INITD_DIR)
- $(INSTALL_PROG) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons
+ $(INSTALL_PROG) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/
- $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)$(SYSCONFIG_DIR)/xencommons
+ $(INSTALL_DATA) $(XENCOMMONS_SYSCONFIG) $(DESTDIR)/var/adm/fillup-templates/
$(INSTALL_PROG) init.d/xen-watchdog $(DESTDIR)$(INITD_DIR)
.PHONY: install-scripts
@ -125,64 +99,3 @@ Index: xen-4.2.0-testing/tools/firmware/etherboot/Makefile
mv _$T $T
$D/src/arch/i386/Makefile: $T Config
Index: xen-4.2.0-testing/stubdom/Makefile
===================================================================
--- xen-4.2.0-testing.orig/stubdom/Makefile
+++ xen-4.2.0-testing/stubdom/Makefile
@@ -396,7 +396,7 @@ install-grub: pv-grub
install-xenstore: xenstore-stubdom
$(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
- $(INSTALL_PROG) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz"
+ $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-xenstore/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/xenstore-stubdom.gz"
#######
# clean
Index: xen-4.2.0-testing/tools/blktap2/vhd/lib/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap2/vhd/lib/Makefile
+++ xen-4.2.0-testing/tools/blktap2/vhd/lib/Makefile
@@ -68,7 +68,7 @@ libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR
install: all
$(INSTALL_DIR) -p $(DESTDIR)$(INST-DIR)
- $(INSTALL_PROG) libvhd.a $(DESTDIR)$(INST-DIR)
+ $(INSTALL_DATA) libvhd.a $(DESTDIR)$(INST-DIR)
$(INSTALL_PROG) libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR)
ln -sf libvhd.so.$(LIBVHD-MAJOR).$(LIBVHD-MINOR) $(DESTDIR)$(INST-DIR)/libvhd.so.$(LIBVHD-MAJOR)
ln -sf libvhd.so.$(LIBVHD-MAJOR) $(DESTDIR)$(INST-DIR)/libvhd.so
Index: xen-4.2.0-testing/tools/blktap/lib/Makefile
===================================================================
--- xen-4.2.0-testing.orig/tools/blktap/lib/Makefile
+++ xen-4.2.0-testing/tools/blktap/lib/Makefile
@@ -23,23 +23,26 @@ OBJS = $(SRCS:.c=.o)
OBJS_PIC = $(SRCS:.c=.opic)
IBINS :=
-LIB = libblktap.a libblktap.so.$(MAJOR).$(MINOR)
+LIB = libblktap.a
+LIB_SO = libblktap.so.$(MAJOR).$(MINOR)
+LIB_ALL = $(LIB) $(LIB_SO)
.PHONY: all
-all: $(LIB)
+all: $(LIB_ALL)
.PHONY: install
install: all
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)
- $(INSTALL_PROG) $(LIB) $(DESTDIR)$(LIBDIR)
+ $(INSTALL_DATA) $(LIB) $(DESTDIR)$(LIBDIR)
+ $(INSTALL_PROG) $(LIB_SO) $(DESTDIR)$(LIBDIR)
ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)/libblktap.so.$(MAJOR)
ln -sf libblktap.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libblktap.so
$(INSTALL_DATA) blktaplib.h $(DESTDIR)$(INCLUDEDIR)
.PHONY: clean
clean:
- rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS
+ rm -rf *.a *.so* *.o *.opic *.rpm $(LIB_ALL) *~ $(DEPS) xen TAGS
libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC)
$(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_LDFLAGS) \

View File

@ -2,7 +2,7 @@ Index: xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
===================================================================
--- xen-4.2.0-testing.orig/docs/man/xmdomain.cfg.pod.5
+++ xen-4.2.0-testing/docs/man/xmdomain.cfg.pod.5
@@ -335,16 +335,10 @@ at hda1, which is the root filesystem.
@@ -333,16 +333,10 @@ at hda1, which is the root filesystem.
=item I<NFS Root>

View File

@ -71,7 +71,7 @@ Index: xen-4.2.0-testing/tools/python/xen/xend/balloon.py
+def get_dom0_min_target():
+ """Returns the minimum amount of memory (in KiB) that dom0 will accept."""
+
+ kb = _get_proc_balloon(labels['min-target'])
+ kb = _get_proc_balloon('min-target')
+ if kb == None:
+ raise VmError('Failed to query minimum target memory allocation of dom0.')
+ return kb
@ -80,7 +80,7 @@ Index: xen-4.2.0-testing/tools/python/xen/xend/balloon.py
+ """Returns the maximum amount of memory (in KiB) that is potentially
+ visible to dom0."""
+
+ kb = _get_proc_balloon(labels['max-target'])
+ kb = _get_proc_balloon('max-target')
+ if kb == None:
+ raise VmError('Failed to query maximum target memory allocation of dom0.')
+ return kb
@ -133,3 +133,16 @@ Index: xen-4.2.0-testing/tools/python/xen/xend/server/SrvDomain.py
[['target', 'int']],
req)
Index: xen-4.2.0-testing/tools/python/xen/xend/osdep.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/osdep.py
+++ xen-4.2.0-testing/tools/python/xen/xend/osdep.py
@@ -42,6 +42,8 @@ def _linux_balloon_stat_proc(label):
xend2linux_labels = { 'current' : 'Current allocation',
'target' : 'Requested target',
+ 'min-target' : 'Minimum target',
+ 'max-target' : 'Maximum target',
'low-balloon' : 'Low-mem balloon',
'high-balloon' : 'High-mem balloon',
'limit' : 'Xen hard limit' }

View File

@ -0,0 +1,29 @@
bnc#757525
Index: xen-4.2.0-testing/tools/python/xen/xend/server/netif.py
===================================================================
--- xen-4.2.0-testing.orig/tools/python/xen/xend/server/netif.py
+++ xen-4.2.0-testing/tools/python/xen/xend/server/netif.py
@@ -23,6 +23,7 @@
import os
import random
import re
+import commands
from xen.xend import XendOptions, sxp
from xen.xend.server.DevController import DevController
@@ -101,6 +102,14 @@ class NetifController(DevController):
def __init__(self, vm):
DevController.__init__(self, vm)
+ def createDevice(self, config):
+ bridge = config.get('bridge')
+ if bridge is not None:
+ bridge_result = commands.getstatusoutput("/sbin/ifconfig %s" % bridge)
+ if bridge_result[0] != 0:
+ raise VmError('Network bridge does not exist: %s' % bridge)
+ DevController.createDevice(self, config)
+
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""

Some files were not shown because too many files have changed in this diff Show More