forked from pool/xen
OBS User unknown 2009-05-04 16:38:09 +00:00 committed by Git OBS Bridge
parent 4790095364
commit 0bb51565d6
187 changed files with 2454 additions and 20791 deletions

@@ -1,90 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1220022665 -3600
# Node ID 481f0dc6beb0b19cb02354dbe9b4ce068a5f6a18
# Parent cd078a3d600e6a1bab65e6392a60a832253cff8b
x86: constify microcode hypercall argument
Linux 2.6.27 marks the data pointer in its firmware struct 'const',
and hence, to avoid a compiler warning, Xen's microcode update
interface should be properly constified too.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
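The effect is easiest to see in miniature: a caller holding a const
firmware blob (as Linux 2.6.27's firmware struct does) can pass it
cleanly only if the receiving interface's buffer parameter is itself
const-qualified. A minimal standalone sketch of this, using plain C
pointers in place of Xen's guest-handle macros (load_microcode() is a
hypothetical stand-in for the hypercall path):

#include <stddef.h>
#include <string.h>

static char scratch[64];

/* The consumer only reads from the buffer, so it takes a const pointer. */
static int load_microcode(const void *buf, size_t len)
{
    if (len > sizeof(scratch))
        return -1;
    memcpy(scratch, buf, len);  /* copy-in only; buf is never written */
    return 0;
}

int main(void)
{
    static const char blob[] = "ucode";         /* const data, as in 2.6.27 */
    return load_microcode(blob, sizeof(blob));  /* no warning, no cast */
}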
Index: xen-3.3.1-testing/xen/arch/x86/microcode.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/microcode.c
+++ xen-3.3.1-testing/xen/arch/x86/microcode.c
@@ -124,7 +124,7 @@ static DEFINE_SPINLOCK(microcode_update_
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
-static void __user *user_buffer; /* user area microcode data buffer */
+static const void __user *user_buffer; /* user area microcode data buffer */
static unsigned int user_buffer_size; /* it's size */
typedef enum mc_error_code {
@@ -455,7 +455,7 @@ out:
return error;
}
-int microcode_update(XEN_GUEST_HANDLE(void) buf, unsigned long len)
+int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len)
{
int ret;
Index: xen-3.3.1-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-3.3.1-testing/xen/arch/x86/platform_hypercall.c
@@ -147,8 +147,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
case XENPF_microcode_update:
{
- extern int microcode_update(XEN_GUEST_HANDLE(void), unsigned long len);
- XEN_GUEST_HANDLE(void) data;
+ XEN_GUEST_HANDLE(const_void) data;
ret = xsm_microcode();
if ( ret )
Index: xen-3.3.1-testing/xen/include/asm-x86/processor.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/processor.h
+++ xen-3.3.1-testing/xen/include/asm-x86/processor.h
@@ -583,6 +583,8 @@ int rdmsr_hypervisor_regs(
int wrmsr_hypervisor_regs(
uint32_t idx, uint32_t eax, uint32_t edx);
+int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
Index: xen-3.3.1-testing/xen/include/public/platform.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/public/platform.h
+++ xen-3.3.1-testing/xen/include/public/platform.h
@@ -97,7 +97,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_read_memty
#define XENPF_microcode_update 35
struct xenpf_microcode_update {
/* IN variables. */
- XEN_GUEST_HANDLE(void) data; /* Pointer to microcode data */
+ XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */
uint32_t length; /* Length of microcode data. */
};
typedef struct xenpf_microcode_update xenpf_microcode_update_t;
Index: xen-3.3.1-testing/xen/include/xen/compat.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/compat.h
+++ xen-3.3.1-testing/xen/include/xen/compat.h
@@ -19,7 +19,9 @@
type *_[0] __attribute__((__packed__)); \
} __compat_handle_ ## name
-#define DEFINE_COMPAT_HANDLE(name) __DEFINE_COMPAT_HANDLE(name, name)
+#define DEFINE_COMPAT_HANDLE(name) \
+ __DEFINE_COMPAT_HANDLE(name, name); \
+ __DEFINE_COMPAT_HANDLE(const_ ## name, const name)
#define COMPAT_HANDLE(name) __compat_handle_ ## name
/* Is the compat handle a NULL reference? */

File diff suppressed because it is too large

@@ -1,51 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1220450168 -3600
# Node ID c9db93b0660ae644491c862e47744a2349ba630f
# Parent 1e98ea5c860438a227e135701e6439b22826f52f
x86: Fix interpretation of get_l*e_linear_pagetable().
Broken by get_page_type() preemption patch (c/s 18412).
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
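The breakage came from translating the old boolean-returning helpers
into error codes: the interim condition attempted the linear-pagetable
fallback on any non-EAGAIN failure and then still left a failure code
behind, and it could also mask the preemption results. The corrected
shape, reduced to its essentials (try_primary()/try_fallback() are
hypothetical stand-ins for get_page_and_type_from_pagenr() and the
get_l*e_linear_pagetable() checks):

#include <errno.h>

/* Retryable results must pass through untouched; only a hard failure
 * may be rescued by the fallback, and a rescue must clear the error. */
static int get_with_fallback(int (*try_primary)(void),
                             int (*try_fallback)(void))
{
    int rc = try_primary();

    if (rc == -EAGAIN || rc == -EINTR)
        return rc;              /* preempted: caller will retry */

    if (rc == -EINVAL && try_fallback())
        rc = 0;                 /* fallback succeeded: not an error */

    return rc;
}

static int primary_fails_hard(void) { return -EINVAL; }
static int fallback_ok(void)        { return 1; }

int main(void)
{
    return get_with_fallback(primary_fails_hard, fallback_ok);  /* 0 */
}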
Index: xen-3.3.0-testing/xen/arch/x86/mm.c
===================================================================
--- xen-3.3.0-testing.orig/xen/arch/x86/mm.c
+++ xen-3.3.0-testing/xen/arch/x86/mm.c
@@ -762,9 +762,8 @@ get_page_from_l2e(
rc = get_page_and_type_from_pagenr(
l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
- if ( unlikely(rc) && rc != -EAGAIN &&
- get_l2_linear_pagetable(l2e, pfn, d) )
- rc = -EINVAL;
+ if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+ rc = 0;
return rc;
}
@@ -788,9 +787,8 @@ get_page_from_l3e(
rc = get_page_and_type_from_pagenr(
l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
- if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR &&
- get_l3_linear_pagetable(l3e, pfn, d) )
- rc = -EINVAL;
+ if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
+ rc = 0;
return rc;
}
@@ -814,9 +812,8 @@ get_page_from_l4e(
rc = get_page_and_type_from_pagenr(
l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
- if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR &&
- get_l4_linear_pagetable(l4e, pfn, d) )
- rc = -EINVAL;
+ if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
+ rc = 0;
return rc;
}

@@ -1,235 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1220535506 -3600
# Node ID ae9b223a675d9ed37cffbc24d0abe83ef2a30ab3
# Parent 8d982c7a0d303de1200134fcf3a2573f6f4449fa
More efficient implementation of SCHEDOP_poll when polling a single port.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
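The heart of the change is replacing the per-vcpu/per-domain
is_polling booleans with a d->poll_mask bitmap plus a per-vcpu
poll_evtchn (see the sched.h hunk below: > 0 one port, = 0 not
polling, < 0 several ports), so the event sender can skip vcpus known
to be waiting on a different port. A single-threaded toy model of the
sender side (plain C, no locking; the real code uses
test_and_clear_bit() and the barriers discussed in the hunks below):

#include <stdio.h>

#define MAX_VCPUS 64

struct toy_vcpu {
    int poll_evtchn;  /* > 0: one port; = 0: not polling; < 0: many ports */
};

static unsigned long long poll_mask;          /* bit i set: vcpu i polls */
static struct toy_vcpu vcpus[MAX_VCPUS];

/* Sender side of evtchn_set_pending(): wake only interested pollers. */
static void wake_pollers(int port, void (*unblock)(int vcpuid))
{
    int i;

    if (poll_mask == 0)                       /* fast path: nobody polls */
        return;

    for (i = 0; i < MAX_VCPUS; i++) {
        if (!(poll_mask & (1ULL << i)))
            continue;
        if (vcpus[i].poll_evtchn > 0 && vcpus[i].poll_evtchn != port)
            continue;                         /* waiting on another port */
        poll_mask &= ~(1ULL << i);
        vcpus[i].poll_evtchn = 0;
        unblock(i);
    }
}

static void report(int vcpuid) { printf("woke vcpu %d\n", vcpuid); }

int main(void)
{
    poll_mask = (1ULL << 2) | (1ULL << 5);
    vcpus[2].poll_evtchn = 7;   /* polling port 7 only */
    vcpus[5].poll_evtchn = -1;  /* polling several ports */
    wake_pollers(9, report);    /* wakes vcpu 5 only */
    return 0;
}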
Index: xen-3.3.1-testing/xen/common/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/domain.c
+++ xen-3.3.1-testing/xen/common/domain.c
@@ -673,9 +673,11 @@ void vcpu_reset(struct vcpu *v)
set_bit(_VPF_down, &v->pause_flags);
+ clear_bit(v->vcpu_id, d->poll_mask);
+ v->poll_evtchn = 0;
+
v->fpu_initialised = 0;
v->fpu_dirtied = 0;
- v->is_polling = 0;
v->is_initialised = 0;
v->nmi_pending = 0;
v->mce_pending = 0;
Index: xen-3.3.1-testing/xen/common/event_channel.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/event_channel.c
+++ xen-3.3.1-testing/xen/common/event_channel.c
@@ -534,6 +534,7 @@ out:
static int evtchn_set_pending(struct vcpu *v, int port)
{
struct domain *d = v->domain;
+ int vcpuid;
/*
* The following bit operations must happen in strict order.
@@ -553,15 +554,19 @@ static int evtchn_set_pending(struct vcp
}
/* Check if some VCPU might be polling for this event. */
- if ( unlikely(d->is_polling) )
- {
- d->is_polling = 0;
- smp_mb(); /* check vcpu poll-flags /after/ clearing domain poll-flag */
- for_each_vcpu ( d, v )
+ if ( likely(bitmap_empty(d->poll_mask, MAX_VIRT_CPUS)) )
+ return 0;
+
+ /* Wake any interested (or potentially interested) pollers. */
+ for ( vcpuid = find_first_bit(d->poll_mask, MAX_VIRT_CPUS);
+ vcpuid < MAX_VIRT_CPUS;
+ vcpuid = find_next_bit(d->poll_mask, MAX_VIRT_CPUS, vcpuid+1) )
+ {
+ v = d->vcpu[vcpuid];
+ if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
+ test_and_clear_bit(vcpuid, d->poll_mask) )
{
- if ( !v->is_polling )
- continue;
- v->is_polling = 0;
+ v->poll_evtchn = 0;
vcpu_unblock(v);
}
}
Index: xen-3.3.1-testing/xen/common/schedule.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/schedule.c
+++ xen-3.3.1-testing/xen/common/schedule.c
@@ -198,6 +198,27 @@ void vcpu_wake(struct vcpu *v)
TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}
+void vcpu_unblock(struct vcpu *v)
+{
+ if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
+ return;
+
+ /* Polling period ends when a VCPU is unblocked. */
+ if ( unlikely(v->poll_evtchn != 0) )
+ {
+ v->poll_evtchn = 0;
+ /*
+ * We *must* re-clear _VPF_blocked to avoid racing other wakeups of
+ * this VCPU (and it then going back to sleep on poll_mask).
+ * Test-and-clear is idiomatic and ensures clear_bit not reordered.
+ */
+ if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
+ clear_bit(_VPF_blocked, &v->pause_flags);
+ }
+
+ vcpu_wake(v);
+}
+
static void vcpu_migrate(struct vcpu *v)
{
unsigned long flags;
@@ -337,7 +358,7 @@ static long do_poll(struct sched_poll *s
struct vcpu *v = current;
struct domain *d = v->domain;
evtchn_port_t port;
- long rc = 0;
+ long rc;
unsigned int i;
/* Fairly arbitrary limit. */
@@ -348,11 +369,24 @@ static long do_poll(struct sched_poll *s
return -EFAULT;
set_bit(_VPF_blocked, &v->pause_flags);
- v->is_polling = 1;
- d->is_polling = 1;
+ v->poll_evtchn = -1;
+ set_bit(v->vcpu_id, d->poll_mask);
+#ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
/* Check for events /after/ setting flags: avoids wakeup waiting race. */
- smp_wmb();
+ smp_mb();
+
+ /*
+ * Someone may have seen we are blocked but not that we are polling, or
+ * vice versa. We are certainly being woken, so clean up and bail. Beyond
+ * this point others can be guaranteed to clean up for us if they wake us.
+ */
+ rc = 0;
+ if ( (v->poll_evtchn == 0) ||
+ !test_bit(_VPF_blocked, &v->pause_flags) ||
+ !test_bit(v->vcpu_id, d->poll_mask) )
+ goto out;
+#endif
rc = 0;
if ( local_events_need_delivery() )
@@ -373,6 +407,9 @@ static long do_poll(struct sched_poll *s
goto out;
}
+ if ( sched_poll->nr_ports == 1 )
+ v->poll_evtchn = port;
+
if ( sched_poll->timeout != 0 )
set_timer(&v->poll_timer, sched_poll->timeout);
@@ -382,7 +419,8 @@ static long do_poll(struct sched_poll *s
return 0;
out:
- v->is_polling = 0;
+ v->poll_evtchn = 0;
+ clear_bit(v->vcpu_id, d->poll_mask);
clear_bit(_VPF_blocked, &v->pause_flags);
return rc;
}
@@ -764,11 +802,8 @@ static void poll_timer_fn(void *data)
{
struct vcpu *v = data;
- if ( !v->is_polling )
- return;
-
- v->is_polling = 0;
- vcpu_unblock(v);
+ if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
+ vcpu_unblock(v);
}
/* Initialise the data structures. */
Index: xen-3.3.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/sched.h
+++ xen-3.3.1-testing/xen/include/xen/sched.h
@@ -106,8 +106,6 @@ struct vcpu
bool_t fpu_initialised;
/* Has the FPU been used since it was last saved? */
bool_t fpu_dirtied;
- /* Is this VCPU polling any event channels (SCHEDOP_poll)? */
- bool_t is_polling;
/* Initialization completed for this VCPU? */
bool_t is_initialised;
/* Currently running on a CPU? */
@@ -134,6 +132,13 @@ struct vcpu
/* VCPU affinity is temporarily locked from controller changes? */
bool_t affinity_locked;
+ /*
+ * > 0: a single port is being polled;
+ * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
+ * < 0: multiple ports may be being polled.
+ */
+ int poll_evtchn;
+
unsigned long pause_flags;
atomic_t pause_count;
@@ -209,8 +214,6 @@ struct domain
struct domain *target;
/* Is this guest being debugged by dom0? */
bool_t debugger_attached;
- /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
- bool_t is_polling;
/* Is this guest dying (i.e., a zombie)? */
enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
/* Domain is paused by controller software? */
@@ -218,6 +221,9 @@ struct domain
/* Domain's VCPUs are pinned 1:1 to physical CPUs? */
bool_t is_pinned;
+ /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
+ DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
+
/* Guest has shut down (inc. reason code)? */
spinlock_t shutdown_lock;
bool_t is_shutting_down; /* in process of shutting down? */
@@ -514,6 +520,7 @@ static inline int vcpu_runnable(struct v
atomic_read(&v->domain->pause_count));
}
+void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
@@ -530,12 +537,6 @@ void vcpu_unlock_affinity(struct vcpu *v
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
-static inline void vcpu_unblock(struct vcpu *v)
-{
- if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
- vcpu_wake(v);
-}
-
#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))

@@ -1,62 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1220968229 -3600
# Node ID 5ce9459ce8722a8af89da5a73b0c80a767d5b1ad
# Parent d57e9b29858bddf4651efb002cfdadf978da79c0
vtd: Enable pass-through translation for Dom0
If the pass-through field in the extended capability register is set,
set the pass-through translation type for Dom0. DMA requests with
untranslated addresses are then processed as pass-through in Dom0 and
need not be translated through a multi-level page table.
Signed-off-by: Anthony Xu <anthony.xu@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -1089,12 +1089,12 @@ static int domain_context_mapping_one(
}
spin_lock_irqsave(&iommu->lock, flags);
-
-#ifdef CONTEXT_PASSTHRU
if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+ {
context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+ agaw = level_to_agaw(iommu->nr_pt_levels);
+ }
else
-#endif
{
/* Ensure we have pagetables allocated down to leaf PTE. */
if ( hd->pgd_maddr == 0 )
@@ -1463,11 +1463,12 @@ int intel_iommu_map_page(
u64 pg_maddr;
int pte_present;
-#ifdef CONTEXT_PASSTHRU
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+
/* do nothing if dom0 and iommu supports pass thru */
if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
return 0;
-#endif
pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
if ( pg_maddr == 0 )
@@ -1504,11 +1505,9 @@ int intel_iommu_unmap_page(struct domain
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
iommu = drhd->iommu;
-#ifdef CONTEXT_PASSTHRU
/* do nothing if dom0 and iommu supports pass thru */
if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
return 0;
-#endif
dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);

@@ -1,91 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221040389 -3600
# Node ID 6a37b3d966f90f3c1604c9a3045d033cc5eeb4ea
# Parent b5912430e66c900c2092c035227816f43f7caeb0
vtd: Add a command line param to enable/disable pass-through feature
Taking security into account, it is not suitable to bypass VT-d
translation for Dom0 by default when the pass-through field in the
extended capability register is set. This feature is for people/usages
who are not overly worried about security/isolation, but want better
performance.
This patch adds a command line param that controls whether it is
enabled or disabled.
Signed-off-by: Weidong Han <weidong.han@intel.com>
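The new keyword slots into the existing comma-separated parser for the
iommu= boot parameter. A standalone sketch of that parsing loop (the
same strchr()-based shape as parse_iommu_param() below, trimmed to the
new option):

#include <stdio.h>
#include <string.h>

static int iommu_passthrough;

static void parse_iommu_param(char *s)
{
    char *ss;

    do {
        ss = strchr(s, ',');          /* split on commas, in place */
        if (ss)
            *ss = '\0';
        if (!strcmp(s, "passthrough"))
            iommu_passthrough = 1;
        s = ss + 1;
    } while (ss);
}

int main(void)
{
    char cmdline[] = "pv,passthrough";    /* e.g. iommu=pv,passthrough */
    parse_iommu_param(cmdline);
    printf("iommu_passthrough=%d\n", iommu_passthrough);  /* -> 1 */
    return 0;
}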
Index: xen-3.3.1-testing/xen/drivers/passthrough/iommu.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/iommu.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/iommu.c
@@ -33,11 +33,13 @@ int amd_iov_detect(void);
* pv Enable IOMMU for PV domains
* no-pv Disable IOMMU for PV domains (default)
* force|required Don't boot unless IOMMU is enabled
+ * passthrough Bypass VT-d translation for Dom0
*/
custom_param("iommu", parse_iommu_param);
int iommu_enabled = 0;
int iommu_pv_enabled = 0;
int force_iommu = 0;
+int iommu_passthrough = 0;
static void __init parse_iommu_param(char *s)
{
@@ -58,6 +60,8 @@ static void __init parse_iommu_param(cha
iommu_pv_enabled = 0;
else if ( !strcmp(s, "force") || !strcmp(s, "required") )
force_iommu = 1;
+ else if ( !strcmp(s, "passthrough") )
+ iommu_passthrough = 1;
s = ss + 1;
} while ( ss );
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -1089,7 +1089,8 @@ static int domain_context_mapping_one(
}
spin_lock_irqsave(&iommu->lock, flags);
- if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+ if ( iommu_passthrough &&
+ ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
{
context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
agaw = level_to_agaw(iommu->nr_pt_levels);
@@ -1467,7 +1468,8 @@ int intel_iommu_map_page(
iommu = drhd->iommu;
/* do nothing if dom0 and iommu supports pass thru */
- if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+ if ( iommu_passthrough &&
+ ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
return 0;
pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
@@ -1506,7 +1508,8 @@ int intel_iommu_unmap_page(struct domain
iommu = drhd->iommu;
/* do nothing if dom0 and iommu supports pass thru */
- if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+ if ( iommu_passthrough &&
+ ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
return 0;
dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
Index: xen-3.3.1-testing/xen/include/xen/iommu.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/iommu.h
+++ xen-3.3.1-testing/xen/include/xen/iommu.h
@@ -31,6 +31,7 @@ extern int vtd_enabled;
extern int iommu_enabled;
extern int iommu_pv_enabled;
extern int force_iommu;
+extern int iommu_passthrough;
#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu)

@@ -1,213 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221141982 -3600
# Node ID 4ffc70556000869d3c301452a99e4e524dd54b07
# Parent fba8dca321c2b99842af6624f24afb77c472184b
x86: Support CPU hotplug offline.
Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
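Beyond redirecting interrupts, offlining has to evict vcpus from the
dying CPU: the new cpu_disable_scheduler() (in the schedule.c hunk
below) widens a hard 1:1 affinity pin, migrates singleshot timers, and
flags resident vcpus for migration. The pin-breaking rule in
isolation, with a plain bitmask standing in for cpumask_t
(break_affinity() is a hypothetical name):

/* Returns the (possibly widened) affinity after CPU 'dying' goes away. */
static unsigned long break_affinity(unsigned long affinity,
                                    unsigned int dying, unsigned long all)
{
    unsigned long bit = 1UL << dying;

    /* Pinned solely to the dying CPU: the pin cannot be honoured any
     * longer, so reset to "any CPU" (the patch logs this per vcpu). */
    if (affinity == bit)
        return all;

    /* Otherwise leave the mask alone; the vcpu just migrates elsewhere. */
    return affinity;
}

int main(void)
{
    /* vcpu pinned only to CPU 3; CPU 3 dies; affinity becomes 0xff. */
    return break_affinity(1UL << 3, 3, 0xff) == 0xff ? 0 : 1;
}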
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -751,9 +751,12 @@ __initcall(setup_dump_irqs);
void fixup_irqs(cpumask_t map)
{
- unsigned int irq;
+ unsigned int irq, sp;
static int warned;
+ irq_guest_action_t *action;
+ struct pending_eoi *peoi;
+ /* Direct all future interrupts away from this CPU. */
for ( irq = 0; irq < NR_IRQS; irq++ )
{
cpumask_t mask;
@@ -772,8 +775,24 @@ void fixup_irqs(cpumask_t map)
printk("Cannot set affinity for irq %i\n", irq);
}
+ /* Service any interrupts that beat us in the re-direction race. */
local_irq_enable();
mdelay(1);
local_irq_disable();
+
+ /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
+ for ( irq = 0; irq < NR_IRQS; irq++ )
+ {
+ if ( !(irq_desc[irq].status & IRQ_GUEST) )
+ continue;
+ action = (irq_guest_action_t *)irq_desc[irq].action;
+ cpu_clear(smp_processor_id(), action->cpu_eoi_map);
+ }
+
+ /* Flush the interrupt EOI stack. */
+ peoi = this_cpu(pending_eoi);
+ for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
+ peoi[sp].ready = 1;
+ flush_ready_eoi(NULL);
}
#endif
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
@@ -1224,15 +1224,6 @@ int __cpu_disable(void)
if (cpu == 0)
return -EBUSY;
- /*
- * Only S3 is using this path, and thus idle vcpus are running on all
- * APs when we are called. To support full cpu hotplug, other
- * notification mechanisms should be introduced (e.g., migrate vcpus
- * off this physical cpu before rendezvous point).
- */
- if (!is_idle_vcpu(current))
- return -EINVAL;
-
local_irq_disable();
clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
@@ -1248,6 +1239,9 @@ int __cpu_disable(void)
fixup_irqs(map);
/* It's now safe to remove this processor from the online map */
cpu_clear(cpu, cpu_online_map);
+
+ cpu_disable_scheduler();
+
return 0;
}
@@ -1274,28 +1268,6 @@ static int take_cpu_down(void *unused)
return __cpu_disable();
}
-/*
- * XXX: One important thing missed here is to migrate vcpus
- * from dead cpu to other online ones and then put whole
- * system into a stop state. It assures a safe environment
- * for a cpu hotplug/remove at normal running state.
- *
- * However for xen PM case, at this point:
- * -> All other domains should be notified with PM event,
- * and then in following states:
- * * Suspend state, or
- * * Paused state, which is a force step to all
- * domains if they do nothing to suspend
- * -> All vcpus of dom0 (except vcpu0) have already beem
- * hot removed
- * with the net effect that all other cpus only have idle vcpu
- * running. In this special case, we can avoid vcpu migration
- * then and system can be considered in a stop state.
- *
- * So current cpu hotplug is a special version for PM specific
- * usage, and need more effort later for full cpu hotplug.
- * (ktian1)
- */
int cpu_down(unsigned int cpu)
{
int err = 0;
@@ -1306,6 +1278,12 @@ int cpu_down(unsigned int cpu)
goto out;
}
+ /* Can not offline BSP */
+ if (cpu == 0) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (!cpu_online(cpu)) {
err = -EINVAL;
goto out;
Index: xen-3.3.1-testing/xen/common/sched_credit.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/sched_credit.c
+++ xen-3.3.1-testing/xen/common/sched_credit.c
@@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
BUG_ON( cpu != snext->vcpu->processor );
+ /* If this CPU is going offline we shouldn't steal work. */
+ if ( unlikely(!cpu_online(cpu)) )
+ goto out;
+
if ( snext->pri == CSCHED_PRI_IDLE )
CSCHED_STAT_CRANK(load_balance_idle);
else if ( snext->pri == CSCHED_PRI_TS_OVER )
@@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
return speer;
}
+ out:
/* Failed to find more important work elsewhere... */
__runq_remove(snext);
return snext;
Index: xen-3.3.1-testing/xen/common/schedule.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/schedule.c
+++ xen-3.3.1-testing/xen/common/schedule.c
@@ -268,6 +268,48 @@ void vcpu_force_reschedule(struct vcpu *
}
}
+/*
+ * This function is used by cpu_hotplug code from stop_machine context.
+ * Hence we can avoid needing to take the
+ */
+void cpu_disable_scheduler(void)
+{
+ struct domain *d;
+ struct vcpu *v;
+ unsigned int cpu = smp_processor_id();
+
+ for_each_domain ( d )
+ {
+ for_each_vcpu ( d, v )
+ {
+ if ( is_idle_vcpu(v) )
+ continue;
+
+ if ( (cpus_weight(v->cpu_affinity) == 1) &&
+ cpu_isset(cpu, v->cpu_affinity) )
+ {
+ printk("Breaking vcpu affinity for domain %d vcpu %d\n",
+ v->domain->domain_id, v->vcpu_id);
+ cpus_setall(v->cpu_affinity);
+ }
+
+ /*
+ * Migrate single-shot timers to CPU0. A new cpu will automatically
+ * be chosen when the timer is next re-set.
+ */
+ if ( v->singleshot_timer.cpu == cpu )
+ migrate_timer(&v->singleshot_timer, 0);
+
+ if ( v->processor == cpu )
+ {
+ set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_sleep_nosync(v);
+ vcpu_migrate(v);
+ }
+ }
+ }
+}
+
static int __vcpu_set_affinity(
struct vcpu *v, cpumask_t *affinity,
bool_t old_lock_status, bool_t new_lock_status)
Index: xen-3.3.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/sched.h
+++ xen-3.3.1-testing/xen/include/xen/sched.h
@@ -531,6 +531,7 @@ void domain_unpause_by_systemcontroller(
void cpu_init(void);
void vcpu_force_reschedule(struct vcpu *v);
+void cpu_disable_scheduler(void);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);

@@ -1,41 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221148273 -3600
# Node ID a5cc38391afb16af6bdae32c5a5f4f9a3a236987
# Parent d8ce41b79ecc74c00797d73caa56dbdaf02bbd66
ACPI: Grant access of MSR_IA32_THERM_CONTROL MSR to dom0
The purpose is to support dom0 throttling control via MSR.
Signed-off-by: Wei Gang <gang.wei@intel.com>
Index: xen-3.3.1-testing/xen/arch/x86/traps.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/traps.c
+++ xen-3.3.1-testing/xen/arch/x86/traps.c
@@ -2160,6 +2160,12 @@ static int emulate_privileged_op(struct
if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
goto fail;
break;
+ case MSR_IA32_THERM_CONTROL:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ goto fail;
+ if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
+ goto fail;
+ break;
default:
if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
break;
@@ -2236,6 +2242,12 @@ static int emulate_privileged_op(struct
MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
break;
+ case MSR_IA32_THERM_CONTROL:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ goto fail;
+ if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
+ goto fail;
+ break;
default:
if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
{

@@ -1,27 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221212419 -3600
# Node ID 34aed15ba9df804ce037c5f691a9b11058fff2b9
# Parent f125e481d8b65b81dd794d60a99fb0b823eaee2c
x86, cpu hotplug: flush softirq work when going offline
From: Haitao Shan <haitao.shan@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -86,6 +86,12 @@ static void default_idle(void)
static void play_dead(void)
{
+ /*
+ * Flush pending softirqs if any. They can be queued up before this CPU
+ * was taken out of cpu_online_map in __cpu_disable().
+ */
+ do_softirq();
+
/* This must be done before dead CPU ack */
cpu_exit_clear();
hvm_cpu_down();

File diff suppressed because it is too large

@@ -1,37 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221489878 -3600
# Node ID 75c4a603d9cd7f73366986261e1078fce1ead815
# Parent 59aba2cbbb58111de1aba6b173800d62956cf26f
x86: Fix 32-bit build after AMD microcode update patch.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
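The 32-bit build broke because addr was declared unsigned long, which
is 32 bits wide there, so the >> 32 used to form the WRMSR high half
was meaningless. Keeping the address in a uint64_t makes the split
well-defined on both builds; the arithmetic in isolation (a sketch,
with the MSR write itself omitted):

#include <assert.h>
#include <stdint.h>

/* WRMSR takes a 64-bit value as two 32-bit halves in edx:eax. */
static void split_for_wrmsr(uint64_t addr, uint32_t *eax, uint32_t *edx)
{
    *edx = (uint32_t)(addr >> 32);  /* high half */
    *eax = (uint32_t)addr;          /* low half */
}

int main(void)
{
    uint32_t lo, hi;
    split_for_wrmsr(0x123456789abcdef0ULL, &lo, &hi);
    assert(lo == 0x9abcdef0u && hi == 0x12345678u);
    return 0;
}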
Index: xen-3.3.1-testing/xen/arch/x86/microcode_amd.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/microcode_amd.c
+++ xen-3.3.1-testing/xen/arch/x86/microcode_amd.c
@@ -170,11 +170,10 @@ out:
static int apply_microcode_amd(int cpu)
{
unsigned long flags;
- unsigned int eax, edx;
- unsigned int rev;
+ uint32_t eax, edx, rev;
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- unsigned long addr;
+ uint64_t addr;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
@@ -185,8 +184,8 @@ static int apply_microcode_amd(int cpu)
spin_lock_irqsave(&microcode_update_lock, flags);
addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code;
- edx = (unsigned int)((unsigned long)(addr >> 32));
- eax = (unsigned int)((unsigned long)(addr & 0xffffffff));
+ edx = (uint32_t)(addr >> 32);
+ eax = (uint32_t)addr;
asm volatile("movl %0, %%ecx; wrmsr" :
: "i" (MSR_AMD_PATCHLOADER), "a" (eax), "d" (edx) : "ecx");

File diff suppressed because it is too large

@@ -1,85 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221565466 -3600
# Node ID f03b0cc33576e4fe3ff6adfd0853addf676c105e
# Parent 087b8b29b6b20165062697305c6651ca2acb7b5b
stubdom/ioemu link farm creation fixes
Replace the stubdom/ioemu link farm creation in stubdom/Makefile
with code which arranges that:
* No symlinks are made for output files - in particular, any
symlinks for .d files would be written through by the compiler
and cause damage to the original tree and other strange
behaviours
* All subdirectories are made as local subdirectories rather than
links
* Any interrupted or half-completed creation of the link farm
leaves the directory in a state where the link farming will be
restarted
* We use make's inherent ability to test for the existence of files
rather than using [ -f ... ] at the start of the rule's commands
* The list of files to be excluded from the link farm can be
easily updated
etc.
This should fix some problems particularly with parallel builds,
or by-hand builds where directories are entered in other than the
usual order.
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Index: xen-3.3.1-testing/stubdom/Makefile
===================================================================
--- xen-3.3.1-testing.orig/stubdom/Makefile
+++ xen-3.3.1-testing/stubdom/Makefile
@@ -164,7 +164,26 @@ lwip-$(XEN_TARGET_ARCH): lwip-$(LWIP_VER
.PHONY: $(CROSS_ROOT)
$(CROSS_ROOT): cross-newlib cross-zlib cross-libpci
-mk-headers-$(XEN_TARGET_ARCH):
+ioemu/linkfarm.stamp: $(XEN_ROOT)/tools/ioemu-dir
+ mkdir -p ioemu
+ifeq ($(CONFIG_QEMU),ioemu)
+ [ -h ioemu/Makefile ] || ( cd ioemu && \
+ ln -sf ../$(XEN_ROOT)/tools/ioemu/* .)
+else
+ set -e; \
+ $(absolutify_xen_root); \
+ cd ioemu; \
+ src="$$XEN_ROOT/tools/ioemu-dir"; export src; \
+ (cd $$src && find * -type d -print) | xargs mkdir -p; \
+ (cd $$src && find * ! -type l -type f $(addprefix ! -name , \
+ '*.[oda1]' 'config-*' config.mak qemu-dm qemu-img-xen \
+ '*.html' '*.pod' \
+ )) | \
+ while read f; do rm -f "$$f"; ln -s "$$src/$$f" "$$f"; done
+endif
+ touch ioemu/linkfarm.stamp
+
+mk-headers-$(XEN_TARGET_ARCH): ioemu/linkfarm.stamp
mkdir -p include/xen && \
ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) include/xen && \
ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) include/xen && \
@@ -183,22 +202,6 @@ mk-headers-$(XEN_TARGET_ARCH):
ln -sf ../$(XEN_ROOT)/tools/libxc/$(XEN_TARGET_ARCH)/*.c . && \
ln -sf ../$(XEN_ROOT)/tools/libxc/$(XEN_TARGET_ARCH)/*.h . && \
ln -sf ../$(XEN_ROOT)/tools/libxc/$(XEN_TARGET_ARCH)/Makefile . )
- mkdir -p ioemu
-ifeq ($(CONFIG_QEMU),ioemu)
- [ -h ioemu/Makefile ] || ( cd ioemu && \
- ln -sf ../$(XEN_ROOT)/tools/ioemu/* .)
-else
- [ -h ioemu/Makefile ] || ( cd ioemu && \
- ln -sf $(CONFIG_QEMU)/* . && \
- rm -fr i386-dm && \
- rm -fr i386-stubdom && \
- mkdir i386-dm && \
- mkdir i386-stubdom && \
- ln -sf $(CONFIG_QEMU)/i386-dm/* i386-dm/ && \
- ln -sf $(CONFIG_QEMU)/i386-stubdom/* i386-stubdom/ )
-endif
- [ ! -h ioemu/config-host.h ] || rm -f ioemu/config-host.h
- [ ! -h ioemu/config-host.mak ] || rm -f ioemu/config-host.mak
$(MAKE) -C $(MINI_OS) links
touch mk-headers-$(XEN_TARGET_ARCH)

@@ -1,149 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221568859 -3600
# Node ID 879330497672d96ee966c9774d21c437895f6839
# Parent 88445b184dc666fc196cffab19eac75cd9c10b87
x86, microcode: Do not run microcode update in IRQ context.
It's unnecessary, and also invalid since the update process tries to
allocate memory.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
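Out of IRQ context, the update instead hops across CPUs: each
invocation applies the buffer locally, keeps the first error, and
re-queues itself on the next online CPU via
continue_hypercall_on_cpu(). A self-contained model of that chain,
where run_on_cpu(), next_online_cpu() and apply_local() are
hypothetical stand-ins for the Xen primitives (and plain recursion
replaces the real rescheduling):

#define TOY_NR_CPUS 4

struct ucode_work {
    unsigned int cpu;
    int error;
};

static int apply_local(unsigned int cpu) { (void)cpu; return 0; }

static unsigned int next_online_cpu(unsigned int cpu) { return cpu + 1; }

/* In Xen this would be continue_hypercall_on_cpu(); here we just recurse. */
static long run_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg);

static long do_update(void *arg)
{
    struct ucode_work *w = arg;
    int err = apply_local(w->cpu);

    if (err && !w->error)
        w->error = err;              /* keep the first failure */

    w->cpu = next_online_cpu(w->cpu);
    if (w->cpu >= TOY_NR_CPUS)
        return w->error;             /* last hop reports the verdict */

    return run_on_cpu(w->cpu, do_update, arg);   /* hop to the next CPU */
}

static long run_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
    (void)cpu;
    return fn(arg);
}

int main(void)
{
    struct ucode_work w = { 0, 0 };
    return (int)run_on_cpu(w.cpu, do_update, &w);   /* 0 on success */
}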
Index: xen-3.3.1-testing/xen/arch/x86/microcode.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/microcode.c
+++ xen-3.3.1-testing/xen/arch/x86/microcode.c
@@ -45,14 +45,13 @@ static DEFINE_SPINLOCK(microcode_mutex);
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
-struct microcode_buffer {
- void *buf;
- size_t size;
+struct microcode_info {
+ unsigned int cpu;
+ uint32_t buffer_size;
+ int error;
+ char buffer[1];
};
-static struct microcode_buffer microcode_buffer;
-static bool_t microcode_error;
-
static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -110,14 +109,12 @@ static int microcode_resume_cpu(int cpu)
return err;
}
-static int microcode_update_cpu(int cpu, const void *buf, size_t size)
+static int microcode_update_cpu(const void *buf, size_t size)
{
- int err = 0;
+ int err;
+ unsigned int cpu = smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- /* We should bind the task to the CPU */
- BUG_ON(raw_smp_processor_id() != cpu);
-
spin_lock(&microcode_mutex);
/*
@@ -140,72 +137,50 @@ static int microcode_update_cpu(int cpu,
return err;
}
-static void do_microcode_update_one(void *info)
+static long do_microcode_update(void *_info)
{
+ struct microcode_info *info = _info;
int error;
- error = microcode_update_cpu(
- smp_processor_id(), microcode_buffer.buf, microcode_buffer.size);
+ BUG_ON(info->cpu != smp_processor_id());
- if ( error )
- microcode_error = error;
-}
+ error = microcode_update_cpu(info->buffer, info->buffer_size);
-static int do_microcode_update(void)
-{
- int error = 0;
-
- microcode_error = 0;
-
- if ( on_each_cpu(do_microcode_update_one, NULL, 1, 1) != 0 )
- {
- printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
- error = -EIO;
- goto out;
- }
+ if ( error )
+ info->error = error;
- if ( microcode_error )
- {
- error = microcode_error;
- goto out;
- }
+ info->cpu = next_cpu(info->cpu, cpu_online_map);
+ if ( info->cpu >= NR_CPUS )
+ return info->error;
- out:
- return error;
+ return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
}
int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len)
{
int ret;
+ struct microcode_info *info;
- /* XXX FIXME: No allocations in interrupt context. */
- return -EINVAL;
-
- if ( len != (typeof(microcode_buffer.size))len )
- {
- printk(KERN_ERR "microcode: too much data\n");
+ if ( len != (uint32_t)len )
return -E2BIG;
- }
if (microcode_ops == NULL)
return -EINVAL;
- microcode_buffer.buf = xmalloc_array(uint8_t, len);
- if ( microcode_buffer.buf == NULL )
+ info = xmalloc_bytes(sizeof(*info) + len);
+ if ( info == NULL )
return -ENOMEM;
- ret = copy_from_guest(microcode_buffer.buf, buf, len);
+ ret = copy_from_guest(info->buffer, buf, len);
if ( ret != 0 )
+ {
+ xfree(info);
return ret;
+ }
- microcode_buffer.size = len;
- wmb();
-
- ret = do_microcode_update();
-
- xfree(microcode_buffer.buf);
- microcode_buffer.buf = NULL;
- microcode_buffer.size = 0;
+ info->buffer_size = len;
+ info->error = 0;
+ info->cpu = first_cpu(cpu_online_map);
- return ret;
+ return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
}

@@ -1,29 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221569356 -3600
# Node ID f163138e33402ca565d9886df8ecb21e98f77be6
# Parent 879330497672d96ee966c9774d21c437895f6839
x86, microcode: Free microcode_info struct at end of hypercall.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Index: xen-3.3.1-testing/xen/arch/x86/microcode.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/microcode.c
+++ xen-3.3.1-testing/xen/arch/x86/microcode.c
@@ -150,10 +150,13 @@ static long do_microcode_update(void *_i
info->error = error;
info->cpu = next_cpu(info->cpu, cpu_online_map);
- if ( info->cpu >= NR_CPUS )
- return info->error;
+ if ( info->cpu < NR_CPUS )
+ return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+
+ error = info->error;
+ xfree(info);
+ return error;
- return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
}
int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len)

@@ -1,136 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221657190 -3600
# Node ID 366c78ff361bafb2271c551c4976e4caedea72b2
# Parent beb28a3975bd39c93c7934dd5e7ec80c69a86c4a
x86: Allow continue_hypercall_on_cpu() to be called from within an
existing continuation handler. This fix is needed for the new method
of microcode re-programming.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
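The mechanism is a nest count in struct migrate_info: a handler that
itself calls continue_hypercall_on_cpu() reuses the existing info
rather than allocating a second one, and the completion helper only
tears down (restores affinity, frees the info) when the outermost
level finishes. The post-decrement test is the subtle part; reduced to
a sketch:

struct cont_state {
    unsigned int nest;   /* 0 while only the outermost continuation runs */
};

/* Returns 1 when the caller should tear down (free info, restore
 * affinity); nested completions merely drop one level. Mirrors the
 * patch's "if ( info->nest-- == 0 )" in continue_hypercall_on_cpu_helper. */
static int completion_owns_teardown(struct cont_state *c)
{
    return c->nest-- == 0;
}

With one nested call, the inner completion sees nest == 1 (no
teardown, the count drops to 0) and the outer completion then sees
nest == 0 and cleans up.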
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -1356,6 +1356,7 @@ struct migrate_info {
void *data;
void (*saved_schedule_tail)(struct vcpu *);
cpumask_t saved_affinity;
+ unsigned int nest;
};
static void continue_hypercall_on_cpu_helper(struct vcpu *v)
@@ -1363,48 +1364,64 @@ static void continue_hypercall_on_cpu_he
struct cpu_user_regs *regs = guest_cpu_user_regs();
struct migrate_info *info = v->arch.continue_info;
cpumask_t mask = info->saved_affinity;
+ void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
regs->eax = info->func(info->data);
- v->arch.schedule_tail = info->saved_schedule_tail;
- v->arch.continue_info = NULL;
-
- xfree(info);
+ if ( info->nest-- == 0 )
+ {
+ xfree(info);
+ v->arch.schedule_tail = saved_schedule_tail;
+ v->arch.continue_info = NULL;
+ vcpu_unlock_affinity(v, &mask);
+ }
- vcpu_unlock_affinity(v, &mask);
- schedule_tail(v);
+ (*saved_schedule_tail)(v);
}
int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
{
struct vcpu *v = current;
struct migrate_info *info;
+ cpumask_t mask = cpumask_of_cpu(cpu);
int rc;
if ( cpu == smp_processor_id() )
return func(data);
- info = xmalloc(struct migrate_info);
+ info = v->arch.continue_info;
if ( info == NULL )
- return -ENOMEM;
+ {
+ info = xmalloc(struct migrate_info);
+ if ( info == NULL )
+ return -ENOMEM;
- info->func = func;
- info->data = data;
- info->saved_schedule_tail = v->arch.schedule_tail;
- info->saved_affinity = cpumask_of_cpu(cpu);
+ rc = vcpu_lock_affinity(v, &mask);
+ if ( rc )
+ {
+ xfree(info);
+ return rc;
+ }
- v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
- v->arch.continue_info = info;
+ info->saved_schedule_tail = v->arch.schedule_tail;
+ info->saved_affinity = mask;
+ info->nest = 0;
- rc = vcpu_lock_affinity(v, &info->saved_affinity);
- if ( rc )
+ v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
+ v->arch.continue_info = info;
+ }
+ else
{
- v->arch.schedule_tail = info->saved_schedule_tail;
- v->arch.continue_info = NULL;
- xfree(info);
- return rc;
+ BUG_ON(info->nest != 0);
+ rc = vcpu_locked_change_affinity(v, &mask);
+ if ( rc )
+ return rc;
+ info->nest++;
}
+ info->func = func;
+ info->data = data;
+
/* Dummy return value will be overwritten by new schedule_tail. */
BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
return 0;
Index: xen-3.3.1-testing/xen/common/schedule.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/schedule.c
+++ xen-3.3.1-testing/xen/common/schedule.c
@@ -360,6 +360,11 @@ int vcpu_lock_affinity(struct vcpu *v, c
return __vcpu_set_affinity(v, affinity, 0, 1);
}
+int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
+{
+ return __vcpu_set_affinity(v, affinity, 1, 1);
+}
+
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
{
cpumask_t online_affinity;
Index: xen-3.3.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/sched.h
+++ xen-3.3.1-testing/xen/include/xen/sched.h
@@ -534,6 +534,7 @@ void vcpu_force_reschedule(struct vcpu *
void cpu_disable_scheduler(void);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
+int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);

@@ -1,25 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1222087267 -3600
# Node ID 3c42b5ad0a4f607749426f82ecf11f75d84699c5
# Parent e61c7833dc9d87eb7fb41f47d2377370aa9a0b46
x86,amd,microcode: fix hypercall return code
Make the hypercall return failure if the microcode didn't apply.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
--- a/xen/arch/x86/microcode_amd.c
+++ b/xen/arch/x86/microcode_amd.c
@@ -335,10 +335,7 @@ static int cpu_request_microcode(int cpu
* lets keep searching till the latest version
*/
if ( error == 1 )
- {
- apply_microcode(cpu);
- error = 0;
- }
+ error = apply_microcode(cpu);
xfree(mc);
}
if ( offset > 0 )

@@ -1,719 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1222087617 -3600
# Node ID 7f1a36b834e183904f069948d3037d50492d98d2
# Parent 3c42b5ad0a4f607749426f82ecf11f75d84699c5
x86: make GDT per-CPU
The major issue with supporting a significantly larger number of
physical CPUs appears to be the use of per-CPU GDT entries - at
present, x86-64 could support only up to 126 CPUs (with code changes
to also use the top-most GDT page, that would be 254). Instead of
taking incremental steps here, converting the GDT itself to be
per-CPU makes limitations in that respect go away entirely.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
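One payoff shows up in the do_double_fault() and
supervisor_mode_kernel.S hunks below: each per-CPU GDT reserves a
descriptor whose limit field holds the CPU number ("limit == cpu" in
the table comments), so any context can recover its CPU id with a
single LSL and no memory access. The accessor in isolation (GCC x86
inline asm; the PER_CPU_GDT_ENTRY value here is illustrative, the real
one lives in headers not shown in this diff):

#define PER_CPU_GDT_ENTRY 0x1d   /* illustrative slot; see the real headers */

static inline unsigned int gdt_cpu_id(void)
{
    unsigned int cpu;

    /* LSL loads the segment limit of the per-CPU descriptor, which the
     * boot/AP code set to the CPU number ("limit == cpu" in the tables). */
    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
    return cpu;
}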
Index: xen-3.3.1-testing/xen/arch/x86/boot/wakeup.S
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/boot/wakeup.S
+++ xen-3.3.1-testing/xen/arch/x86/boot/wakeup.S
@@ -167,7 +167,7 @@ wakeup_32:
.word 0,0,0
lgdt_descr:
.word LAST_RESERVED_GDT_BYTE
- .quad gdt_table - FIRST_RESERVED_GDT_BYTE
+ .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
wakeup_64:
lgdt lgdt_descr(%rip)
Index: xen-3.3.1-testing/xen/arch/x86/boot/x86_32.S
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/boot/x86_32.S
+++ xen-3.3.1-testing/xen/arch/x86/boot/x86_32.S
@@ -78,7 +78,7 @@ idt_descr:
.word 0
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
- .long gdt_table - FIRST_RESERVED_GDT_BYTE
+ .long boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
.align 32
@@ -94,7 +94,7 @@ ENTRY(idle_pg_table)
#define GUEST_DESC(d) \
.long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff, \
((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
-ENTRY(gdt_table)
+ENTRY(boot_cpu_gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */
@@ -102,4 +102,6 @@ ENTRY(gdt_table)
GUEST_DESC(0x00c0b200) /* 0xe021 ring 1 3.xxGB data at 0x0 */
GUEST_DESC(0x00c0fa00) /* 0xe02b ring 3 3.xxGB code at 0x0 */
GUEST_DESC(0x00c0f200) /* 0xe033 ring 3 3.xxGB data at 0x0 */
+ .fill (PER_CPU_GDT_ENTRY - FLAT_RING3_DS / 8 - 1), 8, 0
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
.align PAGE_SIZE,0
Index: xen-3.3.1-testing/xen/arch/x86/boot/x86_64.S
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/boot/x86_64.S
+++ xen-3.3.1-testing/xen/arch/x86/boot/x86_64.S
@@ -85,7 +85,7 @@ multiboot_ptr:
.word 0
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
- .quad gdt_table - FIRST_RESERVED_GDT_BYTE
+ .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
.word 0,0,0
idt_descr:
@@ -96,7 +96,7 @@ ENTRY(stack_start)
.quad cpu0_stack
.align PAGE_SIZE, 0
-ENTRY(gdt_table)
+ENTRY(boot_cpu_gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
@@ -105,11 +105,13 @@ ENTRY(gdt_table)
.quad 0x00cff2000000ffff /* 0xe02b ring 3 data */
.quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */
.quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
.align PAGE_SIZE, 0
/* NB. Even rings != 0 get access to the full 4Gb, as only the */
/* (compatibility) machine->physical mapping table lives there. */
-ENTRY(compat_gdt_table)
+ENTRY(boot_cpu_compat_gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
@@ -118,4 +120,6 @@ ENTRY(compat_gdt_table)
.quad 0x00cffa000000ffff /* 0xe02b ring 3 code, compatibility */
.quad 0x00cff2000000ffff /* 0xe033 ring 3 data */
.quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
.align PAGE_SIZE, 0
Index: xen-3.3.1-testing/xen/arch/x86/cpu/common.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/cpu/common.c
+++ xen-3.3.1-testing/xen/arch/x86/cpu/common.c
@@ -575,6 +575,9 @@ void __cpuinit cpu_init(void)
if (cpu_has_pat)
wrmsrl(MSR_IA32_CR_PAT, host_pat);
+ /* Install correct page table. */
+ write_ptbase(current);
+
*(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
*(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(current);
asm volatile ( "lgdt %0" : "=m" (gdt_load) );
@@ -605,9 +608,6 @@ void __cpuinit cpu_init(void)
#define CD(register) asm volatile ( "mov %0,%%db" #register : : "r"(0UL) );
CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
#undef CD
-
- /* Install correct page table. */
- write_ptbase(current);
}
#ifdef CONFIG_HOTPLUG_CPU
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -211,7 +211,6 @@ static inline int may_switch_mode(struct
int switch_native(struct domain *d)
{
- l1_pgentry_t gdt_l1e;
unsigned int vcpuid;
if ( d == NULL )
@@ -223,12 +222,8 @@ int switch_native(struct domain *d)
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
- /* switch gdt */
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
if (d->vcpu[vcpuid])
release_compat_l4(d->vcpu[vcpuid]);
}
@@ -238,7 +233,6 @@ int switch_native(struct domain *d)
int switch_compat(struct domain *d)
{
- l1_pgentry_t gdt_l1e;
unsigned int vcpuid;
if ( d == NULL )
@@ -250,15 +244,11 @@ int switch_compat(struct domain *d)
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
- /* switch gdt */
- gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
if ( (d->vcpu[vcpuid] != NULL) &&
(setup_compat_l4(d->vcpu[vcpuid]) != 0) )
goto undo_and_fail;
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
}
domain_set_alloc_bitsize(d);
@@ -267,13 +257,10 @@ int switch_compat(struct domain *d)
undo_and_fail:
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
while ( vcpuid-- != 0 )
{
if ( d->vcpu[vcpuid] != NULL )
release_compat_l4(d->vcpu[vcpuid]);
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
}
return -ENOMEM;
}
@@ -322,7 +309,12 @@ int vcpu_initialise(struct vcpu *v)
if ( is_idle_domain(d) )
{
v->arch.schedule_tail = continue_idle_domain;
- v->arch.cr3 = __pa(idle_pg_table);
+ if ( v->vcpu_id )
+ v->arch.cr3 = d->vcpu[0]->arch.cr3;
+ else if ( !*idle_vcpu )
+ v->arch.cr3 = __pa(idle_pg_table);
+ else if ( !(v->arch.cr3 = clone_idle_pagetable(v)) )
+ return -ENOMEM;
}
v->arch.guest_context.ctrlreg[4] =
@@ -349,8 +341,7 @@ int arch_domain_create(struct domain *d,
#ifdef __x86_64__
struct page_info *pg;
#endif
- l1_pgentry_t gdt_l1e;
- int i, vcpuid, pdpt_order, paging_initialised = 0;
+ int i, pdpt_order, paging_initialised = 0;
int rc = -ENOMEM;
d->arch.hvm_domain.hap_enabled =
@@ -369,18 +360,6 @@ int arch_domain_create(struct domain *d,
goto fail;
memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
- /*
- * Map Xen segments into every VCPU's GDT, irrespective of whether every
- * VCPU will actually be used. This avoids an NMI race during context
- * switch: if we take an interrupt after switching CR3 but before switching
- * GDT, and the old VCPU# is invalid in the new domain, we would otherwise
- * try to load CS from an invalid table.
- */
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
- for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
-
#if defined(__i386__)
mapcache_domain_init(d);
@@ -1193,9 +1172,12 @@ static void paravirt_ctxt_switch_to(stru
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int cpu = smp_processor_id();
+ unsigned int i, cpu = smp_processor_id();
struct vcpu *p = per_cpu(curr_vcpu, cpu);
struct vcpu *n = current;
+ struct desc_struct *gdt;
+ struct page_info *page;
+ struct desc_ptr gdt_desc;
ASSERT(p != n);
ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
@@ -1221,14 +1203,30 @@ static void __context_switch(void)
cpu_set(cpu, n->domain->domain_dirty_cpumask);
cpu_set(cpu, n->vcpu_dirty_cpumask);
+ gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu) :
+ per_cpu(compat_gdt_table, cpu);
+ page = virt_to_page(gdt);
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ {
+ l1e_write(n->domain->arch.mm_perdomain_pt +
+ (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE + i,
+ l1e_from_page(page + i, __PAGE_HYPERVISOR));
+ }
+
+ if ( p->vcpu_id != n->vcpu_id )
+ {
+ gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
+ gdt_desc.base = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+ }
+
write_ptbase(n);
if ( p->vcpu_id != n->vcpu_id )
{
- char gdt_load[10];
- *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
- *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(n);
- asm volatile ( "lgdt %0" : "=m" (gdt_load) );
+ gdt_desc.base = GDT_VIRT_START(n);
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
}
if ( p->domain != n->domain )
@@ -1279,8 +1277,6 @@ void context_switch(struct vcpu *prev, s
uint64_t efer = read_efer();
if ( !(efer & EFER_SCE) )
write_efer(efer | EFER_SCE);
- flush_tlb_one_local(GDT_VIRT_START(next) +
- FIRST_RESERVED_GDT_BYTE);
}
#endif
Index: xen-3.3.1-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain_build.c
+++ xen-3.3.1-testing/xen/arch/x86/domain_build.c
@@ -314,24 +314,11 @@ int __init construct_dom0(
#if defined(__x86_64__)
if ( compat32 )
{
- l1_pgentry_t gdt_l1e;
-
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];
if ( nr_pages != (unsigned int)nr_pages )
nr_pages = UINT_MAX;
-
- /*
- * Map compatibility Xen segments into every VCPU's GDT. See
- * arch_domain_create() for further comments.
- */
- gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
- PAGE_HYPERVISOR);
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
- flush_tlb_one_local(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
}
#endif
Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c
@@ -446,7 +446,7 @@ static void vmx_set_host_env(struct vcpu
__vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
- __vmwrite(HOST_TR_SELECTOR, __TSS(cpu) << 3);
+ __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
__vmwrite(HOST_TR_BASE, (unsigned long)&init_tss[cpu]);
__vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
Index: xen-3.3.1-testing/xen/arch/x86/setup.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/setup.c
+++ xen-3.3.1-testing/xen/arch/x86/setup.c
@@ -115,6 +115,12 @@ extern void early_cpu_init(void);
extern void vesa_init(void);
extern void vesa_mtrr_init(void);
+DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
+#ifdef CONFIG_COMPAT
+DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
+ = boot_cpu_compat_gdt_table;
+#endif
+
struct tss_struct init_tss[NR_CPUS];
char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
@@ -224,6 +230,7 @@ static void __init percpu_init_areas(voi
static void __init init_idle_domain(void)
{
struct domain *idle_domain;
+ unsigned int i;
/* Domain creation requires that scheduler structures are initialised. */
scheduler_init();
@@ -236,6 +243,12 @@ static void __init init_idle_domain(void
idle_vcpu[0] = this_cpu(curr_vcpu) = current;
setup_idle_pagetable();
+
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ idle_domain->arch.mm_perdomain_pt[FIRST_RESERVED_GDT_PAGE + i] =
+ l1e_from_page(virt_to_page(boot_cpu_gdt_table) + i,
+ __PAGE_HYPERVISOR);
+
}
static void __init srat_detect_node(int cpu)
@@ -443,7 +456,6 @@ void __init __start_xen(unsigned long mb
parse_video_info();
set_current((struct vcpu *)0xfffff000); /* debug sanity */
- idle_vcpu[0] = current;
set_processor_id(0); /* needed early, for smp_processor_id() */
if ( cpu_has_efer )
rdmsrl(MSR_EFER, this_cpu(efer));
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
@@ -828,10 +828,15 @@ static int __devinit do_boot_cpu(int api
*/
{
unsigned long boot_error;
+ unsigned int i;
int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
struct vcpu *v;
+ struct desc_struct *gdt;
+#ifdef __x86_64__
+ struct page_info *page;
+#endif
/*
* Save current MTRR state in case it was changed since early boot
@@ -857,6 +862,37 @@ static int __devinit do_boot_cpu(int api
/* Debug build: detect stack overflow by setting up a guard page. */
memguard_guard_stack(stack_start.esp);
+ gdt = per_cpu(gdt_table, cpu);
+ if (gdt == boot_cpu_gdt_table) {
+ i = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+#ifdef __x86_64__
+#ifdef CONFIG_COMPAT
+ page = alloc_domheap_pages(NULL, i,
+ MEMF_node(cpu_to_node(cpu)));
+ per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
+ memcpy(gdt, boot_cpu_compat_gdt_table,
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+#endif
+ page = alloc_domheap_pages(NULL, i,
+ MEMF_node(cpu_to_node(cpu)));
+ per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
+#else
+ per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(i);
+#endif
+ memcpy(gdt, boot_cpu_gdt_table,
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+ BUILD_BUG_ON(NR_CPUS > 0x10000);
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+ }
+
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ v->domain->arch.mm_perdomain_pt
+ [(v->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE + i]
+ = l1e_from_page(virt_to_page(gdt) + i,
+ __PAGE_HYPERVISOR);
+
/*
* This grunge runs the startup process for
* the targeted processor.
Index: xen-3.3.1-testing/xen/arch/x86/traps.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/traps.c
+++ xen-3.3.1-testing/xen/arch/x86/traps.c
@@ -2978,13 +2978,13 @@ void set_intr_gate(unsigned int n, void
void set_tss_desc(unsigned int n, void *addr)
{
_set_tssldt_desc(
- gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+ per_cpu(gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
(unsigned long)addr,
offsetof(struct tss_struct, __cacheline_filler) - 1,
9);
#ifdef CONFIG_COMPAT
_set_tssldt_desc(
- compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+ per_cpu(compat_gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
(unsigned long)addr,
offsetof(struct tss_struct, __cacheline_filler) - 1,
11);
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/mm.c
@@ -132,6 +132,30 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
+unsigned long clone_idle_pagetable(struct vcpu *v)
+{
+ unsigned int i;
+ struct domain *d = v->domain;
+ l3_pgentry_t *l3_table = v->arch.pae_l3_cache.table[0];
+ l2_pgentry_t *l2_table = alloc_xenheap_page();
+
+ if ( !l2_table )
+ return 0;
+
+ memcpy(l3_table, idle_pg_table, L3_PAGETABLE_ENTRIES * sizeof(*l3_table));
+ l3_table[l3_table_offset(PERDOMAIN_VIRT_START)] =
+ l3e_from_page(virt_to_page(l2_table), _PAGE_PRESENT);
+
+ copy_page(l2_table, idle_pg_table_l2 +
+ l3_table_offset(PERDOMAIN_VIRT_START) * L2_PAGETABLE_ENTRIES);
+ for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
+ l2_table[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
+
+ return __pa(l3_table);
+}
+
void __init zap_low_mappings(l2_pgentry_t *dom0_l2)
{
int i;
@@ -186,7 +210,7 @@ void __init subarch_init_memory(void)
{
/* Guest kernel runs in ring 0, not ring 1. */
struct desc_struct *d;
- d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
+ d = &boot_cpu_gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
d[0].b &= ~_SEGMENT_DPL;
d[1].b &= ~_SEGMENT_DPL;
}
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/supervisor_mode_kernel.S
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/supervisor_mode_kernel.S
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/supervisor_mode_kernel.S
@@ -100,15 +100,10 @@ ENTRY(fixup_ring0_guest_stack)
# %gs:%esi now points to the guest stack before the
# interrupt/exception occurred.
- /*
- * Reverse the __TSS macro, giving us the CPU number.
- * The TSS for this cpu is at init_tss + ( cpu * 128 ).
- */
- str %ecx
- shrl $3,%ecx # Calculate GDT index for TSS.
- subl $(FIRST_RESERVED_GDT_ENTRY+8),%ecx # %ecx = 2*cpu.
- shll $6,%ecx # Each TSS entry is 0x80 bytes
- addl $init_tss,%ecx # but we have 2*cpu from above.
+ movl $PER_CPU_GDT_ENTRY*8,%ecx
+ lsll %ecx,%ecx
+ shll $7,%ecx # Each TSS entry is 0x80 bytes
+ addl $init_tss,%ecx
# Load Xen stack from TSS.
movw TSS_ss0(%ecx),%ax
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/traps.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/traps.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/traps.c
@@ -194,13 +194,15 @@ static unsigned char doublefault_stack[D
asmlinkage void do_double_fault(void)
{
- struct tss_struct *tss = &doublefault_tss;
- unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
+ struct tss_struct *tss;
+ unsigned int cpu;
watchdog_disable();
console_force_unlock();
+ asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
+
/* Find information saved during fault and dump it to the console. */
tss = &init_tss[cpu];
printk("*** DOUBLE FAULT ***\n");
@@ -325,7 +327,7 @@ void __devinit subarch_percpu_traps_init
tss->eflags = 2;
tss->bitmap = IOBMP_INVALID_OFFSET;
_set_tssldt_desc(
- gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+ boot_cpu_gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
(unsigned long)tss, 235, 9);
set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
Index: xen-3.3.1-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_64/mm.c
@@ -21,6 +21,7 @@
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
+#include <xen/numa.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
@@ -206,6 +207,24 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
+unsigned long clone_idle_pagetable(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ struct page_info *page = alloc_domheap_page(NULL,
+ MEMF_node(vcpu_to_node(v)));
+ l4_pgentry_t *l4_table = page_to_virt(page);
+
+ if ( !page )
+ return 0;
+
+ copy_page(l4_table, idle_pg_table);
+ l4_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
+ __PAGE_HYPERVISOR);
+
+ return __pa(l4_table);
+}
+
void __init zap_low_mappings(void)
{
BUG_ON(num_online_cpus() != 1);
Index: xen-3.3.1-testing/xen/arch/x86/x86_64/traps.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/traps.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_64/traps.c
@@ -213,15 +213,14 @@ void show_page_walk(unsigned long addr)
asmlinkage void double_fault(void);
asmlinkage void do_double_fault(struct cpu_user_regs *regs)
{
- unsigned int cpu, tr;
-
- asm volatile ( "str %0" : "=r" (tr) );
- cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
+ unsigned int cpu;
watchdog_disable();
console_force_unlock();
+ asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
+
/* Find information saved during fault and dump it to the console. */
printk("*** DOUBLE FAULT ***\n");
print_xen_info();
Index: xen-3.3.1-testing/xen/include/asm-x86/desc.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/desc.h
+++ xen-3.3.1-testing/xen/include/asm-x86/desc.h
@@ -34,11 +34,9 @@
#define FLAT_COMPAT_USER_CS FLAT_COMPAT_RING3_CS
#define FLAT_COMPAT_USER_SS FLAT_COMPAT_RING3_SS
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
-
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 2)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 2)
#elif defined(__i386__)
@@ -51,17 +49,15 @@
#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
-
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 1)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 1)
#endif
#ifndef __ASSEMBLY__
-#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
+#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (TSS_ENTRY<<3) )
#if defined(__x86_64__)
#define GUEST_KERNEL_RPL(d) (is_pv_32bit_domain(d) ? 1 : 3)
@@ -205,11 +201,19 @@ do {
#endif
-extern struct desc_struct gdt_table[];
+struct desc_ptr {
+ unsigned short limit;
+ unsigned long base;
+} __attribute__((__packed__)) ;
+
+extern struct desc_struct boot_cpu_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, gdt_table);
#ifdef CONFIG_COMPAT
-extern struct desc_struct compat_gdt_table[];
+extern struct desc_struct boot_cpu_compat_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, compat_gdt_table);
#else
-# define compat_gdt_table gdt_table
+# define boot_cpu_compat_gdt_table boot_cpu_gdt_table
+# define per_cpu__compat_gdt_table per_cpu__gdt_table
#endif
extern void set_intr_gate(unsigned int irq, void * addr);
Index: xen-3.3.1-testing/xen/include/asm-x86/ldt.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/ldt.h
+++ xen-3.3.1-testing/xen/include/asm-x86/ldt.h
@@ -6,7 +6,6 @@
static inline void load_LDT(struct vcpu *v)
{
- unsigned int cpu;
struct desc_struct *desc;
unsigned long ents;
@@ -16,11 +15,11 @@ static inline void load_LDT(struct vcpu
}
else
{
- cpu = smp_processor_id();
- desc = (!is_pv_32on64_vcpu(v) ? gdt_table : compat_gdt_table)
- + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
+ desc = (!is_pv_32on64_vcpu(v)
+ ? this_cpu(gdt_table) : this_cpu(compat_gdt_table))
+ + LDT_ENTRY - FIRST_RESERVED_GDT_ENTRY;
_set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
- __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
+ __asm__ __volatile__ ( "lldt %%ax" : : "a" (LDT_ENTRY << 3) );
}
}
Index: xen-3.3.1-testing/xen/include/asm-x86/page.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/page.h
+++ xen-3.3.1-testing/xen/include/asm-x86/page.h
@@ -278,6 +278,7 @@ extern unsigned int m2p_compat_vstart;
#endif
void paging_init(void);
void setup_idle_pagetable(void);
+unsigned long clone_idle_pagetable(struct vcpu *);
#endif /* !defined(__ASSEMBLY__) */
#define _PAGE_PRESENT 0x001U
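A note on the recurring idiom in the patch above: the boot path stores each CPU's number in the segment limit of its PER_CPU_GDT_ENTRY descriptor (the gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu assignment in do_boot_cpu(), guarded by BUILD_BUG_ON(NR_CPUS > 0x10000) so the number fits the 16-bit limit field), which lets later code recover the CPU number with a single lsll instruction instead of reversing the old __TSS(n) arithmetic. A minimal sketch of that lookup, mirroring what do_double_fault() does above (the helper name is illustrative):

/* Sketch: recover the CPU number stored as the limit of the per-CPU
 * GDT slot. LSL loads the segment limit of the given selector. */
static inline unsigned int percpu_gdt_cpu(void)
{
    unsigned int cpu;
    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
    return cpu;
}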
@ -1,128 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1222088424 -3600
# Node ID 6d3b932cbecac19bf71d7be51e4f4489089ed753
# Parent 7f1a36b834e183904f069948d3037d50492d98d2
i386: make double fault TSS per-CPU
As a follow-up to the per-CPU-GDT patch, this also makes the double
fault TSS (and the associated stack) per-CPU.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
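Concretely, the per-CPU layout used below packs each CPU's double-fault TSS and its fault stack into one page: the TSS sits at the start and the stack grows down from the page end, which is why the old DOUBLEFAULT_STACK_SIZE constant can go away. A minimal sketch of that setup (a hypothetical helper; the field assignments follow subarch_percpu_traps_init() below):

/* Sketch: one page serves as both the double-fault TSS and its stack.
 *   [ struct tss_struct | ...... fault stack (grows down) ...... ]
 *   ^ tss                                             tss->esp ^  */
static void init_df_tss(struct tss_struct *tss)
{
    tss->ds = tss->es = tss->ss = __HYPERVISOR_DS;
    tss->esp = (unsigned long)tss + PAGE_SIZE;
    tss->__cr3 = __pa(idle_pg_table);
    tss->cs = __HYPERVISOR_CS;
    tss->eip = (unsigned long)do_double_fault;
}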
Index: xen-3.3.1-testing/xen/arch/x86/boot/x86_32.S
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/boot/x86_32.S
+++ xen-3.3.1-testing/xen/arch/x86/boot/x86_32.S
@@ -95,7 +95,7 @@ ENTRY(idle_pg_table)
.long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff, \
((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
ENTRY(boot_cpu_gdt_table)
- .quad 0x0000000000000000 /* unused */
+ .quad 0x0000000000000000 /* double fault TSS */
.quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */
GUEST_DESC(0x00c0ba00) /* 0xe019 ring 1 3.xxGB code at 0x0 */
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
@@ -893,6 +893,13 @@ static int __devinit do_boot_cpu(int api
= l1e_from_page(virt_to_page(gdt) + i,
__PAGE_HYPERVISOR);
+#ifdef __i386__
+ if (!per_cpu(doublefault_tss, cpu)) {
+ per_cpu(doublefault_tss, cpu) = alloc_xenheap_page();
+ memset(per_cpu(doublefault_tss, cpu), 0, PAGE_SIZE);
+ }
+#endif
+
/*
* This grunge runs the startup process for
* the targeted processor.
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/traps.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/traps.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/traps.c
@@ -188,9 +188,9 @@ void show_page_walk(unsigned long addr)
unmap_domain_page(l1t);
}
-#define DOUBLEFAULT_STACK_SIZE 2048
-static struct tss_struct doublefault_tss;
-static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+DEFINE_PER_CPU(struct tss_struct *, doublefault_tss);
+static unsigned char __attribute__ ((__section__ (".bss.page_aligned")))
+ boot_cpu_doublefault_space[PAGE_SIZE];
asmlinkage void do_double_fault(void)
{
@@ -303,34 +303,36 @@ static void set_task_gate(unsigned int n
void __devinit subarch_percpu_traps_init(void)
{
- struct tss_struct *tss = &doublefault_tss;
+ struct tss_struct *tss = this_cpu(doublefault_tss);
asmlinkage int hypercall(void);
- if ( smp_processor_id() != 0 )
- return;
+ if ( !tss )
+ {
+ /* The hypercall entry vector is only accessible from ring 1. */
+ _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
- /* The hypercall entry vector is only accessible from ring 1. */
- _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
+ tss = (void *)boot_cpu_doublefault_space;
+ this_cpu(doublefault_tss) = tss;
+ }
/*
* Make a separate task for double faults. This will get us debug output if
* we blow the kernel stack.
*/
- memset(tss, 0, sizeof(*tss));
tss->ds = __HYPERVISOR_DS;
tss->es = __HYPERVISOR_DS;
tss->ss = __HYPERVISOR_DS;
- tss->esp = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+ tss->esp = (unsigned long)tss + PAGE_SIZE;
tss->__cr3 = __pa(idle_pg_table);
tss->cs = __HYPERVISOR_CS;
tss->eip = (unsigned long)do_double_fault;
tss->eflags = 2;
tss->bitmap = IOBMP_INVALID_OFFSET;
_set_tssldt_desc(
- boot_cpu_gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+ this_cpu(gdt_table) + DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
(unsigned long)tss, 235, 9);
- set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
+ set_task_gate(TRAP_double_fault, DOUBLEFAULT_TSS_ENTRY << 3);
}
void init_int80_direct_trap(struct vcpu *v)
Index: xen-3.3.1-testing/xen/include/asm-x86/desc.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/desc.h
+++ xen-3.3.1-testing/xen/include/asm-x86/desc.h
@@ -47,7 +47,7 @@
#define FLAT_COMPAT_USER_DS FLAT_USER_DS
#define FLAT_COMPAT_USER_SS FLAT_USER_SS
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
+#define DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
#define LDT_ENTRY (TSS_ENTRY + 1)
@@ -199,6 +199,8 @@ do {
(((u32)(addr) & 0x00FF0000U) >> 16); \
} while (0)
+DECLARE_PER_CPU(struct tss_struct *, doublefault_tss);
+
#endif
struct desc_ptr {
@ -1,201 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1222090651 -3600
# Node ID c0db74e416626f34cf91b0eefe659bcfe8b43a35
# Parent ae24b533dc9d0d5ce05b34a1ef72917589b4e63d
Fix misc issues related to allowing support of more CPUs
This mainly means removing stack variables that (should) depend on
NR_CPUS (other than cpumask_t ones) and adjusting certain array sizes.
There's at least one open tools issue: the 'xm vcpu-pin' path assumes
a maximum of 64 CPUs in many places.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
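The transformation applied throughout is mechanical: every on-stack array sized by NR_CPUS becomes a heap allocation sized by the CPUs that can actually come online, since the hypervisor stack is small and fixed while NR_CPUS may now be large. A sketch of the before/after shape (hypothetical function; the allocator calls are those used in sedf_adjust_weights() below):

/* Before: int sumw[NR_CPUS] = { 0 };  -- stack use scales with the build. */
static int sum_per_cpu(void)
{
    unsigned int nr_cpus = last_cpu(cpu_possible_map) + 1;
    int *sumw = xmalloc_array(int, nr_cpus);

    if ( sumw == NULL )
        return -ENOMEM;    /* callers must now tolerate allocation failure */
    memset(sumw, 0, nr_cpus * sizeof(*sumw));
    /* ... accumulate into sumw[cpu] exactly as the stack version did ... */
    xfree(sumw);
    return 0;
}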
Index: xen-3.3.1-testing/xen/arch/x86/nmi.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/nmi.c
+++ xen-3.3.1-testing/xen/arch/x86/nmi.c
@@ -96,7 +96,7 @@ int nmi_active;
int __init check_nmi_watchdog (void)
{
- unsigned int prev_nmi_count[NR_CPUS];
+ static unsigned int __initdata prev_nmi_count[NR_CPUS];
int cpu;
if ( !nmi_watchdog )
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
@@ -1119,7 +1119,7 @@ static void __init smp_boot_cpus(unsigne
Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
kicked = 1;
- for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
+ for (bit = 0; kicked < NR_CPUS && bit < NR_CPUS; bit++) {
apicid = cpu_present_to_apicid(bit);
/*
* Don't even attempt to start the boot CPU!
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/domain_page.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/domain_page.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/domain_page.c
@@ -201,6 +201,9 @@ void *map_domain_page_global(unsigned lo
ASSERT(!in_irq() && local_irq_is_enabled());
+ /* At least half the ioremap space should be available to us. */
+ BUILD_BUG_ON(IOREMAP_VIRT_START + (IOREMAP_MBYTES << 19) >= FIXADDR_START);
+
spin_lock(&globalmap_lock);
idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
Index: xen-3.3.1-testing/xen/common/domctl.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/domctl.c
+++ xen-3.3.1-testing/xen/common/domctl.c
@@ -145,16 +145,23 @@ static unsigned int default_vcpu0_locati
{
struct domain *d;
struct vcpu *v;
- unsigned int i, cpu, cnt[NR_CPUS] = { 0 };
+ unsigned int i, cpu, nr_cpus, *cnt;
cpumask_t cpu_exclude_map;
/* Do an initial CPU placement. Pick the least-populated CPU. */
- rcu_read_lock(&domlist_read_lock);
- for_each_domain ( d )
- for_each_vcpu ( d, v )
- if ( !test_bit(_VPF_down, &v->pause_flags) )
- cnt[v->processor]++;
- rcu_read_unlock(&domlist_read_lock);
+ nr_cpus = last_cpu(cpu_possible_map) + 1;
+ cnt = xmalloc_array(unsigned int, nr_cpus);
+ if ( cnt )
+ {
+ memset(cnt, 0, nr_cpus * sizeof(*cnt));
+
+ rcu_read_lock(&domlist_read_lock);
+ for_each_domain ( d )
+ for_each_vcpu ( d, v )
+ if ( !test_bit(_VPF_down, &v->pause_flags) )
+ cnt[v->processor]++;
+ rcu_read_unlock(&domlist_read_lock);
+ }
/*
* If we're on a HT system, we only auto-allocate to a non-primary HT. We
@@ -172,10 +179,12 @@ static unsigned int default_vcpu0_locati
(cpus_weight(cpu_sibling_map[i]) > 1) )
continue;
cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
- if ( cnt[i] <= cnt[cpu] )
+ if ( !cnt || cnt[i] <= cnt[cpu] )
cpu = i;
}
+ xfree(cnt);
+
return cpu;
}
Index: xen-3.3.1-testing/xen/common/sched_credit.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/sched_credit.c
+++ xen-3.3.1-testing/xen/common/sched_credit.c
@@ -1258,14 +1258,15 @@ csched_dump_pcpu(int cpu)
struct csched_pcpu *spc;
struct csched_vcpu *svc;
int loop;
+ char cpustr[100];
spc = CSCHED_PCPU(cpu);
runq = &spc->runq;
- printk(" sort=%d, sibling=0x%lx, core=0x%lx\n",
- spc->runq_sort_last,
- cpu_sibling_map[cpu].bits[0],
- cpu_core_map[cpu].bits[0]);
+ cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_sibling_map[cpu]);
+ printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
+ cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_core_map[cpu]);
+ printk("core=%s\n", cpustr);
/* current VCPU */
svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
@@ -1292,6 +1293,7 @@ csched_dump(void)
{
struct list_head *iter_sdom, *iter_svc;
int loop;
+ char idlers_buf[100];
printk("info:\n"
"\tncpus = %u\n"
@@ -1317,7 +1319,8 @@ csched_dump(void)
CSCHED_TICKS_PER_TSLICE,
CSCHED_TICKS_PER_ACCT);
- printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+ cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
+ printk("idlers: %s\n", idlers_buf);
CSCHED_STATS_PRINTK();
Index: xen-3.3.1-testing/xen/common/sched_sedf.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/sched_sedf.c
+++ xen-3.3.1-testing/xen/common/sched_sedf.c
@@ -1298,8 +1298,18 @@ static int sedf_adjust_weights(struct xe
{
struct vcpu *p;
struct domain *d;
- int sumw[NR_CPUS] = { 0 };
- s_time_t sumt[NR_CPUS] = { 0 };
+ unsigned int nr_cpus = last_cpu(cpu_possible_map) + 1;
+ int *sumw = xmalloc_array(int, nr_cpus);
+ s_time_t *sumt = xmalloc_array(s_time_t, nr_cpus);
+
+ if ( !sumw || !sumt )
+ {
+ xfree(sumt);
+ xfree(sumw);
+ return -ENOMEM;
+ }
+ memset(sumw, 0, nr_cpus * sizeof(*sumw));
+ memset(sumt, 0, nr_cpus * sizeof(*sumt));
/* Sum across all weights. */
rcu_read_lock(&domlist_read_lock);
@@ -1348,6 +1358,9 @@ static int sedf_adjust_weights(struct xe
}
rcu_read_unlock(&domlist_read_lock);
+ xfree(sumt);
+ xfree(sumw);
+
return 0;
}
@@ -1356,6 +1369,7 @@ static int sedf_adjust_weights(struct xe
static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
{
struct vcpu *v;
+ int rc;
PRINT(2,"sedf_adjust was called, domain-id %i new period %"PRIu64" "
"new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
@@ -1411,8 +1425,9 @@ static int sedf_adjust(struct domain *p,
}
}
- if ( sedf_adjust_weights(op) )
- return -EINVAL;
+ rc = sedf_adjust_weights(op);
+ if ( rc )
+ return rc;
for_each_vcpu ( p, v )
{
@ -1,113 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1222095059 -3600
# Node ID 81483e49c74c314ae3ed098c1373dfc3f2d3f31e
# Parent ae29cd95ba7d7f5cdcbb32509575b83e9fb3d43c
Add debug key 'e' for event channel information
Signed-off-by: Jan Beulich <jbeulich@novell.com>
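For reference, everything needed to hook a new debug key into Xen is the pair of calls sketched below; the patch's dump_evtchn_info_key_init() is an instance of exactly this pattern (the 'x' key and the names here are illustrative):

#include <xen/keyhandler.h>

static void dump_example_info(unsigned char key)
{
    printk("'%c' pressed -> dumping example info\n", key);
    /* ... walk the state of interest under suitable locks, printk() it ... */
}

static int __init dump_example_key_init(void)
{
    register_keyhandler('x', dump_example_info, "dump example info");
    return 0;
}
__initcall(dump_example_key_init);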
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -25,6 +25,7 @@
#include <xen/iocap.h>
#include <xen/compat.h>
#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
#include <asm/current.h>
#include <public/xen.h>
@@ -1046,6 +1047,77 @@ void evtchn_destroy(struct domain *d)
spin_unlock(&d->evtchn_lock);
}
+static void domain_dump_evtchn_info(struct domain *d)
+{
+ unsigned int port;
+
+ printk("Domain %d polling vCPUs: %08lx\n", d->domain_id, d->poll_mask[0]);
+
+ if ( !spin_trylock(&d->evtchn_lock) )
+ return;
+
+ printk("Event channel information for domain %d:\n",
+ d->domain_id);
+
+ for ( port = 1; port < MAX_EVTCHNS(d); ++port )
+ {
+ const struct evtchn *chn;
+
+ if ( !port_is_valid(d, port) )
+ continue;
+ chn = evtchn_from_port(d, port);
+ if ( chn->state == ECS_FREE )
+ continue;
+
+ printk(" %4u[%d/%d]: s=%d n=%d",
+ port,
+ test_bit(port, &shared_info(d, evtchn_pending)),
+ test_bit(port, &shared_info(d, evtchn_mask)),
+ chn->state, chn->notify_vcpu_id);
+ switch ( chn->state )
+ {
+ case ECS_UNBOUND:
+ printk(" d=%d", chn->u.unbound.remote_domid);
+ break;
+ case ECS_INTERDOMAIN:
+ printk(" d=%d p=%d",
+ chn->u.interdomain.remote_dom->domain_id,
+ chn->u.interdomain.remote_port);
+ break;
+ case ECS_PIRQ:
+ printk(" p=%d", chn->u.pirq);
+ break;
+ case ECS_VIRQ:
+ printk(" v=%d", chn->u.virq);
+ break;
+ }
+ printk(" x=%d\n", chn->consumer_is_xen);
+ }
+
+ spin_unlock(&d->evtchn_lock);
+}
+
+static void dump_evtchn_info(unsigned char key)
+{
+ struct domain *d;
+
+ printk("'%c' pressed -> dumping event-channel info\n", key);
+
+ rcu_read_lock(&domlist_read_lock);
+
+ for_each_domain ( d )
+ domain_dump_evtchn_info(d);
+
+ rcu_read_unlock(&domlist_read_lock);
+}
+
+static int __init dump_evtchn_info_key_init(void)
+{
+ register_keyhandler('e', dump_evtchn_info, "dump evtchn info");
+ return 0;
+}
+__initcall(dump_evtchn_info_key_init);
+
/*
* Local variables:
* mode: C
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -204,11 +204,11 @@ static void dump_domains(unsigned char k
printk("VCPU information and callbacks for domain %u:\n",
d->domain_id);
for_each_vcpu ( d, v ) {
- printk(" VCPU%d: CPU%d [has=%c] flags=%lx "
+ printk(" VCPU%d: CPU%d [has=%c] flags=%lx poll=%d "
"upcall_pend = %02x, upcall_mask = %02x ",
v->vcpu_id, v->processor,
v->is_running ? 'T':'F',
- v->pause_flags,
+ v->pause_flags, v->poll_evtchn,
vcpu_info(v, evtchn_upcall_pending),
vcpu_info(v, evtchn_upcall_mask));
cpuset_print(tmpstr, sizeof(tmpstr), v->vcpu_dirty_cpumask);
@ -1,654 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1222256215 -3600
# Node ID 31f09a5e24cf8eb8a9d73acc6c23262fe9d463d7
# Parent 7750906b06b3ebbba529e6d1042d7a2a2712623c
x86: Properly synchronise updates to pirq-to-vector mapping.
Per-domain irq mappings are now protected by d->evtchn_lock and by the
per-vector irq_desc lock.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
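The heart of the change is a lock-then-revalidate loop: the irq-to-vector mapping is read before the descriptor lock is held, so once the lock is taken the mapping must be re-read, and the whole attempt retried if it changed in the meantime. This sketch mirrors the shape of the domain_spin_lock_irq_desc() helper added below (the function name here is illustrative):

static irq_desc_t *sketch_lock_irq_desc(struct domain *d, int irq,
                                        unsigned long *pflags)
{
    int vector;
    unsigned long flags;
    irq_desc_t *desc;

    for ( ; ; )
    {
        vector = domain_irq_to_vector(d, irq);
        if ( vector <= 0 )
            return NULL;              /* irq is not (or no longer) mapped */
        desc = &irq_desc[vector];
        spin_lock_irqsave(&desc->lock, flags);
        if ( vector == domain_irq_to_vector(d, irq) )
            break;                    /* mapping unchanged: desc is valid */
        /* Raced with a remap while taking the lock: drop it and retry. */
        spin_unlock_irqrestore(&desc->lock, flags);
    }

    *pflags = flags;
    return desc;
}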
Index: xen-3.3.1-testing/xen/arch/ia64/xen/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/ia64/xen/irq.c
+++ xen-3.3.1-testing/xen/arch/ia64/xen/irq.c
@@ -459,20 +459,24 @@ int pirq_guest_bind(struct vcpu *v, int
return rc;
}
-void pirq_guest_unbind(struct domain *d, int irq)
+int pirq_guest_unbind(struct domain *d, int irq)
{
irq_desc_t *desc = &irq_desc[irq];
irq_guest_action_t *action;
unsigned long flags;
- int i;
+ int i, rc = 0;
spin_lock_irqsave(&desc->lock, flags);
action = (irq_guest_action_t *)desc->action;
- i = 0;
- while ( action->guest[i] && (action->guest[i] != d) )
- i++;
+ for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
+ continue;
+ if ( i == action->nr_guests )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
action->nr_guests--;
@@ -492,7 +496,9 @@ void pirq_guest_unbind(struct domain *d,
desc->handler->shutdown(irq);
}
+ out:
spin_unlock_irqrestore(&desc->lock, flags);
+ return rc;
}
void
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -414,8 +414,6 @@ int arch_domain_create(struct domain *d,
goto fail;
}
- spin_lock_init(&d->arch.irq_lock);
-
if ( is_hvm_domain(d) )
{
if ( (rc = hvm_domain_initialise(d)) != 0 )
Index: xen-3.3.1-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/io_apic.c
+++ xen-3.3.1-testing/xen/arch/x86/io_apic.c
@@ -48,22 +48,6 @@ atomic_t irq_mis_count;
int msi_enable = 0;
boolean_param("msi", msi_enable);
-int domain_irq_to_vector(struct domain *d, int irq)
-{
- if ( !msi_enable )
- return irq_to_vector(irq);
- else
- return d->arch.pirq_vector[irq];
-}
-
-int domain_vector_to_irq(struct domain *d, int vector)
-{
- if ( !msi_enable )
- return vector_to_irq(vector);
- else
- return d->arch.vector_pirq[vector];
-}
-
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -730,7 +714,6 @@ next:
static struct hw_interrupt_type ioapic_level_type;
static struct hw_interrupt_type ioapic_edge_type;
-struct hw_interrupt_type pci_msi_type;
#define IOAPIC_AUTO -1
#define IOAPIC_EDGE 0
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -277,6 +277,35 @@ static void __do_IRQ_guest(int vector)
}
}
+/*
+ * Retrieve Xen irq-descriptor corresponding to a domain-specific irq.
+ * The descriptor is returned locked. This function is safe against changes
+ * to the per-domain irq-to-vector mapping.
+ */
+static irq_desc_t *domain_spin_lock_irq_desc(
+ struct domain *d, int irq, unsigned long *pflags)
+{
+ unsigned int vector;
+ unsigned long flags;
+ irq_desc_t *desc;
+
+ for ( ; ; )
+ {
+ vector = domain_irq_to_vector(d, irq);
+ if ( vector <= 0 )
+ return NULL;
+ desc = &irq_desc[vector];
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( vector == domain_irq_to_vector(d, irq) )
+ break;
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ if ( pflags != NULL )
+ *pflags = flags;
+ return desc;
+}
+
/* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
static void flush_ready_eoi(void *unused)
{
@@ -342,11 +371,13 @@ static void __pirq_guest_eoi(struct doma
cpumask_t cpu_eoi_map;
int vector;
- vector = domain_irq_to_vector(d, irq);
- desc = &irq_desc[vector];
- action = (irq_guest_action_t *)desc->action;
+ ASSERT(local_irq_is_enabled());
+ desc = domain_spin_lock_irq_desc(d, irq, NULL);
+ if ( desc == NULL )
+ return;
- spin_lock_irq(&desc->lock);
+ action = (irq_guest_action_t *)desc->action;
+ vector = desc - irq_desc;
ASSERT(!test_bit(irq, d->pirq_mask) ||
(action->ack_type != ACKTYPE_NONE));
@@ -418,7 +449,7 @@ int pirq_acktype(struct domain *d, int i
unsigned int vector;
vector = domain_irq_to_vector(d, irq);
- if ( vector == 0 )
+ if ( vector <= 0 )
return ACKTYPE_NONE;
desc = &irq_desc[vector];
@@ -447,13 +478,6 @@ int pirq_acktype(struct domain *d, int i
if ( !strcmp(desc->handler->typename, "XT-PIC") )
return ACKTYPE_UNMASK;
- if ( strstr(desc->handler->typename, "MPIC") )
- {
- if ( desc->status & IRQ_LEVEL )
- return (desc->status & IRQ_PER_CPU) ? ACKTYPE_EOI : ACKTYPE_UNMASK;
- return ACKTYPE_NONE; /* edge-triggered => no final EOI */
- }
-
printk("Unknown PIC type '%s' for IRQ %d\n", desc->handler->typename, irq);
BUG();
@@ -462,21 +486,18 @@ int pirq_acktype(struct domain *d, int i
int pirq_shared(struct domain *d, int irq)
{
- unsigned int vector;
irq_desc_t *desc;
irq_guest_action_t *action;
unsigned long flags;
int shared;
- vector = domain_irq_to_vector(d, irq);
- if ( vector == 0 )
+ desc = domain_spin_lock_irq_desc(d, irq, &flags);
+ if ( desc == NULL )
return 0;
- desc = &irq_desc[vector];
-
- spin_lock_irqsave(&desc->lock, flags);
action = (irq_guest_action_t *)desc->action;
shared = ((desc->status & IRQ_GUEST) && (action->nr_guests > 1));
+
spin_unlock_irqrestore(&desc->lock, flags);
return shared;
@@ -489,20 +510,17 @@ int pirq_guest_bind(struct vcpu *v, int
irq_guest_action_t *action, *newaction = NULL;
int rc = 0;
cpumask_t cpumask = CPU_MASK_NONE;
+ unsigned long flags;
- retry:
- vector = domain_irq_to_vector(v->domain, irq);
- if ( vector == 0 )
- {
- rc = -EINVAL;
- goto out;
- }
-
- desc = &irq_desc[vector];
+ WARN_ON(!spin_is_locked(&v->domain->evtchn_lock));
- spin_lock_irq(&desc->lock);
+ retry:
+ desc = domain_spin_lock_irq_desc(v->domain, irq, &flags);
+ if ( desc == NULL )
+ return -EINVAL;
action = (irq_guest_action_t *)desc->action;
+ vector = desc - irq_desc;
if ( !(desc->status & IRQ_GUEST) )
{
@@ -517,7 +535,7 @@ int pirq_guest_bind(struct vcpu *v, int
if ( newaction == NULL )
{
- spin_unlock_irq(&desc->lock);
+ spin_unlock_irqrestore(&desc->lock, flags);
if ( (newaction = xmalloc(irq_guest_action_t)) != NULL )
goto retry;
gdprintk(XENLOG_INFO,
@@ -563,7 +581,7 @@ int pirq_guest_bind(struct vcpu *v, int
*/
ASSERT(action->ack_type == ACKTYPE_EOI);
ASSERT(desc->status & IRQ_DISABLED);
- spin_unlock_irq(&desc->lock);
+ spin_unlock_irqrestore(&desc->lock, flags);
cpu_relax();
goto retry;
}
@@ -579,32 +597,45 @@ int pirq_guest_bind(struct vcpu *v, int
action->guest[action->nr_guests++] = v->domain;
unlock_out:
- spin_unlock_irq(&desc->lock);
+ spin_unlock_irqrestore(&desc->lock, flags);
out:
if ( newaction != NULL )
xfree(newaction);
return rc;
}
-void pirq_guest_unbind(struct domain *d, int irq)
+int pirq_guest_unbind(struct domain *d, int irq)
{
- unsigned int vector;
+ int vector;
irq_desc_t *desc;
irq_guest_action_t *action;
cpumask_t cpu_eoi_map;
- int i;
+ int i, rc = 0;
- vector = domain_irq_to_vector(d, irq);
- desc = &irq_desc[vector];
- BUG_ON(vector == 0);
+ WARN_ON(!spin_is_locked(&d->evtchn_lock));
- spin_lock_irq(&desc->lock);
+ desc = domain_spin_lock_irq_desc(d, irq, &flags);
+ if ( unlikely(desc == NULL) )
+ {
+ if ( !msi_enable || (vector = -domain_irq_to_vector(d, irq)) == 0 )
+ return -EINVAL;
+ BUG_ON(vector <= 0);
+ desc = &irq_desc[vector];
+ spin_lock_irqsave(&desc->lock, flags);
+ d->arch.pirq_vector[irq] = d->arch.vector_pirq[vector] = 0;
+ goto out;
+ }
action = (irq_guest_action_t *)desc->action;
+ vector = desc - irq_desc;
- i = 0;
- while ( action->guest[i] && (action->guest[i] != d) )
- i++;
+ for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
+ continue;
+ if ( i == action->nr_guests )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
action->nr_guests--;
@@ -622,7 +653,7 @@ void pirq_guest_unbind(struct domain *d,
(action->nr_guests != 0) )
{
cpu_eoi_map = action->cpu_eoi_map;
- spin_unlock_irq(&desc->lock);
+ spin_unlock_irqrestore(&desc->lock, flags);
on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0);
spin_lock_irq(&desc->lock);
}
@@ -659,7 +690,7 @@ void pirq_guest_unbind(struct domain *d,
if ( !cpus_empty(cpu_eoi_map) )
{
BUG_ON(action->ack_type != ACKTYPE_EOI);
- spin_unlock_irq(&desc->lock);
+ spin_unlock_irqrestore(&desc->lock, flags);
on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 1);
spin_lock_irq(&desc->lock);
}
@@ -673,9 +704,8 @@ void pirq_guest_unbind(struct domain *d,
desc->handler->shutdown(vector);
out:
- spin_unlock_irq(&desc->lock);
- if ( action != NULL )
- xfree(action);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return rc;
}
extern void dump_ioapic_irq_info(void);
Index: xen-3.3.1-testing/xen/arch/x86/msi.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/msi.c
+++ xen-3.3.1-testing/xen/arch/x86/msi.c
@@ -728,7 +728,6 @@ void pci_disable_msi(int vector)
__pci_disable_msix(vector);
}
-extern struct hw_interrupt_type pci_msi_type;
static void msi_free_vectors(struct pci_dev* dev)
{
struct msi_desc *entry, *tmp;
Index: xen-3.3.1-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/physdev.c
@@ -26,17 +26,11 @@ int
ioapic_guest_write(
unsigned long physbase, unsigned int reg, u32 pval);
-
-extern struct hw_interrupt_type pci_msi_type;
-
static int get_free_pirq(struct domain *d, int type, int index)
{
int i;
- if ( d == NULL )
- return -EINVAL;
-
- ASSERT(spin_is_locked(&d->arch.irq_lock));
+ ASSERT(spin_is_locked(&d->evtchn_lock));
if ( type == MAP_PIRQ_TYPE_GSI )
{
@@ -64,11 +58,10 @@ static int map_domain_pirq(struct domain
int ret = 0;
int old_vector, old_pirq;
struct msi_info msi;
+ irq_desc_t *desc;
+ unsigned long flags;
- if ( d == NULL )
- return -EINVAL;
-
- ASSERT(spin_is_locked(&d->arch.irq_lock));
+ ASSERT(spin_is_locked(&d->evtchn_lock));
if ( !IS_PRIV(current->domain) )
return -EPERM;
@@ -88,8 +81,7 @@ static int map_domain_pirq(struct domain
{
dprintk(XENLOG_G_ERR, "dom%d: pirq %d or vector %d already mapped\n",
d->domain_id, pirq, vector);
- ret = -EINVAL;
- goto done;
+ return -EINVAL;
}
ret = irq_permit_access(d, pirq);
@@ -97,17 +89,14 @@ static int map_domain_pirq(struct domain
{
dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d\n",
d->domain_id, pirq);
- goto done;
+ return ret;
}
+ desc = &irq_desc[vector];
+ spin_lock_irqsave(&desc->lock, flags);
+
if ( map && MAP_PIRQ_TYPE_MSI == map->type )
{
- irq_desc_t *desc;
- unsigned long flags;
-
- desc = &irq_desc[vector];
-
- spin_lock_irqsave(&desc->lock, flags);
if ( desc->handler != &no_irq_type )
dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
d->domain_id, vector);
@@ -120,8 +109,6 @@ static int map_domain_pirq(struct domain
msi.vector = vector;
ret = pci_enable_msi(&msi);
-
- spin_unlock_irqrestore(&desc->lock, flags);
if ( ret )
goto done;
}
@@ -130,6 +117,7 @@ static int map_domain_pirq(struct domain
d->arch.vector_pirq[vector] = pirq;
done:
+ spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
@@ -145,11 +133,11 @@ static int unmap_domain_pirq(struct doma
if ( !IS_PRIV(current->domain) )
return -EINVAL;
- ASSERT(spin_is_locked(&d->arch.irq_lock));
+ ASSERT(spin_is_locked(&d->evtchn_lock));
vector = d->arch.pirq_vector[pirq];
- if ( !vector )
+ if ( vector <= 0 )
{
dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
d->domain_id, pirq);
@@ -159,21 +147,35 @@ static int unmap_domain_pirq(struct doma
{
unsigned long flags;
irq_desc_t *desc;
+ bool_t forced_unbind = (pirq_guest_unbind(d, pirq) == 0);
+
+ if ( forced_unbind )
+ dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
+ d->domain_id, pirq);
desc = &irq_desc[vector];
spin_lock_irqsave(&desc->lock, flags);
+
+ BUG_ON(vector != d->arch.pirq_vector[pirq]);
+
if ( desc->msi_desc )
pci_disable_msi(vector);
if ( desc->handler == &pci_msi_type )
+ desc->handler = &no_irq_type;
+
+ if ( !forced_unbind )
{
- /* MSI is not shared, so should be released already */
- BUG_ON(desc->status & IRQ_GUEST);
- irq_desc[vector].handler = &no_irq_type;
+ d->arch.pirq_vector[pirq] = 0;
+ d->arch.vector_pirq[vector] = 0;
+ }
+ else
+ {
+ d->arch.pirq_vector[pirq] = -vector;
+ d->arch.vector_pirq[vector] = -pirq;
}
- spin_unlock_irqrestore(&desc->lock, flags);
- d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
ret = irq_deny_access(d, pirq);
@@ -189,7 +191,6 @@ static int physdev_map_pirq(struct physd
{
struct domain *d;
int vector, pirq, ret = 0;
- unsigned long flags;
/* if msi_enable is not enabled, map always succeeds */
if ( !msi_enable )
@@ -250,8 +251,8 @@ static int physdev_map_pirq(struct physd
goto free_domain;
}
- spin_lock_irqsave(&d->arch.irq_lock, flags);
- if ( map->pirq == -1 )
+ spin_lock(&d->evtchn_lock);
+ if ( map->pirq < 0 )
{
if ( d->arch.vector_pirq[vector] )
{
@@ -259,6 +260,11 @@ static int physdev_map_pirq(struct physd
d->domain_id, map->index, map->pirq,
d->arch.vector_pirq[vector]);
pirq = d->arch.vector_pirq[vector];
+ if ( pirq < 0 )
+ {
+ ret = -EBUSY;
+ goto done;
+ }
}
else
{
@@ -291,7 +297,7 @@ static int physdev_map_pirq(struct physd
if ( !ret )
map->pirq = pirq;
done:
- spin_unlock_irqrestore(&d->arch.irq_lock, flags);
+ spin_unlock(&d->evtchn_lock);
free_domain:
rcu_unlock_domain(d);
return ret;
@@ -300,7 +306,6 @@ free_domain:
static int physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
{
struct domain *d;
- unsigned long flags;
int ret;
if ( !msi_enable )
@@ -323,9 +328,10 @@ static int physdev_unmap_pirq(struct phy
return -ESRCH;
}
- spin_lock_irqsave(&d->arch.irq_lock, flags);
+ spin_lock(&d->evtchn_lock);
ret = unmap_domain_pirq(d, unmap->pirq);
- spin_unlock_irqrestore(&d->arch.irq_lock, flags);
+ spin_unlock(&d->evtchn_lock);
+
rcu_unlock_domain(d);
return ret;
@@ -431,7 +437,6 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
case PHYSDEVOP_alloc_irq_vector: {
struct physdev_irq irq_op;
- unsigned long flags;
ret = -EFAULT;
if ( copy_from_guest(&irq_op, arg, 1) != 0 )
@@ -456,9 +461,9 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( msi_enable )
{
- spin_lock_irqsave(&dom0->arch.irq_lock, flags);
+ spin_lock(&dom0->evtchn_lock);
ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
- spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
+ spin_unlock(&dom0->evtchn_lock);
}
if ( copy_to_guest(arg, &irq_op, 1) != 0 )
Index: xen-3.3.1-testing/xen/common/event_channel.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/event_channel.c
+++ xen-3.3.1-testing/xen/common/event_channel.c
@@ -376,7 +376,8 @@ static long __evtchn_close(struct domain
break;
case ECS_PIRQ:
- pirq_guest_unbind(d1, chn1->u.pirq);
+ if ( pirq_guest_unbind(d1, chn1->u.pirq) != 0 )
+ BUG();
d1->pirq_to_evtchn[chn1->u.pirq] = 0;
break;
Index: xen-3.3.1-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/domain.h
+++ xen-3.3.1-testing/xen/include/asm-x86/domain.h
@@ -235,7 +235,7 @@ struct arch_domain
/* Shadow translated domain: P2M mapping */
pagetable_t phys_table;
- spinlock_t irq_lock;
+ /* NB. protected by d->evtchn_lock and by irq_desc[vector].lock */
int vector_pirq[NR_VECTORS];
int pirq_vector[NR_PIRQS];
Index: xen-3.3.1-testing/xen/include/asm-x86/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/irq.h
+++ xen-3.3.1-testing/xen/include/asm-x86/irq.h
@@ -52,6 +52,7 @@ extern atomic_t irq_mis_count;
int pirq_acktype(struct domain *d, int irq);
int pirq_shared(struct domain *d , int irq);
-extern int domain_irq_to_vector(struct domain *d, int irq);
-extern int domain_vector_to_irq(struct domain *d, int vector);
+#define domain_irq_to_vector(d, irq) (msi_enable ? (d)->arch.pirq_vector[irq] : irq_to_vector(irq))
+#define domain_vector_to_irq(d, vec) (msi_enable ? (d)->arch.vector_pirq[vec] : vector_to_irq(vec))
+
#endif /* _ASM_HW_IRQ_H */
Index: xen-3.3.1-testing/xen/include/asm-x86/msi.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/msi.h
+++ xen-3.3.1-testing/xen/include/asm-x86/msi.h
@@ -107,7 +107,7 @@ struct msi_desc {
*/
#define NR_HP_RESERVED_VECTORS 20
-extern int vector_irq[NR_VECTORS];
+extern struct hw_interrupt_type pci_msi_type;
/*
* MSI-X Address Register
Index: xen-3.3.1-testing/xen/include/xen/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/irq.h
+++ xen-3.3.1-testing/xen/include/xen/irq.h
@@ -22,7 +22,6 @@ struct irqaction
#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
#define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */
-#define IRQ_LEVEL 64 /* IRQ level triggered */
#define IRQ_PER_CPU 256 /* IRQ is per CPU */
/*
@@ -78,7 +77,7 @@ struct vcpu;
extern int pirq_guest_eoi(struct domain *d, int irq);
extern int pirq_guest_unmask(struct domain *d);
extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
-extern void pirq_guest_unbind(struct domain *d, int irq);
+extern int pirq_guest_unbind(struct domain *d, int irq);
static inline void set_native_irq_info(int irq, cpumask_t mask)
{
@ -1,283 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1222349872 -3600
# Node ID e1507b441be45d6d1cac25a196b53beff857a083
# Parent ddf62f69611127319e3c756b9fbc82e29f59ef36
x86: Clean up and fix 18539:31f09a5e24cf8
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Index: xen-3.3.1-testing/xen/arch/ia64/xen/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/ia64/xen/irq.c
+++ xen-3.3.1-testing/xen/arch/ia64/xen/irq.c
@@ -459,12 +459,12 @@ int pirq_guest_bind(struct vcpu *v, int
return rc;
}
-int pirq_guest_unbind(struct domain *d, int irq)
+void pirq_guest_unbind(struct domain *d, int irq)
{
irq_desc_t *desc = &irq_desc[irq];
irq_guest_action_t *action;
unsigned long flags;
- int i, rc = 0;
+ int i;
spin_lock_irqsave(&desc->lock, flags);
@@ -472,11 +472,7 @@ int pirq_guest_unbind(struct domain *d,
for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
continue;
- if ( i == action->nr_guests )
- {
- rc = -EINVAL;
- goto out;
- }
+ BUG_ON(i == action->nr_guests);
memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
action->nr_guests--;
@@ -496,9 +492,7 @@ int pirq_guest_unbind(struct domain *d,
desc->handler->shutdown(irq);
}
- out:
spin_unlock_irqrestore(&desc->lock, flags);
- return rc;
}
void
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -510,12 +510,12 @@ int pirq_guest_bind(struct vcpu *v, int
irq_guest_action_t *action, *newaction = NULL;
int rc = 0;
cpumask_t cpumask = CPU_MASK_NONE;
- unsigned long flags;
WARN_ON(!spin_is_locked(&v->domain->evtchn_lock));
+ BUG_ON(!local_irq_is_enabled());
retry:
- desc = domain_spin_lock_irq_desc(v->domain, irq, &flags);
+ desc = domain_spin_lock_irq_desc(v->domain, irq, NULL);
if ( desc == NULL )
return -EINVAL;
@@ -535,7 +535,7 @@ int pirq_guest_bind(struct vcpu *v, int
if ( newaction == NULL )
{
- spin_unlock_irqrestore(&desc->lock, flags);
+ spin_unlock_irq(&desc->lock);
if ( (newaction = xmalloc(irq_guest_action_t)) != NULL )
goto retry;
gdprintk(XENLOG_INFO,
@@ -581,7 +581,7 @@ int pirq_guest_bind(struct vcpu *v, int
*/
ASSERT(action->ack_type == ACKTYPE_EOI);
ASSERT(desc->status & IRQ_DISABLED);
- spin_unlock_irqrestore(&desc->lock, flags);
+ spin_unlock_irq(&desc->lock);
cpu_relax();
goto retry;
}
@@ -597,45 +597,26 @@ int pirq_guest_bind(struct vcpu *v, int
action->guest[action->nr_guests++] = v->domain;
unlock_out:
- spin_unlock_irqrestore(&desc->lock, flags);
+ spin_unlock_irq(&desc->lock);
out:
- if ( newaction != NULL )
- xfree(newaction);
return rc;
}
-int pirq_guest_unbind(struct domain *d, int irq)
+static void __pirq_guest_unbind(struct domain *d, int irq, irq_desc_t *desc)
{
- int vector;
- irq_desc_t *desc;
+ unsigned int vector;
irq_guest_action_t *action;
cpumask_t cpu_eoi_map;
- int i, rc = 0;
-
- WARN_ON(!spin_is_locked(&d->evtchn_lock));
+ int i;
- desc = domain_spin_lock_irq_desc(d, irq, &flags);
- if ( unlikely(desc == NULL) )
- {
- if ( !msi_enable || (vector = -domain_irq_to_vector(d, irq)) == 0 )
- return -EINVAL;
- BUG_ON(vector <= 0);
- desc = &irq_desc[vector];
- spin_lock_irqsave(&desc->lock, flags);
- d->arch.pirq_vector[irq] = d->arch.vector_pirq[vector] = 0;
- goto out;
- }
+ BUG_ON(!(desc->status & IRQ_GUEST));
action = (irq_guest_action_t *)desc->action;
vector = desc - irq_desc;
for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
continue;
- if ( i == action->nr_guests )
- {
- rc = -EINVAL;
- goto out;
- }
+ BUG_ON(i == action->nr_guests);
memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
action->nr_guests--;
@@ -653,7 +634,7 @@ int pirq_guest_unbind(struct domain *d,
(action->nr_guests != 0) )
{
cpu_eoi_map = action->cpu_eoi_map;
- spin_unlock_irqrestore(&desc->lock, flags);
+ spin_unlock_irq(&desc->lock);
on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0);
spin_lock_irq(&desc->lock);
}
@@ -669,7 +650,7 @@ int pirq_guest_unbind(struct domain *d,
if ( action->nr_guests != 0 )
{
action = NULL;
- goto out;
+ return;
}
BUG_ON(action->in_flight != 0);
@@ -690,7 +671,7 @@ int pirq_guest_unbind(struct domain *d,
if ( !cpus_empty(cpu_eoi_map) )
{
BUG_ON(action->ack_type != ACKTYPE_EOI);
- spin_unlock_irqrestore(&desc->lock, flags);
+ spin_unlock_irq(&desc->lock);
on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 1);
spin_lock_irq(&desc->lock);
}
@@ -702,10 +683,63 @@ int pirq_guest_unbind(struct domain *d,
desc->status &= ~IRQ_INPROGRESS;
kill_timer(&irq_guest_eoi_timer[vector]);
desc->handler->shutdown(vector);
+}
+
+void pirq_guest_unbind(struct domain *d, int irq)
+{
+ irq_desc_t *desc;
+ int vector;
+
+ WARN_ON(!spin_is_locked(&d->evtchn_lock));
+
+ BUG_ON(!local_irq_is_enabled());
+ desc = domain_spin_lock_irq_desc(d, irq, NULL);
+
+ if ( desc == NULL )
+ {
+ if ( !msi_enable )
+ return;
+ vector = -domain_irq_to_vector(d, irq);
+ BUG_ON(vector <= 0);
+ desc = &irq_desc[vector];
+ spin_lock_irq(&desc->lock);
+ d->arch.pirq_vector[irq] = d->arch.vector_pirq[vector] = 0;
+ }
+ else
+ {
+ __pirq_guest_unbind(d, irq, desc);
+ }
+
+ spin_unlock_irq(&desc->lock);
+}
+
+int pirq_guest_force_unbind(struct domain *d, int irq)
+{
+ irq_desc_t *desc;
+ irq_guest_action_t *action;
+ int i, bound = 0;
+
+ WARN_ON(!spin_is_locked(&d->evtchn_lock));
+
+ BUG_ON(!local_irq_is_enabled());
+ desc = domain_spin_lock_irq_desc(d, irq, NULL);
+ BUG_ON(desc == NULL);
+
+ if ( !(desc->status & IRQ_GUEST) )
+ goto out;
+
+ action = (irq_guest_action_t *)desc->action;
+ for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
+ continue;
+ if ( i == action->nr_guests )
+ goto out;
+
+ bound = 1;
+ __pirq_guest_unbind(d, irq, desc);
out:
- spin_unlock_irqrestore(&desc->lock, flags);
- return rc;
+ spin_unlock_irq(&desc->lock);
+ return bound;
}
extern void dump_ioapic_irq_info(void);
Index: xen-3.3.1-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/physdev.c
@@ -147,7 +147,7 @@ static int unmap_domain_pirq(struct doma
{
unsigned long flags;
irq_desc_t *desc;
- bool_t forced_unbind = (pirq_guest_unbind(d, pirq) == 0);
+ bool_t forced_unbind = pirq_guest_force_unbind(d, pirq);
if ( forced_unbind )
dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
Index: xen-3.3.1-testing/xen/common/event_channel.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/event_channel.c
+++ xen-3.3.1-testing/xen/common/event_channel.c
@@ -376,8 +376,7 @@ static long __evtchn_close(struct domain
break;
case ECS_PIRQ:
- if ( pirq_guest_unbind(d1, chn1->u.pirq) != 0 )
- BUG();
+ pirq_guest_unbind(d1, chn1->u.pirq);
d1->pirq_to_evtchn[chn1->u.pirq] = 0;
break;
Index: xen-3.3.1-testing/xen/include/asm-x86/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/irq.h
+++ xen-3.3.1-testing/xen/include/asm-x86/irq.h
@@ -55,4 +55,6 @@ int pirq_shared(struct domain *d , int i
#define domain_irq_to_vector(d, irq) (msi_enable ? (d)->arch.pirq_vector[irq] : irq_to_vector(irq))
#define domain_vector_to_irq(d, vec) (msi_enable ? (d)->arch.vector_pirq[vec] : vector_to_irq(vec))
+int pirq_guest_force_unbind(struct domain *d, int irq);
+
#endif /* _ASM_HW_IRQ_H */
Index: xen-3.3.1-testing/xen/include/xen/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/irq.h
+++ xen-3.3.1-testing/xen/include/xen/irq.h
@@ -77,7 +77,7 @@ struct vcpu;
extern int pirq_guest_eoi(struct domain *d, int irq);
extern int pirq_guest_unmask(struct domain *d);
extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
-extern int pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_guest_unbind(struct domain *d, int irq);
static inline void set_native_irq_info(int irq, cpumask_t mask)
{
@ -1,455 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1223459328 -3600
# Node ID ed398097c03e16dacb1f3af19fa8faddf2deae1f
# Parent 1f85f7b216b34bfda4911b6a46548478f0e5d682
x86: Move pirq logic to irq.c.
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -14,8 +14,11 @@
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <xen/compat.h>
-#include <asm/current.h>
+#include <xen/iocap.h>
#include <xen/iommu.h>
+#include <asm/msi.h>
+#include <asm/current.h>
+#include <public/physdev.h>
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
int opt_noirqbalance = 0;
@@ -742,6 +745,157 @@ int pirq_guest_force_unbind(struct domai
return bound;
}
+int get_free_pirq(struct domain *d, int type, int index)
+{
+ int i;
+
+ ASSERT(spin_is_locked(&d->evtchn_lock));
+
+ if ( type == MAP_PIRQ_TYPE_GSI )
+ {
+ for ( i = 16; i < NR_PIRQS; i++ )
+ if ( !d->arch.pirq_vector[i] )
+ break;
+ if ( i == NR_PIRQS )
+ return -ENOSPC;
+ }
+ else
+ {
+ for ( i = NR_PIRQS - 1; i >= 16; i-- )
+ if ( !d->arch.pirq_vector[i] )
+ break;
+ if ( i == 16 )
+ return -ENOSPC;
+ }
+
+ return i;
+}
+
+int map_domain_pirq(
+ struct domain *d, int pirq, int vector, int type, void *data)
+{
+ int ret = 0;
+ int old_vector, old_pirq;
+ irq_desc_t *desc;
+ unsigned long flags;
+
+ ASSERT(spin_is_locked(&d->evtchn_lock));
+
+ if ( !IS_PRIV(current->domain) )
+ return -EPERM;
+
+ if ( pirq < 0 || pirq >= NR_PIRQS || vector < 0 || vector >= NR_VECTORS )
+ {
+ dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or vector %d\n",
+ d->domain_id, pirq, vector);
+ return -EINVAL;
+ }
+
+ old_vector = d->arch.pirq_vector[pirq];
+ old_pirq = d->arch.vector_pirq[vector];
+
+ if ( (old_vector && (old_vector != vector) ) ||
+ (old_pirq && (old_pirq != pirq)) )
+ {
+ dprintk(XENLOG_G_ERR, "dom%d: pirq %d or vector %d already mapped\n",
+ d->domain_id, pirq, vector);
+ return -EINVAL;
+ }
+
+ ret = irq_permit_access(d, pirq);
+ if ( ret )
+ {
+ dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d\n",
+ d->domain_id, pirq);
+ return ret;
+ }
+
+ desc = &irq_desc[vector];
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if ( type == MAP_PIRQ_TYPE_MSI )
+ {
+ struct msi_info *msi = (struct msi_info *)data;
+ if ( desc->handler != &no_irq_type )
+ dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
+ d->domain_id, vector);
+ desc->handler = &pci_msi_type;
+ ret = pci_enable_msi(msi);
+ if ( ret )
+ goto done;
+ }
+
+ d->arch.pirq_vector[pirq] = vector;
+ d->arch.vector_pirq[vector] = pirq;
+
+done:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return ret;
+}
+
+/* The pirq should have been unbound before this call. */
+int unmap_domain_pirq(struct domain *d, int pirq)
+{
+ unsigned long flags;
+ irq_desc_t *desc;
+ int vector, ret = 0;
+ bool_t forced_unbind;
+
+ if ( (pirq < 0) || (pirq >= NR_PIRQS) )
+ return -EINVAL;
+
+ if ( !IS_PRIV(current->domain) )
+ return -EINVAL;
+
+ ASSERT(spin_is_locked(&d->evtchn_lock));
+
+ vector = d->arch.pirq_vector[pirq];
+ if ( vector <= 0 )
+ {
+ dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
+ d->domain_id, pirq);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ forced_unbind = pirq_guest_force_unbind(d, pirq);
+ if ( forced_unbind )
+ dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
+ d->domain_id, pirq);
+
+ desc = &irq_desc[vector];
+ spin_lock_irqsave(&desc->lock, flags);
+
+ BUG_ON(vector != d->arch.pirq_vector[pirq]);
+
+ if ( desc->msi_desc )
+ pci_disable_msi(vector);
+
+ if ( desc->handler == &pci_msi_type )
+ desc->handler = &no_irq_type;
+
+ if ( !forced_unbind )
+ {
+ d->arch.pirq_vector[pirq] = 0;
+ d->arch.vector_pirq[vector] = 0;
+ }
+ else
+ {
+ d->arch.pirq_vector[pirq] = -vector;
+ d->arch.vector_pirq[vector] = -pirq;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ ret = irq_deny_access(d, pirq);
+ if ( ret )
+ dprintk(XENLOG_G_ERR, "dom%d: could not deny access to irq %d\n",
+ d->domain_id, pirq);
+
+ done:
+ return ret;
+}
+
extern void dump_ioapic_irq_info(void);
static void dump_irqs(unsigned char key)
Index: xen-3.3.1-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/physdev.c
@@ -26,171 +26,12 @@ int
ioapic_guest_write(
unsigned long physbase, unsigned int reg, u32 pval);
-static int get_free_pirq(struct domain *d, int type, int index)
-{
- int i;
-
- ASSERT(spin_is_locked(&d->evtchn_lock));
-
- if ( type == MAP_PIRQ_TYPE_GSI )
- {
- for ( i = 16; i < NR_PIRQS; i++ )
- if ( !d->arch.pirq_vector[i] )
- break;
- if ( i == NR_PIRQS )
- return -ENOSPC;
- }
- else
- {
- for ( i = NR_PIRQS - 1; i >= 16; i-- )
- if ( !d->arch.pirq_vector[i] )
- break;
- if ( i == 16 )
- return -ENOSPC;
- }
-
- return i;
-}
-
-static int map_domain_pirq(struct domain *d, int pirq, int vector,
- struct physdev_map_pirq *map)
-{
- int ret = 0;
- int old_vector, old_pirq;
- struct msi_info msi;
- irq_desc_t *desc;
- unsigned long flags;
-
- ASSERT(spin_is_locked(&d->evtchn_lock));
-
- if ( !IS_PRIV(current->domain) )
- return -EPERM;
-
- if ( pirq < 0 || pirq >= NR_PIRQS || vector < 0 || vector >= NR_VECTORS )
- {
- dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or vector %d\n",
- d->domain_id, pirq, vector);
- return -EINVAL;
- }
-
- old_vector = d->arch.pirq_vector[pirq];
- old_pirq = d->arch.vector_pirq[vector];
-
- if ( (old_vector && (old_vector != vector) ) ||
- (old_pirq && (old_pirq != pirq)) )
- {
- dprintk(XENLOG_G_ERR, "dom%d: pirq %d or vector %d already mapped\n",
- d->domain_id, pirq, vector);
- return -EINVAL;
- }
-
- ret = irq_permit_access(d, pirq);
- if ( ret )
- {
- dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d\n",
- d->domain_id, pirq);
- return ret;
- }
-
- desc = &irq_desc[vector];
- spin_lock_irqsave(&desc->lock, flags);
-
- if ( map && MAP_PIRQ_TYPE_MSI == map->type )
- {
- if ( desc->handler != &no_irq_type )
- dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
- d->domain_id, vector);
- desc->handler = &pci_msi_type;
-
- msi.bus = map->bus;
- msi.devfn = map->devfn;
- msi.entry_nr = map->entry_nr;
- msi.table_base = map->table_base;
- msi.vector = vector;
-
- ret = pci_enable_msi(&msi);
- if ( ret )
- goto done;
- }
-
- d->arch.pirq_vector[pirq] = vector;
- d->arch.vector_pirq[vector] = pirq;
-
-done:
- spin_unlock_irqrestore(&desc->lock, flags);
- return ret;
-}
-
-/* The pirq should have been unbound before this call. */
-static int unmap_domain_pirq(struct domain *d, int pirq)
-{
- int ret = 0;
- int vector;
-
- if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
- return -EINVAL;
-
- if ( !IS_PRIV(current->domain) )
- return -EINVAL;
-
- ASSERT(spin_is_locked(&d->evtchn_lock));
-
- vector = d->arch.pirq_vector[pirq];
-
- if ( vector <= 0 )
- {
- dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
- d->domain_id, pirq);
- ret = -EINVAL;
- }
- else
- {
- unsigned long flags;
- irq_desc_t *desc;
- bool_t forced_unbind = pirq_guest_force_unbind(d, pirq);
-
- if ( forced_unbind )
- dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
- d->domain_id, pirq);
-
- desc = &irq_desc[vector];
- spin_lock_irqsave(&desc->lock, flags);
-
- BUG_ON(vector != d->arch.pirq_vector[pirq]);
-
- if ( desc->msi_desc )
- pci_disable_msi(vector);
-
- if ( desc->handler == &pci_msi_type )
- desc->handler = &no_irq_type;
-
- if ( !forced_unbind )
- {
- d->arch.pirq_vector[pirq] = 0;
- d->arch.vector_pirq[vector] = 0;
- }
- else
- {
- d->arch.pirq_vector[pirq] = -vector;
- d->arch.vector_pirq[vector] = -pirq;
- }
-
- spin_unlock_irqrestore(&desc->lock, flags);
- }
-
- ret = irq_deny_access(d, pirq);
-
- if ( ret )
- dprintk(XENLOG_G_ERR, "dom%d: could not deny access to irq %d\n",
- d->domain_id, pirq);
-
- return ret;
-}
-
static int physdev_map_pirq(struct physdev_map_pirq *map)
{
struct domain *d;
int vector, pirq, ret = 0;
+ struct msi_info _msi;
+ void *map_data = NULL;
/* if msi_enable is not enabled, map always succeeds */
if ( !msi_enable )
@@ -213,6 +54,7 @@ static int physdev_map_pirq(struct physd
goto free_domain;
}
+ /* Verify or get vector. */
switch ( map->type )
{
case MAP_PIRQ_TYPE_GSI:
@@ -227,15 +69,16 @@ static int physdev_map_pirq(struct physd
if ( !vector )
{
dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n",
- d->domain_id, map->index);
+ d->domain_id, vector);
ret = -EINVAL;
goto free_domain;
}
break;
+
case MAP_PIRQ_TYPE_MSI:
vector = map->index;
- if ( vector == -1 )
- vector = assign_irq_vector(AUTO_ASSIGN);
+ if ( vector == -1 )
+ vector = assign_irq_vector(AUTO_ASSIGN);
if ( vector < 0 || vector >= NR_VECTORS )
{
@@ -244,13 +87,23 @@ static int physdev_map_pirq(struct physd
ret = -EINVAL;
goto free_domain;
}
+
+ _msi.bus = map->bus;
+ _msi.devfn = map->devfn;
+ _msi.entry_nr = map->entry_nr;
+ _msi.table_base = map->table_base;
+ _msi.vector = vector;
+ map_data = &_msi;
break;
+
default:
- dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n", d->domain_id, map->type);
+ dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n",
+ d->domain_id, map->type);
ret = -EINVAL;
goto free_domain;
}
+ /* Verify or get pirq. */
spin_lock(&d->evtchn_lock);
if ( map->pirq < 0 )
{
@@ -292,10 +145,10 @@ static int physdev_map_pirq(struct physd
}
- ret = map_domain_pirq(d, pirq, vector, map);
-
+ ret = map_domain_pirq(d, pirq, vector, map->type, map_data);
if ( !ret )
map->pirq = pirq;
+
done:
spin_unlock(&d->evtchn_lock);
free_domain:
@@ -462,7 +315,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( msi_enable )
{
spin_lock(&dom0->evtchn_lock);
- ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
+ ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
+ MAP_PIRQ_TYPE_GSI, NULL);
spin_unlock(&dom0->evtchn_lock);
}
Index: xen-3.3.1-testing/xen/include/asm-x86/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/irq.h
+++ xen-3.3.1-testing/xen/include/asm-x86/irq.h
@@ -52,6 +52,11 @@ extern atomic_t irq_mis_count;
int pirq_acktype(struct domain *d, int irq);
int pirq_shared(struct domain *d , int irq);
+int map_domain_pirq(struct domain *d, int pirq, int vector, int type,
+ void *data);
+int unmap_domain_pirq(struct domain *d, int pirq);
+int get_free_pirq(struct domain *d, int type, int index);
+
#define domain_irq_to_vector(d, irq) (msi_enable ? (d)->arch.pirq_vector[irq] : irq_to_vector(irq))
#define domain_vector_to_irq(d, vec) (msi_enable ? (d)->arch.vector_pirq[vec] : vector_to_irq(vec))


@@ -1,219 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1223463099 -3600
# Node ID 51a05fb4c6014059058de48b83a9431e7474a456
# Parent ed398097c03e16dacb1f3af19fa8faddf2deae1f
x86: Free MSI vector when a pirq is unmapped.
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
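For orientation, the hunks below turn vector_irq[] into a small
ownership state machine. A minimal sketch of the three sentinel states
and of the one transition free_irq_vector() performs (constant names
are from the irq.h hunk below; the helper is my paraphrase, not the
hypervisor's code, and the real version runs under vector_lock):

    #define AUTO_ASSIGN    -1  /* claimed dynamically, e.g. for MSI */
    #define NEVER_ASSIGN   -2  /* reserved forever (hypercall vector, 0x80) */
    #define FREE_TO_ASSIGN -3  /* available to assign_irq_vector() */

    static int sketch_free_irq_vector(int *vector_irq, int vector)
    {
        /* Only auto-assigned vectors return to the free pool; vectors
         * bound to a fixed irq or reserved outright stay untouched. */
        if ( vector_irq[vector] != AUTO_ASSIGN )
            return -1;                    /* -EINVAL in the real code */
        vector_irq[vector] = FREE_TO_ASSIGN;
        return 0;
    }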
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -459,6 +459,7 @@ void arch_domain_destroy(struct domain *
hvm_domain_destroy(d);
pci_release_devices(d);
+ free_domain_pirqs(d);
if ( !is_idle_domain(d) )
iommu_domain_destroy(d);
Index: xen-3.3.1-testing/xen/arch/x86/i8259.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/i8259.c
+++ xen-3.3.1-testing/xen/arch/x86/i8259.c
@@ -408,6 +408,10 @@ void __init init_IRQ(void)
irq_desc[LEGACY_VECTOR(i)].handler = &i8259A_irq_type;
}
+ /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
+ vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN;
+ vector_irq[0x80] = NEVER_ASSIGN;
+
apic_intr_init();
/* Set the clock to HZ Hz */
Index: xen-3.3.1-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/io_apic.c
+++ xen-3.3.1-testing/xen/arch/x86/io_apic.c
@@ -90,7 +90,8 @@ static struct irq_pin_list {
} irq_2_pin[PIN_MAP_SIZE];
static int irq_2_pin_free_entry = NR_IRQS;
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = {
+ [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN};
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -669,40 +670,46 @@ static inline int IO_APIC_irq_trigger(in
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
+int free_irq_vector(int vector)
+{
+ int irq;
+
+ BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
+
+ spin_lock(&vector_lock);
+ if ((irq = vector_irq[vector]) == AUTO_ASSIGN)
+ vector_irq[vector] = FREE_TO_ASSIGN;
+ spin_unlock(&vector_lock);
+
+ return (irq == AUTO_ASSIGN) ? 0 : -EINVAL;
+}
+
int assign_irq_vector(int irq)
{
- static unsigned current_vector = FIRST_DYNAMIC_VECTOR, offset = 0;
+ static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
unsigned vector;
BUG_ON(irq >= NR_IRQ_VECTORS);
+
spin_lock(&vector_lock);
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+ if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) {
spin_unlock(&vector_lock);
return IO_APIC_VECTOR(irq);
}
-next:
- current_vector += 8;
+ vector = current_vector;
+ while (vector_irq[vector] != FREE_TO_ASSIGN) {
+ if (++vector > LAST_DYNAMIC_VECTOR)
+ vector = FIRST_DYNAMIC_VECTOR;
- /* Skip the hypercall vector. */
- if (current_vector == HYPERCALL_VECTOR)
- goto next;
-
- /* Skip the Linux/BSD fast-trap vector. */
- if (current_vector == 0x80)
- goto next;
-
- if (current_vector > LAST_DYNAMIC_VECTOR) {
- offset++;
- if (!(offset%8)) {
+ if (vector == current_vector) {
spin_unlock(&vector_lock);
return -ENOSPC;
}
- current_vector = FIRST_DYNAMIC_VECTOR + offset;
}
- vector = current_vector;
+ current_vector = vector;
vector_irq[vector] = irq;
if (irq != AUTO_ASSIGN)
IO_APIC_VECTOR(irq) = vector;
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -872,7 +872,10 @@ int unmap_domain_pirq(struct domain *d,
pci_disable_msi(vector);
if ( desc->handler == &pci_msi_type )
+ {
desc->handler = &no_irq_type;
+ free_irq_vector(vector);
+ }
if ( !forced_unbind )
{
@@ -896,6 +899,24 @@ int unmap_domain_pirq(struct domain *d,
return ret;
}
+void free_domain_pirqs(struct domain *d)
+{
+ int i;
+
+ ASSERT(d->is_dying == DOMDYING_dying);
+
+ if ( !msi_enable )
+ return;
+
+ spin_lock(&d->evtchn_lock);
+
+ for ( i = 0; i < NR_PIRQS; i++ )
+ if ( d->arch.pirq_vector[i] > 0 )
+ unmap_domain_pirq(d, i);
+
+ spin_unlock(&d->evtchn_lock);
+}
+
extern void dump_ioapic_irq_info(void);
static void dump_irqs(unsigned char key)
Index: xen-3.3.1-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/physdev.c
@@ -83,7 +83,7 @@ static int physdev_map_pirq(struct physd
if ( vector < 0 || vector >= NR_VECTORS )
{
dprintk(XENLOG_G_ERR, "dom%d: map irq with wrong vector %d\n",
- d->domain_id, map->index);
+ d->domain_id, vector);
ret = -EINVAL;
goto free_domain;
}
@@ -144,13 +144,14 @@ static int physdev_map_pirq(struct physd
pirq = map->pirq;
}
-
ret = map_domain_pirq(d, pirq, vector, map->type, map_data);
- if ( !ret )
+ if ( ret == 0 )
map->pirq = pirq;
done:
spin_unlock(&d->evtchn_lock);
+ if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
+ free_irq_vector(vector);
free_domain:
rcu_unlock_domain(d);
return ret;
Index: xen-3.3.1-testing/xen/include/asm-x86/io_apic.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/io_apic.h
+++ xen-3.3.1-testing/xen/include/asm-x86/io_apic.h
@@ -192,5 +192,6 @@ static inline int ioapic_resume(void) {r
#endif
extern int assign_irq_vector(int irq);
+extern int free_irq_vector(int vector);
#endif
Index: xen-3.3.1-testing/xen/include/asm-x86/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/irq.h
+++ xen-3.3.1-testing/xen/include/asm-x86/irq.h
@@ -19,7 +19,9 @@
extern int vector_irq[NR_VECTORS];
extern u8 irq_vector[NR_IRQ_VECTORS];
-#define AUTO_ASSIGN -1
+#define AUTO_ASSIGN -1
+#define NEVER_ASSIGN -2
+#define FREE_TO_ASSIGN -3
#define platform_legacy_irq(irq) ((irq) < 16)
@@ -56,6 +58,7 @@ int map_domain_pirq(struct domain *d, in
void *data);
int unmap_domain_pirq(struct domain *d, int pirq);
int get_free_pirq(struct domain *d, int type, int index);
+void free_domain_pirqs(struct domain *d);
#define domain_irq_to_vector(d, irq) (msi_enable ? (d)->arch.pirq_vector[irq] : irq_to_vector(irq))
#define domain_vector_to_irq(d, vec) (msi_enable ? (d)->arch.vector_pirq[vec] : vector_to_irq(vec))


@@ -1,21 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1223470858 -3600
# Node ID e66cecb66b1ed83b65804d2eb7c3a30f9f1f01d4
# Parent 5e4e234d58be41401909f160cb9ed2ee0379c6a9
x86: Remove bogus assertion from free_domain_pirqs().
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -903,8 +903,6 @@ void free_domain_pirqs(struct domain *d)
{
int i;
- ASSERT(d->is_dying == DOMDYING_dying);
-
if ( !msi_enable )
return;


@@ -1,569 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1223547292 -3600
# Node ID a11ad61bdb5b188a8116b533c87c31d6e9bd62d4
# Parent b8f329d2c074a06b47f3be2b4e0bfe1ac5b232e5
Fix lock issue for hvm pass-through domain
This patch protects the hvm_irq_dpci structure with evtchn_lock; as a
result, the access to the domain's pirq_vector mapping is protected as
well.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
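The locking discipline these hunks establish, condensed into a sketch
(a paraphrase of the new hvm_dpci_msi_eoi() flow below, not verbatim
code): the domain's evtchn_lock is taken first and covers the
hvm_irq_dpci structure together with the pirq_vector maps; the
per-vector irq_desc lock nests inside it via domain_spin_lock_irq_desc():

    static void sketch_dpci_msi_eoi(struct domain *d, int pirq)
    {
        irq_desc_t *desc;

        spin_lock(&d->evtchn_lock);       /* outer: guards dpci state */
        desc = domain_spin_lock_irq_desc(d, pirq, NULL); /* inner lock */
        if ( desc != NULL )
        {
            desc->status &= ~IRQ_INPROGRESS;
            spin_unlock_irq(&desc->lock);
            pirq_guest_eoi(d, pirq);
        }
        spin_unlock(&d->evtchn_lock);
    }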
Index: xen-3.3.1-testing/xen/arch/x86/hvm/svm/intr.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/svm/intr.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/svm/intr.c
@@ -124,9 +124,11 @@ static void svm_dirq_assist(struct vcpu
if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
continue;
+ spin_lock(&d->evtchn_lock);
if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
{
hvm_pci_msi_assert(d, irq);
+ spin_unlock(&d->evtchn_lock);
continue;
}
@@ -137,9 +139,7 @@ static void svm_dirq_assist(struct vcpu
device = digl->device;
intx = digl->intx;
hvm_pci_intx_assert(d, device, intx);
- spin_lock(&hvm_irq_dpci->dirq_lock);
hvm_irq_dpci->mirq[irq].pending++;
- spin_unlock(&hvm_irq_dpci->dirq_lock);
}
/*
@@ -151,6 +151,7 @@ static void svm_dirq_assist(struct vcpu
*/
set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
NOW() + PT_IRQ_TIME_OUT);
+ spin_unlock(&d->evtchn_lock);
}
}
Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmsi.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmsi.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/vmsi.c
@@ -134,7 +134,7 @@ int vmsi_deliver(struct domain *d, int p
"vector=%x trig_mode=%x\n",
dest, dest_mode, delivery_mode, vector, trig_mode);
- if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MSI) )
+ if ( !test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags) )
{
gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
return 0;
Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmx/intr.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmx/intr.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/vmx/intr.c
@@ -127,11 +127,13 @@ static void vmx_dirq_assist(struct vcpu
if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
continue;
- if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
- {
- hvm_pci_msi_assert(d, irq);
- continue;
- }
+ spin_lock(&d->evtchn_lock);
+ if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
+ {
+ hvm_pci_msi_assert(d, irq);
+ spin_unlock(&d->evtchn_lock);
+ continue;
+ }
stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
@@ -140,9 +142,7 @@ static void vmx_dirq_assist(struct vcpu
device = digl->device;
intx = digl->intx;
hvm_pci_intx_assert(d, device, intx);
- spin_lock(&hvm_irq_dpci->dirq_lock);
hvm_irq_dpci->mirq[irq].pending++;
- spin_unlock(&hvm_irq_dpci->dirq_lock);
}
/*
@@ -154,6 +154,7 @@ static void vmx_dirq_assist(struct vcpu
*/
set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
NOW() + PT_IRQ_TIME_OUT);
+ spin_unlock(&d->evtchn_lock);
}
}
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -285,7 +285,7 @@ static void __do_IRQ_guest(int vector)
* The descriptor is returned locked. This function is safe against changes
* to the per-domain irq-to-vector mapping.
*/
-static irq_desc_t *domain_spin_lock_irq_desc(
+irq_desc_t *domain_spin_lock_irq_desc(
struct domain *d, int irq, unsigned long *pflags)
{
unsigned int vector;
Index: xen-3.3.1-testing/xen/drivers/passthrough/io.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/io.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/io.c
@@ -26,10 +26,14 @@ static void pt_irq_time_out(void *data)
struct hvm_mirq_dpci_mapping *irq_map = data;
unsigned int guest_gsi, machine_gsi = 0;
int vector;
- struct hvm_irq_dpci *dpci = domain_get_irq_dpci(irq_map->dom);
+ struct hvm_irq_dpci *dpci = NULL;
struct dev_intx_gsi_link *digl;
uint32_t device, intx;
+ spin_lock(&irq_map->dom->evtchn_lock);
+
+ dpci = domain_get_irq_dpci(irq_map->dom);
+ ASSERT(dpci);
list_for_each_entry ( digl, &irq_map->digl_list, list )
{
guest_gsi = digl->gsi;
@@ -41,55 +45,65 @@ static void pt_irq_time_out(void *data)
clear_bit(machine_gsi, dpci->dirq_mask);
vector = domain_irq_to_vector(irq_map->dom, machine_gsi);
- stop_timer(&dpci->hvm_timer[vector]);
- spin_lock(&dpci->dirq_lock);
dpci->mirq[machine_gsi].pending = 0;
- spin_unlock(&dpci->dirq_lock);
+ spin_unlock(&irq_map->dom->evtchn_lock);
pirq_guest_eoi(irq_map->dom, machine_gsi);
}
int pt_irq_create_bind_vtd(
struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
{
- struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+ struct hvm_irq_dpci *hvm_irq_dpci = NULL;
uint32_t machine_gsi, guest_gsi;
uint32_t device, intx, link;
struct dev_intx_gsi_link *digl;
+ int pirq = pt_irq_bind->machine_irq;
+
+ if ( pirq < 0 || pirq >= NR_PIRQS )
+ return -EINVAL;
+
+ spin_lock(&d->evtchn_lock);
+ hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci == NULL )
{
hvm_irq_dpci = xmalloc(struct hvm_irq_dpci);
if ( hvm_irq_dpci == NULL )
+ {
+ spin_unlock(&d->evtchn_lock);
return -ENOMEM;
-
+ }
memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
- spin_lock_init(&hvm_irq_dpci->dirq_lock);
for ( int i = 0; i < NR_IRQS; i++ )
INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
+ }
- if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
- xfree(hvm_irq_dpci);
+ if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
+ {
+ xfree(hvm_irq_dpci);
+ spin_unlock(&d->evtchn_lock);
+ return -EINVAL;
}
if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
{
- int pirq = pt_irq_bind->machine_irq;
-
- if ( pirq < 0 || pirq >= NR_IRQS )
- return -EINVAL;
- if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_VALID ) )
+ if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping))
{
- hvm_irq_dpci->mirq[pirq].flags |= HVM_IRQ_DPCI_VALID |
- HVM_IRQ_DPCI_MSI ;
+ set_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags);
+ hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
+ hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
+ hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = pirq;
+ /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
pirq_guest_bind(d->vcpu[0], pirq, 0);
}
+ else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
+ ||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
- hvm_irq_dpci->mirq[pirq].flags |= HVM_IRQ_DPCI_VALID |HVM_IRQ_DPCI_MSI ;
- hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
- hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
- hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = pirq;
-
+ {
+ spin_unlock(&d->evtchn_lock);
+ return -EBUSY;
+ }
}
else
{
@@ -102,7 +116,10 @@ int pt_irq_create_bind_vtd(
digl = xmalloc(struct dev_intx_gsi_link);
if ( !digl )
+ {
+ spin_unlock(&d->evtchn_lock);
return -ENOMEM;
+ }
digl->device = device;
digl->intx = intx;
@@ -117,11 +134,11 @@ int pt_irq_create_bind_vtd(
hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
/* Bind the same mirq once in the same domain */
- if ( !(hvm_irq_dpci->mirq[machine_gsi].flags & HVM_IRQ_DPCI_VALID) )
+ if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
{
- hvm_irq_dpci->mirq[machine_gsi].flags |= HVM_IRQ_DPCI_VALID;
hvm_irq_dpci->mirq[machine_gsi].dom = d;
+ /* Init timer before binding */
init_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)],
pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
/* Deal with gsi for legacy devices */
@@ -132,37 +149,45 @@ int pt_irq_create_bind_vtd(
"VT-d irq bind: m_irq = %x device = %x intx = %x\n",
machine_gsi, device, intx);
}
+ spin_unlock(&d->evtchn_lock);
return 0;
}
int pt_irq_destroy_bind_vtd(
struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
{
- struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+ struct hvm_irq_dpci *hvm_irq_dpci = NULL;
uint32_t machine_gsi, guest_gsi;
uint32_t device, intx, link;
struct list_head *digl_list, *tmp;
struct dev_intx_gsi_link *digl;
- if ( hvm_irq_dpci == NULL )
- return 0;
-
machine_gsi = pt_irq_bind->machine_irq;
device = pt_irq_bind->u.pci.device;
intx = pt_irq_bind->u.pci.intx;
guest_gsi = hvm_pci_intx_gsi(device, intx);
link = hvm_pci_intx_link(device, intx);
- hvm_irq_dpci->link_cnt[link]--;
gdprintk(XENLOG_INFO,
"pt_irq_destroy_bind_vtd: machine_gsi=%d "
"guest_gsi=%d, device=%d, intx=%d.\n",
machine_gsi, guest_gsi, device, intx);
+ spin_lock(&d->evtchn_lock);
+
+ hvm_irq_dpci = domain_get_irq_dpci(d);
+
+ if ( hvm_irq_dpci == NULL )
+ {
+ spin_unlock(&d->evtchn_lock);
+ return -EINVAL;
+ }
+
+ hvm_irq_dpci->link_cnt[link]--;
memset(&hvm_irq_dpci->girq[guest_gsi], 0,
sizeof(struct hvm_girq_dpci_mapping));
/* clear the mirq info */
- if ( (hvm_irq_dpci->mirq[machine_gsi].flags & HVM_IRQ_DPCI_VALID) )
+ if ( test_bit(machine_gsi, hvm_irq_dpci->mapping))
{
list_for_each_safe ( digl_list, tmp,
&hvm_irq_dpci->mirq[machine_gsi].digl_list )
@@ -185,9 +210,10 @@ int pt_irq_destroy_bind_vtd(
kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
hvm_irq_dpci->mirq[machine_gsi].flags = 0;
+ clear_bit(machine_gsi, hvm_irq_dpci->mapping);
}
}
-
+ spin_unlock(&d->evtchn_lock);
gdprintk(XENLOG_INFO,
"XEN_DOMCTL_irq_unmapping: m_irq = %x device = %x intx = %x\n",
machine_gsi, device, intx);
@@ -199,8 +225,9 @@ int hvm_do_IRQ_dpci(struct domain *d, un
{
struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
+ ASSERT(spin_is_locked(&irq_desc[domain_irq_to_vector(d, mirq)].lock));
if ( !iommu_enabled || (d == dom0) || !dpci ||
- !dpci->mirq[mirq].flags & HVM_IRQ_DPCI_VALID )
+ !test_bit(mirq, dpci->mapping))
return 0;
/*
@@ -218,44 +245,46 @@ int hvm_do_IRQ_dpci(struct domain *d, un
return 1;
}
-
void hvm_dpci_msi_eoi(struct domain *d, int vector)
{
struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
- int pirq;
- unsigned long flags;
irq_desc_t *desc;
+ int pirq;
if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
return;
+ spin_lock(&d->evtchn_lock);
pirq = hvm_irq_dpci->msi_gvec_pirq[vector];
if ( ( pirq >= 0 ) && (pirq < NR_PIRQS) &&
- (hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_VALID) &&
- (hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MSI) )
- {
- int vec;
- vec = domain_irq_to_vector(d, pirq);
- desc = &irq_desc[vec];
-
- spin_lock_irqsave(&desc->lock, flags);
- desc->status &= ~IRQ_INPROGRESS;
- spin_unlock_irqrestore(&desc->lock, flags);
+ test_bit(pirq, hvm_irq_dpci->mapping) &&
+ (test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags)))
+ {
+ BUG_ON(!local_irq_is_enabled());
+ desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+ if (!desc)
+ {
+ spin_unlock(&d->evtchn_lock);
+ return;
+ }
- pirq_guest_eoi(d, pirq);
- }
+ desc->status &= ~IRQ_INPROGRESS;
+ spin_unlock_irq(&desc->lock);
+
+ pirq_guest_eoi(d, pirq);
+ }
+
+ spin_unlock(&d->evtchn_lock);
}
void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
union vioapic_redir_entry *ent)
{
- struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+ struct hvm_irq_dpci *hvm_irq_dpci = NULL;
uint32_t device, intx, machine_gsi;
- if ( !iommu_enabled || (hvm_irq_dpci == NULL) ||
- (guest_gsi >= NR_ISAIRQS &&
- !hvm_irq_dpci->girq[guest_gsi].valid) )
+ if ( !iommu_enabled)
return;
if ( guest_gsi < NR_ISAIRQS )
@@ -264,23 +293,34 @@ void hvm_dpci_eoi(struct domain *d, unsi
return;
}
- machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
+ spin_lock(&d->evtchn_lock);
+ hvm_irq_dpci = domain_get_irq_dpci(d);
+
+ if((hvm_irq_dpci == NULL) ||
+ (guest_gsi >= NR_ISAIRQS &&
+ !hvm_irq_dpci->girq[guest_gsi].valid) )
+ {
+ spin_unlock(&d->evtchn_lock);
+ return;
+ }
+
device = hvm_irq_dpci->girq[guest_gsi].device;
intx = hvm_irq_dpci->girq[guest_gsi].intx;
hvm_pci_intx_deassert(d, device, intx);
- spin_lock(&hvm_irq_dpci->dirq_lock);
+ machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
if ( --hvm_irq_dpci->mirq[machine_gsi].pending == 0 )
{
- spin_unlock(&hvm_irq_dpci->dirq_lock);
-
if ( (ent == NULL) || !ent->fields.mask )
{
+ /*
+ * No need to get vector lock for timer
+ * since interrupt is still not EOIed
+ */
stop_timer(&hvm_irq_dpci->hvm_timer[
domain_irq_to_vector(d, machine_gsi)]);
pirq_guest_eoi(d, machine_gsi);
}
}
- else
- spin_unlock(&hvm_irq_dpci->dirq_lock);
+ spin_unlock(&d->evtchn_lock);
}
Index: xen-3.3.1-testing/xen/drivers/passthrough/pci.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/pci.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/pci.c
@@ -154,7 +154,7 @@ int pci_remove_device(u8 bus, u8 devfn)
static void pci_clean_dpci_irqs(struct domain *d)
{
- struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+ struct hvm_irq_dpci *hvm_irq_dpci = NULL;
uint32_t i;
struct list_head *digl_list, *tmp;
struct dev_intx_gsi_link *digl;
@@ -165,13 +165,14 @@ static void pci_clean_dpci_irqs(struct d
if ( !is_hvm_domain(d) && !need_iommu(d) )
return;
+ spin_lock(&d->evtchn_lock);
+ hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci != NULL )
{
- for ( i = 0; i < NR_IRQS; i++ )
+ for ( i = find_first_bit(hvm_irq_dpci->mapping, NR_PIRQS);
+ i < NR_PIRQS;
+ i = find_next_bit(hvm_irq_dpci->mapping, NR_PIRQS, i + 1) )
{
- if ( !(hvm_irq_dpci->mirq[i].flags & HVM_IRQ_DPCI_VALID) )
- continue;
-
pirq_guest_unbind(d, i);
kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]);
@@ -188,6 +189,7 @@ static void pci_clean_dpci_irqs(struct d
d->arch.hvm_domain.irq.dpci = NULL;
xfree(hvm_irq_dpci);
}
+ spin_unlock(&d->evtchn_lock);
}
void pci_release_devices(struct domain *d)
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/x86/vtd.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/x86/vtd.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -85,37 +85,41 @@ int domain_set_irq_dpci(struct domain *d
void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
{
struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
- struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
+ struct hvm_irq_dpci *dpci = NULL;
struct dev_intx_gsi_link *digl, *tmp;
int i;
ASSERT(isairq < NR_ISAIRQS);
- if ( !vtd_enabled || !dpci ||
- !test_bit(isairq, dpci->isairq_map) )
+ if ( !vtd_enabled)
return;
+ spin_lock(&d->evtchn_lock);
+
+ dpci = domain_get_irq_dpci(d);
+
+ if ( !dpci || !test_bit(isairq, dpci->isairq_map) )
+ {
+ spin_unlock(&d->evtchn_lock);
+ return;
+ }
/* Multiple mirq may be mapped to one isa irq */
- for ( i = 0; i < NR_IRQS; i++ )
+ for ( i = find_first_bit(dpci->mapping, NR_PIRQS);
+ i < NR_PIRQS;
+ i = find_next_bit(dpci->mapping, NR_PIRQS, i + 1) )
{
- if ( !dpci->mirq[i].flags & HVM_IRQ_DPCI_VALID )
- continue;
-
list_for_each_entry_safe ( digl, tmp,
&dpci->mirq[i].digl_list, list )
{
if ( hvm_irq->pci_link.route[digl->link] == isairq )
{
hvm_pci_intx_deassert(d, digl->device, digl->intx);
- spin_lock(&dpci->dirq_lock);
if ( --dpci->mirq[i].pending == 0 )
{
- spin_unlock(&dpci->dirq_lock);
stop_timer(&dpci->hvm_timer[domain_irq_to_vector(d, i)]);
pirq_guest_eoi(d, i);
}
- else
- spin_unlock(&dpci->dirq_lock);
}
}
}
+ spin_unlock(&d->evtchn_lock);
}
Index: xen-3.3.1-testing/xen/include/asm-x86/hvm/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/hvm/irq.h
+++ xen-3.3.1-testing/xen/include/asm-x86/hvm/irq.h
@@ -25,6 +25,7 @@
#include <xen/types.h>
#include <xen/spinlock.h>
#include <asm/irq.h>
+#include <asm/pirq.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vioapic.h>
@@ -38,8 +39,6 @@ struct dev_intx_gsi_link {
uint8_t link;
};
-#define HVM_IRQ_DPCI_VALID 0x1
-#define HVM_IRQ_DPCI_MSI 0x2
#define _HVM_IRQ_DPCI_MSI 0x1
struct hvm_gmsi_info {
@@ -64,9 +63,10 @@ struct hvm_girq_dpci_mapping {
#define NR_ISAIRQS 16
#define NR_LINK 4
+/* Protected by domain's evtchn_lock */
struct hvm_irq_dpci {
- spinlock_t dirq_lock;
/* Machine IRQ to guest device/intx mapping. */
+ DECLARE_BITMAP(mapping, NR_PIRQS);
struct hvm_mirq_dpci_mapping mirq[NR_IRQS];
/* Guest IRQ to guest device/intx mapping. */
struct hvm_girq_dpci_mapping girq[NR_IRQS];
Index: xen-3.3.1-testing/xen/include/xen/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/irq.h
+++ xen-3.3.1-testing/xen/include/xen/irq.h
@@ -78,6 +78,8 @@ extern int pirq_guest_eoi(struct domain
extern int pirq_guest_unmask(struct domain *d);
extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
extern void pirq_guest_unbind(struct domain *d, int irq);
+extern irq_desc_t *domain_spin_lock_irq_desc(
+ struct domain *d, int irq, unsigned long *pflags);
static inline void set_native_irq_info(int irq, cpumask_t mask)
{


@@ -1,808 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1223547471 -3600
# Node ID 0033c944318f266a0e367678bf9f46042ae03397
# Parent a11ad61bdb5b188a8116b533c87c31d6e9bd62d4
Rename evtchn_lock to event_lock, since it protects more than just
event-channel state now.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
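Worth keeping in mind when reading the event_channel.c hunks below
(a sketch of an existing pattern the rename merely touches, not
something this patch changes): when two domains' event locks must be
held together, the lock belonging to the domain with the smaller id is
always taken first, so two CPUs binding in opposite directions cannot
deadlock:

    if ( ld < rd )
    {
        spin_lock(&ld->event_lock);
        spin_lock(&rd->event_lock);
    }
    else
    {
        if ( ld != rd )
            spin_lock(&rd->event_lock);
        spin_lock(&ld->event_lock);
    }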
Index: xen-3.3.1-testing/xen/arch/x86/hvm/svm/intr.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/svm/intr.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/svm/intr.c
@@ -124,11 +124,11 @@ static void svm_dirq_assist(struct vcpu
if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
continue;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
{
hvm_pci_msi_assert(d, irq);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
continue;
}
@@ -151,7 +151,7 @@ static void svm_dirq_assist(struct vcpu
*/
set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
NOW() + PT_IRQ_TIME_OUT);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
}
Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmx/intr.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmx/intr.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/vmx/intr.c
@@ -127,11 +127,11 @@ static void vmx_dirq_assist(struct vcpu
if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
continue;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
{
hvm_pci_msi_assert(d, irq);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
continue;
}
@@ -154,7 +154,7 @@ static void vmx_dirq_assist(struct vcpu
*/
set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
NOW() + PT_IRQ_TIME_OUT);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
}
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -514,7 +514,7 @@ int pirq_guest_bind(struct vcpu *v, int
int rc = 0;
cpumask_t cpumask = CPU_MASK_NONE;
- WARN_ON(!spin_is_locked(&v->domain->evtchn_lock));
+ WARN_ON(!spin_is_locked(&v->domain->event_lock));
BUG_ON(!local_irq_is_enabled());
retry:
@@ -693,7 +693,7 @@ void pirq_guest_unbind(struct domain *d,
irq_desc_t *desc;
int vector;
- WARN_ON(!spin_is_locked(&d->evtchn_lock));
+ WARN_ON(!spin_is_locked(&d->event_lock));
BUG_ON(!local_irq_is_enabled());
desc = domain_spin_lock_irq_desc(d, irq, NULL);
@@ -722,7 +722,7 @@ int pirq_guest_force_unbind(struct domai
irq_guest_action_t *action;
int i, bound = 0;
- WARN_ON(!spin_is_locked(&d->evtchn_lock));
+ WARN_ON(!spin_is_locked(&d->event_lock));
BUG_ON(!local_irq_is_enabled());
desc = domain_spin_lock_irq_desc(d, irq, NULL);
@@ -749,7 +749,7 @@ int get_free_pirq(struct domain *d, int
{
int i;
- ASSERT(spin_is_locked(&d->evtchn_lock));
+ ASSERT(spin_is_locked(&d->event_lock));
if ( type == MAP_PIRQ_TYPE_GSI )
{
@@ -779,7 +779,7 @@ int map_domain_pirq(
irq_desc_t *desc;
unsigned long flags;
- ASSERT(spin_is_locked(&d->evtchn_lock));
+ ASSERT(spin_is_locked(&d->event_lock));
if ( !IS_PRIV(current->domain) )
return -EPERM;
@@ -847,7 +847,7 @@ int unmap_domain_pirq(struct domain *d,
if ( !IS_PRIV(current->domain) )
return -EINVAL;
- ASSERT(spin_is_locked(&d->evtchn_lock));
+ ASSERT(spin_is_locked(&d->event_lock));
vector = d->arch.pirq_vector[pirq];
if ( vector <= 0 )
@@ -906,13 +906,13 @@ void free_domain_pirqs(struct domain *d)
if ( !msi_enable )
return;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
for ( i = 0; i < NR_PIRQS; i++ )
if ( d->arch.pirq_vector[i] > 0 )
unmap_domain_pirq(d, i);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
extern void dump_ioapic_irq_info(void);
Index: xen-3.3.1-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/physdev.c
@@ -104,7 +104,7 @@ static int physdev_map_pirq(struct physd
}
/* Verify or get pirq. */
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( map->pirq < 0 )
{
if ( d->arch.vector_pirq[vector] )
@@ -149,7 +149,7 @@ static int physdev_map_pirq(struct physd
map->pirq = pirq;
done:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
free_irq_vector(vector);
free_domain:
@@ -182,9 +182,9 @@ static int physdev_unmap_pirq(struct phy
return -ESRCH;
}
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
ret = unmap_domain_pirq(d, unmap->pirq);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
rcu_unlock_domain(d);
@@ -315,10 +315,10 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( msi_enable )
{
- spin_lock(&dom0->evtchn_lock);
+ spin_lock(&dom0->event_lock);
ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
MAP_PIRQ_TYPE_GSI, NULL);
- spin_unlock(&dom0->evtchn_lock);
+ spin_unlock(&dom0->event_lock);
}
if ( copy_to_guest(arg, &irq_op, 1) != 0 )
Index: xen-3.3.1-testing/xen/common/event_channel.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/event_channel.c
+++ xen-3.3.1-testing/xen/common/event_channel.c
@@ -133,7 +133,7 @@ static long evtchn_alloc_unbound(evtchn_
if ( rc )
return rc;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( (port = get_free_port(d)) < 0 )
ERROR_EXIT_DOM(port, d);
@@ -150,7 +150,7 @@ static long evtchn_alloc_unbound(evtchn_
alloc->port = port;
out:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
rcu_unlock_domain(d);
return rc;
@@ -174,14 +174,14 @@ static long evtchn_bind_interdomain(evtc
/* Avoid deadlock by first acquiring lock of domain with smaller id. */
if ( ld < rd )
{
- spin_lock(&ld->evtchn_lock);
- spin_lock(&rd->evtchn_lock);
+ spin_lock(&ld->event_lock);
+ spin_lock(&rd->event_lock);
}
else
{
if ( ld != rd )
- spin_lock(&rd->evtchn_lock);
- spin_lock(&ld->evtchn_lock);
+ spin_lock(&rd->event_lock);
+ spin_lock(&ld->event_lock);
}
if ( (lport = get_free_port(ld)) < 0 )
@@ -216,9 +216,9 @@ static long evtchn_bind_interdomain(evtc
bind->local_port = lport;
out:
- spin_unlock(&ld->evtchn_lock);
+ spin_unlock(&ld->event_lock);
if ( ld != rd )
- spin_unlock(&rd->evtchn_lock);
+ spin_unlock(&rd->event_lock);
rcu_unlock_domain(rd);
@@ -244,7 +244,7 @@ static long evtchn_bind_virq(evtchn_bind
((v = d->vcpu[vcpu]) == NULL) )
return -ENOENT;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( v->virq_to_evtchn[virq] != 0 )
ERROR_EXIT(-EEXIST);
@@ -260,7 +260,7 @@ static long evtchn_bind_virq(evtchn_bind
v->virq_to_evtchn[virq] = bind->port = port;
out:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return rc;
}
@@ -277,7 +277,7 @@ static long evtchn_bind_ipi(evtchn_bind_
(d->vcpu[vcpu] == NULL) )
return -ENOENT;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( (port = get_free_port(d)) < 0 )
ERROR_EXIT(port);
@@ -289,7 +289,7 @@ static long evtchn_bind_ipi(evtchn_bind_
bind->port = port;
out:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return rc;
}
@@ -308,7 +308,7 @@ static long evtchn_bind_pirq(evtchn_bind
if ( !irq_access_permitted(d, pirq) )
return -EPERM;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( d->pirq_to_evtchn[pirq] != 0 )
ERROR_EXIT(-EEXIST);
@@ -333,7 +333,7 @@ static long evtchn_bind_pirq(evtchn_bind
bind->port = port;
out:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return rc;
}
@@ -348,7 +348,7 @@ static long __evtchn_close(struct domain
long rc = 0;
again:
- spin_lock(&d1->evtchn_lock);
+ spin_lock(&d1->event_lock);
if ( !port_is_valid(d1, port1) )
{
@@ -404,12 +404,12 @@ static long __evtchn_close(struct domain
if ( d1 < d2 )
{
- spin_lock(&d2->evtchn_lock);
+ spin_lock(&d2->event_lock);
}
else if ( d1 != d2 )
{
- spin_unlock(&d1->evtchn_lock);
- spin_lock(&d2->evtchn_lock);
+ spin_unlock(&d1->event_lock);
+ spin_lock(&d2->event_lock);
goto again;
}
}
@@ -454,11 +454,11 @@ static long __evtchn_close(struct domain
if ( d2 != NULL )
{
if ( d1 != d2 )
- spin_unlock(&d2->evtchn_lock);
+ spin_unlock(&d2->event_lock);
put_domain(d2);
}
- spin_unlock(&d1->evtchn_lock);
+ spin_unlock(&d1->event_lock);
return rc;
}
@@ -476,11 +476,11 @@ int evtchn_send(struct domain *d, unsign
struct vcpu *rvcpu;
int rport, ret = 0;
- spin_lock(&ld->evtchn_lock);
+ spin_lock(&ld->event_lock);
if ( unlikely(!port_is_valid(ld, lport)) )
{
- spin_unlock(&ld->evtchn_lock);
+ spin_unlock(&ld->event_lock);
return -EINVAL;
}
@@ -489,7 +489,7 @@ int evtchn_send(struct domain *d, unsign
/* Guest cannot send via a Xen-attached event channel. */
if ( unlikely(lchn->consumer_is_xen) )
{
- spin_unlock(&ld->evtchn_lock);
+ spin_unlock(&ld->event_lock);
return -EINVAL;
}
@@ -527,7 +527,7 @@ int evtchn_send(struct domain *d, unsign
}
out:
- spin_unlock(&ld->evtchn_lock);
+ spin_unlock(&ld->event_lock);
return ret;
}
@@ -656,7 +656,7 @@ static long evtchn_status(evtchn_status_
if ( rc )
return rc;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( !port_is_valid(d, port) )
{
@@ -704,7 +704,7 @@ static long evtchn_status(evtchn_status_
status->vcpu = chn->notify_vcpu_id;
out:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
rcu_unlock_domain(d);
return rc;
@@ -720,7 +720,7 @@ long evtchn_bind_vcpu(unsigned int port,
if ( (vcpu_id >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu_id] == NULL) )
return -ENOENT;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( !port_is_valid(d, port) )
{
@@ -756,7 +756,7 @@ long evtchn_bind_vcpu(unsigned int port,
}
out:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return rc;
}
@@ -768,11 +768,11 @@ static long evtchn_unmask(evtchn_unmask_
int port = unmask->port;
struct vcpu *v;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( unlikely(!port_is_valid(d, port)) )
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return -EINVAL;
}
@@ -790,7 +790,7 @@ static long evtchn_unmask(evtchn_unmask_
vcpu_mark_events_pending(v);
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return 0;
}
@@ -944,7 +944,7 @@ int alloc_unbound_xen_event_channel(
struct domain *d = local_vcpu->domain;
int port;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( (port = get_free_port(d)) < 0 )
goto out;
@@ -956,7 +956,7 @@ int alloc_unbound_xen_event_channel(
chn->u.unbound.remote_domid = remote_domid;
out:
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return port;
}
@@ -968,11 +968,11 @@ void free_xen_event_channel(
struct evtchn *chn;
struct domain *d = local_vcpu->domain;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
if ( unlikely(d->is_dying) )
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return;
}
@@ -981,7 +981,7 @@ void free_xen_event_channel(
BUG_ON(!chn->consumer_is_xen);
chn->consumer_is_xen = 0;
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
(void)__evtchn_close(d, port);
}
@@ -993,7 +993,7 @@ void notify_via_xen_event_channel(int lp
struct domain *ld = current->domain, *rd;
int rport;
- spin_lock(&ld->evtchn_lock);
+ spin_lock(&ld->event_lock);
ASSERT(port_is_valid(ld, lport));
lchn = evtchn_from_port(ld, lport);
@@ -1007,13 +1007,13 @@ void notify_via_xen_event_channel(int lp
evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
}
- spin_unlock(&ld->evtchn_lock);
+ spin_unlock(&ld->event_lock);
}
int evtchn_init(struct domain *d)
{
- spin_lock_init(&d->evtchn_lock);
+ spin_lock_init(&d->event_lock);
if ( get_free_port(d) != 0 )
return -EINVAL;
evtchn_from_port(d, 0)->state = ECS_RESERVED;
@@ -1027,7 +1027,7 @@ void evtchn_destroy(struct domain *d)
/* After this barrier no new event-channel allocations can occur. */
BUG_ON(!d->is_dying);
- spin_barrier(&d->evtchn_lock);
+ spin_barrier(&d->event_lock);
/* Close all existing event channels. */
for ( i = 0; port_is_valid(d, i); i++ )
@@ -1037,14 +1037,14 @@ void evtchn_destroy(struct domain *d)
}
/* Free all event-channel buckets. */
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
for ( i = 0; i < NR_EVTCHN_BUCKETS; i++ )
{
xsm_free_security_evtchn(d->evtchn[i]);
xfree(d->evtchn[i]);
d->evtchn[i] = NULL;
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
static void domain_dump_evtchn_info(struct domain *d)
@@ -1053,7 +1053,7 @@ static void domain_dump_evtchn_info(stru
printk("Domain %d polling vCPUs: %08lx\n", d->domain_id, d->poll_mask[0]);
- if ( !spin_trylock(&d->evtchn_lock) )
+ if ( !spin_trylock(&d->event_lock) )
return;
printk("Event channel information for domain %d:\n",
@@ -1094,7 +1094,7 @@ static void domain_dump_evtchn_info(stru
printk(" x=%d\n", chn->consumer_is_xen);
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
static void dump_evtchn_info(unsigned char key)
Index: xen-3.3.1-testing/xen/drivers/passthrough/io.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/io.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/io.c
@@ -30,7 +30,7 @@ static void pt_irq_time_out(void *data)
struct dev_intx_gsi_link *digl;
uint32_t device, intx;
- spin_lock(&irq_map->dom->evtchn_lock);
+ spin_lock(&irq_map->dom->event_lock);
dpci = domain_get_irq_dpci(irq_map->dom);
ASSERT(dpci);
@@ -46,7 +46,7 @@ static void pt_irq_time_out(void *data)
clear_bit(machine_gsi, dpci->dirq_mask);
vector = domain_irq_to_vector(irq_map->dom, machine_gsi);
dpci->mirq[machine_gsi].pending = 0;
- spin_unlock(&irq_map->dom->evtchn_lock);
+ spin_unlock(&irq_map->dom->event_lock);
pirq_guest_eoi(irq_map->dom, machine_gsi);
}
@@ -62,7 +62,7 @@ int pt_irq_create_bind_vtd(
if ( pirq < 0 || pirq >= NR_PIRQS )
return -EINVAL;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci == NULL )
@@ -70,7 +70,7 @@ int pt_irq_create_bind_vtd(
hvm_irq_dpci = xmalloc(struct hvm_irq_dpci);
if ( hvm_irq_dpci == NULL )
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return -ENOMEM;
}
memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
@@ -81,7 +81,7 @@ int pt_irq_create_bind_vtd(
if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
{
xfree(hvm_irq_dpci);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return -EINVAL;
}
@@ -101,7 +101,7 @@ int pt_irq_create_bind_vtd(
||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return -EBUSY;
}
}
@@ -117,7 +117,7 @@ int pt_irq_create_bind_vtd(
digl = xmalloc(struct dev_intx_gsi_link);
if ( !digl )
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return -ENOMEM;
}
@@ -149,7 +149,7 @@ int pt_irq_create_bind_vtd(
"VT-d irq bind: m_irq = %x device = %x intx = %x\n",
machine_gsi, device, intx);
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return 0;
}
@@ -172,13 +172,13 @@ int pt_irq_destroy_bind_vtd(
"pt_irq_destroy_bind_vtd: machine_gsi=%d "
"guest_gsi=%d, device=%d, intx=%d.\n",
machine_gsi, guest_gsi, device, intx);
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci == NULL )
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return -EINVAL;
}
@@ -213,7 +213,7 @@ int pt_irq_destroy_bind_vtd(
clear_bit(machine_gsi, hvm_irq_dpci->mapping);
}
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
gdprintk(XENLOG_INFO,
"XEN_DOMCTL_irq_unmapping: m_irq = %x device = %x intx = %x\n",
machine_gsi, device, intx);
@@ -254,7 +254,7 @@ void hvm_dpci_msi_eoi(struct domain *d,
if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
return;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
pirq = hvm_irq_dpci->msi_gvec_pirq[vector];
if ( ( pirq >= 0 ) && (pirq < NR_PIRQS) &&
@@ -265,7 +265,7 @@ void hvm_dpci_msi_eoi(struct domain *d,
desc = domain_spin_lock_irq_desc(d, pirq, NULL);
if (!desc)
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return;
}
@@ -275,7 +275,7 @@ void hvm_dpci_msi_eoi(struct domain *d,
pirq_guest_eoi(d, pirq);
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
@@ -293,14 +293,14 @@ void hvm_dpci_eoi(struct domain *d, unsi
return;
}
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
hvm_irq_dpci = domain_get_irq_dpci(d);
if((hvm_irq_dpci == NULL) ||
(guest_gsi >= NR_ISAIRQS &&
!hvm_irq_dpci->girq[guest_gsi].valid) )
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return;
}
@@ -322,5 +322,5 @@ void hvm_dpci_eoi(struct domain *d, unsi
pirq_guest_eoi(d, machine_gsi);
}
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
Index: xen-3.3.1-testing/xen/drivers/passthrough/pci.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/pci.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/pci.c
@@ -165,7 +165,7 @@ static void pci_clean_dpci_irqs(struct d
if ( !is_hvm_domain(d) && !need_iommu(d) )
return;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci != NULL )
{
@@ -189,7 +189,7 @@ static void pci_clean_dpci_irqs(struct d
d->arch.hvm_domain.irq.dpci = NULL;
xfree(hvm_irq_dpci);
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
void pci_release_devices(struct domain *d)
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/x86/vtd.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/x86/vtd.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -93,13 +93,13 @@ void hvm_dpci_isairq_eoi(struct domain *
if ( !vtd_enabled)
return;
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
dpci = domain_get_irq_dpci(d);
if ( !dpci || !test_bit(isairq, dpci->isairq_map) )
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
return;
}
/* Multiple mirq may be mapped to one isa irq */
@@ -121,5 +121,5 @@ void hvm_dpci_isairq_eoi(struct domain *
}
}
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}
Index: xen-3.3.1-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/domain.h
+++ xen-3.3.1-testing/xen/include/asm-x86/domain.h
@@ -235,7 +235,7 @@ struct arch_domain
/* Shadow translated domain: P2M mapping */
pagetable_t phys_table;
- /* NB. protected by d->evtchn_lock and by irq_desc[vector].lock */
+ /* NB. protected by d->event_lock and by irq_desc[vector].lock */
int vector_pirq[NR_VECTORS];
int pirq_vector[NR_PIRQS];
Index: xen-3.3.1-testing/xen/include/asm-x86/hvm/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/hvm/irq.h
+++ xen-3.3.1-testing/xen/include/asm-x86/hvm/irq.h
@@ -63,7 +63,7 @@ struct hvm_girq_dpci_mapping {
#define NR_ISAIRQS 16
#define NR_LINK 4
-/* Protected by domain's evtchn_lock */
+/* Protected by domain's event_lock */
struct hvm_irq_dpci {
/* Machine IRQ to guest device/intx mapping. */
DECLARE_BITMAP(mapping, NR_PIRQS);
Index: xen-3.3.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/sched.h
+++ xen-3.3.1-testing/xen/include/xen/sched.h
@@ -188,7 +188,7 @@ struct domain
/* Event channel information. */
struct evtchn *evtchn[NR_EVTCHN_BUCKETS];
- spinlock_t evtchn_lock;
+ spinlock_t event_lock;
struct grant_table *grant_table;
Index: xen-3.3.1-testing/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
===================================================================
--- xen-3.3.1-testing.orig/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
+++ xen-3.3.1-testing/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
@@ -248,11 +248,11 @@ ste_init_state(struct acm_sized_buffer *
/* a) check for event channel conflicts */
for ( bucket = 0; bucket < NR_EVTCHN_BUCKETS; bucket++ )
{
- spin_lock(&d->evtchn_lock);
+ spin_lock(&d->event_lock);
ports = d->evtchn[bucket];
if ( ports == NULL)
{
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
break;
}
@@ -280,7 +280,7 @@ ste_init_state(struct acm_sized_buffer *
printkd("%s: Policy violation in event channel domain "
"%x -> domain %x.\n",
__func__, d->domain_id, rdomid);
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
acm_array_append_tuple(errors,
ACM_EVTCHN_SHARING_VIOLATION,
@@ -288,7 +288,7 @@ ste_init_state(struct acm_sized_buffer *
goto out;
}
}
- spin_unlock(&d->evtchn_lock);
+ spin_unlock(&d->event_lock);
}


@@ -1,26 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1223900120 -3600
# Node ID a26194601c8f2b223e380bbb7153df7027e8d7f5
# Parent e4bddd01cb3ebe0c4a72603c722889b22d3943fd
x86: propagate return value of alloc_l1_table()
A blatant mistake of mine in the preemptable page table update changes
resulted in the return value of alloc_l1_table() being ignored.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
diff -r e4bddd01cb3e -r a26194601c8f xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Oct 13 10:09:09 2008 +0100
+++ b/xen/arch/x86/mm.c Mon Oct 13 13:15:20 2008 +0100
@@ -1883,8 +1883,7 @@ static int alloc_page_type(struct page_i
switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
- alloc_l1_table(page);
- rc = 0;
+ rc = alloc_l1_table(page);
break;
case PGT_l2_page_table:
rc = alloc_l2_table(page, type, preemptible);


@@ -1,78 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1224147160 -3600
# Node ID 98ff908a91b7e12d7ddc609853fa1237d1714dec
# Parent 819ab49deef1da3042d2c111d6c99c3de535dae0
vmx: set DR7 via DOMCTL_setvcpucontext
This patch is needed for a guest domain debugger
to support hardware watchpoints.
Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
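For context, DR7 is the debug-control register that the new hook
forwards into the VMCS. A minimal sketch of how a debugger in the
control domain might encode one 4-byte write watchpoint into
debugreg[7] before issuing DOMCTL_setvcpucontext (the bit layout
follows the x86 architecture manuals; the helper name is mine):

    static unsigned long dr7_write_watchpoint_4b(void)
    {
        unsigned long dr7 = 0;
        dr7 |= 1UL << 0;       /* L0: locally enable breakpoint 0 */
        dr7 |= 0x1UL << 16;    /* R/W0 = 01b: break on data writes */
        dr7 |= 0x3UL << 18;    /* LEN0 = 11b: watch a 4-byte range */
        return dr7;            /* 0xd0001 */
    }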
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -575,7 +575,10 @@ int arch_set_info_guest(
v->arch.guest_context.user_regs.eflags |= 2;
if ( is_hvm_vcpu(v) )
+ {
+ hvm_set_info_guest(v);
goto out;
+ }
/* Only CR0.TS is modifiable by guest or admin. */
v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS;
Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -1165,6 +1165,13 @@ static void vmx_set_uc_mode(struct vcpu
vpid_sync_all();
}
+static void vmx_set_info_guest(struct vcpu *v)
+{
+ vmx_vmcs_enter(v);
+ __vmwrite(GUEST_DR7, v->arch.guest_context.debugreg[7]);
+ vmx_vmcs_exit(v);
+}
+
static struct hvm_function_table vmx_function_table = {
.name = "VMX",
.domain_initialise = vmx_domain_initialise,
@@ -1195,7 +1202,8 @@ static struct hvm_function_table vmx_fun
.msr_read_intercept = vmx_msr_read_intercept,
.msr_write_intercept = vmx_msr_write_intercept,
.invlpg_intercept = vmx_invlpg_intercept,
- .set_uc_mode = vmx_set_uc_mode
+ .set_uc_mode = vmx_set_uc_mode,
+ .set_info_guest = vmx_set_info_guest
};
static unsigned long *vpid_bitmap;
Index: xen-3.3.1-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-3.3.1-testing/xen/include/asm-x86/hvm/hvm.h
@@ -128,6 +128,7 @@ struct hvm_function_table {
int (*msr_write_intercept)(struct cpu_user_regs *regs);
void (*invlpg_intercept)(unsigned long vaddr);
void (*set_uc_mode)(struct vcpu *v);
+ void (*set_info_guest)(struct vcpu *v);
};
extern struct hvm_function_table hvm_funcs;
@@ -311,4 +312,10 @@ int hvm_virtual_to_linear_addr(
unsigned int addr_size,
unsigned long *linear_addr);
+static inline void hvm_set_info_guest(struct vcpu *v)
+{
+ if ( hvm_funcs.set_info_guest )
+ return hvm_funcs.set_info_guest(v);
+}
+
#endif /* __ASM_X86_HVM_HVM_H__ */


@@ -1,44 +0,0 @@
diff -r 22c89412fc8c tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Wed Oct 15 15:58:09 2008 +0100
+++ b/tools/python/xen/xend/XendDomainInfo.py Sun Oct 19 22:08:10 2008 -0600
@@ -1502,23 +1502,18 @@ class XendDomainInfo:
return self.info['VCPUs_max']
def setVCpuCount(self, vcpus):
- if vcpus <= 0:
- raise XendError('Invalid VCPUs')
+ def vcpus_valid(n):
+            if n <= 0:
+                raise XendError('Zero or less VCPUs is invalid')
+            if self.domid >= 0 and n > self.info['VCPUs_max']:
+ raise XendError('Cannot set vcpus greater than max vcpus on running domain')
+ vcpus_valid(vcpus)
self.info['vcpu_avail'] = (1 << vcpus) - 1
if self.domid >= 0:
self.storeVm('vcpu_avail', self.info['vcpu_avail'])
- # update dom differently depending on whether we are adjusting
- # vcpu number up or down, otherwise _vcpuDomDetails does not
- # disable the vcpus
- if self.info['VCPUs_max'] > vcpus:
- # decreasing
- self._writeDom(self._vcpuDomDetails())
- self.info['VCPUs_live'] = vcpus
- else:
- # same or increasing
- self.info['VCPUs_live'] = vcpus
- self._writeDom(self._vcpuDomDetails())
+ self._writeDom(self._vcpuDomDetails())
+ self.info['VCPUs_live'] = vcpus
else:
if self.info['VCPUs_max'] > vcpus:
# decreasing
@@ -1528,7 +1523,7 @@ class XendDomainInfo:
for c in range(self.info['VCPUs_max'], vcpus):
self.info['cpus'].append(list())
self.info['VCPUs_max'] = vcpus
- xen.xend.XendDomain.instance().managed_config_save(self)
+ xen.xend.XendDomain.instance().managed_config_save(self)
log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
vcpus)


@@ -1,97 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1224512379 -3600
# Node ID 2a25fd94c6f207d5b9066a1d765697a5a680fc42
# Parent bf84c03c38eebc527786e96af4178f114a5bea41
VT-d: correct allocation failure checks
Checking the return value of map_domain_page() (and hence
map_vtd_domain_page()) against NULL is pointless; checking the return
value of alloc_domheap_page() (and thus alloc_pgtable_maddr()),
however, is mandatory.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
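The rule the patch enforces, as a short sketch (function names as in
the hunks below; the surrounding lines are my paraphrase):

    u64 maddr = alloc_pgtable_maddr();   /* allocation: returns 0 on failure */
    if ( maddr == 0 )
        return -ENOMEM;                  /* this check is mandatory */
    vaddr = map_vtd_domain_page(maddr);  /* mapping: cannot fail, so a
                                            NULL check would be dead code */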
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/intremap.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/intremap.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/intremap.c
@@ -458,7 +458,7 @@ int intremap_setup(struct iommu *iommu)
{
dprintk(XENLOG_WARNING VTDPREFIX,
"Cannot allocate memory for ir_ctrl->iremap_maddr\n");
- return -ENODEV;
+ return -ENOMEM;
}
ir_ctrl->iremap_index = -1;
}
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -220,10 +220,10 @@ static u64 addr_to_dma_page_maddr(struct
if ( !alloc )
break;
maddr = alloc_pgtable_maddr();
+ if ( !maddr )
+ break;
dma_set_pte_addr(*pte, maddr);
vaddr = map_vtd_domain_page(maddr);
- if ( !vaddr )
- break;
/*
* high level table always sets r/w, last level
@@ -236,8 +236,6 @@ static u64 addr_to_dma_page_maddr(struct
else
{
vaddr = map_vtd_domain_page(pte->val);
- if ( !vaddr )
- break;
}
if ( level == 2 )
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/qinval.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/qinval.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/qinval.c
@@ -430,7 +430,11 @@ int qinval_setup(struct iommu *iommu)
{
qi_ctrl->qinval_maddr = alloc_pgtable_maddr();
if ( qi_ctrl->qinval_maddr == 0 )
- panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n");
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "Cannot allocate memory for qi_ctrl->qinval_maddr\n");
+ return -ENOMEM;
+ }
flush->context = flush_context_qi;
flush->iotlb = flush_iotlb_qi;
}
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/x86/vtd.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/x86/vtd.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -41,17 +41,19 @@ u64 alloc_pgtable_maddr(void)
{
struct page_info *pg;
u64 *vaddr;
+ unsigned long mfn;
pg = alloc_domheap_page(NULL, 0);
- vaddr = map_domain_page(page_to_mfn(pg));
- if ( !vaddr )
+ if ( !pg )
return 0;
+ mfn = page_to_mfn(pg);
+ vaddr = map_domain_page(mfn);
memset(vaddr, 0, PAGE_SIZE);
iommu_flush_cache_page(vaddr);
unmap_domain_page(vaddr);
- return page_to_maddr(pg);
+ return (u64)mfn << PAGE_SHIFT_4K;
}
void free_pgtable_maddr(u64 maddr)


@@ -1,50 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1224519405 -3600
# Node ID 54d74fc0037ce688e79759ca632d3918f7aaa399
# Parent f4dab783b58b41f2c67a66d6d095887faec3c296
spinlock: Modify recursive spinlock definitions to support up to 4095 CPUs.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
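The arithmetic behind the new limits: recurse_cpu is a 12-bit field,
so it can encode 2^12 = 4096 values; 0xfff is reserved as the
"unowned" sentinel, leaving CPU ids 0 through 4094, i.e. at most 4095
CPUs (enforced by the BUILD_BUG_ON below). The 4-bit recurse_cnt caps
recursion depth at 15, which the ASSERT checks before each increment.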
--- a/xen/include/asm-x86/spinlock.h
+++ b/xen/include/asm-x86/spinlock.h
@@ -8,11 +8,11 @@
typedef struct {
volatile s16 lock;
- s8 recurse_cpu;
- u8 recurse_cnt;
+ u16 recurse_cpu:12;
+ u16 recurse_cnt:4;
} spinlock_t;
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 1, -1, 0 }
+#define SPIN_LOCK_UNLOCKED { 1, 0xfffu, 0 }
#define spin_lock_init(x) do { *(x) = (spinlock_t) SPIN_LOCK_UNLOCKED; } while(0)
#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0)
@@ -59,11 +59,15 @@ static inline int _raw_spin_trylock(spin
#define _raw_spin_lock_recursive(_lock) \
do { \
int cpu = smp_processor_id(); \
+ /* Don't allow overflow of recurse_cpu field. */ \
+ BUILD_BUG_ON(NR_CPUS > 0xfffu); \
if ( likely((_lock)->recurse_cpu != cpu) ) \
{ \
spin_lock(_lock); \
(_lock)->recurse_cpu = cpu; \
} \
+ /* We support only fairly shallow recursion, else the counter overflows. */ \
+ ASSERT((_lock)->recurse_cnt < 0xfu); \
(_lock)->recurse_cnt++; \
} while ( 0 )
@@ -71,7 +75,7 @@ static inline int _raw_spin_trylock(spin
do { \
if ( likely(--(_lock)->recurse_cnt == 0) ) \
{ \
- (_lock)->recurse_cpu = -1; \
+ (_lock)->recurse_cpu = 0xfffu; \
spin_unlock(_lock); \
} \
} while ( 0 )


@@ -1,75 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225113763 0
# Node ID 11c86c51a697dab2e4a49efe3dda139ea206f423
# Parent 101e50cffc7825065f4dd39610728a2ba3ea68b4
x86: fix domain cleanup
The preemptable page type handling changes modified free_page_type()
behavior without adjusting the call site in relinquish_memory(): Any
type reference left pending when leaving hypercall handlers is
associated with a page reference, and, when successful, free_page_type()
decrements the type refcount; hence relinquish_memory() must now also
drop the page reference.
Also, the recursion avoidance during domain shutdown somehow (probably
by me when I merged the patch up to a newer snapshot) got screwed up:
the avoidance logic in mm.c should short-circuit levels below the top
one currently being processed, rather than the top one itself.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1687,6 +1687,7 @@ static int relinquish_memory(
{
if ( free_page_type(page, x, 0) != 0 )
BUG();
+ put_page(page);
break;
}
}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1343,7 +1343,7 @@ static void free_l1_table(struct page_in
static int free_l2_table(struct page_info *page, int preemptible)
{
-#ifdef CONFIG_COMPAT
+#if defined(CONFIG_COMPAT) || defined(DOMAIN_DESTRUCT_AVOID_RECURSION)
struct domain *d = page_get_owner(page);
#endif
unsigned long pfn = page_to_mfn(page);
@@ -1351,6 +1351,11 @@ static int free_l2_table(struct page_inf
unsigned int i = page->nr_validated_ptes - 1;
int err = 0;
+#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
+ if ( d->arch.relmem == RELMEM_l3 )
+ return 0;
+#endif
+
pl2e = map_domain_page(pfn);
ASSERT(page->nr_validated_ptes);
@@ -1381,7 +1386,7 @@ static int free_l3_table(struct page_inf
int rc = 0;
#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l3 )
+ if ( d->arch.relmem == RELMEM_l4 )
return 0;
#endif
@@ -1424,11 +1429,6 @@ static int free_l4_table(struct page_inf
unsigned int i = page->nr_validated_ptes - !page->partial_pte;
int rc = 0;
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l4 )
- return 0;
-#endif
-
do {
if ( is_guest_l4_slot(d, i) )
rc = put_page_from_l4e(pl4e[i], pfn, preemptible);

View File

@ -1,19 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225114010 0
# Node ID 604ffa3cdcc48bbfcfe5e4ccd0af735ddc49d839
# Parent 15aed96c7b5cd5a435754a57db13cd72b386717a
x86: First fixmap entry (0) is invalid.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/include/asm-x86/fixmap.h
+++ b/xen/include/asm-x86/fixmap.h
@@ -29,6 +29,7 @@
* from the end of virtual memory backwards.
*/
enum fixed_addresses {
+ FIX_RESERVED, /* Index 0 is reserved since fix_to_virt(0) > FIXADDR_TOP. */
#ifdef __i386__
FIX_PAE_HIGHMEM_0,
FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,

View File

@ -1,58 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225114053 0
# Node ID 9bbb54fd9181644d2bdd3c7f93c2cba1dac1b719
# Parent 604ffa3cdcc48bbfcfe5e4ccd0af735ddc49d839
Constify arguments to unmap_domain_page() etc.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/x86_32/domain_page.c
+++ b/xen/arch/x86/x86_32/domain_page.c
@@ -114,7 +114,7 @@ void *map_domain_page(unsigned long mfn)
return (void *)va;
}
-void unmap_domain_page(void *va)
+void unmap_domain_page(const void *va)
{
unsigned int idx;
struct vcpu *v;
@@ -241,7 +241,7 @@ void *map_domain_page_global(unsigned lo
return (void *)va;
}
-void unmap_domain_page_global(void *va)
+void unmap_domain_page_global(const void *va)
{
unsigned long __va = (unsigned long)va;
l2_pgentry_t *pl2e;
--- a/xen/include/xen/domain_page.h
+++ b/xen/include/xen/domain_page.h
@@ -24,7 +24,7 @@ void *map_domain_page(unsigned long mfn)
* Pass a VA within a page previously mapped in the context of the
* currently-executing VCPU via a call to map_domain_page().
*/
-void unmap_domain_page(void *va);
+void unmap_domain_page(const void *va);
/*
* Similar to the above calls, except the mapping is accessible in all
@@ -32,7 +32,7 @@ void unmap_domain_page(void *va);
* mappings can also be unmapped from any context.
*/
void *map_domain_page_global(unsigned long mfn);
-void unmap_domain_page_global(void *va);
+void unmap_domain_page_global(const void *va);
#define DMCACHE_ENTRY_VALID 1U
#define DMCACHE_ENTRY_HELD 2U
@@ -75,7 +75,7 @@ map_domain_page_with_cache(unsigned long
}
static inline void
-unmap_domain_page_with_cache(void *va, struct domain_mmap_cache *cache)
+unmap_domain_page_with_cache(const void *va, struct domain_mmap_cache *cache)
{
ASSERT(cache != NULL);
cache->flags &= ~DMCACHE_ENTRY_HELD;
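
A small usage sketch of the constified interface (illustrative helper, not from the patch): a transient mapping used purely for reading can now be held through a const pointer and still be unmapped without a cast.

static uint64_t demo_read_first_qword(unsigned long mfn)
{
    const uint64_t *p = map_domain_page(mfn);
    uint64_t val = p[0];

    unmap_domain_page(p);   /* accepts const void * after this change */
    return val;
}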

View File

@ -1,206 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225114175 0
# Node ID 4413d53a8320809e93142ed599a81e1bfe5ae900
# Parent 9bbb54fd9181644d2bdd3c7f93c2cba1dac1b719
x86: highmem handling assistance hypercalls
While looking at the origin of very frequently executed hypercalls I
realized that the high page accessor functions in Linux would be good
candidates to handle in the hypervisor - clearing or copying to/from
a high page is a pretty frequent operation (provided there's enough
memory in the domain). While prior to the first submission I had only
measured kernel builds (where the results did not hint at a meaningful
improvement), I have now found time to do a more specific analysis:
page clearing improves by about 20%, while page copying doesn't seem to
benefit significantly (though that may be an effect of the simplistic
copy_page() implementation Xen currently uses) -
nevertheless I would think that if one function is supported by the
hypervisor, then the other should also be.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2431,6 +2431,29 @@ static inline cpumask_t vcpumask_to_pcpu
return pmask;
}
+#ifdef __i386__
+static inline void *fixmap_domain_page(unsigned long mfn)
+{
+ unsigned int cpu = smp_processor_id();
+ void *ptr = (void *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
+
+ l1e_write(fix_pae_highmem_pl1e - cpu,
+ l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+ flush_tlb_one_local(ptr);
+ return ptr;
+}
+static inline void fixunmap_domain_page(const void *ptr)
+{
+ unsigned int cpu = virt_to_fix((unsigned long)ptr) - FIX_PAE_HIGHMEM_0;
+
+ l1e_write(fix_pae_highmem_pl1e - cpu, l1e_empty());
+ this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
+}
+#else
+#define fixmap_domain_page(mfn) mfn_to_virt(mfn)
+#define fixunmap_domain_page(ptr) ((void)(ptr))
+#endif
+
int do_mmuext_op(
XEN_GUEST_HANDLE(mmuext_op_t) uops,
unsigned int count,
@@ -2700,6 +2723,66 @@ int do_mmuext_op(
break;
}
+ case MMUEXT_CLEAR_PAGE:
+ {
+ unsigned char *ptr;
+
+ okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
+ FOREIGNDOM, 0);
+ if ( unlikely(!okay) )
+ {
+ MEM_LOG("Error while clearing mfn %lx", mfn);
+ break;
+ }
+
+ /* A page is dirtied when it's being cleared. */
+ paging_mark_dirty(d, mfn);
+
+ ptr = fixmap_domain_page(mfn);
+ clear_page(ptr);
+ fixunmap_domain_page(ptr);
+
+ put_page_and_type(page);
+ break;
+ }
+
+ case MMUEXT_COPY_PAGE:
+ {
+ const unsigned char *src;
+ unsigned char *dst;
+ unsigned long src_mfn;
+
+ src_mfn = gmfn_to_mfn(FOREIGNDOM, op.arg2.src_mfn);
+ okay = get_page_from_pagenr(src_mfn, FOREIGNDOM);
+ if ( unlikely(!okay) )
+ {
+ MEM_LOG("Error while copying from mfn %lx", src_mfn);
+ break;
+ }
+
+ okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
+ FOREIGNDOM, 0);
+ if ( unlikely(!okay) )
+ {
+ put_page(mfn_to_page(src_mfn));
+ MEM_LOG("Error while copying to mfn %lx", mfn);
+ break;
+ }
+
+ /* A page is dirtied when it's being copied to. */
+ paging_mark_dirty(d, mfn);
+
+ src = map_domain_page(src_mfn);
+ dst = fixmap_domain_page(mfn);
+ copy_page(dst, src);
+ fixunmap_domain_page(dst);
+ unmap_domain_page(src);
+
+ put_page_and_type(page);
+ put_page(mfn_to_page(src_mfn));
+ break;
+ }
+
default:
MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
rc = -ENOSYS;
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -217,6 +217,8 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
case MMUEXT_PIN_L4_TABLE:
case MMUEXT_UNPIN_TABLE:
case MMUEXT_NEW_BASEPTR:
+ case MMUEXT_CLEAR_PAGE:
+ case MMUEXT_COPY_PAGE:
arg1 = XLAT_mmuext_op_arg1_mfn;
break;
default:
@@ -244,6 +246,9 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
case MMUEXT_INVLPG_MULTI:
arg2 = XLAT_mmuext_op_arg2_vcpumask;
break;
+ case MMUEXT_COPY_PAGE:
+ arg2 = XLAT_mmuext_op_arg2_src_mfn;
+ break;
default:
arg2 = -1;
break;
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -222,6 +222,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
#ifdef CONFIG_X86
if ( !is_hvm_vcpu(current) )
fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
+ (1U << XENFEAT_highmem_assist) |
(1U << XENFEAT_gnttab_map_avail_bits);
#endif
break;
--- a/xen/include/public/features.h
+++ b/xen/include/public/features.h
@@ -59,6 +59,9 @@
/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
#define XENFEAT_mmu_pt_update_preserve_ad 5
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist 6
+
/*
* If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
* available pte bits.
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -231,6 +231,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
* cmd: MMUEXT_SET_LDT
* linear_addr: Linear address of LDT base (NB. must be page-aligned).
* nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
*/
#define MMUEXT_PIN_L1_TABLE 0
#define MMUEXT_PIN_L2_TABLE 1
@@ -247,12 +254,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#define MMUEXT_FLUSH_CACHE 12
#define MMUEXT_SET_LDT 13
#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE 16
+#define MMUEXT_COPY_PAGE 17
#ifndef __ASSEMBLY__
struct mmuext_op {
unsigned int cmd;
union {
- /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
+ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+ * CLEAR_PAGE, COPY_PAGE */
xen_pfn_t mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
unsigned long linear_addr;
@@ -266,6 +276,8 @@ struct mmuext_op {
#else
void *vcpumask;
#endif
+ /* COPY_PAGE */
+ xen_pfn_t src_mfn;
} arg2;
};
typedef struct mmuext_op mmuext_op_t;
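
A guest-side usage sketch (assuming the standard PV hypercall wrappers HYPERVISOR_mmuext_op() and xen_feature(); the helper name is made up): clear a page by MFN in the hypervisor instead of mapping it first.

static int demo_clear_page_mfn(unsigned long mfn)
{
    struct mmuext_op op;

    if (!xen_feature(XENFEAT_highmem_assist))
        return -ENOSYS;          /* fall back to mapping + clear_page() */

    op.cmd = MMUEXT_CLEAR_PAGE;
    op.arg1.mfn = mfn;           /* machine frame to be cleared */
    return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
}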

View File

@ -1,57 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225193120 0
# Node ID 19549b9766fdd68380ded8efd975c41269ab2801
# Parent 2c20d026bb55722247c0d9ab81c125118a10346f
x86: Fix circular page reference destruction in relinquish_memory().
Tested by Jan Beulich; fixes a memory leak, but there is more to be
done here.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1687,7 +1687,6 @@ static int relinquish_memory(
{
if ( free_page_type(page, x, 0) != 0 )
BUG();
- put_page(page);
break;
}
}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1973,6 +1973,7 @@ int free_page_type(struct page_info *pag
page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
page->partial_pte = 0;
}
+
switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
@@ -1998,6 +1999,15 @@ int free_page_type(struct page_info *pag
BUG();
}
+ return rc;
+}
+
+
+static int __put_final_page_type(
+ struct page_info *page, unsigned long type, int preemptible)
+{
+ int rc = free_page_type(page, type, preemptible);
+
/* No need for atomic update of type_info here: noone else updates it. */
if ( rc == 0 )
{
@@ -2062,7 +2072,7 @@ static int __put_page_type(struct page_i
x, nx)) != x) )
continue;
/* We cleared the 'valid bit' so we do the clean up. */
- return free_page_type(page, x, preemptible);
+ return __put_final_page_type(page, x, preemptible);
}
/*

View File

@ -1,22 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225285777 0
# Node ID ae100f264f6ad4e828de1ca2d228cccf6ed2bbfd
# Parent 183d2d7adc2f02db63aedaf199e3b006d2e4a053
x86: Fix relinquish_memory() for PGT_partial pages.
Original patch by Jan Beulich.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1687,6 +1687,8 @@ static int relinquish_memory(
{
if ( free_page_type(page, x, 0) != 0 )
BUG();
+ if ( x & PGT_partial )
+ page->u.inuse.type_info--;
break;
}
}

View File

@ -1,147 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225377468 0
# Node ID 9e5cf6778a6d1057900c3709f544ac176ddfab67
# Parent 112e81ae5824e213b181a65f944b729ba270d658
x86: eliminate domain cleanup hack in favor of using the preemptable
flavors of the respective functions.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1639,32 +1639,23 @@ static int relinquish_memory(
}
if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
- put_page_and_type(page);
+ ret = put_page_and_type_preemptible(page, 1);
+ switch ( ret )
+ {
+ case 0:
+ break;
+ case -EAGAIN:
+ case -EINTR:
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ put_page(page);
+ goto out;
+ default:
+ BUG();
+ }
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- /*
- * Forcibly drop reference counts of page tables above top most (which
- * were skipped to prevent long latencies due to deep recursion - see
- * the special treatment in free_lX_table()).
- */
- y = page->u.inuse.type_info;
- if ( (type < PGT_root_page_table) &&
- unlikely(((y + PGT_type_mask) &
- (PGT_type_mask|PGT_validated)) == type) )
- {
- BUG_ON((y & PGT_count_mask) >=
- (page->count_info & PGC_count_mask));
- while ( y & PGT_count_mask )
- {
- put_page_and_type(page);
- y = page->u.inuse.type_info;
- }
- }
-#endif
-
/*
* Forcibly invalidate top-most, still valid page tables at this point
* to break circular 'linear page table' references as well as clean up
@@ -1685,8 +1676,23 @@ static int relinquish_memory(
x & ~(PGT_validated|PGT_partial));
if ( likely(y == x) )
{
- if ( free_page_type(page, x, 0) != 0 )
+ /* No need for atomic update of type_info here: noone else updates it. */
+ switch ( ret = free_page_type(page, x, 1) )
+ {
+ case 0:
+ break;
+ case -EINTR:
+ page->u.inuse.type_info |= PGT_validated;
+ put_page(page);
+ ret = -EAGAIN;
+ goto out;
+ case -EAGAIN:
+ page->u.inuse.type_info |= PGT_partial;
+ put_page(page);
+ goto out;
+ default:
BUG();
+ }
if ( x & PGT_partial )
page->u.inuse.type_info--;
break;
@@ -1833,11 +1839,6 @@ int domain_relinquish_resources(struct d
/* fallthrough */
case RELMEM_done:
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- ret = relinquish_memory(d, &d->page_list, PGT_l1_page_table);
- if ( ret )
- return ret;
-#endif
break;
default:
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1343,7 +1343,7 @@ static void free_l1_table(struct page_in
static int free_l2_table(struct page_info *page, int preemptible)
{
-#if defined(CONFIG_COMPAT) || defined(DOMAIN_DESTRUCT_AVOID_RECURSION)
+#ifdef CONFIG_COMPAT
struct domain *d = page_get_owner(page);
#endif
unsigned long pfn = page_to_mfn(page);
@@ -1351,11 +1351,6 @@ static int free_l2_table(struct page_inf
unsigned int i = page->nr_validated_ptes - 1;
int err = 0;
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l3 )
- return 0;
-#endif
-
pl2e = map_domain_page(pfn);
ASSERT(page->nr_validated_ptes);
@@ -1385,11 +1380,6 @@ static int free_l3_table(struct page_inf
unsigned int i = page->nr_validated_ptes - !page->partial_pte;
int rc = 0;
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l4 )
- return 0;
-#endif
-
pl3e = map_domain_page(pfn);
do {
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -41,14 +41,6 @@
#define CONFIG_HOTPLUG 1
#define CONFIG_HOTPLUG_CPU 1
-/*
- * Avoid deep recursion when tearing down pagetables during domain destruction,
- * causing dom0 to become unresponsive and Xen to miss time-critical softirq
- * deadlines. This will ultimately be replaced by built-in preemptibility of
- * get_page_type().
- */
-#define DOMAIN_DESTRUCT_AVOID_RECURSION 1
-
#define HZ 100
#define OPT_CONSOLE_STR "vga"

View File

@ -1,510 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225378404 0
# Node ID ed30f4efb728980ba84c34fc7fdc7be5f5a4a78e
# Parent 9e5cf6778a6d1057900c3709f544ac176ddfab67
x86: fix preemptable page type handling
- retain a page reference when PGT_partial is set on a page (and drop
  it when clearing that flag)
- don't drop a page reference that was never acquired when freeing the
  page type of a page where the allocation of the type got preempted
  (and never completed)
- don't acquire a page reference when allocating the page type of a
  page where freeing the type got preempted (and never completed, and
  hence didn't drop the respective reference)
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1683,18 +1683,24 @@ static int relinquish_memory(
break;
case -EINTR:
page->u.inuse.type_info |= PGT_validated;
+ if ( x & PGT_partial )
+ put_page(page);
put_page(page);
ret = -EAGAIN;
goto out;
case -EAGAIN:
page->u.inuse.type_info |= PGT_partial;
- put_page(page);
+ if ( x & PGT_partial )
+ put_page(page);
goto out;
default:
BUG();
}
if ( x & PGT_partial )
+ {
page->u.inuse.type_info--;
+ put_page(page);
+ }
break;
}
}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -566,19 +566,21 @@ static int get_page_from_pagenr(unsigned
static int get_page_and_type_from_pagenr(unsigned long page_nr,
unsigned long type,
struct domain *d,
+ int partial,
int preemptible)
{
struct page_info *page = mfn_to_page(page_nr);
int rc;
- if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
+ if ( likely(partial >= 0) &&
+ unlikely(!get_page_from_pagenr(page_nr, d)) )
return -EINVAL;
rc = (preemptible ?
get_page_type_preemptible(page, type) :
(get_page_type(page, type) ? 0 : -EINVAL));
- if ( rc )
+ if ( unlikely(rc) && partial >= 0 )
put_page(page);
return rc;
@@ -761,7 +763,7 @@ get_page_from_l2e(
}
rc = get_page_and_type_from_pagenr(
- l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+ l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
rc = 0;
@@ -772,7 +774,7 @@ get_page_from_l2e(
define_get_linear_pagetable(l3);
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible)
{
int rc;
@@ -786,7 +788,7 @@ get_page_from_l3e(
}
rc = get_page_and_type_from_pagenr(
- l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
+ l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
rc = 0;
@@ -797,7 +799,7 @@ get_page_from_l3e(
define_get_linear_pagetable(l4);
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible)
{
int rc;
@@ -811,7 +813,7 @@ get_page_from_l4e(
}
rc = get_page_and_type_from_pagenr(
- l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
+ l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
rc = 0;
@@ -961,23 +963,32 @@ static int put_page_from_l2e(l2_pgentry_
return 1;
}
+static int __put_page_type(struct page_info *, int preemptible);
static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- int preemptible)
+ int partial, int preemptible)
{
if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
(l3e_get_pfn(l3e) != pfn) )
+ {
+ if ( unlikely(partial > 0) )
+ return __put_page_type(l3e_get_page(l3e), preemptible);
return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+ }
return 1;
}
#if CONFIG_PAGING_LEVELS >= 4
static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- int preemptible)
+ int partial, int preemptible)
{
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
(l4e_get_pfn(l4e) != pfn) )
+ {
+ if ( unlikely(partial > 0) )
+ return __put_page_type(l4e_get_page(l4e), preemptible);
return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+ }
return 1;
}
#endif
@@ -1184,7 +1195,7 @@ static int alloc_l3_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
unsigned int i;
- int rc = 0;
+ int rc = 0, partial = page->partial_pte;
#if CONFIG_PAGING_LEVELS == 3
/*
@@ -1213,7 +1224,8 @@ static int alloc_l3_table(struct page_in
if ( is_pv_32on64_domain(d) )
memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
- for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
+ for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
+ i++, partial = 0 )
{
if ( is_pv_32bit_domain(d) && (i == 3) )
{
@@ -1224,16 +1236,17 @@ static int alloc_l3_table(struct page_in
rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
PGT_l2_page_table |
PGT_pae_xen_l2,
- d, preemptible);
+ d, partial, preemptible);
}
else if ( !is_guest_l3_slot(i) ||
- (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
+ (rc = get_page_from_l3e(pl3e[i], pfn, d,
+ partial, preemptible)) > 0 )
continue;
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: 1;
}
else if ( rc == -EINTR && i )
{
@@ -1257,7 +1270,7 @@ static int alloc_l3_table(struct page_in
if ( !is_guest_l3_slot(i) )
continue;
unadjust_guest_l3e(pl3e[i], d);
- put_page_from_l3e(pl3e[i], pfn, 0);
+ put_page_from_l3e(pl3e[i], pfn, 0, 0);
}
}
@@ -1272,18 +1285,20 @@ static int alloc_l4_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
unsigned int i;
- int rc = 0;
+ int rc = 0, partial = page->partial_pte;
- for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
+ for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
+ i++, partial = 0 )
{
if ( !is_guest_l4_slot(d, i) ||
- (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
+ (rc = get_page_from_l4e(pl4e[i], pfn, d,
+ partial, preemptible)) > 0 )
continue;
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: 1;
}
else if ( rc == -EINTR )
{
@@ -1299,7 +1314,7 @@ static int alloc_l4_table(struct page_in
MEM_LOG("Failure in alloc_l4_table: entry %d", i);
while ( i-- > 0 )
if ( is_guest_l4_slot(d, i) )
- put_page_from_l4e(pl4e[i], pfn, 0);
+ put_page_from_l4e(pl4e[i], pfn, 0, 0);
}
if ( rc < 0 )
return rc;
@@ -1377,19 +1392,20 @@ static int free_l3_table(struct page_inf
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
- unsigned int i = page->nr_validated_ptes - !page->partial_pte;
- int rc = 0;
+ int rc = 0, partial = page->partial_pte;
+ unsigned int i = page->nr_validated_ptes - !partial;
pl3e = map_domain_page(pfn);
do {
if ( is_guest_l3_slot(i) )
{
- rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
+ rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
+ if ( rc < 0 )
+ break;
+ partial = 0;
if ( rc > 0 )
continue;
- if ( rc )
- break;
unadjust_guest_l3e(pl3e[i], d);
}
} while ( i-- );
@@ -1399,7 +1415,7 @@ static int free_l3_table(struct page_inf
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: -1;
}
else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
{
@@ -1416,18 +1432,21 @@ static int free_l4_table(struct page_inf
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
- unsigned int i = page->nr_validated_ptes - !page->partial_pte;
- int rc = 0;
+ int rc = 0, partial = page->partial_pte;
+ unsigned int i = page->nr_validated_ptes - !partial;
do {
if ( is_guest_l4_slot(d, i) )
- rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
- } while ( rc >= 0 && i-- );
+ rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
+ if ( rc < 0 )
+ break;
+ partial = 0;
+ } while ( i-- );
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: -1;
}
else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
{
@@ -1703,7 +1722,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
return rc ? 0 : -EFAULT;
}
- rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
+ rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
return page_unlock(l3pg), rc;
rc = 0;
@@ -1732,7 +1751,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
}
page_unlock(l3pg);
- put_page_from_l3e(ol3e, pfn, 0);
+ put_page_from_l3e(ol3e, pfn, 0, 0);
return rc;
}
@@ -1781,7 +1800,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
return rc ? 0 : -EFAULT;
}
- rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
+ rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
return page_unlock(l4pg), rc;
rc = 0;
@@ -1802,7 +1821,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
}
page_unlock(l4pg);
- put_page_from_l4e(ol4e, pfn, 0);
+ put_page_from_l4e(ol4e, pfn, 0, 0);
return rc;
}
@@ -1866,6 +1885,10 @@ static int alloc_page_type(struct page_i
struct domain *owner = page_get_owner(page);
int rc;
+ /* Obtain an extra reference to retain if we set PGT_partial. */
+ if ( preemptible && !get_page(page, owner) )
+ return -EINVAL;
+
/* A page table is dirtied when its type count becomes non-zero. */
if ( likely(owner != NULL) )
paging_mark_dirty(owner, page_to_mfn(page));
@@ -1900,8 +1923,13 @@ static int alloc_page_type(struct page_i
if ( rc == -EAGAIN )
{
page->u.inuse.type_info |= PGT_partial;
+ return -EAGAIN;
}
- else if ( rc == -EINTR )
+
+ if ( preemptible )
+ put_page(page);
+
+ if ( rc == -EINTR )
{
ASSERT((page->u.inuse.type_info &
(PGT_count_mask|PGT_validated|PGT_partial)) == 1);
@@ -2029,8 +2057,13 @@ static int __put_final_page_type(
BUG_ON(rc != -EAGAIN);
wmb();
page->u.inuse.type_info |= PGT_partial;
+ /* Must skip put_page() below. */
+ preemptible = 0;
}
+ if ( preemptible )
+ put_page(page);
+
return rc;
}
@@ -2040,6 +2073,10 @@ static int __put_page_type(struct page_i
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ /* Obtain an extra reference to retain if we set PGT_partial. */
+ if ( preemptible && !get_page(page, page_get_owner(page)) )
+ return -EINVAL;
+
for ( ; ; )
{
x = y;
@@ -2061,6 +2098,8 @@ static int __put_page_type(struct page_i
if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
x, nx)) != x) )
continue;
+ if ( x & PGT_partial )
+ put_page(page);
/* We cleared the 'valid bit' so we do the clean up. */
return __put_final_page_type(page, x, preemptible);
}
@@ -2081,9 +2120,16 @@ static int __put_page_type(struct page_i
break;
if ( preemptible && hypercall_preempt_check() )
+ {
+ if ( preemptible )
+ put_page(page);
return -EINTR;
+ }
}
+ if ( preemptible )
+ put_page(page);
+
return 0;
}
@@ -2187,7 +2233,11 @@ static int __get_page_type(struct page_i
}
if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
+ {
+ if ( (x & PGT_partial) && !(nx & PGT_partial) )
+ put_page(page);
break;
+ }
if ( preemptible && hypercall_preempt_check() )
return -EINTR;
@@ -2296,7 +2346,7 @@ int new_guest_cr3(unsigned long mfn)
#endif
okay = paging_mode_refcounts(d)
? get_page_from_pagenr(mfn, d)
- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
+ : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new baseptr %lx", mfn);
@@ -2540,7 +2590,7 @@ int do_mmuext_op(
if ( paging_mode_refcounts(FOREIGNDOM) )
break;
- rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
+ rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 0, 1);
okay = !rc;
if ( unlikely(!okay) )
{
@@ -2621,7 +2671,7 @@ int do_mmuext_op(
okay = get_page_from_pagenr(mfn, d);
else
okay = !get_page_and_type_from_pagenr(
- mfn, PGT_root_page_table, d, 0);
+ mfn, PGT_root_page_table, d, 0, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new mfn %lx", mfn);
@@ -2728,7 +2778,7 @@ int do_mmuext_op(
unsigned char *ptr;
okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
- FOREIGNDOM, 0);
+ FOREIGNDOM, 0, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while clearing mfn %lx", mfn);
@@ -2761,7 +2811,7 @@ int do_mmuext_op(
}
okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
- FOREIGNDOM, 0);
+ FOREIGNDOM, 0, 0);
if ( unlikely(!okay) )
{
put_page(mfn_to_page(src_mfn));
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -61,12 +61,36 @@ struct page_info
/*
* When PGT_partial is true then this field is valid and indicates
* that PTEs in the range [0, @nr_validated_ptes) have been validated.
- * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
- * partially validated.
+ * An extra page reference must be acquired (or not dropped) whenever
+ * PGT_partial gets set, and it must be dropped when the flag gets
+ * cleared. This is so that a get() leaving a page in partially
+ * validated state (where the caller would drop the reference acquired
+ * due to the getting of the type [apparently] failing [-EAGAIN])
+ * would not accidentally result in a page left with zero general
+ * reference count, but non-zero type reference count (possible when
+ * the partial get() is followed immediately by domain destruction).
+ * Likewise, the ownership of the single type reference for partially
+ * (in-)validated pages is tied to this flag, i.e. the instance
+ * setting the flag must not drop that reference, whereas the instance
+ * clearing it will have to.
+ *
+ * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
+ * been partially validated. This implies that the general reference
+ * to the page (acquired from get_page_from_lNe()) would be dropped
+ * (again due to the apparent failure) and hence must be re-acquired
+ * when resuming the validation, but must not be dropped when picking
+ * up the page for invalidation.
+ *
+ * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
+ * been partially invalidated. This is basically the opposite case of
+ * above, i.e. the general reference to the page was not dropped in
+ * put_page_from_lNe() (due to the apparent failure), and hence it
+ * must be dropped when the put operation is resumed (and completes),
+ * but it must not be acquired if picking up the page for validation.
*/
struct {
u16 nr_validated_ptes;
- bool_t partial_pte;
+ s8 partial_pte;
};
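
To summarize the long comment above, a standalone sketch of the tri-state @partial_pte encoding (demo names, not Xen's):

enum demo_partial_pte {
    DEMO_PTE_NONE         =  0, /* no partially handled entry */
    DEMO_PTE_VALIDATING   = +1, /* get_page_from_lNe() preempted: the
                                   general ref was dropped and must be
                                   re-acquired when validation resumes */
    DEMO_PTE_INVALIDATING = -1, /* put_page_from_lNe() preempted: the
                                   general ref was kept and must be
                                   dropped when the put completes */
};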
/*

View File

@ -1,99 +0,0 @@
Index: xen-3.3.1-testing/tools/python/xen/xend/server/iopif.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/server/iopif.py
+++ xen-3.3.1-testing/tools/python/xen/xend/server/iopif.py
@@ -45,9 +45,22 @@ def parse_ioport(val):
class IOPortsController(DevController):
+ valid_cfg = ['to', 'from', 'uuid']
+
def __init__(self, vm):
DevController.__init__(self, vm)
+ def getDeviceConfiguration(self, devid, transaction = None):
+ result = DevController.getDeviceConfiguration(self, devid, transaction)
+ if transaction is None:
+ devinfo = self.readBackend(devid, *self.valid_cfg)
+ else:
+ devinfo = self.readBackendTxn(transaction, devid, *self.valid_cfg)
+ config = dict(zip(self.valid_cfg, devinfo))
+ config = dict([(key, val) for key, val in config.items()
+ if val != None])
+ return config
+
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
@@ -81,4 +94,9 @@ class IOPortsController(DevController):
'ioports: Failed to configure legacy i/o range: %s - %s' %
(io_from, io_to))
- return (None, {}, {})
+ back = dict([(k, config[k]) for k in self.valid_cfg if k in config])
+ return (self.allocateDeviceID(), back, {})
+
+ def waitForDevice(self, devid):
+ # don't wait for hotplug
+ return
Index: xen-3.3.1-testing/tools/python/xen/xend/server/irqif.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/server/irqif.py
+++ xen-3.3.1-testing/tools/python/xen/xend/server/irqif.py
@@ -39,6 +39,18 @@ class IRQController(DevController):
def __init__(self, vm):
DevController.__init__(self, vm)
+ valid_cfg = ['irq', 'uuid']
+
+ def getDeviceConfiguration(self, devid, transaction = None):
+ result = DevController.getDeviceConfiguration(self, devid, transaction)
+ if transaction is None:
+ devinfo = self.readBackend(devid, *self.valid_cfg)
+ else:
+ devinfo = self.readBackendTxn(transaction, devid, *self.valid_cfg)
+ config = dict(zip(self.valid_cfg, devinfo))
+ config = dict([(key, val) for key, val in config.items()
+ if val != None])
+ return config
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
@@ -75,4 +87,9 @@ class IRQController(DevController):
if rc < 0:
raise VmError(
'irq: Failed to map irq %x' % (pirq))
- return (None, {}, {})
+ back = dict([(k, config[k]) for k in self.valid_cfg if k in config])
+ return (self.allocateDeviceID(), back, {})
+
+ def waitForDevice(self, devid):
+ # don't wait for hotplug
+ return
Index: xen-3.3.1-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xm/create.py
+++ xen-3.3.1-testing/tools/python/xen/xm/create.py
@@ -1032,6 +1032,14 @@ def preprocess_ioports(vals):
ioports.append(hexd)
vals.ioports = ioports
+def preprocess_irq(vals):
+ if not vals.irq: return
+ irq = []
+ for v in vals.irq:
+ d = repr(v)
+ irq.append(d)
+ vals.irq = irq
+
def preprocess_vtpm(vals):
if not vals.vtpm: return
vtpms = []
@@ -1162,6 +1170,7 @@ def preprocess(vals):
preprocess_vscsi(vals)
preprocess_ioports(vals)
preprocess_ip(vals)
+ preprocess_irq(vals)
preprocess_nfs(vals)
preprocess_vnc(vals)
preprocess_vtpm(vals)

View File

@ -1,198 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1225708322 0
# Node ID 540483d2a98f3fbabf06961cc0cc52e3c59c245b
# Parent 303b1014f91e5fa0783a5d7095626a47e82db9d0
x86: simplify page reference handling for partially (in-)validated pages
Simplify general page reference management for preempted (partially
[in-]validated) pages: Reserve one reference that can be acquired
without the risk of overflowing the reference count, thus allowing a
simplified get_page() equivalent that cannot fail (but must be used
with care).
Doing this conversion pointed out a latent issue in the changes done
previously in this area: The extra reference must be acquired before
the 'normal' reference gets dropped, so the patch fixes this at once
in both the alloc_page_type() and free_page_type() paths (it's really
only the latter that failed to work with the change described above).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1856,7 +1856,8 @@ int get_page(struct page_info *page, str
nx = x + 1;
d = nd;
if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
- unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+ /* Keep one spare reference to be acquired by get_page_light(). */
+ unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
unlikely(d != _domain) ) /* Wrong owner? */
{
if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
@@ -1878,6 +1879,28 @@ int get_page(struct page_info *page, str
return 1;
}
+/*
+ * Special version of get_page() to be used exclusively when
+ * - a page is known to already have a non-zero reference count
+ * - the page does not need its owner to be checked
+ * - it will not be called more than once without dropping the thus
+ * acquired reference again.
+ * Due to get_page() reserving one reference, this call cannot fail.
+ */
+static void get_page_light(struct page_info *page)
+{
+ u32 x, nx, y = page->count_info;
+
+ do {
+ x = y;
+ nx = x + 1;
+ BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
+ BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
+ y = cmpxchg(&page->count_info, x, nx);
+ }
+ while ( unlikely(y != x) );
+}
+
static int alloc_page_type(struct page_info *page, unsigned long type,
int preemptible)
@@ -1885,10 +1908,6 @@ static int alloc_page_type(struct page_i
struct domain *owner = page_get_owner(page);
int rc;
- /* Obtain an extra reference to retain if we set PGT_partial. */
- if ( preemptible && !get_page(page, owner) )
- return -EINVAL;
-
/* A page table is dirtied when its type count becomes non-zero. */
if ( likely(owner != NULL) )
paging_mark_dirty(owner, page_to_mfn(page));
@@ -1922,14 +1941,10 @@ static int alloc_page_type(struct page_i
wmb();
if ( rc == -EAGAIN )
{
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
- return -EAGAIN;
}
-
- if ( preemptible )
- put_page(page);
-
- if ( rc == -EINTR )
+ else if ( rc == -EINTR )
{
ASSERT((page->u.inuse.type_info &
(PGT_count_mask|PGT_validated|PGT_partial)) == 1);
@@ -2044,8 +2059,8 @@ static int __put_final_page_type(
}
else if ( rc == -EINTR )
{
- ASSERT(!(page->u.inuse.type_info &
- (PGT_count_mask|PGT_validated|PGT_partial)));
+ ASSERT((page->u.inuse.type_info &
+ (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
if ( !(shadow_mode_enabled(page_get_owner(page)) &&
(page->count_info & PGC_page_table)) )
page->tlbflush_timestamp = tlbflush_current_time();
@@ -2056,14 +2071,10 @@ static int __put_final_page_type(
{
BUG_ON(rc != -EAGAIN);
wmb();
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
- /* Must skip put_page() below. */
- preemptible = 0;
}
- if ( preemptible )
- put_page(page);
-
return rc;
}
@@ -2072,10 +2083,7 @@ static int __put_page_type(struct page_i
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
-
- /* Obtain an extra reference to retain if we set PGT_partial. */
- if ( preemptible && !get_page(page, page_get_owner(page)) )
- return -EINVAL;
+ int rc = 0;
for ( ; ; )
{
@@ -2098,10 +2106,11 @@ static int __put_page_type(struct page_i
if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
x, nx)) != x) )
continue;
+ /* We cleared the 'valid bit' so we do the clean up. */
+ rc = __put_final_page_type(page, x, preemptible);
if ( x & PGT_partial )
put_page(page);
- /* We cleared the 'valid bit' so we do the clean up. */
- return __put_final_page_type(page, x, preemptible);
+ break;
}
/*
@@ -2120,17 +2129,10 @@ static int __put_page_type(struct page_i
break;
if ( preemptible && hypercall_preempt_check() )
- {
- if ( preemptible )
- put_page(page);
return -EINTR;
- }
}
- if ( preemptible )
- put_page(page);
-
- return 0;
+ return rc;
}
@@ -2138,6 +2140,7 @@ static int __get_page_type(struct page_i
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ int rc = 0;
ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
@@ -2233,11 +2236,7 @@ static int __get_page_type(struct page_i
}
if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
- {
- if ( (x & PGT_partial) && !(nx & PGT_partial) )
- put_page(page);
break;
- }
if ( preemptible && hypercall_preempt_check() )
return -EINTR;
@@ -2264,10 +2263,13 @@ static int __get_page_type(struct page_i
page->nr_validated_ptes = 0;
page->partial_pte = 0;
}
- return alloc_page_type(page, type, preemptible);
+ rc = alloc_page_type(page, type, preemptible);
}
- return 0;
+ if ( (x & PGT_partial) && !(nx & PGT_partial) )
+ put_page(page);
+
+ return rc;
}
void put_page_type(struct page_info *page)
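
A self-contained sketch of the invariant that makes get_page_light() infallible (demo counters, not Xen's): get_page() refuses to take the count to its maximum, always leaving one spare reference for the light variant.

#include <stdint.h>

#define DEMO_COUNT_MASK 0x7fffffffu

/* get_page()-style check: fail if the page is unallocated or if taking
 * this reference would consume the reserved spare. */
static int demo_get_page(uint32_t *count)
{
    uint32_t nx = *count + 1;

    if ((*count & DEMO_COUNT_MASK) == 0 ||
        ((nx + 1) & DEMO_COUNT_MASK) <= 1)
        return 0;
    *count = nx;
    return 1;
}

/* get_page_light(): consumes the spare; given the check above it cannot
 * overflow, provided it is never taken twice without being dropped. */
static void demo_get_page_light(uint32_t *count)
{
    *count += 1;
}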

View File

@ -1,78 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1226313701 0
# Node ID 40668908260c7667cc5a0b75862352016c52e38f
# Parent 832efb028a1dc72fb52edc11c958fd19f8542e48
vtd: fix interrupt remapping to handle SMI RTE's with uninitialized
reserved fields
Some BIOSes do not zero out reserved fields in IOAPIC RTEs.
clear_IO_APIC() zeroes out all RTEs except those with MSI delivery
type. This is a problem when the host OS converts the SMI delivery type
to some other type but leaves the reserved field uninitialized. This
can cause an out-of-bounds interrupt remapping table access if the
"format" field is 1 and the uninitialized "index" field has a value
larger than the maximum index of the interrupt remapping table.
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/dmar.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c
@@ -369,7 +369,9 @@ acpi_parse_one_rmrr(struct acpi_dmar_ent
if ( rmrr->base_address >= rmrr->end_address )
{
- dprintk(XENLOG_ERR VTDPREFIX, "RMRR is incorrect.\n");
+ dprintk(XENLOG_ERR VTDPREFIX,
+ "RMRR error: base_addr %"PRIx64" end_address %"PRIx64"\n",
+ rmrr->base_address, rmrr->end_address);
return -EFAULT;
}
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/intremap.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/intremap.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/intremap.c
@@ -201,7 +201,7 @@ unsigned int io_apic_read_remap_rte(
remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
- if ( remap_rte->format == 0 )
+ if ( (remap_rte->format == 0) || (old_rte.delivery_mode == dest_SMI) )
{
*IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
return *(IO_APIC_BASE(apic)+4);
@@ -247,6 +247,31 @@ void io_apic_write_remap_rte(
remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
+ if ( old_rte.delivery_mode == dest_SMI )
+ {
+ /* Some BIOS does not zero out reserve fields in IOAPIC
+ * RTE's. clear_IO_APIC() zeroes out all RTE's except for RTE
+ * with MSI delivery type. This is a problem when the host
+ * OS converts SMI delivery type to some other type but leaving
+ * the reserved field uninitialized. This can cause interrupt
+ * remapping table out of bound error if "format" field is 1
+ * and the "index" field has a value that is larger than
+ * the maximum index of interrupt remapping table.
+ */
+ if ( remap_rte->format == 1 )
+ {
+ remap_rte->format = 0;
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0);
+ *IO_APIC_BASE(apic) = reg + 1;
+ *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1);
+ }
+
+ *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
+ *(IO_APIC_BASE(apic)+4) = value;
+ return;
+ }
+
/* mask the interrupt while we change the intremap table */
saved_mask = remap_rte->mask;
remap_rte->mask = 1;
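
A standalone sketch of the guard this patch adds (field widths are illustrative, not the exact VT-d layout): an RTE still carrying SMI delivery must not be trusted as a remap-format entry, since its stale index bits could point past the remapping table.

struct demo_rte {
    unsigned int format:1;        /* 1 = remap-format entry */
    unsigned int delivery_mode:3; /* APIC delivery mode; SMI is 2 */
    unsigned int index:16;        /* remap table index when format==1 */
};

static int demo_rte_index_usable(const struct demo_rte *rte,
                                 unsigned int table_entries)
{
    if (rte->delivery_mode == 2)  /* dest_SMI: reserved bits are junk */
        return 0;
    return rte->format == 1 && rte->index < table_entries;
}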

View File

@ -1,150 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1226401587 0
# Node ID 76e90ac5067ef71f60b68ea0515f7f0466be5dca
# Parent beade55d67fc2c81adaaa552804e0b66dc25becb
xend: Restore CPU affinity on domain resume.
Move affinity-setting logic into its own function and call from
relevant places.
From: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -476,6 +476,7 @@ class XendDomainInfo:
if state in (DOM_STATE_SUSPENDED, DOM_STATE_HALTED):
try:
self._constructDomain()
+ self._setCPUAffinity()
self._storeVmDetails()
self._createChannels()
self._createDevices()
@@ -2131,6 +2132,64 @@ class XendDomainInfo:
raise XendError(str(exn))
+ def _setCPUAffinity(self):
+ """ Repin domain vcpus if a restricted cpus list is provided
+ """
+
+ def has_cpus():
+ if self.info['cpus'] is not None:
+ for c in self.info['cpus']:
+ if c:
+ return True
+ return False
+
+ if has_cpus():
+ for v in range(0, self.info['VCPUs_max']):
+ if self.info['cpus'][v]:
+ xc.vcpu_setaffinity(self.domid, v, self.info['cpus'][v])
+ else:
+ def find_relaxed_node(node_list):
+ import sys
+ nr_nodes = info['nr_nodes']
+ if node_list is None:
+ node_list = range(0, nr_nodes)
+ nodeload = [0]
+ nodeload = nodeload * nr_nodes
+ from xen.xend import XendDomain
+ doms = XendDomain.instance().list('all')
+ for dom in filter (lambda d: d.domid != self.domid, doms):
+ cpuinfo = dom.getVCPUInfo()
+ for vcpu in sxp.children(cpuinfo, 'vcpu'):
+ if sxp.child_value(vcpu, 'online') == 0: continue
+ cpumap = list(sxp.child_value(vcpu,'cpumap'))
+ for i in range(0, nr_nodes):
+ node_cpumask = info['node_to_cpu'][i]
+ for j in node_cpumask:
+ if j in cpumap:
+ nodeload[i] += 1
+ break
+ for i in range(0, nr_nodes):
+ if len(info['node_to_cpu'][i]) > 0 and i in node_list:
+ nodeload[i] = int(nodeload[i] * 16 / len(info['node_to_cpu'][i]))
+ else:
+ nodeload[i] = sys.maxint
+ index = nodeload.index( min(nodeload) )
+ return index
+
+ info = xc.physinfo()
+ if info['nr_nodes'] > 1:
+ node_memory_list = info['node_to_memory']
+ needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024
+ candidate_node_list = []
+ for i in range(0, info['nr_nodes']):
+ if node_memory_list[i] >= needmem and len(info['node_to_cpu'][i]) > 0:
+ candidate_node_list.append(i)
+ index = find_relaxed_node(candidate_node_list)
+ cpumask = info['node_to_cpu'][index]
+ for v in range(0, self.info['VCPUs_max']):
+ xc.vcpu_setaffinity(self.domid, v, cpumask)
+
+
def _initDomain(self):
log.debug('XendDomainInfo.initDomain: %s %s',
self.domid,
@@ -2150,58 +2209,7 @@ class XendDomainInfo:
# repin domain vcpus if a restricted cpus list is provided
# this is done prior to memory allocation to aide in memory
# distribution for NUMA systems.
- def has_cpus():
- if self.info['cpus'] is not None:
- for c in self.info['cpus']:
- if c:
- return True
- return False
-
- if has_cpus():
- for v in range(0, self.info['VCPUs_max']):
- if self.info['cpus'][v]:
- xc.vcpu_setaffinity(self.domid, v, self.info['cpus'][v])
- else:
- def find_relaxed_node(node_list):
- import sys
- nr_nodes = info['nr_nodes']
- if node_list is None:
- node_list = range(0, nr_nodes)
- nodeload = [0]
- nodeload = nodeload * nr_nodes
- from xen.xend import XendDomain
- doms = XendDomain.instance().list('all')
- for dom in filter (lambda d: d.domid != self.domid, doms):
- cpuinfo = dom.getVCPUInfo()
- for vcpu in sxp.children(cpuinfo, 'vcpu'):
- if sxp.child_value(vcpu, 'online') == 0: continue
- cpumap = list(sxp.child_value(vcpu,'cpumap'))
- for i in range(0, nr_nodes):
- node_cpumask = info['node_to_cpu'][i]
- for j in node_cpumask:
- if j in cpumap:
- nodeload[i] += 1
- break
- for i in range(0, nr_nodes):
- if len(info['node_to_cpu'][i]) > 0 and i in node_list:
- nodeload[i] = int(nodeload[i] * 16 / len(info['node_to_cpu'][i]))
- else:
- nodeload[i] = sys.maxint
- index = nodeload.index( min(nodeload) )
- return index
-
- info = xc.physinfo()
- if info['nr_nodes'] > 1:
- node_memory_list = info['node_to_memory']
- needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024
- candidate_node_list = []
- for i in range(0, info['nr_nodes']):
- if node_memory_list[i] >= needmem and len(info['node_to_cpu'][i]) > 0:
- candidate_node_list.append(i)
- index = find_relaxed_node(candidate_node_list)
- cpumask = info['node_to_cpu'][index]
- for v in range(0, self.info['VCPUs_max']):
- xc.vcpu_setaffinity(self.domid, v, cpumask)
+ self._setCPUAffinity()
# Use architecture- and image-specific calculations to determine
# the various headrooms necessary, given the raw configured

View File

@ -1,347 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1226491295 0
# Node ID 8e18dd41c6c7bb0980b29393b275c564cfb96437
# Parent 2bd99c5faa420612544a9d94e298332e0e72a86a
x86: reduce GDT switching
Both idle and HVM vCPU-s can easily run on the GDT mapped into general
hypervisor space (rather than that placed in per-vCPU virtual space).
This makes some of the additions done in c/s 18520 unnecessary.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Index: xen-3.3.1-testing/xen/arch/x86/cpu/common.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/cpu/common.c
+++ xen-3.3.1-testing/xen/arch/x86/cpu/common.c
@@ -564,7 +564,10 @@ void __cpuinit cpu_init(void)
{
int cpu = smp_processor_id();
struct tss_struct *t = &init_tss[cpu];
- char gdt_load[10];
+ struct desc_ptr gdt_desc = {
+ .base = (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY),
+ .limit = LAST_RESERVED_GDT_BYTE
+ };
if (cpu_test_and_set(cpu, cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -578,9 +581,7 @@ void __cpuinit cpu_init(void)
/* Install correct page table. */
write_ptbase(current);
- *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
- *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(current);
- asm volatile ( "lgdt %0" : "=m" (gdt_load) );
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
/* No nested task. */
asm volatile ("pushf ; andw $0xbfff,(%"__OP"sp) ; popf" );
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -309,12 +309,7 @@ int vcpu_initialise(struct vcpu *v)
if ( is_idle_domain(d) )
{
v->arch.schedule_tail = continue_idle_domain;
- if ( v->vcpu_id )
- v->arch.cr3 = d->vcpu[0]->arch.cr3;
- else if ( !*idle_vcpu )
- v->arch.cr3 = __pa(idle_pg_table);
- else if ( !(v->arch.cr3 = clone_idle_pagetable(v)) )
- return -ENOMEM;
+ v->arch.cr3 = __pa(idle_pg_table);
}
v->arch.guest_context.ctrlreg[4] =
@@ -1171,14 +1166,18 @@ static void paravirt_ctxt_switch_to(stru
}
}
+static inline int need_full_gdt(struct vcpu *v)
+{
+ return (!is_hvm_vcpu(v) && !is_idle_vcpu(v));
+}
+
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int i, cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
struct vcpu *p = per_cpu(curr_vcpu, cpu);
struct vcpu *n = current;
struct desc_struct *gdt;
- struct page_info *page;
struct desc_ptr gdt_desc;
ASSERT(p != n);
@@ -1207,16 +1206,19 @@ static void __context_switch(void)
gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu) :
per_cpu(compat_gdt_table, cpu);
- page = virt_to_page(gdt);
- for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ if ( need_full_gdt(n) )
{
- l1e_write(n->domain->arch.mm_perdomain_pt +
- (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE + i,
- l1e_from_page(page + i, __PAGE_HYPERVISOR));
+ struct page_info *page = virt_to_page(gdt);
+ unsigned int i;
+ for ( i = 0; i < NR_RESERVED_GDT_PAGES; i++ )
+ l1e_write(n->domain->arch.mm_perdomain_pt +
+ (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE + i,
+ l1e_from_page(page + i, __PAGE_HYPERVISOR));
}
- if ( p->vcpu_id != n->vcpu_id )
+ if ( need_full_gdt(p) &&
+ ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(n)) )
{
gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
gdt_desc.base = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
@@ -1225,8 +1227,10 @@ static void __context_switch(void)
write_ptbase(n);
- if ( p->vcpu_id != n->vcpu_id )
+ if ( need_full_gdt(n) &&
+ ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(p)) )
{
+ gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
gdt_desc.base = GDT_VIRT_START(n);
asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
}
Index: xen-3.3.1-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain_build.c
+++ xen-3.3.1-testing/xen/arch/x86/domain_build.c
@@ -707,6 +707,7 @@ int __init construct_dom0(
/* Install the new page tables. */
local_irq_disable();
+ /* We run on dom0's page tables for the final part of the build process. */
write_ptbase(v);
/* Copy the OS image and free temporary buffer. */
@@ -719,11 +720,11 @@ int __init construct_dom0(
(parms.virt_hypercall >= v_end) )
{
write_ptbase(current);
- local_irq_enable();
printk("Invalid HYPERCALL_PAGE field in ELF notes.\n");
return -1;
}
- hypercall_page_initialise(d, (void *)(unsigned long)parms.virt_hypercall);
+ hypercall_page_initialise(
+ d, (void *)(unsigned long)parms.virt_hypercall);
}
/* Copy the initial ramdisk. */
@@ -804,7 +805,7 @@ int __init construct_dom0(
xlat_start_info(si, XLAT_start_info_console_dom0);
#endif
- /* Reinstate the caller's page tables. */
+ /* Return to idle domain's page tables. */
write_ptbase(current);
local_irq_enable();
Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
+++ xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c
@@ -444,6 +444,8 @@ static void vmx_set_host_env(struct vcpu
{
unsigned int cpu = smp_processor_id();
+ __vmwrite(HOST_GDTR_BASE,
+ (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY));
__vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
__vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
@@ -541,9 +543,6 @@ static int construct_vmcs(struct vcpu *v
__vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
__vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
- /* Host GDTR base. */
- __vmwrite(HOST_GDTR_BASE, GDT_VIRT_START(v));
-
/* Host data selectors. */
__vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
__vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
Index: xen-3.3.1-testing/xen/arch/x86/setup.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/setup.c
+++ xen-3.3.1-testing/xen/arch/x86/setup.c
@@ -230,7 +230,6 @@ static void __init percpu_init_areas(voi
static void __init init_idle_domain(void)
{
struct domain *idle_domain;
- unsigned int i;
/* Domain creation requires that scheduler structures are initialised. */
scheduler_init();
@@ -243,12 +242,6 @@ static void __init init_idle_domain(void
idle_vcpu[0] = this_cpu(curr_vcpu) = current;
setup_idle_pagetable();
-
- for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
- idle_domain->arch.mm_perdomain_pt[FIRST_RESERVED_GDT_PAGE + i] =
- l1e_from_page(virt_to_page(boot_cpu_gdt_table) + i,
- __PAGE_HYPERVISOR);
-
}
static void __init srat_detect_node(int cpu)
@@ -456,6 +449,7 @@ void __init __start_xen(unsigned long mb
parse_video_info();
set_current((struct vcpu *)0xfffff000); /* debug sanity */
+ idle_vcpu[0] = current;
set_processor_id(0); /* needed early, for smp_processor_id() */
if ( cpu_has_efer )
rdmsrl(MSR_EFER, this_cpu(efer));
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
@@ -828,7 +828,7 @@ static int __devinit do_boot_cpu(int api
*/
{
unsigned long boot_error;
- unsigned int i;
+ unsigned int order;
int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
@@ -864,21 +864,21 @@ static int __devinit do_boot_cpu(int api
gdt = per_cpu(gdt_table, cpu);
if (gdt == boot_cpu_gdt_table) {
- i = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+ order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
#ifdef __x86_64__
#ifdef CONFIG_COMPAT
- page = alloc_domheap_pages(NULL, i,
+ page = alloc_domheap_pages(NULL, order,
MEMF_node(cpu_to_node(cpu)));
per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
memcpy(gdt, boot_cpu_compat_gdt_table,
NR_RESERVED_GDT_PAGES * PAGE_SIZE);
gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
#endif
- page = alloc_domheap_pages(NULL, i,
+ page = alloc_domheap_pages(NULL, order,
MEMF_node(cpu_to_node(cpu)));
per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
#else
- per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(i);
+ per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order);
#endif
memcpy(gdt, boot_cpu_gdt_table,
NR_RESERVED_GDT_PAGES * PAGE_SIZE);
@@ -886,13 +886,6 @@ static int __devinit do_boot_cpu(int api
gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
}
- for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
- v->domain->arch.mm_perdomain_pt
- [(v->vcpu_id << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE + i]
- = l1e_from_page(virt_to_page(gdt) + i,
- __PAGE_HYPERVISOR);
-
#ifdef __i386__
if (!per_cpu(doublefault_tss, cpu)) {
per_cpu(doublefault_tss, cpu) = alloc_xenheap_page();
Index: xen-3.3.1-testing/xen/arch/x86/x86_32/mm.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_32/mm.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_32/mm.c
@@ -132,30 +132,6 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
-unsigned long clone_idle_pagetable(struct vcpu *v)
-{
- unsigned int i;
- struct domain *d = v->domain;
- l3_pgentry_t *l3_table = v->arch.pae_l3_cache.table[0];
- l2_pgentry_t *l2_table = alloc_xenheap_page();
-
- if ( !l2_table )
- return 0;
-
- memcpy(l3_table, idle_pg_table, L3_PAGETABLE_ENTRIES * sizeof(*l3_table));
- l3_table[l3_table_offset(PERDOMAIN_VIRT_START)] =
- l3e_from_page(virt_to_page(l2_table), _PAGE_PRESENT);
-
- copy_page(l2_table, idle_pg_table_l2 +
- l3_table_offset(PERDOMAIN_VIRT_START) * L2_PAGETABLE_ENTRIES);
- for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
- l2_table[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
- l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
- __PAGE_HYPERVISOR);
-
- return __pa(l3_table);
-}
-
void __init zap_low_mappings(l2_pgentry_t *dom0_l2)
{
int i;
Index: xen-3.3.1-testing/xen/arch/x86/x86_64/mm.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/mm.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_64/mm.c
@@ -21,7 +21,6 @@
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
-#include <xen/numa.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
@@ -207,24 +206,6 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
-unsigned long clone_idle_pagetable(struct vcpu *v)
-{
- struct domain *d = v->domain;
- struct page_info *page = alloc_domheap_page(NULL,
- MEMF_node(vcpu_to_node(v)));
- l4_pgentry_t *l4_table = page_to_virt(page);
-
- if ( !page )
- return 0;
-
- copy_page(l4_table, idle_pg_table);
- l4_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
- l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
- __PAGE_HYPERVISOR);
-
- return __pa(l4_table);
-}
-
void __init zap_low_mappings(void)
{
BUG_ON(num_online_cpus() != 1);
Index: xen-3.3.1-testing/xen/include/asm-x86/page.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/page.h
+++ xen-3.3.1-testing/xen/include/asm-x86/page.h
@@ -278,7 +278,6 @@ extern unsigned int m2p_compat_vstart;
#endif
void paging_init(void);
void setup_idle_pagetable(void);
-unsigned long clone_idle_pagetable(struct vcpu *);
#endif /* !defined(__ASSEMBLY__) */
#define _PAGE_PRESENT 0x001U


@ -1,113 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1226593868 0
# Node ID a0910b1b5ec0c938f1c46437df6c28cbeff52c68
# Parent d44ad6db638c1308e5ee4a47509769c3cccbe1e8
x86: don't disable MSI in order to mask an IRQ
... as that's not really correct, and there are devices which can't
even cope with it. Instead, check whether an MSI IRQ can be masked,
and if it can't, treat it just like a level-triggered IO-APIC IRQ.
There's one other bug fix in here, correcting an off-by-one error in
the entry_nr range check in __pci_enable_msix().
Signed-off-by: Jan Beulich <jbeulich@novell.com>
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232549083 0
# Node ID af1d9af1a993001bdfdb81d9af1af4fd4a9d3852
# Parent 033945166a3a5f3078b1e583bc5e50871ef7e801
x86: Fix unmaskable MSI handling.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
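
The rule the patch encodes can be modeled in isolation. As a minimal
stand-alone sketch (hypothetical struct layout, not Xen's msi_desc): an
MSI-X vector always has per-vector masking, while plain MSI is maskable
only if the function advertises the optional per-vector mask bit;
everything else must fall back to the level-triggered treatment (final
ACK at EOI time).

    /* Stand-alone sketch of the maskability predicate (assumed layout). */
    #define CAP_ID_MSI   0x05           /* plain MSI capability ID  */
    #define CAP_ID_MSIX  0x11           /* MSI-X capability ID      */

    struct msi_attrib {
        unsigned char type;             /* CAP_ID_MSI or CAP_ID_MSIX      */
        unsigned char maskbit;          /* MSI only: per-vector mask bit? */
    };

    static int msi_is_maskable(const struct msi_attrib *a)
    {
        /* MSI-X is always maskable; MSI only with the optional mask bit. */
        return a->type != CAP_ID_MSI || a->maskbit;
    }
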
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1567,11 +1567,14 @@ static unsigned int startup_msi_vector(u
static void ack_msi_vector(unsigned int vector)
{
- ack_APIC_irq();
+ if ( msi_maskable_irq(irq_desc[vector].msi_desc) )
+ ack_APIC_irq(); /* ACKTYPE_NONE */
}
static void end_msi_vector(unsigned int vector)
{
+ if ( !msi_maskable_irq(irq_desc[vector].msi_desc) )
+ ack_APIC_irq(); /* ACKTYPE_EOI */
}
static void shutdown_msi_vector(unsigned int vector)
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -463,14 +463,19 @@ int pirq_acktype(struct domain *d, int i
/*
* Edge-triggered IO-APIC and LAPIC interrupts need no final
* acknowledgement: we ACK early during interrupt processing.
- * MSIs are treated as edge-triggered interrupts.
*/
if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
- !strcmp(desc->handler->typename, "local-APIC-edge") ||
- !strcmp(desc->handler->typename, "PCI-MSI") )
+ !strcmp(desc->handler->typename, "local-APIC-edge") )
return ACKTYPE_NONE;
/*
+ * MSIs are treated as edge-triggered interrupts, except
+ * when there is no proper way to mask them.
+ */
+ if ( desc->handler == &pci_msi_type )
+ return msi_maskable_irq(desc->msi_desc) ? ACKTYPE_NONE : ACKTYPE_EOI;
+
+ /*
* Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
* on which they were received. This is because we tickle the LAPIC to EOI.
*/
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -303,6 +303,13 @@ static void msix_flush_writes(unsigned i
}
}
+int msi_maskable_irq(const struct msi_desc *entry)
+{
+ BUG_ON(!entry);
+ return entry->msi_attrib.type != PCI_CAP_ID_MSI
+ || entry->msi_attrib.maskbit;
+}
+
static void msi_set_mask_bit(unsigned int irq, int flag)
{
struct msi_desc *entry = irq_desc[irq].msi_desc;
@@ -323,8 +330,6 @@ static void msi_set_mask_bit(unsigned in
mask_bits &= ~(1);
mask_bits |= flag;
pci_conf_write32(bus, slot, func, pos, mask_bits);
- } else {
- msi_set_enable(entry->dev, !flag);
}
break;
case PCI_CAP_ID_MSIX:
@@ -654,7 +659,7 @@ static int __pci_enable_msix(struct msi_
pos = pci_find_cap_offset(msi->bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
nr_entries = multi_msix_capable(control);
- if (msi->entry_nr > nr_entries)
+ if (msi->entry_nr >= nr_entries)
{
spin_unlock(&pdev->lock);
return -EINVAL;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -97,6 +97,8 @@ struct msi_desc {
int remap_index; /* index in interrupt remapping table */
};
+int msi_maskable_irq(const struct msi_desc *);
+
/*
* Assume the maximum number of hot plug slots supported by the system is about
* ten. The worstcase is that each of these slots is hot-added with a device,


@ -1,21 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1226672871 0
# Node ID 85198c4d4da516000d002f66fded65f11ef64ab6
# Parent 3ba83def85a234d49ac426f46100dc2a6bcda761
Fix to save CPU affinity for xm save/restore
Signed-off-by: Masaki Kanno <kanno.masaki@jp.fujitsu.com>
Index: xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py
@@ -1030,8 +1030,6 @@ class XendConfig(dict):
sxpr.append([name, s])
for xenapi, legacy in XENAPI_CFG_TO_LEGACY_CFG.items():
- if legacy in ('cpus'): # skip this
- continue
if self.has_key(xenapi) and self[xenapi] not in (None, []):
if type(self[xenapi]) == bool:
# convert booleans to ints before making an sxp item


@ -1,109 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1227006996 0
# Node ID ae891977a4d3f5d8d8330ed3796881867b4d88a8
# Parent 2604400f75e318dc9f5201e3626213290a89862a
x86, hvm: Implement interrupt routing to least priority processor.
References: bnc#429904
Instead of plain round-robin selection, the vcpu with the lowest
processor priority is chosen for the interrupt. If multiple vcpus
share the same lowest priority, interrupts are distributed among
them round-robin.
Signed-off-by: Juergen Gross <juergen.gross@fujitsu-siemens.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
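
The selection policy is easy to model on its own. A minimal sketch with
hypothetical arrays (not the vlapic structures): do one full pass
starting just after the previously chosen vcpu and keep the first
candidate seen at the strictly lowest priority, so vcpus tied at that
priority get picked in rotation.

    #include <limits.h>

    /* Sketch: lowest-priority pick with round-robin tie-breaking. */
    static int pick_lowest_prio(const unsigned int prio[], const int enabled[],
                                int n, int prev)
    {
        unsigned int best_prio = UINT_MAX;
        int i, best = -1;

        for ( i = 1; i <= n; i++ )
        {
            int idx = (prev + i) % n;       /* scan starts after 'prev'      */
            if ( enabled[idx] && prio[idx] < best_prio )
            {
                best = idx;                 /* strict '<' keeps the rotation */
                best_prio = prio[idx];
            }
        }
        return best;                        /* -1 if no eligible vcpu        */
    }
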
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -344,8 +344,8 @@ static void vioapic_deliver(struct hvm_h
}
else
#endif
- target = apic_round_robin(vioapic_domain(vioapic),
- vector, deliver_bitmask);
+ target = apic_lowest_prio(vioapic_domain(vioapic),
+ deliver_bitmask);
if ( target != NULL )
{
ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -377,26 +377,30 @@ static int vlapic_accept_irq(struct vcpu
}
/* This function is used by both ioapic and lapic.The bitmap is for vcpu_id. */
-struct vlapic *apic_round_robin(
- struct domain *d, uint8_t vector, uint32_t bitmap)
+struct vlapic *apic_lowest_prio(struct domain *d, uint32_t bitmap)
{
- int next, old;
- struct vlapic *target = NULL;
+ int old = d->arch.hvm_domain.irq.round_robin_prev_vcpu;
+ uint32_t ppr, target_ppr = UINT_MAX;
+ struct vlapic *vlapic, *target = NULL;
+ struct vcpu *v;
- old = next = d->arch.hvm_domain.irq.round_robin_prev_vcpu;
+ if ( unlikely((v = d->vcpu[old]) == NULL) )
+ return NULL;
do {
- if ( ++next == MAX_VIRT_CPUS )
- next = 0;
- if ( (d->vcpu[next] == NULL) || !test_bit(next, &bitmap) )
- continue;
- target = vcpu_vlapic(d->vcpu[next]);
- if ( vlapic_enabled(target) )
- break;
- target = NULL;
- } while ( next != old );
+ v = v->next_in_list ? : d->vcpu[0];
+ vlapic = vcpu_vlapic(v);
+ if ( test_bit(v->vcpu_id, &bitmap) && vlapic_enabled(vlapic) &&
+ ((ppr = vlapic_get_ppr(vlapic)) < target_ppr) )
+ {
+ target = vlapic;
+ target_ppr = ppr;
+ }
+ } while ( v->vcpu_id != old );
- d->arch.hvm_domain.irq.round_robin_prev_vcpu = next;
+ if ( target != NULL )
+ d->arch.hvm_domain.irq.round_robin_prev_vcpu =
+ vlapic_vcpu(target)->vcpu_id;
return target;
}
@@ -456,7 +460,7 @@ static int vlapic_ipi(
if ( delivery_mode == APIC_DM_LOWEST )
{
- target = apic_round_robin(vlapic_domain(v), vector, lpr_map);
+ target = apic_lowest_prio(vlapic_domain(v), lpr_map);
if ( target != NULL )
rc = vlapic_accept_irq(vlapic_vcpu(target), delivery_mode,
vector, level, trig_mode);
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -152,7 +152,7 @@ int vmsi_deliver(struct domain *d, int p
{
case dest_LowestPrio:
{
- target = apic_round_robin(d, vector, deliver_bitmask);
+ target = apic_lowest_prio(d, deliver_bitmask);
if ( target != NULL )
vmsi_inj_irq(d, target, vector, trig_mode, delivery_mode);
else
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -93,8 +93,7 @@ void vlapic_msr_set(struct vlapic *vlapi
int vlapic_accept_pic_intr(struct vcpu *v);
-struct vlapic *apic_round_robin(
- struct domain *d, uint8_t vector, uint32_t bitmap);
+struct vlapic *apic_lowest_prio(struct domain *d, uint32_t bitmap);
int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);


@ -1,35 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1227023966 0
# Node ID f09a1d5d4338eab9c593b63b8ae89ddf481a3681
# Parent ed8524f4a044efbd6d30f9340c6ddfb00f972407
x86, hvm: Fix domain restore bug with Intel VLAPIC acceleration.
r18383 marked video memory as RAM, causing all valid pages to be
migrated, including the vlapic page (0xFEE00) and the shared page
(0xFFFFF). An extra memory population for the lapic page would
override the previous mapping and cause an HVM guest with vlapic
acceleration to hang.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
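
The hardened check below amounts to: a foreign gfn may only be
installed in an L1 entry if the p2m both yields a valid mfn and
classifies the page as ordinary RAM, which excludes the special pages
mentioned above. A tiny stand-alone model, with hypothetical p2m type
names (the real predicate is p2m_is_ram()):

    /* Sketch of the acceptance test for a foreign frame (assumed types). */
    #define INVALID_MFN (~0UL)

    typedef enum { p2m_ram_rw, p2m_mmio_dm, p2m_invalid } p2m_type_t;

    static int foreign_frame_ok(unsigned long mfn, p2m_type_t t)
    {
        return (t == p2m_ram_rw) && (mfn != INVALID_MFN);
    }
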
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1541,6 +1541,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
struct domain *d = curr->domain;
unsigned long mfn;
struct page_info *l1pg = mfn_to_page(gl1mfn);
+ p2m_type_t p2mt;
int rc = 1;
page_lock(l1pg);
@@ -1558,8 +1559,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
/* Translate foreign guest addresses. */
- mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
- if ( unlikely(mfn == INVALID_MFN) )
+ mfn = mfn_x(gfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e), &p2mt));
+ if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
return page_unlock(l1pg), 0;
ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));


@ -1,29 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1227111099 0
# Node ID bddd2d344c5425bfd25564bc20f90c3776552c6e
# Parent 4107618ee0d8aceb517f43ffa79197a041ed4bcf
x86: secure ioapic_guest_write() against FREE_TO_ASSIGN irq values
Such entries are negative values other than -1, so the old '!= -1'
tests let them slip through; test for '>= 0' instead.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -2199,7 +2199,7 @@ int ioapic_guest_write(unsigned long phy
if ( new_rte.vector >= FIRST_DYNAMIC_VECTOR )
new_irq = vector_irq[new_rte.vector];
- if ( (old_irq != new_irq) && (old_irq != -1) && IO_APIC_IRQ(old_irq) )
+ if ( (old_irq != new_irq) && (old_irq >= 0) && IO_APIC_IRQ(old_irq) )
{
if ( irq_desc[IO_APIC_VECTOR(old_irq)].action )
{
@@ -2211,7 +2211,7 @@ int ioapic_guest_write(unsigned long phy
remove_pin_at_irq(old_irq, apic, pin);
}
- if ( (new_irq != -1) && IO_APIC_IRQ(new_irq) )
+ if ( (new_irq >= 0) && IO_APIC_IRQ(new_irq) )
{
if ( irq_desc[IO_APIC_VECTOR(new_irq)].action )
{


@ -1,20 +0,0 @@
Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -476,7 +476,14 @@ class XendDomainInfo:
if state in (DOM_STATE_SUSPENDED, DOM_STATE_HALTED):
try:
self._constructDomain()
- self._setCPUAffinity()
+
+ try:
+ self._setCPUAffinity()
+ except:
+ # usually a CPU we want to set affinity to does not exist
+ # we just ignore it so that the domain can still be restored
+ log.warn("Cannot restore CPU affinity")
+
self._storeVmDetails()
self._createChannels()
self._createDevices()


@ -1,65 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1227525080 0
# Node ID 0b8c6c91c5a408345e6ed650fb9f19e4fa9809b9
# Parent cd45b5c9561250b999476227dbc7f7ede377d3d4
pv-on-hvm drivers: build fixes for Linux 2.6.27+
Make the drivers build properly in a 2.6.27 environment, as well as
against a kernel with pv-ops Xen configured in (in the latter case
more work would be needed to also make the drivers work, as there is
a large number of duplicate exports).
Portions from Charles Arnold <carnold@novell.com>.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
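
Both fixes follow the usual kernel compat-shim idiom: select the right
API variant at compile time from LINUX_VERSION_CODE. A generic sketch
of the pattern (hypothetical macro name; the 2.6.27 change being that
smp_call_function() dropped its 'nonatomic' argument, as in the hunk
below):

    #include <linux/version.h>
    #include <linux/smp.h>

    #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
    /* pre-2.6.27: smp_call_function(func, info, nonatomic, wait) */
    #define run_on_other_cpus(fn, info) smp_call_function(fn, info, 0, 0)
    #else
    /* 2.6.27+: smp_call_function(func, info, wait) */
    #define run_on_other_cpus(fn, info) smp_call_function(fn, info, 0)
    #endif
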
--- a/unmodified_drivers/linux-2.6/balloon/Kbuild
+++ b/unmodified_drivers/linux-2.6/balloon/Kbuild
@@ -4,6 +4,5 @@ obj-m = xen-balloon.o
EXTRA_CFLAGS += -I$(M)/platform-pci
-xen-balloon-objs =
-xen-balloon-objs += balloon.o
-xen-balloon-objs += sysfs.o
+xen-balloon-y := balloon.o sysfs.o
+xen-balloon-$(CONFIG_XEN_SCRUB_PAGES) += scrub.o
--- a/unmodified_drivers/linux-2.6/mkbuildtree
+++ b/unmodified_drivers/linux-2.6/mkbuildtree
@@ -53,6 +53,7 @@ i[34567]86|x86_64)
ln -sf ${XL}/include/asm-x86/mach-xen/asm/synch_bitops*.h include/asm
ln -sf ${XL}/include/asm-x86/mach-xen/asm/maddr*.h include/asm
ln -sf ${XL}/include/asm-x86/mach-xen/asm/gnttab_dma.h include/asm
+ ln -sf ${XL}/arch/x86/lib/scrub.c balloon
else
if [ $uname = x86_64 ]; then
mkdir -p include/asm-i386
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
@@ -34,7 +34,11 @@ static void ap_suspend(void *_info)
atomic_dec(&info->nr_spinning);
}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
#define initiate_ap_suspend(i) smp_call_function(ap_suspend, i, 0, 0)
+#else
+#define initiate_ap_suspend(i) smp_call_function(ap_suspend, i, 0)
+#endif
#else /* !defined(CONFIG_SMP) */
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c
@@ -14,7 +14,11 @@ EXPORT_SYMBOL(system_state);
void ctrl_alt_del(void)
{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
kill_proc(1, SIGINT, 1); /* interrupt init */
+#else
+ kill_cad_pid(SIGINT, 1);
+#endif
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)


@ -1,42 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1227611877 0
# Node ID c2a018cdb45d7419aa068c2dc4894e06ec5097e3
# Parent e7c421510be96f456cd367d125d86f939d27d253
Fix PSE PAT handling in guest walk.
The guest walk code was checking for the _PAGE_PSE_PAT flag via
guest_l2e_get_flags(). The problem is that this function only returns
the low 12 bits of the PDE, while _PAGE_PSE_PAT actually sits at bit
12 (that is, the 13th bit). This caused the _PAGE_PAT bit to never be
set on splintered L1s.
Signed-off-by: Gianluca Guida <gianluca.guida@eu.citrix.com>
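
Since the bit positions are the whole story here, a stand-alone sketch
may help (hypothetical helper; the bit numbers are architectural): in a
PSE L2 entry, PAT lives at bit 12, outside the low 12 flag bits, and
must be copied to bit 7 (_PAGE_PAT) of every 4k L1 entry the superpage
is splintered into.

    #include <stdint.h>

    #define PDE_PSE_PAT (1u << 12)      /* PAT bit of a PSE (superpage) PDE */
    #define PTE_PAT     (1u <<  7)      /* PAT bit of a 4k PTE              */

    /* Sketch: propagate the superpage PAT bit into splintered L1 flags. */
    static uint32_t splinter_l1_flags(uint64_t pde, uint32_t l1_flags)
    {
        if ( pde & PDE_PSE_PAT )
            l1_flags |= PTE_PAT;
        else
            l1_flags &= ~PTE_PAT;
        return l1_flags;
    }
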
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -484,15 +484,15 @@ guest_walk_tables(struct vcpu *v, unsign
* access controls are enforced in the shadow l2e. */
int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
_PAGE_ACCESSED|_PAGE_DIRTY);
- /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
- * of the level 1. */
- if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) )
- flags |= _PAGE_PAT;
- /* Copy the cache-control bits to the l1 as well, because we
- * can't represent PAT in the (non-PSE) shadow l2e. :(
- * This could cause problems if a guest ever maps an area of
- * memory with superpages using more than one caching mode. */
- flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
+ /* Import cache-control bits. Note that _PAGE_PAT is actually
+ * _PAGE_PSE, and it is always set. We will clear it in case
+ * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */
+ flags |= (guest_l2e_get_flags(gw->l2e)
+ & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
+ if ( !(gfn_x(start) & 1) )
+ /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
+ flags &= ~_PAGE_PAT;
+
/* Increment the pfn by the right number of 4k pages.
* The ~0x1 is to mask out the PAT bit mentioned above. */
start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));


@ -1,312 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1227878852 0
# Node ID c820bf73a914f643ab48864629c0559e68ceede1
# Parent 8dbf23c89cc6a4fbd7b9063b14e706c065ba1678
x86: add a shared page indicating the need for an EOI notification
To simplify the interface for the guest: once a guest has used this
new (sub-)hypercall, the behavior of PHYSDEVOP_eoi changes so that it
also unmasks the corresponding event channel at once, avoiding the
possible need for a second hypercall from the guest.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
18846:
x86: Fix PHYSDEVOP_pirq_eoi_mfn, which I modified and broke.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
18851:
x86: Fix mfn_to_virt() to cast MFN to address size.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
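
From the guest's point of view the protocol is simple; a sketch of the
intended use, with a hypothetical helper name (the registration itself
is the PHYSDEVOP_pirq_eoi_mfn call added below): register one page as a
PIRQ-indexed bit array once, then consult it to decide whether
PHYSDEVOP_eoi is needed at all. When it is issued, it now also unmasks
the event channel, saving the second hypercall.

    /* Guest-side sketch: only EOI when Xen set our bit in the shared page. */
    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    static inline int pirq_needs_eoi(const unsigned long *eoi_map,
                                     unsigned int pirq)
    {
        return (eoi_map[pirq / BITS_PER_LONG] >> (pirq % BITS_PER_LONG)) & 1;
    }
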
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -1812,6 +1812,13 @@ int domain_relinquish_resources(struct d
unmap_vcpu_info(v);
}
+ if ( d->arch.pirq_eoi_map != NULL )
+ {
+ unmap_domain_page_global(d->arch.pirq_eoi_map);
+ put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn));
+ d->arch.pirq_eoi_map = NULL;
+ }
+
d->arch.relmem = RELMEM_xen;
/* fallthrough */
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -18,6 +18,7 @@
#include <xen/iommu.h>
#include <asm/msi.h>
#include <asm/current.h>
+#include <asm/flushtlb.h>
#include <public/physdev.h>
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
@@ -206,16 +207,42 @@ struct pending_eoi {
static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]);
#define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector)
+static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ set_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ clear_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static void _irq_guest_eoi(irq_desc_t *desc)
+{
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ unsigned int i, vector = desc - irq_desc;
+
+ if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
+ return;
+
+ for ( i = 0; i < action->nr_guests; ++i )
+ clear_pirq_eoi(action->guest[i],
+ domain_vector_to_irq(action->guest[i], vector));
+
+ desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
+ desc->handler->enable(vector);
+}
+
static struct timer irq_guest_eoi_timer[NR_IRQS];
static void irq_guest_eoi_timer_fn(void *data)
{
irq_desc_t *desc = data;
- unsigned vector = desc - irq_desc;
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
- desc->status &= ~IRQ_INPROGRESS;
- desc->handler->enable(vector);
+ _irq_guest_eoi(desc);
spin_unlock_irqrestore(&desc->lock, flags);
}
@@ -272,8 +299,22 @@ static void __do_IRQ_guest(int vector)
if ( already_pending == action->nr_guests )
{
- desc->handler->disable(vector);
stop_timer(&irq_guest_eoi_timer[vector]);
+ desc->handler->disable(vector);
+ desc->status |= IRQ_GUEST_EOI_PENDING;
+ for ( i = 0; i < already_pending; ++i )
+ {
+ d = action->guest[i];
+ set_pirq_eoi(d, domain_vector_to_irq(d, vector));
+ /*
+ * Could check here whether the guest unmasked the event by now
+ * (or perhaps just re-issue the send_guest_pirq()), and if it
+ * can now accept the event,
+ * - clear all the pirq_eoi bits we already set,
+ * - re-enable the vector, and
+ * - skip the timer setup below.
+ */
+ }
init_timer(&irq_guest_eoi_timer[vector],
irq_guest_eoi_timer_fn, desc, smp_processor_id());
set_timer(&irq_guest_eoi_timer[vector], NOW() + MILLISECS(1));
@@ -382,8 +423,12 @@ static void __pirq_guest_eoi(struct doma
action = (irq_guest_action_t *)desc->action;
vector = desc - irq_desc;
- ASSERT(!test_bit(irq, d->pirq_mask) ||
- (action->ack_type != ACKTYPE_NONE));
+ if ( action->ack_type == ACKTYPE_NONE )
+ {
+ ASSERT(!test_bit(irq, d->pirq_mask));
+ stop_timer(&irq_guest_eoi_timer[vector]);
+ _irq_guest_eoi(desc);
+ }
if ( unlikely(!test_and_clear_bit(irq, d->pirq_mask)) ||
unlikely(--action->in_flight != 0) )
@@ -604,6 +649,11 @@ int pirq_guest_bind(struct vcpu *v, int
action->guest[action->nr_guests++] = v->domain;
+ if ( action->ack_type != ACKTYPE_NONE )
+ set_pirq_eoi(v->domain, irq);
+ else
+ clear_pirq_eoi(v->domain, irq);
+
unlock_out:
spin_unlock_irq(&desc->lock);
out:
Index: xen-3.3.1-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/physdev.c
@@ -204,10 +204,50 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
ret = -EFAULT;
if ( copy_from_guest(&eoi, arg, 1) != 0 )
break;
+ ret = -EINVAL;
+ if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
+ break;
+ if ( v->domain->arch.pirq_eoi_map )
+ evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
ret = pirq_guest_eoi(v->domain, eoi.irq);
break;
}
+ case PHYSDEVOP_pirq_eoi_mfn: {
+ struct physdev_pirq_eoi_mfn info;
+
+ BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&info, arg, 1) != 0 )
+ break;
+
+ ret = -EINVAL;
+ if ( !mfn_valid(info.mfn) ||
+ !get_page_and_type(mfn_to_page(info.mfn), v->domain,
+ PGT_writable_page) )
+ break;
+
+ if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, info.mfn) != 0 )
+ {
+ put_page_and_type(mfn_to_page(info.mfn));
+ ret = -EBUSY;
+ break;
+ }
+
+ v->domain->arch.pirq_eoi_map = map_domain_page_global(info.mfn);
+ if ( v->domain->arch.pirq_eoi_map == NULL )
+ {
+ v->domain->arch.pirq_eoi_map_mfn = 0;
+ put_page_and_type(mfn_to_page(info.mfn));
+ ret = -ENOSPC;
+ break;
+ }
+
+ ret = 0;
+ break;
+ }
+
/* Legacy since 0x00030202. */
case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
ret = pirq_guest_unmask(v->domain);
Index: xen-3.3.1-testing/xen/arch/x86/x86_64/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_64/physdev.c
@@ -18,6 +18,9 @@
#define physdev_eoi compat_physdev_eoi
#define physdev_eoi_t physdev_eoi_compat_t
+#define physdev_pirq_eoi_mfn compat_physdev_pirq_eoi_mfn
+#define physdev_pirq_eoi_mfn_t physdev_pirq_eoi_mfn_compat_t
+
#define physdev_set_iobitmap compat_physdev_set_iobitmap
#define physdev_set_iobitmap_t physdev_set_iobitmap_compat_t
Index: xen-3.3.1-testing/xen/common/event_channel.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/event_channel.c
+++ xen-3.3.1-testing/xen/common/event_channel.c
@@ -762,10 +762,9 @@ long evtchn_bind_vcpu(unsigned int port,
}
-static long evtchn_unmask(evtchn_unmask_t *unmask)
+int evtchn_unmask(unsigned int port)
{
struct domain *d = current->domain;
- int port = unmask->port;
struct vcpu *v;
spin_lock(&d->event_lock);
@@ -916,7 +915,7 @@ long do_event_channel_op(int cmd, XEN_GU
struct evtchn_unmask unmask;
if ( copy_from_guest(&unmask, arg, 1) != 0 )
return -EFAULT;
- rc = evtchn_unmask(&unmask);
+ rc = evtchn_unmask(unmask.port);
break;
}
Index: xen-3.3.1-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/domain.h
+++ xen-3.3.1-testing/xen/include/asm-x86/domain.h
@@ -239,6 +239,10 @@ struct arch_domain
int vector_pirq[NR_VECTORS];
int pirq_vector[NR_PIRQS];
+ /* Shared page for notifying that explicit PIRQ EOI is required. */
+ unsigned long *pirq_eoi_map;
+ unsigned long pirq_eoi_map_mfn;
+
/* Pseudophysical e820 map (XENMEM_memory_map). */
struct e820entry e820[3];
unsigned int nr_e820;
Index: xen-3.3.1-testing/xen/include/public/physdev.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/public/physdev.h
+++ xen-3.3.1-testing/xen/include/public/physdev.h
@@ -41,6 +41,21 @@ typedef struct physdev_eoi physdev_eoi_t
DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
/*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest used this function in that the associated event channel
+ * will automatically get unmasked. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_mfn 17
+struct physdev_pirq_eoi_mfn {
+ /* IN */
+ xen_pfn_t mfn;
+};
+typedef struct physdev_pirq_eoi_mfn physdev_pirq_eoi_mfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_mfn_t);
+
+/*
* Query the status of an IRQ line.
* @arg == pointer to physdev_irq_status_query structure.
*/
Index: xen-3.3.1-testing/xen/include/xen/event.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/event.h
+++ xen-3.3.1-testing/xen/include/xen/event.h
@@ -44,6 +44,9 @@ int evtchn_send(struct domain *d, unsign
/* Bind a local event-channel port to the specified VCPU. */
long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
+/* Unmask a local event-channel port. */
+int evtchn_unmask(unsigned int port);
+
/* Allocate/free a Xen-attached event channel port. */
int alloc_unbound_xen_event_channel(
struct vcpu *local_vcpu, domid_t remote_domid);
Index: xen-3.3.1-testing/xen/include/xen/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/irq.h
+++ xen-3.3.1-testing/xen/include/xen/irq.h
@@ -22,6 +22,7 @@ struct irqaction
#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
#define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */
+#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
#define IRQ_PER_CPU 256 /* IRQ is per CPU */
/*


@ -1,102 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228304687 0
# Node ID cb526325927c0abac441588b4a69bccd0b99d7b3
# Parent 9a6153a89d6642555c9ed4dc386d243c3df23eab
physdev: make PHYSDEVOP_pirq_eoi_mfn use a gmfn instead of an mfn.
To pass a page from a guest to the hypervisor, a gmfn should be used
instead of an mfn, as the grant table and other hypercalls do; it is
more consistent. So use a gmfn instead of an mfn for the
PHYSDEVOP_pirq_eoi_mfn hypercall.
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -14,6 +14,7 @@
#include <public/xen.h>
#include <public/physdev.h>
#include <xsm/xsm.h>
+#include <asm/p2m.h>
#ifndef COMPAT
typedef long ret_t;
@@ -213,8 +214,9 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
}
- case PHYSDEVOP_pirq_eoi_mfn: {
- struct physdev_pirq_eoi_mfn info;
+ case PHYSDEVOP_pirq_eoi_gmfn: {
+ struct physdev_pirq_eoi_gmfn info;
+ unsigned long mfn;
BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));
@@ -223,23 +225,24 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
ret = -EINVAL;
- if ( !mfn_valid(info.mfn) ||
- !get_page_and_type(mfn_to_page(info.mfn), v->domain,
+ mfn = gmfn_to_mfn(current->domain, info.gmfn);
+ if ( !mfn_valid(mfn) ||
+ !get_page_and_type(mfn_to_page(mfn), v->domain,
PGT_writable_page) )
break;
- if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, info.mfn) != 0 )
+ if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, mfn) != 0 )
{
- put_page_and_type(mfn_to_page(info.mfn));
+ put_page_and_type(mfn_to_page(mfn));
ret = -EBUSY;
break;
}
- v->domain->arch.pirq_eoi_map = map_domain_page_global(info.mfn);
+ v->domain->arch.pirq_eoi_map = map_domain_page_global(mfn);
if ( v->domain->arch.pirq_eoi_map == NULL )
{
v->domain->arch.pirq_eoi_map_mfn = 0;
- put_page_and_type(mfn_to_page(info.mfn));
+ put_page_and_type(mfn_to_page(mfn));
ret = -ENOSPC;
break;
}
--- a/xen/arch/x86/x86_64/physdev.c
+++ b/xen/arch/x86/x86_64/physdev.c
@@ -18,8 +18,8 @@
#define physdev_eoi compat_physdev_eoi
#define physdev_eoi_t physdev_eoi_compat_t
-#define physdev_pirq_eoi_mfn compat_physdev_pirq_eoi_mfn
-#define physdev_pirq_eoi_mfn_t physdev_pirq_eoi_mfn_compat_t
+#define physdev_pirq_eoi_gmfn compat_physdev_pirq_eoi_gmfn
+#define physdev_pirq_eoi_gmfn_t physdev_pirq_eoi_gmfn_compat_t
#define physdev_set_iobitmap compat_physdev_set_iobitmap
#define physdev_set_iobitmap_t physdev_set_iobitmap_compat_t
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -47,13 +47,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
* will automatically get unmasked. The page registered is used as a bit
* array indexed by Xen's PIRQ value.
*/
-#define PHYSDEVOP_pirq_eoi_mfn 17
-struct physdev_pirq_eoi_mfn {
+#define PHYSDEVOP_pirq_eoi_gmfn 17
+struct physdev_pirq_eoi_gmfn {
/* IN */
- xen_pfn_t mfn;
+ xen_pfn_t gmfn;
};
-typedef struct physdev_pirq_eoi_mfn physdev_pirq_eoi_mfn_t;
-DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_mfn_t);
+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);
/*
* Query the status of an IRQ line.


@ -1,25 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228474781 0
# Node ID 99f01b8184c7c16f612731e2a525687dc8d424dc
# Parent 09160c3bd1797fdee111c2837f26a749e0bf9435
VT-d code cleanup
This patch narrows the context-cache flush range from
domain-selective to device-selective when unmapping a device.
Signed-off-by: Yu Zhao <yu.zhao@intel.com>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1323,7 +1323,9 @@ static int domain_context_unmap_one(
context_clear_entry(*context);
iommu_flush_cache_entry(context);
- if ( iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0) )
+ if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT, 0) )
iommu_flush_write_buffer(iommu);
else
iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);


@ -1,33 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228490563 0
# Node ID 3905cbf523b2550f5025df6cc31ac60e48c1706f
# Parent 3db54d2aa8bd7ec8c096fb4fafa068850ff0ff35
x86/cpufreq: reduce verbosity
These messages don't exist in powernow's equivalent code and are
pretty useless anyway; they just clutter the logs.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -339,16 +339,10 @@ static int acpi_cpufreq_target(struct cp
next_perf_state = data->freq_table[next_state].index;
if (perf->state == next_perf_state) {
- if (unlikely(policy->resume)) {
- printk(KERN_INFO "Called after resume, resetting to P%d\n",
- next_perf_state);
+ if (unlikely(policy->resume))
policy->resume = 0;
- }
- else {
- printk(KERN_INFO "Already at target state (P%d)\n",
- next_perf_state);
+ else
return 0;
- }
}
switch (data->cpu_feature) {


@ -1,197 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228490612 0
# Node ID de7fd862ada2ed079d0a5c407508eb63bb936992
# Parent 3905cbf523b2550f5025df6cc31ac60e48c1706f
cpufreq: allow customization of some parameters
Short of having a way for powersaved to adjust these values
dynamically, at least allow specifying them on the command line. In
particular, always running with an up-threshold of 80% is perhaps nice
for laptop use, but certainly not desirable on servers. With shell
scripts invoking large numbers of short-lived processes I noticed a
50% performance degradation on a dual-socket quad-core Barcelona,
simply because the load of an individual core never crossed the 80%
boundary that would have triggered a frequency increase.
(Powersaved on SLE10 sets this on native kernels to 60% or 80%,
depending on whether performance or power reduction is preferred,
*divided* by the number of CPUs, but capped at the lower limit of
20%.)
Signed-off-by: Jan Beulich <jbeulich@novell.com>
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1230557866 0
# Node ID 4035ea96ae2fafba7a5a4c1e810aa7d591758e8c
# Parent 0af9fbf3f05306d4972cf05e4b6d7be2199a41cb
cpufreq: Fix a cpufreq cmdline parse bug, and change sample_rate unit
Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
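
Assuming the parser added below, the new governor options would simply
be appended to the existing cpufreq=xen selection on the Xen boot
line, e.g.:

    cpufreq=xen,rate=50000,threshold=60,bias=100

where, per the code, rate is the sampling interval in microseconds,
threshold is the up-threshold percentage (clamped to 11..100), and
bias is the powersave bias (capped at 1000).
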
--- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
@@ -22,15 +22,22 @@
#include <acpi/cpufreq/cpufreq.h>
#define DEF_FREQUENCY_UP_THRESHOLD (80)
+#define MIN_FREQUENCY_UP_THRESHOLD (11)
+#define MAX_FREQUENCY_UP_THRESHOLD (100)
#define MIN_DBS_INTERVAL (MICROSECS(100))
-#define MIN_SAMPLING_MILLISECS (20)
-#define MIN_STAT_SAMPLING_RATE \
+#define MIN_SAMPLING_RATE_RATIO (2)
+#define MIN_SAMPLING_MILLISECS (MIN_SAMPLING_RATE_RATIO * 10)
+#define MIN_STAT_SAMPLING_RATE \
(MIN_SAMPLING_MILLISECS * MILLISECS(1))
+#define MIN_SAMPLING_RATE \
+ (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
+#define MAX_SAMPLING_RATE (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000)
#define TRANSITION_LATENCY_LIMIT (10 * 1000 )
static uint64_t def_sampling_rate;
+static uint64_t usr_sampling_rate;
/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
@@ -42,11 +49,9 @@ static unsigned int dbs_enable; /* nu
static struct dbs_tuners {
uint64_t sampling_rate;
unsigned int up_threshold;
- unsigned int ignore_nice;
unsigned int powersave_bias;
} dbs_tuners_ins = {
.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
- .ignore_nice = 0,
.powersave_bias = 0,
};
@@ -216,7 +221,20 @@ int cpufreq_governor_dbs(struct cpufreq_
if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
def_sampling_rate = MIN_STAT_SAMPLING_RATE;
- dbs_tuners_ins.sampling_rate = def_sampling_rate;
+ if (!usr_sampling_rate)
+ dbs_tuners_ins.sampling_rate = def_sampling_rate;
+ else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
+ printk(KERN_WARNING "cpufreq/ondemand: "
+ "specified sampling rate too low, using %"PRIu64"\n",
+ MIN_SAMPLING_RATE);
+ dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
+ } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
+ printk(KERN_WARNING "cpufreq/ondemand: "
+ "specified sampling rate too high, using %"PRIu64"\n",
+ MAX_SAMPLING_RATE);
+ dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
+ } else
+ dbs_tuners_ins.sampling_rate = usr_sampling_rate;
}
dbs_timer_init(this_dbs_info);
@@ -239,3 +257,55 @@ int cpufreq_governor_dbs(struct cpufreq_
}
return 0;
}
+
+void __init cpufreq_cmdline_parse(char *str)
+{
+ do {
+ char *val, *end = strchr(str, ',');
+
+ if ( end )
+ *end++ = '\0';
+ val = strchr(str, '=');
+ if ( val )
+ *val++ = '\0';
+
+ if ( !strcmp(str, "rate") && val )
+ {
+ usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
+ }
+ else if ( !strcmp(str, "threshold") && val )
+ {
+ unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+ if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified threshold too low, using %d\n",
+ MIN_FREQUENCY_UP_THRESHOLD);
+ tmp = MIN_FREQUENCY_UP_THRESHOLD;
+ }
+ else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified threshold too high, using %d\n",
+ MAX_FREQUENCY_UP_THRESHOLD);
+ tmp = MAX_FREQUENCY_UP_THRESHOLD;
+ }
+ dbs_tuners_ins.up_threshold = tmp;
+ }
+ else if ( !strcmp(str, "bias") && val )
+ {
+ unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+ if ( tmp > 1000 )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified bias too high, using 1000\n");
+ tmp = 1000;
+ }
+ dbs_tuners_ins.powersave_bias = tmp;
+ }
+
+ str = end;
+ } while ( str );
+}
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -25,6 +25,7 @@
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
+#include <acpi/cpufreq/cpufreq.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
@@ -41,16 +42,25 @@ boolean_param("dom0_vcpus_pin", opt_dom0
enum cpufreq_controller cpufreq_controller;
static void __init setup_cpufreq_option(char *str)
{
+ char *arg;
+
if ( !strcmp(str, "dom0-kernel") )
{
xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
cpufreq_controller = FREQCTL_dom0_kernel;
opt_dom0_vcpus_pin = 1;
+ return;
}
- else if ( !strcmp(str, "xen") )
+
+ if ( (arg = strpbrk(str, ",:")) != NULL )
+ *arg++ = '\0';
+
+ if ( !strcmp(str, "xen") )
{
xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
cpufreq_controller = FREQCTL_xen;
+ if ( arg && *arg )
+ cpufreq_cmdline_parse(arg);
}
}
custom_param("cpufreq", setup_cpufreq_option);
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -41,6 +41,8 @@ struct cpufreq_policy {
};
extern struct cpufreq_policy xen_px_policy[NR_CPUS];
+void cpufreq_cmdline_parse(char *);
+
#define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
#define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */
#define CPUFREQ_SHARED_TYPE_ALL (2) /* All dependent CPUs should set freq */


@ -1,29 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228490652 0
# Node ID d206692cbcbe33305afc4879a4b3ece44a8aba93
# Parent de7fd862ada2ed079d0a5c407508eb63bb936992
x86: make an error message more precise
... allowing one to distinguish whether the to-be-added or the
already existing PIRQ binding is causing the failure.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -620,9 +620,11 @@ int pirq_guest_bind(struct vcpu *v, int
}
else if ( !will_share || !action->shareable )
{
- gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. "
- "Will not share with others.\n",
- irq);
+ gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. %s.\n",
+ irq,
+ will_share ?
+ "Others do not share" :
+ "Will not share with others");
rc = -EBUSY;
goto unlock_out;
}


@ -1,76 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228827329 0
# Node ID 043aba2b67a195d2c2707f8fd0c05bbbf2078d2a
# Parent f7f8f44b9292a30707bd645739390ef3d0f22232
VT-d: check return value of pirq_guest_bind()
This eliminates a hypervisor crash when the respective domain dies or
has the device hot-removed.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Reviewed-by: Weidong Han <weidong.han@intel.com>
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -57,7 +57,7 @@ int pt_irq_create_bind_vtd(
uint32_t machine_gsi, guest_gsi;
uint32_t device, intx, link;
struct dev_intx_gsi_link *digl;
- int pirq = pt_irq_bind->machine_irq;
+ int rc, pirq = pt_irq_bind->machine_irq;
if ( pirq < 0 || pirq >= NR_PIRQS )
return -EINVAL;
@@ -95,7 +95,17 @@ int pt_irq_create_bind_vtd(
hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = pirq;
/* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
- pirq_guest_bind(d->vcpu[0], pirq, 0);
+ rc = pirq_guest_bind(d->vcpu[0], pirq, 0);
+ if ( unlikely(rc) )
+ {
+ hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = 0;
+ hvm_irq_dpci->mirq[pirq].gmsi.gflags = 0;
+ hvm_irq_dpci->mirq[pirq].gmsi.gvec = 0;
+ clear_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags);
+ clear_bit(pirq, hvm_irq_dpci->mapping);
+ spin_unlock(&d->event_lock);
+ return rc;
+ }
}
else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
@@ -136,13 +146,30 @@ int pt_irq_create_bind_vtd(
/* Bind the same mirq once in the same domain */
if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
{
+ unsigned int vector = domain_irq_to_vector(d, machine_gsi);
+
hvm_irq_dpci->mirq[machine_gsi].dom = d;
/* Init timer before binding */
- init_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)],
+ init_timer(&hvm_irq_dpci->hvm_timer[vector],
pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
/* Deal with gsi for legacy devices */
- pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
+ rc = pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
+ if ( unlikely(rc) )
+ {
+ kill_timer(&hvm_irq_dpci->hvm_timer[vector]);
+ hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
+ clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+ hvm_irq_dpci->girq[guest_gsi].machine_gsi = 0;
+ hvm_irq_dpci->girq[guest_gsi].intx = 0;
+ hvm_irq_dpci->girq[guest_gsi].device = 0;
+ hvm_irq_dpci->girq[guest_gsi].valid = 0;
+ list_del(&digl->list);
+ hvm_irq_dpci->link_cnt[link]--;
+ spin_unlock(&d->event_lock);
+ xfree(digl);
+ return rc;
+ }
}
gdprintk(XENLOG_INFO VTDPREFIX,


@ -1,95 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228995360 0
# Node ID f4c1a347311bbdc7dbf3b1b213719929cf03ede3
# Parent 68555b9a7d98a6e91b55766e54d8e6d08589b3ac
x86: unify local_irq_XXX()
This also removes an inconsistency in that x86-64's __save_flags() had
a memory clobber, while x86_32's didn't.
It further adds type checking, since blindly using {pop,push}{l,q} on
a memory operand of unknown size bears the risk of corrupting other
data.
Finally, it eliminates the redundant (with local_irq_restore())
__restore_flags() macro and renames __save_flags() to
local_save_flags(), making the naming consistent with Linux (again?).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
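
The added type check rests on a classic compile-time trick; a
stand-alone model (hypothetical macro name — the patch uses Xen's
BUILD_BUG_ON for the same effect): a negative array size makes any use
with a wrongly-sized flags variable fail to compile, so a push/pop of
the wrong width can never be emitted.

    /* Sketch: compile-time guard that 'x' is exactly long-sized. */
    #define ASSERT_LONG_SIZED(x) \
        ((void)sizeof(char[1 - 2 * (sizeof(x) != sizeof(long))]))
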
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -1,8 +1,7 @@
#ifndef __ASM_SYSTEM_H
#define __ASM_SYSTEM_H
-#include <xen/config.h>
-#include <xen/types.h>
+#include <xen/lib.h>
#include <asm/bitops.h>
#define read_segment_register(name) \
@@ -171,10 +170,27 @@ static always_inline unsigned long __cmp
/* used when interrupts are already enabled or to shutdown the processor */
#define halt() asm volatile ( "hlt" : : : "memory" )
+#define local_save_flags(x) \
+({ \
+ BUILD_BUG_ON(sizeof(x) != sizeof(long)); \
+ asm volatile ( "pushf" __OS " ; pop" __OS " %0" : "=g" (x)); \
+})
+#define local_irq_save(x) \
+({ \
+ local_save_flags(x); \
+ local_irq_disable(); \
+})
+#define local_irq_restore(x) \
+({ \
+ BUILD_BUG_ON(sizeof(x) != sizeof(long)); \
+ asm volatile ( "push" __OS " %0 ; popf" __OS \
+ : : "g" (x) : "memory", "cc" ); \
+})
+
static inline int local_irq_is_enabled(void)
{
unsigned long flags;
- __save_flags(flags);
+ local_save_flags(flags);
return !!(flags & (1<<9)); /* EFLAGS_IF */
}
--- a/xen/include/asm-x86/x86_32/system.h
+++ b/xen/include/asm-x86/x86_32/system.h
@@ -101,14 +101,4 @@ static inline void atomic_write64(uint64
#define mb() \
asm volatile ( "lock; addl $0,0(%%esp)" : : : "memory" )
-#define __save_flags(x) \
- asm volatile ( "pushfl ; popl %0" : "=g" (x) : )
-#define __restore_flags(x) \
- asm volatile ( "pushl %0 ; popfl" : : "g" (x) : "memory", "cc" )
-
-#define local_irq_save(x) \
- asm volatile ( "pushfl ; popl %0 ; cli" : "=g" (x) : : "memory" )
-#define local_irq_restore(x) \
- __restore_flags(x)
-
#endif /* __X86_32_SYSTEM_H__ */
--- a/xen/include/asm-x86/x86_64/system.h
+++ b/xen/include/asm-x86/x86_64/system.h
@@ -55,14 +55,4 @@ static inline void atomic_write64(uint64
#define mb() \
asm volatile ( "mfence" : : : "memory" )
-#define __save_flags(x) \
- asm volatile ( "pushfq ; popq %q0" : "=g" (x) : :"memory" )
-#define __restore_flags(x) \
- asm volatile ( "pushq %0 ; popfq" : : "g" (x) : "memory", "cc" )
-
-#define local_irq_save(x) \
- asm volatile ( "pushfq ; popq %0 ; cli" : "=g" (x) : : "memory" )
-#define local_irq_restore(x) \
- __restore_flags(x)
-
#endif /* __X86_64_SYSTEM_H__ */


@ -1,94 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1228995610 0
# Node ID c15244125a693d2a1ae5e5745a649467394d8dac
# Parent f4c1a347311bbdc7dbf3b1b213719929cf03ede3
x86: fix the potential of encountering panic "IO-APIC + timer doesn't work! ..."
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Linux commit:
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=4aae07025265151e3f7041dfbf0f529e122de1d8
x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
Under rare circumstances we found we could have an IRQ0 entry while we
are in the middle of setting up the local APIC, the i8259A and the
PIT. That is certainly not how it's supposed to work! check_timer()
was supposed to be called with irqs turned off - but this eroded away
sometime in the past. This code would still work most of the time,
because it runs very quickly, but if just the right timing conditions
are present and IRQ0 hits in this small, ~30 usecs window, timer irqs
stop and the system does not boot up. Also, given how early
this is during bootup, the hang is very deterministic - but it would
only occur on certain machines (and certain configs).
The fix was quite simple: disable/restore interrupts properly in this
function. With that in place the test-system now boots up just fine.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1259,14 +1259,16 @@ static void __init setup_ioapic_ids_from
static int __init timer_irq_works(void)
{
extern unsigned long pit0_ticks;
- unsigned long t1;
+ unsigned long t1, flags;
t1 = pit0_ticks;
mb();
+ local_save_flags(flags);
local_irq_enable();
/* Let ten ticks pass... */
mdelay((10 * 1000) / HZ);
+ local_irq_restore(flags);
/*
* Expect a few ticks at least, to be sure some possible
@@ -1720,6 +1722,9 @@ static inline void check_timer(void)
{
int apic1, pin1, apic2, pin2;
int vector;
+ unsigned long flags;
+
+ local_irq_save(flags);
/*
* get/set the timer IRQ vector:
@@ -1761,6 +1766,7 @@ static inline void check_timer(void)
*/
unmask_IO_APIC_irq(0);
if (timer_irq_works()) {
+ local_irq_restore(flags);
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(apic1, pin1);
return;
@@ -1778,6 +1784,7 @@ static inline void check_timer(void)
*/
setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
if (timer_irq_works()) {
+ local_irq_restore(flags);
printk("works.\n");
if (pin1 != -1)
replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
@@ -1805,6 +1812,7 @@ static inline void check_timer(void)
enable_8259A_irq(0);
if (timer_irq_works()) {
+ local_irq_restore(flags);
printk(" works.\n");
return;
}
@@ -1820,6 +1828,8 @@ static inline void check_timer(void)
unlock_ExtINT_logic();
+ local_irq_restore(flags);
+
if (timer_irq_works()) {
printk(" works.\n");
return;


@ -1,29 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1229599705 0
# Node ID c2dad16819b54c2c4b2bb0e9e89f71c279eaf156
# Parent b33b745cd5ec3213feeb1d99e421e79cc5f12370
x86, shadow: Avoid duplicates in fixup tables.
Avoid entering duplicates in fixup tables, reducing fixup evictions.
Signed-off-by: Gianluca Guida <gianluca.guida@eu.citrix.com>
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -626,6 +626,15 @@ void oos_fixup_add(struct vcpu *v, mfn_t
idx = (idx + 1) % SHADOW_OOS_PAGES;
if ( mfn_x(oos[idx]) == mfn_x(gmfn) )
{
+ int i;
+ for ( i = 0; i < SHADOW_OOS_FIXUPS; i++ )
+ {
+ if ( mfn_valid(oos_fixup[idx].smfn[i])
+ && (mfn_x(oos_fixup[idx].smfn[i]) == mfn_x(smfn))
+ && (oos_fixup[idx].off[i] == off) )
+ return;
+ }
+
next = oos_fixup[idx].next;
if ( mfn_x(oos_fixup[idx].smfn[next]) != INVALID_MFN )


@ -1,27 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1229599773 0
# Node ID 768759d4e319f8c46a8558782a9bf1c7982e662d
# Parent c2dad16819b54c2c4b2bb0e9e89f71c279eaf156
xenoprof: Add support for Intel Dunnington cores.
Signed-off-by: Xiaowei Yang <Xiaowei.yang@intel.com>
Signed-off-by: Ting Zhou <ting.g.zhou@intel.com>
--- a/xen/arch/x86/oprofile/nmi_int.c
+++ b/xen/arch/x86/oprofile/nmi_int.c
@@ -315,11 +315,10 @@ static int __init ppro_init(char ** cpu_
case 14:
*cpu_type = "i386/core";
break;
- case 15: case 23:
- *cpu_type = "i386/core_2";
- ppro_has_global_ctrl = 1;
- break;
+ case 15:
+ case 23:
case 26:
+ case 29:
*cpu_type = "i386/core_2";
ppro_has_global_ctrl = 1;
break;


@ -1,212 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1229694124 0
# Node ID d238101c1832ba178bfc00a20b461fcebe21d5df
# Parent 8c35da364ab39605839869d8eb0ac9b831c370f0
VT-d: Fix PCI-X device assignment
When assign PCI device, current code just map its bridge and its
secondary bus number and devfn 0. It doesn't work for PCI-x device
assignment, because the request may be the source-id in the original
PCI-X transaction or the source-id provided by the bridge. It needs to
map the device itself, and its upstream bridges till PCIe-to-PCI/PCI-x
bridge.
In addition, add description for DEV_TYPE_PCIe_BRIDGE and
DEV_TYPE_PCI_BRIDGE for understandability.
Signed-off-by: Weidong Han <weidong.han@intel.com>
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1231154002 0
# Node ID b3a9bc72624166a230da74c498154ae2cb45eacc
# Parent 9cc632cc6d400685679671b6bbc58dfe4c5e287e
vtd: avoid redundant context mapping
After changeset 18934 (VT-d: Fix PCI-X device assignment), my assigned
PCI E1000 NIC doesn't work in the guest.
The NIC is 03:00.0. Its parent bridge is: 00:1e.0.
In domain_context_mapping():
case DEV_TYPE_PCI:
After we domain_context_mapping_one() 03:00.0 and 00:1e.0, 'secbus'
is 3 and 'bus' is 0, so we domain_context_mapping_one() 03:00.0
again. This redundant invocation returns -EINVAL, because the mapping
has already been created but pdev->domain has not yet been changed
from Dom0 to the new domain at this point, so the
XEN_DOMCTL_assign_device hypercall eventually returns a failure.
The attached patch detects this case and avoids the redundant
invocation.
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
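
The reworked mapping loop can be modeled in isolation (hypothetical
table layout — in the patch it is the bus2bridge[] array): map the
device itself, then map every hop while walking up through the
recorded PCI bridges; if the walk moved to a different bus and the
device below it was not already at devfn 0, additionally map
(secbus, devfn 0), since legacy bridges may present that as the
transaction's source-id.

    /* Sketch: walk a bus-to-parent-bridge table, mapping each hop. */
    struct bridge { int present; unsigned char bus, devfn; };

    static int map_with_upstream(const struct bridge b2b[],
                                 unsigned char bus, unsigned char devfn,
                                 int (*map_one)(unsigned char, unsigned char))
    {
        unsigned char secbus = bus, secdevfn = devfn;
        int rc = map_one(bus, devfn);       /* the device itself          */

        while ( !rc && b2b[bus].present )   /* each upstream bridge       */
        {
            secbus = bus;
            secdevfn = devfn;
            devfn = b2b[bus].devfn;
            bus = b2b[bus].bus;
            rc = map_one(bus, devfn);
        }
        if ( !rc && (secbus != bus) && (secdevfn != 0) )
            rc = map_one(secbus, 0);        /* bridge-generated source-id */
        return rc;
    }
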
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1155,8 +1155,8 @@ static int domain_context_mapping_one(
enum {
DEV_TYPE_PCIe_ENDPOINT,
- DEV_TYPE_PCIe_BRIDGE,
- DEV_TYPE_PCI_BRIDGE,
+ DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
+ DEV_TYPE_PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge, PCI-to-PCI bridge
DEV_TYPE_PCI,
};
@@ -1170,7 +1170,8 @@ int pdev_type(u8 bus, u8 devfn)
class_device = pci_conf_read16(bus, d, f, PCI_CLASS_DEVICE);
if ( class_device == PCI_CLASS_BRIDGE_PCI )
{
- pos = pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP);
+ pos = pci_find_next_cap(bus, devfn,
+ PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP);
if ( !pos )
return DEV_TYPE_PCI_BRIDGE;
creg = pci_conf_read16(bus, d, f, pos + PCI_EXP_FLAGS);
@@ -1219,9 +1220,9 @@ static int domain_context_mapping(struct
{
struct acpi_drhd_unit *drhd;
int ret = 0;
- u16 sec_bus, sub_bus, ob, odf;
+ u16 sec_bus, sub_bus;
u32 type;
- u8 secbus;
+ u8 secbus, secdevfn;
drhd = acpi_find_matched_drhd_unit(bus, devfn);
if ( !drhd )
@@ -1231,15 +1232,13 @@ static int domain_context_mapping(struct
switch ( type )
{
case DEV_TYPE_PCIe_BRIDGE:
+ break;
+
case DEV_TYPE_PCI_BRIDGE:
sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
PCI_SECONDARY_BUS);
sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
PCI_SUBORDINATE_BUS);
- /*dmar_scope_add_buses(&drhd->scope, sec_bus, sub_bus);*/
-
- if ( type == DEV_TYPE_PCIe_BRIDGE )
- break;
for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
{
@@ -1258,26 +1257,28 @@ static int domain_context_mapping(struct
case DEV_TYPE_PCI:
gdprintk(XENLOG_INFO VTDPREFIX,
- "domain_context_mapping:PCI: bdf = %x:%x.%x\n",
+ "domain_context_mapping:PCI: bdf = %x:%x.%x\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- ob = bus; odf = devfn;
- if ( !find_pcie_endpoint(&bus, &devfn, &secbus) )
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ if ( ret )
+ break;
+
+ secbus = bus;
+ secdevfn = devfn;
+ /* dependent devices mapping */
+ while ( bus2bridge[bus].map )
{
- gdprintk(XENLOG_WARNING VTDPREFIX,
- "domain_context_mapping:invalid\n");
- break;
+ secbus = bus;
+ secdevfn = devfn;
+ devfn = bus2bridge[bus].devfn;
+ bus = bus2bridge[bus].bus;
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ if ( ret )
+ return ret;
}
- if ( ob != bus || odf != devfn )
- gdprintk(XENLOG_INFO VTDPREFIX,
- "domain_context_mapping:map: "
- "bdf = %x:%x.%x -> %x:%x.%x\n",
- ob, PCI_SLOT(odf), PCI_FUNC(odf),
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
- if ( secbus != bus )
+ if ( (secbus != bus) && (secdevfn != 0) )
/*
* The source-id for transactions on non-PCIe buses seem
* to originate from devfn=0 on the secondary bus behind
@@ -1285,7 +1286,7 @@ static int domain_context_mapping(struct
* these scanarios is not particularly well documented
* anywhere.
*/
- domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
+ ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
break;
default:
@@ -1339,10 +1340,9 @@ static int domain_context_unmap_one(
static int domain_context_unmap(struct domain *domain, u8 bus, u8 devfn)
{
struct acpi_drhd_unit *drhd;
- u16 sec_bus, sub_bus;
int ret = 0;
u32 type;
- u8 secbus;
+ u8 secbus, secdevfn;
drhd = acpi_find_matched_drhd_unit(bus, devfn);
if ( !drhd )
@@ -1353,24 +1353,39 @@ static int domain_context_unmap(struct d
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCI_BRIDGE:
- sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- PCI_SECONDARY_BUS);
- sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- PCI_SUBORDINATE_BUS);
- /*dmar_scope_remove_buses(&drhd->scope, sec_bus, sub_bus);*/
- if ( DEV_TYPE_PCI_BRIDGE )
- ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
break;
case DEV_TYPE_PCIe_ENDPOINT:
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap:PCIe: bdf = %x:%x.%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
break;
case DEV_TYPE_PCI:
- if ( find_pcie_endpoint(&bus, &devfn, &secbus) )
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap:PCI: bdf = %x:%x.%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
+ if ( ret )
+ break;
+
+ secbus = bus;
+ secdevfn = devfn;
+ /* dependent devices unmapping */
+ while ( bus2bridge[bus].map )
+ {
+ secbus = bus;
+ secdevfn = devfn;
+ devfn = bus2bridge[bus].devfn;
+ bus = bus2bridge[bus].bus;
ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
- if ( bus != secbus )
- domain_context_unmap_one(domain, drhd->iommu, secbus, 0);
+ if ( ret )
+ return ret;
+ }
+
+ if ( (secbus != bus) && (secdevfn != 0) )
+ ret = domain_context_unmap_one(domain, drhd->iommu, secbus, 0);
break;
default:


@ -1,112 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1229698596 0
# Node ID 2dffa6ceb0af954e7f3a9ad7e993b8aee7b7de65
# Parent 738513b106fa262a11cc3254cd6dd67afb3a63e7
Support S3 for MSI interrupt
From: "Jiang, Yunhong" <yunhong.jiang@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -771,3 +771,41 @@ void pci_cleanup_msi(struct pci_dev *pde
msi_free_vectors(pdev);
}
+int pci_restore_msi_state(struct pci_dev *pdev)
+{
+ unsigned long flags;
+ int vector;
+ struct msi_desc *entry, *tmp;
+ irq_desc_t *desc;
+
+ if (!pdev)
+ return -EINVAL;
+
+ list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
+ {
+ vector = entry->vector;
+ desc = &irq_desc[vector];
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ ASSERT(desc->msi_desc == entry);
+
+ if (desc->msi_desc != entry)
+ {
+ dprintk(XENLOG_ERR, "Restore MSI for dev %x:%x not set before?\n",
+ pdev->bus, pdev->devfn);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return -EINVAL;
+ }
+
+ msi_set_enable(pdev, 0);
+ write_msi_msg(entry, &entry->msg);
+
+ msi_set_enable(pdev, 1);
+ msi_set_mask_bit(vector, entry->msi_attrib.masked);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ return 0;
+}
+
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -427,6 +427,27 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
}
+ case PHYSDEVOP_restore_msi: {
+ struct physdev_restore_msi restore_msi;
+ struct pci_dev *pdev;
+
+ ret = -EPERM;
+ if ( !IS_PRIV(v->domain) )
+ break;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&restore_msi, arg, 1) != 0 )
+ break;
+
+ pdev = pci_lock_pdev(restore_msi.bus, restore_msi.devfn);
+ ret = -ENODEV;
+ if ( !pdev )
+ break;
+
+ ret = pci_restore_msi_state(pdev);
+ spin_unlock(&pdev->lock);
+ break;
+ }
default:
ret = -ENOSYS;
break;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -75,6 +75,7 @@ extern void set_msi_irq_affinity(unsigne
extern int pci_enable_msi(struct msi_info *msi);
extern void pci_disable_msi(int vector);
extern void pci_cleanup_msi(struct pci_dev *pdev);
+extern int pci_restore_msi_state(struct pci_dev *pdev);
struct msi_desc {
struct {
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -183,6 +183,15 @@ struct physdev_manage_pci {
typedef struct physdev_manage_pci physdev_manage_pci_t;
DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+#define PHYSDEVOP_restore_msi 19
+struct physdev_restore_msi {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+};
+typedef struct physdev_restore_msi physdev_restore_msi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t);
+
/*
* Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
* hypercall since 0x00030202.


@@ -1,28 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1230557552 0
# Node ID 0af9fbf3f05306d4972cf05e4b6d7be2199a41cb
# Parent c54d6f871de8f271aaeb571c3b87eae9165e3183
x86: Do not restrict 32-bit EPT to 4GB.
Signed-off-by: Xin, Xiaohui <xiaohui.xin@intel.com>
Index: xen-3.3.1-testing/xen/arch/x86/mm/p2m.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-3.3.1-testing/xen/arch/x86/mm/p2m.c
@@ -935,11 +935,12 @@ guest_physmap_add_entry(struct domain *d
#if CONFIG_PAGING_LEVELS == 3
/*
- * 32bit PAE nested paging does not support over 4GB guest due to
+ * 32bit AMD nested paging does not support over 4GB guest due to
* hardware translation limit. This limitation is checked by comparing
* gfn with 0xfffffUL.
*/
- if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
+ if ( paging_mode_hap(d) && (gfn > 0xfffffUL) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
{
if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"


@@ -1,44 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1231156354 0
# Node ID 2c5a2e99a1d69d635843955310488fbd5e1bcdd2
# Parent d6889b3b64231dd4c2cd86ca6e66d0a4ef2d5dfc
vmx: Print advanced features during boot
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -55,6 +55,25 @@ static DEFINE_PER_CPU(struct list_head,
static u32 vmcs_revision_id __read_mostly;
+static void __init vmx_display_features(void)
+{
+ int printed = 0;
+
+ printk("VMX: Supported advanced features:\n");
+
+#define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
+ P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation");
+ P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow");
+ P(cpu_has_vmx_ept, "Extended Page Tables (EPT)");
+ P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)");
+ P(cpu_has_vmx_vnmi, "Virtual NMI");
+ P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
+#undef P
+
+ if ( !printed )
+ printk(" - none\n");
+}
+
static u32 adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr)
{
u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;
@@ -168,6 +187,7 @@ static void vmx_init_vmcs_config(void)
vmx_vmexit_control = _vmx_vmexit_control;
vmx_vmentry_control = _vmx_vmentry_control;
cpu_has_vmx_ins_outs_instr_info = !!(vmx_basic_msr_high & (1U<<22));
+ vmx_display_features();
}
else
{


@@ -1,27 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1231755835 0
# Node ID 95d8788bf4be2e8b8d2b984e290f5e19eef1a16c
# Parent 59d511c4a8d8ba451afc6ebd88e049fa2addf9f5
hvmloader: Fix SMBIOS memory device length boundary condition.
dev_memsize ends up 0 when it shouldn't at exact 16GB boundaries.
Signed-off-by: Bill Rieske <brieske@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
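A worked instance of the boundary condition, assuming memsize is in MiB
and each SMBIOS memory device covers 0x4000 MiB (16 GiB), as in the hunk
below; this sketch is not part of the patch.

    /* memsize == 0x8000 (exactly 32 GiB) gives nr_mem_devs == 2.
     * Old code, for the last device (i == nr_mem_devs - 1):
     *     dev_memsize = memsize & 0x3fff = 0x8000 & 0x3fff = 0
     * i.e. a zero-sized memory device.  The fixed code only applies
     * the remainder when (memsize & 0x3fff) != 0, so the last device
     * correctly keeps the full 0x4000. */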
diff -r 59d511c4a8d8 -r 95d8788bf4be tools/firmware/hvmloader/smbios.c
--- a/tools/firmware/hvmloader/smbios.c Mon Jan 12 10:17:12 2009 +0000
+++ b/tools/firmware/hvmloader/smbios.c Mon Jan 12 10:23:55 2009 +0000
@@ -118,8 +118,9 @@ write_smbios_tables(void *start,
do_struct(smbios_type_16_init(p, memsize, nr_mem_devs));
for ( i = 0; i < nr_mem_devs; i++ )
{
- uint32_t dev_memsize = ((i == (nr_mem_devs - 1))
- ? (memsize & 0x3fff) : 0x4000);
+ uint32_t dev_memsize = 0x4000; /* all but last covers 16GB */
+ if ( (i == (nr_mem_devs - 1)) && ((memsize & 0x3fff) != 0) )
+ dev_memsize = memsize & 0x3fff; /* last dev is <16GB */
do_struct(smbios_type_17_init(p, dev_memsize, i));
do_struct(smbios_type_19_init(p, dev_memsize, i));
do_struct(smbios_type_20_init(p, dev_memsize, i));


@@ -1,37 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1231859806 0
# Node ID 73770182aee48e79a2caa441ad1013982deefddb
# Parent 1c6642adaeb204495c95ab86c8aee41587a22928
AMD IOMMU: Reset tail and head pointer of cmd buffer and event log
Reset the head and tail pointers of the command buffer and the event
log to zero, in case the IOMMU does not reset them after the base
addresses of those buffers are updated.
Signed-off-by: Wei Wang <wei.wang2@amd.com>
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -195,6 +195,10 @@ static void __init set_iommu_command_buf
IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK,
IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+ /*reset head and tail pointer */
+ writel(0x0, iommu->mmio_base + IOMMU_CMD_BUFFER_HEAD_OFFSET);
+ writel(0x0, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
}
static void __init register_iommu_exclusion_range(struct amd_iommu *iommu)
@@ -259,6 +263,10 @@ static void __init set_iommu_event_log_c
IOMMU_CONTROL_COMP_WAIT_INT_MASK,
IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+ /*reset head and tail pointer */
+ writel(0x0, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
+ writel(0x0, iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET);
}
static int amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[])


@@ -1,46 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1231862027 0
# Node ID 4f6a2bbdff3fea7db13979ffccb1ae5403ca79c8
# Parent b169db55faf38fda27985626284c9262aac09784
x86: Call msix_set_enable for MSI-X interrupts
For MSI-X, we should call msix_set_enable() instead of
msi_set_enable().
Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>
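For context, MSI and MSI-X are distinct PCI capabilities with distinct
enable bits, so using the MSI helper on an MSI-X device toggles the
wrong register. A short reference sketch (PCI spec facts, not part of
this patch):

    /* Enable-bit locations in the respective Message Control words:
     *   MSI   (PCI_CAP_ID_MSI):  bit 0
     *   MSI-X (PCI_CAP_ID_MSIX): bit 15
     * Hence __pci_disable_msix() must use msix_set_enable(), which
     * operates on the MSI-X capability, not the MSI one. */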
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -702,7 +702,7 @@ static void __pci_disable_msix(int vecto
pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
- msi_set_enable(dev, 0);
+ msix_set_enable(dev, 0);
BUG_ON(list_empty(&dev->msi_list));
@@ -798,11 +798,20 @@ int pci_restore_msi_state(struct pci_dev
return -EINVAL;
}
- msi_set_enable(pdev, 0);
+ if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ msi_set_enable(pdev, 0);
+ else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+ msix_set_enable(pdev, 0);
+
write_msi_msg(entry, &entry->msg);
- msi_set_enable(pdev, 1);
msi_set_mask_bit(vector, entry->msi_attrib.masked);
+
+ if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ msi_set_enable(pdev, 1);
+ else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+ msix_set_enable(pdev, 1);
+
spin_unlock_irqrestore(&desc->lock, flags);
}


@@ -1,135 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1231930540 0
# Node ID 59274c49a0298fd73f60759c0842a293b5816057
# Parent cc542ebe48539b9ca0534ca241209734234fdff2
x86: restore ability to work on systems without APIC
This got broken with the default-enabling of MSI. Apart from fixing
the base issue, the patch also addresses
- the 'i' command crashing where there is no IO-APIC,
- the 'i' command needlessly printing information for all 256 vectors
when the use of IO-APIC(s) is disabled, and
- the need to specify both "nolapic" and "noapic" when "nolapic" alone
should already have the intended effect.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -40,7 +40,7 @@
/*
* Knob to control our willingness to enable the local APIC.
*/
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
/*
* Debug level
@@ -719,7 +719,7 @@ static void apic_pm_activate(void)
static void __init lapic_disable(char *str)
{
enable_local_apic = -1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
}
custom_param("nolapic", lapic_disable);
@@ -852,6 +852,7 @@ void __init init_apic_mappings(void)
if (!smp_found_config && detect_init_APIC()) {
apic_phys = __pa(alloc_xenheap_page());
clear_page(__va(apic_phys));
+ msi_enable = 0;
} else
apic_phys = mp_lapic_addr;
@@ -1280,8 +1281,10 @@ int __init APIC_init_uniprocessor (void)
if (enable_local_apic < 0)
clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- if (!smp_found_config && !cpu_has_apic)
+ if (!smp_found_config && !cpu_has_apic) {
+ msi_enable = 0;
return -1;
+ }
/*
* Complain if the BIOS pretends there is one.
@@ -1290,6 +1293,7 @@ int __init APIC_init_uniprocessor (void)
printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ msi_enable = 0;
return -1;
}
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -29,6 +29,14 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
*/
u64 host_pat = 0x050100070406;
+static unsigned int __cpuinitdata cleared_caps[NCAPINTS];
+
+void __init setup_clear_cpu_cap(unsigned int cap)
+{
+ __clear_bit(cap, boot_cpu_data.x86_capability);
+ __set_bit(cap, cleared_caps);
+}
+
static void default_init(struct cpuinfo_x86 * c)
{
/* Not much we can do here... */
@@ -235,6 +243,7 @@ static void __init early_cpu_detect(void
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
c->x86_mask = tfms & 15;
+ cap0 &= ~cleared_caps[0];
if (cap0 & (1<<19))
c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
c->x86_capability[0] = cap0; /* Added for Xen bootstrap */
@@ -395,6 +404,9 @@ void __cpuinit identify_cpu(struct cpuin
if (disable_pse)
clear_bit(X86_FEATURE_PSE, c->x86_capability);
+ for (i = 0 ; i < NCAPINTS ; ++i)
+ c->x86_capability[i] &= ~cleared_caps[i];
+
/* If the model name is still unset, do table lookup. */
if ( !c->x86_model_id[0] ) {
char *p;
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -87,7 +87,9 @@ int disable_timer_pin_1 __initdata;
static struct irq_pin_list {
int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+} irq_2_pin[PIN_MAP_SIZE] = {
+ [0 ... PIN_MAP_SIZE-1].pin = -1
+};
static int irq_2_pin_free_entry = NR_IRQS;
int vector_irq[NR_VECTORS] __read_mostly = {
@@ -1020,11 +1022,6 @@ static void __init enable_IO_APIC(void)
int i, apic;
unsigned long flags;
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
-
/* Initialise dynamic irq_2_pin free list. */
for (i = NR_IRQS; i < PIN_MAP_SIZE; i++)
irq_2_pin[i].next = i + 1;
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -191,6 +191,7 @@ extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
extern void identify_cpu(struct cpuinfo_x86 *);
+extern void setup_clear_cpu_cap(unsigned int);
extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void dodgy_tsc(void);


@@ -1,44 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1231930578 0
# Node ID 3fb43f23f64ccc1687d1dc348a9eb454397d4887
# Parent 59274c49a0298fd73f60759c0842a293b5816057
x86: propagate disabled APIC state to Dom0
This in particular eliminates the need to specify "noapic" for Dom0
when "noapic" is passed to Xen, provided the kernel side is slightly
modified to make use of this information (a Linux-side patch cannot
easily be provided for the 2.6.18 tree, but this is already used by
our 2.6.27-based one).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -711,6 +711,8 @@ static void pv_cpuid(struct cpu_user_reg
{
/* Modify Feature Information. */
__clear_bit(X86_FEATURE_VME, &d);
+ if ( !cpu_has_apic )
+ __clear_bit(X86_FEATURE_APIC % 32, &d);
__clear_bit(X86_FEATURE_PSE, &d);
__clear_bit(X86_FEATURE_PGE, &d);
__clear_bit(X86_FEATURE_MCE, &d);
@@ -742,6 +744,8 @@ static void pv_cpuid(struct cpu_user_reg
__clear_bit(X86_FEATURE_XTPR % 32, &c);
__clear_bit(X86_FEATURE_PDCM % 32, &c);
__clear_bit(X86_FEATURE_DCA % 32, &c);
+ if ( !cpu_has_apic )
+ __clear_bit(X86_FEATURE_X2APIC % 32, &c);
__set_bit(X86_FEATURE_HYPERVISOR % 32, &c);
break;
case 0x80000001:
@@ -760,6 +764,8 @@ static void pv_cpuid(struct cpu_user_reg
__clear_bit(X86_FEATURE_RDTSCP % 32, &d);
__clear_bit(X86_FEATURE_SVME % 32, &c);
+ if ( !cpu_has_apic )
+ __clear_bit(X86_FEATURE_EXTAPICSPACE % 32, &c);
__clear_bit(X86_FEATURE_OSVW % 32, &c);
__clear_bit(X86_FEATURE_IBS % 32, &c);
__clear_bit(X86_FEATURE_SKINIT % 32, &c);


@@ -1,391 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232023062 0
# Node ID ecf603780f560095c4316169c9473d040f216dfc
# Parent f6a455c9f01db586832c0eb98c14965c045e07ac
libxc: Support cross-bitness guest when core-dumping
This patch allows core-dumping to work on a cross-bit host/guest
configuration, whereas previously that was not supported. It supports
both PV and FV guests. The core file format generated by the host
needs to match that of the guest, so an alignment issue is addressed,
and the p2m frame list is handled according to the guest word size.
Signed-off-by: Bruce Rogers <brogers@novell.com>
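The memmove-by-4 in elfnote_dump_xen_version() below compensates for a
structure-padding difference between the two ABIs. A reduced sketch
with a hypothetical payload layout (the real one lives in the dumpcore
ELF-note headers):

    #include <stdint.h>
    #include <stddef.h>

    /* A uint64_t following an odd number of 32-bit words is 8-byte
     * aligned on x86_64 but only 4-byte aligned per the i386 ABI: */
    struct note_payload { uint32_t a, b, c; uint64_t pagesize; };

    /* x86_64: offsetof(struct note_payload, pagesize) == 16
     * i386:   offsetof(struct note_payload, pagesize) == 12
     * so a 64-bit dom0 dumping a 32-bit guest moves the value down
     * by 4 bytes to land where the guest-format reader expects it. */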
Index: xen-3.3.1-testing/tools/libxc/xc_core.c
===================================================================
--- xen-3.3.1-testing.orig/tools/libxc/xc_core.c
+++ xen-3.3.1-testing/tools/libxc/xc_core.c
@@ -58,9 +58,6 @@
/* number of pages to write at a time */
#define DUMP_INCREMENT (4 * 1024)
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
-
/* string table */
struct xc_core_strtab {
char *strings;
@@ -240,7 +237,7 @@ xc_core_ehdr_init(Elf64_Ehdr *ehdr)
ehdr->e_ident[EI_ABIVERSION] = EV_CURRENT;
ehdr->e_type = ET_CORE;
- ehdr->e_machine = ELF_ARCH_MACHINE;
+ /* e_machine will be filled in later */
ehdr->e_version = EV_CURRENT;
ehdr->e_entry = 0;
ehdr->e_phoff = 0;
@@ -359,7 +356,8 @@ elfnote_dump_core_header(
}
static int
-elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle)
+elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle,
+ unsigned int guest_width)
{
int sts;
struct elfnote elfnote;
@@ -371,6 +369,12 @@ elfnote_dump_xen_version(void *args, dum
elfnote.descsz = sizeof(xen_version);
elfnote.type = XEN_ELFNOTE_DUMPCORE_XEN_VERSION;
elfnote_fill_xen_version(xc_handle, &xen_version);
+ if (guest_width < sizeof(unsigned long))
+ {
+ // 32 bit elf file format differs in pagesize's alignment
+ char *p = (char *)&xen_version.pagesize;
+ memmove(p - 4, p, sizeof(xen_version.pagesize));
+ }
sts = dump_rtn(args, (char*)&elfnote, sizeof(elfnote));
if ( sts != 0 )
return sts;
@@ -396,6 +400,24 @@ elfnote_dump_format_version(void *args,
return dump_rtn(args, (char*)&format_version, sizeof(format_version));
}
+static int
+get_guest_width(int xc_handle,
+ uint32_t domid,
+ unsigned int *guest_width)
+{
+ DECLARE_DOMCTL;
+
+ memset(&domctl, 0, sizeof(domctl));
+ domctl.domain = domid;
+ domctl.cmd = XEN_DOMCTL_get_address_size;
+
+ if ( do_domctl(xc_handle, &domctl) != 0 )
+ return 1;
+
+ *guest_width = domctl.u.address_size.size / 8;
+ return 0;
+}
+
int
xc_domain_dumpcore_via_callback(int xc_handle,
uint32_t domid,
@@ -403,7 +425,8 @@ xc_domain_dumpcore_via_callback(int xc_h
dumpcore_rtn_t dump_rtn)
{
xc_dominfo_t info;
- shared_info_t *live_shinfo = NULL;
+ shared_info_any_t *live_shinfo = NULL;
+ unsigned int guest_width;
int nr_vcpus = 0;
char *dump_mem, *dump_mem_start = NULL;
@@ -437,6 +460,12 @@ xc_domain_dumpcore_via_callback(int xc_h
uint16_t strtab_idx;
struct xc_core_section_headers *sheaders = NULL;
Elf64_Shdr *shdr;
+
+ if ( get_guest_width(xc_handle, domid, &guest_width) != 0 )
+ {
+ PERROR("Could not get address size for domain");
+ return sts;
+ }
xc_core_arch_context_init(&arch_ctxt);
if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL )
@@ -500,7 +529,7 @@ xc_domain_dumpcore_via_callback(int xc_h
goto out;
}
- sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
+ sts = xc_core_arch_map_p2m(xc_handle, guest_width, &info, live_shinfo,
&p2m, &p2m_size);
if ( sts != 0 )
goto out;
@@ -676,6 +705,7 @@ xc_domain_dumpcore_via_callback(int xc_h
/* write out elf header */
ehdr.e_shnum = sheaders->num;
ehdr.e_shstrndx = strtab_idx;
+ ehdr.e_machine = ELF_ARCH_MACHINE;
sts = dump_rtn(args, (char*)&ehdr, sizeof(ehdr));
if ( sts != 0 )
goto out;
@@ -697,7 +727,7 @@ xc_domain_dumpcore_via_callback(int xc_h
goto out;
/* elf note section: xen version */
- sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle);
+ sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle, guest_width);
if ( sts != 0 )
goto out;
@@ -757,9 +787,21 @@ xc_domain_dumpcore_via_callback(int xc_h
if ( !auto_translated_physmap )
{
- gmfn = p2m[i];
- if ( gmfn == INVALID_P2M_ENTRY )
- continue;
+ if ( guest_width >= sizeof(unsigned long) )
+ {
+ if ( guest_width == sizeof(unsigned long) )
+ gmfn = p2m[i];
+ else
+ gmfn = ((uint64_t *)p2m)[i];
+ if ( gmfn == INVALID_P2M_ENTRY )
+ continue;
+ }
+ else
+ {
+ gmfn = ((uint32_t *)p2m)[i];
+ if ( gmfn == (uint32_t)INVALID_P2M_ENTRY )
+ continue;
+ }
p2m_array[j].pfn = i;
p2m_array[j].gmfn = gmfn;
@@ -802,7 +844,7 @@ copy_done:
/* When live dump-mode (-L option) is specified,
* guest domain may reduce memory. pad with zero pages.
*/
- IPRINTF("j (%ld) != nr_pages (%ld)", j , nr_pages);
+ IPRINTF("j (%ld) != nr_pages (%ld)", j, nr_pages);
memset(dump_mem_start, 0, PAGE_SIZE);
for (; j < nr_pages; j++) {
sts = dump_rtn(args, dump_mem_start, PAGE_SIZE);
@@ -891,7 +933,7 @@ xc_domain_dumpcore(int xc_handle,
struct dump_args da;
int sts;
- if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 )
+ if ( (da.fd = open(corename, O_CREAT|O_RDWR|O_TRUNC, S_IWUSR|S_IRUSR)) < 0 )
{
PERROR("Could not open corefile %s", corename);
return -errno;
Index: xen-3.3.1-testing/tools/libxc/xc_core.h
===================================================================
--- xen-3.3.1-testing.orig/tools/libxc/xc_core.h
+++ xen-3.3.1-testing/tools/libxc/xc_core.h
@@ -136,12 +136,12 @@ int xc_core_arch_auto_translated_physmap
struct xc_core_arch_context;
int xc_core_arch_memory_map_get(int xc_handle,
struct xc_core_arch_context *arch_ctxt,
- xc_dominfo_t *info, shared_info_t *live_shinfo,
+ xc_dominfo_t *info, shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp,
unsigned int *nr_entries);
-int xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
- unsigned long *pfnp);
+int xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width,
+ xc_dominfo_t *info, shared_info_any_t *live_shinfo,
+ xen_pfn_t **live_p2m, unsigned long *pfnp);
#if defined (__i386__) || defined (__x86_64__)
Index: xen-3.3.1-testing/tools/libxc/xc_core_ia64.c
===================================================================
--- xen-3.3.1-testing.orig/tools/libxc/xc_core_ia64.c
+++ xen-3.3.1-testing/tools/libxc/xc_core_ia64.c
@@ -270,7 +270,7 @@ old:
}
int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
unsigned long *pfnp)
{
Index: xen-3.3.1-testing/tools/libxc/xc_core_x86.c
===================================================================
--- xen-3.3.1-testing.orig/tools/libxc/xc_core_x86.c
+++ xen-3.3.1-testing/tools/libxc/xc_core_x86.c
@@ -20,9 +20,25 @@
#include "xg_private.h"
#include "xc_core.h"
+#include "xc_e820.h"
+
+#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f))
+
+#ifndef MAX
+#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b))
+#endif
+
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+ unsigned long pfn)
+{
+ if ((pfn >= 0xa0 && pfn < 0xc0) /* VGA hole */
+ || (pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
+ && pfn < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */
+ return 0;
+ return 1;
+}
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
static int nr_gpfns(int xc_handle, domid_t domid)
{
@@ -37,7 +53,7 @@ xc_core_arch_auto_translated_physmap(con
int
xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *unused,
- xc_dominfo_t *info, shared_info_t *live_shinfo,
+ xc_dominfo_t *info, shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp,
unsigned int *nr_entries)
{
@@ -60,17 +76,22 @@ xc_core_arch_memory_map_get(int xc_handl
}
int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
+ shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
unsigned long *pfnp)
{
/* Double and single indirect references to the live P2M table */
xen_pfn_t *live_p2m_frame_list_list = NULL;
xen_pfn_t *live_p2m_frame_list = NULL;
+ /* Copies of the above. */
+ xen_pfn_t *p2m_frame_list_list = NULL;
+ xen_pfn_t *p2m_frame_list = NULL;
+
uint32_t dom = info->domid;
unsigned long p2m_size = nr_gpfns(xc_handle, info->domid);
int ret = -1;
int err;
+ int i;
if ( p2m_size < info->nr_pages )
{
@@ -80,7 +101,7 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
live_p2m_frame_list_list =
xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
- live_shinfo->arch.pfn_to_mfn_frame_list_list);
+ GET_FIELD(live_shinfo, arch.pfn_to_mfn_frame_list_list));
if ( !live_p2m_frame_list_list )
{
@@ -88,9 +109,28 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
goto out;
}
+ /* Get a local copy of the live_P2M_frame_list_list */
+ if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) )
+ {
+ ERROR("Couldn't allocate p2m_frame_list_list array");
+ goto out;
+ }
+ memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE);
+
+ /* Canonicalize guest's unsigned long vs ours */
+ if ( guest_width > sizeof(unsigned long) )
+ for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ )
+ if ( i < PAGE_SIZE/guest_width )
+ p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i];
+ else
+ p2m_frame_list_list[i] = 0;
+ else if ( guest_width < sizeof(unsigned long) )
+ for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- )
+ p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i];
+
live_p2m_frame_list =
xc_map_foreign_pages(xc_handle, dom, PROT_READ,
- live_p2m_frame_list_list,
+ p2m_frame_list_list,
P2M_FLL_ENTRIES);
if ( !live_p2m_frame_list )
@@ -99,8 +139,25 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
goto out;
}
+ /* Get a local copy of the live_P2M_frame_list */
+ if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) )
+ {
+ ERROR("Couldn't allocate p2m_frame_list array");
+ goto out;
+ }
+ memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE);
+ memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE);
+
+ /* Canonicalize guest's unsigned long vs ours */
+ if ( guest_width > sizeof(unsigned long) )
+ for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+ p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i];
+ else if ( guest_width < sizeof(unsigned long) )
+ for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- )
+ p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i];
+
*live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
- live_p2m_frame_list,
+ p2m_frame_list,
P2M_FL_ENTRIES);
if ( !*live_p2m )
@@ -122,6 +179,12 @@ out:
if ( live_p2m_frame_list )
munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
+ if ( p2m_frame_list_list )
+ free(p2m_frame_list_list);
+
+ if ( p2m_frame_list )
+ free(p2m_frame_list);
+
errno = err;
return ret;
}
Index: xen-3.3.1-testing/tools/libxc/xc_core_x86.h
===================================================================
--- xen-3.3.1-testing.orig/tools/libxc/xc_core_x86.h
+++ xen-3.3.1-testing/tools/libxc/xc_core_x86.h
@@ -21,15 +21,8 @@
#ifndef XC_CORE_X86_H
#define XC_CORE_X86_H
-#if defined(__i386__) || defined(__x86_64__)
#define ELF_ARCH_DATA ELFDATA2LSB
-#if defined (__i386__)
-# define ELF_ARCH_MACHINE EM_386
-#else
-# define ELF_ARCH_MACHINE EM_X86_64
-#endif
-#endif /* __i386__ or __x86_64__ */
-
+#define ELF_ARCH_MACHINE (guest_width == 8 ? EM_X86_64 : EM_386)
struct xc_core_arch_context {
/* nothing */
@@ -40,8 +33,10 @@ struct xc_core_arch_context {
#define xc_core_arch_context_get(arch_ctxt, ctxt, xc_handle, domid) \
(0)
#define xc_core_arch_context_dump(arch_ctxt, args, dump_rtn) (0)
-#define xc_core_arch_gpfn_may_present(arch_ctxt, i) (1)
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+ unsigned long pfn);
static inline int
xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt,
struct xc_core_section_headers *sheaders,


@@ -1,37 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232023248 0
# Node ID 04f913ab2049bd0d8f13cdd72a487376d3909f87
# Parent e98032a016d62c4ee09bb59ab9e0987c2563804a
dump-core: update the documentation
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Index: xen-3.3.1-testing/docs/misc/dump-core-format.txt
===================================================================
--- xen-3.3.1-testing.orig/docs/misc/dump-core-format.txt
+++ xen-3.3.1-testing/docs/misc/dump-core-format.txt
@@ -30,8 +30,13 @@ The elf header members are set as follow
e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
e_type = ET_CORE = 4
ELFCLASS64 is always used independent of architecture.
-e_ident[EI_DATA] and e_flags are set according to the dumping system's
-architecture. Other members are set as usual.
+e_ident[EI_DATA] is set as follows
+ For x86 PV domain case, it is set according to the guest configuration
+ (i.e. if guest is 32bit it is set to EM_386 even when the dom0 is 64 bit.)
+ For other domain case (x86 HVM domain case and ia64 domain case),
+ it is set according to the dumping system's architecture.
+e_flags is set according to the dumping system's architecture.
+Other members are set as usual.
Sections
--------
@@ -241,3 +246,7 @@ Currently only (major, minor) = (0, 1) i
The format version isn't bumped because analysis tools can distinguish it.
- .xen_ia64_mapped_regs section was made only for ia64 PV domain.
In case of IA64 HVM domain, this section doesn't exist.
+- elf header e_ident[EI_DATA]
+ On x86 PV domain case, it is set according to the guest configuration.
+ I.e. 32-on-64 case, the file will be set EM_386 instead of EM_X86_64.
+ This is the same as 32-on-32 case, so there is no impact on analysis tools.


@@ -1,24 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232106411 0
# Node ID 71e0b8adeb1f71d0055fabba0e97a4bdbf594c72
# Parent 40d9d9ff435afee74431102e4e1ac6c7542649bd
ptrace_core: Handle FV cross-bitness.
Signed-off-by: Bruce Rogers <brogers@novell.com>
Index: xen-3.3.1-testing/tools/libxc/xc_ptrace_core.c
===================================================================
--- xen-3.3.1-testing.orig/tools/libxc/xc_ptrace_core.c
+++ xen-3.3.1-testing/tools/libxc/xc_ptrace_core.c
@@ -540,7 +540,9 @@ xc_waitdomain_core_elf(
XEN_ELFNOTE_DUMPCORE_XEN_VERSION,
(void**)&xen_version) < 0)
goto out;
- if (xen_version->xen_version.pagesize != PAGE_SIZE)
+ /* shifted case covers 32 bit FV guest core file created on 64 bit Dom0 */
+ if (xen_version->xen_version.pagesize != PAGE_SIZE &&
+ (xen_version->xen_version.pagesize >> 32) != PAGE_SIZE)
goto out;
/* .note.Xen: format_version */


@@ -1,148 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232623303 0
# Node ID d52921c18c3d0171bccb4651cca8412f2fff2dd9
# Parent 9f9ba1a7cc924fbc547e05ea21071becafe5e2c2
vmx: utilise the GUEST_PAT and HOST_PAT vmcs area
Signed-off-by: Xin Li <Xin.Li@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
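The guest PAT value programmed below, 0x7040600070406ULL, is the
architectural power-on default. Decoded per byte (low byte is PAT0), as
a sketch:

    /* 0x0007040600070406:
     *   PAT0 = 0x06  WB  (write-back)
     *   PAT1 = 0x04  WT  (write-through)
     *   PAT2 = 0x07  UC- (uncached, may be overridden by MTRRs)
     *   PAT3 = 0x00  UC  (uncacheable)
     *   PAT4..PAT7 repeat PAT0..PAT3. */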
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -166,14 +166,15 @@ static void vmx_init_vmcs_config(void)
#endif
min = VM_EXIT_ACK_INTR_ON_EXIT;
- opt = 0;
+ opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT;
#ifdef __x86_64__
min |= VM_EXIT_IA32E_MODE;
#endif
_vmx_vmexit_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_EXIT_CTLS);
- min = opt = 0;
+ min = 0;
+ opt = VM_ENTRY_LOAD_GUEST_PAT;
_vmx_vmentry_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_ENTRY_CTLS);
@@ -518,8 +519,6 @@ static int construct_vmcs(struct vcpu *v
/* VMCS controls. */
__vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
- __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
- __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
@@ -533,9 +532,15 @@ static int construct_vmcs(struct vcpu *v
else
{
v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT |
+ VM_EXIT_LOAD_HOST_PAT);
+ vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT;
}
__vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+ __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
+ __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
+
if ( cpu_has_vmx_secondary_exec_control )
__vmwrite(SECONDARY_VM_EXEC_CONTROL,
v->arch.hvm_vmx.secondary_exec_control);
@@ -557,6 +562,8 @@ static int construct_vmcs(struct vcpu *v
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+ if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
}
/* I/O access bitmap. */
@@ -688,6 +695,21 @@ static int construct_vmcs(struct vcpu *v
__vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
}
+ if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+ {
+ u64 host_pat, guest_pat;
+
+ rdmsrl(MSR_IA32_CR_PAT, host_pat);
+ guest_pat = 0x7040600070406ULL;
+
+ __vmwrite(HOST_PAT, host_pat);
+ __vmwrite(GUEST_PAT, guest_pat);
+#ifdef __i386__
+ __vmwrite(HOST_PAT_HIGH, host_pat >> 32);
+ __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32);
+#endif
+ }
+
vmx_vmcs_exit(v);
paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
@@ -968,6 +990,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
vmx_dump_sel("LDTR", x86_seg_ldtr);
vmx_dump_sel("IDTR", x86_seg_idtr);
vmx_dump_sel("TR", x86_seg_tr);
+ printk("Guest PAT = 0x%08x%08x\n",
+ (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT));
x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
x |= (uint32_t)vmr(TSC_OFFSET);
printk("TSC Offset = %016llx\n", x);
@@ -1006,6 +1030,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
(unsigned long long)vmr(HOST_SYSENTER_ESP),
(int)vmr(HOST_SYSENTER_CS),
(unsigned long long)vmr(HOST_SYSENTER_EIP));
+ printk("Host PAT = 0x%08x%08x\n",
+ (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT));
printk("*** Control State ***\n");
printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -150,11 +150,14 @@ extern u32 vmx_pin_based_exec_control;
#define VM_EXIT_IA32E_MODE 0x00000200
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_GUEST_PAT 0x00040000
+#define VM_EXIT_LOAD_HOST_PAT 0x00080000
extern u32 vmx_vmexit_control;
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_GUEST_PAT 0x00004000
extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
@@ -181,6 +184,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr
(vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
#define cpu_has_vmx_vpid \
(vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
+#define cpu_has_vmx_pat \
+ (vmx_vmentry_control & VM_ENTRY_LOAD_GUEST_PAT)
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -232,6 +237,8 @@ enum vmcs_field {
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_PAT = 0x00002804,
+ GUEST_PAT_HIGH = 0x00002805,
GUEST_PDPTR0 = 0x0000280a,
GUEST_PDPTR0_HIGH = 0x0000280b,
GUEST_PDPTR1 = 0x0000280c,
@@ -240,6 +247,8 @@ enum vmcs_field {
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
+ HOST_PAT = 0x00002c00,
+ HOST_PAT_HIGH = 0x00002c01,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,


@@ -1,315 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232721749 0
# Node ID 175a425e9b55e63c240b0a2ad61f5ed251e85ead
# Parent f3240cd3cd2b9d48acf3d82caa2ca1cab1f66325
EPT/VT-d: Enhance MTRR/PAT virtualization when EPT/VT-d both enabled
Set the effective memory type for EPT according to the VT-d snoop
control capability; this also includes some cleanups for the case
where EPT and VT-d are both enabled.
Signed-off-by: Edwin Zhai <Edwin.Zhai@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
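The decision order introduced in epte_get_entry_emt() below, condensed
into a sketch with assumed parameter names (igmt is the EPT "ignore
guest memory type" bit):

    uint8_t emt_sketch(int iommu_on, int direct_mmio, int snoop,
                       uint8_t gmtrr_mtype, uint8_t hmtrr_mtype,
                       uint8_t *igmt)
    {
        *igmt = 0;
        if ( !iommu_on )   /* no DMA can bypass the caches */
            { *igmt = 1; return MTRR_TYPE_WRBACK; }
        if ( direct_mmio ) /* device memory must stay uncached */
            return MTRR_TYPE_UNCACHABLE;
        if ( snoop )       /* VT-d keeps DMA cache-coherent */
            { *igmt = 1; return MTRR_TYPE_WRBACK; }
        /* otherwise take the stricter of guest and host MTRR types */
        return (gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype;
    }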
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -702,12 +702,15 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
1, HVMSR_PER_VCPU);
uint8_t epte_get_entry_emt(
- struct domain *d, unsigned long gfn, unsigned long mfn)
+ struct domain *d, unsigned long gfn,
+ unsigned long mfn, uint8_t *igmt, int direct_mmio)
{
uint8_t gmtrr_mtype, hmtrr_mtype;
uint32_t type;
struct vcpu *v = current;
+ *igmt = 0;
+
if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) )
return MTRR_TYPE_WRBACK;
@@ -723,6 +726,21 @@ uint8_t epte_get_entry_emt(
if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
return type;
+ if ( !iommu_enabled )
+ {
+ *igmt = 1;
+ return MTRR_TYPE_WRBACK;
+ }
+
+ if ( direct_mmio )
+ return MTRR_TYPE_UNCACHABLE;
+
+ if ( iommu_snoop )
+ {
+ *igmt = 1;
+ return MTRR_TYPE_WRBACK;
+ }
+
gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT));
hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT));
return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype);
--- a/xen/arch/x86/mm/hap/p2m-ept.c
+++ b/xen/arch/x86/mm/hap/p2m-ept.c
@@ -66,6 +66,7 @@ static int ept_set_middle_entry(struct d
list_add_tail(&pg->list, &d->arch.p2m->pages);
ept_entry->emt = 0;
+ ept_entry->igmt = 0;
ept_entry->sp_avail = 0;
ept_entry->avail1 = 0;
ept_entry->mfn = page_to_mfn(pg);
@@ -114,9 +115,13 @@ static int ept_next_level(struct domain
}
}
+/*
+ * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * by observing whether any gfn->mfn translations are modified.
+ */
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
- unsigned int order, p2m_type_t p2mt)
+_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
{
ept_entry_t *table = NULL;
unsigned long gfn_remainder = gfn, offset = 0;
@@ -124,6 +129,8 @@ ept_set_entry(struct domain *d, unsigned
u32 index;
int i, rv = 0, ret = 0;
int walk_level = order / EPT_TABLE_ORDER;
+ int direct_mmio = (p2mt == p2m_mmio_direct);
+ uint8_t igmt = 0;
/* we only support 4k and 2m pages now */
@@ -157,7 +164,9 @@ ept_set_entry(struct domain *d, unsigned
{
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
{
- ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+ ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+ &igmt, direct_mmio);
+ ept_entry->igmt = igmt;
ept_entry->sp_avail = walk_level ? 1 : 0;
if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -208,7 +217,10 @@ ept_set_entry(struct domain *d, unsigned
{
split_ept_entry = split_table + i;
split_ept_entry->emt = epte_get_entry_emt(d,
- gfn-offset+i, split_mfn+i);
+ gfn-offset+i, split_mfn+i,
+ &igmt, direct_mmio);
+ split_ept_entry->igmt = igmt;
+
split_ept_entry->sp_avail = 0;
split_ept_entry->mfn = split_mfn+i;
@@ -223,7 +235,10 @@ ept_set_entry(struct domain *d, unsigned
/* Set the destinated 4k page as normal */
split_ept_entry = split_table + offset;
- split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+ split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+ &igmt, direct_mmio);
+ split_ept_entry->igmt = igmt;
+
split_ept_entry->mfn = mfn_x(mfn);
split_ept_entry->avail1 = p2mt;
ept_p2m_type_to_flags(split_ept_entry, p2mt);
@@ -246,7 +261,8 @@ out:
/* Now the p2m table is not shared with vt-d page table */
- if ( iommu_enabled && is_hvm_domain(d) )
+ if ( iommu_enabled && is_hvm_domain(d)
+ && need_modify_vtd_table )
{
if ( p2mt == p2m_ram_rw )
{
@@ -273,6 +289,17 @@ out:
return rv;
}
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ /* ept_set_entry() are called from set_entry(),
+ * We should always create VT-d page table acording
+ * to the gfn to mfn translations changes.
+ */
+ return _ept_set_entry(d, gfn, mfn, order, p2mt, 1);
+}
+
/* Read ept p2m entries */
static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t)
{
@@ -393,18 +420,30 @@ void ept_change_entry_emt_with_range(str
* Set emt for super page.
*/
order = EPT_TABLE_ORDER;
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ /* vmx_set_uc_mode() dont' touch the gfn to mfn
+ * translations, only modify the emt field of the EPT entries.
+ * so we need not modify the current VT-d page tables.
+ */
+ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
gfn += 0x1FF;
}
else
{
- /* change emt for partial entries of the 2m area */
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ /* 1)change emt for partial entries of the 2m area.
+ * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
+ * translations, only modify the emt field of the EPT entries.
+ * so we need not modify the current VT-d page tables.
+ */
+ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
}
}
- else /* gfn assigned with 4k */
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ else /* 1)gfn assigned with 4k
+ * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
+ * translations, only modify the emt field of the EPT entries.
+ * so we need not modify the current VT-d page tables.
+ */
+ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
}
}
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -40,6 +40,7 @@ int iommu_enabled = 0;
int iommu_pv_enabled = 0;
int force_iommu = 0;
int iommu_passthrough = 0;
+int iommu_snoop = 0;
static void __init parse_iommu_param(char *s)
{
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -29,6 +29,7 @@
#include <xen/pci_regs.h>
#include <asm/string.h>
#include "dmar.h"
+#include "iommu.h"
int vtd_enabled = 1;
@@ -508,6 +509,8 @@ static int __init acpi_parse_dmar(struct
int acpi_dmar_init(void)
{
int rc;
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
rc = -ENODEV;
if ( force_iommu )
@@ -524,7 +527,20 @@ int acpi_dmar_init(void)
if ( list_empty(&acpi_drhd_units) )
goto fail;
- printk("Intel VT-d has been enabled\n");
+ /* Giving that all devices within guest use same io page table,
+ * enable snoop control only if all VT-d engines support it.
+ */
+ iommu_snoop = 1;
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( !ecap_snp_ctl(iommu->ecap) ) {
+ iommu_snoop = 0;
+ break;
+ }
+ }
+
+ printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop);
return 0;
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1495,6 +1495,11 @@ int intel_iommu_map_page(
pte_present = dma_pte_present(*pte);
dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
+
+ /* Set the SNP on leaf page table if Snoop Control available */
+ if ( iommu_snoop )
+ dma_set_pte_snp(*pte);
+
iommu_flush_cache_entry(pte);
unmap_vtd_domain_page(page);
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -104,6 +104,7 @@
#define ecap_ext_intr(e) ((e >> 4) & 0x1)
#define ecap_cache_hints(e) ((e >> 5) & 0x1)
#define ecap_pass_thru(e) ((e >> 6) & 0x1)
+#define ecap_snp_ctl(e) ((e >> 7) & 0x1)
/* IOTLB_REG */
#define DMA_TLB_FLUSH_GRANU_OFFSET 60
@@ -260,10 +261,12 @@ struct dma_pte {
};
#define DMA_PTE_READ (1)
#define DMA_PTE_WRITE (2)
+#define DMA_PTE_SNP (1 << 11)
#define dma_clear_pte(p) do {(p).val = 0;} while(0)
#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while(0)
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while(0)
#define dma_set_pte_superpage(p) do {(p).val |= (1 << 7);} while(0)
+#define dma_set_pte_snp(p) do {(p).val |= DMA_PTE_SNP;} while(0)
#define dma_set_pte_prot(p, prot) \
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -33,7 +33,8 @@ typedef union {
u64 r : 1,
w : 1,
x : 1,
- emt : 4,
+ emt : 3,
+ igmt : 1,
sp_avail : 1,
avail1 : 4,
mfn : 45,
--- a/xen/include/asm-x86/mtrr.h
+++ b/xen/include/asm-x86/mtrr.h
@@ -64,9 +64,11 @@ extern int mtrr_del_page(int reg, unsign
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
paddr_t spaddr);
-extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, unsigned long mfn);
-extern void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
- unsigned long end_gfn);
+extern uint8_t epte_get_entry_emt(
+ struct domain *d, unsigned long gfn, unsigned long mfn,
+ uint8_t *igmt, int direct_mmio);
+extern void ept_change_entry_emt_with_range(
+ struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);
#endif /* __ASM_X86_MTRR_H__ */
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -32,6 +32,7 @@ extern int iommu_enabled;
extern int iommu_pv_enabled;
extern int force_iommu;
extern int iommu_passthrough;
+extern int iommu_snoop;
#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu)


@@ -1,146 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232981779 0
# Node ID 055c589f4791811797867736857b08fdd0fd6d49
# Parent c9dc7dcacc1d0c064a131da98a4063fa2cedd716
x86: No need for CMPXCHG8B on page_info structure.
References: bnc#470949
Updates and checks on count_info and page owner can safely be
non-atomic.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
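Why the 64-bit compare-and-exchange can be dropped, sketched against
the new get_page() in the hunk below:

    /* 1. Take a reference with a plain cmpxchg on count_info alone.
     * 2. Only afterwards compare page_get_owner(page) with the
     *    expected domain: holding a reference prevents the page from
     *    being freed and re-assigned, so the owner is stable by then.
     * 3. On an owner mismatch, put_page() simply drops the
     *    speculative reference again. */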
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1887,36 +1887,29 @@ void put_page(struct page_info *page)
int get_page(struct page_info *page, struct domain *domain)
{
- u32 x, nx, y = page->count_info;
- u32 d, nd = page->u.inuse._domain;
- u32 _domain = pickle_domptr(domain);
+ u32 x, y = page->count_info;
do {
- x = y;
- nx = x + 1;
- d = nd;
+ x = y;
if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
/* Keep one spare reference to be acquired by get_page_light(). */
- unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
- unlikely(d != _domain) ) /* Wrong owner? */
- {
- if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
- gdprintk(XENLOG_INFO,
- "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
- PRtype_info "\n",
- page_to_mfn(page), domain, unpickle_domptr(d),
- x, page->u.inuse.type_info);
- return 0;
- }
- asm volatile (
- LOCK_PREFIX "cmpxchg8b %2"
- : "=d" (nd), "=a" (y),
- "=m" (*(volatile u64 *)(&page->count_info))
- : "0" (d), "1" (x), "c" (d), "b" (nx) );
+ unlikely(((x + 2) & PGC_count_mask) <= 1) ) /* Overflow? */
+ goto fail;
}
- while ( unlikely(nd != d) || unlikely(y != x) );
+ while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );
- return 1;
+ if ( likely(page_get_owner(page) == domain) )
+ return 1;
+
+ put_page(page);
+
+ fail:
+ if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
+ gdprintk(XENLOG_INFO,
+ "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info,
+ page_to_mfn(page), domain, page_get_owner(page),
+ y, page->u.inuse.type_info);
+ return 0;
}
/*
@@ -3438,49 +3431,47 @@ int replace_grant_host_mapping(
int steal_page(
struct domain *d, struct page_info *page, unsigned int memflags)
{
- u32 _d, _nd, x, y;
+ u32 x, y;
spin_lock(&d->page_alloc_lock);
+ if ( is_xen_heap_page(page) || (page_get_owner(page) != d) )
+ goto fail;
+
/*
- * The tricky bit: atomically release ownership while there is just one
- * benign reference to the page (PGC_allocated). If that reference
- * disappears then the deallocation routine will safely spin.
+ * We require there is just one reference (PGC_allocated). We temporarily
+ * drop this reference now so that we can safely swizzle the owner.
*/
- _d = pickle_domptr(d);
- _nd = page->u.inuse._domain;
- y = page->count_info;
+ y = page->count_info;
do {
x = y;
- if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
- (1 | PGC_allocated)) || unlikely(_nd != _d) )
- {
- MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%" PRtype_info "\n",
- (void *) page_to_mfn(page),
- d, d->domain_id, unpickle_domptr(_nd), x,
- page->u.inuse.type_info);
- spin_unlock(&d->page_alloc_lock);
- return -1;
- }
- asm volatile (
- LOCK_PREFIX "cmpxchg8b %2"
- : "=d" (_nd), "=a" (y),
- "=m" (*(volatile u64 *)(&page->count_info))
- : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
- } while (unlikely(_nd != _d) || unlikely(y != x));
+ if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
+ goto fail;
+ y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
+ } while ( y != x );
- /*
- * Unlink from 'd'. At least one reference remains (now anonymous), so
- * noone else is spinning to try to delete this page from 'd'.
- */
+ /* Swizzle the owner then reinstate the PGC_allocated reference. */
+ page_set_owner(page, NULL);
+ y = page->count_info;
+ do {
+ x = y;
+ BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
+ } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
+
+ /* Unlink from original owner. */
if ( !(memflags & MEMF_no_refcount) )
d->tot_pages--;
list_del(&page->list);
spin_unlock(&d->page_alloc_lock);
-
return 0;
+
+ fail:
+ spin_unlock(&d->page_alloc_lock);
+ MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08x, taf=%" PRtype_info,
+ (void *)page_to_mfn(page), d, d->domain_id,
+ page_get_owner(page), page->count_info, page->u.inuse.type_info);
+ return -1;
}
int do_update_va_mapping(unsigned long va, u64 val64,


@@ -1,445 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232986782 0
# Node ID 39517e863cc89a085341e1d53317aaa7ceddd127
# Parent 055c589f4791811797867736857b08fdd0fd6d49
x86_64: Widen page counts to avoid overflow.
References: bnc#470949
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232988758 0
# Node ID 728d1892f0e24c2531df2d61a2d95177400ceb17
# Parent 90909b81b3b9cf9b303e2bc457580603da3ac7fd
x86: Clean up shadow_page_info after page_info changes.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1233056759 0
# Node ID 6e623569455c08b57e43e3355f6809b3a4ba0707
# Parent 7b56dbd1b439e0996083810489398cb51dc43aa6
x86: clean up struct page_info
Remove the now unnecessary (and leading to misalignment of cpumask on
x86-64) 'packed' attributes.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -143,7 +143,7 @@ void dump_pageframe_info(struct domain *
{
list_for_each_entry ( page, &d->page_list, list )
{
- printk(" DomPage %p: caf=%08x, taf=%" PRtype_info "\n",
+ printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
@@ -151,7 +151,7 @@ void dump_pageframe_info(struct domain *
list_for_each_entry ( page, &d->xenpage_list, list )
{
- printk(" XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
+ printk(" XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -714,8 +714,8 @@ get_page_from_l1e(
else if ( pte_flags_to_cacheattr(l1f) !=
((page->count_info >> PGC_cacheattr_base) & 7) )
{
- uint32_t x, nx, y = page->count_info;
- uint32_t cacheattr = pte_flags_to_cacheattr(l1f);
+ unsigned long x, nx, y = page->count_info;
+ unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
if ( is_xen_heap_page(page) )
{
@@ -1869,7 +1869,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
void put_page(struct page_info *page)
{
- u32 nx, x, y = page->count_info;
+ unsigned long nx, x, y = page->count_info;
do {
x = y;
@@ -1887,7 +1887,7 @@ void put_page(struct page_info *page)
int get_page(struct page_info *page, struct domain *domain)
{
- u32 x, y = page->count_info;
+ unsigned long x, y = page->count_info;
do {
x = y;
@@ -1906,7 +1906,7 @@ int get_page(struct page_info *page, str
fail:
if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
gdprintk(XENLOG_INFO,
- "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info,
+ "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%" PRtype_info,
page_to_mfn(page), domain, page_get_owner(page),
y, page->u.inuse.type_info);
return 0;
@@ -1922,7 +1922,7 @@ int get_page(struct page_info *page, str
*/
static void get_page_light(struct page_info *page)
{
- u32 x, nx, y = page->count_info;
+ unsigned long x, nx, y = page->count_info;
do {
x = y;
@@ -1963,7 +1963,7 @@ static int alloc_page_type(struct page_i
rc = alloc_segdesc_page(page);
break;
default:
- printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
+ printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n",
type, page->u.inuse.type_info,
page->count_info);
rc = -EINVAL;
@@ -1987,7 +1987,7 @@ static int alloc_page_type(struct page_i
{
ASSERT(rc < 0);
MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
- PRtype_info ": caf=%08x taf=%" PRtype_info,
+ PRtype_info ": caf=%08lx taf=%" PRtype_info,
page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
type, page->count_info, page->u.inuse.type_info);
page->u.inuse.type_info = 0;
@@ -3144,7 +3144,7 @@ static int create_grant_pte_mapping(
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- u32 type;
+ unsigned long type;
l1_pgentry_t ol1e;
struct domain *d = v->domain;
@@ -3205,7 +3205,7 @@ static int destroy_grant_pte_mapping(
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- u32 type;
+ unsigned long type;
l1_pgentry_t ol1e;
gmfn = addr >> PAGE_SHIFT;
@@ -3431,7 +3431,7 @@ int replace_grant_host_mapping(
int steal_page(
struct domain *d, struct page_info *page, unsigned int memflags)
{
- u32 x, y;
+ unsigned long x, y;
spin_lock(&d->page_alloc_lock);
@@ -3468,7 +3468,7 @@ int steal_page(
fail:
spin_unlock(&d->page_alloc_lock);
- MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08x, taf=%" PRtype_info,
+ MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08lx, taf=%" PRtype_info,
(void *)page_to_mfn(page), d, d->domain_id,
page_get_owner(page), page->count_info, page->u.inuse.type_info);
return -1;
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -166,7 +166,7 @@ void hap_free_p2m_page(struct domain *d,
ASSERT(page_get_owner(pg) == d);
/* Should have just the one ref we gave it in alloc_p2m_page() */
if ( (pg->count_info & PGC_count_mask) != 1 )
- HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
pg->count_info, pg->u.inuse.type_info);
pg->count_info = 0;
/* Free should not decrement domain's total allocation, since
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1678,7 +1678,7 @@ shadow_free_p2m_page(struct domain *d, s
/* Should have just the one ref we gave it in alloc_p2m_page() */
if ( (pg->count_info & PGC_count_mask) != 1 )
{
- SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ SHADOW_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
pg->count_info, pg->u.inuse.type_info);
}
pg->count_info = 0;
@@ -1796,14 +1796,21 @@ static unsigned int sh_set_allocation(st
sp = list_entry(d->arch.paging.shadow.freelists[order].next,
struct shadow_page_info, list);
list_del(&sp->list);
-#if defined(__x86_64__)
/*
- * Re-instate lock field which we overwrite with shadow_page_info.
- * This was safe, since the lock is only used on guest pages.
+ * The pages were allocated anonymously, but the owner field
+ * may get overwritten, so need to clear it here.
*/
for ( j = 0; j < 1U << order; j++ )
+ {
+ page_set_owner(&((struct page_info *)sp)[j], NULL);
+#if defined(__x86_64__)
+ /*
+ * Re-instate lock field which we overwrite with shadow_page_info.
+ * This was safe, since the lock is only used on guest pages.
+ */
spin_lock_init(&((struct page_info *)sp)[j].lock);
#endif
+ }
d->arch.paging.shadow.free_pages -= 1 << order;
d->arch.paging.shadow.total_pages -= 1 << order;
free_domheap_pages((struct page_info *)sp, order);
@@ -2516,7 +2523,7 @@ int sh_remove_all_mappings(struct vcpu *
&& (page->u.inuse.type_info & PGT_count_mask) == 0) )
{
SHADOW_ERROR("can't find all mappings of mfn %lx: "
- "c=%08x t=%08lx\n", mfn_x(gmfn),
+ "c=%08lx t=%08lx\n", mfn_x(gmfn),
page->count_info, page->u.inuse.type_info);
}
}
@@ -3591,7 +3598,6 @@ int shadow_track_dirty_vram(struct domai
for ( i = 0; i < nr; i++ ) {
mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
struct page_info *page;
- u32 count_info;
int dirty = 0;
paddr_t sl1ma = d->dirty_vram->sl1ma[i];
@@ -3602,8 +3608,7 @@ int shadow_track_dirty_vram(struct domai
else
{
page = mfn_to_page(mfn);
- count_info = page->u.inuse.type_info & PGT_count_mask;
- switch (count_info)
+ switch (page->u.inuse.type_info & PGT_count_mask)
{
case 0:
/* No guest reference, nothing to track. */
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1334,9 +1334,8 @@ static inline void shadow_vram_get_l1e(s
if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
unsigned long i = gfn - d->dirty_vram->begin_pfn;
struct page_info *page = mfn_to_page(mfn);
- u32 count_info = page->u.inuse.type_info & PGT_count_mask;
- if ( count_info == 1 )
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
/* Initial guest reference, record it */
d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
| ((unsigned long)sl1e & ~PAGE_MASK);
@@ -1362,12 +1361,11 @@ static inline void shadow_vram_put_l1e(s
if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
unsigned long i = gfn - d->dirty_vram->begin_pfn;
struct page_info *page = mfn_to_page(mfn);
- u32 count_info = page->u.inuse.type_info & PGT_count_mask;
int dirty = 0;
paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
| ((unsigned long)sl1e & ~PAGE_MASK);
- if ( count_info == 1 ) {
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) {
/* Last reference */
if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
/* We didn't know it was that one, let's say it is dirty */
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -201,12 +201,11 @@ struct shadow_page_info
u32 tlbflush_timestamp;
};
struct {
- unsigned int type:5; /* What kind of shadow is this? */
- unsigned int pinned:1; /* Is the shadow pinned? */
- unsigned int count:26; /* Reference count */
- u32 mbz; /* Must be zero: this is where the
- * owner field lives in page_info */
- } __attribute__((packed));
+ unsigned long mbz; /* Must be zero: count_info is here. */
+ unsigned long type:5; /* What kind of shadow is this? */
+ unsigned long pinned:1; /* Is the shadow pinned? */
+ unsigned long count:26; /* Reference count */
+ };
union {
/* For unused shadow pages, a list of pages of this order; for
* pinnable shadows, if pinned, a list of other pinned shadows
@@ -229,7 +228,7 @@ static inline void shadow_check_page_str
BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
sizeof (struct page_info));
BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
- offsetof(struct page_info, u.inuse._domain));
+ offsetof(struct page_info, count_info));
};
/* Shadow type codes */
--- a/xen/arch/x86/x86_32/mm.c
+++ b/xen/arch/x86/x86_32/mm.c
@@ -159,15 +159,6 @@ void __init subarch_init_memory(void)
unsigned long m2p_start_mfn;
unsigned int i, j;
- /*
- * We are rather picky about the layout of 'struct page_info'. The
- * count_info and domain fields must be adjacent, as we perform atomic
- * 64-bit operations on them. Also, just for sanity, we assert the size
- * of the structure here.
- */
- BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) !=
- (offsetof(struct page_info, count_info) + sizeof(u32)));
- BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
BUILD_BUG_ON(sizeof(struct page_info) != 24);
/* M2P table is mappable read-only by privileged domains. */
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -225,17 +225,6 @@ void __init subarch_init_memory(void)
l3_pgentry_t l3e;
l2_pgentry_t l2e;
- /*
- * We are rather picky about the layout of 'struct page_info'. The
- * count_info and domain fields must be adjacent, as we perform atomic
- * 64-bit operations on them.
- */
- BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) !=
- (offsetof(struct page_info, count_info) + sizeof(u32)));
- BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
- BUILD_BUG_ON(sizeof(struct page_info) !=
- (32 + BITS_TO_LONGS(NR_CPUS)*sizeof(long)));
-
/* M2P table is mappable read-only by privileged domains. */
for ( v = RDWR_MPT_VIRT_START;
v != RDWR_MPT_VIRT_END;
--- a/xen/common/xenoprof.c
+++ b/xen/common/xenoprof.c
@@ -142,8 +142,8 @@ share_xenoprof_page_with_guest(struct do
struct page_info *page = mfn_to_page(mfn + i);
if ( (page->count_info & (PGC_allocated|PGC_count_mask)) != 0 )
{
- gdprintk(XENLOG_INFO, "mfn 0x%lx page->count_info 0x%x\n",
- mfn + i, page->count_info);
+ gdprintk(XENLOG_INFO, "mfn 0x%lx page->count_info 0x%lx\n",
+ mfn + i, (unsigned long)page->count_info);
return -EBUSY;
}
page_set_owner(page, NULL);
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -23,7 +23,7 @@ struct page_info
struct list_head list;
/* Reference count and various PGC_xxx flags and fields. */
- u32 count_info;
+ unsigned long count_info;
/* Context-dependent fields follow... */
union {
@@ -34,7 +34,7 @@ struct page_info
u32 _domain; /* pickled format */
/* Type reference count and various PGT_xxx flags and fields. */
unsigned long type_info;
- } __attribute__ ((packed)) inuse;
+ } inuse;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
@@ -42,7 +42,7 @@ struct page_info
u32 order;
/* Mask of possibly-tainted TLBs. */
cpumask_t cpumask;
- } __attribute__ ((packed)) free;
+ } free;
} u;
@@ -102,48 +102,53 @@ struct page_info
};
};
+#define PG_shift(idx) (BITS_PER_LONG - (idx))
+#define PG_mask(x, idx) (x ## UL << PG_shift(idx))
+
/* The following page types are MUTUALLY EXCLUSIVE. */
-#define PGT_none (0U<<29) /* no special uses of this page */
-#define PGT_l1_page_table (1U<<29) /* using this page as an L1 page table? */
-#define PGT_l2_page_table (2U<<29) /* using this page as an L2 page table? */
-#define PGT_l3_page_table (3U<<29) /* using this page as an L3 page table? */
-#define PGT_l4_page_table (4U<<29) /* using this page as an L4 page table? */
-#define PGT_seg_desc_page (5U<<29) /* using this page in a GDT/LDT? */
-#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */
-#define PGT_type_mask (7U<<29) /* Bits 29-31. */
+#define PGT_none PG_mask(0, 3) /* no special uses of this page */
+#define PGT_l1_page_table PG_mask(1, 3) /* using as an L1 page table? */
+#define PGT_l2_page_table PG_mask(2, 3) /* using as an L2 page table? */
+#define PGT_l3_page_table PG_mask(3, 3) /* using as an L3 page table? */
+#define PGT_l4_page_table PG_mask(4, 3) /* using as an L4 page table? */
+#define PGT_seg_desc_page PG_mask(5, 3) /* using this page in a GDT/LDT? */
+#define PGT_writable_page PG_mask(7, 3) /* has writable mappings? */
+#define PGT_type_mask PG_mask(7, 3) /* Bits 29-31. */
/* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned 28
-#define PGT_pinned (1U<<_PGT_pinned)
+#define _PGT_pinned PG_shift(4)
+#define PGT_pinned PG_mask(1, 4)
/* Has this page been validated for use as its current type? */
-#define _PGT_validated 27
-#define PGT_validated (1U<<_PGT_validated)
+#define _PGT_validated PG_shift(5)
+#define PGT_validated PG_mask(1, 5)
/* PAE only: is this an L2 page directory containing Xen-private mappings? */
-#define _PGT_pae_xen_l2 26
-#define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
+#define _PGT_pae_xen_l2 PG_shift(6)
+#define PGT_pae_xen_l2 PG_mask(1, 6)
/* Has this page been *partially* validated for use as its current type? */
-#define _PGT_partial 25
-#define PGT_partial (1U<<_PGT_partial)
+#define _PGT_partial PG_shift(7)
+#define PGT_partial PG_mask(1, 7)
- /* 25-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<25)-1)
+ /* Count of uses of this frame as its current type. */
+#define PGT_count_width PG_shift(7)
+#define PGT_count_mask ((1UL<<PGT_count_width)-1)
/* Cleared when the owning guest 'frees' this page. */
-#define _PGC_allocated 31
-#define PGC_allocated (1U<<_PGC_allocated)
+#define _PGC_allocated PG_shift(1)
+#define PGC_allocated PG_mask(1, 1)
#if defined(__i386__)
/* Page is locked? */
-# define _PGC_locked 30
-# define PGC_locked (1U<<_PGC_out_of_sync)
+# define _PGC_locked PG_shift(2)
+# define PGC_locked PG_mask(1, 2)
#endif
/* Set when is using a page as a page table */
-#define _PGC_page_table 29
-#define PGC_page_table (1U<<_PGC_page_table)
+#define _PGC_page_table PG_shift(3)
+#define PGC_page_table PG_mask(1, 3)
/* 3-bit PAT/PCD/PWT cache-attribute hint. */
-#define PGC_cacheattr_base 26
-#define PGC_cacheattr_mask (7U<<PGC_cacheattr_base)
- /* 26-bit count of references to this frame. */
-#define PGC_count_mask ((1U<<26)-1)
+#define PGC_cacheattr_base PG_shift(6)
+#define PGC_cacheattr_mask PG_mask(7, 6)
+ /* Count of references to this frame. */
+#define PGC_count_width PG_shift(6)
+#define PGC_count_mask ((1UL<<PGC_count_width)-1)
#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
#define is_xen_heap_mfn(mfn) ({ \
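
The new PG_shift()/PG_mask() scheme above allocates flag bits downward
from the top of an unsigned long and leaves all remaining low bits for
the reference count, so the same definitions work on 32- and 64-bit
builds. A minimal stand-alone illustration of how the macros expand
(the names mirror Xen's, but this is not the hypervisor code):

#include <assert.h>
#include <stdio.h>

#define BITS_PER_LONG   (8 * (int)sizeof(unsigned long))

/* Allocate flag bits downward from the top of the word. */
#define PG_shift(idx)   (BITS_PER_LONG - (idx))
#define PG_mask(x, idx) ((unsigned long)(x) << PG_shift(idx))

#define PGT_type_mask   PG_mask(7, 3)   /* top 3 bits: the page type  */
#define PGT_pinned      PG_mask(1, 4)   /* next single-bit flag       */
#define PGT_count_width PG_shift(7)     /* low bits left for counting */
#define PGT_count_mask  ((1UL << PGT_count_width) - 1)

int main(void)
{
    /* Flag bits and the type count must occupy disjoint ranges. */
    assert((PGT_type_mask & PGT_count_mask) == 0);
    assert((PGT_pinned    & PGT_count_mask) == 0);

    printf("type mask  = %#lx\n", PGT_type_mask);
    printf("count mask = %#lx\n", PGT_count_mask);
    return 0;
}

On a 32-bit build these expand to the old literal constants (e.g.
PGT_type_mask == 7U<<29); on 64-bit the flags simply move up with the
word size, which is what lets count_info and type_info widen to
unsigned long without renumbering every flag.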


@ -1,834 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1233072141 0
# Node ID bcf77bfd1161d1e2693d6762bcd436ad98ec0779
# Parent dbf53b739af0434adff50172fc071f718b57b450
x86: Fold page_info lock into type_info.
References: bnc#470949
Fix some racy-looking code at the same time.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
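
The heart of the change is in the first mm.c hunk below: PGT_locked
becomes an ordinary bit in type_info, and page_lock() takes it together
with a type-count reference in a single cmpxchg() loop, failing cleanly
if the page is not a validated, referenced page table. A stand-alone
sketch of that scheme, using GCC's __sync_val_compare_and_swap() in
place of Xen's cmpxchg() and illustrative (not Xen's actual) bit
positions:

#include <stdio.h>

/* Illustrative bit layout -- not Xen's real one. */
#define PGT_locked      (1UL << 58)
#define PGT_validated   (1UL << 59)
#define PGT_count_mask  ((1UL << 56) - 1)

struct page { unsigned long type_info; };

/* Take the per-page lock and a type-count reference in one atomic step.
 * Returns 1 on success, 0 if the page is not a validated, referenced
 * type (so callers can fall back to the writable-page path). */
static int page_lock(struct page *pg)
{
    unsigned long x, nx;

    do {
        while ( (x = pg->type_info) & PGT_locked )
            ;                          /* spin while someone holds it */
        nx = x + (1 | PGT_locked);     /* set lock bit, bump count    */
        if ( !(x & PGT_validated) ||
             !(x & PGT_count_mask) ||  /* must already be referenced  */
             !(nx & PGT_count_mask) )  /* and the count must not wrap */
            return 0;
    } while ( __sync_val_compare_and_swap(&pg->type_info, x, nx) != x );

    return 1;
}

static void page_unlock(struct page *pg)
{
    unsigned long x, nx, y = pg->type_info;

    do {
        x = y;
        nx = x - (1 | PGT_locked);     /* clear lock bit, drop count  */
    } while ( (y = __sync_val_compare_and_swap(&pg->type_info, x, nx)) != x );
}

int main(void)
{
    struct page pg = { PGT_validated | 1 };

    if ( page_lock(&pg) )
    {
        printf("locked:   type_info = %#lx\n", pg.type_info);
        page_unlock(&pg);
    }
    printf("unlocked: type_info = %#lx\n", pg.type_info);
    return 0;
}

Folding the lock into type_info is also what lets the separate
spinlock_t field in struct page_info (removed in the mm.h hunk below)
go away entirely.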
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -202,11 +202,6 @@ void __init init_frametable(void)
}
memset(frame_table, 0, nr_pages << PAGE_SHIFT);
-
-#if defined(__x86_64__)
- for ( i = 0; i < max_page; i ++ )
- spin_lock_init(&frame_table[i].lock);
-#endif
}
void __init arch_init_memory(void)
@@ -1499,24 +1494,31 @@ static int free_l4_table(struct page_inf
#define free_l4_table(page, preemptible) (-EINVAL)
#endif
-static void page_lock(struct page_info *page)
+static int page_lock(struct page_info *page)
{
-#if defined(__i386__)
- while ( unlikely(test_and_set_bit(_PGC_locked, &page->count_info)) )
- while ( test_bit(_PGC_locked, &page->count_info) )
+ unsigned long x, nx;
+
+ do {
+ while ( (x = page->u.inuse.type_info) & PGT_locked )
cpu_relax();
-#else
- spin_lock(&page->lock);
-#endif
+ nx = x + (1 | PGT_locked);
+ if ( !(x & PGT_validated) ||
+ !(x & PGT_count_mask) ||
+ !(nx & PGT_count_mask) )
+ return 0;
+ } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
+
+ return 1;
}
static void page_unlock(struct page_info *page)
{
-#if defined(__i386__)
- clear_bit(_PGC_locked, &page->count_info);
-#else
- spin_unlock(&page->lock);
-#endif
+ unsigned long x, nx, y = page->u.inuse.type_info;
+
+ do {
+ x = y;
+ nx = x - (1 | PGT_locked);
+ } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
}
/* How to write an entry to the guest pagetables.
@@ -1579,19 +1581,15 @@ static int mod_l1_entry(l1_pgentry_t *pl
struct vcpu *curr = current;
struct domain *d = curr->domain;
unsigned long mfn;
- struct page_info *l1pg = mfn_to_page(gl1mfn);
p2m_type_t p2mt;
int rc = 1;
- page_lock(l1pg);
-
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
- return page_unlock(l1pg), 0;
+ return 0;
if ( unlikely(paging_mode_refcounts(d)) )
{
rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
- page_unlock(l1pg);
return rc;
}
@@ -1600,13 +1598,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
/* Translate foreign guest addresses. */
mfn = mfn_x(gfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e), &p2mt));
if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
- return page_unlock(l1pg), 0;
+ return 0;
ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
{
- page_unlock(l1pg);
MEM_LOG("Bad L1 flags %x",
l1e_get_flags(nl1e) & l1_disallow_mask(d));
return 0;
@@ -1618,12 +1615,11 @@ static int mod_l1_entry(l1_pgentry_t *pl
adjust_guest_l1e(nl1e, d);
rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
preserve_ad);
- page_unlock(l1pg);
return rc;
}
if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
- return page_unlock(l1pg), 0;
+ return 0;
adjust_guest_l1e(nl1e, d);
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
@@ -1636,11 +1632,9 @@ static int mod_l1_entry(l1_pgentry_t *pl
else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
preserve_ad)) )
{
- page_unlock(l1pg);
return 0;
}
- page_unlock(l1pg);
put_page_from_l1e(ol1e, d);
return rc;
}
@@ -1650,13 +1644,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
static int mod_l2_entry(l2_pgentry_t *pl2e,
l2_pgentry_t nl2e,
unsigned long pfn,
- unsigned long type,
int preserve_ad)
{
l2_pgentry_t ol2e;
struct vcpu *curr = current;
struct domain *d = curr->domain;
struct page_info *l2pg = mfn_to_page(pfn);
+ unsigned long type = l2pg->u.inuse.type_info;
int rc = 1;
if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
@@ -1665,16 +1659,13 @@ static int mod_l2_entry(l2_pgentry_t *pl
return 0;
}
- page_lock(l2pg);
-
if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
- return page_unlock(l2pg), 0;
+ return 0;
if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
{
if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
- page_unlock(l2pg);
MEM_LOG("Bad L2 flags %x",
l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
return 0;
@@ -1685,12 +1676,11 @@ static int mod_l2_entry(l2_pgentry_t *pl
{
adjust_guest_l2e(nl2e, d);
rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
- page_unlock(l2pg);
return rc;
}
if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
- return page_unlock(l2pg), 0;
+ return 0;
adjust_guest_l2e(nl2e, d);
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
@@ -1703,11 +1693,9 @@ static int mod_l2_entry(l2_pgentry_t *pl
else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
preserve_ad)) )
{
- page_unlock(l2pg);
return 0;
}
- page_unlock(l2pg);
put_page_from_l2e(ol2e, pfn);
return rc;
}
@@ -1722,7 +1710,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
l3_pgentry_t ol3e;
struct vcpu *curr = current;
struct domain *d = curr->domain;
- struct page_info *l3pg = mfn_to_page(pfn);
int rc = 0;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
@@ -1738,16 +1725,13 @@ static int mod_l3_entry(l3_pgentry_t *pl
if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
return -EINVAL;
- page_lock(l3pg);
-
if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
- return page_unlock(l3pg), -EFAULT;
+ return -EFAULT;
if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
{
if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
{
- page_unlock(l3pg);
MEM_LOG("Bad L3 flags %x",
l3e_get_flags(nl3e) & l3_disallow_mask(d));
return -EINVAL;
@@ -1758,13 +1742,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
{
adjust_guest_l3e(nl3e, d);
rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
- page_unlock(l3pg);
return rc ? 0 : -EFAULT;
}
rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
- return page_unlock(l3pg), rc;
+ return rc;
rc = 0;
adjust_guest_l3e(nl3e, d);
@@ -1778,7 +1761,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
preserve_ad)) )
{
- page_unlock(l3pg);
return -EFAULT;
}
@@ -1790,7 +1772,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
}
- page_unlock(l3pg);
put_page_from_l3e(ol3e, pfn, 0, 0);
return rc;
}
@@ -1807,7 +1788,6 @@ static int mod_l4_entry(l4_pgentry_t *pl
struct vcpu *curr = current;
struct domain *d = curr->domain;
l4_pgentry_t ol4e;
- struct page_info *l4pg = mfn_to_page(pfn);
int rc = 0;
if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
@@ -1816,16 +1796,13 @@ static int mod_l4_entry(l4_pgentry_t *pl
return -EINVAL;
}
- page_lock(l4pg);
-
if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
- return page_unlock(l4pg), -EFAULT;
+ return -EFAULT;
if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
{
if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
{
- page_unlock(l4pg);
MEM_LOG("Bad L4 flags %x",
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
return -EINVAL;
@@ -1836,13 +1813,12 @@ static int mod_l4_entry(l4_pgentry_t *pl
{
adjust_guest_l4e(nl4e, d);
rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
- page_unlock(l4pg);
return rc ? 0 : -EFAULT;
}
rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
- return page_unlock(l4pg), rc;
+ return rc;
rc = 0;
adjust_guest_l4e(nl4e, d);
@@ -1856,11 +1832,9 @@ static int mod_l4_entry(l4_pgentry_t *pl
else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
preserve_ad)) )
{
- page_unlock(l4pg);
return -EFAULT;
}
- page_unlock(l4pg);
put_page_from_l4e(ol4e, pfn, 0, 0);
return rc;
}
@@ -2918,7 +2892,6 @@ int do_mmu_update(
unsigned int cmd, done = 0;
struct vcpu *v = current;
struct domain *d = v->domain;
- unsigned long type_info;
struct domain_mmap_cache mapcache;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -2990,24 +2963,9 @@ int do_mmu_update(
(unsigned long)(req.ptr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
+ if ( page_lock(page) )
{
- case PGT_l1_page_table:
- case PGT_l2_page_table:
- case PGT_l3_page_table:
- case PGT_l4_page_table:
- {
- if ( paging_mode_refcounts(d) )
- {
- MEM_LOG("mmu update on auto-refcounted domain!");
- break;
- }
-
- if ( unlikely(!get_page_type(
- page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
- goto not_a_pt;
-
- switch ( type_info & PGT_type_mask )
+ switch ( page->u.inuse.type_info & PGT_type_mask )
{
case PGT_l1_page_table:
{
@@ -3019,7 +2977,7 @@ int do_mmu_update(
case PGT_l2_page_table:
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
- okay = mod_l2_entry(va, l2e, mfn, type_info,
+ okay = mod_l2_entry(va, l2e, mfn,
cmd == MMU_PT_UPDATE_PRESERVE_AD);
}
break;
@@ -3041,31 +2999,23 @@ int do_mmu_update(
}
break;
#endif
+ case PGT_writable_page:
+ perfc_incr(writable_mmu_updates);
+ okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+ break;
}
-
- put_page_type(page);
+ page_unlock(page);
if ( rc == -EINTR )
rc = -EAGAIN;
}
- break;
-
- default:
- not_a_pt:
+ else if ( get_page_type(page, PGT_writable_page) )
{
- if ( unlikely(!get_page_type(page, PGT_writable_page)) )
- break;
-
perfc_incr(writable_mmu_updates);
-
okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
-
put_page_type(page);
}
- break;
- }
unmap_domain_page_with_cache(va, &mapcache);
-
put_page(page);
break;
@@ -3144,7 +3094,6 @@ static int create_grant_pte_mapping(
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- unsigned long type;
l1_pgentry_t ol1e;
struct domain *d = v->domain;
@@ -3165,21 +3114,23 @@ static int create_grant_pte_mapping(
va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- type = page->u.inuse.type_info & PGT_type_mask;
- if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+ if ( !page_lock(page) )
{
- MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
goto failed;
}
- page_lock(page);
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(page);
+ rc = GNTST_general_error;
+ goto failed;
+ }
ol1e = *(l1_pgentry_t *)va;
if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
{
page_unlock(page);
- put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
@@ -3189,8 +3140,6 @@ static int create_grant_pte_mapping(
if ( !paging_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
- put_page_type(page);
-
failed:
unmap_domain_page(va);
put_page(page);
@@ -3205,7 +3154,6 @@ static int destroy_grant_pte_mapping(
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- unsigned long type;
l1_pgentry_t ol1e;
gmfn = addr >> PAGE_SHIFT;
@@ -3221,15 +3169,18 @@ static int destroy_grant_pte_mapping(
va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- type = page->u.inuse.type_info & PGT_type_mask;
- if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+ if ( !page_lock(page) )
{
- MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
goto failed;
}
- page_lock(page);
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(page);
+ rc = GNTST_general_error;
+ goto failed;
+ }
ol1e = *(l1_pgentry_t *)va;
@@ -3239,7 +3190,6 @@ static int destroy_grant_pte_mapping(
page_unlock(page);
MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
(unsigned long)l1e_get_intpte(ol1e), addr, frame);
- put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
@@ -3253,13 +3203,11 @@ static int destroy_grant_pte_mapping(
{
page_unlock(page);
MEM_LOG("Cannot delete PTE entry at %p", va);
- put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
page_unlock(page);
- put_page_type(page);
failed:
unmap_domain_page(va);
@@ -3287,21 +3235,40 @@ static int create_grant_va_mapping(
MEM_LOG("Could not find L1 PTE for address %lx", va);
return GNTST_general_error;
}
+
+ if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+ {
+ guest_unmap_l1e(v, pl1e);
+ return GNTST_general_error;
+ }
+
l1pg = mfn_to_page(gl1mfn);
- page_lock(l1pg);
+ if ( !page_lock(l1pg) )
+ {
+ put_page(l1pg);
+ guest_unmap_l1e(v, pl1e);
+ return GNTST_general_error;
+ }
+
+ if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(l1pg);
+ put_page(l1pg);
+ guest_unmap_l1e(v, pl1e);
+ return GNTST_general_error;
+ }
+
ol1e = *pl1e;
okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+
page_unlock(l1pg);
+ put_page(l1pg);
guest_unmap_l1e(v, pl1e);
- pl1e = NULL;
- if ( !okay )
- return GNTST_general_error;
-
- if ( !paging_mode_refcounts(d) )
+ if ( okay && !paging_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
- return GNTST_okay;
+ return okay ? GNTST_okay : GNTST_general_error;
}
static int replace_grant_va_mapping(
@@ -3319,31 +3286,48 @@ static int replace_grant_va_mapping(
return GNTST_general_error;
}
+ if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+ {
+ rc = GNTST_general_error;
+ goto out;
+ }
+
l1pg = mfn_to_page(gl1mfn);
- page_lock(l1pg);
+ if ( !page_lock(l1pg) )
+ {
+ rc = GNTST_general_error;
+ put_page(l1pg);
+ goto out;
+ }
+
+ if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ rc = GNTST_general_error;
+ goto unlock_and_out;
+ }
+
ol1e = *pl1e;
/* Check that the virtual address supplied is actually mapped to frame. */
if ( unlikely(l1e_get_pfn(ol1e) != frame) )
{
- page_unlock(l1pg);
MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
l1e_get_pfn(ol1e), addr, frame);
rc = GNTST_general_error;
- goto out;
+ goto unlock_and_out;
}
/* Delete pagetable entry. */
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
{
- page_unlock(l1pg);
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
rc = GNTST_general_error;
- goto out;
+ goto unlock_and_out;
}
+ unlock_and_out:
page_unlock(l1pg);
-
+ put_page(l1pg);
out:
guest_unmap_l1e(v, pl1e);
return rc;
@@ -3405,20 +3389,42 @@ int replace_grant_host_mapping(
return GNTST_general_error;
}
+ if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+ {
+ guest_unmap_l1e(curr, pl1e);
+ return GNTST_general_error;
+ }
+
l1pg = mfn_to_page(gl1mfn);
- page_lock(l1pg);
+ if ( !page_lock(l1pg) )
+ {
+ put_page(l1pg);
+ guest_unmap_l1e(curr, pl1e);
+ return GNTST_general_error;
+ }
+
+ if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(l1pg);
+ put_page(l1pg);
+ guest_unmap_l1e(curr, pl1e);
+ return GNTST_general_error;
+ }
+
ol1e = *pl1e;
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
gl1mfn, curr, 0)) )
{
page_unlock(l1pg);
+ put_page(l1pg);
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
guest_unmap_l1e(curr, pl1e);
return GNTST_general_error;
}
page_unlock(l1pg);
+ put_page(l1pg);
guest_unmap_l1e(curr, pl1e);
rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
@@ -3480,28 +3486,45 @@ int do_update_va_mapping(unsigned long v
l1_pgentry_t val = l1e_from_intpte(val64);
struct vcpu *v = current;
struct domain *d = v->domain;
+ struct page_info *gl1pg;
l1_pgentry_t *pl1e;
unsigned long vmask, bmap_ptr, gl1mfn;
cpumask_t pmask;
- int rc = 0;
+ int rc;
perfc_incr(calls_to_update_va);
- if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
- return -EINVAL;
-
rc = xsm_update_va_mapping(d, val);
if ( rc )
return rc;
+ rc = -EINVAL;
pl1e = guest_map_l1e(v, va, &gl1mfn);
+ if ( unlikely(!pl1e || !get_page_from_pagenr(gl1mfn, d)) )
+ goto out;
- if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
- rc = -EINVAL;
+ gl1pg = mfn_to_page(gl1mfn);
+ if ( !page_lock(gl1pg) )
+ {
+ put_page(gl1pg);
+ goto out;
+ }
+
+ if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(gl1pg);
+ put_page(gl1pg);
+ goto out;
+ }
+
+ rc = mod_l1_entry(pl1e, val, gl1mfn, 0) ? 0 : -EINVAL;
+ page_unlock(gl1pg);
+ put_page(gl1pg);
+
+ out:
if ( pl1e )
guest_unmap_l1e(v, pl1e);
- pl1e = NULL;
process_deferred_ops();
@@ -4122,15 +4145,25 @@ int ptwr_do_page_fault(struct vcpu *v, u
/* Attempt to read the PTE that maps the VA being accessed. */
guest_get_eff_l1e(v, addr, &pte);
- page = l1e_get_page(pte);
/* We are looking only for read-only mappings of p.t. pages. */
if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
- !mfn_valid(l1e_get_pfn(pte)) ||
- ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
- ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
- (page_get_owner(page) != d) )
+ !get_page_from_pagenr(l1e_get_pfn(pte), d) )
+ goto bail;
+
+ page = l1e_get_page(pte);
+ if ( !page_lock(page) )
+ {
+ put_page(page);
+ goto bail;
+ }
+
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(page);
+ put_page(page);
goto bail;
+ }
ptwr_ctxt.ctxt.regs = regs;
ptwr_ctxt.ctxt.force_writeback = 0;
@@ -4139,9 +4172,11 @@ int ptwr_do_page_fault(struct vcpu *v, u
ptwr_ctxt.cr2 = addr;
ptwr_ctxt.pte = pte;
- page_lock(page);
rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+
page_unlock(page);
+ put_page(page);
+
if ( rc == X86EMUL_UNHANDLEABLE )
goto bail;
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1685,9 +1685,6 @@ shadow_free_p2m_page(struct domain *d, s
/* Free should not decrement domain's total allocation, since
* these pages were allocated without an owner. */
page_set_owner(pg, NULL);
-#if defined(__x86_64__)
- spin_lock_init(&pg->lock);
-#endif
free_domheap_pages(pg, 0);
d->arch.paging.shadow.p2m_pages--;
perfc_decr(shadow_alloc_count);
@@ -1801,16 +1798,7 @@ static unsigned int sh_set_allocation(st
* may get overwritten, so need to clear it here.
*/
for ( j = 0; j < 1U << order; j++ )
- {
page_set_owner(&((struct page_info *)sp)[j], NULL);
-#if defined(__x86_64__)
- /*
- * Re-instate lock field which we overwrite with shadow_page_info.
- * This was safe, since the lock is only used on guest pages.
- */
- spin_lock_init(&((struct page_info *)sp)[j].lock);
-#endif
- }
d->arch.paging.shadow.free_pages -= 1 << order;
d->arch.paging.shadow.total_pages -= 1 << order;
free_domheap_pages((struct page_info *)sp, order);
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -46,10 +46,6 @@ struct page_info
} u;
-#if defined(__x86_64__)
- spinlock_t lock;
-#endif
-
union {
/*
* Timestamp from 'TLB clock', used to avoid extra safety flushes.
@@ -127,27 +123,25 @@ struct page_info
/* Has this page been *partially* validated for use as its current type? */
#define _PGT_partial PG_shift(7)
#define PGT_partial PG_mask(1, 7)
+ /* Page is locked? */
+#define _PGT_locked PG_shift(8)
+#define PGT_locked PG_mask(1, 8)
/* Count of uses of this frame as its current type. */
-#define PGT_count_width PG_shift(7)
+#define PGT_count_width PG_shift(8)
#define PGT_count_mask ((1UL<<PGT_count_width)-1)
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated PG_shift(1)
#define PGC_allocated PG_mask(1, 1)
-#if defined(__i386__)
- /* Page is locked? */
-# define _PGC_locked PG_shift(2)
-# define PGC_locked PG_mask(1, 2)
-#endif
/* Set when is using a page as a page table */
-#define _PGC_page_table PG_shift(3)
-#define PGC_page_table PG_mask(1, 3)
+#define _PGC_page_table PG_shift(2)
+#define PGC_page_table PG_mask(1, 2)
/* 3-bit PAT/PCD/PWT cache-attribute hint. */
-#define PGC_cacheattr_base PG_shift(6)
-#define PGC_cacheattr_mask PG_mask(7, 6)
+#define PGC_cacheattr_base PG_shift(5)
+#define PGC_cacheattr_mask PG_mask(7, 5)
/* Count of references to this frame. */
-#define PGC_count_width PG_shift(6)
+#define PGC_count_width PG_shift(5)
#define PGC_count_mask ((1UL<<PGC_count_width)-1)
#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -336,7 +336,7 @@ void paging_dump_vcpu_info(struct vcpu *
* Access to the guest pagetables */
/* Get a mapping of a PV guest's l1e for this virtual address. */
-static inline void *
+static inline l1_pgentry_t *
guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
{
l2_pgentry_t l2e;
@@ -354,15 +354,14 @@ guest_map_l1e(struct vcpu *v, unsigned l
!= _PAGE_PRESENT )
return NULL;
*gl1mfn = l2e_get_pfn(l2e);
- return &__linear_l1_table[l1_linear_offset(addr)];
+ return (l1_pgentry_t *)map_domain_page(*gl1mfn) + l1_table_offset(addr);
}
/* Pull down the mapping we got from guest_map_l1e() */
static inline void
guest_unmap_l1e(struct vcpu *v, void *p)
{
- if ( unlikely(paging_mode_translate(v->domain)) )
- unmap_domain_page(p);
+ unmap_domain_page(p);
}
/* Read the guest's l1e that maps this address. */
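
With the spinlock gone, every caller in the patch above converges on
the same sequence: take a general reference on the frame, take the
type_info lock, re-check the page type under the lock, do the update,
then unwind in reverse order. Schematically (the trivial stand-ins
below replace Xen's get_page_from_pagenr()/page_lock()/put_page();
this shows the shape, not the real code):

#include <stdio.h>

#define PGT_type_mask     (7UL << 61)   /* illustrative layout only */
#define PGT_l1_page_table (1UL << 61)

struct page { unsigned long type_info; int refs; };

/* Trivial stand-ins for the Xen helpers named above. */
static int  get_page(struct page *p)    { p->refs++; return 1; }
static void put_page(struct page *p)    { p->refs--; }
static int  page_lock(struct page *p)   { (void)p; return 1; }
static void page_unlock(struct page *p) { (void)p; }

static int update_l1e(struct page *pg)
{
    if ( !get_page(pg) )                   /* 1. reference the frame      */
        return 0;

    if ( !page_lock(pg) )                  /* 2. lock its type_info       */
    {
        put_page(pg);
        return 0;
    }

    if ( (pg->type_info & PGT_type_mask) != PGT_l1_page_table )
    {                                      /* 3. re-check type under lock */
        page_unlock(pg);
        put_page(pg);
        return 0;
    }

    /* 4. ...modify the page-table entry here... */

    page_unlock(pg);                       /* 5. unwind in reverse order  */
    put_page(pg);
    return 1;
}

int main(void)
{
    struct page pg = { PGT_l1_page_table, 0 };

    printf("update %s, refs back to %d\n",
           update_l1e(&pg) ? "succeeded" : "failed", pg.refs);
    return 0;
}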


@ -1,14 +0,0 @@
Index: xen-3.3.1-testing/tools/python/xen/xend/XendAPIStore.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendAPIStore.py
+++ xen-3.3.1-testing/tools/python/xen/xend/XendAPIStore.py
@@ -33,7 +33,8 @@ def register(uuid, type, inst):
def deregister(uuid, type):
old = get(uuid, type)
- del __classes[(uuid, type)]
+ if old is not None:
+ del __classes[(uuid, type)]
return old
def get(uuid, type):


@ -1,13 +0,0 @@
Index: xen-3.3.1-testing/docs/man/xm.pod.1
===================================================================
--- xen-3.3.1-testing.orig/docs/man/xm.pod.1
+++ xen-3.3.1-testing/docs/man/xm.pod.1
@@ -67,6 +67,8 @@ The attached console will perform much l
so running curses based interfaces over the console B<is not
advised>. Vi tends to get very odd when using it over this interface.
+Use the key combination Ctrl+] to detach the domain console.
+
=item B<create> I<configfile> [I<OPTIONS>] [I<vars>]..
The create subcommand requires a config file and can optionally take a


@ -1,17 +0,0 @@
Index: xen-3.3.1-testing/tools/python/xen/xm/main.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xm/main.py
+++ xen-3.3.1-testing/tools/python/xen/xm/main.py
@@ -58,7 +58,11 @@ from xen.util.acmpolicy import ACM_LABEL
import XenAPI
import xen.lowlevel.xc
-xc = xen.lowlevel.xc.xc()
+try:
+ xc = xen.lowlevel.xc.xc()
+except Exception, ex:
+ print >>sys.stderr, "Is the xen kernel running?"
+ sys.exit(1)
import inspect
from xen.xend import XendOptions


@ -1,68 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1233748806 0
# Node ID 6058887e55d7096f8b32c1f0576c601b080dc879
# Parent 7e15ccb7bbd88e550ada6a6b86196cc4e5d880b6
vtd: Add a boot parameter option for the VT-d snoop control capability.
The default is to use snoop control.
Signed-off-by: Xin, Xiaohui <xiaohui.xin@intel.com>
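
parse_iommu_param() walks a comma-separated option string, so the patch
only has to teach it two more tokens. The shape of that loop, reduced
to just the snoop options (an illustration, not the Xen function):

#include <stdio.h>
#include <string.h>

static int iommu_snoop;

/* Walk a comma-separated option list such as "force,no-snoop". */
static void parse_iommu_param(char *s)
{
    char *ss;

    iommu_snoop = 1;                 /* default: use snoop control */
    do {
        ss = strchr(s, ',');
        if ( ss )
            *ss = '\0';              /* terminate the current token */

        if ( !strcmp(s, "snoop") )
            iommu_snoop = 1;
        else if ( !strcmp(s, "no-snoop") )
            iommu_snoop = 0;
        /* ...other tokens ("force", "passthrough", ...) elided... */

        if ( ss )
            s = ss + 1;              /* advance past the comma */
    } while ( ss );
}

int main(void)
{
    char arg[] = "force,no-snoop";

    parse_iommu_param(arg);
    printf("iommu_snoop = %d\n", iommu_snoop);
    return 0;
}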
Index: xen-3.3.1-testing/xen/drivers/passthrough/iommu.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/iommu.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/iommu.c
@@ -34,6 +34,8 @@ int amd_iov_detect(void);
* no-pv Disable IOMMU for PV domains (default)
* force|required Don't boot unless IOMMU is enabled
* passthrough Bypass VT-d translation for Dom0
+ * snoop Utilize the snoop control for IOMMU (default)
+ * no-snoop Don't utilize the snoop control for IOMMU
*/
custom_param("iommu", parse_iommu_param);
int iommu_enabled = 0;
@@ -46,6 +48,7 @@ static void __init parse_iommu_param(cha
{
char *ss;
iommu_enabled = 1;
+ iommu_snoop = 1;
do {
ss = strchr(s, ',');
@@ -63,6 +66,10 @@ static void __init parse_iommu_param(cha
force_iommu = 1;
else if ( !strcmp(s, "passthrough") )
iommu_passthrough = 1;
+ else if ( !strcmp(s, "snoop") )
+ iommu_snoop = 1;
+ else if ( !strcmp(s, "no-snoop") )
+ iommu_snoop = 0;
s = ss + 1;
} while ( ss );
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/dmar.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c
@@ -530,13 +530,15 @@ int acpi_dmar_init(void)
/* Giving that all devices within guest use same io page table,
* enable snoop control only if all VT-d engines support it.
*/
- iommu_snoop = 1;
- for_each_drhd_unit ( drhd )
+ if ( iommu_snoop )
{
- iommu = drhd->iommu;
- if ( !ecap_snp_ctl(iommu->ecap) ) {
- iommu_snoop = 0;
- break;
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( !ecap_snp_ctl(iommu->ecap) ) {
+ iommu_snoop = 0;
+ break;
+ }
}
}


@ -1,85 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1234436057 0
# Node ID 32b15413749255e0cd518f25d9202759586dcb27
# Parent 94e12fa57816c26f8b76061f17c33928be202c85
vtd: move the snoop control detection out of acpi_dmar_init()
where the capability value is not yet initialized and thus we may
read a random value.
Signed-off-by: Xin, Xiaohui <xiaohui.xin@intel.com>
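
The detection itself is an and-reduction over all DRHD units: snoop
control stays enabled only if every engine advertises it in its
extended capability register, which is why it has to run after the
hardware is initialized. Schematically, with made-up ecap values and
an assumed bit position for the snoop-control capability:

#include <stdio.h>

#define ECAP_SNP_CTL (1u << 7)   /* assumed bit, for illustration only */

static int iommu_snoop = 1;

/* Keep snoop control only if *all* VT-d engines support it.  Must run
 * after hardware setup so the ecap values below are actually valid. */
static void detect_snoop_control(const unsigned int *ecap, int nr_units)
{
    int i;

    for ( i = 0; iommu_snoop && (i < nr_units); i++ )
        if ( !(ecap[i] & ECAP_SNP_CTL) )
            iommu_snoop = 0;
}

int main(void)
{
    unsigned int ecaps[] = { 0xffffffffu, ECAP_SNP_CTL, 0u };

    detect_snoop_control(ecaps, 3);   /* last unit lacks the capability */
    printf("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis");
    return 0;
}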
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/dmar.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c
@@ -509,8 +509,6 @@ static int __init acpi_parse_dmar(struct
int acpi_dmar_init(void)
{
int rc;
- struct acpi_drhd_unit *drhd;
- struct iommu *iommu;
rc = -ENODEV;
if ( force_iommu )
@@ -527,22 +525,7 @@ int acpi_dmar_init(void)
if ( list_empty(&acpi_drhd_units) )
goto fail;
- /* Giving that all devices within guest use same io page table,
- * enable snoop control only if all VT-d engines support it.
- */
- if ( iommu_snoop )
- {
- for_each_drhd_unit ( drhd )
- {
- iommu = drhd->iommu;
- if ( !ecap_snp_ctl(iommu->ecap) ) {
- iommu_snoop = 0;
- break;
- }
- }
- }
-
- printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop);
+ printk("Intel VT-d has been enabled\n");
return 0;
Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c
@@ -1813,6 +1813,24 @@ int intel_vtd_setup(void)
if ( init_vtd_hw() )
goto error;
+ /* Given that all devices within guest use same io page table,
+ * enable snoop control only if all VT-d engines support it.
+ */
+
+ if ( iommu_snoop )
+ {
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( !ecap_snp_ctl(iommu->ecap) ) {
+ iommu_snoop = 0;
+ break;
+ }
+ }
+ }
+
+ printk("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis");
+
register_keyhandler('V', dump_iommu_info, "dump iommu info");
return 0;
@@ -1821,6 +1839,7 @@ int intel_vtd_setup(void)
for_each_drhd_unit ( drhd )
iommu_free(drhd);
vtd_enabled = 0;
+ iommu_snoop = 0;
return -ENOMEM;
}


@ -1,13 +1,13 @@
Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
Index: xen-3.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2273,7 +2273,7 @@ class XendDomainInfo:
--- xen-3.4.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-3.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2559,7 +2559,7 @@ class XendDomainInfo:
vtd_mem = ((vtd_mem + 1023) / 1024) * 1024
# Make sure there's enough RAM available for the domain
- balloon.free(memory + shadow + vtd_mem)
+ balloon.free(memory + shadow + vtd_mem + 512)
- balloon.free(memory + shadow + vtd_mem, self)
+ balloon.free(memory + shadow + vtd_mem + 512, self)
# Set up the shadow memory
shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)


@ -1,248 +0,0 @@
Index: xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-3.3.1-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c
@@ -662,9 +662,6 @@ static int blktapctrl_new_blkif(blkif_t
DPRINTF("Received a poll for a new vbd\n");
if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) {
- if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0)
- return -1;
-
if (test_path(blk->params, &ptr, &type, &exist, &use_ioemu) != 0) {
DPRINTF("Error in blktap device string(%s).\n",
blk->params);
@@ -693,10 +690,6 @@ static int blktapctrl_new_blkif(blkif_t
blkif->fds[WRITE] = exist->fds[WRITE];
}
- add_disktype(blkif, type);
- blkif->major = major;
- blkif->minor = minor;
-
image = (image_t *)malloc(sizeof(image_t));
blkif->prv = (void *)image;
blkif->ops = &tapdisk_ops;
@@ -720,11 +713,18 @@ static int blktapctrl_new_blkif(blkif_t
goto fail;
}
+ if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0)
+ return -1;
+
+ blkif->major = major;
+ blkif->minor = minor;
+
+ add_disktype(blkif, type);
+
} else return -1;
return 0;
fail:
- ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor);
return -EINVAL;
}
Index: xen-3.3.1-testing/tools/blktap/lib/xenbus.c
===================================================================
--- xen-3.3.1-testing.orig/tools/blktap/lib/xenbus.c
+++ xen-3.3.1-testing/tools/blktap/lib/xenbus.c
@@ -48,6 +48,7 @@
#include <poll.h>
#include <time.h>
#include <sys/time.h>
+#include <unistd.h>
#include "blktaplib.h"
#include "list.h"
#include "xs_api.h"
@@ -149,6 +150,130 @@ static int backend_remove(struct xs_hand
return 0;
}
+static int check_sharing(struct xs_handle *h, struct backend_info *be)
+{
+ char *dom_uuid;
+ char *cur_dom_uuid;
+ char *path;
+ char *mode;
+ char *params;
+ char **domains;
+ char **devices;
+ int i, j;
+ unsigned int num_dom, num_dev;
+ blkif_info_t *info;
+ int ret = 0;
+
+ /* If the mode contains '!' or doesn't contain 'w' don't check anything */
+ xs_gather(h, be->backpath, "mode", NULL, &mode, NULL);
+ if (strchr(mode, '!'))
+ goto out;
+ if (strchr(mode, 'w') == NULL)
+ goto out;
+
+ /* Get the UUID of the domain we want to attach to */
+ if (asprintf(&path, "/local/domain/%ld", be->frontend_id) == -1)
+ goto fail;
+ xs_gather(h, path, "vm", NULL, &dom_uuid, NULL);
+ free(path);
+
+ /* Iterate through the devices of all VMs */
+ domains = xs_directory(h, XBT_NULL, "backend/tap", &num_dom);
+ if (domains == NULL)
+ num_dom = 0;
+
+ for (i = 0; !ret && (i < num_dom); i++) {
+
+ /* If it's the same VM, no action needed */
+ if (asprintf(&path, "/local/domain/%s", domains[i]) == -1) {
+ ret = -1;
+ break;
+ }
+ xs_gather(h, path, "vm", NULL, &cur_dom_uuid, NULL);
+ free(path);
+
+ if (!strcmp(cur_dom_uuid, dom_uuid)) {
+ free(cur_dom_uuid);
+ continue;
+ }
+
+ /* Check the devices */
+ if (asprintf(&path, "backend/tap/%s", domains[i]) == -1) {
+ ret = -1;
+ free(cur_dom_uuid);
+ break;
+ }
+ devices = xs_directory(h, XBT_NULL, path, &num_dev);
+ if (devices == NULL)
+ num_dev = 0;
+ free(path);
+
+ for (j = 0; !ret && (j < num_dev); j++) {
+ if (asprintf(&path, "backend/tap/%s/%s", domains[i], devices[j]) == -1) {
+ ret = -1;
+ break;
+ }
+ xs_gather(h, path, "params", NULL, &params, NULL);
+ free(path);
+
+ info = be->blkif->info;
+ if (strcmp(params, info->params)) {
+ ret = -1;
+ }
+
+ free(params);
+ }
+
+ free(cur_dom_uuid);
+ free(devices);
+ }
+ free(domains);
+ free(dom_uuid);
+ goto out;
+
+fail:
+ ret = -1;
+out:
+ free(mode);
+ return ret;
+}
+
+static int check_image(struct xs_handle *h, struct backend_info *be,
+ const char** errmsg)
+{
+ const char *path;
+ int mode;
+ blkif_t *blkif = be->blkif;
+ blkif_info_t *info = blkif->info;
+
+ /* Strip off the image type */
+ path = strchr(info->params, ':');
+ if (path == NULL)
+ path = info->params;
+ else
+ path++;
+
+ /* Check if the image exists and access is permitted */
+ mode = R_OK;
+ if (!be->readonly)
+ mode |= W_OK;
+ if (access(path, mode)) {
+ if (errno == ENOENT)
+ *errmsg = "File not found.";
+ else
+ *errmsg = "Insufficient file permissions.";
+ return -1;
+ }
+
+ /* Check that the image is not attached to a different VM */
+ if (check_sharing(h, be)) {
+ *errmsg = "File already in use by other domain";
+ return -1;
+ }
+
+ return 0;
+}
+
static void ueblktap_setup(struct xs_handle *h, char *bepath)
{
struct backend_info *be;
@@ -156,6 +281,7 @@ static void ueblktap_setup(struct xs_han
int len, er, deverr;
long int pdev = 0, handle;
blkif_info_t *blk;
+ const char* errmsg = NULL;
be = be_lookup_be(bepath);
if (be == NULL)
@@ -211,6 +337,9 @@ static void ueblktap_setup(struct xs_han
be->pdev = pdev;
}
+ if (check_image(h, be, &errmsg))
+ goto fail;
+
er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
if (er != 0) {
DPRINTF("Unable to open device %s\n",blk->params);
@@ -246,12 +375,21 @@ static void ueblktap_setup(struct xs_han
}
be->blkif->state = CONNECTED;
+ xs_printf(h, be->backpath, "hotplug-status", "connected");
+
DPRINTF("[SETUP] Complete\n\n");
goto close;
fail:
- if ( (be != NULL) && (be->blkif != NULL) )
+ if (be) {
+ if (errmsg == NULL)
+ errmsg = "Setting up the backend failed. See the log "
+ "files in /var/log/xen/ for details.";
+ xs_printf(h, be->backpath, "hotplug-error", errmsg);
+ xs_printf(h, be->backpath, "hotplug-status", "error");
+
backend_remove(h, be);
+ }
close:
if (path)
free(path);
@@ -286,7 +424,8 @@ static void ueblktap_probe(struct xs_han
len = strsep_len(bepath, '/', 7);
if (len < 0)
goto free_be;
- bepath[len] = '\0';
+ if (bepath[len] != '\0')
+ goto free_be;
be = malloc(sizeof(*be));
if (!be) {
Index: xen-3.3.1-testing/tools/examples/xen-backend.rules
===================================================================
--- xen-3.3.1-testing.orig/tools/examples/xen-backend.rules
+++ xen-3.3.1-testing/tools/examples/xen-backend.rules
@@ -1,4 +1,3 @@
-SUBSYSTEM=="xen-backend", KERNEL=="tap*", RUN+="/etc/xen/scripts/blktap $env{ACTION}"
SUBSYSTEM=="xen-backend", KERNEL=="vbd*", RUN+="/etc/xen/scripts/block $env{ACTION}"
SUBSYSTEM=="xen-backend", KERNEL=="vtpm*", RUN+="/etc/xen/scripts/vtpm $env{ACTION}"
SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="online", RUN+="$env{script} online"


@ -1,54 +0,0 @@
Index: xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c
===================================================================
--- xen-3.3.1-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c
@@ -221,6 +221,28 @@ static void add_disktype(blkif_t *blkif,
entry->pprev = pprev;
}
+static int qemu_instance_has_disks(int domid)
+{
+ int i;
+ int count = 0;
+ driver_list_entry_t *entry;
+
+ for (i = 0; i < MAX_DISK_TYPES; i++) {
+ entry = active_disks[i];
+ while (entry) {
+#ifdef ALWAYS_USE_IOEMU
+ if (entry->blkif->domid == domid)
+#else
+ if ((entry->blkif->domid == domid) && dtypes[i]->use_ioemu)
+#endif
+ count++;
+ entry = entry->next;
+ }
+ }
+
+ return (count != 0);
+}
+
static int del_disktype(blkif_t *blkif)
{
driver_list_entry_t *entry, **pprev;
@@ -245,8 +267,20 @@ static int del_disktype(blkif_t *blkif)
DPRINTF("DEL_DISKTYPE: Freeing entry\n");
free(entry);
+#ifdef ALWAYS_USE_IOEMU
+ return !qemu_instance_has_disks(blkif->domid);
+#else
+ /*
+ * When using ioemu, all disks of one VM are connected to the same
+ * qemu-dm instance. We may close the file handle only if there is
+ * no other disk left for this domain.
+ */
+ if (dtypes[type]->use_ioemu)
+ return !qemu_instance_has_disks(blkif->domid);
+
/* Caller should close() if no single controller, or list is empty. */
return (!dtypes[type]->single_handler || (active_disks[type] == NULL));
+#endif
}
static int write_msg(int fd, int msgtype, void *ptr, void *ptr2)


@ -1,5 +1,7 @@
Index: xen-3.4.0-testing/tools/blktap/drivers/block-cdrom.c
===================================================================
--- /dev/null
+++ b/tools/blktap/drivers/block-cdrom.c
+++ xen-3.4.0-testing/tools/blktap/drivers/block-cdrom.c
@@ -0,0 +1,536 @@
+/* block-cdrom.c
+ *
@ -537,8 +539,10 @@
+ .td_get_parent_id = tdcdrom_get_parent_id,
+ .td_validate_parent = tdcdrom_validate_parent
+};
Index: xen-3.4.0-testing/xen/include/public/io/cdromif.h
===================================================================
--- /dev/null
+++ b/xen/include/public/io/cdromif.h
+++ xen-3.4.0-testing/xen/include/public/io/cdromif.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ * cdromif.h
@ -660,10 +664,12 @@
+ sizeof(struct vcd_generic_command) - sizeof(struct request_sense))
+
+#endif
--- a/tools/blktap/drivers/Makefile
+++ b/tools/blktap/drivers/Makefile
@@ -24,8 +24,9 @@ CRYPT_LIB := -lcrypto
$(warning *** libgcrypt not installed: falling back to libcrypto ***)
Index: xen-3.4.0-testing/tools/blktap/drivers/Makefile
===================================================================
--- xen-3.4.0-testing.orig/tools/blktap/drivers/Makefile
+++ xen-3.4.0-testing/tools/blktap/drivers/Makefile
@@ -20,8 +20,9 @@ CRYPT_LIB := -lcrypto
$(warning === libgcrypt not installed: falling back to libcrypto ===)
endif
-LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap
@ -674,7 +680,7 @@
BLK-OBJS-y := block-aio.o
BLK-OBJS-y += block-sync.o
@@ -33,6 +34,7 @@ BLK-OBJS-y += block-vmdk.o
@@ -29,6 +30,7 @@ BLK-OBJS-y += block-vmdk.o
BLK-OBJS-y += block-ram.o
BLK-OBJS-y += block-qcow.o
BLK-OBJS-y += block-qcow2.o
@ -682,8 +688,10 @@
BLK-OBJS-y += aes.o
BLK-OBJS-y += tapaio.o
BLK-OBJS-$(CONFIG_Linux) += blk_linux.o
--- a/tools/blktap/drivers/tapdisk.h
+++ b/tools/blktap/drivers/tapdisk.h
Index: xen-3.4.0-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-3.4.0-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-3.4.0-testing/tools/blktap/drivers/tapdisk.h
@@ -137,6 +137,9 @@ struct tap_disk {
int (*td_get_parent_id) (struct disk_driver *dd, struct disk_id *id);
int (*td_validate_parent)(struct disk_driver *dd,
@ -702,7 +710,7 @@
/*Define Individual Disk Parameters here */
@@ -240,6 +244,17 @@ static disk_info_t ioemu_disk = {
@@ -229,6 +233,17 @@ static disk_info_t qcow2_disk = {
#endif
};
@ -720,26 +728,30 @@
/*Main disk info array */
static disk_info_t *dtypes[] = {
&aio_disk,
@@ -249,6 +264,7 @@ static disk_info_t *dtypes[] = {
@@ -237,6 +252,7 @@ static disk_info_t *dtypes[] = {
&ram_disk,
&qcow_disk,
&qcow2_disk,
&ioemu_disk,
+ &cdrom_disk,
};
typedef struct driver_list_entry {
--- a/tools/blktap/lib/blktaplib.h
+++ b/tools/blktap/lib/blktaplib.h
@@ -221,6 +221,7 @@ typedef struct msg_pid {
Index: xen-3.4.0-testing/tools/blktap/lib/blktaplib.h
===================================================================
--- xen-3.4.0-testing.orig/tools/blktap/lib/blktaplib.h
+++ xen-3.4.0-testing/tools/blktap/lib/blktaplib.h
@@ -220,6 +220,7 @@ typedef struct msg_pid {
#define DISK_TYPE_RAM 3
#define DISK_TYPE_QCOW 4
#define DISK_TYPE_QCOW2 5
#define DISK_TYPE_IOEMU 6
+#define DISK_TYPE_CDROM 7
+#define DISK_TYPE_CDROM 6
/* xenstore/xenbus: */
#define DOMNAME "Domain-0"
--- a/xen/include/public/io/blkif.h
+++ b/xen/include/public/io/blkif.h
Index: xen-3.4.0-testing/xen/include/public/io/blkif.h
===================================================================
--- xen-3.4.0-testing.orig/xen/include/public/io/blkif.h
+++ xen-3.4.0-testing/xen/include/public/io/blkif.h
@@ -76,6 +76,10 @@
* "feature-flush-cache" node!
*/
@ -751,8 +763,10 @@
/*
* Maximum scatter/gather segments per request.
--- a/tools/blktap/drivers/tapdisk.c
+++ b/tools/blktap/drivers/tapdisk.c
Index: xen-3.4.0-testing/tools/blktap/drivers/tapdisk.c
===================================================================
--- xen-3.4.0-testing.orig/tools/blktap/drivers/tapdisk.c
+++ xen-3.4.0-testing/tools/blktap/drivers/tapdisk.c
@@ -735,6 +735,22 @@ static void get_io_request(struct td_sta
goto out;
}
@ -776,16 +790,15 @@
default:
DPRINTF("Unknown block operation\n");
break;
--- a/tools/python/xen/xend/server/BlktapController.py
+++ b/tools/python/xen/xend/server/BlktapController.py
@@ -14,8 +14,8 @@ blktap_disk_types = [
Index: xen-3.4.0-testing/tools/python/xen/xend/server/BlktapController.py
===================================================================
--- xen-3.4.0-testing.orig/tools/python/xen/xend/server/BlktapController.py
+++ xen-3.4.0-testing/tools/python/xen/xend/server/BlktapController.py
@@ -14,6 +14,7 @@ blktap_disk_types = [
'ram',
'qcow',
'qcow2',
-
- 'ioemu'
+ 'ioemu',
+ 'cdrom',
]
class BlktapController(BlkifController):
'ioemu',
'tapdisk',


@ -1,11 +1,11 @@
bug #239173
bug #242953
Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
Index: xen-3.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2618,7 +2618,7 @@ class XendDomainInfo:
--- xen-3.4.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-3.4.0-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2939,7 +2939,7 @@ class XendDomainInfo:
(fn, BOOTLOADER_LOOPBACK_DEVICE))
vbd = {
@ -14,26 +14,26 @@ Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
'device': BOOTLOADER_LOOPBACK_DEVICE,
}
Index: xen-3.3.1-testing/tools/ioemu-remote/xenstore.c
Index: xen-3.4.0-testing/tools/ioemu-remote/xenstore.c
===================================================================
--- xen-3.3.1-testing.orig/tools/ioemu-remote/xenstore.c
+++ xen-3.3.1-testing/tools/ioemu-remote/xenstore.c
@@ -151,9 +151,9 @@ void xenstore_parse_domain_config(int hv
--- xen-3.4.0-testing.orig/tools/ioemu-remote/xenstore.c
+++ xen-3.4.0-testing/tools/ioemu-remote/xenstore.c
@@ -311,9 +311,9 @@ void xenstore_parse_domain_config(int hv
{
char **e = NULL;
char *buf = NULL, *path;
char **e_danger = NULL;
char *buf = NULL;
- char *fpath = NULL, *bpath = NULL,
+ char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *type = NULL, *drv = NULL;
*dev = NULL, *params = NULL, *drv = NULL;
- int i, any_hdN = 0, ret;
+ int i, any_hdN = 0, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -188,6 +188,14 @@ void xenstore_parse_domain_config(int hv
bpath = xs_read(xsh, XBT_NULL, buf, &len);
@@ -353,6 +353,14 @@ void xenstore_parse_domain_config(int hv
e_danger[i]);
if (bpath == NULL)
continue;
continue;
+ /* check to see if type is tap or not */
+ if (pasprintf(&buf, "%s/type", bpath) == -1)
+ continue;
@ -45,11 +45,11 @@ Index: xen-3.3.1-testing/tools/ioemu-remote/xenstore.c
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -432,6 +440,7 @@ void xenstore_parse_domain_config(int hv
free(type);
@@ -662,6 +670,7 @@ void xenstore_parse_domain_config(int hv
free(danger_type);
free(params);
free(dev);
+ free(btype);
free(bpath);
free(buf);
free(path);
free(danger_buf);


@ -1,211 +1,49 @@
Index: xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c
Index: xen-3.4.0-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-3.3.1-testing.orig/tools/blktap/drivers/blktapctrl.c
+++ xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c
@@ -65,6 +65,8 @@
#define MAX_RAND_VAL 0xFFFF
#define MAX_ATTEMPTS 10
+#undef ALWAYS_USE_IOEMU
+
int run = 1;
int max_timeout = MAX_TIMEOUT;
int ctlfd = 0;
@@ -148,7 +150,8 @@ static int get_tapdisk_pid(blkif_t *blki
* return 0 on success, -1 on error.
*/
-static int test_path(char *path, char **dev, int *type, blkif_t **blkif)
+static int test_path(char *path, char **dev, int *type, blkif_t **blkif,
+ int* use_ioemu)
{
char *ptr, handle[10];
int i, size, found = 0;
@@ -174,6 +177,7 @@ static int test_path(char *path, char **
}
if (found) {
+ *use_ioemu = dtypes[i]->use_ioemu;
*type = dtypes[i]->idnum;
if (dtypes[i]->single_handler == 1) {
@@ -185,6 +189,7 @@ static int test_path(char *path, char **
*blkif = active_disks[dtypes[i]
->idnum]->blkif;
}
+
return 0;
}
}
@@ -474,6 +479,7 @@ static int launch_tapdisk_provider(char
return child;
}
+#ifndef ALWAYS_USE_IOEMU
static int launch_tapdisk(char *wrctldev, char *rdctldev)
{
char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
@@ -483,6 +489,7 @@ static int launch_tapdisk(char *wrctldev
return 0;
}
+#endif
static int launch_tapdisk_ioemu(void)
{
@@ -505,7 +512,8 @@ static int connect_qemu(blkif_t *blkif,
static int tapdisk_ioemu_pid = 0;
static int dom0_readfd = 0;
static int dom0_writefd = 0;
-
+ int refresh_pid = 0;
+
if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) < 0)
return -1;
@@ -524,15 +532,23 @@ static int connect_qemu(blkif_t *blkif,
if (tapdisk_ioemu_pid == 0 || kill(tapdisk_ioemu_pid, 0)) {
/* No device model and tapdisk-ioemu doesn't run yet */
DPRINTF("Launching tapdisk-ioemu\n");
- tapdisk_ioemu_pid = launch_tapdisk_ioemu();
+ launch_tapdisk_ioemu();
dom0_readfd = open_ctrl_socket(wrctldev);
dom0_writefd = open_ctrl_socket(rdctldev);
+
+ refresh_pid = 1;
}
DPRINTF("Using tapdisk-ioemu connection\n");
blkif->fds[READ] = dom0_readfd;
blkif->fds[WRITE] = dom0_writefd;
+
+ if (refresh_pid) {
+ get_tapdisk_pid(blkif);
+ tapdisk_ioemu_pid = blkif->tappid;
+ }
+
} else if (access(rdctldev, R_OK | W_OK) == 0) {
/* Use existing pipe to the device model */
DPRINTF("Using qemu-dm connection\n");
@@ -554,6 +570,7 @@ static int connect_qemu(blkif_t *blkif,
return 0;
}
+#ifndef ALWAYS_USE_IOEMU
/* Launch tapdisk instance */
static int connect_tapdisk(blkif_t *blkif, int minor)
{
@@ -597,6 +614,7 @@ fail:
return ret;
}
+#endif
static int blktapctrl_new_blkif(blkif_t *blkif)
{
@@ -606,13 +624,14 @@ static int blktapctrl_new_blkif(blkif_t
image_t *image;
blkif_t *exist = NULL;
static uint16_t next_cookie = 0;
+ int use_ioemu;
DPRINTF("Received a poll for a new vbd\n");
if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) {
if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0)
return -1;
- if (test_path(blk->params, &ptr, &type, &exist) != 0) {
+ if (test_path(blk->params, &ptr, &type, &exist, &use_ioemu) != 0) {
DPRINTF("Error in blktap device string(%s).\n",
blk->params);
goto fail;
@@ -621,13 +640,18 @@ static int blktapctrl_new_blkif(blkif_t
blkif->cookie = next_cookie++;
if (!exist) {
- if (type == DISK_TYPE_IOEMU) {
+#ifdef ALWAYS_USE_IOEMU
+ if (connect_qemu(blkif, blkif->domid))
+ goto fail;
+#else
+ if (use_ioemu) {
if (connect_qemu(blkif, blkif->domid))
goto fail;
} else {
if (connect_tapdisk(blkif, minor))
goto fail;
}
+#endif
} else {
DPRINTF("Process exists!\n");
Index: xen-3.3.1-testing/tools/blktap/drivers/tapdisk.h
===================================================================
--- xen-3.3.1-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-3.3.1-testing/tools/blktap/drivers/tapdisk.h
@@ -145,6 +145,8 @@ typedef struct disk_info {
char handle[10]; /* xend handle, e.g. 'ram' */
int single_handler; /* is there a single controller for all */
/* instances of disk type? */
+ int use_ioemu; /* backend provider: 0 = tapdisk; 1 = ioemu */
+
#ifdef TAPDISK
struct tap_disk *drv;
#endif
@@ -166,6 +168,7 @@ static disk_info_t aio_disk = {
--- xen-3.4.0-testing.orig/tools/blktap/drivers/tapdisk.h
+++ xen-3.4.0-testing/tools/blktap/drivers/tapdisk.h
@@ -168,7 +168,7 @@ static disk_info_t aio_disk = {
"raw image (aio)",
"aio",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_aio,
#endif
@@ -176,6 +179,7 @@ static disk_info_t sync_disk = {
@@ -179,7 +179,7 @@ static disk_info_t sync_disk = {
"raw image (sync)",
"sync",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_sync,
#endif
@@ -186,6 +190,7 @@ static disk_info_t vmdk_disk = {
@@ -190,7 +190,7 @@ static disk_info_t vmdk_disk = {
"vmware image (vmdk)",
"vmdk",
1,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_vmdk,
#endif
@@ -196,6 +201,7 @@ static disk_info_t ram_disk = {
"ramdisk image (ram)",
"ram",
1,
+ 0,
#ifdef TAPDISK
&tapdisk_ram,
#endif
@@ -206,6 +212,7 @@ static disk_info_t qcow_disk = {
@@ -212,7 +212,7 @@ static disk_info_t qcow_disk = {
"qcow disk (qcow)",
"qcow",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow,
#endif
@@ -216,6 +223,7 @@ static disk_info_t qcow2_disk = {
@@ -223,7 +223,7 @@ static disk_info_t qcow2_disk = {
"qcow2 disk (qcow2)",
"qcow2",
0,
- 0,
+ 1,
#ifdef TAPDISK
&tapdisk_qcow2,
#endif
@@ -226,6 +234,7 @@ static disk_info_t ioemu_disk = {
"ioemu disk",
"ioemu",
1,
+ 1,
#ifdef TAPDISK
NULL
#endif


@ -1,7 +1,7 @@
Index: xen-3.3.1-testing/tools/examples/block
Index: xen-3.4.0-testing/tools/hotplug/Linux/block
===================================================================
--- xen-3.3.1-testing.orig/tools/examples/block
+++ xen-3.3.1-testing/tools/examples/block
--- xen-3.4.0-testing.orig/tools/hotplug/Linux/block
+++ xen-3.4.0-testing/tools/hotplug/Linux/block
@@ -225,11 +225,14 @@ case "$command" in
;;


@ -23,8 +23,7 @@ find_sdev()
unset dev
for session in /sys/class/iscsi_session/session*; do
if [ "$1" = "`cat $session/targetname`" ]; then
dev=`readlink $session/device/target*/*:0:*/block*`
dev=${dev##*/}
dev=`basename $session/device/target*/*:0:*/block*/*`
return
fi
done
@ -34,8 +33,7 @@ find_sdev_rev()
{
unset tgt
for session in /sys/class/iscsi_session/session*; do
dev=`readlink $session/device/target*/*:0:*/block*`
dev=${dev##*/}
dev=`basename $session/device/target*/*:0:*/block*/*`
if [ "$dev" = "$1" ]; then
tgt=`cat $session/targetname`
return


@ -1,7 +1,7 @@
Index: xen-3.3.1-testing/tools/examples/block
Index: xen-3.4.0-testing/tools/hotplug/Linux/block
===================================================================
--- xen-3.3.1-testing.orig/tools/examples/block
+++ xen-3.3.1-testing/tools/examples/block
--- xen-3.4.0-testing.orig/tools/hotplug/Linux/block
+++ xen-3.4.0-testing/tools/hotplug/Linux/block
@@ -241,107 +241,111 @@ case "$command" in
mount it read-write in a guest domain."
fi


@ -1,7 +1,7 @@
Index: xen-3.3.1-testing/tools/examples/network-bridge
Index: xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-3.3.1-testing.orig/tools/examples/network-bridge
+++ xen-3.3.1-testing/tools/examples/network-bridge
--- xen-3.4.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
@@ -241,6 +241,9 @@ op_start () {
return
fi
@ -12,14 +12,19 @@ Index: xen-3.3.1-testing/tools/examples/network-bridge
vlans=$(find_active_vlans "${netdev}")
for vlan in $vlans ; do ifdown $vlan ; done
@@ -254,13 +257,27 @@ op_start () {
@@ -258,18 +261,32 @@ op_start () {
ip link set ${netdev} down
ip addr flush ${netdev}
fi
- ip link set ${netdev} name ${pdev}
- ip link set ${tdev} name ${bridge}
-
- setup_bridge_port ${pdev}
- # Restore slaves
- if [ -n "${slaves}" ]; then
- ip link set ${pdev} up
- ifenslave ${pdev} ${slaves}
+ if [ "x${bonded}" = "xyes" ]
+ then
+ ip link set ${tdev} name ${bridge}
@ -36,12 +41,17 @@ Index: xen-3.3.1-testing/tools/examples/network-bridge
+ ip link set ${tdev} name ${bridge}
+
+ setup_bridge_port ${pdev}
- add_to_bridge2 ${bridge} ${pdev}
- do_ifup ${bridge}
+
+ # Restore slaves
+ if [ -n "${slaves}" ]; then
+ ip link set ${pdev} up
+ ifenslave ${pdev} ${slaves}
+ fi
+ add_to_bridge2 ${bridge} ${pdev}
+ do_ifup ${bridge}
+ fi
fi
- add_to_bridge2 ${bridge} ${pdev}
- do_ifup ${bridge}
for vlan in $vlans ; do ifup $vlan ; done


@ -22,11 +22,11 @@ Signed-off-by: Gerd Hoffmann <kraxel@suse.de>
tools/examples/network-bridge | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
Index: xen-3.2.1-testing/tools/examples/network-bridge
Index: xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-3.2.1-testing.orig/tools/examples/network-bridge
+++ xen-3.2.1-testing/tools/examples/network-bridge
@@ -305,6 +305,31 @@ op_stop () {
--- xen-3.4.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
@@ -316,6 +316,31 @@ op_stop () {
for vlan in $vlans ; do ifup $vlan ; done
}
@ -58,7 +58,7 @@ Index: xen-3.2.1-testing/tools/examples/network-bridge
# adds $dev to $bridge but waits for $dev to be in running state first
add_to_bridge2() {
local bridge=$1
@@ -330,11 +355,19 @@ add_to_bridge2() {
@@ -341,11 +366,19 @@ add_to_bridge2() {
case "$command" in
start)


@ -1,8 +1,8 @@
Index: xen-3.3.1-testing/tools/examples/network-bridge
Index: xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-3.3.1-testing.orig/tools/examples/network-bridge
+++ xen-3.3.1-testing/tools/examples/network-bridge
@@ -253,18 +253,18 @@ op_stop () {
--- xen-3.4.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
@@ -262,18 +262,18 @@ op_stop () {
transfer_addrs ${bridge} ${pdev}
if ! ifdown ${bridge}; then
get_ip_info ${bridge}


@ -1,7 +1,7 @@
Index: xen-3.3.1-testing/tools/examples/network-bridge
Index: xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
===================================================================
--- xen-3.3.1-testing.orig/tools/examples/network-bridge
+++ xen-3.3.1-testing/tools/examples/network-bridge
--- xen-3.4.0-testing.orig/tools/hotplug/Linux/network-bridge
+++ xen-3.4.0-testing/tools/hotplug/Linux/network-bridge
@@ -249,6 +249,11 @@ op_start () {
create_bridge ${tdev}
@ -13,8 +13,8 @@ Index: xen-3.3.1-testing/tools/examples/network-bridge
+
preiftransfer ${netdev}
transfer_addrs ${netdev} ${tdev}
if ! ifdown ${netdev}; then
@@ -313,6 +318,13 @@ op_stop () {
# Remember slaves for bonding interface.
@@ -322,6 +327,13 @@ op_stop () {
ip link set ${pdev} name ${netdev}
do_ifup ${netdev}
