a4d1d9fe03
- bnc#828623 - bnc#839596 - VUL-0: CVE-2013-1442: XSA-62: xen: Information
  leak on AVX and/or LWP capable CPUs
  5242a1b5-x86-xsave-initialize-extended-register-state-when-guests-enable-it.patch
- bnc#840592 - VUL-0: CVE-2013-4355: XSA-63: xen: Information leaks through
  I/O instruction emulation
  CVE-2013-4355-xsa63.patch
- bnc#840593 - VUL-0: CVE-2013-4356: XSA-64: xen: Memory accessible by
  64-bit PV guests under live migration
  CVE-2013-4356-xsa64.patch
- bnc#841766 - VUL-1: CVE-2013-4361: XSA-66: xen: Information leak through
  fbld instruction emulation
  CVE-2013-4361-xsa66.patch
- bnc#833796 - L3: Xen: migration broken from xsave-capable to
  xsave-incapable host
  52205e27-x86-xsave-initialization-improvements.patch
  522dc0e6-x86-xsave-fix-migration-from-xsave-capable-to-xsave-incapable-host.patch
- bnc#839600 - [HP BCS SLES11 Bug]: In HP’s UEFI x86_64 platform and
  sles11sp3 with xen environment, xen hypervisor will panic on multiple
  blades nPar.
  523172d5-x86-fix-memory-cut-off-when-using-PFN-compression.patch
- bnc#833251 - [HP BCS SLES11 Bug]: In HP’s UEFI x86_64 platform and with
  xen environment, in booting stage, xen hypervisor will panic.
  522d896b-x86-EFI-properly-handle-run-time-memory-regions-outside-the-1-1-map.patch
- bnc#834751 - [HP BCS SLES11 Bug]: In xen, "shutdown -y 0 -h" cannot power
  off system
  522d896b-x86-EFI-properly-handle-run-time-memory-regions-outside-the-1-1-map.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=274

# Commit 5e5a44b6c942d6ea47f15d6f1ed02b03e0d69445
# Date 2013-09-20 11:37:28 +0200
# Author Dario Faggioli <dario.faggioli@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>

sched_credit: filter node-affinity mask against online cpus

in _csched_cpu_pick(), as not doing so may result in the domain's
node-affinity mask (as retrieved by csched_balance_cpumask()) and
online mask (as retrieved by cpupool_scheduler_cpumask()) having an
empty intersection.

Therefore, when attempting a node-affinity load balancing step and
running this:

    ...
    /* Pick an online CPU from the proper affinity mask */
    csched_balance_cpumask(vc, balance_step, &cpus);
    cpumask_and(&cpus, &cpus, online);
    ...

we end up with an empty cpumask (in cpus). At this point, in the
following code:

    ....
    /* If present, prefer vc's current processor */
    cpu = cpumask_test_cpu(vc->processor, &cpus)
            ? vc->processor
            : cpumask_cycle(vc->processor, &cpus);
    ....

an ASSERT (from inside cpumask_cycle()) triggers like this:

(XEN) Xen call trace:
(XEN) [<ffff82d08011b124>] _csched_cpu_pick+0x1d2/0x652
(XEN) [<ffff82d08011b5b2>] csched_cpu_pick+0xe/0x10
(XEN) [<ffff82d0801232de>] vcpu_migrate+0x167/0x31e
(XEN) [<ffff82d0801238cc>] cpu_disable_scheduler+0x1c8/0x287
(XEN) [<ffff82d080101b3f>] cpupool_unassign_cpu_helper+0x20/0xb4
(XEN) [<ffff82d08010544f>] continue_hypercall_tasklet_handler+0x4a/0xb1
(XEN) [<ffff82d080127793>] do_tasklet_work+0x78/0xab
(XEN) [<ffff82d080127a70>] do_tasklet+0x5f/0x8b
(XEN) [<ffff82d080158985>] idle_loop+0x57/0x5e
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 1:
(XEN) Assertion 'cpu < nr_cpu_ids' failed at /home/dario/Sources/xen/xen/xen.git/xen/include/xe:16481

It is, for example, sufficient to have a domain with node-affinity to
NUMA node 1 running, and issuing an `xl cpupool-numa-split' would make
the above happen. That is because, by default, all the existing domains
remain assigned to the first cpupool, and it now (after the
cpupool-numa-split) only includes NUMA node 0.
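
To make the failing scenario concrete, here is a minimal, self-contained C
sketch of it. This is illustration only, not Xen code: cpumasks are modelled
as 64-bit words, and the host layout (8 pcpus across 2 NUMA nodes) and all
helper names are made up.

    /* Toy model of the pre-fix behaviour; all names are hypothetical. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS 8

    /* Index of the lowest set bit, or NR_CPUS if the mask is empty. */
    static int mask_first(uint64_t mask)
    {
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if (mask & (1ull << cpu))
                return cpu;
        return NR_CPUS;
    }

    int main(void)
    {
        uint64_t node0 = 0x0f;          /* pcpus 0-3 */
        uint64_t node1 = 0xf0;          /* pcpus 4-7 */

        uint64_t node_affinity = node1; /* domain prefers NUMA node 1      */
        uint64_t online = node0;        /* but its cpupool only has node 0
                                           (e.g. after cpupool-numa-split) */

        /* What the node-affinity balance step effectively computed: */
        uint64_t cpus = node_affinity & online;
        printf("node-affinity & online = %#llx\n", (unsigned long long)cpus);

        /*
         * cpus is empty, so no valid pcpu index can be found in it: the
         * search falls off the end of the mask, which is the situation the
         * hypervisor's 'cpu < nr_cpu_ids' assertion guards against.
         */
        int cpu = mask_first(cpus);
        assert(cpu < NR_CPUS);          /* fires, mirroring the splat above */
        return 0;
    }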

This change prevents that by generalizing the function used for figuring
out whether a node-affinity load balancing step is legit or not. This way
we can, in _csched_cpu_pick(), figure out early enough that the mask would
end up empty, skip the step altogether and avoid the splat.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>

--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -296,15 +296,28 @@ static void csched_set_node_affinity(
  * vcpu-affinity balancing is always necessary and must never be skipped.
  * OTOH, if a domain's node-affinity is said to be automatically computed
  * (or if it just spans all the nodes), we can safely avoid dealing with
- * node-affinity entirely. Ah, node-affinity is also deemed meaningless
- * in case it has empty intersection with the vcpu's vcpu-affinity, as it
- * would mean trying to schedule it on _no_ pcpu!
+ * node-affinity entirely.
+ *
+ * Node-affinity is also deemed meaningless in case it has empty
+ * intersection with mask, to cover the cases where using the node-affinity
+ * mask seems legit, but would instead led to trying to schedule the vcpu
+ * on _no_ pcpu! Typical use cases are for mask to be equal to the vcpu's
+ * vcpu-affinity, or to the && of vcpu-affinity and the set of online cpus
+ * in the domain's cpupool.
  */
-#define __vcpu_has_node_affinity(vc) \
-    ( !(cpumask_full(CSCHED_DOM(vc->domain)->node_affinity_cpumask) \
-        || !cpumask_intersects(vc->cpu_affinity, \
-                               CSCHED_DOM(vc->domain)->node_affinity_cpumask) \
-        || vc->domain->auto_node_affinity == 1) )
+static inline int __vcpu_has_node_affinity(const struct vcpu *vc,
+                                           const cpumask_t *mask)
+{
+    const struct domain *d = vc->domain;
+    const struct csched_dom *sdom = CSCHED_DOM(d);
+
+    if ( d->auto_node_affinity
+         || cpumask_full(sdom->node_affinity_cpumask)
+         || !cpumask_intersects(sdom->node_affinity_cpumask, mask) )
+        return 0;
+
+    return 1;
+}
 
 /*
  * Each csched-balance step uses its own cpumask. This function determines
@@ -393,7 +406,8 @@ __runq_tickle(unsigned int cpu, struct c
             int new_idlers_empty;
 
             if ( balance_step == CSCHED_BALANCE_NODE_AFFINITY
-                 && !__vcpu_has_node_affinity(new->vcpu) )
+                 && !__vcpu_has_node_affinity(new->vcpu,
+                                              new->vcpu->cpu_affinity) )
                 continue;
 
             /* Are there idlers suitable for new (for this balance step)? */
@@ -626,11 +640,32 @@ _csched_cpu_pick(const struct scheduler
     int cpu = vc->processor;
     int balance_step;
 
+    /* Store in cpus the mask of online cpus on which the domain can run */
     online = cpupool_scheduler_cpumask(vc->domain->cpupool);
+    cpumask_and(&cpus, vc->cpu_affinity, online);
+
     for_each_csched_balance_step( balance_step )
     {
+        /*
+         * We want to pick up a pcpu among the ones that are online and
+         * can accommodate vc, which is basically what we computed above
+         * and stored in cpus. As far as vcpu-affinity is concerned,
+         * there always will be at least one of these pcpus, hence cpus
+         * is never empty and the calls to cpumask_cycle() and
+         * cpumask_test_cpu() below are ok.
+         *
+         * On the other hand, when considering node-affinity too, it
+         * is possible for the mask to become empty (for instance, if the
+         * domain has been put in a cpupool that does not contain any of the
+         * nodes in its node-affinity), which would result in the ASSERT()-s
+         * inside cpumask_*() operations triggering (in debug builds).
+         *
+         * Therefore, in this case, we filter the node-affinity mask against
+         * cpus and, if the result is empty, we just skip the node-affinity
+         * balancing step all together.
+         */
         if ( balance_step == CSCHED_BALANCE_NODE_AFFINITY
-             && !__vcpu_has_node_affinity(vc) )
+             && !__vcpu_has_node_affinity(vc, &cpus) )
             continue;
 
         /* Pick an online CPU from the proper affinity mask */
@@ -1449,7 +1484,7 @@ csched_runq_steal(int peer_cpu, int cpu,
              * or counter.
              */
             if ( balance_step == CSCHED_BALANCE_NODE_AFFINITY
-                 && !__vcpu_has_node_affinity(vc) )
+                 && !__vcpu_has_node_affinity(vc, vc->cpu_affinity) )
                 continue;
 
             csched_balance_cpumask(vc, balance_step, csched_balance_mask);
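
The diff above is the authoritative fix. As a rough, self-contained
illustration of the pattern it generalizes (compute once the set of online
cpus the vcpu may run on, and skip the node-affinity balancing step whenever
the node-affinity mask no longer intersects it), here is a toy C sketch.
Again, cpumasks are modelled as 64-bit words and every name below is
hypothetical, not Xen's.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum balance_step { STEP_NODE_AFFINITY, STEP_VCPU_AFFINITY, NR_STEPS };

    /* Is a node-affinity step against 'mask' meaningful at all? */
    static bool has_node_affinity(uint64_t node_affinity, uint64_t all_cpus,
                                  uint64_t mask)
    {
        if (node_affinity == all_cpus)       /* spans every node: skip   */
            return false;
        return (node_affinity & mask) != 0;  /* empty intersection: skip */
    }

    int main(void)
    {
        uint64_t all_cpus      = 0xff;  /* 8 pcpus                     */
        uint64_t node_affinity = 0xf0;  /* NUMA node 1                 */
        uint64_t cpu_affinity  = 0xff;  /* vcpu may run anywhere       */
        uint64_t online        = 0x0f;  /* its cpupool only has node 0 */

        /* Computed once, like the new cpumask_and() in _csched_cpu_pick() */
        uint64_t cpus = cpu_affinity & online;   /* never empty            */

        for (int step = 0; step < NR_STEPS; step++) {
            if (step == STEP_NODE_AFFINITY &&
                !has_node_affinity(node_affinity, all_cpus, cpus)) {
                printf("node-affinity step skipped (empty intersection)\n");
                continue;                        /* no empty-mask cycling  */
            }
            uint64_t pick_from =
                (step == STEP_NODE_AFFINITY) ? (node_affinity & cpus) : cpus;
            printf("step %d: picking a pcpu from %#llx\n", step,
                   (unsigned long long)pick_from);
        }
        return 0;
    }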