xen/559bc64e-credit1-properly-deal-with-CPUs-not-in-any-pool.patch

# Commit 02ea5031825d984d52eb9a982b8457e3434137f0
# Date 2015-07-07 14:30:06 +0200
# Author Dario Faggioli <dario.faggioli@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
credit1: properly deal with pCPUs not in any cpupool

Ideally, the pCPUs that are 'free', i.e., not assigned
to any cpupool, should not be considered by the scheduler
for load balancing or anything. In Credit1, we fail at
this, because of how we use cpupool_scheduler_cpumask().
In fact, for a free pCPU, cpupool_scheduler_cpumask()
returns a pointer to cpupool_free_cpus, and hence, near
the top of csched_load_balance():

 if ( unlikely(!cpumask_test_cpu(cpu, online)) )
     goto out;

is false (the pCPU _is_ free!), and we therefore do not
jump to the end right away, as we should. This causes
the following splat when resuming from ACPI S3 with
pCPUs not assigned to any pool:

(XEN) ----[ Xen-4.6-unstable  x86_64  debug=y  Tainted:    C ]----
(XEN) ... ... ...
(XEN) Xen call trace:
(XEN)    [<ffff82d080122eaa>] csched_load_balance+0x213/0x794
(XEN)    [<ffff82d08012374c>] csched_schedule+0x321/0x452
(XEN)    [<ffff82d08012c85e>] schedule+0x12a/0x63c
(XEN)    [<ffff82d08012fa09>] __do_softirq+0x82/0x8d
(XEN)    [<ffff82d08012fa61>] do_softirq+0x13/0x15
(XEN)    [<ffff82d080164780>] idle_loop+0x5b/0x6b
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 8:
(XEN) GENERAL PROTECTION FAULT
(XEN) [error_code=0000]
(XEN) ****************************************

The cure is:
 * use cpupool_online_cpumask(), as a better guard to the
   case when the cpu is being offlined;
 * explicitly check whether the cpu is free.

SEDF is in a similar situation, so fix it too.

Still in Credit1, we must make sure that free (or offline)
CPUs are not considered "ticklable". Not doing so would impair
the load balancing algorithm, making the scheduler think that
it is possible to 'ask' the pCPU to pick up some work, while
in reality, that will never happen! Evidence of such behavior
is shown in this trace:

 Name               CPU list
 Pool-0             0,1,2,3,4,5,6,7,8,9,10,11,12,13,14

    0.112998198 | ||.|| -|x||-|- d0v0 runstate_change d0v4 offline->runnable
 ]  0.112998198 | ||.|| -|x||-|- d0v0   22006(2:2:6) 1 [ f ]
 ]  0.112999612 | ||.|| -|x||-|- d0v0   28004(2:8:4) 2 [ 0 4 ]
    0.113003387 | ||.|| -||||-|x d32767v15 runstate_continue d32767v15 running->running

where "22006(2:2:6) 1 [ f ]" means that pCPU 15, which is
free from any pool, is tickled.

The cure, in this case, is to filter out the free pCPUs,
within __runq_tickle().

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: Juergen Gross <jgross@suse.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>

--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -350,12 +350,17 @@ __runq_tickle(unsigned int cpu, struct c
 {
     struct csched_vcpu * const cur = CSCHED_VCPU(curr_on_cpu(cpu));
     struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
-    cpumask_t mask, idle_mask;
+    cpumask_t mask, idle_mask, *online;
     int balance_step, idlers_empty;
 
     ASSERT(cur);
     cpumask_clear(&mask);
-    idlers_empty = cpumask_empty(prv->idlers);
+
+    /* cpu is vc->processor, so it must be in a cpupool. */
+    ASSERT(per_cpu(cpupool, cpu) != NULL);
+    online = cpupool_online_cpumask(per_cpu(cpupool, cpu));
+    cpumask_and(&idle_mask, prv->idlers, online);
+    idlers_empty = cpumask_empty(&idle_mask);
 
 
     /*
@@ -392,8 +397,8 @@ __runq_tickle(unsigned int cpu, struct c
             /* Are there idlers suitable for new (for this balance step)? */
             csched_balance_cpumask(new->vcpu, balance_step,
                                    csched_balance_mask);
-            cpumask_and(&idle_mask, prv->idlers, csched_balance_mask);
-            new_idlers_empty = cpumask_empty(&idle_mask);
+            cpumask_and(csched_balance_mask, csched_balance_mask, &idle_mask);
+            new_idlers_empty = cpumask_empty(csched_balance_mask);
 
             /*
              * Let's not be too harsh! If there aren't idlers suitable
@@ -1494,6 +1499,7 @@ static struct csched_vcpu *
 csched_load_balance(struct csched_private *prv, int cpu,
     struct csched_vcpu *snext, bool_t *stolen)
 {
+    struct cpupool *c = per_cpu(cpupool, cpu);
     struct csched_vcpu *speer;
     cpumask_t workers;
     cpumask_t *online;
@@ -1501,10 +1507,13 @@ csched_load_balance(struct csched_privat
     int node = cpu_to_node(cpu);
 
     BUG_ON( cpu != snext->vcpu->processor );
-    online = cpupool_scheduler_cpumask(per_cpu(cpupool, cpu));
+    online = cpupool_online_cpumask(c);
 
-    /* If this CPU is going offline we shouldn't steal work. */
-    if ( unlikely(!cpumask_test_cpu(cpu, online)) )
+    /*
+     * If this CPU is going offline, or is not (yet) part of any cpupool
+     * (as it happens, e.g., during cpu bringup), we shouldn't steal work.
+     */
+    if ( unlikely(!cpumask_test_cpu(cpu, online) || c == NULL) )
         goto out;
 
     if ( snext->pri == CSCHED_PRI_IDLE )
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -791,7 +791,8 @@ static struct task_slice sedf_do_schedul
     if ( tasklet_work_scheduled ||
          (list_empty(runq) && list_empty(waitq)) ||
          unlikely(!cpumask_test_cpu(cpu,
-                   cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) )
+                   cpupool_online_cpumask(per_cpu(cpupool, cpu))) ||
+                  per_cpu(cpupool, cpu) == NULL) )
     {
         ret.task = IDLETASK(cpu);
         ret.time = SECONDS(1);
- bnc#935634 - VUL-0: CVE-2015-3259: xen: XSA-137: xl command line config handling stack overflow 55a62eb0-xl-correct-handling-of-extra_config-in-main_cpupoolcreate.patch - bsc#907514 - Bus fatal error & sles12 sudden reboot has been observed - bsc#910258 - SLES12 Xen host crashes with FATAL NMI after shutdown of guest with VT-d NIC - bsc#918984 - Bus fatal error & sles11-SP4 sudden reboot has been observed - bsc#923967 - Partner-L3: Bus fatal error & sles11-SP3 sudden reboot has been observed 552d293b-x86-vMSI-X-honor-all-mask-requests.patch 552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch 5576f143-x86-adjust-PV-I-O-emulation-functions-types.patch 55795a52-x86-vMSI-X-support-qword-MMIO-access.patch 5583d9c5-x86-MSI-X-cleanup.patch 5583da09-x86-MSI-track-host-and-guest-masking-separately.patch 55b0a218-x86-PCI-CFG-write-intercept.patch 55b0a255-x86-MSI-X-maskall.patch 55b0a283-x86-MSI-X-teardown.patch 55b0a2ab-x86-MSI-X-enable.patch 55b0a2db-x86-MSI-track-guest-masking.patch - Upstream patches from Jan 552d0f49-x86-traps-identify-the-vcpu-in-context-when-dumping-regs.patch 559bc633-x86-cpupool-clear-proper-cpu_valid-bit-on-CPU-teardown.patch 559bc64e-credit1-properly-deal-with-CPUs-not-in-any-pool.patch 559bc87f-x86-hvmloader-avoid-data-corruption-with-xenstore-rw.patch 55a66a1e-make-rangeset_report_ranges-report-all-ranges.patch 55a77e4f-dmar-device-scope-mem-leak-fix.patch OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=373 2015-08-27 00:28:15 +02:00			`# Commit 02ea5031825d984d52eb9a982b8457e3434137f0`
			`# Date 2015-07-07 14:30:06 +0200`
			`# Author Dario Faggioli <dario.faggioli@citrix.com>`
			`# Committer Jan Beulich <jbeulich@suse.com>`
			`credit1: properly deal with pCPUs not in any cpupool`

			`Ideally, the pCPUs that are 'free', i.e., not assigned`
			`to any cpupool, should not be considered by the scheduler`
			`for load balancing or anything. In Credit1, we fail at`
			`this, because of how we use cpupool_scheduler_cpumask().`
			`In fact, for a free pCPU, cpupool_scheduler_cpumask()`
			`returns a pointer to cpupool_free_cpus, and hence, near`
			`the top of csched_load_balance():`

			`if ( unlikely(!cpumask_test_cpu(cpu, online)) )`
			`goto out;`

			`is false (the pCPU _is_ free!), and we therefore do not`
			`jump to the end right away, as we should. This causes`
			`the following splat when resuming from ACPI S3 with`
			`pCPUs not assigned to any pool:`

			`(XEN) ----[ Xen-4.6-unstable x86_64 debug=y Tainted: C ]----`
			`(XEN) ... ... ...`
			`(XEN) Xen call trace:`
			`(XEN) [<ffff82d080122eaa>] csched_load_balance+0x213/0x794`
			`(XEN) [<ffff82d08012374c>] csched_schedule+0x321/0x452`
			`(XEN) [<ffff82d08012c85e>] schedule+0x12a/0x63c`
			`(XEN) [<ffff82d08012fa09>] __do_softirq+0x82/0x8d`
			`(XEN) [<ffff82d08012fa61>] do_softirq+0x13/0x15`
			`(XEN) [<ffff82d080164780>] idle_loop+0x5b/0x6b`
			`(XEN)`
			`(XEN)`
			`(XEN) ****************************************`
			`(XEN) Panic on CPU 8:`
			`(XEN) GENERAL PROTECTION FAULT`
			`(XEN) [error_code=0000]`
			`(XEN) ****************************************`

			`The cure is:`
			`* use cpupool_online_cpumask(), as a better guard to the`
			`case when the cpu is being offlined;`
			`* explicitly check whether the cpu is free.`

			`SEDF is in a similar situation, so fix it too.`

			`Still in Credit1, we must make sure that free (or offline)`
			`CPUs are not considered "ticklable". Not doing so would impair`
			`the load balancing algorithm, making the scheduler think that`
			`it is possible to 'ask' the pCPU to pick up some work, while`
			`in reality, that will never happen! Evidence of such behavior`
			`is shown in this trace:`

			`Name CPU list`
			`Pool-0 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14`

			`0.112998198 \| \|\|.\|\| -\|x\|\|-\|- d0v0 runstate_change d0v4 offline->runnable`
			`] 0.112998198 \| \|\|.\|\| -\|x\|\|-\|- d0v0 22006(2:2:6) 1 [ f ]`
			`] 0.112999612 \| \|\|.\|\| -\|x\|\|-\|- d0v0 28004(2:8:4) 2 [ 0 4 ]`
			`0.113003387 \| \|\|.\|\| -\|\|\|\|-\|x d32767v15 runstate_continue d32767v15 running->running`

			`where "22006(2:2:6) 1 [ f ]" means that pCPU 15, which is`
			`free from any pool, is tickled.`

			`The cure, in this case, is to filter out the free pCPUs,`
			`within __runq_tickle().`

			`Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>`
			`Acked-by: Juergen Gross <jgross@suse.com>`
			`Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>`

			`--- a/xen/common/sched_credit.c`
			`+++ b/xen/common/sched_credit.c`
			`@@ -350,12 +350,17 @@ __runq_tickle(unsigned int cpu, struct c`
			`{`
			`struct csched_vcpu * const cur = CSCHED_VCPU(curr_on_cpu(cpu));`
			`struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));`
			`- cpumask_t mask, idle_mask;`
			`+ cpumask_t mask, idle_mask, *online;`
			`int balance_step, idlers_empty;`

			`ASSERT(cur);`
			`cpumask_clear(&mask);`
			`- idlers_empty = cpumask_empty(prv->idlers);`
			`+`
			`+ /* cpu is vc->processor, so it must be in a cpupool. */`
			`+ ASSERT(per_cpu(cpupool, cpu) != NULL);`
			`+ online = cpupool_online_cpumask(per_cpu(cpupool, cpu));`
			`+ cpumask_and(&idle_mask, prv->idlers, online);`
			`+ idlers_empty = cpumask_empty(&idle_mask);`


			`/*`
			`@@ -392,8 +397,8 @@ __runq_tickle(unsigned int cpu, struct c`
			`/* Are there idlers suitable for new (for this balance step)? */`
			`csched_balance_cpumask(new->vcpu, balance_step,`
			`csched_balance_mask);`
			`- cpumask_and(&idle_mask, prv->idlers, csched_balance_mask);`
			`- new_idlers_empty = cpumask_empty(&idle_mask);`
			`+ cpumask_and(csched_balance_mask, csched_balance_mask, &idle_mask);`
			`+ new_idlers_empty = cpumask_empty(csched_balance_mask);`

			`/*`
			`* Let's not be too harsh! If there aren't idlers suitable`
			`@@ -1494,6 +1499,7 @@ static struct csched_vcpu *`
			`csched_load_balance(struct csched_private *prv, int cpu,`
			`struct csched_vcpu *snext, bool_t *stolen)`
			`{`
			`+ struct cpupool *c = per_cpu(cpupool, cpu);`
			`struct csched_vcpu *speer;`
			`cpumask_t workers;`
			`cpumask_t *online;`
			`@@ -1501,10 +1507,13 @@ csched_load_balance(struct csched_privat`
			`int node = cpu_to_node(cpu);`

			`BUG_ON( cpu != snext->vcpu->processor );`
			`- online = cpupool_scheduler_cpumask(per_cpu(cpupool, cpu));`
			`+ online = cpupool_online_cpumask(c);`

			`- /* If this CPU is going offline we shouldn't steal work. */`
			`- if ( unlikely(!cpumask_test_cpu(cpu, online)) )`
			`+ /*`
			`+ * If this CPU is going offline, or is not (yet) part of any cpupool`
			`+ * (as it happens, e.g., during cpu bringup), we shouldn't steal work.`
			`+ */`
			`+ if ( unlikely(!cpumask_test_cpu(cpu, online) \|\| c == NULL) )`
			`goto out;`

			`if ( snext->pri == CSCHED_PRI_IDLE )`
			`--- a/xen/common/sched_sedf.c`
			`+++ b/xen/common/sched_sedf.c`
			`@@ -791,7 +791,8 @@ static struct task_slice sedf_do_schedul`
			`if ( tasklet_work_scheduled \|\|`
			`(list_empty(runq) && list_empty(waitq)) \|\|`
			`unlikely(!cpumask_test_cpu(cpu,`
			`- cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) )`
			`+ cpupool_online_cpumask(per_cpu(cpupool, cpu))) \|\|`
			`+ per_cpu(cpupool, cpu) == NULL) )`
			`{`
			`ret.task = IDLETASK(cpu);`
			`ret.time = SECONDS(1);`