8724a18868
config handling stack overflow
  55a62eb0-xl-correct-handling-of-extra_config-in-main_cpupoolcreate.patch
- bsc#907514 - Bus fatal error & sles12 sudden reboot has been observed
- bsc#910258 - SLES12 Xen host crashes with FATAL NMI after shutdown of
  guest with VT-d NIC
- bsc#918984 - Bus fatal error & sles11-SP4 sudden reboot has been observed
- bsc#923967 - Partner-L3: Bus fatal error & sles11-SP3 sudden reboot has
  been observed
  552d293b-x86-vMSI-X-honor-all-mask-requests.patch
  552d2966-x86-vMSI-X-add-valid-bits-for-read-acceleration.patch
  5576f143-x86-adjust-PV-I-O-emulation-functions-types.patch
  55795a52-x86-vMSI-X-support-qword-MMIO-access.patch
  5583d9c5-x86-MSI-X-cleanup.patch
  5583da09-x86-MSI-track-host-and-guest-masking-separately.patch
  55b0a218-x86-PCI-CFG-write-intercept.patch
  55b0a255-x86-MSI-X-maskall.patch
  55b0a283-x86-MSI-X-teardown.patch
  55b0a2ab-x86-MSI-X-enable.patch
  55b0a2db-x86-MSI-track-guest-masking.patch
- Upstream patches from Jan
  552d0f49-x86-traps-identify-the-vcpu-in-context-when-dumping-regs.patch
  559bc633-x86-cpupool-clear-proper-cpu_valid-bit-on-CPU-teardown.patch
  559bc64e-credit1-properly-deal-with-CPUs-not-in-any-pool.patch
  559bc87f-x86-hvmloader-avoid-data-corruption-with-xenstore-rw.patch
  55a66a1e-make-rangeset_report_ranges-report-all-ranges.patch
  55a77e4f-dmar-device-scope-mem-leak-fix.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=373
# Commit 02ea5031825d984d52eb9a982b8457e3434137f0
# Date 2015-07-07 14:30:06 +0200
# Author Dario Faggioli <dario.faggioli@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
credit1: properly deal with pCPUs not in any cpupool

Ideally, the pCPUs that are 'free', i.e., not assigned
to any cpupool, should not be considered by the scheduler
for load balancing or anything. In Credit1, we fail at
this, because of how we use cpupool_scheduler_cpumask().
In fact, for a free pCPU, cpupool_scheduler_cpumask()
returns a pointer to cpupool_free_cpus, and hence, near
the top of csched_load_balance():

    if ( unlikely(!cpumask_test_cpu(cpu, online)) )
        goto out;

is false (the pCPU _is_ free!), and we therefore do not
jump to the end right away, as we should. This causes
the following splat when resuming from ACPI S3 with
pCPUs not assigned to any pool:

(XEN) ----[ Xen-4.6-unstable x86_64 debug=y Tainted: C ]----
(XEN) ... ... ...
(XEN) Xen call trace:
(XEN)    [<ffff82d080122eaa>] csched_load_balance+0x213/0x794
(XEN)    [<ffff82d08012374c>] csched_schedule+0x321/0x452
(XEN)    [<ffff82d08012c85e>] schedule+0x12a/0x63c
(XEN)    [<ffff82d08012fa09>] __do_softirq+0x82/0x8d
(XEN)    [<ffff82d08012fa61>] do_softirq+0x13/0x15
(XEN)    [<ffff82d080164780>] idle_loop+0x5b/0x6b
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 8:
(XEN) GENERAL PROTECTION FAULT
(XEN) [error_code=0000]
(XEN) ****************************************
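
For reference, the difference between the two helpers lies entirely
in how a NULL cpupool (i.e., a free pCPU) is treated. A rough sketch
of their definitions, as found in xen/include/xen/sched-if.h around
the time of this commit (check the exact tree being patched):

    /* Free pCPUs have per_cpu(cpupool, cpu) == NULL. */
    #define cpupool_scheduler_cpumask(_pool) \
        (((_pool) == NULL) ? &cpupool_free_cpus : (_pool)->cpu_valid)
    #define cpupool_online_cpumask(_pool) \
        (((_pool) == NULL) ? &cpu_online_map : (_pool)->cpu_valid)

Note that, for a free but still online pCPU, cpupool_online_cpumask()
also returns a mask containing the cpu, which is why the fix below
adds an explicit NULL check on the cpupool on top of the mask test.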

The cure is:
 * use cpupool_online_cpumask(), as a better guard for the
   case when the cpu is being offlined;
 * explicitly check whether the cpu is free.

SEDF is in a similar situation, so fix it too.

Still in Credit1, we must make sure that free (or offline)
CPUs are not considered "ticklable". Not doing so would impair
the load balancing algorithm, making the scheduler think that
it is possible to 'ask' the pCPU to pick up some work, while
in reality, that will never happen! Evidence of such behavior
is shown in this trace:

    Name               CPU list
    Pool-0             0,1,2,3,4,5,6,7,8,9,10,11,12,13,14

    0.112998198 | ||.|| -|x||-|-     d0v0  runstate_change d0v4 offline->runnable
]   0.112998198 | ||.|| -|x||-|-     d0v0  22006(2:2:6) 1 [ f ]
]   0.112999612 | ||.|| -|x||-|-     d0v0  28004(2:8:4) 2 [ 0 4 ]
    0.113003387 | ||.|| -||||-|x d32767v15 runstate_continue d32767v15 running->running

where "22006(2:2:6) 1 [ f ]" means that pCPU 15, which is
free from any pool, is tickled.

The cure, in this case, is to filter out the free pCPUs,
within __runq_tickle().
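
Condensed, the tickle-side change computes idle_mask against the
pool's online pCPUs once, up front, and then reuses it in the
per-balance-step intersection. A sketch of the resulting logic
(see the first two hunks of the diff below for the real code):

    online = cpupool_online_cpumask(per_cpu(cpupool, cpu));
    cpumask_and(&idle_mask, prv->idlers, online);
    idlers_empty = cpumask_empty(&idle_mask);

    /* ... and, later, for each balance step: */
    cpumask_and(csched_balance_mask, csched_balance_mask, &idle_mask);
    new_idlers_empty = cpumask_empty(csched_balance_mask);

This way, a free (or offline) pCPU can never end up in idle_mask,
and hence can never be tickled.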

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: Juergen Gross <jgross@suse.com>
Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>

--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -350,12 +350,17 @@ __runq_tickle(unsigned int cpu, struct c
 {
     struct csched_vcpu * const cur = CSCHED_VCPU(curr_on_cpu(cpu));
     struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
-    cpumask_t mask, idle_mask;
+    cpumask_t mask, idle_mask, *online;
     int balance_step, idlers_empty;
 
     ASSERT(cur);
     cpumask_clear(&mask);
-    idlers_empty = cpumask_empty(prv->idlers);
+
+    /* cpu is vc->processor, so it must be in a cpupool. */
+    ASSERT(per_cpu(cpupool, cpu) != NULL);
+    online = cpupool_online_cpumask(per_cpu(cpupool, cpu));
+    cpumask_and(&idle_mask, prv->idlers, online);
+    idlers_empty = cpumask_empty(&idle_mask);
 
 
     /*
@@ -392,8 +397,8 @@ __runq_tickle(unsigned int cpu, struct c
             /* Are there idlers suitable for new (for this balance step)? */
             csched_balance_cpumask(new->vcpu, balance_step,
                                    csched_balance_mask);
-            cpumask_and(&idle_mask, prv->idlers, csched_balance_mask);
-            new_idlers_empty = cpumask_empty(&idle_mask);
+            cpumask_and(csched_balance_mask, csched_balance_mask, &idle_mask);
+            new_idlers_empty = cpumask_empty(csched_balance_mask);
 
             /*
              * Let's not be too harsh! If there aren't idlers suitable
@@ -1494,6 +1499,7 @@ static struct csched_vcpu *
 csched_load_balance(struct csched_private *prv, int cpu,
                     struct csched_vcpu *snext, bool_t *stolen)
 {
+    struct cpupool *c = per_cpu(cpupool, cpu);
     struct csched_vcpu *speer;
     cpumask_t workers;
     cpumask_t *online;
@@ -1501,10 +1507,13 @@ csched_load_balance(struct csched_privat
     int node = cpu_to_node(cpu);
 
     BUG_ON( cpu != snext->vcpu->processor );
-    online = cpupool_scheduler_cpumask(per_cpu(cpupool, cpu));
+    online = cpupool_online_cpumask(c);
 
-    /* If this CPU is going offline we shouldn't steal work. */
-    if ( unlikely(!cpumask_test_cpu(cpu, online)) )
+    /*
+     * If this CPU is going offline, or is not (yet) part of any cpupool
+     * (as it happens, e.g., during cpu bringup), we shouldn't steal work.
+     */
+    if ( unlikely(!cpumask_test_cpu(cpu, online) || c == NULL) )
         goto out;
 
     if ( snext->pri == CSCHED_PRI_IDLE )
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -791,7 +791,8 @@ static struct task_slice sedf_do_schedul
     if ( tasklet_work_scheduled ||
          (list_empty(runq) && list_empty(waitq)) ||
          unlikely(!cpumask_test_cpu(cpu,
-                   cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) )
+                   cpupool_online_cpumask(per_cpu(cpupool, cpu))) ||
+                  per_cpu(cpupool, cpu) == NULL) )
     {
         ret.task = IDLETASK(cpu);
         ret.time = SECONDS(1);