89 lines
3.4 KiB
Diff
89 lines
3.4 KiB
Diff
|
# Commit 8022b05284dea80e24813d03180788ec7277a0bd
|
||
|
# Date 2015-07-07 14:29:39 +0200
|
||
|
# Author Dario Faggioli <dario.faggioli@citrix.com>
|
||
|
# Committer Jan Beulich <jbeulich@suse.com>
|
||
|
x86 / cpupool: clear the proper cpu_valid bit on pCPU teardown
|
||
|
|
||
|
In fact, when a pCPU goes down, we want to clear its
|
||
|
bit in the correct cpupool's valid mask, rather than
|
||
|
always in cpupool0's mask.
|
||
|
|
||
|
Before this commit, all the pCPUs in the non-default
|
||
|
pool(s) will be considered immediately valid, during
|
||
|
system resume, even the ones that have not been brought
|
||
|
up yet. As a result, the (Credit1) scheduler will attempt
|
||
|
to run its load balancing logic on them, causing the
|
||
|
following Oops:
|
||
|
|
||
|
# xl cpupool-cpu-remove Pool-0 8-15
|
||
|
# xl cpupool-create name=\"Pool-1\"
|
||
|
# xl cpupool-cpu-add Pool-1 8-15
|
||
|
--> suspend
|
||
|
--> resume
|
||
|
(XEN) ----[ Xen-4.6-unstable x86_64 debug=y Tainted: C ]----
|
||
|
(XEN) CPU: 8
|
||
|
(XEN) RIP: e008:[<ffff82d080123078>] csched_schedule+0x4be/0xb97
|
||
|
(XEN) RFLAGS: 0000000000010087 CONTEXT: hypervisor
|
||
|
(XEN) rax: 80007d2f7fccb780 rbx: 0000000000000009 rcx: 0000000000000000
|
||
|
(XEN) rdx: ffff82d08031ed40 rsi: ffff82d080334980 rdi: 0000000000000000
|
||
|
(XEN) rbp: ffff83010000fe20 rsp: ffff83010000fd40 r8: 0000000000000004
|
||
|
(XEN) r9: 0000ffff0000ffff r10: 00ff00ff00ff00ff r11: 0f0f0f0f0f0f0f0f
|
||
|
(XEN) r12: ffff8303191ea870 r13: ffff8303226aadf0 r14: 0000000000000009
|
||
|
(XEN) r15: 0000000000000008 cr0: 000000008005003b cr4: 00000000000026f0
|
||
|
(XEN) cr3: 00000000dba9d000 cr2: 0000000000000000
|
||
|
(XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: e008
|
||
|
(XEN) ... ... ...
|
||
|
(XEN) Xen call trace:
|
||
|
(XEN) [<ffff82d080123078>] csched_schedule+0x4be/0xb97
|
||
|
(XEN) [<ffff82d08012c732>] schedule+0x12a/0x63c
|
||
|
(XEN) [<ffff82d08012f8c8>] __do_softirq+0x82/0x8d
|
||
|
(XEN) [<ffff82d08012f920>] do_softirq+0x13/0x15
|
||
|
(XEN) [<ffff82d080164791>] idle_loop+0x5b/0x6b
|
||
|
(XEN)
|
||
|
(XEN) ****************************************
|
||
|
(XEN) Panic on CPU 8:
|
||
|
(XEN) GENERAL PROTECTION FAULT
|
||
|
(XEN) [error_code=0000]
|
||
|
(XEN) ****************************************
|
||
|
|
||
|
The reason why the error is a #GP fault is that, without
|
||
|
this commit, we try to access the per-cpu area of a not
|
||
|
yet allocated and initialized pCPU.
|
||
|
In fact, %rax, which is what is used as pointer, is
|
||
|
80007d2f7fccb780, and we also have this:
|
||
|
|
||
|
#define INVALID_PERCPU_AREA (0x8000000000000000L - (long)__per_cpu_start)
|
||
|
|
||
|
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||
|
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||
|
Acked-by: Juergen Gross <jgross@suse.com>
|
||
|
|
||
|
--- a/xen/arch/x86/smpboot.c
|
||
|
+++ b/xen/arch/x86/smpboot.c
|
||
|
@@ -816,7 +816,6 @@ void __cpu_disable(void)
|
||
|
remove_siblinginfo(cpu);
|
||
|
|
||
|
/* It's now safe to remove this processor from the online map */
|
||
|
- cpumask_clear_cpu(cpu, cpupool0->cpu_valid);
|
||
|
cpumask_clear_cpu(cpu, &cpu_online_map);
|
||
|
fixup_irqs();
|
||
|
|
||
|
--- a/xen/common/cpupool.c
|
||
|
+++ b/xen/common/cpupool.c
|
||
|
@@ -529,6 +529,7 @@ static int cpupool_cpu_remove(unsigned i
|
||
|
if ( cpumask_test_cpu(cpu, (*c)->cpu_valid ) )
|
||
|
{
|
||
|
cpumask_set_cpu(cpu, (*c)->cpu_suspended);
|
||
|
+ cpumask_clear_cpu(cpu, (*c)->cpu_valid);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
@@ -551,6 +552,7 @@ static int cpupool_cpu_remove(unsigned i
|
||
|
* If we are not suspending, we are hot-unplugging cpu, and that is
|
||
|
* allowed only for CPUs in pool0.
|
||
|
*/
|
||
|
+ cpumask_clear_cpu(cpu, cpupool0->cpu_valid);
|
||
|
ret = 0;
|
||
|
}
|
||
|
|