# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1221141982 -3600
# Node ID 4ffc70556000869d3c301452a99e4e524dd54b07
# Parent fba8dca321c2b99842af6624f24afb77c472184b
x86: Support CPU hotplug offline.
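
A CPU can now be taken offline outside of the S3 suspend path, where
idle vcpus could be assumed to be running on all APs. fixup_irqs()
directs future interrupts away from the dying CPU, services any that
win the re-direction race, removes the CPU from every guest
interrupt's cpu_eoi_map, and flushes its pending-EOI stack.
__cpu_disable() then calls the new cpu_disable_scheduler(), which
breaks single-CPU vcpu affinities, migrates singleshot timers to
CPU0, and migrates any vcpus off the dying CPU. cpu_down() refuses
to offline the BSP (CPU0), and the credit scheduler no longer steals
work for a CPU that is going offline.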

Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -737,9 +737,12 @@ __initcall(setup_dump_irqs);
 
 void fixup_irqs(cpumask_t map)
 {
-    unsigned int irq;
+    unsigned int irq, sp;
     static int warned;
+    irq_guest_action_t *action;
+    struct pending_eoi *peoi;
 
+    /* Direct all future interrupts away from this CPU. */
     for ( irq = 0; irq < NR_IRQS; irq++ )
     {
         cpumask_t mask;
@@ -758,8 +761,24 @@ void fixup_irqs(cpumask_t map)
             printk("Cannot set affinity for irq %i\n", irq);
     }
 
+    /* Service any interrupts that beat us in the re-direction race. */
     local_irq_enable();
     mdelay(1);
     local_irq_disable();
+
+    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        if ( !(irq_desc[irq].status & IRQ_GUEST) )
+            continue;
+        action = (irq_guest_action_t *)irq_desc[irq].action;
+        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
+    }
+
+    /* Flush the interrupt EOI stack. */
+    peoi = this_cpu(pending_eoi);
+    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
+        peoi[sp].ready = 1;
+    flush_ready_eoi(NULL);
 }
 #endif
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
@@ -1225,15 +1225,6 @@ int __cpu_disable(void)
 	if (cpu == 0)
 		return -EBUSY;
 
-	/*
-	 * Only S3 is using this path, and thus idle vcpus are running on all
-	 * APs when we are called. To support full cpu hotplug, other
-	 * notification mechanisms should be introduced (e.g., migrate vcpus
-	 * off this physical cpu before rendezvous point).
-	 */
-	if (!is_idle_vcpu(current))
-		return -EINVAL;
-
 	local_irq_disable();
 	clear_local_APIC();
 	/* Allow any queued timer interrupts to get serviced */
@@ -1249,6 +1240,9 @@ int __cpu_disable(void)
 	fixup_irqs(map);
 	/* It's now safe to remove this processor from the online map */
 	cpu_clear(cpu, cpu_online_map);
+
+	cpu_disable_scheduler();
+
 	return 0;
 }
 
@@ -1275,28 +1269,6 @@ static int take_cpu_down(void *unused)
 	return __cpu_disable();
 }
 
-/*
- * XXX: One important thing missed here is to migrate vcpus
- * from dead cpu to other online ones and then put whole
- * system into a stop state. It assures a safe environment
- * for a cpu hotplug/remove at normal running state.
- *
- * However for xen PM case, at this point:
- *  -> All other domains should be notified with PM event,
- *     and then in following states:
- *      * Suspend state, or
- *      * Paused state, which is a force step to all
- *        domains if they do nothing to suspend
- *  -> All vcpus of dom0 (except vcpu0) have already beem
- *     hot removed
- * with the net effect that all other cpus only have idle vcpu
- * running. In this special case, we can avoid vcpu migration
- * then and system can be considered in a stop state.
- *
- * So current cpu hotplug is a special version for PM specific
- * usage, and need more effort later for full cpu hotplug.
- * (ktian1)
- */
 int cpu_down(unsigned int cpu)
 {
 	int err = 0;
@@ -1307,6 +1279,12 @@ int cpu_down(unsigned int cpu)
 		goto out;
 	}
 
+	/* Can not offline BSP */
+	if (cpu == 0) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	if (!cpu_online(cpu)) {
 		err = -EINVAL;
 		goto out;
Index: xen-3.3.1-testing/xen/common/sched_credit.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/sched_credit.c
+++ xen-3.3.1-testing/xen/common/sched_credit.c
@@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
 
     BUG_ON( cpu != snext->vcpu->processor );
 
+    /* If this CPU is going offline we shouldn't steal work. */
+    if ( unlikely(!cpu_online(cpu)) )
+        goto out;
+
     if ( snext->pri == CSCHED_PRI_IDLE )
         CSCHED_STAT_CRANK(load_balance_idle);
     else if ( snext->pri == CSCHED_PRI_TS_OVER )
@@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
         return speer;
     }
 
+ out:
     /* Failed to find more important work elsewhere... */
     __runq_remove(snext);
     return snext;
Index: xen-3.3.1-testing/xen/common/schedule.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/schedule.c
+++ xen-3.3.1-testing/xen/common/schedule.c
@@ -268,6 +268,48 @@ void vcpu_force_reschedule(struct vcpu *
     }
 }
 
+/*
+ * This function is used by cpu_hotplug code from stop_machine context.
+ * Hence we can avoid needing to take the domlist read lock.
+ */
+void cpu_disable_scheduler(void)
+{
+    struct domain *d;
+    struct vcpu *v;
+    unsigned int cpu = smp_processor_id();
+
+    for_each_domain ( d )
+    {
+        for_each_vcpu ( d, v )
+        {
+            if ( is_idle_vcpu(v) )
+                continue;
+
+            if ( (cpus_weight(v->cpu_affinity) == 1) &&
+                 cpu_isset(cpu, v->cpu_affinity) )
+            {
+                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
+                       v->domain->domain_id, v->vcpu_id);
+                cpus_setall(v->cpu_affinity);
+            }
+
+            /*
+             * Migrate single-shot timers to CPU0. A new cpu will automatically
+             * be chosen when the timer is next re-set.
+             */
+            if ( v->singleshot_timer.cpu == cpu )
+                migrate_timer(&v->singleshot_timer, 0);
+
+            if ( v->processor == cpu )
+            {
+                set_bit(_VPF_migrating, &v->pause_flags);
+                vcpu_sleep_nosync(v);
+                vcpu_migrate(v);
+            }
+        }
+    }
+}
+
 static int __vcpu_set_affinity(
     struct vcpu *v, cpumask_t *affinity,
     bool_t old_lock_status, bool_t new_lock_status)
Index: xen-3.3.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/sched.h
+++ xen-3.3.1-testing/xen/include/xen/sched.h
@@ -531,6 +531,7 @@ void domain_unpause_by_systemcontroller(
 void cpu_init(void);
 
 void vcpu_force_reschedule(struct vcpu *v);
+void cpu_disable_scheduler(void);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
 void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);