# HG changeset patch
# User Keir Fraser
# Date 1221141982 -3600
# Node ID 4ffc70556000869d3c301452a99e4e524dd54b07
# Parent fba8dca321c2b99842af6624f24afb77c472184b
x86: Support CPU hotplug offline.

Signed-off-by: Shan Haitao
Signed-off-by: Keir Fraser

Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -751,9 +751,12 @@ __initcall(setup_dump_irqs);
 
 void fixup_irqs(cpumask_t map)
 {
-    unsigned int irq;
+    unsigned int irq, sp;
     static int warned;
+    irq_guest_action_t *action;
+    struct pending_eoi *peoi;
 
+    /* Direct all future interrupts away from this CPU. */
     for ( irq = 0; irq < NR_IRQS; irq++ )
     {
         cpumask_t mask;
@@ -772,8 +775,24 @@ void fixup_irqs(cpumask_t map)
             printk("Cannot set affinity for irq %i\n", irq);
     }
 
+    /* Service any interrupts that beat us in the re-direction race. */
     local_irq_enable();
     mdelay(1);
     local_irq_disable();
+
+    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
+    for ( irq = 0; irq < NR_IRQS; irq++ )
+    {
+        if ( !(irq_desc[irq].status & IRQ_GUEST) )
+            continue;
+        action = (irq_guest_action_t *)irq_desc[irq].action;
+        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
+    }
+
+    /* Flush the interrupt EOI stack. */
+    peoi = this_cpu(pending_eoi);
+    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
+        peoi[sp].ready = 1;
+    flush_ready_eoi(NULL);
 }
 #endif
Index: xen-3.3.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-3.3.1-testing/xen/arch/x86/smpboot.c
@@ -1224,15 +1224,6 @@ int __cpu_disable(void)
 	if (cpu == 0)
 		return -EBUSY;
 
-	/*
-	 * Only S3 is using this path, and thus idle vcpus are running on all
-	 * APs when we are called. To support full cpu hotplug, other
-	 * notification mechanisms should be introduced (e.g., migrate vcpus
-	 * off this physical cpu before rendezvous point).
-	 */
-	if (!is_idle_vcpu(current))
-		return -EINVAL;
-
 	local_irq_disable();
 	clear_local_APIC();
 	/* Allow any queued timer interrupts to get serviced */
@@ -1248,6 +1239,9 @@ int __cpu_disable(void)
 	fixup_irqs(map);
 	/* It's now safe to remove this processor from the online map */
 	cpu_clear(cpu, cpu_online_map);
+
+	cpu_disable_scheduler();
+
 	return 0;
 }
 
@@ -1274,28 +1268,6 @@ static int take_cpu_down(void *unused)
 	return __cpu_disable();
 }
 
-/*
- * XXX: One important thing missed here is to migrate vcpus
- * from dead cpu to other online ones and then put whole
- * system into a stop state. It assures a safe environment
- * for a cpu hotplug/remove at normal running state.
- *
- * However for xen PM case, at this point:
- *     -> All other domains should be notified with PM event,
- *        and then in following states:
- *         * Suspend state, or
- *         * Paused state, which is a force step to all
- *           domains if they do nothing to suspend
- *     -> All vcpus of dom0 (except vcpu0) have already beem
- *        hot removed
- * with the net effect that all other cpus only have idle vcpu
- * running. In this special case, we can avoid vcpu migration
- * then and system can be considered in a stop state.
- *
- * So current cpu hotplug is a special version for PM specific
- * usage, and need more effort later for full cpu hotplug.
- * (ktian1)
- */
 int cpu_down(unsigned int cpu)
 {
 	int err = 0;
@@ -1306,6 +1278,12 @@ int cpu_down(unsigned int cpu)
 		goto out;
 	}
 
+	/* Cannot offline the BSP. */
+	if (cpu == 0) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	if (!cpu_online(cpu)) {
 		err = -EINVAL;
 		goto out;
Index: xen-3.3.1-testing/xen/common/sched_credit.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/sched_credit.c
+++ xen-3.3.1-testing/xen/common/sched_credit.c
@@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
 
     BUG_ON( cpu != snext->vcpu->processor );
 
+    /* If this CPU is going offline we shouldn't steal work. */
+    if ( unlikely(!cpu_online(cpu)) )
+        goto out;
+
     if ( snext->pri == CSCHED_PRI_IDLE )
         CSCHED_STAT_CRANK(load_balance_idle);
     else if ( snext->pri == CSCHED_PRI_TS_OVER )
@@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
             return speer;
     }
 
+ out:
     /* Failed to find more important work elsewhere... */
     __runq_remove(snext);
     return snext;
Index: xen-3.3.1-testing/xen/common/schedule.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/schedule.c
+++ xen-3.3.1-testing/xen/common/schedule.c
@@ -268,6 +268,48 @@ void vcpu_force_reschedule(struct vcpu *
     }
 }
 
+/*
+ * This function is used by cpu_hotplug code from stop_machine context.
+ * Hence we can avoid needing to take the domlist_read_lock.
+ */
+void cpu_disable_scheduler(void)
+{
+    struct domain *d;
+    struct vcpu *v;
+    unsigned int cpu = smp_processor_id();
+
+    for_each_domain ( d )
+    {
+        for_each_vcpu ( d, v )
+        {
+            if ( is_idle_vcpu(v) )
+                continue;
+
+            if ( (cpus_weight(v->cpu_affinity) == 1) &&
+                 cpu_isset(cpu, v->cpu_affinity) )
+            {
+                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
+                       v->domain->domain_id, v->vcpu_id);
+                cpus_setall(v->cpu_affinity);
+            }
+
+            /*
+             * Migrate single-shot timers to CPU0. A new cpu will automatically
+             * be chosen when the timer is next re-set.
+             */
+            if ( v->singleshot_timer.cpu == cpu )
+                migrate_timer(&v->singleshot_timer, 0);
+
+            if ( v->processor == cpu )
+            {
+                set_bit(_VPF_migrating, &v->pause_flags);
+                vcpu_sleep_nosync(v);
+                vcpu_migrate(v);
+            }
+        }
+    }
+}
+
 static int __vcpu_set_affinity(
     struct vcpu *v, cpumask_t *affinity,
     bool_t old_lock_status, bool_t new_lock_status)
Index: xen-3.3.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/sched.h
+++ xen-3.3.1-testing/xen/include/xen/sched.h
@@ -531,6 +531,7 @@ void domain_unpause_by_systemcontroller(
 void cpu_init(void);
 
 void vcpu_force_reschedule(struct vcpu *v);
+void cpu_disable_scheduler(void);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
 void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
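
Two short standalone sketches follow; they are illustrative models of the patch's trickier steps, not Xen code.

The EOI-stack flush in fixup_irqs() is the subtle part of the IRQ cleanup: guest-bound interrupts acknowledged on this CPU are still waiting for their guests' EOIs, and those EOIs can never be delivered here once the CPU goes offline. The minimal sketch below models that idea; struct pending_eoi, the stack layout, and flush_ready_eoi() are toy stand-ins for the Xen structures of the same names.

    #include <stdio.h>

    #define NR_STACK 8

    struct pending_eoi {
        int irq;    /* vector still awaiting a guest EOI */
        int ready;  /* safe to issue the EOI ourselves?  */
    };

    static struct pending_eoi peoi[NR_STACK];
    static int peoi_sp = 3;  /* pretend three acks are outstanding */

    /* Toy flush_ready_eoi(): pop and EOI every entry marked ready. */
    static void flush_ready_eoi(void)
    {
        while (peoi_sp > 0 && peoi[peoi_sp - 1].ready) {
            peoi_sp--;
            printf("EOI for irq %d\n", peoi[peoi_sp].irq);
        }
    }

    int main(void)
    {
        int sp;

        peoi[0].irq = 9;
        peoi[1].irq = 12;
        peoi[2].irq = 14;

        /* The dying CPU will never see the guests' EOIs, so force-ready
         * every stacked entry and flush, as the patch does. */
        for (sp = 0; sp < peoi_sp; sp++)
            peoi[sp].ready = 1;
        flush_ready_eoi();

        return 0;
    }

Note the LIFO order of the output (irq 14, 12, 9): pending EOIs form a stack because interrupts nest.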
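cpu_disable_scheduler() contains one non-obvious rule: a vcpu pinned exclusively to the dying CPU would become unschedulable, so its affinity is forcibly reset to all CPUs before it is migrated. A self-contained sketch of just that rule, using a plain unsigned long as a toy cpumask_t rather than Xen's real mask type (maybe_break_affinity is a hypothetical helper name):

    #include <stdio.h>

    typedef unsigned long cpumask_t;  /* toy mask: one bit per CPU */

    static int cpus_weight(cpumask_t m)
    {
        return __builtin_popcountl(m);  /* GCC/Clang builtin */
    }

    /* Toy version of the affinity-breaking rule in cpu_disable_scheduler(). */
    static void maybe_break_affinity(cpumask_t *affinity, int dying_cpu,
                                     int nr_cpus)
    {
        if ((cpus_weight(*affinity) == 1) &&
            (*affinity & (1UL << dying_cpu))) {
            printf("Breaking affinity (vcpu was pinned to cpu %d)\n",
                   dying_cpu);
            *affinity = (1UL << nr_cpus) - 1;  /* cpus_setall() equivalent */
        }
    }

    int main(void)
    {
        cpumask_t pinned = 1UL << 2;  /* allowed only on CPU 2, which dies */
        cpumask_t roomy  = 0x0fUL;    /* allowed on CPUs 0-3 */

        maybe_break_affinity(&pinned, 2, 4);  /* reset to 0x0f */
        maybe_break_affinity(&roomy, 2, 4);   /* left alone    */

        printf("pinned=%#lx roomy=%#lx\n", pinned, roomy);
        return 0;
    }

In the patch itself this logic runs only after the CPU has been cleared from cpu_online_map in __cpu_disable(), so vcpu_migrate() cannot choose the dying CPU; the matching !cpu_online(cpu) early-out added to csched_load_balance() ensures the dying CPU also cannot steal work back while it drains.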