xen/cpupools-core.patch

3350 lines
103 KiB
Diff
Raw Normal View History

From: Juergen Gross <juergen.gross@ts.fujitsu.com>
Index: xen-4.0.1-testing/xen/arch/x86/acpi/power.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/acpi/power.c
+++ xen-4.0.1-testing/xen/arch/x86/acpi/power.c
@@ -234,7 +234,7 @@ static int enter_state(u32 state)
return error;
}
-static long enter_state_helper(void *data)
+static long enter_state_helper(void *hdl, void *data)
{
struct acpi_sleep_info *sinfo = (struct acpi_sleep_info *)data;
return enter_state(sinfo->sleep_state);
@@ -265,7 +265,7 @@ int acpi_enter_sleep(struct xenpf_enter_
acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
acpi_sinfo.sleep_state = sleep->sleep_state;
- return continue_hypercall_on_cpu(0, enter_state_helper, &acpi_sinfo);
+ return continue_hypercall_on_cpu(0, NULL, enter_state_helper, &acpi_sinfo);
}
static int acpi_get_wake_status(void)
Index: xen-4.0.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/domain.c
+++ xen-4.0.1-testing/xen/arch/x86/domain.c
@@ -1522,42 +1522,52 @@ void sync_vcpu_execstate(struct vcpu *v)
}
struct migrate_info {
- long (*func)(void *data);
+ struct tasklet tasklet;
+ long (*func)(void *hdl, void *data);
void *data;
void (*saved_schedule_tail)(struct vcpu *);
- cpumask_t saved_affinity;
- unsigned int nest;
+ volatile int nest;
+ long ret;
+ struct vcpu *v;
};
static void continue_hypercall_on_cpu_helper(struct vcpu *v)
{
struct cpu_user_regs *regs = guest_cpu_user_regs();
struct migrate_info *info = v->arch.continue_info;
- cpumask_t mask = info->saved_affinity;
void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
- regs->eax = info->func(info->data);
+ regs->eax = info->ret;
- if ( info->nest-- == 0 )
- {
- xfree(info);
- v->arch.schedule_tail = saved_schedule_tail;
- v->arch.continue_info = NULL;
- vcpu_unlock_affinity(v, &mask);
- }
+ tasklet_kill(&info->tasklet);
+ xfree(info);
+ v->arch.schedule_tail = saved_schedule_tail;
+ v->arch.continue_info = NULL;
(*saved_schedule_tail)(v);
}
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
+static void continue_hypercall_on_cpu_tasklet(struct migrate_info *info)
+{
+    /* Call the deferred function and keep its result for the helper. */
+    info->ret = info->func((void *)info, info->data);
+
+    /* No nested continuation outstanding: release the paused vcpu. */
+    if ( info->nest-- == 0 )
+        vcpu_unpause(info->v);
+}
+
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+ long (*func)(void *hdl, void *data), void *data)
{
struct vcpu *v = current;
- struct migrate_info *info;
- cpumask_t mask = cpumask_of_cpu(cpu);
- int rc;
+ struct migrate_info *info = (struct migrate_info *)hdl;
if ( cpu == smp_processor_id() )
- return func(data);
+ return func(info, data);
+
+ if ( info != NULL )
+ v = info->v;
info = v->arch.continue_info;
if ( info == NULL )
@@ -1566,16 +1576,12 @@ int continue_hypercall_on_cpu(int cpu, l
if ( info == NULL )
return -ENOMEM;
- rc = vcpu_lock_affinity(v, &mask);
- if ( rc )
- {
- xfree(info);
- return rc;
- }
-
info->saved_schedule_tail = v->arch.schedule_tail;
- info->saved_affinity = mask;
info->nest = 0;
+ info->v = v;
+ tasklet_init(&info->tasklet,
+ (void(*)(unsigned long))continue_hypercall_on_cpu_tasklet,
+ (unsigned long)info);
v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
v->arch.continue_info = info;
@@ -1583,17 +1589,17 @@ int continue_hypercall_on_cpu(int cpu, l
else
{
BUG_ON(info->nest != 0);
- rc = vcpu_locked_change_affinity(v, &mask);
- if ( rc )
- return rc;
info->nest++;
}
info->func = func;
info->data = data;
+ vcpu_pause_nosync(v);
+ tasklet_schedule_cpu(&info->tasklet, cpu);
+ raise_softirq(SCHEDULE_SOFTIRQ);
+
/* Dummy return value will be overwritten by new schedule_tail. */
- BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
return 0;
}
Index: xen-4.0.1-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/domain_build.c
+++ xen-4.0.1-testing/xen/arch/x86/domain_build.c
@@ -9,6 +9,7 @@
#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/event.h>
@@ -84,7 +85,7 @@ integer_param("dom0_max_vcpus", opt_dom0
struct vcpu *__init alloc_dom0_vcpu0(void)
{
if ( opt_dom0_max_vcpus == 0 )
- opt_dom0_max_vcpus = num_online_cpus();
+ opt_dom0_max_vcpus = num_cpupool_cpus(cpupool0);
if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
opt_dom0_max_vcpus = MAX_VIRT_CPUS;
@@ -287,7 +288,7 @@ int __init construct_dom0(
unsigned long _initrd_start, unsigned long initrd_len,
char *cmdline)
{
- int i, rc, compatible, compat32, order, machine;
+ int i, cpu, rc, compatible, compat32, order, machine;
struct cpu_user_regs *regs;
unsigned long pfn, mfn;
unsigned long nr_pages;
@@ -786,8 +787,12 @@ int __init construct_dom0(
printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus);
+ cpu = first_cpu(cpupool0->cpu_valid);
for ( i = 1; i < opt_dom0_max_vcpus; i++ )
- (void)alloc_vcpu(d, i, i % num_online_cpus());
+ {
+ cpu = cycle_cpu(cpu, cpupool0->cpu_valid);
+ (void)alloc_vcpu(d, i, cpu);
+ }
/* Set up CR3 value for write_ptbase */
if ( paging_mode_enabled(d) )
Index: xen-4.0.1-testing/xen/arch/x86/microcode.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/microcode.c
+++ xen-4.0.1-testing/xen/arch/x86/microcode.c
@@ -114,7 +114,7 @@ static int microcode_update_cpu(const vo
return err;
}
-static long do_microcode_update(void *_info)
+static long do_microcode_update(void *hdl, void *_info)
{
struct microcode_info *info = _info;
int error;
@@ -127,7 +127,8 @@ static long do_microcode_update(void *_i
info->cpu = next_cpu(info->cpu, cpu_online_map);
if ( info->cpu < NR_CPUS )
- return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+ return continue_hypercall_on_cpu(info->cpu, hdl,
+ do_microcode_update, info);
error = info->error;
xfree(info);
@@ -160,5 +161,6 @@ int microcode_update(XEN_GUEST_HANDLE(co
info->error = 0;
info->cpu = first_cpu(cpu_online_map);
- return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+ return continue_hypercall_on_cpu(info->cpu, NULL,
+ do_microcode_update, info);
}
Index: xen-4.0.1-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/mm.c
+++ xen-4.0.1-testing/xen/arch/x86/mm.c
@@ -243,7 +243,7 @@ void __init arch_init_memory(void)
* Any Xen-heap pages that we will allow to be mapped will have
* their domain field set to dom_xen.
*/
- dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0);
+ dom_xen = domain_create(DOMID_XEN, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_xen == NULL);
/*
@@ -251,14 +251,14 @@ void __init arch_init_memory(void)
* This domain owns I/O pages that are within the range of the page_info
* array. Mappings occur at the priv of the caller.
*/
- dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0);
+ dom_io = domain_create(DOMID_IO, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_io == NULL);
/*
* Initialise our DOMID_IO domain.
* This domain owns sharable pages.
*/
- dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0);
+ dom_cow = domain_create(DOMID_COW, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_cow == NULL);
/* First 1MB of RAM is historically marked as I/O. */
Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
@@ -19,6 +19,7 @@
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/acpi.h>
+#include <xen/sched-if.h>
#include <asm/current.h>
#include <public/platform.h>
#include <acpi/cpufreq/processor_perf.h>
@@ -48,12 +49,12 @@ static DEFINE_PER_CPU(uint64_t, freq);
extern int set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
-static long cpu_frequency_change_helper(void *data)
+static long cpu_frequency_change_helper(void *hdl, void *data)
{
return cpu_frequency_change(this_cpu(freq));
}
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
{
int cpu = (unsigned long)data;
return cpu_down(cpu);
@@ -314,7 +315,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
if ( op->u.change_freq.flags || !cpu_online(op->u.change_freq.cpu) )
break;
per_cpu(freq, op->u.change_freq.cpu) = op->u.change_freq.freq;
- ret = continue_hypercall_on_cpu(op->u.change_freq.cpu,
+ ret = continue_hypercall_on_cpu(op->u.change_freq.cpu, NULL,
cpu_frequency_change_helper,
NULL);
break;
@@ -406,7 +407,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
g_info = &op->u.pcpu_info;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if ( !spin_trylock(&cpu_add_remove_lock) )
+ if ( !spin_trylock(&cpupool_lock) )
{
ret = -EBUSY;
break;
@@ -429,7 +430,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
g_info->max_present = last_cpu(cpu_present_map);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
ret = copy_to_guest(u_xenpf_op, op, 1) ? -EFAULT : 0;
}
@@ -470,7 +471,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
break;
}
ret = continue_hypercall_on_cpu(
- 0, cpu_down_helper, (void *)(unsigned long)cpu);
+ 0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
break;
}
break;
Index: xen-4.0.1-testing/xen/arch/x86/setup.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/setup.c
+++ xen-4.0.1-testing/xen/arch/x86/setup.c
@@ -2,6 +2,7 @@
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/serial.h>
#include <xen/softirq.h>
@@ -245,7 +246,7 @@ static void __init init_idle_domain(void
/* Domain creation requires that scheduler structures are initialised. */
scheduler_init();
- idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
+ idle_domain = domain_create(IDLE_DOMAIN_ID, CPUPOOLID_NONE, 0, 0);
if ( idle_domain == NULL )
BUG();
idle_domain->vcpu = idle_vcpu;
@@ -1122,8 +1123,13 @@ void __init __start_xen(unsigned long mb
if ( !tboot_protect_mem_regions() )
panic("Could not protect TXT memory regions\n");
+ /* Create initial cpupool 0. */
+ cpupool0 = cpupool_create(0, NULL);
+ if ( (cpupool0 == NULL) || cpupool0_cpu_assign(cpupool0) )
+ panic("Error creating cpupool 0\n");
+
/* Create initial domain 0. */
- dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
+ dom0 = domain_create(0, 0, DOMCRF_s3_integrity, DOM0_SSIDREF);
if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) )
panic("Error creating domain 0\n");
Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.0.1-testing/xen/arch/x86/smpboot.c
@@ -39,6 +39,7 @@
#include <xen/mm.h>
#include <xen/domain.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/softirq.h>
@@ -104,7 +105,6 @@ static void map_cpu_to_logical_apicid(vo
DEFINE_PER_CPU(int, cpu_state) = { 0 };
void *stack_base[NR_CPUS];
-DEFINE_SPINLOCK(cpu_add_remove_lock);
/*
* The bootstrap kernel entry code has set these up. Save them for
@@ -821,7 +821,7 @@ wakeup_secondary_cpu(int phys_apicid, un
extern cpumask_t cpu_initialized;
/*
- * Caller should hold cpu_add_remove_lock if not called when booting
+ * Caller should hold cpupool_lock if not called when booting
*/
int alloc_cpu_id(void)
{
@@ -1306,10 +1306,11 @@ int __cpu_disable(void)
__sync_lazy_execstate();
/* It's now safe to remove this processor from the online map */
+ cpu_clear(cpu, cpupool0->cpu_valid);
cpu_clear(cpu, cpu_online_map);
fixup_irqs();
- cpu_disable_scheduler();
+ cpu_disable_scheduler(cpu, 0);
return 0;
}
@@ -1343,10 +1344,10 @@ int cpu_down(unsigned int cpu)
int err = 0;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
- if (num_online_cpus() == 1) {
+ if ((!cpu_isset(cpu, cpupool0->cpu_valid)) || (cpus_weight(cpupool0->cpu_valid) == 1)) {
err = -EBUSY;
goto out;
}
@@ -1379,7 +1380,7 @@ int cpu_down(unsigned int cpu)
out:
if (!err)
send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return err;
}
@@ -1388,7 +1389,7 @@ int cpu_up(unsigned int cpu)
int err = 0;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
if (cpu_online(cpu)) {
@@ -1406,7 +1407,7 @@ int cpu_up(unsigned int cpu)
out:
if (!err)
send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return err;
}
@@ -1492,14 +1493,14 @@ int cpu_add(uint32_t apic_id, uint32_t a
return -EEXIST;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
cpu = mp_register_lapic(apic_id, 1);
if (cpu < 0)
{
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return cpu;
}
@@ -1516,7 +1517,7 @@ int cpu_add(uint32_t apic_id, uint32_t a
"Setup node failed for pxm %x\n", pxm);
x86_acpiid_to_apicid[acpi_id] = 0xff;
mp_unregister_lapic(apic_id, cpu);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return node;
}
apicid_to_node[apic_id] = node;
@@ -1524,7 +1525,7 @@ int cpu_add(uint32_t apic_id, uint32_t a
srat_detect_node(cpu);
numa_add_cpu(cpu);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu);
return cpu;
}
@@ -1568,6 +1569,7 @@ int __devinit __cpu_up(unsigned int cpu)
process_pending_softirqs();
}
+ cpupool_cpu_add(cpu);
cpufreq_add_cpu(cpu);
return 0;
}
Index: xen-4.0.1-testing/xen/arch/x86/sysctl.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/sysctl.c
+++ xen-4.0.1-testing/xen/arch/x86/sysctl.c
@@ -29,7 +29,7 @@
#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
{
int cpu = (unsigned long)data;
return cpu_down(cpu);
@@ -122,7 +122,7 @@ long arch_do_sysctl(
break;
case XEN_SYSCTL_CPU_HOTPLUG_OFFLINE:
ret = continue_hypercall_on_cpu(
- 0, cpu_down_helper, (void *)(unsigned long)cpu);
+ 0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
break;
case XEN_SYSCTL_CPU_HOTPLUG_STATUS:
ret = 0;
Index: xen-4.0.1-testing/xen/common/Makefile
===================================================================
--- xen-4.0.1-testing.orig/xen/common/Makefile
+++ xen-4.0.1-testing/xen/common/Makefile
@@ -1,5 +1,6 @@
obj-y += bitmap.o
obj-y += cpu.o
+obj-y += cpupool.o
obj-y += domctl.o
obj-y += domain.o
obj-y += event_channel.o
Index: xen-4.0.1-testing/xen/common/cpupool.c
===================================================================
--- /dev/null
+++ xen-4.0.1-testing/xen/common/cpupool.c
@@ -0,0 +1,585 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * Generic cpupool-handling functions.
+ *
+ * Cpupools are a feature to have configurable scheduling domains. Each
+ * cpupool runs an own scheduler on a dedicated set of physical cpus.
+ * A domain is bound to one cpupool at any time, but it can be moved to
+ * another cpupool.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+
+#define for_each_cpupool(ptr) \
+ for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
+
+struct cpupool *cpupool0; /* Initial cpupool with Dom0 */
+cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */
+
+static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
+
+static int cpupool0_max_cpus; /* boot parameter; 0 selects all online cpus */
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static int cpupool_moving_cpu = -1; /* cpu being unassigned, -1 if none */
+static struct cpupool *cpupool_cpu_moving = NULL; /* pool that cpu is leaving */
+
+/* cpupool lock: be careful, this lock is sometimes released on a different
+ * cpu than the one it was obtained on!
+ */
+DEFINE_SPINLOCK(cpupool_lock);
+
+DEFINE_PER_CPU(struct cpupool *, cpupool); /* pool each cpu is assigned to */
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+ return xmalloc(struct cpupool); /* caller checks for NULL and clears it */
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+ xfree(c); /* callers pass pools that are not (or no longer) on cpupool_list */
+}
+
+/*
+ * Look up a cpupool by its id. To be called with cpupool_lock held.
+ * The walk relies on cpupool_list being sorted by id.
+ * exact != 0: return the pool with exactly this id, NULL if there is none.
+ * exact == 0: return the pool with this id or, failing that, the pool with
+ *             the next larger id; NULL if no such pool exists.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+    struct cpupool *pool;
+
+    for ( pool = cpupool_list; pool != NULL; pool = pool->next )
+        if ( pool->cpupool_id >= id )
+            break;
+
+    if ( (pool != NULL) && (pool->cpupool_id == id) )
+        return pool;
+    return exact ? NULL : pool;
+}
+
+/*
+ * Create a new cpupool using the given pool id and scheduler.
+ * poolid == CPUPOOLID_NONE selects the id after the last pool in the list.
+ * Returns the new cpupool structure, or NULL on failure:
+ * - no memory
+ * - poolid already used
+ * - unknown scheduler (schedule_init_global() fails)
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+    struct cpupool *c = alloc_cpupool_struct();
+    struct cpupool **q;
+    int last = 0;
+
+    if ( c == NULL )
+        return NULL;
+    memset(c, 0, sizeof(*c));
+
+    printk(XENLOG_DEBUG "cpupool_create(pool=%d,sched=%s)\n", poolid, sched);
+    spin_lock(&cpupool_lock);
+
+    /* Find the slot that keeps the list sorted by cpupool_id. */
+    for_each_cpupool(q)
+    {
+        last = (*q)->cpupool_id;
+        if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+            break;
+    }
+    if ( (*q != NULL) && ((*q)->cpupool_id == poolid) )
+    {
+        /* The requested id is already in use. */
+        spin_unlock(&cpupool_lock);
+        free_cpupool_struct(c);
+        return NULL;
+    }
+    c->next = *q;
+    *q = c;
+    c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+    if ( schedule_init_global(sched, &(c->sched)) )
+    {
+        /* cpupool_destroy() takes the lock itself to unlink c again. */
+        spin_unlock(&cpupool_lock);
+        cpupool_destroy(c);
+        return NULL;
+    }
+    spin_unlock(&cpupool_lock);
+
+    printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+           c->sched.name, c->sched.opt_name);
+    return c;
+}
+/*
+ * Destroy the given cpupool: unlink it, shut down its scheduler, free it.
+ * Returns 0 on success, 1 if the pool is not in the list, still has
+ * domains, or still has cpus assigned.
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+    struct cpupool **link;
+    int rc = 1;
+
+    spin_lock(&cpupool_lock);
+    for ( link = &cpupool_list; *link != NULL; link = &(*link)->next )
+        if ( *link == c )
+            break;
+    if ( (*link == c) && (c->n_dom == 0) && !cpus_weight(c->cpu_valid) )
+    {
+        *link = c->next;
+        rc = 0;
+    }
+    spin_unlock(&cpupool_lock);
+    if ( rc )
+        return rc;
+
+    printk(XENLOG_DEBUG "cpupool_destroy(pool=%d)\n", c->cpupool_id);
+    schedule_deinit_global(&(c->sched));
+    free_cpupool_struct(c);
+    return 0;
+}
+
+/* Assign a specific cpu to a cpupool; cpupool_lock must be held. Fails with
+ * -EBUSY if the cpu is currently being moved out of a different pool. */
+static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+    int in_transit = (cpupool_moving_cpu == cpu);
+
+    if ( in_transit && (c != cpupool_cpu_moving) )
+        return -EBUSY;
+    per_cpu(cpupool, cpu) = c;
+    schedule_cpu_switch(cpu, c);
+    cpu_clear(cpu, cpupool_free_cpus);
+    if ( in_transit )
+    {
+        cpupool_moving_cpu = -1;
+        cpupool_cpu_moving = NULL;
+    }
+    cpu_set(cpu, c->cpu_valid);
+    return 0;
+}
+
+/*
+ * Assign free physical cpus to a cpupool.
+ * The cpus assigned are the free cpus with the lowest ids; the walk stops
+ * once ncpu of them have been assigned or no free cpu is left.
+ * Returns the number of cpus actually assigned.
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+    int cpu;
+    int assigned = 0;
+
+    spin_lock(&cpupool_lock);
+    for_each_cpu_mask(cpu, cpupool_free_cpus)
+    {
+        if ( !cpupool_assign_cpu_locked(c, cpu) )
+            assigned++;
+        if ( assigned == ncpu )
+            break;
+    }
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_assign_ncpu(pool=%d,ncpu=%d) rc %d\n",
+           c->cpupool_id, ncpu, assigned);
+    return assigned;
+}
+
+/* Second stage of cpupool_unassign_cpu(), run via continue_hypercall_on_cpu. */
+static long cpupool_unassign_cpu_helper(void *hdl, void *info)
+{
+    struct cpupool *pool = info;
+    int cpu = cpupool_moving_cpu, pool_id = pool->cpupool_id;
+    long rc = cpu_disable_scheduler(cpu, 1);
+
+    /* Entered with cpupool_lock held by cpupool_unassign_cpu(). */
+    cpu_set(cpu, cpupool_free_cpus);
+    if ( rc == 0 )
+    {
+        schedule_cpu_switch(cpu, NULL);
+        per_cpu(cpupool, cpu) = NULL;
+        cpupool_moving_cpu = -1;
+        cpupool_cpu_moving = NULL;
+    }
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %ld\n",
+           pool_id, cpu, rc);
+    return rc;
+}
+
+/*
+ * Unassign a specific cpu from a cpupool.
+ * We must not run on the cpu being unassigned, so the main work is done by
+ * cpupool_unassign_cpu_helper() via continue_hypercall_on_cpu(); on that
+ * path cpupool_lock is taken here and released by the helper.
+ * If the cpu is the last one of the cpupool, no active domain may be bound
+ * to the pool. Dying domains are moved to cpupool0 as they might be zombies.
+ * Possible failures:
+ * - -EBUSY: a different cpu is already being moved
+ * - -EBUSY: last cpu and still active domains in cpupool
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+ int work_cpu;
+ int ret;
+ struct domain *d;
+ int cpupool_id = c->cpupool_id;
+
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
+ cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ ret = -EBUSY; /* only one cpu may be in transit at a time */
+ if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) )
+ goto out;
+
+ ret = 0; /* cpu neither in this pool nor in transit: nothing to do */
+ if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) )
+ goto out;
+
+ if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) &&
+ (cpu != cpupool_moving_cpu) )
+ {
+ for_each_domain(d)
+ {
+ if ( d->cpupool != c )
+ continue;
+ if ( !d->is_dying )
+ {
+ ret = -EBUSY;
+ break;
+ }
+ c->n_dom--;
+ ret = sched_move_domain(d, cpupool0);
+ if ( ret )
+ {
+ c->n_dom++;
+ break;
+ }
+ cpupool0->n_dom++;
+ }
+ if ( ret )
+ goto out;
+ }
+ cpupool_moving_cpu = cpu;
+ cpupool_cpu_moving = c;
+ cpu_clear(cpu, c->cpu_valid);
+ work_cpu = smp_processor_id(); /* stay on the current cpu when possible */
+ if ( work_cpu == cpu )
+ {
+ work_cpu = first_cpu(cpupool0->cpu_valid);
+ if ( work_cpu == cpu )
+ work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+ }
+ return continue_hypercall_on_cpu(work_cpu, NULL,
+ cpupool_unassign_cpu_helper, c); /* helper drops cpupool_lock */
+
+out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * Assign cpus to the default cpupool (cpupool0).
+ * All online cpus are used unless the pool0_max_cpus boot parameter asks
+ * for fewer. The argument is not used; cpupool0 is always the target.
+ * Returns 0 on success, 1 if no cpu could be assigned.
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+    if ( (cpupool0_max_cpus == 0) || (cpupool0_max_cpus > num_online_cpus()) )
+        cpupool0_max_cpus = num_online_cpus();
+
+    /* Fail only if not a single cpu ended up in the pool. */
+    return cpupool_assign_ncpu(cpupool0, cpupool0_max_cpus) ? 0 : 1;
+}
+
+/*
+ * Add a new domain to the cpupool with the given id.
+ * Nothing is done (and 0 returned) for poolid == CPUPOOLID_NONE.
+ * Returns 0 on success, 1 if the pool does not exist or has no cpu
+ * assigned to it.
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+    struct cpupool *c;
+    int n_dom;
+
+    if ( poolid == CPUPOOLID_NONE )
+        return 0;
+
+    spin_lock(&cpupool_lock);
+    c = cpupool_find_by_id(poolid, 1);
+    if ( (c == NULL) || !cpus_weight(c->cpu_valid) )
+    {
+        spin_unlock(&cpupool_lock);
+        return 1;
+    }
+    n_dom = ++c->n_dom;
+    d->cpupool = c;
+    spin_unlock(&cpupool_lock);
+
+    printk(XENLOG_DEBUG "cpupool_add_domain(dom=%d,pool=%d) n_dom %d\n",
+           d->domain_id, poolid, n_dom);
+    return 0;
+}
+
+/*
+ * Remove a domain from its cpupool.
+ * Domains that are not bound to any cpupool are left alone.
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+    struct cpupool *pool;
+    int pool_id, remaining;
+
+    if ( d->cpupool == NULL )
+        return;
+    spin_lock(&cpupool_lock);
+    pool = d->cpupool;
+    pool_id = pool->cpupool_id;
+    remaining = --pool->n_dom;
+    d->cpupool = NULL;
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
+           d->domain_id, pool_id, remaining);
+}
+
+/*
+ * Called to add a new cpu to pool administration.
+ * A hotplugged cpu is put into cpupool0 so that it can be used by dom0.
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+    if ( cpupool0 != NULL )
+    {
+        spin_lock(&cpupool_lock);
+        cpu_set(cpu, cpupool_free_cpus);
+        (void)cpupool_assign_cpu_locked(cpupool0, cpu);
+        spin_unlock(&cpupool_lock);
+    }
+}
+
+/*
+ * Do cpupool related sysctl operations; returns 0 or a negative errno value.
+ */
+int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
+{
+    int ret;
+    struct cpupool *c;
+
+    switch ( op->op )
+    {
+    /* Only OP_INFO may match the next larger pool id; other ops are exact. */
+    case XEN_SYSCTL_CPUPOOL_OP_CREATE:
+    {
+        int poolid;
+        struct scheduler *sched;
+
+        poolid = (op->cpupool_id == XEN_SYSCTL_CPUPOOL_PAR_ANY) ?
+            CPUPOOLID_NONE: op->cpupool_id;
+        sched = scheduler_get_by_id(op->sched_id);
+        ret = -ENOENT;
+        if ( sched == NULL )
+            break;
+        ret = 0;
+        c = cpupool_create(poolid, sched->opt_name);
+        if ( c == NULL )
+            ret = -EINVAL;
+        else
+            op->cpupool_id = c->cpupool_id;
+    }
+    break;
+
+    case XEN_SYSCTL_CPUPOOL_OP_DESTROY:
+    {
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 1);
+        spin_unlock(&cpupool_lock);
+        ret = -ENOENT;
+        if ( c == NULL )
+            break;
+        ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+    }
+    break;
+
+    case XEN_SYSCTL_CPUPOOL_OP_INFO:
+    {
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 0); /* inexact: allows iterating */
+        spin_unlock(&cpupool_lock);
+        ret = -ENOENT;
+        if ( c == NULL )
+            break;
+        op->cpupool_id = c->cpupool_id;
+        op->sched_id = c->sched.sched_id;
+        op->n_dom = c->n_dom;
+        cpumask_to_xenctl_cpumap(&(op->cpumap), &(c->cpu_valid));
+        ret = 0;
+    }
+    break;
+
+    case XEN_SYSCTL_CPUPOOL_OP_ADDCPU:
+    {
+        unsigned cpu;
+
+        cpu = op->cpu;
+        printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d)\n",
+               op->cpupool_id, cpu);
+        spin_lock(&cpupool_lock);
+        if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
+            cpu = first_cpu(cpupool_free_cpus);
+        ret = -EINVAL;
+        if ( cpu >= NR_CPUS )
+            goto addcpu_out;
+        ret = -EBUSY;
+        if ( !cpu_isset(cpu, cpupool_free_cpus) )
+            goto addcpu_out;
+        c = cpupool_find_by_id(op->cpupool_id, 1); /* exact id only */
+        ret = -ENOENT;
+        if ( c == NULL )
+            goto addcpu_out;
+        ret = cpupool_assign_cpu_locked(c, cpu);
+addcpu_out:
+        spin_unlock(&cpupool_lock);
+        printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
+               op->cpupool_id, cpu, ret);
+    }
+    break;
+
+    case XEN_SYSCTL_CPUPOOL_OP_RMCPU:
+    {
+        unsigned cpu;
+
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 1); /* exact id only */
+        spin_unlock(&cpupool_lock); /* NOTE(review): c is used unlocked below */
+        ret = -ENOENT;
+        if ( c == NULL )
+            break;
+        cpu = op->cpu;
+        if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
+            cpu = last_cpu(c->cpu_valid);
+        ret = -EINVAL;
+        if ( cpu >= NR_CPUS )
+            break;
+        /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+         * will continue after the local return
+         */
+        ret = cpupool_unassign_cpu(c, cpu);
+    }
+    break;
+
+    case XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN:
+    {
+        struct domain *d;
+
+        ret = -EINVAL;
+        if ( op->domid == 0 )
+            break;
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(op->domid);
+        if ( d == NULL )
+            break;
+        if ( d->cpupool == NULL )
+        {
+            ret = -EINVAL;
+            rcu_unlock_domain(d);
+            break;
+        }
+        if ( op->cpupool_id == d->cpupool->cpupool_id )
+        {
+            ret = 0;
+            rcu_unlock_domain(d);
+            break;
+        }
+        printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d\n",
+               d->domain_id, op->cpupool_id);
+        ret = -ENOENT;
+        spin_lock(&cpupool_lock);
+        c = cpupool_find_by_id(op->cpupool_id, 1);
+        if ( (c != NULL) && cpus_weight(c->cpu_valid) )
+        {
+            d->cpupool->n_dom--;
+            ret = sched_move_domain(d, c);
+            if ( ret )
+                d->cpupool->n_dom++;
+            else
+                c->n_dom++;
+        }
+        spin_unlock(&cpupool_lock);
+        printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d ret %d\n",
+               d->domain_id, op->cpupool_id, ret);
+        rcu_unlock_domain(d);
+    }
+    break;
+
+    case XEN_SYSCTL_CPUPOOL_OP_FREEINFO:
+    {
+        cpumask_to_xenctl_cpumap(&(op->cpumap),
+                                 &cpupool_free_cpus);
+        ret = 0;
+    }
+    break;
+
+    default:
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
+
+void schedule_dump(struct cpupool *c);
+
+/* Print the time, then schedule_dump() the "idle" pool (NULL) and every
+ * cpupool on the list, holding cpupool_lock with interrupts disabled. */
+void dump_runq(unsigned char key)
+{
+    unsigned long flags;
+    s_time_t now = NOW();
+    struct cpupool *pool;
+
+    spin_lock(&cpupool_lock);
+    local_irq_save(flags);
+
+    printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+    printk("Idle cpupool:\n");
+    schedule_dump(NULL);
+    for ( pool = cpupool_list; pool != NULL; pool = pool->next )
+    {
+        printk("Cpupool %d:\n", pool->cpupool_id);
+        schedule_dump(pool);
+    }
+
+    local_irq_restore(flags);
+    spin_unlock(&cpupool_lock);
+}
+
+static int __init cpupool_init(void)
+{
+ cpupool_free_cpus = cpu_online_map; /* every online cpu starts out unassigned */
+ cpupool_list = NULL; /* no cpupool exists yet */
+ return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: xen-4.0.1-testing/xen/common/domain.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/domain.c
+++ xen-4.0.1-testing/xen/common/domain.c
@@ -209,7 +209,7 @@ static void __init parse_extra_guest_irq
custom_param("extra_guest_irqs", parse_extra_guest_irqs);
struct domain *domain_create(
- domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
+ domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref)
{
struct domain *d, **pd;
enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
@@ -292,6 +292,9 @@ struct domain *domain_create(
goto fail;
init_status |= INIT_arch;
+ if ( cpupool_add_domain(d, poolid) != 0 )
+ goto fail;
+
if ( sched_init_domain(d) != 0 )
goto fail;
@@ -603,6 +606,8 @@ static void complete_domain_destroy(stru
rangeset_domain_destroy(d);
+ cpupool_rm_domain(d);
+
sched_destroy_domain(d);
/* Free page used by xen oprofile buffer. */
Index: xen-4.0.1-testing/xen/common/domctl.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/domctl.c
+++ xen-4.0.1-testing/xen/common/domctl.c
@@ -11,6 +11,7 @@
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
@@ -141,10 +142,12 @@ void getdomaininfo(struct domain *d, str
info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
BUG_ON(SHARED_M2P(info->shared_info_frame));
+ info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
+
memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
-static unsigned int default_vcpu0_location(void)
+static unsigned int default_vcpu0_location(cpumask_t *online)
{
struct domain *d;
struct vcpu *v;
@@ -174,7 +177,7 @@ static unsigned int default_vcpu0_locati
if ( cpus_weight(per_cpu(cpu_sibling_map, 0)) > 1 )
cpu = next_cpu(cpu, per_cpu(cpu_sibling_map, 0));
cpu_exclude_map = per_cpu(cpu_sibling_map, 0);
- for_each_online_cpu ( i )
+ for_each_cpu_mask(i, *online)
{
if ( cpu_isset(i, cpu_exclude_map) )
continue;
@@ -389,6 +392,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
domid_t dom;
static domid_t rover = 0;
unsigned int domcr_flags;
+ int pool = 0;
ret = -EINVAL;
if ( supervisor_mode_kernel ||
@@ -432,7 +436,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
domcr_flags |= DOMCRF_oos_off;
ret = -ENOMEM;
- d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref);
+ d = domain_create(dom, pool, domcr_flags, op->u.createdomain.ssidref);
if ( d == NULL )
break;
@@ -451,6 +455,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
{
struct domain *d;
unsigned int i, max = op->u.max_vcpus.max, cpu;
+ cpumask_t *online;
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
@@ -499,6 +504,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
goto maxvcpu_out;
ret = -ENOMEM;
+ online = (d->cpupool == NULL) ? &cpu_online_map : &d->cpupool->cpu_valid;
if ( max > d->max_vcpus )
{
struct vcpu **vcpus;
@@ -522,8 +528,8 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
continue;
cpu = (i == 0) ?
- default_vcpu0_location() :
- cycle_cpu(d->vcpu[i-1]->processor, cpu_online_map);
+ default_vcpu0_location(online) :
+ cycle_cpu(d->vcpu[i-1]->processor, *online);
if ( alloc_vcpu(d, i, cpu) == NULL )
goto maxvcpu_out;
Index: xen-4.0.1-testing/xen/common/kexec.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/kexec.c
+++ xen-4.0.1-testing/xen/common/kexec.c
@@ -235,7 +235,7 @@ void kexec_crash(void)
BUG();
}
-static long kexec_reboot(void *_image)
+static long kexec_reboot(void *hdl, void *_image)
{
xen_kexec_image_t *image = _image;
@@ -584,7 +584,7 @@ static int kexec_exec(XEN_GUEST_HANDLE(v
{
case KEXEC_TYPE_DEFAULT:
image = &kexec_image[base + pos];
- ret = continue_hypercall_on_cpu(0, kexec_reboot, image);
+ ret = continue_hypercall_on_cpu(0, NULL, kexec_reboot, image);
break;
case KEXEC_TYPE_CRASH:
kexec_crash(); /* Does not return */
Index: xen-4.0.1-testing/xen/common/sched_credit.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/sched_credit.c
+++ xen-4.0.1-testing/xen/common/sched_credit.c
@@ -70,11 +70,15 @@
/*
* Useful macros
*/
+#define CSCHED_PRIV(_ops) \
+ ((struct csched_private *)((_ops)->sched_data))
#define CSCHED_PCPU(_c) \
((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
+#define CSCHED_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
/*
@@ -160,10 +164,12 @@ struct csched_private {
struct timer master_ticker;
unsigned int master;
cpumask_t idlers;
+ cpumask_t cpus;
uint32_t weight;
uint32_t credit;
int credit_balance;
uint32_t runq_sort;
+ int ticker_active;
};
@@ -171,8 +177,10 @@ struct csched_private {
* Global variables
*/
static struct csched_private csched_priv;
+static struct csched_private *csched_priv0 = NULL;
static void csched_tick(void *_cpu);
+static void csched_acct(void *dummy);
static inline int
__vcpu_on_runq(struct csched_vcpu *svc)
@@ -233,6 +241,7 @@ __runq_tickle(unsigned int cpu, struct c
{
struct csched_vcpu * const cur =
CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
cpumask_t mask;
ASSERT(cur);
@@ -259,14 +268,14 @@ __runq_tickle(unsigned int cpu, struct c
*/
if ( cur->pri > CSCHED_PRI_IDLE )
{
- if ( cpus_empty(csched_priv.idlers) )
+ if ( cpus_empty(prv->idlers) )
{
CSCHED_STAT_CRANK(tickle_idlers_none);
}
else
{
CSCHED_STAT_CRANK(tickle_idlers_some);
- cpus_or(mask, mask, csched_priv.idlers);
+ cpus_or(mask, mask, prv->idlers);
cpus_and(mask, mask, new->vcpu->cpu_affinity);
}
}
@@ -276,40 +285,80 @@ __runq_tickle(unsigned int cpu, struct c
cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
}
-static int
-csched_pcpu_init(int cpu)
+static void
+csched_free_pdata(struct scheduler *ops, void *pcpu, int cpu)
+{
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_pcpu *spc = pcpu;
+ unsigned long flags;
+ /* Reverse of csched_alloc_pdata() for 'cpu'; a NULL pcpu is a no-op. */
+ if ( spc == NULL )
+ return;
+
+ spin_lock_irqsave(&prv->lock, flags);
+ /* Take this CPU's contribution out of the instance-wide bookkeeping. */
+ prv->credit -= CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus--;
+ cpu_clear(cpu, prv->idlers);
+ cpu_clear(cpu, prv->cpus);
+ if ( (prv->master == cpu) && (prv->ncpus > 0) )
+ {
+ prv->master = first_cpu(prv->cpus); /* first CPU still in prv->cpus */
+ migrate_timer(&prv->master_ticker, prv->master);
+ }
+ kill_timer(&spc->ticker);
+ if ( prv->ncpus == 0 ) /* no CPU left in this instance */
+ kill_timer(&prv->master_ticker);
+
+ spin_unlock_irqrestore(&prv->lock, flags);
+
+ xfree(spc);
+}
+
+static void *
+csched_alloc_pdata(struct scheduler *ops, int cpu)
{
struct csched_pcpu *spc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
/* Allocate per-PCPU info */
spc = xmalloc(struct csched_pcpu);
if ( spc == NULL )
- return -1;
+ return NULL;
memset(spc, 0, sizeof(*spc));
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
/* Initialize/update system-wide config */
- csched_priv.credit += CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.ncpus <= cpu )
- csched_priv.ncpus = cpu + 1;
- if ( csched_priv.master >= csched_priv.ncpus )
- csched_priv.master = cpu;
+ prv->credit += CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus++;
+ cpu_set(cpu, prv->cpus);
+ if ( (prv->ncpus == 1) && (prv != csched_priv0) )
+ {
+ prv->master = cpu;
+ init_timer( &prv->master_ticker, csched_acct, prv, cpu);
+ prv->ticker_active = 2;
+ }
init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
+
+ if ( prv == csched_priv0 )
+ prv->master = first_cpu(prv->cpus);
+
INIT_LIST_HEAD(&spc->runq);
- spc->runq_sort_last = csched_priv.runq_sort;
+ spc->runq_sort_last = prv->runq_sort;
spc->idle_bias = NR_CPUS - 1;
- per_cpu(schedule_data, cpu).sched_priv = spc;
+ if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
+ per_cpu(schedule_data, cpu).sched_priv = spc;
/* Start off idling... */
BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
- cpu_set(cpu, csched_priv.idlers);
+ cpu_set(cpu, prv->idlers);
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
- return 0;
+ return spc;
}
#ifndef NDEBUG
@@ -382,17 +431,19 @@ __csched_vcpu_is_migrateable(struct vcpu
}
static int
-_csched_cpu_pick(struct vcpu *vc, bool_t commit)
+_csched_cpu_pick(struct scheduler *ops, struct vcpu *vc, bool_t commit)
{
cpumask_t cpus;
cpumask_t idlers;
+ cpumask_t *online;
int cpu;
/*
* Pick from online CPUs in VCPU's affinity mask, giving a
* preference to its current processor if it's in there.
*/
- cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+ online = CSCHED_CPUONLINE(vc->domain->cpupool);
+ cpus_and(cpus, *online, vc->cpu_affinity);
cpu = cpu_isset(vc->processor, cpus)
? vc->processor
: cycle_cpu(vc->processor, cpus);
@@ -410,7 +461,7 @@ _csched_cpu_pick(struct vcpu *vc, bool_t
* like run two VCPUs on co-hyperthreads while there are idle cores
* or sockets.
*/
- cpus_and(idlers, cpu_online_map, csched_priv.idlers);
+ cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
cpu_set(cpu, idlers);
cpus_and(cpus, cpus, idlers);
cpu_clear(cpu, cpus);
@@ -456,18 +507,18 @@ _csched_cpu_pick(struct vcpu *vc, bool_t
}
static int
-csched_cpu_pick(struct vcpu *vc)
+csched_cpu_pick(struct scheduler *ops, struct vcpu *vc)
{
- return _csched_cpu_pick(vc, 1);
+ return _csched_cpu_pick(ops, vc, 1);
}
static inline void
-__csched_vcpu_acct_start(struct csched_vcpu *svc)
+__csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
unsigned long flags;
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
if ( list_empty(&svc->active_vcpu_elem) )
{
@@ -478,16 +529,17 @@ __csched_vcpu_acct_start(struct csched_v
list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
if ( list_empty(&sdom->active_sdom_elem) )
{
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
- csched_priv.weight += sdom->weight;
+ list_add(&sdom->active_sdom_elem, &prv->active_sdom);
+ prv->weight += sdom->weight;
}
}
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
static inline void
-__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+__csched_vcpu_acct_stop_locked(struct csched_private *prv,
+ struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
@@ -500,16 +552,17 @@ __csched_vcpu_acct_stop_locked(struct cs
list_del_init(&svc->active_vcpu_elem);
if ( list_empty(&sdom->active_vcpu) )
{
- BUG_ON( csched_priv.weight < sdom->weight );
+ BUG_ON( prv->weight < sdom->weight );
list_del_init(&sdom->active_sdom_elem);
- csched_priv.weight -= sdom->weight;
+ prv->weight -= sdom->weight;
}
}
static void
-csched_vcpu_acct(unsigned int cpu)
+csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
{
struct csched_vcpu * const svc = CSCHED_VCPU(current);
+ struct scheduler *ops = per_cpu(scheduler, cpu);
ASSERT( current->processor == cpu );
ASSERT( svc->sdom != NULL );
@@ -538,9 +591,9 @@ csched_vcpu_acct(unsigned int cpu)
*/
if ( list_empty(&svc->active_vcpu_elem) )
{
- __csched_vcpu_acct_start(svc);
+ __csched_vcpu_acct_start(prv, svc);
}
- else if ( _csched_cpu_pick(current, 0) != cpu )
+ else if ( _csched_cpu_pick(ops, current, 0) != cpu )
{
CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
CSCHED_STAT_CRANK(migrate_running);
@@ -549,66 +602,75 @@ csched_vcpu_acct(unsigned int cpu)
}
}
-static int
-csched_vcpu_init(struct vcpu *vc)
+static void *
+csched_alloc_vdata(struct scheduler *ops, struct vcpu *vc, void *dd)
{
- struct domain * const dom = vc->domain;
- struct csched_dom *sdom = CSCHED_DOM(dom);
struct csched_vcpu *svc;
- CSCHED_STAT_CRANK(vcpu_init);
-
/* Allocate per-VCPU info */
svc = xmalloc(struct csched_vcpu);
if ( svc == NULL )
- return -1;
+ return NULL;
memset(svc, 0, sizeof(*svc));
INIT_LIST_HEAD(&svc->runq_elem);
INIT_LIST_HEAD(&svc->active_vcpu_elem);
- svc->sdom = sdom;
+ svc->sdom = dd;
svc->vcpu = vc;
atomic_set(&svc->credit, 0);
svc->flags = 0U;
- svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
+ svc->pri = is_idle_domain(vc->domain) ?
+ CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
CSCHED_VCPU_STATS_RESET(svc);
- vc->sched_priv = svc;
+ CSCHED_STAT_CRANK(vcpu_init);
+ return svc;
+}
- /* Allocate per-PCPU info */
- if ( unlikely(!CSCHED_PCPU(vc->processor)) )
- {
- if ( csched_pcpu_init(vc->processor) != 0 )
- return -1;
- }
+static void
+csched_vcpu_insert(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu *svc = vc->sched_priv;
- CSCHED_VCPU_CHECK(vc);
- return 0;
+ if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
+ __runq_insert(vc->processor, svc);
}
static void
-csched_vcpu_destroy(struct vcpu *vc)
+csched_free_vdata(struct scheduler *ops, void *priv)
{
- struct csched_vcpu * const svc = CSCHED_VCPU(vc);
- struct csched_dom * const sdom = svc->sdom;
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_vcpu *svc = priv;
unsigned long flags;
- CSCHED_STAT_CRANK(vcpu_destroy);
-
- BUG_ON( sdom == NULL );
- BUG_ON( !list_empty(&svc->runq_elem) );
+ if ( __vcpu_on_runq(svc) )
+ __runq_remove(svc);
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&(prv->lock), flags);
if ( !list_empty(&svc->active_vcpu_elem) )
- __csched_vcpu_acct_stop_locked(svc);
+ __csched_vcpu_acct_stop_locked(prv, svc);
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&(prv->lock), flags);
xfree(svc);
}
static void
-csched_vcpu_sleep(struct vcpu *vc)
+csched_vcpu_destroy(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+ struct csched_dom * const sdom = svc->sdom;
+
+ CSCHED_STAT_CRANK(vcpu_destroy);
+
+ BUG_ON( sdom == NULL );
+ BUG_ON( !list_empty(&svc->runq_elem) );
+
+ csched_free_vdata(ops, svc);
+}
+
+static void
+csched_vcpu_sleep(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -623,7 +685,7 @@ csched_vcpu_sleep(struct vcpu *vc)
}
static void
-csched_vcpu_wake(struct vcpu *vc)
+csched_vcpu_wake(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
const unsigned int cpu = vc->processor;
@@ -679,10 +741,12 @@ csched_vcpu_wake(struct vcpu *vc)
static int
csched_dom_cntl(
+ struct scheduler *ops,
struct domain *d,
struct xen_domctl_scheduler_op *op)
{
struct csched_dom * const sdom = CSCHED_DOM(d);
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
@@ -694,14 +758,14 @@ csched_dom_cntl(
{
ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
if ( op->u.credit.weight != 0 )
{
if ( !list_empty(&sdom->active_sdom_elem) )
{
- csched_priv.weight -= sdom->weight;
- csched_priv.weight += op->u.credit.weight;
+ prv->weight -= sdom->weight;
+ prv->weight += op->u.credit.weight;
}
sdom->weight = op->u.credit.weight;
}
@@ -709,25 +773,20 @@ csched_dom_cntl(
if ( op->u.credit.cap != (uint16_t)~0U )
sdom->cap = op->u.credit.cap;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
return 0;
}
-static int
-csched_dom_init(struct domain *dom)
+static void *
+csched_alloc_domdata(struct scheduler *ops, struct domain *dom)
{
struct csched_dom *sdom;
- CSCHED_STAT_CRANK(dom_init);
-
- if ( is_idle_domain(dom) )
- return 0;
-
sdom = xmalloc(struct csched_dom);
if ( sdom == NULL )
- return -ENOMEM;
+ return NULL;
memset(sdom, 0, sizeof(*sdom));
/* Initialize credit and weight */
@@ -737,16 +796,40 @@ csched_dom_init(struct domain *dom)
sdom->dom = dom;
sdom->weight = CSCHED_DEFAULT_WEIGHT;
sdom->cap = 0U;
+
+ return (void *)sdom;
+}
+
+static int
+csched_dom_init(struct scheduler *ops, struct domain *dom)
+{
+ struct csched_dom *sdom;
+
+ CSCHED_STAT_CRANK(dom_init);
+
+ if ( is_idle_domain(dom) )
+ return 0;
+
+ sdom = csched_alloc_domdata(ops, dom);
+ if ( sdom == NULL )
+ return -ENOMEM;
+
dom->sched_priv = sdom;
return 0;
}
static void
-csched_dom_destroy(struct domain *dom)
+csched_free_domdata(struct scheduler *ops, void *data)
+{
+ xfree(data);
+}
+
+static void
+csched_dom_destroy(struct scheduler *ops, struct domain *dom)
{
CSCHED_STAT_CRANK(dom_destroy);
- xfree(CSCHED_DOM(dom));
+ csched_free_domdata(ops, CSCHED_DOM(dom));
}
/*
@@ -757,7 +840,7 @@ csched_dom_destroy(struct domain *dom)
* remember the last UNDER to make the move up operation O(1).
*/
static void
-csched_runq_sort(unsigned int cpu)
+csched_runq_sort(struct csched_private *prv, unsigned int cpu)
{
struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
struct list_head *runq, *elem, *next, *last_under;
@@ -765,7 +848,7 @@ csched_runq_sort(unsigned int cpu)
unsigned long flags;
int sort_epoch;
- sort_epoch = csched_priv.runq_sort;
+ sort_epoch = prv->runq_sort;
if ( sort_epoch == spc->runq_sort_last )
return;
@@ -802,6 +885,7 @@ csched_runq_sort(unsigned int cpu)
static void
csched_acct(void* dummy)
{
+ struct csched_private *prv = dummy;
unsigned long flags;
struct list_head *iter_vcpu, *next_vcpu;
struct list_head *iter_sdom, *next_sdom;
@@ -818,22 +902,22 @@ csched_acct(void* dummy)
int credit;
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
- weight_total = csched_priv.weight;
- credit_total = csched_priv.credit;
+ weight_total = prv->weight;
+ credit_total = prv->credit;
/* Converge balance towards 0 when it drops negative */
- if ( csched_priv.credit_balance < 0 )
+ if ( prv->credit_balance < 0 )
{
- credit_total -= csched_priv.credit_balance;
+ credit_total -= prv->credit_balance;
CSCHED_STAT_CRANK(acct_balance);
}
if ( unlikely(weight_total == 0) )
{
- csched_priv.credit_balance = 0;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ prv->credit_balance = 0;
+ spin_unlock_irqrestore(&prv->lock, flags);
CSCHED_STAT_CRANK(acct_no_work);
goto out;
}
@@ -845,7 +929,7 @@ csched_acct(void* dummy)
credit_xtra = 0;
credit_cap = 0U;
- list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+ list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
{
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -865,9 +949,9 @@ csched_acct(void* dummy)
* only when the system-wide credit balance is negative.
*/
credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.credit_balance < 0 )
+ if ( prv->credit_balance < 0 )
{
- credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+ credit_peak += ( ( -prv->credit_balance * sdom->weight) +
(weight_total - 1)
) / weight_total;
}
@@ -909,7 +993,7 @@ csched_acct(void* dummy)
*/
CSCHED_STAT_CRANK(acct_reorder);
list_del(&sdom->active_sdom_elem);
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ list_add(&sdom->active_sdom_elem, &prv->active_sdom);
}
credit_fair = credit_peak;
@@ -975,7 +1059,7 @@ csched_acct(void* dummy)
/* Upper bound on credits means VCPU stops earning */
if ( credit > CSCHED_CREDITS_PER_TSLICE )
{
- __csched_vcpu_acct_stop_locked(svc);
+ __csched_vcpu_acct_stop_locked(prv, svc);
credit = 0;
atomic_set(&svc->credit, credit);
}
@@ -987,15 +1071,15 @@ csched_acct(void* dummy)
}
}
- csched_priv.credit_balance = credit_balance;
+ prv->credit_balance = credit_balance;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
/* Inform each CPU that its runq needs to be sorted */
- csched_priv.runq_sort++;
+ prv->runq_sort++;
out:
- set_timer( &csched_priv.master_ticker, NOW() +
+ set_timer( &prv->master_ticker, NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
}
@@ -1004,6 +1088,7 @@ csched_tick(void *_cpu)
{
unsigned int cpu = (unsigned long)_cpu;
struct csched_pcpu *spc = CSCHED_PCPU(cpu);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
spc->tick++;
@@ -1011,7 +1096,7 @@ csched_tick(void *_cpu)
* Accounting for running VCPU
*/
if ( !is_idle_vcpu(current) )
- csched_vcpu_acct(cpu);
+ csched_vcpu_acct(prv, cpu);
/*
* Check if runq needs to be sorted
@@ -1020,7 +1105,7 @@ csched_tick(void *_cpu)
* modified priorities. This is a special O(n) sort and runs at most
* once per accounting period (currently 30 milliseconds).
*/
- csched_runq_sort(cpu);
+ csched_runq_sort(prv, cpu);
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
@@ -1073,16 +1158,19 @@ csched_runq_steal(int peer_cpu, int cpu,
}
static struct csched_vcpu *
-csched_load_balance(int cpu, struct csched_vcpu *snext)
+csched_load_balance(struct csched_private *prv, int cpu,
+ struct csched_vcpu *snext)
{
struct csched_vcpu *speer;
cpumask_t workers;
+ cpumask_t *online;
int peer_cpu;
BUG_ON( cpu != snext->vcpu->processor );
+ online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
/* If this CPU is going offline we shouldn't steal work. */
- if ( unlikely(!cpu_online(cpu)) )
+ if ( unlikely(!cpu_isset(cpu, *online)) )
goto out;
if ( snext->pri == CSCHED_PRI_IDLE )
@@ -1096,7 +1184,7 @@ csched_load_balance(int cpu, struct csch
* Peek at non-idling CPUs in the system, starting with our
* immediate neighbour.
*/
- cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
+ cpus_andnot(workers, *online, prv->idlers);
cpu_clear(cpu, workers);
peer_cpu = cpu;
@@ -1138,11 +1226,12 @@ csched_load_balance(int cpu, struct csch
* fast for the common case.
*/
static struct task_slice
-csched_schedule(s_time_t now)
+csched_schedule(struct scheduler *ops, s_time_t now)
{
const int cpu = smp_processor_id();
struct list_head * const runq = RUNQ(cpu);
struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+ struct csched_private *prv = CSCHED_PRIV(ops);
struct csched_vcpu *snext;
struct task_slice ret;
@@ -1177,7 +1266,7 @@ csched_schedule(s_time_t now)
if ( snext->pri > CSCHED_PRI_TS_OVER )
__runq_remove(snext);
else
- snext = csched_load_balance(cpu, snext);
+ snext = csched_load_balance(prv, cpu, snext);
/*
* Update idlers mask if necessary. When we're idling, other CPUs
@@ -1185,12 +1274,12 @@ csched_schedule(s_time_t now)
*/
if ( snext->pri == CSCHED_PRI_IDLE )
{
- if ( !cpu_isset(cpu, csched_priv.idlers) )
- cpu_set(cpu, csched_priv.idlers);
+ if ( !cpu_isset(cpu, prv->idlers) )
+ cpu_set(cpu, prv->idlers);
}
- else if ( cpu_isset(cpu, csched_priv.idlers) )
+ else if ( cpu_isset(cpu, prv->idlers) )
{
- cpu_clear(cpu, csched_priv.idlers);
+ cpu_clear(cpu, prv->idlers);
}
if ( !is_idle_vcpu(snext->vcpu) )
@@ -1237,7 +1326,7 @@ csched_dump_vcpu(struct csched_vcpu *svc
}
static void
-csched_dump_pcpu(int cpu)
+csched_dump_pcpu(struct scheduler *ops, int cpu)
{
struct list_head *runq, *iter;
struct csched_pcpu *spc;
@@ -1275,9 +1364,10 @@ csched_dump_pcpu(int cpu)
}
static void
-csched_dump(void)
+csched_dump(struct scheduler *ops)
{
struct list_head *iter_sdom, *iter_svc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
int loop;
#define idlers_buf keyhandler_scratch
@@ -1294,12 +1384,12 @@ csched_dump(void)
"\tticks per tslice = %d\n"
"\tticks per acct = %d\n"
"\tmigration delay = %uus\n",
- csched_priv.ncpus,
- csched_priv.master,
- csched_priv.credit,
- csched_priv.credit_balance,
- csched_priv.weight,
- csched_priv.runq_sort,
+ prv->ncpus,
+ prv->master,
+ prv->credit,
+ prv->credit_balance,
+ prv->weight,
+ prv->runq_sort,
CSCHED_DEFAULT_WEIGHT,
CSCHED_MSECS_PER_TICK,
CSCHED_CREDITS_PER_MSEC,
@@ -1307,12 +1397,12 @@ csched_dump(void)
CSCHED_TICKS_PER_ACCT,
vcpu_migration_delay);
- cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
+ cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
printk("idlers: %s\n", idlers_buf);
printk("active vcpus:\n");
loop = 0;
- list_for_each( iter_sdom, &csched_priv.active_sdom )
+ list_for_each( iter_sdom, &prv->active_sdom )
{
struct csched_dom *sdom;
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -1329,18 +1419,30 @@ csched_dump(void)
#undef idlers_buf
}
-static void
-csched_init(void)
+static int
+csched_init(struct scheduler *ops)
{
- spin_lock_init(&csched_priv.lock);
- INIT_LIST_HEAD(&csched_priv.active_sdom);
- csched_priv.ncpus = 0;
- csched_priv.master = UINT_MAX;
- cpus_clear(csched_priv.idlers);
- csched_priv.weight = 0U;
- csched_priv.credit = 0U;
- csched_priv.credit_balance = 0;
- csched_priv.runq_sort = 0U;
+ struct csched_private *prv;
+
+ prv = xmalloc(struct csched_private);
+ if ( prv == NULL )
+ return 1;
+ memset(prv, 0, sizeof(*prv));
+ if (csched_priv0 == NULL)
+ csched_priv0 = prv;
+ ops->sched_data = prv;
+ spin_lock_init(&prv->lock);
+ INIT_LIST_HEAD(&prv->active_sdom);
+ prv->ncpus = 0;
+ prv->master = UINT_MAX;
+ cpus_clear(prv->idlers);
+ prv->weight = 0U;
+ prv->credit = 0U;
+ prv->credit_balance = 0;
+ prv->runq_sort = 0U;
+ prv->ticker_active = (csched_priv0 == prv) ? 0 : 1;
+
+ return 0;
}
/* Tickers cannot be kicked until SMP subsystem is alive. */
@@ -1350,54 +1452,81 @@ static __init int csched_start_tickers(v
unsigned int cpu;
/* Is the credit scheduler initialised? */
- if ( csched_priv.ncpus == 0 )
+ if ( (csched_priv0 == NULL) || (csched_priv0->ncpus == 0) )
return 0;
+ csched_priv0->ticker_active = 1;
+
for_each_online_cpu ( cpu )
{
spc = CSCHED_PCPU(cpu);
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
- init_timer( &csched_priv.master_ticker, csched_acct, NULL,
- csched_priv.master);
+ init_timer( &csched_priv0->master_ticker, csched_acct, csched_priv0,
+ csched_priv0->master);
- set_timer( &csched_priv.master_ticker, NOW() +
+ set_timer( &csched_priv0->master_ticker, NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
return 0;
}
__initcall(csched_start_tickers);
-static void csched_tick_suspend(void)
+static void
+csched_deinit(struct scheduler *ops)
+{
+ struct csched_private *prv = CSCHED_PRIV(ops);
+
+ /* Release the private data that csched_init() stored in ops->sched_data. */
+ /* No NULL test is needed: xfree() ignores a NULL pointer. */
+ xfree(prv);
+}
+
+static void csched_tick_suspend(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
- spc = CSCHED_PCPU(smp_processor_id());
+ spc = CSCHED_PCPU(cpu);
stop_timer(&spc->ticker);
}
-static void csched_tick_resume(void)
+static void csched_tick_resume(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
uint64_t now = NOW();
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( !prv->ticker_active )
+ return;
- spc = CSCHED_PCPU(smp_processor_id());
+
+ spc = CSCHED_PCPU(cpu);
set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
- now % MILLISECS(CSCHED_MSECS_PER_TICK) );
+
+ if ( (prv->ticker_active == 2) && (prv->master == cpu) )
+ {
+ set_timer( &prv->master_ticker, now +
+ MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT -
+ now % MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
+ prv->ticker_active = 1;
+ }
}
-const struct scheduler sched_credit_def = {
+struct scheduler sched_credit_def = {
.name = "SMP Credit Scheduler",
.opt_name = "credit",
.sched_id = XEN_SCHEDULER_CREDIT,
+ .sched_data = &csched_priv,
.init_domain = csched_dom_init,
.destroy_domain = csched_dom_destroy,
- .init_vcpu = csched_vcpu_init,
+ .insert_vcpu = csched_vcpu_insert,
.destroy_vcpu = csched_vcpu_destroy,
.sleep = csched_vcpu_sleep,
@@ -1411,6 +1540,13 @@ const struct scheduler sched_credit_def
.dump_cpu_state = csched_dump_pcpu,
.dump_settings = csched_dump,
.init = csched_init,
+ .deinit = csched_deinit,
+ .alloc_vdata = csched_alloc_vdata,
+ .free_vdata = csched_free_vdata,
+ .alloc_pdata = csched_alloc_pdata,
+ .free_pdata = csched_free_pdata,
+ .alloc_domdata = csched_alloc_domdata,
+ .free_domdata = csched_free_domdata,
.tick_suspend = csched_tick_suspend,
.tick_resume = csched_tick_resume,
Index: xen-4.0.1-testing/xen/common/sched_sedf.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/sched_sedf.c
+++ xen-4.0.1-testing/xen/common/sched_sedf.c
@@ -21,6 +21,9 @@
printk(_a ); \
} while ( 0 )
+#define SEDF_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
+
#ifndef NDEBUG
#define SEDF_STATS
#define CHECK(_p) \
@@ -132,7 +135,7 @@ struct sedf_cpu_info {
#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
-static void sedf_dump_cpu_state(int i);
+static void sedf_dump_cpu_state(struct scheduler *ops, int i);
static inline int extraq_on(struct vcpu *d, int i)
{
@@ -329,30 +332,17 @@ static inline void __add_to_runqueue_sor
}
-static int sedf_init_vcpu(struct vcpu *v)
+static void *sedf_alloc_vdata(struct scheduler *ops, struct vcpu *v, void *dd)
{
struct sedf_vcpu_info *inf;
- if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
- return -1;
- memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+ inf = xmalloc(struct sedf_vcpu_info);
+ if ( inf == NULL )
+ return NULL;
- inf = EDOM_INFO(v);
+ memset(inf, 0, sizeof(struct sedf_vcpu_info));
inf->vcpu = v;
-
- /* Allocate per-CPU context if this is the first domain to be added. */
- if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
- {
- per_cpu(schedule_data, v->processor).sched_priv =
- xmalloc(struct sedf_cpu_info);
- BUG_ON(per_cpu(schedule_data, v->processor).sched_priv == NULL);
- memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor)));
- INIT_LIST_HEAD(WAITQ(v->processor));
- INIT_LIST_HEAD(RUNQ(v->processor));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
- }
-
+
/* Every VCPU gets an equal share of extratime by default. */
inf->deadl_abs = 0;
inf->latency = 0;
@@ -383,39 +373,88 @@ static int sedf_init_vcpu(struct vcpu *v
}
else
{
- EDOM_INFO(v)->deadl_abs = 0;
- EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
+ inf->deadl_abs = 0;
+ inf->status &= ~SEDF_ASLEEP;
}
- return 0;
+ return inf;
+}
+
+static void *
+sedf_alloc_pdata(struct scheduler *ops, int cpu)
+{
+ struct sedf_cpu_info *spc;
+ /* Build the per-CPU SEDF queue set (ops/cpu unused); NULL if out of memory. */
+ spc = xmalloc(struct sedf_cpu_info);
+ if ( spc == NULL ) /* report failure to the caller instead of BUG()ing */
+ return NULL;
+ memset(spc, 0, sizeof(*spc));
+ INIT_LIST_HEAD(&spc->waitq);
+ INIT_LIST_HEAD(&spc->runnableq);
+ INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
+ INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
+ return (void *)spc;
+}
+
+static void
+sedf_free_pdata(struct scheduler *ops, void *spc, int cpu)
+{
+ if ( spc == NULL ) /* nothing to release for this CPU */
+ return;
+ /* Free the per-CPU SEDF data; ops and cpu are not used here. */
+ xfree(spc);
+}
+
+static void sedf_free_vdata(struct scheduler *ops, void *priv)
+{
+ xfree(priv);
}
-static void sedf_destroy_vcpu(struct vcpu *v)
+static void sedf_destroy_vcpu(struct scheduler *ops, struct vcpu *v)
{
- xfree(v->sched_priv);
+ sedf_free_vdata(ops, v->sched_priv);
}
-static int sedf_init_domain(struct domain *d)
+static void *
+sedf_alloc_domdata(struct scheduler *ops, struct domain *d)
{
- d->sched_priv = xmalloc(struct sedf_dom_info);
+ void *mem;
+
+ mem = xmalloc(struct sedf_dom_info);
+ if ( mem == NULL )
+ return NULL;
+
+ memset(mem, 0, sizeof(struct sedf_dom_info));
+
+ return mem;
+}
+
+static int sedf_init_domain(struct scheduler *ops, struct domain *d)
+{
+ d->sched_priv = sedf_alloc_domdata(ops, d);
if ( d->sched_priv == NULL )
return -ENOMEM;
- memset(d->sched_priv, 0, sizeof(struct sedf_dom_info));
-
return 0;
}
-static void sedf_destroy_domain(struct domain *d)
+static void sedf_free_domdata(struct scheduler *ops, void *data)
+{
+ xfree(data); /* per-domain SEDF data, e.g. d->sched_priv; ops is unused */
+}
+
+static void sedf_destroy_domain(struct scheduler *ops, struct domain *d)
{
- xfree(d->sched_priv);
+ sedf_free_domdata(ops, d->sched_priv);
}
-static int sedf_pick_cpu(struct vcpu *v)
+static int sedf_pick_cpu(struct scheduler *ops, struct vcpu *v)
{
cpumask_t online_affinity;
+ cpumask_t *online;
- cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
+ online = SEDF_CPUONLINE(v->domain->cpupool);
+ cpus_and(online_affinity, v->cpu_affinity, *online);
return first_cpu(online_affinity);
}
@@ -751,7 +790,7 @@ static struct task_slice sedf_do_extra_s
-timeslice for the current period used up
-domain on waitqueue has started it's period
-and various others ;) in general: determine which domain to run next*/
-static struct task_slice sedf_do_schedule(s_time_t now)
+static struct task_slice sedf_do_schedule(struct scheduler *ops, s_time_t now)
{
int cpu = smp_processor_id();
struct list_head *runq = RUNQ(cpu);
@@ -786,6 +825,13 @@ static struct task_slice sedf_do_schedul
}
check_waitq:
update_queues(now, runq, waitq);
+
+ if ( unlikely(!cpu_isset(cpu, *SEDF_CPUONLINE(per_cpu(cpupool, cpu)))) )
+ {
+ ret.task = IDLETASK(cpu);
+ ret.time = SECONDS(1);
+ goto sched_done;
+ }
/*now simply pick the first domain from the runqueue, which has the
earliest deadline, because the list is sorted*/
@@ -848,7 +894,7 @@ static struct task_slice sedf_do_schedul
}
-static void sedf_sleep(struct vcpu *d)
+static void sedf_sleep(struct scheduler *ops, struct vcpu *d)
{
PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
d->domain->domain_id, d->vcpu_id);
@@ -1067,7 +1113,7 @@ static inline int should_switch(struct v
return 1;
}
-static void sedf_wake(struct vcpu *d)
+static void sedf_wake(struct scheduler *ops, struct vcpu *d)
{
s_time_t now = NOW();
struct sedf_vcpu_info* inf = EDOM_INFO(d);
@@ -1220,8 +1266,8 @@ static void sedf_dump_domain(struct vcpu
}
-/* dumps all domains on hte specified cpu */
-static void sedf_dump_cpu_state(int i)
+/* dumps all domains on the specified cpu */
+static void sedf_dump_cpu_state(struct scheduler *ops, int i)
{
struct list_head *list, *queue, *tmp;
struct sedf_vcpu_info *d_inf;
@@ -1294,7 +1340,7 @@ static void sedf_dump_cpu_state(int i)
/* Adjusts periods and slices of the domains accordingly to their weights. */
-static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
+static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
{
struct vcpu *p;
struct domain *d;
@@ -1315,6 +1361,8 @@ static int sedf_adjust_weights(struct xe
rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
+ if ( c != d->cpupool )
+ continue;
for_each_vcpu( d, p )
{
if ( EDOM_INFO(p)->weight )
@@ -1366,7 +1414,7 @@ static int sedf_adjust_weights(struct xe
/* set or fetch domain scheduling parameters */
-static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
+static int sedf_adjust(struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
struct vcpu *v;
int rc;
@@ -1376,9 +1424,6 @@ static int sedf_adjust(struct domain *p,
p->domain_id, op->u.sedf.period, op->u.sedf.slice,
op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");
- if ( !p->vcpu )
- return -EINVAL;
-
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
/* Check for sane parameters. */
@@ -1428,7 +1473,7 @@ static int sedf_adjust(struct domain *p,
}
}
- rc = sedf_adjust_weights(op);
+ rc = sedf_adjust_weights(p->cpupool, op);
if ( rc )
return rc;
@@ -1456,7 +1501,7 @@ static int sedf_adjust(struct domain *p,
return 0;
}
-const struct scheduler sched_sedf_def = {
+struct scheduler sched_sedf_def = {
.name = "Simple EDF Scheduler",
.opt_name = "sedf",
.sched_id = XEN_SCHEDULER_SEDF,
@@ -1464,9 +1509,15 @@ const struct scheduler sched_sedf_def =
.init_domain = sedf_init_domain,
.destroy_domain = sedf_destroy_domain,
- .init_vcpu = sedf_init_vcpu,
.destroy_vcpu = sedf_destroy_vcpu,
+ .alloc_vdata = sedf_alloc_vdata,
+ .free_vdata = sedf_free_vdata,
+ .alloc_pdata = sedf_alloc_pdata,
+ .free_pdata = sedf_free_pdata,
+ .alloc_domdata = sedf_alloc_domdata,
+ .free_domdata = sedf_free_domdata,
+
.do_schedule = sedf_do_schedule,
.pick_cpu = sedf_pick_cpu,
.dump_cpu_state = sedf_dump_cpu_state,
Index: xen-4.0.1-testing/xen/common/schedule.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/schedule.c
+++ xen-4.0.1-testing/xen/common/schedule.c
@@ -53,10 +53,11 @@ static void poll_timer_fn(void *data);
/* This is global for now so that private implementations can reach it */
DEFINE_PER_CPU(struct schedule_data, schedule_data);
+DEFINE_PER_CPU(struct scheduler *, scheduler);
-extern const struct scheduler sched_sedf_def;
-extern const struct scheduler sched_credit_def;
-static const struct scheduler *__initdata schedulers[] = {
+extern struct scheduler sched_sedf_def;
+extern struct scheduler sched_credit_def;
+static struct scheduler *schedulers[] = {
&sched_sedf_def,
&sched_credit_def,
NULL
@@ -64,9 +65,15 @@ static const struct scheduler *__initdat
static struct scheduler __read_mostly ops;
-#define SCHED_OP(fn, ...) \
- (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ ) \
- : (typeof(ops.fn(__VA_ARGS__)))0 )
+#define SCHED_OP(opsptr, fn, ...) \
+ (( (opsptr)->fn != NULL ) ? (opsptr)->fn(opsptr, ##__VA_ARGS__ ) \
+ : (typeof((opsptr)->fn(opsptr, ##__VA_ARGS__)))0 )
+
+#define DOM2OP(_d) (((_d)->cpupool == NULL) ? &ops : &((_d)->cpupool->sched))
+#define VCPU2OP(_v) (DOM2OP((_v)->domain))
+#define VCPU2ONLINE(_v) \
+ (((_v)->domain->cpupool == NULL) ? &cpu_online_map \
+ : &(_v)->domain->cpupool->cpu_valid)
static inline void trace_runstate_change(struct vcpu *v, int new_state)
{
@@ -207,7 +214,86 @@ int sched_init_vcpu(struct vcpu *v, unsi
TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
- return SCHED_OP(init_vcpu, v);
+ if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
+ {
+ per_cpu(schedule_data, v->processor).sched_priv =
+ SCHED_OP(DOM2OP(d), alloc_pdata, processor);
+ if ( per_cpu(schedule_data, v->processor).sched_priv == NULL )
+ return 1;
+ }
+
+ v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
+ if ( v->sched_priv == NULL )
+ return 1;
+
+ if ( is_idle_domain(d) )
+ per_cpu(schedule_data, v->processor).sched_idlevpriv = v->sched_priv;
+
+ return 0;
+}
+
+/* Move domain d to cpupool c. Returns 0, or -ENOMEM with d left unchanged. */
+int sched_move_domain(struct domain *d, struct cpupool *c)
+{
+    struct vcpu *v;
+    unsigned int new_p;
+    void **vcpu_priv;
+    void *domdata;
+    struct scheduler *old_ops = DOM2OP(d);
+    struct scheduler *new_ops = &(c->sched);
+
+    domdata = SCHED_OP(new_ops, alloc_domdata, d);
+    if ( domdata == NULL )
+        return -ENOMEM;
+
+    vcpu_priv = xmalloc_array(void *, d->max_vcpus);
+    if ( vcpu_priv == NULL )
+        goto err_domdata;
+    memset(vcpu_priv, 0, d->max_vcpus * sizeof(void *));
+
+    for_each_vcpu ( d, v )
+    {
+        vcpu_priv[v->vcpu_id] = SCHED_OP(new_ops, alloc_vdata, v, domdata);
+        if ( vcpu_priv[v->vcpu_id] == NULL )
+            goto err_vdata;
+    }
+
+    domain_pause(d);
+
+    new_p = first_cpu(c->cpu_valid);
+    for_each_vcpu ( d, v )
+    {
+        migrate_timer(&v->periodic_timer, new_p);
+        migrate_timer(&v->singleshot_timer, new_p);
+        migrate_timer(&v->poll_timer, new_p);
+
+        SCHED_OP(old_ops, destroy_vcpu, v);
+        cpus_setall(v->cpu_affinity);
+        v->processor = new_p;
+        v->sched_priv = vcpu_priv[v->vcpu_id];
+
+        new_p = cycle_cpu(new_p, c->cpu_valid);
+    }
+
+    d->cpupool = c;
+    /* d->sched_priv still belongs to the old scheduler: free it via old_ops. */
+    SCHED_OP(old_ops, free_domdata, d->sched_priv);
+    d->sched_priv = domdata;
+
+    domain_unpause(d);
+
+    xfree(vcpu_priv);
+    return 0;
+
+ err_vdata:
+    /* Undo alloc_vdata through the matching hook, not a bare xfree(). */
+    for_each_vcpu ( d, v )
+        if ( vcpu_priv[v->vcpu_id] != NULL )
+            SCHED_OP(new_ops, free_vdata, vcpu_priv[v->vcpu_id]);
+    xfree(vcpu_priv);
+ err_domdata:
+    SCHED_OP(new_ops, free_domdata, domdata);
+    return -ENOMEM;
}
void sched_destroy_vcpu(struct vcpu *v)
@@ -217,17 +303,17 @@ void sched_destroy_vcpu(struct vcpu *v)
kill_timer(&v->poll_timer);
if ( test_and_clear_bool(v->is_urgent) )
atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
- SCHED_OP(destroy_vcpu, v);
+ SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
}
int sched_init_domain(struct domain *d)
{
- return SCHED_OP(init_domain, d);
+ return SCHED_OP(DOM2OP(d), init_domain, d);
}
void sched_destroy_domain(struct domain *d)
{
- SCHED_OP(destroy_domain, d);
+ SCHED_OP(DOM2OP(d), destroy_domain, d);
}
void vcpu_sleep_nosync(struct vcpu *v)
@@ -241,7 +327,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
if ( v->runstate.state == RUNSTATE_runnable )
vcpu_runstate_change(v, RUNSTATE_offline, NOW());
- SCHED_OP(sleep, v);
+ SCHED_OP(VCPU2OP(v), sleep, v);
}
vcpu_schedule_unlock_irqrestore(v, flags);
@@ -269,7 +355,7 @@ void vcpu_wake(struct vcpu *v)
{
if ( v->runstate.state >= RUNSTATE_blocked )
vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
- SCHED_OP(wake, v);
+ SCHED_OP(VCPU2OP(v), wake, v);
}
else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
{
@@ -324,7 +410,7 @@ static void vcpu_migrate(struct vcpu *v)
/* Select new CPU. */
old_cpu = v->processor;
- new_cpu = SCHED_OP(pick_cpu, v);
+ new_cpu = SCHED_OP(VCPU2OP(v), pick_cpu, v);
/*
* Transfer urgency status to new CPU before switching CPUs, as once
@@ -367,22 +453,32 @@ void vcpu_force_reschedule(struct vcpu *
}
/*
- * This function is used by cpu_hotplug code from stop_machine context.
- * Hence we can avoid needing to take the
+ * This function is used by cpu_hotplug code from stop_machine context
+ * and from cpupools to switch schedulers on a cpu.
*/
-void cpu_disable_scheduler(void)
+int cpu_disable_scheduler(unsigned int cpu, int lock)
{
struct domain *d;
struct vcpu *v;
- unsigned int cpu = smp_processor_id();
+ struct cpupool *c;
+ int ret = 0;
+
+ c = per_cpu(cpupool, cpu);
+ if ( c == NULL )
+ return ret;
for_each_domain ( d )
{
+ if ( d->cpupool != c )
+ continue;
+
for_each_vcpu ( d, v )
{
if ( is_idle_vcpu(v) )
continue;
+ if ( lock != 0 )
+ vcpu_schedule_lock_irq(v);
if ( (cpus_weight(v->cpu_affinity) == 1) &&
cpu_isset(cpu, v->cpu_affinity) )
{
@@ -396,39 +492,51 @@ void cpu_disable_scheduler(void)
* be chosen when the timer is next re-set.
*/
if ( v->singleshot_timer.cpu == cpu )
- migrate_timer(&v->singleshot_timer, 0);
+ {
+ int cpu_mig;
+
+ cpu_mig = first_cpu(c->cpu_valid);
+ if (cpu_mig == cpu)
+ cpu_mig = next_cpu(cpu_mig, c->cpu_valid);
+ migrate_timer(&v->singleshot_timer, cpu_mig);
+ }
if ( v->processor == cpu )
{
set_bit(_VPF_migrating, &v->pause_flags);
+ if ( lock != 0 )
+ vcpu_schedule_unlock_irq(v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
+ else if ( lock != 0 )
+ vcpu_schedule_unlock_irq(v);
+ /*
+ * A vcpu active in the hypervisor will not be migratable.
+ * The caller should try again after releasing and reacquiring
+ * all locks.
+ */
+ if ( v->processor == cpu )
+ ret = -EAGAIN;
}
}
+ return ret;
}
-static int __vcpu_set_affinity(
- struct vcpu *v, cpumask_t *affinity,
- bool_t old_lock_status, bool_t new_lock_status)
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
cpumask_t online_affinity, old_affinity;
+ cpumask_t *online;
- cpus_and(online_affinity, *affinity, cpu_online_map);
+ if ( v->domain->is_pinned )
+ return -EINVAL;
+ online = VCPU2ONLINE(v);
+ cpus_and(online_affinity, *affinity, *online);
if ( cpus_empty(online_affinity) )
return -EINVAL;
vcpu_schedule_lock_irq(v);
- if ( v->affinity_locked != old_lock_status )
- {
- BUG_ON(!v->affinity_locked);
- vcpu_schedule_unlock_irq(v);
- return -EBUSY;
- }
-
- v->affinity_locked = new_lock_status;
-
old_affinity = v->cpu_affinity;
v->cpu_affinity = *affinity;
*affinity = old_affinity;
@@ -446,36 +554,6 @@ static int __vcpu_set_affinity(
return 0;
}
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- if ( v->domain->is_pinned )
- return -EINVAL;
- return __vcpu_set_affinity(v, affinity, 0, 0);
-}
-
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- return __vcpu_set_affinity(v, affinity, 0, 1);
-}
-
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- return __vcpu_set_affinity(v, affinity, 1, 1);
-}
-
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- cpumask_t online_affinity;
-
- /* Do not fail if no CPU in old affinity mask is online. */
- cpus_and(online_affinity, *affinity, cpu_online_map);
- if ( cpus_empty(online_affinity) )
- *affinity = cpu_online_map;
-
- if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
- BUG();
-}
-
/* Block the currently-executing domain until a pertinent event occurs. */
static long do_block(void)
{
@@ -783,7 +861,7 @@ long sched_adjust(struct domain *d, stru
struct vcpu *v;
long ret;
- if ( (op->sched_id != ops.sched_id) ||
+ if ( (op->sched_id != DOM2OP(d)->sched_id) ||
((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
(op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
return -EINVAL;
@@ -810,7 +888,7 @@ long sched_adjust(struct domain *d, stru
if ( d == current->domain )
vcpu_schedule_lock_irq(current);
- if ( (ret = SCHED_OP(adjust, d, op)) == 0 )
+ if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
if ( d == current->domain )
@@ -857,6 +935,7 @@ static void schedule(void)
{
struct vcpu *prev = current, *next = NULL;
s_time_t now = NOW();
+ struct scheduler *sched = this_cpu(scheduler);
struct schedule_data *sd;
struct task_slice next_slice;
@@ -872,7 +951,7 @@ static void schedule(void)
stop_timer(&sd->s_timer);
/* get policy-specific decision on scheduling... */
- next_slice = ops.do_schedule(now);
+ next_slice = sched->do_schedule(sched, now);
next = next_slice.task;
@@ -978,6 +1057,19 @@ static void poll_timer_fn(void *data)
vcpu_unblock(v);
}
+/* Look up a registered scheduler by its sched_id; NULL when none matches. */
+struct scheduler *scheduler_get_by_id(unsigned int id)
+{
+    struct scheduler **entry;
+
+    for ( entry = schedulers; *entry != NULL; entry++ )
+        if ( (*entry)->sched_id == id )
+            break;
+
+    /* Either the matching entry or the NULL terminator of schedulers[]. */
+    return *entry;
+}
+
/* Initialise the data structures. */
void __init scheduler_init(void)
{
@@ -985,12 +1077,6 @@ void __init scheduler_init(void)
open_softirq(SCHEDULE_SOFTIRQ, schedule);
- for_each_possible_cpu ( i )
- {
- spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
- init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
- }
-
for ( i = 0; schedulers[i] != NULL; i++ )
{
ops = *schedulers[i];
@@ -1004,43 +1090,123 @@ void __init scheduler_init(void)
ops = *schedulers[0];
}
+ for_each_possible_cpu ( i )
+ {
+ per_cpu(scheduler, i) = &ops;
+ spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
+ init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
+ }
+
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(init);
+ if ( SCHED_OP(&ops, init) )
+ panic("scheduler returned error on init\n");
}
-void dump_runq(unsigned char key)
+/* Switch cpu to the scheduler of pool c (c == NULL: the default scheduler). */
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
{
- s_time_t now = NOW();
- int i;
unsigned long flags;
+ struct vcpu *v;
+ void *vpriv = NULL; /* idle-vcpu data: first the new one to install, later the old one to free */
+ void *ppriv;
+ void *ppriv_old;
+ struct scheduler *old_ops;
+ struct scheduler *new_ops;
+
+ old_ops = per_cpu(scheduler, cpu);
+ new_ops = (c == NULL) ? &ops : &(c->sched);
+ v = per_cpu(schedule_data, cpu).idle; /* this cpu's idle vcpu */
+ ppriv = SCHED_OP(new_ops, alloc_pdata, cpu); /* NOTE(review): allocation results are used without a failure check */
+ if ( c != NULL )
+ vpriv = SCHED_OP(new_ops, alloc_vdata, v, v->domain->sched_priv);
+
+ spin_lock_irqsave(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+ if ( c == NULL )
+ {
+ vpriv = v->sched_priv; /* remember current data; freed after the unlock */
+ v->sched_priv = per_cpu(schedule_data, cpu).sched_idlevpriv; /* data saved for the default scheduler */
+ }
+ else
+ {
+ v->sched_priv = vpriv;
+ vpriv = NULL; /* nothing to free below in this case */
+ }
+ SCHED_OP(old_ops, tick_suspend, cpu);
+ per_cpu(scheduler, cpu) = new_ops;
+ ppriv_old = per_cpu(schedule_data, cpu).sched_priv;
+ per_cpu(schedule_data, cpu).sched_priv = ppriv;
+ SCHED_OP(new_ops, tick_resume, cpu);
+ SCHED_OP(new_ops, insert_vcpu, v);
+
+ spin_unlock_irqrestore(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+ if ( vpriv != NULL )
+ SCHED_OP(old_ops, free_vdata, vpriv);
+ SCHED_OP(old_ops, free_pdata, ppriv_old, cpu); /* old per-cpu data goes back through the old scheduler */
+}
+
+/* init scheduler global data: set up *sched from the scheduler named "name" */
+int schedule_init_global(char *name, struct scheduler *sched)
+{
+    struct scheduler **cand;
+    struct scheduler *tmpl = &ops;
+
+    if ( name != NULL )
+        for ( cand = schedulers; *cand != NULL; cand++ )
+            if ( strcmp((*cand)->opt_name, name) == 0 )
+            {
+                tmpl = *cand;
+                break;
+            }
+
+    /* tmpl is still &ops when no name was given or none matched. */
+    memcpy(sched, tmpl, sizeof(*sched));
+    return SCHED_OP(sched, init);
+}
- local_irq_save(flags);
+/* Deinitialize scheduler global data: invoke sched's deinit hook, if it has one. */
+void schedule_deinit_global(struct scheduler *sched)
+{
+ SCHED_OP(sched, deinit);
+}
- printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(dump_settings);
- printk("sched_smt_power_savings: %s\n",
- sched_smt_power_savings? "enabled":"disabled");
- printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+void schedule_dump(struct cpupool *c)
+{
+ int i;
+ struct scheduler *sched;
+ cpumask_t *cpus;
+
+ sched = (c == NULL) ? &ops : &(c->sched);
+ cpus = (c == NULL) ? &cpupool_free_cpus : &c->cpu_valid;
+ printk("Scheduler: %s (%s)\n", sched->name, sched->opt_name);
+ SCHED_OP(sched, dump_settings);
- for_each_online_cpu ( i )
+ for_each_cpu_mask (i, *cpus)
{
spin_lock(&per_cpu(schedule_data, i).schedule_lock);
printk("CPU[%02d] ", i);
- SCHED_OP(dump_cpu_state, i);
+ SCHED_OP(sched, dump_cpu_state, i);
spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
}
-
- local_irq_restore(flags);
}
void sched_tick_suspend(void)
{
- SCHED_OP(tick_suspend);
+ struct scheduler *sched;
+ unsigned int cpu = smp_processor_id();
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_suspend, cpu);
}
void sched_tick_resume(void)
{
- SCHED_OP(tick_resume);
+ struct scheduler *sched;
+ unsigned int cpu = smp_processor_id();
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_resume, cpu);
}
#ifdef CONFIG_COMPAT
Index: xen-4.0.1-testing/xen/common/softirq.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/softirq.c
+++ xen-4.0.1-testing/xen/common/softirq.c
@@ -88,9 +88,11 @@ void raise_softirq(unsigned int nr)
}
static LIST_HEAD(tasklet_list);
+static DEFINE_PER_CPU(struct list_head, tasklet_list_pcpu);
static DEFINE_SPINLOCK(tasklet_lock);
-void tasklet_schedule(struct tasklet *t)
+static void tasklet_schedule_list(struct tasklet *t, struct list_head *tlist,
+ int cpu)
{
unsigned long flags;
@@ -101,28 +103,44 @@ void tasklet_schedule(struct tasklet *t)
if ( !t->is_scheduled && !t->is_running )
{
BUG_ON(!list_empty(&t->list));
- list_add_tail(&t->list, &tasklet_list);
+ list_add_tail(&t->list, tlist);
}
t->is_scheduled = 1;
- raise_softirq(TASKLET_SOFTIRQ);
+ if ( cpu == smp_processor_id() )
+ raise_softirq(TASKLET_SOFTIRQ);
+ else
+ cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
}
spin_unlock_irqrestore(&tasklet_lock, flags);
}
+void tasklet_schedule(struct tasklet *t)
+{
+ tasklet_schedule_list(t, &tasklet_list, smp_processor_id());
+}
+
+void tasklet_schedule_cpu(struct tasklet *t, int cpu)
+{
+ tasklet_schedule_list(t, &per_cpu(tasklet_list_pcpu, cpu), cpu);
+}
+
static void tasklet_action(void)
{
+ struct list_head *tlist;
struct tasklet *t;
spin_lock_irq(&tasklet_lock);
- if ( list_empty(&tasklet_list) )
+ tlist = ( list_empty(&this_cpu(tasklet_list_pcpu)) ) ? &tasklet_list :
+ &this_cpu(tasklet_list_pcpu);
+ if ( list_empty(tlist) )
{
spin_unlock_irq(&tasklet_lock);
return;
}
- t = list_entry(tasklet_list.next, struct tasklet, list);
+ t = list_entry(tlist->next, struct tasklet, list);
list_del_init(&t->list);
BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
@@ -138,14 +156,15 @@ static void tasklet_action(void)
if ( t->is_scheduled )
{
BUG_ON(t->is_dead || !list_empty(&t->list));
- list_add_tail(&t->list, &tasklet_list);
+ list_add_tail(&t->list, tlist);
}
/*
* If there is more work to do then reschedule. We don't grab more work
* immediately as we want to allow other softirq work to happen first.
*/
- if ( !list_empty(&tasklet_list) )
+ if ( !list_empty(&tasklet_list) ||
+ !list_empty(&this_cpu(tasklet_list_pcpu)) )
raise_softirq(TASKLET_SOFTIRQ);
spin_unlock_irq(&tasklet_lock);
@@ -186,6 +205,12 @@ void tasklet_init(
void __init softirq_init(void)
{
+ int i;
+
+ for_each_possible_cpu ( i )
+ {
+ INIT_LIST_HEAD(&per_cpu(tasklet_list_pcpu, i));
+ }
open_softirq(TASKLET_SOFTIRQ, tasklet_action);
}
Index: xen-4.0.1-testing/xen/common/sysctl.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/sysctl.c
+++ xen-4.0.1-testing/xen/common/sysctl.c
@@ -314,6 +314,14 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
}
break;
+ case XEN_SYSCTL_cpupool_op:
+ {
+ ret = cpupool_do_sysctl(&op->u.cpupool_op);
+ if ( (ret == 0) && copy_to_guest(u_sysctl, op, 1) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = arch_do_sysctl(op, u_sysctl);
break;
Index: xen-4.0.1-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/asm-x86/domain.h
+++ xen-4.0.1-testing/xen/include/asm-x86/domain.h
@@ -458,7 +458,8 @@ struct arch_vcpu
#define hvm_svm hvm_vcpu.u.svm
/* Continue the current hypercall via func(data) on specified cpu. */
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+ long (*func)(void *hdl, void *data), void *data);
void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);
Index: xen-4.0.1-testing/xen/include/asm-x86/smp.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/asm-x86/smp.h
+++ xen-4.0.1-testing/xen/include/asm-x86/smp.h
@@ -56,7 +56,6 @@ extern u32 cpu_2_logical_apicid[];
#define CPU_ONLINE 0x0002 /* CPU is up */
#define CPU_DEAD 0x0004 /* CPU is dead */
DECLARE_PER_CPU(int, cpu_state);
-extern spinlock_t(cpu_add_remove_lock);
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
extern int cpu_down(unsigned int cpu);
Index: xen-4.0.1-testing/xen/include/public/domctl.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/public/domctl.h
+++ xen-4.0.1-testing/xen/include/public/domctl.h
@@ -60,10 +60,10 @@ struct xen_domctl_createdomain {
/* Should domain memory integrity be verifed by tboot during Sx? */
#define _XEN_DOMCTL_CDF_s3_integrity 2
#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
- uint32_t flags;
/* Disable out-of-sync shadow page tables? */
#define _XEN_DOMCTL_CDF_oos_off 3
#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off)
+ uint32_t flags;
};
typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
@@ -106,6 +106,7 @@ struct xen_domctl_getdomaininfo {
uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
uint32_t ssidref;
xen_domain_handle_t handle;
+ uint32_t cpupool;
};
typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
@@ -781,7 +782,6 @@ struct xen_domctl_mem_sharing_op {
typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);
-
struct xen_domctl {
uint32_t cmd;
#define XEN_DOMCTL_createdomain 1
Index: xen-4.0.1-testing/xen/include/public/sysctl.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/public/sysctl.h
+++ xen-4.0.1-testing/xen/include/public/sysctl.h
@@ -491,6 +491,28 @@ struct xen_sysctl_lockprof_op {
typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
+#define XEN_SYSCTL_cpupool_op 18
+/* XEN_SYSCTL_cpupool_op */
+#define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */
+#define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */
+#define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */
+#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */
+#define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */
+#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */
+#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */
+#define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF
+struct xen_sysctl_cpupool_op {
+ uint32_t op; /* IN */
+ uint32_t cpupool_id; /* IN: CDIARM OUT: CI */
+ uint32_t sched_id; /* IN: C OUT: I */
+ uint32_t domid; /* IN: M */
+ uint32_t cpu; /* IN: AR */
+ uint32_t n_dom; /* OUT: I */
+ struct xenctl_cpumap cpumap; /* OUT: IF */
+};
+typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t);
+
struct xen_sysctl {
uint32_t cmd;
uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -509,6 +531,7 @@ struct xen_sysctl {
struct xen_sysctl_pm_op pm_op;
struct xen_sysctl_page_offline_op page_offline;
struct xen_sysctl_lockprof_op lockprof_op;
+ struct xen_sysctl_cpupool_op cpupool_op;
uint8_t pad[128];
} u;
};
Index: xen-4.0.1-testing/xen/include/xen/sched-if.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/xen/sched-if.h
+++ xen-4.0.1-testing/xen/include/xen/sched-if.h
@@ -10,16 +10,29 @@
#include <xen/percpu.h>
+
+/* A global pointer to the initial cpupool (POOL0). */
+extern struct cpupool *cpupool0;
+
+/* cpus currently in no cpupool */
+extern cpumask_t cpupool_free_cpus;
+
+/* cpupool lock (used for cpu on/offline, too) */
+extern spinlock_t cpupool_lock;
+
struct schedule_data {
spinlock_t schedule_lock; /* spinlock protecting curr */
struct vcpu *curr; /* current task */
struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
+ void *sched_idlevpriv; /* default scheduler vcpu data */
struct timer s_timer; /* scheduling timer */
atomic_t urgent_count; /* how many urgent vcpus */
} __cacheline_aligned;
DECLARE_PER_CPU(struct schedule_data, schedule_data);
+DECLARE_PER_CPU(struct scheduler *, scheduler);
+DECLARE_PER_CPU(struct cpupool *, cpupool);
static inline void vcpu_schedule_lock(struct vcpu *v)
{
@@ -59,28 +72,49 @@ struct scheduler {
char *name; /* full name for this scheduler */
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
+ void *sched_data; /* global data pointer */
+
+ int (*init) (struct scheduler *);
+ void (*deinit) (struct scheduler *);
- void (*init) (void);
+ void (*free_vdata) (struct scheduler *, void *);
+ void * (*alloc_vdata) (struct scheduler *, struct vcpu *,
+ void *);
+ void (*free_pdata) (struct scheduler *, void *, int);
+ void * (*alloc_pdata) (struct scheduler *, int);
+ void (*free_domdata) (struct scheduler *, void *);
+ void * (*alloc_domdata) (struct scheduler *, struct domain *);
- int (*init_domain) (struct domain *);
- void (*destroy_domain) (struct domain *);
+ int (*init_domain) (struct scheduler *, struct domain *);
+ void (*destroy_domain) (struct scheduler *, struct domain *);
- int (*init_vcpu) (struct vcpu *);
- void (*destroy_vcpu) (struct vcpu *);
+ void (*insert_vcpu) (struct scheduler *, struct vcpu *);
+ void (*destroy_vcpu) (struct scheduler *, struct vcpu *);
- void (*sleep) (struct vcpu *);
- void (*wake) (struct vcpu *);
+ void (*sleep) (struct scheduler *, struct vcpu *);
+ void (*wake) (struct scheduler *, struct vcpu *);
- struct task_slice (*do_schedule) (s_time_t);
+ struct task_slice (*do_schedule) (struct scheduler *, s_time_t);
- int (*pick_cpu) (struct vcpu *);
- int (*adjust) (struct domain *,
+ int (*pick_cpu) (struct scheduler *, struct vcpu *);
+ int (*adjust) (struct scheduler *, struct domain *,
struct xen_domctl_scheduler_op *);
- void (*dump_settings) (void);
- void (*dump_cpu_state) (int);
+ void (*dump_settings) (struct scheduler *);
+ void (*dump_cpu_state) (struct scheduler *, int);
- void (*tick_suspend) (void);
- void (*tick_resume) (void);
+ void (*tick_suspend) (struct scheduler *, unsigned int);
+ void (*tick_resume) (struct scheduler *, unsigned int);
};
+struct cpupool
+{
+ int cpupool_id; /* pool identifier */
+ cpumask_t cpu_valid; /* all cpus assigned to pool */
+ struct cpupool *next; /* presumably links the list of pools -- TODO confirm in cpupool.c */
+ unsigned int n_dom; /* presumably the number of domains in this pool -- TODO confirm */
+ struct scheduler sched; /* this pool's own scheduler instance (what DOM2OP() selects) */
+};
+
+struct scheduler *scheduler_get_by_id(unsigned int id);
+
#endif /* __XEN_SCHED_IF_H__ */
Index: xen-4.0.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/xen/sched.h
+++ xen-4.0.1-testing/xen/include/xen/sched.h
@@ -9,6 +9,7 @@
#include <xen/shared.h>
#include <public/xen.h>
#include <public/domctl.h>
+#include <public/sysctl.h>
#include <public/vcpu.h>
#include <public/xsm/acm.h>
#include <xen/time.h>
@@ -132,8 +133,6 @@ struct vcpu
bool_t defer_shutdown;
/* VCPU is paused following shutdown request (d->is_shutting_down)? */
bool_t paused_for_shutdown;
- /* VCPU affinity is temporarily locked from controller changes? */
- bool_t affinity_locked;
/*
* > 0: a single port is being polled;
@@ -209,6 +208,7 @@ struct domain
/* Scheduling. */
void *sched_priv; /* scheduler-specific data */
+ struct cpupool *cpupool;
struct domain *next_in_list;
struct domain *next_in_hashbucket;
@@ -381,7 +381,7 @@ static inline struct domain *get_current
}
struct domain *domain_create(
- domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
+ domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm 0
#define DOMCRF_hvm (1U<<_DOMCRF_hvm)
@@ -469,6 +469,7 @@ int sched_init_vcpu(struct vcpu *v, uns
void sched_destroy_vcpu(struct vcpu *v);
int sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
+int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int sched_id(void);
void sched_tick_suspend(void);
@@ -576,12 +577,14 @@ void domain_pause_by_systemcontroller(st
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);
+struct scheduler;
+
+int schedule_init_global(char *name, struct scheduler *sched);
+void schedule_deinit_global(struct scheduler *sched);
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
-void cpu_disable_scheduler(void);
+int cpu_disable_scheduler(unsigned int cpu, int lock);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);
@@ -604,6 +607,18 @@ extern enum cpufreq_controller {
FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
+#define CPUPOOLID_NONE -1
+
+struct cpupool *cpupool_create(int poolid, char *sched);
+int cpupool_destroy(struct cpupool *c);
+int cpupool0_cpu_assign(struct cpupool *c);
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu);
+void cpupool_cpu_add(unsigned int cpu);
+int cpupool_add_domain(struct domain *d, int poolid);
+void cpupool_rm_domain(struct domain *d);
+int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
+#define num_cpupool_cpus(c) (cpus_weight((c)->cpu_valid))
+
#endif /* __SCHED_H__ */
/*
Index: xen-4.0.1-testing/xen/include/xen/softirq.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/xen/softirq.h
+++ xen-4.0.1-testing/xen/include/xen/softirq.h
@@ -58,6 +58,7 @@ struct tasklet
struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
void tasklet_schedule(struct tasklet *t);
+void tasklet_schedule_cpu(struct tasklet *t, int cpu);
void tasklet_kill(struct tasklet *t);
void tasklet_init(
struct tasklet *t, void (*func)(unsigned long), unsigned long data);