diff --git a/21151-trace-bounds-check.patch b/21151-trace-bounds-check.patch new file mode 100644 index 0000000..3da6d68 --- /dev/null +++ b/21151-trace-bounds-check.patch @@ -0,0 +1,205 @@ +# HG changeset patch +# User Keir Fraser +# Date 1271091288 -3600 +# Node ID 94cae4dfa25bcf9aaeb93fb374926cb40411ebdf +# Parent 78488a63bbc200095413824cc146134b54635da9 +xentrace: Bounds checking and error handling + +Check tbuf_size to make sure that it will fit on the t_info struct +allocated at boot. Also deal with allocation failures more +gracefully. + +Signed-off-by: George Dunlap + +# HG changeset patch +# User Keir Fraser +# Date 1278093165 -3600 +# Node ID 2f3a68a0b55b1b7df4d6632dfc151040ba08e9ea +# Parent 2846fd19945cb2ab32d1513531c3500278133484 +trace: Fix T_INFO_FIRST_OFFSET calculation + +This wasn't defined correctly, thus allowing in the +num_online_cpus() == NR_CPUS case to pass a corrupted MFN to +Dom0. + +Reported-by: Jan Beulich +Signed-off-by: George Dunlap + +# HG changeset patch +# User Keir Fraser +# Date 1278093190 -3600 +# Node ID 1390e2ab45c7b63d79ba9496d609cf59af4b44ee +# Parent 2f3a68a0b55b1b7df4d6632dfc151040ba08e9ea +trace: improve check_tbuf_size() + +It didn't consider the case of the incoming size not allowing for the +2*data_size range for t_buf->{prod,cons} + +Signed-off-by: Jan Beulich +Signed-off-by: George Dunlap + +--- a/xen/common/trace.c ++++ b/xen/common/trace.c +@@ -48,10 +48,12 @@ integer_param("tbuf_size", opt_tbuf_size + /* Pointers to the meta-data objects for all system trace buffers */ + static struct t_info *t_info; + #define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. 
*/ ++#define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE)) + static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); + static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); + static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); + static int data_size; ++static u32 t_info_first_offset __read_mostly; + + /* High water mark for trace buffers; */ + /* Send virtual interrupt when buffer level reaches this point */ +@@ -71,6 +73,39 @@ static cpumask_t tb_cpu_mask = CPU_MASK_ + /* which tracing events are enabled */ + static u32 tb_event_mask = TRC_ALL; + ++/* Return the number of elements _type necessary to store at least _x bytes of data ++ * i.e., sizeof(_type) * ans >= _x. */ ++#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type)) ++ ++static void calc_tinfo_first_offset(void) ++{ ++ int offset_in_bytes; ++ ++ offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]); ++ ++ t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes); ++ ++ gdprintk(XENLOG_INFO, "%s: NR_CPUs %d, offset_in_bytes %d, t_info_first_offset %u\n", ++ __func__, NR_CPUS, offset_in_bytes, (unsigned)t_info_first_offset); ++} ++ ++/** ++ * check_tbuf_size - check to make sure that the proposed size will fit ++ * in the currently sized struct t_info and allows prod and cons to ++ * reach double the value without overflow. ++ */ ++static int check_tbuf_size(u32 pages) ++{ ++ struct t_buf dummy; ++ typeof(dummy.prod) size; ++ ++ size = ((typeof(dummy.prod))pages) * PAGE_SIZE; ++ ++ return (size / PAGE_SIZE != pages) ++ || (size + size < size) ++ || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE / sizeof(uint32_t)); ++} ++ + /** + * alloc_trace_bufs - performs initialization of the per-cpu trace buffers. 
+ * +@@ -87,7 +122,9 @@ static int alloc_trace_bufs(void) + unsigned long nr_pages; + /* Start after a fixed-size array of NR_CPUS */ + uint32_t *t_info_mfn_list = (uint32_t *)t_info; +- int offset = (NR_CPUS * 2 + 1 + 1) / 4; ++ int offset = t_info_first_offset; ++ ++ BUG_ON(check_tbuf_size(opt_tbuf_size)); + + if ( opt_tbuf_size == 0 ) + return -EINVAL; +@@ -180,7 +217,8 @@ out_dealloc: + } + spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags); + } +- return -EINVAL; ++ ++ return -ENOMEM; + } + + +@@ -197,19 +235,35 @@ static int tb_set_size(int size) + * boot time or via control tools, but not by both. Once buffers + * are created they cannot be destroyed. + */ +- if ( (opt_tbuf_size != 0) || (size <= 0) ) ++ int ret = 0; ++ ++ ++ ++ if ( (opt_tbuf_size != 0) ) + { +- gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n", +- opt_tbuf_size, size); ++ if ( size != opt_tbuf_size ) ++ gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n", ++ opt_tbuf_size, size); + return -EINVAL; + } + +- opt_tbuf_size = size; +- if ( alloc_trace_bufs() != 0 ) ++ if ( size <= 0 ) + return -EINVAL; + +- printk("Xen trace buffers: initialized\n"); +- return 0; ++ if ( check_tbuf_size(size) ) ++ { ++ gdprintk(XENLOG_INFO, "tb size %d too large\n", size); ++ return -EINVAL; ++ } ++ ++ opt_tbuf_size = size; ++ ++ if ( (ret = alloc_trace_bufs()) == 0 ) ++ printk("Xen trace buffers: initialized\n"); ++ else ++ opt_tbuf_size = 0; ++ ++ return ret; + } + + int trace_will_trace_event(u32 event) +@@ -248,6 +302,10 @@ int trace_will_trace_event(u32 event) + void __init init_trace_bufs(void) + { + int i; ++ ++ /* Calculate offset in u32 of first mfn */ ++ calc_tinfo_first_offset(); ++ + /* t_info size fixed at 2 pages for now. That should be big enough / small enough + * until it's worth making it dynamic. 
*/ + t_info = alloc_xenheap_pages(1, 0); +@@ -265,13 +323,18 @@ void __init init_trace_bufs(void) + share_xen_page_with_privileged_guests( + virt_to_page(t_info) + i, XENSHARE_writable); + +- +- + if ( opt_tbuf_size == 0 ) + { + printk("Xen trace buffers: disabled\n"); + return; + } ++ else if ( check_tbuf_size(opt_tbuf_size) ) ++ { ++ gdprintk(XENLOG_INFO, "Xen trace buffers: " ++ "tb size %d too large, disabling\n", ++ opt_tbuf_size); ++ opt_tbuf_size = 0; ++ } + + if ( alloc_trace_bufs() == 0 ) + { +@@ -279,6 +342,13 @@ void __init init_trace_bufs(void) + wmb(); /* above must be visible before tb_init_done flag set */ + tb_init_done = 1; + } ++ else ++ { ++ gdprintk(XENLOG_INFO, "Xen trace buffers: " ++ "allocation size %d failed, disabling\n", ++ opt_tbuf_size); ++ opt_tbuf_size = 0; ++ } + } + + /** diff --git a/21333-xentrace-t_info-size.patch b/21333-xentrace-t_info-size.patch index 252a4ae..91b4f9d 100644 --- a/21333-xentrace-t_info-size.patch +++ b/21333-xentrace-t_info-size.patch @@ -11,9 +11,11 @@ is more than 1024 but less than 2048. Signed-off-by: George Dunlap ---- a/xen/common/trace.c -+++ b/xen/common/trace.c -@@ -297,7 +297,7 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc +Index: xen-4.0.0-testing/xen/common/trace.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/common/trace.c ++++ xen-4.0.0-testing/xen/common/trace.c +@@ -367,7 +367,7 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc case XEN_SYSCTL_TBUFOP_get_info: tbc->evt_mask = tb_event_mask; tbc->buffer_mfn = t_info ? 
virt_to_mfn(t_info) : 0; diff --git a/21360-x86-mce-polling-diabled-init.patch b/21360-x86-mce-polling-disabled-init.patch similarity index 100% rename from 21360-x86-mce-polling-diabled-init.patch rename to 21360-x86-mce-polling-disabled-init.patch diff --git a/21445-x86-tsc-handling-cleanups-v2.patch b/21445-x86-tsc-handling-cleanups-v2.patch new file mode 100644 index 0000000..db93ecb --- /dev/null +++ b/21445-x86-tsc-handling-cleanups-v2.patch @@ -0,0 +1,268 @@ +# HG changeset patch +# User Keir Fraser +# Date 1278094440 -3600 +# Node ID a9c458ab90e4ecb25383456be653368ecd900ee4 +# Parent 322468d5ab6ceca4afa21977a02f4492308d2ddc +x86: TSC handling cleanups (version 2) + +"I am removing the tsc_scaled variable that is never actually used +because when tsc needs to be scaled vtsc is 1. I am also making this +more explicit in tsc_set_info. I am also removing hvm_domain.gtsc_khz +that is a duplicate of d->arch.tsc_khz. I am using scale_delta(delta, +&d->arch.ns_to_vtsc) to scale the tsc value before returning it to the +guest like in the pv case. I added a feature flag to specify that the +pvclock algorithm is safe to be used in an HVM guest so that the guest +can now use it without hanging." + +Version 2 fixes a bug which breaks PV domU time. 
+ +Signed-off-by: Stefano Stabellini +xen-unstable changeset: 21445:c1ed00d49534 +xen-unstable date: Sat May 22 06:31:47 2010 +0100 + +Index: xen-4.0.0-testing/xen/arch/x86/hvm/hvm.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/hvm/hvm.c ++++ xen-4.0.0-testing/xen/arch/x86/hvm/hvm.c +@@ -152,32 +152,6 @@ void hvm_set_rdtsc_exiting(struct domain + hvm_funcs.set_rdtsc_exiting(v, enable); + } + +-int hvm_gtsc_need_scale(struct domain *d) +-{ +- uint32_t gtsc_mhz, htsc_mhz; +- +- if ( d->arch.vtsc ) +- return 0; +- +- gtsc_mhz = d->arch.hvm_domain.gtsc_khz / 1000; +- htsc_mhz = (uint32_t)cpu_khz / 1000; +- +- d->arch.hvm_domain.tsc_scaled = (gtsc_mhz && (gtsc_mhz != htsc_mhz)); +- return d->arch.hvm_domain.tsc_scaled; +-} +- +-static u64 hvm_h2g_scale_tsc(struct vcpu *v, u64 host_tsc) +-{ +- uint32_t gtsc_khz, htsc_khz; +- +- if ( !v->domain->arch.hvm_domain.tsc_scaled ) +- return host_tsc; +- +- htsc_khz = cpu_khz; +- gtsc_khz = v->domain->arch.hvm_domain.gtsc_khz; +- return muldiv64(host_tsc, gtsc_khz, htsc_khz); +-} +- + void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc) + { + uint64_t tsc; +@@ -185,11 +159,11 @@ void hvm_set_guest_tsc(struct vcpu *v, u + if ( v->domain->arch.vtsc ) + { + tsc = hvm_get_guest_time(v); ++ tsc = gtime_to_gtsc(v->domain, tsc); + } + else + { + rdtscll(tsc); +- tsc = hvm_h2g_scale_tsc(v, tsc); + } + + v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - tsc; +@@ -203,12 +177,12 @@ u64 hvm_get_guest_tsc(struct vcpu *v) + if ( v->domain->arch.vtsc ) + { + tsc = hvm_get_guest_time(v); ++ tsc = gtime_to_gtsc(v->domain, tsc); + v->domain->arch.vtsc_kerncount++; + } + else + { + rdtscll(tsc); +- tsc = hvm_h2g_scale_tsc(v, tsc); + } + + return tsc + v->arch.hvm_vcpu.cache_tsc_offset; +Index: xen-4.0.0-testing/xen/arch/x86/hvm/save.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/hvm/save.c ++++ 
xen-4.0.0-testing/xen/arch/x86/hvm/save.c +@@ -33,7 +33,7 @@ void arch_hvm_save(struct domain *d, str + hdr->cpuid = eax; + + /* Save guest's preferred TSC. */ +- hdr->gtsc_khz = d->arch.hvm_domain.gtsc_khz; ++ hdr->gtsc_khz = d->arch.tsc_khz; + } + + int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr) +@@ -62,8 +62,8 @@ int arch_hvm_load(struct domain *d, stru + + /* Restore guest's preferred TSC frequency. */ + if ( hdr->gtsc_khz ) +- d->arch.hvm_domain.gtsc_khz = hdr->gtsc_khz; +- if ( hvm_gtsc_need_scale(d) ) ++ d->arch.tsc_khz = hdr->gtsc_khz; ++ if ( d->arch.vtsc ) + { + hvm_set_rdtsc_exiting(d, 1); + gdprintk(XENLOG_WARNING, "Domain %d expects freq %uMHz " +Index: xen-4.0.0-testing/xen/arch/x86/hvm/vpt.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/hvm/vpt.c ++++ xen-4.0.0-testing/xen/arch/x86/hvm/vpt.c +@@ -32,9 +32,6 @@ void hvm_init_guest_time(struct domain * + spin_lock_init(&pl->pl_time_lock); + pl->stime_offset = -(u64)get_s_time(); + pl->last_guest_time = 0; +- +- d->arch.hvm_domain.gtsc_khz = cpu_khz; +- d->arch.hvm_domain.tsc_scaled = 0; + } + + u64 hvm_get_guest_time(struct vcpu *v) +Index: xen-4.0.0-testing/xen/arch/x86/time.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/time.c ++++ xen-4.0.0-testing/xen/arch/x86/time.c +@@ -850,8 +850,13 @@ static void __update_vcpu_system_time(st + + if ( d->arch.vtsc ) + { +- u64 delta = max_t(s64, t->stime_local_stamp - d->arch.vtsc_offset, 0); +- tsc_stamp = scale_delta(delta, &d->arch.ns_to_vtsc); ++ u64 stime = t->stime_local_stamp; ++ if ( is_hvm_domain(d) ) ++ { ++ struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time; ++ stime += pl->stime_offset + v->arch.hvm_vcpu.stime_offset; ++ } ++ tsc_stamp = gtime_to_gtsc(d, stime); + } + else + { +@@ -874,6 +879,8 @@ static void __update_vcpu_system_time(st + _u.tsc_to_system_mul = t->tsc_scale.mul_frac; + _u.tsc_shift 
= (s8)t->tsc_scale.shift; + } ++ if ( is_hvm_domain(d) ) ++ _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset; + + /* Don't bother unless timestamp record has changed or we are forced. */ + _u.version = u->version; /* make versions match for memcmp test */ +@@ -1640,11 +1647,17 @@ struct tm wallclock_time(void) + * PV SoftTSC Emulation. + */ + ++u64 gtime_to_gtsc(struct domain *d, u64 tsc) ++{ ++ if ( !is_hvm_domain(d) ) ++ tsc = max_t(s64, tsc - d->arch.vtsc_offset, 0); ++ return scale_delta(tsc, &d->arch.ns_to_vtsc); ++} ++ + void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp) + { + s_time_t now = get_s_time(); + struct domain *d = v->domain; +- u64 delta; + + spin_lock(&d->arch.vtsc_lock); + +@@ -1660,8 +1673,7 @@ void pv_soft_rdtsc(struct vcpu *v, struc + + spin_unlock(&d->arch.vtsc_lock); + +- delta = max_t(s64, now - d->arch.vtsc_offset, 0); +- now = scale_delta(delta, &d->arch.ns_to_vtsc); ++ now = gtime_to_gtsc(d, now); + + regs->eax = (uint32_t)now; + regs->edx = (uint32_t)(now >> 32); +@@ -1802,8 +1814,10 @@ void tsc_set_info(struct domain *d, + d->arch.vtsc_offset = get_s_time() - elapsed_nsec; + d->arch.tsc_khz = gtsc_khz ? 
gtsc_khz : cpu_khz; + set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 ); +- /* use native TSC if initial host has safe TSC and not migrated yet */ +- if ( host_tsc_is_safe() && incarnation == 0 ) ++ /* use native TSC if initial host has safe TSC, has not migrated ++ * yet and tsc_khz == cpu_khz */ ++ if ( host_tsc_is_safe() && incarnation == 0 && ++ d->arch.tsc_khz == cpu_khz ) + d->arch.vtsc = 0; + else + d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns); +@@ -1828,7 +1842,7 @@ void tsc_set_info(struct domain *d, + } + d->arch.incarnation = incarnation + 1; + if ( is_hvm_domain(d) ) +- hvm_set_rdtsc_exiting(d, d->arch.vtsc || hvm_gtsc_need_scale(d)); ++ hvm_set_rdtsc_exiting(d, d->arch.vtsc); + } + + /* vtsc may incur measurable performance degradation, diagnose with this */ +Index: xen-4.0.0-testing/xen/common/kernel.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/common/kernel.c ++++ xen-4.0.0-testing/xen/common/kernel.c +@@ -243,6 +243,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL + fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) | + (1U << XENFEAT_highmem_assist) | + (1U << XENFEAT_gnttab_map_avail_bits); ++ else ++ fi.submap |= (1U << XENFEAT_hvm_safe_pvclock); + #endif + break; + default: +Index: xen-4.0.0-testing/xen/include/asm-x86/hvm/domain.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/include/asm-x86/hvm/domain.h ++++ xen-4.0.0-testing/xen/include/asm-x86/hvm/domain.h +@@ -45,8 +45,6 @@ struct hvm_domain { + struct hvm_ioreq_page ioreq; + struct hvm_ioreq_page buf_ioreq; + +- uint32_t gtsc_khz; /* kHz */ +- bool_t tsc_scaled; + struct pl_time pl_time; + + struct hvm_io_handler io_handler; +Index: xen-4.0.0-testing/xen/include/asm-x86/hvm/hvm.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/include/asm-x86/hvm/hvm.h ++++ 
xen-4.0.0-testing/xen/include/asm-x86/hvm/hvm.h +@@ -290,7 +290,6 @@ int hvm_event_needs_reinjection(uint8_t + uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2); + + void hvm_set_rdtsc_exiting(struct domain *d, bool_t enable); +-int hvm_gtsc_need_scale(struct domain *d); + + static inline int + hvm_cpu_prepare(unsigned int cpu) +Index: xen-4.0.0-testing/xen/include/asm-x86/time.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/include/asm-x86/time.h ++++ xen-4.0.0-testing/xen/include/asm-x86/time.h +@@ -60,6 +60,7 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic + uint64_t ns_to_acpi_pm_tick(uint64_t ns); + + void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp); ++u64 gtime_to_gtsc(struct domain *d, u64 tsc); + + void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec, + uint32_t gtsc_khz, uint32_t incarnation); +Index: xen-4.0.0-testing/xen/include/public/features.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/include/public/features.h ++++ xen-4.0.0-testing/xen/include/public/features.h +@@ -68,6 +68,9 @@ + */ + #define XENFEAT_gnttab_map_avail_bits 7 + ++/* x86: pvclock algorithm is safe to use on HVM */ ++#define XENFEAT_hvm_safe_pvclock 9 ++ + #define XENFEAT_NR_SUBMAPS 1 + + #endif /* __XEN_PUBLIC_FEATURES_H__ */ diff --git a/21627-cpuidle-wrap.patch b/21627-cpuidle-wrap.patch new file mode 100644 index 0000000..18da1d3 --- /dev/null +++ b/21627-cpuidle-wrap.patch @@ -0,0 +1,23 @@ +# HG changeset patch +# User Keir Fraser +# Date 1276761018 -3600 +# Node ID 7a00c0bd4fc131fb4de5df9f3fdc4e48a29dd5f9 +# Parent dab8676e97ce7a95c0777e58eee4b1b03bfc5322 +cpuidle: fix wrapped ticks calculation for pm timer. 
+ +Signed-off-by: Wei Gang + +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -127,9 +127,9 @@ static inline u32 ticks_elapsed(u32 t1, + if ( t2 >= t1 ) + return (t2 - t1); + else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) ) +- return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); ++ return (((0x00FFFFFF - t1) + t2 + 1) & 0x00FFFFFF); + else +- return ((0xFFFFFFFF - t1) + t2); ++ return ((0xFFFFFFFF - t1) + t2 +1); + } + + static void acpi_safe_halt(void) diff --git a/21643-vmx-vpmu-pmc-offset.patch b/21643-vmx-vpmu-pmc-offset.patch new file mode 100644 index 0000000..be0eafb --- /dev/null +++ b/21643-vmx-vpmu-pmc-offset.patch @@ -0,0 +1,25 @@ +# HG changeset patch +# User Keir Fraser +# Date 1277110750 -3600 +# Node ID 31708477f0a92be70a940d1c8ff1aa721051bba8 +# Parent 46a4c936b77e483971d2b3eb0b544c61700f824a +vmx: Fix bug in VMX VPMU fixed function PMC offset + +This is a minor fix to the calculation of bit-width of fixed function +perfmon counters in Intel processors. Bits 5-12 of edx register +should be calculated as (edx & 0x1fe0) >>5 instead of using 0x1f70. 
+ +From: "John, Jaiber J" +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c ++++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c +@@ -82,7 +82,7 @@ static int core2_get_bitwidth_fix_count( + { + u32 eax, ebx, ecx, edx; + cpuid(0xa, &eax, &ebx, &ecx, &edx); +- return ((edx & 0x1f70) >> 5); ++ return ((edx & 0x1fe0) >> 5); + } + + static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index) diff --git a/21678-xend-mac-fix.patch b/21678-xend-mac-fix.patch new file mode 100644 index 0000000..5ff0672 --- /dev/null +++ b/21678-xend-mac-fix.patch @@ -0,0 +1,39 @@ +# HG changeset patch +# User Ian Jackson +# Date 1277475191 -3600 +# Node ID e307aa11ed27ea91cf175461b3a715fe3f7253bc +# Parent e7b55cc5533aed48a47cf70e20aa9fb991bf2de4 +xend: Fix up check "mac" address sooner change + +In changeset 21653, + dev_type = sxp.name(dev_config) +should not have been moved, otherwise, the checking "mac" +paragraph is of no use. + +(The original patch as submitted was correct but I had to make the +change manually as it had been mangled.) 
+ +Signed-off-by: Ian Jackson +Signed-off-by: Chunyan Liu + +Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -847,6 +847,8 @@ class XendDomainInfo: + @type dev_config: SXP object (parsed config) + """ + log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config)) ++ dev_type = sxp.name(dev_config) ++ + if dev_type == 'vif': + for x in dev_config: + if x != 'vif' and x[0] == 'mac': +@@ -854,7 +856,6 @@ class XendDomainInfo: + log.error("Virtual network interface creation error - invalid MAC Address entered: %s", x[1]) + raise VmError("Cannot create a new virtual network interface - MAC address is not valid!"); + +- dev_type = sxp.name(dev_config) + dev_uuid = self.info.device_add(dev_type, cfg_sxp = dev_config) + dev_config_dict = self.info['devices'][dev_uuid][1] + log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config_dict)) diff --git a/21682-trace-buffer-range.patch b/21682-trace-buffer-range.patch new file mode 100644 index 0000000..7ba7645 --- /dev/null +++ b/21682-trace-buffer-range.patch @@ -0,0 +1,26 @@ +# HG changeset patch +# User Keir Fraser +# Date 1277738876 -3600 +# Node ID 7e46fdbe8a1187cee2ab609256300d7967f37f06 +# Parent bf64e1081333696c68c9430cbc32c8bd6ee18796 +xentrace: restrict trace buffer MFNs + +Since they're being passed to Dom0 using an array of uint32_t, they +must be representable as 32-bit quantities, and hence the buffer +allocation must specify an upper address boundary. 
+ +Signed-off-by: Jan Beulich +Acked-by: George Dunlap + +--- a/xen/common/trace.c ++++ b/xen/common/trace.c +@@ -152,7 +152,8 @@ static int alloc_trace_bufs(void) + char *rawbuf; + struct t_buf *buf; + +- if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL ) ++ if ( (rawbuf = alloc_xenheap_pages( ++ order, MEMF_bits(32 + PAGE_SHIFT))) == NULL ) + { + printk("Xen trace buffers: memory allocation failed\n"); + opt_tbuf_size = 0; diff --git a/21683-vtd-kill-timer-conditional.patch b/21683-vtd-kill-timer-conditional.patch new file mode 100644 index 0000000..94b6b9e --- /dev/null +++ b/21683-vtd-kill-timer-conditional.patch @@ -0,0 +1,43 @@ +# HG changeset patch +# User Keir Fraser +# Date 1277739919 -3600 +# Node ID 059a12afce52a213db56bd8e9442d9eeadfdd34c +# Parent 7e46fdbe8a1187cee2ab609256300d7967f37f06 +vtd: Only kill_timer() an init_timer()'ed timer. + +Signed-off-by: Keir Fraser + +--- a/xen/drivers/passthrough/io.c ++++ b/xen/drivers/passthrough/io.c +@@ -27,7 +27,7 @@ + + static void hvm_dirq_assist(unsigned long _d); + +-static int pt_irq_need_timer(uint32_t flags) ++bool_t pt_irq_need_timer(uint32_t flags) + { + return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE)); + } +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -257,7 +257,9 @@ static void pci_clean_dpci_irqs(struct d + i = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, i + 1) ) + { + pirq_guest_unbind(d, i); +- kill_timer(&hvm_irq_dpci->hvm_timer[domain_pirq_to_irq(d, i)]); ++ ++ if ( pt_irq_need_timer(hvm_irq_dpci->mirq[i].flags) ) ++ kill_timer(&hvm_irq_dpci->hvm_timer[domain_pirq_to_irq(d, i)]); + + list_for_each_safe ( digl_list, tmp, + &hvm_irq_dpci->mirq[i].digl_list ) +--- a/xen/include/xen/iommu.h ++++ b/xen/include/xen/iommu.h +@@ -92,6 +92,7 @@ void hvm_dpci_isairq_eoi(struct domain * + struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain); + int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci); + void 
free_hvm_irq_dpci(struct hvm_irq_dpci *dpci); ++bool_t pt_irq_need_timer(uint32_t flags); + + #define PT_IRQ_TIME_OUT MILLISECS(8) + #define VTDPREFIX "[VT-D]" diff --git a/21693-memevent-64bit-only.patch b/21693-memevent-64bit-only.patch new file mode 100644 index 0000000..5aca3aa --- /dev/null +++ b/21693-memevent-64bit-only.patch @@ -0,0 +1,286 @@ +# HG changeset patch +# User Keir Fraser +# Date 1277831801 -3600 +# Node ID 6b5a5bfaf3577a050c6779b0b62245560fda53f6 +# Parent 3ea84fd20b263a8e443e3bb16d5495cd3dbd8033 +x86: Only build memory-event features on 64-bit Xen + +32-bit Xen doesn't have enough p2m types to support them. + +Signed-off-by: Tim Deegan + +# HG changeset patch +# User Keir Fraser +# Date 1278579370 -3600 +# Node ID a7a680442b738928eb963b31e22a3e428ac111a0 +# Parent 92ac9536ac5abc17f414f024f3df92658cf2ee96 +xend: Continue domain building even if memshr extensions are not +present in the hypervisor. + +Signed-off-by: Keir Fraser + +Index: xen-4.0.0-testing/tools/python/xen/xend/image.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/image.py ++++ xen-4.0.0-testing/tools/python/xen/xend/image.py +@@ -830,8 +830,10 @@ class HVMImageHandler(ImageHandler): + self.acpi = int(vmConfig['platform'].get('acpi', 0)) + self.guest_os_type = vmConfig['platform'].get('guest_os_type') + self.memory_sharing = int(vmConfig['memory_sharing']) +- xc.dom_set_memshr(self.vm.getDomid(), self.memory_sharing) +- ++ try: ++ xc.dom_set_memshr(self.vm.getDomid(), self.memory_sharing) ++ except: ++ pass + + # Return a list of cmd line args to the device models based on the + # xm config file +Index: xen-4.0.0-testing/xen/arch/x86/domctl.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/domctl.c ++++ xen-4.0.0-testing/xen/arch/x86/domctl.c +@@ -1420,6 +1420,7 @@ long arch_do_domctl( + break; + #endif /* XEN_GDBSX_CONFIG */ + ++#ifdef 
__x86_64__ + case XEN_DOMCTL_mem_event_op: + { + struct domain *d; +@@ -1450,6 +1451,7 @@ long arch_do_domctl( + } + } + break; ++#endif /* __x86_64__ */ + + default: + ret = -ENOSYS; +Index: xen-4.0.0-testing/xen/arch/x86/hvm/hvm.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/hvm/hvm.c ++++ xen-4.0.0-testing/xen/arch/x86/hvm/hvm.c +@@ -922,6 +922,7 @@ bool_t hvm_hap_nested_page_fault(unsigne + return 1; + } + ++#ifdef __x86_64__ + /* Check if the page has been paged out */ + if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) ) + p2m_mem_paging_populate(current->domain, gfn); +@@ -932,6 +933,7 @@ bool_t hvm_hap_nested_page_fault(unsigne + mem_sharing_unshare_page(current->domain, gfn, 0); + return 1; + } ++#endif + + /* Spurious fault? PoD and log-dirty also take this path. */ + if ( p2m_is_ram(p2mt) ) +Index: xen-4.0.0-testing/xen/arch/x86/mm.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/mm.c ++++ xen-4.0.0-testing/xen/arch/x86/mm.c +@@ -3179,20 +3179,23 @@ int do_mmu_update( + rc = -ENOENT; + break; + } ++#ifdef __x86_64__ + /* XXX: Ugly: pull all the checks into a separate function. 
+ * Don't want to do it now, not to interfere with mem_paging + * patches */ + else if ( p2m_ram_shared == l1e_p2mt ) + { + /* Unshare the page for RW foreign mappings */ +- if(l1e_get_flags(l1e) & _PAGE_RW) ++ if ( l1e_get_flags(l1e) & _PAGE_RW ) + { + rc = mem_sharing_unshare_page(pg_owner, + l1e_get_pfn(l1e), + 0); +- if(rc) break; ++ if ( rc ) ++ break; + } + } ++#endif + + okay = mod_l1_entry(va, l1e, mfn, + cmd == MMU_PT_UPDATE_PRESERVE_AD, v, +@@ -4537,8 +4540,10 @@ long arch_memory_op(int op, XEN_GUEST_HA + return rc; + } + ++#ifdef __x86_64__ + case XENMEM_get_sharing_freed_pages: + return mem_sharing_get_nr_saved_mfns(); ++#endif + + default: + return subarch_memory_op(op, arg); +Index: xen-4.0.0-testing/xen/arch/x86/mm/Makefile +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/mm/Makefile ++++ xen-4.0.0-testing/xen/arch/x86/mm/Makefile +@@ -6,9 +6,9 @@ obj-y += p2m.o + obj-y += guest_walk_2.o + obj-y += guest_walk_3.o + obj-$(x86_64) += guest_walk_4.o +-obj-y += mem_event.o +-obj-y += mem_paging.o +-obj-y += mem_sharing.o ++obj-$(x86_64) += mem_event.o ++obj-$(x86_64) += mem_paging.o ++obj-$(x86_64) += mem_sharing.o + + guest_walk_%.o: guest_walk.c Makefile + $(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@ +Index: xen-4.0.0-testing/xen/arch/x86/mm/p2m.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/mm/p2m.c ++++ xen-4.0.0-testing/xen/arch/x86/mm/p2m.c +@@ -1708,17 +1708,23 @@ void p2m_teardown(struct domain *d) + { + struct page_info *pg; + struct p2m_domain *p2m = d->arch.p2m; ++#ifdef __x86_64__ + unsigned long gfn; + p2m_type_t t; + mfn_t mfn; ++#endif + + p2m_lock(p2m); +- for(gfn=0; gfn < p2m->max_mapped_pfn; gfn++) ++ ++#ifdef __x86_64__ ++ for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ ) + { + mfn = p2m->get_entry(d, gfn, &t, p2m_query); +- if(mfn_valid(mfn) && (t == p2m_ram_shared)) ++ if ( mfn_valid(mfn) && (t == 
p2m_ram_shared) ) + BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN)); + } ++#endif ++ + d->arch.phys_table = pagetable_null(); + + while ( (pg = page_list_remove_head(&p2m->pages)) ) +@@ -2410,6 +2416,7 @@ clear_mmio_p2m_entry(struct domain *d, u + return rc; + } + ++#ifdef __x86_64__ + int + set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) + { +@@ -2592,7 +2599,7 @@ void p2m_mem_paging_resume(struct domain + /* Unpause any domains that were paused because the ring was full */ + mem_event_unpause_vcpus(d); + } +- ++#endif /* __x86_64__ */ + + /* + * Local variables: +Index: xen-4.0.0-testing/xen/include/asm-x86/mem_sharing.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/include/asm-x86/mem_sharing.h ++++ xen-4.0.0-testing/xen/include/asm-x86/mem_sharing.h +@@ -22,6 +22,8 @@ + #ifndef __MEM_SHARING_H__ + #define __MEM_SHARING_H__ + ++#ifdef __x86_64__ ++ + #define sharing_supported(_d) \ + (is_hvm_domain(_d) && (_d)->arch.hvm_domain.hap_enabled) + +@@ -43,4 +45,10 @@ int mem_sharing_domctl(struct domain *d, + xen_domctl_mem_sharing_op_t *mec); + void mem_sharing_init(void); + ++#else ++ ++#define mem_sharing_init() do { } while (0) ++ ++#endif /* __x86_64__ */ ++ + #endif /* __MEM_SHARING_H__ */ +Index: xen-4.0.0-testing/xen/include/asm-x86/p2m.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/include/asm-x86/p2m.h ++++ xen-4.0.0-testing/xen/include/asm-x86/p2m.h +@@ -77,11 +77,12 @@ typedef enum { + p2m_grant_map_rw = 7, /* Read/write grant mapping */ + p2m_grant_map_ro = 8, /* Read-only grant mapping */ + ++ /* Likewise, although these are defined in all builds, they can only ++ * be used in 64-bit builds */ + p2m_ram_paging_out = 9, /* Memory that is being paged out */ + p2m_ram_paged = 10, /* Memory that has been paged out */ + p2m_ram_paging_in = 11, /* Memory that is being paged in */ + p2m_ram_paging_in_start = 12, 
/* Memory that is being paged in */ +- + p2m_ram_shared = 13, /* Shared or sharable memory */ + } p2m_type_t; + +@@ -154,6 +155,7 @@ typedef enum { + #define p2m_is_sharable(_t) (p2m_to_mask(_t) & P2M_SHARABLE_TYPES) + #define p2m_is_shared(_t) (p2m_to_mask(_t) & P2M_SHARED_TYPES) + ++ + /* Populate-on-demand */ + #define POPULATE_ON_DEMAND_MFN (1<<9) + #define POD_PAGE_ORDER 9 +@@ -314,20 +316,21 @@ static inline mfn_t gfn_to_mfn_unshare(s + int must_succeed) + { + mfn_t mfn; +- int ret; + + mfn = gfn_to_mfn(d, gfn, p2mt); +- if(p2m_is_shared(*p2mt)) ++#ifdef __x86_64__ ++ if ( p2m_is_shared(*p2mt) ) + { +- ret = mem_sharing_unshare_page(d, gfn, +- must_succeed ? MEM_SHARING_MUST_SUCCEED : 0); +- if(ret < 0) ++ if ( mem_sharing_unshare_page(d, gfn, ++ must_succeed ++ ? MEM_SHARING_MUST_SUCCEED : 0) ) + { + BUG_ON(must_succeed); + return mfn; + } + mfn = gfn_to_mfn(d, gfn, p2mt); + } ++#endif + + return mfn; + } +@@ -429,10 +432,11 @@ p2m_type_t p2m_change_type(struct domain + /* Set mmio addresses in the p2m table (for pass-through) */ + int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); + int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn); +-/* Modify p2m table for shared gfn */ +-int +-set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); + ++ ++#ifdef __x86_64__ ++/* Modify p2m table for shared gfn */ ++int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); + /* Check if a nominated gfn is valid to be paged out */ + int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn); + /* Evict a frame */ +@@ -443,6 +447,10 @@ void p2m_mem_paging_populate(struct doma + int p2m_mem_paging_prep(struct domain *d, unsigned long gfn); + /* Resume normal operation (in case a domain was paused) */ + void p2m_mem_paging_resume(struct domain *d); ++#else ++static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn) ++{ } ++#endif + + #endif /* _XEN_P2M_H */ + diff --git 
a/21695-trace-t_info-readonly.patch b/21695-trace-t_info-readonly.patch new file mode 100644 index 0000000..71957ce --- /dev/null +++ b/21695-trace-t_info-readonly.patch @@ -0,0 +1,90 @@ +# HG changeset patch +# User Keir Fraser +# Date 1277831922 -3600 +# Node ID c5f3fe17374cead91fdaa94f60cf7b3115eaa091 +# Parent 2a3a5979e3f16d77f5b526050c45acba186482b0 +trace: share t_info pages only in read-only mode + +There's no need to share writably the t_info pages (Dom0 only wants +[and needs] to read it) + +Signed-off-by: Jan Beulich +Acked-by: George Dunlap + +--- a/tools/xenmon/xenbaked.c ++++ b/tools/xenmon/xenbaked.c +@@ -84,7 +84,7 @@ typedef struct settings_st { + } settings_t; + + struct t_struct { +- struct t_info *t_info; /* Structure with information about individual buffers */ ++ const struct t_info *t_info; /* Structure with information about individual buffers */ + struct t_buf **meta; /* Pointers to trace buffer metadata */ + unsigned char **data; /* Pointers to trace buffer data areas */ + }; +@@ -376,9 +376,8 @@ static struct t_struct *map_tbufs(unsign + } + + /* Map t_info metadata structure */ +- tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, +- tinfo_size, PROT_READ | PROT_WRITE, +- tbufs_mfn); ++ tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, ++ PROT_READ, tbufs_mfn); + + if ( tbufs.t_info == 0 ) + { +@@ -404,7 +403,8 @@ static struct t_struct *map_tbufs(unsign + for(i=0; imfn_offset[i]; ++ const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info ++ + tbufs.t_info->mfn_offset[i]; + int j; + xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; + +--- a/tools/xentrace/xentrace.c ++++ b/tools/xentrace/xentrace.c +@@ -62,7 +62,7 @@ typedef struct settings_st { + } settings_t; + + struct t_struct { +- struct t_info *t_info; /* Structure with information about individual buffers */ ++ const struct t_info *t_info; /* Structure with information about individual buffers */ + struct t_buf **meta; /* Pointers to trace buffer metadata */ 
+ unsigned char **data; /* Pointers to trace buffer data areas */ + }; +@@ -459,9 +459,8 @@ static struct t_struct *map_tbufs(unsign + int i; + + /* Map t_info metadata structure */ +- tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, +- tinfo_size, PROT_READ | PROT_WRITE, +- tbufs_mfn); ++ tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, ++ PROT_READ, tbufs_mfn); + + if ( tbufs.t_info == 0 ) + { +@@ -487,7 +486,8 @@ static struct t_struct *map_tbufs(unsign + for(i=0; imfn_offset[i]; ++ const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info ++ + tbufs.t_info->mfn_offset[i]; + int j; + xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; + +--- a/xen/common/trace.c ++++ b/xen/common/trace.c +@@ -322,7 +322,7 @@ void __init init_trace_bufs(void) + + for(i=0; i +# Date 1277917869 -3600 +# Node ID 81d6471ff1235fde2c30428b920cb6e00ba546d6 +# Parent a9caa0f2d693a1d0d008b4295e49da3ea1d70334 +x86: fix an off-by-one pirq range check + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -1019,7 +1019,7 @@ static void __pirq_guest_eoi(struct doma + + int pirq_guest_eoi(struct domain *d, int irq) + { +- if ( (irq < 0) || (irq > d->nr_pirqs) ) ++ if ( (irq < 0) || (irq >= d->nr_pirqs) ) + return -EINVAL; + + __pirq_guest_eoi(d, irq); diff --git a/21699-p2m-query-for-type-change.patch b/21699-p2m-query-for-type-change.patch new file mode 100644 index 0000000..b3152cf --- /dev/null +++ b/21699-p2m-query-for-type-change.patch @@ -0,0 +1,24 @@ +# HG changeset patch +# User Keir Fraser +# Date 1277917902 -3600 +# Node ID 7cda3ad44c6d4e799e65b2ffe21e609f50cdb94b +# Parent 81d6471ff1235fde2c30428b920cb6e00ba546d6 +Use gfn_to_mfn_query() rather than gfn_to_mfn() when changing P2M types + +Use gfn_to_mfn_query() rather than gfn_to_mfn() when changing +P2M types since we do not really want to force a PoD allocation +as a side effect. 
+ +Signed-off-by: Paul Durrant + +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -2351,7 +2351,7 @@ p2m_type_t p2m_change_type(struct domain + + p2m_lock(d->arch.p2m); + +- mfn = gfn_to_mfn(d, gfn, &pt); ++ mfn = gfn_to_mfn_query(d, gfn, &pt); + if ( pt == ot ) + set_p2m_entry(d, gfn, mfn, 0, nt); + diff --git a/21700-32on64-vm86-gpf.patch b/21700-32on64-vm86-gpf.patch new file mode 100644 index 0000000..241c450 --- /dev/null +++ b/21700-32on64-vm86-gpf.patch @@ -0,0 +1,25 @@ +# HG changeset patch +# User Keir Fraser +# Date 1277917963 -3600 +# Node ID fae04060a4f4e364c5012692b97ae1eeec3a326e +# Parent 7cda3ad44c6d4e799e65b2ffe21e609f50cdb94b +Fix #GPF injection into compat guests in vm86 code + +not to let the guest disable interrupts in the real EFLAGS. + +Signed-off-by: Ian Campbell + +--- a/xen/arch/x86/x86_64/compat/traps.c ++++ b/xen/arch/x86/x86_64/compat/traps.c +@@ -127,9 +127,8 @@ unsigned int compat_iret(void) + ti = &v->arch.guest_context.trap_ctxt[13]; + if ( TI_GET_IF(ti) ) + eflags &= ~X86_EFLAGS_IF; +- regs->_eflags = eflags & ~(X86_EFLAGS_VM|X86_EFLAGS_RF| +- X86_EFLAGS_NT|X86_EFLAGS_TF); +- ++ regs->_eflags &= ~(X86_EFLAGS_VM|X86_EFLAGS_RF| ++ X86_EFLAGS_NT|X86_EFLAGS_TF); + if ( unlikely(__put_user(0, (u32 *)regs->rsp)) ) + goto exit_and_crash; + regs->_eip = ti->address; diff --git a/21705-trace-printk.patch b/21705-trace-printk.patch new file mode 100644 index 0000000..515640e --- /dev/null +++ b/21705-trace-printk.patch @@ -0,0 +1,51 @@ +# HG changeset patch +# User Keir Fraser +# Date 1278093217 -3600 +# Node ID 19f4d637a52b8723ac1fbcf666c146951bee8e57 +# Parent 1390e2ab45c7b63d79ba9496d609cf59af4b44ee +trace: adjust printk()s + +They should be lower level or rate limited. 
+ +Signed-off-by: Jan Beulich +Acked-by: George Dunlap + +--- a/xen/common/trace.c ++++ b/xen/common/trace.c +@@ -137,7 +137,7 @@ static int alloc_trace_bufs(void) + } + + t_info->tbuf_size = opt_tbuf_size; +- printk("tbuf_size %d\n", t_info->tbuf_size); ++ printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size); + + nr_pages = opt_tbuf_size; + order = get_order_from_pages(nr_pages); +@@ -194,7 +194,7 @@ static int alloc_trace_bufs(void) + /* Write list first, then write per-cpu offset. */ + wmb(); + t_info->mfn_offset[cpu]=offset; +- printk("p%d mfn %"PRIx32" offset %d\n", ++ printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n", + cpu, mfn, offset); + offset+=i; + } +@@ -489,12 +489,13 @@ static inline int __insert_record(struct + /* Double-check once more that we have enough space. + * Don't bugcheck here, in case the userland tool is doing + * something stupid. */ +- if ( calc_bytes_avail(buf) < rec_size ) ++ next = calc_bytes_avail(buf); ++ if ( next < rec_size ) + { +- printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n", +- __func__, +- calc_bytes_avail(buf), +- data_size, buf->prod, buf->cons, data_size, rec_size); ++ if ( printk_ratelimit() ) ++ printk(XENLOG_WARNING ++ "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n", ++ __func__, next, data_size, buf->prod, buf->cons, rec_size); + return 0; + } + rmb(); diff --git a/21706-trace-security.patch b/21706-trace-security.patch new file mode 100644 index 0000000..588ed0c --- /dev/null +++ b/21706-trace-security.patch @@ -0,0 +1,396 @@ +# HG changeset patch +# User Keir Fraser +# Date 1278093394 -3600 +# Node ID ae68758f8862bc43ab6bbe4ad3a8594c28b9bc39 +# Parent 19f4d637a52b8723ac1fbcf666c146951bee8e57 +trace: fix security issues + +After getting a report of 3.2.3's xenmon crashing Xen (as it turned +out this was because c/s 17000 was backported to that tree without +also applying c/s 17515), I figured that the hypervisor shouldn't rely +on any specific state of the actual trace buffer (as it is 
shared +writable with Dom0) + +[GWD: Volatile quantifiers have been taken out and moved to another +patch] + +To make clear what purpose specific variables have and/or where they +got loaded from, the patch also changes the type of some of them to be +explicitly u32/s32, and removes pointless assertions (like checking an +unsigned variable to be >= 0). + +I also took the prototype adjustment of __trace_var() as an +opportunity to simplify the TRACE_xD() macros. Similar simplification +could be done on the (quite numerous) direct callers of the function. + +Signed-off-by: Jan Beulich +Signed-off-by: George Dunlap + +# HG changeset patch +# User Keir Fraser +# Date 1278314658 -3600 +# Node ID 9074d50d09358cd8349d54c7ab2e2ead81fa1570 +# Parent f483b5ce7be235494156fee164decd73e0472cb7 +trace: insert compiler memory barriers + +This is to ensure fields shared writably with Dom0 get read only once +for any consistency checking followed by actual calculations. + +I realized there was another multiple-read issue, a fix for which is +also included (which at once simplifies __insert_record()). + +Signed-off-by: Jan Beulich + +--- a/xen/common/trace.c ++++ b/xen/common/trace.c +@@ -52,12 +52,12 @@ static struct t_info *t_info; + static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); + static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); + static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); +-static int data_size; ++static u32 data_size; + static u32 t_info_first_offset __read_mostly; + + /* High water mark for trace buffers; */ + /* Send virtual interrupt when buffer level reaches this point */ +-static int t_buf_highwater; ++static u32 t_buf_highwater; + + /* Number of records lost due to per-CPU trace buffer being full. 
*/ + static DEFINE_PER_CPU(unsigned long, lost_records); +@@ -162,7 +162,7 @@ static int alloc_trace_bufs(void) + + spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); + +- buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf; ++ per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf; + buf->cons = buf->prod = 0; + per_cpu(t_data, cpu) = (unsigned char *)(buf + 1); + +@@ -213,6 +213,7 @@ out_dealloc: + spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); + if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) ) + { ++ per_cpu(t_bufs, cpu) = NULL; + ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated)); + free_xenheap_pages(rawbuf, order); + } +@@ -418,19 +419,39 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc + return rc; + } + +-static inline int calc_rec_size(int cycles, int extra) ++static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) + { +- int rec_size; +- rec_size = 4; ++ unsigned int rec_size = 4; ++ + if ( cycles ) + rec_size += 8; + rec_size += extra; + return rec_size; + } + +-static inline int calc_unconsumed_bytes(struct t_buf *buf) ++static inline bool_t bogus(u32 prod, u32 cons) + { +- int x = buf->prod - buf->cons; ++ if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) || ++ unlikely(cons & 3) || unlikely(cons >= 2 * data_size) ) ++ { ++ tb_init_done = 0; ++ printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n", ++ smp_processor_id(), prod, cons); ++ return 1; ++ } ++ return 0; ++} ++ ++static inline u32 calc_unconsumed_bytes(const struct t_buf *buf) ++{ ++ u32 prod = buf->prod, cons = buf->cons; ++ s32 x; ++ ++ barrier(); /* must read buf->prod and buf->cons only once */ ++ if ( bogus(prod, cons) ) ++ return data_size; ++ ++ x = prod - cons; + if ( x < 0 ) + x += 2*data_size; + +@@ -440,9 +461,16 @@ static inline int calc_unconsumed_bytes( + return x; + } + +-static inline int calc_bytes_to_wrap(struct t_buf *buf) ++static inline u32 calc_bytes_to_wrap(const struct t_buf *buf) + { +- int x = data_size - buf->prod; ++ u32 
prod = buf->prod, cons = buf->cons; ++ s32 x; ++ ++ barrier(); /* must read buf->prod and buf->cons only once */ ++ if ( bogus(prod, cons) ) ++ return 0; ++ ++ x = data_size - prod; + if ( x <= 0 ) + x += data_size; + +@@ -452,55 +480,60 @@ static inline int calc_bytes_to_wrap(str + return x; + } + +-static inline int calc_bytes_avail(struct t_buf *buf) ++static inline u32 calc_bytes_avail(const struct t_buf *buf) + { + return data_size - calc_unconsumed_bytes(buf); + } + +-static inline struct t_rec * +-next_record(struct t_buf *buf) ++static inline struct t_rec *next_record(const struct t_buf *buf, ++ uint32_t *next) + { +- int x = buf->prod; ++ u32 x = buf->prod, cons = buf->cons; ++ ++ barrier(); /* must read buf->prod and buf->cons only once */ ++ *next = x; ++ if ( !tb_init_done || bogus(x, cons) ) ++ return NULL; ++ + if ( x >= data_size ) + x -= data_size; + +- ASSERT(x >= 0); + ASSERT(x < data_size); + + return (struct t_rec *)&this_cpu(t_data)[x]; + } + +-static inline int __insert_record(struct t_buf *buf, +- unsigned long event, +- int extra, +- int cycles, +- int rec_size, +- unsigned char *extra_data) ++static inline void __insert_record(struct t_buf *buf, ++ unsigned long event, ++ unsigned int extra, ++ bool_t cycles, ++ unsigned int rec_size, ++ const void *extra_data) + { + struct t_rec *rec; + unsigned char *dst; +- unsigned long extra_word = extra/sizeof(u32); +- int local_rec_size = calc_rec_size(cycles, extra); ++ unsigned int extra_word = extra / sizeof(u32); ++ unsigned int local_rec_size = calc_rec_size(cycles, extra); + uint32_t next; + + BUG_ON(local_rec_size != rec_size); + BUG_ON(extra & 3); + ++ rec = next_record(buf, &next); ++ if ( !rec ) ++ return; + /* Double-check once more that we have enough space. + * Don't bugcheck here, in case the userland tool is doing + * something stupid. 
*/ +- next = calc_bytes_avail(buf); +- if ( next < rec_size ) ++ if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size ) + { + if ( printk_ratelimit() ) + printk(XENLOG_WARNING +- "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n", +- __func__, next, data_size, buf->prod, buf->cons, rec_size); +- return 0; ++ "%s: size=%08x prod=%08x cons=%08x rec=%u\n", ++ __func__, data_size, next, buf->cons, rec_size); ++ return; + } +- rmb(); + +- rec = next_record(buf); + rec->event = event; + rec->extra_u32 = extra_word; + dst = (unsigned char *)rec->u.nocycles.extra_u32; +@@ -517,21 +550,19 @@ static inline int __insert_record(struct + + wmb(); + +- next = buf->prod + rec_size; ++ next += rec_size; + if ( next >= 2*data_size ) + next -= 2*data_size; +- ASSERT(next >= 0); + ASSERT(next < 2*data_size); + buf->prod = next; +- +- return rec_size; + } + +-static inline int insert_wrap_record(struct t_buf *buf, int size) ++static inline void insert_wrap_record(struct t_buf *buf, ++ unsigned int size) + { +- int space_left = calc_bytes_to_wrap(buf); +- unsigned long extra_space = space_left - sizeof(u32); +- int cycles = 0; ++ u32 space_left = calc_bytes_to_wrap(buf); ++ unsigned int extra_space = space_left - sizeof(u32); ++ bool_t cycles = 0; + + BUG_ON(space_left > size); + +@@ -543,17 +574,13 @@ static inline int insert_wrap_record(str + ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX); + } + +- return __insert_record(buf, +- TRC_TRACE_WRAP_BUFFER, +- extra_space, +- cycles, +- space_left, +- NULL); ++ __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles, ++ space_left, NULL); + } + + #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */ + +-static inline int insert_lost_records(struct t_buf *buf) ++static inline void insert_lost_records(struct t_buf *buf) + { + struct { + u32 lost_records; +@@ -568,12 +595,8 @@ static inline int insert_lost_records(st + + this_cpu(lost_records) = 0; + +- return __insert_record(buf, +- 
TRC_LOST_RECORDS, +- sizeof(ed), +- 1 /* cycles */, +- LOST_REC_SIZE, +- (unsigned char *)&ed); ++ __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */, ++ LOST_REC_SIZE, &ed); + } + + /* +@@ -595,13 +618,15 @@ static DECLARE_TASKLET(trace_notify_dom0 + * failure, otherwise 0. Failure occurs only if the trace buffers are not yet + * initialised. + */ +-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) ++void __trace_var(u32 event, bool_t cycles, unsigned int extra, ++ const void *extra_data) + { + struct t_buf *buf; +- unsigned long flags, bytes_to_tail, bytes_to_wrap; +- int rec_size, total_size; +- int extra_word; +- int started_below_highwater = 0; ++ unsigned long flags; ++ u32 bytes_to_tail, bytes_to_wrap; ++ unsigned int rec_size, total_size; ++ unsigned int extra_word; ++ bool_t started_below_highwater; + + if( !tb_init_done ) + return; +@@ -640,7 +665,11 @@ void __trace_var(u32 event, int cycles, + buf = this_cpu(t_bufs); + + if ( unlikely(!buf) ) ++ { ++ /* Make gcc happy */ ++ started_below_highwater = 0; + goto unlock; ++ } + + started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); + +@@ -721,8 +750,9 @@ unlock: + spin_unlock_irqrestore(&this_cpu(t_lock), flags); + + /* Notify trace buffer consumer that we've crossed the high water mark. 
*/ +- if ( started_below_highwater && +- (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) ++ if ( likely(buf!=NULL) ++ && started_below_highwater ++ && (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) + tasklet_schedule(&trace_notify_dom0_tasklet); + } + +--- a/xen/include/xen/trace.h ++++ b/xen/include/xen/trace.h +@@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op + + int trace_will_trace_event(u32 event); + +-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data); ++void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *); + + static inline void trace_var(u32 event, int cycles, int extra, + unsigned char *extra_data) +@@ -57,7 +57,7 @@ static inline void trace_var(u32 event, + { \ + u32 _d[1]; \ + _d[0] = d1; \ +- __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \ ++ __trace_var(_e, 1, sizeof(_d), _d); \ + } \ + } while ( 0 ) + +@@ -68,7 +68,7 @@ static inline void trace_var(u32 event, + u32 _d[2]; \ + _d[0] = d1; \ + _d[1] = d2; \ +- __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \ ++ __trace_var(_e, 1, sizeof(_d), _d); \ + } \ + } while ( 0 ) + +@@ -80,7 +80,7 @@ static inline void trace_var(u32 event, + _d[0] = d1; \ + _d[1] = d2; \ + _d[2] = d3; \ +- __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \ ++ __trace_var(_e, 1, sizeof(_d), _d); \ + } \ + } while ( 0 ) + +@@ -93,7 +93,7 @@ static inline void trace_var(u32 event, + _d[1] = d2; \ + _d[2] = d3; \ + _d[3] = d4; \ +- __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \ ++ __trace_var(_e, 1, sizeof(_d), _d); \ + } \ + } while ( 0 ) + +@@ -107,7 +107,7 @@ static inline void trace_var(u32 event, + _d[2] = d3; \ + _d[3] = d4; \ + _d[4] = d5; \ +- __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \ ++ __trace_var(_e, 1, sizeof(_d), _d); \ + } \ + } while ( 0 ) + +@@ -122,7 +122,7 @@ static inline void trace_var(u32 event, + _d[3] = d4; \ + _d[4] = d5; \ + _d[5] = d6; \ +- __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char 
*)_d); \ ++ __trace_var(_e, 1, sizeof(_d), _d); \ + } \ + } while ( 0 ) + diff --git a/21712-amd-osvw.patch b/21712-amd-osvw.patch new file mode 100644 index 0000000..096424b --- /dev/null +++ b/21712-amd-osvw.patch @@ -0,0 +1,400 @@ +# HG changeset patch +# User Keir Fraser +# Date 1278093897 -3600 +# Node ID f483b5ce7be235494156fee164decd73e0472cb7 +# Parent 4d091e6e04918ba3ef19cc45ae2fffaee4f18afe +AMD OSVW (OS Visible Workaround) for Xen + +This patch enables AMD OSVW (OS Visible Workaround) feature for +Xen. New AMD errata will have an OSVW id assigned in the future. OS is +supposed to check OSVW status MSR to find out whether CPU has a +specific erratum. Legacy errata are also supported in this patch: +traditional family/model/stepping approach will be used if OSVW +feature isn't applicable. This patch is adapted from Hans Rosenfeld's +patch submitted to Linux kernel. + +Signed-off-by: Wei Huang +Signed-off-by: Hans Rosenfeld +Acked-by: Jan Beulich + +Index: xen-4.0.0-testing/xen/arch/x86/cpu/amd.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/cpu/amd.c ++++ xen-4.0.0-testing/xen/arch/x86/cpu/amd.c +@@ -7,11 +7,11 @@ + #include + #include + #include ++#include + #include + #include /* amd_init_cpu */ + + #include "cpu.h" +-#include "amd.h" + + void start_svm(struct cpuinfo_x86 *c); + +@@ -157,6 +157,54 @@ static void __devinit set_cpuidmask(stru + } + + /* ++ * Check for the presence of an AMD erratum. Arguments are defined in amd.h ++ * for each known erratum. Return 1 if erratum is found. ++ */ ++int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw, ...) 
++{ ++ va_list ap; ++ u32 range; ++ u32 ms; ++ ++ if (cpu->x86_vendor != X86_VENDOR_AMD) ++ return 0; ++ ++ va_start(ap, osvw); ++ ++ if (osvw) { ++ u16 osvw_id = va_arg(ap, int); ++ ++ if (cpu_has(cpu, X86_FEATURE_OSVW)) { ++ u64 osvw_len; ++ rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len); ++ ++ if (osvw_id < osvw_len) { ++ u64 osvw_bits; ++ rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6), ++ osvw_bits); ++ ++ va_end(ap); ++ return (osvw_bits >> (osvw_id & 0x3f)) & 0x01; ++ } ++ } ++ } ++ ++ /* OSVW unavailable or ID unknown, match family-model-stepping range */ ++ ms = (cpu->x86_model << 8) | cpu->x86_mask; ++ while ((range = va_arg(ap, int))) { ++ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && ++ (ms >= AMD_MODEL_RANGE_START(range)) && ++ (ms <= AMD_MODEL_RANGE_END(range))) { ++ va_end(ap); ++ return 1; ++ } ++ } ++ ++ va_end(ap); ++ return 0; ++} ++ ++/* + * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush + * filter on AMD 64-bit processors. + */ +Index: xen-4.0.0-testing/xen/arch/x86/cpu/amd.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/cpu/amd.h ++++ /dev/null +@@ -1,103 +0,0 @@ +-/* +- * amd.h - AMD processor specific definitions +- */ +- +-#ifndef __AMD_H__ +-#define __AMD_H__ +- +-#include +- +-/* CPUID masked for use by AMD-V Extended Migration */ +- +-#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32) +-#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_)) +- +-/* Family 0Fh, Revision C */ +-#define AMD_FEATURES_K8_REV_C_ECX 0 +-#define AMD_FEATURES_K8_REV_C_EDX ( \ +- __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ +- __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ +- __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ +- __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ +- __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ +- __bit(X86_FEATURE_SEP) | __bit(X86_FEATURE_MTRR) | \ +- __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ +- __bit(X86_FEATURE_CMOV) | 
__bit(X86_FEATURE_PAT) | \ +- __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_CLFLSH)| \ +- __bit(X86_FEATURE_MMX) | __bit(X86_FEATURE_FXSR) | \ +- __bit(X86_FEATURE_XMM) | __bit(X86_FEATURE_XMM2)) +-#define AMD_EXTFEATURES_K8_REV_C_ECX 0 +-#define AMD_EXTFEATURES_K8_REV_C_EDX ( \ +- __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ +- __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ +- __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ +- __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ +- __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ +- __bit(X86_FEATURE_SYSCALL) | __bit(X86_FEATURE_MTRR) | \ +- __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ +- __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ +- __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_NX) | \ +- __bit(X86_FEATURE_MMXEXT) | __bit(X86_FEATURE_MMX) | \ +- __bit(X86_FEATURE_FXSR) | __bit(X86_FEATURE_LM) | \ +- __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW)) +- +-/* Family 0Fh, Revision D */ +-#define AMD_FEATURES_K8_REV_D_ECX AMD_FEATURES_K8_REV_C_ECX +-#define AMD_FEATURES_K8_REV_D_EDX AMD_FEATURES_K8_REV_C_EDX +-#define AMD_EXTFEATURES_K8_REV_D_ECX (AMD_EXTFEATURES_K8_REV_C_ECX |\ +- __bit(X86_FEATURE_LAHF_LM)) +-#define AMD_EXTFEATURES_K8_REV_D_EDX (AMD_EXTFEATURES_K8_REV_C_EDX |\ +- __bit(X86_FEATURE_FFXSR)) +- +-/* Family 0Fh, Revision E */ +-#define AMD_FEATURES_K8_REV_E_ECX (AMD_FEATURES_K8_REV_D_ECX | \ +- __bit(X86_FEATURE_XMM3)) +-#define AMD_FEATURES_K8_REV_E_EDX (AMD_FEATURES_K8_REV_D_EDX | \ +- __bit(X86_FEATURE_HT)) +-#define AMD_EXTFEATURES_K8_REV_E_ECX (AMD_EXTFEATURES_K8_REV_D_ECX |\ +- __bit(X86_FEATURE_CMP_LEGACY)) +-#define AMD_EXTFEATURES_K8_REV_E_EDX AMD_EXTFEATURES_K8_REV_D_EDX +- +-/* Family 0Fh, Revision F */ +-#define AMD_FEATURES_K8_REV_F_ECX (AMD_FEATURES_K8_REV_E_ECX | \ +- __bit(X86_FEATURE_CX16)) +-#define AMD_FEATURES_K8_REV_F_EDX AMD_FEATURES_K8_REV_E_EDX +-#define AMD_EXTFEATURES_K8_REV_F_ECX (AMD_EXTFEATURES_K8_REV_E_ECX |\ +- 
__bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) | \ +- __bit(X86_FEATURE_ALTMOVCR)) +-#define AMD_EXTFEATURES_K8_REV_F_EDX (AMD_EXTFEATURES_K8_REV_E_EDX |\ +- __bit(X86_FEATURE_RDTSCP)) +- +-/* Family 0Fh, Revision G */ +-#define AMD_FEATURES_K8_REV_G_ECX AMD_FEATURES_K8_REV_F_ECX +-#define AMD_FEATURES_K8_REV_G_EDX AMD_FEATURES_K8_REV_F_EDX +-#define AMD_EXTFEATURES_K8_REV_G_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ +- __bit(X86_FEATURE_3DNOWPF)) +-#define AMD_EXTFEATURES_K8_REV_G_EDX AMD_EXTFEATURES_K8_REV_F_EDX +- +-/* Family 10h, Revision B */ +-#define AMD_FEATURES_FAM10h_REV_B_ECX (AMD_FEATURES_K8_REV_F_ECX | \ +- __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT)) +-#define AMD_FEATURES_FAM10h_REV_B_EDX AMD_FEATURES_K8_REV_F_EDX +-#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ +- __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) | \ +- __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) | \ +- __bit(X86_FEATURE_IBS)) +-#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\ +- __bit(X86_FEATURE_PAGE1GB)) +- +-/* Family 10h, Revision C */ +-#define AMD_FEATURES_FAM10h_REV_C_ECX AMD_FEATURES_FAM10h_REV_B_ECX +-#define AMD_FEATURES_FAM10h_REV_C_EDX AMD_FEATURES_FAM10h_REV_B_EDX +-#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\ +- __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT)) +-#define AMD_EXTFEATURES_FAM10h_REV_C_EDX AMD_EXTFEATURES_FAM10h_REV_B_EDX +- +-/* Family 11h, Revision B */ +-#define AMD_FEATURES_FAM11h_REV_B_ECX AMD_FEATURES_K8_REV_G_ECX +-#define AMD_FEATURES_FAM11h_REV_B_EDX AMD_FEATURES_K8_REV_G_EDX +-#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\ +- __bit(X86_FEATURE_SKINIT)) +-#define AMD_EXTFEATURES_FAM11h_REV_B_EDX AMD_EXTFEATURES_K8_REV_G_EDX +- +-#endif /* __AMD_H__ */ +Index: xen-4.0.0-testing/xen/arch/x86/hvm/svm/asid.c +=================================================================== +--- 
xen-4.0.0-testing.orig/xen/arch/x86/hvm/svm/asid.c ++++ xen-4.0.0-testing/xen/arch/x86/hvm/svm/asid.c +@@ -21,14 +21,14 @@ + #include + #include + #include ++#include + + void svm_asid_init(struct cpuinfo_x86 *c) + { + int nasids = 0; + + /* Check for erratum #170, and leave ASIDs disabled if it's present. */ +- if ( (c->x86 == 0x10) || +- ((c->x86 == 0xf) && (c->x86_model >= 0x68) && (c->x86_mask >= 1)) ) ++ if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_170) ) + nasids = cpuid_ebx(0x8000000A); + + hvm_asid_init(nasids); +Index: xen-4.0.0-testing/xen/arch/x86/hvm/svm/svm.c +=================================================================== +--- xen-4.0.0-testing.orig/xen/arch/x86/hvm/svm/svm.c ++++ xen-4.0.0-testing/xen/arch/x86/hvm/svm/svm.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -828,8 +829,8 @@ static void svm_init_erratum_383(struct + { + uint64_t msr_content; + +- /* only family 10h is affected */ +- if ( c->x86 != 0x10 ) ++ /* check whether CPU is affected */ ++ if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_383) ) + return; + + rdmsrl(MSR_AMD64_DC_CFG, msr_content); +Index: xen-4.0.0-testing/xen/include/asm-x86/amd.h +=================================================================== +--- /dev/null ++++ xen-4.0.0-testing/xen/include/asm-x86/amd.h +@@ -0,0 +1,137 @@ ++/* ++ * amd.h - AMD processor specific definitions ++ */ ++ ++#ifndef __AMD_H__ ++#define __AMD_H__ ++ ++#include ++ ++/* CPUID masked for use by AMD-V Extended Migration */ ++ ++#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32) ++#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_)) ++ ++/* Family 0Fh, Revision C */ ++#define AMD_FEATURES_K8_REV_C_ECX 0 ++#define AMD_FEATURES_K8_REV_C_EDX ( \ ++ __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ ++ __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ ++ __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ ++ __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ ++ __bit(X86_FEATURE_CX8) | 
__bit(X86_FEATURE_APIC) | \ ++ __bit(X86_FEATURE_SEP) | __bit(X86_FEATURE_MTRR) | \ ++ __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ ++ __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ ++ __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_CLFLSH)| \ ++ __bit(X86_FEATURE_MMX) | __bit(X86_FEATURE_FXSR) | \ ++ __bit(X86_FEATURE_XMM) | __bit(X86_FEATURE_XMM2)) ++#define AMD_EXTFEATURES_K8_REV_C_ECX 0 ++#define AMD_EXTFEATURES_K8_REV_C_EDX ( \ ++ __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ ++ __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ ++ __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ ++ __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ ++ __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ ++ __bit(X86_FEATURE_SYSCALL) | __bit(X86_FEATURE_MTRR) | \ ++ __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ ++ __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ ++ __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_NX) | \ ++ __bit(X86_FEATURE_MMXEXT) | __bit(X86_FEATURE_MMX) | \ ++ __bit(X86_FEATURE_FXSR) | __bit(X86_FEATURE_LM) | \ ++ __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW)) ++ ++/* Family 0Fh, Revision D */ ++#define AMD_FEATURES_K8_REV_D_ECX AMD_FEATURES_K8_REV_C_ECX ++#define AMD_FEATURES_K8_REV_D_EDX AMD_FEATURES_K8_REV_C_EDX ++#define AMD_EXTFEATURES_K8_REV_D_ECX (AMD_EXTFEATURES_K8_REV_C_ECX |\ ++ __bit(X86_FEATURE_LAHF_LM)) ++#define AMD_EXTFEATURES_K8_REV_D_EDX (AMD_EXTFEATURES_K8_REV_C_EDX |\ ++ __bit(X86_FEATURE_FFXSR)) ++ ++/* Family 0Fh, Revision E */ ++#define AMD_FEATURES_K8_REV_E_ECX (AMD_FEATURES_K8_REV_D_ECX | \ ++ __bit(X86_FEATURE_XMM3)) ++#define AMD_FEATURES_K8_REV_E_EDX (AMD_FEATURES_K8_REV_D_EDX | \ ++ __bit(X86_FEATURE_HT)) ++#define AMD_EXTFEATURES_K8_REV_E_ECX (AMD_EXTFEATURES_K8_REV_D_ECX |\ ++ __bit(X86_FEATURE_CMP_LEGACY)) ++#define AMD_EXTFEATURES_K8_REV_E_EDX AMD_EXTFEATURES_K8_REV_D_EDX ++ ++/* Family 0Fh, Revision F */ ++#define AMD_FEATURES_K8_REV_F_ECX (AMD_FEATURES_K8_REV_E_ECX | \ ++ 
__bit(X86_FEATURE_CX16)) ++#define AMD_FEATURES_K8_REV_F_EDX AMD_FEATURES_K8_REV_E_EDX ++#define AMD_EXTFEATURES_K8_REV_F_ECX (AMD_EXTFEATURES_K8_REV_E_ECX |\ ++ __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) | \ ++ __bit(X86_FEATURE_ALTMOVCR)) ++#define AMD_EXTFEATURES_K8_REV_F_EDX (AMD_EXTFEATURES_K8_REV_E_EDX |\ ++ __bit(X86_FEATURE_RDTSCP)) ++ ++/* Family 0Fh, Revision G */ ++#define AMD_FEATURES_K8_REV_G_ECX AMD_FEATURES_K8_REV_F_ECX ++#define AMD_FEATURES_K8_REV_G_EDX AMD_FEATURES_K8_REV_F_EDX ++#define AMD_EXTFEATURES_K8_REV_G_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ ++ __bit(X86_FEATURE_3DNOWPF)) ++#define AMD_EXTFEATURES_K8_REV_G_EDX AMD_EXTFEATURES_K8_REV_F_EDX ++ ++/* Family 10h, Revision B */ ++#define AMD_FEATURES_FAM10h_REV_B_ECX (AMD_FEATURES_K8_REV_F_ECX | \ ++ __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT)) ++#define AMD_FEATURES_FAM10h_REV_B_EDX AMD_FEATURES_K8_REV_F_EDX ++#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ ++ __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) | \ ++ __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) | \ ++ __bit(X86_FEATURE_IBS)) ++#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\ ++ __bit(X86_FEATURE_PAGE1GB)) ++ ++/* Family 10h, Revision C */ ++#define AMD_FEATURES_FAM10h_REV_C_ECX AMD_FEATURES_FAM10h_REV_B_ECX ++#define AMD_FEATURES_FAM10h_REV_C_EDX AMD_FEATURES_FAM10h_REV_B_EDX ++#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\ ++ __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT)) ++#define AMD_EXTFEATURES_FAM10h_REV_C_EDX AMD_EXTFEATURES_FAM10h_REV_B_EDX ++ ++/* Family 11h, Revision B */ ++#define AMD_FEATURES_FAM11h_REV_B_ECX AMD_FEATURES_K8_REV_G_ECX ++#define AMD_FEATURES_FAM11h_REV_B_EDX AMD_FEATURES_K8_REV_G_EDX ++#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\ ++ __bit(X86_FEATURE_SKINIT)) ++#define AMD_EXTFEATURES_FAM11h_REV_B_EDX AMD_EXTFEATURES_K8_REV_G_EDX ++ ++/* AMD errata 
checking ++ * ++ * Errata are defined using the AMD_LEGACY_ERRATUM() or AMD_OSVW_ERRATUM() ++ * macros. The latter is intended for newer errata that have an OSVW id ++ * assigned, which it takes as first argument. Both take a variable number ++ * of family-specific model-stepping ranges created by AMD_MODEL_RANGE(). ++ * ++ * Example 1: ++ * #define AMD_ERRATUM_319 \ ++ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), \ ++ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), \ ++ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)) ++ * Example 2: ++ * #define AMD_ERRATUM_400 \ ++ * AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), \ ++ * AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)) ++ */ ++ ++#define AMD_LEGACY_ERRATUM(...) 0 /* legacy */, __VA_ARGS__, 0 ++#define AMD_OSVW_ERRATUM(osvw_id, ...) 1 /* osvw */, osvw_id, __VA_ARGS__, 0 ++#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ ++ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) ++#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) ++#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) ++#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) ++ ++#define AMD_ERRATUM_170 \ ++ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf)) ++ ++#define AMD_ERRATUM_383 \ ++ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf), \ ++ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0x1, 0x0)) ++ ++int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...); ++#endif /* __AMD_H__ */ +Index: xen-4.0.0-testing/xen/include/asm-x86/msr-index.h +=================================================================== +--- xen-4.0.0-testing.orig/xen/include/asm-x86/msr-index.h ++++ xen-4.0.0-testing/xen/include/asm-x86/msr-index.h +@@ -251,6 +251,10 @@ + #define MSR_AMD_PATCHLEVEL 0x0000008b + #define MSR_AMD_PATCHLOADER 0xc0010020 + ++/* AMD OS Visible Workaround MSRs */ ++#define MSR_AMD_OSVW_ID_LENGTH 0xc0010140 ++#define MSR_AMD_OSVW_STATUS 0xc0010141 ++ + /* K6 
MSRs */ + #define MSR_K6_EFER 0xc0000080 + #define MSR_K6_STAR 0xc0000081 diff --git a/21723-get-domu-state.patch b/21723-get-domu-state.patch new file mode 100644 index 0000000..922e954 --- /dev/null +++ b/21723-get-domu-state.patch @@ -0,0 +1,188 @@ +# HG changeset patch +# User Ian Jackson +# Date 1277819571 -3600 +# Node ID a60c604b5829db6285ff89d8163478330ac12ee2 +# Parent 7b00193bd0334606b6f6779c3f14a1667a952fe4 +tools/xend, xm: add a command to get the state of VMs + +add a command "domstate" to get the state of Vms, which may have one state of +{'shutoff', 'idle','shutdown','running','crashed','paused' or 'paused by +admin"}. + +For case of pause, I distinguish it into two conditions. One is "paused" the +other is "paused by admin". +"pasued by admin" means that users pause a domain voluntary by "xm paused +VM" or " API" + +Signed-off-by James (Song Wei) + +Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomain.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendDomain.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendDomain.py +@@ -250,6 +250,18 @@ class XendDomain: + @return: path to config file. + """ + return os.path.join(self._managed_path(domuuid), CACHED_CONFIG_FILE) ++ def domain_setpauseflag(self, dom, flag=False): ++ try: ++ dominfo = self.domain_lookup_nr(dom) ++ dominfo.paused_by_admin = flag ++ except Exception, err: ++ log.debug("error in in setpauseflag") ++ def domain_getpauseflag(self, dom): ++ try: ++ dominfo = self.domain_lookup_nr(dom) ++ return dominfo.paused_by_admin ++ except Exception, err: ++ log.debug("error in in getpauseflag") + + def _managed_check_point_path(self, domuuid): + """Returns absolute path to check point file for managed domain. 
+Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -327,6 +327,8 @@ class XendDomainInfo: + @type info: dictionary + @ivar domid: Domain ID (if VM has started) + @type domid: int or None ++ @ivar paused_by_admin: Is this Domain paused by command or API ++ @type paused_by_admin: bool + @ivar guest_bitsize: the bitsize of guest + @type guest_bitsize: int or None + @ivar alloc_mem: the memory domain allocated when booting +@@ -390,6 +392,7 @@ class XendDomainInfo: + self.domid = domid + self.guest_bitsize = None + self.alloc_mem = None ++ self.paused_by_admin = False + + maxmem = self.info.get('memory_static_max', 0) + memory = self.info.get('memory_dynamic_max', 0) +Index: xen-4.0.0-testing/tools/python/xen/xend/server/SrvDomain.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/server/SrvDomain.py ++++ xen-4.0.0-testing/tools/python/xen/xend/server/SrvDomain.py +@@ -225,6 +225,20 @@ class SrvDomain(SrvDir): + self.acceptCommand(req) + return self.xd.domain_reset(self.dom.getName()) + ++ def op_do_get_pauseflag(self, op, req): ++ self.acceptCommand(req) ++ return req.threadRequest(self.do_get_pauseflag, op, req) ++ ++ def do_get_pauseflag(self, _, req): ++ return self.xd.domain_getpauseflag(self.dom.getName(), req) ++ ++ def op_do_set_pauseflag(self, op, req): ++ self.acceptCommand(req) ++ return req.threadRequest(self.do_set_pauseflag, op, req) ++ ++ def do_set_pauseflag(self, _, req): ++ return self.xd.domain_setpauseflag(self.dom.getName(), req) ++ + def op_usb_add(self, op, req): + self.acceptCommand(req) + return req.threadRequest(self.do_usb_add, op, req) +Index: xen-4.0.0-testing/tools/python/xen/xm/main.py +=================================================================== 
+--- xen-4.0.0-testing.orig/tools/python/xen/xm/main.py ++++ xen-4.0.0-testing/tools/python/xen/xm/main.py +@@ -165,6 +165,8 @@ SUBCOMMAND_HELP = { + #usb + 'usb-add' : (' <[host:bus.addr] [host:vendor_id:product_id]>','Add the usb device to FV VM.'), + 'usb-del' : (' <[host:bus.addr] [host:vendor_id:product_id]>','Delete the usb device to FV VM.'), ++ #domstate ++ 'domstate' : (' ', 'get the state of a domain'), + + # device commands + +@@ -370,6 +372,7 @@ common_commands = [ + "uptime", + "usb-add", + "usb-del", ++ "domstate", + "vcpu-set", + ] + +@@ -404,6 +407,7 @@ domain_commands = [ + "uptime", + "usb-add", + "usb-del", ++ "domstate", + "vcpu-list", + "vcpu-pin", + "vcpu-set", +@@ -901,7 +905,6 @@ def getDomains(domain_names, state, full + return "-" + state_str = "".join([state_on_off(state) + for state in states]) +- + dom_rec.update({'name': dom_rec['name_label'], + 'memory_actual': int(dom_metrics_rec['memory_actual'])/1024, + 'vcpus': dom_metrics_rec['VCPUs_number'], +@@ -1395,8 +1398,10 @@ def xm_pause(args): + + if serverType == SERVER_XEN_API: + server.xenapi.VM.pause(get_single_vm(dom)) ++ server.xenapi.VM.set_pauseflag(get_single_vm(dom), True) + else: + server.xend.domain.pause(dom) ++ server.xend.domain.setpauseflag(dom, True) + + def xm_unpause(args): + arg_check(args, "unpause", 1) +@@ -1404,8 +1409,10 @@ def xm_unpause(args): + + if serverType == SERVER_XEN_API: + server.xenapi.VM.unpause(get_single_vm(dom)) ++ server.xenapi.VM.set_pauseflag(get_single_vm(dom), False) + else: + server.xend.domain.unpause(dom) ++ server.xend.domain.setpauseflag(dom, False) + + def xm_dump_core(args): + live = False +@@ -1515,6 +1522,32 @@ def xm_usb_add(args): + arg_check(args, "usb-add", 2) + server.xend.domain.usb_add(args[0],args[1]) + ++def xm_domstate(args): ++ arg_check(args, "domstate", 1) ++ (opitons, params) = getopt.gnu_getopt(args, 's', ['domname=']) ++ doms = getDomains(params, 'all') ++ d = parse_doms_info(doms[0]) ++ state = d['state'] ++ if state: 
++ if state.find('s') > 0: ++ print 'shutoff' ++ elif state.find('b') > 0: ++ print 'idle' ++ elif state.find('d') > 0: ++ print 'shutdown' ++ elif state.find('r') > 0: ++ print 'running' ++ elif state.find('c') > 0: ++ print 'crashed' ++ elif state.find('p') > 0: ++ if server.xend.domain.getpauseflag(args[0]): ++ print 'paused by admin' ++ else: ++ print 'paused' ++ else: ++ print 'shutoff' ++ return ++ + def xm_usb_del(args): + arg_check(args, "usb-del", 2) + server.xend.domain.usb_del(args[0],args[1]) +@@ -3538,6 +3571,8 @@ commands = { + #usb + "usb-add": xm_usb_add, + "usb-del": xm_usb_del, ++ #domstate ++ "domstate": xm_domstate, + } + + ## The commands supported by a separate argument parser in xend.xm. diff --git a/21744-x86-cpufreq-range-check.patch b/21744-x86-cpufreq-range-check.patch new file mode 100644 index 0000000..5d5a65f --- /dev/null +++ b/21744-x86-cpufreq-range-check.patch @@ -0,0 +1,27 @@ +# HG changeset patch +# User Keir Fraser +# Date 1278578686 -3600 +# Node ID df63728e1680ce7827bd58f6bda453f70ed41ad9 +# Parent a0f0ae5be814f19590d5a59d91ab7183cd1a325f +x86/cpufreq: check array index before use + +... rather than after. 
+ +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c ++++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c +@@ -210,9 +210,11 @@ + + if (!cpu_isset(cpu, mask)) + cpu = first_cpu(mask); ++ if (cpu >= NR_CPUS) ++ return 0; ++ + policy = cpufreq_cpu_policy[cpu]; +- +- if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu]) ++ if (!policy || !drv_data[policy->cpu]) + return 0; + + switch (drv_data[policy->cpu]->cpu_feature) { diff --git a/21847-pscsi.patch b/21847-pscsi.patch new file mode 100644 index 0000000..f7d9df6 --- /dev/null +++ b/21847-pscsi.patch @@ -0,0 +1,130 @@ +# HG changeset patch +# User "Dube, Lutz" +# Date 1279902875 -3600 +# Node ID 4814e16ea4105502332407e3379c49da92018899 +# Parent e23302fcb83c72f93ec01285bd7f4f1641eb67e4 +tools/xend: Fix performance of xend with more than 10000 FC device paths + +On server startup xend start or a later xend restart needs approx. 30 min to +start/restart. Without attached FC devices xend start/restart needs only some +seconds. + +server type: Fujitsu Primergy RX600-S5 + +The time gets lost in xen/xend/XendNode.py line 329 while calling +vscsi_util.get_all_scsi_device(). + +329 for pscsi_record in vscsi_util.get_all_scsi_devices(): +330 scsi_id = pscsi_record['scsi_id'] +331 if scsi_id: +332 saved_HBA_uuid = None + +I think, in most cases we don't need all the PSCSI devices registered in +xend, but only a few of it. +So a good solution for this perforamce issue is to scan only the SCSI device +paths we need, controlled by a new option in xend-config.sxp. + +I have made a patch to allow specification of scsi devices we need in xend +in the config file xend-config.sxp. +The new options pscsi-device-mask expects a list of device ids oder partial +device ids like the option of lsscsi, e.g. 
+(pscsi-device-mask (': "Well done" +Committed-by: Ian Jackson + +Index: xen-4.0.0-testing/tools/examples/xend-config.sxp +=================================================================== +--- xen-4.0.0-testing.orig/tools/examples/xend-config.sxp ++++ xen-4.0.0-testing/tools/examples/xend-config.sxp +@@ -277,3 +277,11 @@ + # we have to realize this may incur security issue and we can't make sure the + # device assignment could really work properly even after we do this. + #(pci-passthrough-strict-check yes) ++ ++# If we have a very big scsi device configuration, start of xend is slow, ++# because xend scans all the device paths to build its internal PSCSI device ++# list. If we need only a few devices for assigning to a guest, we can reduce ++# the scan to this device. Set list list of device paths in same syntax like in ++# command lsscsi, e.g. ('16:0:0:0' '15:0') ++# (pscsi-device-mask ('*')) ++ +Index: xen-4.0.0-testing/tools/python/xen/util/vscsi_util.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/util/vscsi_util.py ++++ xen-4.0.0-testing/tools/python/xen/util/vscsi_util.py +@@ -148,11 +148,12 @@ def _vscsi_get_scsidevices_by_sysfs(): + return devices + + +-def vscsi_get_scsidevices(): ++def vscsi_get_scsidevices(mask=""): + """ get all scsi devices information """ + +- devices = _vscsi_get_scsidevices_by_lsscsi("") +- if devices: ++ devices = _vscsi_get_scsidevices_by_lsscsi("[%s]" % mask) ++ if devices or (len(mask) and mask[0] != "*"): ++ # devices found or partial device scan + return devices + return _vscsi_get_scsidevices_by_sysfs() + +@@ -274,9 +275,9 @@ def get_scsi_device(pHCTL): + return _make_scsi_record(scsi_info) + return None + +-def get_all_scsi_devices(): ++def get_all_scsi_devices(mask=""): + scsi_records = [] +- for scsi_info in vscsi_get_scsidevices(): ++ for scsi_info in vscsi_get_scsidevices(mask): + scsi_record = _make_scsi_record(scsi_info) + 
scsi_records.append(scsi_record) + return scsi_records +Index: xen-4.0.0-testing/tools/python/xen/xend/XendNode.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendNode.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendNode.py +@@ -323,7 +323,12 @@ class XendNode: + pscsi_table = {} + pscsi_HBA_table = {} + +- for pscsi_record in vscsi_util.get_all_scsi_devices(): ++ pscsi_records = [] ++ for pscsi_mask in xendoptions().get_pscsi_device_mask(): ++ pscsi_records += vscsi_util.get_all_scsi_devices(pscsi_mask) ++ log.debug("pscsi record count: %s" % len(pscsi_records)) ++ ++ for pscsi_record in pscsi_records: + scsi_id = pscsi_record['scsi_id'] + if scsi_id: + saved_HBA_uuid = None +Index: xen-4.0.0-testing/tools/python/xen/xend/XendOptions.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendOptions.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendOptions.py +@@ -164,6 +164,9 @@ class XendOptions: + """ + print >>sys.stderr, "xend [ERROR]", fmt % args + ++ """Default mask for pscsi device scan.""" ++ xend_pscsi_device_mask = ['*'] ++ + + def configure(self): + self.set_config() +@@ -430,6 +433,10 @@ class XendOptions: + return self.get_config_bool("pci-passthrough-strict-check", + self.pci_dev_assign_strict_check_default) + ++ def get_pscsi_device_mask(self): ++ return self.get_config_value("pscsi-device-mask", ++ self.xend_pscsi_device_mask) ++ + class XendOptionsFile(XendOptions): + + """Default path to the config file.""" diff --git a/21866-xenapi.patch b/21866-xenapi.patch new file mode 100644 index 0000000..d9a5377 --- /dev/null +++ b/21866-xenapi.patch @@ -0,0 +1,90 @@ +# HG changeset patch +# User Lutz Dube +# Date 1280245980 -3600 +# Node ID e017930af272c888f2a562f842af4e142a973d5f +# Parent 5078f2c1e3d6a3a06ecf352a068eb496f09a2a98 +xend (XenAPI): Error in Xend-API method VM_set_actions_after_crash + +Xend-API 
defines the method VM_set_actions_after_crash with valid +action names coredump_and_destroy, coredump_and_restart,... . These +values have to be converted into internal representation +"coredump-destroy", "coredump-restart", ... otherwise start of the +domain is rejected. Same error occurs, if I try to create a VM using +the Xend-API with actions_after_crash set to coredump_and_destroy. + +Could you please apply my patch to xen-4-0-testing, too. + +Signed-off-by: Lutz Dube Lutz.Dube@ts.fujitsu.com +Acked-by: Jim Fehlig +Committed-by: Ian Jackson + +Index: xen-4.0.0-testing/tools/python/xen/xend/XendAPI.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendAPI.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendAPI.py +@@ -1667,7 +1667,8 @@ class XendAPI(object): + def VM_set_actions_after_crash(self, session, vm_ref, action): + if action not in XEN_API_ON_CRASH_BEHAVIOUR: + return xen_api_error(['VM_ON_CRASH_BEHAVIOUR_INVALID', vm_ref]) +- return self.VM_set('actions_after_crash', session, vm_ref, action) ++ return self.VM_set('actions_after_crash', session, vm_ref, ++ XEN_API_ON_CRASH_BEHAVIOUR_LEGACY[action]) + + def VM_set_HVM_boot_policy(self, session, vm_ref, value): + if value != "" and value != "BIOS order": +Index: xen-4.0.0-testing/tools/python/xen/xend/XendAPIConstants.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendAPIConstants.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendAPIConstants.py +@@ -63,6 +63,18 @@ XEN_API_ON_CRASH_BEHAVIOUR_FILTER = { + 'rename_restart' : 'rename_restart', + } + ++XEN_API_ON_CRASH_BEHAVIOUR_LEGACY = { ++ 'destroy' : 'destroy', ++ 'coredump-destroy' : 'coredump-destroy', ++ 'coredump_and_destroy' : 'coredump-destroy', ++ 'restart' : 'restart', ++ 'coredump-restart' : 'coredump-restart', ++ 'coredump_and_restart' : 'coredump-restart', ++ 'preserve' : 'preserve', ++ 
'rename-restart' : 'rename-restart', ++ 'rename_restart' : 'rename-restart', ++} ++ + XEN_API_VBD_MODE = ['RO', 'RW'] + XEN_API_VDI_TYPE = ['system', 'user', 'ephemeral'] + XEN_API_VBD_TYPE = ['CD', 'Disk'] +Index: xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py +=================================================================== +--- xen-4.0.0-testing.orig/tools/python/xen/xend/XendConfig.py ++++ xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py +@@ -41,6 +41,7 @@ from xen.util.pci import pci_opts_list_f + from xen.xend.XendSXPDev import dev_dict_to_sxp + from xen.util import xsconstants + from xen.util import auxbin ++from xen.xend.XendAPIConstants import * + import xen.util.fileuri + + log = logging.getLogger("xend.XendConfig") +@@ -62,6 +63,11 @@ def reverse_dict(adict): + def bool0(v): + return v != '0' and v != 'False' and bool(v) + ++def convert_on_crash(v): ++ v = str(v) ++ return XEN_API_ON_CRASH_BEHAVIOUR_LEGACY[v] \ ++ if v in XEN_API_ON_CRASH_BEHAVIOUR else v ++ + # Recursively copy a data struct, scrubbing out VNC passwords. + # Will scrub any dict entry with a key of 'vncpasswd' or any + # 2-element list whose first member is 'vncpasswd'. 
It will +@@ -211,7 +217,7 @@ XENAPI_CFG_TYPES = { + 'VCPUs_live': int, + 'actions_after_shutdown': str, + 'actions_after_reboot': str, +- 'actions_after_crash': str, ++ 'actions_after_crash': convert_on_crash, + 'PV_bootloader': str, + 'PV_kernel': str, + 'PV_ramdisk': str, diff --git a/32on64-extra-mem.patch b/32on64-extra-mem.patch index 5b5d55e..c82dfe3 100644 --- a/32on64-extra-mem.patch +++ b/32on64-extra-mem.patch @@ -2,7 +2,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.0.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -2913,7 +2913,7 @@ class XendDomainInfo: +@@ -2917,7 +2917,7 @@ class XendDomainInfo: self.guest_bitsize = self.image.getBitSize() # Make sure there's enough RAM available for the domain diff --git a/README.SuSE b/README.SuSE index f161bbc..c8bd432 100644 --- a/README.SuSE +++ b/README.SuSE @@ -464,6 +464,9 @@ The hypervisor and domain 0 kernel are a matched set, and usually must be upgraded together. Consult the online documentation for a matrix of supported 32- and 64-bit combinations +A 64-bit paravirtualized VM will not run on 32-bit host but a 32-bit +paravirtualized VM will run on a 64-bit host. + On certain machines with 2GB or less of RAM, domain 0 Linux may fail to boot, printing the following messages: PCI-DMA: Using software bounce buffering for IO (SWIOTLB) @@ -495,8 +498,8 @@ file (viewable with the "xm dmesg" command). If problems persist, check if a newer version is available. Well-tested versions will be shipped with SUSE and via YaST Online Update. 
More frequent -(but less supported) updates are available on Novell's Forge site: - http://forge.novell.com/modules/xfmod/project/?xenpreview +(but less supported) updates are available on the Xen Technical Preview site: + ftp://ftp.novell.com/forge/XenTechnicalPreview/ Known Issues diff --git a/blktap.patch b/blktap.patch index 3d67c84..59cc25e 100644 --- a/blktap.patch +++ b/blktap.patch @@ -5,7 +5,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.0.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -3286,7 +3286,7 @@ class XendDomainInfo: +@@ -3290,7 +3290,7 @@ class XendDomainInfo: (fn, BOOTLOADER_LOOPBACK_DEVICE)) vbd = { diff --git a/block-iscsi b/block-iscsi index 6f14d7f..3f99c67 100644 --- a/block-iscsi +++ b/block-iscsi @@ -22,7 +22,7 @@ find_sdev() { unset dev for session in /sys/class/iscsi_session/session*; do - if [ "$1" = "`cat $session/targetname`" ]; then + if [ "$1" = "`cat $session/targetname 2>/dev/null`" ]; then dev=`basename $session/device/target*/*:0:*/block*/*` return fi @@ -35,7 +35,7 @@ find_sdev_rev() for session in /sys/class/iscsi_session/session*; do dev=`basename $session/device/target*/*:0:*/block*/*` if [ "$dev" = "$1" ]; then - tgt=`cat $session/targetname` + tgt=`cat $session/targetname 2>/dev/null` return fi done diff --git a/cpu-pools-python.patch b/cpu-pools-python.patch index f3ac492..49b0364 100644 --- a/cpu-pools-python.patch +++ b/cpu-pools-python.patch @@ -516,7 +516,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendAPI.py except Exception, ex: log.exception(ex) -@@ -1835,7 +1870,9 @@ class XendAPI(object): +@@ -1836,7 +1871,9 @@ class XendAPI(object): 'is_control_domain': xeninfo.info['is_control_domain'], 'metrics': xeninfo.get_metrics(), 'security_label': xeninfo.get_security_label(), @@ -527,7 +527,7 @@ Index: 
xen-4.0.0-testing/tools/python/xen/xend/XendAPI.py } return xen_api_success(record) -@@ -1933,6 +1970,25 @@ class XendAPI(object): +@@ -1934,6 +1971,25 @@ class XendAPI(object): xendom.domain_restore(src, bool(paused)) return xen_api_success_void() @@ -1465,7 +1465,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py =================================================================== --- xen-4.0.0-testing.orig/tools/python/xen/xend/XendConfig.py +++ xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py -@@ -128,6 +128,7 @@ XENAPI_CFG_TO_LEGACY_CFG = { +@@ -134,6 +134,7 @@ XENAPI_CFG_TO_LEGACY_CFG = { 'PV_bootloader': 'bootloader', 'PV_bootloader_args': 'bootloader_args', 'Description': 'description', @@ -1473,7 +1473,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py } LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(XENAPI_CFG_TO_LEGACY_CFG) -@@ -234,6 +235,7 @@ XENAPI_CFG_TYPES = { +@@ -240,6 +241,7 @@ XENAPI_CFG_TYPES = { 'superpages' : int, 'memory_sharing': int, 'Description': str, @@ -1481,7 +1481,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py } # List of legacy configuration keys that have no equivalent in the -@@ -279,6 +281,7 @@ LEGACY_CFG_TYPES = { +@@ -285,6 +287,7 @@ LEGACY_CFG_TYPES = { 'bootloader': str, 'bootloader_args': str, 'description': str, @@ -1489,7 +1489,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py } # Values that should be stored in xenstore's /vm/ that is used -@@ -300,6 +303,7 @@ LEGACY_XENSTORE_VM_PARAMS = [ +@@ -306,6 +309,7 @@ LEGACY_XENSTORE_VM_PARAMS = [ 'on_xend_stop', 'bootloader', 'bootloader_args', @@ -1497,7 +1497,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendConfig.py ] ## -@@ -408,6 +412,7 @@ class XendConfig(dict): +@@ -414,6 +418,7 @@ class XendConfig(dict): 'other_config': {}, 'platform': {}, 'target': 0, @@ -1530,7 +1530,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py from xen.xend.server.DevConstants import xenbusState from 
xen.xend.server.BlktapController import TAPDISK_DEVICE, parseDeviceString -@@ -2565,6 +2566,19 @@ class XendDomainInfo: +@@ -2569,6 +2570,19 @@ class XendDomainInfo: oos = self.info['platform'].get('oos', 1) oos_off = 1 - int(oos) @@ -1550,7 +1550,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py flags = (int(hvm) << 0) | (int(hap) << 1) | (int(s3_integrity) << 2) | (int(oos_off) << 3) try: -@@ -2586,6 +2600,11 @@ class XendDomainInfo: +@@ -2590,6 +2604,11 @@ class XendDomainInfo: failmsg += ', error=%i' % int(self.domid) raise VmError(failmsg) @@ -1562,7 +1562,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendDomainInfo.py self.dompath = GetDomainPath(self.domid) self._recreateDom() -@@ -3613,6 +3632,11 @@ class XendDomainInfo: +@@ -3617,6 +3636,11 @@ class XendDomainInfo: retval = xc.sched_credit_domain_get(self.getDomid()) return retval @@ -1634,7 +1634,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendNode.py def _init_networks(self): # Initialise networks -@@ -361,6 +364,18 @@ class XendNode: +@@ -366,6 +369,18 @@ class XendNode: for physical_host, pscsi_HBA_uuid in pscsi_HBA_table.items(): XendPSCSI_HBA(pscsi_HBA_uuid, {'physical_host': physical_host}) @@ -1653,7 +1653,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendNode.py def add_network(self, interface): # TODO -@@ -581,6 +596,7 @@ class XendNode: +@@ -586,6 +601,7 @@ class XendNode: self.save_PPCIs() self.save_PSCSIs() self.save_PSCSI_HBAs() @@ -1661,7 +1661,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendNode.py def save_PIFs(self): pif_records = dict([(pif_uuid, XendAPIStore.get( -@@ -623,6 +639,12 @@ class XendNode: +@@ -628,6 +644,12 @@ class XendNode: for pscsi_HBA_uuid in XendPSCSI_HBA.get_all()]) self.state_store.save_state('pscsi_HBA', pscsi_HBA_records) @@ -1674,7 +1674,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendNode.py def shutdown(self): return 0 -@@ -934,6 +956,7 @@ class XendNode: +@@ -939,6 +961,7 @@ class XendNode: 
self.format_node_to_memory(info, 'node_to_memory') info['node_to_dma32_mem'] = \ self.format_node_to_memory(info, 'node_to_dma32_mem') @@ -1682,7 +1682,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xend/XendNode.py # FIXME: These are hard-coded to be the inverse of the getXenMemory # functions in image.py. Find a cleaner way. -@@ -953,6 +976,7 @@ class XendNode: +@@ -958,6 +981,7 @@ class XendNode: 'virt_caps', 'total_memory', 'free_memory', @@ -1798,7 +1798,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xm/main.py from xen.util import auxbin import XenAPI -@@ -235,6 +236,23 @@ SUBCOMMAND_HELP = { +@@ -237,6 +238,23 @@ SUBCOMMAND_HELP = { 'tmem-freeable' : ('', 'Print freeable tmem (in MiB).'), 'tmem-shared-auth' : ('[|-a|--all] [--uuid=] [--auth=<0|1>]', 'De/authenticate shared tmem pool.'), @@ -1822,7 +1822,7 @@ Index: xen-4.0.0-testing/tools/python/xen/xm/main.py # security 'addlabel' : ('