From a89d75605efa81a1237a9683ced0d2ed2054354665394578e613072bacda1c4d Mon Sep 17 00:00:00 2001 From: Charles Arnold Date: Thu, 4 Aug 2016 19:26:11 +0000 Subject: [PATCH] - bsc#970135 - new virtualization project clock test randomly fails on Xen 576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch 5769106e-x86-generate-assembler-equates-for-synthesized.patch 57a1e603-x86-time-adjust-local-system-time-initialization.patch 57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch 57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch - bsc#991934 - xen hypervisor crash in csched_acct 57973099-have-schedulers-revise-initial-placement.patch 579730e6-remove-buggy-initial-placement-algorithm.patch - bsc#988675 - VUL-0: CVE-2016-6258: xen: x86: Privilege escalation in PV guests (XSA-182) 57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch - bsc#988676 - VUL-0: CVE-2016-6259: xen: x86: Missing SMAP whitelisting in 32-bit exception / event delivery (XSA-183) 57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch - Upstream patches from Jan 57a30261-x86-support-newer-Intel-CPU-models.patch - bsc#985503 - vif-route broken vif-route.patch OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=445 --- ...l-stamp-in-TSC-calibration-fast-path.patch | 50 +++ ...te-assembler-equates-for-synthesized.patch | 133 ++++++++ ...-schedulers-revise-initial-placement.patch | 94 ++++++ ...ve-buggy-initial-placement-algorithm.patch | 84 +++++ ...safe-bits-from-mod_lN_entry-fastpath.patch | 94 ++++++ ...lation-in-compat_create_bounce_frame.patch | 61 ++++ ...ust-local-system-time-initialization.patch | 123 ++++++++ ...time-introduce-and-use-rdtsc_ordered.patch | 190 +++++++++++ ...calibrate-TSC-against-platform-timer.patch | 298 ++++++++++++++++++ ...1-x86-support-newer-Intel-CPU-models.patch | 200 ++++++++++++ vif-route.patch | 15 + xen.changes | 28 ++ xen.spec | 26 +- 13 files changed, 1394 insertions(+), 2 deletions(-) create mode 100644 576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch create mode 100644 5769106e-x86-generate-assembler-equates-for-synthesized.patch create mode 100644 57973099-have-schedulers-revise-initial-placement.patch create mode 100644 579730e6-remove-buggy-initial-placement-algorithm.patch create mode 100644 57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch create mode 100644 57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch create mode 100644 57a1e603-x86-time-adjust-local-system-time-initialization.patch create mode 100644 57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch create mode 100644 57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch create mode 100644 57a30261-x86-support-newer-Intel-CPU-models.patch create mode 100644 vif-route.patch diff --git a/576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch b/576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch new file mode 100644 index 0000000..ab84ab5 --- /dev/null +++ b/576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch @@ -0,0 +1,50 @@ +References: bsc#970135 + +# Commit b64438c7c1495a7580d1bb9d8ba644f3705e1ffb +# Date 2016-06-14 15:08:47 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/time: use correct (local) time stamp in constant-TSC calibration fast path + +This looks like a copy and paste mistake in commit 1b6a99892d ("x86: +Simpler time handling when TSC is constant across all power saving +states"), responsible for occasional 
many-microsecond cross-CPU skew of
+what NOW() returns.
+
+Also improve the correlation between local TSC and stime stamps
+obtained at the end of the two calibration handlers: Compute the stime
+one from the TSC one, instead of doing another rdtsc() for that
+computation.
+
+Signed-off-by: Jan Beulich
+Reviewed-by: Andrew Cooper
+
+--- a/xen/arch/x86/time.c
++++ b/xen/arch/x86/time.c
+@@ -998,7 +998,7 @@ static void local_time_calibration(void)
+     /* Atomically read cpu_calibration struct and write cpu_time struct. */
+     local_irq_disable();
+     t->local_tsc_stamp    = c->local_tsc_stamp;
+-    t->stime_local_stamp  = c->stime_master_stamp;
++    t->stime_local_stamp  = c->stime_local_stamp;
+     t->stime_master_stamp = c->stime_master_stamp;
+     local_irq_enable();
+     update_vcpu_system_time(current);
+@@ -1275,7 +1275,7 @@ static void time_calibration_tsc_rendezv
+     }
+
+     c->local_tsc_stamp = rdtsc();
+-    c->stime_local_stamp = get_s_time();
++    c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
+     c->stime_master_stamp = r->master_stime;
+
+     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
+@@ -1305,7 +1305,7 @@ static void time_calibration_std_rendezv
+     }
+
+     c->local_tsc_stamp = rdtsc();
+-    c->stime_local_stamp = get_s_time();
++    c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
+     c->stime_master_stamp = r->master_stime;
+
+     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
diff --git a/5769106e-x86-generate-assembler-equates-for-synthesized.patch b/5769106e-x86-generate-assembler-equates-for-synthesized.patch
new file mode 100644
index 0000000..467ab3d
--- /dev/null
+++ b/5769106e-x86-generate-assembler-equates-for-synthesized.patch
@@ -0,0 +1,133 @@
+References: bsc#970135
+
+# Commit 06f083c826836a098f793db821845b313ad88a7f
+# Date 2016-06-21 12:01:18 +0200
+# Author Jan Beulich
+# Committer Jan Beulich
+x86: also generate assembler usable equates for synthesized features
+
+... to make it possible to base alternative instruction patching upon
+such.
+
+Signed-off-by: Jan Beulich
+Tested-by: Dario Faggioli
+Reviewed-by: Andrew Cooper
+
+--- a/xen/arch/x86/sysctl.c
++++ b/xen/arch/x86/sysctl.c
+@@ -219,7 +219,8 @@ long arch_do_sysctl(
+     }
+
+     /* Clip the number of entries. */
+-    nr = min(sysctl->u.cpu_featureset.nr_features, FSCAPINTS);
++    nr = min_t(unsigned int, sysctl->u.cpu_featureset.nr_features,
++               FSCAPINTS);
+
+     /* Look up requested featureset. */
+     if ( sysctl->u.cpu_featureset.index < ARRAY_SIZE(featureset_table) )
+--- a/xen/include/asm-x86/cpufeature.h
++++ b/xen/include/asm-x86/cpufeature.h
+@@ -3,8 +3,23 @@
+  *
+  * Defines x86 CPU feature bits
+  */
++#if defined(XEN_CPUFEATURE)
+
+-#ifndef __ASM_I386_CPUFEATURE_H
++/* Other features, Xen-defined mapping.
*/ ++/* This range is used for feature bits which conflict or are synthesized */ ++XEN_CPUFEATURE(CONSTANT_TSC, (FSCAPINTS+0)*32+ 0) /* TSC ticks at a constant rate */ ++XEN_CPUFEATURE(NONSTOP_TSC, (FSCAPINTS+0)*32+ 1) /* TSC does not stop in C states */ ++XEN_CPUFEATURE(ARAT, (FSCAPINTS+0)*32+ 2) /* Always running APIC timer */ ++XEN_CPUFEATURE(ARCH_PERFMON, (FSCAPINTS+0)*32+ 3) /* Intel Architectural PerfMon */ ++XEN_CPUFEATURE(TSC_RELIABLE, (FSCAPINTS+0)*32+ 4) /* TSC is known to be reliable */ ++XEN_CPUFEATURE(XTOPOLOGY, (FSCAPINTS+0)*32+ 5) /* cpu topology enum extensions */ ++XEN_CPUFEATURE(CPUID_FAULTING, (FSCAPINTS+0)*32+ 6) /* cpuid faulting */ ++XEN_CPUFEATURE(CLFLUSH_MONITOR, (FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */ ++XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */ ++ ++#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */ ++ ++#elif !defined(__ASM_I386_CPUFEATURE_H) + #ifndef X86_FEATURES_ONLY + #define __ASM_I386_CPUFEATURE_H + #endif +@@ -12,20 +27,6 @@ + #include + #include + +-#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */ +- +-/* Other features, Xen-defined mapping. */ +-/* This range is used for feature bits which conflict or are synthesized */ +-#define X86_FEATURE_CONSTANT_TSC ((FSCAPINTS+0)*32+ 0) /* TSC ticks at a constant rate */ +-#define X86_FEATURE_NONSTOP_TSC ((FSCAPINTS+0)*32+ 1) /* TSC does not stop in C states */ +-#define X86_FEATURE_ARAT ((FSCAPINTS+0)*32+ 2) /* Always running APIC timer */ +-#define X86_FEATURE_ARCH_PERFMON ((FSCAPINTS+0)*32+ 3) /* Intel Architectural PerfMon */ +-#define X86_FEATURE_TSC_RELIABLE ((FSCAPINTS+0)*32+ 4) /* TSC is known to be reliable */ +-#define X86_FEATURE_XTOPOLOGY ((FSCAPINTS+0)*32+ 5) /* cpu topology enum extensions */ +-#define X86_FEATURE_CPUID_FAULTING ((FSCAPINTS+0)*32+ 6) /* cpuid faulting */ +-#define X86_FEATURE_CLFLUSH_MONITOR ((FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */ +-#define X86_FEATURE_APERFMPERF ((FSCAPINTS+0)*32+ 8) /* APERFMPERF */ +- + #define cpufeat_word(idx) ((idx) / 32) + #define cpufeat_bit(idx) ((idx) % 32) + #define cpufeat_mask(idx) (_AC(1, U) << cpufeat_bit(idx)) +--- a/xen/include/asm-x86/cpufeatureset.h ++++ b/xen/include/asm-x86/cpufeatureset.h +@@ -3,19 +3,25 @@ + + #ifndef __ASSEMBLY__ + ++#include ++ + #define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value, + enum { + #include ++#include + }; + #undef XEN_CPUFEATURE + +-#define XEN_CPUFEATURE(name, value) asm (".equ X86_FEATURE_" #name ", " #value); ++#define XEN_CPUFEATURE(name, value) asm (".equ X86_FEATURE_" #name ", " \ ++ __stringify(value)); + #include ++#include + + #else /* !__ASSEMBLY__ */ + + #define XEN_CPUFEATURE(name, value) .equ X86_FEATURE_##name, value + #include ++#include + + #endif /* __ASSEMBLY__ */ + +--- a/xen/include/asm-x86/cpuid.h ++++ b/xen/include/asm-x86/cpuid.h +@@ -1,12 +1,13 @@ + #ifndef __X86_CPUID_H__ + #define __X86_CPUID_H__ + +-#include + #include +-#include + + #define FSCAPINTS FEATURESET_NR_ENTRIES + ++#include ++#include ++ + #define FEATURESET_1d 0 /* 0x00000001.edx */ + #define FEATURESET_1c 1 /* 0x00000001.ecx */ + #define FEATURESET_e1d 2 /* 0x80000001.edx */ +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -291,7 +291,7 @@ def write_results(state): + + state.output.write( + """ +-#define FEATURESET_NR_ENTRIES %sU ++#define FEATURESET_NR_ENTRIES %s + + #define CPUID_COMMON_1D_FEATURES %s + diff --git a/57973099-have-schedulers-revise-initial-placement.patch 
b/57973099-have-schedulers-revise-initial-placement.patch
new file mode 100644
index 0000000..4e3d6d0
--- /dev/null
+++ b/57973099-have-schedulers-revise-initial-placement.patch
@@ -0,0 +1,94 @@
+References: bsc#991934
+
+# Commit 9f358ddd69463fa8fb65cf67beb5f6f0d3350e32
+# Date 2016-07-26 10:42:49 +0100
+# Author George Dunlap
+# Committer George Dunlap
+xen: Have schedulers revise initial placement
+
+The generic domain creation logic in
+xen/common/domctl.c:default_vcpu0_location() attempts to do initial
+placement load-balancing by placing vcpu 0 on the least-busy
+non-primary hyperthread available. Unfortunately, the logic can end
+up picking a pcpu that's not in the online mask. When this is passed
+to a scheduler such as credit2, which assumes that the initial
+assignment is valid, it causes a null pointer dereference looking up
+the runqueue.
+
+Furthermore, this initial placement doesn't take into account hard or
+soft affinity, or any scheduler-specific knowledge (such as historic
+runqueue load, as in credit2).
+
+To solve this, when inserting a vcpu, always call the per-scheduler
+"pick" function to revise the initial placement. This will
+automatically take all knowledge the scheduler has into account.
+
+csched2_cpu_pick ASSERTs that the vcpu's pcpu scheduler lock has been
+taken. Grab and release the lock to minimize time spent with irqs
+disabled.
+
+Signed-off-by: George Dunlap
+Reviewed-by: Meng Xu
+Reviewed-by: Dario Faggioli
+
+--- a/xen/common/sched_credit.c
++++ b/xen/common/sched_credit.c
+@@ -994,6 +994,9 @@ csched_vcpu_insert(const struct schedule
+
+     BUG_ON( is_idle_vcpu(vc) );
+
++    /* This is safe because vc isn't yet being scheduled */
++    vc->processor = csched_cpu_pick(ops, vc);
++
+     lock = vcpu_schedule_lock_irq(vc);
+
+     if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
+--- a/xen/common/sched_credit2.c
++++ b/xen/common/sched_credit2.c
+@@ -318,6 +318,8 @@ struct csched2_dom {
+     uint16_t nr_vcpus;
+ };
+
++static int csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc);
++
+ /*
+  * When a hard affinity change occurs, we may not be able to check some
+  * (any!)
of the other runqueues, when looking for the best new processor +@@ -956,9 +958,16 @@ csched2_vcpu_insert(const struct schedul + + BUG_ON(is_idle_vcpu(vc)); + +- /* Add vcpu to runqueue of initial processor */ ++ /* csched2_cpu_pick() expects the pcpu lock to be held */ + lock = vcpu_schedule_lock_irq(vc); + ++ vc->processor = csched2_cpu_pick(ops, vc); ++ ++ spin_unlock_irq(lock); ++ ++ lock = vcpu_schedule_lock_irq(vc); ++ ++ /* Add vcpu to runqueue of initial processor */ + runq_assign(ops, vc); + + vcpu_schedule_unlock_irq(lock, vc); +--- a/xen/common/sched_rt.c ++++ b/xen/common/sched_rt.c +@@ -203,6 +203,8 @@ struct rt_dom { + struct domain *dom; /* pointer to upper domain */ + }; + ++static int rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc); ++ + /* + * Useful inline functions + */ +@@ -845,6 +847,9 @@ rt_vcpu_insert(const struct scheduler *o + + BUG_ON( is_idle_vcpu(vc) ); + ++ /* This is safe because vc isn't yet being scheduled */ ++ vc->processor = rt_cpu_pick(ops, vc); ++ + lock = vcpu_schedule_lock_irq(vc); + + now = NOW(); diff --git a/579730e6-remove-buggy-initial-placement-algorithm.patch b/579730e6-remove-buggy-initial-placement-algorithm.patch new file mode 100644 index 0000000..cae6d6d --- /dev/null +++ b/579730e6-remove-buggy-initial-placement-algorithm.patch @@ -0,0 +1,84 @@ +References: bsc#991934 + +# Commit d5438accceecc8172db2d37d98b695eb8bc43afc +# Date 2016-07-26 10:44:06 +0100 +# Author George Dunlap +# Committer George Dunlap +xen: Remove buggy initial placement algorithm + +The initial placement algorithm sometimes picks cpus outside of the +mask it's given, does a lot of unnecessary bitmasking, does its own +separate load calculation, and completely ignores vcpu hard and soft +affinities. Just get rid of it and rely on the schedulers to do +initial placement. + +Signed-off-by: George Dunlap +Reviewed-by: Dario Faggioli +Acked-by: Andrew Cooper + +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -217,54 +217,6 @@ void getdomaininfo(struct domain *d, str + memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t)); + } + +-static unsigned int default_vcpu0_location(cpumask_t *online) +-{ +- struct domain *d; +- struct vcpu *v; +- unsigned int i, cpu, nr_cpus, *cnt; +- cpumask_t cpu_exclude_map; +- +- /* Do an initial CPU placement. Pick the least-populated CPU. */ +- nr_cpus = cpumask_last(&cpu_online_map) + 1; +- cnt = xzalloc_array(unsigned int, nr_cpus); +- if ( cnt ) +- { +- rcu_read_lock(&domlist_read_lock); +- for_each_domain ( d ) +- for_each_vcpu ( d, v ) +- if ( !(v->pause_flags & VPF_down) +- && ((cpu = v->processor) < nr_cpus) ) +- cnt[cpu]++; +- rcu_read_unlock(&domlist_read_lock); +- } +- +- /* +- * If we're on a HT system, we only auto-allocate to a non-primary HT. We +- * favour high numbered CPUs in the event of a tie. 
+- */ +- cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0)); +- cpu = cpumask_first(&cpu_exclude_map); +- i = cpumask_next(cpu, &cpu_exclude_map); +- if ( i < nr_cpu_ids ) +- cpu = i; +- for_each_cpu(i, online) +- { +- if ( cpumask_test_cpu(i, &cpu_exclude_map) ) +- continue; +- if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) && +- (cpumask_next(i, per_cpu(cpu_sibling_mask, i)) < nr_cpu_ids) ) +- continue; +- cpumask_or(&cpu_exclude_map, &cpu_exclude_map, +- per_cpu(cpu_sibling_mask, i)); +- if ( !cnt || cnt[i] <= cnt[cpu] ) +- cpu = i; +- } +- +- xfree(cnt); +- +- return cpu; +-} +- + bool_t domctl_lock_acquire(void) + { + /* +@@ -691,7 +643,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe + continue; + + cpu = (i == 0) ? +- default_vcpu0_location(online) : ++ cpumask_any(online) : + cpumask_cycle(d->vcpu[i-1]->processor, online); + + if ( alloc_vcpu(d, i, cpu) == NULL ) diff --git a/57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch b/57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch new file mode 100644 index 0000000..0de2967 --- /dev/null +++ b/57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch @@ -0,0 +1,94 @@ +References: bsc#988675 CVE-2016-6258 XSA-182 + +# Commit e1bff4c2ea3b32464510ac00c320bba28a8dbcca +# Date 2016-07-26 14:06:59 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/pv: Remove unsafe bits from the mod_l?_entry() fastpath + +All changes in writeability and cacheability must go through full +re-validation. + +Rework the logic as a whitelist, to make it clearer to follow. + +This is XSA-182 + +Reported-by: Jérémie Boutoille +Signed-off-by: Andrew Cooper +Reviewed-by: Tim Deegan + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -1852,6 +1852,14 @@ static inline int update_intpte(intpte_t + _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \ + (_m), (_v), (_ad)) + ++/* ++ * PTE flags that a guest may change without re-validating the PTE. ++ * All other bits affect translation, caching, or Xen's safety. ++ */ ++#define FASTPATH_FLAG_WHITELIST \ ++ (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \ ++ _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER) ++ + /* Update the L1 entry at pl1e to new value nl1e. */ + static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, + unsigned long gl1mfn, int preserve_ad, +@@ -1891,9 +1899,8 @@ static int mod_l1_entry(l1_pgentry_t *pl + nl1e = l1e_from_pfn(page_to_mfn(page), l1e_get_flags(nl1e)); + } + +- /* Fast path for identical mapping, r/w, presence, and cachability. */ +- if ( !l1e_has_changed(ol1e, nl1e, +- PAGE_CACHE_ATTRS | _PAGE_RW | _PAGE_PRESENT) ) ++ /* Fast path for sufficiently-similar mappings. */ ++ if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) ) + { + adjust_guest_l1e(nl1e, pt_dom); + rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, +@@ -1970,11 +1977,8 @@ static int mod_l2_entry(l2_pgentry_t *pl + return -EINVAL; + } + +- /* Fast path for identical mapping and presence. */ +- if ( !l2e_has_changed(ol2e, nl2e, +- unlikely(opt_allow_superpage) +- ? _PAGE_PSE | _PAGE_RW | _PAGE_PRESENT +- : _PAGE_PRESENT) ) ++ /* Fast path for sufficiently-similar mappings. */ ++ if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) ) + { + adjust_guest_l2e(nl2e, d); + if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) ) +@@ -2039,8 +2043,8 @@ static int mod_l3_entry(l3_pgentry_t *pl + return -EINVAL; + } + +- /* Fast path for identical mapping and presence. 
*/ +- if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) ) ++ /* Fast path for sufficiently-similar mappings. */ ++ if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) ) + { + adjust_guest_l3e(nl3e, d); + rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad); +@@ -2103,8 +2107,8 @@ static int mod_l4_entry(l4_pgentry_t *pl + return -EINVAL; + } + +- /* Fast path for identical mapping and presence. */ +- if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) ) ++ /* Fast path for sufficiently-similar mappings. */ ++ if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) ) + { + adjust_guest_l4e(nl4e, d); + rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad); +--- a/xen/include/asm-x86/page.h ++++ b/xen/include/asm-x86/page.h +@@ -313,6 +313,7 @@ void efi_update_l4_pgtable(unsigned int + #define _PAGE_AVAIL2 _AC(0x800,U) + #define _PAGE_AVAIL _AC(0xE00,U) + #define _PAGE_PSE_PAT _AC(0x1000,U) ++#define _PAGE_AVAIL_HIGH (_AC(0x7ff, U) << 12) + #define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0) + /* non-architectural flags */ + #define _PAGE_PAGED 0x2000U diff --git a/57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch b/57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch new file mode 100644 index 0000000..1b16898 --- /dev/null +++ b/57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch @@ -0,0 +1,61 @@ +References: bsc#988676 CVE-2016-6259 XSA-183 + +# Commit 9f1441487aa215193a7c00fd9cb80b335542465e +# Date 2016-07-26 14:07:04 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/entry: Avoid SMAP violation in compat_create_bounce_frame() + +A 32bit guest kernel might be running on user mappings. +compat_create_bounce_frame() must whitelist its guest accesses to avoid +risking a SMAP violation. + +For both variants of create_bounce_frame(), re-blacklist user accesses if +execution exits via an exception table redirection. + +This is XSA-183 / CVE-2016-6259 + +Signed-off-by: Andrew Cooper +Reviewed-by: George Dunlap +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -318,6 +318,7 @@ ENTRY(compat_int80_direct_trap) + compat_create_bounce_frame: + ASSERT_INTERRUPTS_ENABLED + mov %fs,%edi ++ ASM_STAC + testb $2,UREGS_cs+8(%rsp) + jz 1f + /* Push new frame at registered guest-OS stack base. */ +@@ -364,6 +365,7 @@ compat_create_bounce_frame: + movl TRAPBOUNCE_error_code(%rdx),%eax + .Lft8: movl %eax,%fs:(%rsi) # ERROR CODE + 1: ++ ASM_CLAC + /* Rewrite our stack frame and return to guest-OS mode. */ + /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */ + andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\ +@@ -403,6 +405,7 @@ compat_crash_page_fault_4: + addl $4,%esi + compat_crash_page_fault: + .Lft14: mov %edi,%fs ++ ASM_CLAC + movl %esi,%edi + call show_page_walk + jmp dom_crash_sync_extable +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -420,9 +420,11 @@ domain_crash_page_fault_16: + domain_crash_page_fault_8: + addq $8,%rsi + domain_crash_page_fault: ++ ASM_CLAC + movq %rsi,%rdi + call show_page_walk + ENTRY(dom_crash_sync_extable) ++ ASM_CLAC + # Get out of the guest-save area of the stack. 
+ GET_STACK_END(ax) + leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp diff --git a/57a1e603-x86-time-adjust-local-system-time-initialization.patch b/57a1e603-x86-time-adjust-local-system-time-initialization.patch new file mode 100644 index 0000000..90bf101 --- /dev/null +++ b/57a1e603-x86-time-adjust-local-system-time-initialization.patch @@ -0,0 +1,123 @@ +References: bsc#970135 + +# Commit bb49fd3092a84ce151f5528794c0e612eeb4961a +# Date 2016-08-03 14:39:31 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/time: adjust local system time initialization + +Using the bare return value from read_platform_stime() is not suitable +when local_time_calibration() is going to use its fast path: Divergence +of several dozen microseconds between NOW() return values on different +CPUs results when platform and local time don't stay in close sync. + +Latch local and platform time on the CPU initiating AP bringup, such +that the AP can use these values to seed its stime_local_stamp with as +little of an error as possible. The boot CPU, otoh, can simply +calculate the correct initial value (other CPUs could do so too with +even greater accuracy than the approach being introduced, but that can +work only if all CPUs' TSCs start ticking at the same time, which +generally can't be assumed to be the case on multi-socket systems). + +This slightly defers init_percpu_time() (moved ahead by commit +dd2658f966 ["x86/time: initialise time earlier during +start_secondary()"]) in order to reduce as much as possible the gap +between populating the stamps and consuming them. + +Signed-off-by: Jan Beulich +Tested-by: Dario Faggioli +Tested-by: Joao Martins +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -328,12 +328,12 @@ void start_secondary(void *unused) + + percpu_traps_init(); + +- init_percpu_time(); +- + cpu_init(); + + smp_callin(); + ++ init_percpu_time(); ++ + setup_secondary_APIC_clock(); + + /* +@@ -996,6 +996,8 @@ int __cpu_up(unsigned int cpu) + if ( (ret = do_boot_cpu(apicid, cpu)) != 0 ) + return ret; + ++ time_latch_stamps(); ++ + set_cpu_state(CPU_STATE_ONLINE); + while ( !cpu_online(cpu) ) + { +--- a/xen/arch/x86/time.c ++++ b/xen/arch/x86/time.c +@@ -1328,21 +1328,52 @@ static void time_calibration(void *unuse + &r, 1); + } + ++static struct { ++ s_time_t local_stime, master_stime; ++} ap_bringup_ref; ++ ++void time_latch_stamps(void) ++{ ++ unsigned long flags; ++ u64 tsc; ++ ++ local_irq_save(flags); ++ ap_bringup_ref.master_stime = read_platform_stime(); ++ tsc = rdtsc(); ++ local_irq_restore(flags); ++ ++ ap_bringup_ref.local_stime = get_s_time_fixed(tsc); ++} ++ + void init_percpu_time(void) + { + struct cpu_time *t = &this_cpu(cpu_time); + unsigned long flags; ++ u64 tsc; + s_time_t now; + + /* Initial estimate for TSC rate. */ + t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale; + + local_irq_save(flags); +- t->local_tsc_stamp = rdtsc(); + now = read_platform_stime(); ++ tsc = rdtsc(); + local_irq_restore(flags); + + t->stime_master_stamp = now; ++ /* ++ * To avoid a discontinuity (TSC and platform clock can't be expected ++ * to be in perfect sync), initialization here needs to match up with ++ * local_time_calibration()'s decision whether to use its fast path. 
++ */ ++ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) ++ { ++ if ( system_state < SYS_STATE_smp_boot ) ++ now = get_s_time_fixed(tsc); ++ else ++ now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime; ++ } ++ t->local_tsc_stamp = tsc; + t->stime_local_stamp = now; + } + +--- a/xen/include/asm-x86/time.h ++++ b/xen/include/asm-x86/time.h +@@ -40,6 +40,7 @@ int time_suspend(void); + int time_resume(void); + + void init_percpu_time(void); ++void time_latch_stamps(void); + + struct ioreq; + int hwdom_pit_access(struct ioreq *ioreq); diff --git a/57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch b/57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch new file mode 100644 index 0000000..86e9e81 --- /dev/null +++ b/57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch @@ -0,0 +1,190 @@ +References: bsc#970135 + +# Commit fa74e70500fd73dd2fc441c7dc00b190fb37cee5 +# Date 2016-08-03 14:40:44 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/time: introduce and use rdtsc_ordered() + +Matching Linux commit 03b9730b76 ("x86/asm/tsc: Add rdtsc_ordered() and +use it in trivial call sites") and earlier ones it builds upon, let's +make sure timing loops don't have their rdtsc()-s re-ordered, as that +would harm precision of the result (values were observed to be several +hundred clocks off without this adjustment). + +Signed-off-by: Jan Beulich +Tested-by: Dario Faggioli +Reviewed-by: Andrew Cooper +Tested-by: Joao Martins + +--- a/xen/arch/x86/apic.c ++++ b/xen/arch/x86/apic.c +@@ -1137,7 +1137,7 @@ static int __init calibrate_APIC_clock(v + /* + * We wrapped around just now. Let's start: + */ +- t1 = rdtsc(); ++ t1 = rdtsc_ordered(); + tt1 = apic_read(APIC_TMCCT); + + /* +@@ -1147,7 +1147,7 @@ static int __init calibrate_APIC_clock(v + wait_8254_wraparound(); + + tt2 = apic_read(APIC_TMCCT); +- t2 = rdtsc(); ++ t2 = rdtsc_ordered(); + + /* + * The APIC bus clock counter is 32 bits only, it +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -541,6 +541,9 @@ static void init_amd(struct cpuinfo_x86 + wrmsr_amd_safe(0xc001100d, l, h & ~1); + } + ++ /* MFENCE stops RDTSC speculation */ ++ __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); ++ + switch(c->x86) + { + case 0xf ... 0x17: +--- a/xen/arch/x86/delay.c ++++ b/xen/arch/x86/delay.c +@@ -21,10 +21,10 @@ void __udelay(unsigned long usecs) + unsigned long ticks = usecs * (cpu_khz / 1000); + unsigned long s, e; + +- s = rdtsc(); ++ s = rdtsc_ordered(); + do + { + rep_nop(); +- e = rdtsc(); ++ e = rdtsc_ordered(); + } while ((e-s) < ticks); + } +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -123,7 +123,7 @@ static void synchronize_tsc_master(unsig + + for ( i = 1; i <= 5; i++ ) + { +- tsc_value = rdtsc(); ++ tsc_value = rdtsc_ordered(); + wmb(); + atomic_inc(&tsc_count); + while ( atomic_read(&tsc_count) != (i<<1) ) +--- a/xen/arch/x86/time.c ++++ b/xen/arch/x86/time.c +@@ -257,10 +257,10 @@ static u64 init_pit_and_calibrate_tsc(vo + outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */ + outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */ + +- start = rdtsc(); ++ start = rdtsc_ordered(); + for ( count = 0; (inb(0x61) & 0x20) == 0; count++ ) + continue; +- end = rdtsc(); ++ end = rdtsc_ordered(); + + /* Error if the CTC doesn't behave itself. 
*/ + if ( count == 0 ) +@@ -760,7 +760,7 @@ s_time_t get_s_time_fixed(u64 at_tsc) + if ( at_tsc ) + tsc = at_tsc; + else +- tsc = rdtsc(); ++ tsc = rdtsc_ordered(); + delta = tsc - t->local_tsc_stamp; + now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale); + +@@ -933,7 +933,7 @@ int cpu_frequency_change(u64 freq) + /* TSC-extrapolated time may be bogus after frequency change. */ + /*t->stime_local_stamp = get_s_time();*/ + t->stime_local_stamp = t->stime_master_stamp; +- curr_tsc = rdtsc(); ++ curr_tsc = rdtsc_ordered(); + t->local_tsc_stamp = curr_tsc; + set_time_scale(&t->tsc_scale, freq); + local_irq_enable(); +@@ -1248,7 +1248,7 @@ static void time_calibration_tsc_rendezv + if ( r->master_stime == 0 ) + { + r->master_stime = read_platform_stime(); +- r->master_tsc_stamp = rdtsc(); ++ r->master_tsc_stamp = rdtsc_ordered(); + } + atomic_inc(&r->semaphore); + +@@ -1274,7 +1274,7 @@ static void time_calibration_tsc_rendezv + } + } + +- c->local_tsc_stamp = rdtsc(); ++ c->local_tsc_stamp = rdtsc_ordered(); + c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp); + c->stime_master_stamp = r->master_stime; + +@@ -1304,7 +1304,7 @@ static void time_calibration_std_rendezv + mb(); /* receive signal /then/ read r->master_stime */ + } + +- c->local_tsc_stamp = rdtsc(); ++ c->local_tsc_stamp = rdtsc_ordered(); + c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp); + c->stime_master_stamp = r->master_stime; + +@@ -1339,7 +1339,7 @@ void time_latch_stamps(void) + + local_irq_save(flags); + ap_bringup_ref.master_stime = read_platform_stime(); +- tsc = rdtsc(); ++ tsc = rdtsc_ordered(); + local_irq_restore(flags); + + ap_bringup_ref.local_stime = get_s_time_fixed(tsc); +@@ -1357,7 +1357,7 @@ void init_percpu_time(void) + + local_irq_save(flags); + now = read_platform_stime(); +- tsc = rdtsc(); ++ tsc = rdtsc_ordered(); + local_irq_restore(flags); + + t->stime_master_stamp = now; +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -16,6 +16,7 @@ XEN_CPUFEATURE(XTOPOLOGY, (FSCAPIN + XEN_CPUFEATURE(CPUID_FAULTING, (FSCAPINTS+0)*32+ 6) /* cpuid faulting */ + XEN_CPUFEATURE(CLFLUSH_MONITOR, (FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */ + XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */ ++XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTSC */ + + #define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */ + +--- a/xen/include/asm-x86/msr.h ++++ b/xen/include/asm-x86/msr.h +@@ -80,6 +80,22 @@ static inline uint64_t rdtsc(void) + return ((uint64_t)high << 32) | low; + } + ++static inline uint64_t rdtsc_ordered(void) ++{ ++ /* ++ * The RDTSC instruction is not ordered relative to memory access. ++ * The Intel SDM and the AMD APM are both vague on this point, but ++ * empirically an RDTSC instruction can be speculatively executed ++ * before prior loads. An RDTSC immediately after an appropriate ++ * barrier appears to be ordered as a normal load, that is, it ++ * provides the same ordering guarantees as reading from a global ++ * memory location that some other imaginary CPU is updating ++ * continuously with a time stamp. ++ */ ++ alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC); ++ return rdtsc(); ++} ++ + #define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val) + #define write_tsc(val) ({ \ + /* Reliable TSCs are in lockstep across all CPUs. 
We should \
diff --git a/57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch b/57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch
new file mode 100644
index 0000000..cfb0c4d
--- /dev/null
+++ b/57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch
@@ -0,0 +1,298 @@
+References: bsc#970135
+
+# Commit 93340297802b8e743b6ce66b0bc366af1ad51f39
+# Date 2016-08-04 10:02:52 +0200
+# Author Jan Beulich
+# Committer Jan Beulich
+x86/time: calibrate TSC against platform timer
+
+... instead of unconditionally against the PIT. This allows for local
+and master system times to remain in better sync (which matters even
+when, on any modern system, the master time is really used only during
+secondary CPU bringup, as the error between the two is in fact
+noticeable in cross-CPU NOW() invocation monotonicity).
+
+This involves moving the init_platform_timer() invocation into
+early_time_init(), splitting out the few things which really need to be
+done in init_xen_time(). That in turn allows dropping the open coded
+PIT initialization from init_IRQ() (it was needed for APIC clock
+calibration, which runs between early_time_init() and init_xen_time()).
+
+In the course of this re-ordering also set the timer channel 2 gate low
+after having finished calibration. This should be benign to overall
+system operation, but appears to be the cleaner state.
+
+Also do away with open coded 8254 register manipulation from 8259 code.
+
+Signed-off-by: Jan Beulich
+Reviewed-by: Andrew Cooper
+
+--- a/xen/arch/x86/i8259.c
++++ b/xen/arch/x86/i8259.c
+@@ -359,13 +359,6 @@ void __init init_IRQ(void)
+
+     apic_intr_init();
+
+-    /* Set the clock to HZ Hz */
+-#define CLOCK_TICK_RATE 1193182 /* crystal freq (Hz) */
+-#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
+-    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
+-    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
+-    outb(LATCH >> 8, PIT_CH0);     /* MSB */
+-
+     setup_irq(2, 0, &cascade);
+ }
+
+--- a/xen/arch/x86/time.c
++++ b/xen/arch/x86/time.c
+@@ -59,7 +59,7 @@ struct platform_timesource {
+     char *name;
+     u64 frequency;
+     u64 (*read_counter)(void);
+-    int (*init)(struct platform_timesource *);
++    s64 (*init)(struct platform_timesource *);
+     void (*resume)(struct platform_timesource *);
+     int counter_bits;
+ };
+@@ -224,49 +224,18 @@ static struct irqaction __read_mostly ir
+     timer_interrupt, "timer", NULL
+ };
+
+-/* ------ Calibrate the TSC -------
+- * Return processor ticks per second / CALIBRATE_FRAC.
+- */
+-
+ #define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
+ #define CALIBRATE_FRAC  20      /* calibrate over 50ms */
+-#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
++#define CALIBRATE_VALUE(freq) (((freq) + CALIBRATE_FRAC / 2) / CALIBRATE_FRAC)
+
+-static u64 init_pit_and_calibrate_tsc(void)
++static void preinit_pit(void)
+ {
+-    u64 start, end;
+-    unsigned long count;
+-
+     /* Set PIT channel 0 to HZ Hz. */
+ #define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
+     outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
+     outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
+     outb(LATCH >> 8, PIT_CH0);     /* MSB */
+-
+-    /* Set the Gate high, disable speaker */
+-    outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+-
+-    /*
+-     * Now let's take care of CTC channel 2
+-     *
+-     * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
+-     * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
+-     * to begin countdown.
+- */ +- outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */ +- outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */ +- outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */ +- +- start = rdtsc_ordered(); +- for ( count = 0; (inb(0x61) & 0x20) == 0; count++ ) +- continue; +- end = rdtsc_ordered(); +- +- /* Error if the CTC doesn't behave itself. */ +- if ( count == 0 ) +- return 0; +- +- return ((end - start) * (u64)CALIBRATE_FRAC); ++#undef LATCH + } + + void set_time_scale(struct time_scale *ts, u64 ticks_per_sec) +@@ -327,10 +296,49 @@ static u64 read_pit_count(void) + return count32; + } + +-static int __init init_pit(struct platform_timesource *pts) ++static s64 __init init_pit(struct platform_timesource *pts) + { ++ u8 portb = inb(0x61); ++ u64 start, end; ++ unsigned long count; ++ + using_pit = 1; +- return 1; ++ ++ /* Set the Gate high, disable speaker. */ ++ outb((portb & ~0x02) | 0x01, 0x61); ++ ++ /* ++ * Now let's take care of CTC channel 2: mode 0, (interrupt on ++ * terminal count mode), binary count, load CALIBRATE_LATCH count, ++ * (LSB and MSB) to begin countdown. ++ */ ++#define CALIBRATE_LATCH CALIBRATE_VALUE(CLOCK_TICK_RATE) ++ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */ ++ outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */ ++ outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */ ++#undef CALIBRATE_LATCH ++ ++ start = rdtsc_ordered(); ++ for ( count = 0; !(inb(0x61) & 0x20); ++count ) ++ continue; ++ end = rdtsc_ordered(); ++ ++ /* Set the Gate low, disable speaker. */ ++ outb(portb & ~0x03, 0x61); ++ ++ /* Error if the CTC doesn't behave itself. */ ++ if ( count == 0 ) ++ return 0; ++ ++ return (end - start) * CALIBRATE_FRAC; ++} ++ ++static void resume_pit(struct platform_timesource *pts) ++{ ++ /* Set CTC channel 2 to mode 0 again; initial value does not matter. 
*/ ++ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */ ++ outb(0, PIT_CH2); /* LSB of count */ ++ outb(0, PIT_CH2); /* MSB of count */ + } + + static struct platform_timesource __initdata plt_pit = +@@ -340,7 +348,8 @@ static struct platform_timesource __init + .frequency = CLOCK_TICK_RATE, + .read_counter = read_pit_count, + .counter_bits = 32, +- .init = init_pit ++ .init = init_pit, ++ .resume = resume_pit, + }; + + /************************************************************ +@@ -352,15 +361,26 @@ static u64 read_hpet_count(void) + return hpet_read32(HPET_COUNTER); + } + +-static int __init init_hpet(struct platform_timesource *pts) ++static s64 __init init_hpet(struct platform_timesource *pts) + { +- u64 hpet_rate = hpet_setup(); ++ u64 hpet_rate = hpet_setup(), start; ++ u32 count, target; + + if ( hpet_rate == 0 ) + return 0; + + pts->frequency = hpet_rate; +- return 1; ++ ++ count = hpet_read32(HPET_COUNTER); ++ start = rdtsc_ordered(); ++ target = count + CALIBRATE_VALUE(hpet_rate); ++ if ( target < count ) ++ while ( hpet_read32(HPET_COUNTER) >= count ) ++ continue; ++ while ( hpet_read32(HPET_COUNTER) < target ) ++ continue; ++ ++ return (rdtsc_ordered() - start) * CALIBRATE_FRAC; + } + + static void resume_hpet(struct platform_timesource *pts) +@@ -392,12 +412,24 @@ static u64 read_pmtimer_count(void) + return inl(pmtmr_ioport); + } + +-static int __init init_pmtimer(struct platform_timesource *pts) ++static s64 __init init_pmtimer(struct platform_timesource *pts) + { ++ u64 start; ++ u32 count, target, mask = 0xffffff; ++ + if ( pmtmr_ioport == 0 ) + return 0; + +- return 1; ++ count = inl(pmtmr_ioport) & mask; ++ start = rdtsc_ordered(); ++ target = count + CALIBRATE_VALUE(ACPI_PM_FREQUENCY); ++ if ( target < count ) ++ while ( (inl(pmtmr_ioport) & mask) >= count ) ++ continue; ++ while ( (inl(pmtmr_ioport) & mask) < target ) ++ continue; ++ ++ return (rdtsc_ordered() - start) * CALIBRATE_FRAC; + } + + static struct platform_timesource __initdata plt_pmtimer = +@@ -533,14 +565,15 @@ static void resume_platform_timer(void) + plt_stamp = plt_src.read_counter(); + } + +-static void __init init_platform_timer(void) ++static u64 __init init_platform_timer(void) + { + static struct platform_timesource * __initdata plt_timers[] = { + &plt_hpet, &plt_pmtimer, &plt_pit + }; + + struct platform_timesource *pts = NULL; +- int i, rc = -1; ++ unsigned int i; ++ s64 rc = -1; + + if ( opt_clocksource[0] != '\0' ) + { +@@ -578,15 +611,12 @@ static void __init init_platform_timer(v + + plt_overflow_period = scale_delta( + 1ull << (pts->counter_bits-1), &plt_scale); +- init_timer(&plt_overflow_timer, plt_overflow, NULL, 0); + plt_src = *pts; +- plt_overflow(NULL); +- +- platform_timer_stamp = plt_stamp64; +- stime_platform_stamp = NOW(); + + printk("Platform timer is %s %s\n", + freq_string(pts->frequency), pts->name); ++ ++ return rc; + } + + u64 stime2tsc(s_time_t stime) +@@ -1479,7 +1509,11 @@ int __init init_xen_time(void) + /* NB. get_cmos_time() can take over one second to execute. */ + do_settime(get_cmos_time(), 0, NOW()); + +- init_platform_timer(); ++ /* Finish platform timer initialization. 
*/ ++ init_timer(&plt_overflow_timer, plt_overflow, NULL, 0); ++ plt_overflow(NULL); ++ platform_timer_stamp = plt_stamp64; ++ stime_platform_stamp = NOW(); + + init_percpu_time(); + +@@ -1494,7 +1528,10 @@ int __init init_xen_time(void) + void __init early_time_init(void) + { + struct cpu_time *t = &this_cpu(cpu_time); +- u64 tmp = init_pit_and_calibrate_tsc(); ++ u64 tmp; ++ ++ preinit_pit(); ++ tmp = init_platform_timer(); + + set_time_scale(&t->tsc_scale, tmp); + t->local_tsc_stamp = boot_tsc_stamp; +@@ -1603,7 +1640,7 @@ int time_suspend(void) + + int time_resume(void) + { +- init_pit_and_calibrate_tsc(); ++ preinit_pit(); + + resume_platform_timer(); + diff --git a/57a30261-x86-support-newer-Intel-CPU-models.patch b/57a30261-x86-support-newer-Intel-CPU-models.patch new file mode 100644 index 0000000..e05ad75 --- /dev/null +++ b/57a30261-x86-support-newer-Intel-CPU-models.patch @@ -0,0 +1,200 @@ +# Commit 350bc1a9d4ebc03b18a43cdafcb626618caace55 +# Date 2016-08-04 10:52:49 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86: support newer Intel CPU models + +... as per the June 2016 edition of the SDM. + +Also remove a couple of dead break statements as well as unused +*MSR_PM_LASTBRANCH* #define-s. + +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +Acked-by: Kevin Tian + +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -61,14 +61,14 @@ + + #define GET_HW_RES_IN_NS(msr, val) \ + do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 ) +-#define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val) /* Atom E3000 only */ ++#define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val) + #define GET_PC2_RES(val) GET_HW_RES_IN_NS(0x60D, val) /* SNB onwards */ + #define GET_PC3_RES(val) GET_HW_RES_IN_NS(0x3F8, val) + #define GET_PC6_RES(val) GET_HW_RES_IN_NS(0x3F9, val) + #define GET_PC7_RES(val) GET_HW_RES_IN_NS(0x3FA, val) +-#define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val) /* some Haswells only */ +-#define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val) /* some Haswells only */ +-#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some Haswells only */ ++#define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val) ++#define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val) ++#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) + #define GET_CC1_RES(val) GET_HW_RES_IN_NS(0x660, val) /* Silvermont only */ + #define GET_CC3_RES(val) GET_HW_RES_IN_NS(0x3FC, val) + #define GET_CC6_RES(val) GET_HW_RES_IN_NS(0x3FD, val) +@@ -142,6 +142,8 @@ static void do_get_hw_residencies(void * + { + /* 4th generation Intel Core (Haswell) */ + case 0x45: ++ /* Xeon E5/E7 v4 (Broadwell) */ ++ case 0x4F: + GET_PC8_RES(hw_res->pc8); + GET_PC9_RES(hw_res->pc9); + GET_PC10_RES(hw_res->pc10); +@@ -158,10 +160,11 @@ static void do_get_hw_residencies(void * + case 0x46: + /* Broadwell */ + case 0x3D: +- case 0x4F: ++ case 0x47: + case 0x56: +- /* future */ ++ /* Skylake */ + case 0x4E: ++ case 0x5E: + GET_PC2_RES(hw_res->pc2); + GET_CC7_RES(hw_res->cc7); + /* fall through */ +@@ -198,18 +201,28 @@ static void do_get_hw_residencies(void * + break; + /* Silvermont */ + case 0x37: +- GET_MC6_RES(hw_res->mc6); +- /* fall through */ + case 0x4A: + case 0x4D: + case 0x5A: + case 0x5D: + /* Airmont */ + case 0x4C: ++ GET_MC6_RES(hw_res->mc6); + GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */ + GET_CC1_RES(hw_res->cc1); + GET_CC6_RES(hw_res->cc6); + break; ++ /* Goldmont */ ++ case 0x5C: ++ case 0x5F: ++ GET_PC2_RES(hw_res->pc2); ++ GET_PC3_RES(hw_res->pc3); ++ GET_PC6_RES(hw_res->pc6); ++ 
GET_PC10_RES(hw_res->pc10); ++ GET_CC1_RES(hw_res->cc1); ++ GET_CC3_RES(hw_res->cc3); ++ GET_CC6_RES(hw_res->cc6); ++ break; + } + } + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2526,6 +2526,14 @@ static const struct lbr_info { + { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO }, + { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO }, + { 0, 0 } ++}, sk_lbr[] = { ++ { MSR_IA32_LASTINTFROMIP, 1 }, ++ { MSR_IA32_LASTINTTOIP, 1 }, ++ { MSR_SKL_LASTBRANCH_TOS, 1 }, ++ { MSR_SKL_LASTBRANCH_0_FROM_IP, NUM_MSR_SKL_LASTBRANCH }, ++ { MSR_SKL_LASTBRANCH_0_TO_IP, NUM_MSR_SKL_LASTBRANCH }, ++ { MSR_SKL_LASTBRANCH_0_INFO, NUM_MSR_SKL_LASTBRANCH }, ++ { 0, 0 } + }, at_lbr[] = { + { MSR_IA32_LASTINTFROMIP, 1 }, + { MSR_IA32_LASTINTTOIP, 1 }, +@@ -2533,6 +2541,13 @@ static const struct lbr_info { + { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO }, + { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO }, + { 0, 0 } ++}, gm_lbr[] = { ++ { MSR_IA32_LASTINTFROMIP, 1 }, ++ { MSR_IA32_LASTINTTOIP, 1 }, ++ { MSR_GM_LASTBRANCH_TOS, 1 }, ++ { MSR_GM_LASTBRANCH_0_FROM_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO }, ++ { MSR_GM_LASTBRANCH_0_TO_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO }, ++ { 0, 0 } + }; + + static const struct lbr_info *last_branch_msr_get(void) +@@ -2547,7 +2562,6 @@ static const struct lbr_info *last_branc + /* Enhanced Core */ + case 23: + return c2_lbr; +- break; + /* Nehalem */ + case 26: case 30: case 31: case 46: + /* Westmere */ +@@ -2559,11 +2573,13 @@ static const struct lbr_info *last_branc + /* Haswell */ + case 60: case 63: case 69: case 70: + /* Broadwell */ +- case 61: case 79: case 86: +- /* future */ +- case 78: ++ case 61: case 71: case 79: case 86: + return nh_lbr; +- break; ++ /* Skylake */ ++ case 78: case 94: ++ /* future */ ++ case 142: case 158: ++ return sk_lbr; + /* Atom */ + case 28: case 38: case 39: case 53: case 54: + /* Silvermont */ +@@ -2573,7 +2589,9 @@ static const struct lbr_info *last_branc + /* Airmont */ + case 76: + return at_lbr; +- break; ++ /* Goldmont */ ++ case 92: case 95: ++ return gm_lbr; + } + break; + +@@ -2583,7 +2601,6 @@ static const struct lbr_info *last_branc + /* Pentium4/Xeon with em64t */ + case 3: case 4: case 6: + return p4_lbr; +- break; + } + break; + } +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -458,11 +458,6 @@ + #define MSR_P4_LASTBRANCH_0_TO_LIP 0x000006c0 + #define NUM_MSR_P4_LASTBRANCH_FROM_TO 16 + +-/* Pentium M (and Core) last-branch recording */ +-#define MSR_PM_LASTBRANCH_TOS 0x000001c9 +-#define MSR_PM_LASTBRANCH_0 0x00000040 +-#define NUM_MSR_PM_LASTBRANCH 8 +- + /* Core 2 and Atom last-branch recording */ + #define MSR_C2_LASTBRANCH_TOS 0x000001c9 + #define MSR_C2_LASTBRANCH_0_FROM_IP 0x00000040 +@@ -470,6 +465,19 @@ + #define NUM_MSR_C2_LASTBRANCH_FROM_TO 4 + #define NUM_MSR_ATOM_LASTBRANCH_FROM_TO 8 + ++/* Skylake (and newer) last-branch recording */ ++#define MSR_SKL_LASTBRANCH_TOS 0x000001c9 ++#define MSR_SKL_LASTBRANCH_0_FROM_IP 0x00000680 ++#define MSR_SKL_LASTBRANCH_0_TO_IP 0x000006c0 ++#define MSR_SKL_LASTBRANCH_0_INFO 0x00000dc0 ++#define NUM_MSR_SKL_LASTBRANCH 32 ++ ++/* Goldmont last-branch recording */ ++#define MSR_GM_LASTBRANCH_TOS 0x000001c9 ++#define MSR_GM_LASTBRANCH_0_FROM_IP 0x00000680 ++#define MSR_GM_LASTBRANCH_0_TO_IP 0x000006c0 ++#define NUM_MSR_GM_LASTBRANCH_FROM_TO 32 ++ + /* Intel Core-based CPU performance counters */ + #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 + #define MSR_CORE_PERF_FIXED_CTR1 
0x0000030a diff --git a/vif-route.patch b/vif-route.patch new file mode 100644 index 0000000..d46767f --- /dev/null +++ b/vif-route.patch @@ -0,0 +1,15 @@ +References: bsc#985503 + +Index: xen-4.7.0-testing/tools/hotplug/Linux/vif-route +=================================================================== +--- xen-4.7.0-testing.orig/tools/hotplug/Linux/vif-route ++++ xen-4.7.0-testing/tools/hotplug/Linux/vif-route +@@ -35,7 +35,7 @@ case "${command}" in + ;; + esac + +-if [ "${ip}" ] ; then ++if [ "${ip}" ] && [ "${ipcmd}" ] ; then + # If we've been given a list of IP addresses, then add routes from dom0 to + # the guest using those addresses. + for addr in ${ip} ; do diff --git a/xen.changes b/xen.changes index 975020d..8537b8d 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,31 @@ +------------------------------------------------------------------- +Thu Aug 4 09:12:34 MDT 2016 - carnold@suse.com + +- bsc#970135 - new virtualization project clock test randomly fails + on Xen + 576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch + 5769106e-x86-generate-assembler-equates-for-synthesized.patch + 57a1e603-x86-time-adjust-local-system-time-initialization.patch + 57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch + 57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch +- bsc#991934 - xen hypervisor crash in csched_acct + 57973099-have-schedulers-revise-initial-placement.patch + 579730e6-remove-buggy-initial-placement-algorithm.patch +- bsc#988675 - VUL-0: CVE-2016-6258: xen: x86: Privilege escalation + in PV guests (XSA-182) + 57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch +- bsc#988676 - VUL-0: CVE-2016-6259: xen: x86: Missing SMAP + whitelisting in 32-bit exception / event delivery (XSA-183) + 57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch +- Upstream patches from Jan + 57a30261-x86-support-newer-Intel-CPU-models.patch + +------------------------------------------------------------------- +Mon Aug 1 11:46:22 MDT 2016 - carnold@suse.com + +- bsc#985503 - vif-route broken + vif-route.patch + ------------------------------------------------------------------- Thu Jul 28 05:23:12 MDT 2016 - carnold@suse.com diff --git a/xen.spec b/xen.spec index ce52705..dc9ae0d 100644 --- a/xen.spec +++ b/xen.spec @@ -165,7 +165,7 @@ BuildRequires: xorg-x11-util-devel %endif %endif -Version: 4.7.0_09 +Version: 4.7.0_10 Release: 0 Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) License: GPL-2.0 @@ -205,7 +205,17 @@ Source99: baselibs.conf # Upstream patches Patch1: 57580bbd-kexec-allow-relaxed-placement-via-cmdline.patch Patch2: 575e9ca0-nested-vmx-Validate-host-VMX-MSRs-before-accessing-them.patch -Patch3: 57640448-xen-sched-use-default-scheduler-upon-an-invalid-sched.patch +Patch3: 576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch +Patch4: 57640448-xen-sched-use-default-scheduler-upon-an-invalid-sched.patch +Patch5: 5769106e-x86-generate-assembler-equates-for-synthesized.patch +Patch6: 57973099-have-schedulers-revise-initial-placement.patch +Patch7: 579730e6-remove-buggy-initial-placement-algorithm.patch +Patch8: 57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch +Patch9: 57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch +Patch10: 57a1e603-x86-time-adjust-local-system-time-initialization.patch +Patch11: 57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch +Patch12: 57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch +Patch13: 
57a30261-x86-support-newer-Intel-CPU-models.patch # Upstream qemu-traditional patches Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch @@ -274,6 +284,7 @@ Patch403: xl-conf-default-bridge.patch Patch420: suspend_evtchn_lock.patch Patch421: xenpaging.doc.patch Patch422: stubdom-have-iovec.patch +Patch423: vif-route.patch # Other bug fixes or features Patch451: xenconsole-no-multiple-connections.patch Patch452: hibernate.patch @@ -529,6 +540,16 @@ Authors: %patch1 -p1 %patch2 -p1 %patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 # Upstream qemu patches %patch250 -p1 %patch251 -p1 @@ -597,6 +618,7 @@ Authors: %patch420 -p1 %patch421 -p1 %patch422 -p1 +%patch423 -p1 # Other bug fixes or features %patch451 -p1 %patch452 -p1
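A note on the two time-handling techniques the bsc#970135 patches above rely on. Stripped of Xen's types and alternatives framework, the rdtsc_ordered() fencing idea and the CALIBRATE_FRAC-style platform-timer calibration reduce to roughly the following user-space C sketch. It is illustrative only: rdtsc_plain(), rdtsc_ordered_sketch() and calibrate_tsc() are hypothetical names, the unconditional LFENCE assumes an Intel-style CPU (Xen selects LFENCE vs. MFENCE at runtime via the X86_FEATURE_MFENCE_RDTSC synthetic feature), and counter wraparound is ignored (the real init_hpet()/init_pmtimer() handle it).

#include <stdint.h>

/* Plain RDTSC; may be speculatively executed ahead of earlier loads. */
static inline uint64_t rdtsc_plain(void)
{
    uint32_t lo, hi;

    __asm__ __volatile__ ( "rdtsc" : "=a" (lo), "=d" (hi) );
    return ((uint64_t)hi << 32) | lo;
}

/*
 * Ordered RDTSC: the preceding fence makes the TSC read behave like an
 * ordinary load, so deltas taken in timing loops cannot be skewed by a
 * TSC read that slipped ahead of the loads around it.
 */
static inline uint64_t rdtsc_ordered_sketch(void)
{
    __asm__ __volatile__ ( "lfence" ::: "memory" );
    return rdtsc_plain();
}

/*
 * Measure TSC ticks per second by timing a 1/20 s (50 ms) window on a
 * platform counter of known frequency -- the same CALIBRATE_FRAC /
 * CALIBRATE_VALUE() scheme 57a2f6ac applies to the PIT, HPET and ACPI
 * PM timer.
 */
static uint64_t calibrate_tsc(uint64_t (*read_counter)(void),
                              uint64_t counter_freq)
{
    const uint64_t frac = 20;
    uint64_t target = read_counter() + (counter_freq + frac / 2) / frac;
    uint64_t start = rdtsc_ordered_sketch();

    while ( read_counter() < target )
        continue;

    return (rdtsc_ordered_sketch() - start) * frac;
}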
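Likewise, the XSA-182 fix replaces the per-level ad-hoc masks in mod_l?_entry() with a single whitelist test. A minimal sketch of that check, with fastpath_allowed() a hypothetical name and the whitelist parameter standing in for Xen's FASTPATH_FLAG_WHITELIST:

#include <stdbool.h>
#include <stdint.h>

/*
 * A PTE update may take the fast path (no full re-validation) only if
 * every bit that differs between the old and new entry is whitelisted;
 * any change to writability, cacheability, presence or the frame
 * number must go through the slow path.
 */
static bool fastpath_allowed(uint64_t old_pte, uint64_t new_pte,
                             uint64_t whitelist)
{
    uint64_t changed = old_pte ^ new_pte;

    return (changed & ~whitelist) == 0;
}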