- bsc#970135 - new virtualization project clock test randomly fails
  on Xen
  576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch
  5769106e-x86-generate-assembler-equates-for-synthesized.patch
  57a1e603-x86-time-adjust-local-system-time-initialization.patch
  57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch
  57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch
- bsc#991934 - xen hypervisor crash in csched_acct
  57973099-have-schedulers-revise-initial-placement.patch
  579730e6-remove-buggy-initial-placement-algorithm.patch
- bsc#988675 - VUL-0: CVE-2016-6258: xen: x86: Privilege escalation
  in PV guests (XSA-182)
  57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch
- bsc#988676 - VUL-0: CVE-2016-6259: xen: x86: Missing SMAP
  whitelisting in 32-bit exception / event delivery (XSA-183)
  57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch
- Upstream patches from Jan
  57a30261-x86-support-newer-Intel-CPU-models.patch

- bsc#985503 - vif-route broken
  vif-route.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=445
commit a89d75605e (parent c8a1704907)
Charles Arnold, 2016-08-04 19:26:11 +00:00, committed by Git OBS Bridge
13 changed files with 1394 additions and 2 deletions

576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch Normal file

@@ -0,0 +1,50 @@
References: bsc#970135
# Commit b64438c7c1495a7580d1bb9d8ba644f3705e1ffb
# Date 2016-06-14 15:08:47 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: use correct (local) time stamp in constant-TSC calibration fast path
This looks like a copy and paste mistake in commit 1b6a99892d ("x86:
Simpler time handling when TSC is constant across all power saving
states"), responsible for occasional many-microsecond cross-CPU skew of
what NOW() returns.
Also improve the correlation between local TSC and stime stamps
obtained at the end of the two calibration handlers: Compute the stime
one from the TSC one, instead of doing another rdtsc() for that
computation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -998,7 +998,7 @@ static void local_time_calibration(void)
/* Atomically read cpu_calibration struct and write cpu_time struct. */
local_irq_disable();
t->local_tsc_stamp = c->local_tsc_stamp;
- t->stime_local_stamp = c->stime_master_stamp;
+ t->stime_local_stamp = c->stime_local_stamp;
t->stime_master_stamp = c->stime_master_stamp;
local_irq_enable();
update_vcpu_system_time(current);
@@ -1275,7 +1275,7 @@ static void time_calibration_tsc_rendezv
}
c->local_tsc_stamp = rdtsc();
- c->stime_local_stamp = get_s_time();
+ c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
raise_softirq(TIME_CALIBRATE_SOFTIRQ);
@@ -1305,7 +1305,7 @@ static void time_calibration_std_rendezv
}
c->local_tsc_stamp = rdtsc();
- c->stime_local_stamp = get_s_time();
+ c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
raise_softirq(TIME_CALIBRATE_SOFTIRQ);
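
For context (not part of the patch): NOW() is extrapolated from the last
calibration stamp pair, so both stamps must describe the same local
timeline. A minimal C sketch of that extrapolation, with an invented
integer ns-per-tick scale standing in for Xen's fixed-point time_scale
and hypothetical names throughout:

    #include <stdint.h>

    typedef int64_t s_time_t;

    struct cpu_time_sketch {
        uint64_t local_tsc_stamp;    /* TSC at last calibration */
        s_time_t stime_local_stamp;  /* local system time at that TSC */
    };

    /* Simplified stand-in for get_s_time_fixed(): extrapolate local
     * system time from the stamp pair.  If stime_local_stamp had been
     * filled with the *master* stamp instead, every value returned
     * here would be skewed by the master/local offset. */
    s_time_t get_s_time_sketch(const struct cpu_time_sketch *t,
                               uint64_t tsc, uint64_t ns_per_tick)
    {
        return t->stime_local_stamp +
               (s_time_t)((tsc - t->local_tsc_stamp) * ns_per_tick);
    }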

5769106e-x86-generate-assembler-equates-for-synthesized.patch Normal file

@@ -0,0 +1,133 @@
References: bsc#970135
# Commit 06f083c826836a098f793db821845b313ad88a7f
# Date 2016-06-21 12:01:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: also generate assembler usable equates for synthesized features
... to make it possible to base alternative instruction patching upon
such.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -219,7 +219,8 @@ long arch_do_sysctl(
}
/* Clip the number of entries. */
- nr = min(sysctl->u.cpu_featureset.nr_features, FSCAPINTS);
+ nr = min_t(unsigned int, sysctl->u.cpu_featureset.nr_features,
+ FSCAPINTS);
/* Look up requested featureset. */
if ( sysctl->u.cpu_featureset.index < ARRAY_SIZE(featureset_table) )
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -3,8 +3,23 @@
*
* Defines x86 CPU feature bits
*/
+#if defined(XEN_CPUFEATURE)
-#ifndef __ASM_I386_CPUFEATURE_H
+/* Other features, Xen-defined mapping. */
+/* This range is used for feature bits which conflict or are synthesized */
+XEN_CPUFEATURE(CONSTANT_TSC, (FSCAPINTS+0)*32+ 0) /* TSC ticks at a constant rate */
+XEN_CPUFEATURE(NONSTOP_TSC, (FSCAPINTS+0)*32+ 1) /* TSC does not stop in C states */
+XEN_CPUFEATURE(ARAT, (FSCAPINTS+0)*32+ 2) /* Always running APIC timer */
+XEN_CPUFEATURE(ARCH_PERFMON, (FSCAPINTS+0)*32+ 3) /* Intel Architectural PerfMon */
+XEN_CPUFEATURE(TSC_RELIABLE, (FSCAPINTS+0)*32+ 4) /* TSC is known to be reliable */
+XEN_CPUFEATURE(XTOPOLOGY, (FSCAPINTS+0)*32+ 5) /* cpu topology enum extensions */
+XEN_CPUFEATURE(CPUID_FAULTING, (FSCAPINTS+0)*32+ 6) /* cpuid faulting */
+XEN_CPUFEATURE(CLFLUSH_MONITOR, (FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */
+XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */
+
+#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
+
+#elif !defined(__ASM_I386_CPUFEATURE_H)
#ifndef X86_FEATURES_ONLY
#define __ASM_I386_CPUFEATURE_H
#endif
@@ -12,20 +27,6 @@
#include <xen/const.h>
#include <asm/cpuid.h>
-#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
-
-/* Other features, Xen-defined mapping. */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CONSTANT_TSC ((FSCAPINTS+0)*32+ 0) /* TSC ticks at a constant rate */
-#define X86_FEATURE_NONSTOP_TSC ((FSCAPINTS+0)*32+ 1) /* TSC does not stop in C states */
-#define X86_FEATURE_ARAT ((FSCAPINTS+0)*32+ 2) /* Always running APIC timer */
-#define X86_FEATURE_ARCH_PERFMON ((FSCAPINTS+0)*32+ 3) /* Intel Architectural PerfMon */
-#define X86_FEATURE_TSC_RELIABLE ((FSCAPINTS+0)*32+ 4) /* TSC is known to be reliable */
-#define X86_FEATURE_XTOPOLOGY ((FSCAPINTS+0)*32+ 5) /* cpu topology enum extensions */
-#define X86_FEATURE_CPUID_FAULTING ((FSCAPINTS+0)*32+ 6) /* cpuid faulting */
-#define X86_FEATURE_CLFLUSH_MONITOR ((FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */
-#define X86_FEATURE_APERFMPERF ((FSCAPINTS+0)*32+ 8) /* APERFMPERF */
-
#define cpufeat_word(idx) ((idx) / 32)
#define cpufeat_bit(idx) ((idx) % 32)
#define cpufeat_mask(idx) (_AC(1, U) << cpufeat_bit(idx))
--- a/xen/include/asm-x86/cpufeatureset.h
+++ b/xen/include/asm-x86/cpufeatureset.h
@@ -3,19 +3,25 @@
#ifndef __ASSEMBLY__
+#include <xen/stringify.h>
+
#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
enum {
#include <public/arch-x86/cpufeatureset.h>
+#include <asm/cpufeature.h>
};
#undef XEN_CPUFEATURE
-#define XEN_CPUFEATURE(name, value) asm (".equ X86_FEATURE_" #name ", " #value);
+#define XEN_CPUFEATURE(name, value) asm (".equ X86_FEATURE_" #name ", " \
+ __stringify(value));
#include <public/arch-x86/cpufeatureset.h>
+#include <asm/cpufeature.h>
#else /* !__ASSEMBLY__ */
#define XEN_CPUFEATURE(name, value) .equ X86_FEATURE_##name, value
#include <public/arch-x86/cpufeatureset.h>
+#include <asm/cpufeature.h>
#endif /* __ASSEMBLY__ */
--- a/xen/include/asm-x86/cpuid.h
+++ b/xen/include/asm-x86/cpuid.h
@@ -1,12 +1,13 @@
#ifndef __X86_CPUID_H__
#define __X86_CPUID_H__
-#include <asm/cpufeatureset.h>
#include <asm/cpuid-autogen.h>
-#include <asm/percpu.h>
#define FSCAPINTS FEATURESET_NR_ENTRIES
+#include <asm/cpufeatureset.h>
+#include <asm/percpu.h>
+
#define FEATURESET_1d 0 /* 0x00000001.edx */
#define FEATURESET_1c 1 /* 0x00000001.ecx */
#define FEATURESET_e1d 2 /* 0x80000001.edx */
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -291,7 +291,7 @@ def write_results(state):
state.output.write(
"""
-#define FEATURESET_NR_ENTRIES %sU
+#define FEATURESET_NR_ENTRIES %s
#define CPUID_COMMON_1D_FEATURES %s
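
As an aside, the trick this patch relies on can be shown standalone:
expand the same XEN_CPUFEATURE() list once into a C enum and once into
asm(".equ ...") directives, where __stringify() forces macro expansion
of the value so the assembler sees plain arithmetic. A hedged sketch
with a placeholder FSCAPINTS, re-creating __stringify() locally
(compiles with GCC):

    #include <stdio.h>

    #define __stringify_1(x) #x
    #define __stringify(x)   __stringify_1(x)

    #define FSCAPINTS 5   /* placeholder; Xen derives this via gen-cpuid.py */

    /* First expansion: a C enum usable from C code. */
    #define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
    enum {
        XEN_CPUFEATURE(CONSTANT_TSC, (FSCAPINTS+0)*32+ 0)
    };
    #undef XEN_CPUFEATURE

    /* Second expansion: an assembler equate.  Without __stringify(),
     * "#value" would emit the unexpanded text "(FSCAPINTS+0)*32+ 0",
     * which the assembler cannot evaluate. */
    #define XEN_CPUFEATURE(name, value) \
        asm(".equ X86_FEATURE_" #name ", " __stringify(value));
    XEN_CPUFEATURE(CONSTANT_TSC, (FSCAPINTS+0)*32+ 0)
    #undef XEN_CPUFEATURE

    int main(void)
    {
        printf("X86_FEATURE_CONSTANT_TSC = %d\n", X86_FEATURE_CONSTANT_TSC);
        return 0;
    }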

57973099-have-schedulers-revise-initial-placement.patch Normal file

@@ -0,0 +1,94 @@
References: bsc#991934
# Commit 9f358ddd69463fa8fb65cf67beb5f6f0d3350e32
# Date 2016-07-26 10:42:49 +0100
# Author George Dunlap <george.dunlap@citrix.com>
# Committer George Dunlap <george.dunlap@citrix.com>
xen: Have schedulers revise initial placement
The generic domain creation logic in
xen/common/domctl.c:default_vcpu0_location() attempts to do initial
placement load-balancing by placing vcpu 0 on the least-busy
non-primary hyperthread available. Unfortunately, the logic can end
up picking a pcpu that's not in the online mask. When this is passed
to a scheduler which assumes that the initial assignment is valid, it
causes a null pointer dereference when looking up the runqueue.
Furthermore, this initial placement doesn't take into account hard or
soft affinity, or any scheduler-specific knowledge (such as historic
runqueue load, as in credit2).
To solve this, when inserting a vcpu, always call the per-scheduler
"pick" function to revise the initial placement. This will
automatically take all knowledge the scheduler has into account.
csched2_cpu_pick ASSERTs that the vcpu's pcpu scheduler lock has been
taken. Grab and release the lock to minimize time spent with irqs
disabled.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -994,6 +994,9 @@ csched_vcpu_insert(const struct schedule
BUG_ON( is_idle_vcpu(vc) );
+ /* This is safe because vc isn't yet being scheduled */
+ vc->processor = csched_cpu_pick(ops, vc);
+
lock = vcpu_schedule_lock_irq(vc);
if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -318,6 +318,8 @@ struct csched2_dom {
uint16_t nr_vcpus;
};
+static int csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc);
+
/*
* When a hard affinity change occurs, we may not be able to check some
* (any!) of the other runqueues, when looking for the best new processor
@@ -956,9 +958,16 @@ csched2_vcpu_insert(const struct schedul
BUG_ON(is_idle_vcpu(vc));
- /* Add vcpu to runqueue of initial processor */
+ /* csched2_cpu_pick() expects the pcpu lock to be held */
lock = vcpu_schedule_lock_irq(vc);
+ vc->processor = csched2_cpu_pick(ops, vc);
+
+ spin_unlock_irq(lock);
+
+ lock = vcpu_schedule_lock_irq(vc);
+
+ /* Add vcpu to runqueue of initial processor */
runq_assign(ops, vc);
vcpu_schedule_unlock_irq(lock, vc);
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -203,6 +203,8 @@ struct rt_dom {
struct domain *dom; /* pointer to upper domain */
};
+static int rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc);
+
/*
* Useful inline functions
*/
@@ -845,6 +847,9 @@ rt_vcpu_insert(const struct scheduler *o
BUG_ON( is_idle_vcpu(vc) );
+ /* This is safe because vc isn't yet being scheduled */
+ vc->processor = rt_cpu_pick(ops, vc);
+
lock = vcpu_schedule_lock_irq(vc);
now = NOW();

579730e6-remove-buggy-initial-placement-algorithm.patch Normal file

@@ -0,0 +1,84 @@
References: bsc#991934
# Commit d5438accceecc8172db2d37d98b695eb8bc43afc
# Date 2016-07-26 10:44:06 +0100
# Author George Dunlap <george.dunlap@citrix.com>
# Committer George Dunlap <george.dunlap@citrix.com>
xen: Remove buggy initial placement algorithm
The initial placement algorithm sometimes picks cpus outside of the
mask it's given, does a lot of unnecessary bitmasking, does its own
separate load calculation, and completely ignores vcpu hard and soft
affinities. Just get rid of it and rely on the schedulers to do
initial placement.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -217,54 +217,6 @@ void getdomaininfo(struct domain *d, str
memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
-static unsigned int default_vcpu0_location(cpumask_t *online)
-{
- struct domain *d;
- struct vcpu *v;
- unsigned int i, cpu, nr_cpus, *cnt;
- cpumask_t cpu_exclude_map;
-
- /* Do an initial CPU placement. Pick the least-populated CPU. */
- nr_cpus = cpumask_last(&cpu_online_map) + 1;
- cnt = xzalloc_array(unsigned int, nr_cpus);
- if ( cnt )
- {
- rcu_read_lock(&domlist_read_lock);
- for_each_domain ( d )
- for_each_vcpu ( d, v )
- if ( !(v->pause_flags & VPF_down)
- && ((cpu = v->processor) < nr_cpus) )
- cnt[cpu]++;
- rcu_read_unlock(&domlist_read_lock);
- }
-
- /*
- * If we're on a HT system, we only auto-allocate to a non-primary HT. We
- * favour high numbered CPUs in the event of a tie.
- */
- cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0));
- cpu = cpumask_first(&cpu_exclude_map);
- i = cpumask_next(cpu, &cpu_exclude_map);
- if ( i < nr_cpu_ids )
- cpu = i;
- for_each_cpu(i, online)
- {
- if ( cpumask_test_cpu(i, &cpu_exclude_map) )
- continue;
- if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) &&
- (cpumask_next(i, per_cpu(cpu_sibling_mask, i)) < nr_cpu_ids) )
- continue;
- cpumask_or(&cpu_exclude_map, &cpu_exclude_map,
- per_cpu(cpu_sibling_mask, i));
- if ( !cnt || cnt[i] <= cnt[cpu] )
- cpu = i;
- }
-
- xfree(cnt);
-
- return cpu;
-}
-
bool_t domctl_lock_acquire(void)
{
/*
@@ -691,7 +643,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
continue;
cpu = (i == 0) ?
- default_vcpu0_location(online) :
+ cpumask_any(online) :
cpumask_cycle(d->vcpu[i-1]->processor, online);
if ( alloc_vcpu(d, i, cpu) == NULL )

57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch Normal file

@@ -0,0 +1,94 @@
References: bsc#988675 CVE-2016-6258 XSA-182
# Commit e1bff4c2ea3b32464510ac00c320bba28a8dbcca
# Date 2016-07-26 14:06:59 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/pv: Remove unsafe bits from the mod_l?_entry() fastpath
All changes in writeability and cacheability must go through full
re-validation.
Rework the logic as a whitelist, to make it clearer to follow.
This is XSA-182
Reported-by: Jérémie Boutoille <jboutoille@ext.quarkslab.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1852,6 +1852,14 @@ static inline int update_intpte(intpte_t
_t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
(_m), (_v), (_ad))
+/*
+ * PTE flags that a guest may change without re-validating the PTE.
+ * All other bits affect translation, caching, or Xen's safety.
+ */
+#define FASTPATH_FLAG_WHITELIST \
+ (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \
+ _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER)
+
/* Update the L1 entry at pl1e to new value nl1e. */
static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
unsigned long gl1mfn, int preserve_ad,
@@ -1891,9 +1899,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
nl1e = l1e_from_pfn(page_to_mfn(page), l1e_get_flags(nl1e));
}
- /* Fast path for identical mapping, r/w, presence, and cachability. */
- if ( !l1e_has_changed(ol1e, nl1e,
- PAGE_CACHE_ATTRS | _PAGE_RW | _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l1e(nl1e, pt_dom);
rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
@@ -1970,11 +1977,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
return -EINVAL;
}
- /* Fast path for identical mapping and presence. */
- if ( !l2e_has_changed(ol2e, nl2e,
- unlikely(opt_allow_superpage)
- ? _PAGE_PSE | _PAGE_RW | _PAGE_PRESENT
- : _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l2e(nl2e, d);
if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) )
@@ -2039,8 +2043,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
return -EINVAL;
}
- /* Fast path for identical mapping and presence. */
- if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l3e(nl3e, d);
rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad);
@@ -2103,8 +2107,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
return -EINVAL;
}
- /* Fast path for identical mapping and presence. */
- if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l4e(nl4e, d);
rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad);
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -313,6 +313,7 @@ void efi_update_l4_pgtable(unsigned int
#define _PAGE_AVAIL2 _AC(0x800,U)
#define _PAGE_AVAIL _AC(0xE00,U)
#define _PAGE_PSE_PAT _AC(0x1000,U)
+#define _PAGE_AVAIL_HIGH (_AC(0x7ff, U) << 12)
#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0)
/* non-architectural flags */
#define _PAGE_PAGED 0x2000U
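
The whitelist test itself is just an XOR-and-mask: the update may take
the fast path only when the old and new entries agree on every bit
outside FASTPATH_FLAG_WHITELIST. A standalone sketch with illustrative
(not real) flag values and hypothetical names:

    #include <stdint.h>
    #include <stdio.h>

    #define _PAGE_RW        0x002ull   /* not whitelisted: affects safety */
    #define _PAGE_ACCESSED  0x020ull   /* harmless bookkeeping bit        */
    #define _PAGE_DIRTY     0x040ull   /* harmless bookkeeping bit        */

    #define FASTPATH_FLAG_WHITELIST (_PAGE_ACCESSED | _PAGE_DIRTY)

    /* Mirrors lNe_has_changed(): do the entries differ in any bit of
     * the given mask?  Passing ~FASTPATH_FLAG_WHITELIST asks "differ
     * in any bit that is NOT whitelisted". */
    static int pte_has_changed(uint64_t old_e, uint64_t new_e, uint64_t mask)
    {
        return ((old_e ^ new_e) & mask) != 0;
    }

    int main(void)
    {
        uint64_t old_e = 0x1000ull | _PAGE_RW;

        /* Setting only a whitelisted bit keeps the fast path. */
        printf("set DIRTY: fastpath=%d\n",
               !pte_has_changed(old_e, old_e | _PAGE_DIRTY,
                                ~FASTPATH_FLAG_WHITELIST));
        /* Toggling a non-whitelisted bit forces full re-validation. */
        printf("clear RW:  fastpath=%d\n",
               !pte_has_changed(old_e, old_e & ~_PAGE_RW,
                                ~FASTPATH_FLAG_WHITELIST));
        return 0;
    }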

57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch Normal file

@@ -0,0 +1,61 @@
References: bsc#988676 CVE-2016-6259 XSA-183
# Commit 9f1441487aa215193a7c00fd9cb80b335542465e
# Date 2016-07-26 14:07:04 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/entry: Avoid SMAP violation in compat_create_bounce_frame()
A 32bit guest kernel might be running on user mappings.
compat_create_bounce_frame() must whitelist its guest accesses to avoid
risking a SMAP violation.
For both variants of create_bounce_frame(), re-blacklist user accesses if
execution exits via an exception table redirection.
This is XSA-183 / CVE-2016-6259
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -318,6 +318,7 @@ ENTRY(compat_int80_direct_trap)
compat_create_bounce_frame:
ASSERT_INTERRUPTS_ENABLED
mov %fs,%edi
+ ASM_STAC
testb $2,UREGS_cs+8(%rsp)
jz 1f
/* Push new frame at registered guest-OS stack base. */
@@ -364,6 +365,7 @@ compat_create_bounce_frame:
movl TRAPBOUNCE_error_code(%rdx),%eax
.Lft8: movl %eax,%fs:(%rsi) # ERROR CODE
1:
+ ASM_CLAC
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
@@ -403,6 +405,7 @@ compat_crash_page_fault_4:
addl $4,%esi
compat_crash_page_fault:
.Lft14: mov %edi,%fs
+ ASM_CLAC
movl %esi,%edi
call show_page_walk
jmp dom_crash_sync_extable
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -420,9 +420,11 @@ domain_crash_page_fault_16:
domain_crash_page_fault_8:
addq $8,%rsi
domain_crash_page_fault:
+ ASM_CLAC
movq %rsi,%rdi
call show_page_walk
ENTRY(dom_crash_sync_extable)
+ ASM_CLAC
# Get out of the guest-save area of the stack.
GET_STACK_END(ax)
leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp

57a1e603-x86-time-adjust-local-system-time-initialization.patch Normal file

@@ -0,0 +1,123 @@
References: bsc#970135
# Commit bb49fd3092a84ce151f5528794c0e612eeb4961a
# Date 2016-08-03 14:39:31 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: adjust local system time initialization
Using the bare return value from read_platform_stime() is not suitable
when local_time_calibration() is going to use its fast path: Divergence
of several dozen microseconds between NOW() return values on different
CPUs results when platform and local time don't stay in close sync.
Latch local and platform time on the CPU initiating AP bringup, such
that the AP can use these values to seed its stime_local_stamp with as
little error as possible. The boot CPU, on the other hand, can simply
calculate the correct initial value (other CPUs could do so too with
even greater accuracy than the approach being introduced, but that can
work only if all CPUs' TSCs start ticking at the same time, which
generally can't be assumed to be the case on multi-socket systems).
This slightly defers init_percpu_time() (moved ahead by commit
dd2658f966 ["x86/time: initialise time earlier during
start_secondary()"]) in order to reduce as much as possible the gap
between populating the stamps and consuming them.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -328,12 +328,12 @@ void start_secondary(void *unused)
percpu_traps_init();
- init_percpu_time();
-
cpu_init();
smp_callin();
+ init_percpu_time();
+
setup_secondary_APIC_clock();
/*
@@ -996,6 +996,8 @@ int __cpu_up(unsigned int cpu)
if ( (ret = do_boot_cpu(apicid, cpu)) != 0 )
return ret;
+ time_latch_stamps();
+
set_cpu_state(CPU_STATE_ONLINE);
while ( !cpu_online(cpu) )
{
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1328,21 +1328,52 @@ static void time_calibration(void *unuse
&r, 1);
}
+static struct {
+ s_time_t local_stime, master_stime;
+} ap_bringup_ref;
+
+void time_latch_stamps(void)
+{
+ unsigned long flags;
+ u64 tsc;
+
+ local_irq_save(flags);
+ ap_bringup_ref.master_stime = read_platform_stime();
+ tsc = rdtsc();
+ local_irq_restore(flags);
+
+ ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
+}
+
void init_percpu_time(void)
{
struct cpu_time *t = &this_cpu(cpu_time);
unsigned long flags;
+ u64 tsc;
s_time_t now;
/* Initial estimate for TSC rate. */
t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
local_irq_save(flags);
- t->local_tsc_stamp = rdtsc();
now = read_platform_stime();
+ tsc = rdtsc();
local_irq_restore(flags);
t->stime_master_stamp = now;
+ /*
+ * To avoid a discontinuity (TSC and platform clock can't be expected
+ * to be in perfect sync), initialization here needs to match up with
+ * local_time_calibration()'s decision whether to use its fast path.
+ */
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ {
+ if ( system_state < SYS_STATE_smp_boot )
+ now = get_s_time_fixed(tsc);
+ else
+ now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime;
+ }
+ t->local_tsc_stamp = tsc;
t->stime_local_stamp = now;
}
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -40,6 +40,7 @@ int time_suspend(void);
int time_resume(void);
void init_percpu_time(void);
+void time_latch_stamps(void);
struct ioreq;
int hwdom_pit_access(struct ioreq *ioreq);
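
A worked example, with invented numbers, of the seeding adjustment that
init_percpu_time() now applies on APs:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Invented values in nanoseconds.  The boot CPU latched these
         * in time_latch_stamps() just before bringing the AP online: */
        int64_t ref_local  = 10000050000LL;  /* ap_bringup_ref.local_stime  */
        int64_t ref_master = 10000000000LL;  /* ap_bringup_ref.master_stime */

        /* The AP reads platform time in init_percpu_time()... */
        int64_t now = 10002000000LL;

        /* ...and shifts it onto the local (TSC-derived) timeline, so
         * its first fast-path calibration sees no 50us discontinuity. */
        now += ref_local - ref_master;

        printf("seeded stime_local_stamp = %lld ns (offset %+lld ns)\n",
               (long long)now, (long long)(ref_local - ref_master));
        return 0;
    }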

57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch Normal file

@@ -0,0 +1,190 @@
References: bsc#970135
# Commit fa74e70500fd73dd2fc441c7dc00b190fb37cee5
# Date 2016-08-03 14:40:44 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: introduce and use rdtsc_ordered()
Matching Linux commit 03b9730b76 ("x86/asm/tsc: Add rdtsc_ordered() and
use it in trivial call sites") and earlier ones it builds upon, let's
make sure timing loops don't have their rdtsc()-s re-ordered, as that
would harm precision of the result (values were observed to be several
hundred clocks off without this adjustment).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1137,7 +1137,7 @@ static int __init calibrate_APIC_clock(v
/*
* We wrapped around just now. Let's start:
*/
- t1 = rdtsc();
+ t1 = rdtsc_ordered();
tt1 = apic_read(APIC_TMCCT);
/*
@@ -1147,7 +1147,7 @@ static int __init calibrate_APIC_clock(v
wait_8254_wraparound();
tt2 = apic_read(APIC_TMCCT);
- t2 = rdtsc();
+ t2 = rdtsc_ordered();
/*
* The APIC bus clock counter is 32 bits only, it
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -541,6 +541,9 @@ static void init_amd(struct cpuinfo_x86
wrmsr_amd_safe(0xc001100d, l, h & ~1);
}
+ /* MFENCE stops RDTSC speculation */
+ __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
+
switch(c->x86)
{
case 0xf ... 0x17:
--- a/xen/arch/x86/delay.c
+++ b/xen/arch/x86/delay.c
@@ -21,10 +21,10 @@ void __udelay(unsigned long usecs)
unsigned long ticks = usecs * (cpu_khz / 1000);
unsigned long s, e;
- s = rdtsc();
+ s = rdtsc_ordered();
do
{
rep_nop();
- e = rdtsc();
+ e = rdtsc_ordered();
} while ((e-s) < ticks);
}
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -123,7 +123,7 @@ static void synchronize_tsc_master(unsig
for ( i = 1; i <= 5; i++ )
{
- tsc_value = rdtsc();
+ tsc_value = rdtsc_ordered();
wmb();
atomic_inc(&tsc_count);
while ( atomic_read(&tsc_count) != (i<<1) )
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -257,10 +257,10 @@ static u64 init_pit_and_calibrate_tsc(vo
outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
- start = rdtsc();
+ start = rdtsc_ordered();
for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
continue;
- end = rdtsc();
+ end = rdtsc_ordered();
/* Error if the CTC doesn't behave itself. */
if ( count == 0 )
@@ -760,7 +760,7 @@ s_time_t get_s_time_fixed(u64 at_tsc)
if ( at_tsc )
tsc = at_tsc;
else
- tsc = rdtsc();
+ tsc = rdtsc_ordered();
delta = tsc - t->local_tsc_stamp;
now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
@@ -933,7 +933,7 @@ int cpu_frequency_change(u64 freq)
/* TSC-extrapolated time may be bogus after frequency change. */
/*t->stime_local_stamp = get_s_time();*/
t->stime_local_stamp = t->stime_master_stamp;
- curr_tsc = rdtsc();
+ curr_tsc = rdtsc_ordered();
t->local_tsc_stamp = curr_tsc;
set_time_scale(&t->tsc_scale, freq);
local_irq_enable();
@@ -1248,7 +1248,7 @@ static void time_calibration_tsc_rendezv
if ( r->master_stime == 0 )
{
r->master_stime = read_platform_stime();
- r->master_tsc_stamp = rdtsc();
+ r->master_tsc_stamp = rdtsc_ordered();
}
atomic_inc(&r->semaphore);
@@ -1274,7 +1274,7 @@ static void time_calibration_tsc_rendezv
}
}
- c->local_tsc_stamp = rdtsc();
+ c->local_tsc_stamp = rdtsc_ordered();
c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
@@ -1304,7 +1304,7 @@ static void time_calibration_std_rendezv
mb(); /* receive signal /then/ read r->master_stime */
}
- c->local_tsc_stamp = rdtsc();
+ c->local_tsc_stamp = rdtsc_ordered();
c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
@@ -1339,7 +1339,7 @@ void time_latch_stamps(void)
local_irq_save(flags);
ap_bringup_ref.master_stime = read_platform_stime();
- tsc = rdtsc();
+ tsc = rdtsc_ordered();
local_irq_restore(flags);
ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
@@ -1357,7 +1357,7 @@ void init_percpu_time(void)
local_irq_save(flags);
now = read_platform_stime();
- tsc = rdtsc();
+ tsc = rdtsc_ordered();
local_irq_restore(flags);
t->stime_master_stamp = now;
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -16,6 +16,7 @@ XEN_CPUFEATURE(XTOPOLOGY, (FSCAPIN
XEN_CPUFEATURE(CPUID_FAULTING, (FSCAPINTS+0)*32+ 6) /* cpuid faulting */
XEN_CPUFEATURE(CLFLUSH_MONITOR, (FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */
XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */
+XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTSC */
#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -80,6 +80,22 @@ static inline uint64_t rdtsc(void)
return ((uint64_t)high << 32) | low;
}
+static inline uint64_t rdtsc_ordered(void)
+{
+ /*
+ * The RDTSC instruction is not ordered relative to memory access.
+ * The Intel SDM and the AMD APM are both vague on this point, but
+ * empirically an RDTSC instruction can be speculatively executed
+ * before prior loads. An RDTSC immediately after an appropriate
+ * barrier appears to be ordered as a normal load, that is, it
+ * provides the same ordering guarantees as reading from a global
+ * memory location that some other imaginary CPU is updating
+ * continuously with a time stamp.
+ */
+ alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
+ return rdtsc();
+}
+
#define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val)
#define write_tsc(val) ({ \
/* Reliable TSCs are in lockstep across all CPUs. We should \
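
A minimal user-space sketch of the same ordering idea (assuming x86-64
with GCC/Clang intrinsics; the real implementation above patches in
LFENCE vs MFENCE at boot via the alternatives framework):

    #include <stdint.h>
    #include <stdio.h>
    #include <x86intrin.h>   /* __rdtsc(), _mm_lfence() */

    static inline uint64_t rdtsc_ordered_sketch(void)
    {
        /* Fence first so the TSC read cannot be speculated ahead of
         * earlier loads; plain LFENCE shown here for simplicity. */
        _mm_lfence();
        return __rdtsc();
    }

    int main(void)
    {
        uint64_t t1 = rdtsc_ordered_sketch();
        uint64_t t2 = rdtsc_ordered_sketch();
        printf("delta = %llu cycles\n", (unsigned long long)(t2 - t1));
        return 0;
    }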

57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch Normal file

@@ -0,0 +1,298 @@
References: bsc#970135
# Commit 93340297802b8e743b6ce66b0bc366af1ad51f39
# Date 2016-08-04 10:02:52 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: calibrate TSC against platform timer
... instead of unconditionally against the PIT. This allows for local
and master system times to remain in better sync (which matters even
when, on any modern system, the master time is really used only during
secondary CPU bringup, as the error between the two is in fact
noticeable in cross-CPU NOW() invocation monotonicity).
This involves moving the init_platform_timer() invocation into
early_time_init(), splitting out the few things which really need to be
done in init_xen_time(). That in turn allows dropping the open coded
PIT initialization from init_IRQ() (it was needed for APIC clock
calibration, which runs between early_time_init() and init_xen_time()).
In the course of this re-ordering also set the timer channel 2 gate low
after having finished calibration. This should be benign to overall
system operation, but appears to be the cleaner state.
Also do away with open coded 8254 register manipulation from 8259 code.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/i8259.c
+++ b/xen/arch/x86/i8259.c
@@ -359,13 +359,6 @@ void __init init_IRQ(void)
apic_intr_init();
- /* Set the clock to HZ Hz */
-#define CLOCK_TICK_RATE 1193182 /* crystal freq (Hz) */
-#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
- outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
- outb(LATCH >> 8, PIT_CH0); /* MSB */
-
setup_irq(2, 0, &cascade);
}
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -59,7 +59,7 @@ struct platform_timesource {
char *name;
u64 frequency;
u64 (*read_counter)(void);
- int (*init)(struct platform_timesource *);
+ s64 (*init)(struct platform_timesource *);
void (*resume)(struct platform_timesource *);
int counter_bits;
};
@@ -224,49 +224,18 @@ static struct irqaction __read_mostly ir
timer_interrupt, "timer", NULL
};
-/* ------ Calibrate the TSC -------
- * Return processor ticks per second / CALIBRATE_FRAC.
- */
-
#define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
-#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
+#define CALIBRATE_VALUE(freq) (((freq) + CALIBRATE_FRAC / 2) / CALIBRATE_FRAC)
-static u64 init_pit_and_calibrate_tsc(void)
+static void preinit_pit(void)
{
- u64 start, end;
- unsigned long count;
-
/* Set PIT channel 0 to HZ Hz. */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
outb(LATCH >> 8, PIT_CH0); /* MSB */
-
- /* Set the Gate high, disable speaker */
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
- /*
- * Now let's take care of CTC channel 2
- *
- * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
- * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
- * to begin countdown.
- */
- outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
- outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
- outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
-
- start = rdtsc_ordered();
- for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
- continue;
- end = rdtsc_ordered();
-
- /* Error if the CTC doesn't behave itself. */
- if ( count == 0 )
- return 0;
-
- return ((end - start) * (u64)CALIBRATE_FRAC);
+#undef LATCH
}
void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
@@ -327,10 +296,49 @@ static u64 read_pit_count(void)
return count32;
}
-static int __init init_pit(struct platform_timesource *pts)
+static s64 __init init_pit(struct platform_timesource *pts)
{
+ u8 portb = inb(0x61);
+ u64 start, end;
+ unsigned long count;
+
using_pit = 1;
- return 1;
+
+ /* Set the Gate high, disable speaker. */
+ outb((portb & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2: mode 0, (interrupt on
+ * terminal count mode), binary count, load CALIBRATE_LATCH count,
+ * (LSB and MSB) to begin countdown.
+ */
+#define CALIBRATE_LATCH CALIBRATE_VALUE(CLOCK_TICK_RATE)
+ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
+ outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
+#undef CALIBRATE_LATCH
+
+ start = rdtsc_ordered();
+ for ( count = 0; !(inb(0x61) & 0x20); ++count )
+ continue;
+ end = rdtsc_ordered();
+
+ /* Set the Gate low, disable speaker. */
+ outb(portb & ~0x03, 0x61);
+
+ /* Error if the CTC doesn't behave itself. */
+ if ( count == 0 )
+ return 0;
+
+ return (end - start) * CALIBRATE_FRAC;
+}
+
+static void resume_pit(struct platform_timesource *pts)
+{
+ /* Set CTC channel 2 to mode 0 again; initial value does not matter. */
+ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(0, PIT_CH2); /* LSB of count */
+ outb(0, PIT_CH2); /* MSB of count */
}
static struct platform_timesource __initdata plt_pit =
@@ -340,7 +348,8 @@ static struct platform_timesource __init
.frequency = CLOCK_TICK_RATE,
.read_counter = read_pit_count,
.counter_bits = 32,
- .init = init_pit
+ .init = init_pit,
+ .resume = resume_pit,
};
/************************************************************
@@ -352,15 +361,26 @@ static u64 read_hpet_count(void)
return hpet_read32(HPET_COUNTER);
}
-static int __init init_hpet(struct platform_timesource *pts)
+static s64 __init init_hpet(struct platform_timesource *pts)
{
- u64 hpet_rate = hpet_setup();
+ u64 hpet_rate = hpet_setup(), start;
+ u32 count, target;
if ( hpet_rate == 0 )
return 0;
pts->frequency = hpet_rate;
- return 1;
+
+ count = hpet_read32(HPET_COUNTER);
+ start = rdtsc_ordered();
+ target = count + CALIBRATE_VALUE(hpet_rate);
+ if ( target < count )
+ while ( hpet_read32(HPET_COUNTER) >= count )
+ continue;
+ while ( hpet_read32(HPET_COUNTER) < target )
+ continue;
+
+ return (rdtsc_ordered() - start) * CALIBRATE_FRAC;
}
static void resume_hpet(struct platform_timesource *pts)
@@ -392,12 +412,24 @@ static u64 read_pmtimer_count(void)
return inl(pmtmr_ioport);
}
-static int __init init_pmtimer(struct platform_timesource *pts)
+static s64 __init init_pmtimer(struct platform_timesource *pts)
{
+ u64 start;
+ u32 count, target, mask = 0xffffff;
+
if ( pmtmr_ioport == 0 )
return 0;
- return 1;
+ count = inl(pmtmr_ioport) & mask;
+ start = rdtsc_ordered();
+ target = count + CALIBRATE_VALUE(ACPI_PM_FREQUENCY);
+ if ( target < count )
+ while ( (inl(pmtmr_ioport) & mask) >= count )
+ continue;
+ while ( (inl(pmtmr_ioport) & mask) < target )
+ continue;
+
+ return (rdtsc_ordered() - start) * CALIBRATE_FRAC;
}
static struct platform_timesource __initdata plt_pmtimer =
@@ -533,14 +565,15 @@ static void resume_platform_timer(void)
plt_stamp = plt_src.read_counter();
}
-static void __init init_platform_timer(void)
+static u64 __init init_platform_timer(void)
{
static struct platform_timesource * __initdata plt_timers[] = {
&plt_hpet, &plt_pmtimer, &plt_pit
};
struct platform_timesource *pts = NULL;
- int i, rc = -1;
+ unsigned int i;
+ s64 rc = -1;
if ( opt_clocksource[0] != '\0' )
{
@@ -578,15 +611,12 @@ static void __init init_platform_timer(v
plt_overflow_period = scale_delta(
1ull << (pts->counter_bits-1), &plt_scale);
- init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
plt_src = *pts;
- plt_overflow(NULL);
-
- platform_timer_stamp = plt_stamp64;
- stime_platform_stamp = NOW();
printk("Platform timer is %s %s\n",
freq_string(pts->frequency), pts->name);
+
+ return rc;
}
u64 stime2tsc(s_time_t stime)
@@ -1479,7 +1509,11 @@ int __init init_xen_time(void)
/* NB. get_cmos_time() can take over one second to execute. */
do_settime(get_cmos_time(), 0, NOW());
- init_platform_timer();
+ /* Finish platform timer initialization. */
+ init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+ plt_overflow(NULL);
+ platform_timer_stamp = plt_stamp64;
+ stime_platform_stamp = NOW();
init_percpu_time();
@@ -1494,7 +1528,10 @@ int __init init_xen_time(void)
void __init early_time_init(void)
{
struct cpu_time *t = &this_cpu(cpu_time);
- u64 tmp = init_pit_and_calibrate_tsc();
+ u64 tmp;
+
+ preinit_pit();
+ tmp = init_platform_timer();
set_time_scale(&t->tsc_scale, tmp);
t->local_tsc_stamp = boot_tsc_stamp;
@@ -1603,7 +1640,7 @@ int time_suspend(void)
int time_resume(void)
{
- init_pit_and_calibrate_tsc();
+ preinit_pit();
resume_platform_timer();
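
The per-timer init routines above all share one calibration pattern:
spin for 1/CALIBRATE_FRAC seconds of platform time, then scale the
observed TSC delta back up. A runnable user-space sketch, under the
assumption of a simulated 1 MHz platform counter (64 bits wide here, so
the wrap handling the real patch needs for the narrower HPET and
PM-timer counters is omitted):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <x86intrin.h>   /* __rdtsc() */

    #define CALIBRATE_FRAC 20   /* calibrate over 1/20 s = 50 ms */
    #define CALIBRATE_VALUE(freq) \
        (((freq) + CALIBRATE_FRAC / 2) / CALIBRATE_FRAC)

    /* Stand-in "platform timer": a 1 MHz counter derived from
     * CLOCK_MONOTONIC, playing the role HPET/PM timer/PIT play in Xen. */
    #define PLT_FREQUENCY 1000000ull

    static uint64_t read_counter(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * PLT_FREQUENCY + ts.tv_nsec / 1000;
    }

    int main(void)
    {
        uint64_t count = read_counter();
        uint64_t start = __rdtsc();
        uint64_t target = count + CALIBRATE_VALUE(PLT_FREQUENCY);

        while ( read_counter() < target )   /* spin for ~50 ms */
            continue;

        /* Scale the 50 ms TSC delta back up to ticks per second. */
        printf("TSC ~ %llu ticks/s\n",
               (unsigned long long)((__rdtsc() - start) * CALIBRATE_FRAC));
        return 0;
    }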

57a30261-x86-support-newer-Intel-CPU-models.patch Normal file

@@ -0,0 +1,200 @@
# Commit 350bc1a9d4ebc03b18a43cdafcb626618caace55
# Date 2016-08-04 10:52:49 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: support newer Intel CPU models
... as per the June 2016 edition of the SDM.
Also remove a couple of dead break statements as well as unused
*MSR_PM_LASTBRANCH* #define-s.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -61,14 +61,14 @@
#define GET_HW_RES_IN_NS(msr, val) \
do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
-#define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val) /* Atom E3000 only */
+#define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val)
#define GET_PC2_RES(val) GET_HW_RES_IN_NS(0x60D, val) /* SNB onwards */
#define GET_PC3_RES(val) GET_HW_RES_IN_NS(0x3F8, val)
#define GET_PC6_RES(val) GET_HW_RES_IN_NS(0x3F9, val)
#define GET_PC7_RES(val) GET_HW_RES_IN_NS(0x3FA, val)
-#define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val) /* some Haswells only */
-#define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val) /* some Haswells only */
-#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some Haswells only */
+#define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val)
+#define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val)
+#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val)
#define GET_CC1_RES(val) GET_HW_RES_IN_NS(0x660, val) /* Silvermont only */
#define GET_CC3_RES(val) GET_HW_RES_IN_NS(0x3FC, val)
#define GET_CC6_RES(val) GET_HW_RES_IN_NS(0x3FD, val)
@@ -142,6 +142,8 @@ static void do_get_hw_residencies(void *
{
/* 4th generation Intel Core (Haswell) */
case 0x45:
+ /* Xeon E5/E7 v4 (Broadwell) */
+ case 0x4F:
GET_PC8_RES(hw_res->pc8);
GET_PC9_RES(hw_res->pc9);
GET_PC10_RES(hw_res->pc10);
@@ -158,10 +160,11 @@ static void do_get_hw_residencies(void *
case 0x46:
/* Broadwell */
case 0x3D:
- case 0x4F:
+ case 0x47:
case 0x56:
- /* future */
+ /* Skylake */
case 0x4E:
+ case 0x5E:
GET_PC2_RES(hw_res->pc2);
GET_CC7_RES(hw_res->cc7);
/* fall through */
@@ -198,18 +201,28 @@ static void do_get_hw_residencies(void *
break;
/* Silvermont */
case 0x37:
- GET_MC6_RES(hw_res->mc6);
- /* fall through */
case 0x4A:
case 0x4D:
case 0x5A:
case 0x5D:
/* Airmont */
case 0x4C:
+ GET_MC6_RES(hw_res->mc6);
GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
GET_CC1_RES(hw_res->cc1);
GET_CC6_RES(hw_res->cc6);
break;
+ /* Goldmont */
+ case 0x5C:
+ case 0x5F:
+ GET_PC2_RES(hw_res->pc2);
+ GET_PC3_RES(hw_res->pc3);
+ GET_PC6_RES(hw_res->pc6);
+ GET_PC10_RES(hw_res->pc10);
+ GET_CC1_RES(hw_res->cc1);
+ GET_CC3_RES(hw_res->cc3);
+ GET_CC6_RES(hw_res->cc6);
+ break;
}
}
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2526,6 +2526,14 @@ static const struct lbr_info {
{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
{ MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
{ 0, 0 }
+}, sk_lbr[] = {
+ { MSR_IA32_LASTINTFROMIP, 1 },
+ { MSR_IA32_LASTINTTOIP, 1 },
+ { MSR_SKL_LASTBRANCH_TOS, 1 },
+ { MSR_SKL_LASTBRANCH_0_FROM_IP, NUM_MSR_SKL_LASTBRANCH },
+ { MSR_SKL_LASTBRANCH_0_TO_IP, NUM_MSR_SKL_LASTBRANCH },
+ { MSR_SKL_LASTBRANCH_0_INFO, NUM_MSR_SKL_LASTBRANCH },
+ { 0, 0 }
}, at_lbr[] = {
{ MSR_IA32_LASTINTFROMIP, 1 },
{ MSR_IA32_LASTINTTOIP, 1 },
@@ -2533,6 +2541,13 @@ static const struct lbr_info {
{ MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
{ MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
{ 0, 0 }
+}, gm_lbr[] = {
+ { MSR_IA32_LASTINTFROMIP, 1 },
+ { MSR_IA32_LASTINTTOIP, 1 },
+ { MSR_GM_LASTBRANCH_TOS, 1 },
+ { MSR_GM_LASTBRANCH_0_FROM_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
+ { MSR_GM_LASTBRANCH_0_TO_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
+ { 0, 0 }
};
static const struct lbr_info *last_branch_msr_get(void)
@@ -2547,7 +2562,6 @@ static const struct lbr_info *last_branc
/* Enhanced Core */
case 23:
return c2_lbr;
- break;
/* Nehalem */
case 26: case 30: case 31: case 46:
/* Westmere */
@@ -2559,11 +2573,13 @@ static const struct lbr_info *last_branc
/* Haswell */
case 60: case 63: case 69: case 70:
/* Broadwell */
- case 61: case 79: case 86:
- /* future */
- case 78:
+ case 61: case 71: case 79: case 86:
return nh_lbr;
- break;
+ /* Skylake */
+ case 78: case 94:
+ /* future */
+ case 142: case 158:
+ return sk_lbr;
/* Atom */
case 28: case 38: case 39: case 53: case 54:
/* Silvermont */
@@ -2573,7 +2589,9 @@ static const struct lbr_info *last_branc
/* Airmont */
case 76:
return at_lbr;
- break;
+ /* Goldmont */
+ case 92: case 95:
+ return gm_lbr;
}
break;
@@ -2583,7 +2601,6 @@ static const struct lbr_info *last_branc
/* Pentium4/Xeon with em64t */
case 3: case 4: case 6:
return p4_lbr;
- break;
}
break;
}
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -458,11 +458,6 @@
#define MSR_P4_LASTBRANCH_0_TO_LIP 0x000006c0
#define NUM_MSR_P4_LASTBRANCH_FROM_TO 16
-/* Pentium M (and Core) last-branch recording */
-#define MSR_PM_LASTBRANCH_TOS 0x000001c9
-#define MSR_PM_LASTBRANCH_0 0x00000040
-#define NUM_MSR_PM_LASTBRANCH 8
-
/* Core 2 and Atom last-branch recording */
#define MSR_C2_LASTBRANCH_TOS 0x000001c9
#define MSR_C2_LASTBRANCH_0_FROM_IP 0x00000040
@@ -470,6 +465,19 @@
#define NUM_MSR_C2_LASTBRANCH_FROM_TO 4
#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO 8
+/* Skylake (and newer) last-branch recording */
+#define MSR_SKL_LASTBRANCH_TOS 0x000001c9
+#define MSR_SKL_LASTBRANCH_0_FROM_IP 0x00000680
+#define MSR_SKL_LASTBRANCH_0_TO_IP 0x000006c0
+#define MSR_SKL_LASTBRANCH_0_INFO 0x00000dc0
+#define NUM_MSR_SKL_LASTBRANCH 32
+
+/* Goldmont last-branch recording */
+#define MSR_GM_LASTBRANCH_TOS 0x000001c9
+#define MSR_GM_LASTBRANCH_0_FROM_IP 0x00000680
+#define MSR_GM_LASTBRANCH_0_TO_IP 0x000006c0
+#define NUM_MSR_GM_LASTBRANCH_FROM_TO 32
+
/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a

vif-route.patch Normal file

@@ -0,0 +1,15 @@
References: bsc#985503
Index: xen-4.7.0-testing/tools/hotplug/Linux/vif-route
===================================================================
--- xen-4.7.0-testing.orig/tools/hotplug/Linux/vif-route
+++ xen-4.7.0-testing/tools/hotplug/Linux/vif-route
@@ -35,7 +35,7 @@ case "${command}" in
;;
esac
-if [ "${ip}" ] ; then
+if [ "${ip}" ] && [ "${ipcmd}" ] ; then
# If we've been given a list of IP addresses, then add routes from dom0 to
# the guest using those addresses.
for addr in ${ip} ; do

xen.changes

@@ -1,3 +1,31 @@
-------------------------------------------------------------------
Thu Aug 4 09:12:34 MDT 2016 - carnold@suse.com

- bsc#970135 - new virtualization project clock test randomly fails
  on Xen
  576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch
  5769106e-x86-generate-assembler-equates-for-synthesized.patch
  57a1e603-x86-time-adjust-local-system-time-initialization.patch
  57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch
  57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch
- bsc#991934 - xen hypervisor crash in csched_acct
  57973099-have-schedulers-revise-initial-placement.patch
  579730e6-remove-buggy-initial-placement-algorithm.patch
- bsc#988675 - VUL-0: CVE-2016-6258: xen: x86: Privilege escalation
  in PV guests (XSA-182)
  57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch
- bsc#988676 - VUL-0: CVE-2016-6259: xen: x86: Missing SMAP
  whitelisting in 32-bit exception / event delivery (XSA-183)
  57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch
- Upstream patches from Jan
  57a30261-x86-support-newer-Intel-CPU-models.patch

-------------------------------------------------------------------
Mon Aug 1 11:46:22 MDT 2016 - carnold@suse.com

- bsc#985503 - vif-route broken
  vif-route.patch

-------------------------------------------------------------------
Thu Jul 28 05:23:12 MDT 2016 - carnold@suse.com

xen.spec

@@ -165,7 +165,7 @@ BuildRequires: xorg-x11-util-devel
%endif
%endif
-Version: 4.7.0_09
+Version: 4.7.0_10
Release: 0
Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
License: GPL-2.0
@@ -205,7 +205,17 @@ Source99: baselibs.conf
# Upstream patches
Patch1: 57580bbd-kexec-allow-relaxed-placement-via-cmdline.patch
Patch2: 575e9ca0-nested-vmx-Validate-host-VMX-MSRs-before-accessing-them.patch
-Patch3: 57640448-xen-sched-use-default-scheduler-upon-an-invalid-sched.patch
+Patch3: 576001df-x86-time-use-local-stamp-in-TSC-calibration-fast-path.patch
+Patch4: 57640448-xen-sched-use-default-scheduler-upon-an-invalid-sched.patch
+Patch5: 5769106e-x86-generate-assembler-equates-for-synthesized.patch
+Patch6: 57973099-have-schedulers-revise-initial-placement.patch
+Patch7: 579730e6-remove-buggy-initial-placement-algorithm.patch
+Patch8: 57976073-x86-remove-unsafe-bits-from-mod_lN_entry-fastpath.patch
+Patch9: 57976078-x86-avoid-SMAP-violation-in-compat_create_bounce_frame.patch
+Patch10: 57a1e603-x86-time-adjust-local-system-time-initialization.patch
+Patch11: 57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch
+Patch12: 57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch
+Patch13: 57a30261-x86-support-newer-Intel-CPU-models.patch
# Upstream qemu-traditional patches
Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch
Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch
@@ -274,6 +284,7 @@ Patch403: xl-conf-default-bridge.patch
Patch420: suspend_evtchn_lock.patch
Patch421: xenpaging.doc.patch
Patch422: stubdom-have-iovec.patch
+Patch423: vif-route.patch
# Other bug fixes or features
Patch451: xenconsole-no-multiple-connections.patch
Patch452: hibernate.patch
@@ -529,6 +540,16 @@ Authors:
%patch1 -p1
%patch2 -p1
%patch3 -p1
+%patch4 -p1
+%patch5 -p1
+%patch6 -p1
+%patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1
+%patch13 -p1
# Upstream qemu patches
%patch250 -p1
%patch251 -p1
@@ -597,6 +618,7 @@ Authors:
%patch420 -p1
%patch421 -p1
%patch422 -p1
+%patch423 -p1
# Other bug fixes or features
%patch451 -p1
%patch452 -p1