- bsc#992224 - [HPS Bug] During boot of Xen Hypervisor, Failed to
  get contiguous memory for DMA from Xen
  57ac6316-don-t-restrict-DMA-heap-to-node-0.patch
- bsc#978755 - xen uefi systems fail to boot
- bsc#983697 - SLES12 SP2 Xen UEFI mode cannot boot
  57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch
- Upstream patch from Jan
  57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch

- spec: to stay compatible with the in-tree qemu-xen binary, use
  /usr/bin/qemu-system-i386 instead of /usr/bin/qemu-system-x86_64
  bsc#986164

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=447
This commit is contained in:
Charles Arnold 2016-08-23 16:38:35 +00:00 committed by Git OBS Bridge
parent a89d75605e
commit a9e5d7ffae
7 changed files with 280 additions and 11 deletions

View File

@ -17,6 +17,18 @@ Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
# Commit 7fb0a87d97201f9c3639f85615eacd93110dc1c5
# Date 2016-08-05 18:00:45 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: also use rdtsc_ordered() in check_tsc_warp()
This really was meant to be added in a v2 of what became commit
fa74e70500 ("x86/time: introduce and use rdtsc_ordered()").
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1137,7 +1137,7 @@ static int __init calibrate_APIC_clock(v
@ -108,7 +120,36 @@ Tested-by: Joao Martins <joao.m.martins@oracle.com>
t->local_tsc_stamp = curr_tsc;
set_time_scale(&t->tsc_scale, freq);
local_irq_enable();
@@ -1248,7 +1248,7 @@ static void time_calibration_tsc_rendezv
@@ -1124,16 +1124,13 @@ static void local_time_calibration(void)
*/
static void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
{
-#define rdtsc_barrier() mb()
static DEFINE_SPINLOCK(sync_lock);
static cycles_t last_tsc;
cycles_t start, now, prev, end;
int i;
- rdtsc_barrier();
- start = get_cycles();
- rdtsc_barrier();
+ start = rdtsc_ordered();
/* The measurement runs for 20 msecs: */
end = start + tsc_khz * 20ULL;
@@ -1148,9 +1145,7 @@ static void check_tsc_warp(unsigned long
*/
spin_lock(&sync_lock);
prev = last_tsc;
- rdtsc_barrier();
- now = get_cycles();
- rdtsc_barrier();
+ now = rdtsc_ordered();
last_tsc = now;
spin_unlock(&sync_lock);
@@ -1248,7 +1243,7 @@ static void time_calibration_tsc_rendezv
if ( r->master_stime == 0 )
{
r->master_stime = read_platform_stime();
@ -117,7 +158,7 @@ Tested-by: Joao Martins <joao.m.martins@oracle.com>
}
atomic_inc(&r->semaphore);
@@ -1274,7 +1274,7 @@ static void time_calibration_tsc_rendezv
@@ -1274,7 +1269,7 @@ static void time_calibration_tsc_rendezv
}
}
@ -126,7 +167,7 @@ Tested-by: Joao Martins <joao.m.martins@oracle.com>
c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
@@ -1304,7 +1304,7 @@ static void time_calibration_std_rendezv
@@ -1304,7 +1299,7 @@ static void time_calibration_std_rendezv
mb(); /* receive signal /then/ read r->master_stime */
}
@ -135,7 +176,7 @@ Tested-by: Joao Martins <joao.m.martins@oracle.com>
c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
@@ -1339,7 +1339,7 @@ void time_latch_stamps(void)
@@ -1339,7 +1334,7 @@ void time_latch_stamps(void)
local_irq_save(flags);
ap_bringup_ref.master_stime = read_platform_stime();
@ -144,7 +185,7 @@ Tested-by: Joao Martins <joao.m.martins@oracle.com>
local_irq_restore(flags);
ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
@@ -1357,7 +1357,7 @@ void init_percpu_time(void)
@@ -1357,7 +1352,7 @@ void init_percpu_time(void)
local_irq_save(flags);
now = read_platform_stime();

View File

@ -262,7 +262,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
}
u64 stime2tsc(s_time_t stime)
@@ -1479,7 +1509,11 @@ int __init init_xen_time(void)
@@ -1474,7 +1504,11 @@ int __init init_xen_time(void)
/* NB. get_cmos_time() can take over one second to execute. */
do_settime(get_cmos_time(), 0, NOW());
@ -275,7 +275,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
init_percpu_time();
@@ -1494,7 +1528,10 @@ int __init init_xen_time(void)
@@ -1489,7 +1523,10 @@ int __init init_xen_time(void)
void __init early_time_init(void)
{
struct cpu_time *t = &this_cpu(cpu_time);
@ -287,7 +287,7 @@ Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
set_time_scale(&t->tsc_scale, tmp);
t->local_tsc_stamp = boot_tsc_stamp;
@@ -1603,7 +1640,7 @@ int time_suspend(void)
@@ -1598,7 +1635,7 @@ int time_suspend(void)
int time_resume(void)
{

View File

@ -0,0 +1,102 @@
References: bsc#992224
# Commit d0d6597d3d682f324b6a79e3278e6f5bb6bad153
# Date 2016-08-11 13:35:50 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
page-alloc/x86: don't restrict DMA heap to node 0
When node zero has no memory, the DMA bit width will end up getting set
to 9, which is obviously not helpful to hold back a reasonable amount
of low enough memory for Dom0 to use for DMA purposes. Find the lowest
node with memory below 4Gb instead.
Introduce arch_get_dma_bitsize() to keep this arch-specific logic out
of common code.
Also adjust the original calculation: I think the subtraction of 1
should have been part of the flsl() argument rather than getting
applied to its result. And while previously the division by 4 was valid
to be done on the flsl() result, this now also needs to be converted,
as it should only be applied to the spanned pages value.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -355,11 +355,25 @@ void __init init_cpu_to_node(void)
}
}
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode_shift);
-EXPORT_SYMBOL(memnodemap);
-EXPORT_SYMBOL(node_data);
+unsigned int __init arch_get_dma_bitsize(void)
+{
+ unsigned int node;
+
+ for_each_online_node(node)
+ if ( node_spanned_pages(node) &&
+ !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
+ break;
+ if ( node >= MAX_NUMNODES )
+ panic("No node with memory below 4Gb");
+
+ /*
+ * Try to not reserve the whole node's memory for DMA, but dividing
+ * its spanned pages by (arbitrarily chosen) 4.
+ */
+ return min_t(unsigned int,
+ flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
+ + PAGE_SHIFT, 32);
+}
static void dump_numa(unsigned char key)
{
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1368,16 +1368,7 @@ void __init end_boot_allocator(void)
init_heap_pages(virt_to_page(bootmem_region_list), 1);
if ( !dma_bitsize && (num_online_nodes() > 1) )
- {
-#ifdef CONFIG_X86
- dma_bitsize = min_t(unsigned int,
- flsl(NODE_DATA(0)->node_spanned_pages) - 1
- + PAGE_SHIFT - 2,
- 32);
-#else
- dma_bitsize = 32;
-#endif
- }
+ dma_bitsize = arch_get_dma_bitsize();
printk("Domain heap initialised");
if ( dma_bitsize )
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -17,6 +17,11 @@ static inline __attribute__((pure)) node
#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
#define __node_distance(a, b) (20)
+static inline unsigned int arch_get_dma_bitsize(void)
+{
+ return 32;
+}
+
#endif /* __ARCH_ARM_NUMA_H */
/*
* Local variables:
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -86,5 +86,6 @@ extern int valid_numa_range(u64 start, u
void srat_parse_regions(u64 addr);
extern u8 __node_distance(nodeid_t a, nodeid_t b);
+unsigned int arch_get_dma_bitsize(void);
#endif

View File

@ -0,0 +1,48 @@
References: bsc#978755 bsc#983697
# Commit c5b4805bcd6bc749a8717e7406faa4a0e95468b4
# Date 2016-08-19 17:03:33 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/EFI: don't apply relocations to l{2,3}_bootmap
Other than claimed in commit 2ce5963727's ("x86: construct the
{l2,l3}_bootmap at compile time") the initialization of the two page
tables doesn't take care of everything without further adjustment: The
compile time initialization obviously requires base relocations, and
those get processed after efi_arch_memory_setup(). Hence without
additional care the correctly initialized values may then get wrongly
"adjusted" again. Exempt the two tables from being subject to base
relocation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper@citrix.com>
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -47,11 +47,23 @@ static void __init efi_arch_relocate_ima
for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
{
- unsigned int i, n;
+ unsigned int i = 0, n;
n = (base_relocs->size - sizeof(*base_relocs)) /
sizeof(*base_relocs->entries);
- for ( i = 0; i < n; ++i )
+
+ /*
+ * Relevant l{2,3}_bootmap entries get initialized explicitly in
+ * efi_arch_memory_setup(), so we must not apply relocations there.
+ * l2_identmap's first slot, otoh, should be handled normally, as
+ * efi_arch_memory_setup() won't touch it (xen_phys_start should
+ * never be zero).
+ */
+ if ( xen_phys_start + base_relocs->rva == (unsigned long)l3_bootmap ||
+ xen_phys_start + base_relocs->rva == (unsigned long)l2_bootmap )
+ i = n;
+
+ for ( ; i < n; ++i )
{
unsigned long addr = xen_phys_start + base_relocs->rva +
(base_relocs->entries[i] & 0xfff);

View File

@ -0,0 +1,52 @@
# Commit 2a99aa99fc84a45f505f84802af56b006d14c52e
# Date 2016-08-19 18:40:11 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
xen/physmap: Do not permit a guest to populate PoD pages for itself
PoD is supposed to be entirely transparent to guest, but this interface has
been left exposed for a long time.
The use of PoD requires careful co-ordination by the toolstack with the
XENMEM_{get,set}_pod_target hypercalls, and xenstore ballooning target. The
best a guest can do without toolstack cooperation is crash.
Furthermore, there are combinations of features (e.g. c/s c63868ff "libxl:
disallow PCI device assignment for HVM guest when PoD is enabled") which a
toolstack might wish to explicitly prohibit (in this case, because the two
simply don't function in combination). In such cases, the guest mustn't be
able to subvert the configuration chosen by the toolstack.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -140,14 +140,14 @@ static void populate_physmap(struct memo
struct page_info *page;
unsigned int i, j;
xen_pfn_t gpfn, mfn;
- struct domain *d = a->domain;
+ struct domain *d = a->domain, *curr_d = current->domain;
if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
a->nr_extents-1) )
return;
if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER :
- max_order(current->domain)) )
+ max_order(curr_d)) )
return;
for ( i = a->nr_done; i < a->nr_extents; i++ )
@@ -163,6 +163,10 @@ static void populate_physmap(struct memo
if ( a->memflags & MEMF_populate_on_demand )
{
+ /* Disallow populating PoD pages on oneself. */
+ if ( d == curr_d )
+ goto out;
+
if ( guest_physmap_mark_populate_on_demand(d, gpfn,
a->extent_order) < 0 )
goto out;

View File

@ -1,3 +1,22 @@
-------------------------------------------------------------------
Tue Aug 23 08:07:46 MDT 2016 - carnold@suse.com
- bsc#992224 - [HPS Bug] During boot of Xen Hypervisor, Failed to
get contiguous memory for DMA from Xen
57ac6316-don-t-restrict-DMA-heap-to-node-0.patch
- bsc#978755 - xen uefi systems fail to boot
- bsc#983697 - SLES12 SP2 Xen UEFI mode cannot boot
57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch
- Upstream patch from Jan
57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch
-------------------------------------------------------------------
Mon Aug 8 18:27:23 UTC 2016 - jfehlig@suse.com
- spec: to stay compatible with the in-tree qemu-xen binary, use
/usr/bin/qemu-system-i386 instead of /usr/bin/qemu-system-x86_64
bsc#986164
-------------------------------------------------------------------
Thu Aug 4 09:12:34 MDT 2016 - carnold@suse.com

View File

@ -216,6 +216,9 @@ Patch10: 57a1e603-x86-time-adjust-local-system-time-initialization.patch
Patch11: 57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch
Patch12: 57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch
Patch13: 57a30261-x86-support-newer-Intel-CPU-models.patch
Patch14: 57ac6316-don-t-restrict-DMA-heap-to-node-0.patch
Patch15: 57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch
Patch16: 57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch
# Upstream qemu-traditional patches
Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch
Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch
@ -550,6 +553,9 @@ Authors:
%patch11 -p1
%patch12 -p1
%patch13 -p1
%patch14 -p1
%patch15 -p1
%patch16 -p1
# Upstream qemu patches
%patch250 -p1
%patch251 -p1
@ -762,7 +768,7 @@ configure_flags="${configure_flags} --disable-qemu-traditional"
%endif
--with-system-ovmf=%{_datadir}/qemu/ovmf-x86_64-ms.bin \
--with-system-seabios=%{_datadir}/qemu/bios-256k.bin \
--with-system-qemu=%{_bindir}/qemu-system-%{_arch} \
--with-system-qemu=%{_bindir}/qemu-system-i386 \
${configure_flags}
make -C tools/include/xen-foreign %{?_smp_mflags}
make %{?_smp_mflags}
@ -909,12 +915,13 @@ done
# and advertised as the <emulator> in libvirt capabilities. Tools such as
# virt-install include <emulator> in domXML they produce, so we need to
# preserve the path. For x86_64, create a simple wrapper that invokes
# /usr/bin/qemu-system-x86_64
# /usr/bin/qemu-system-i386
# Using qemu-system-x86_64 will result in an incompatible VM
%ifarch x86_64
cat > $RPM_BUILD_ROOT/usr/lib/xen/bin/qemu-system-i386 << 'EOF'
#!/bin/sh
exec %{_bindir}/qemu-system-x86_64 "$@"
exec %{_bindir}/qemu-system-i386 "$@"
EOF
chmod 0755 $RPM_BUILD_ROOT/usr/lib/xen/bin/qemu-system-i386
%endif