From a9e5d7ffaeb186ece589e8bfea96dd39ed6f3faee7d48ae7f7acee5958344b40 Mon Sep 17 00:00:00 2001 From: Charles Arnold Date: Tue, 23 Aug 2016 16:38:35 +0000 Subject: [PATCH] - bsc#992224 - [HPS Bug] During boot of Xen Hypervisor, Failed to get contiguous memory for DMA from Xen 57ac6316-don-t-restrict-DMA-heap-to-node-0.patch - bsc#978755 - xen uefi systems fail to boot - bsc#983697 - SLES12 SP2 Xen UEFI mode cannot boot 57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch - Upstream patch from Jan 57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch - spec: to stay compatible with the in-tree qemu-xen binary, use /usr/bin/qemu-system-i386 instead of /usr/bin/qemu-system-x86_64 bsc#986164 OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=447 --- ...time-introduce-and-use-rdtsc_ordered.patch | 51 ++++++++- ...calibrate-TSC-against-platform-timer.patch | 6 +- ...16-don-t-restrict-DMA-heap-to-node-0.patch | 102 ++++++++++++++++++ ...-apply-relocations-to-l-2-3-_bootmap.patch | 48 +++++++++ ...est-to-populate-PoD-pages-for-itself.patch | 52 +++++++++ xen.changes | 19 ++++ xen.spec | 13 ++- 7 files changed, 280 insertions(+), 11 deletions(-) create mode 100644 57ac6316-don-t-restrict-DMA-heap-to-node-0.patch create mode 100644 57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch create mode 100644 57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch diff --git a/57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch b/57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch index 86e9e81..2dcd232 100644 --- a/57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch +++ b/57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch @@ -17,6 +17,18 @@ Tested-by: Dario Faggioli Reviewed-by: Andrew Cooper Tested-by: Joao Martins +# Commit 7fb0a87d97201f9c3639f85615eacd93110dc1c5 +# Date 2016-08-05 18:00:45 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/time: also use rdtsc_ordered() in 
check_tsc_warp() + +This really was meant to be added in a v2 of what became commit +fa74e70500 ("x86/time: introduce and use rdtsc_ordered()"). + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + --- a/xen/arch/x86/apic.c +++ b/xen/arch/x86/apic.c @@ -1137,7 +1137,7 @@ static int __init calibrate_APIC_clock(v @@ -108,7 +120,36 @@ Tested-by: Joao Martins t->local_tsc_stamp = curr_tsc; set_time_scale(&t->tsc_scale, freq); local_irq_enable(); -@@ -1248,7 +1248,7 @@ static void time_calibration_tsc_rendezv +@@ -1124,16 +1124,13 @@ static void local_time_calibration(void) + */ + static void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp) + { +-#define rdtsc_barrier() mb() + static DEFINE_SPINLOCK(sync_lock); + static cycles_t last_tsc; + + cycles_t start, now, prev, end; + int i; + +- rdtsc_barrier(); +- start = get_cycles(); +- rdtsc_barrier(); ++ start = rdtsc_ordered(); + + /* The measurement runs for 20 msecs: */ + end = start + tsc_khz * 20ULL; +@@ -1148,9 +1145,7 @@ static void check_tsc_warp(unsigned long + */ + spin_lock(&sync_lock); + prev = last_tsc; +- rdtsc_barrier(); +- now = get_cycles(); +- rdtsc_barrier(); ++ now = rdtsc_ordered(); + last_tsc = now; + spin_unlock(&sync_lock); + +@@ -1248,7 +1243,7 @@ static void time_calibration_tsc_rendezv if ( r->master_stime == 0 ) { r->master_stime = read_platform_stime(); @@ -117,7 +158,7 @@ Tested-by: Joao Martins } atomic_inc(&r->semaphore); -@@ -1274,7 +1274,7 @@ static void time_calibration_tsc_rendezv +@@ -1274,7 +1269,7 @@ static void time_calibration_tsc_rendezv } } @@ -126,7 +167,7 @@ Tested-by: Joao Martins c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp); c->stime_master_stamp = r->master_stime; -@@ -1304,7 +1304,7 @@ static void time_calibration_std_rendezv +@@ -1304,7 +1299,7 @@ static void time_calibration_std_rendezv mb(); /* receive signal /then/ read r->master_stime */ } @@ -135,7 +176,7 @@ Tested-by: Joao Martins c->stime_local_stamp = 
get_s_time_fixed(c->local_tsc_stamp); c->stime_master_stamp = r->master_stime; -@@ -1339,7 +1339,7 @@ void time_latch_stamps(void) +@@ -1339,7 +1334,7 @@ void time_latch_stamps(void) local_irq_save(flags); ap_bringup_ref.master_stime = read_platform_stime(); @@ -144,7 +185,7 @@ Tested-by: Joao Martins local_irq_restore(flags); ap_bringup_ref.local_stime = get_s_time_fixed(tsc); -@@ -1357,7 +1357,7 @@ void init_percpu_time(void) +@@ -1357,7 +1352,7 @@ void init_percpu_time(void) local_irq_save(flags); now = read_platform_stime(); diff --git a/57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch b/57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch index cfb0c4d..7f366cd 100644 --- a/57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch +++ b/57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch @@ -262,7 +262,7 @@ Reviewed-by: Andrew Cooper } u64 stime2tsc(s_time_t stime) -@@ -1479,7 +1509,11 @@ int __init init_xen_time(void) +@@ -1474,7 +1504,11 @@ int __init init_xen_time(void) /* NB. get_cmos_time() can take over one second to execute. 
*/ do_settime(get_cmos_time(), 0, NOW()); @@ -275,7 +275,7 @@ Reviewed-by: Andrew Cooper init_percpu_time(); -@@ -1494,7 +1528,10 @@ int __init init_xen_time(void) +@@ -1489,7 +1523,10 @@ int __init init_xen_time(void) void __init early_time_init(void) { struct cpu_time *t = &this_cpu(cpu_time); @@ -287,7 +287,7 @@ Reviewed-by: Andrew Cooper set_time_scale(&t->tsc_scale, tmp); t->local_tsc_stamp = boot_tsc_stamp; -@@ -1603,7 +1640,7 @@ int time_suspend(void) +@@ -1598,7 +1635,7 @@ int time_suspend(void) int time_resume(void) { diff --git a/57ac6316-don-t-restrict-DMA-heap-to-node-0.patch b/57ac6316-don-t-restrict-DMA-heap-to-node-0.patch new file mode 100644 index 0000000..d4b6881 --- /dev/null +++ b/57ac6316-don-t-restrict-DMA-heap-to-node-0.patch @@ -0,0 +1,102 @@ +References: bsc#992224 + +# Commit d0d6597d3d682f324b6a79e3278e6f5bb6bad153 +# Date 2016-08-11 13:35:50 +0200 +# Author Jan Beulich +# Committer Jan Beulich +page-alloc/x86: don't restrict DMA heap to node 0 + +When node zero has no memory, the DMA bit width will end up getting set +to 9, which is obviously not helpful to hold back a reasonable amount +of low enough memory for Dom0 to use for DMA purposes. Find the lowest +node with memory below 4Gb instead. + +Introduce arch_get_dma_bitsize() to keep this arch-specific logic out +of common code. + +Also adjust the original calculation: I think the subtraction of 1 +should have been part of the flsl() argument rather than getting +applied to its result. And while previously the division by 4 was valid +to be done on the flsl() result, this now also needs to be converted, +as it should only be applied to the spanned pages value. 
+ +Signed-off-by: Jan Beulich +Acked-by: Julien Grall +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/numa.c ++++ b/xen/arch/x86/numa.c +@@ -355,11 +355,25 @@ void __init init_cpu_to_node(void) + } + } + +-EXPORT_SYMBOL(cpu_to_node); +-EXPORT_SYMBOL(node_to_cpumask); +-EXPORT_SYMBOL(memnode_shift); +-EXPORT_SYMBOL(memnodemap); +-EXPORT_SYMBOL(node_data); ++unsigned int __init arch_get_dma_bitsize(void) ++{ ++ unsigned int node; ++ ++ for_each_online_node(node) ++ if ( node_spanned_pages(node) && ++ !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) ) ++ break; ++ if ( node >= MAX_NUMNODES ) ++ panic("No node with memory below 4Gb"); ++ ++ /* ++ * Try to not reserve the whole node's memory for DMA, but dividing ++ * its spanned pages by (arbitrarily chosen) 4. ++ */ ++ return min_t(unsigned int, ++ flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1) ++ + PAGE_SHIFT, 32); ++} + + static void dump_numa(unsigned char key) + { +--- a/xen/common/page_alloc.c ++++ b/xen/common/page_alloc.c +@@ -1368,16 +1368,7 @@ void __init end_boot_allocator(void) + init_heap_pages(virt_to_page(bootmem_region_list), 1); + + if ( !dma_bitsize && (num_online_nodes() > 1) ) +- { +-#ifdef CONFIG_X86 +- dma_bitsize = min_t(unsigned int, +- flsl(NODE_DATA(0)->node_spanned_pages) - 1 +- + PAGE_SHIFT - 2, +- 32); +-#else +- dma_bitsize = 32; +-#endif +- } ++ dma_bitsize = arch_get_dma_bitsize(); + + printk("Domain heap initialised"); + if ( dma_bitsize ) +--- a/xen/include/asm-arm/numa.h ++++ b/xen/include/asm-arm/numa.h +@@ -17,6 +17,11 @@ static inline __attribute__((pure)) node + #define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx)) + #define __node_distance(a, b) (20) + ++static inline unsigned int arch_get_dma_bitsize(void) ++{ ++ return 32; ++} ++ + #endif /* __ARCH_ARM_NUMA_H */ + /* + * Local variables: +--- a/xen/include/asm-x86/numa.h ++++ b/xen/include/asm-x86/numa.h +@@ -86,5 +86,6 @@ extern int valid_numa_range(u64 start, u + + void srat_parse_regions(u64 addr); + 
extern u8 __node_distance(nodeid_t a, nodeid_t b); ++unsigned int arch_get_dma_bitsize(void); + + #endif diff --git a/57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch b/57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch new file mode 100644 index 0000000..2b19a6e --- /dev/null +++ b/57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch @@ -0,0 +1,48 @@ +References: bsc#978755 bsc#983697 + +# Commit c5b4805bcd6bc749a8717e7406faa4a0e95468b4 +# Date 2016-08-19 17:03:33 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/EFI: don't apply relocations to l{2,3}_bootmap + +Other than claimed in commit 2ce5963727's ("x86: construct the +{l2,l3}_bootmap at compile time") the initialization of the two page +tables doesn't take care of everything without further adjustment: The +compile time initialization obviously requires base relocations, and +those get processed after efi_arch_memory_setup(). Hence without +additional care the correctly initialized values may then get wrongly +"adjusted" again. Exempt the two tables from being subject to base +relocation. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/efi/efi-boot.h ++++ b/xen/arch/x86/efi/efi-boot.h +@@ -47,11 +47,23 @@ static void __init efi_arch_relocate_ima + + for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; ) + { +- unsigned int i, n; ++ unsigned int i = 0, n; + + n = (base_relocs->size - sizeof(*base_relocs)) / + sizeof(*base_relocs->entries); +- for ( i = 0; i < n; ++i ) ++ ++ /* ++ * Relevant l{2,3}_bootmap entries get initialized explicitly in ++ * efi_arch_memory_setup(), so we must not apply relocations there. ++ * l2_identmap's first slot, otoh, should be handled normally, as ++ * efi_arch_memory_setup() won't touch it (xen_phys_start should ++ * never be zero). 
++ */ ++ if ( xen_phys_start + base_relocs->rva == (unsigned long)l3_bootmap || ++ xen_phys_start + base_relocs->rva == (unsigned long)l2_bootmap ) ++ i = n; ++ ++ for ( ; i < n; ++i ) + { + unsigned long addr = xen_phys_start + base_relocs->rva + + (base_relocs->entries[i] & 0xfff); diff --git a/57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch b/57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch new file mode 100644 index 0000000..5c1d27c --- /dev/null +++ b/57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch @@ -0,0 +1,52 @@ +# Commit 2a99aa99fc84a45f505f84802af56b006d14c52e +# Date 2016-08-19 18:40:11 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +xen/physmap: Do not permit a guest to populate PoD pages for itself + +PoD is supposed to be entirely transparent to guest, but this interface has +been left exposed for a long time. + +The use of PoD requires careful co-ordination by the toolstack with the +XENMEM_{get,set}_pod_target hypercalls, and xenstore ballooning target. The +best a guest can do without toolstack cooperation is crash. + +Furthermore, there are combinations of features (e.g. c/s c63868ff "libxl: +disallow PCI device assignment for HVM guest when PoD is enabled") which a +toolstack might wish to explicitly prohibit (in this case, because the two +simply don't function in combination). In such cases, the guest mustn't be +able to subvert the configuration chosen by the toolstack. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich + +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -140,14 +140,14 @@ static void populate_physmap(struct memo + struct page_info *page; + unsigned int i, j; + xen_pfn_t gpfn, mfn; +- struct domain *d = a->domain; ++ struct domain *d = a->domain, *curr_d = current->domain; + + if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, + a->nr_extents-1) ) + return; + + if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? 
MAX_ORDER : +- max_order(current->domain)) ) ++ max_order(curr_d)) ) + return; + + for ( i = a->nr_done; i < a->nr_extents; i++ ) +@@ -163,6 +163,10 @@ static void populate_physmap(struct memo + + if ( a->memflags & MEMF_populate_on_demand ) + { ++ /* Disallow populating PoD pages on oneself. */ ++ if ( d == curr_d ) ++ goto out; ++ + if ( guest_physmap_mark_populate_on_demand(d, gpfn, + a->extent_order) < 0 ) + goto out; diff --git a/xen.changes b/xen.changes index 8537b8d..b414431 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,22 @@ +------------------------------------------------------------------- +Tue Aug 23 08:07:46 MDT 2016 - carnold@suse.com + +- bsc#992224 - [HPS Bug] During boot of Xen Hypervisor, Failed to + get contiguous memory for DMA from Xen + 57ac6316-don-t-restrict-DMA-heap-to-node-0.patch +- bsc#978755 - xen uefi systems fail to boot +- bsc#983697 - SLES12 SP2 Xen UEFI mode cannot boot + 57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch +- Upstream patch from Jan + 57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch + +------------------------------------------------------------------- +Mon Aug 8 18:27:23 UTC 2016 - jfehlig@suse.com + +- spec: to stay compatible with the in-tree qemu-xen binary, use + /usr/bin/qemu-system-i386 instead of /usr/bin/qemu-system-x86_64 + bsc#986164 + ------------------------------------------------------------------- Thu Aug 4 09:12:34 MDT 2016 - carnold@suse.com diff --git a/xen.spec b/xen.spec index dc9ae0d..3066191 100644 --- a/xen.spec +++ b/xen.spec @@ -216,6 +216,9 @@ Patch10: 57a1e603-x86-time-adjust-local-system-time-initialization.patch Patch11: 57a1e64c-x86-time-introduce-and-use-rdtsc_ordered.patch Patch12: 57a2f6ac-x86-time-calibrate-TSC-against-platform-timer.patch Patch13: 57a30261-x86-support-newer-Intel-CPU-models.patch +Patch14: 57ac6316-don-t-restrict-DMA-heap-to-node-0.patch +Patch15: 57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch +Patch16: 
57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch # Upstream qemu-traditional patches Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch @@ -550,6 +553,9 @@ Authors: %patch11 -p1 %patch12 -p1 %patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 # Upstream qemu patches %patch250 -p1 %patch251 -p1 @@ -762,7 +768,7 @@ configure_flags="${configure_flags} --disable-qemu-traditional" %endif --with-system-ovmf=%{_datadir}/qemu/ovmf-x86_64-ms.bin \ --with-system-seabios=%{_datadir}/qemu/bios-256k.bin \ - --with-system-qemu=%{_bindir}/qemu-system-%{_arch} \ + --with-system-qemu=%{_bindir}/qemu-system-i386 \ ${configure_flags} make -C tools/include/xen-foreign %{?_smp_mflags} make %{?_smp_mflags} @@ -909,12 +915,13 @@ done # and advertised as the in libvirt capabilities. Tool such as # virt-install include in domXML they produce, so we need to # preserve the path. For x86_64, create a simple wrapper that invokes -# /usr/bin/qemu-system-x86_64 +# /usr/bin/qemu-system-i386 +# Using qemu-system-x86_64 will result in an incompatible VM %ifarch x86_64 cat > $RPM_BUILD_ROOT/usr/lib/xen/bin/qemu-system-i386 << 'EOF' #!/bin/sh -exec %{_bindir}/qemu-system-x86_64 "$@" +exec %{_bindir}/qemu-system-i386 "$@" EOF chmod 0755 $RPM_BUILD_ROOT/usr/lib/xen/bin/qemu-system-i386 %endif