- add xorg-x11-util-devel to BuildRequires to get lndir(1)
- remove xen.migrate.tools_notify_restore_to_hangup_during_migration_--abort_if_busy.patch
  It changed migration protocol and upstream wants a different solution
- bnc#802221 - fix xenpaging
  readd xenpaging.qemu.flush-cache.patch
- Upstream patches from Jan
  26891-x86-S3-Fix-cpu-pool-scheduling-after-suspend-resume.patch
  26930-x86-EFI-fix-runtime-call-status-for-compat-mode-Dom0.patch
- Additional fix for bnc#816159
  CVE-2013-1918-xsa45-followup.patch
- bnc#817068 - Xen guest with >1 sr-iov vf won't start
  xen-managed-pci-device.patch
- Update to Xen 4.2.2 c/s 26064
  The following recent security patches are included in the tarball
  CVE-2013-0151-xsa34.patch (bnc#797285)
  CVE-2012-6075-xsa41.patch (bnc#797523)
  CVE-2013-1917-xsa44.patch (bnc#813673)
  CVE-2013-1919-xsa46.patch (bnc#813675)
- Upstream patch from Jan
  26902-x86-EFI-pass-boot-services-variable-info-to-runtime-code.patch
- bnc#816159 - VUL-0: xen: CVE-2013-1918: XSA-45: Several long latency operations are not preemptible
  CVE-2013-1918-xsa45-1-vcpu-destroy-pagetables-preemptible.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=237
This commit is contained in:
parent
9c5584a232
commit
b9d38dfc8d
@ -12,10 +12,10 @@ the fixmaps together with other boot time page table construction.
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
Index: xen-4.2.0-testing/xen/arch/x86/boot/head.S
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/boot/head.S
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/arch/x86/boot/head.S
|
||||
+++ xen-4.2.0-testing/xen/arch/x86/boot/head.S
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/boot/head.S
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/boot/head.S
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <public/xen.h>
|
||||
#include <asm/asm_defns.h>
|
||||
@ -57,10 +57,10 @@ Index: xen-4.2.0-testing/xen/arch/x86/boot/head.S
|
||||
#endif
|
||||
|
||||
/* Initialize 4kB mappings of first 2MB or 4MB of memory. */
|
||||
Index: xen-4.2.0-testing/xen/arch/x86/efi/boot.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/efi/boot.c
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/arch/x86/efi/boot.c
|
||||
+++ xen-4.2.0-testing/xen/arch/x86/efi/boot.c
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/efi/boot.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/efi/boot.c
|
||||
@@ -17,6 +17,9 @@
|
||||
#include <xen/vga.h>
|
||||
#include <asm/e820.h>
|
||||
@ -92,11 +92,11 @@ Index: xen-4.2.0-testing/xen/arch/x86/efi/boot.c
|
||||
/* Initialise L3 boot-map page directory entries. */
|
||||
l3_bootmap[l3_table_offset(xen_phys_start)] =
|
||||
l3e_from_paddr((UINTN)l2_bootmap, __PAGE_HYPERVISOR);
|
||||
Index: xen-4.2.0-testing/xen/arch/x86/mm.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.0-testing/xen/arch/x86/mm.c
|
||||
@@ -130,6 +130,10 @@
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/mm.c
|
||||
@@ -131,6 +131,10 @@
|
||||
l1_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
|
||||
l1_identmap[L1_PAGETABLE_ENTRIES];
|
||||
|
||||
@ -107,10 +107,10 @@ Index: xen-4.2.0-testing/xen/arch/x86/mm.c
|
||||
#define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
|
||||
|
||||
/*
|
||||
Index: xen-4.2.0-testing/xen/arch/x86/x86_64/mm.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/x86_64/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/arch/x86/x86_64/mm.c
|
||||
+++ xen-4.2.0-testing/xen/arch/x86/x86_64/mm.c
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/x86_64/mm.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/x86_64/mm.c
|
||||
@@ -65,6 +65,10 @@ l3_pgentry_t __attribute__ ((__section__
|
||||
l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
|
||||
l2_xenmap[L2_PAGETABLE_ENTRIES];
|
||||
@ -122,10 +122,10 @@ Index: xen-4.2.0-testing/xen/arch/x86/x86_64/mm.c
|
||||
/* Enough page directories to map into the bottom 1GB. */
|
||||
l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
|
||||
l3_bootmap[L3_PAGETABLE_ENTRIES];
|
||||
Index: xen-4.2.0-testing/xen/include/asm-x86/config.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/config.h
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/include/asm-x86/config.h
|
||||
+++ xen-4.2.0-testing/xen/include/asm-x86/config.h
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/config.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/config.h
|
||||
@@ -317,7 +317,7 @@ extern unsigned char boot_edid_info[128]
|
||||
#define MACHPHYS_MBYTES 16 /* 1 MB needed per 1 GB memory */
|
||||
#define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6)
|
||||
@ -135,10 +135,10 @@ Index: xen-4.2.0-testing/xen/include/asm-x86/config.h
|
||||
#define IOREMAP_VIRT_START (IOREMAP_VIRT_END - (IOREMAP_MBYTES<<20))
|
||||
#define DIRECTMAP_VIRT_END IOREMAP_VIRT_START
|
||||
#define DIRECTMAP_VIRT_START (DIRECTMAP_VIRT_END - (DIRECTMAP_MBYTES<<20))
|
||||
Index: xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/fixmap.h
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/include/asm-x86/fixmap.h
|
||||
+++ xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/fixmap.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/fixmap.h
|
||||
@@ -13,12 +13,17 @@
|
||||
#define _ASM_FIXMAP_H
|
||||
|
||||
@ -158,7 +158,7 @@ Index: xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
|
||||
#include <asm/amd-iommu.h>
|
||||
#include <asm/msi.h>
|
||||
#include <acpi/apei.h>
|
||||
@@ -66,7 +71,6 @@ enum fixed_addresses {
|
||||
@@ -68,7 +73,6 @@ enum fixed_addresses {
|
||||
__end_of_fixed_addresses
|
||||
};
|
||||
|
||||
@ -166,17 +166,17 @@ Index: xen-4.2.0-testing/xen/include/asm-x86/fixmap.h
|
||||
#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
|
||||
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
|
||||
|
||||
@@ -90,4 +94,6 @@ static inline unsigned long virt_to_fix(
|
||||
@@ -92,4 +96,6 @@ static inline unsigned long virt_to_fix(
|
||||
return __virt_to_fix(vaddr);
|
||||
}
|
||||
|
||||
+#endif /* __ASSEMBLY__ */
|
||||
+
|
||||
#endif
|
||||
Index: xen-4.2.0-testing/xen/include/asm-x86/page.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/page.h
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/include/asm-x86/page.h
|
||||
+++ xen-4.2.0-testing/xen/include/asm-x86/page.h
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/page.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/page.h
|
||||
@@ -1,6 +1,8 @@
|
||||
#ifndef __X86_PAGE_H__
|
||||
#define __X86_PAGE_H__
|
||||
@ -203,10 +203,10 @@ Index: xen-4.2.0-testing/xen/include/asm-x86/page.h
|
||||
void paging_init(void);
|
||||
void setup_idle_pagetable(void);
|
||||
#endif /* !defined(__ASSEMBLY__) */
|
||||
Index: xen-4.2.0-testing/xen/include/xen/const.h
|
||||
Index: xen-4.2.2-testing/xen/include/xen/const.h
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ xen-4.2.0-testing/xen/include/xen/const.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/const.h
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* const.h: Macros for dealing with constants. */
|
||||
+
|
||||
|
@ -14,8 +14,10 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/docs/misc/xen-command-line.markdown
|
||||
+++ b/docs/misc/xen-command-line.markdown
|
||||
Index: xen-4.2.2-testing/docs/misc/xen-command-line.markdown
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/docs/misc/xen-command-line.markdown
|
||||
+++ xen-4.2.2-testing/docs/misc/xen-command-line.markdown
|
||||
@@ -244,7 +244,7 @@ A typical setup for most situations migh
|
||||
Specify the size of the console ring buffer.
|
||||
|
||||
@ -47,8 +49,10 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
### debug\_stack\_lines
|
||||
> `= <integer>`
|
||||
|
||||
--- a/xen/arch/x86/Rules.mk
|
||||
+++ b/xen/arch/x86/Rules.mk
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/Rules.mk
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/Rules.mk
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/Rules.mk
|
||||
@@ -7,6 +7,7 @@ HAS_CPUFREQ := y
|
||||
HAS_PCI := y
|
||||
HAS_PASSTHROUGH := y
|
||||
@ -57,8 +61,10 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
HAS_KEXEC := y
|
||||
HAS_GDBSX := y
|
||||
xenoprof := y
|
||||
--- a/xen/arch/x86/physdev.c
|
||||
+++ b/xen/arch/x86/physdev.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/physdev.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/physdev.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/physdev.c
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <xen/event.h>
|
||||
#include <xen/guest_access.h>
|
||||
@ -67,7 +73,7 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
#include <asm/current.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/msi.h>
|
||||
@@ -722,6 +723,19 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
|
||||
@@ -734,6 +735,19 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
|
||||
|
||||
break;
|
||||
}
|
||||
@ -87,8 +93,10 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
default:
|
||||
ret = -ENOSYS;
|
||||
break;
|
||||
--- a/xen/arch/x86/setup.c
|
||||
+++ b/xen/arch/x86/setup.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/setup.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/setup.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/setup.c
|
||||
@@ -606,6 +606,7 @@ void __init __start_xen(unsigned long mb
|
||||
ns16550.io_base = 0x2f8;
|
||||
ns16550.irq = 3;
|
||||
@ -97,16 +105,20 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
console_init_preirq();
|
||||
|
||||
printk("Bootloader: %s\n", loader);
|
||||
--- a/xen/drivers/char/Makefile
|
||||
+++ b/xen/drivers/char/Makefile
|
||||
Index: xen-4.2.2-testing/xen/drivers/char/Makefile
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/char/Makefile
|
||||
+++ xen-4.2.2-testing/xen/drivers/char/Makefile
|
||||
@@ -1,4 +1,5 @@
|
||||
obj-y += console.o
|
||||
obj-$(HAS_NS16550) += ns16550.o
|
||||
obj-$(HAS_PL011) += pl011.o
|
||||
+obj-$(HAS_EHCI) += ehci-dbgp.o
|
||||
obj-y += serial.o
|
||||
Index: xen-4.2.2-testing/xen/drivers/char/ehci-dbgp.c
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ b/xen/drivers/char/ehci-dbgp.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/char/ehci-dbgp.c
|
||||
@@ -0,0 +1,1577 @@
|
||||
+/*
|
||||
+ * Standalone EHCI USB debug driver
|
||||
@ -1685,8 +1697,10 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
+
|
||||
+ return -ENOSYS;
|
||||
+}
|
||||
--- a/xen/drivers/char/serial.c
|
||||
+++ b/xen/drivers/char/serial.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/char/serial.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/char/serial.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/char/serial.c
|
||||
@@ -265,6 +265,14 @@ int __init serial_parse_handle(char *con
|
||||
{
|
||||
int handle;
|
||||
@ -1702,8 +1716,10 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
if ( strncmp(conf, "com", 3) )
|
||||
goto fail;
|
||||
|
||||
--- a/xen/include/asm-x86/fixmap.h
|
||||
+++ b/xen/include/asm-x86/fixmap.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/fixmap.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/fixmap.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/fixmap.h
|
||||
@@ -36,7 +36,15 @@
|
||||
* from the end of virtual memory backwards.
|
||||
*/
|
||||
@ -1721,9 +1737,11 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
#ifdef __i386__
|
||||
FIX_PAE_HIGHMEM_0,
|
||||
FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
|
||||
--- a/xen/include/public/physdev.h
|
||||
+++ b/xen/include/public/physdev.h
|
||||
@@ -312,6 +312,24 @@ struct physdev_pci_device {
|
||||
Index: xen-4.2.2-testing/xen/include/public/physdev.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/public/physdev.h
|
||||
+++ xen-4.2.2-testing/xen/include/public/physdev.h
|
||||
@@ -318,6 +318,24 @@ struct physdev_pci_device {
|
||||
typedef struct physdev_pci_device physdev_pci_device_t;
|
||||
DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
|
||||
|
||||
@ -1748,8 +1766,10 @@ Acked-by: Keir Fraser <keir@xen.org>
|
||||
/*
|
||||
* Notify that some PIRQ-bound event channels have been unmasked.
|
||||
* ** This command is obsolete since interface version 0x00030202 and is **
|
||||
--- a/xen/include/xen/serial.h
|
||||
+++ b/xen/include/xen/serial.h
|
||||
Index: xen-4.2.2-testing/xen/include/xen/serial.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/serial.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/serial.h
|
||||
@@ -69,9 +69,10 @@ struct uart_driver {
|
||||
};
|
||||
|
||||
|
@ -1,146 +0,0 @@
|
||||
No functional change.
|
||||
|
||||
The purpose is to make it easier to backport patches from Xen 4.3's
|
||||
libxl, as Xen 4.3's libxl has had this done:
|
||||
|
||||
libxl: Enable -Wshadow.
|
||||
|
||||
It was convenient to invent $(CFLAGS_LIBXL) to do this.
|
||||
|
||||
Various renamings to avoid shadowing standard functions:
|
||||
- index(3)
|
||||
- listen(2)
|
||||
- link(2)
|
||||
- abort(3)
|
||||
- abs(3)
|
||||
|
||||
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
In this patch we do not change the others, and we do not enable
|
||||
-Wshadow. We're just trying to bring 4.2's libxl textually closer to
|
||||
4.3's.
|
||||
|
||||
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
|
||||
---
|
||||
tools/libxl/libxl_event.c | 34 +++++++++++++++++-----------------
|
||||
1 files changed, 17 insertions(+), 17 deletions(-)
|
||||
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_event.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.c
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_event.c
|
||||
@@ -167,15 +167,15 @@ static void time_insert_finite(libxl__gc
|
||||
}
|
||||
|
||||
static int time_register_finite(libxl__gc *gc, libxl__ev_time *ev,
|
||||
- struct timeval abs)
|
||||
+ struct timeval absolute)
|
||||
{
|
||||
int rc;
|
||||
|
||||
- rc = OSEVENT_HOOK(timeout_register, &ev->for_app_reg, abs, ev);
|
||||
+ rc = OSEVENT_HOOK(timeout_register, &ev->for_app_reg, absolute, ev);
|
||||
if (rc) return rc;
|
||||
|
||||
ev->infinite = 0;
|
||||
- ev->abs = abs;
|
||||
+ ev->abs = absolute;
|
||||
time_insert_finite(gc, ev);
|
||||
|
||||
return 0;
|
||||
@@ -202,16 +202,16 @@ static void time_done_debug(libxl__gc *g
|
||||
|
||||
int libxl__ev_time_register_abs(libxl__gc *gc, libxl__ev_time *ev,
|
||||
libxl__ev_time_callback *func,
|
||||
- struct timeval abs)
|
||||
+ struct timeval absolute)
|
||||
{
|
||||
int rc;
|
||||
|
||||
CTX_LOCK;
|
||||
|
||||
DBG("ev_time=%p register abs=%lu.%06lu",
|
||||
- ev, (unsigned long)abs.tv_sec, (unsigned long)abs.tv_usec);
|
||||
+ ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);
|
||||
|
||||
- rc = time_register_finite(gc, ev, abs);
|
||||
+ rc = time_register_finite(gc, ev, absolute);
|
||||
if (rc) goto out;
|
||||
|
||||
ev->func = func;
|
||||
@@ -228,7 +228,7 @@ int libxl__ev_time_register_rel(libxl__g
|
||||
libxl__ev_time_callback *func,
|
||||
int milliseconds /* as for poll(2) */)
|
||||
{
|
||||
- struct timeval abs;
|
||||
+ struct timeval absolute;
|
||||
int rc;
|
||||
|
||||
CTX_LOCK;
|
||||
@@ -238,10 +238,10 @@ int libxl__ev_time_register_rel(libxl__g
|
||||
if (milliseconds < 0) {
|
||||
ev->infinite = 1;
|
||||
} else {
|
||||
- rc = time_rel_to_abs(gc, milliseconds, &abs);
|
||||
+ rc = time_rel_to_abs(gc, milliseconds, &absolute);
|
||||
if (rc) goto out;
|
||||
|
||||
- rc = time_register_finite(gc, ev, abs);
|
||||
+ rc = time_register_finite(gc, ev, absolute);
|
||||
if (rc) goto out;
|
||||
}
|
||||
|
||||
@@ -255,26 +255,26 @@ int libxl__ev_time_register_rel(libxl__g
|
||||
}
|
||||
|
||||
int libxl__ev_time_modify_abs(libxl__gc *gc, libxl__ev_time *ev,
|
||||
- struct timeval abs)
|
||||
+ struct timeval absolute)
|
||||
{
|
||||
int rc;
|
||||
|
||||
CTX_LOCK;
|
||||
|
||||
DBG("ev_time=%p modify abs==%lu.%06lu",
|
||||
- ev, (unsigned long)abs.tv_sec, (unsigned long)abs.tv_usec);
|
||||
+ ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);
|
||||
|
||||
assert(libxl__ev_time_isregistered(ev));
|
||||
|
||||
if (ev->infinite) {
|
||||
- rc = time_register_finite(gc, ev, abs);
|
||||
+ rc = time_register_finite(gc, ev, absolute);
|
||||
if (rc) goto out;
|
||||
} else {
|
||||
- rc = OSEVENT_HOOK(timeout_modify, &ev->for_app_reg, abs);
|
||||
+ rc = OSEVENT_HOOK(timeout_modify, &ev->for_app_reg, absolute);
|
||||
if (rc) goto out;
|
||||
|
||||
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
|
||||
- ev->abs = abs;
|
||||
+ ev->abs = absolute;
|
||||
time_insert_finite(gc, ev);
|
||||
}
|
||||
|
||||
@@ -288,7 +288,7 @@ int libxl__ev_time_modify_abs(libxl__gc
|
||||
int libxl__ev_time_modify_rel(libxl__gc *gc, libxl__ev_time *ev,
|
||||
int milliseconds)
|
||||
{
|
||||
- struct timeval abs;
|
||||
+ struct timeval absolute;
|
||||
int rc;
|
||||
|
||||
CTX_LOCK;
|
||||
@@ -304,10 +304,10 @@ int libxl__ev_time_modify_rel(libxl__gc
|
||||
goto out;
|
||||
}
|
||||
|
||||
- rc = time_rel_to_abs(gc, milliseconds, &abs);
|
||||
+ rc = time_rel_to_abs(gc, milliseconds, &absolute);
|
||||
if (rc) goto out;
|
||||
|
||||
- rc = libxl__ev_time_modify_abs(gc, ev, abs);
|
||||
+ rc = libxl__ev_time_modify_abs(gc, ev, absolute);
|
||||
if (rc) goto out;
|
||||
|
||||
rc = 0;
|
@ -17,16 +17,14 @@ Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vlapic.c
|
||||
+++ b/xen/arch/x86/hvm/vlapic.c
|
||||
@@ -823,6 +823,14 @@ static int vlapic_write(struct vcpu *v,
|
||||
@@ -822,6 +822,12 @@ static int vlapic_write(struct vcpu *v,
|
||||
return rc;
|
||||
}
|
||||
|
||||
+int vlapic_apicv_write(struct vcpu *v, unsigned int offset)
|
||||
+{
|
||||
+ uint32_t val = vlapic_get_reg(vcpu_vlapic(v), offset);
|
||||
+
|
||||
+ vlapic_reg_write(v, offset, val);
|
||||
+ return 0;
|
||||
+ return vlapic_reg_write(v, offset, val);
|
||||
+}
|
||||
+
|
||||
int hvm_x2apic_msr_write(struct vcpu *v, unsigned int msr, uint64_t msr_content)
|
||||
@ -59,7 +57,7 @@ Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
|
||||
MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -2274,6 +2274,16 @@ static void vmx_idtv_reinject(unsigned l
|
||||
@@ -2279,6 +2279,16 @@ static void vmx_idtv_reinject(unsigned l
|
||||
}
|
||||
}
|
||||
|
||||
@ -76,7 +74,7 @@ Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
|
||||
void vmx_vmexit_handler(struct cpu_user_regs *regs)
|
||||
{
|
||||
unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
|
||||
@@ -2729,6 +2739,11 @@ void vmx_vmexit_handler(struct cpu_user_
|
||||
@@ -2741,6 +2751,11 @@ void vmx_vmexit_handler(struct cpu_user_
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
int vlapic_ipi(
|
||||
struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
|
||||
{
|
||||
@@ -1000,6 +1011,14 @@ void vlapic_adjust_i8259_target(struct d
|
||||
@@ -996,6 +1007,14 @@ void vlapic_adjust_i8259_target(struct d
|
||||
pt_adjust_global_vcpu_target(v);
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
int vlapic_has_pending_irq(struct vcpu *v)
|
||||
{
|
||||
struct vlapic *vlapic = vcpu_vlapic(v);
|
||||
@@ -1012,6 +1031,9 @@ int vlapic_has_pending_irq(struct vcpu *
|
||||
@@ -1008,6 +1027,9 @@ int vlapic_has_pending_irq(struct vcpu *
|
||||
if ( irr == -1 )
|
||||
return -1;
|
||||
|
||||
@ -76,7 +76,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
isr = vlapic_find_highest_isr(vlapic);
|
||||
isr = (isr != -1) ? isr : 0;
|
||||
if ( (isr & 0xf0) >= (irr & 0xf0) )
|
||||
@@ -1024,6 +1046,9 @@ int vlapic_ack_pending_irq(struct vcpu *
|
||||
@@ -1020,6 +1042,9 @@ int vlapic_ack_pending_irq(struct vcpu *
|
||||
{
|
||||
struct vlapic *vlapic = vcpu_vlapic(v);
|
||||
|
||||
@ -88,7 +88,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/intr.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/intr.c
|
||||
@@ -206,6 +206,7 @@ void vmx_intr_assist(void)
|
||||
@@ -209,6 +209,7 @@ void vmx_intr_assist(void)
|
||||
struct vcpu *v = current;
|
||||
unsigned int tpr_threshold = 0;
|
||||
enum hvm_intblk intblk;
|
||||
@ -96,7 +96,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
/* Block event injection when single step with MTF. */
|
||||
if ( unlikely(v->arch.hvm_vcpu.single_step) )
|
||||
@@ -216,7 +217,7 @@ void vmx_intr_assist(void)
|
||||
@@ -219,7 +220,7 @@ void vmx_intr_assist(void)
|
||||
}
|
||||
|
||||
/* Crank the handle on interrupt state. */
|
||||
@ -105,7 +105,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
do {
|
||||
intack = hvm_vcpu_has_pending_irq(v);
|
||||
@@ -227,16 +228,34 @@ void vmx_intr_assist(void)
|
||||
@@ -230,16 +231,34 @@ void vmx_intr_assist(void)
|
||||
goto out;
|
||||
|
||||
intblk = hvm_interrupt_blocked(v, intack);
|
||||
@ -145,7 +145,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
{
|
||||
enable_intr_window(v, intack);
|
||||
goto out;
|
||||
@@ -253,6 +272,44 @@ void vmx_intr_assist(void)
|
||||
@@ -256,6 +275,44 @@ void vmx_intr_assist(void)
|
||||
{
|
||||
hvm_inject_hw_exception(TRAP_machine_check, HVM_DELIVER_NO_ERROR_CODE);
|
||||
}
|
||||
@ -190,7 +190,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
else
|
||||
{
|
||||
HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
|
||||
@@ -262,11 +319,16 @@ void vmx_intr_assist(void)
|
||||
@@ -265,11 +322,16 @@ void vmx_intr_assist(void)
|
||||
|
||||
/* Is there another IRQ to queue up behind this one? */
|
||||
intack = hvm_vcpu_has_pending_irq(v);
|
||||
@ -291,7 +291,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -1502,6 +1502,22 @@ static void vmx_set_info_guest(struct vc
|
||||
@@ -1507,6 +1507,22 @@ static void vmx_set_info_guest(struct vc
|
||||
vmx_vmcs_exit(v);
|
||||
}
|
||||
|
||||
@ -314,7 +314,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
static struct hvm_function_table __read_mostly vmx_function_table = {
|
||||
.name = "VMX",
|
||||
.cpu_up_prepare = vmx_cpu_up_prepare,
|
||||
@@ -1548,7 +1564,9 @@ static struct hvm_function_table __read_
|
||||
@@ -1553,7 +1569,9 @@ static struct hvm_function_table __read_
|
||||
.nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
|
||||
.nhvm_vcpu_vmexit_trap = nvmx_vmexit_trap,
|
||||
.nhvm_intr_blocked = nvmx_intr_blocked,
|
||||
@ -325,7 +325,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
};
|
||||
|
||||
struct hvm_function_table * __init start_vmx(void)
|
||||
@@ -2284,6 +2302,17 @@ static int vmx_handle_apic_write(void)
|
||||
@@ -2289,6 +2307,17 @@ static int vmx_handle_apic_write(void)
|
||||
return vlapic_apicv_write(current, offset);
|
||||
}
|
||||
|
||||
@ -343,7 +343,7 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
void vmx_vmexit_handler(struct cpu_user_regs *regs)
|
||||
{
|
||||
unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
|
||||
@@ -2677,6 +2706,16 @@ void vmx_vmexit_handler(struct cpu_user_
|
||||
@@ -2689,6 +2718,16 @@ void vmx_vmexit_handler(struct cpu_user_
|
||||
hvm_inject_hw_exception(TRAP_gp_fault, 0);
|
||||
break;
|
||||
|
||||
|
@ -16,8 +16,10 @@ corresponding x2apic MSRs:
|
||||
Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
@@ -658,7 +658,7 @@ static void vmx_set_host_env(struct vcpu
|
||||
(unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
|
||||
}
|
||||
@ -82,9 +84,11 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
}
|
||||
|
||||
/* I/O access bitmap. */
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -2036,7 +2036,7 @@ static int vmx_msr_write_intercept(unsig
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmx.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -2041,7 +2041,7 @@ static int vmx_msr_write_intercept(unsig
|
||||
for ( ; (rc == 0) && lbr->count; lbr++ )
|
||||
for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
|
||||
if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
|
||||
@ -93,8 +97,10 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
}
|
||||
|
||||
if ( (rc < 0) ||
|
||||
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
@@ -407,7 +407,9 @@ enum vmcs_field {
|
||||
|
||||
#define VMCS_VPID_WIDTH 16
|
||||
@ -106,9 +112,11 @@ Committed-by: Keir Fraser <keir@xen.org>
|
||||
int vmx_read_guest_msr(u32 msr, u64 *val);
|
||||
int vmx_write_guest_msr(u32 msr, u64 val);
|
||||
int vmx_add_guest_msr(u32 msr);
|
||||
--- a/xen/include/asm-x86/msr-index.h
|
||||
+++ b/xen/include/asm-x86/msr-index.h
|
||||
@@ -291,6 +291,9 @@
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/msr-index.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/msr-index.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/msr-index.h
|
||||
@@ -293,6 +293,9 @@
|
||||
#define MSR_IA32_APICBASE_ENABLE (1<<11)
|
||||
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
|
||||
#define MSR_IA32_APICBASE_MSR 0x800
|
||||
|
@ -17,11 +17,11 @@ domain's permission is sufficient.
|
||||
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
Index: xen-4.2.0-testing/xen/arch/x86/mm.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.0-testing/xen/arch/x86/mm.c
|
||||
@@ -883,6 +883,19 @@ get_page_from_l1e(
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/mm.c
|
||||
@@ -884,6 +884,19 @@ get_page_from_l1e(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -24,10 +24,10 @@ would be happy when sync tsc.
|
||||
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
Index: xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/hvm.c
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/arch/x86/hvm/hvm.c
|
||||
+++ xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/hvm.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/hvm.c
|
||||
@@ -244,6 +244,7 @@ int hvm_set_guest_pat(struct vcpu *v, u6
|
||||
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc)
|
||||
{
|
||||
@ -103,10 +103,10 @@ Index: xen-4.2.0-testing/xen/arch/x86/hvm/hvm.c
|
||||
paging_update_paging_modes(v);
|
||||
|
||||
v->arch.flags |= TF_kernel_mode;
|
||||
Index: xen-4.2.0-testing/xen/include/asm-x86/hvm/vcpu.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/hvm/vcpu.h
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/include/asm-x86/hvm/vcpu.h
|
||||
+++ xen-4.2.0-testing/xen/include/asm-x86/hvm/vcpu.h
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/hvm/vcpu.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/hvm/vcpu.h
|
||||
@@ -137,6 +137,7 @@ struct hvm_vcpu {
|
||||
struct hvm_vcpu_asid n1asid;
|
||||
|
||||
@ -115,11 +115,11 @@ Index: xen-4.2.0-testing/xen/include/asm-x86/hvm/vcpu.h
|
||||
|
||||
/* VPMU */
|
||||
struct vpmu_struct vpmu;
|
||||
Index: xen-4.2.0-testing/xen/include/asm-x86/msr-index.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/msr-index.h
|
||||
===================================================================
|
||||
--- xen-4.2.0-testing.orig/xen/include/asm-x86/msr-index.h
|
||||
+++ xen-4.2.0-testing/xen/include/asm-x86/msr-index.h
|
||||
@@ -284,6 +284,7 @@
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/msr-index.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/msr-index.h
|
||||
@@ -286,6 +286,7 @@
|
||||
#define MSR_IA32_PLATFORM_ID 0x00000017
|
||||
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
|
||||
#define MSR_IA32_EBC_FREQUENCY_ID 0x0000002c
|
||||
|
@ -10,8 +10,10 @@ And some initial Haswell ones at once.
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Nakajima, Jun" <jun.nakajima@intel.com>
|
||||
|
||||
--- a/xen/arch/x86/acpi/cpu_idle.c
|
||||
+++ b/xen/arch/x86/acpi/cpu_idle.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/acpi/cpu_idle.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/acpi/cpu_idle.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/acpi/cpu_idle.c
|
||||
@@ -105,11 +105,15 @@ static void do_get_hw_residencies(void *
|
||||
|
||||
switch ( c->x86_model )
|
||||
@ -30,9 +32,11 @@ Acked-by: "Nakajima, Jun" <jun.nakajima@intel.com>
|
||||
GET_PC2_RES(hw_res->pc2);
|
||||
GET_CC7_RES(hw_res->cc7);
|
||||
/* fall through */
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -1820,7 +1820,9 @@ static const struct lbr_info *last_branc
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmx.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -1825,7 +1825,9 @@ static const struct lbr_info *last_branc
|
||||
/* Sandy Bridge */
|
||||
case 42: case 45:
|
||||
/* Ivy Bridge */
|
||||
@ -43,9 +47,11 @@ Acked-by: "Nakajima, Jun" <jun.nakajima@intel.com>
|
||||
return nh_lbr;
|
||||
break;
|
||||
/* Atom */
|
||||
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
|
||||
@@ -747,6 +747,7 @@ int vmx_vpmu_initialise(struct vcpu *v,
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vpmu_core2.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/vmx/vpmu_core2.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vpmu_core2.c
|
||||
@@ -747,6 +747,7 @@ int vmx_vpmu_initialise(struct vcpu *v,
|
||||
case 46:
|
||||
case 47:
|
||||
case 58:
|
||||
|
@ -1,67 +0,0 @@
|
||||
References: bnc#785211
|
||||
|
||||
# HG changeset patch
|
||||
# User Huang Ying <ying.huang@intel.com>
|
||||
# Date 1350401196 -7200
|
||||
# Node ID 4fc87c2f31a02c770655518c9e4d389302564f00
|
||||
# Parent c1c549c4fe9ebdc460cbf51e296edad157b6e518
|
||||
ACPI: fix APEI related table size checking
|
||||
|
||||
On Huang Ying's machine:
|
||||
|
||||
erst_tab->header_length == sizeof(struct acpi_table_einj)
|
||||
|
||||
but Yinghai reported that on his machine,
|
||||
|
||||
erst_tab->header_length == sizeof(struct acpi_table_einj) -
|
||||
sizeof(struct acpi_table_header)
|
||||
|
||||
To make the erst table size checking code work on all systems, both
|
||||
tests are treated as PASS.
|
||||
|
||||
Same situation applies to einj_tab->header_length, so corresponding
|
||||
table size checking is changed in similar way too.
|
||||
|
||||
Originally-by: Yinghai Lu <yinghai@kernel.org>
|
||||
Signed-off-by: Huang Ying <ying.huang@intel.com>
|
||||
|
||||
- use switch() for better readability
|
||||
- add comment explaining why a formally invalid size is also being
|
||||
accepted
|
||||
- check erst_tab->header.length before even looking at
|
||||
erst_tab->header_length
|
||||
- prefer sizeof(*erst_tab) over sizeof(struct acpi_table_erst)
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/acpi/apei/erst.c
|
||||
+++ b/xen/drivers/acpi/apei/erst.c
|
||||
@@ -715,12 +715,23 @@ int erst_clear(u64 record_id)
|
||||
|
||||
static int __init erst_check_table(struct acpi_table_erst *erst_tab)
|
||||
{
|
||||
- if (erst_tab->header_length != sizeof(struct acpi_table_erst))
|
||||
+ if (erst_tab->header.length < sizeof(*erst_tab))
|
||||
return -EINVAL;
|
||||
- if (erst_tab->header.length < sizeof(struct acpi_table_erst))
|
||||
+
|
||||
+ switch (erst_tab->header_length) {
|
||||
+ case sizeof(*erst_tab) - sizeof(erst_tab->header):
|
||||
+ /*
|
||||
+ * While invalid per specification, there are (early?) systems
|
||||
+ * indicating the full header size here, so accept that value too.
|
||||
+ */
|
||||
+ case sizeof(*erst_tab):
|
||||
+ break;
|
||||
+ default:
|
||||
return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
if (erst_tab->entries !=
|
||||
- (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
|
||||
+ (erst_tab->header.length - sizeof(*erst_tab)) /
|
||||
sizeof(struct acpi_erst_entry))
|
||||
return -EINVAL;
|
||||
|
@ -1,95 +0,0 @@
|
||||
References: bnc#785211
|
||||
|
||||
# HG changeset patch
|
||||
# User Huang Ying <ying.huang@intel.com>
|
||||
# Date 1350475926 -7200
|
||||
# Node ID ec8a091efcce717584b00ce76e3cec40a6247ebc
|
||||
# Parent 4b4c0c7a6031820ab521fdd6764cb0df157f44bf
|
||||
ACPI/APEI: fix ERST MOVE_DATA instruction implementation
|
||||
|
||||
The src_base and dst_base fields in apei_exec_context are physical
|
||||
addresses, so they should be ioremapped before being used in ERST
|
||||
MOVE_DATA instruction.
|
||||
|
||||
Reported-by: Javier Martinez Canillas <martinez.javier@gmail.com>
|
||||
Reported-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Huang Ying <ying.huang@intel.com>
|
||||
|
||||
Replace use of ioremap() by __acpi_map_table()/set_fixmap(). Fix error
|
||||
handling.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/acpi/apei/erst.c
|
||||
+++ b/xen/drivers/acpi/apei/erst.c
|
||||
@@ -247,15 +247,64 @@ static int erst_exec_move_data(struct ap
|
||||
{
|
||||
int rc;
|
||||
u64 offset;
|
||||
+#ifdef CONFIG_X86
|
||||
+ enum fixed_addresses idx;
|
||||
+#endif
|
||||
+ void *src, *dst;
|
||||
+
|
||||
+ /* ioremap does not work in interrupt context */
|
||||
+ if (in_irq()) {
|
||||
+ printk(KERN_WARNING
|
||||
+ "MOVE_DATA cannot be used in interrupt context\n");
|
||||
+ return -EBUSY;
|
||||
+ }
|
||||
|
||||
rc = __apei_exec_read_register(entry, &offset);
|
||||
if (rc)
|
||||
return rc;
|
||||
- memmove((void *)(unsigned long)(ctx->dst_base + offset),
|
||||
- (void *)(unsigned long)(ctx->src_base + offset),
|
||||
- ctx->var2);
|
||||
|
||||
- return 0;
|
||||
+#ifdef CONFIG_X86
|
||||
+ switch (ctx->var2) {
|
||||
+ case 0:
|
||||
+ return 0;
|
||||
+ case 1 ... PAGE_SIZE:
|
||||
+ break;
|
||||
+ default:
|
||||
+ printk(KERN_WARNING
|
||||
+ "MOVE_DATA cannot be used for %#"PRIx64" bytes of data\n",
|
||||
+ ctx->var2);
|
||||
+ return -EOPNOTSUPP;
|
||||
+ }
|
||||
+
|
||||
+ src = __acpi_map_table(ctx->src_base + offset, ctx->var2);
|
||||
+#else
|
||||
+ src = ioremap(ctx->src_base + offset, ctx->var2);
|
||||
+#endif
|
||||
+ if (!src)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+#ifdef CONFIG_X86
|
||||
+ BUILD_BUG_ON(FIX_ACPI_PAGES < 4);
|
||||
+ idx = virt_to_fix((unsigned long)src + 2 * PAGE_SIZE);
|
||||
+ offset += ctx->dst_base;
|
||||
+ dst = (void *)fix_to_virt(idx) + (offset & ~PAGE_MASK);
|
||||
+ set_fixmap(idx, offset);
|
||||
+ if (PFN_DOWN(offset) != PFN_DOWN(offset + ctx->var2 - 1)) {
|
||||
+ idx = virt_to_fix((unsigned long)dst + PAGE_SIZE);
|
||||
+ set_fixmap(idx, offset + PAGE_SIZE);
|
||||
+ }
|
||||
+#else
|
||||
+ dst = ioremap(ctx->dst_base + offset, ctx->var2);
|
||||
+#endif
|
||||
+ if (dst) {
|
||||
+ memmove(dst, src, ctx->var2);
|
||||
+ iounmap(dst);
|
||||
+ } else
|
||||
+ rc = -ENOMEM;
|
||||
+
|
||||
+ iounmap(src);
|
||||
+
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
static struct apei_exec_ins_type erst_ins_type[] = {
|
@ -15,9 +15,11 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_init.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_init.c
|
||||
@@ -564,7 +564,7 @@ static hw_irq_controller iommu_msi_type
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_init.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/amd/iommu_init.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_init.c
|
||||
@@ -564,7 +564,7 @@ static hw_irq_controller iommu_msi_type
|
||||
|
||||
static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
|
||||
{
|
||||
@ -47,9 +49,11 @@ Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
}
|
||||
else
|
||||
{
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
@@ -214,6 +214,7 @@ static int device_assigned(u16 seg, u8 b
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
@@ -218,6 +218,7 @@ static int device_assigned(u16 seg, u8 b
|
||||
static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
|
||||
{
|
||||
struct hvm_iommu *hd = domain_hvm_iommu(d);
|
||||
@ -57,7 +61,7 @@ Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
int rc = 0;
|
||||
|
||||
if ( !iommu_enabled || !hd->platform_ops )
|
||||
@@ -227,6 +228,10 @@ static int assign_device(struct domain *
|
||||
@@ -231,6 +232,10 @@ static int assign_device(struct domain *
|
||||
return -EXDEV;
|
||||
|
||||
spin_lock(&pcidevs_lock);
|
||||
@ -68,7 +72,7 @@ Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
|
||||
goto done;
|
||||
|
||||
@@ -378,6 +383,8 @@ int deassign_device(struct domain *d, u1
|
||||
@@ -382,6 +387,8 @@ int deassign_device(struct domain *d, u1
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -77,8 +81,10 @@ Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
if ( !has_arch_pdevs(d) && need_iommu(d) )
|
||||
{
|
||||
d->need_iommu = 0;
|
||||
--- a/xen/drivers/passthrough/pci.c
|
||||
+++ b/xen/drivers/passthrough/pci.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/pci.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
@@ -637,6 +637,36 @@ int __init pci_device_detect(u16 seg, u8
|
||||
return 1;
|
||||
}
|
||||
@ -116,8 +122,10 @@ Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
/*
|
||||
* scan pci devices to add all existed PCI devices to alldevs_list,
|
||||
* and setup pci hierarchy in array bus2bridge.
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -936,7 +936,7 @@ static void __do_iommu_page_fault(struct
|
||||
while (1)
|
||||
{
|
||||
@ -144,9 +152,11 @@ Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
|
||||
fault_index++;
|
||||
if ( fault_index > cap_num_fault_regs(iommu->cap) )
|
||||
--- a/xen/include/xen/pci.h
|
||||
+++ b/xen/include/xen/pci.h
|
||||
@@ -64,6 +64,11 @@ struct pci_dev {
|
||||
Index: xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/pci.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
@@ -65,6 +65,11 @@ struct pci_dev {
|
||||
const u8 devfn;
|
||||
struct pci_dev_info info;
|
||||
struct arch_pci_dev arch;
|
||||
@ -158,7 +168,7 @@ Acked-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
u64 vf_rlen[6];
|
||||
};
|
||||
|
||||
@@ -106,6 +111,7 @@ void arch_pci_ro_device(int seg, int bdf
|
||||
@@ -107,6 +112,7 @@ void arch_pci_ro_device(int seg, int bdf
|
||||
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
|
||||
struct pci_dev *pci_get_pdev_by_domain(
|
||||
struct domain *, int seg, int bus, int devfn);
|
||||
|
@ -1,88 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1353575003 -3600
|
||||
# Node ID c139ca92edca2fab8ec95deb7fd9e4246c3fe28d
|
||||
# Parent af6b72a224e99a4a516fbc2eecc06ada569304e8
|
||||
x86/HPET: fix FSB interrupt masking
|
||||
|
||||
HPET_TN_FSB is not really suitable for masking interrupts - it merely
|
||||
switches between the two delivery methods. The right way of masking is
|
||||
through the HPET_TN_ENABLE bit (which really is an interrupt enable,
|
||||
not a counter enable or some such). This is even more so with certain
|
||||
chip sets not even allowing HPET_TN_FSB to be cleared on some of the
|
||||
channels.
|
||||
|
||||
Further, all the setup of the channel should happen before actually
|
||||
enabling the interrupt, which requires splitting legacy and FSB logic.
|
||||
|
||||
Finally this also fixes an S3 resume problem (HPET_TN_FSB did not get
|
||||
set in hpet_broadcast_resume(), and hpet_msi_unmask() doesn't get
|
||||
called from the general resume code either afaict).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hpet.c
|
||||
+++ b/xen/arch/x86/hpet.c
|
||||
@@ -236,7 +236,7 @@ static void hpet_msi_unmask(struct irq_d
|
||||
struct hpet_event_channel *ch = desc->action->dev_id;
|
||||
|
||||
cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
|
||||
- cfg |= HPET_TN_FSB;
|
||||
+ cfg |= HPET_TN_ENABLE;
|
||||
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
|
||||
}
|
||||
|
||||
@@ -246,7 +246,7 @@ static void hpet_msi_mask(struct irq_des
|
||||
struct hpet_event_channel *ch = desc->action->dev_id;
|
||||
|
||||
cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
|
||||
- cfg &= ~HPET_TN_FSB;
|
||||
+ cfg &= ~HPET_TN_ENABLE;
|
||||
hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
|
||||
}
|
||||
|
||||
@@ -319,8 +319,14 @@ static void __hpet_setup_msi_irq(struct
|
||||
static int __init hpet_setup_msi_irq(unsigned int irq, struct hpet_event_channel *ch)
|
||||
{
|
||||
int ret;
|
||||
+ u32 cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
|
||||
irq_desc_t *desc = irq_to_desc(irq);
|
||||
|
||||
+ /* set HPET Tn as oneshot */
|
||||
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
|
||||
+ cfg |= HPET_TN_FSB | HPET_TN_32BIT;
|
||||
+ hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
|
||||
+
|
||||
desc->handler = &hpet_msi_type;
|
||||
ret = request_irq(irq, hpet_interrupt_handler, 0, "HPET", ch);
|
||||
if ( ret < 0 )
|
||||
@@ -541,11 +547,14 @@ void __init hpet_broadcast_init(void)
|
||||
|
||||
for ( i = 0; i < n; i++ )
|
||||
{
|
||||
- /* set HPET Tn as oneshot */
|
||||
- cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
|
||||
- cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
|
||||
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
|
||||
- hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
|
||||
+ if ( i == 0 && (cfg & HPET_CFG_LEGACY) )
|
||||
+ {
|
||||
+ /* set HPET T0 as oneshot */
|
||||
+ cfg = hpet_read32(HPET_Tn_CFG(0));
|
||||
+ cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
|
||||
+ cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
|
||||
+ hpet_write32(cfg, HPET_Tn_CFG(0));
|
||||
+ }
|
||||
|
||||
/*
|
||||
* The period is a femto seconds value. We need to calculate the scaled
|
||||
@@ -602,6 +611,8 @@ void hpet_broadcast_resume(void)
|
||||
cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
|
||||
cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
|
||||
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
|
||||
+ if ( !(hpet_events[i].flags & HPET_EVT_LEGACY) )
|
||||
+ cfg |= HPET_TN_FSB;
|
||||
hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
|
||||
|
||||
hpet_events[i].next_event = STIME_MAX;
|
@ -1,28 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1354118456 -3600
|
||||
# Node ID 836697b197462f89a4d296da9482d1719dcc0836
|
||||
# Parent 1fce7522daa6bab9fce93b95adf592193c904097
|
||||
IOMMU: imply "verbose" from "debug"
|
||||
|
||||
I think that generally enabling debugging code without also enabling
|
||||
verbose output is rather pointless; if someone really wants this, they
|
||||
can always pass e.g. "iommu=debug,no-verbose".
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
@@ -91,7 +91,11 @@ static void __init parse_iommu_param(cha
|
||||
else if ( !strcmp(s, "intremap") )
|
||||
iommu_intremap = val;
|
||||
else if ( !strcmp(s, "debug") )
|
||||
+ {
|
||||
iommu_debug = val;
|
||||
+ if ( val )
|
||||
+ iommu_verbose = 1;
|
||||
+ }
|
||||
else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
|
||||
amd_iommu_perdev_intremap = val;
|
||||
else if ( !strcmp(s, "dom0-passthrough") )
|
@ -1,52 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1354697534 -3600
|
||||
# Node ID 670b07e8d7382229639af0d1df30071e6c1ebb19
|
||||
# Parent bc624b00d6d601f00a53c2f7502a82dcef60f882
|
||||
IOMMU/ATS: fix maximum queue depth calculation
|
||||
|
||||
The capabilities register field is a 5-bit value, and the 5 bits all
|
||||
being zero actually means 32 entries.
|
||||
|
||||
Under the assumption that amd_iommu_flush_iotlb() really just tried
|
||||
to correct for the miscalculation above when adding 32 to the value,
|
||||
that adjustment is also being removed.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
|
||||
Acked-by: Wei Huang <wei.huang2@amd.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
|
||||
@@ -321,7 +321,7 @@ void amd_iommu_flush_iotlb(struct pci_de
|
||||
|
||||
req_id = get_dma_requestor_id(iommu->seg, bdf);
|
||||
queueid = req_id;
|
||||
- maxpend = (ats_pdev->ats_queue_depth + 32) & 0xff;
|
||||
+ maxpend = ats_pdev->ats_queue_depth & 0xff;
|
||||
|
||||
/* send INVALIDATE_IOTLB_PAGES command */
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
--- a/xen/drivers/passthrough/ats.h
|
||||
+++ b/xen/drivers/passthrough/ats.h
|
||||
@@ -30,7 +30,7 @@ struct pci_ats_dev {
|
||||
|
||||
#define ATS_REG_CAP 4
|
||||
#define ATS_REG_CTL 6
|
||||
-#define ATS_QUEUE_DEPTH_MASK 0xF
|
||||
+#define ATS_QUEUE_DEPTH_MASK 0x1f
|
||||
#define ATS_ENABLE (1<<15)
|
||||
|
||||
extern struct list_head ats_devices;
|
||||
--- a/xen/drivers/passthrough/x86/ats.c
|
||||
+++ b/xen/drivers/passthrough/x86/ats.c
|
||||
@@ -93,7 +93,8 @@ int enable_ats_device(int seg, int bus,
|
||||
pdev->devfn = devfn;
|
||||
value = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
|
||||
PCI_FUNC(devfn), pos + ATS_REG_CAP);
|
||||
- pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK;
|
||||
+ pdev->ats_queue_depth = value & ATS_QUEUE_DEPTH_MASK ?:
|
||||
+ ATS_QUEUE_DEPTH_MASK + 1;
|
||||
list_add(&pdev->list, &ats_devices);
|
||||
}
|
||||
|
@ -1,28 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
# Date 1354812866 0
|
||||
# Node ID 312f0713dfc98635fd9ed4b42481581489faa28f
|
||||
# Parent bfd8e96fa3f157630f9698401a1f040ca1776c8e
|
||||
nested vmx: fix rflags status in virtual vmexit
|
||||
|
||||
As stated in SDM, all bits (except for those 1-reserved) in rflags
|
||||
would be set to 0 in VM exit. Therefore we need to follow this logic
|
||||
in virtual_vmexit.
|
||||
|
||||
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
|
||||
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
Acked-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
@@ -990,7 +990,8 @@ static void virtual_vmexit(struct cpu_us
|
||||
|
||||
regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
|
||||
regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
|
||||
- regs->eflags = __vmread(GUEST_RFLAGS);
|
||||
+ /* VM exit clears all bits except bit 1 */
|
||||
+ regs->eflags = 0x2;
|
||||
|
||||
/* updating host cr0 to sync TS bit */
|
||||
__vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
|
@ -1,46 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
# Date 1354812981 0
|
||||
# Node ID a09150b57ace2fa786dcaefa958f0b197b1b6d4c
|
||||
# Parent 312f0713dfc98635fd9ed4b42481581489faa28f
|
||||
nested vmx: fix handling of RDTSC
|
||||
|
||||
If L0 is to handle the TSC access, then we need to update guest EIP by
|
||||
calling update_guest_eip().
|
||||
|
||||
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
Acked-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -1613,7 +1613,7 @@ static int get_instruction_length(void)
|
||||
return len;
|
||||
}
|
||||
|
||||
-static void update_guest_eip(void)
|
||||
+void update_guest_eip(void)
|
||||
{
|
||||
struct cpu_user_regs *regs = guest_cpu_user_regs();
|
||||
unsigned long x;
|
||||
--- a/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
@@ -1558,6 +1558,7 @@ int nvmx_n2_vmexit_handler(struct cpu_us
|
||||
tsc += __get_vvmcs(nvcpu->nv_vvmcx, TSC_OFFSET);
|
||||
regs->eax = (uint32_t)tsc;
|
||||
regs->edx = (uint32_t)(tsc >> 32);
|
||||
+ update_guest_eip();
|
||||
|
||||
return 1;
|
||||
}
|
||||
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
|
||||
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
|
||||
@@ -396,6 +396,8 @@ void ept_p2m_init(struct p2m_domain *p2m
|
||||
void ept_walk_table(struct domain *d, unsigned long gfn);
|
||||
void setup_ept_dump(void);
|
||||
|
||||
+void update_guest_eip(void);
|
||||
+
|
||||
/* EPT violation qualifications definitions */
|
||||
#define _EPT_READ_VIOLATION 0
|
||||
#define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION)
|
@ -1,27 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
# Date 1354813009 0
|
||||
# Node ID e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
|
||||
# Parent a09150b57ace2fa786dcaefa958f0b197b1b6d4c
|
||||
nested vmx: fix DR access VM exit
|
||||
|
||||
For DR register, we use lazy restore mechanism when access
|
||||
it. Therefore when receiving such VM exit, L0 should be responsible to
|
||||
switch to the right DR values, then inject to L1 hypervisor.
|
||||
|
||||
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
Acked-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
@@ -1585,7 +1585,8 @@ int nvmx_n2_vmexit_handler(struct cpu_us
|
||||
break;
|
||||
case EXIT_REASON_DR_ACCESS:
|
||||
ctrl = __n2_exec_control(v);
|
||||
- if ( ctrl & CPU_BASED_MOV_DR_EXITING )
|
||||
+ if ( (ctrl & CPU_BASED_MOV_DR_EXITING) &&
|
||||
+ v->arch.hvm_vcpu.flag_dr_dirty )
|
||||
nvcpu->nv_vmexit_pending = 1;
|
||||
break;
|
||||
case EXIT_REASON_INVLPG:
|
@ -1,30 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
# Date 1354813046 0
|
||||
# Node ID 1ed1507fa0407f1da715d04fe1b510e81ca4fb31
|
||||
# Parent e6eb1e52da7cfcb1a7697b35b4d842f35107d1ed
|
||||
nested vmx: enable IA32E mode while do VM entry
|
||||
|
||||
Some VMMs may check the platform capability to judge whether long
|
||||
mode guest is supported. Therefore we need to expose this bit to
|
||||
guest VMM.
|
||||
|
||||
Xen on Xen works fine in current solution because Xen doesn't
|
||||
check this capability but directly set it in VMCS if guest
|
||||
supports long mode.
|
||||
|
||||
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
Acked-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
|
||||
@@ -1351,7 +1351,7 @@ int nvmx_msr_read_intercept(unsigned int
|
||||
case MSR_IA32_VMX_ENTRY_CTLS:
|
||||
/* bit 0-8, and 12 must be 1 (refer G5 of SDM) */
|
||||
data = 0x11ff;
|
||||
- data = (data << 32) | data;
|
||||
+ data = ((data | VM_ENTRY_IA32E_MODE) << 32) | data;
|
||||
break;
|
||||
|
||||
case IA32_FEATURE_CONTROL_MSR:
|
@ -1,45 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
# Date 1354813139 0
|
||||
# Node ID 90831c29bfde6aac013b7e5ec98934a4953c31c9
|
||||
# Parent 25dd352265ca23750f1a1a983124b36f518c4384
|
||||
nested vmx: fix interrupt delivery to L2 guest
|
||||
|
||||
While delivering interrupt into L2 guest, L0 hypervisor need to check
|
||||
whether L1 hypervisor wants to own the interrupt, if not, directly
|
||||
inject the interrupt into L2 guest.
|
||||
|
||||
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
|
||||
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
Acked-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/intr.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/intr.c
|
||||
@@ -163,7 +163,7 @@ enum hvm_intblk nvmx_intr_blocked(struct
|
||||
|
||||
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
|
||||
{
|
||||
- u32 exit_ctrl;
|
||||
+ u32 ctrl;
|
||||
|
||||
if ( nvmx_intr_blocked(v) != hvm_intblk_none )
|
||||
{
|
||||
@@ -176,11 +176,14 @@ static int nvmx_intr_intercept(struct vc
|
||||
if ( intack.source == hvm_intsrc_pic ||
|
||||
intack.source == hvm_intsrc_lapic )
|
||||
{
|
||||
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
|
||||
+ if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
|
||||
+ return 0;
|
||||
+
|
||||
vmx_inject_extint(intack.vector);
|
||||
|
||||
- exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
|
||||
- VM_EXIT_CONTROLS);
|
||||
- if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
|
||||
+ ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
|
||||
+ if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
|
||||
{
|
||||
/* for now, duplicate the ack path in vmx_intr_assist */
|
||||
hvm_vcpu_ack_pending_irq(v, intack);
|
@ -1,70 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1355134467 -3600
|
||||
# Node ID 8d209624ea83b272e1ebd713a928c38d4782f4f1
|
||||
# Parent f96a0cda12160f497981a37f6922a1ed7db9a462
|
||||
scheduler: fix rate limit range checking
|
||||
|
||||
For one, neither of the two checks permitted for the documented value
|
||||
of zero (disabling the functionality altogether).
|
||||
|
||||
Second, the range checking of the command line parameter was done by
|
||||
the credit scheduler's initialization code, despite it being a generic
|
||||
scheduler option.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/common/sched_credit.c
|
||||
+++ b/xen/common/sched_credit.c
|
||||
@@ -846,8 +846,9 @@ csched_sys_cntl(const struct scheduler *
|
||||
case XEN_SYSCTL_SCHEDOP_putinfo:
|
||||
if (params->tslice_ms > XEN_SYSCTL_CSCHED_TSLICE_MAX
|
||||
|| params->tslice_ms < XEN_SYSCTL_CSCHED_TSLICE_MIN
|
||||
- || params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
|
||||
- || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN
|
||||
+ || (params->ratelimit_us
|
||||
+ && (params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
|
||||
+ || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN))
|
||||
|| MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) )
|
||||
goto out;
|
||||
prv->tslice_ms = params->tslice_ms;
|
||||
@@ -1607,17 +1608,6 @@ csched_init(struct scheduler *ops)
|
||||
sched_credit_tslice_ms = CSCHED_DEFAULT_TSLICE_MS;
|
||||
}
|
||||
|
||||
- if ( sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
|
||||
- || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN )
|
||||
- {
|
||||
- printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
|
||||
- " Resetting to default %u\n",
|
||||
- XEN_SYSCTL_SCHED_RATELIMIT_MIN,
|
||||
- XEN_SYSCTL_SCHED_RATELIMIT_MAX,
|
||||
- SCHED_DEFAULT_RATELIMIT_US);
|
||||
- sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
|
||||
- }
|
||||
-
|
||||
prv->tslice_ms = sched_credit_tslice_ms;
|
||||
prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE;
|
||||
if ( prv->tslice_ms < prv->ticks_per_tslice )
|
||||
--- a/xen/common/schedule.c
|
||||
+++ b/xen/common/schedule.c
|
||||
@@ -1322,6 +1322,18 @@ void __init scheduler_init(void)
|
||||
if ( SCHED_OP(&ops, init) )
|
||||
panic("scheduler returned error on init\n");
|
||||
|
||||
+ if ( sched_ratelimit_us &&
|
||||
+ (sched_ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
|
||||
+ || sched_ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN) )
|
||||
+ {
|
||||
+ printk("WARNING: sched_ratelimit_us outside of valid range [%d,%d].\n"
|
||||
+ " Resetting to default %u\n",
|
||||
+ XEN_SYSCTL_SCHED_RATELIMIT_MIN,
|
||||
+ XEN_SYSCTL_SCHED_RATELIMIT_MAX,
|
||||
+ SCHED_DEFAULT_RATELIMIT_US);
|
||||
+ sched_ratelimit_us = SCHED_DEFAULT_RATELIMIT_US;
|
||||
+ }
|
||||
+
|
||||
idle_domain = domain_create(DOMID_IDLE, 0, 0);
|
||||
BUG_ON(IS_ERR(idle_domain));
|
||||
idle_domain->vcpu = idle_vcpu;
|
@ -1,82 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Dario Faggioli <dario.faggioli@citrix.com>
|
||||
# Date 1355854218 0
|
||||
# Node ID 127c2c47d440eb7f3248ab5561909e326af7e328
|
||||
# Parent d5c0389bf26c89969ebce71927f34f6b923af949
|
||||
xen: sched_credit: improve picking up the idle CPU for a VCPU
|
||||
|
||||
In _csched_cpu_pick() we try to select the best possible CPU for
|
||||
running a VCPU, considering the characteristics of the underlying
|
||||
hardware (i.e., how many threads, core, sockets, and how busy they
|
||||
are). What we want is "the idle execution vehicle with the most
|
||||
idling neighbours in its grouping".
|
||||
|
||||
In order to achieve it, we select a CPU from the VCPU's affinity,
|
||||
giving preference to its current processor if possible, as the basis
|
||||
for the comparison with all the other CPUs. Problem is, to discount
|
||||
the VCPU itself when computing this "idleness" (in an attempt to be
|
||||
fair wrt its current processor), we arbitrarily and unconditionally
|
||||
consider that selected CPU as idle, even when it is not the case,
|
||||
for instance:
|
||||
1. If the CPU is not the one where the VCPU is running (perhaps due
|
||||
to the affinity being changed);
|
||||
2. The CPU is where the VCPU is running, but it has other VCPUs in
|
||||
its runq, so it won't go idle even if the VCPU in question goes.
|
||||
|
||||
This is exemplified in the trace below:
|
||||
|
||||
] 3.466115364 x|------|------| d10v1 22005(2:2:5) 3 [ a 1 8 ]
|
||||
... ... ...
|
||||
3.466122856 x|------|------| d10v1 runstate_change d10v1
|
||||
running->offline
|
||||
3.466123046 x|------|------| d?v? runstate_change d32767v0
|
||||
runnable->running
|
||||
... ... ...
|
||||
] 3.466126887 x|------|------| d32767v0 28004(2:8:4) 3 [ a 1 8 ]
|
||||
|
||||
22005(...) line (the first line) means _csched_cpu_pick() was called
|
||||
on VCPU 1 of domain 10, while it is running on CPU 0, and it chose
|
||||
CPU 8, which is busy ('|'), even if there are plenty of idle
|
||||
CPUs. That is because, as a consequence of changing the VCPU affinity,
|
||||
CPU 8 was chosen as the basis for the comparison, and therefore
|
||||
considered idle (its bit gets unconditionally set in the bitmask
|
||||
representing the idle CPUs). 28004(...) line means the VCPU is woken
|
||||
up and queued on CPU 8's runq, where it waits for a context switch or
|
||||
a migration, in order to be able to execute.
|
||||
|
||||
This change fixes things by only considering the "guessed" CPU idle if
|
||||
the VCPU in question is both running there and is its only runnable
|
||||
VCPU.
|
||||
|
||||
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
|
||||
Acked-by: George Dunlap <george.dunlap@citrix.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/common/sched_credit.c
|
||||
+++ b/xen/common/sched_credit.c
|
||||
@@ -72,6 +72,9 @@
|
||||
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
|
||||
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
|
||||
#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
|
||||
+/* Is the first element of _cpu's runq its idle vcpu? */
|
||||
+#define IS_RUNQ_IDLE(_cpu) (list_empty(RUNQ(_cpu)) || \
|
||||
+ is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu))
|
||||
|
||||
|
||||
/*
|
||||
@@ -487,9 +490,14 @@ _csched_cpu_pick(const struct scheduler
|
||||
* distinct cores first and guarantees we don't do something stupid
|
||||
* like run two VCPUs on co-hyperthreads while there are idle cores
|
||||
* or sockets.
|
||||
+ *
|
||||
+ * Notice that, when computing the "idleness" of cpu, we may want to
|
||||
+ * discount vc. That is, iff vc is the currently running and the only
|
||||
+ * runnable vcpu on cpu, we add cpu to the idlers.
|
||||
*/
|
||||
cpumask_and(&idlers, &cpu_online_map, CSCHED_PRIV(ops)->idlers);
|
||||
- cpumask_set_cpu(cpu, &idlers);
|
||||
+ if ( vc->processor == cpu && IS_RUNQ_IDLE(cpu) )
|
||||
+ cpumask_set_cpu(cpu, &idlers);
|
||||
cpumask_and(&cpus, &cpus, &idlers);
|
||||
cpumask_clear_cpu(cpu, &cpus);
|
||||
|
@ -1,71 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Andre Przywara <osp@andrep.de>
|
||||
# Date 1355913729 -3600
|
||||
# Node ID 5fb0b8b838dab0b331abfa675fd2b2214ac90760
|
||||
# Parent b04de677de31f26ba4b8f2f382ca4dfffcff9a79
|
||||
x86, amd: Disable way access filter on Piledriver CPUs
|
||||
|
||||
The Way Access Filter in recent AMD CPUs may hurt the performance of
|
||||
some workloads, caused by aliasing issues in the L1 cache.
|
||||
This patch disables it on the affected CPUs.
|
||||
|
||||
The issue is similar to the one from last year:
|
||||
http://lkml.indiana.edu/hypermail/linux/kernel/1107.3/00041.html
|
||||
This new patch does not replace the old one, we just need another
|
||||
quirk for newer CPUs.
|
||||
|
||||
The performance penalty without the patch depends on the
|
||||
circumstances, but is a bit less than last year's 3%.
|
||||
|
||||
The workloads affected would be those that access code from the same
|
||||
physical page under different virtual addresses, so different
|
||||
processes using the same libraries with ASLR or multiple instances of
|
||||
PIE-binaries. The code needs to be accessed simultaneously from both
|
||||
cores of the same compute unit.
|
||||
|
||||
More details can be found here:
|
||||
http://developer.amd.com/Assets/SharedL1InstructionCacheonAMD15hCPU.pdf
|
||||
|
||||
CPUs affected are anything with the core known as Piledriver.
|
||||
That includes the new parts of the AMD A-Series (aka Trinity) and the
|
||||
just released new CPUs of the FX-Series (aka Vishera).
|
||||
The model numbering is a bit odd here: FX CPUs have model 2,
|
||||
A-Series has model 10h, with possible extensions to 1Fh. Hence the
|
||||
range of model ids.
|
||||
|
||||
Signed-off-by: Andre Przywara <osp@andrep.de>
|
||||
|
||||
Add and use MSR_AMD64_IC_CFG. Update the value whenever it is found to
|
||||
not have all bits set, rather than just when it's zero.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/cpu/amd.c
|
||||
+++ b/xen/arch/x86/cpu/amd.c
|
||||
@@ -493,6 +493,14 @@ static void __devinit init_amd(struct cp
|
||||
}
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * The way access filter has a performance penalty on some workloads.
|
||||
+ * Disable it on the affected CPUs.
|
||||
+ */
|
||||
+ if (c->x86 == 0x15 && c->x86_model >= 0x02 && c->x86_model < 0x20 &&
|
||||
+ !rdmsr_safe(MSR_AMD64_IC_CFG, value) && (value & 0x1e) != 0x1e)
|
||||
+ wrmsr_safe(MSR_AMD64_IC_CFG, value | 0x1e);
|
||||
+
|
||||
amd_get_topology(c);
|
||||
|
||||
/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
|
||||
--- a/xen/include/asm-x86/msr-index.h
|
||||
+++ b/xen/include/asm-x86/msr-index.h
|
||||
@@ -206,6 +206,7 @@
|
||||
|
||||
/* AMD64 MSRs */
|
||||
#define MSR_AMD64_NB_CFG 0xc001001f
|
||||
+#define MSR_AMD64_IC_CFG 0xc0011021
|
||||
#define MSR_AMD64_DC_CFG 0xc0011022
|
||||
#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46
|
||||
|
@ -1,45 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
# Date 1357290407 -3600
|
||||
# Node ID 8fd5635f451b073ddc99e928c975e8a7743d1321
|
||||
# Parent c4114a042410d3bdec3a77c30b2e85366d7fbe1d
|
||||
passthrough/domctl: use correct struct in union
|
||||
|
||||
This appears to be a copy paste error from c/s 23861:ec7c81fbe0de.
|
||||
|
||||
It is safe, functionally speaking, as both the xen_domctl_assign_device
|
||||
and xen_domctl_get_device_group structure start with a 'uint32_t
|
||||
machine_sbdf'. We should however use the correct union structure.
|
||||
|
||||
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
@@ -592,7 +592,7 @@ int iommu_do_domctl(
|
||||
if ( ret )
|
||||
break;
|
||||
|
||||
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
|
||||
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
|
||||
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
|
||||
devfn = domctl->u.assign_device.machine_sbdf & 0xff;
|
||||
|
||||
@@ -621,7 +621,7 @@ int iommu_do_domctl(
|
||||
if ( ret )
|
||||
goto assign_device_out;
|
||||
|
||||
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
|
||||
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
|
||||
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
|
||||
devfn = domctl->u.assign_device.machine_sbdf & 0xff;
|
||||
|
||||
@@ -649,7 +649,7 @@ int iommu_do_domctl(
|
||||
if ( ret )
|
||||
goto deassign_device_out;
|
||||
|
||||
- seg = domctl->u.get_device_group.machine_sbdf >> 16;
|
||||
+ seg = domctl->u.assign_device.machine_sbdf >> 16;
|
||||
bus = (domctl->u.assign_device.machine_sbdf >> 8) & 0xff;
|
||||
devfn = domctl->u.assign_device.machine_sbdf & 0xff;
|
||||
|
@ -12,9 +12,11 @@ IOMMU: adjust (re)assign operation parameters
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
@@ -328,34 +328,31 @@ void amd_iommu_disable_domain_device(str
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
@@ -333,34 +333,31 @@ void amd_iommu_disable_domain_device(str
|
||||
disable_ats_device(iommu->seg, bus, devfn);
|
||||
}
|
||||
|
||||
@ -59,7 +61,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
/* IO page tables might be destroyed after pci-detach the last device
|
||||
* In this case, we have to re-allocate root table for next pci-attach.*/
|
||||
@@ -364,17 +361,18 @@ static int reassign_device( struct domai
|
||||
@@ -369,17 +366,18 @@ static int reassign_device( struct domai
|
||||
|
||||
amd_iommu_setup_domain_device(target, iommu, bdf);
|
||||
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
|
||||
@ -83,7 +85,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
if ( ivrs_mappings[req_id].unity_map_enable )
|
||||
{
|
||||
@@ -386,7 +384,7 @@ static int amd_iommu_assign_device(struc
|
||||
@@ -391,7 +389,7 @@ static int amd_iommu_assign_device(struc
|
||||
ivrs_mappings[req_id].read_permission);
|
||||
}
|
||||
|
||||
@ -92,7 +94,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
}
|
||||
|
||||
static void deallocate_next_page_table(struct page_info* pg, int level)
|
||||
@@ -451,12 +449,6 @@ static void amd_iommu_domain_destroy(str
|
||||
@@ -456,12 +454,6 @@ static void amd_iommu_domain_destroy(str
|
||||
amd_iommu_flush_all_pages(d);
|
||||
}
|
||||
|
||||
@ -105,7 +107,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
static int amd_iommu_add_device(struct pci_dev *pdev)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
@@ -596,7 +588,7 @@ const struct iommu_ops amd_iommu_ops = {
|
||||
@@ -601,7 +593,7 @@ const struct iommu_ops amd_iommu_ops = {
|
||||
.teardown = amd_iommu_domain_destroy,
|
||||
.map_page = amd_iommu_map_page,
|
||||
.unmap_page = amd_iommu_unmap_page,
|
||||
@ -114,8 +116,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
.get_device_group_id = amd_iommu_group_id,
|
||||
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
|
||||
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
@@ -232,11 +232,16 @@ static int assign_device(struct domain *
|
||||
return -EXDEV;
|
||||
|
||||
@ -158,8 +162,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
if ( ret )
|
||||
{
|
||||
dprintk(XENLOG_ERR VTDPREFIX,
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -1689,17 +1689,10 @@ out:
|
||||
static int reassign_device_ownership(
|
||||
struct domain *source,
|
||||
@ -203,7 +209,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2207,36 +2203,26 @@ int __init intel_vtd_setup(void)
|
||||
@@ -2222,36 +2218,26 @@ int __init intel_vtd_setup(void)
|
||||
}
|
||||
|
||||
static int intel_iommu_assign_device(
|
||||
@ -247,8 +253,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
{
|
||||
ret = 0;
|
||||
goto done;
|
||||
--- a/xen/include/xen/iommu.h
|
||||
+++ b/xen/include/xen/iommu.h
|
||||
Index: xen-4.2.2-testing/xen/include/xen/iommu.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/iommu.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/iommu.h
|
||||
@@ -123,13 +123,13 @@ struct iommu_ops {
|
||||
int (*add_device)(struct pci_dev *pdev);
|
||||
int (*enable_device)(struct pci_dev *pdev);
|
||||
|
@ -12,8 +12,10 @@ IOMMU: adjust add/remove operation parameters
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
@@ -83,14 +83,14 @@ static void disable_translation(u32 *dte
|
||||
}
|
||||
|
||||
@ -96,7 +98,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
}
|
||||
|
||||
int __init amd_iov_detect(void)
|
||||
@@ -291,16 +290,16 @@ static void __init amd_iommu_dom0_init(s
|
||||
@@ -296,16 +295,16 @@ static void __init amd_iommu_dom0_init(s
|
||||
}
|
||||
|
||||
void amd_iommu_disable_domain_device(struct domain *domain,
|
||||
@ -117,7 +119,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
|
||||
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
@@ -308,7 +307,7 @@ void amd_iommu_disable_domain_device(str
|
||||
@@ -313,7 +312,7 @@ void amd_iommu_disable_domain_device(str
|
||||
{
|
||||
disable_translation((u32 *)dte);
|
||||
|
||||
@ -126,7 +128,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
|
||||
iommu_dte_set_iotlb((u32 *)dte, 0);
|
||||
|
||||
@@ -323,7 +322,8 @@ void amd_iommu_disable_domain_device(str
|
||||
@@ -328,7 +327,8 @@ void amd_iommu_disable_domain_device(str
|
||||
|
||||
ASSERT(spin_is_locked(&pcidevs_lock));
|
||||
|
||||
@ -136,7 +138,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
pci_ats_enabled(iommu->seg, bus, devfn) )
|
||||
disable_ats_device(iommu->seg, bus, devfn);
|
||||
}
|
||||
@@ -346,7 +346,7 @@ static int reassign_device(struct domain
|
||||
@@ -351,7 +351,7 @@ static int reassign_device(struct domain
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
@ -145,7 +147,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
if ( devfn == pdev->devfn )
|
||||
{
|
||||
@@ -359,7 +359,7 @@ static int reassign_device(struct domain
|
||||
@@ -364,7 +364,7 @@ static int reassign_device(struct domain
|
||||
if ( t->root_table == NULL )
|
||||
allocate_domain_resources(t);
|
||||
|
||||
@ -154,7 +156,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
|
||||
pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
|
||||
source->domain_id, target->domain_id);
|
||||
@@ -449,7 +449,7 @@ static void amd_iommu_domain_destroy(str
|
||||
@@ -454,7 +454,7 @@ static void amd_iommu_domain_destroy(str
|
||||
amd_iommu_flush_all_pages(d);
|
||||
}
|
||||
|
||||
@ -163,7 +165,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
u16 bdf;
|
||||
@@ -462,16 +462,16 @@ static int amd_iommu_add_device(struct p
|
||||
@@ -467,16 +467,16 @@ static int amd_iommu_add_device(struct p
|
||||
{
|
||||
AMD_IOMMU_DEBUG("Fail to find iommu."
|
||||
" %04x:%02x:%02x.%u cannot be assigned to dom%d\n",
|
||||
@ -184,7 +186,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
u16 bdf;
|
||||
@@ -484,12 +484,12 @@ static int amd_iommu_remove_device(struc
|
||||
@@ -489,12 +489,12 @@ static int amd_iommu_remove_device(struc
|
||||
{
|
||||
AMD_IOMMU_DEBUG("Fail to find iommu."
|
||||
" %04x:%02x:%02x.%u cannot be removed from dom%d\n",
|
||||
@ -200,8 +202,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
return 0;
|
||||
}
|
||||
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
@@ -167,7 +167,7 @@ int iommu_add_device(struct pci_dev *pde
|
||||
if ( !iommu_enabled || !hd->platform_ops )
|
||||
return 0;
|
||||
@ -220,8 +224,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/xen/drivers/passthrough/pci.c
|
||||
+++ b/xen/drivers/passthrough/pci.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/pci.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
@@ -715,7 +715,7 @@ int __init scan_pci_devices(void)
|
||||
|
||||
struct setup_dom0 {
|
||||
@ -249,8 +255,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
{
|
||||
struct setup_dom0 ctxt = { .d = d, .handler = handler };
|
||||
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -52,7 +52,7 @@ int nr_iommus;
|
||||
|
||||
static struct tasklet vtd_fault_tasklet;
|
||||
@ -260,7 +268,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
static void setup_dom0_rmrr(struct domain *d);
|
||||
|
||||
static int domain_iommu_domid(struct domain *d,
|
||||
@@ -1904,7 +1904,7 @@ static int rmrr_identity_mapping(struct
|
||||
@@ -1904,7 +1904,7 @@ static int rmrr_identity_mapping(struct
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -326,8 +334,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
}
|
||||
|
||||
void clear_fault_bits(struct iommu *iommu)
|
||||
--- a/xen/include/xen/iommu.h
|
||||
+++ b/xen/include/xen/iommu.h
|
||||
Index: xen-4.2.2-testing/xen/include/xen/iommu.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/iommu.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/iommu.h
|
||||
@@ -120,9 +120,9 @@ bool_t pt_irq_need_timer(uint32_t flags)
|
||||
struct iommu_ops {
|
||||
int (*init)(struct domain *d);
|
||||
@ -340,9 +350,11 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *);
|
||||
void (*teardown)(struct domain *d);
|
||||
int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
|
||||
--- a/xen/include/xen/pci.h
|
||||
+++ b/xen/include/xen/pci.h
|
||||
@@ -100,7 +100,8 @@ struct pci_dev *pci_lock_pdev(int seg, i
|
||||
Index: xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/pci.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
@@ -101,7 +101,8 @@ struct pci_dev *pci_lock_pdev(int seg, i
|
||||
struct pci_dev *pci_lock_domain_pdev(
|
||||
struct domain *, int seg, int bus, int devfn);
|
||||
|
||||
|
@ -12,8 +12,10 @@ VT-d: adjust context map/unmap parameters
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/vtd/extern.h
|
||||
+++ b/xen/drivers/passthrough/vtd/extern.h
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/extern.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/extern.h
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/extern.h
|
||||
@@ -95,7 +95,7 @@ void free_pgtable_maddr(u64 maddr);
|
||||
void *map_vtd_domain_page(u64 maddr);
|
||||
void unmap_vtd_domain_page(void *va);
|
||||
@ -23,8 +25,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
|
||||
u8 bus, u8 devfn);
|
||||
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -1308,7 +1308,7 @@ static void __init intel_iommu_dom0_init
|
||||
int domain_context_mapping_one(
|
||||
struct domain *domain,
|
||||
@ -157,7 +161,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
if ( ret )
|
||||
{
|
||||
dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n",
|
||||
@@ -1975,14 +1973,14 @@ static int intel_iommu_remove_device(u8
|
||||
@@ -1975,14 +1973,14 @@ static int intel_iommu_remove_device(u8
|
||||
}
|
||||
}
|
||||
|
||||
@ -174,9 +178,11 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
if ( !err && devfn == pdev->devfn )
|
||||
pci_vtd_quirk(pdev);
|
||||
return err;
|
||||
--- a/xen/drivers/passthrough/vtd/quirks.c
|
||||
+++ b/xen/drivers/passthrough/vtd/quirks.c
|
||||
@@ -292,7 +292,7 @@ static void map_me_phantom_function(stru
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/quirks.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/quirks.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/quirks.c
|
||||
@@ -319,7 +319,7 @@ static void map_me_phantom_function(stru
|
||||
/* map or unmap ME phantom function */
|
||||
if ( map )
|
||||
domain_context_mapping_one(domain, drhd->iommu, 0,
|
||||
|
@ -18,8 +18,10 @@ how to deal with such a device, and hence shouldn't try to).
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/pci.c
|
||||
+++ b/xen/drivers/passthrough/pci.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/pci.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
@@ -144,7 +144,7 @@ static struct pci_dev *alloc_pdev(struct
|
||||
spin_lock_init(&pdev->msix_table_lock);
|
||||
|
||||
@ -83,8 +85,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
}
|
||||
|
||||
return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
|
||||
--- a/xen/drivers/passthrough/vtd/intremap.c
|
||||
+++ b/xen/drivers/passthrough/vtd/intremap.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/intremap.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/intremap.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/intremap.c
|
||||
@@ -426,7 +426,6 @@ void io_apic_write_remap_rte(
|
||||
|
||||
static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
|
||||
@ -112,8 +116,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
|
||||
break;
|
||||
}
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -1450,7 +1450,6 @@ static int domain_context_mapping(
|
||||
{
|
||||
struct acpi_drhd_unit *drhd;
|
||||
@ -168,9 +174,11 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
--- a/xen/include/xen/pci.h
|
||||
+++ b/xen/include/xen/pci.h
|
||||
@@ -62,6 +62,17 @@ struct pci_dev {
|
||||
Index: xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/pci.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
@@ -63,6 +63,17 @@ struct pci_dev {
|
||||
const u16 seg;
|
||||
const u8 bus;
|
||||
const u8 devfn;
|
||||
@ -188,7 +196,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
struct pci_dev_info info;
|
||||
struct arch_pci_dev arch;
|
||||
struct {
|
||||
@@ -83,18 +94,10 @@ struct pci_dev {
|
||||
@@ -84,18 +95,10 @@ struct pci_dev {
|
||||
|
||||
extern spinlock_t pcidevs_lock;
|
||||
|
||||
@ -208,8 +216,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
|
||||
struct pci_dev *pci_lock_pdev(int seg, int bus, int devfn);
|
||||
struct pci_dev *pci_lock_domain_pdev(
|
||||
--- a/xen/include/xen/pci_regs.h
|
||||
+++ b/xen/include/xen/pci_regs.h
|
||||
Index: xen-4.2.2-testing/xen/include/xen/pci_regs.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/pci_regs.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/pci_regs.h
|
||||
@@ -371,6 +371,9 @@
|
||||
#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
|
||||
#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
|
||||
|
@ -18,8 +18,10 @@ function number, would return the underlying actual device.
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_cmd.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/amd/iommu_cmd.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_cmd.c
|
||||
@@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s
|
||||
return;
|
||||
|
||||
@ -37,8 +39,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
}
|
||||
|
||||
/* Flush iommu cache after p2m changes. */
|
||||
--- a/xen/drivers/passthrough/amd/iommu_init.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_init.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_init.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/amd/iommu_init.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_init.c
|
||||
@@ -692,7 +692,7 @@ void parse_ppr_log_entry(struct amd_iomm
|
||||
devfn = PCI_DEVFN2(device_id);
|
||||
|
||||
@ -48,8 +52,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
spin_unlock(&pcidevs_lock);
|
||||
|
||||
if ( pdev )
|
||||
--- a/xen/drivers/passthrough/amd/iommu_map.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_map.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_map.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/amd/iommu_map.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/amd/iommu_map.c
|
||||
@@ -612,7 +612,6 @@ static int update_paging_mode(struct dom
|
||||
for_each_pdev( d, pdev )
|
||||
{
|
||||
@ -64,25 +70,23 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
- device_entry = iommu->dev_table.buffer +
|
||||
- (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
|
||||
-
|
||||
+ do {
|
||||
+ req_id = get_dma_requestor_id(pdev->seg, bdf);
|
||||
+ device_entry = iommu->dev_table.buffer +
|
||||
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
|
||||
|
||||
- /* valid = 0 only works for dom0 passthrough mode */
|
||||
- amd_iommu_set_root_page_table((u32 *)device_entry,
|
||||
- page_to_maddr(hd->root_table),
|
||||
- hd->domain_id,
|
||||
- hd->paging_mode, 1);
|
||||
-
|
||||
- amd_iommu_flush_device(iommu, req_id);
|
||||
+ do {
|
||||
+ req_id = get_dma_requestor_id(pdev->seg, bdf);
|
||||
+ device_entry = iommu->dev_table.buffer +
|
||||
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
|
||||
+
|
||||
+ /* valid = 0 only works for dom0 passthrough mode */
|
||||
+ amd_iommu_set_root_page_table((u32 *)device_entry,
|
||||
+ page_to_maddr(hd->root_table),
|
||||
+ hd->domain_id,
|
||||
+ hd->paging_mode, 1);
|
||||
+
|
||||
|
||||
- amd_iommu_flush_device(iommu, req_id);
|
||||
+ amd_iommu_flush_device(iommu, req_id);
|
||||
+ bdf += pdev->phantom_stride;
|
||||
+ } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
|
||||
@ -90,8 +94,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
spin_unlock_irqrestore(&iommu->lock, flags);
|
||||
}
|
||||
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/iommu.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/iommu.c
|
||||
@@ -157,6 +157,8 @@ void __init iommu_dom0_init(struct domai
|
||||
int iommu_add_device(struct pci_dev *pdev)
|
||||
{
|
||||
@ -196,8 +202,10 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
|
||||
if ( ret )
|
||||
{
|
||||
--- a/xen/drivers/passthrough/pci.c
|
||||
+++ b/xen/drivers/passthrough/pci.c
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/pci.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
@@ -146,6 +146,8 @@ static struct pci_dev *alloc_pdev(struct
|
||||
/* update bus2bridge */
|
||||
switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
|
||||
@ -332,9 +340,11 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
}
|
||||
}
|
||||
|
||||
--- a/xen/include/xen/lib.h
|
||||
+++ b/xen/include/xen/lib.h
|
||||
@@ -58,6 +58,9 @@ do {
|
||||
Index: xen-4.2.2-testing/xen/include/xen/lib.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/lib.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/lib.h
|
||||
@@ -58,6 +58,9 @@ do {
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x))
|
||||
|
||||
@ -344,9 +354,11 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
#define reserve_bootmem(_p,_l) ((void)0)
|
||||
|
||||
struct domain;
|
||||
--- a/xen/include/xen/pci.h
|
||||
+++ b/xen/include/xen/pci.h
|
||||
@@ -63,6 +63,8 @@ struct pci_dev {
|
||||
Index: xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/pci.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/pci.h
|
||||
@@ -64,6 +64,8 @@ struct pci_dev {
|
||||
const u8 bus;
|
||||
const u8 devfn;
|
||||
|
||||
@ -355,7 +367,7 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
enum pdev_type {
|
||||
DEV_TYPE_PCI_UNKNOWN,
|
||||
DEV_TYPE_PCIe_ENDPOINT,
|
||||
@@ -113,6 +115,7 @@ int pci_remove_device(u16 seg, u8 bus, u
|
||||
@@ -114,6 +116,7 @@ int pci_remove_device(u16 seg, u8 bus, u
|
||||
int pci_ro_device(int seg, int bus, int devfn);
|
||||
void arch_pci_ro_device(int seg, int bdf);
|
||||
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
|
||||
|
@ -14,9 +14,11 @@ single function devices.
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/docs/misc/xen-command-line.markdown
|
||||
+++ b/docs/misc/xen-command-line.markdown
|
||||
@@ -672,6 +672,16 @@ Defaults to booting secondary processors
|
||||
Index: xen-4.2.2-testing/docs/misc/xen-command-line.markdown
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/docs/misc/xen-command-line.markdown
|
||||
+++ xen-4.2.2-testing/docs/misc/xen-command-line.markdown
|
||||
@@ -679,6 +679,16 @@ Defaults to booting secondary processors
|
||||
|
||||
Default: `on`
|
||||
|
||||
@ -33,9 +35,11 @@ Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
### ple\_gap
|
||||
> `= <integer>`
|
||||
|
||||
--- a/xen/drivers/passthrough/pci.c
|
||||
+++ b/xen/drivers/passthrough/pci.c
|
||||
@@ -123,6 +123,49 @@ const unsigned long *pci_get_ro_map(u16
|
||||
Index: xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/drivers/passthrough/pci.c
|
||||
+++ xen-4.2.2-testing/xen/drivers/passthrough/pci.c
|
||||
@@ -123,6 +123,49 @@ const unsigned long *pci_get_ro_map(u16
|
||||
return pseg ? pseg->ro_map : NULL;
|
||||
}
|
||||
|
||||
|
@ -1,30 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1357561709 -3600
|
||||
# Node ID 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
|
||||
# Parent 23c4bbc0111dd807561b2c62cbc5798220943a0d
|
||||
x86: compat_show_guest_stack() should not truncate MFN
|
||||
|
||||
Re-using "addr" here was a mistake, as it is a 32-bit quantity.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/x86_64/compat/traps.c
|
||||
+++ b/xen/arch/x86/x86_64/compat/traps.c
|
||||
@@ -20,11 +20,12 @@ void compat_show_guest_stack(struct vcpu
|
||||
if ( v != current )
|
||||
{
|
||||
struct vcpu *vcpu;
|
||||
+ unsigned long mfn;
|
||||
|
||||
ASSERT(guest_kernel_mode(v, regs));
|
||||
- addr = read_cr3() >> PAGE_SHIFT;
|
||||
+ mfn = read_cr3() >> PAGE_SHIFT;
|
||||
for_each_vcpu( v->domain, vcpu )
|
||||
- if ( pagetable_get_pfn(vcpu->arch.guest_table) == addr )
|
||||
+ if ( pagetable_get_pfn(vcpu->arch.guest_table) == mfn )
|
||||
break;
|
||||
if ( !vcpu )
|
||||
{
|
@ -1,30 +0,0 @@
|
||||
References: CVE-2013-0154 XSA-37 bnc#797031
|
||||
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1357564826 -3600
|
||||
# Node ID e1facbde56ff4e5e85f9a4935abc99eb24367cd0
|
||||
# Parent 8e942f2f3b45edc5bb1f7a6e05de288342426f0d
|
||||
x86: fix assertion in get_page_type()
|
||||
|
||||
c/s 22998:e9fab50d7b61 (and immediately following ones) made it
|
||||
possible that __get_page_type() returns other than -EINVAL, in
|
||||
particular -EBUSY. Consequently, the assertion in get_page_type()
|
||||
should check for only the return values we absolutely don't expect to
|
||||
see there.
|
||||
|
||||
This is XSA-37 / CVE-2013-0154.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/mm.c
|
||||
+++ b/xen/arch/x86/mm.c
|
||||
@@ -2603,7 +2603,7 @@ int get_page_type(struct page_info *page
|
||||
int rc = __get_page_type(page, type, 0);
|
||||
if ( likely(rc == 0) )
|
||||
return 1;
|
||||
- ASSERT(rc == -EINVAL);
|
||||
+ ASSERT(rc != -EINTR && rc != -EAGAIN);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,27 +0,0 @@
|
||||
References: CVE-2012-5634 XSA-33 bnc#794316
|
||||
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1357748006 -3600
|
||||
# Node ID 19fd1237ff0dfa3d97a896d6ed6fbbd33f816a9f
|
||||
# Parent 56b0d5476c11bfd09986080dfa97923586ef474f
|
||||
VT-d: fix interrupt remapping source validation for devices behind legacy bridges
|
||||
|
||||
Using SVT_VERIFY_BUS here doesn't make sense; native Linux also
|
||||
uses SVT_VERIFY_SID_SQ here instead.
|
||||
|
||||
This is XSA-33 / CVE-2012-5634.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/vtd/intremap.c
|
||||
+++ b/xen/drivers/passthrough/vtd/intremap.c
|
||||
@@ -469,7 +469,7 @@ static void set_msi_source_id(struct pci
|
||||
set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
|
||||
(bus << 8) | pdev->bus);
|
||||
else if ( pdev_type(seg, bus, devfn) == DEV_TYPE_LEGACY_PCI_BRIDGE )
|
||||
- set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
|
||||
+ set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
|
||||
PCI_BDF2(bus, devfn));
|
||||
}
|
||||
break;
|
@ -13,11 +13,11 @@ Date: Fri Jan 11 12:22:26 2013 +0000
|
||||
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
Committed-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl.c
|
||||
Index: xen-4.2.2-testing/tools/libxl/libxl.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl.c
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl.c
|
||||
@@ -1727,6 +1727,26 @@ out:
|
||||
--- xen-4.2.2-testing.orig/tools/libxl/libxl.c
|
||||
+++ xen-4.2.2-testing/tools/libxl/libxl.c
|
||||
@@ -1710,6 +1710,26 @@ out:
|
||||
return;
|
||||
}
|
||||
|
||||
@ -44,7 +44,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl.c
|
||||
/******************************************************************************/
|
||||
|
||||
int libxl__device_disk_setdefault(libxl__gc *gc, libxl_device_disk *disk)
|
||||
@@ -2563,8 +2583,7 @@ void libxl__device_nic_add(libxl__egc *e
|
||||
@@ -2549,8 +2569,7 @@ void libxl__device_nic_add(libxl__egc *e
|
||||
flexarray_t *front;
|
||||
flexarray_t *back;
|
||||
libxl__device *device;
|
||||
@ -54,7 +54,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl.c
|
||||
|
||||
rc = libxl__device_nic_setdefault(gc, nic, domid);
|
||||
if (rc) goto out;
|
||||
@@ -2581,16 +2600,10 @@ void libxl__device_nic_add(libxl__egc *e
|
||||
@@ -2567,17 +2586,10 @@ void libxl__device_nic_add(libxl__egc *e
|
||||
}
|
||||
|
||||
if (nic->devid == -1) {
|
||||
@ -64,7 +64,8 @@ Index: xen-4.2.1-testing/tools/libxl/libxl.c
|
||||
goto out_free;
|
||||
}
|
||||
- if (!(l = libxl__xs_directory(gc, XBT_NULL,
|
||||
- libxl__sprintf(gc, "%s/device/vif", dompath), &nb))) {
|
||||
- libxl__sprintf(gc, "%s/device/vif", dompath), &nb)) ||
|
||||
- nb == 0) {
|
||||
- nic->devid = 0;
|
||||
- } else {
|
||||
- nic->devid = strtoul(l[nb - 1], NULL, 10) + 1;
|
||||
@ -72,7 +73,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl.c
|
||||
}
|
||||
|
||||
GCNEW(device);
|
||||
@@ -2977,6 +2990,13 @@ int libxl__device_vkb_add(libxl__gc *gc,
|
||||
@@ -2964,6 +2976,13 @@ int libxl__device_vkb_add(libxl__gc *gc,
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
@ -86,7 +87,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl.c
|
||||
rc = libxl__device_from_vkb(gc, domid, vkb, &device);
|
||||
if (rc != 0) goto out_free;
|
||||
|
||||
@@ -3078,6 +3098,13 @@ int libxl__device_vfb_add(libxl__gc *gc,
|
||||
@@ -3065,6 +3084,13 @@ int libxl__device_vfb_add(libxl__gc *gc,
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
|
@ -1,46 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1358341015 -3600
|
||||
# Node ID b4cbb83f9a1f57b4f26f2d35998cda42b904ea69
|
||||
# Parent 327b812026fe62a990f1d81041729c42196983ca
|
||||
x86: consistently mask floating point exceptions
|
||||
|
||||
c/s 23142:f5e8d152a565 resulted in v->arch.fpu_ctxt to point into the
|
||||
save area allocated for xsave/xrstor (when they're available). The way
|
||||
vcpu_restore_fpu_lazy() works (using fpu_init() for an uninitialized
|
||||
vCPU only when there's no xsave support) causes this to load whatever
|
||||
arch_set_info_guest() put there, irrespective of whether the i387 state
|
||||
was specified to be valid in the respective input structure.
|
||||
|
||||
Consequently, with a cleared (all zeroes) incoming FPU context, and with
|
||||
xsave available, one gets all exceptions unmasked (as opposed to the
|
||||
legacy case, where FINIT and LDMXCSR get used, masking all exceptions).
|
||||
This causes e.g. para-virtualized NetWare to crash.
|
||||
|
||||
The behavior of arch_set_info_guest() is thus being made more hardware-
|
||||
like for the FPU portion of it: Considering it to be similar to INIT,
|
||||
it will leave untouched all floating point state now. An alternative
|
||||
would be to make the behavior RESET-like, forcing all state to known
|
||||
values, albeit - taking into account legacy behavior - not to precisely
|
||||
the values RESET would enforce (which masks only SSE exceptions, but
|
||||
not x87 ones); that would come closest to mimicking FINIT behavior in
|
||||
the xsave case. Another option would be to continue copying whatever
|
||||
was provided, but override (at least) FCW and MXCSR if VGCF_I387_VALID
|
||||
isn't set.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/domain.c
|
||||
+++ b/xen/arch/x86/domain.c
|
||||
@@ -819,7 +819,9 @@ int arch_set_info_guest(
|
||||
|
||||
v->arch.vgc_flags = flags;
|
||||
|
||||
- memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt));
|
||||
+ if ( flags & VGCF_I387_VALID )
|
||||
+ memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt));
|
||||
+
|
||||
if ( !compat )
|
||||
{
|
||||
memcpy(&v->arch.user_regs, &c.nat->user_regs, sizeof(c.nat->user_regs));
|
@ -14,9 +14,11 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/traps.c
|
||||
+++ b/xen/arch/x86/traps.c
|
||||
@@ -3357,10 +3357,10 @@ void do_nmi(struct cpu_user_regs *regs)
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/traps.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/traps.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/traps.c
|
||||
@@ -3369,10 +3369,10 @@ void do_nmi(struct cpu_user_regs *regs)
|
||||
reason = inb(0x61);
|
||||
if ( reason & 0x80 )
|
||||
pci_serr_error(regs);
|
||||
|
@ -1,51 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
# Date 1358508058 -3600
|
||||
# Node ID 8f6dd5dc5d6cdd56050ed917a0c30903bbddcbf0
|
||||
# Parent eb8e9a23925d7b77c344a4a99679a45f96754a17
|
||||
x86/AMD: Enable WC+ memory type on family 10 processors
|
||||
|
||||
In some cases BIOS may not enable WC+ memory type on family 10 processors,
|
||||
instead converting what would be WC+ memory to CD type. On guests using
|
||||
nested pages this could result in performance degradation. This patch
|
||||
enables WC+.
|
||||
|
||||
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/cpu/amd.c
|
||||
+++ b/xen/arch/x86/cpu/amd.c
|
||||
@@ -534,6 +534,19 @@ static void __devinit init_amd(struct cp
|
||||
}
|
||||
#endif
|
||||
|
||||
+ if (c->x86 == 0x10) {
|
||||
+ /*
|
||||
+ * On family 10h BIOS may not have properly enabled WC+
|
||||
+ * support, causing it to be converted to CD memtype. This may
|
||||
+ * result in performance degradation for certain nested-paging
|
||||
+ * guests. Prevent this conversion by clearing bit 24 in
|
||||
+ * MSR_F10_BU_CFG2.
|
||||
+ */
|
||||
+ rdmsrl(MSR_F10_BU_CFG2, value);
|
||||
+ value &= ~(1ULL << 24);
|
||||
+ wrmsrl(MSR_F10_BU_CFG2, value);
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Family 0x12 and above processors have APIC timer
|
||||
* running in deep C states.
|
||||
--- a/xen/include/asm-x86/msr-index.h
|
||||
+++ b/xen/include/asm-x86/msr-index.h
|
||||
@@ -215,8 +215,9 @@
|
||||
#define MSR_F10_MC4_MISC2 0xc0000409
|
||||
#define MSR_F10_MC4_MISC3 0xc000040A
|
||||
|
||||
-/* AMD Family10h MMU control MSRs */
|
||||
-#define MSR_F10_BU_CFG 0xc0011023
|
||||
+/* AMD Family10h Bus Unit MSRs */
|
||||
+#define MSR_F10_BU_CFG 0xc0011023
|
||||
+#define MSR_F10_BU_CFG2 0xc001102a
|
||||
|
||||
/* Other AMD Fam10h MSRs */
|
||||
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
|
@ -1,38 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Tim Deegan <tim@xen.org>
|
||||
# Date 1358508717 -3600
|
||||
# Node ID 9e8c39bdc1fedd5dfc5aa7209cc5f77f813476c7
|
||||
# Parent 8f6dd5dc5d6cdd56050ed917a0c30903bbddcbf0
|
||||
x86/hvm: fix RTC setting.
|
||||
|
||||
When the guest writes one field of the RTC time, we must bring all the
|
||||
other fields up to date for the current second before calculating the
|
||||
new RTC time.
|
||||
|
||||
Signed-off-by: Tim Deegan <tim@xen.org>
|
||||
Tested-by: Phil Evans <Phil.Evans@m247.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/hvm/rtc.c
|
||||
+++ b/xen/arch/x86/hvm/rtc.c
|
||||
@@ -399,10 +399,17 @@ static int rtc_ioport_write(void *opaque
|
||||
case RTC_DAY_OF_MONTH:
|
||||
case RTC_MONTH:
|
||||
case RTC_YEAR:
|
||||
- s->hw.cmos_data[s->hw.cmos_index] = data;
|
||||
- /* if in set mode, do not update the time */
|
||||
- if ( !(s->hw.cmos_data[RTC_REG_B] & RTC_SET) )
|
||||
+ /* if in set mode, just write the register */
|
||||
+ if ( (s->hw.cmos_data[RTC_REG_B] & RTC_SET) )
|
||||
+ s->hw.cmos_data[s->hw.cmos_index] = data;
|
||||
+ else
|
||||
+ {
|
||||
+ /* Fetch the current time and update just this field. */
|
||||
+ s->current_tm = gmtime(get_localtime(d));
|
||||
+ rtc_copy_date(s);
|
||||
+ s->hw.cmos_data[s->hw.cmos_index] = data;
|
||||
rtc_set_time(s);
|
||||
+ }
|
||||
alarm_timer_update(s);
|
||||
break;
|
||||
case RTC_REG_A:
|
@ -1,72 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1358843590 -3600
|
||||
# Node ID 5af4f2ab06f33ce441fa550333a9049c09a9ef28
|
||||
# Parent 4b476378fc35e776196c29dc0e24b71529393a4c
|
||||
x86: restore (optional) forwarding of PCI SERR induced NMI to Dom0
|
||||
|
||||
c/s 22949:54fe1011f86b removed the forwarding of NMIs to Dom0 when they
|
||||
were caused by PCI SERR. NMI buttons as well as BMCs (like HP's iLO)
|
||||
may however want such events to be seen in Dom0 (e.g. to trigger a
|
||||
dump).
|
||||
|
||||
Therefore restore most of the functionality which named c/s removed
|
||||
(adjusted for subsequent changes, and adjusting the public interface to
|
||||
use the modern term, retaining the old one for backwards
|
||||
compatibility).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/traps.c
|
||||
+++ b/xen/arch/x86/traps.c
|
||||
@@ -3201,6 +3201,7 @@ static void nmi_mce_softirq(void)
|
||||
static void pci_serr_softirq(void)
|
||||
{
|
||||
printk("\n\nNMI - PCI system error (SERR)\n");
|
||||
+ outb(inb(0x61) & 0x0b, 0x61); /* re-enable the PCI SERR error line. */
|
||||
}
|
||||
|
||||
void async_exception_cleanup(struct vcpu *curr)
|
||||
@@ -3291,9 +3292,20 @@ static void pci_serr_error(struct cpu_us
|
||||
{
|
||||
outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable the PCI SERR error line. */
|
||||
|
||||
- /* Would like to print a diagnostic here but can't call printk()
|
||||
- from NMI context -- raise a softirq instead. */
|
||||
- raise_softirq(PCI_SERR_SOFTIRQ);
|
||||
+ switch ( opt_nmi[0] )
|
||||
+ {
|
||||
+ case 'd': /* 'dom0' */
|
||||
+ nmi_dom0_report(_XEN_NMIREASON_pci_serr);
|
||||
+ case 'i': /* 'ignore' */
|
||||
+ /* Would like to print a diagnostic here but can't call printk()
|
||||
+ from NMI context -- raise a softirq instead. */
|
||||
+ raise_softirq(PCI_SERR_SOFTIRQ);
|
||||
+ break;
|
||||
+ default: /* 'fatal' */
|
||||
+ console_force_unlock();
|
||||
+ printk("\n\nNMI - PCI system error (SERR)\n");
|
||||
+ fatal_trap(TRAP_nmi, regs);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void io_check_error(struct cpu_user_regs *regs)
|
||||
--- a/xen/include/public/nmi.h
|
||||
+++ b/xen/include/public/nmi.h
|
||||
@@ -36,9 +36,14 @@
|
||||
/* I/O-check error reported via ISA port 0x61, bit 6. */
|
||||
#define _XEN_NMIREASON_io_error 0
|
||||
#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error)
|
||||
+ /* PCI SERR reported via ISA port 0x61, bit 7. */
|
||||
+#define _XEN_NMIREASON_pci_serr 1
|
||||
+#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr)
|
||||
+#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */
|
||||
/* Parity error reported via ISA port 0x61, bit 7. */
|
||||
#define _XEN_NMIREASON_parity_error 1
|
||||
#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error)
|
||||
+#endif
|
||||
/* Unknown hardware-generated NMI. */
|
||||
#define _XEN_NMIREASON_unknown 2
|
||||
#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown)
|
@ -1,149 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Tomasz Wroblewski <tomasz.wroblewski@citrix.com>
|
||||
# Date 1358933464 -3600
|
||||
# Node ID 9efe4c0bf9c8d3ecf03868c69c24dad3218523a4
|
||||
# Parent 7c6ecf2c1831a1c7f63a96f119a8891891463e54
|
||||
fix acpi_dmar_zap/reinstate() (fixes S3 regression)
|
||||
|
||||
Fix S3 regression introduced by cs 23013:65d26504e843 (ACPI: large
|
||||
cleanup). The dmar virtual pointer returned from acpi_get_table cannot
|
||||
be safely stored away and used later, as the underlying
|
||||
acpi_os_map_memory / __acpi_map_table functions overwrite the mapping
|
||||
causing it to point to different tables than dmar (last fetched table is
|
||||
used). This subsequently causes acpi_dmar_reinstate() and
|
||||
acpi_dmar_zap() to write data to wrong table, causing its corruption and
|
||||
problems with consecutive s3 resumes.
|
||||
|
||||
Added a new function to fetch ACPI table physical address, and
|
||||
establishing separate static mapping for dmar_table pointer instead of
|
||||
using acpi_get_table().
|
||||
|
||||
Signed-off-by: Tomasz Wroblewski <tomasz.wroblewski@citrix.com>
|
||||
|
||||
Added call to acpi_tb_verify_table(). Fixed page count passed to
|
||||
map_pages_to_xen(). Cosmetic changes.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/acpi/tables/tbxface.c
|
||||
+++ b/xen/drivers/acpi/tables/tbxface.c
|
||||
@@ -205,3 +205,51 @@ acpi_get_table(char *signature,
|
||||
|
||||
return (AE_NOT_FOUND);
|
||||
}
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ *
|
||||
+ * FUNCTION: acpi_get_table_phys
|
||||
+ *
|
||||
+ * PARAMETERS: signature - ACPI signature of needed table
|
||||
+ * instance - Which instance (for SSDTs)
|
||||
+ * addr - Where the table's physical address is returned
|
||||
+ * len - Where the length of table is returned
|
||||
+ *
|
||||
+ * RETURN: Status, pointer and length of table
|
||||
+ *
|
||||
+ * DESCRIPTION: Finds physical address and length of ACPI table
|
||||
+ *
|
||||
+ *****************************************************************************/
|
||||
+acpi_status __init
|
||||
+acpi_get_table_phys(acpi_string signature, acpi_native_uint instance,
|
||||
+ acpi_physical_address *addr, acpi_native_uint *len)
|
||||
+{
|
||||
+ acpi_native_uint i, j;
|
||||
+ acpi_status status;
|
||||
+
|
||||
+ if (!signature || !addr || !len)
|
||||
+ return AE_BAD_PARAMETER;
|
||||
+
|
||||
+ for (i = j = 0; i < acpi_gbl_root_table_list.count; i++) {
|
||||
+ if (!ACPI_COMPARE_NAME(
|
||||
+ &acpi_gbl_root_table_list.tables[i].signature,
|
||||
+ signature))
|
||||
+ continue;
|
||||
+
|
||||
+ if (++j < instance)
|
||||
+ continue;
|
||||
+
|
||||
+ status =
|
||||
+ acpi_tb_verify_table(&acpi_gbl_root_table_list.tables[i]);
|
||||
+ if (ACPI_SUCCESS(status)) {
|
||||
+ *addr = acpi_gbl_root_table_list.tables[i].address;
|
||||
+ *len = acpi_gbl_root_table_list.tables[i].length;
|
||||
+ }
|
||||
+
|
||||
+ acpi_gbl_root_table_list.tables[i].pointer = NULL;
|
||||
+
|
||||
+ return status;
|
||||
+ }
|
||||
+
|
||||
+ return AE_NOT_FOUND;
|
||||
+}
|
||||
--- a/xen/drivers/passthrough/vtd/dmar.c
|
||||
+++ b/xen/drivers/passthrough/vtd/dmar.c
|
||||
@@ -776,6 +776,7 @@ out:
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
+#include <asm/fixmap.h>
|
||||
#include <asm/tboot.h>
|
||||
/* ACPI tables may not be DMA protected by tboot, so use DMAR copy */
|
||||
/* SINIT saved in SinitMleData in TXT heap (which is DMA protected) */
|
||||
@@ -786,7 +787,32 @@ out:
|
||||
|
||||
int __init acpi_dmar_init(void)
|
||||
{
|
||||
- acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_table);
|
||||
+ acpi_physical_address dmar_addr;
|
||||
+ acpi_native_uint dmar_len;
|
||||
+
|
||||
+ if ( ACPI_SUCCESS(acpi_get_table_phys(ACPI_SIG_DMAR, 0,
|
||||
+ &dmar_addr, &dmar_len)) )
|
||||
+ {
|
||||
+#ifdef CONFIG_X86_32
|
||||
+ if ( dmar_addr + dmar_len > (DIRECTMAP_MBYTES << 20) )
|
||||
+ {
|
||||
+ unsigned long offset = dmar_addr & (PAGE_SIZE - 1);
|
||||
+ unsigned long mapped_size = PAGE_SIZE - offset;
|
||||
+
|
||||
+ set_fixmap(FIX_DMAR_ZAP_LO, dmar_addr);
|
||||
+ if ( mapped_size < sizeof(*dmar_table) )
|
||||
+ set_fixmap(FIX_DMAR_ZAP_HI, dmar_addr + PAGE_SIZE);
|
||||
+ dmar_table = (void *)fix_to_virt(FIX_DMAR_ZAP_LO) + offset;
|
||||
+ goto exit;
|
||||
+ }
|
||||
+#endif
|
||||
+ map_pages_to_xen((unsigned long)__va(dmar_addr), PFN_DOWN(dmar_addr),
|
||||
+ PFN_UP(dmar_addr + dmar_len) - PFN_DOWN(dmar_addr),
|
||||
+ PAGE_HYPERVISOR);
|
||||
+ dmar_table = __va(dmar_addr);
|
||||
+ }
|
||||
+
|
||||
+ exit: __attribute__((__unused__))
|
||||
return parse_dmar_table(acpi_parse_dmar);
|
||||
}
|
||||
|
||||
--- a/xen/include/acpi/acpixf.h
|
||||
+++ b/xen/include/acpi/acpixf.h
|
||||
@@ -77,6 +77,9 @@ acpi_status
|
||||
acpi_get_table(acpi_string signature,
|
||||
acpi_native_uint instance, struct acpi_table_header **out_table);
|
||||
|
||||
+acpi_status
|
||||
+acpi_get_table_phys(acpi_string signature, acpi_native_uint instance,
|
||||
+ acpi_physical_address *addr, acpi_native_uint *len);
|
||||
/*
|
||||
* Namespace and name interfaces
|
||||
*/
|
||||
--- a/xen/include/asm-x86/fixmap.h
|
||||
+++ b/xen/include/asm-x86/fixmap.h
|
||||
@@ -50,6 +50,8 @@ enum fixed_addresses {
|
||||
FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
|
||||
#define FIX_VGC_END FIX_PAE_HIGHMEM_0
|
||||
#define FIX_VGC_BEGIN FIX_PAE_HIGHMEM_END
|
||||
+ FIX_DMAR_ZAP_HI,
|
||||
+ FIX_DMAR_ZAP_LO,
|
||||
#else
|
||||
FIX_VGC_END,
|
||||
FIX_VGC_BEGIN = FIX_VGC_END
|
@ -1,32 +0,0 @@
|
||||
References: CVE-2013-0152 XSA-35 bnc#797287
|
||||
|
||||
# HG changeset patch
|
||||
# User Ian Campbell <ian.campbell@citrix.com>
|
||||
# Date 1358938044 -3600
|
||||
# Node ID 621b1a889e9b120236698731e0b5ecc5b0cb1d82
|
||||
# Parent 9efe4c0bf9c8d3ecf03868c69c24dad3218523a4
|
||||
xen: Do not allow guests to enable nested HVM on themselves
|
||||
|
||||
There is no reason for this and doing so exposes a memory leak to
|
||||
guests. Only toolstacks need write access to this HVM param.
|
||||
|
||||
This is XSA-35 / CVE-2013-0152.
|
||||
|
||||
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
Acked-by: Jan Beulich <JBeulich@suse.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/hvm/hvm.c
|
||||
+++ b/xen/arch/x86/hvm/hvm.c
|
||||
@@ -3930,6 +3930,11 @@ long do_hvm_op(unsigned long op, XEN_GUE
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
case HVM_PARAM_NESTEDHVM:
|
||||
+ if ( !IS_PRIV(current->domain) )
|
||||
+ {
|
||||
+ rc = -EPERM;
|
||||
+ break;
|
||||
+ }
|
||||
#ifdef __i386__
|
||||
if ( a.value )
|
||||
rc = -EINVAL;
|
@ -1,343 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Ian Jackson <ian.jackson@eu.citrix.com>
|
||||
# Date 1359031672 0
|
||||
# Node ID a181bf3e77df891c97fc20dff4e9b90b7584022b
|
||||
# Parent 3e93c50982de4f2f7db99d92b04684556320541c
|
||||
libxl: fix stale fd event callback race
|
||||
|
||||
Because there is not necessarily any lock held at the point the
|
||||
application (eg, libvirt) calls libxl_osevent_occurred_timeout and
|
||||
..._fd, in a multithreaded program those calls may be arbitrarily
|
||||
delayed in relation to other activities within the program.
|
||||
|
||||
libxl therefore needs to be prepared to receive very old event
|
||||
callbacks. Arrange for this to be the case for fd callbacks.
|
||||
|
||||
This requires a new layer of indirection through a "hook nexus" struct
|
||||
which can outlive the libxl__ev_foo. Allocation and deallocation of
|
||||
these nexi is mostly handled in the OSEVENT macros which wrap up
|
||||
the application's callbacks.
|
||||
|
||||
Document the problem and the solution in a comment in libxl_event.c
|
||||
just before the definition of struct libxl__osevent_hook_nexus.
|
||||
|
||||
There is still a race relating to libxl__osevent_occurred_timeout;
|
||||
this will be addressed in the following patch.
|
||||
|
||||
Reported-by: Bamvor Jian Zhang <bjzhang@suse.com>
|
||||
Cc: Bamvor Jian Zhang <bjzhang@suse.com>
|
||||
Cc: Ian Campbell <Ian.Campbell@citrix.com>
|
||||
Tested-by: Jim Fehlig <jfehlig@suse.com>
|
||||
Acked-by: Jim Fehlig <jfehlig@suse.com>
|
||||
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
|
||||
Committed-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_event.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.c
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_event.c
|
||||
@@ -38,23 +38,131 @@
|
||||
* The application's registration hooks should be called ONLY via
|
||||
* these macros, with the ctx locked. Likewise all the "occurred"
|
||||
* entrypoints from the application should assert(!in_hook);
|
||||
+ *
|
||||
+ * During the hook call - including while the arguments are being
|
||||
+ * evaluated - ev->nexus is guaranteed to be valid and refer to the
|
||||
+ * nexus which is being used for this event registration. The
|
||||
+ * arguments should specify ev->nexus for the for_libxl argument and
|
||||
+ * ev->nexus->for_app_reg (or a pointer to it) for for_app_reg.
|
||||
*/
|
||||
-#define OSEVENT_HOOK_INTERN(retval, hookname, ...) do { \
|
||||
- if (CTX->osevent_hooks) { \
|
||||
- CTX->osevent_in_hook++; \
|
||||
- retval CTX->osevent_hooks->hookname(CTX->osevent_user, __VA_ARGS__); \
|
||||
- CTX->osevent_in_hook--; \
|
||||
- } \
|
||||
+#define OSEVENT_HOOK_INTERN(retval, failedp, evkind, hookop, nexusop, ...) do { \
|
||||
+ if (CTX->osevent_hooks) { \
|
||||
+ CTX->osevent_in_hook++; \
|
||||
+ libxl__osevent_hook_nexi *nexi = &CTX->hook_##evkind##_nexi_idle; \
|
||||
+ osevent_hook_pre_##nexusop(gc, ev, nexi, &ev->nexus); \
|
||||
+ retval CTX->osevent_hooks->evkind##_##hookop \
|
||||
+ (CTX->osevent_user, __VA_ARGS__); \
|
||||
+ if ((failedp)) \
|
||||
+ osevent_hook_failed_##nexusop(gc, ev, nexi, &ev->nexus); \
|
||||
+ CTX->osevent_in_hook--; \
|
||||
+ } \
|
||||
} while (0)
|
||||
|
||||
-#define OSEVENT_HOOK(hookname, ...) ({ \
|
||||
- int osevent_hook_rc = 0; \
|
||||
- OSEVENT_HOOK_INTERN(osevent_hook_rc = , hookname, __VA_ARGS__); \
|
||||
- osevent_hook_rc; \
|
||||
+#define OSEVENT_HOOK(evkind, hookop, nexusop, ...) ({ \
|
||||
+ int osevent_hook_rc = 0; \
|
||||
+ OSEVENT_HOOK_INTERN(osevent_hook_rc =, !!osevent_hook_rc, \
|
||||
+ evkind, hookop, nexusop, __VA_ARGS__); \
|
||||
+ osevent_hook_rc; \
|
||||
})
|
||||
|
||||
-#define OSEVENT_HOOK_VOID(hookname, ...) \
|
||||
- OSEVENT_HOOK_INTERN(/* void */, hookname, __VA_ARGS__)
|
||||
+#define OSEVENT_HOOK_VOID(evkind, hookop, nexusop, ...) \
|
||||
+ OSEVENT_HOOK_INTERN(/* void */, 0, evkind, hookop, nexusop, __VA_ARGS__)
|
||||
+
|
||||
+/*
|
||||
+ * The application's calls to libxl_osevent_occurred_... may be
|
||||
+ * indefinitely delayed with respect to the rest of the program (since
|
||||
+ * they are not necessarily called with any lock held). So the
|
||||
+ * for_libxl value we receive may be (almost) arbitrarily old. All we
|
||||
+ * know is that it came from this ctx.
|
||||
+ *
|
||||
+ * Therefore we may not free the object referred to by any for_libxl
|
||||
+ * value until we free the whole libxl_ctx. And if we reuse it we
|
||||
+ * must be able to tell when an old use turns up, and discard the
|
||||
+ * stale event.
|
||||
+ *
|
||||
+ * Thus we cannot use the ev directly as the for_libxl value - we need
|
||||
+ * a layer of indirection.
|
||||
+ *
|
||||
+ * We do this by keeping a pool of libxl__osevent_hook_nexus structs,
|
||||
+ * and use pointers to them as for_libxl values. In fact, there are
|
||||
+ * two pools: one for fds and one for timeouts. This ensures that we
|
||||
+ * don't risk a type error when we upcast nexus->ev. In each nexus
|
||||
+ * the ev is either null or points to a valid libxl__ev_time or
|
||||
+ * libxl__ev_fd, as applicable.
|
||||
+ *
|
||||
+ * We /do/ allow ourselves to reassociate an old nexus with a new ev
|
||||
+ * as otherwise we would have to leak nexi. (This reassociation
|
||||
+ * might, of course, be an old ev being reused for a new purpose so
|
||||
+ * simply comparing the ev pointer is not sufficient.) Thus the
|
||||
+ * libxl_osevent_occurred functions need to check that the condition
|
||||
+ * allegedly signalled by this event actually exists.
|
||||
+ *
|
||||
+ * The nexi and the lists are all protected by the ctx lock.
|
||||
+ */
|
||||
+
|
||||
+struct libxl__osevent_hook_nexus {
|
||||
+ void *ev;
|
||||
+ void *for_app_reg;
|
||||
+ LIBXL_SLIST_ENTRY(libxl__osevent_hook_nexus) next;
|
||||
+};
|
||||
+
|
||||
+static void *osevent_ev_from_hook_nexus(libxl_ctx *ctx,
|
||||
+ libxl__osevent_hook_nexus *nexus /* pass void *for_libxl */)
|
||||
+{
|
||||
+ return nexus->ev;
|
||||
+}
|
||||
+
|
||||
+static void osevent_release_nexus(libxl__gc *gc,
|
||||
+ libxl__osevent_hook_nexi *nexi_idle,
|
||||
+ libxl__osevent_hook_nexus *nexus)
|
||||
+{
|
||||
+ nexus->ev = 0;
|
||||
+ LIBXL_SLIST_INSERT_HEAD(nexi_idle, nexus, next);
|
||||
+}
|
||||
+
|
||||
+/*----- OSEVENT* hook functions for nexusop "alloc" -----*/
|
||||
+static void osevent_hook_pre_alloc(libxl__gc *gc, void *ev,
|
||||
+ libxl__osevent_hook_nexi *nexi_idle,
|
||||
+ libxl__osevent_hook_nexus **nexus_r)
|
||||
+{
|
||||
+ libxl__osevent_hook_nexus *nexus = LIBXL_SLIST_FIRST(nexi_idle);
|
||||
+ if (nexus) {
|
||||
+ LIBXL_SLIST_REMOVE_HEAD(nexi_idle, next);
|
||||
+ } else {
|
||||
+ nexus = libxl__zalloc(NOGC, sizeof(*nexus));
|
||||
+ }
|
||||
+ nexus->ev = ev;
|
||||
+ *nexus_r = nexus;
|
||||
+}
|
||||
+static void osevent_hook_failed_alloc(libxl__gc *gc, void *ev,
|
||||
+ libxl__osevent_hook_nexi *nexi_idle,
|
||||
+ libxl__osevent_hook_nexus **nexus)
|
||||
+{
|
||||
+ osevent_release_nexus(gc, nexi_idle, *nexus);
|
||||
+}
|
||||
+
|
||||
+/*----- OSEVENT* hook functions for nexusop "release" -----*/
|
||||
+static void osevent_hook_pre_release(libxl__gc *gc, void *ev,
|
||||
+ libxl__osevent_hook_nexi *nexi_idle,
|
||||
+ libxl__osevent_hook_nexus **nexus)
|
||||
+{
|
||||
+ osevent_release_nexus(gc, nexi_idle, *nexus);
|
||||
+}
|
||||
+static void osevent_hook_failed_release(libxl__gc *gc, void *ev,
|
||||
+ libxl__osevent_hook_nexi *nexi_idle,
|
||||
+ libxl__osevent_hook_nexus **nexus)
|
||||
+{
|
||||
+ abort();
|
||||
+}
|
||||
+
|
||||
+/*----- OSEVENT* hook functions for nexusop "noop" -----*/
|
||||
+static void osevent_hook_pre_noop(libxl__gc *gc, void *ev,
|
||||
+ libxl__osevent_hook_nexi *nexi_idle,
|
||||
+ libxl__osevent_hook_nexus **nexus) { }
|
||||
+static void osevent_hook_failed_noop(libxl__gc *gc, void *ev,
|
||||
+ libxl__osevent_hook_nexi *nexi_idle,
|
||||
+ libxl__osevent_hook_nexus **nexus) { }
|
||||
+
|
||||
|
||||
/*
|
||||
* fd events
|
||||
@@ -72,7 +180,8 @@ int libxl__ev_fd_register(libxl__gc *gc,
|
||||
|
||||
DBG("ev_fd=%p register fd=%d events=%x", ev, fd, events);
|
||||
|
||||
- rc = OSEVENT_HOOK(fd_register, fd, &ev->for_app_reg, events, ev);
|
||||
+ rc = OSEVENT_HOOK(fd,register, alloc, fd, &ev->nexus->for_app_reg,
|
||||
+ events, ev->nexus);
|
||||
if (rc) goto out;
|
||||
|
||||
ev->fd = fd;
|
||||
@@ -97,7 +206,7 @@ int libxl__ev_fd_modify(libxl__gc *gc, l
|
||||
|
||||
DBG("ev_fd=%p modify fd=%d events=%x", ev, ev->fd, events);
|
||||
|
||||
- rc = OSEVENT_HOOK(fd_modify, ev->fd, &ev->for_app_reg, events);
|
||||
+ rc = OSEVENT_HOOK(fd,modify, noop, ev->fd, &ev->nexus->for_app_reg, events);
|
||||
if (rc) goto out;
|
||||
|
||||
ev->events = events;
|
||||
@@ -119,7 +228,7 @@ void libxl__ev_fd_deregister(libxl__gc *
|
||||
|
||||
DBG("ev_fd=%p deregister fd=%d", ev, ev->fd);
|
||||
|
||||
- OSEVENT_HOOK_VOID(fd_deregister, ev->fd, ev->for_app_reg);
|
||||
+ OSEVENT_HOOK_VOID(fd,deregister, release, ev->fd, ev->nexus->for_app_reg);
|
||||
LIBXL_LIST_REMOVE(ev, entry);
|
||||
ev->fd = -1;
|
||||
|
||||
@@ -171,7 +280,8 @@ static int time_register_finite(libxl__g
|
||||
{
|
||||
int rc;
|
||||
|
||||
- rc = OSEVENT_HOOK(timeout_register, &ev->for_app_reg, absolute, ev);
|
||||
+ rc = OSEVENT_HOOK(timeout,register, alloc, &ev->nexus->for_app_reg,
|
||||
+ absolute, ev->nexus);
|
||||
if (rc) return rc;
|
||||
|
||||
ev->infinite = 0;
|
||||
@@ -184,7 +294,7 @@ static int time_register_finite(libxl__g
|
||||
static void time_deregister(libxl__gc *gc, libxl__ev_time *ev)
|
||||
{
|
||||
if (!ev->infinite) {
|
||||
- OSEVENT_HOOK_VOID(timeout_deregister, ev->for_app_reg);
|
||||
+ OSEVENT_HOOK_VOID(timeout,deregister, release, ev->nexus->for_app_reg);
|
||||
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
|
||||
}
|
||||
}
|
||||
@@ -270,7 +380,8 @@ int libxl__ev_time_modify_abs(libxl__gc
|
||||
rc = time_register_finite(gc, ev, absolute);
|
||||
if (rc) goto out;
|
||||
} else {
|
||||
- rc = OSEVENT_HOOK(timeout_modify, &ev->for_app_reg, absolute);
|
||||
+ rc = OSEVENT_HOOK(timeout,modify, noop,
|
||||
+ &ev->nexus->for_app_reg, absolute);
|
||||
if (rc) goto out;
|
||||
|
||||
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
|
||||
@@ -1009,35 +1120,54 @@ void libxl_osevent_register_hooks(libxl_
|
||||
|
||||
|
||||
void libxl_osevent_occurred_fd(libxl_ctx *ctx, void *for_libxl,
|
||||
- int fd, short events, short revents)
|
||||
+ int fd, short events_ign, short revents_ign)
|
||||
{
|
||||
- libxl__ev_fd *ev = for_libxl;
|
||||
-
|
||||
EGC_INIT(ctx);
|
||||
CTX_LOCK;
|
||||
assert(!CTX->osevent_in_hook);
|
||||
|
||||
- assert(fd == ev->fd);
|
||||
- revents &= ev->events;
|
||||
- if (revents)
|
||||
- ev->func(egc, ev, fd, ev->events, revents);
|
||||
+ libxl__ev_fd *ev = osevent_ev_from_hook_nexus(ctx, for_libxl);
|
||||
+ if (!ev) goto out;
|
||||
+ if (ev->fd != fd) goto out;
|
||||
|
||||
+ struct pollfd check;
|
||||
+ for (;;) {
|
||||
+ check.fd = fd;
|
||||
+ check.events = ev->events;
|
||||
+ int r = poll(&check, 1, 0);
|
||||
+ if (!r)
|
||||
+ goto out;
|
||||
+ if (r==1)
|
||||
+ break;
|
||||
+ assert(r<0);
|
||||
+ if (errno != EINTR) {
|
||||
+ LIBXL__EVENT_DISASTER(egc, "failed poll to check for fd", errno, 0);
|
||||
+ goto out;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (check.revents)
|
||||
+ ev->func(egc, ev, fd, ev->events, check.revents);
|
||||
+
|
||||
+ out:
|
||||
CTX_UNLOCK;
|
||||
EGC_FREE;
|
||||
}
|
||||
|
||||
void libxl_osevent_occurred_timeout(libxl_ctx *ctx, void *for_libxl)
|
||||
{
|
||||
- libxl__ev_time *ev = for_libxl;
|
||||
-
|
||||
EGC_INIT(ctx);
|
||||
CTX_LOCK;
|
||||
assert(!CTX->osevent_in_hook);
|
||||
|
||||
+ libxl__ev_time *ev = osevent_ev_from_hook_nexus(ctx, for_libxl);
|
||||
+ if (!ev) goto out;
|
||||
assert(!ev->infinite);
|
||||
+
|
||||
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
|
||||
ev->func(egc, ev, &ev->abs);
|
||||
|
||||
+ out:
|
||||
CTX_UNLOCK;
|
||||
EGC_FREE;
|
||||
}
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_internal.h
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_internal.h
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_internal.h
|
||||
@@ -136,6 +136,8 @@ typedef struct libxl__gc libxl__gc;
|
||||
typedef struct libxl__egc libxl__egc;
|
||||
typedef struct libxl__ao libxl__ao;
|
||||
typedef struct libxl__aop_occurred libxl__aop_occurred;
|
||||
+typedef struct libxl__osevent_hook_nexus libxl__osevent_hook_nexus;
|
||||
+typedef struct libxl__osevent_hook_nexi libxl__osevent_hook_nexi;
|
||||
|
||||
_hidden void libxl__alloc_failed(libxl_ctx *, const char *func,
|
||||
size_t nmemb, size_t size) __attribute__((noreturn));
|
||||
@@ -163,7 +165,7 @@ struct libxl__ev_fd {
|
||||
libxl__ev_fd_callback *func;
|
||||
/* remainder is private for libxl__ev_fd... */
|
||||
LIBXL_LIST_ENTRY(libxl__ev_fd) entry;
|
||||
- void *for_app_reg;
|
||||
+ libxl__osevent_hook_nexus *nexus;
|
||||
};
|
||||
|
||||
|
||||
@@ -178,7 +180,7 @@ struct libxl__ev_time {
|
||||
int infinite; /* not registered in list or with app if infinite */
|
||||
LIBXL_TAILQ_ENTRY(libxl__ev_time) entry;
|
||||
struct timeval abs;
|
||||
- void *for_app_reg;
|
||||
+ libxl__osevent_hook_nexus *nexus;
|
||||
};
|
||||
|
||||
typedef struct libxl__ev_xswatch libxl__ev_xswatch;
|
||||
@@ -329,6 +331,8 @@ struct libxl__ctx {
|
||||
libxl__poller poller_app; /* libxl_osevent_beforepoll and _afterpoll */
|
||||
LIBXL_LIST_HEAD(, libxl__poller) pollers_event, pollers_idle;
|
||||
|
||||
+ LIBXL_SLIST_HEAD(libxl__osevent_hook_nexi, libxl__osevent_hook_nexus)
|
||||
+ hook_fd_nexi_idle, hook_timeout_nexi_idle;
|
||||
LIBXL_LIST_HEAD(, libxl__ev_fd) efds;
|
||||
LIBXL_TAILQ_HEAD(, libxl__ev_time) etimes;
|
||||
|
@ -1,228 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Ian Jackson <ian.jackson@eu.citrix.com>
|
||||
# Date 1359031673 0
|
||||
# Node ID a162a72e719a85799e3b08f52af7bb2147a407b8
|
||||
# Parent a181bf3e77df891c97fc20dff4e9b90b7584022b
|
||||
libxl: fix stale timeout event callback race
|
||||
|
||||
Because there is not necessarily any lock held at the point the
|
||||
application (eg, libvirt) calls libxl_osevent_occurred_timeout, in a
|
||||
multithreaded program those calls may be arbitrarily delayed in
|
||||
relation to other activities within the program.
|
||||
|
||||
Specifically this means when ->timeout_deregister returns, libxl does
|
||||
not know whether it can safely dispose of the for_libxl value or
|
||||
whether it needs to retain it in case of an in-progress call to
|
||||
_occurred_timeout.
|
||||
|
||||
The interface could be fixed by requiring the application to make a
|
||||
new call into libxl to say that the deregistration was complete.
|
||||
|
||||
However that new call would have to be threaded through the
|
||||
application's event loop; this is complicated and some application
|
||||
authors are likely not to implement it properly. Furthermore the
|
||||
easiest way to implement this facility in most event loops is to queue
|
||||
up a time event for "now".
|
||||
|
||||
Shortcut all of this by having libxl always call timeout_modify
|
||||
setting abs={0,0} (ie, ASAP) instead of timeout_deregister. This will
|
||||
cause the application to call _occurred_timeout. When processing this
|
||||
calldown we see that we were no longer actually interested and simply
|
||||
throw it away.
|
||||
|
||||
Additionally, there is a race between _occurred_timeout and
|
||||
->timeout_modify. If libxl ever adjusts the deadline for a timeout
|
||||
the application may already be in the process of calling _occurred, in
|
||||
which case the situation with for_app's lifetime becomes very
|
||||
complicated. Therefore abolish libxl__ev_time_modify_{abs,rel} (which
|
||||
have no callers) and promise to the application only ever to call
|
||||
->timeout_modify with abs=={0,0}. The application still needs to cope
|
||||
with ->timeout_modify racing with its internal function which calls
|
||||
_occurred_timeout. Document this.
|
||||
|
||||
This is a forwards-compatible change for applications using the libxl
|
||||
API, and will hopefully eliminate these races in callback-supplying
|
||||
applications (such as libvirt) without the need for corresponding
|
||||
changes to the application. (It is possible that this might expose
|
||||
bugs in applications, though, as previously libxl would never call
|
||||
libxl_osevent_hooks->timeout_modify and now it never calls
|
||||
->timeout_deregister).
|
||||
|
||||
For clarity, fold the body of time_register_finite into its one
|
||||
remaining call site. This makes the semantics of ev->infinite
|
||||
slightly clearer.
|
||||
|
||||
Cc: Bamvor Jian Zhang <bjzhang@suse.com>
|
||||
Cc: Ian Campbell <Ian.Campbell@citrix.com>
|
||||
Tested-by: Jim Fehlig <jfehlig@suse.com>
|
||||
Acked-by: Jim Fehlig <jfehlig@suse.com>
|
||||
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
|
||||
Committed-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_event.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.c
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_event.c
|
||||
@@ -267,18 +267,11 @@ static int time_rel_to_abs(libxl__gc *gc
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void time_insert_finite(libxl__gc *gc, libxl__ev_time *ev)
|
||||
-{
|
||||
- libxl__ev_time *evsearch;
|
||||
- LIBXL_TAILQ_INSERT_SORTED(&CTX->etimes, entry, ev, evsearch, /*empty*/,
|
||||
- timercmp(&ev->abs, &evsearch->abs, >));
|
||||
- ev->infinite = 0;
|
||||
-}
|
||||
-
|
||||
static int time_register_finite(libxl__gc *gc, libxl__ev_time *ev,
|
||||
struct timeval absolute)
|
||||
{
|
||||
int rc;
|
||||
+ libxl__ev_time *evsearch;
|
||||
|
||||
rc = OSEVENT_HOOK(timeout,register, alloc, &ev->nexus->for_app_reg,
|
||||
absolute, ev->nexus);
|
||||
@@ -286,7 +279,8 @@ static int time_register_finite(libxl__g
|
||||
|
||||
ev->infinite = 0;
|
||||
ev->abs = absolute;
|
||||
- time_insert_finite(gc, ev);
|
||||
+ LIBXL_TAILQ_INSERT_SORTED(&CTX->etimes, entry, ev, evsearch, /*empty*/,
|
||||
+ timercmp(&ev->abs, &evsearch->abs, >));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -294,7 +288,12 @@ static int time_register_finite(libxl__g
|
||||
static void time_deregister(libxl__gc *gc, libxl__ev_time *ev)
|
||||
{
|
||||
if (!ev->infinite) {
|
||||
- OSEVENT_HOOK_VOID(timeout,deregister, release, ev->nexus->for_app_reg);
|
||||
+ struct timeval right_away = { 0, 0 };
|
||||
+ if (ev->nexus) /* only set if app provided hooks */
|
||||
+ ev->nexus->ev = 0;
|
||||
+ OSEVENT_HOOK_VOID(timeout,modify,
|
||||
+ noop /* release nexus in _occurred_ */,
|
||||
+ &ev->nexus->for_app_reg, right_away);
|
||||
LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
|
||||
}
|
||||
}
|
||||
@@ -364,70 +363,6 @@ int libxl__ev_time_register_rel(libxl__g
|
||||
return rc;
|
||||
}
|
||||
|
||||
-int libxl__ev_time_modify_abs(libxl__gc *gc, libxl__ev_time *ev,
|
||||
- struct timeval absolute)
|
||||
-{
|
||||
- int rc;
|
||||
-
|
||||
- CTX_LOCK;
|
||||
-
|
||||
- DBG("ev_time=%p modify abs==%lu.%06lu",
|
||||
- ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);
|
||||
-
|
||||
- assert(libxl__ev_time_isregistered(ev));
|
||||
-
|
||||
- if (ev->infinite) {
|
||||
- rc = time_register_finite(gc, ev, absolute);
|
||||
- if (rc) goto out;
|
||||
- } else {
|
||||
- rc = OSEVENT_HOOK(timeout,modify, noop,
|
||||
- &ev->nexus->for_app_reg, absolute);
|
||||
- if (rc) goto out;
|
||||
-
|
||||
- LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
|
||||
- ev->abs = absolute;
|
||||
- time_insert_finite(gc, ev);
|
||||
- }
|
||||
-
|
||||
- rc = 0;
|
||||
- out:
|
||||
- time_done_debug(gc,__func__,ev,rc);
|
||||
- CTX_UNLOCK;
|
||||
- return rc;
|
||||
-}
|
||||
-
|
||||
-int libxl__ev_time_modify_rel(libxl__gc *gc, libxl__ev_time *ev,
|
||||
- int milliseconds)
|
||||
-{
|
||||
- struct timeval absolute;
|
||||
- int rc;
|
||||
-
|
||||
- CTX_LOCK;
|
||||
-
|
||||
- DBG("ev_time=%p modify ms=%d", ev, milliseconds);
|
||||
-
|
||||
- assert(libxl__ev_time_isregistered(ev));
|
||||
-
|
||||
- if (milliseconds < 0) {
|
||||
- time_deregister(gc, ev);
|
||||
- ev->infinite = 1;
|
||||
- rc = 0;
|
||||
- goto out;
|
||||
- }
|
||||
-
|
||||
- rc = time_rel_to_abs(gc, milliseconds, &absolute);
|
||||
- if (rc) goto out;
|
||||
-
|
||||
- rc = libxl__ev_time_modify_abs(gc, ev, absolute);
|
||||
- if (rc) goto out;
|
||||
-
|
||||
- rc = 0;
|
||||
- out:
|
||||
- time_done_debug(gc,__func__,ev,rc);
|
||||
- CTX_UNLOCK;
|
||||
- return rc;
|
||||
-}
|
||||
-
|
||||
void libxl__ev_time_deregister(libxl__gc *gc, libxl__ev_time *ev)
|
||||
{
|
||||
CTX_LOCK;
|
||||
@@ -1160,7 +1095,11 @@ void libxl_osevent_occurred_timeout(libx
|
||||
CTX_LOCK;
|
||||
assert(!CTX->osevent_in_hook);
|
||||
|
||||
- libxl__ev_time *ev = osevent_ev_from_hook_nexus(ctx, for_libxl);
|
||||
+ libxl__osevent_hook_nexus *nexus = for_libxl;
|
||||
+ libxl__ev_time *ev = osevent_ev_from_hook_nexus(ctx, nexus);
|
||||
+
|
||||
+ osevent_release_nexus(gc, &CTX->hook_timeout_nexi_idle, nexus);
|
||||
+
|
||||
if (!ev) goto out;
|
||||
assert(!ev->infinite);
|
||||
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_event.h
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_event.h
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_event.h
|
||||
@@ -287,8 +287,10 @@ typedef struct libxl_osevent_hooks {
|
||||
int (*timeout_register)(void *user, void **for_app_registration_out,
|
||||
struct timeval abs, void *for_libxl);
|
||||
int (*timeout_modify)(void *user, void **for_app_registration_update,
|
||||
- struct timeval abs);
|
||||
- void (*timeout_deregister)(void *user, void *for_app_registration);
|
||||
+ struct timeval abs)
|
||||
+ /* only ever called with abs={0,0}, meaning ASAP */;
|
||||
+ void (*timeout_deregister)(void *user, void *for_app_registration)
|
||||
+ /* will never be called */;
|
||||
} libxl_osevent_hooks;
|
||||
|
||||
/* The application which calls register_fd_hooks promises to
|
||||
@@ -337,6 +339,17 @@ typedef struct libxl_osevent_hooks {
|
||||
* register (or modify), and pass it to subsequent calls to modify
|
||||
* or deregister.
|
||||
*
|
||||
+ * Note that the application must cope with a call from libxl to
|
||||
+ * timeout_modify racing with its own call to
|
||||
+ * libxl__osevent_occurred_timeout. libxl guarantees that
|
||||
+ * timeout_modify will only be called with abs={0,0} but the
|
||||
+ * application must still ensure that libxl's attempt to cause the
|
||||
+ * timeout to occur immediately is safely ignored even if the timeout is
|
||||
+ * actually already in the process of occurring.
|
||||
+ *
|
||||
+ * timeout_deregister is not used because it forms part of a
|
||||
+ * deprecated unsafe mode of use of the API.
|
||||
+ *
|
||||
* osevent_register_hooks may be called only once for each libxl_ctx.
|
||||
* libxl may make calls to register/modify/deregister from within
|
||||
* any libxl function (indeed, it will usually call register from
|
@ -1,64 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Keir Fraser <keir@xen.org>
|
||||
# Date 1359566139 28800
|
||||
# Node ID 8201b6ec3564c80db5516cdcf36dcfa9b7fdd93b
|
||||
# Parent 1fe8ecfdf10cc9077fc810364663a0f25a5c5b96
|
||||
vmx: Simplify cr0 update handling by deferring cr4 changes to the cr4 handler.
|
||||
|
||||
Signed-off-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -1133,20 +1133,18 @@ static void vmx_update_guest_cr(struct v
|
||||
|
||||
if ( paging_mode_hap(v->domain) )
|
||||
{
|
||||
- /* We manage GUEST_CR3 when guest CR0.PE is zero or when cr3 memevents are on */
|
||||
+ /* Manage GUEST_CR3 when CR0.PE=0. */
|
||||
uint32_t cr3_ctls = (CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING);
|
||||
v->arch.hvm_vmx.exec_control &= ~cr3_ctls;
|
||||
if ( !hvm_paging_enabled(v) )
|
||||
v->arch.hvm_vmx.exec_control |= cr3_ctls;
|
||||
|
||||
+ /* Trap CR3 updates if CR3 memory events are enabled. */
|
||||
if ( v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_CR3] )
|
||||
v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;
|
||||
|
||||
vmx_update_cpu_exec_control(v);
|
||||
-
|
||||
- /* Changing CR0.PE can change some bits in real CR4. */
|
||||
- vmx_update_guest_cr(v, 4);
|
||||
}
|
||||
|
||||
if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
|
||||
@@ -1176,8 +1174,6 @@ static void vmx_update_guest_cr(struct v
|
||||
{
|
||||
for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
|
||||
vmx_set_segment_register(v, s, &reg[s]);
|
||||
- v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
|
||||
- __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
|
||||
v->arch.hvm_vmx.exception_bitmap = 0xffffffff;
|
||||
vmx_update_exception_bitmap(v);
|
||||
}
|
||||
@@ -1187,10 +1183,6 @@ static void vmx_update_guest_cr(struct v
|
||||
if ( !(v->arch.hvm_vmx.vm86_segment_mask & (1<<s)) )
|
||||
vmx_set_segment_register(
|
||||
v, s, &v->arch.hvm_vmx.vm86_saved_seg[s]);
|
||||
- v->arch.hvm_vcpu.hw_cr[4] =
|
||||
- ((v->arch.hvm_vcpu.hw_cr[4] & ~X86_CR4_VME)
|
||||
- |(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VME));
|
||||
- __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
|
||||
v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK
|
||||
| (paging_mode_hap(v->domain) ?
|
||||
0 : (1U << TRAP_page_fault))
|
||||
@@ -1204,6 +1196,9 @@ static void vmx_update_guest_cr(struct v
|
||||
v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
|
||||
__vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
|
||||
__vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
|
||||
+
|
||||
+ /* Changing CR0 can change some bits in real CR4. */
|
||||
+ vmx_update_guest_cr(v, 4);
|
||||
break;
|
||||
}
|
||||
case 2:
|
@ -1,39 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
# Date 1359566250 28800
|
||||
# Node ID d1bf3b21f78302dad1ed53e540facf7b9a0e2ab5
|
||||
# Parent 8201b6ec3564c80db5516cdcf36dcfa9b7fdd93b
|
||||
VMX: disable SMEP feature when guest is in non-paging mode
|
||||
|
||||
SMEP is disabled if CPU is in non-paging mode in hardware.
|
||||
However Xen always uses paging mode to emulate guest non-paging
|
||||
mode with HAP. To emulate this behavior, SMEP needs to be manually
|
||||
disabled when guest switches to non-paging mode.
|
||||
|
||||
We met an issue that, SMP Linux guest with recent kernel (enable
|
||||
SMEP support, for example, 3.5.3) would crash with triple fault if
|
||||
setting unrestricted_guest=0 in grub. This is because Xen uses an
|
||||
identity mapping page table to emulate the non-paging mode, where
|
||||
the page table is set with USER flag. If SMEP is still enabled in
|
||||
this case, guest will meet unhandlable page fault and then crash.
|
||||
|
||||
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
|
||||
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
|
||||
Committed-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -1227,6 +1227,13 @@ static void vmx_update_guest_cr(struct v
|
||||
{
|
||||
v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
|
||||
v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
|
||||
+ /*
|
||||
+ * SMEP is disabled if CPU is in non-paging mode in hardware.
|
||||
+ * However Xen always uses paging mode to emulate guest non-paging
|
||||
+ * mode with HAP. To emulate this behavior, SMEP needs to be
|
||||
+ * manually disabled when guest switches to non-paging mode.
|
||||
+ */
|
||||
+ v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP;
|
||||
}
|
||||
__vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
|
||||
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
|
@ -1,37 +0,0 @@
|
||||
References: CVE-2013-0153 XSA-36 bnc#800275
|
||||
|
||||
# HG changeset patch
|
||||
# User Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
# Date 1360073898 -3600
|
||||
# Node ID 32d4516a97f0b22ed06155f7b8e0bff075024991
|
||||
# Parent 2fdca30363f08026971c094e8a1a84e19ca3e55b
|
||||
ACPI: acpi_table_parse() should return handler's error code
|
||||
|
||||
Currently, the error code returned by acpi_table_parse()'s handler
|
||||
is ignored. This patch will propagate handler's return value to
|
||||
acpi_table_parse()'s caller.
|
||||
|
||||
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/acpi/tables.c
|
||||
+++ b/xen/drivers/acpi/tables.c
|
||||
@@ -267,7 +267,7 @@ acpi_table_parse_madt(enum acpi_madt_typ
|
||||
* @handler: handler to run
|
||||
*
|
||||
* Scan the ACPI System Descriptor Table (STD) for a table matching @id,
|
||||
- * run @handler on it. Return 0 if table found, return on if not.
|
||||
+ * run @handler on it.
|
||||
*/
|
||||
int __init acpi_table_parse(char *id, acpi_table_handler handler)
|
||||
{
|
||||
@@ -282,8 +282,7 @@ int __init acpi_table_parse(char *id, ac
|
||||
acpi_get_table(id, 0, &table);
|
||||
|
||||
if (table) {
|
||||
- handler(table);
|
||||
- return 0;
|
||||
+ return handler(table);
|
||||
} else
|
||||
return 1;
|
||||
}
|
@ -1,205 +0,0 @@
|
||||
References: CVE-2013-0153 XSA-36 bnc#800275
|
||||
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1360074047 -3600
|
||||
# Node ID 601139e2b0db7dc8a5bb69b9b7373fb87742741c
|
||||
# Parent 32d4516a97f0b22ed06155f7b8e0bff075024991
|
||||
AMD,IOMMU: Clean up old entries in remapping tables when creating new one
|
||||
|
||||
When changing the affinity of an IRQ associated with a passed
|
||||
through PCI device, clear previous mapping.
|
||||
|
||||
This is XSA-36 / CVE-2013-0153.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
In addition, because some BIOSes may incorrectly program IVRS
|
||||
entries for IOAPIC try to check for entry's consistency. Specifically,
|
||||
if conflicting entries are found disable IOMMU if per-device
|
||||
remapping table is used. If entries refer to bogus IOAPIC IDs
|
||||
disable IOMMU unconditionally.
|
||||
|
||||
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <xen/errno.h>
|
||||
#include <xen/acpi.h>
|
||||
#include <asm/apicdef.h>
|
||||
+#include <asm/io_apic.h>
|
||||
#include <asm/amd-iommu.h>
|
||||
#include <asm/hvm/svm/amd-iommu-proto.h>
|
||||
|
||||
@@ -635,6 +636,7 @@ static u16 __init parse_ivhd_device_spec
|
||||
u16 header_length, u16 block_length, struct amd_iommu *iommu)
|
||||
{
|
||||
u16 dev_length, bdf;
|
||||
+ int apic;
|
||||
|
||||
dev_length = sizeof(*special);
|
||||
if ( header_length < (block_length + dev_length) )
|
||||
@@ -651,10 +653,59 @@ static u16 __init parse_ivhd_device_spec
|
||||
}
|
||||
|
||||
add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
|
||||
- /* set device id of ioapic */
|
||||
- ioapic_sbdf[special->handle].bdf = bdf;
|
||||
- ioapic_sbdf[special->handle].seg = seg;
|
||||
- return dev_length;
|
||||
+
|
||||
+ if ( special->variety != ACPI_IVHD_IOAPIC )
|
||||
+ {
|
||||
+ if ( special->variety != ACPI_IVHD_HPET )
|
||||
+ printk(XENLOG_ERR "Unrecognized IVHD special variety %#x\n",
|
||||
+ special->variety);
|
||||
+ return dev_length;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Some BIOSes have broken IOAPIC entries so we check for IVRS
|
||||
+ * consistency here --- whether entry's IOAPIC ID is valid and
|
||||
+ * whether there are conflicting/duplicated entries.
|
||||
+ */
|
||||
+ for ( apic = 0; apic < nr_ioapics; apic++ )
|
||||
+ {
|
||||
+ if ( IO_APIC_ID(apic) != special->handle )
|
||||
+ continue;
|
||||
+
|
||||
+ if ( ioapic_sbdf[special->handle].pin_setup )
|
||||
+ {
|
||||
+ if ( ioapic_sbdf[special->handle].bdf == bdf &&
|
||||
+ ioapic_sbdf[special->handle].seg == seg )
|
||||
+ AMD_IOMMU_DEBUG("IVHD Warning: Duplicate IO-APIC %#x entries\n",
|
||||
+ special->handle);
|
||||
+ else
|
||||
+ {
|
||||
+ printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n",
|
||||
+ special->handle);
|
||||
+ if ( amd_iommu_perdev_intremap )
|
||||
+ return 0;
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* set device id of ioapic */
|
||||
+ ioapic_sbdf[special->handle].bdf = bdf;
|
||||
+ ioapic_sbdf[special->handle].seg = seg;
|
||||
+
|
||||
+ ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
|
||||
+ unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
|
||||
+ if ( nr_ioapic_entries[apic] &&
|
||||
+ !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
|
||||
+ {
|
||||
+ printk(XENLOG_ERR "IVHD Error: Out of memory\n");
|
||||
+ return 0;
|
||||
+ }
|
||||
+ }
|
||||
+ return dev_length;
|
||||
+ }
|
||||
+
|
||||
+ printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n", special->handle);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int __init parse_ivhd_block(const struct acpi_ivrs_hardware *ivhd_block)
|
||||
--- a/xen/drivers/passthrough/amd/iommu_intr.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
|
||||
@@ -99,12 +99,12 @@ static void update_intremap_entry(u32* e
|
||||
static void update_intremap_entry_from_ioapic(
|
||||
int bdf,
|
||||
struct amd_iommu *iommu,
|
||||
- struct IO_APIC_route_entry *ioapic_rte)
|
||||
+ const struct IO_APIC_route_entry *rte,
|
||||
+ const struct IO_APIC_route_entry *old_rte)
|
||||
{
|
||||
unsigned long flags;
|
||||
u32* entry;
|
||||
u8 delivery_mode, dest, vector, dest_mode;
|
||||
- struct IO_APIC_route_entry *rte = ioapic_rte;
|
||||
int req_id;
|
||||
spinlock_t *lock;
|
||||
int offset;
|
||||
@@ -120,6 +120,14 @@ static void update_intremap_entry_from_i
|
||||
spin_lock_irqsave(lock, flags);
|
||||
|
||||
offset = get_intremap_offset(vector, delivery_mode);
|
||||
+ if ( old_rte )
|
||||
+ {
|
||||
+ int old_offset = get_intremap_offset(old_rte->vector,
|
||||
+ old_rte->delivery_mode);
|
||||
+
|
||||
+ if ( offset != old_offset )
|
||||
+ free_intremap_entry(iommu->seg, bdf, old_offset);
|
||||
+ }
|
||||
entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
|
||||
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
|
||||
|
||||
@@ -188,6 +196,7 @@ int __init amd_iommu_setup_ioapic_remapp
|
||||
amd_iommu_flush_intremap(iommu, req_id);
|
||||
spin_unlock_irqrestore(&iommu->lock, flags);
|
||||
}
|
||||
+ set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@@ -199,6 +208,7 @@ void amd_iommu_ioapic_update_ire(
|
||||
struct IO_APIC_route_entry old_rte = { 0 };
|
||||
struct IO_APIC_route_entry new_rte = { 0 };
|
||||
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
|
||||
+ unsigned int pin = (reg - 0x10) / 2;
|
||||
int saved_mask, seg, bdf;
|
||||
struct amd_iommu *iommu;
|
||||
|
||||
@@ -236,6 +246,14 @@ void amd_iommu_ioapic_update_ire(
|
||||
*(((u32 *)&new_rte) + 1) = value;
|
||||
}
|
||||
|
||||
+ if ( new_rte.mask &&
|
||||
+ !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
|
||||
+ {
|
||||
+ ASSERT(saved_mask);
|
||||
+ __io_apic_write(apic, reg, value);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
/* mask the interrupt while we change the intremap table */
|
||||
if ( !saved_mask )
|
||||
{
|
||||
@@ -244,7 +262,11 @@ void amd_iommu_ioapic_update_ire(
|
||||
}
|
||||
|
||||
/* Update interrupt remapping entry */
|
||||
- update_intremap_entry_from_ioapic(bdf, iommu, &new_rte);
|
||||
+ update_intremap_entry_from_ioapic(
|
||||
+ bdf, iommu, &new_rte,
|
||||
+ test_and_set_bit(pin,
|
||||
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
|
||||
+ : NULL);
|
||||
|
||||
/* Forward write access to IO-APIC RTE */
|
||||
__io_apic_write(apic, reg, value);
|
||||
@@ -354,6 +376,12 @@ void amd_iommu_msi_msg_update_ire(
|
||||
return;
|
||||
}
|
||||
|
||||
+ if ( msi_desc->remap_index >= 0 )
|
||||
+ update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, NULL);
|
||||
+
|
||||
+ if ( !msg )
|
||||
+ return;
|
||||
+
|
||||
update_intremap_entry_from_msi_msg(iommu, pdev, msi_desc, msg);
|
||||
}
|
||||
|
||||
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
|
||||
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
|
||||
@@ -100,6 +100,7 @@ void amd_iommu_read_msi_from_ire(
|
||||
|
||||
extern struct ioapic_sbdf {
|
||||
u16 bdf, seg;
|
||||
+ unsigned long *pin_setup;
|
||||
} ioapic_sbdf[MAX_IO_APICS];
|
||||
extern void *shared_intremap_table;
|
||||
|
@ -1,77 +0,0 @@
|
||||
References: CVE-2013-0153 XSA-36 bnc#800275
|
||||
|
||||
# HG changeset patch
|
||||
# User Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
# Date 1360074085 -3600
|
||||
# Node ID e379a23b04655e9e43dc50944a5c9d1e59d8bee9
|
||||
# Parent 601139e2b0db7dc8a5bb69b9b7373fb87742741c
|
||||
AMD,IOMMU: Disable IOMMU if SATA Combined mode is on
|
||||
|
||||
AMD's SP5100 chipset can be placed into SATA Combined mode
|
||||
that may cause prevent dom0 from booting when IOMMU is
|
||||
enabled and per-device interrupt remapping table is used.
|
||||
While SP5100 erratum 28 requires BIOSes to disable this mode,
|
||||
some may still use it.
|
||||
|
||||
This patch checks whether this mode is on and, if per-device
|
||||
table is in use, disables IOMMU.
|
||||
|
||||
This is XSA-36 / CVE-2013-0153.
|
||||
|
||||
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
|
||||
Flipped operands of && in amd_iommu_init() to make the message issued
|
||||
by amd_sp5100_erratum28() match reality (when amd_iommu_perdev_intremap
|
||||
is zero, there's really no point in calling the function).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_init.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_init.c
|
||||
@@ -1118,12 +1118,45 @@ static int __init amd_iommu_setup_device
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/* Check whether SP5100 SATA Combined mode is on */
|
||||
+static bool_t __init amd_sp5100_erratum28(void)
|
||||
+{
|
||||
+ u32 bus, id;
|
||||
+ u16 vendor_id, dev_id;
|
||||
+ u8 byte;
|
||||
+
|
||||
+ for (bus = 0; bus < 256; bus++)
|
||||
+ {
|
||||
+ id = pci_conf_read32(0, bus, 0x14, 0, PCI_VENDOR_ID);
|
||||
+
|
||||
+ vendor_id = id & 0xffff;
|
||||
+ dev_id = (id >> 16) & 0xffff;
|
||||
+
|
||||
+ /* SP5100 SMBus module sets Combined mode on */
|
||||
+ if (vendor_id != 0x1002 || dev_id != 0x4385)
|
||||
+ continue;
|
||||
+
|
||||
+ byte = pci_conf_read8(0, bus, 0x14, 0, 0xad);
|
||||
+ if ( (byte >> 3) & 1 )
|
||||
+ {
|
||||
+ printk(XENLOG_WARNING "AMD-Vi: SP5100 erratum 28 detected, disabling IOMMU.\n"
|
||||
+ "If possible, disable SATA Combined mode in BIOS or contact your vendor for BIOS update.\n");
|
||||
+ return 1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int __init amd_iommu_init(void)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
|
||||
BUG_ON( !iommu_found() );
|
||||
|
||||
+ if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
|
||||
+ goto error_out;
|
||||
+
|
||||
ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
|
||||
|
||||
if ( !ivrs_bdf_entries )
|
@ -1,55 +0,0 @@
|
||||
References: CVE-2013-0153 XSA-36 bnc#800275
|
||||
|
||||
# HG changeset patch
|
||||
# User Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
# Date 1360074131 -3600
|
||||
# Node ID 1af531e7bc2fc518f16d8d1461083c528e1517cf
|
||||
# Parent e379a23b04655e9e43dc50944a5c9d1e59d8bee9
|
||||
AMD,IOMMU: Make per-device interrupt remapping table default
|
||||
|
||||
Using global interrupt remapping table may be insecure, as
|
||||
described by XSA-36. This patch makes per-device mode default.
|
||||
|
||||
This is XSA-36 / CVE-2013-0153.
|
||||
|
||||
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
|
||||
|
||||
Moved warning in amd_iov_detect() to location covering all cases.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/irq.c
|
||||
+++ b/xen/arch/x86/irq.c
|
||||
@@ -1942,9 +1942,6 @@ int map_domain_pirq(
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
set_domain_irq_pirq(d, irq, info);
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
-
|
||||
- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV )
|
||||
- printk(XENLOG_INFO "Per-device vector maps for GSIs not implemented yet.\n");
|
||||
}
|
||||
|
||||
done:
|
||||
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
@@ -204,6 +204,8 @@ int __init amd_iov_detect(void)
|
||||
{
|
||||
printk("AMD-Vi: Not overriding irq_vector_map setting\n");
|
||||
}
|
||||
+ if ( !amd_iommu_perdev_intremap )
|
||||
+ printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
|
||||
return scan_pci_devices();
|
||||
}
|
||||
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
@@ -52,7 +52,7 @@ bool_t __read_mostly iommu_qinval = 1;
|
||||
bool_t __read_mostly iommu_intremap = 1;
|
||||
bool_t __read_mostly iommu_hap_pt_share = 1;
|
||||
bool_t __read_mostly iommu_debug;
|
||||
-bool_t __read_mostly amd_iommu_perdev_intremap;
|
||||
+bool_t __read_mostly amd_iommu_perdev_intremap = 1;
|
||||
|
||||
DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
|
||||
|
@ -1,37 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Olaf Hering <olaf@aepfle.de>
|
||||
# Date 1360664991 -3600
|
||||
# Node ID a37aa55c3cbcb0e8340b4985314ef8fb31d7610b
|
||||
# Parent 9af6e566befe5516e66b62197813aa22e1d7122c
|
||||
unmodified_drivers: __devinit was removed in linux-3.8
|
||||
|
||||
Signed-off-by: Olaf Hering <olaf@aepfle.de>
|
||||
|
||||
Merge with __init handling.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
|
||||
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
|
||||
@@ -13,10 +13,19 @@
|
||||
#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
|
||||
#endif
|
||||
|
||||
-#if defined(_LINUX_INIT_H) && !defined(__init)
|
||||
+#ifdef _LINUX_INIT_H
|
||||
+
|
||||
+#ifndef __init
|
||||
#define __init
|
||||
#endif
|
||||
|
||||
+#ifndef __devinit
|
||||
+#define __devinit
|
||||
+#define __devinitdata
|
||||
+#endif
|
||||
+
|
||||
+#endif /* _LINUX_INIT_H */
|
||||
+
|
||||
#if defined(__LINUX_CACHE_H) && !defined(__read_mostly)
|
||||
#define __read_mostly
|
||||
#endif
|
@ -1,21 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Keir Fraser <keir@xen.org>
|
||||
# Date 1360775011 0
|
||||
# Node ID 97b7e546e2e4a021491e198a33f7d685550ebc73
|
||||
# Parent 742dde457258422a3d08e3ddbf9a7eae55c93acb
|
||||
gcc4.8 build fix: Add -Wno-unused-local-typedefs to CFLAGS.
|
||||
|
||||
Based on a patch by M A Young <m.a.young@durham.ac.uk>
|
||||
|
||||
Signed-off-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/Config.mk
|
||||
+++ b/Config.mk
|
||||
@@ -166,6 +166,7 @@ CFLAGS-$(clang) += -Wno-parentheses -Wno
|
||||
$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement)
|
||||
$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement)
|
||||
$(call cc-option-add,CFLAGS,CC,-Wno-unused-but-set-variable)
|
||||
+$(call cc-option-add,CFLAGS,CC,-Wno-unused-local-typedefs)
|
||||
|
||||
LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i))
|
||||
CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i))
|
@ -1,127 +0,0 @@
|
||||
References: CVE-2013-0153 XSA-36 bnc#800275
|
||||
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1360831252 -3600
|
||||
# Node ID e68f14b9e73925e9d404e517ba510f73fe472e4e
|
||||
# Parent c43be17eec0602015fc6461d1f13c992ba330c20
|
||||
AMD IOMMU: also spot missing IO-APIC entries in IVRS table
|
||||
|
||||
Apart from dealing with duplicate conflicting entries, we also have to
|
||||
handle firmware omitting IO-APIC entries in IVRS altogether. Not doing
|
||||
so has caused c/s 26517:601139e2b0db to crash such systems during
|
||||
boot (whereas with the change here the IOMMU gets disabled just as is
|
||||
being done in the other cases, i.e. unless global tables are being
|
||||
used).
|
||||
|
||||
Debugging this issue has also pointed out that the debug log output is
|
||||
pretty ugly to look at - consolidate the output, and add one extra
|
||||
item for the IVHD special entries, so that future issues are easier
|
||||
to analyze.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
|
||||
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
@@ -352,9 +352,8 @@ static int __init parse_ivmd_block(const
|
||||
base = start_addr & PAGE_MASK;
|
||||
limit = (start_addr + mem_length - 1) & PAGE_MASK;
|
||||
|
||||
- AMD_IOMMU_DEBUG("IVMD Block: Type 0x%x\n",ivmd_block->header.type);
|
||||
- AMD_IOMMU_DEBUG(" Start_Addr_Phys 0x%lx\n", start_addr);
|
||||
- AMD_IOMMU_DEBUG(" Mem_Length 0x%lx\n", mem_length);
|
||||
+ AMD_IOMMU_DEBUG("IVMD Block: type %#x phys %#lx len %#lx\n",
|
||||
+ ivmd_block->header.type, start_addr, mem_length);
|
||||
|
||||
if ( ivmd_block->header.flags & ACPI_IVMD_EXCLUSION_RANGE )
|
||||
iw = ir = IOMMU_CONTROL_ENABLED;
|
||||
@@ -549,8 +548,8 @@ static u16 __init parse_ivhd_device_alia
|
||||
return 0;
|
||||
}
|
||||
|
||||
- AMD_IOMMU_DEBUG(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
|
||||
- AMD_IOMMU_DEBUG(" Dev_Id Alias: 0x%x\n", alias_id);
|
||||
+ AMD_IOMMU_DEBUG(" Dev_Id Range: %#x -> %#x alias %#x\n",
|
||||
+ first_bdf, last_bdf, alias_id);
|
||||
|
||||
for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
|
||||
add_ivrs_mapping_entry(bdf, alias_id, range->alias.header.data_setting,
|
||||
@@ -652,6 +651,9 @@ static u16 __init parse_ivhd_device_spec
|
||||
return 0;
|
||||
}
|
||||
|
||||
+ AMD_IOMMU_DEBUG("IVHD Special: %04x:%02x:%02x.%u variety %#x handle %#x\n",
|
||||
+ seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf),
|
||||
+ special->variety, special->handle);
|
||||
add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
|
||||
|
||||
if ( special->variety != ACPI_IVHD_IOAPIC )
|
||||
@@ -737,10 +739,9 @@ static int __init parse_ivhd_block(const
|
||||
{
|
||||
ivhd_device = (const void *)((const u8 *)ivhd_block + block_length);
|
||||
|
||||
- AMD_IOMMU_DEBUG( "IVHD Device Entry:\n");
|
||||
- AMD_IOMMU_DEBUG( " Type 0x%x\n", ivhd_device->header.type);
|
||||
- AMD_IOMMU_DEBUG( " Dev_Id 0x%x\n", ivhd_device->header.id);
|
||||
- AMD_IOMMU_DEBUG( " Flags 0x%x\n", ivhd_device->header.data_setting);
|
||||
+ AMD_IOMMU_DEBUG("IVHD Device Entry: type %#x id %#x flags %#x\n",
|
||||
+ ivhd_device->header.type, ivhd_device->header.id,
|
||||
+ ivhd_device->header.data_setting);
|
||||
|
||||
switch ( ivhd_device->header.type )
|
||||
{
|
||||
@@ -869,6 +870,7 @@ static int __init parse_ivrs_table(struc
|
||||
{
|
||||
const struct acpi_ivrs_header *ivrs_block;
|
||||
unsigned long length;
|
||||
+ unsigned int apic;
|
||||
int error = 0;
|
||||
|
||||
BUG_ON(!table);
|
||||
@@ -882,11 +884,9 @@ static int __init parse_ivrs_table(struc
|
||||
{
|
||||
ivrs_block = (struct acpi_ivrs_header *)((u8 *)table + length);
|
||||
|
||||
- AMD_IOMMU_DEBUG("IVRS Block:\n");
|
||||
- AMD_IOMMU_DEBUG(" Type 0x%x\n", ivrs_block->type);
|
||||
- AMD_IOMMU_DEBUG(" Flags 0x%x\n", ivrs_block->flags);
|
||||
- AMD_IOMMU_DEBUG(" Length 0x%x\n", ivrs_block->length);
|
||||
- AMD_IOMMU_DEBUG(" Dev_Id 0x%x\n", ivrs_block->device_id);
|
||||
+ AMD_IOMMU_DEBUG("IVRS Block: type %#x flags %#x len %#x id %#x\n",
|
||||
+ ivrs_block->type, ivrs_block->flags,
|
||||
+ ivrs_block->length, ivrs_block->device_id);
|
||||
|
||||
if ( table->length < (length + ivrs_block->length) )
|
||||
{
|
||||
@@ -901,6 +901,29 @@ static int __init parse_ivrs_table(struc
|
||||
length += ivrs_block->length;
|
||||
}
|
||||
|
||||
+ /* Each IO-APIC must have been mentioned in the table. */
|
||||
+ for ( apic = 0; !error && apic < nr_ioapics; ++apic )
|
||||
+ {
|
||||
+ if ( !nr_ioapic_entries[apic] ||
|
||||
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
|
||||
+ continue;
|
||||
+
|
||||
+ printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
|
||||
+ IO_APIC_ID(apic));
|
||||
+ if ( amd_iommu_perdev_intremap )
|
||||
+ error = -ENXIO;
|
||||
+ else
|
||||
+ {
|
||||
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
|
||||
+ unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
|
||||
+ if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
|
||||
+ {
|
||||
+ printk(XENLOG_ERR "IVHD Error: Out of memory\n");
|
||||
+ error = -ENOMEM;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
return error;
|
||||
}
|
||||
|
@ -1,39 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Tim Deegan <tim@xen.org>
|
||||
# Date 1360917722 -3600
|
||||
# Node ID 0cca8a18432f08b342d76a753aa98559d892f592
|
||||
# Parent 7af3c38ae187b351c5cea58e9eee482b50d814d8
|
||||
xenoprof: avoid division by 0
|
||||
|
||||
Signed-off-by: Tim Deegan <tim@xen.org>
|
||||
Acked-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/common/xenoprof.c
|
||||
+++ b/xen/common/xenoprof.c
|
||||
@@ -193,6 +193,13 @@ static int alloc_xenoprof_struct(
|
||||
unsigned max_max_samples;
|
||||
int i;
|
||||
|
||||
+ nvcpu = 0;
|
||||
+ for_each_vcpu ( d, v )
|
||||
+ nvcpu++;
|
||||
+
|
||||
+ if ( !nvcpu )
|
||||
+ return -EINVAL;
|
||||
+
|
||||
d->xenoprof = xzalloc(struct xenoprof);
|
||||
if ( d->xenoprof == NULL )
|
||||
{
|
||||
@@ -209,10 +216,6 @@ static int alloc_xenoprof_struct(
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
- nvcpu = 0;
|
||||
- for_each_vcpu ( d, v )
|
||||
- nvcpu++;
|
||||
-
|
||||
bufsize = sizeof(struct xenoprof_buf);
|
||||
i = sizeof(struct event_log);
|
||||
#ifdef CONFIG_COMPAT
|
@ -9,10 +9,11 @@ Signed-off-by: Ross Philipson <ross.philipson@citrix.com>
|
||||
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
Committed-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
diff -r 71c15ae09983 -r 3124ab7855fd tools/libxl/libxl_dom.c
|
||||
--- a/tools/libxl/libxl_dom.c Fri Feb 15 13:32:15 2013 +0000
|
||||
+++ b/tools/libxl/libxl_dom.c Fri Feb 15 13:32:16 2013 +0000
|
||||
@@ -542,17 +542,24 @@ int libxl__build_hvm(libxl__gc *gc, uint
|
||||
Index: xen-4.2.2-testing/tools/libxl/libxl_dom.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/tools/libxl/libxl_dom.c
|
||||
+++ xen-4.2.2-testing/tools/libxl/libxl_dom.c
|
||||
@@ -546,17 +546,24 @@ int libxl__build_hvm(libxl__gc *gc, uint
|
||||
libxl__domain_build_state *state)
|
||||
{
|
||||
libxl_ctx *ctx = libxl__gc_owner(gc);
|
||||
|
@ -25,10 +25,10 @@ Signed-off-by: Ross Philipson <ross.philipson@citrix.com>
|
||||
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
Committed-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
Index: xen-4.2.1-testing/docs/man/xl.cfg.pod.5
|
||||
Index: xen-4.2.2-testing/docs/man/xl.cfg.pod.5
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/docs/man/xl.cfg.pod.5
|
||||
+++ xen-4.2.1-testing/docs/man/xl.cfg.pod.5
|
||||
--- xen-4.2.2-testing.orig/docs/man/xl.cfg.pod.5
|
||||
+++ xen-4.2.2-testing/docs/man/xl.cfg.pod.5
|
||||
@@ -637,6 +637,25 @@ of Xen) within a Xen guest or to support
|
||||
which uses hardware virtualisation extensions (e.g. Windows XP
|
||||
compatibility mode on more modern Windows OS).
|
||||
@ -55,10 +55,10 @@ Index: xen-4.2.1-testing/docs/man/xl.cfg.pod.5
|
||||
=back
|
||||
|
||||
=head3 Guest Virtual Time Controls
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl.h
|
||||
Index: xen-4.2.2-testing/tools/libxl/libxl.h
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl.h
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl.h
|
||||
--- xen-4.2.2-testing.orig/tools/libxl/libxl.h
|
||||
+++ xen-4.2.2-testing/tools/libxl/libxl.h
|
||||
@@ -68,6 +68,13 @@
|
||||
*/
|
||||
|
||||
@ -73,11 +73,11 @@ Index: xen-4.2.1-testing/tools/libxl/libxl.h
|
||||
* libxl ABI compatibility
|
||||
*
|
||||
* The only guarantee which libxl makes regarding ABI compatibility
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
Index: xen-4.2.2-testing/tools/libxl/libxl_dom.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_dom.c
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
@@ -21,6 +21,7 @@
|
||||
--- xen-4.2.2-testing.orig/tools/libxl/libxl_dom.c
|
||||
+++ xen-4.2.2-testing/tools/libxl/libxl_dom.c
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include <xc_dom.h>
|
||||
#include <xen/hvm/hvm_info_table.h>
|
||||
@ -85,7 +85,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
|
||||
libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
|
||||
{
|
||||
@@ -510,11 +511,61 @@ static int hvm_build_set_params(xc_inter
|
||||
@@ -514,11 +515,61 @@ static int hvm_build_set_params(xc_inter
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -149,7 +149,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
|
||||
if (info->u.hvm.firmware)
|
||||
firmware = info->u.hvm.firmware;
|
||||
@@ -528,13 +579,52 @@ static const char *libxl__domain_firmwar
|
||||
@@ -532,13 +583,52 @@ static const char *libxl__domain_firmwar
|
||||
firmware = "hvmloader";
|
||||
break;
|
||||
default:
|
||||
@ -206,7 +206,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
}
|
||||
|
||||
int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
|
||||
@@ -544,10 +634,6 @@ int libxl__build_hvm(libxl__gc *gc, uint
|
||||
@@ -548,10 +638,6 @@ int libxl__build_hvm(libxl__gc *gc, uint
|
||||
libxl_ctx *ctx = libxl__gc_owner(gc);
|
||||
struct xc_hvm_build_args args = {};
|
||||
int ret, rc = ERROR_FAIL;
|
||||
@ -217,7 +217,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
|
||||
memset(&args, 0, sizeof(struct xc_hvm_build_args));
|
||||
/* The params from the configuration file are in Mb, which are then
|
||||
@@ -557,22 +643,34 @@ int libxl__build_hvm(libxl__gc *gc, uint
|
||||
@@ -561,22 +647,34 @@ int libxl__build_hvm(libxl__gc *gc, uint
|
||||
*/
|
||||
args.mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
|
||||
args.mem_target = (uint64_t)(info->target_memkb - info->video_memkb) << 10;
|
||||
@ -256,7 +256,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
@@ -634,7 +732,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
@@ -638,7 +736,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
|
||||
memcpy(&count, ptr, sizeof(count));
|
||||
ptr += sizeof(count);
|
||||
@ -265,7 +265,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
if (size < sizeof(version) + sizeof(count) +
|
||||
count * (sizeof(struct libxl__physmap_info))) {
|
||||
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "wrong size");
|
||||
@@ -809,7 +907,7 @@ static void switch_logdirty_xswatch(libx
|
||||
@@ -852,7 +950,7 @@ static void switch_logdirty_xswatch(libx
|
||||
rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
|
||||
if (rc) goto out;
|
||||
|
||||
@ -274,7 +274,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
if (!rc) break;
|
||||
if (rc<0) goto out;
|
||||
}
|
||||
@@ -1281,7 +1379,7 @@ void libxl__xc_domain_save_done(libxl__e
|
||||
@@ -1324,7 +1422,7 @@ void libxl__xc_domain_save_done(libxl__e
|
||||
if (type == LIBXL_DOMAIN_TYPE_HVM) {
|
||||
rc = libxl__domain_suspend_device_model(gc, dss);
|
||||
if (rc) goto out;
|
||||
@ -283,10 +283,10 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
libxl__domain_save_device_model(egc, dss, domain_suspend_done);
|
||||
return;
|
||||
}
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_types.idl
|
||||
Index: xen-4.2.2-testing/tools/libxl/libxl_types.idl
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_types.idl
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_types.idl
|
||||
--- xen-4.2.2-testing.orig/tools/libxl/libxl_types.idl
|
||||
+++ xen-4.2.2-testing/tools/libxl/libxl_types.idl
|
||||
@@ -301,6 +301,8 @@ libxl_domain_build_info = Struct("domain
|
||||
("vpt_align", libxl_defbool),
|
||||
("timer_mode", libxl_timer_mode),
|
||||
@ -296,10 +296,10 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_types.idl
|
||||
("nographic", libxl_defbool),
|
||||
("vga", libxl_vga_interface_info),
|
||||
("vnc", libxl_vnc_info),
|
||||
Index: xen-4.2.1-testing/tools/libxl/xl_cmdimpl.c
|
||||
Index: xen-4.2.2-testing/tools/libxl/xl_cmdimpl.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/xl_cmdimpl.c
|
||||
+++ xen-4.2.1-testing/tools/libxl/xl_cmdimpl.c
|
||||
--- xen-4.2.2-testing.orig/tools/libxl/xl_cmdimpl.c
|
||||
+++ xen-4.2.2-testing/tools/libxl/xl_cmdimpl.c
|
||||
@@ -863,6 +863,11 @@ static void parse_config_data(const char
|
||||
}
|
||||
|
||||
|
@ -9,11 +9,11 @@ Signed-off-by: Ross Philipson <ross.philipson@citrix.com>
|
||||
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
Committed-by: Ian Campbell <ian.campbell@citrix.com>
|
||||
|
||||
Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
Index: xen-4.2.2-testing/tools/libxl/libxl_dom.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/libxl/libxl_dom.c
|
||||
+++ xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
@@ -31,8 +31,7 @@ libxl_domain_type libxl__domain_type(lib
|
||||
--- xen-4.2.2-testing.orig/tools/libxl/libxl_dom.c
|
||||
+++ xen-4.2.2-testing/tools/libxl/libxl_dom.c
|
||||
@@ -32,8 +32,7 @@ libxl_domain_type libxl__domain_type(lib
|
||||
|
||||
ret = xc_domain_getinfolist(ctx->xch, domid, 1, &info);
|
||||
if (ret != 1 || info.domain != domid) {
|
||||
@ -23,7 +23,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
return LIBXL_DOMAIN_TYPE_INVALID;
|
||||
}
|
||||
if (info.flags & XEN_DOMINF_hvm_guest)
|
||||
@@ -313,20 +312,19 @@ int libxl__build_post(libxl__gc *gc, uin
|
||||
@@ -317,20 +316,19 @@ int libxl__build_post(libxl__gc *gc, uin
|
||||
|
||||
ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
|
||||
ents[0] = "memory/static-max";
|
||||
@ -51,7 +51,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
|
||||
? "online" : "offline";
|
||||
}
|
||||
@@ -335,7 +333,7 @@ int libxl__build_post(libxl__gc *gc, uin
|
||||
@@ -339,7 +337,7 @@ int libxl__build_post(libxl__gc *gc, uin
|
||||
if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
|
||||
hvm_ents = libxl__calloc(gc, 3, sizeof(char *));
|
||||
hvm_ents[0] = "hvmloader/generation-id-address";
|
||||
@ -60,7 +60,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
}
|
||||
|
||||
dom_path = libxl__xs_get_dompath(gc, domid);
|
||||
@@ -343,7 +341,7 @@ int libxl__build_post(libxl__gc *gc, uin
|
||||
@@ -347,7 +345,7 @@ int libxl__build_post(libxl__gc *gc, uin
|
||||
return ERROR_FAIL;
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
retry_transaction:
|
||||
t = xs_transaction_start(ctx->xsh);
|
||||
|
||||
@@ -374,7 +372,7 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
@@ -378,7 +376,7 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
|
||||
dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
|
||||
if (!dom) {
|
||||
@ -78,7 +78,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
return ERROR_FAIL;
|
||||
}
|
||||
|
||||
@@ -384,13 +382,13 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
@@ -388,13 +386,13 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
state->pv_kernel.data,
|
||||
state->pv_kernel.size);
|
||||
if ( ret != 0) {
|
||||
@ -94,7 +94,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@@ -398,12 +396,12 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
@@ -402,12 +400,12 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
|
||||
if (state->pv_ramdisk.mapped) {
|
||||
if ( (ret = xc_dom_ramdisk_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size)) != 0 ) {
|
||||
@ -109,7 +109,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@@ -416,31 +414,31 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
@@ -420,31 +418,31 @@ int libxl__build_pv(libxl__gc *gc, uint3
|
||||
dom->xenstore_domid = state->store_domid;
|
||||
|
||||
if ( (ret = xc_dom_boot_xen_init(dom, ctx->xch, domid)) != 0 ) {
|
||||
@ -148,7 +148,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -679,8 +677,7 @@ int libxl__qemu_traditional_cmd(libxl__g
|
||||
@@ -683,8 +681,7 @@ int libxl__qemu_traditional_cmd(libxl__g
|
||||
const char *cmd)
|
||||
{
|
||||
char *path = NULL;
|
||||
@ -158,7 +158,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
return libxl__xs_write(gc, XBT_NULL, path, "%s", cmd);
|
||||
}
|
||||
|
||||
@@ -697,8 +694,7 @@ struct libxl__physmap_info {
|
||||
@@ -701,8 +698,7 @@ struct libxl__physmap_info {
|
||||
static inline char *restore_helper(libxl__gc *gc, uint32_t domid,
|
||||
uint64_t phys_offset, char *node)
|
||||
{
|
||||
@ -168,7 +168,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
domid, phys_offset, node);
|
||||
}
|
||||
|
||||
@@ -708,7 +704,6 @@ int libxl__toolstack_restore(uint32_t do
|
||||
@@ -712,7 +708,6 @@ int libxl__toolstack_restore(uint32_t do
|
||||
libxl__save_helper_state *shs = user;
|
||||
libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
|
||||
STATE_AO_GC(dcs->ao);
|
||||
@ -176,7 +176,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
int i, ret;
|
||||
const uint8_t *ptr = buf;
|
||||
uint32_t count = 0, version = 0;
|
||||
@@ -718,7 +713,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
@@ -722,7 +717,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
LOG(DEBUG,"domain=%"PRIu32" toolstack data size=%"PRIu32, domid, size);
|
||||
|
||||
if (size < sizeof(version) + sizeof(count)) {
|
||||
@ -185,7 +185,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -726,7 +721,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
@@ -730,7 +725,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
ptr += sizeof(version);
|
||||
|
||||
if (version != TOOLSTACK_SAVE_VERSION) {
|
||||
@ -194,7 +194,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -735,7 +730,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
@@ -739,7 +734,7 @@ int libxl__toolstack_restore(uint32_t do
|
||||
|
||||
if (size < sizeof(version) + sizeof(count) +
|
||||
count * (sizeof(struct libxl__physmap_info))) {
|
||||
@ -203,7 +203,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -945,15 +940,13 @@ static void switch_logdirty_done(libxl__
|
||||
@@ -988,15 +983,13 @@ static void switch_logdirty_done(libxl__
|
||||
int libxl__domain_suspend_device_model(libxl__gc *gc,
|
||||
libxl__domain_suspend_state *dss)
|
||||
{
|
||||
@ -220,7 +220,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
libxl__qemu_traditional_cmd(gc, domid, "save");
|
||||
libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
|
||||
break;
|
||||
@@ -1129,8 +1122,7 @@ int libxl__domain_suspend_common_callbac
|
||||
@@ -1172,8 +1165,7 @@ int libxl__domain_suspend_common_callbac
|
||||
static inline char *physmap_path(libxl__gc *gc, uint32_t domid,
|
||||
char *phys_offset, char *node)
|
||||
{
|
||||
@ -230,7 +230,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
domid, phys_offset, node);
|
||||
}
|
||||
|
||||
@@ -1147,7 +1139,7 @@ int libxl__toolstack_save(uint32_t domid
|
||||
@@ -1190,7 +1182,7 @@ int libxl__toolstack_save(uint32_t domid
|
||||
char **entries = NULL;
|
||||
struct libxl__physmap_info *pi;
|
||||
|
||||
@ -239,7 +239,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
"/local/domain/0/device-model/%d/physmap", domid), &num);
|
||||
count = num;
|
||||
|
||||
@@ -1288,7 +1280,7 @@ void libxl__domain_suspend(libxl__egc *e
|
||||
@@ -1331,7 +1323,7 @@ void libxl__domain_suspend(libxl__egc *e
|
||||
char *path;
|
||||
char *addr;
|
||||
|
||||
@ -248,7 +248,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
libxl__xs_get_dompath(gc, domid));
|
||||
addr = libxl__xs_read(gc, XBT_NULL, path);
|
||||
|
||||
@@ -1502,10 +1494,7 @@ static void domain_suspend_done(libxl__e
|
||||
@@ -1545,10 +1537,7 @@ static void domain_suspend_done(libxl__e
|
||||
|
||||
char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
|
||||
{
|
||||
@ -260,7 +260,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
}
|
||||
|
||||
static const char *userdata_path(libxl__gc *gc, uint32_t domid,
|
||||
@@ -1513,34 +1502,27 @@ static const char *userdata_path(libxl__
|
||||
@@ -1556,34 +1545,27 @@ static const char *userdata_path(libxl__
|
||||
const char *wh)
|
||||
{
|
||||
libxl_ctx *ctx = libxl__gc_owner(gc);
|
||||
@ -301,7 +301,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
return errno;
|
||||
}
|
||||
return 0;
|
||||
@@ -1548,7 +1530,6 @@ static int userdata_delete(libxl__gc *gc
|
||||
@@ -1591,7 +1573,6 @@ static int userdata_delete(libxl__gc *gc
|
||||
|
||||
void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
|
||||
{
|
||||
@ -309,7 +309,7 @@ Index: xen-4.2.1-testing/tools/libxl/libxl_dom.c
|
||||
const char *pattern;
|
||||
glob_t gl;
|
||||
int r, i;
|
||||
@@ -1564,7 +1545,7 @@ void libxl__userdata_destroyall(libxl__g
|
||||
@@ -1607,7 +1588,7 @@ void libxl__userdata_destroyall(libxl__g
|
||||
if (r == GLOB_NOMATCH)
|
||||
goto out;
|
||||
if (r)
|
||||
|
@ -18,7 +18,7 @@ Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vlapic.c
|
||||
+++ b/xen/arch/x86/hvm/vlapic.c
|
||||
@@ -1198,6 +1198,9 @@ static int lapic_load_regs(struct domain
|
||||
@@ -1194,6 +1194,9 @@ static int lapic_load_regs(struct domain
|
||||
if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
|
||||
return -EINVAL;
|
||||
|
||||
@ -43,7 +43,7 @@ Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
if (v->arch.hvm_vmx.eoi_exitmap_changed) {
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -1520,6 +1520,29 @@ static int vmx_virtual_intr_delivery_ena
|
||||
@@ -1523,6 +1523,29 @@ static int vmx_virtual_intr_delivery_ena
|
||||
return cpu_has_vmx_virtual_intr_delivery;
|
||||
}
|
||||
|
||||
@ -73,7 +73,7 @@ Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
static struct hvm_function_table __read_mostly vmx_function_table = {
|
||||
.name = "VMX",
|
||||
.cpu_up_prepare = vmx_cpu_up_prepare,
|
||||
@@ -1568,7 +1591,8 @@ static struct hvm_function_table __read_
|
||||
@@ -1571,7 +1594,8 @@ static struct hvm_function_table __read_
|
||||
.nhvm_intr_blocked = nvmx_intr_blocked,
|
||||
.nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
|
||||
.update_eoi_exit_bitmap = vmx_update_eoi_exit_bitmap,
|
||||
|
@ -18,8 +18,10 @@ Acked-by: Eddie Dong <eddie.dong@intel.com>
|
||||
Acked-by: Jun Nakajima <jun.nakajima@intel.com>
|
||||
Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmcs.c
|
||||
@@ -194,7 +194,8 @@ static int vmx_init_vmcs_config(void)
|
||||
*/
|
||||
if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
|
||||
@ -128,9 +130,11 @@ Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
}
|
||||
|
||||
/* I/O access bitmap. */
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -2009,18 +2009,63 @@ static void vmx_install_vlapic_mapping(s
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmx.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -2012,18 +2012,63 @@ static void vmx_install_vlapic_mapping(s
|
||||
|
||||
void vmx_vlapic_msr_changed(struct vcpu *v)
|
||||
{
|
||||
@ -198,8 +202,10 @@ Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
vmx_update_secondary_exec_control(v);
|
||||
vmx_vmcs_exit(v);
|
||||
}
|
||||
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/hvm/vmx/vmcs.h
|
||||
@@ -182,6 +182,7 @@ extern u32 vmx_vmentry_control;
|
||||
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
|
||||
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
|
||||
@ -225,8 +231,10 @@ Committed-by: Jan Beulich <jbeulich@suse.com>
|
||||
int vmx_read_guest_msr(u32 msr, u64 *val);
|
||||
int vmx_write_guest_msr(u32 msr, u64 val);
|
||||
int vmx_add_guest_msr(u32 msr);
|
||||
--- a/xen/include/asm-x86/msr-index.h
|
||||
+++ b/xen/include/asm-x86/msr-index.h
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/msr-index.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/msr-index.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/msr-index.h
|
||||
@@ -295,7 +295,10 @@
|
||||
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
|
||||
#define MSR_IA32_APICBASE_MSR 0x800
|
||||
|
@ -1,25 +0,0 @@
|
||||
# HG changeset patch
|
||||
# User Jan Beulich <jbeulich@suse.com>
|
||||
# Date 1361176655 -3600
|
||||
# Node ID 57e67af5281a6b66cf71dfa812e4335930684fd6
|
||||
# Parent 45d59b822ed187c535b127679e32853b148ed411
|
||||
AMD IOMMU: don't BUG() when we don't have to
|
||||
|
||||
find_iommu_for_device() can easily return NULL instead, as all of its
|
||||
callers are prepared for that.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
@@ -32,8 +32,8 @@ struct amd_iommu *find_iommu_for_device(
|
||||
{
|
||||
struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
|
||||
|
||||
- BUG_ON ( bdf >= ivrs_bdf_entries );
|
||||
- return ivrs_mappings ? ivrs_mappings[bdf].iommu : NULL;
|
||||
+ return ivrs_mappings && bdf < ivrs_bdf_entries ? ivrs_mappings[bdf].iommu
|
||||
+ : NULL;
|
||||
}
|
||||
|
||||
/*
|
@ -1,50 +0,0 @@
|
||||
# Commit a15d87475ed95840dba693ab0a56d0b48a215cbc
|
||||
# Date 2013-02-21 15:16:20 +0000
|
||||
# Author Tim Deegan <tim@xen.org>
|
||||
# Committer Tim Deegan <tim@xen.org>
|
||||
x86/mm: Take the p2m lock even in shadow mode.
|
||||
|
||||
The reworking of p2m lookups to use get_gfn()/put_gfn() left the
|
||||
shadow code not taking the p2m lock, even in cases where the p2m would
|
||||
be updated (i.e. PoD).
|
||||
|
||||
In many cases, shadow code doesn't need the exclusion that
|
||||
get_gfn()/put_gfn() provides, as it has its own interlocks against p2m
|
||||
updates, but this is taking things too far, and can lead to crashes in
|
||||
the PoD code.
|
||||
|
||||
Now that most shadow-code p2m lookups are done with explicitly
|
||||
unlocked accessors, or with the get_page_from_gfn() accessor, which is
|
||||
often lock-free, we can just turn this locking on.
|
||||
|
||||
The remaining locked lookups are in sh_page_fault() (in a path that's
|
||||
almost always already serializing on the paging lock), and in
|
||||
emulate_map_dest() (which can probably be updated to use
|
||||
get_page_from_gfn()). They're not addressed here but may be in a
|
||||
follow-up patch.
|
||||
|
||||
Signed-off-by: Tim Deegan <tim@xen.org>
|
||||
Acked-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
|
||||
|
||||
--- a/xen/arch/x86/mm/p2m.c
|
||||
+++ b/xen/arch/x86/mm/p2m.c
|
||||
@@ -163,8 +163,7 @@ mfn_t __get_gfn_type_access(struct p2m_d
|
||||
return _mfn(gfn);
|
||||
}
|
||||
|
||||
- /* For now only perform locking on hap domains */
|
||||
- if ( locked && (hap_enabled(p2m->domain)) )
|
||||
+ if ( locked )
|
||||
/* Grab the lock here, don't release until put_gfn */
|
||||
gfn_lock(p2m, gfn, 0);
|
||||
|
||||
@@ -197,8 +196,7 @@ mfn_t __get_gfn_type_access(struct p2m_d
|
||||
|
||||
void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
|
||||
{
|
||||
- if ( !p2m || !paging_mode_translate(p2m->domain)
|
||||
- || !hap_enabled(p2m->domain) )
|
||||
+ if ( !p2m || !paging_mode_translate(p2m->domain) )
|
||||
/* Nothing to do in this case */
|
||||
return;
|
||||
|
@ -1,57 +0,0 @@
|
||||
# Commit 17281aea1a9a10f1ee165c6e6a2921a67b7b1df2
|
||||
# Date 2013-02-22 11:21:38 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86/nhvm: properly clean up after failure to set up all vCPU-s
|
||||
|
||||
Otherwise we may leak memory when setting up nHVM fails half way.
|
||||
|
||||
This implies that the individual destroy functions will have to remain
|
||||
capable (in the VMX case they first need to be made so, following
|
||||
26486:7648ef657fe7 and 26489:83a3fa9c8434) of being called for a vCPU
|
||||
that the corresponding init function was never run on.
|
||||
|
||||
While at it, also remove a redundant check from the corresponding
|
||||
parameter validation code.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
Tested-by: Olaf Hering <olaf@aepfle.de>
|
||||
|
||||
--- a/xen/arch/x86/hvm/hvm.c
|
||||
+++ b/xen/arch/x86/hvm/hvm.c
|
||||
@@ -3941,18 +3941,20 @@ long do_hvm_op(unsigned long op, XEN_GUE
|
||||
#else
|
||||
if ( a.value > 1 )
|
||||
rc = -EINVAL;
|
||||
- if ( !is_hvm_domain(d) )
|
||||
- rc = -EINVAL;
|
||||
/* Remove the check below once we have
|
||||
* shadow-on-shadow.
|
||||
*/
|
||||
if ( cpu_has_svm && !paging_mode_hap(d) && a.value )
|
||||
rc = -EINVAL;
|
||||
/* Set up NHVM state for any vcpus that are already up */
|
||||
- if ( !d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM] )
|
||||
+ if ( a.value &&
|
||||
+ !d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM] )
|
||||
for_each_vcpu(d, v)
|
||||
if ( rc == 0 )
|
||||
rc = nestedhvm_vcpu_initialise(v);
|
||||
+ if ( !a.value || rc )
|
||||
+ for_each_vcpu(d, v)
|
||||
+ nestedhvm_vcpu_destroy(v);
|
||||
#endif
|
||||
break;
|
||||
case HVM_PARAM_BUFIOREQ_EVTCHN:
|
||||
--- a/xen/arch/x86/hvm/nestedhvm.c
|
||||
+++ b/xen/arch/x86/hvm/nestedhvm.c
|
||||
@@ -88,7 +88,7 @@ nestedhvm_vcpu_initialise(struct vcpu *v
|
||||
void
|
||||
nestedhvm_vcpu_destroy(struct vcpu *v)
|
||||
{
|
||||
- if ( nestedhvm_enabled(v->domain) && hvm_funcs.nhvm_vcpu_destroy )
|
||||
+ if ( hvm_funcs.nhvm_vcpu_destroy )
|
||||
hvm_funcs.nhvm_vcpu_destroy(v);
|
||||
}
|
||||
|
@ -1,158 +0,0 @@
|
||||
# Commit 992fdf6f46252a459c6b1b8d971b2c71f01460f8
|
||||
# Date 2013-02-22 11:56:54 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
honor ACPI v4 FADT flags
|
||||
|
||||
- force use of physical APIC mode if indicated so (as we don't support
|
||||
xAPIC cluster mode, the respective flag is taken to force physical
|
||||
mode too)
|
||||
- don't use MSI if indicated so (implies no IOMMU)
|
||||
|
||||
Both can be overridden on the command line, for the MSI case this at
|
||||
once adds a new command line option allowing to turn off PCI MSI (IOMMU
|
||||
and HPET are unaffected by this).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/docs/misc/xen-command-line.markdown
|
||||
+++ b/docs/misc/xen-command-line.markdown
|
||||
@@ -602,6 +602,13 @@ limit is ignored by Xen.
|
||||
|
||||
Specify if the MMConfig space should be enabled.
|
||||
|
||||
+### msi
|
||||
+> `= <boolean>`
|
||||
+
|
||||
+> Default: `true`
|
||||
+
|
||||
+Force Xen to (not) use PCI-MSI, even if ACPI FADT says otherwise.
|
||||
+
|
||||
### nmi
|
||||
> `= ignore | dom0 | fatal`
|
||||
|
||||
--- a/xen/arch/x86/genapic/bigsmp.c
|
||||
+++ b/xen/arch/x86/genapic/bigsmp.c
|
||||
@@ -40,7 +40,14 @@ static struct dmi_system_id __initdata b
|
||||
|
||||
static __init int probe_bigsmp(void)
|
||||
{
|
||||
- if (!def_to_bigsmp)
|
||||
+ /*
|
||||
+ * We don't implement cluster mode, so force use of
|
||||
+ * physical mode in both cases.
|
||||
+ */
|
||||
+ if (acpi_gbl_FADT.flags &
|
||||
+ (ACPI_FADT_APIC_CLUSTER | ACPI_FADT_APIC_PHYSICAL))
|
||||
+ def_to_bigsmp = 1;
|
||||
+ else if (!def_to_bigsmp)
|
||||
dmi_check_system(bigsmp_dmi_table);
|
||||
return def_to_bigsmp;
|
||||
}
|
||||
--- a/xen/arch/x86/genapic/x2apic.c
|
||||
+++ b/xen/arch/x86/genapic/x2apic.c
|
||||
@@ -29,9 +29,6 @@
|
||||
#include <xen/smp.h>
|
||||
#include <asm/mach-default/mach_mpparse.h>
|
||||
|
||||
-static bool_t __initdata x2apic_phys; /* By default we use logical cluster mode. */
|
||||
-boolean_param("x2apic_phys", x2apic_phys);
|
||||
-
|
||||
static void init_apic_ldr_x2apic_phys(void)
|
||||
{
|
||||
}
|
||||
@@ -121,8 +118,14 @@ static const struct genapic apic_x2apic_
|
||||
.send_IPI_self = send_IPI_self_x2apic
|
||||
};
|
||||
|
||||
+static s8 __initdata x2apic_phys = -1; /* By default we use logical cluster mode. */
|
||||
+boolean_param("x2apic_phys", x2apic_phys);
|
||||
+
|
||||
const struct genapic *__init apic_x2apic_probe(void)
|
||||
{
|
||||
+ if ( x2apic_phys < 0 )
|
||||
+ x2apic_phys = !!(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL);
|
||||
+
|
||||
return x2apic_phys ? &apic_x2apic_phys : &apic_x2apic_cluster;
|
||||
}
|
||||
|
||||
--- a/xen/arch/x86/msi.c
|
||||
+++ b/xen/arch/x86/msi.c
|
||||
@@ -31,6 +31,9 @@
|
||||
#include <xen/iommu.h>
|
||||
#include <xsm/xsm.h>
|
||||
|
||||
+static s8 __read_mostly use_msi = -1;
|
||||
+boolean_param("msi", use_msi);
|
||||
+
|
||||
/* bitmap indicate which fixed map is free */
|
||||
DEFINE_SPINLOCK(msix_fixmap_lock);
|
||||
DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
|
||||
@@ -958,6 +961,9 @@ int pci_enable_msi(struct msi_info *msi,
|
||||
{
|
||||
ASSERT(spin_is_locked(&pcidevs_lock));
|
||||
|
||||
+ if ( !use_msi )
|
||||
+ return -EPERM;
|
||||
+
|
||||
return msi->table_base ? __pci_enable_msix(msi, desc) :
|
||||
__pci_enable_msi(msi, desc);
|
||||
}
|
||||
@@ -1003,7 +1009,10 @@ int pci_restore_msi_state(struct pci_dev
|
||||
|
||||
ASSERT(spin_is_locked(&pcidevs_lock));
|
||||
|
||||
- if (!pdev)
|
||||
+ if ( !use_msi )
|
||||
+ return -EOPNOTSUPP;
|
||||
+
|
||||
+ if ( !pdev )
|
||||
return -EINVAL;
|
||||
|
||||
ret = xsm_resource_setup_pci((pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn);
|
||||
@@ -1062,7 +1071,7 @@ unsigned int pci_msix_get_table_len(stru
|
||||
func = PCI_FUNC(pdev->devfn);
|
||||
|
||||
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
|
||||
- if ( !pos )
|
||||
+ if ( !pos || !use_msi )
|
||||
return 0;
|
||||
|
||||
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
|
||||
@@ -1135,6 +1144,9 @@ static struct keyhandler dump_msi_keyhan
|
||||
|
||||
static int __init msi_setup_keyhandler(void)
|
||||
{
|
||||
+ if ( use_msi < 0 )
|
||||
+ use_msi = !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI);
|
||||
+
|
||||
register_keyhandler('M', &dump_msi_keyhandler);
|
||||
return 0;
|
||||
}
|
||||
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
@@ -1066,5 +1066,8 @@ int __init amd_iommu_get_ivrs_dev_entrie
|
||||
|
||||
int __init amd_iommu_update_ivrs_mapping_acpi(void)
|
||||
{
|
||||
+ if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) )
|
||||
+ return -EPERM;
|
||||
+
|
||||
return acpi_table_parse(ACPI_SIG_IVRS, parse_ivrs_table);
|
||||
}
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -2119,6 +2119,12 @@ int __init intel_vtd_setup(void)
|
||||
if ( list_empty(&acpi_drhd_units) )
|
||||
return -ENODEV;
|
||||
|
||||
+ if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) )
|
||||
+ {
|
||||
+ ret = -EPERM;
|
||||
+ goto error;
|
||||
+ }
|
||||
+
|
||||
platform_quirks_init();
|
||||
|
||||
/* We enable the following features only if they are supported by all VT-d
|
@ -1,22 +0,0 @@
|
||||
# Commit c40e24a8ef74f9d0ee59dd9b8ca890be08b0b874
|
||||
# Date 2013-02-25 12:44:25 +0100
|
||||
# Author Xi Wang <xi@mit.edu>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86: fix null pointer dereference in intel_get_extended_msrs()
|
||||
|
||||
`memset(&mc_ext, 0, ...)' leads to a buffer overflow and a subsequent
|
||||
null pointer dereference. Replace `&mc_ext' with `mc_ext'.
|
||||
|
||||
Signed-off-by: Xi Wang <xi@mit.edu>
|
||||
|
||||
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
|
||||
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
|
||||
@@ -534,7 +534,7 @@ intel_get_extended_msrs(struct mcinfo_gl
|
||||
}
|
||||
|
||||
/* this function will called when CAP(9).MCG_EXT_P = 1 */
|
||||
- memset(&mc_ext, 0, sizeof(struct mcinfo_extended));
|
||||
+ memset(mc_ext, 0, sizeof(*mc_ext));
|
||||
mc_ext->common.type = MC_TYPE_EXTENDED;
|
||||
mc_ext->common.size = sizeof(struct mcinfo_extended);
|
||||
|
@ -1,73 +0,0 @@
|
||||
# Commit 0f8adcb2a7183bea5063f6fffba7d7e1aa14fc84
|
||||
# Date 2013-02-26 10:14:53 +0100
|
||||
# Author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
IOMMU, AMD Family15h Model10-1Fh erratum 746 Workaround
|
||||
|
||||
The IOMMU may stop processing page translations due to a perceived lack
|
||||
of credits for writing upstream peripheral page service request (PPR)
|
||||
or event logs. If the L2B miscellaneous clock gating feature is enabled
|
||||
the IOMMU does not properly register credits after the log request has
|
||||
completed, leading to a potential system hang.
|
||||
|
||||
BIOSes are supposed to disable L2B miscellaneous clock gating by setting
|
||||
L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b. This
|
||||
patch corrects that for those which do not enable this workaround.
|
||||
|
||||
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_init.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_init.c
|
||||
@@ -795,6 +795,42 @@ static int __init set_iommu_interrupt_ha
|
||||
return irq;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
|
||||
+ * Workaround:
|
||||
+ * BIOS should disable L2B micellaneous clock gating by setting
|
||||
+ * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
|
||||
+ */
|
||||
+static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
|
||||
+{
|
||||
+ u32 value;
|
||||
+ u8 bus = PCI_BUS(iommu->bdf);
|
||||
+ u8 dev = PCI_SLOT(iommu->bdf);
|
||||
+ u8 func = PCI_FUNC(iommu->bdf);
|
||||
+
|
||||
+ if ( (boot_cpu_data.x86 != 0x15) ||
|
||||
+ (boot_cpu_data.x86_model < 0x10) ||
|
||||
+ (boot_cpu_data.x86_model > 0x1f) )
|
||||
+ return;
|
||||
+
|
||||
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf0, 0x90);
|
||||
+ value = pci_conf_read32(iommu->seg, bus, dev, func, 0xf4);
|
||||
+
|
||||
+ if ( value & (1 << 2) )
|
||||
+ return;
|
||||
+
|
||||
+ /* Select NB indirect register 0x90 and enable writing */
|
||||
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf0, 0x90 | (1 << 8));
|
||||
+
|
||||
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf4, value | (1 << 2));
|
||||
+ printk(XENLOG_INFO
|
||||
+ "AMD-Vi: Applying erratum 746 workaround for IOMMU at %04x:%02x:%02x.%u\n",
|
||||
+ iommu->seg, bus, dev, func);
|
||||
+
|
||||
+ /* Clear the enable writing bit */
|
||||
+ pci_conf_write32(iommu->seg, bus, dev, func, 0xf0, 0x90);
|
||||
+}
|
||||
+
|
||||
static void enable_iommu(struct amd_iommu *iommu)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -807,6 +843,8 @@ static void enable_iommu(struct amd_iomm
|
||||
return;
|
||||
}
|
||||
|
||||
+ amd_iommu_erratum_746_workaround(iommu);
|
||||
+
|
||||
register_iommu_dev_table_in_mmio_space(iommu);
|
||||
register_iommu_cmd_buffer_in_mmio_space(iommu);
|
||||
register_iommu_event_log_in_mmio_space(iommu);
|
@ -1,128 +0,0 @@
|
||||
# Commit 2f8c55ccefe49bb526df0eaf5fa9b7b788422208
|
||||
# Date 2013-02-26 10:15:56 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86: fix CMCI injection
|
||||
|
||||
This fixes the wrong use of literal vector 0xF7 with an "int"
|
||||
instruction (invalidated by 25113:14609be41f36) and the fact that doing
|
||||
the injection via a software interrupt was never valid anyway (because
|
||||
cmci_interrupt() acks the LAPIC, which does the wrong thing if the
|
||||
interrupt didn't get delivered through it).
|
||||
|
||||
In order to do the latter, the patch introduces send_IPI_self(), at once
|
||||
removing two open-coded uses of "genapic" in the IRQ handling code.
|
||||
|
||||
Reported-by: Yongjie Ren <yongjie.ren@intel.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Tested-by: Yongjie Ren <yongjie.ren@intel.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/cpu/mcheck/mce.c
|
||||
+++ b/xen/arch/x86/cpu/mcheck/mce.c
|
||||
@@ -30,6 +30,7 @@ bool_t __read_mostly mce_broadcast = 0;
|
||||
bool_t is_mc_panic;
|
||||
unsigned int __read_mostly nr_mce_banks;
|
||||
unsigned int __read_mostly firstbank;
|
||||
+uint8_t __read_mostly cmci_apic_vector;
|
||||
|
||||
static void intpose_init(void);
|
||||
static void mcinfo_clear(struct mc_info *);
|
||||
@@ -1277,12 +1278,6 @@ static void x86_mc_mceinject(void *data)
|
||||
__asm__ __volatile__("int $0x12");
|
||||
}
|
||||
|
||||
-static void x86_cmci_inject(void *data)
|
||||
-{
|
||||
- printk("Simulating CMCI on cpu %d\n", smp_processor_id());
|
||||
- __asm__ __volatile__("int $0xf7");
|
||||
-}
|
||||
-
|
||||
#if BITS_PER_LONG == 64
|
||||
|
||||
#define ID2COOKIE(id) ((mctelem_cookie_t)(id))
|
||||
@@ -1568,11 +1563,15 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
|
||||
on_selected_cpus(cpumap, x86_mc_mceinject, NULL, 1);
|
||||
break;
|
||||
case XEN_MC_INJECT_TYPE_CMCI:
|
||||
- if ( !cmci_support )
|
||||
+ if ( !cmci_apic_vector )
|
||||
ret = x86_mcerr(
|
||||
"No CMCI supported in platform\n", -EINVAL);
|
||||
else
|
||||
- on_selected_cpus(cpumap, x86_cmci_inject, NULL, 1);
|
||||
+ {
|
||||
+ if ( cpumask_test_cpu(smp_processor_id(), cpumap) )
|
||||
+ send_IPI_self(cmci_apic_vector);
|
||||
+ send_IPI_mask(cpumap, cmci_apic_vector);
|
||||
+ }
|
||||
break;
|
||||
default:
|
||||
ret = x86_mcerr("Wrong mca type\n", -EINVAL);
|
||||
--- a/xen/arch/x86/cpu/mcheck/mce.h
|
||||
+++ b/xen/arch/x86/cpu/mcheck/mce.h
|
||||
@@ -38,6 +38,8 @@ enum mcheck_type {
|
||||
mcheck_intel
|
||||
};
|
||||
|
||||
+extern uint8_t cmci_apic_vector;
|
||||
+
|
||||
/* Init functions */
|
||||
enum mcheck_type amd_k7_mcheck_init(struct cpuinfo_x86 *c);
|
||||
enum mcheck_type amd_k8_mcheck_init(struct cpuinfo_x86 *c);
|
||||
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
|
||||
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
|
||||
@@ -1164,7 +1164,6 @@ static void intel_init_cmci(struct cpuin
|
||||
{
|
||||
u32 l, apic;
|
||||
int cpu = smp_processor_id();
|
||||
- static uint8_t cmci_apic_vector;
|
||||
|
||||
if (!mce_available(c) || !cmci_support) {
|
||||
if (opt_cpu_info)
|
||||
--- a/xen/arch/x86/irq.c
|
||||
+++ b/xen/arch/x86/irq.c
|
||||
@@ -646,7 +646,7 @@ void irq_move_cleanup_interrupt(struct c
|
||||
* to myself.
|
||||
*/
|
||||
if (irr & (1 << (vector % 32))) {
|
||||
- genapic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
|
||||
+ send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
|
||||
TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP_DELAY,
|
||||
irq, vector, smp_processor_id());
|
||||
goto unlock;
|
||||
@@ -692,7 +692,7 @@ static void send_cleanup_vector(struct i
|
||||
|
||||
cpumask_and(&cleanup_mask, desc->arch.old_cpu_mask, &cpu_online_map);
|
||||
desc->arch.move_cleanup_count = cpumask_weight(&cleanup_mask);
|
||||
- genapic->send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
||||
+ send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
||||
|
||||
desc->arch.move_in_progress = 0;
|
||||
}
|
||||
--- a/xen/arch/x86/smp.c
|
||||
+++ b/xen/arch/x86/smp.c
|
||||
@@ -43,6 +43,11 @@ void send_IPI_mask(const cpumask_t *mask
|
||||
genapic->send_IPI_mask(mask, vector);
|
||||
}
|
||||
|
||||
+void send_IPI_self(int vector)
|
||||
+{
|
||||
+ genapic->send_IPI_self(vector);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Some notes on x86 processor bugs affecting SMP operation:
|
||||
*
|
||||
--- a/xen/include/asm-x86/smp.h
|
||||
+++ b/xen/include/asm-x86/smp.h
|
||||
@@ -29,7 +29,8 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_
|
||||
|
||||
void smp_send_nmi_allbutself(void);
|
||||
|
||||
-void send_IPI_mask(const cpumask_t *mask, int vector);
|
||||
+void send_IPI_mask(const cpumask_t *, int vector);
|
||||
+void send_IPI_self(int vector);
|
||||
|
||||
extern void (*mtrr_hook) (void);
|
||||
|
@ -1,107 +0,0 @@
|
||||
# Commit 7dd3b06ff031c9a8c727df16c5def2afb382101c
|
||||
# Date 2013-02-28 14:00:18 +0000
|
||||
# Author Tim Deegan <tim@xen.org>
|
||||
# Committer Tim Deegan <tim@xen.org>
|
||||
vmx: fix handling of NMI VMEXIT.
|
||||
|
||||
Call do_nmi() directly and explicitly re-enable NMIs rather than
|
||||
raising an NMI through the APIC. Since NMIs are disabled after the
|
||||
VMEXIT, the raised NMI would be blocked until the next IRET
|
||||
instruction (i.e. the next real interrupt, or after scheduling a PV
|
||||
guest) and in the meantime the guest will spin taking NMI VMEXITS.
|
||||
|
||||
Also, handle NMIs before re-enabling interrupts, since if we handle an
|
||||
interrupt (and therefore IRET) before calling do_nmi(), we may end up
|
||||
running the NMI handler with NMIs enabled.
|
||||
|
||||
Signed-off-by: Tim Deegan <tim@xen.org>
|
||||
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
Acked-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/hvm/vmx/vmx.c
|
||||
+++ b/xen/arch/x86/hvm/vmx/vmx.c
|
||||
@@ -2421,6 +2421,13 @@ void vmx_vmexit_handler(struct cpu_user_
|
||||
vector = intr_info & INTR_INFO_VECTOR_MASK;
|
||||
if ( vector == TRAP_machine_check )
|
||||
do_machine_check(regs);
|
||||
+ if ( vector == TRAP_nmi
|
||||
+ && ((intr_info & INTR_INFO_INTR_TYPE_MASK) ==
|
||||
+ (X86_EVENTTYPE_NMI << 8)) )
|
||||
+ {
|
||||
+ do_nmi(regs);
|
||||
+ enable_nmis();
|
||||
+ }
|
||||
break;
|
||||
case EXIT_REASON_MCE_DURING_VMENTRY:
|
||||
do_machine_check(regs);
|
||||
@@ -2594,7 +2601,7 @@ void vmx_vmexit_handler(struct cpu_user_
|
||||
(X86_EVENTTYPE_NMI << 8) )
|
||||
goto exit_and_crash;
|
||||
HVMTRACE_0D(NMI);
|
||||
- self_nmi(); /* Real NMI, vector 2: normal processing. */
|
||||
+ /* Already handled above. */
|
||||
break;
|
||||
case TRAP_machine_check:
|
||||
HVMTRACE_0D(MCE);
|
||||
--- a/xen/arch/x86/x86_32/entry.S
|
||||
+++ b/xen/arch/x86/x86_32/entry.S
|
||||
@@ -621,6 +621,14 @@ ENTRY(machine_check)
|
||||
pushl $TRAP_machine_check<<16
|
||||
jmp handle_nmi_mce
|
||||
|
||||
+/* Enable NMIs. No special register assumptions. All registers are preserved. */
|
||||
+ENTRY(enable_nmis)
|
||||
+ /* Set up stack frame */
|
||||
+ pushf # EFLAGS
|
||||
+ push %cs # CS
|
||||
+ push $.Lret # EIP
|
||||
+ iret # Disable the hardware NMI latch
|
||||
+
|
||||
ENTRY(setup_vm86_frame)
|
||||
mov %ecx,%ds
|
||||
mov %ecx,%es
|
||||
@@ -634,7 +642,7 @@ ENTRY(setup_vm86_frame)
|
||||
.endm
|
||||
copy_vm86_words
|
||||
addl $16,%esp
|
||||
- ret
|
||||
+.Lret: ret
|
||||
|
||||
.section .rodata, "a", @progbits
|
||||
|
||||
--- a/xen/arch/x86/x86_64/entry.S
|
||||
+++ b/xen/arch/x86/x86_64/entry.S
|
||||
@@ -643,6 +643,22 @@ ENTRY(machine_check)
|
||||
movl $TRAP_machine_check,4(%rsp)
|
||||
jmp handle_ist_exception
|
||||
|
||||
+/* Enable NMIs. No special register assumptions. Only %rax is not preserved. */
|
||||
+ENTRY(enable_nmis)
|
||||
+ movq %rsp, %rax /* Grab RSP before pushing */
|
||||
+
|
||||
+ /* Set up stack frame */
|
||||
+ pushq $0 /* SS */
|
||||
+ pushq %rax /* RSP */
|
||||
+ pushfq /* RFLAGS */
|
||||
+ pushq $__HYPERVISOR_CS /* CS */
|
||||
+ leaq 1f(%rip),%rax
|
||||
+ pushq %rax /* RIP */
|
||||
+
|
||||
+ iretq /* Disable the hardware NMI latch */
|
||||
+1:
|
||||
+ retq
|
||||
+
|
||||
.section .rodata, "a", @progbits
|
||||
|
||||
ENTRY(exception_table)
|
||||
--- a/xen/include/asm-x86/processor.h
|
||||
+++ b/xen/include/asm-x86/processor.h
|
||||
@@ -584,6 +584,8 @@ DECLARE_TRAP_HANDLER(alignment_check);
|
||||
DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
|
||||
#undef DECLARE_TRAP_HANDLER
|
||||
|
||||
+void enable_nmis(void);
|
||||
+
|
||||
void syscall_enter(void);
|
||||
void sysenter_entry(void);
|
||||
void sysenter_eflags_saved(void);
|
@ -1,80 +0,0 @@
|
||||
# Commit 482300def7d08e773ccd2a0d978bcb9469fdd810
|
||||
# Date 2013-02-28 14:56:45 +0000
|
||||
# Author Juergen Gross <juergen.gross@ts.fujitsu.com>
|
||||
# Committer Keir Fraser <keir@xen.org>
|
||||
Avoid stale pointer when moving domain to another cpupool
|
||||
|
||||
When a domain is moved to another cpupool the scheduler private data pointers
|
||||
in vcpu and domain structures must never point to an already freed memory
|
||||
area.
|
||||
|
||||
While at it, simplify sched_init_vcpu() by using DOM2OP instead of VCPU2OP.
|
||||
|
||||
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
|
||||
|
||||
--- a/xen/common/schedule.c
|
||||
+++ b/xen/common/schedule.c
|
||||
@@ -220,7 +220,7 @@ int sched_init_vcpu(struct vcpu *v, unsi
|
||||
if ( v->sched_priv == NULL )
|
||||
return 1;
|
||||
|
||||
- SCHED_OP(VCPU2OP(v), insert_vcpu, v);
|
||||
+ SCHED_OP(DOM2OP(d), insert_vcpu, v);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -231,6 +231,9 @@ int sched_move_domain(struct domain *d,
|
||||
unsigned int new_p;
|
||||
void **vcpu_priv;
|
||||
void *domdata;
|
||||
+ void *vcpudata;
|
||||
+ struct scheduler *old_ops;
|
||||
+ void *old_domdata;
|
||||
|
||||
domdata = SCHED_OP(c->sched, alloc_domdata, d);
|
||||
if ( domdata == NULL )
|
||||
@@ -261,21 +264,22 @@ int sched_move_domain(struct domain *d,
|
||||
|
||||
domain_pause(d);
|
||||
|
||||
+ old_ops = DOM2OP(d);
|
||||
+ old_domdata = d->sched_priv;
|
||||
+
|
||||
for_each_vcpu ( d, v )
|
||||
{
|
||||
- SCHED_OP(VCPU2OP(v), remove_vcpu, v);
|
||||
- SCHED_OP(VCPU2OP(v), free_vdata, v->sched_priv);
|
||||
- v->sched_priv = NULL;
|
||||
+ SCHED_OP(old_ops, remove_vcpu, v);
|
||||
}
|
||||
|
||||
- SCHED_OP(DOM2OP(d), free_domdata, d->sched_priv);
|
||||
-
|
||||
d->cpupool = c;
|
||||
d->sched_priv = domdata;
|
||||
|
||||
new_p = cpumask_first(c->cpu_valid);
|
||||
for_each_vcpu ( d, v )
|
||||
{
|
||||
+ vcpudata = v->sched_priv;
|
||||
+
|
||||
migrate_timer(&v->periodic_timer, new_p);
|
||||
migrate_timer(&v->singleshot_timer, new_p);
|
||||
migrate_timer(&v->poll_timer, new_p);
|
||||
@@ -288,12 +292,16 @@ int sched_move_domain(struct domain *d,
|
||||
new_p = cpumask_cycle(new_p, c->cpu_valid);
|
||||
|
||||
SCHED_OP(c->sched, insert_vcpu, v);
|
||||
+
|
||||
+ SCHED_OP(old_ops, free_vdata, vcpudata);
|
||||
}
|
||||
|
||||
domain_update_node_affinity(d);
|
||||
|
||||
domain_unpause(d);
|
||||
|
||||
+ SCHED_OP(old_ops, free_domdata, old_domdata);
|
||||
+
|
||||
xfree(vcpu_priv);
|
||||
|
||||
return 0;
|
@ -1,24 +0,0 @@
|
||||
# Commit 53decd322157e922cac2988e07da6d39538c8033
|
||||
# Date 2013-03-01 16:59:49 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
fix compat memory exchange op splitting
|
||||
|
||||
A shift with a negative count was erroneously used here, yielding
|
||||
undefined behavior.
|
||||
|
||||
Reported-by: Xi Wang <xi@mit.edu>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/common/compat/memory.c
|
||||
+++ b/xen/common/compat/memory.c
|
||||
@@ -172,7 +172,7 @@ int compat_memory_op(unsigned int cmd, X
|
||||
if ( order_delta >= 0 )
|
||||
nat.xchg->out.nr_extents = end_extent >> order_delta;
|
||||
else
|
||||
- nat.xchg->out.nr_extents = end_extent << order_delta;
|
||||
+ nat.xchg->out.nr_extents = end_extent << -order_delta;
|
||||
++split;
|
||||
}
|
||||
|
@ -1,78 +0,0 @@
|
||||
# Commit 7ffc9779aa5120c5098d938cb88f69a1dda9a0fe
|
||||
# Date 2013-03-04 10:16:04 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86: make certain memory sub-ops return valid values
|
||||
|
||||
When a domain's shared info field "max_pfn" is zero,
|
||||
domain_get_maximum_gpfn() so far returned ULONG_MAX, which
|
||||
do_memory_op() in turn converted to -1 (i.e. -EPERM). Make the former
|
||||
always return a sensible number (i.e. zero if the field was zero) and
|
||||
have the latter no longer truncate return values.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/mm.c
|
||||
+++ b/xen/arch/x86/mm.c
|
||||
@@ -437,7 +437,7 @@ unsigned long domain_get_maximum_gpfn(st
|
||||
if ( is_hvm_domain(d) )
|
||||
return p2m_get_hostp2m(d)->max_mapped_pfn;
|
||||
/* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */
|
||||
- return arch_get_max_pfn(d) - 1;
|
||||
+ return (arch_get_max_pfn(d) ?: 1) - 1;
|
||||
}
|
||||
|
||||
void share_xen_page_with_guest(
|
||||
--- a/xen/common/compat/memory.c
|
||||
+++ b/xen/common/compat/memory.c
|
||||
@@ -15,7 +15,8 @@ CHECK_TYPE(domid);
|
||||
|
||||
int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE(void) compat)
|
||||
{
|
||||
- int rc, split, op = cmd & MEMOP_CMD_MASK;
|
||||
+ int split, op = cmd & MEMOP_CMD_MASK;
|
||||
+ long rc;
|
||||
unsigned int start_extent = cmd >> MEMOP_EXTENT_SHIFT;
|
||||
|
||||
do
|
||||
@@ -204,7 +205,7 @@ int compat_memory_op(unsigned int cmd, X
|
||||
|
||||
rc = do_memory_op(cmd, nat.hnd);
|
||||
if ( rc < 0 )
|
||||
- return rc;
|
||||
+ break;
|
||||
|
||||
cmd = 0;
|
||||
if ( hypercall_xlat_continuation(&cmd, 0x02, nat.hnd, compat) )
|
||||
@@ -318,5 +319,11 @@ int compat_memory_op(unsigned int cmd, X
|
||||
__HYPERVISOR_memory_op, "ih", cmd, compat);
|
||||
} while ( split > 0 );
|
||||
|
||||
+ if ( unlikely(rc > INT_MAX) )
|
||||
+ return INT_MAX;
|
||||
+
|
||||
+ if ( unlikely(rc < INT_MIN) )
|
||||
+ return INT_MIN;
|
||||
+
|
||||
return rc;
|
||||
}
|
||||
--- a/xen/common/memory.c
|
||||
+++ b/xen/common/memory.c
|
||||
@@ -532,14 +532,13 @@ static long memory_exchange(XEN_GUEST_HA
|
||||
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
|
||||
{
|
||||
struct domain *d;
|
||||
- int rc, op;
|
||||
+ long rc;
|
||||
unsigned int address_bits;
|
||||
unsigned long start_extent;
|
||||
struct xen_memory_reservation reservation;
|
||||
struct memop_args args;
|
||||
domid_t domid;
|
||||
-
|
||||
- op = cmd & MEMOP_CMD_MASK;
|
||||
+ int op = cmd & MEMOP_CMD_MASK;
|
||||
|
||||
switch ( op )
|
||||
{
|
@ -1,58 +0,0 @@
|
||||
# Commit e6a6fd63652814e5c36a0016c082032f798ced1f
|
||||
# Date 2013-03-04 10:17:52 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
SEDF: avoid gathering vCPU-s on pCPU0
|
||||
|
||||
The introduction of vcpu_force_reschedule() in 14320:215b799fa181 was
|
||||
incompatible with the SEDF scheduler: Any vCPU using
|
||||
VCPUOP_stop_periodic_timer (e.g. any vCPU of half way modern PV Linux
|
||||
guests) ends up on pCPU0 after that call. Obviously, running all PV
|
||||
guests' (and namely Dom0's) vCPU-s on pCPU0 causes problems for those
|
||||
guests rather sooner than later.
|
||||
|
||||
So the main thing that was clearly wrong (and bogus from the beginning)
|
||||
was the use of cpumask_first() in sedf_pick_cpu(). It is being replaced
|
||||
by a construct that prefers to put back the vCPU on the pCPU that it
|
||||
got launched on.
|
||||
|
||||
However, there's one more glitch: When reducing the affinity of a vCPU
|
||||
temporarily, and then widening it again to a set that includes the pCPU
|
||||
that the vCPU was last running on, the generic scheduler code would not
|
||||
force a migration of that vCPU, and hence it would forever stay on the
|
||||
pCPU it last ran on. Since that can again create a load imbalance, the
|
||||
SEDF scheduler wants a migration to happen regardless of it being
|
||||
apparently unnecessary.
|
||||
|
||||
Of course, an alternative to checking for SEDF explicitly in
|
||||
vcpu_set_affinity() would be to introduce a flags field in struct
|
||||
scheduler, and have SEDF set an "always-migrate-on-affinity-change"
|
||||
flag.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/common/sched_sedf.c
|
||||
+++ b/xen/common/sched_sedf.c
|
||||
@@ -396,7 +396,8 @@ static int sedf_pick_cpu(const struct sc
|
||||
|
||||
online = cpupool_scheduler_cpumask(v->domain->cpupool);
|
||||
cpumask_and(&online_affinity, v->cpu_affinity, online);
|
||||
- return cpumask_first(&online_affinity);
|
||||
+ return cpumask_cycle(v->vcpu_id % cpumask_weight(&online_affinity) - 1,
|
||||
+ &online_affinity);
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/xen/common/schedule.c
|
||||
+++ b/xen/common/schedule.c
|
||||
@@ -611,7 +611,8 @@ int vcpu_set_affinity(struct vcpu *v, co
|
||||
vcpu_schedule_lock_irq(v);
|
||||
|
||||
cpumask_copy(v->cpu_affinity, affinity);
|
||||
- if ( !cpumask_test_cpu(v->processor, v->cpu_affinity) )
|
||||
+ if ( VCPU2OP(v)->sched_id == XEN_SCHEDULER_SEDF ||
|
||||
+ !cpumask_test_cpu(v->processor, v->cpu_affinity) )
|
||||
set_bit(_VPF_migrating, &v->pause_flags);
|
||||
|
||||
vcpu_schedule_unlock_irq(v);
|
@ -1,134 +0,0 @@
|
||||
# Commit d463b005bbd6475ed930a302821efe239e1b2cf9
|
||||
# Date 2013-03-04 10:19:34 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86: defer processing events on the NMI exit path
|
||||
|
||||
Otherwise, we may end up in the scheduler, keeping NMIs masked for a
|
||||
possibly unbounded period of time (until whenever the next IRET gets
|
||||
executed). Enforce timely event processing by sending a self IPI.
|
||||
|
||||
Of course it's open for discussion whether to always use the straight
|
||||
exit path from handle_ist_exception.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/x86_32/entry.S
|
||||
+++ b/xen/arch/x86/x86_32/entry.S
|
||||
@@ -60,6 +60,7 @@
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/page.h>
|
||||
#include <public/xen.h>
|
||||
+#include <irq_vectors.h>
|
||||
|
||||
ALIGN
|
||||
restore_all_guest:
|
||||
@@ -561,6 +562,8 @@ ENTRY(early_page_fault)
|
||||
jmp restore_all_xen
|
||||
.popsection
|
||||
|
||||
+ENTRY(nmi)
|
||||
+ pushl $TRAP_nmi<<16
|
||||
handle_nmi_mce:
|
||||
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
|
||||
# NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
|
||||
@@ -581,7 +584,24 @@ handle_nmi_mce:
|
||||
* cases we have put guest DS/ES on the guest stack frame, which will
|
||||
* be detected by SAVE_ALL(), or we have rolled back restore_guest.
|
||||
*/
|
||||
- jmp ret_from_intr
|
||||
+ cmpb $TRAP_nmi,UREGS_entry_vector(%esp)
|
||||
+ jne ret_from_intr
|
||||
+ /* We want to get straight to the IRET on the NMI exit path. */
|
||||
+ GET_CURRENT(%ebx)
|
||||
+ movl UREGS_eflags(%esp),%eax
|
||||
+ movb UREGS_cs(%esp),%al
|
||||
+ testl $(3|X86_EFLAGS_VM),%eax
|
||||
+ jz restore_all_xen
|
||||
+ /* Send an IPI to ourselves to cover for the lack of event checking. */
|
||||
+ movl VCPU_processor(%ebx),%eax
|
||||
+ shll $IRQSTAT_shift,%eax
|
||||
+ cmpl $0,irq_stat(%eax)
|
||||
+ je restore_all_guest
|
||||
+ pushl $EVENT_CHECK_VECTOR
|
||||
+ call send_IPI_self
|
||||
+ addl $4,%esp
|
||||
+ jmp restore_all_guest
|
||||
+
|
||||
.Lnmi_mce_xen:
|
||||
/* Check the outer (guest) context for %ds/%es state validity. */
|
||||
GET_CPUINFO_FIELD(CPUINFO_guest_cpu_user_regs,%ebx)
|
||||
@@ -613,10 +633,6 @@ handle_nmi_mce:
|
||||
jmp .Lnmi_mce_common
|
||||
#endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
|
||||
|
||||
-ENTRY(nmi)
|
||||
- pushl $TRAP_nmi<<16
|
||||
- jmp handle_nmi_mce
|
||||
-
|
||||
ENTRY(machine_check)
|
||||
pushl $TRAP_machine_check<<16
|
||||
jmp handle_nmi_mce
|
||||
--- a/xen/arch/x86/x86_64/compat/entry.S
|
||||
+++ b/xen/arch/x86/x86_64/compat/entry.S
|
||||
@@ -171,7 +171,7 @@ compat_bad_hypercall:
|
||||
jmp compat_test_all_events
|
||||
|
||||
/* %rbx: struct vcpu, interrupts disabled */
|
||||
-compat_restore_all_guest:
|
||||
+ENTRY(compat_restore_all_guest)
|
||||
ASSERT_INTERRUPTS_DISABLED
|
||||
RESTORE_ALL
|
||||
addq $8,%rsp
|
||||
--- a/xen/arch/x86/x86_64/entry.S
|
||||
+++ b/xen/arch/x86/x86_64/entry.S
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/page.h>
|
||||
#include <public/xen.h>
|
||||
+#include <irq_vectors.h>
|
||||
|
||||
ALIGN
|
||||
/* %rbx: struct vcpu */
|
||||
@@ -617,6 +618,9 @@ ENTRY(early_page_fault)
|
||||
jmp restore_all_xen
|
||||
.popsection
|
||||
|
||||
+ENTRY(nmi)
|
||||
+ pushq $0
|
||||
+ movl $TRAP_nmi,4(%rsp)
|
||||
handle_ist_exception:
|
||||
SAVE_ALL
|
||||
testb $3,UREGS_cs(%rsp)
|
||||
@@ -631,12 +635,25 @@ handle_ist_exception:
|
||||
movl UREGS_entry_vector(%rsp),%eax
|
||||
leaq exception_table(%rip),%rdx
|
||||
callq *(%rdx,%rax,8)
|
||||
- jmp ret_from_intr
|
||||
+ cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
|
||||
+ jne ret_from_intr
|
||||
|
||||
-ENTRY(nmi)
|
||||
- pushq $0
|
||||
- movl $TRAP_nmi,4(%rsp)
|
||||
- jmp handle_ist_exception
|
||||
+ /* We want to get straight to the IRET on the NMI exit path. */
|
||||
+ testb $3,UREGS_cs(%rsp)
|
||||
+ jz restore_all_xen
|
||||
+ GET_CURRENT(%rbx)
|
||||
+ /* Send an IPI to ourselves to cover for the lack of event checking. */
|
||||
+ movl VCPU_processor(%rbx),%eax
|
||||
+ shll $IRQSTAT_shift,%eax
|
||||
+ leaq irq_stat(%rip),%rcx
|
||||
+ cmpl $0,(%rcx,%rax,1)
|
||||
+ je 1f
|
||||
+ movl $EVENT_CHECK_VECTOR,%edi
|
||||
+ call send_IPI_self
|
||||
+1: movq VCPU_domain(%rbx),%rax
|
||||
+ cmpb $0,DOMAIN_is_32bit_pv(%rax)
|
||||
+ je restore_all_guest
|
||||
+ jmp compat_restore_all_guest
|
||||
|
||||
ENTRY(machine_check)
|
||||
pushq $0
|
@ -1,113 +0,0 @@
|
||||
# Commit be6507509454adf3bb5a50b9406c88504e996d5a
|
||||
# Date 2013-03-04 13:37:39 +0100
|
||||
# Author George Dunlap <george.dunlap@eu.citrix.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
credit1: Use atomic bit operations for the flags structure
|
||||
|
||||
The flags structure is not protected by locks (or more precisely,
|
||||
it is protected using an inconsistent set of locks); we therefore need
|
||||
to make sure that all accesses are atomic-safe. This is particularly
|
||||
important in the case of the PARKED flag, which if clobbered while
|
||||
changing the YIELD bit will leave a vcpu wedged in an offline state.
|
||||
|
||||
Using the atomic bitops also requires us to change the size of the "flags"
|
||||
element.
|
||||
|
||||
Spotted-by: Igor Pavlikevich <ipavlikevich@gmail.com>
|
||||
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
|
||||
|
||||
--- a/xen/common/sched_credit.c
|
||||
+++ b/xen/common/sched_credit.c
|
||||
@@ -58,8 +58,8 @@
|
||||
/*
|
||||
* Flags
|
||||
*/
|
||||
-#define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */
|
||||
-#define CSCHED_FLAG_VCPU_YIELD 0x0002 /* VCPU yielding */
|
||||
+#define CSCHED_FLAG_VCPU_PARKED 0x0 /* VCPU over capped credits */
|
||||
+#define CSCHED_FLAG_VCPU_YIELD 0x1 /* VCPU yielding */
|
||||
|
||||
|
||||
/*
|
||||
@@ -132,7 +132,7 @@ struct csched_vcpu {
|
||||
struct vcpu *vcpu;
|
||||
atomic_t credit;
|
||||
s_time_t start_time; /* When we were scheduled (used for credit) */
|
||||
- uint16_t flags;
|
||||
+ unsigned flags;
|
||||
int16_t pri;
|
||||
#ifdef CSCHED_STATS
|
||||
struct {
|
||||
@@ -214,7 +214,7 @@ __runq_insert(unsigned int cpu, struct c
|
||||
/* If the vcpu yielded, try to put it behind one lower-priority
|
||||
* runnable vcpu if we can. The next runq_sort will bring it forward
|
||||
* within 30ms if the queue too long. */
|
||||
- if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
|
||||
+ if ( test_bit(CSCHED_FLAG_VCPU_YIELD, &svc->flags)
|
||||
&& __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
|
||||
{
|
||||
iter=iter->next;
|
||||
@@ -776,7 +776,7 @@ csched_vcpu_wake(const struct scheduler
|
||||
* those.
|
||||
*/
|
||||
if ( svc->pri == CSCHED_PRI_TS_UNDER &&
|
||||
- !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
|
||||
+ !test_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
|
||||
{
|
||||
svc->pri = CSCHED_PRI_TS_BOOST;
|
||||
}
|
||||
@@ -789,12 +789,12 @@ csched_vcpu_wake(const struct scheduler
|
||||
static void
|
||||
csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc)
|
||||
{
|
||||
- struct csched_vcpu * const sv = CSCHED_VCPU(vc);
|
||||
+ struct csched_vcpu * const svc = CSCHED_VCPU(vc);
|
||||
|
||||
if ( !sched_credit_default_yield )
|
||||
{
|
||||
/* Let the scheduler know that this vcpu is trying to yield */
|
||||
- sv->flags |= CSCHED_FLAG_VCPU_YIELD;
|
||||
+ set_bit(CSCHED_FLAG_VCPU_YIELD, &svc->flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1122,11 +1122,10 @@ csched_acct(void* dummy)
|
||||
/* Park running VCPUs of capped-out domains */
|
||||
if ( sdom->cap != 0U &&
|
||||
credit < -credit_cap &&
|
||||
- !(svc->flags & CSCHED_FLAG_VCPU_PARKED) )
|
||||
+ !test_and_set_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
|
||||
{
|
||||
CSCHED_STAT_CRANK(vcpu_park);
|
||||
vcpu_pause_nosync(svc->vcpu);
|
||||
- svc->flags |= CSCHED_FLAG_VCPU_PARKED;
|
||||
}
|
||||
|
||||
/* Lower bound on credits */
|
||||
@@ -1142,7 +1141,7 @@ csched_acct(void* dummy)
|
||||
svc->pri = CSCHED_PRI_TS_UNDER;
|
||||
|
||||
/* Unpark any capped domains whose credits go positive */
|
||||
- if ( svc->flags & CSCHED_FLAG_VCPU_PARKED)
|
||||
+ if ( test_and_clear_bit(CSCHED_FLAG_VCPU_PARKED, &svc->flags) )
|
||||
{
|
||||
/*
|
||||
* It's important to unset the flag AFTER the unpause()
|
||||
@@ -1151,7 +1150,6 @@ csched_acct(void* dummy)
|
||||
*/
|
||||
CSCHED_STAT_CRANK(vcpu_unpark);
|
||||
vcpu_unpause(svc->vcpu);
|
||||
- svc->flags &= ~CSCHED_FLAG_VCPU_PARKED;
|
||||
}
|
||||
|
||||
/* Upper bound on credits means VCPU stops earning */
|
||||
@@ -1410,8 +1408,7 @@ csched_schedule(
|
||||
/*
|
||||
* Clear YIELD flag before scheduling out
|
||||
*/
|
||||
- if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD )
|
||||
- scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD);
|
||||
+ clear_bit(CSCHED_FLAG_VCPU_YIELD, &scurr->flags);
|
||||
|
||||
/*
|
||||
* SMP Load balance:
|
@ -1,36 +0,0 @@
|
||||
# Commit d9fb28ae6d41c8201482948660e52889481830dd
|
||||
# Date 2013-03-04 13:42:17 +0100
|
||||
# Author Olaf Hering <olaf@aepfle.de>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
xentrace: fix off-by-one in calculate_tbuf_size
|
||||
|
||||
Commit "xentrace: reduce trace buffer size to something mfn_offset can
|
||||
reach" contains an off-by-one bug. max_mfn_offset needs to be reduced by
|
||||
exactly the value of t_info_first_offset.
|
||||
|
||||
If the system has two cpus and the number of requested trace pages is
|
||||
very large, the final number of trace pages + the offset will not fit
|
||||
into a short. As a result the variable offset in alloc_trace_bufs() will
|
||||
wrap while allocating buffers for the second cpu. Later
|
||||
share_xen_page_with_privileged_guests() will be called with a wrong page
|
||||
and the ASSERT in this function triggers. If the ASSERT is ignored by
|
||||
running a non-dbg hypervisor the asserts in xentrace itself trigger
|
||||
because "cons" is not aligned because the very last trace page for the
|
||||
second cpu is a random mfn.
|
||||
|
||||
Thanks to Jan for the quick analysis.
|
||||
|
||||
Signed-off-by: Olaf Hering <olaf@aepfle.de>
|
||||
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
|
||||
|
||||
--- a/xen/common/trace.c
|
||||
+++ b/xen/common/trace.c
|
||||
@@ -133,7 +133,7 @@ static int calculate_tbuf_size(unsigned
|
||||
* The array of mfns for the highest cpu can start at the maximum value
|
||||
* mfn_offset can hold. So reduce the number of cpus and also the mfn_offset.
|
||||
*/
|
||||
- max_mfn_offset -= t_info_first_offset - 1;
|
||||
+ max_mfn_offset -= t_info_first_offset;
|
||||
max_cpus--;
|
||||
if ( max_cpus )
|
||||
max_mfn_offset /= max_cpus;
|
@ -1,25 +0,0 @@
|
||||
# Commit 9581c4f9a55372a21e759cd449cb676d0e8feddb
|
||||
# Date 2013-03-06 17:10:26 +0100
|
||||
# Author Matthew Daley <mattjd@gmail.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
fix domain unlocking in some xsm error paths
|
||||
|
||||
A couple of xsm error/access-denied code paths in hypercalls neglect to
|
||||
unlock a previously locked domain. Fix by ensuring the domains are
|
||||
unlocked correctly.
|
||||
|
||||
Signed-off-by: Matthew Daley <mattjd@gmail.com>
|
||||
Reviewed-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/common/grant_table.c
|
||||
+++ b/xen/common/grant_table.c
|
||||
@@ -2262,7 +2262,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
|
||||
rc = xsm_grant_setup(current->domain, d);
|
||||
if ( rc ) {
|
||||
op.status = GNTST_permission_denied;
|
||||
- goto out1;
|
||||
+ goto out2;
|
||||
}
|
||||
|
||||
gt = d->grant_table;
|
@ -1,369 +0,0 @@
|
||||
# Commit 4245d331e0e75de8d1bddbbb518f3a8ce6d0bb7e
|
||||
# Date 2013-03-08 14:05:34 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86/MSI: add mechanism to fully protect MSI-X table from PV guest accesses
|
||||
|
||||
This adds two new physdev operations for Dom0 to invoke when resource
|
||||
allocation for devices is known to be complete, so that the hypervisor
|
||||
can arrange for the respective MMIO ranges to be marked read-only
|
||||
before an eventual guest getting such a device assigned even gets
|
||||
started, such that it won't be able to set up writable mappings for
|
||||
these MMIO ranges before Xen has a chance to protect them.
|
||||
|
||||
This also addresses another issue with the code being modified here,
|
||||
in that so far write protection for the address ranges in question got
|
||||
set up only once during the lifetime of a device (i.e. until either
|
||||
system shutdown or device hot removal), while teardown happened when
|
||||
the last interrupt was disposed of by the guest (which at least allowed
|
||||
the tables to be writable when the device got assigned to a second
|
||||
guest [instance] after the first terminated).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/msi.c
|
||||
+++ b/xen/arch/x86/msi.c
|
||||
@@ -649,8 +649,8 @@ static u64 read_pci_mem_bar(u16 seg, u8
|
||||
* @entries: pointer to an array of struct msix_entry entries
|
||||
* @nvec: number of @entries
|
||||
*
|
||||
- * Setup the MSI-X capability structure of device function with a
|
||||
- * single MSI-X irq. A return of zero indicates the successful setup of
|
||||
+ * Setup the MSI-X capability structure of device function with the requested
|
||||
+ * number MSI-X irqs. A return of zero indicates the successful setup of
|
||||
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
|
||||
**/
|
||||
static int msix_capability_init(struct pci_dev *dev,
|
||||
@@ -658,86 +658,69 @@ static int msix_capability_init(struct p
|
||||
struct msi_desc **desc,
|
||||
unsigned int nr_entries)
|
||||
{
|
||||
- struct msi_desc *entry;
|
||||
- int pos;
|
||||
+ struct msi_desc *entry = NULL;
|
||||
+ int pos, vf;
|
||||
u16 control;
|
||||
- u64 table_paddr, entry_paddr;
|
||||
- u32 table_offset, entry_offset;
|
||||
- u8 bir;
|
||||
- void __iomem *base;
|
||||
- int idx;
|
||||
+ u64 table_paddr;
|
||||
+ u32 table_offset;
|
||||
+ u8 bir, pbus, pslot, pfunc;
|
||||
u16 seg = dev->seg;
|
||||
u8 bus = dev->bus;
|
||||
u8 slot = PCI_SLOT(dev->devfn);
|
||||
u8 func = PCI_FUNC(dev->devfn);
|
||||
|
||||
ASSERT(spin_is_locked(&pcidevs_lock));
|
||||
- ASSERT(desc);
|
||||
|
||||
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
|
||||
control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
|
||||
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
|
||||
|
||||
- /* MSI-X Table Initialization */
|
||||
- entry = alloc_msi_entry();
|
||||
- if ( !entry )
|
||||
- return -ENOMEM;
|
||||
+ if ( desc )
|
||||
+ {
|
||||
+ entry = alloc_msi_entry();
|
||||
+ if ( !entry )
|
||||
+ return -ENOMEM;
|
||||
+ ASSERT(msi);
|
||||
+ }
|
||||
|
||||
- /* Request & Map MSI-X table region */
|
||||
+ /* Locate MSI-X table region */
|
||||
table_offset = pci_conf_read32(seg, bus, slot, func,
|
||||
msix_table_offset_reg(pos));
|
||||
bir = (u8)(table_offset & PCI_MSIX_BIRMASK);
|
||||
table_offset &= ~PCI_MSIX_BIRMASK;
|
||||
- entry_offset = msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
|
||||
|
||||
- table_paddr = msi->table_base + table_offset;
|
||||
- entry_paddr = table_paddr + entry_offset;
|
||||
- idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
|
||||
- if ( idx < 0 )
|
||||
- {
|
||||
- xfree(entry);
|
||||
- return idx;
|
||||
- }
|
||||
- base = (void *)(fix_to_virt(idx) +
|
||||
- ((unsigned long)entry_paddr & ((1UL << PAGE_SHIFT) - 1)));
|
||||
-
|
||||
- entry->msi_attrib.type = PCI_CAP_ID_MSIX;
|
||||
- entry->msi_attrib.is_64 = 1;
|
||||
- entry->msi_attrib.entry_nr = msi->entry_nr;
|
||||
- entry->msi_attrib.maskbit = 1;
|
||||
- entry->msi_attrib.masked = 1;
|
||||
- entry->msi_attrib.pos = pos;
|
||||
- entry->irq = msi->irq;
|
||||
- entry->dev = dev;
|
||||
- entry->mask_base = base;
|
||||
-
|
||||
- list_add_tail(&entry->list, &dev->msi_list);
|
||||
-
|
||||
- if ( !dev->msix_nr_entries )
|
||||
+ if ( !dev->info.is_virtfn )
|
||||
{
|
||||
- u8 pbus, pslot, pfunc;
|
||||
- int vf;
|
||||
- u64 pba_paddr;
|
||||
- u32 pba_offset;
|
||||
+ pbus = bus;
|
||||
+ pslot = slot;
|
||||
+ pfunc = func;
|
||||
+ vf = -1;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ pbus = dev->info.physfn.bus;
|
||||
+ pslot = PCI_SLOT(dev->info.physfn.devfn);
|
||||
+ pfunc = PCI_FUNC(dev->info.physfn.devfn);
|
||||
+ vf = PCI_BDF2(dev->bus, dev->devfn);
|
||||
+ }
|
||||
|
||||
- if ( !dev->info.is_virtfn )
|
||||
- {
|
||||
- pbus = bus;
|
||||
- pslot = slot;
|
||||
- pfunc = func;
|
||||
- vf = -1;
|
||||
- }
|
||||
- else
|
||||
+ table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
|
||||
+ WARN_ON(msi && msi->table_base != table_paddr);
|
||||
+ if ( !table_paddr )
|
||||
+ {
|
||||
+ if ( !msi || !msi->table_base )
|
||||
{
|
||||
- pbus = dev->info.physfn.bus;
|
||||
- pslot = PCI_SLOT(dev->info.physfn.devfn);
|
||||
- pfunc = PCI_FUNC(dev->info.physfn.devfn);
|
||||
- vf = PCI_BDF2(dev->bus, dev->devfn);
|
||||
+ xfree(entry);
|
||||
+ return -ENXIO;
|
||||
}
|
||||
+ table_paddr = msi->table_base;
|
||||
+ }
|
||||
+ table_paddr += table_offset;
|
||||
|
||||
- ASSERT(!dev->msix_used_entries);
|
||||
- WARN_ON(msi->table_base !=
|
||||
- read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf));
|
||||
+ if ( !dev->msix_used_entries )
|
||||
+ {
|
||||
+ u64 pba_paddr;
|
||||
+ u32 pba_offset;
|
||||
|
||||
dev->msix_nr_entries = nr_entries;
|
||||
dev->msix_table.first = PFN_DOWN(table_paddr);
|
||||
@@ -758,7 +741,42 @@ static int msix_capability_init(struct p
|
||||
BITS_TO_LONGS(nr_entries) - 1);
|
||||
WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, dev->msix_pba.first,
|
||||
dev->msix_pba.last));
|
||||
+ }
|
||||
+
|
||||
+ if ( entry )
|
||||
+ {
|
||||
+ /* Map MSI-X table region */
|
||||
+ u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
|
||||
+ int idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
|
||||
+ void __iomem *base;
|
||||
+
|
||||
+ if ( idx < 0 )
|
||||
+ {
|
||||
+ xfree(entry);
|
||||
+ return idx;
|
||||
+ }
|
||||
+ base = (void *)(fix_to_virt(idx) +
|
||||
+ ((unsigned long)entry_paddr & (PAGE_SIZE - 1)));
|
||||
|
||||
+ /* Mask interrupt here */
|
||||
+ writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
||||
+
|
||||
+ entry->msi_attrib.type = PCI_CAP_ID_MSIX;
|
||||
+ entry->msi_attrib.is_64 = 1;
|
||||
+ entry->msi_attrib.entry_nr = msi->entry_nr;
|
||||
+ entry->msi_attrib.maskbit = 1;
|
||||
+ entry->msi_attrib.masked = 1;
|
||||
+ entry->msi_attrib.pos = pos;
|
||||
+ entry->irq = msi->irq;
|
||||
+ entry->dev = dev;
|
||||
+ entry->mask_base = base;
|
||||
+
|
||||
+ list_add_tail(&entry->list, &dev->msi_list);
|
||||
+ *desc = entry;
|
||||
+ }
|
||||
+
|
||||
+ if ( !dev->msix_used_entries )
|
||||
+ {
|
||||
if ( rangeset_add_range(mmio_ro_ranges, dev->msix_table.first,
|
||||
dev->msix_table.last) )
|
||||
WARN();
|
||||
@@ -769,7 +787,7 @@ static int msix_capability_init(struct p
|
||||
if ( dev->domain )
|
||||
p2m_change_entry_type_global(dev->domain,
|
||||
p2m_mmio_direct, p2m_mmio_direct);
|
||||
- if ( !dev->domain || !paging_mode_translate(dev->domain) )
|
||||
+ if ( desc && (!dev->domain || !paging_mode_translate(dev->domain)) )
|
||||
{
|
||||
struct domain *d = dev->domain;
|
||||
|
||||
@@ -783,6 +801,13 @@ static int msix_capability_init(struct p
|
||||
break;
|
||||
if ( d )
|
||||
{
|
||||
+ if ( !IS_PRIV(d) && dev->msix_warned != d->domain_id )
|
||||
+ {
|
||||
+ dev->msix_warned = d->domain_id;
|
||||
+ printk(XENLOG_ERR
|
||||
+ "Potentially insecure use of MSI-X on %04x:%02x:%02x.%u by Dom%d\n",
|
||||
+ seg, bus, slot, func, d->domain_id);
|
||||
+ }
|
||||
/* XXX How to deal with existing mappings? */
|
||||
}
|
||||
}
|
||||
@@ -791,10 +816,6 @@ static int msix_capability_init(struct p
|
||||
WARN_ON(dev->msix_table.first != (table_paddr >> PAGE_SHIFT));
|
||||
++dev->msix_used_entries;
|
||||
|
||||
- /* Mask interrupt here */
|
||||
- writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
|
||||
-
|
||||
- *desc = entry;
|
||||
/* Restore MSI-X enabled bits */
|
||||
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
|
||||
|
||||
@@ -919,6 +940,19 @@ static int __pci_enable_msix(struct msi_
|
||||
return status;
|
||||
}
|
||||
|
||||
+static void _pci_cleanup_msix(struct pci_dev *dev)
|
||||
+{
|
||||
+ if ( !--dev->msix_used_entries )
|
||||
+ {
|
||||
+ if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first,
|
||||
+ dev->msix_table.last) )
|
||||
+ WARN();
|
||||
+ if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first,
|
||||
+ dev->msix_pba.last) )
|
||||
+ WARN();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void __pci_disable_msix(struct msi_desc *entry)
|
||||
{
|
||||
struct pci_dev *dev;
|
||||
@@ -942,15 +976,45 @@ static void __pci_disable_msix(struct ms
|
||||
|
||||
pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
|
||||
|
||||
- if ( !--dev->msix_used_entries )
|
||||
+ _pci_cleanup_msix(dev);
|
||||
+}
|
||||
+
|
||||
+int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off)
|
||||
+{
|
||||
+ int rc;
|
||||
+ struct pci_dev *pdev;
|
||||
+ u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
|
||||
+ unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
|
||||
+ PCI_CAP_ID_MSIX);
|
||||
+
|
||||
+ if ( !use_msi )
|
||||
+ return 0;
|
||||
+
|
||||
+ if ( !pos )
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ spin_lock(&pcidevs_lock);
|
||||
+ pdev = pci_get_pdev(seg, bus, devfn);
|
||||
+ if ( !pdev )
|
||||
+ rc = -ENODEV;
|
||||
+ else if ( pdev->msix_used_entries != !!off )
|
||||
+ rc = -EBUSY;
|
||||
+ else if ( off )
|
||||
{
|
||||
- if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first,
|
||||
- dev->msix_table.last) )
|
||||
- WARN();
|
||||
- if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first,
|
||||
- dev->msix_pba.last) )
|
||||
- WARN();
|
||||
+ _pci_cleanup_msix(pdev);
|
||||
+ rc = 0;
|
||||
}
|
||||
+ else
|
||||
+ {
|
||||
+ u16 control = pci_conf_read16(seg, bus, slot, func,
|
||||
+ msix_control_reg(pos));
|
||||
+
|
||||
+ rc = msix_capability_init(pdev, NULL, NULL,
|
||||
+ multi_msix_capable(control));
|
||||
+ }
|
||||
+ spin_unlock(&pcidevs_lock);
|
||||
+
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/xen/arch/x86/physdev.c
|
||||
+++ b/xen/arch/x86/physdev.c
|
||||
@@ -609,6 +609,18 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
|
||||
break;
|
||||
}
|
||||
|
||||
+ case PHYSDEVOP_prepare_msix:
|
||||
+ case PHYSDEVOP_release_msix: {
|
||||
+ struct physdev_pci_device dev;
|
||||
+
|
||||
+ if ( copy_from_guest(&dev, arg, 1) )
|
||||
+ ret = -EFAULT;
|
||||
+ else
|
||||
+ ret = pci_prepare_msix(dev.seg, dev.bus, dev.devfn,
|
||||
+ cmd != PHYSDEVOP_prepare_msix);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
#ifdef __x86_64__
|
||||
case PHYSDEVOP_pci_mmcfg_reserved: {
|
||||
struct physdev_pci_mmcfg_reserved info;
|
||||
--- a/xen/include/asm-x86/msi.h
|
||||
+++ b/xen/include/asm-x86/msi.h
|
||||
@@ -80,6 +80,7 @@ struct msi_desc;
|
||||
/* Helper functions */
|
||||
extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
|
||||
extern void pci_disable_msi(struct msi_desc *desc);
|
||||
+extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
|
||||
extern void pci_cleanup_msi(struct pci_dev *pdev);
|
||||
extern void setup_msi_handler(struct irq_desc *, struct msi_desc *);
|
||||
extern void setup_msi_irq(struct irq_desc *);
|
||||
--- a/xen/include/public/physdev.h
|
||||
+++ b/xen/include/public/physdev.h
|
||||
@@ -303,6 +303,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_devi
|
||||
|
||||
#define PHYSDEVOP_pci_device_remove 26
|
||||
#define PHYSDEVOP_restore_msi_ext 27
|
||||
+/*
|
||||
+ * Dom0 should use these two to announce MMIO resources assigned to
|
||||
+ * MSI-X capable devices won't (prepare) or may (release) change.
|
||||
+ */
|
||||
+#define PHYSDEVOP_prepare_msix 30
|
||||
+#define PHYSDEVOP_release_msix 31
|
||||
struct physdev_pci_device {
|
||||
/* IN */
|
||||
uint16_t seg;
|
||||
--- a/xen/include/xen/pci.h
|
||||
+++ b/xen/include/xen/pci.h
|
||||
@@ -57,6 +57,7 @@ struct pci_dev {
|
||||
int msix_table_refcnt[MAX_MSIX_TABLE_PAGES];
|
||||
int msix_table_idx[MAX_MSIX_TABLE_PAGES];
|
||||
spinlock_t msix_table_lock;
|
||||
+ domid_t msix_warned;
|
||||
|
||||
struct domain *domain;
|
||||
const u16 seg;
|
@ -1,139 +0,0 @@
|
||||
# Commit 1d80765b504b34b63a42a63aff4291e07e29f0c5
|
||||
# Date 2013-03-12 15:34:22 +0100
|
||||
# Author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
powernow: add fixups for AMD P-state figures
|
||||
|
||||
In the Linux kernel, these two git commits:
|
||||
|
||||
- f594065faf4f9067c2283a34619fc0714e79a98d
|
||||
ACPI: Add fixups for AMD P-state figures
|
||||
- 9855d8ce41a7801548a05d844db2f46c3e810166
|
||||
ACPI: Check MSR valid bit before using P-state frequencies
|
||||
|
||||
Try to fix the issue that "some AMD systems may round the
|
||||
frequencies in ACPI tables to 100MHz boundaries. We can obtain the real
|
||||
frequencies from MSRs, so add a quirk to fix these frequencies up
|
||||
on AMD systems." (from f594065..)
|
||||
|
||||
In discussion (around 9855d8..) "it turned out that indeed real
|
||||
HW/BIOSes may choose to not set the valid bit and thus mark the
|
||||
P-state as invalid. So this could be considered a fix for broken
|
||||
BIOSes." (from 9855d8..)
|
||||
|
||||
which is great for Linux. Unfortunately the Linux kernel, when
|
||||
it tries to do the RDMSR under Xen it fails to get the right
|
||||
value (it gets zero) as Xen traps it and returns zero. Hence
|
||||
when dom0 uploads the P-states they will be unmodified and
|
||||
we should take care of updating the frequencies with the right
|
||||
values.
|
||||
|
||||
I've tested it under Dell Inc. PowerEdge T105 /0RR825, BIOS 1.3.2
|
||||
08/20/2008 where this quirk can be observed (x86 == 0x10, model == 2).
|
||||
Also on other AMD (x86 == 0x12, A8-3850; x86 = 0x14, AMD E-350) to
|
||||
make sure the quirk is not applied there.
|
||||
|
||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Acked-by: stefan.bader@canonical.com
|
||||
|
||||
Do the MSR access here (and while at it, also the one reading
|
||||
MSR_PSTATE_CUR_LIMIT) on the target CPU, and bound the loop over
|
||||
amd_fixup_frequency() by max_hw_pstate (matching the one in
|
||||
powernow_cpufreq_cpu_init()).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/acpi/cpufreq/powernow.c
|
||||
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c
|
||||
@@ -159,6 +159,51 @@ static int powernow_cpufreq_target(struc
|
||||
return result;
|
||||
}
|
||||
|
||||
+static void amd_fixup_frequency(struct xen_processor_px *px)
|
||||
+{
|
||||
+ u32 hi, lo, fid, did;
|
||||
+ int index = px->control & 0x00000007;
|
||||
+ const struct cpuinfo_x86 *c = &current_cpu_data;
|
||||
+
|
||||
+ if ((c->x86 != 0x10 || c->x86_model >= 10) && c->x86 != 0x11)
|
||||
+ return;
|
||||
+
|
||||
+ rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
|
||||
+ /*
|
||||
+ * MSR C001_0064+:
|
||||
+ * Bit 63: PstateEn. Read-write. If set, the P-state is valid.
|
||||
+ */
|
||||
+ if (!(hi & (1U << 31)))
|
||||
+ return;
|
||||
+
|
||||
+ fid = lo & 0x3f;
|
||||
+ did = (lo >> 6) & 7;
|
||||
+ if (c->x86 == 0x10)
|
||||
+ px->core_frequency = (100 * (fid + 16)) >> did;
|
||||
+ else
|
||||
+ px->core_frequency = (100 * (fid + 8)) >> did;
|
||||
+}
|
||||
+
|
||||
+struct amd_cpu_data {
|
||||
+ struct processor_performance *perf;
|
||||
+ u32 max_hw_pstate;
|
||||
+};
|
||||
+
|
||||
+static void get_cpu_data(void *arg)
|
||||
+{
|
||||
+ struct amd_cpu_data *data = arg;
|
||||
+ struct processor_performance *perf = data->perf;
|
||||
+ uint64_t msr_content;
|
||||
+ unsigned int i;
|
||||
+
|
||||
+ rdmsrl(MSR_PSTATE_CUR_LIMIT, msr_content);
|
||||
+ data->max_hw_pstate = (msr_content & HW_PSTATE_MAX_MASK) >>
|
||||
+ HW_PSTATE_MAX_SHIFT;
|
||||
+
|
||||
+ for (i = 0; i < perf->state_count && i <= data->max_hw_pstate; i++)
|
||||
+ amd_fixup_frequency(&perf->states[i]);
|
||||
+}
|
||||
+
|
||||
static int powernow_cpufreq_verify(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct acpi_cpufreq_data *data;
|
||||
@@ -205,8 +250,7 @@ static int powernow_cpufreq_cpu_init(str
|
||||
struct acpi_cpufreq_data *data;
|
||||
unsigned int result = 0;
|
||||
struct processor_performance *perf;
|
||||
- u32 max_hw_pstate;
|
||||
- uint64_t msr_content;
|
||||
+ struct amd_cpu_data info;
|
||||
struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
|
||||
|
||||
data = xzalloc(struct acpi_cpufreq_data);
|
||||
@@ -217,7 +261,7 @@ static int powernow_cpufreq_cpu_init(str
|
||||
|
||||
data->acpi_data = &processor_pminfo[cpu]->perf;
|
||||
|
||||
- perf = data->acpi_data;
|
||||
+ info.perf = perf = data->acpi_data;
|
||||
policy->shared_type = perf->shared_type;
|
||||
|
||||
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
|
||||
@@ -239,8 +283,6 @@ static int powernow_cpufreq_cpu_init(str
|
||||
result = -ENODEV;
|
||||
goto err_unreg;
|
||||
}
|
||||
- rdmsrl(MSR_PSTATE_CUR_LIMIT, msr_content);
|
||||
- max_hw_pstate = (msr_content & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
|
||||
|
||||
if (perf->control_register.space_id != perf->status_register.space_id) {
|
||||
result = -ENODEV;
|
||||
@@ -265,8 +307,10 @@ static int powernow_cpufreq_cpu_init(str
|
||||
|
||||
policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
|
||||
|
||||
+ on_selected_cpus(cpumask_of(cpu), get_cpu_data, &info, 1);
|
||||
+
|
||||
/* table init */
|
||||
- for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
|
||||
+ for (i = 0; i < perf->state_count && i <= info.max_hw_pstate; i++) {
|
||||
if (i > 0 && perf->states[i].core_frequency >=
|
||||
data->freq_table[valid_states-1].frequency / 1000)
|
||||
continue;
|
@ -1,72 +0,0 @@
|
||||
References: bnc#805579
|
||||
|
||||
# Commit b0583c0e64cc8bb6229c95c3304fdac2051f79b3
|
||||
# Date 2013-03-12 15:53:30 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86/MCA: suppress bank clearing for certain injected events
|
||||
|
||||
As the bits indicating validity of the ADDR and MISC bank MSRs may be
|
||||
injected in a way that isn't consistent with what the underlying
|
||||
hardware implements (while the bank must be valid for injection to
|
||||
work, the auxiliary MSRs may not be implemented - and hence cause #GP
|
||||
upon access - if the hardware never sets the corresponding valid bits).
|
||||
|
||||
Consequently we need to do the clearing writes only if no value was
|
||||
interposed for the respective MSR (which also makes sense the other way
|
||||
around: there's no point in clearing a hardware register when all data
|
||||
read came from software). Of course this all requires the injection
|
||||
tool to do things in a consistent way (but that had been a requirement
|
||||
before already).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Tested-by: Ren Yongjie <yongjie.ren@intel.com>
|
||||
Acked-by: Liu Jinsong <jinsong.liu@intel.com>
|
||||
|
||||
--- a/xen/arch/x86/cpu/mcheck/mce.c
|
||||
+++ b/xen/arch/x86/cpu/mcheck/mce.c
|
||||
@@ -1145,13 +1145,15 @@ static void intpose_add(unsigned int cpu
|
||||
printk("intpose_add: interpose array full - request dropped\n");
|
||||
}
|
||||
|
||||
-void intpose_inval(unsigned int cpu_nr, uint64_t msr)
|
||||
+bool_t intpose_inval(unsigned int cpu_nr, uint64_t msr)
|
||||
{
|
||||
- struct intpose_ent *ent;
|
||||
+ struct intpose_ent *ent = intpose_lookup(cpu_nr, msr, NULL);
|
||||
|
||||
- if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
|
||||
- ent->cpu_nr = -1;
|
||||
- }
|
||||
+ if ( !ent )
|
||||
+ return 0;
|
||||
+
|
||||
+ ent->cpu_nr = -1;
|
||||
+ return 1;
|
||||
}
|
||||
|
||||
#define IS_MCA_BANKREG(r) \
|
||||
--- a/xen/arch/x86/cpu/mcheck/mce.h
|
||||
+++ b/xen/arch/x86/cpu/mcheck/mce.h
|
||||
@@ -89,7 +89,7 @@ extern void mce_recoverable_register(mce
|
||||
/* Read an MSR, checking for an interposed value first */
|
||||
extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t,
|
||||
uint64_t *);
|
||||
-extern void intpose_inval(unsigned int, uint64_t);
|
||||
+extern bool_t intpose_inval(unsigned int, uint64_t);
|
||||
|
||||
static inline uint64_t mca_rdmsr(unsigned int msr)
|
||||
{
|
||||
@@ -101,9 +101,9 @@ static inline uint64_t mca_rdmsr(unsigne
|
||||
|
||||
/* Write an MSR, invalidating any interposed value */
|
||||
#define mca_wrmsr(msr, val) do { \
|
||||
- intpose_inval(smp_processor_id(), msr); \
|
||||
- wrmsrl(msr, val); \
|
||||
-} while (0)
|
||||
+ if ( !intpose_inval(smp_processor_id(), msr) ) \
|
||||
+ wrmsrl(msr, val); \
|
||||
+} while ( 0 )
|
||||
|
||||
|
||||
/* Utility function to "logout" all architectural MCA telemetry from the MCA
|
@ -1,32 +0,0 @@
|
||||
# Commit 0f7b6f91ac1bbfd33b23c291b14874b9561909d2
|
||||
# Date 2013-03-20 10:00:01 +0100
|
||||
# Author Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
AMD/IOMMU: Process softirqs while building dom0 iommu mappings
|
||||
|
||||
Recent changes which have made their way into xen-4.2 stable have pushed the
|
||||
runtime of construct_dom0() over 5 seconds, which has caused regressions in
|
||||
XenServer testing because of our 5 second watchdog.
|
||||
|
||||
The root cause is that amd_iommu_dom0_init() does not process softirqs and in
|
||||
particular the nmi_timer which causes the watchdog to decide that no useful
|
||||
progress is being made.
|
||||
|
||||
This patch adds periodic calls to process_pending_softirqs() at the same
|
||||
interval as the Intel variant of this function. The server which was failing
|
||||
with the watchdog test now boots reliably with a timeout of 1 second.
|
||||
|
||||
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||||
@@ -285,6 +285,9 @@ static void __init amd_iommu_dom0_init(s
|
||||
if ( mfn_valid(pfn) )
|
||||
amd_iommu_map_page(d, pfn, pfn,
|
||||
IOMMUF_readable|IOMMUF_writable);
|
||||
+
|
||||
+ if ( !(i & 0xfffff) )
|
||||
+ process_pending_softirqs();
|
||||
}
|
||||
}
|
||||
|
@ -1,22 +0,0 @@
|
||||
# Commit 32861c537781ac94bf403fb778505c3679b85f67
|
||||
# Date 2013-03-20 10:02:26 +0100
|
||||
# Author Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
VT-d: Enumerate IOMMUs when listing capabilities
|
||||
|
||||
This saves N identical console log lines on a multi-iommu server.
|
||||
|
||||
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -2135,7 +2135,8 @@ int __init intel_vtd_setup(void)
|
||||
{
|
||||
iommu = drhd->iommu;
|
||||
|
||||
- printk("Intel VT-d supported page sizes: 4kB");
|
||||
+ printk("Intel VT-d iommu %"PRIu32" supported page sizes: 4kB",
|
||||
+ iommu->index);
|
||||
if (cap_sps_2mb(iommu->cap))
|
||||
printk(", 2MB");
|
||||
|
@ -1,28 +0,0 @@
|
||||
# Commit 759847e44401176401e86e7c55b644cb9f93c781
|
||||
# Date 2013-03-20 10:02:52 +0100
|
||||
# Author Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
ACPI/ERST: Name table in otherwise opaque error messages
|
||||
|
||||
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
Fix spelling and lower severities.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/acpi/apei/erst.c
|
||||
+++ b/xen/drivers/acpi/apei/erst.c
|
||||
@@ -799,11 +799,11 @@ int __init erst_init(void)
|
||||
status = acpi_get_table(ACPI_SIG_ERST, 0,
|
||||
(struct acpi_table_header **)&erst_tab);
|
||||
if (status == AE_NOT_FOUND) {
|
||||
- printk(KERN_ERR "Table is not found!\n");
|
||||
+ printk(KERN_INFO "ERST table was not found\n");
|
||||
return -ENODEV;
|
||||
} else if (ACPI_FAILURE(status)) {
|
||||
const char *msg = acpi_format_exception(status);
|
||||
- printk(KERN_ERR "Failed to get table, %s\n", msg);
|
||||
+ printk(KERN_WARNING "Failed to get ERST table: %s\n", msg);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -1,34 +0,0 @@
|
||||
References: bnc#785211
|
||||
|
||||
# Commit 0611689d9153227831979c7bafe594214b8505a3
|
||||
# Date 2013-03-22 09:43:38 +0100
|
||||
# Author Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
ACPI/APEI: Unlock apei_iomaps_lock on error path
|
||||
|
||||
This causes deadlocks during early boot on hardware with broken/buggy
|
||||
APEI implementations, such as a Dell Poweredge 2950 with the latest
|
||||
currently available BIOS.
|
||||
|
||||
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
Don't use goto or another special error path, as handling the error
|
||||
case in normal flow is quite simple.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/drivers/acpi/apei/apei-io.c
|
||||
+++ b/xen/drivers/acpi/apei/apei-io.c
|
||||
@@ -146,10 +146,8 @@ static void __init apei_post_unmap(paddr
|
||||
|
||||
spin_lock_irqsave(&apei_iomaps_lock, flags);
|
||||
map = __apei_find_iomap(paddr, size);
|
||||
- if (!map)
|
||||
- return;
|
||||
-
|
||||
- list_del(&map->list);
|
||||
+ if (map)
|
||||
+ list_del(&map->list);
|
||||
spin_unlock_irqrestore(&apei_iomaps_lock, flags);
|
||||
|
||||
xfree(map);
|
@ -1,70 +0,0 @@
|
||||
References: bnc#785211
|
||||
|
||||
# Commit 72af01bf6f7489e54ad59270222a29d3e8c501d1
|
||||
# Date 2013-03-22 12:46:25 +0100
|
||||
# Author Huang Ying <ying.huang@intel.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
ACPI, APEI: Add apei_exec_run_optional
|
||||
|
||||
Some actions in APEI ERST and EINJ tables are optional, for example,
|
||||
ACPI_EINJ_BEGIN_OPERATION action is used to do some preparation for
|
||||
error injection, and firmware may choose to do nothing here. While
|
||||
some other actions are mandatory, for example, firmware must provide
|
||||
ACPI_EINJ_GET_ERROR_TYPE implementation.
|
||||
|
||||
Original implementation treats all actions as optional (that is, can
|
||||
have no instructions), which may cause issues if firmware does not
|
||||
provide some mandatory actions. To fix this, this patch adds
|
||||
apei_exec_run_optional, which should be used for optional actions.
|
||||
The original apei_exec_run should be used for mandatory actions.
|
||||
|
||||
Signed-off-by: Huang Ying <ying.huang@intel.com>
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
--- a/xen/drivers/acpi/apei/apei-base.c
|
||||
+++ b/xen/drivers/acpi/apei/apei-base.c
|
||||
@@ -154,9 +154,10 @@ int apei_exec_noop(struct apei_exec_cont
|
||||
* Interpret the specified action. Go through whole action table,
|
||||
* execute all instructions belong to the action.
|
||||
*/
|
||||
-int apei_exec_run(struct apei_exec_context *ctx, u8 action)
|
||||
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
|
||||
+ bool_t optional)
|
||||
{
|
||||
- int rc;
|
||||
+ int rc = -ENOENT;
|
||||
u32 i, ip;
|
||||
struct acpi_whea_header *entry;
|
||||
apei_exec_ins_func_t run;
|
||||
@@ -195,7 +196,7 @@ rewind:
|
||||
goto rewind;
|
||||
}
|
||||
|
||||
- return 0;
|
||||
+ return !optional && rc < 0 ? rc : 0;
|
||||
}
|
||||
|
||||
typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
|
||||
--- a/xen/drivers/acpi/apei/apei-internal.h
|
||||
+++ b/xen/drivers/acpi/apei/apei-internal.h
|
||||
@@ -48,7 +48,18 @@ static inline u64 apei_exec_ctx_get_outp
|
||||
return ctx->value;
|
||||
}
|
||||
|
||||
-int apei_exec_run(struct apei_exec_context *ctx, u8 action);
|
||||
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool_t optional);
|
||||
+
|
||||
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
|
||||
+{
|
||||
+ return __apei_exec_run(ctx, action, 0);
|
||||
+}
|
||||
+
|
||||
+/* It is optional whether the firmware provides the action */
|
||||
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
|
||||
+{
|
||||
+ return __apei_exec_run(ctx, action, 1);
|
||||
+}
|
||||
|
||||
/* Common instruction implementation */
|
||||
|
@ -1,96 +0,0 @@
|
||||
# Commit fae0372140befb88d890a30704a8ec058c902af8
|
||||
# Date 2013-03-25 14:28:31 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
IOMMU: properly check whether interrupt remapping is enabled
|
||||
|
||||
... rather than the IOMMU as a whole.
|
||||
|
||||
That in turn required making sure iommu_intremap gets properly
|
||||
cleared when the respective initialization fails (or isn't being
|
||||
done at all).
|
||||
|
||||
Along with making sure interrupt remapping doesn't get inconsistently
|
||||
enabled on some IOMMUs and not on others in the VT-d code, this in turn
|
||||
allowed quite a bit of cleanup on the VT-d side (removed from the
|
||||
backport).
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/xen/arch/x86/msi.c
|
||||
+++ b/xen/arch/x86/msi.c
|
||||
@@ -204,7 +204,7 @@ static void read_msi_msg(struct msi_desc
|
||||
BUG();
|
||||
}
|
||||
|
||||
- if ( iommu_enabled )
|
||||
+ if ( iommu_intremap )
|
||||
iommu_read_msi_from_ire(entry, msg);
|
||||
}
|
||||
|
||||
@@ -212,7 +212,7 @@ static void write_msi_msg(struct msi_des
|
||||
{
|
||||
entry->msg = *msg;
|
||||
|
||||
- if ( iommu_enabled )
|
||||
+ if ( iommu_intremap )
|
||||
{
|
||||
ASSERT(msg != &entry->msg);
|
||||
iommu_update_ire_from_msi(entry, msg);
|
||||
@@ -482,7 +482,7 @@ int msi_free_irq(struct msi_desc *entry)
|
||||
}
|
||||
|
||||
/* Free the unused IRTE if intr remap enabled */
|
||||
- if ( iommu_enabled )
|
||||
+ if ( iommu_intremap )
|
||||
iommu_update_ire_from_msi(entry, NULL);
|
||||
|
||||
list_del(&entry->list);
|
||||
--- a/xen/drivers/passthrough/iommu.c
|
||||
+++ b/xen/drivers/passthrough/iommu.c
|
||||
@@ -469,6 +469,8 @@ int __init iommu_setup(void)
|
||||
rc = iommu_hardware_setup();
|
||||
iommu_enabled = (rc == 0);
|
||||
}
|
||||
+ if ( !iommu_enabled )
|
||||
+ iommu_intremap = 0;
|
||||
|
||||
if ( (force_iommu && !iommu_enabled) ||
|
||||
(force_intremap && !iommu_intremap) )
|
||||
@@ -485,9 +487,12 @@ int __init iommu_setup(void)
|
||||
}
|
||||
printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
|
||||
if ( iommu_enabled )
|
||||
+ {
|
||||
printk(" - Dom0 mode: %s\n",
|
||||
iommu_passthrough ? "Passthrough" :
|
||||
iommu_dom0_strict ? "Strict" : "Relaxed");
|
||||
+ printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
|
||||
+ }
|
||||
|
||||
return rc;
|
||||
}
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -2072,6 +2072,9 @@ static int init_vtd_hw(void)
|
||||
break;
|
||||
}
|
||||
}
|
||||
+ if ( !iommu_intremap )
|
||||
+ for_each_drhd_unit ( drhd )
|
||||
+ disable_intremap(drhd->iommu);
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/xen/include/asm-x86/io_apic.h
|
||||
+++ b/xen/include/asm-x86/io_apic.h
|
||||
@@ -129,7 +129,7 @@ struct IO_APIC_route_entry {
|
||||
extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
|
||||
|
||||
/* Only need to remap ioapic RTE (reg: 10~3Fh) */
|
||||
-#define ioapic_reg_remapped(reg) (iommu_enabled && ((reg) >= 0x10))
|
||||
+#define ioapic_reg_remapped(reg) (iommu_intremap && ((reg) >= 0x10))
|
||||
|
||||
static inline unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
|
||||
{
|
@ -1,90 +0,0 @@
|
||||
References: bnc#801910
|
||||
|
||||
# Commit 6890cebc6a987d0e896f5d23a8de11a3934101cf
|
||||
# Date 2013-03-25 14:31:27 +0100
|
||||
# Author Malcolm Crossley <malcolm.crossley@citrix.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
VT-d: deal with 5500/5520/X58 errata
|
||||
|
||||
http://www.intel.com/content/www/us/en/chipsets/5520-and-5500-chipset-ioh-specification-update.html
|
||||
|
||||
Stepping B-3 has two errata (#47 and #53) related to Interrupt
|
||||
remapping, to which the workaround is for the BIOS to completely disable
|
||||
interrupt remapping. These errata are fixed in stepping C-2.
|
||||
|
||||
Unfortunately this chipset stepping is very common and many BIOSes are
|
||||
not disabling interrupt remapping on this stepping. We can detect this in
|
||||
Xen and prevent Xen from using the problematic interrupt remapping feature.
|
||||
|
||||
The Intel 5500/5520/X58 chipset does not support VT-d
|
||||
Extended Interrupt Mode(EIM). This means the iommu_supports_eim() check
|
||||
always fails and so x2apic mode cannot be enabled in Xen before this quirk
|
||||
disables the interrupt remapping feature.
|
||||
|
||||
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
|
||||
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
|
||||
|
||||
Gate the function call to check the quirk on interrupt remapping being
|
||||
requested to get enabled, and upon failure disable the IOMMU to be in
|
||||
line with what the changes for XSA-36 (plus follow-ups) did.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/vtd/iommu.c
|
||||
+++ b/xen/drivers/passthrough/vtd/iommu.c
|
||||
@@ -2129,6 +2129,11 @@ int __init intel_vtd_setup(void)
|
||||
}
|
||||
|
||||
platform_quirks_init();
|
||||
+ if ( !iommu_enabled )
|
||||
+ {
|
||||
+ ret = -ENODEV;
|
||||
+ goto error;
|
||||
+ }
|
||||
|
||||
/* We enable the following features only if they are supported by all VT-d
|
||||
* engines: Snoop Control, DMA passthrough, Queued Invalidation and
|
||||
--- a/xen/drivers/passthrough/vtd/quirks.c
|
||||
+++ b/xen/drivers/passthrough/vtd/quirks.c
|
||||
@@ -248,6 +248,29 @@ void vtd_ops_postamble_quirk(struct iomm
|
||||
}
|
||||
}
|
||||
|
||||
+/* 5500/5520/X58 Chipset Interrupt remapping errata, for stepping B-3.
|
||||
+ * Fixed in stepping C-2. */
|
||||
+static void __init tylersburg_intremap_quirk(void)
|
||||
+{
|
||||
+ uint32_t bus, device;
|
||||
+ uint8_t rev;
|
||||
+
|
||||
+ for ( bus = 0; bus < 0x100; bus++ )
|
||||
+ {
|
||||
+ /* Match on System Management Registers on Device 20 Function 0 */
|
||||
+ device = pci_conf_read32(0, bus, 20, 0, PCI_VENDOR_ID);
|
||||
+ rev = pci_conf_read8(0, bus, 20, 0, PCI_REVISION_ID);
|
||||
+
|
||||
+ if ( rev == 0x13 && device == 0x342e8086 )
|
||||
+ {
|
||||
+ printk(XENLOG_WARNING VTDPREFIX
|
||||
+ "Disabling IOMMU due to Intel 5500/5520/X58 Chipset errata #47, #53\n");
|
||||
+ iommu_enabled = 0;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* initialize platform identification flags */
|
||||
void __init platform_quirks_init(void)
|
||||
{
|
||||
@@ -268,6 +291,10 @@ void __init platform_quirks_init(void)
|
||||
|
||||
/* ioremap IGD MMIO+0x2000 page */
|
||||
map_igd_reg();
|
||||
+
|
||||
+ /* Tylersburg interrupt remap quirk */
|
||||
+ if ( iommu_intremap )
|
||||
+ tylersburg_intremap_quirk();
|
||||
}
|
||||
|
||||
/*
|
@ -1,63 +0,0 @@
|
||||
# Commit 92b8bc03bd4b582cb524db51494d0dba7607e7ac
|
||||
# Date 2013-03-25 16:55:22 +0100
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
AMD IOMMU: allow disabling only interrupt remapping when certain IVRS consistency checks fail
|
||||
|
||||
After some more thought on the XSA-36 and specifically the comments we
|
||||
got regarding disabling the IOMMU in this situation altogether making
|
||||
things worse instead of better, I came to the conclusion that we can
|
||||
actually restrict the action in affected cases to just disabling
|
||||
interrupt remapping. That doesn't make the situation worse than prior
|
||||
to the XSA-36 fixes (where interrupt remapping didn't really protect
|
||||
domains from one another), but allows at least DMA isolation to still
|
||||
be utilized.
|
||||
|
||||
To do so, disabling of interrupt remapping must be explicitly requested
|
||||
on the command line - respective checks will then be skipped.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
|
||||
|
||||
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
|
||||
@@ -664,6 +664,9 @@ static u16 __init parse_ivhd_device_spec
|
||||
return dev_length;
|
||||
}
|
||||
|
||||
+ if ( !iommu_intremap )
|
||||
+ return dev_length;
|
||||
+
|
||||
/*
|
||||
* Some BIOSes have IOAPIC broken entries so we check for IVRS
|
||||
* consistency here --- whether entry's IOAPIC ID is valid and
|
||||
@@ -902,7 +905,7 @@ static int __init parse_ivrs_table(struc
|
||||
}
|
||||
|
||||
/* Each IO-APIC must have been mentioned in the table. */
|
||||
- for ( apic = 0; !error && apic < nr_ioapics; ++apic )
|
||||
+ for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
|
||||
{
|
||||
if ( !nr_ioapic_entries[apic] ||
|
||||
ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
|
||||
--- a/xen/drivers/passthrough/amd/iommu_init.c
|
||||
+++ b/xen/drivers/passthrough/amd/iommu_init.c
|
||||
@@ -1192,7 +1192,8 @@ int __init amd_iommu_init(void)
|
||||
|
||||
BUG_ON( !iommu_found() );
|
||||
|
||||
- if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() )
|
||||
+ if ( iommu_intremap && amd_iommu_perdev_intremap &&
|
||||
+ amd_sp5100_erratum28() )
|
||||
goto error_out;
|
||||
|
||||
ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
|
||||
@@ -1209,7 +1210,7 @@ int __init amd_iommu_init(void)
|
||||
goto error_out;
|
||||
|
||||
/* initialize io-apic interrupt remapping entries */
|
||||
- if ( amd_iommu_setup_ioapic_remapping() != 0 )
|
||||
+ if ( iommu_intremap && amd_iommu_setup_ioapic_remapping() != 0 )
|
||||
goto error_out;
|
||||
|
||||
/* allocate and initialize a global device table shared by all iommus */
|
144
26891-x86-S3-Fix-cpu-pool-scheduling-after-suspend-resume.patch
Normal file
144
26891-x86-S3-Fix-cpu-pool-scheduling-after-suspend-resume.patch
Normal file
@ -0,0 +1,144 @@
|
||||
# Commit 9aa356bc9f7533c3cb7f02c823f532532876d444
|
||||
# Date 2013-04-19 12:29:01 +0200
|
||||
# Author Ben Guthro <benjamin.guthro@citrix.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86/S3: Fix cpu pool scheduling after suspend/resume
|
||||
|
||||
This patch addresses another S3 scheduler problem with the system_state
|
||||
variable introduced with the following changeset:
|
||||
http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=269f543ea750ed567d18f2e819e5d5ce58eda5c5
|
||||
|
||||
Specifically, the cpu_callback function that takes the CPU down during
|
||||
suspend, and back up during resume. We were seeing situations where,
|
||||
after S3, only CPU0 was in cpupool0. Guest performance suffered
|
||||
greatly, since all vcpus were only on a single pcpu. Guests under high
|
||||
CPU load showed the problem much more quickly than an idle guest.
|
||||
|
||||
Removing this if condition forces the CPUs to go through the expected
|
||||
online/offline state, and be properly scheduled after S3.
|
||||
|
||||
This also includes a necessary partial change proposed earlier by
|
||||
Tomasz Wroblewski here:
|
||||
http://lists.xen.org/archives/html/xen-devel/2013-01/msg02206.html
|
||||
|
||||
It should also resolve the issues discussed in this thread:
|
||||
http://lists.xen.org/archives/html/xen-devel/2012-11/msg01801.html
|
||||
|
||||
Signed-off-by: Ben Guthro <benjamin.guthro@citrix.com>
|
||||
Acked-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
|
||||
|
||||
--- a/xen/common/cpupool.c
|
||||
+++ b/xen/common/cpupool.c
|
||||
@@ -41,16 +41,28 @@ static struct cpupool *alloc_cpupool_str
|
||||
{
|
||||
struct cpupool *c = xzalloc(struct cpupool);
|
||||
|
||||
- if ( c && zalloc_cpumask_var(&c->cpu_valid) )
|
||||
- return c;
|
||||
- xfree(c);
|
||||
- return NULL;
|
||||
+ if ( !c || !zalloc_cpumask_var(&c->cpu_valid) )
|
||||
+ {
|
||||
+ xfree(c);
|
||||
+ c = NULL;
|
||||
+ }
|
||||
+ else if ( !zalloc_cpumask_var(&c->cpu_suspended) )
|
||||
+ {
|
||||
+ free_cpumask_var(c->cpu_valid);
|
||||
+ xfree(c);
|
||||
+ c = NULL;
|
||||
+ }
|
||||
+
|
||||
+ return c;
|
||||
}
|
||||
|
||||
static void free_cpupool_struct(struct cpupool *c)
|
||||
{
|
||||
if ( c )
|
||||
+ {
|
||||
+ free_cpumask_var(c->cpu_suspended);
|
||||
free_cpumask_var(c->cpu_valid);
|
||||
+ }
|
||||
xfree(c);
|
||||
}
|
||||
|
||||
@@ -417,14 +429,32 @@ void cpupool_rm_domain(struct domain *d)
|
||||
|
||||
/*
|
||||
* called to add a new cpu to pool admin
|
||||
- * we add a hotplugged cpu to the cpupool0 to be able to add it to dom0
|
||||
+ * we add a hotplugged cpu to the cpupool0 to be able to add it to dom0,
|
||||
+ * unless we are resuming from S3, in which case we put the cpu back
|
||||
+ * in the cpupool it was in prior to suspend.
|
||||
*/
|
||||
static void cpupool_cpu_add(unsigned int cpu)
|
||||
{
|
||||
spin_lock(&cpupool_lock);
|
||||
cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
|
||||
cpumask_set_cpu(cpu, &cpupool_free_cpus);
|
||||
- cpupool_assign_cpu_locked(cpupool0, cpu);
|
||||
+
|
||||
+ if ( system_state == SYS_STATE_resume )
|
||||
+ {
|
||||
+ struct cpupool **c;
|
||||
+
|
||||
+ for_each_cpupool(c)
|
||||
+ {
|
||||
+ if ( cpumask_test_cpu(cpu, (*c)->cpu_suspended ) )
|
||||
+ {
|
||||
+ cpupool_assign_cpu_locked(*c, cpu);
|
||||
+ cpumask_clear_cpu(cpu, (*c)->cpu_suspended);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if ( cpumask_test_cpu(cpu, &cpupool_free_cpus) )
|
||||
+ cpupool_assign_cpu_locked(cpupool0, cpu);
|
||||
spin_unlock(&cpupool_lock);
|
||||
}
|
||||
|
||||
@@ -436,7 +466,7 @@ static void cpupool_cpu_add(unsigned int
|
||||
static int cpupool_cpu_remove(unsigned int cpu)
|
||||
{
|
||||
int ret = 0;
|
||||
-
|
||||
+
|
||||
spin_lock(&cpupool_lock);
|
||||
if ( !cpumask_test_cpu(cpu, cpupool0->cpu_valid))
|
||||
ret = -EBUSY;
|
||||
@@ -633,9 +663,14 @@ static int cpu_callback(
|
||||
unsigned int cpu = (unsigned long)hcpu;
|
||||
int rc = 0;
|
||||
|
||||
- if ( (system_state == SYS_STATE_suspend) ||
|
||||
- (system_state == SYS_STATE_resume) )
|
||||
- goto out;
|
||||
+ if ( system_state == SYS_STATE_suspend )
|
||||
+ {
|
||||
+ struct cpupool **c;
|
||||
+
|
||||
+ for_each_cpupool(c)
|
||||
+ if ( cpumask_test_cpu(cpu, (*c)->cpu_valid ) )
|
||||
+ cpumask_set_cpu(cpu, (*c)->cpu_suspended);
|
||||
+ }
|
||||
|
||||
switch ( action )
|
||||
{
|
||||
@@ -650,7 +685,6 @@ static int cpu_callback(
|
||||
break;
|
||||
}
|
||||
|
||||
-out:
|
||||
return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
|
||||
}
|
||||
|
||||
--- a/xen/include/xen/sched-if.h
|
||||
+++ b/xen/include/xen/sched-if.h
|
||||
@@ -199,6 +199,7 @@ struct cpupool
|
||||
{
|
||||
int cpupool_id;
|
||||
cpumask_var_t cpu_valid; /* all cpus assigned to pool */
|
||||
+ cpumask_var_t cpu_suspended; /* cpus in S3 that should be in this pool */
|
||||
struct cpupool *next;
|
||||
unsigned int n_dom;
|
||||
struct scheduler *sched;
|
@ -0,0 +1,142 @@
|
||||
References: FATE#314499, FATE#314509
|
||||
|
||||
# Commit 9be8a4447103d92843fcfeaad8be42408c90e9a9
|
||||
# Date 2013-04-22 13:58:01 +0200
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86/EFI: pass boot services variable info to runtime code
|
||||
|
||||
EFI variables can be flagged as being accessible only within boot services.
|
||||
This makes it awkward for us to figure out how much space they use at
|
||||
runtime. In theory we could figure this out by simply comparing the results
|
||||
from QueryVariableInfo() to the space used by all of our variables, but
|
||||
that fails if the platform doesn't garbage collect on every boot. Thankfully,
|
||||
calling QueryVariableInfo() while still inside boot services gives a more
|
||||
reliable answer. This patch passes that information from the EFI boot stub
|
||||
up to the efi platform code.
|
||||
|
||||
Based on a similarly named Linux patch by Matthew Garrett <matthew.garrett@nebula.com>.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
|
||||
|
||||
--- a/xen/arch/x86/efi/boot.c
|
||||
+++ b/xen/arch/x86/efi/boot.c
|
||||
@@ -1128,6 +1128,23 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
|
||||
if (efi.smbios != EFI_INVALID_TABLE_ADDR)
|
||||
dmi_efi_get_table((void *)(long)efi.smbios);
|
||||
|
||||
+ /* Get snapshot of variable store parameters. */
|
||||
+ status = efi_rs->QueryVariableInfo(EFI_VARIABLE_NON_VOLATILE |
|
||||
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
|
||||
+ EFI_VARIABLE_RUNTIME_ACCESS,
|
||||
+ &efi_boot_max_var_store_size,
|
||||
+ &efi_boot_remain_var_store_size,
|
||||
+ &efi_boot_max_var_size);
|
||||
+ if ( EFI_ERROR(status) )
|
||||
+ {
|
||||
+ efi_boot_max_var_store_size = 0;
|
||||
+ efi_boot_remain_var_store_size = 0;
|
||||
+ efi_boot_max_var_size = status;
|
||||
+ PrintStr(L"Warning: Could not query variable store: ");
|
||||
+ DisplayUint(status, 0);
|
||||
+ PrintStr(newline);
|
||||
+ }
|
||||
+
|
||||
/* Allocate space for trampoline (in first Mb). */
|
||||
cfg.addr = 0x100000;
|
||||
cfg.size = trampoline_end - trampoline_start;
|
||||
--- a/xen/arch/x86/efi/efi.h
|
||||
+++ b/xen/arch/x86/efi/efi.h
|
||||
@@ -22,5 +22,8 @@ extern void *efi_memmap;
|
||||
|
||||
extern l4_pgentry_t *efi_l4_pgtable;
|
||||
|
||||
+extern UINT64 efi_boot_max_var_store_size, efi_boot_remain_var_store_size,
|
||||
+ efi_boot_max_var_size;
|
||||
+
|
||||
unsigned long efi_rs_enter(void);
|
||||
void efi_rs_leave(unsigned long);
|
||||
--- a/xen/arch/x86/efi/runtime.c
|
||||
+++ b/xen/arch/x86/efi/runtime.c
|
||||
@@ -28,6 +28,10 @@ UINTN __read_mostly efi_memmap_size;
|
||||
UINTN __read_mostly efi_mdesc_size;
|
||||
void *__read_mostly efi_memmap;
|
||||
|
||||
+UINT64 __read_mostly efi_boot_max_var_store_size;
|
||||
+UINT64 __read_mostly efi_boot_remain_var_store_size;
|
||||
+UINT64 __read_mostly efi_boot_max_var_size;
|
||||
+
|
||||
struct efi __read_mostly efi = {
|
||||
.acpi = EFI_INVALID_TABLE_ADDR,
|
||||
.acpi20 = EFI_INVALID_TABLE_ADDR,
|
||||
@@ -446,6 +450,35 @@ int efi_runtime_call(struct xenpf_efi_ru
|
||||
break;
|
||||
|
||||
case XEN_EFI_query_variable_info:
|
||||
+ if ( op->misc & ~XEN_EFI_VARINFO_BOOT_SNAPSHOT )
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if ( op->misc & XEN_EFI_VARINFO_BOOT_SNAPSHOT )
|
||||
+ {
|
||||
+ if ( (op->u.query_variable_info.attr
|
||||
+ & ~EFI_VARIABLE_APPEND_WRITE) !=
|
||||
+ (EFI_VARIABLE_NON_VOLATILE |
|
||||
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
|
||||
+ EFI_VARIABLE_RUNTIME_ACCESS) )
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ op->u.query_variable_info.max_store_size =
|
||||
+ efi_boot_max_var_store_size;
|
||||
+ op->u.query_variable_info.remain_store_size =
|
||||
+ efi_boot_remain_var_store_size;
|
||||
+ if ( efi_boot_max_var_store_size )
|
||||
+ {
|
||||
+ op->u.query_variable_info.max_size = efi_boot_max_var_size;
|
||||
+ status = EFI_SUCCESS;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ op->u.query_variable_info.max_size = 0;
|
||||
+ status = efi_boot_max_var_size;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
cr3 = efi_rs_enter();
|
||||
if ( (efi_rs->Hdr.Revision >> 16) < 2 )
|
||||
{
|
||||
@@ -462,6 +495,9 @@ int efi_runtime_call(struct xenpf_efi_ru
|
||||
|
||||
case XEN_EFI_query_capsule_capabilities:
|
||||
case XEN_EFI_update_capsule:
|
||||
+ if ( op->misc )
|
||||
+ return -EINVAL;
|
||||
+
|
||||
cr3 = efi_rs_enter();
|
||||
if ( (efi_rs->Hdr.Revision >> 16) < 2 )
|
||||
{
|
||||
--- a/xen/include/efi/efiapi.h
|
||||
+++ b/xen/include/efi/efiapi.h
|
||||
@@ -213,6 +213,10 @@ VOID
|
||||
#define EFI_VARIABLE_NON_VOLATILE 0x00000001
|
||||
#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002
|
||||
#define EFI_VARIABLE_RUNTIME_ACCESS 0x00000004
|
||||
+#define EFI_VARIABLE_HARDWARE_ERROR_RECORD 0x00000008
|
||||
+#define EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS 0x00000010
|
||||
+#define EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS 0x00000020
|
||||
+#define EFI_VARIABLE_APPEND_WRITE 0x00000040
|
||||
|
||||
// Variable size limitation
|
||||
#define EFI_MAXIMUM_VARIABLE_SIZE 1024
|
||||
--- a/xen/include/public/platform.h
|
||||
+++ b/xen/include/public/platform.h
|
||||
@@ -184,6 +184,7 @@ struct xenpf_efi_runtime_call {
|
||||
struct xenpf_efi_guid vendor_guid;
|
||||
} get_next_variable_name;
|
||||
|
||||
+#define XEN_EFI_VARINFO_BOOT_SNAPSHOT 0x00000001
|
||||
struct {
|
||||
uint32_t attr;
|
||||
uint64_t max_store_size;
|
@ -0,0 +1,23 @@
|
||||
# Commit a7ac9597a7fc6ca934957eb78b41e26638281953
|
||||
# Date 2013-04-29 11:27:54 +0200
|
||||
# Author Jan Beulich <jbeulich@suse.com>
|
||||
# Committer Jan Beulich <jbeulich@suse.com>
|
||||
x86/EFI: fix runtime call status for compat mode Dom0
|
||||
|
||||
The top two bits (indicating error/warning classification) need to
|
||||
remain the top two bits.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Keir Fraser <keir@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/efi/runtime.c
|
||||
+++ b/xen/arch/x86/efi/runtime.c
|
||||
@@ -513,7 +513,7 @@ int efi_runtime_call(struct xenpf_efi_ru
|
||||
#ifndef COMPAT
|
||||
op->status = status;
|
||||
#else
|
||||
- op->status = (status & 0x3fffffff) | (status >> 62);
|
||||
+ op->status = (status & 0x3fffffff) | ((status >> 32) & 0xc0000000);
|
||||
#endif
|
||||
|
||||
return rc;
|
@ -1,8 +1,8 @@
|
||||
Index: xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
|
||||
Index: xen-4.2.2-testing/tools/python/xen/xend/XendDomainInfo.py
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
|
||||
+++ xen-4.2.1-testing/tools/python/xen/xend/XendDomainInfo.py
|
||||
@@ -2984,7 +2984,7 @@ class XendDomainInfo:
|
||||
--- xen-4.2.2-testing.orig/tools/python/xen/xend/XendDomainInfo.py
|
||||
+++ xen-4.2.2-testing/tools/python/xen/xend/XendDomainInfo.py
|
||||
@@ -2985,7 +2985,7 @@ class XendDomainInfo:
|
||||
|
||||
self.guest_bitsize = self.image.getBitSize()
|
||||
# Make sure there's enough RAM available for the domain
|
||||
|
@ -1,88 +0,0 @@
|
||||
Subject: e1000: Discard packets that are too long if !SBP and !LPE
|
||||
From: Michael Contreras michael@inetric.com Sun Dec 2 20:11:22 2012 -0800
|
||||
Date: Wed Jan 16 14:12:40 2013 +0000:
|
||||
Git: b4e9b8169dedc0bcf0d3abe07642f761ac70aeea
|
||||
|
||||
The e1000_receive function for the e1000 needs to discard packets longer than
|
||||
1522 bytes if the SBP and LPE flags are disabled. The linux driver assumes
|
||||
this behavior and allocates memory based on this assumption.
|
||||
|
||||
Signed-off-by: Michael Contreras <michael@inetric.com>
|
||||
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
|
||||
|
||||
Subject: e1000: Discard oversized packets based on SBP|LPE
|
||||
From: Michael Contreras <michael@inetric.com>
|
||||
Date: Wed, 5 Dec 2012 18:31:30 +0000 (-0500)
|
||||
|
||||
e1000: Discard oversized packets based on SBP|LPE
|
||||
|
||||
Discard packets longer than 16384 when !SBP to match the hardware behavior.
|
||||
|
||||
Signed-off-by: Michael Contreras <michael@inetric.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
|
||||
[ This is a security vulnerability, CVE-2012-6075 / XSA-41. ]
|
||||
(cherry picked from commit 4c2cae2a882db4d2a231b27b3b31a5bbec6dacbf)
|
||||
|
||||
Index: xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
|
||||
+++ xen-4.2.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
|
||||
@@ -55,6 +55,11 @@ static int debugflags = DBGBIT(TXERR) |
|
||||
#define REG_IOADDR 0x0
|
||||
#define REG_IODATA 0x4
|
||||
|
||||
+/* this is the size past which hardware will drop packets when setting LPE=0 */
|
||||
+#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
|
||||
+/* this is the size past which hardware will drop packets when setting LPE=1 */
|
||||
+#define MAXIMUM_ETHERNET_LPE_SIZE 16384
|
||||
+
|
||||
/*
|
||||
* HW models:
|
||||
* E1000_DEV_ID_82540EM works with Windows and Linux
|
||||
@@ -628,6 +633,14 @@ e1000_receive(void *opaque, const uint8_
|
||||
return;
|
||||
}
|
||||
|
||||
+ /* Discard oversized packets if !LPE and !SBP. */
|
||||
+ if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
|
||||
+ (size > MAXIMUM_ETHERNET_VLAN_SIZE
|
||||
+ && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
|
||||
+ && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
if (!receive_filter(s, buf, size))
|
||||
return;
|
||||
|
||||
Index: xen-4.2.1-testing/tools/qemu-xen-dir-remote/hw/e1000.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/tools/qemu-xen-dir-remote/hw/e1000.c
|
||||
+++ xen-4.2.1-testing/tools/qemu-xen-dir-remote/hw/e1000.c
|
||||
@@ -59,6 +59,11 @@ static int debugflags = DBGBIT(TXERR) |
|
||||
#define PNPMMIO_SIZE 0x20000
|
||||
#define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */
|
||||
|
||||
+/* this is the size past which hardware will drop packets when setting LPE=0 */
|
||||
+#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
|
||||
+/* this is the size past which hardware will drop packets when setting LPE=1 */
|
||||
+#define MAXIMUM_ETHERNET_LPE_SIZE 16384
|
||||
+
|
||||
/*
|
||||
* HW models:
|
||||
* E1000_DEV_ID_82540EM works with Windows and Linux
|
||||
@@ -693,6 +698,14 @@ e1000_receive(VLANClientState *nc, const
|
||||
size = sizeof(min_buf);
|
||||
}
|
||||
|
||||
+ /* Discard oversized packets if !LPE and !SBP. */
|
||||
+ if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
|
||||
+ (size > MAXIMUM_ETHERNET_VLAN_SIZE
|
||||
+ && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
|
||||
+ && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
|
||||
+ return size;
|
||||
+ }
|
||||
+
|
||||
if (!receive_filter(s, buf, size))
|
||||
return size;
|
||||
|
@ -1,32 +0,0 @@
|
||||
References: CVE-2013-0151 XSA-34 bnc#797285
|
||||
|
||||
x86_32: don't allow use of nested HVM
|
||||
|
||||
There are (indirect) uses of map_domain_page() in the nested HVM code
|
||||
that are unsafe when not just using the 1:1 mapping.
|
||||
|
||||
This is XSA-34 / CVE-2013-0151.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
--- a/xen/arch/x86/hvm/hvm.c
|
||||
+++ b/xen/arch/x86/hvm/hvm.c
|
||||
@@ -3930,6 +3930,10 @@ long do_hvm_op(unsigned long op, XEN_GUE
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
case HVM_PARAM_NESTEDHVM:
|
||||
+#ifdef __i386__
|
||||
+ if ( a.value )
|
||||
+ rc = -EINVAL;
|
||||
+#else
|
||||
if ( a.value > 1 )
|
||||
rc = -EINVAL;
|
||||
if ( !is_hvm_domain(d) )
|
||||
@@ -3944,6 +3948,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
|
||||
for_each_vcpu(d, v)
|
||||
if ( rc == 0 )
|
||||
rc = nestedhvm_vcpu_initialise(v);
|
||||
+#endif
|
||||
break;
|
||||
case HVM_PARAM_BUFIOREQ_EVTCHN:
|
||||
rc = -EINVAL;
|
262
CVE-2013-1918-xsa45-1-vcpu-destroy-pagetables-preemptible.patch
Normal file
262
CVE-2013-1918-xsa45-1-vcpu-destroy-pagetables-preemptible.patch
Normal file
@ -0,0 +1,262 @@
|
||||
x86: make vcpu_destroy_pagetables() preemptible
|
||||
|
||||
... as it may take significant amounts of time.
|
||||
|
||||
The function is moved to mm.c, which is the better home for it anyway and
|
||||
avoids having to make a new helper function there non-static. It is
|
||||
given a "preemptible" parameter temporarily (until, in a subsequent
|
||||
patch, its other caller is also made capable of dealing with
|
||||
preemption).
|
||||
|
||||
This is part of CVE-2013-1918 / XSA-45.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/domain.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/domain.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/domain.c
|
||||
@@ -73,8 +73,6 @@ void (*dead_idle) (void) __read_mostly =
|
||||
static void paravirt_ctxt_switch_from(struct vcpu *v);
|
||||
static void paravirt_ctxt_switch_to(struct vcpu *v);
|
||||
|
||||
-static void vcpu_destroy_pagetables(struct vcpu *v);
|
||||
-
|
||||
static void default_idle(void)
|
||||
{
|
||||
local_irq_disable();
|
||||
@@ -1058,7 +1056,7 @@ void arch_vcpu_reset(struct vcpu *v)
|
||||
if ( !is_hvm_vcpu(v) )
|
||||
{
|
||||
destroy_gdt(v);
|
||||
- vcpu_destroy_pagetables(v);
|
||||
+ vcpu_destroy_pagetables(v, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2069,63 +2067,6 @@ static int relinquish_memory(
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static void vcpu_destroy_pagetables(struct vcpu *v)
|
||||
-{
|
||||
- struct domain *d = v->domain;
|
||||
- unsigned long pfn;
|
||||
-
|
||||
-#ifdef __x86_64__
|
||||
- if ( is_pv_32on64_vcpu(v) )
|
||||
- {
|
||||
- pfn = l4e_get_pfn(*(l4_pgentry_t *)
|
||||
- __va(pagetable_get_paddr(v->arch.guest_table)));
|
||||
-
|
||||
- if ( pfn != 0 )
|
||||
- {
|
||||
- if ( paging_mode_refcounts(d) )
|
||||
- put_page(mfn_to_page(pfn));
|
||||
- else
|
||||
- put_page_and_type(mfn_to_page(pfn));
|
||||
- }
|
||||
-
|
||||
- l4e_write(
|
||||
- (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
|
||||
- l4e_empty());
|
||||
-
|
||||
- v->arch.cr3 = 0;
|
||||
- return;
|
||||
- }
|
||||
-#endif
|
||||
-
|
||||
- pfn = pagetable_get_pfn(v->arch.guest_table);
|
||||
- if ( pfn != 0 )
|
||||
- {
|
||||
- if ( paging_mode_refcounts(d) )
|
||||
- put_page(mfn_to_page(pfn));
|
||||
- else
|
||||
- put_page_and_type(mfn_to_page(pfn));
|
||||
- v->arch.guest_table = pagetable_null();
|
||||
- }
|
||||
-
|
||||
-#ifdef __x86_64__
|
||||
- /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
|
||||
- pfn = pagetable_get_pfn(v->arch.guest_table_user);
|
||||
- if ( pfn != 0 )
|
||||
- {
|
||||
- if ( !is_pv_32bit_vcpu(v) )
|
||||
- {
|
||||
- if ( paging_mode_refcounts(d) )
|
||||
- put_page(mfn_to_page(pfn));
|
||||
- else
|
||||
- put_page_and_type(mfn_to_page(pfn));
|
||||
- }
|
||||
- v->arch.guest_table_user = pagetable_null();
|
||||
- }
|
||||
-#endif
|
||||
-
|
||||
- v->arch.cr3 = 0;
|
||||
-}
|
||||
-
|
||||
int domain_relinquish_resources(struct domain *d)
|
||||
{
|
||||
int ret;
|
||||
@@ -2143,7 +2084,11 @@ int domain_relinquish_resources(struct d
|
||||
|
||||
/* Drop the in-use references to page-table bases. */
|
||||
for_each_vcpu ( d, v )
|
||||
- vcpu_destroy_pagetables(v);
|
||||
+ {
|
||||
+ ret = vcpu_destroy_pagetables(v, 1);
|
||||
+ if ( ret )
|
||||
+ return ret;
|
||||
+ }
|
||||
|
||||
if ( !is_hvm_domain(d) )
|
||||
{
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
@@ -2825,6 +2825,82 @@ static void put_superpage(unsigned long
|
||||
|
||||
#endif
|
||||
|
||||
+static int put_old_guest_table(struct vcpu *v)
|
||||
+{
|
||||
+ int rc;
|
||||
+
|
||||
+ if ( !v->arch.old_guest_table )
|
||||
+ return 0;
|
||||
+
|
||||
+ switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) )
|
||||
+ {
|
||||
+ case -EINTR:
|
||||
+ case -EAGAIN:
|
||||
+ return -EAGAIN;
|
||||
+ }
|
||||
+
|
||||
+ v->arch.old_guest_table = NULL;
|
||||
+
|
||||
+ return rc;
|
||||
+}
|
||||
+
|
||||
+int vcpu_destroy_pagetables(struct vcpu *v, bool_t preemptible)
|
||||
+{
|
||||
+ unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
|
||||
+ struct page_info *page;
|
||||
+ int rc = put_old_guest_table(v);
|
||||
+
|
||||
+ if ( rc )
|
||||
+ return rc;
|
||||
+
|
||||
+#ifdef __x86_64__
|
||||
+ if ( is_pv_32on64_vcpu(v) )
|
||||
+ mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn));
|
||||
+#endif
|
||||
+
|
||||
+ if ( mfn )
|
||||
+ {
|
||||
+ page = mfn_to_page(mfn);
|
||||
+ if ( paging_mode_refcounts(v->domain) )
|
||||
+ put_page(page);
|
||||
+ else
|
||||
+ rc = put_page_and_type_preemptible(page, preemptible);
|
||||
+ }
|
||||
+
|
||||
+#ifdef __x86_64__
|
||||
+ if ( is_pv_32on64_vcpu(v) )
|
||||
+ {
|
||||
+ if ( !rc )
|
||||
+ l4e_write(
|
||||
+ (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
|
||||
+ l4e_empty());
|
||||
+ }
|
||||
+ else
|
||||
+#endif
|
||||
+ if ( !rc )
|
||||
+ {
|
||||
+ v->arch.guest_table = pagetable_null();
|
||||
+
|
||||
+#ifdef __x86_64__
|
||||
+ /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
|
||||
+ mfn = pagetable_get_pfn(v->arch.guest_table_user);
|
||||
+ if ( mfn )
|
||||
+ {
|
||||
+ page = mfn_to_page(mfn);
|
||||
+ if ( paging_mode_refcounts(v->domain) )
|
||||
+ put_page(page);
|
||||
+ else
|
||||
+ rc = put_page_and_type_preemptible(page, preemptible);
|
||||
+ }
|
||||
+ if ( !rc )
|
||||
+ v->arch.guest_table_user = pagetable_null();
|
||||
+#endif
|
||||
+ }
|
||||
+
|
||||
+ v->arch.cr3 = 0;
|
||||
+
|
||||
+ return rc;
|
||||
+}
|
||||
|
||||
int new_guest_cr3(unsigned long mfn)
|
||||
{
|
||||
@@ -3011,12 +3087,21 @@ long do_mmuext_op(
|
||||
unsigned int foreigndom)
|
||||
{
|
||||
struct mmuext_op op;
|
||||
- int rc = 0, i = 0, okay;
|
||||
unsigned long type;
|
||||
- unsigned int done = 0;
|
||||
+ unsigned int i = 0, done = 0;
|
||||
struct vcpu *curr = current;
|
||||
struct domain *d = curr->domain;
|
||||
struct domain *pg_owner;
|
||||
+ int okay, rc = put_old_guest_table(curr);
|
||||
+
|
||||
+ if ( unlikely(rc) )
|
||||
+ {
|
||||
+ if ( likely(rc == -EAGAIN) )
|
||||
+ rc = hypercall_create_continuation(
|
||||
+ __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone,
|
||||
+ foreigndom);
|
||||
+ return rc;
|
||||
+ }
|
||||
|
||||
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
|
||||
{
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/x86_64/compat/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/x86_64/compat/mm.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/x86_64/compat/mm.c
|
||||
@@ -365,7 +365,7 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
|
||||
: mcs->call.args[1];
|
||||
unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED;
|
||||
|
||||
- BUG_ON(left == arg1);
|
||||
+ BUG_ON(left == arg1 && left != i);
|
||||
BUG_ON(left > count);
|
||||
guest_handle_add_offset(nat_ops, i - left);
|
||||
guest_handle_subtract_offset(cmp_uops, left);
|
||||
Index: xen-4.2.1-testing/xen/include/asm-x86/domain.h
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/include/asm-x86/domain.h
|
||||
+++ xen-4.2.1-testing/xen/include/asm-x86/domain.h
|
||||
@@ -464,6 +464,7 @@ struct arch_vcpu
|
||||
pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */
|
||||
#endif
|
||||
pagetable_t guest_table; /* (MFN) guest notion of cr3 */
|
||||
+ struct page_info *old_guest_table; /* partially destructed pagetable */
|
||||
/* guest_table holds a ref to the page, and also a type-count unless
|
||||
* shadow refcounts are in use */
|
||||
pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */
|
||||
Index: xen-4.2.1-testing/xen/include/asm-x86/mm.h
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/include/asm-x86/mm.h
|
||||
+++ xen-4.2.1-testing/xen/include/asm-x86/mm.h
|
||||
@@ -605,6 +605,7 @@ void audit_domains(void);
|
||||
int new_guest_cr3(unsigned long pfn);
|
||||
void make_cr3(struct vcpu *v, unsigned long mfn);
|
||||
void update_cr3(struct vcpu *v);
|
||||
+int vcpu_destroy_pagetables(struct vcpu *, bool_t preemptible);
|
||||
void propagate_page_fault(unsigned long addr, u16 error_code);
|
||||
void *do_page_walk(struct vcpu *v, unsigned long addr);
|
||||
|
173
CVE-2013-1918-xsa45-2-new-guest-cr3-preemptible.patch
Normal file
173
CVE-2013-1918-xsa45-2-new-guest-cr3-preemptible.patch
Normal file
@ -0,0 +1,173 @@
|
||||
x86: make new_guest_cr3() preemptible
|
||||
|
||||
... as it may take significant amounts of time.
|
||||
|
||||
This is part of CVE-2013-1918 / XSA-45.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
@@ -2906,44 +2906,69 @@ int new_guest_cr3(unsigned long mfn)
|
||||
{
|
||||
struct vcpu *curr = current;
|
||||
struct domain *d = curr->domain;
|
||||
- int okay;
|
||||
+ int rc;
|
||||
unsigned long old_base_mfn;
|
||||
|
||||
#ifdef __x86_64__
|
||||
if ( is_pv_32on64_domain(d) )
|
||||
{
|
||||
- okay = paging_mode_refcounts(d)
|
||||
- ? 0 /* Old code was broken, but what should it be? */
|
||||
- : mod_l4_entry(
|
||||
+ rc = paging_mode_refcounts(d)
|
||||
+ ? -EINVAL /* Old code was broken, but what should it be? */
|
||||
+ : mod_l4_entry(
|
||||
__va(pagetable_get_paddr(curr->arch.guest_table)),
|
||||
l4e_from_pfn(
|
||||
mfn,
|
||||
(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
|
||||
- pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0;
|
||||
- if ( unlikely(!okay) )
|
||||
+ pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr);
|
||||
+ switch ( rc )
|
||||
{
|
||||
+ case 0:
|
||||
+ break;
|
||||
+ case -EINTR:
|
||||
+ case -EAGAIN:
|
||||
+ return -EAGAIN;
|
||||
+ default:
|
||||
MEM_LOG("Error while installing new compat baseptr %lx", mfn);
|
||||
- return 0;
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
invalidate_shadow_ldt(curr, 0);
|
||||
write_ptbase(curr);
|
||||
|
||||
- return 1;
|
||||
+ return 0;
|
||||
}
|
||||
#endif
|
||||
- okay = paging_mode_refcounts(d)
|
||||
- ? get_page_from_pagenr(mfn, d)
|
||||
- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
|
||||
- if ( unlikely(!okay) )
|
||||
+ rc = put_old_guest_table(curr);
|
||||
+ if ( unlikely(rc) )
|
||||
+ return rc;
|
||||
+
|
||||
+ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
|
||||
+ /*
|
||||
+ * This is particularly important when getting restarted after the
|
||||
+ * previous attempt got preempted in the put-old-MFN phase.
|
||||
+ */
|
||||
+ if ( old_base_mfn == mfn )
|
||||
{
|
||||
- MEM_LOG("Error while installing new baseptr %lx", mfn);
|
||||
+ write_ptbase(curr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
- invalidate_shadow_ldt(curr, 0);
|
||||
+ rc = paging_mode_refcounts(d)
|
||||
+ ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL)
|
||||
+ : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1);
|
||||
+ switch ( rc )
|
||||
+ {
|
||||
+ case 0:
|
||||
+ break;
|
||||
+ case -EINTR:
|
||||
+ case -EAGAIN:
|
||||
+ return -EAGAIN;
|
||||
+ default:
|
||||
+ MEM_LOG("Error while installing new baseptr %lx", mfn);
|
||||
+ return rc;
|
||||
+ }
|
||||
|
||||
- old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
|
||||
+ invalidate_shadow_ldt(curr, 0);
|
||||
|
||||
curr->arch.guest_table = pagetable_from_pfn(mfn);
|
||||
update_cr3(curr);
|
||||
@@ -2952,13 +2977,25 @@ int new_guest_cr3(unsigned long mfn)
|
||||
|
||||
if ( likely(old_base_mfn != 0) )
|
||||
{
|
||||
+ struct page_info *page = mfn_to_page(old_base_mfn);
|
||||
+
|
||||
if ( paging_mode_refcounts(d) )
|
||||
- put_page(mfn_to_page(old_base_mfn));
|
||||
+ put_page(page);
|
||||
else
|
||||
- put_page_and_type(mfn_to_page(old_base_mfn));
|
||||
+ switch ( rc = put_page_and_type_preemptible(page, 1) )
|
||||
+ {
|
||||
+ case -EINTR:
|
||||
+ rc = -EAGAIN;
|
||||
+ case -EAGAIN:
|
||||
+ curr->arch.old_guest_table = page;
|
||||
+ break;
|
||||
+ default:
|
||||
+ BUG_ON(rc);
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
|
||||
- return 1;
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
static struct domain *get_pg_owner(domid_t domid)
|
||||
@@ -3256,8 +3293,13 @@ long do_mmuext_op(
|
||||
}
|
||||
|
||||
case MMUEXT_NEW_BASEPTR:
|
||||
- okay = (!paging_mode_translate(d)
|
||||
- && new_guest_cr3(op.arg1.mfn));
|
||||
+ if ( paging_mode_translate(d) )
|
||||
+ okay = 0;
|
||||
+ else
|
||||
+ {
|
||||
+ rc = new_guest_cr3(op.arg1.mfn);
|
||||
+ okay = !rc;
|
||||
+ }
|
||||
break;
|
||||
|
||||
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/traps.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/traps.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/traps.c
|
||||
@@ -2407,12 +2407,23 @@ static int emulate_privileged_op(struct
|
||||
#endif
|
||||
}
|
||||
page = get_page_from_gfn(v->domain, gfn, NULL, P2M_ALLOC);
|
||||
- rc = page ? new_guest_cr3(page_to_mfn(page)) : 0;
|
||||
if ( page )
|
||||
+ {
|
||||
+ rc = new_guest_cr3(page_to_mfn(page));
|
||||
put_page(page);
|
||||
+ }
|
||||
+ else
|
||||
+ rc = -EINVAL;
|
||||
domain_unlock(v->domain);
|
||||
- if ( rc == 0 ) /* not okay */
|
||||
+ switch ( rc )
|
||||
+ {
|
||||
+ case 0:
|
||||
+ break;
|
||||
+ case -EAGAIN: /* retry after preemption */
|
||||
+ goto skip;
|
||||
+ default: /* not okay */
|
||||
goto fail;
|
||||
+ }
|
||||
break;
|
||||
}
|
||||
|
76
CVE-2013-1918-xsa45-3-new-user-base-preemptible.patch
Normal file
76
CVE-2013-1918-xsa45-3-new-user-base-preemptible.patch
Normal file
@ -0,0 +1,76 @@
|
||||
x86: make MMUEXT_NEW_USER_BASEPTR preemptible
|
||||
|
||||
... as it may take significant amounts of time.
|
||||
|
||||
This is part of CVE-2013-1918 / XSA-45.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
@@ -3313,29 +3313,56 @@ long do_mmuext_op(
|
||||
break;
|
||||
}
|
||||
|
||||
+ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
|
||||
+ /*
|
||||
+ * This is particularly important when getting restarted after the
|
||||
+ * previous attempt got preempted in the put-old-MFN phase.
|
||||
+ */
|
||||
+ if ( old_mfn == op.arg1.mfn )
|
||||
+ break;
|
||||
+
|
||||
if ( op.arg1.mfn != 0 )
|
||||
{
|
||||
if ( paging_mode_refcounts(d) )
|
||||
okay = get_page_from_pagenr(op.arg1.mfn, d);
|
||||
else
|
||||
- okay = !get_page_and_type_from_pagenr(
|
||||
- op.arg1.mfn, PGT_root_page_table, d, 0, 0);
|
||||
+ {
|
||||
+ rc = get_page_and_type_from_pagenr(
|
||||
+ op.arg1.mfn, PGT_root_page_table, d, 0, 1);
|
||||
+ okay = !rc;
|
||||
+ }
|
||||
if ( unlikely(!okay) )
|
||||
{
|
||||
- MEM_LOG("Error while installing new mfn %lx", op.arg1.mfn);
|
||||
+ if ( rc == -EINTR )
|
||||
+ rc = -EAGAIN;
|
||||
+ else if ( rc != -EAGAIN )
|
||||
+ MEM_LOG("Error while installing new mfn %lx",
|
||||
+ op.arg1.mfn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
- old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
|
||||
curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
|
||||
|
||||
if ( old_mfn != 0 )
|
||||
{
|
||||
+ struct page_info *page = mfn_to_page(old_mfn);
|
||||
+
|
||||
if ( paging_mode_refcounts(d) )
|
||||
- put_page(mfn_to_page(old_mfn));
|
||||
+ put_page(page);
|
||||
else
|
||||
- put_page_and_type(mfn_to_page(old_mfn));
|
||||
+ switch ( rc = put_page_and_type_preemptible(page, 1) )
|
||||
+ {
|
||||
+ case -EINTR:
|
||||
+ rc = -EAGAIN;
|
||||
+ case -EAGAIN:
|
||||
+ curr->arch.old_guest_table = page;
|
||||
+ okay = 0;
|
||||
+ break;
|
||||
+ default:
|
||||
+ BUG_ON(rc);
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
|
||||
break;
|
218
CVE-2013-1918-xsa45-4-vcpu-reset-preemptible.patch
Normal file
218
CVE-2013-1918-xsa45-4-vcpu-reset-preemptible.patch
Normal file
@ -0,0 +1,218 @@
|
||||
x86: make vcpu_reset() preemptible
|
||||
|
||||
... as dropping the old page tables may take significant amounts of
|
||||
time.
|
||||
|
||||
This is part of CVE-2013-1918 / XSA-45.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/domain.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/domain.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/domain.c
|
||||
@@ -1051,17 +1051,16 @@ int arch_set_info_guest(
|
||||
#undef c
|
||||
}
|
||||
|
||||
-void arch_vcpu_reset(struct vcpu *v)
|
||||
+int arch_vcpu_reset(struct vcpu *v)
|
||||
{
|
||||
if ( !is_hvm_vcpu(v) )
|
||||
{
|
||||
destroy_gdt(v);
|
||||
- vcpu_destroy_pagetables(v, 0);
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- vcpu_end_shutdown_deferral(v);
|
||||
+ return vcpu_destroy_pagetables(v);
|
||||
}
|
||||
+
|
||||
+ vcpu_end_shutdown_deferral(v);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2085,7 +2084,7 @@ int domain_relinquish_resources(struct d
|
||||
/* Drop the in-use references to page-table bases. */
|
||||
for_each_vcpu ( d, v )
|
||||
{
|
||||
- ret = vcpu_destroy_pagetables(v, 1);
|
||||
+ ret = vcpu_destroy_pagetables(v);
|
||||
if ( ret )
|
||||
return ret;
|
||||
}
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/hvm.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/hvm.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/hvm.c
|
||||
@@ -3577,8 +3577,11 @@ static void hvm_s3_suspend(struct domain
|
||||
|
||||
for_each_vcpu ( d, v )
|
||||
{
|
||||
+ int rc;
|
||||
+
|
||||
vlapic_reset(vcpu_vlapic(v));
|
||||
- vcpu_reset(v);
|
||||
+ rc = vcpu_reset(v);
|
||||
+ ASSERT(!rc);
|
||||
}
|
||||
|
||||
vpic_reset(d);
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/hvm/vlapic.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/hvm/vlapic.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/hvm/vlapic.c
|
||||
@@ -255,10 +255,13 @@ static void vlapic_init_sipi_action(unsi
|
||||
{
|
||||
case APIC_DM_INIT: {
|
||||
bool_t fpu_initialised;
|
||||
+ int rc;
|
||||
+
|
||||
domain_lock(target->domain);
|
||||
/* Reset necessary VCPU state. This does not include FPU state. */
|
||||
fpu_initialised = target->fpu_initialised;
|
||||
- vcpu_reset(target);
|
||||
+ rc = vcpu_reset(target);
|
||||
+ ASSERT(!rc);
|
||||
target->fpu_initialised = fpu_initialised;
|
||||
vlapic_reset(vcpu_vlapic(target));
|
||||
domain_unlock(target->domain);
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/mm.c
|
||||
@@ -2844,7 +2844,7 @@ static int put_old_guest_table(struct vc
|
||||
return rc;
|
||||
}
|
||||
|
||||
-int vcpu_destroy_pagetables(struct vcpu *v, bool_t preemptible)
|
||||
+int vcpu_destroy_pagetables(struct vcpu *v)
|
||||
{
|
||||
unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
|
||||
struct page_info *page;
|
||||
@@ -2864,7 +2864,7 @@ int vcpu_destroy_pagetables(struct vcpu
|
||||
if ( paging_mode_refcounts(v->domain) )
|
||||
put_page(page);
|
||||
else
|
||||
- rc = put_page_and_type_preemptible(page, preemptible);
|
||||
+ rc = put_page_and_type_preemptible(page, 1);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
@@ -2890,7 +2890,7 @@ int vcpu_destroy_pagetables(struct vcpu
|
||||
if ( paging_mode_refcounts(v->domain) )
|
||||
put_page(page);
|
||||
else
|
||||
- rc = put_page_and_type_preemptible(page, preemptible);
|
||||
+ rc = put_page_and_type_preemptible(page, 1);
|
||||
}
|
||||
if ( !rc )
|
||||
v->arch.guest_table_user = pagetable_null();
|
||||
Index: xen-4.2.2-testing/xen/common/domain.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/common/domain.c
|
||||
+++ xen-4.2.2-testing/xen/common/domain.c
|
||||
@@ -779,14 +779,18 @@ void domain_unpause_by_systemcontroller(
|
||||
domain_unpause(d);
|
||||
}
|
||||
|
||||
-void vcpu_reset(struct vcpu *v)
|
||||
+int vcpu_reset(struct vcpu *v)
|
||||
{
|
||||
struct domain *d = v->domain;
|
||||
+ int rc;
|
||||
|
||||
vcpu_pause(v);
|
||||
domain_lock(d);
|
||||
|
||||
- arch_vcpu_reset(v);
|
||||
+ set_bit(_VPF_in_reset, &v->pause_flags);
|
||||
+ rc = arch_vcpu_reset(v);
|
||||
+ if ( rc )
|
||||
+ goto out_unlock;
|
||||
|
||||
set_bit(_VPF_down, &v->pause_flags);
|
||||
|
||||
@@ -802,9 +806,13 @@ void vcpu_reset(struct vcpu *v)
|
||||
#endif
|
||||
cpumask_clear(v->cpu_affinity_tmp);
|
||||
clear_bit(_VPF_blocked, &v->pause_flags);
|
||||
+ clear_bit(_VPF_in_reset, &v->pause_flags);
|
||||
|
||||
+ out_unlock:
|
||||
domain_unlock(v->domain);
|
||||
vcpu_unpause(v);
|
||||
+
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
|
||||
Index: xen-4.2.2-testing/xen/common/domctl.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/common/domctl.c
|
||||
+++ xen-4.2.2-testing/xen/common/domctl.c
|
||||
@@ -307,8 +307,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
|
||||
|
||||
if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
|
||||
{
|
||||
- vcpu_reset(v);
|
||||
- ret = 0;
|
||||
+ ret = vcpu_reset(v);
|
||||
+ if ( ret == -EAGAIN )
|
||||
+ ret = hypercall_create_continuation(
|
||||
+ __HYPERVISOR_domctl, "h", u_domctl);
|
||||
goto svc_out;
|
||||
}
|
||||
|
||||
Index: xen-4.2.2-testing/xen/include/asm-x86/mm.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/asm-x86/mm.h
|
||||
+++ xen-4.2.2-testing/xen/include/asm-x86/mm.h
|
||||
@@ -605,7 +605,7 @@ void audit_domains(void);
|
||||
int new_guest_cr3(unsigned long pfn);
|
||||
void make_cr3(struct vcpu *v, unsigned long mfn);
|
||||
void update_cr3(struct vcpu *v);
|
||||
-int vcpu_destroy_pagetables(struct vcpu *, bool_t preemptible);
|
||||
+int vcpu_destroy_pagetables(struct vcpu *);
|
||||
void propagate_page_fault(unsigned long addr, u16 error_code);
|
||||
void *do_page_walk(struct vcpu *v, unsigned long addr);
|
||||
|
||||
Index: xen-4.2.2-testing/xen/include/xen/domain.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/domain.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/domain.h
|
||||
@@ -13,7 +13,7 @@ typedef union {
|
||||
struct vcpu *alloc_vcpu(
|
||||
struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
|
||||
struct vcpu *alloc_dom0_vcpu0(void);
|
||||
-void vcpu_reset(struct vcpu *v);
|
||||
+int vcpu_reset(struct vcpu *);
|
||||
|
||||
struct xen_domctl_getdomaininfo;
|
||||
void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
|
||||
@@ -67,7 +67,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
|
||||
|
||||
void arch_dump_domain_info(struct domain *d);
|
||||
|
||||
-void arch_vcpu_reset(struct vcpu *v);
|
||||
+int arch_vcpu_reset(struct vcpu *);
|
||||
|
||||
extern spinlock_t vcpu_alloc_lock;
|
||||
bool_t domctl_lock_acquire(void);
|
||||
Index: xen-4.2.2-testing/xen/include/xen/sched.h
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/include/xen/sched.h
|
||||
+++ xen-4.2.2-testing/xen/include/xen/sched.h
|
||||
@@ -644,6 +644,9 @@ static inline struct domain *next_domain
|
||||
/* VCPU is blocked due to missing mem_sharing ring. */
|
||||
#define _VPF_mem_sharing 6
|
||||
#define VPF_mem_sharing (1UL<<_VPF_mem_sharing)
|
||||
+ /* VCPU is being reset. */
|
||||
+#define _VPF_in_reset 7
|
||||
+#define VPF_in_reset (1UL<<_VPF_in_reset)
|
||||
|
||||
static inline int vcpu_runnable(struct vcpu *v)
|
||||
{
|
212
CVE-2013-1918-xsa45-5-set-info-guest-preemptible.patch
Normal file
212
CVE-2013-1918-xsa45-5-set-info-guest-preemptible.patch
Normal file
@ -0,0 +1,212 @@
|
||||
x86: make arch_set_info_guest() preemptible
|
||||
|
||||
... as the root page table validation (and the dropping of a possible
|
||||
old one) can require meaningful amounts of time.
|
||||
|
||||
This is part of CVE-2013-1918 / XSA-45.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
Index: xen-4.2.2-testing/xen/arch/x86/domain.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/arch/x86/domain.c
|
||||
+++ xen-4.2.2-testing/xen/arch/x86/domain.c
|
||||
@@ -858,6 +858,9 @@ int arch_set_info_guest(
|
||||
|
||||
if ( !v->is_initialised )
|
||||
{
|
||||
+ if ( !compat && !(flags & VGCF_in_kernel) && !c.nat->ctrlreg[1] )
|
||||
+ return -EINVAL;
|
||||
+
|
||||
v->arch.pv_vcpu.ldt_base = c(ldt_base);
|
||||
v->arch.pv_vcpu.ldt_ents = c(ldt_ents);
|
||||
}
|
||||
@@ -955,24 +958,44 @@ int arch_set_info_guest(
|
||||
if ( rc != 0 )
|
||||
return rc;
|
||||
|
||||
+ set_bit(_VPF_in_reset, &v->pause_flags);
|
||||
+
|
||||
if ( !compat )
|
||||
- {
|
||||
cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]);
|
||||
- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
|
||||
-
|
||||
- if ( !cr3_page )
|
||||
- {
|
||||
- destroy_gdt(v);
|
||||
- return -EINVAL;
|
||||
- }
|
||||
- if ( !paging_mode_refcounts(d)
|
||||
- && !get_page_type(cr3_page, PGT_base_page_table) )
|
||||
- {
|
||||
- put_page(cr3_page);
|
||||
- destroy_gdt(v);
|
||||
- return -EINVAL;
|
||||
- }
|
||||
+#ifdef CONFIG_COMPAT
|
||||
+ else
|
||||
+ cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]);
|
||||
+#endif
|
||||
+ cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
|
||||
|
||||
+ if ( !cr3_page )
|
||||
+ rc = -EINVAL;
|
||||
+ else if ( paging_mode_refcounts(d) )
|
||||
+ /* nothing */;
|
||||
+ else if ( cr3_page == v->arch.old_guest_table )
|
||||
+ {
|
||||
+ v->arch.old_guest_table = NULL;
|
||||
+ put_page(cr3_page);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /*
|
||||
+ * Since v->arch.guest_table{,_user} are both NULL, this effectively
|
||||
+ * is just a call to put_old_guest_table().
|
||||
+ */
|
||||
+ if ( !compat )
|
||||
+ rc = vcpu_destroy_pagetables(v);
|
||||
+ if ( !rc )
|
||||
+ rc = get_page_type_preemptible(cr3_page,
|
||||
+ !compat ? PGT_root_page_table
|
||||
+ : PGT_l3_page_table);
|
||||
+ if ( rc == -EINTR )
|
||||
+ rc = -EAGAIN;
|
||||
+ }
|
||||
+ if ( rc )
|
||||
+ /* handled below */;
|
||||
+ else if ( !compat )
|
||||
+ {
|
||||
v->arch.guest_table = pagetable_from_page(cr3_page);
|
||||
#ifdef __x86_64__
|
||||
if ( c.nat->ctrlreg[1] )
|
||||
@@ -980,56 +1003,44 @@ int arch_set_info_guest(
|
||||
cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]);
|
||||
cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
|
||||
|
||||
- if ( !cr3_page ||
|
||||
- (!paging_mode_refcounts(d)
|
||||
- && !get_page_type(cr3_page, PGT_base_page_table)) )
|
||||
+ if ( !cr3_page )
|
||||
+ rc = -EINVAL;
|
||||
+ else if ( !paging_mode_refcounts(d) )
|
||||
{
|
||||
- if (cr3_page)
|
||||
- put_page(cr3_page);
|
||||
- cr3_page = pagetable_get_page(v->arch.guest_table);
|
||||
- v->arch.guest_table = pagetable_null();
|
||||
- if ( paging_mode_refcounts(d) )
|
||||
- put_page(cr3_page);
|
||||
- else
|
||||
- put_page_and_type(cr3_page);
|
||||
- destroy_gdt(v);
|
||||
- return -EINVAL;
|
||||
+ rc = get_page_type_preemptible(cr3_page, PGT_root_page_table);
|
||||
+ switch ( rc )
|
||||
+ {
|
||||
+ case -EINTR:
|
||||
+ rc = -EAGAIN;
|
||||
+ case -EAGAIN:
|
||||
+ v->arch.old_guest_table =
|
||||
+ pagetable_get_page(v->arch.guest_table);
|
||||
+ v->arch.guest_table = pagetable_null();
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
-
|
||||
- v->arch.guest_table_user = pagetable_from_page(cr3_page);
|
||||
- }
|
||||
- else if ( !(flags & VGCF_in_kernel) )
|
||||
- {
|
||||
- destroy_gdt(v);
|
||||
- return -EINVAL;
|
||||
+ if ( !rc )
|
||||
+ v->arch.guest_table_user = pagetable_from_page(cr3_page);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
l4_pgentry_t *l4tab;
|
||||
|
||||
- cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]);
|
||||
- cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
|
||||
-
|
||||
- if ( !cr3_page)
|
||||
- {
|
||||
- destroy_gdt(v);
|
||||
- return -EINVAL;
|
||||
- }
|
||||
-
|
||||
- if (!paging_mode_refcounts(d)
|
||||
- && !get_page_type(cr3_page, PGT_l3_page_table) )
|
||||
- {
|
||||
- put_page(cr3_page);
|
||||
- destroy_gdt(v);
|
||||
- return -EINVAL;
|
||||
- }
|
||||
-
|
||||
l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
|
||||
*l4tab = l4e_from_pfn(page_to_mfn(cr3_page),
|
||||
_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
|
||||
#endif
|
||||
}
|
||||
+ if ( rc )
|
||||
+ {
|
||||
+ if ( cr3_page )
|
||||
+ put_page(cr3_page);
|
||||
+ destroy_gdt(v);
|
||||
+ return rc;
|
||||
+ }
|
||||
+
|
||||
+ clear_bit(_VPF_in_reset, &v->pause_flags);
|
||||
|
||||
if ( v->vcpu_id == 0 )
|
||||
update_domain_wallclock_time(d);
|
||||
Index: xen-4.2.2-testing/xen/common/compat/domain.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/common/compat/domain.c
|
||||
+++ xen-4.2.2-testing/xen/common/compat/domain.c
|
||||
@@ -50,6 +50,10 @@ int compat_vcpu_op(int cmd, int vcpuid,
|
||||
rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, cmp_ctxt);
|
||||
domain_unlock(d);
|
||||
|
||||
+ if ( rc == -EAGAIN )
|
||||
+ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
|
||||
+ cmd, vcpuid, arg);
|
||||
+
|
||||
xfree(cmp_ctxt);
|
||||
break;
|
||||
}
|
||||
Index: xen-4.2.2-testing/xen/common/domain.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/common/domain.c
|
||||
+++ xen-4.2.2-testing/xen/common/domain.c
|
||||
@@ -849,6 +849,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN
|
||||
domain_unlock(d);
|
||||
|
||||
free_vcpu_guest_context(ctxt);
|
||||
+
|
||||
+ if ( rc == -EAGAIN )
|
||||
+ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
|
||||
+ cmd, vcpuid, arg);
|
||||
+
|
||||
break;
|
||||
|
||||
case VCPUOP_up: {
|
||||
Index: xen-4.2.2-testing/xen/common/domctl.c
|
||||
===================================================================
|
||||
--- xen-4.2.2-testing.orig/xen/common/domctl.c
|
||||
+++ xen-4.2.2-testing/xen/common/domctl.c
|
||||
@@ -339,6 +339,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
|
||||
domain_pause(d);
|
||||
ret = arch_set_info_guest(v, c);
|
||||
domain_unpause(d);
|
||||
+
|
||||
+ if ( ret == -EAGAIN )
|
||||
+ ret = hypercall_create_continuation(
|
||||
+ __HYPERVISOR_domctl, "h", u_domctl);
|
||||
}
|
||||
|
||||
svc_out:
|
131
CVE-2013-1918-xsa45-6-unpin-preemptible.patch
Normal file
131
CVE-2013-1918-xsa45-6-unpin-preemptible.patch
Normal file
@ -0,0 +1,131 @@
|
||||
x86: make page table unpinning preemptible
|
||||
|
||||
... as it may take significant amounts of time.
|
||||
|
||||
Since we can't re-invoke the operation in a second attempt, the
|
||||
continuation logic must be slightly tweaked so that we make sure
|
||||
do_mmuext_op() gets run one more time even when the preempted unpin
|
||||
operation was the last one in a batch.
|
||||
|
||||
This is part of CVE-2013-1918 / XSA-45.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
@@ -3140,6 +3140,14 @@ long do_mmuext_op(
|
||||
return rc;
|
||||
}
|
||||
|
||||
+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
|
||||
+ likely(guest_handle_is_null(uops)) )
|
||||
+ {
|
||||
+ /* See the curr->arch.old_guest_table related
|
||||
+ * hypercall_create_continuation() below. */
|
||||
+ return (int)foreigndom;
|
||||
+ }
|
||||
+
|
||||
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
|
||||
{
|
||||
count &= ~MMU_UPDATE_PREEMPTED;
|
||||
@@ -3163,7 +3171,7 @@ long do_mmuext_op(
|
||||
|
||||
for ( i = 0; i < count; i++ )
|
||||
{
|
||||
- if ( hypercall_preempt_check() )
|
||||
+ if ( curr->arch.old_guest_table || hypercall_preempt_check() )
|
||||
{
|
||||
rc = -EAGAIN;
|
||||
break;
|
||||
@@ -3283,7 +3291,17 @@ long do_mmuext_op(
|
||||
break;
|
||||
}
|
||||
|
||||
- put_page_and_type(page);
|
||||
+ switch ( rc = put_page_and_type_preemptible(page, 1) )
|
||||
+ {
|
||||
+ case -EINTR:
|
||||
+ case -EAGAIN:
|
||||
+ curr->arch.old_guest_table = page;
|
||||
+ rc = 0;
|
||||
+ break;
|
||||
+ default:
|
||||
+ BUG_ON(rc);
|
||||
+ break;
|
||||
+ }
|
||||
put_page(page);
|
||||
|
||||
/* A page is dirtied when its pin status is cleared. */
|
||||
@@ -3604,9 +3622,27 @@ long do_mmuext_op(
|
||||
}
|
||||
|
||||
if ( rc == -EAGAIN )
|
||||
+ {
|
||||
+ ASSERT(i < count);
|
||||
rc = hypercall_create_continuation(
|
||||
__HYPERVISOR_mmuext_op, "hihi",
|
||||
uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
|
||||
+ }
|
||||
+ else if ( curr->arch.old_guest_table )
|
||||
+ {
|
||||
+ XEN_GUEST_HANDLE(void) null;
|
||||
+
|
||||
+ ASSERT(rc || i == count);
|
||||
+ set_xen_guest_handle(null, NULL);
|
||||
+ /*
|
||||
+ * In order to have a way to communicate the final return value to
|
||||
+ * our continuation, we pass this in place of "foreigndom", building
|
||||
+ * on the fact that this argument isn't needed anymore.
|
||||
+ */
|
||||
+ rc = hypercall_create_continuation(
|
||||
+ __HYPERVISOR_mmuext_op, "hihi", null,
|
||||
+ MMU_UPDATE_PREEMPTED, null, rc);
|
||||
+ }
|
||||
|
||||
put_pg_owner(pg_owner);
|
||||
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/x86_64/compat/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/x86_64/compat/mm.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/x86_64/compat/mm.c
|
||||
@@ -268,6 +268,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
|
||||
int rc = 0;
|
||||
XEN_GUEST_HANDLE(mmuext_op_t) nat_ops;
|
||||
|
||||
+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
|
||||
+ likely(guest_handle_is_null(cmp_uops)) )
|
||||
+ {
|
||||
+ set_xen_guest_handle(nat_ops, NULL);
|
||||
+ return do_mmuext_op(nat_ops, count, pdone, foreigndom);
|
||||
+ }
|
||||
+
|
||||
preempt_mask = count & MMU_UPDATE_PREEMPTED;
|
||||
count ^= preempt_mask;
|
||||
|
||||
@@ -370,12 +377,18 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
|
||||
guest_handle_add_offset(nat_ops, i - left);
|
||||
guest_handle_subtract_offset(cmp_uops, left);
|
||||
left = 1;
|
||||
- BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops));
|
||||
- BUG_ON(left != arg1);
|
||||
- if (!test_bit(_MCSF_in_multicall, &mcs->flags))
|
||||
- regs->_ecx += count - i;
|
||||
+ if ( arg1 != MMU_UPDATE_PREEMPTED )
|
||||
+ {
|
||||
+ BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops,
|
||||
+ cmp_uops));
|
||||
+ if ( !test_bit(_MCSF_in_multicall, &mcs->flags) )
|
||||
+ regs->_ecx += count - i;
|
||||
+ else
|
||||
+ mcs->compat_call.args[1] += count - i;
|
||||
+ }
|
||||
else
|
||||
- mcs->compat_call.args[1] += count - i;
|
||||
+ BUG_ON(hypercall_xlat_continuation(&left, 0));
|
||||
+ BUG_ON(left != arg1);
|
||||
}
|
||||
else
|
||||
BUG_ON(err > 0);
|
257
CVE-2013-1918-xsa45-7-mm-error-paths-preemptible.patch
Normal file
257
CVE-2013-1918-xsa45-7-mm-error-paths-preemptible.patch
Normal file
@ -0,0 +1,257 @@
|
||||
x86: make page table handling error paths preemptible
|
||||
|
||||
... as they may take significant amounts of time.
|
||||
|
||||
This requires cloning the tweaked continuation logic from
|
||||
do_mmuext_op() to do_mmu_update().
|
||||
|
||||
Note that in mod_l[34]_entry() a negative "preemptible" value gets
|
||||
passed to put_page_from_l[34]e() now, telling the callee to store the
|
||||
respective page in current->arch.old_guest_table (for a hypercall
|
||||
continuation to pick up), rather than carrying out the put right away.
|
||||
This is going to be made a little more explicit by a subsequent cleanup
|
||||
patch.
|
||||
|
||||
This is part of CVE-2013-1918 / XSA-45.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
Index: xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
===================================================================
|
||||
--- xen-4.2.1-testing.orig/xen/arch/x86/mm.c
|
||||
+++ xen-4.2.1-testing/xen/arch/x86/mm.c
|
||||
@@ -1258,7 +1258,16 @@ static int put_page_from_l3e(l3_pgentry_
|
||||
#endif
|
||||
|
||||
if ( unlikely(partial > 0) )
|
||||
+ {
|
||||
+ ASSERT(preemptible >= 0);
|
||||
return __put_page_type(l3e_get_page(l3e), preemptible);
|
||||
+ }
|
||||
+
|
||||
+ if ( preemptible < 0 )
|
||||
+ {
|
||||
+ current->arch.old_guest_table = l3e_get_page(l3e);
|
||||
+ return 0;
|
||||
+ }
|
||||
|
||||
return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
|
||||
}
|
||||
@@ -1271,7 +1280,17 @@ static int put_page_from_l4e(l4_pgentry_
|
||||
(l4e_get_pfn(l4e) != pfn) )
|
||||
{
|
||||
if ( unlikely(partial > 0) )
|
||||
+ {
|
||||
+ ASSERT(preemptible >= 0);
|
||||
return __put_page_type(l4e_get_page(l4e), preemptible);
|
||||
+ }
|
||||
+
|
||||
+ if ( preemptible < 0 )
|
||||
+ {
|
||||
+ current->arch.old_guest_table = l4e_get_page(l4e);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
|
||||
}
|
||||
return 1;
|
||||
@@ -1566,12 +1585,17 @@ static int alloc_l3_table(struct page_in
|
||||
if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
|
||||
{
|
||||
MEM_LOG("Failure in alloc_l3_table: entry %d", i);
|
||||
+ if ( i )
|
||||
+ {
|
||||
+ page->nr_validated_ptes = i;
|
||||
+ page->partial_pte = 0;
|
||||
+ current->arch.old_guest_table = page;
|
||||
+ }
|
||||
while ( i-- > 0 )
|
||||
{
|
||||
if ( !is_guest_l3_slot(i) )
|
||||
continue;
|
||||
unadjust_guest_l3e(pl3e[i], d);
|
||||
- put_page_from_l3e(pl3e[i], pfn, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1601,22 +1625,24 @@ static int alloc_l4_table(struct page_in
|
||||
page->nr_validated_ptes = i;
|
||||
page->partial_pte = partial ?: 1;
|
||||
}
|
||||
- else if ( rc == -EINTR )
|
||||
+ else if ( rc < 0 )
|
||||
{
|
||||
+ if ( rc != -EINTR )
|
||||
+ MEM_LOG("Failure in alloc_l4_table: entry %d", i);
|
||||
if ( i )
|
||||
{
|
||||
page->nr_validated_ptes = i;
|
||||
page->partial_pte = 0;
|
||||
- rc = -EAGAIN;
|
||||
+ if ( rc == -EINTR )
|
||||
+ rc = -EAGAIN;
|
||||
+ else
|
||||
+ {
|
||||
+ if ( current->arch.old_guest_table )
|
||||
+ page->nr_validated_ptes++;
|
||||
+ current->arch.old_guest_table = page;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
- else if ( rc < 0 )
|
||||
- {
|
||||
- MEM_LOG("Failure in alloc_l4_table: entry %d", i);
|
||||
- while ( i-- > 0 )
|
||||
- if ( is_guest_l4_slot(d, i) )
|
||||
- put_page_from_l4e(pl4e[i], pfn, 0, 0);
|
||||
- }
|
||||
if ( rc < 0 )
|
||||
return rc;
|
||||
|
||||
@@ -2064,7 +2090,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
||||
pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
|
||||
}
|
||||
|
||||
- put_page_from_l3e(ol3e, pfn, 0, 0);
|
||||
+ put_page_from_l3e(ol3e, pfn, 0, -preemptible);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -2127,7 +2153,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
- put_page_from_l4e(ol4e, pfn, 0, 0);
|
||||
+ put_page_from_l4e(ol4e, pfn, 0, -preemptible);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -2285,7 +2311,15 @@ static int alloc_page_type(struct page_i
|
||||
PRtype_info ": caf=%08lx taf=%" PRtype_info,
|
||||
page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
|
||||
type, page->count_info, page->u.inuse.type_info);
|
||||
- page->u.inuse.type_info = 0;
|
||||
+ if ( page != current->arch.old_guest_table )
|
||||
+ page->u.inuse.type_info = 0;
|
||||
+ else
|
||||
+ {
|
||||
+ ASSERT((page->u.inuse.type_info &
|
||||
+ (PGT_count_mask | PGT_validated)) == 1);
|
||||
+ get_page_light(page);
|
||||
+ page->u.inuse.type_info |= PGT_partial;
|
||||
+ }
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -3235,21 +3269,17 @@ long do_mmuext_op(
|
||||
}
|
||||
|
||||
if ( (rc = xsm_memory_pin_page(d, pg_owner, page)) != 0 )
|
||||
- {
|
||||
- put_page_and_type(page);
|
||||
okay = 0;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- if ( unlikely(test_and_set_bit(_PGT_pinned,
|
||||
- &page->u.inuse.type_info)) )
|
||||
+ else if ( unlikely(test_and_set_bit(_PGT_pinned,
|
||||
+ &page->u.inuse.type_info)) )
|
||||
{
|
||||
MEM_LOG("Mfn %lx already pinned", page_to_mfn(page));
|
||||
- put_page_and_type(page);
|
||||
okay = 0;
|
||||
- break;
|
||||
}
|
||||
|
||||
+ if ( unlikely(!okay) )
|
||||
+ goto pin_drop;
|
||||
+
|
||||
/* A page is dirtied when its pin status is set. */
|
||||
paging_mark_dirty(pg_owner, page_to_mfn(page));
|
||||
|
||||
@@ -3263,7 +3293,13 @@ long do_mmuext_op(
|
||||
&page->u.inuse.type_info));
|
||||
spin_unlock(&pg_owner->page_alloc_lock);
|
||||
if ( drop_ref )
|
||||
- put_page_and_type(page);
|
||||
+ {
|
||||
+ pin_drop:
|
||||
+ if ( type == PGT_l1_page_table )
|
||||
+ put_page_and_type(page);
|
||||
+ else
|
||||
+ curr->arch.old_guest_table = page;
|
||||
+ }
|
||||
}
|
||||
|
||||
break;
|
||||
@@ -3669,11 +3705,28 @@ long do_mmu_update(
|
||||
void *va;
|
||||
unsigned long gpfn, gmfn, mfn;
|
||||
struct page_info *page;
|
||||
- int rc = 0, i = 0;
|
||||
- unsigned int cmd, done = 0, pt_dom;
|
||||
- struct vcpu *v = current;
|
||||
+ unsigned int cmd, i = 0, done = 0, pt_dom;
|
||||
+ struct vcpu *curr = current, *v = curr;
|
||||
struct domain *d = v->domain, *pt_owner = d, *pg_owner;
|
||||
struct domain_mmap_cache mapcache;
|
||||
+ int rc = put_old_guest_table(curr);
|
||||
+
|
||||
+ if ( unlikely(rc) )
|
||||
+ {
|
||||
+ if ( likely(rc == -EAGAIN) )
|
||||
+ rc = hypercall_create_continuation(
|
||||
+ __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
|
||||
+ foreigndom);
|
||||
+ return rc;
|
||||
+ }
|
||||
+
|
||||
+ if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
|
||||
+ likely(guest_handle_is_null(ureqs)) )
|
||||
+ {
|
||||
+ /* See the curr->arch.old_guest_table related
|
||||
+ * hypercall_create_continuation() below. */
|
||||
+ return (int)foreigndom;
|
||||
+ }
|
||||
|
||||
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
|
||||
{
|
||||
@@ -3722,7 +3775,7 @@ long do_mmu_update(
|
||||
|
||||
for ( i = 0; i < count; i++ )
|
||||
{
|
||||
- if ( hypercall_preempt_check() )
|
||||
+ if ( curr->arch.old_guest_table || hypercall_preempt_check() )
|
||||
{
|
||||
rc = -EAGAIN;
|
||||
break;
|
||||
@@ -3903,9 +3956,27 @@ long do_mmu_update(
|
||||
}
|
||||
|
||||
if ( rc == -EAGAIN )
|
||||
+ {
|
||||
+ ASSERT(i < count);
|
||||
rc = hypercall_create_continuation(
|
||||
__HYPERVISOR_mmu_update, "hihi",
|
||||
ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
|
||||
+ }
|
||||
+ else if ( curr->arch.old_guest_table )
|
||||
+ {
|
||||
+ XEN_GUEST_HANDLE(void) null;
|
||||
+
|
||||
+ ASSERT(rc || i == count);
|
||||
+ set_xen_guest_handle(null, NULL);
|
||||
+ /*
|
||||
+ * In order to have a way to communicate the final return value to
|
||||
+ * our continuation, we pass this in place of "foreigndom", building
|
||||
+ * on the fact that this argument isn't needed anymore.
|
||||
+ */
|
||||
+ rc = hypercall_create_continuation(
|
||||
+ __HYPERVISOR_mmu_update, "hihi", null,
|
||||
+ MMU_UPDATE_PREEMPTED, null, rc);
|
||||
+ }
|
||||
|
||||
put_pg_owner(pg_owner);
|
||||
|
406
CVE-2013-1918-xsa45-followup.patch
Normal file
406
CVE-2013-1918-xsa45-followup.patch
Normal file
@ -0,0 +1,406 @@
|
||||
x86: cleanup after making various page table manipulation operations preemptible
|
||||
|
||||
This drops the "preemptible" parameters from various functions where
|
||||
now they can't (or shouldn't, validated by assertions) be run in non-
|
||||
preemptible mode anymore, to prove that manipulations of at least L3
|
||||
and L4 page tables and page table entries are now always preemptible,
|
||||
i.e. the earlier patches actually fulfill their purpose of fixing the
|
||||
resulting security issue.
|
||||
|
||||
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||||
Acked-by: Tim Deegan <tim@xen.org>
|
||||
|
||||
--- a/xen/arch/x86/domain.c
|
||||
+++ b/xen/arch/x86/domain.c
|
||||
@@ -1986,7 +1986,7 @@ static int relinquish_memory(
|
||||
}
|
||||
|
||||
if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
|
||||
- ret = put_page_and_type_preemptible(page, 1);
|
||||
+ ret = put_page_and_type_preemptible(page);
|
||||
switch ( ret )
|
||||
{
|
||||
case 0:
|
||||
--- a/xen/arch/x86/mm.c
|
||||
+++ b/xen/arch/x86/mm.c
|
||||
@@ -1044,7 +1044,7 @@ get_page_from_l2e(
|
||||
define_get_linear_pagetable(l3);
|
||||
static int
|
||||
get_page_from_l3e(
|
||||
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible)
|
||||
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial)
|
||||
{
|
||||
int rc;
|
||||
|
||||
@@ -1058,7 +1058,7 @@ get_page_from_l3e(
|
||||
}
|
||||
|
||||
rc = get_page_and_type_from_pagenr(
|
||||
- l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
|
||||
+ l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1);
|
||||
if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
|
||||
rc = 0;
|
||||
|
||||
@@ -1069,7 +1069,7 @@ get_page_from_l3e(
|
||||
define_get_linear_pagetable(l4);
|
||||
static int
|
||||
get_page_from_l4e(
|
||||
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible)
|
||||
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial)
|
||||
{
|
||||
int rc;
|
||||
|
||||
@@ -1083,7 +1083,7 @@ get_page_from_l4e(
|
||||
}
|
||||
|
||||
rc = get_page_and_type_from_pagenr(
|
||||
- l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
|
||||
+ l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, 1);
|
||||
if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
|
||||
rc = 0;
|
||||
|
||||
@@ -1237,8 +1237,10 @@ static int put_page_from_l2e(l2_pgentry_
|
||||
static int __put_page_type(struct page_info *, int preemptible);
|
||||
|
||||
static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
|
||||
- int partial, int preemptible)
|
||||
+ int partial, bool_t defer)
|
||||
{
|
||||
+ struct page_info *pg;
|
||||
+
|
||||
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
|
||||
return 1;
|
||||
|
||||
@@ -1257,41 +1259,45 @@ static int put_page_from_l3e(l3_pgentry_
|
||||
}
|
||||
#endif
|
||||
|
||||
+ pg = l3e_get_page(l3e);
|
||||
+
|
||||
if ( unlikely(partial > 0) )
|
||||
{
|
||||
- ASSERT(preemptible >= 0);
|
||||
- return __put_page_type(l3e_get_page(l3e), preemptible);
|
||||
+ ASSERT(!defer);
|
||||
+ return __put_page_type(pg, 1);
|
||||
}
|
||||
|
||||
- if ( preemptible < 0 )
|
||||
+ if ( defer )
|
||||
{
|
||||
- current->arch.old_guest_table = l3e_get_page(l3e);
|
||||
+ current->arch.old_guest_table = pg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
- return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
|
||||
+ return put_page_and_type_preemptible(pg);
|
||||
}
|
||||
|
||||
#if CONFIG_PAGING_LEVELS >= 4
|
||||
static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
|
||||
- int partial, int preemptible)
|
||||
+ int partial, bool_t defer)
|
||||
{
|
||||
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
|
||||
(l4e_get_pfn(l4e) != pfn) )
|
||||
{
|
||||
+ struct page_info *pg = l4e_get_page(l4e);
|
||||
+
|
||||
if ( unlikely(partial > 0) )
|
||||
{
|
||||
- ASSERT(preemptible >= 0);
|
||||
- return __put_page_type(l4e_get_page(l4e), preemptible);
|
||||
+ ASSERT(!defer);
|
||||
+ return __put_page_type(pg, 1);
|
||||
}
|
||||
|
||||
- if ( preemptible < 0 )
|
||||
+ if ( defer )
|
||||
{
|
||||
- current->arch.old_guest_table = l4e_get_page(l4e);
|
||||
+ current->arch.old_guest_table = pg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
- return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
|
||||
+ return put_page_and_type_preemptible(pg);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@@ -1509,7 +1515,7 @@ static int alloc_l2_table(struct page_in
|
||||
return rc > 0 ? 0 : rc;
|
||||
}
|
||||
|
||||
-static int alloc_l3_table(struct page_info *page, int preemptible)
|
||||
+static int alloc_l3_table(struct page_info *page)
|
||||
{
|
||||
struct domain *d = page_get_owner(page);
|
||||
unsigned long pfn = page_to_mfn(page);
|
||||
@@ -1556,11 +1562,10 @@ static int alloc_l3_table(struct page_in
|
||||
rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
|
||||
PGT_l2_page_table |
|
||||
PGT_pae_xen_l2,
|
||||
- d, partial, preemptible);
|
||||
+ d, partial, 1);
|
||||
}
|
||||
else if ( !is_guest_l3_slot(i) ||
|
||||
- (rc = get_page_from_l3e(pl3e[i], pfn, d,
|
||||
- partial, preemptible)) > 0 )
|
||||
+ (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 )
|
||||
continue;
|
||||
|
||||
if ( rc == -EAGAIN )
|
||||
@@ -1604,7 +1609,7 @@ static int alloc_l3_table(struct page_in
|
||||
}
|
||||
|
||||
#if CONFIG_PAGING_LEVELS >= 4
|
||||
-static int alloc_l4_table(struct page_info *page, int preemptible)
|
||||
+static int alloc_l4_table(struct page_info *page)
|
||||
{
|
||||
struct domain *d = page_get_owner(page);
|
||||
unsigned long pfn = page_to_mfn(page);
|
||||
@@ -1616,8 +1621,7 @@ static int alloc_l4_table(struct page_in
|
||||
i++, partial = 0 )
|
||||
{
|
||||
if ( !is_guest_l4_slot(d, i) ||
|
||||
- (rc = get_page_from_l4e(pl4e[i], pfn, d,
|
||||
- partial, preemptible)) > 0 )
|
||||
+ (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 )
|
||||
continue;
|
||||
|
||||
if ( rc == -EAGAIN )
|
||||
@@ -1662,7 +1666,7 @@ static int alloc_l4_table(struct page_in
|
||||
return rc > 0 ? 0 : rc;
|
||||
}
|
||||
#else
|
||||
-#define alloc_l4_table(page, preemptible) (-EINVAL)
|
||||
+#define alloc_l4_table(page) (-EINVAL)
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1714,7 +1718,7 @@ static int free_l2_table(struct page_inf
|
||||
return err;
|
||||
}
|
||||
|
||||
-static int free_l3_table(struct page_info *page, int preemptible)
|
||||
+static int free_l3_table(struct page_info *page)
|
||||
{
|
||||
struct domain *d = page_get_owner(page);
|
||||
unsigned long pfn = page_to_mfn(page);
|
||||
@@ -1727,7 +1731,7 @@ static int free_l3_table(struct page_inf
|
||||
do {
|
||||
if ( is_guest_l3_slot(i) )
|
||||
{
|
||||
- rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
|
||||
+ rc = put_page_from_l3e(pl3e[i], pfn, partial, 0);
|
||||
if ( rc < 0 )
|
||||
break;
|
||||
partial = 0;
|
||||
@@ -1754,7 +1758,7 @@ static int free_l3_table(struct page_inf
|
||||
}
|
||||
|
||||
#if CONFIG_PAGING_LEVELS >= 4
|
||||
-static int free_l4_table(struct page_info *page, int preemptible)
|
||||
+static int free_l4_table(struct page_info *page)
|
||||
{
|
||||
struct domain *d = page_get_owner(page);
|
||||
unsigned long pfn = page_to_mfn(page);
|
||||
@@ -1764,7 +1768,7 @@ static int free_l4_table(struct page_inf
|
||||
|
||||
do {
|
||||
if ( is_guest_l4_slot(d, i) )
|
||||
- rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
|
||||
+ rc = put_page_from_l4e(pl4e[i], pfn, partial, 0);
|
||||
if ( rc < 0 )
|
||||
break;
|
||||
partial = 0;
|
||||
@@ -1784,7 +1788,7 @@ static int free_l4_table(struct page_inf
|
||||
return rc > 0 ? 0 : rc;
|
||||
}
|
||||
#else
|
||||
-#define free_l4_table(page, preemptible) (-EINVAL)
|
||||
+#define free_l4_table(page) (-EINVAL)
|
||||
#endif
|
||||
|
||||
int page_lock(struct page_info *page)
|
||||
@@ -2023,7 +2027,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
||||
l3_pgentry_t nl3e,
|
||||
unsigned long pfn,
|
||||
int preserve_ad,
|
||||
- int preemptible,
|
||||
struct vcpu *vcpu)
|
||||
{
|
||||
l3_pgentry_t ol3e;
|
||||
@@ -2063,7 +2066,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
||||
return rc ? 0 : -EFAULT;
|
||||
}
|
||||
|
||||
- rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
|
||||
+ rc = get_page_from_l3e(nl3e, pfn, d, 0);
|
||||
if ( unlikely(rc < 0) )
|
||||
return rc;
|
||||
rc = 0;
|
||||
@@ -2090,7 +2093,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
||||
pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
|
||||
}
|
||||
|
||||
- put_page_from_l3e(ol3e, pfn, 0, -preemptible);
|
||||
+ put_page_from_l3e(ol3e, pfn, 0, 1);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -2101,7 +2104,6 @@ static int mod_l4_entry(l4_pgentry_t *pl
|
||||
l4_pgentry_t nl4e,
|
||||
unsigned long pfn,
|
||||
int preserve_ad,
|
||||
- int preemptible,
|
||||
struct vcpu *vcpu)
|
||||
{
|
||||
struct domain *d = vcpu->domain;
|
||||
@@ -2134,7 +2136,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
|
||||
return rc ? 0 : -EFAULT;
|
||||
}
|
||||
|
||||
- rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
|
||||
+ rc = get_page_from_l4e(nl4e, pfn, d, 0);
|
||||
if ( unlikely(rc < 0) )
|
||||
return rc;
|
||||
rc = 0;
|
||||
@@ -2153,7 +2155,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
- put_page_from_l4e(ol4e, pfn, 0, -preemptible);
|
||||
+ put_page_from_l4e(ol4e, pfn, 0, 1);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -2275,10 +2277,12 @@ static int alloc_page_type(struct page_i
|
||||
rc = alloc_l2_table(page, type, preemptible);
|
||||
break;
|
||||
case PGT_l3_page_table:
|
||||
- rc = alloc_l3_table(page, preemptible);
|
||||
+ ASSERT(preemptible);
|
||||
+ rc = alloc_l3_table(page);
|
||||
break;
|
||||
case PGT_l4_page_table:
|
||||
- rc = alloc_l4_table(page, preemptible);
|
||||
+ ASSERT(preemptible);
|
||||
+ rc = alloc_l4_table(page);
|
||||
break;
|
||||
case PGT_seg_desc_page:
|
||||
rc = alloc_segdesc_page(page);
|
||||
@@ -2372,10 +2376,12 @@ int free_page_type(struct page_info *pag
|
||||
if ( !(type & PGT_partial) )
|
||||
page->nr_validated_ptes = L3_PAGETABLE_ENTRIES;
|
||||
#endif
|
||||
- rc = free_l3_table(page, preemptible);
|
||||
+ ASSERT(preemptible);
|
||||
+ rc = free_l3_table(page);
|
||||
break;
|
||||
case PGT_l4_page_table:
|
||||
- rc = free_l4_table(page, preemptible);
|
||||
+ ASSERT(preemptible);
|
||||
+ rc = free_l4_table(page);
|
||||
break;
|
||||
default:
|
||||
MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page));
|
||||
@@ -2866,7 +2872,7 @@ static int put_old_guest_table(struct vc
|
||||
if ( !v->arch.old_guest_table )
|
||||
return 0;
|
||||
|
||||
- switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) )
|
||||
+ switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) )
|
||||
{
|
||||
case -EINTR:
|
||||
case -EAGAIN:
|
||||
@@ -2898,7 +2904,7 @@ int vcpu_destroy_pagetables(struct vcpu
|
||||
if ( paging_mode_refcounts(v->domain) )
|
||||
put_page(page);
|
||||
else
|
||||
- rc = put_page_and_type_preemptible(page, 1);
|
||||
+ rc = put_page_and_type_preemptible(page);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
@@ -2924,7 +2930,7 @@ int vcpu_destroy_pagetables(struct vcpu
|
||||
if ( paging_mode_refcounts(v->domain) )
|
||||
put_page(page);
|
||||
else
|
||||
- rc = put_page_and_type_preemptible(page, 1);
|
||||
+ rc = put_page_and_type_preemptible(page);
|
||||
}
|
||||
if ( !rc )
|
||||
v->arch.guest_table_user = pagetable_null();
|
||||
@@ -2953,7 +2959,7 @@ int new_guest_cr3(unsigned long mfn)
|
||||
l4e_from_pfn(
|
||||
mfn,
|
||||
(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
|
||||
- pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr);
|
||||
+ pagetable_get_pfn(curr->arch.guest_table), 0, curr);
|
||||
switch ( rc )
|
||||
{
|
||||
case 0:
|
||||
@@ -3016,7 +3022,7 @@ int new_guest_cr3(unsigned long mfn)
|
||||
if ( paging_mode_refcounts(d) )
|
||||
put_page(page);
|
||||
else
|
||||
- switch ( rc = put_page_and_type_preemptible(page, 1) )
|
||||
+ switch ( rc = put_page_and_type_preemptible(page) )
|
||||
{
|
||||
case -EINTR:
|
||||
rc = -EAGAIN;
|
||||
@@ -3327,7 +3333,7 @@ long do_mmuext_op(
|
||||
break;
|
||||
}
|
||||
|
||||
- switch ( rc = put_page_and_type_preemptible(page, 1) )
|
||||
+ switch ( rc = put_page_and_type_preemptible(page) )
|
||||
{
|
||||
case -EINTR:
|
||||
case -EAGAIN:
|
||||
@@ -3405,7 +3411,7 @@ long do_mmuext_op(
|
||||
if ( paging_mode_refcounts(d) )
|
||||
put_page(page);
|
||||
else
|
||||
- switch ( rc = put_page_and_type_preemptible(page, 1) )
|
||||
+ switch ( rc = put_page_and_type_preemptible(page) )
|
||||
{
|
||||
case -EINTR:
|
||||
rc = -EAGAIN;
|
||||
@@ -3882,12 +3888,12 @@ long do_mmu_update(
|
||||
break;
|
||||
case PGT_l3_page_table:
|
||||
rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn,
|
||||
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
|
||||
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
|
||||
break;
|
||||
#if CONFIG_PAGING_LEVELS >= 4
|
||||
case PGT_l4_page_table:
|
||||
rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
|
||||
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
|
||||
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
|
||||
break;
|
||||
#endif
|
||||
case PGT_writable_page:
|
||||
--- a/xen/include/asm-x86/mm.h
|
||||
+++ b/xen/include/asm-x86/mm.h
|
||||
@@ -384,15 +384,10 @@ static inline void put_page_and_type(str
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
-static inline int put_page_and_type_preemptible(struct page_info *page,
|
||||
- int preemptible)
|
||||
+static inline int put_page_and_type_preemptible(struct page_info *page)
|
||||
{
|
||||
- int rc = 0;
|
||||
+ int rc = put_page_type_preemptible(page);
|
||||
|
||||
- if ( preemptible )
|
||||
- rc = put_page_type_preemptible(page);
|
||||
- else
|
||||
- put_page_type(page);
|
||||
if ( likely(rc == 0) )
|
||||
put_page(page);
|
||||
return rc;
|
112
CVE-2013-1922-xsa48.patch
Normal file
112
CVE-2013-1922-xsa48.patch
Normal file
@ -0,0 +1,112 @@
|
||||
References: bnc#81???? CVE-2013-1922 XSA-48
|
||||
|
||||
Add -f FMT / --format FMT arg to qemu-nbd
|
||||
|
||||
From: "Daniel P. Berrange" <berrange@redhat.com>
|
||||
|
||||
Currently the qemu-nbd program will auto-detect the format of
|
||||
any disk it is given. This behaviour is known to be insecure.
|
||||
For example, if qemu-nbd initially exposes a 'raw' file to an
|
||||
unprivileged app, and that app runs
|
||||
|
||||
'qemu-img create -f qcow2 -o backing_file=/etc/shadow /dev/nbd0'
|
||||
|
||||
then the next time the app is started, the qemu-nbd will now
|
||||
detect it as a 'qcow2' file and expose /etc/shadow to the
|
||||
unprivileged app.
|
||||
|
||||
The only way to avoid this is to explicitly tell qemu-nbd what
|
||||
disk format to use on the command line, completely disabling
|
||||
auto-detection. This patch adds a '-f' / '--format' arg for
|
||||
this purpose, mirroring what is already available via qemu-img
|
||||
and qemu commands.
|
||||
|
||||
qemu-nbd --format raw -p 9000 evil.img
|
||||
|
||||
will now always use raw, regardless of what format 'evil.img'
|
||||
looks like it contains.
|
||||
|
||||
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
|
||||
[Use errx, not err. - Paolo]
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
|
||||
|
||||
[ This is a security issue, CVE-2013-1922 / XSA-48. ]
|
||||
|
||||
--- a/tools/qemu-xen-dir-remote/qemu-nbd.c
|
||||
+++ b/tools/qemu-xen-dir-remote/qemu-nbd.c
|
||||
@@ -247,6 +247,7 @@ out:
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
BlockDriverState *bs;
|
||||
+ BlockDriver *drv;
|
||||
off_t dev_offset = 0;
|
||||
off_t offset = 0;
|
||||
uint32_t nbdflags = 0;
|
||||
@@ -256,7 +257,7 @@ int main(int argc, char **argv)
|
||||
struct sockaddr_in addr;
|
||||
socklen_t addr_len = sizeof(addr);
|
||||
off_t fd_size;
|
||||
- const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t";
|
||||
+ const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:t";
|
||||
struct option lopt[] = {
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ "version", 0, NULL, 'V' },
|
||||
@@ -271,6 +272,7 @@ int main(int argc, char **argv)
|
||||
{ "snapshot", 0, NULL, 's' },
|
||||
{ "nocache", 0, NULL, 'n' },
|
||||
{ "shared", 1, NULL, 'e' },
|
||||
+ { "format", 1, NULL, 'f' },
|
||||
{ "persistent", 0, NULL, 't' },
|
||||
{ "verbose", 0, NULL, 'v' },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
@@ -292,6 +294,7 @@ int main(int argc, char **argv)
|
||||
int max_fd;
|
||||
int persistent = 0;
|
||||
pthread_t client_thread;
|
||||
+ const char *fmt = NULL;
|
||||
|
||||
/* The client thread uses SIGTERM to interrupt the server. A signal
|
||||
* handler ensures that "qemu-nbd -v -c" exits with a nice status code.
|
||||
@@ -368,6 +371,9 @@ int main(int argc, char **argv)
|
||||
errx(EXIT_FAILURE, "Shared device number must be greater than 0\n");
|
||||
}
|
||||
break;
|
||||
+ case 'f':
|
||||
+ fmt = optarg;
|
||||
+ break;
|
||||
case 't':
|
||||
persistent = 1;
|
||||
break;
|
||||
@@ -478,9 +484,19 @@ int main(int argc, char **argv)
|
||||
bdrv_init();
|
||||
atexit(bdrv_close_all);
|
||||
|
||||
+ if (fmt) {
|
||||
+ drv = bdrv_find_format(fmt);
|
||||
+ if (!drv) {
|
||||
+ errx(EXIT_FAILURE, "Unknown file format '%s'", fmt);
|
||||
+ }
|
||||
+ } else {
|
||||
+ drv = NULL;
|
||||
+ }
|
||||
+
|
||||
bs = bdrv_new("hda");
|
||||
srcpath = argv[optind];
|
||||
- if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) {
|
||||
+ ret = bdrv_open(bs, srcpath, flags, drv);
|
||||
+ if (ret < 0) {
|
||||
errno = -ret;
|
||||
err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]);
|
||||
}
|
||||
--- a/tools/qemu-xen-dir-remote/qemu-nbd.texi
|
||||
+++ b/tools/qemu-xen-dir-remote/qemu-nbd.texi
|
||||
@@ -36,6 +36,8 @@ Export Qemu disk image using NBD protoco
|
||||
disconnect the specified device
|
||||
@item -e, --shared=@var{num}
|
||||
device can be shared by @var{num} clients (default @samp{1})
|
||||
+@item -f, --format=@var{fmt}
|
||||
+ force block driver for format @var{fmt} instead of auto-detecting
|
||||
@item -t, --persistent
|
||||
don't exit on the last connection
|
||||
@item -v, --verbose
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user