Compare commits

..

No commits in common. "devel" and "factory" have entirely different histories.

199 changed files with 13607 additions and 13606 deletions

View File

@ -0,0 +1,185 @@
From 270b8e85b5379fe93192f36966384ff07400fe7b Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:36 +0000
Subject: [PATCH 01/15] libxc: Rework extra module initialisation
This patch uses xc_dom_alloc_segment() to allocate the memory space for the
ACPI modules and the SMBIOS modules. This is to replace the arbitrary
placement of 1MB after the hvmloader image.
In later patches, while trying to load a firmware such as OVMF, the latter
could easily be loaded past the address 4MB (OVMF is a 2MB binary), but
hvmloader uses a range of memory from 4MB to 8MB to perform tests and, in the
process, clears the memory, before loading the modules.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/libxc/xc_dom_hvmloader.c | 131 ++++++++++++-----------------------------
1 file changed, 38 insertions(+), 93 deletions(-)
Index: xen-4.7.0-testing/tools/libxc/xc_dom_hvmloader.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxc/xc_dom_hvmloader.c
+++ xen-4.7.0-testing/tools/libxc/xc_dom_hvmloader.c
@@ -129,98 +129,52 @@ static elf_errorstatus xc_dom_parse_hvm_
return rc;
}
-static int modules_init(struct xc_dom_image *dom,
- uint64_t vend, struct elf_binary *elf,
- uint64_t *mstart_out, uint64_t *mend_out)
+static int module_init_one(struct xc_dom_image *dom,
+ struct xc_hvm_firmware_module *module,
+ char *name)
{
-#define MODULE_ALIGN 1UL << 7
-#define MB_ALIGN 1UL << 20
-#define MKALIGN(x, a) (((uint64_t)(x) + (a) - 1) & ~(uint64_t)((a) - 1))
- uint64_t total_len = 0, offset1 = 0;
-
- if ( dom->acpi_module.length == 0 && dom->smbios_module.length == 0 )
- return 0;
-
- /* Find the total length for the firmware modules with a reasonable large
- * alignment size to align each the modules.
- */
- total_len = MKALIGN(dom->acpi_module.length, MODULE_ALIGN);
- offset1 = total_len;
- total_len += MKALIGN(dom->smbios_module.length, MODULE_ALIGN);
-
- /* Want to place the modules 1Mb+change behind the loader image. */
- *mstart_out = MKALIGN(elf->pend, MB_ALIGN) + (MB_ALIGN);
- *mend_out = *mstart_out + total_len;
-
- if ( *mend_out > vend )
- return -1;
-
- if ( dom->acpi_module.length != 0 )
- dom->acpi_module.guest_addr_out = *mstart_out;
- if ( dom->smbios_module.length != 0 )
- dom->smbios_module.guest_addr_out = *mstart_out + offset1;
+ struct xc_dom_seg seg;
+ void *dest;
+
+ if ( module->length )
+ {
+ if ( xc_dom_alloc_segment(dom, &seg, name, 0, module->length) )
+ goto err;
+ dest = xc_dom_seg_to_ptr(dom, &seg);
+ if ( dest == NULL )
+ {
+ DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &seg) => NULL",
+ __FUNCTION__);
+ goto err;
+ }
+ memcpy(dest, module->data, module->length);
+ module->guest_addr_out = seg.vstart;
+ if ( module->guest_addr_out > UINT32_MAX ||
+ module->guest_addr_out + module->length > UINT32_MAX )
+ {
+ DOMPRINTF("%s: Module %s would be loaded abrove 4GB",
+ __FUNCTION__, name);
+ goto err;
+ }
+ }
return 0;
+err:
+ return -1;
}
-static int loadmodules(struct xc_dom_image *dom,
- uint64_t mstart, uint64_t mend,
- uint32_t domid)
+static int modules_init(struct xc_dom_image *dom)
{
- privcmd_mmap_entry_t *entries = NULL;
- unsigned long pfn_start;
- unsigned long pfn_end;
- size_t pages;
- uint32_t i;
- uint8_t *dest;
- int rc = -1;
- xc_interface *xch = dom->xch;
-
- if ( mstart == 0 || mend == 0 )
- return 0;
-
- pfn_start = (unsigned long)(mstart >> PAGE_SHIFT);
- pfn_end = (unsigned long)((mend + PAGE_SIZE - 1) >> PAGE_SHIFT);
- pages = pfn_end - pfn_start;
+ int rc;
- /* Map address space for module list. */
- entries = calloc(pages, sizeof(privcmd_mmap_entry_t));
- if ( entries == NULL )
- goto error_out;
+ rc = module_init_one(dom, &dom->acpi_module, "acpi module");
+ if ( rc ) goto err;
+ rc = module_init_one(dom, &dom->smbios_module, "smbios module");
+ if ( rc ) goto err;
- for ( i = 0; i < pages; i++ )
- entries[i].mfn = (mstart >> PAGE_SHIFT) + i;
-
- dest = xc_map_foreign_ranges(
- xch, domid, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT,
- entries, pages);
- if ( dest == NULL )
- goto error_out;
-
- /* Zero the range so padding is clear between modules */
- memset(dest, 0, pages << PAGE_SHIFT);
-
- /* Load modules into range */
- if ( dom->acpi_module.length != 0 )
- {
- memcpy(dest,
- dom->acpi_module.data,
- dom->acpi_module.length);
- }
- if ( dom->smbios_module.length != 0 )
- {
- memcpy(dest + (dom->smbios_module.guest_addr_out - mstart),
- dom->smbios_module.data,
- dom->smbios_module.length);
- }
-
- munmap(dest, pages << PAGE_SHIFT);
- rc = 0;
-
- error_out:
- free(entries);
-
- return rc;
+ return 0;
+err:
+ return -1;
}
static elf_errorstatus xc_dom_load_hvm_kernel(struct xc_dom_image *dom)
@@ -229,7 +183,6 @@ static elf_errorstatus xc_dom_load_hvm_k
privcmd_mmap_entry_t *entries = NULL;
size_t pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
elf_errorstatus rc;
- uint64_t m_start = 0, m_end = 0;
int i;
/* Map address space for initial elf image. */
@@ -262,15 +215,7 @@ static elf_errorstatus xc_dom_load_hvm_k
munmap(elf->dest_base, elf->dest_size);
- rc = modules_init(dom, dom->total_pages << PAGE_SHIFT, elf, &m_start,
- &m_end);
- if ( rc != 0 )
- {
- DOMPRINTF("%s: insufficient space to load modules.", __func__);
- goto error;
- }
-
- rc = loadmodules(dom, m_start, m_end, dom->guest_domid);
+ rc = modules_init(dom);
if ( rc != 0 )
{
DOMPRINTF("%s: unable to load modules.", __func__);

View File

@ -0,0 +1,59 @@
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 18 May 2009 12:05:44 +0100
Subject: net: move the tap buffer into TAPState
Patch-mainline: v0.11.0-rc0
Git-commit: 5b01e886d9eb4d5e94384a79634dcb43848e7bbf
References: bnc#840196
KVM uses a 64k buffer for reading from tapfd (for GSO support)
and allocates the buffer with TAPState rather than on the stack.
Not allocating it on the stack probably makes sense for qemu
anyway, so merge it in advance of GSO support.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/net.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/net.c b/tools/qemu-xen-traditional-dir-remote/net.c
index 0e7c77c..2ca85a3 100644
--- a/tools/qemu-xen-traditional-dir-remote/net.c
+++ b/tools/qemu-xen-traditional-dir-remote/net.c
@@ -700,6 +700,7 @@ typedef struct TAPState {
char down_script[1024];
char down_script_arg[128];
char script_arg[1024];
+ uint8_t buf[4096];
} TAPState;
#ifndef CONFIG_STUBDOM
@@ -735,20 +736,19 @@ static void tap_receive(void *opaque, const uint8_t *buf, int size)
static void tap_send(void *opaque)
{
TAPState *s = opaque;
- uint8_t buf[4096];
int size;
#ifdef __sun__
struct strbuf sbuf;
int f = 0;
- sbuf.maxlen = sizeof(buf);
- sbuf.buf = buf;
+ sbuf.maxlen = sizeof(s->buf);
+ sbuf.buf = s->buf;
size = getmsg(s->fd, NULL, &sbuf, &f) >=0 ? sbuf.len : -1;
#else
- size = read(s->fd, buf, sizeof(buf));
+ size = read(s->fd, s->buf, sizeof(s->buf));
#endif
if (size > 0) {
- qemu_send_packet(s->vc, buf, size);
+ qemu_send_packet(s->vc, s->buf, size);
}
}
--
1.8.1.4

View File

@ -0,0 +1,261 @@
From 34cd9218de8579722240d1acdcaae4e4278f667e Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:37 +0000
Subject: [PATCH 02/15] libxc: Prepare a start info structure for hvmloader
... and load BIOS into guest memory.
This adds a new firmware module, bios_module. It is
loaded into guest memory and its final location is provided to hvmloader
via the hvm_start_info struct.
This patch creates the hvm_start_info struct for HVM guests that have a
device model, so this is now common code with HVM guests without a device
model.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/libxc/include/xc_dom.h | 3 +
tools/libxc/xc_dom_hvmloader.c | 2 +
tools/libxc/xc_dom_x86.c | 132 ++++++++++++++++++++++++++++-------------
xen/include/public/xen.h | 2 +-
4 files changed, 96 insertions(+), 43 deletions(-)
Index: xen-4.7.0-testing/tools/libxc/include/xc_dom.h
===================================================================
--- xen-4.7.0-testing.orig/tools/libxc/include/xc_dom.h
+++ xen-4.7.0-testing/tools/libxc/include/xc_dom.h
@@ -209,6 +209,9 @@ struct xc_dom_image {
/* If unset disables the setup of the IOREQ pages. */
bool device_model;
+ /* BIOS passed to HVMLOADER */
+ struct xc_hvm_firmware_module bios_module;
+
/* Extra ACPI tables passed to HVMLOADER */
struct xc_hvm_firmware_module acpi_module;
Index: xen-4.7.0-testing/tools/libxc/xc_dom_hvmloader.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxc/xc_dom_hvmloader.c
+++ xen-4.7.0-testing/tools/libxc/xc_dom_hvmloader.c
@@ -167,6 +167,8 @@ static int modules_init(struct xc_dom_im
{
int rc;
+ rc = module_init_one(dom, &dom->bios_module, "bios module");
+ if ( rc ) goto err;
rc = module_init_one(dom, &dom->acpi_module, "acpi module");
if ( rc ) goto err;
rc = module_init_one(dom, &dom->smbios_module, "smbios module");
Index: xen-4.7.0-testing/tools/libxc/xc_dom_x86.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxc/xc_dom_x86.c
+++ xen-4.7.0-testing/tools/libxc/xc_dom_x86.c
@@ -69,6 +69,9 @@
#define round_up(addr, mask) ((addr) | (mask))
#define round_pg_up(addr) (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1))
+#define HVMLOADER_MODULE_MAX_COUNT 1
+#define HVMLOADER_MODULE_NAME_SIZE 10
+
struct xc_dom_params {
unsigned levels;
xen_vaddr_t vaddr_mask;
@@ -590,6 +593,7 @@ static int alloc_magic_pages_hvm(struct
xen_pfn_t special_array[X86_HVM_NR_SPECIAL_PAGES];
xen_pfn_t ioreq_server_array[NR_IOREQ_SERVER_PAGES];
xc_interface *xch = dom->xch;
+ size_t start_info_size = sizeof(struct hvm_start_info);
/* Allocate and clear special pages. */
for ( i = 0; i < X86_HVM_NR_SPECIAL_PAGES; i++ )
@@ -624,8 +628,6 @@ static int alloc_magic_pages_hvm(struct
if ( !dom->device_model )
{
- size_t start_info_size = sizeof(struct hvm_start_info);
-
if ( dom->cmdline )
{
dom->cmdline_size = ROUNDUP(strlen(dom->cmdline) + 1, 8);
@@ -635,17 +637,26 @@ static int alloc_magic_pages_hvm(struct
/* Limited to one module. */
if ( dom->ramdisk_blob )
start_info_size += sizeof(struct hvm_modlist_entry);
-
- rc = xc_dom_alloc_segment(dom, &dom->start_info_seg,
- "HVMlite start info", 0, start_info_size);
- if ( rc != 0 )
- {
- DOMPRINTF("Unable to reserve memory for the start info");
- goto out;
- }
}
else
{
+ start_info_size +=
+ sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT;
+ /* Add extra space to write modules name */
+ start_info_size +=
+ HVMLOADER_MODULE_NAME_SIZE * HVMLOADER_MODULE_MAX_COUNT;
+ }
+
+ rc = xc_dom_alloc_segment(dom, &dom->start_info_seg,
+ "HVMlite start info", 0, start_info_size);
+ if ( rc != 0 )
+ {
+ DOMPRINTF("Unable to reserve memory for the start info");
+ goto out;
+ }
+
+ if ( dom->device_model )
+ {
/*
* Allocate and clear additional ioreq server pages. The default
* server will use the IOREQ and BUFIOREQ special pages above.
@@ -1689,39 +1700,68 @@ static int alloc_pgtables_hvm(struct xc_
return 0;
}
+static void add_module_to_list(struct xc_dom_image *dom,
+ struct xc_hvm_firmware_module *module,
+ const char *name,
+ struct hvm_modlist_entry *modlist,
+ struct hvm_start_info *start_info)
+{
+ uint32_t index = start_info->nr_modules;
+ if ( module->length == 0 )
+ return;
+
+ assert(start_info->nr_modules < HVMLOADER_MODULE_MAX_COUNT);
+ assert(strnlen(name, HVMLOADER_MODULE_NAME_SIZE)
+ < HVMLOADER_MODULE_NAME_SIZE);
+
+ modlist[index].paddr = module->guest_addr_out;
+ modlist[index].size = module->length;
+ strncpy((char*)(modlist + HVMLOADER_MODULE_MAX_COUNT)
+ + HVMLOADER_MODULE_NAME_SIZE * index,
+ name, HVMLOADER_MODULE_NAME_SIZE);
+ modlist[index].cmdline_paddr =
+ (dom->start_info_seg.pfn << PAGE_SHIFT) +
+ ((uintptr_t)modlist - (uintptr_t)start_info) +
+ sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT +
+ HVMLOADER_MODULE_NAME_SIZE * index;
+
+ start_info->nr_modules++;
+}
+
static int bootlate_hvm(struct xc_dom_image *dom)
{
uint32_t domid = dom->guest_domid;
xc_interface *xch = dom->xch;
+ struct hvm_start_info *start_info;
+ size_t start_info_size;
+ void *start_page;
+ struct hvm_modlist_entry *modlist;
- if ( !dom->device_model )
- {
- struct hvm_start_info *start_info;
- size_t start_info_size;
- void *start_page;
-
- start_info_size = sizeof(*start_info) + dom->cmdline_size;
- if ( dom->ramdisk_blob )
- start_info_size += sizeof(struct hvm_modlist_entry);
+ start_info_size = sizeof(*start_info) + dom->cmdline_size;
+ if ( dom->ramdisk_blob )
+ start_info_size += sizeof(struct hvm_modlist_entry);
- if ( start_info_size >
- dom->start_info_seg.pages << XC_DOM_PAGE_SHIFT(dom) )
- {
- DOMPRINTF("Trying to map beyond start_info_seg");
- return -1;
- }
+ if ( start_info_size >
+ dom->start_info_seg.pages << XC_DOM_PAGE_SHIFT(dom) )
+ {
+ DOMPRINTF("Trying to map beyond start_info_seg");
+ return -1;
+ }
- start_page = xc_map_foreign_range(xch, domid, start_info_size,
- PROT_READ | PROT_WRITE,
- dom->start_info_seg.pfn);
- if ( start_page == NULL )
- {
- DOMPRINTF("Unable to map HVM start info page");
- return -1;
- }
+ start_page = xc_map_foreign_range(xch, domid, start_info_size,
+ PROT_READ | PROT_WRITE,
+ dom->start_info_seg.pfn);
+ if ( start_page == NULL )
+ {
+ DOMPRINTF("Unable to map HVM start info page");
+ return -1;
+ }
- start_info = start_page;
+ start_info = start_page;
+ modlist = start_page + sizeof(*start_info) + dom->cmdline_size;
+ if ( !dom->device_model )
+ {
if ( dom->cmdline )
{
char *cmdline = start_page + sizeof(*start_info);
@@ -1733,22 +1773,30 @@ static int bootlate_hvm(struct xc_dom_im
if ( dom->ramdisk_blob )
{
- struct hvm_modlist_entry *modlist =
- start_page + sizeof(*start_info) + dom->cmdline_size;
modlist[0].paddr = dom->ramdisk_seg.vstart - dom->parms.virt_base;
modlist[0].size = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
- start_info->modlist_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
- ((uintptr_t)modlist - (uintptr_t)start_info);
start_info->nr_modules = 1;
}
-
- start_info->magic = XEN_HVM_START_MAGIC_VALUE;
-
- munmap(start_page, start_info_size);
}
else
{
+ add_module_to_list(dom, &dom->bios_module, "bios",
+ modlist, start_info);
+ }
+
+ if ( start_info->nr_modules )
+ {
+ start_info->modlist_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
+ ((uintptr_t)modlist - (uintptr_t)start_info);
+ }
+
+ start_info->magic = XEN_HVM_START_MAGIC_VALUE;
+
+ munmap(start_page, start_info_size);
+
+ if ( dom->device_model )
+ {
void *hvm_info_page;
if ( (hvm_info_page = xc_map_foreign_range(
Index: xen-4.7.0-testing/xen/include/public/xen.h
===================================================================
--- xen-4.7.0-testing.orig/xen/include/public/xen.h
+++ xen-4.7.0-testing/xen/include/public/xen.h
@@ -814,7 +814,7 @@ struct start_info {
typedef struct start_info start_info_t;
/*
- * Start of day structure passed to PVH guests in %ebx.
+ * Start of day structure passed to PVH guests and to HVM guests in %ebx.
*
* NOTE: nothing will be loaded at physical address 0, so a 0 value in any
* of the address fields should be treated as not present.

View File

@ -0,0 +1,47 @@
From: Michal Kubecek <mkubecek@suse.cz>
Date: Fri, 27 Sep 2013 19:05:45 +0200
Subject: net: increase tap buffer size
Patch-mainline: v0.12.0-rc0
Git-commit: 8e0f8e5bf8fd483dd28329055336cf895b74c89f (partial)
References: bnc#840196
Increase the size of the buffer embedded in struct TAPState to allow
jumbo frames longer than 4096 bytes.
Part of upstream qemu commit
8e0f8e5b net: enable IFF_VNET_HDR on tap fds if available
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/net.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/net.c b/tools/qemu-xen-traditional-dir-remote/net.c
index 2ca85a3..502a691 100644
--- a/tools/qemu-xen-traditional-dir-remote/net.c
+++ b/tools/qemu-xen-traditional-dir-remote/net.c
@@ -693,6 +693,11 @@ static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
#if !defined(_WIN32)
+/* Maximum GSO packet size (64k) plus plenty of room for
+ * the ethernet and virtio_net headers
+ */
+#define TAP_BUFSIZE (4096 + 65536)
+
typedef struct TAPState {
VLANClientState *vc;
int fd;
@@ -700,7 +705,7 @@ typedef struct TAPState {
char down_script[1024];
char down_script_arg[128];
char script_arg[1024];
- uint8_t buf[4096];
+ uint8_t buf[TAP_BUFSIZE];
} TAPState;
#ifndef CONFIG_STUBDOM
--
1.8.1.4

View File

@ -0,0 +1,38 @@
From d12d422d347ca3a8fd8181b78ee2736561cd0e57 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:38 +0000
Subject: [PATCH 03/15] configure: #define SEABIOS_PATH and OVMF_PATH
Those paths are to be used by libxl, in order to load the firmware in
memory. If a system path is not defined via --with-system-seabios or
--with-system-ovmf, then this defaults to the Xen firmware directory.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/configure.ac | 6 ++++++
1 file changed, 6 insertions(+)
Index: xen-4.7.0-testing/tools/configure.ac
===================================================================
--- xen-4.7.0-testing.orig/tools/configure.ac
+++ xen-4.7.0-testing/tools/configure.ac
@@ -218,6 +218,9 @@ AC_ARG_WITH([system-seabios],
esac
],[])
AC_SUBST(seabios_path)
+AC_DEFINE_UNQUOTED([SEABIOS_PATH],
+ ["${seabios_path:-$XENFIRMWAREDIR/seabios.bin}"],
+ [SeaBIOS path])
AC_ARG_WITH([system-ovmf],
AS_HELP_STRING([--with-system-ovmf@<:@=PATH@:>@],
@@ -229,6 +232,9 @@ AC_ARG_WITH([system-ovmf],
esac
],[])
AC_SUBST(ovmf_path)
+AC_DEFINE_UNQUOTED([OVMF_PATH],
+ ["${ovmf_path:-$XENFIRMWAREDIR/ovmf.bin}"],
+ [OVMF path])
AC_ARG_WITH([extra-qemuu-configure-args],
AS_HELP_STRING([--with-extra-qemuu-configure-args@<:@="--ARG1 ..."@:>@],

View File

@ -0,0 +1,41 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 12 Jul 2010 20:24:59 +0300
Subject: e1000: fix access 4 bytes beyond buffer end
Patch-mainline: v0.13.0-rc0
Git-commit: b0b900070c7cb29bbefb732ec00397abe5de6d73
References: bnc#840196
We do range check for size, and get size as buffer,
but copy size + 4 bytes (4 is for FCS).
Let's copy size bytes but put size + 4 in length.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index c75bc5e..9b062db 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -659,7 +659,6 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
}
rdh_start = s->mac_reg[RDH];
- size += 4; // for the header
do {
if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
set_ics(s, 0, E1000_ICS_RXO);
@@ -673,7 +672,7 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
if (desc.buffer_addr) {
cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
(void *)(buf + vlan_offset), size);
- desc.length = cpu_to_le16(size);
+ desc.length = cpu_to_le16(size + 4 /* for FCS */);
desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
--
1.8.1.4

View File

@ -0,0 +1,47 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 12 Jul 2010 20:41:02 +0300
Subject: e1000: secrc support
Patch-mainline: v0.13.0-rc0
Git-commit: 55e8d1ce6b09300cc5f3adcd9a705156d168381d
References: bnc#840196
Add support for secrc field. Reportedly needed by old RHEL guests.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 9b062db..07e681d 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -338,6 +338,15 @@ is_vlan_txd(uint32_t txd_lower)
return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
}
+/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
+ * fill it in, just pad descriptor length by 4 bytes unless guest
+ * told us to strip it off the packet. */
+static inline int
+fcs_len(E1000State *s)
+{
+ return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
+}
+
static void
xmit_seg(E1000State *s)
{
@@ -672,7 +681,7 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
if (desc.buffer_addr) {
cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
(void *)(buf + vlan_offset), size);
- desc.length = cpu_to_le16(size + 4 /* for FCS */);
+ desc.length = cpu_to_le16(size + fcs_len(s));
desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
--
1.8.1.4

View File

@ -0,0 +1,43 @@
From b44077cb7b2844d083ddae0d2174d4ae8a5101b6 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:39 +0000
Subject: [PATCH 04/15] firmware/makefile: install BIOS blob ...
... into the firmware directory, along with hvmloader.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/firmware/Makefile | 13 +++++++++++++
1 file changed, 13 insertions(+)
Index: xen-4.7.0-testing/tools/firmware/Makefile
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/Makefile
+++ xen-4.7.0-testing/tools/firmware/Makefile
@@ -19,6 +19,9 @@ SUBDIRS-y += hvmloader
LD32BIT-$(CONFIG_FreeBSD) := LD32BIT_FLAG=-melf_i386_fbsd
+SEABIOS_ROM := seabios-dir/out/bios.bin
+OVMF_ROM := ovmf-dir/ovmf.bin
+
ovmf-dir:
GIT=$(GIT) $(XEN_ROOT)/scripts/git-checkout.sh $(OVMF_UPSTREAM_URL) $(OVMF_UPSTREAM_REVISION) ovmf-dir
cp ovmf-makefile ovmf-dir/Makefile;
@@ -45,6 +48,16 @@ endif
install: all
[ -d $(INST_DIR) ] || $(INSTALL_DIR) $(INST_DIR)
[ ! -e $(TARGET) ] || $(INSTALL_DATA) $(TARGET) $(INST_DIR)
+ifeq ($(CONFIG_SEABIOS),y)
+ifeq ($(SEABIOS_PATH),)
+ $(INSTALL_DATA) $(SEABIOS_ROM) $(INST_DIR)/seabios.bin
+endif
+endif
+ifeq ($(CONFIG_OVMF),y)
+ifeq ($(OVMF_PATH),)
+ $(INSTALL_DATA) $(OVMF_ROM) $(INST_DIR)/ovmf.bin
+endif
+endif
.PHONY: clean
clean: subdirs-clean

View File

@ -0,0 +1,104 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Feb 2011 18:27:48 +0200
Subject: e1000: multi-buffer packet support
Patch-mainline: v0.15.0-rc0
Git-commit: b19487e27ed3009df7f555998a454ba19aefd4b8
References: bnc#840196
e1000 supports multi-buffer packets larger than rxbuf_size.
This fixes the following (on linux):
- in guest: ifconfig eth1 mtu 16110
- in host: ifconfig tap0 mtu 16110
ping -s 16082 <guest-ip>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 39 +++++++++++++++++-------
1 file changed, 28 insertions(+), 11 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 07e681d..34818e0 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -632,16 +632,13 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
uint32_t rdh_start;
uint16_t vlan_special = 0;
uint8_t vlan_status = 0, vlan_offset = 0;
+ size_t desc_offset;
+ size_t desc_size;
+ size_t total_size;
if (!(s->mac_reg[RCTL] & E1000_RCTL_EN))
return;
- if (size > s->rxbuf_size) {
- DBGOUT(RX, "packet too large for buffers (%d > %d)\n", size,
- s->rxbuf_size);
- return;
- }
-
/* Discard oversized packets if !LPE and !SBP. */
if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
(size > MAXIMUM_ETHERNET_VLAN_SIZE
@@ -668,8 +665,16 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
}
rdh_start = s->mac_reg[RDH];
+ desc_offset = 0;
+ total_size = size + fcs_len(s);
do {
+ desc_size = total_size - desc_offset;
+ if (desc_size > s->rxbuf_size) {
+ desc_size = s->rxbuf_size;
+ }
if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
+ /* Discard all data written so far */
+ s->mac_reg[RDH] = rdh_start;
set_ics(s, 0, E1000_ICS_RXO);
return;
}
@@ -679,10 +684,22 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
desc.special = vlan_special;
desc.status |= (vlan_status | E1000_RXD_STAT_DD);
if (desc.buffer_addr) {
- cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
- (void *)(buf + vlan_offset), size);
- desc.length = cpu_to_le16(size + fcs_len(s));
- desc.status |= E1000_RXD_STAT_EOP|E1000_RXD_STAT_IXSM;
+ if (desc_offset < size) {
+ size_t copy_size = size - desc_offset;
+ if (copy_size > s->rxbuf_size) {
+ copy_size = s->rxbuf_size;
+ }
+ cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
+ (void *)(buf + desc_offset + vlan_offset),
+ copy_size);
+ }
+ desc_offset += desc_size;
+ if (desc_offset >= total_size) {
+ desc.length = cpu_to_le16(desc_size);
+ desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
+ } else {
+ desc.length = cpu_to_le16(desc_size);
+ }
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
cpu_physical_memory_write(base, (void *)&desc, sizeof(desc));
@@ -697,7 +714,7 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
set_ics(s, 0, E1000_ICS_RXO);
return;
}
- } while (desc.buffer_addr == 0);
+ } while (desc_offset < total_size);
s->mac_reg[GPRC]++;
s->mac_reg[TPR]++;
--
1.8.1.4

View File

@ -0,0 +1,212 @@
From a8eef037b010662e73428907af761b6d2aef4eae Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:40 +0000
Subject: [PATCH 05/15] libxl: Load guest BIOS from file
The path to the BIOS blob can be overridden by xl's bios_override option,
or provided via u.hvm.bios_firmware in the domain_build_info struct by other
libxl users.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
docs/man/xl.cfg.pod.5 | 9 +++++++
tools/libxl/libxl.h | 8 +++++++
tools/libxl/libxl_dom.c | 57 ++++++++++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_internal.h | 2 ++
tools/libxl/libxl_paths.c | 10 ++++++++
tools/libxl/libxl_types.idl | 1 +
tools/libxl/xl_cmdimpl.c | 11 ++++++---
7 files changed, 95 insertions(+), 3 deletions(-)
Index: xen-4.7.0-testing/docs/man/xl.cfg.pod.5
===================================================================
--- xen-4.7.0-testing.orig/docs/man/xl.cfg.pod.5
+++ xen-4.7.0-testing/docs/man/xl.cfg.pod.5
@@ -1268,6 +1268,15 @@ Requires device_model_version=qemu-xen.
=back
+=item B<bios_override="PATH">
+
+Override the path to the blob to be used as BIOS. The blob provided here MUST
+be consistent with the `bios` which you have specified. You should not normally
+need to specify this option.
+
+This option does not have any effect if using bios="rombios" or
+device_model_version="qemu-xen-traditional".
+
=item B<pae=BOOLEAN>
Hide or expose the IA32 Physical Address Extensions. These extensions
Index: xen-4.7.0-testing/tools/libxl/libxl.h
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/libxl.h
+++ xen-4.7.0-testing/tools/libxl/libxl.h
@@ -947,6 +947,14 @@ void libxl_mac_copy(libxl_ctx *ctx, libx
#define LIBXL_HAVE_CHECKPOINTED_STREAM 1
/*
+ * LIBXL_HAVE_BUILDINFO_HVM_BIOS_FIRMWARE
+ *
+ * libxl_domain_build_info has u.hvm.bios_firmware field which can be use
+ * to provide a different bios blob (like SeaBIOS or OVMF).
+ */
+#define LIBXL_HAVE_BUILDINFO_HVM_BIOS_FIRMWARE
+
+/*
* ERROR_REMUS_XXX error code only exists from Xen 4.5, Xen 4.6 and it
* is changed to ERROR_CHECKPOINT_XXX in Xen 4.7
*/
Index: xen-4.7.0-testing/tools/libxl/libxl_dom.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/libxl_dom.c
+++ xen-4.7.0-testing/tools/libxl/libxl_dom.c
@@ -860,6 +860,38 @@ err:
return ret;
}
+static int libxl__load_hvm_firmware_module(libxl__gc *gc,
+ const char *filename,
+ const char *what,
+ struct xc_hvm_firmware_module *m)
+{
+ int datalen = 0;
+ void *data = NULL;
+ int e;
+
+ LOG(DEBUG, "Loading %s: %s", what, filename);
+ e = libxl_read_file_contents(CTX, filename, &data, &datalen);
+ if (e) {
+ /*
+ * Print a message only on ENOENT, other error are logged by the
+ * function libxl_read_file_contents().
+ */
+ if (e == ENOENT)
+ LOGEV(ERROR, e, "failed to read %s file", what);
+ return ERROR_FAIL;
+ }
+ libxl__ptr_add(gc, data);
+ if (datalen) {
+ /* Only accept non-empty files */
+ m->data = data;
+ m->length = datalen;
+ } else {
+ LOG(ERROR, "file %s for %s is empty", filename, what);
+ return ERROR_INVAL;
+ }
+ return 0;
+}
+
static int libxl__domain_firmware(libxl__gc *gc,
libxl_domain_build_info *info,
struct xc_dom_image *dom)
@@ -869,6 +901,7 @@ static int libxl__domain_firmware(libxl_
int e, rc;
int datalen = 0;
void *data;
+ const char *bios_filename = NULL;
if (info->u.hvm.firmware)
firmware = info->u.hvm.firmware;
@@ -912,6 +945,30 @@ static int libxl__domain_firmware(libxl_
goto out;
}
+ if (info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
+ if (info->u.hvm.bios_firmware) {
+ bios_filename = info->u.hvm.bios_firmware;
+ } else {
+ switch (info->u.hvm.bios) {
+ case LIBXL_BIOS_TYPE_SEABIOS:
+ bios_filename = libxl__seabios_path();
+ break;
+ case LIBXL_BIOS_TYPE_OVMF:
+ bios_filename = libxl__ovmf_path();
+ break;
+ case LIBXL_BIOS_TYPE_ROMBIOS:
+ default:
+ abort();
+ }
+ }
+ }
+
+ if (bios_filename) {
+ rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
+ &dom->bios_module);
+ if (rc) goto out;
+ }
+
if (info->u.hvm.smbios_firmware) {
data = NULL;
e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
Index: xen-4.7.0-testing/tools/libxl/libxl_internal.h
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/libxl_internal.h
+++ xen-4.7.0-testing/tools/libxl/libxl_internal.h
@@ -2317,6 +2317,8 @@ _hidden const char *libxl__xen_config_di
_hidden const char *libxl__xen_script_dir_path(void);
_hidden const char *libxl__lock_dir_path(void);
_hidden const char *libxl__run_dir_path(void);
+_hidden const char *libxl__seabios_path(void);
+_hidden const char *libxl__ovmf_path(void);
/*----- subprocess execution with timeout -----*/
Index: xen-4.7.0-testing/tools/libxl/libxl_paths.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/libxl_paths.c
+++ xen-4.7.0-testing/tools/libxl/libxl_paths.c
@@ -35,6 +35,16 @@ const char *libxl__run_dir_path(void)
return XEN_RUN_DIR;
}
+const char *libxl__seabios_path(void)
+{
+ return SEABIOS_PATH;
+}
+
+const char *libxl__ovmf_path(void)
+{
+ return OVMF_PATH;
+}
+
/*
* Local variables:
* mode: C
Index: xen-4.7.0-testing/tools/libxl/libxl_types.idl
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/libxl_types.idl
+++ xen-4.7.0-testing/tools/libxl/libxl_types.idl
@@ -513,6 +513,7 @@ libxl_domain_build_info = Struct("domain
("timer_mode", libxl_timer_mode),
("nested_hvm", libxl_defbool),
("altp2m", libxl_defbool),
+ ("bios_firmware", string),
("smbios_firmware", string),
("acpi_firmware", string),
("hdtype", libxl_hdtype),
Index: xen-4.7.0-testing/tools/libxl/xl_cmdimpl.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/xl_cmdimpl.c
+++ xen-4.7.0-testing/tools/libxl/xl_cmdimpl.c
@@ -1562,12 +1562,17 @@ static void parse_config_data(const char
xlu_cfg_replace_string (config, "firmware_override",
&b_info->u.hvm.firmware, 0);
- if (!xlu_cfg_get_string(config, "bios", &buf, 0) &&
- libxl_bios_type_from_string(buf, &b_info->u.hvm.bios)) {
+ xlu_cfg_replace_string (config, "bios_override",
+ &b_info->u.hvm.bios_firmware, 0);
+ if (!xlu_cfg_get_string(config, "bios", &buf, 0)) {
+ if (libxl_bios_type_from_string(buf, &b_info->u.hvm.bios)) {
fprintf(stderr, "ERROR: invalid value \"%s\" for \"bios\"\n",
buf);
exit (1);
- }
+ }
+ } else if (b_info->u.hvm.bios_firmware)
+ fprintf(stderr, "WARNING: "
+ "bios_override given without specific bios name\n");
xlu_cfg_get_defbool(config, "pae", &b_info->u.hvm.pae, 0);
xlu_cfg_get_defbool(config, "apic", &b_info->u.hvm.apic, 0);

View File

@ -0,0 +1,55 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Feb 2011 18:27:52 +0200
Subject: e1000: clear EOP for multi-buffer descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Patch-mainline: v0.15.0-rc0
Git-commit: ee912ccfa007351a62ba42bd60499769f6c02c1e
References: bnc#840196
The e1000 spec says: if software statically allocates
buffers, and uses memory read to check for completed descriptors, it
simply has to zero the status byte in the descriptor to make it ready
for reuse by hardware. This is not a hardware requirement (moving the
hardware tail pointer is), but is necessary for performing an in-memory
scan.
Thus the guest does not have to clear the status byte. In case it
doesn't, we need to clear EOP for all descriptors
except the last. While I don't know of any such guests,
it's probably a good idea to stick to the spec.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reported-by: Juan Quintela <quintela@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 34818e0..7e791dc 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -694,11 +694,13 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
copy_size);
}
desc_offset += desc_size;
+ desc.length = cpu_to_le16(desc_size);
if (desc_offset >= total_size) {
- desc.length = cpu_to_le16(desc_size);
desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
} else {
- desc.length = cpu_to_le16(desc_size);
+ /* Guest zeroing out status is not a hardware requirement.
+ Clear EOP in case guest didn't do it. */
+ desc.status &= ~E1000_RXD_STAT_EOP;
}
} else // as per intel docs; skip descriptors with null buf addr
DBGOUT(RX, "Null RX descriptor!!\n");
--
1.8.1.4

View File

@ -0,0 +1,99 @@
From b920bea09b69c1cdd5bb4c5964ce20d0bf7ced8b Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:41 +0000
Subject: [PATCH 06/15] xen: Move the hvm_start_info C representation from
libxc to public/xen.h
Instead of having several representations of hvm_start_info in C, define
it in public/xen.h so both libxc and hvmloader can use it.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/libxc/include/xc_dom.h | 31 -------------------------------
xen/include/public/xen.h | 31 +++++++++++++++++++++++++++++++
2 files changed, 31 insertions(+), 31 deletions(-)
Index: xen-4.7.0-testing/tools/libxc/include/xc_dom.h
===================================================================
--- xen-4.7.0-testing.orig/tools/libxc/include/xc_dom.h
+++ xen-4.7.0-testing/tools/libxc/include/xc_dom.h
@@ -219,37 +219,6 @@ struct xc_dom_image {
struct xc_hvm_firmware_module smbios_module;
};
-#if defined(__i386__) || defined(__x86_64__)
-/* C representation of the x86/HVM start info layout.
- *
- * The canonical definition of this layout resides in public/xen.h, this
- * is just a way to represent the layout described there using C types.
- *
- * NB: the packed attribute is not really needed, but it helps us enforce
- * the fact this this is just a representation, and it might indeed
- * be required in the future if there are alignment changes.
- */
-struct hvm_start_info {
- uint32_t magic; /* Contains the magic value 0x336ec578 */
- /* ("xEn3" with the 0x80 bit of the "E" set).*/
- uint32_t version; /* Version of this structure. */
- uint32_t flags; /* SIF_xxx flags. */
- uint32_t nr_modules; /* Number of modules passed to the kernel. */
- uint64_t modlist_paddr; /* Physical address of an array of */
- /* hvm_modlist_entry. */
- uint64_t cmdline_paddr; /* Physical address of the command line. */
- uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */
- /* structure. */
-} __attribute__((packed));
-
-struct hvm_modlist_entry {
- uint64_t paddr; /* Physical address of the module. */
- uint64_t size; /* Size of the module in bytes. */
- uint64_t cmdline_paddr; /* Physical address of the command line. */
- uint64_t reserved;
-} __attribute__((packed));
-#endif /* x86 */
-
/* --- pluggable kernel loader ------------------------------------- */
struct xc_dom_loader {
Index: xen-4.7.0-testing/xen/include/public/xen.h
===================================================================
--- xen-4.7.0-testing.orig/xen/include/public/xen.h
+++ xen-4.7.0-testing/xen/include/public/xen.h
@@ -859,6 +859,37 @@ typedef struct start_info start_info_t;
*/
#define XEN_HVM_START_MAGIC_VALUE 0x336ec578
+#if defined(__i386__) || defined(__x86_64__)
+/* C representation of the x86/HVM start info layout.
+ *
+ * The canonical definition of this layout resides in public/xen.h, this
+ * is just a way to represent the layout described there using C types.
+ *
+ * NB: the packed attribute is not really needed, but it helps us enforce
+ * the fact this this is just a representation, and it might indeed
+ * be required in the future if there are alignment changes.
+ */
+struct hvm_start_info {
+ uint32_t magic; /* Contains the magic value 0x336ec578 */
+ /* ("xEn3" with the 0x80 bit of the "E" set).*/
+ uint32_t version; /* Version of this structure. */
+ uint32_t flags; /* SIF_xxx flags. */
+ uint32_t nr_modules; /* Number of modules passed to the kernel. */
+ uint64_t modlist_paddr; /* Physical address of an array of */
+ /* hvm_modlist_entry. */
+ uint64_t cmdline_paddr; /* Physical address of the command line. */
+ uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */
+ /* structure. */
+} __attribute__((packed));
+
+struct hvm_modlist_entry {
+ uint64_t paddr; /* Physical address of the module. */
+ uint64_t size; /* Size of the module in bytes. */
+ uint64_t cmdline_paddr; /* Physical address of the command line. */
+ uint64_t reserved;
+} __attribute__((packed));
+#endif /* x86 */
+
/* New console union for dom0 introduced in 0x00030203. */
#if __XEN_INTERFACE_VERSION__ < 0x00030203
#define console_mfn console.domU.mfn

View File

@ -0,0 +1,83 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Feb 2011 18:27:55 +0200
Subject: e1000: verify we have buffers, upfront
Patch-mainline: v0.15.0-rc0
Git-commit: 322fd48afbed1ef7b834ac343a0c8687bcb33695
References: bnc#840196
The spec says: Any descriptor with a non-zero status byte has been
processed by the hardware, and is ready to be handled by the software.
Thus, once we change a descriptor status to non-zero we should
never move the head backwards and try to reuse this
descriptor from hardware.
This actually happened with a multibuffer packet
that arrives when we don't have enough buffers.
Fix by checking that we have enough buffers upfront
so we never need to discard the packet midway through.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 28 +++++++++++++++++++-----
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 7e791dc..18d7597 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -622,6 +622,24 @@ e1000_can_receive(void *opaque)
return (s->mac_reg[RCTL] & E1000_RCTL_EN && s->mac_reg[RDLEN] != 0);
}
+static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
+{
+ int bufs;
+ /* Fast-path short packets */
+ if (total_size <= s->rxbuf_size) {
+ return s->mac_reg[RDH] != s->mac_reg[RDT] || !s->check_rxov;
+ }
+ if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
+ bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
+ } else if (s->mac_reg[RDH] > s->mac_reg[RDT] || !s->check_rxov) {
+ bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
+ s->mac_reg[RDT] - s->mac_reg[RDH];
+ } else {
+ return false;
+ }
+ return total_size <= bufs * s->rxbuf_size;
+}
+
static void
e1000_receive(void *opaque, const uint8_t *buf, int size)
{
@@ -667,17 +685,15 @@ e1000_receive(void *opaque, const uint8_t *buf, int size)
rdh_start = s->mac_reg[RDH];
desc_offset = 0;
total_size = size + fcs_len(s);
+ if (!e1000_has_rxbufs(s, total_size)) {
+ set_ics(s, 0, E1000_ICS_RXO);
+ return;
+ }
do {
desc_size = total_size - desc_offset;
if (desc_size > s->rxbuf_size) {
desc_size = s->rxbuf_size;
}
- if (s->mac_reg[RDH] == s->mac_reg[RDT] && s->check_rxov) {
- /* Discard all data written so far */
- s->mac_reg[RDH] = rdh_start;
- set_ics(s, 0, E1000_ICS_RXO);
- return;
- }
base = ((uint64_t)s->mac_reg[RDBAH] << 32) + s->mac_reg[RDBAL] +
sizeof(desc) * s->mac_reg[RDH];
cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));
--
1.8.1.4

View File

@ -0,0 +1,55 @@
From e3d13cec19a919b06dea49edd64a50c68e1094a7 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:42 +0000
Subject: [PATCH 07/15] hvmloader: Grab the hvm_start_info pointer
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/firmware/hvmloader/hvmloader.c | 5 +++++
tools/firmware/hvmloader/util.h | 3 +++
2 files changed, 8 insertions(+)
Index: xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/hvmloader.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
@@ -29,6 +29,8 @@
#include <xen/version.h>
#include <xen/hvm/params.h>
+const struct hvm_start_info *hvm_start_info;
+
asm (
" .text \n"
" .globl _start \n"
@@ -46,6 +48,8 @@ asm (
" ljmp $"STR(SEL_CODE32)",$1f \n"
"1: movl $stack_top,%esp \n"
" movl %esp,%ebp \n"
+ /* store HVM start info ptr */
+ " mov %ebx, hvm_start_info \n"
" call main \n"
/* Relocate real-mode trampoline to 0x0. */
" mov $trampoline_start,%esi \n"
@@ -258,6 +262,7 @@ int main(void)
memset((void *)HYPERCALL_PHYSICAL_ADDRESS, 0xc3 /* RET */, PAGE_SIZE);
printf("HVM Loader\n");
+ BUG_ON(hvm_start_info->magic != XEN_HVM_START_MAGIC_VALUE);
init_hypercalls();
Index: xen-4.7.0-testing/tools/firmware/hvmloader/util.h
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/util.h
+++ xen-4.7.0-testing/tools/firmware/hvmloader/util.h
@@ -158,6 +158,9 @@ static inline void cpu_relax(void)
struct hvm_info_table *get_hvm_info_table(void) __attribute__ ((const));
#define hvm_info (get_hvm_info_table())
+/* HVM start info */
+extern const struct hvm_start_info *hvm_start_info;
+
/* String and memory functions */
int strcmp(const char *cs, const char *ct);
int strncmp(const char *s1, const char *s2, uint32_t n);

View File

@ -0,0 +1,55 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 27 Mar 2011 13:37:35 +0200
Subject: e1000: check buffer availability
Patch-mainline: v0.15.0-rc0
Git-commit: 6cdfab2868dd593902e2b7db3ba9f49f2cc03e3f
References: bnc#840196
Reduce spurious packet drops on RX ring empty
by verifying that we have at least 1 buffer
ahead of time.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
---
tools/qemu-xen-traditional-dir-remote/hw/e1000.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
index 18d7597..b07c6cb 100644
--- a/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ b/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -614,14 +614,6 @@ e1000_set_link_status(VLANClientState *vc)
set_ics(s, 0, E1000_ICR_LSC);
}
-static int
-e1000_can_receive(void *opaque)
-{
- E1000State *s = opaque;
-
- return (s->mac_reg[RCTL] & E1000_RCTL_EN && s->mac_reg[RDLEN] != 0);
-}
-
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
int bufs;
@@ -640,6 +632,15 @@ static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
return total_size <= bufs * s->rxbuf_size;
}
+static int
+e1000_can_receive(void *opaque)
+{
+ E1000State *s = opaque;
+
+ return (s->mac_reg[RCTL] & E1000_RCTL_EN) && (s->mac_reg[RDLEN] != 0) &&
+ e1000_has_rxbufs(s, 1);
+}
+
static void
e1000_receive(void *opaque, const uint8_t *buf, int size)
{
--
1.8.1.4

View File

@ -0,0 +1,139 @@
From 463aedc4fd6e09518b4711e931048bf932b6ee39 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:43 +0000
Subject: [PATCH 08/15] hvmloader: Locate the BIOS blob
The BIOS can be found in an entry called "bios" in the modlist of the
hvm_start_info struct.
The BIOS blob that is found is not loaded by this patch, but only passed as
an argument to the bios_load() function. It is going to be used by the next
few patches.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/firmware/hvmloader/config.h | 2 +-
tools/firmware/hvmloader/hvmloader.c | 42 ++++++++++++++++++++++++++++++++++--
tools/firmware/hvmloader/ovmf.c | 3 ++-
tools/firmware/hvmloader/rombios.c | 3 ++-
tools/firmware/hvmloader/util.h | 2 ++
5 files changed, 47 insertions(+), 5 deletions(-)
Index: xen-4.7.0-testing/tools/firmware/hvmloader/config.h
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/config.h
+++ xen-4.7.0-testing/tools/firmware/hvmloader/config.h
@@ -22,7 +22,7 @@ struct bios_config {
/* ROMS */
void (*load_roms)(void);
- void (*bios_load)(const struct bios_config *config);
+ void (*bios_load)(const struct bios_config *config, void *addr, uint32_t size);
void (*bios_info_setup)(void);
void (*bios_info_finish)(void);
Index: xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/hvmloader.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
@@ -253,10 +253,40 @@ static void acpi_enable_sci(void)
BUG_ON(!(pm1a_cnt_val & ACPI_PM1C_SCI_EN));
}
+const struct hvm_modlist_entry *get_module_entry(
+ const struct hvm_start_info *info,
+ const char *name)
+{
+ const struct hvm_modlist_entry *modlist =
+ (struct hvm_modlist_entry *)((uintptr_t)info->modlist_paddr);
+ unsigned int i;
+
+ if ( !modlist )
+ return NULL;
+
+ for ( i = 0; i < info->nr_modules; i++ )
+ {
+ uint32_t module_name = modlist[i].cmdline_paddr;
+
+ BUG_ON(!modlist[i].cmdline_paddr ||
+ modlist[i].cmdline_paddr > UINT_MAX);
+
+ if ( !strcmp(name, (char*)module_name) )
+ {
+ BUG_ON(!modlist[i].paddr || modlist[i].paddr > UINT_MAX ||
+ modlist[i].size > UINT_MAX);
+ return &modlist[i];
+ }
+ }
+
+ return NULL;
+}
+
int main(void)
{
const struct bios_config *bios;
int acpi_enabled;
+ const struct hvm_modlist_entry *bios_module;
/* Initialise hypercall stubs with RET, rendering them no-ops. */
memset((void *)HYPERCALL_PHYSICAL_ADDRESS, 0xc3 /* RET */, PAGE_SIZE);
@@ -292,8 +322,16 @@ int main(void)
}
printf("Loading %s ...\n", bios->name);
- if ( bios->bios_load )
- bios->bios_load(bios);
+ bios_module = get_module_entry(hvm_start_info, "bios");
+ if ( bios_module && bios->bios_load )
+ {
+ uint32_t paddr = bios_module->paddr;
+ bios->bios_load(bios, (void*)paddr, bios_module->size);
+ }
+ else if ( bios->bios_load )
+ {
+ bios->bios_load(bios, 0, 0);
+ }
else
{
BUG_ON(bios->bios_address + bios->image_size >
Index: xen-4.7.0-testing/tools/firmware/hvmloader/ovmf.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/ovmf.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/ovmf.c
@@ -93,7 +93,8 @@ static void ovmf_finish_bios_info(void)
info->checksum = -checksum;
}
-static void ovmf_load(const struct bios_config *config)
+static void ovmf_load(const struct bios_config *config,
+ void *bios_addr, uint32_t bios_length)
{
xen_pfn_t mfn;
uint64_t addr = OVMF_BEGIN;
Index: xen-4.7.0-testing/tools/firmware/hvmloader/rombios.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/rombios.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/rombios.c
@@ -121,7 +121,8 @@ static void rombios_load_roms(void)
option_rom_phys_addr + option_rom_sz - 1);
}
-static void rombios_load(const struct bios_config *config)
+static void rombios_load(const struct bios_config *config,
+ void *unused_addr, uint32_t unused_size)
{
uint32_t bioshigh;
struct rombios_info *info;
Index: xen-4.7.0-testing/tools/firmware/hvmloader/util.h
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/util.h
+++ xen-4.7.0-testing/tools/firmware/hvmloader/util.h
@@ -34,6 +34,8 @@ enum {
#undef NULL
#define NULL ((void*)0)
+#define UINT_MAX (~0U)
+
void __assert_failed(char *assertion, char *file, int line)
__attribute__((noreturn));
#define ASSERT(p) \

View File

@ -0,0 +1,44 @@
From c3f4c5bcf0d8d93b5116f3e368c4739abe2dc06d Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:44 +0000
Subject: [PATCH 09/15] hvmloader: Check modules whereabouts in perform_tests
As perform_tests() is going to clear memory past 4MB, we check that the
memory can be used, or we skip the tests.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/firmware/hvmloader/tests.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
Index: xen-4.7.0-testing/tools/firmware/hvmloader/tests.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/tests.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/tests.c
@@ -210,6 +210,26 @@ void perform_tests(void)
return;
}
+ /* Check that tests does not use memory where modules are stored */
+ if ( ((uint32_t)hvm_start_info + sizeof(struct hvm_start_info)) > 4 << 20
+ && (uint32_t)hvm_start_info < 8 << 20 )
+ {
+ printf("Skipping tests due to memory used by hvm_start_info\n");
+ return;
+ }
+ for ( unsigned i = 0; i < hvm_start_info->nr_modules; i++ )
+ {
+ const struct hvm_modlist_entry *modlist =
+ (struct hvm_modlist_entry *)((uintptr_t)hvm_start_info->modlist_paddr);
+ if ( modlist[i].paddr
+ && modlist[i].paddr + modlist[i].size > 4ul << 20
+ && modlist[i].paddr < 8ul << 20 )
+ {
+ printf("Skipping tests due to memory used by a module\n");
+ return;
+ }
+ }
+
passed = skipped = 0;
for ( i = 0; tests[i].test; i++ )
{

View File

@ -0,0 +1,112 @@
From df9fdafcfc38c931181dae1de3e6a9eee28829d4 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:45 +0000
Subject: [PATCH 10/15] hvmloader: Load SeaBIOS from hvm_start_info modules
... and do not include the SeaBIOS ROM into hvmloader anymore.
This also fixes the dependency on roms.inc: hvmloader.o does not include it.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/firmware/hvmloader/Makefile | 15 +--------------
tools/firmware/hvmloader/seabios.c | 24 ++++++++++++++----------
2 files changed, 15 insertions(+), 24 deletions(-)
Index: xen-4.7.0-testing/tools/firmware/hvmloader/Makefile
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/Makefile
+++ xen-4.7.0-testing/tools/firmware/hvmloader/Makefile
@@ -45,7 +45,6 @@ CIRRUSVGA_DEBUG ?= n
OVMF_DIR := ../ovmf-dir
ROMBIOS_DIR := ../rombios
-SEABIOS_DIR := ../seabios-dir
ifeq ($(CONFIG_ROMBIOS),y)
STDVGA_ROM := ../vgabios/VGABIOS-lgpl-latest.bin
@@ -80,19 +79,13 @@ endif
ifeq ($(CONFIG_SEABIOS),y)
OBJS += seabios.o
CFLAGS += -DENABLE_SEABIOS
-ifeq ($(SEABIOS_PATH),)
- SEABIOS_ROM := $(SEABIOS_DIR)/out/bios.bin
-else
- SEABIOS_ROM := $(SEABIOS_PATH)
-endif
-ROMS += $(SEABIOS_ROM)
endif
.PHONY: all
all: subdirs-all
$(MAKE) hvmloader
-ovmf.o rombios.o seabios.o hvmloader.o: roms.inc
+ovmf.o rombios.o: roms.inc
smbios.o: CFLAGS += -D__SMBIOS_DATE__="\"$(SMBIOS_REL_DATE)\""
hvmloader: $(OBJS) acpi/acpi.a
@@ -109,12 +102,6 @@ ifneq ($(ROMBIOS_ROM),)
echo "#endif" >> $@.new
endif
-ifneq ($(SEABIOS_ROM),)
- echo "#ifdef ROM_INCLUDE_SEABIOS" >> $@.new
- sh ./mkhex seabios $(SEABIOS_ROM) >> $@.new
- echo "#endif" >> $@.new
-endif
-
ifneq ($(OVMF_ROM),)
echo "#ifdef ROM_INCLUDE_OVMF" >> $@.new
sh ./mkhex ovmf $(OVMF_ROM) >> $@.new
Index: xen-4.7.0-testing/tools/firmware/hvmloader/seabios.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/seabios.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/seabios.c
@@ -27,9 +27,6 @@
#include "smbios_types.h"
#include "acpi/acpi2_0.h"
-#define ROM_INCLUDE_SEABIOS
-#include "roms.inc"
-
extern unsigned char dsdt_anycpu_qemu_xen[];
extern int dsdt_anycpu_qemu_xen_len;
@@ -127,22 +124,29 @@ static void seabios_setup_e820(void)
struct e820entry *e820 = scratch_alloc(sizeof(struct e820entry)*16, 0);
info->e820 = (uint32_t)e820;
+ BUG_ON(seabios_config.bios_address < 0xc0000 || seabios_config.bios_address >= 0x100000);
/* SeaBIOS reserves memory in e820 as necessary so no low reservation. */
- info->e820_nr = build_e820_table(e820, 0, 0x100000-sizeof(seabios));
+ info->e820_nr = build_e820_table(e820, 0, seabios_config.bios_address);
dump_e820_table(e820, info->e820_nr);
}
-struct bios_config seabios_config = {
- .name = "SeaBIOS",
+static void seabios_load(const struct bios_config *bios,
+ void *bios_addr, uint32_t bios_length)
+{
+ unsigned int bios_dest = 0x100000 - bios_length;
- .image = seabios,
- .image_size = sizeof(seabios),
+ BUG_ON(bios_dest + bios_length > HVMLOADER_PHYSICAL_ADDRESS);
+ memcpy((void *)bios_dest, bios_addr, bios_length);
+ seabios_config.bios_address = bios_dest;
+ seabios_config.image_size = bios_length;
+}
- .bios_address = 0x100000 - sizeof(seabios),
+struct bios_config seabios_config = {
+ .name = "SeaBIOS",
.load_roms = NULL,
- .bios_load = NULL,
+ .bios_load = seabios_load,
.bios_info_setup = seabios_setup_bios_info,
.bios_info_finish = seabios_finish_bios_info,

View File

@ -0,0 +1,131 @@
From 009fef2fc4bdffd1c9e5caf557157b4949d3842b Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:46 +0000
Subject: [PATCH 11/15] hvmloader: Load OVMF from modules
... and do not include the OVMF ROM into hvmloader anymore.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/firmware/hvmloader/Makefile | 15 +--------------
tools/firmware/hvmloader/ovmf.c | 30 +++++++++++++-----------------
2 files changed, 14 insertions(+), 31 deletions(-)
Index: xen-4.7.0-testing/tools/firmware/hvmloader/Makefile
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/Makefile
+++ xen-4.7.0-testing/tools/firmware/hvmloader/Makefile
@@ -43,7 +43,6 @@ endif
CIRRUSVGA_DEBUG ?= n
-OVMF_DIR := ../ovmf-dir
ROMBIOS_DIR := ../rombios
ifeq ($(CONFIG_ROMBIOS),y)
@@ -61,12 +60,6 @@ ROMS :=
ifeq ($(CONFIG_OVMF),y)
OBJS += ovmf.o
CFLAGS += -DENABLE_OVMF
-ifeq ($(OVMF_PATH),)
- OVMF_ROM := $(OVMF_DIR)/ovmf.bin
-else
- OVMF_ROM := $(OVMF_PATH)
-endif
-ROMS += $(OVMF_ROM)
endif
ifeq ($(CONFIG_ROMBIOS),y)
@@ -85,7 +78,7 @@ endif
all: subdirs-all
$(MAKE) hvmloader
-ovmf.o rombios.o: roms.inc
+rombios.o: roms.inc
smbios.o: CFLAGS += -D__SMBIOS_DATE__="\"$(SMBIOS_REL_DATE)\""
hvmloader: $(OBJS) acpi/acpi.a
@@ -102,12 +95,6 @@ ifneq ($(ROMBIOS_ROM),)
echo "#endif" >> $@.new
endif
-ifneq ($(OVMF_ROM),)
- echo "#ifdef ROM_INCLUDE_OVMF" >> $@.new
- sh ./mkhex ovmf $(OVMF_ROM) >> $@.new
- echo "#endif" >> $@.new
-endif
-
ifneq ($(STDVGA_ROM),)
echo "#ifdef ROM_INCLUDE_VGABIOS" >> $@.new
sh ./mkhex vgabios_stdvga $(STDVGA_ROM) >> $@.new
Index: xen-4.7.0-testing/tools/firmware/hvmloader/ovmf.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/ovmf.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/ovmf.c
@@ -34,17 +34,10 @@
#include <xen/hvm/ioreq.h>
#include <xen/memory.h>
-#define ROM_INCLUDE_OVMF
-#include "roms.inc"
-
-#define OVMF_SIZE (sizeof(ovmf))
#define OVMF_MAXOFFSET 0x000FFFFFULL
-#define OVMF_BEGIN (0x100000000ULL - ((OVMF_SIZE + OVMF_MAXOFFSET) & ~OVMF_MAXOFFSET))
-#define OVMF_END (OVMF_BEGIN + OVMF_SIZE)
#define LOWCHUNK_BEGIN 0x000F0000
#define LOWCHUNK_SIZE 0x00010000
#define LOWCHUNK_MAXOFFSET 0x0000FFFF
-#define LOWCHUNK_END (OVMF_BEGIN + OVMF_SIZE)
#define OVMF_INFO_PHYSICAL_ADDRESS 0x00001000
extern unsigned char dsdt_anycpu_qemu_xen[];
@@ -97,24 +90,31 @@ static void ovmf_load(const struct bios_
void *bios_addr, uint32_t bios_length)
{
xen_pfn_t mfn;
- uint64_t addr = OVMF_BEGIN;
+ uint64_t addr = 0x100000000ULL
+ - ((bios_length + OVMF_MAXOFFSET) & ~OVMF_MAXOFFSET);
+ uint64_t ovmf_end = addr + bios_length;
+
+ ovmf_config.bios_address = addr;
+ ovmf_config.image_size = bios_length;
/* Copy low-reset vector portion. */
- memcpy((void *) LOWCHUNK_BEGIN, (uint8_t *) config->image
- + OVMF_SIZE
- - LOWCHUNK_SIZE,
+ memcpy((void *) LOWCHUNK_BEGIN,
+ (uint8_t *) bios_addr + bios_length - LOWCHUNK_SIZE,
LOWCHUNK_SIZE);
/* Ensure we have backing page prior to moving FD. */
- while ( (addr >> PAGE_SHIFT) != (OVMF_END >> PAGE_SHIFT) )
+ while ( (addr >> PAGE_SHIFT) != (ovmf_end >> PAGE_SHIFT) )
{
mfn = (uint32_t) (addr >> PAGE_SHIFT);
addr += PAGE_SIZE;
mem_hole_populate_ram(mfn, 1);
}
+ /* Check that source and destination does not overlaps. */
+ BUG_ON(addr + bios_length > (unsigned)bios_addr
+ && addr < (unsigned)bios_addr + bios_length);
/* Copy FD. */
- memcpy((void *) OVMF_BEGIN, config->image, OVMF_SIZE);
+ memcpy((void *) ovmf_config.bios_address, bios_addr, bios_length);
}
static void ovmf_acpi_build_tables(void)
@@ -151,10 +151,6 @@ static void ovmf_setup_e820(void)
struct bios_config ovmf_config = {
.name = "OVMF",
- .image = ovmf,
- .image_size = sizeof(ovmf),
-
- .bios_address = OVMF_BEGIN,
.bios_load = ovmf_load,
.load_roms = 0,

View File

@ -0,0 +1,51 @@
From 258c5050f08bdf69394dd8790398b6dfe453886e Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:47 +0000
Subject: [PATCH 12/15] hvmloader: Specific bios_load function required
All BIOSes except ROMBIOS need to be loaded via modules.
ROMBIOS is handled as a special case.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
tools/firmware/hvmloader/hvmloader.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
Index: xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/hvmloader.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
@@ -323,21 +323,25 @@ int main(void)
printf("Loading %s ...\n", bios->name);
bios_module = get_module_entry(hvm_start_info, "bios");
- if ( bios_module && bios->bios_load )
+ if ( bios_module )
{
uint32_t paddr = bios_module->paddr;
bios->bios_load(bios, (void*)paddr, bios_module->size);
}
- else if ( bios->bios_load )
+#ifdef ENABLE_ROMBIOS
+ else if ( bios == &rombios_config )
{
bios->bios_load(bios, 0, 0);
}
+#endif
else
{
- BUG_ON(bios->bios_address + bios->image_size >
- HVMLOADER_PHYSICAL_ADDRESS);
- memcpy((void *)bios->bios_address, bios->image,
- bios->image_size);
+ /*
+ * If there is no BIOS module supplied and if there is no embeded BIOS
+ * image, then we failed. Only rombios might have an embedded bios blob.
+ */
+ printf("no BIOS ROM image found\n");
+ BUG();
}
if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )

View File

@ -0,0 +1,65 @@
From e7497ead178f01fd5c94cfb8506d31b77cc38c94 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:48 +0000
Subject: [PATCH 13/15] hvmloader: Always build-in SeaBIOS and OVMF loader
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
tools/firmware/hvmloader/Makefile | 11 +----------
tools/firmware/hvmloader/hvmloader.c | 4 ----
2 files changed, 1 insertion(+), 14 deletions(-)
Index: xen-4.7.0-testing/tools/firmware/hvmloader/Makefile
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/Makefile
+++ xen-4.7.0-testing/tools/firmware/hvmloader/Makefile
@@ -37,6 +37,7 @@ OBJS = hvmloader.o mp_tables.o util.o s
OBJS += smp.o cacheattr.o xenbus.o vnuma.o
OBJS += e820.o pci.o pir.o ctype.o
OBJS += hvm_param.o
+OBJS += ovmf.o seabios.o
ifeq ($(debug),y)
OBJS += tests.o
endif
@@ -57,11 +58,6 @@ endif
ROMS :=
-ifeq ($(CONFIG_OVMF),y)
-OBJS += ovmf.o
-CFLAGS += -DENABLE_OVMF
-endif
-
ifeq ($(CONFIG_ROMBIOS),y)
OBJS += optionroms.o 32bitbios_support.o rombios.o
CFLAGS += -DENABLE_ROMBIOS
@@ -69,11 +65,6 @@ ROMBIOS_ROM := $(ROMBIOS_DIR)/BIOS-bochs
ROMS += $(ROMBIOS_ROM) $(STDVGA_ROM) $(CIRRUSVGA_ROM) $(ETHERBOOT_ROMS)
endif
-ifeq ($(CONFIG_SEABIOS),y)
-OBJS += seabios.o
-CFLAGS += -DENABLE_SEABIOS
-endif
-
.PHONY: all
all: subdirs-all
$(MAKE) hvmloader
Index: xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/hvmloader/hvmloader.c
+++ xen-4.7.0-testing/tools/firmware/hvmloader/hvmloader.c
@@ -209,12 +209,8 @@ struct bios_info {
#ifdef ENABLE_ROMBIOS
{ "rombios", &rombios_config, },
#endif
-#ifdef ENABLE_SEABIOS
{ "seabios", &seabios_config, },
-#endif
-#ifdef ENABLE_OVMF
{ "ovmf", &ovmf_config, },
-#endif
{ NULL, NULL }
};

View File

@ -0,0 +1,84 @@
From d42d9e59472e2c637776245db8e80de0b907d46b Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 14 Mar 2016 17:55:49 +0000
Subject: [PATCH 14/15] configure: do not depend on SEABIOS_PATH or OVMF_PATH
...
... to compile SeaBIOS and OVMF. Only depends on CONFIG_*.
If a --with-system-* configure option is used, then set *_CONFIG=n so that
SeaBIOS and OVMF are not compiled.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
---
tools/configure.ac | 6 ++++--
tools/firmware/Makefile | 8 --------
2 files changed, 4 insertions(+), 10 deletions(-)
Index: xen-4.7.0-testing/tools/configure.ac
===================================================================
--- xen-4.7.0-testing.orig/tools/configure.ac
+++ xen-4.7.0-testing/tools/configure.ac
@@ -212,12 +212,13 @@ AC_ARG_WITH([system-seabios],
AS_HELP_STRING([--with-system-seabios@<:@=PATH@:>@],
[Use system supplied seabios PATH instead of building and installing
our own version]),[
+ # Disable compilation of SeaBIOS.
+ seabios=n
case $withval in
no) seabios_path= ;;
*) seabios_path=$withval ;;
esac
],[])
-AC_SUBST(seabios_path)
AC_DEFINE_UNQUOTED([SEABIOS_PATH],
["${seabios_path:-$XENFIRMWAREDIR/seabios.bin}"],
[SeaBIOS path])
@@ -226,12 +227,13 @@ AC_ARG_WITH([system-ovmf],
AS_HELP_STRING([--with-system-ovmf@<:@=PATH@:>@],
[Use system supplied OVMF PATH instead of building and installing
our own version]),[
+ # Disable compilation of OVMF.
+ ovmf=n
case $withval in
no) ovmf_path= ;;
*) ovmf_path=$withval ;;
esac
],[])
-AC_SUBST(ovmf_path)
AC_DEFINE_UNQUOTED([OVMF_PATH],
["${ovmf_path:-$XENFIRMWAREDIR/ovmf.bin}"],
[OVMF path])
Index: xen-4.7.0-testing/tools/firmware/Makefile
===================================================================
--- xen-4.7.0-testing.orig/tools/firmware/Makefile
+++ xen-4.7.0-testing/tools/firmware/Makefile
@@ -6,12 +6,8 @@ TARGET := hvmloader/hvmloader
INST_DIR := $(DESTDIR)$(XENFIRMWAREDIR)
SUBDIRS-y :=
-ifeq ($(OVMF_PATH),)
SUBDIRS-$(CONFIG_OVMF) += ovmf-dir
-endif
-ifeq ($(SEABIOS_PATH),)
SUBDIRS-$(CONFIG_SEABIOS) += seabios-dir
-endif
SUBDIRS-$(CONFIG_ROMBIOS) += rombios
SUBDIRS-$(CONFIG_ROMBIOS) += vgabios
SUBDIRS-$(CONFIG_ROMBIOS) += etherboot
@@ -49,15 +45,11 @@ install: all
[ -d $(INST_DIR) ] || $(INSTALL_DIR) $(INST_DIR)
[ ! -e $(TARGET) ] || $(INSTALL_DATA) $(TARGET) $(INST_DIR)
ifeq ($(CONFIG_SEABIOS),y)
-ifeq ($(SEABIOS_PATH),)
$(INSTALL_DATA) $(SEABIOS_ROM) $(INST_DIR)/seabios.bin
endif
-endif
ifeq ($(CONFIG_OVMF),y)
-ifeq ($(OVMF_PATH),)
$(INSTALL_DATA) $(OVMF_ROM) $(INST_DIR)/ovmf.bin
endif
-endif
.PHONY: clean
clean: subdirs-clean

View File

@ -0,0 +1,144 @@
References: bsc#900418
# Commit cd42ccb27f4e364b6e75b6fecb06bb99ad8da988
# Date 2016-06-08 14:12:45 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
kexec: allow relaxed placement specification via command line
Rather than just allowing a fixed address or fully automatic placement,
also allow for specifying an upper bound. Especially on EFI systems,
where firmware memory use is commonly less predictable than on legacy
BIOS ones, this makes success of the reservation more likely when
automatic placement is not an option (e.g. because of special DMA
restrictions of devices involved in actually carrying out the dump).
Also take the opportunity to actually add text to the "crashkernel"
entry in the command line option doc.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -458,7 +458,18 @@ Specify the maximum address to allocate
combination with the `low_crashinfo` command line option.
### crashkernel
-> `= <ramsize-range>:<size>[,...][@<offset>]`
+> `= <ramsize-range>:<size>[,...][{@,<}<offset>]`
+> `= <size>[{@,<}<offset>]`
+
+Specify sizes and optionally placement of the crash kernel reservation
+area. The `<ramsize-range>:<size>` pairs indicate how much memory to
+set aside for a crash kernel (`<size>`) for a given range of installed
+RAM (`<ramsize-range>`). Each `<ramsize-range>` is of the form
+`<start>-[<end>]`.
+
+A trailing `@<offset>` specifies the exact address this area should be
+placed at, whereas `<` in place of `@` just specifies an upper bound of
+the address range the area should fall into.
### credit2\_balance\_over
> `= <integer>`
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1044,13 +1044,23 @@ void __init noreturn __start_xen(unsigne
}
#ifdef CONFIG_KEXEC
- /* Don't overlap with modules. */
- e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size),
- mod, mbi->mods_count, -1);
- if ( !kexec_crash_area.start && (s < e) )
+ /*
+ * Looking backwards from the crash area limit, find a large
+ * enough range that does not overlap with modules.
+ */
+ while ( !kexec_crash_area.start )
{
- e = (e - kexec_crash_area.size) & PAGE_MASK;
- kexec_crash_area.start = e;
+ /* Don't overlap with modules. */
+ e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size),
+ mod, mbi->mods_count, -1);
+ if ( s >= e )
+ break;
+ if ( e > kexec_crash_area_limit )
+ {
+ e = kexec_crash_area_limit & PAGE_MASK;
+ continue;
+ }
+ kexec_crash_area.start = (e - kexec_crash_area.size) & PAGE_MASK;
}
#endif
}
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -60,6 +60,7 @@ static unsigned char vmcoreinfo_data[VMC
static size_t vmcoreinfo_size = 0;
xen_kexec_reserve_t kexec_crash_area;
+paddr_t __initdata kexec_crash_area_limit = ~(paddr_t)0;
static struct {
u64 start, end;
unsigned long size;
@@ -86,7 +87,7 @@ static void *crash_heap_current = NULL,
/*
* Parse command lines in the format
*
- * crashkernel=<ramsize-range>:<size>[,...][@<offset>]
+ * crashkernel=<ramsize-range>:<size>[,...][{@,<}<address>]
*
* with <ramsize-range> being of form
*
@@ -94,7 +95,7 @@ static void *crash_heap_current = NULL,
*
* as well as the legacy ones in the format
*
- * crashkernel=<size>[@<offset>]
+ * crashkernel=<size>[{@,<}<address>]
*/
static void __init parse_crashkernel(const char *str)
{
@@ -109,7 +110,7 @@ static void __init parse_crashkernel(con
{
printk(XENLOG_WARNING "crashkernel: too many ranges\n");
cur = NULL;
- str = strchr(str, '@');
+ str = strpbrk(str, "@<");
break;
}
@@ -154,9 +155,16 @@ static void __init parse_crashkernel(con
}
else
kexec_crash_area.size = parse_size_and_unit(cur = str, &str);
- if ( cur != str && *str == '@' )
- kexec_crash_area.start = parse_size_and_unit(cur = str + 1, &str);
- if ( cur == str )
+ if ( cur != str )
+ {
+ if ( *str == '@' )
+ kexec_crash_area.start = parse_size_and_unit(cur = str + 1, &str);
+ else if ( *str == '<' )
+ kexec_crash_area_limit = parse_size_and_unit(cur = str + 1, &str);
+ else
+ printk(XENLOG_WARNING "crashkernel: '%s' ignored\n", str);
+ }
+ if ( cur && cur == str )
printk(XENLOG_WARNING "crashkernel: memory value expected\n");
}
custom_param("crashkernel", parse_crashkernel);
--- a/xen/include/xen/kexec.h
+++ b/xen/include/xen/kexec.h
@@ -14,6 +14,7 @@ typedef struct xen_kexec_reserve {
} xen_kexec_reserve_t;
extern xen_kexec_reserve_t kexec_crash_area;
+extern paddr_t kexec_crash_area_limit;
extern bool_t kexecing;

View File

@ -0,0 +1,62 @@
# Commit 5e02972646132ad98c365ebfcfcb43b40a0dde36
# Date 2016-06-13 12:44:32 +0100
# Author Euan Harris <euan.harris@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
nested vmx: Validate host VMX MSRs before accessing them
Some VMX MSRs may not exist on certain processor models, or may
be disabled because of configuration settings. It is only safe to
access these MSRs if configuration flags in other MSRs are set. These
prerequisites are listed in the Intel 64 and IA-32 Architectures
Software Developers Manual, Vol 3, Appendix A.
nvmx_msr_read_intercept() does not check the prerequisites before
accessing MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC on the host. Accessing these MSRs from a nested
VMX guest running on a host which does not support them will cause
Xen to crash with a GPF.
Signed-off-by: Euan Harris <euan.harris@citrix.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1820,11 +1820,22 @@ int nvmx_msr_read_intercept(unsigned int
return 0;
/*
- * Those MSRs are available only when bit 55 of
- * MSR_IA32_VMX_BASIC is set.
+ * These MSRs are only available when flags in other MSRs are set.
+ * These prerequisites are listed in the Intel 64 and IA-32
+ * Architectures Software Developers Manual, Vol 3, Appendix A.
*/
switch ( msr )
{
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ if ( !cpu_has_vmx_secondary_exec_control )
+ return 0;
+ break;
+
+ case MSR_IA32_VMX_EPT_VPID_CAP:
+ if ( !(cpu_has_vmx_ept || cpu_has_vmx_vpid) )
+ return 0;
+ break;
+
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
@@ -1832,6 +1843,11 @@ int nvmx_msr_read_intercept(unsigned int
if ( !(vmx_basic_msr & VMX_BASIC_DEFAULT1_ZERO) )
return 0;
break;
+
+ case MSR_IA32_VMX_VMFUNC:
+ if ( !cpu_has_vmx_vmfunc )
+ return 0;
+ break;
}
rdmsrl(msr, host_data);

View File

@ -0,0 +1,50 @@
References: bsc#970135
# Commit b64438c7c1495a7580d1bb9d8ba644f3705e1ffb
# Date 2016-06-14 15:08:47 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: use correct (local) time stamp in constant-TSC calibration fast path
This looks like a copy and paste mistake in commit 1b6a99892d ("x86:
Simpler time handling when TSC is constant across all power saving
states"), responsible for occasional many-microsecond cross-CPU skew of
what NOW() returns.
Also improve the correlation between local TSC and stime stamps
obtained at the end of the two calibration handlers: Compute the stime
one from the TSC one, instead of doing another rdtsc() for that
computation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -998,7 +998,7 @@ static void local_time_calibration(void)
/* Atomically read cpu_calibration struct and write cpu_time struct. */
local_irq_disable();
t->local_tsc_stamp = c->local_tsc_stamp;
- t->stime_local_stamp = c->stime_master_stamp;
+ t->stime_local_stamp = c->stime_local_stamp;
t->stime_master_stamp = c->stime_master_stamp;
local_irq_enable();
update_vcpu_system_time(current);
@@ -1275,7 +1275,7 @@ static void time_calibration_tsc_rendezv
}
c->local_tsc_stamp = rdtsc();
- c->stime_local_stamp = get_s_time();
+ c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
raise_softirq(TIME_CALIBRATE_SOFTIRQ);
@@ -1305,7 +1305,7 @@ static void time_calibration_std_rendezv
}
c->local_tsc_stamp = rdtsc();
- c->stime_local_stamp = get_s_time();
+ c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
raise_softirq(TIME_CALIBRATE_SOFTIRQ);

View File

@ -0,0 +1,32 @@
# Commit 9dec2c47406f4ef31711656722f5f70d758d6160
# Date 2016-06-17 15:08:08 +0100
# Author Dario Faggioli <dario.faggioli@citrix.com>
# Committer George Dunlap <george.dunlap@citrix.com>
xen: sched: use default scheduler upon an invalid "sched="
instead of just the first scheduler we find in the array.
In fact, right now, if someone makes a typo when passing
the "sched=" command line option to Xen, we (with all
schedulers configured in) pick ARINC653, which is most
likely not what one would expect.
Go for the default scheduler instead.
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-By: Jonathan Creekmore <jonathan.creekmore@gmail.com>
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1625,7 +1625,8 @@ void __init scheduler_init(void)
{
printk("Could not find scheduler: %s\n", opt_sched);
for ( i = 0; i < NUM_SCHEDULERS; i++ )
- if ( schedulers[i] )
+ if ( schedulers[i] &&
+ !strcmp(schedulers[i]->opt_name, CONFIG_SCHED_DEFAULT) )
{
ops = *schedulers[i];
break;

View File

@ -0,0 +1,133 @@
References: bsc#970135
# Commit 06f083c826836a098f793db821845b313ad88a7f
# Date 2016-06-21 12:01:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: also generate assembler usable equates for synthesized features
... to make it possible to base alternative instruction patching upon
such.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -219,7 +219,8 @@ long arch_do_sysctl(
}
/* Clip the number of entries. */
- nr = min(sysctl->u.cpu_featureset.nr_features, FSCAPINTS);
+ nr = min_t(unsigned int, sysctl->u.cpu_featureset.nr_features,
+ FSCAPINTS);
/* Look up requested featureset. */
if ( sysctl->u.cpu_featureset.index < ARRAY_SIZE(featureset_table) )
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -3,8 +3,23 @@
*
* Defines x86 CPU feature bits
*/
+#if defined(XEN_CPUFEATURE)
-#ifndef __ASM_I386_CPUFEATURE_H
+/* Other features, Xen-defined mapping. */
+/* This range is used for feature bits which conflict or are synthesized */
+XEN_CPUFEATURE(CONSTANT_TSC, (FSCAPINTS+0)*32+ 0) /* TSC ticks at a constant rate */
+XEN_CPUFEATURE(NONSTOP_TSC, (FSCAPINTS+0)*32+ 1) /* TSC does not stop in C states */
+XEN_CPUFEATURE(ARAT, (FSCAPINTS+0)*32+ 2) /* Always running APIC timer */
+XEN_CPUFEATURE(ARCH_PERFMON, (FSCAPINTS+0)*32+ 3) /* Intel Architectural PerfMon */
+XEN_CPUFEATURE(TSC_RELIABLE, (FSCAPINTS+0)*32+ 4) /* TSC is known to be reliable */
+XEN_CPUFEATURE(XTOPOLOGY, (FSCAPINTS+0)*32+ 5) /* cpu topology enum extensions */
+XEN_CPUFEATURE(CPUID_FAULTING, (FSCAPINTS+0)*32+ 6) /* cpuid faulting */
+XEN_CPUFEATURE(CLFLUSH_MONITOR, (FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */
+XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */
+
+#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
+
+#elif !defined(__ASM_I386_CPUFEATURE_H)
#ifndef X86_FEATURES_ONLY
#define __ASM_I386_CPUFEATURE_H
#endif
@@ -12,20 +27,6 @@
#include <xen/const.h>
#include <asm/cpuid.h>
-#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
-
-/* Other features, Xen-defined mapping. */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CONSTANT_TSC ((FSCAPINTS+0)*32+ 0) /* TSC ticks at a constant rate */
-#define X86_FEATURE_NONSTOP_TSC ((FSCAPINTS+0)*32+ 1) /* TSC does not stop in C states */
-#define X86_FEATURE_ARAT ((FSCAPINTS+0)*32+ 2) /* Always running APIC timer */
-#define X86_FEATURE_ARCH_PERFMON ((FSCAPINTS+0)*32+ 3) /* Intel Architectural PerfMon */
-#define X86_FEATURE_TSC_RELIABLE ((FSCAPINTS+0)*32+ 4) /* TSC is known to be reliable */
-#define X86_FEATURE_XTOPOLOGY ((FSCAPINTS+0)*32+ 5) /* cpu topology enum extensions */
-#define X86_FEATURE_CPUID_FAULTING ((FSCAPINTS+0)*32+ 6) /* cpuid faulting */
-#define X86_FEATURE_CLFLUSH_MONITOR ((FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */
-#define X86_FEATURE_APERFMPERF ((FSCAPINTS+0)*32+ 8) /* APERFMPERF */
-
#define cpufeat_word(idx) ((idx) / 32)
#define cpufeat_bit(idx) ((idx) % 32)
#define cpufeat_mask(idx) (_AC(1, U) << cpufeat_bit(idx))
--- a/xen/include/asm-x86/cpufeatureset.h
+++ b/xen/include/asm-x86/cpufeatureset.h
@@ -3,19 +3,25 @@
#ifndef __ASSEMBLY__
+#include <xen/stringify.h>
+
#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
enum {
#include <public/arch-x86/cpufeatureset.h>
+#include <asm/cpufeature.h>
};
#undef XEN_CPUFEATURE
-#define XEN_CPUFEATURE(name, value) asm (".equ X86_FEATURE_" #name ", " #value);
+#define XEN_CPUFEATURE(name, value) asm (".equ X86_FEATURE_" #name ", " \
+ __stringify(value));
#include <public/arch-x86/cpufeatureset.h>
+#include <asm/cpufeature.h>
#else /* !__ASSEMBLY__ */
#define XEN_CPUFEATURE(name, value) .equ X86_FEATURE_##name, value
#include <public/arch-x86/cpufeatureset.h>
+#include <asm/cpufeature.h>
#endif /* __ASSEMBLY__ */
--- a/xen/include/asm-x86/cpuid.h
+++ b/xen/include/asm-x86/cpuid.h
@@ -1,12 +1,13 @@
#ifndef __X86_CPUID_H__
#define __X86_CPUID_H__
-#include <asm/cpufeatureset.h>
#include <asm/cpuid-autogen.h>
-#include <asm/percpu.h>
#define FSCAPINTS FEATURESET_NR_ENTRIES
+#include <asm/cpufeatureset.h>
+#include <asm/percpu.h>
+
#define FEATURESET_1d 0 /* 0x00000001.edx */
#define FEATURESET_1c 1 /* 0x00000001.ecx */
#define FEATURESET_e1d 2 /* 0x80000001.edx */
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -291,7 +291,7 @@ def write_results(state):
state.output.write(
"""
-#define FEATURESET_NR_ENTRIES %sU
+#define FEATURESET_NR_ENTRIES %s
#define CPUID_COMMON_1D_FEATURES %s

View File

@ -0,0 +1,94 @@
References: bsc#991934
# Commit 9f358ddd69463fa8fb65cf67beb5f6f0d3350e32
# Date 2016-07-26 10:42:49 +0100
# Author George Dunlap <george.dunlap@citrix.com>
# Committer George Dunlap <george.dunlap@citrix.com>
xen: Have schedulers revise initial placement
The generic domain creation logic in
xen/common/domctl.c:default_vcpu0_location() attempts to do
initial placement load-balancing by placing vcpu 0 on the least-busy
non-primary hyperthread available. Unfortunately, the logic can end
up picking a pcpu that's not in the online mask. When this is passed
to a scheduler which assumes that the initial assignment is
valid, it causes a null pointer dereference looking up the runqueue.
Furthermore, this initial placement doesn't take into account hard or
soft affinity, or any scheduler-specific knowledge (such as historic
runqueue load, as in credit2).
To solve this, when inserting a vcpu, always call the per-scheduler
"pick" function to revise the initial placement. This will
automatically take all knowledge the scheduler has into account.
csched2_cpu_pick ASSERTs that the vcpu's pcpu scheduler lock has been
taken. Grab and release the lock to minimize time spent with irqs
disabled.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -994,6 +994,9 @@ csched_vcpu_insert(const struct schedule
BUG_ON( is_idle_vcpu(vc) );
+ /* This is safe because vc isn't yet being scheduled */
+ vc->processor = csched_cpu_pick(ops, vc);
+
lock = vcpu_schedule_lock_irq(vc);
if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -318,6 +318,8 @@ struct csched2_dom {
uint16_t nr_vcpus;
};
+static int csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc);
+
/*
* When a hard affinity change occurs, we may not be able to check some
* (any!) of the other runqueues, when looking for the best new processor
@@ -956,9 +958,16 @@ csched2_vcpu_insert(const struct schedul
BUG_ON(is_idle_vcpu(vc));
- /* Add vcpu to runqueue of initial processor */
+ /* csched2_cpu_pick() expects the pcpu lock to be held */
lock = vcpu_schedule_lock_irq(vc);
+ vc->processor = csched2_cpu_pick(ops, vc);
+
+ spin_unlock_irq(lock);
+
+ lock = vcpu_schedule_lock_irq(vc);
+
+ /* Add vcpu to runqueue of initial processor */
runq_assign(ops, vc);
vcpu_schedule_unlock_irq(lock, vc);
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -203,6 +203,8 @@ struct rt_dom {
struct domain *dom; /* pointer to upper domain */
};
+static int rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc);
+
/*
* Useful inline functions
*/
@@ -845,6 +847,9 @@ rt_vcpu_insert(const struct scheduler *o
BUG_ON( is_idle_vcpu(vc) );
+ /* This is safe because vc isn't yet being scheduled */
+ vc->processor = rt_cpu_pick(ops, vc);
+
lock = vcpu_schedule_lock_irq(vc);
now = NOW();

View File

@ -0,0 +1,84 @@
References: bsc#991934
# Commit d5438accceecc8172db2d37d98b695eb8bc43afc
# Date 2016-07-26 10:44:06 +0100
# Author George Dunlap <george.dunlap@citrix.com>
# Committer George Dunlap <george.dunlap@citrix.com>
xen: Remove buggy initial placement algorithm
The initial placement algorithm sometimes picks cpus outside of the
mask it's given, does a lot of unnecessary bitmasking, does its own
separate load calculation, and completely ignores vcpu hard and soft
affinities. Just get rid of it and rely on the schedulers to do
initial placement.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -217,54 +217,6 @@ void getdomaininfo(struct domain *d, str
memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
-static unsigned int default_vcpu0_location(cpumask_t *online)
-{
- struct domain *d;
- struct vcpu *v;
- unsigned int i, cpu, nr_cpus, *cnt;
- cpumask_t cpu_exclude_map;
-
- /* Do an initial CPU placement. Pick the least-populated CPU. */
- nr_cpus = cpumask_last(&cpu_online_map) + 1;
- cnt = xzalloc_array(unsigned int, nr_cpus);
- if ( cnt )
- {
- rcu_read_lock(&domlist_read_lock);
- for_each_domain ( d )
- for_each_vcpu ( d, v )
- if ( !(v->pause_flags & VPF_down)
- && ((cpu = v->processor) < nr_cpus) )
- cnt[cpu]++;
- rcu_read_unlock(&domlist_read_lock);
- }
-
- /*
- * If we're on a HT system, we only auto-allocate to a non-primary HT. We
- * favour high numbered CPUs in the event of a tie.
- */
- cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0));
- cpu = cpumask_first(&cpu_exclude_map);
- i = cpumask_next(cpu, &cpu_exclude_map);
- if ( i < nr_cpu_ids )
- cpu = i;
- for_each_cpu(i, online)
- {
- if ( cpumask_test_cpu(i, &cpu_exclude_map) )
- continue;
- if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) &&
- (cpumask_next(i, per_cpu(cpu_sibling_mask, i)) < nr_cpu_ids) )
- continue;
- cpumask_or(&cpu_exclude_map, &cpu_exclude_map,
- per_cpu(cpu_sibling_mask, i));
- if ( !cnt || cnt[i] <= cnt[cpu] )
- cpu = i;
- }
-
- xfree(cnt);
-
- return cpu;
-}
-
bool_t domctl_lock_acquire(void)
{
/*
@@ -691,7 +643,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
continue;
cpu = (i == 0) ?
- default_vcpu0_location(online) :
+ cpumask_any(online) :
cpumask_cycle(d->vcpu[i-1]->processor, online);
if ( alloc_vcpu(d, i, cpu) == NULL )

View File

@ -0,0 +1,94 @@
References: bsc#988675 CVE-2016-6258 XSA-182
# Commit e1bff4c2ea3b32464510ac00c320bba28a8dbcca
# Date 2016-07-26 14:06:59 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/pv: Remove unsafe bits from the mod_l?_entry() fastpath
All changes in writeability and cacheability must go through full
re-validation.
Rework the logic as a whitelist, to make it clearer to follow.
This is XSA-182
Reported-by: Jérémie Boutoille <jboutoille@ext.quarkslab.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1852,6 +1852,14 @@ static inline int update_intpte(intpte_t
_t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
(_m), (_v), (_ad))
+/*
+ * PTE flags that a guest may change without re-validating the PTE.
+ * All other bits affect translation, caching, or Xen's safety.
+ */
+#define FASTPATH_FLAG_WHITELIST \
+ (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \
+ _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER)
+
/* Update the L1 entry at pl1e to new value nl1e. */
static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
unsigned long gl1mfn, int preserve_ad,
@@ -1891,9 +1899,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
nl1e = l1e_from_pfn(page_to_mfn(page), l1e_get_flags(nl1e));
}
- /* Fast path for identical mapping, r/w, presence, and cachability. */
- if ( !l1e_has_changed(ol1e, nl1e,
- PAGE_CACHE_ATTRS | _PAGE_RW | _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l1e(nl1e, pt_dom);
rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
@@ -1970,11 +1977,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
return -EINVAL;
}
- /* Fast path for identical mapping and presence. */
- if ( !l2e_has_changed(ol2e, nl2e,
- unlikely(opt_allow_superpage)
- ? _PAGE_PSE | _PAGE_RW | _PAGE_PRESENT
- : _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l2e(nl2e, d);
if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) )
@@ -2039,8 +2043,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
return -EINVAL;
}
- /* Fast path for identical mapping and presence. */
- if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l3e(nl3e, d);
rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad);
@@ -2103,8 +2107,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
return -EINVAL;
}
- /* Fast path for identical mapping and presence. */
- if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
+ /* Fast path for sufficiently-similar mappings. */
+ if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) )
{
adjust_guest_l4e(nl4e, d);
rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad);
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -313,6 +313,7 @@ void efi_update_l4_pgtable(unsigned int
#define _PAGE_AVAIL2 _AC(0x800,U)
#define _PAGE_AVAIL _AC(0xE00,U)
#define _PAGE_PSE_PAT _AC(0x1000,U)
+#define _PAGE_AVAIL_HIGH (_AC(0x7ff, U) << 12)
#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0)
/* non-architectural flags */
#define _PAGE_PAGED 0x2000U

View File

@ -0,0 +1,61 @@
References: bsc#988676 CVE-2016-6259 XSA-183
# Commit 9f1441487aa215193a7c00fd9cb80b335542465e
# Date 2016-07-26 14:07:04 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/entry: Avoid SMAP violation in compat_create_bounce_frame()
A 32bit guest kernel might be running on user mappings.
compat_create_bounce_frame() must whitelist its guest accesses to avoid
risking a SMAP violation.
For both variants of create_bounce_frame(), re-blacklist user accesses if
execution exits via an exception table redirection.
This is XSA-183 / CVE-2016-6259
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -318,6 +318,7 @@ ENTRY(compat_int80_direct_trap)
compat_create_bounce_frame:
ASSERT_INTERRUPTS_ENABLED
mov %fs,%edi
+ ASM_STAC
testb $2,UREGS_cs+8(%rsp)
jz 1f
/* Push new frame at registered guest-OS stack base. */
@@ -364,6 +365,7 @@ compat_create_bounce_frame:
movl TRAPBOUNCE_error_code(%rdx),%eax
.Lft8: movl %eax,%fs:(%rsi) # ERROR CODE
1:
+ ASM_CLAC
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
@@ -403,6 +405,7 @@ compat_crash_page_fault_4:
addl $4,%esi
compat_crash_page_fault:
.Lft14: mov %edi,%fs
+ ASM_CLAC
movl %esi,%edi
call show_page_walk
jmp dom_crash_sync_extable
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -420,9 +420,11 @@ domain_crash_page_fault_16:
domain_crash_page_fault_8:
addq $8,%rsi
domain_crash_page_fault:
+ ASM_CLAC
movq %rsi,%rdi
call show_page_walk
ENTRY(dom_crash_sync_extable)
+ ASM_CLAC
# Get out of the guest-save area of the stack.
GET_STACK_END(ax)
leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp

View File

@ -0,0 +1,123 @@
References: bsc#970135
# Commit bb49fd3092a84ce151f5528794c0e612eeb4961a
# Date 2016-08-03 14:39:31 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: adjust local system time initialization
Using the bare return value from read_platform_stime() is not suitable
when local_time_calibration() is going to use its fast path: Divergence
of several dozen microseconds between NOW() return values on different
CPUs results when platform and local time don't stay in close sync.
Latch local and platform time on the CPU initiating AP bringup, such
that the AP can use these values to seed its stime_local_stamp with as
little of an error as possible. The boot CPU, otoh, can simply
calculate the correct initial value (other CPUs could do so too with
even greater accuracy than the approach being introduced, but that can
work only if all CPUs' TSCs start ticking at the same time, which
generally can't be assumed to be the case on multi-socket systems).
This slightly defers init_percpu_time() (moved ahead by commit
dd2658f966 ["x86/time: initialise time earlier during
start_secondary()"]) in order to reduce as much as possible the gap
between populating the stamps and consuming them.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -328,12 +328,12 @@ void start_secondary(void *unused)
percpu_traps_init();
- init_percpu_time();
-
cpu_init();
smp_callin();
+ init_percpu_time();
+
setup_secondary_APIC_clock();
/*
@@ -996,6 +996,8 @@ int __cpu_up(unsigned int cpu)
if ( (ret = do_boot_cpu(apicid, cpu)) != 0 )
return ret;
+ time_latch_stamps();
+
set_cpu_state(CPU_STATE_ONLINE);
while ( !cpu_online(cpu) )
{
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1328,21 +1328,52 @@ static void time_calibration(void *unuse
&r, 1);
}
+static struct {
+ s_time_t local_stime, master_stime;
+} ap_bringup_ref;
+
+void time_latch_stamps(void)
+{
+ unsigned long flags;
+ u64 tsc;
+
+ local_irq_save(flags);
+ ap_bringup_ref.master_stime = read_platform_stime();
+ tsc = rdtsc();
+ local_irq_restore(flags);
+
+ ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
+}
+
void init_percpu_time(void)
{
struct cpu_time *t = &this_cpu(cpu_time);
unsigned long flags;
+ u64 tsc;
s_time_t now;
/* Initial estimate for TSC rate. */
t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
local_irq_save(flags);
- t->local_tsc_stamp = rdtsc();
now = read_platform_stime();
+ tsc = rdtsc();
local_irq_restore(flags);
t->stime_master_stamp = now;
+ /*
+ * To avoid a discontinuity (TSC and platform clock can't be expected
+ * to be in perfect sync), initialization here needs to match up with
+ * local_time_calibration()'s decision whether to use its fast path.
+ */
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ {
+ if ( system_state < SYS_STATE_smp_boot )
+ now = get_s_time_fixed(tsc);
+ else
+ now += ap_bringup_ref.local_stime - ap_bringup_ref.master_stime;
+ }
+ t->local_tsc_stamp = tsc;
t->stime_local_stamp = now;
}
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -40,6 +40,7 @@ int time_suspend(void);
int time_resume(void);
void init_percpu_time(void);
+void time_latch_stamps(void);
struct ioreq;
int hwdom_pit_access(struct ioreq *ioreq);

View File

@ -0,0 +1,231 @@
References: bsc#970135
# Commit fa74e70500fd73dd2fc441c7dc00b190fb37cee5
# Date 2016-08-03 14:40:44 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: introduce and use rdtsc_ordered()
Matching Linux commit 03b9730b76 ("x86/asm/tsc: Add rdtsc_ordered() and
use it in trivial call sites") and earlier ones it builds upon, let's
make sure timing loops don't have their rdtsc()-s re-ordered, as that
would harm precision of the result (values were observed to be several
hundred clocks off without this adjustment).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Joao Martins <joao.m.martins@oracle.com>
# Commit 7fb0a87d97201f9c3639f85615eacd93110dc1c5
# Date 2016-08-05 18:00:45 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: also use rdtsc_ordered() in check_tsc_warp()
This really was meant to be added in a v2 of what became commit
fa74e70500 ("x86/time: introduce and use rdtsc_ordered()").
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1137,7 +1137,7 @@ static int __init calibrate_APIC_clock(v
/*
* We wrapped around just now. Let's start:
*/
- t1 = rdtsc();
+ t1 = rdtsc_ordered();
tt1 = apic_read(APIC_TMCCT);
/*
@@ -1147,7 +1147,7 @@ static int __init calibrate_APIC_clock(v
wait_8254_wraparound();
tt2 = apic_read(APIC_TMCCT);
- t2 = rdtsc();
+ t2 = rdtsc_ordered();
/*
* The APIC bus clock counter is 32 bits only, it
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -541,6 +541,9 @@ static void init_amd(struct cpuinfo_x86
wrmsr_amd_safe(0xc001100d, l, h & ~1);
}
+ /* MFENCE stops RDTSC speculation */
+ __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
+
switch(c->x86)
{
case 0xf ... 0x17:
--- a/xen/arch/x86/delay.c
+++ b/xen/arch/x86/delay.c
@@ -21,10 +21,10 @@ void __udelay(unsigned long usecs)
unsigned long ticks = usecs * (cpu_khz / 1000);
unsigned long s, e;
- s = rdtsc();
+ s = rdtsc_ordered();
do
{
rep_nop();
- e = rdtsc();
+ e = rdtsc_ordered();
} while ((e-s) < ticks);
}
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -123,7 +123,7 @@ static void synchronize_tsc_master(unsig
for ( i = 1; i <= 5; i++ )
{
- tsc_value = rdtsc();
+ tsc_value = rdtsc_ordered();
wmb();
atomic_inc(&tsc_count);
while ( atomic_read(&tsc_count) != (i<<1) )
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -257,10 +257,10 @@ static u64 init_pit_and_calibrate_tsc(vo
outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
- start = rdtsc();
+ start = rdtsc_ordered();
for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
continue;
- end = rdtsc();
+ end = rdtsc_ordered();
/* Error if the CTC doesn't behave itself. */
if ( count == 0 )
@@ -760,7 +760,7 @@ s_time_t get_s_time_fixed(u64 at_tsc)
if ( at_tsc )
tsc = at_tsc;
else
- tsc = rdtsc();
+ tsc = rdtsc_ordered();
delta = tsc - t->local_tsc_stamp;
now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
@@ -933,7 +933,7 @@ int cpu_frequency_change(u64 freq)
/* TSC-extrapolated time may be bogus after frequency change. */
/*t->stime_local_stamp = get_s_time();*/
t->stime_local_stamp = t->stime_master_stamp;
- curr_tsc = rdtsc();
+ curr_tsc = rdtsc_ordered();
t->local_tsc_stamp = curr_tsc;
set_time_scale(&t->tsc_scale, freq);
local_irq_enable();
@@ -1124,16 +1124,13 @@ static void local_time_calibration(void)
*/
static void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
{
-#define rdtsc_barrier() mb()
static DEFINE_SPINLOCK(sync_lock);
static cycles_t last_tsc;
cycles_t start, now, prev, end;
int i;
- rdtsc_barrier();
- start = get_cycles();
- rdtsc_barrier();
+ start = rdtsc_ordered();
/* The measurement runs for 20 msecs: */
end = start + tsc_khz * 20ULL;
@@ -1148,9 +1145,7 @@ static void check_tsc_warp(unsigned long
*/
spin_lock(&sync_lock);
prev = last_tsc;
- rdtsc_barrier();
- now = get_cycles();
- rdtsc_barrier();
+ now = rdtsc_ordered();
last_tsc = now;
spin_unlock(&sync_lock);
@@ -1248,7 +1243,7 @@ static void time_calibration_tsc_rendezv
if ( r->master_stime == 0 )
{
r->master_stime = read_platform_stime();
- r->master_tsc_stamp = rdtsc();
+ r->master_tsc_stamp = rdtsc_ordered();
}
atomic_inc(&r->semaphore);
@@ -1274,7 +1269,7 @@ static void time_calibration_tsc_rendezv
}
}
- c->local_tsc_stamp = rdtsc();
+ c->local_tsc_stamp = rdtsc_ordered();
c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
@@ -1304,7 +1299,7 @@ static void time_calibration_std_rendezv
mb(); /* receive signal /then/ read r->master_stime */
}
- c->local_tsc_stamp = rdtsc();
+ c->local_tsc_stamp = rdtsc_ordered();
c->stime_local_stamp = get_s_time_fixed(c->local_tsc_stamp);
c->stime_master_stamp = r->master_stime;
@@ -1339,7 +1334,7 @@ void time_latch_stamps(void)
local_irq_save(flags);
ap_bringup_ref.master_stime = read_platform_stime();
- tsc = rdtsc();
+ tsc = rdtsc_ordered();
local_irq_restore(flags);
ap_bringup_ref.local_stime = get_s_time_fixed(tsc);
@@ -1357,7 +1352,7 @@ void init_percpu_time(void)
local_irq_save(flags);
now = read_platform_stime();
- tsc = rdtsc();
+ tsc = rdtsc_ordered();
local_irq_restore(flags);
t->stime_master_stamp = now;
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -16,6 +16,7 @@ XEN_CPUFEATURE(XTOPOLOGY, (FSCAPIN
XEN_CPUFEATURE(CPUID_FAULTING, (FSCAPINTS+0)*32+ 6) /* cpuid faulting */
XEN_CPUFEATURE(CLFLUSH_MONITOR, (FSCAPINTS+0)*32+ 7) /* clflush reqd with monitor */
XEN_CPUFEATURE(APERFMPERF, (FSCAPINTS+0)*32+ 8) /* APERFMPERF */
+XEN_CPUFEATURE(MFENCE_RDTSC, (FSCAPINTS+0)*32+ 9) /* MFENCE synchronizes RDTSC */
#define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -80,6 +80,22 @@ static inline uint64_t rdtsc(void)
return ((uint64_t)high << 32) | low;
}
+static inline uint64_t rdtsc_ordered(void)
+{
+ /*
+ * The RDTSC instruction is not ordered relative to memory access.
+ * The Intel SDM and the AMD APM are both vague on this point, but
+ * empirically an RDTSC instruction can be speculatively executed
+ * before prior loads. An RDTSC immediately after an appropriate
+ * barrier appears to be ordered as a normal load, that is, it
+ * provides the same ordering guarantees as reading from a global
+ * memory location that some other imaginary CPU is updating
+ * continuously with a time stamp.
+ */
+ alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
+ return rdtsc();
+}
+
#define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val)
#define write_tsc(val) ({ \
/* Reliable TSCs are in lockstep across all CPUs. We should \

View File

@ -0,0 +1,298 @@
References: bsc#970135
# Commit 93340297802b8e743b6ce66b0bc366af1ad51f39
# Date 2016-08-04 10:02:52 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/time: calibrate TSC against platform timer
... instead of unconditionally against the PIT. This allows for local
and master system times to remain in better sync (which matters even
when, on any modern system, the master time is really used only during
secondary CPU bringup, as the error between the two is in fact
noticeable in cross-CPU NOW() invocation monotonicity).
This involves moving the init_platform_timer() invocation into
early_time_init(), splitting out the few things which really need to be
done in init_xen_time(). That in turn allows dropping the open coded
PIT initialization from init_IRQ() (it was needed for APIC clock
calibration, which runs between early_time_init() and init_xen_time()).
In the course of this re-ordering also set the timer channel 2 gate low
after having finished calibration. This should be benign to overall
system operation, but appears to be the more clean state.
Also do away with open coded 8254 register manipulation from 8259 code.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/i8259.c
+++ b/xen/arch/x86/i8259.c
@@ -359,13 +359,6 @@ void __init init_IRQ(void)
apic_intr_init();
- /* Set the clock to HZ Hz */
-#define CLOCK_TICK_RATE 1193182 /* crystal freq (Hz) */
-#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
- outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
- outb(LATCH >> 8, PIT_CH0); /* MSB */
-
setup_irq(2, 0, &cascade);
}
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -59,7 +59,7 @@ struct platform_timesource {
char *name;
u64 frequency;
u64 (*read_counter)(void);
- int (*init)(struct platform_timesource *);
+ s64 (*init)(struct platform_timesource *);
void (*resume)(struct platform_timesource *);
int counter_bits;
};
@@ -224,49 +224,18 @@ static struct irqaction __read_mostly ir
timer_interrupt, "timer", NULL
};
-/* ------ Calibrate the TSC -------
- * Return processor ticks per second / CALIBRATE_FRAC.
- */
-
#define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
-#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
+#define CALIBRATE_VALUE(freq) (((freq) + CALIBRATE_FRAC / 2) / CALIBRATE_FRAC)
-static u64 init_pit_and_calibrate_tsc(void)
+static void preinit_pit(void)
{
- u64 start, end;
- unsigned long count;
-
/* Set PIT channel 0 to HZ Hz. */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
outb(LATCH >> 8, PIT_CH0); /* MSB */
-
- /* Set the Gate high, disable speaker */
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
- /*
- * Now let's take care of CTC channel 2
- *
- * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
- * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
- * to begin countdown.
- */
- outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
- outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
- outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
-
- start = rdtsc_ordered();
- for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
- continue;
- end = rdtsc_ordered();
-
- /* Error if the CTC doesn't behave itself. */
- if ( count == 0 )
- return 0;
-
- return ((end - start) * (u64)CALIBRATE_FRAC);
+#undef LATCH
}
void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
@@ -327,10 +296,49 @@ static u64 read_pit_count(void)
return count32;
}
-static int __init init_pit(struct platform_timesource *pts)
+static s64 __init init_pit(struct platform_timesource *pts)
{
+ u8 portb = inb(0x61);
+ u64 start, end;
+ unsigned long count;
+
using_pit = 1;
- return 1;
+
+ /* Set the Gate high, disable speaker. */
+ outb((portb & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2: mode 0, (interrupt on
+ * terminal count mode), binary count, load CALIBRATE_LATCH count,
+ * (LSB and MSB) to begin countdown.
+ */
+#define CALIBRATE_LATCH CALIBRATE_VALUE(CLOCK_TICK_RATE)
+ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
+ outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
+#undef CALIBRATE_LATCH
+
+ start = rdtsc_ordered();
+ for ( count = 0; !(inb(0x61) & 0x20); ++count )
+ continue;
+ end = rdtsc_ordered();
+
+ /* Set the Gate low, disable speaker. */
+ outb(portb & ~0x03, 0x61);
+
+ /* Error if the CTC doesn't behave itself. */
+ if ( count == 0 )
+ return 0;
+
+ return (end - start) * CALIBRATE_FRAC;
+}
+
+static void resume_pit(struct platform_timesource *pts)
+{
+ /* Set CTC channel 2 to mode 0 again; initial value does not matter. */
+ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(0, PIT_CH2); /* LSB of count */
+ outb(0, PIT_CH2); /* MSB of count */
}
static struct platform_timesource __initdata plt_pit =
@@ -340,7 +348,8 @@ static struct platform_timesource __init
.frequency = CLOCK_TICK_RATE,
.read_counter = read_pit_count,
.counter_bits = 32,
- .init = init_pit
+ .init = init_pit,
+ .resume = resume_pit,
};
/************************************************************
@@ -352,15 +361,26 @@ static u64 read_hpet_count(void)
return hpet_read32(HPET_COUNTER);
}
-static int __init init_hpet(struct platform_timesource *pts)
+static s64 __init init_hpet(struct platform_timesource *pts)
{
- u64 hpet_rate = hpet_setup();
+ u64 hpet_rate = hpet_setup(), start;
+ u32 count, target;
if ( hpet_rate == 0 )
return 0;
pts->frequency = hpet_rate;
- return 1;
+
+ count = hpet_read32(HPET_COUNTER);
+ start = rdtsc_ordered();
+ target = count + CALIBRATE_VALUE(hpet_rate);
+ if ( target < count )
+ while ( hpet_read32(HPET_COUNTER) >= count )
+ continue;
+ while ( hpet_read32(HPET_COUNTER) < target )
+ continue;
+
+ return (rdtsc_ordered() - start) * CALIBRATE_FRAC;
}
static void resume_hpet(struct platform_timesource *pts)
@@ -392,12 +412,24 @@ static u64 read_pmtimer_count(void)
return inl(pmtmr_ioport);
}
-static int __init init_pmtimer(struct platform_timesource *pts)
+static s64 __init init_pmtimer(struct platform_timesource *pts)
{
+ u64 start;
+ u32 count, target, mask = 0xffffff;
+
if ( pmtmr_ioport == 0 )
return 0;
- return 1;
+ count = inl(pmtmr_ioport) & mask;
+ start = rdtsc_ordered();
+ target = count + CALIBRATE_VALUE(ACPI_PM_FREQUENCY);
+ if ( target < count )
+ while ( (inl(pmtmr_ioport) & mask) >= count )
+ continue;
+ while ( (inl(pmtmr_ioport) & mask) < target )
+ continue;
+
+ return (rdtsc_ordered() - start) * CALIBRATE_FRAC;
}
static struct platform_timesource __initdata plt_pmtimer =
@@ -533,14 +565,15 @@ static void resume_platform_timer(void)
plt_stamp = plt_src.read_counter();
}
-static void __init init_platform_timer(void)
+static u64 __init init_platform_timer(void)
{
static struct platform_timesource * __initdata plt_timers[] = {
&plt_hpet, &plt_pmtimer, &plt_pit
};
struct platform_timesource *pts = NULL;
- int i, rc = -1;
+ unsigned int i;
+ s64 rc = -1;
if ( opt_clocksource[0] != '\0' )
{
@@ -578,15 +611,12 @@ static void __init init_platform_timer(v
plt_overflow_period = scale_delta(
1ull << (pts->counter_bits-1), &plt_scale);
- init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
plt_src = *pts;
- plt_overflow(NULL);
-
- platform_timer_stamp = plt_stamp64;
- stime_platform_stamp = NOW();
printk("Platform timer is %s %s\n",
freq_string(pts->frequency), pts->name);
+
+ return rc;
}
u64 stime2tsc(s_time_t stime)
@@ -1474,7 +1504,11 @@ int __init init_xen_time(void)
/* NB. get_cmos_time() can take over one second to execute. */
do_settime(get_cmos_time(), 0, NOW());
- init_platform_timer();
+ /* Finish platform timer initialization. */
+ init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+ plt_overflow(NULL);
+ platform_timer_stamp = plt_stamp64;
+ stime_platform_stamp = NOW();
init_percpu_time();
@@ -1489,7 +1523,10 @@ int __init init_xen_time(void)
void __init early_time_init(void)
{
struct cpu_time *t = &this_cpu(cpu_time);
- u64 tmp = init_pit_and_calibrate_tsc();
+ u64 tmp;
+
+ preinit_pit();
+ tmp = init_platform_timer();
set_time_scale(&t->tsc_scale, tmp);
t->local_tsc_stamp = boot_tsc_stamp;
@@ -1598,7 +1635,7 @@ int time_suspend(void)
int time_resume(void)
{
- init_pit_and_calibrate_tsc();
+ preinit_pit();
resume_platform_timer();

View File

@ -0,0 +1,200 @@
# Commit 350bc1a9d4ebc03b18a43cdafcb626618caace55
# Date 2016-08-04 10:52:49 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: support newer Intel CPU models
... as per the June 2016 edition of the SDM.
Also remove a couple of dead break statements as well as unused
*MSR_PM_LASTBRANCH* #define-s.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -61,14 +61,14 @@
#define GET_HW_RES_IN_NS(msr, val) \
do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
-#define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val) /* Atom E3000 only */
+#define GET_MC6_RES(val) GET_HW_RES_IN_NS(0x664, val)
#define GET_PC2_RES(val) GET_HW_RES_IN_NS(0x60D, val) /* SNB onwards */
#define GET_PC3_RES(val) GET_HW_RES_IN_NS(0x3F8, val)
#define GET_PC6_RES(val) GET_HW_RES_IN_NS(0x3F9, val)
#define GET_PC7_RES(val) GET_HW_RES_IN_NS(0x3FA, val)
-#define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val) /* some Haswells only */
-#define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val) /* some Haswells only */
-#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some Haswells only */
+#define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val)
+#define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val)
+#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val)
#define GET_CC1_RES(val) GET_HW_RES_IN_NS(0x660, val) /* Silvermont only */
#define GET_CC3_RES(val) GET_HW_RES_IN_NS(0x3FC, val)
#define GET_CC6_RES(val) GET_HW_RES_IN_NS(0x3FD, val)
@@ -142,6 +142,8 @@ static void do_get_hw_residencies(void *
{
/* 4th generation Intel Core (Haswell) */
case 0x45:
+ /* Xeon E5/E7 v4 (Broadwell) */
+ case 0x4F:
GET_PC8_RES(hw_res->pc8);
GET_PC9_RES(hw_res->pc9);
GET_PC10_RES(hw_res->pc10);
@@ -158,10 +160,11 @@ static void do_get_hw_residencies(void *
case 0x46:
/* Broadwell */
case 0x3D:
- case 0x4F:
+ case 0x47:
case 0x56:
- /* future */
+ /* Skylake */
case 0x4E:
+ case 0x5E:
GET_PC2_RES(hw_res->pc2);
GET_CC7_RES(hw_res->cc7);
/* fall through */
@@ -198,18 +201,28 @@ static void do_get_hw_residencies(void *
break;
/* Silvermont */
case 0x37:
- GET_MC6_RES(hw_res->mc6);
- /* fall through */
case 0x4A:
case 0x4D:
case 0x5A:
case 0x5D:
/* Airmont */
case 0x4C:
+ GET_MC6_RES(hw_res->mc6);
GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
GET_CC1_RES(hw_res->cc1);
GET_CC6_RES(hw_res->cc6);
break;
+ /* Goldmont */
+ case 0x5C:
+ case 0x5F:
+ GET_PC2_RES(hw_res->pc2);
+ GET_PC3_RES(hw_res->pc3);
+ GET_PC6_RES(hw_res->pc6);
+ GET_PC10_RES(hw_res->pc10);
+ GET_CC1_RES(hw_res->cc1);
+ GET_CC3_RES(hw_res->cc3);
+ GET_CC6_RES(hw_res->cc6);
+ break;
}
}
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2526,6 +2526,14 @@ static const struct lbr_info {
{ MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
{ MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
{ 0, 0 }
+}, sk_lbr[] = {
+ { MSR_IA32_LASTINTFROMIP, 1 },
+ { MSR_IA32_LASTINTTOIP, 1 },
+ { MSR_SKL_LASTBRANCH_TOS, 1 },
+ { MSR_SKL_LASTBRANCH_0_FROM_IP, NUM_MSR_SKL_LASTBRANCH },
+ { MSR_SKL_LASTBRANCH_0_TO_IP, NUM_MSR_SKL_LASTBRANCH },
+ { MSR_SKL_LASTBRANCH_0_INFO, NUM_MSR_SKL_LASTBRANCH },
+ { 0, 0 }
}, at_lbr[] = {
{ MSR_IA32_LASTINTFROMIP, 1 },
{ MSR_IA32_LASTINTTOIP, 1 },
@@ -2533,6 +2541,13 @@ static const struct lbr_info {
{ MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
{ MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
{ 0, 0 }
+}, gm_lbr[] = {
+ { MSR_IA32_LASTINTFROMIP, 1 },
+ { MSR_IA32_LASTINTTOIP, 1 },
+ { MSR_GM_LASTBRANCH_TOS, 1 },
+ { MSR_GM_LASTBRANCH_0_FROM_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
+ { MSR_GM_LASTBRANCH_0_TO_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
+ { 0, 0 }
};
static const struct lbr_info *last_branch_msr_get(void)
@@ -2547,7 +2562,6 @@ static const struct lbr_info *last_branc
/* Enhanced Core */
case 23:
return c2_lbr;
- break;
/* Nehalem */
case 26: case 30: case 31: case 46:
/* Westmere */
@@ -2559,11 +2573,13 @@ static const struct lbr_info *last_branc
/* Haswell */
case 60: case 63: case 69: case 70:
/* Broadwell */
- case 61: case 79: case 86:
- /* future */
- case 78:
+ case 61: case 71: case 79: case 86:
return nh_lbr;
- break;
+ /* Skylake */
+ case 78: case 94:
+ /* future */
+ case 142: case 158:
+ return sk_lbr;
/* Atom */
case 28: case 38: case 39: case 53: case 54:
/* Silvermont */
@@ -2573,7 +2589,9 @@ static const struct lbr_info *last_branc
/* Airmont */
case 76:
return at_lbr;
- break;
+ /* Goldmont */
+ case 92: case 95:
+ return gm_lbr;
}
break;
@@ -2583,7 +2601,6 @@ static const struct lbr_info *last_branc
/* Pentium4/Xeon with em64t */
case 3: case 4: case 6:
return p4_lbr;
- break;
}
break;
}
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -458,11 +458,6 @@
#define MSR_P4_LASTBRANCH_0_TO_LIP 0x000006c0
#define NUM_MSR_P4_LASTBRANCH_FROM_TO 16
-/* Pentium M (and Core) last-branch recording */
-#define MSR_PM_LASTBRANCH_TOS 0x000001c9
-#define MSR_PM_LASTBRANCH_0 0x00000040
-#define NUM_MSR_PM_LASTBRANCH 8
-
/* Core 2 and Atom last-branch recording */
#define MSR_C2_LASTBRANCH_TOS 0x000001c9
#define MSR_C2_LASTBRANCH_0_FROM_IP 0x00000040
@@ -470,6 +465,19 @@
#define NUM_MSR_C2_LASTBRANCH_FROM_TO 4
#define NUM_MSR_ATOM_LASTBRANCH_FROM_TO 8
+/* Skylake (and newer) last-branch recording */
+#define MSR_SKL_LASTBRANCH_TOS 0x000001c9
+#define MSR_SKL_LASTBRANCH_0_FROM_IP 0x00000680
+#define MSR_SKL_LASTBRANCH_0_TO_IP 0x000006c0
+#define MSR_SKL_LASTBRANCH_0_INFO 0x00000dc0
+#define NUM_MSR_SKL_LASTBRANCH 32
+
+/* Goldmont last-branch recording */
+#define MSR_GM_LASTBRANCH_TOS 0x000001c9
+#define MSR_GM_LASTBRANCH_0_FROM_IP 0x00000680
+#define MSR_GM_LASTBRANCH_0_TO_IP 0x000006c0
+#define NUM_MSR_GM_LASTBRANCH_FROM_TO 32
+
/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a

View File

@ -0,0 +1,102 @@
References: bsc#992224
# Commit d0d6597d3d682f324b6a79e3278e6f5bb6bad153
# Date 2016-08-11 13:35:50 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
page-alloc/x86: don't restrict DMA heap to node 0
When node zero has no memory, the DMA bit width will end up getting set
to 9, which is obviously not helpful to hold back a reasonable amount
of low enough memory for Dom0 to use for DMA purposes. Find the lowest
node with memory below 4Gb instead.
Introduce arch_get_dma_bitsize() to keep this arch-specific logic out
of common code.
Also adjust the original calculation: I think the subtraction of 1
should have been part of the flsl() argument rather than getting
applied to its result. And while previously the division by 4 was valid
to be done on the flsl() result, this now also needs to be converted,
as it should only be applied to the spanned pages value.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -355,11 +355,25 @@ void __init init_cpu_to_node(void)
}
}
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode_shift);
-EXPORT_SYMBOL(memnodemap);
-EXPORT_SYMBOL(node_data);
+unsigned int __init arch_get_dma_bitsize(void)
+{
+ unsigned int node;
+
+ for_each_online_node(node)
+ if ( node_spanned_pages(node) &&
+ !(node_start_pfn(node) >> (32 - PAGE_SHIFT)) )
+ break;
+ if ( node >= MAX_NUMNODES )
+ panic("No node with memory below 4Gb");
+
+ /*
+ * Try to not reserve the whole node's memory for DMA, but dividing
+ * its spanned pages by (arbitrarily chosen) 4.
+ */
+ return min_t(unsigned int,
+ flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
+ + PAGE_SHIFT, 32);
+}
static void dump_numa(unsigned char key)
{
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1368,16 +1368,7 @@ void __init end_boot_allocator(void)
init_heap_pages(virt_to_page(bootmem_region_list), 1);
if ( !dma_bitsize && (num_online_nodes() > 1) )
- {
-#ifdef CONFIG_X86
- dma_bitsize = min_t(unsigned int,
- flsl(NODE_DATA(0)->node_spanned_pages) - 1
- + PAGE_SHIFT - 2,
- 32);
-#else
- dma_bitsize = 32;
-#endif
- }
+ dma_bitsize = arch_get_dma_bitsize();
printk("Domain heap initialised");
if ( dma_bitsize )
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -17,6 +17,11 @@ static inline __attribute__((pure)) node
#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx))
#define __node_distance(a, b) (20)
+static inline unsigned int arch_get_dma_bitsize(void)
+{
+ return 32;
+}
+
#endif /* __ARCH_ARM_NUMA_H */
/*
* Local variables:
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -86,5 +86,6 @@ extern int valid_numa_range(u64 start, u
void srat_parse_regions(u64 addr);
extern u8 __node_distance(nodeid_t a, nodeid_t b);
+unsigned int arch_get_dma_bitsize(void);
#endif

View File

@ -0,0 +1,48 @@
References: bsc#978755 bsc#983697
# Commit c5b4805bcd6bc749a8717e7406faa4a0e95468b4
# Date 2016-08-19 17:03:33 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/EFI: don't apply relocations to l{2,3}_bootmap
Other than claimed in commit 2ce5963727's ("x86: construct the
{l2,l3}_bootmap at compile time") the initialization of the two page
tables doesn't take care of everything without further adjustment: The
compile time initialization obviously requires base relocations, and
those get processed after efi_arch_memory_setup(). Hence without
additional care the correctly initialized values may then get wrongly
"adjusted" again. Exempt the two tables from being subject to base
relocation.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper@citrix.com>
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -47,11 +47,23 @@ static void __init efi_arch_relocate_ima
for ( base_relocs = __base_relocs_start; base_relocs < __base_relocs_end; )
{
- unsigned int i, n;
+ unsigned int i = 0, n;
n = (base_relocs->size - sizeof(*base_relocs)) /
sizeof(*base_relocs->entries);
- for ( i = 0; i < n; ++i )
+
+ /*
+ * Relevant l{2,3}_bootmap entries get initialized explicitly in
+ * efi_arch_memory_setup(), so we must not apply relocations there.
+ * l2_identmap's first slot, otoh, should be handled normally, as
+ * efi_arch_memory_setup() won't touch it (xen_phys_start should
+ * never be zero).
+ */
+ if ( xen_phys_start + base_relocs->rva == (unsigned long)l3_bootmap ||
+ xen_phys_start + base_relocs->rva == (unsigned long)l2_bootmap )
+ i = n;
+
+ for ( ; i < n; ++i )
{
unsigned long addr = xen_phys_start + base_relocs->rva +
(base_relocs->entries[i] & 0xfff);

View File

@ -0,0 +1,52 @@
# Commit 2a99aa99fc84a45f505f84802af56b006d14c52e
# Date 2016-08-19 18:40:11 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
xen/physmap: Do not permit a guest to populate PoD pages for itself
PoD is supposed to be entirely transparent to guest, but this interface has
been left exposed for a long time.
The use of PoD requires careful co-ordination by the toolstack with the
XENMEM_{get,set}_pod_target hypercalls, and xenstore ballooning target. The
best a guest can do without toolstack cooperation is crash.
Furthermore, there are combinations of features (e.g. c/s c63868ff "libxl:
disallow PCI device assignment for HVM guest when PoD is enabled") which a
toolstack might wish to explicitly prohibit (in this case, because the two
simply don't function in combination). In such cases, the guest mustn't be
able to subvert the configuration chosen by the toolstack.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -140,14 +140,14 @@ static void populate_physmap(struct memo
struct page_info *page;
unsigned int i, j;
xen_pfn_t gpfn, mfn;
- struct domain *d = a->domain;
+ struct domain *d = a->domain, *curr_d = current->domain;
if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
a->nr_extents-1) )
return;
if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER :
- max_order(current->domain)) )
+ max_order(curr_d)) )
return;
for ( i = a->nr_done; i < a->nr_extents; i++ )
@@ -163,6 +163,10 @@ static void populate_physmap(struct memo
if ( a->memflags & MEMF_populate_on_demand )
{
+ /* Disallow populating PoD pages on oneself. */
+ if ( d == curr_d )
+ goto out;
+
if ( guest_physmap_mark_populate_on_demand(d, gpfn,
a->extent_order) < 0 )
goto out;

View File

@ -0,0 +1,25 @@
# Commit 81caac0cd0f56b0052a7884e6bd99e3a652ddd59
# Date 2016-08-29 16:05:31 +0200
# Author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/HVM: add guarding logic for VMX specific code
The struct hvm_domain.vmx is defined in a union along with the svm.
This can cause issues for SVM since this code is used in the common
scheduling code for x86. The logic must check for cpu_has_vmx before
accessing the hvm_domain.vmx structure.
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -611,7 +611,7 @@ unsigned long hvm_cr4_guest_reserved_bit
struct vcpu *v_ = (v); \
struct domain *d_ = v_->domain; \
if ( has_hvm_container_domain(d_) && \
- d_->arch.hvm_domain.vmx.vcpu_block ) \
+ (cpu_has_vmx && d_->arch.hvm_domain.vmx.vcpu_block) ) \
d_->arch.hvm_domain.vmx.vcpu_block(v_); \
})

View File

@ -0,0 +1,30 @@
# Commit 9daed8321b44c3ca82e412eb130f84e6b6c17dc5
# Date 2016-08-30 13:43:31 +0100
# Author Juergen Gross <jgross@suse.com>
# Committer Wei Liu <wei.liu2@citrix.com>
libxc: correct max_pfn calculation for saving domain
Commit 91e204d37f44913913776d0a89279721694f8b32 ("libxc: try to find
last used pfn when migrating") introduced a bug for the case of a
domain supporting the virtual mapped linear p2m list: the maximum pfn
of the domain calculated from the p2m memory allocation might be too
low.
Correct this.
Reported-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: Stefan Bader <stefan.bader@canonical.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
--- a/tools/libxc/xc_sr_save_x86_pv.c
+++ b/tools/libxc/xc_sr_save_x86_pv.c
@@ -430,6 +430,8 @@ static int map_p2m_list(struct xc_sr_con
if ( level == 2 )
{
+ if ( saved_idx == idx_end )
+ saved_idx++;
max_pfn = ((xen_pfn_t)saved_idx << 9) * fpp - 1;
if ( max_pfn < ctx->x86_pv.max_pfn )
{

View File

@ -0,0 +1,51 @@
# Commit 3b7cac5232012e167b284aba738fef1eceda33f8
# Date 2016-09-01 11:41:03 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/levelling: Restrict non-architectural OSXSAVE handling to emulated CPUID
There is no need to extend the workaround to the faulted CPUID view, as
Linux's dependence on the workaround is strictly via the emulated view.
This causes a guest kernel faulted CPUID to observe architectural behaviour
with respect to its CR4.OSXSAVE setting.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -972,6 +972,8 @@ void pv_cpuid(struct cpu_user_regs *regs
*
* Therefore, the leaking of Xen's OSXSAVE setting has become a
* defacto part of the PV ABI and can't reasonably be corrected.
+ * It can however be restricted to only the enlightened CPUID
+ * view, as seen by the guest kernel.
*
* The following situations and logic now applies:
*
@@ -985,14 +987,18 @@ void pv_cpuid(struct cpu_user_regs *regs
*
* - Enlightened CPUID or CPUID faulting available:
* Xen can fully control what is seen here. Guest kernels need
- * to see the leaked OSXSAVE, but guest userspace is given
- * architectural behaviour, to reflect the guest kernels
- * intentions.
+ * to see the leaked OSXSAVE via the enlightened path, but
+ * guest userspace and the native is given architectural
+ * behaviour.
+ *
+ * Emulated vs Faulted CPUID is distinguised based on whether a
+ * #UD or #GP is currently being serviced.
*/
/* OSXSAVE cleared by pv_featureset. Fast-forward CR4 back in. */
- if ( (guest_kernel_mode(curr, regs) &&
- (read_cr4() & X86_CR4_OSXSAVE)) ||
- (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) )
+ if ( (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ||
+ (regs->entry_vector == TRAP_invalid_op &&
+ guest_kernel_mode(curr, regs) &&
+ (read_cr4() & X86_CR4_OSXSAVE)) )
c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
/*

View File

@ -0,0 +1,80 @@
# Commit 33b23e5ab319a6bf9bfd38c4d9268fa6d9d072c6
# Date 2016-09-01 11:41:05 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/levelling: Pass a vcpu rather than a domain to ctxt_switch_levelling()
A subsequent change needs to special-case OSXSAVE handling, which is per-vcpu
rather than per-domain.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -203,9 +203,10 @@ static void __init noinline probe_maskin
* used to context switch to the default host state (by the cpu bringup-code,
* crash path, etc).
*/
-static void amd_ctxt_switch_levelling(const struct domain *nextd)
+static void amd_ctxt_switch_levelling(const struct vcpu *next)
{
struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
+ const struct domain *nextd = next ? next->domain : NULL;
const struct cpuidmasks *masks =
(nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -90,11 +90,11 @@ static const struct cpu_dev default_cpu
};
static const struct cpu_dev *this_cpu = &default_cpu;
-static void default_ctxt_switch_levelling(const struct domain *nextd)
+static void default_ctxt_switch_levelling(const struct vcpu *next)
{
/* Nop */
}
-void (* __read_mostly ctxt_switch_levelling)(const struct domain *nextd) =
+void (* __read_mostly ctxt_switch_levelling)(const struct vcpu *next) =
default_ctxt_switch_levelling;
bool_t opt_cpu_info;
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -151,9 +151,10 @@ static void __init probe_masking_msrs(vo
* used to context switch to the default host state (by the cpu bringup-code,
* crash path, etc).
*/
-static void intel_ctxt_switch_levelling(const struct domain *nextd)
+static void intel_ctxt_switch_levelling(const struct vcpu *next)
{
struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
+ const struct domain *nextd = next ? next->domain : NULL;
const struct cpuidmasks *masks;
if (cpu_has_cpuid_faulting) {
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2107,7 +2107,7 @@ void context_switch(struct vcpu *prev, s
load_segments(next);
}
- ctxt_switch_levelling(nextd);
+ ctxt_switch_levelling(next);
}
context_saved(prev);
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -211,7 +211,7 @@ extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 cpu_data[];
#define current_cpu_data cpu_data[smp_processor_id()]
-extern void (*ctxt_switch_levelling)(const struct domain *nextd);
+extern void (*ctxt_switch_levelling)(const struct vcpu *next);
extern u64 host_pat;
extern bool_t opt_cpu_info;

View File

@ -0,0 +1,164 @@
# Commit 08e7738ec3644350fbac0325085baac6b3c7cd11
# Date 2016-09-01 11:41:07 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/levelling: Provide architectural OSXSAVE handling to masked native CPUID
Contrary to c/s b2507fe7 "x86/domctl: Update PV domain cpumasks when setting
cpuid policy", Intel CPUID masks are applied after fast forwarding hardware
state, rather than before. (All behaviour in this regard appears completely
undocumented by both Intel and AMD).
Therefore, a set bit in the MSR causes hardware to be fast-forwarded, while a
clear bit forces the guests view to 0, even if Xen's CR4.OSXSAVE is actually
set.
This allows Xen to provide an architectural view of a guest kernel's
CR4.OSXSAVE setting to any native CPUID instruction issued by guest kernel or
userspace, even when masking is used.
The masking value defaults to 1 (if the guest has XSAVE available) to cause
fast-forwarding to occur for the HVM and idle vcpus.
When setting the MSRs, a PV guest kernel's choice of OSXSAVE is taken into
account, and clobbered from the MSR if not set. This causes the
fast-forwarding of Xen's CR4 state not to happen.
As a side effect however, levelling potentially needs updating on all PV CR4
changes.
Reported-by: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
# Commit 1461504ce3c414fc5dc717ce16f039d0742b455a
# Date 2016-09-02 08:12:29 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/levelling: fix breakage on older Intel boxes from c/s 08e7738
cpufeat_mask() yields an unsigned integer constant. As a result, taking its
complement causes zero extension rather than sign extension.
The result is that, when a guest OS has OSXSAVE disabled, all features in 1d
are hidden from native CPUID. Amongst other things, this causes the early
code in Linux to find no LAPIC, but for everything to appear fine later when
userspace is up and running.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Tested-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -211,6 +211,24 @@ static void amd_ctxt_switch_levelling(co
(nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
+ if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
+ uint64_t val = masks->_1cd;
+
+ /*
+ * OSXSAVE defaults to 1, which causes fast-forwarding of
+ * Xen's real setting. Clobber it if disabled by the guest
+ * kernel.
+ */
+ if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
+ !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
+ val &= ~((uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32);
+
+ if (unlikely(these_masks->_1cd != val)) {
+ wrmsr_amd(MSR_K8_FEATURE_MASK, val);
+ these_masks->_1cd = val;
+ }
+ }
+
#define LAZY(cap, msr, field) \
({ \
if (unlikely(these_masks->field != masks->field) && \
@@ -221,7 +239,6 @@ static void amd_ctxt_switch_levelling(co
} \
})
- LAZY(LCAP_1cd, MSR_K8_FEATURE_MASK, _1cd);
LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK, e1cd);
LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0);
LAZY(LCAP_6c, MSR_AMD_THRM_FEATURE_MASK, _6c);
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -182,6 +182,24 @@ static void intel_ctxt_switch_levelling(
masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
+ if (msr_basic) {
+ uint64_t val = masks->_1cd;
+
+ /*
+ * OSXSAVE defaults to 1, which causes fast-forwarding of
+ * Xen's real setting. Clobber it if disabled by the guest
+ * kernel.
+ */
+ if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
+ !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
+ val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE);
+
+ if (unlikely(these_masks->_1cd != val)) {
+ wrmsrl(msr_basic, val);
+ these_masks->_1cd = val;
+ }
+ }
+
#define LAZY(msr, field) \
({ \
if (unlikely(these_masks->field != masks->field) && \
@@ -192,7 +210,6 @@ static void intel_ctxt_switch_levelling(
} \
})
- LAZY(msr_basic, _1cd);
LAZY(msr_ext, e1cd);
LAZY(msr_xsave, Da1);
@@ -218,6 +235,11 @@ static void __init noinline intel_init_l
ecx &= opt_cpuid_mask_ecx;
edx &= opt_cpuid_mask_edx;
+ /* Fast-forward bits - Must be set. */
+ if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
+ ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+ edx |= cpufeat_mask(X86_FEATURE_APIC);
+
cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx;
}
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -110,10 +110,18 @@ static void update_domain_cpuid_info(str
case X86_VENDOR_INTEL:
/*
* Intel masking MSRs are documented as AND masks.
- * Experimentally, they are applied before OSXSAVE and APIC
+ * Experimentally, they are applied after OSXSAVE and APIC
* are fast-forwarded from real hardware state.
*/
mask &= ((uint64_t)edx << 32) | ecx;
+
+ if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
+ ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
+ else
+ ecx = 0;
+ edx = cpufeat_mask(X86_FEATURE_APIC);
+
+ mask |= ((uint64_t)edx << 32) | ecx;
break;
case X86_VENDOR_AMD:
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2696,6 +2696,7 @@ static int emulate_privileged_op(struct
case 4: /* Write CR4 */
v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg);
write_cr4(pv_guest_cr4_to_real_cr4(v));
+ ctxt_switch_levelling(v);
break;
default:

View File

@ -0,0 +1,48 @@
# Commit ee1cc4bfdca84d526805c4c72302c026f5e9cd94
# Date 2016-09-01 15:23:46 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/32on64: misc adjustments to call gate emulation
- There's no 32-bit displacement in 16-bit addressing mode.
- It is wrong to ASSERT() anything on parts of an instruction fetched
from guest memory.
- The two scaling bits of a SIB byte don't affect whether there is a
scaled index register or not.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3176,7 +3176,7 @@ static void emulate_gate_op(struct cpu_u
sib = insn_fetch(u8, base, eip, limit);
modrm = (modrm & ~7) | (sib & 7);
- if ( (sib >>= 3) != 4 )
+ if ( ((sib >>= 3) & 7) != 4 )
opnd_off = *(unsigned long *)
decode_register(sib & 7, regs, 0);
opnd_off <<= sib >> 3;
@@ -3236,7 +3236,10 @@ static void emulate_gate_op(struct cpu_u
opnd_off += insn_fetch(s8, base, eip, limit);
break;
case 0x80:
- opnd_off += insn_fetch(s32, base, eip, limit);
+ if ( ad_bytes > 2 )
+ opnd_off += insn_fetch(s32, base, eip, limit);
+ else
+ opnd_off += insn_fetch(s16, base, eip, limit);
break;
}
if ( ad_bytes == 4 )
@@ -3273,8 +3276,7 @@ static void emulate_gate_op(struct cpu_u
#define ad_default ad_bytes
opnd_sel = insn_fetch(u16, base, opnd_off, limit);
#undef ad_default
- ASSERT((opnd_sel & ~3) == regs->error_code);
- if ( dpl < (opnd_sel & 3) )
+ if ( (opnd_sel & ~3) != regs->error_code || dpl < (opnd_sel & 3) )
{
do_guest_trap(TRAP_gp_fault, regs, 1);
return;

View File

@ -0,0 +1,27 @@
References: bsc#989679
Subject: libxl: fix libxl_device_usbdev_list()
From: Juergen Gross jgross@suse.com Fri Sep 2 10:16:14 2016 +0200
Date: Fri Sep 2 09:54:42 2016 +0100:
Git: 74157a2f9886b55cd45714e58c80035bfe3e080c
Commit 03814de1d2ecdabedabceb8e728d934a632a43b9 ("libxl: Do not trust
frontend for vusb") introduced an error in libxl_device_usbdev_list().
Fix it.
Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Index: xen-4.7.0-testing/tools/libxl/libxl_pvusb.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/libxl_pvusb.c
+++ xen-4.7.0-testing/tools/libxl/libxl_pvusb.c
@@ -732,7 +732,7 @@ libxl_device_usbdev_list(libxl_ctx *ctx,
*num = 0;
libxl_vusbs_path = GCSPRINTF("%s/device/vusb",
- libxl__xs_libxl_path(gc, !domid));
+ libxl__xs_libxl_path(gc, domid));
usbctrls = libxl__xs_directory(gc, XBT_NULL, libxl_vusbs_path, &nc);
for (i = 0; i < nc; i++) {

View File

@ -0,0 +1,146 @@
References: bsc#991934
# Commit 9109bf55084398c4547b8956906410c158eb9a17
# Date 2016-09-02 14:17:55 +0200
# Author Dario Faggioli <dario.faggioli@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
credit1: fix a race when picking initial pCPU for a vCPU
In the Credit1 hunk of 9f358ddd69463 ("xen: Have
schedulers revise initial placement") csched_cpu_pick()
is called without taking the runqueue lock of the
(temporary) pCPU that the vCPU has been assigned to
(e.g., in XEN_DOMCTL_max_vcpus).
However, although 'hidden' in the IS_RUNQ_IDLE() macro,
that function does access the runq (for doing load
balancing calculations). Two scenarios are possible:
1) we are on cpu X, and IS_RUNQ_IDLE() peeks at cpu's
X own runq;
2) we are on cpu X, but IS_RUNQ_IDLE() peeks at some
other cpu's runq.
Scenario 2) absolutely requires that the appropriate
runq lock is taken. Scenario 1) works even without
taking the cpu's own runq lock. That is actually what
happens when _csched_pick_cpu() is called from
csched_vcpu_acct() (in turn, called by csched_tick()).
Races have been observed and reported (by both XenServer
own testing and OSSTest [1]), in the form of
IS_RUNQ_IDLE() falling over LIST_POISON, because we're
not currently holding the proper lock, in
csched_vcpu_insert(), when scenario 1) occurs.
However, for better robustness, from now on we always
ask for the proper runq lock to be held when calling
IS_RUNQ_IDLE() (which is also becoming a static inline
function instead of a macro).
In order to comply with that, we take the lock around
the call to _csched_cpu_pick() in csched_vcpu_acct().
[1] https://lists.xen.org/archives/html/xen-devel/2016-08/msg02144.html
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -84,9 +84,6 @@
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
-/* Is the first element of _cpu's runq its idle vcpu? */
-#define IS_RUNQ_IDLE(_cpu) (list_empty(RUNQ(_cpu)) || \
- is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu))
/*
@@ -248,6 +245,18 @@ __runq_elem(struct list_head *elem)
return list_entry(elem, struct csched_vcpu, runq_elem);
}
+/* Is the first element of cpu's runq (if any) cpu's idle vcpu? */
+static inline bool_t is_runq_idle(unsigned int cpu)
+{
+ /*
+ * We're peeking at cpu's runq, we must hold the proper lock.
+ */
+ ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
+
+ return list_empty(RUNQ(cpu)) ||
+ is_idle_vcpu(__runq_elem(RUNQ(cpu)->next)->vcpu);
+}
+
static inline void
__runq_insert(struct csched_vcpu *svc)
{
@@ -767,7 +776,7 @@ _csched_cpu_pick(const struct scheduler
* runnable vcpu on cpu, we add cpu to the idlers.
*/
cpumask_and(&idlers, &cpu_online_map, CSCHED_PRIV(ops)->idlers);
- if ( vc->processor == cpu && IS_RUNQ_IDLE(cpu) )
+ if ( vc->processor == cpu && is_runq_idle(cpu) )
__cpumask_set_cpu(cpu, &idlers);
cpumask_and(&cpus, &cpus, &idlers);
@@ -947,21 +956,33 @@ csched_vcpu_acct(struct csched_private *
/*
* Put this VCPU and domain back on the active list if it was
* idling.
- *
- * If it's been active a while, check if we'd be better off
- * migrating it to run elsewhere (see multi-core and multi-thread
- * support in csched_cpu_pick()).
*/
if ( list_empty(&svc->active_vcpu_elem) )
{
__csched_vcpu_acct_start(prv, svc);
}
- else if ( _csched_cpu_pick(ops, current, 0) != cpu )
+ else
{
- SCHED_VCPU_STAT_CRANK(svc, migrate_r);
- SCHED_STAT_CRANK(migrate_running);
- set_bit(_VPF_migrating, &current->pause_flags);
- cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+ unsigned int new_cpu;
+ unsigned long flags;
+ spinlock_t *lock = vcpu_schedule_lock_irqsave(current, &flags);
+
+ /*
+ * If it's been active a while, check if we'd be better off
+ * migrating it to run elsewhere (see multi-core and multi-thread
+ * support in csched_cpu_pick()).
+ */
+ new_cpu = _csched_cpu_pick(ops, current, 0);
+
+ vcpu_schedule_unlock_irqrestore(lock, flags, current);
+
+ if ( new_cpu != cpu )
+ {
+ SCHED_VCPU_STAT_CRANK(svc, migrate_r);
+ SCHED_STAT_CRANK(migrate_running);
+ set_bit(_VPF_migrating, &current->pause_flags);
+ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+ }
}
}
@@ -994,9 +1015,13 @@ csched_vcpu_insert(const struct schedule
BUG_ON( is_idle_vcpu(vc) );
- /* This is safe because vc isn't yet being scheduled */
+ /* csched_cpu_pick() looks in vc->processor's runq, so we need the lock. */
+ lock = vcpu_schedule_lock_irq(vc);
+
vc->processor = csched_cpu_pick(ops, vc);
+ spin_unlock_irq(lock);
+
lock = vcpu_schedule_lock_irq(vc);
if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )

View File

@ -0,0 +1,25 @@
# Commit f8f185dc4359a1cd8e7896dfbcacb54b473436c8
# Date 2016-09-02 14:18:52 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: correct PT_NOTE file position
Program and section headers disagreed about the file offset at which
the build ID note lives.
Reported-by: Sylvain Munaut <s.munaut@whatever-company.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/boot/mkelf32.c
+++ b/xen/arch/x86/boot/mkelf32.c
@@ -394,7 +394,7 @@ int main(int argc, char **argv)
note_phdr.p_paddr = note_base;
note_phdr.p_filesz = note_sz;
note_phdr.p_memsz = note_sz;
- note_phdr.p_offset = offset;
+ note_phdr.p_offset = RAW_OFFSET + offset;
/* Tack on the .note\0 */
out_shdr[2].sh_size += sizeof(out_shstrtab_extra);

View File

@ -0,0 +1,146 @@
# Commit 68eb1a4d92be58e26bd11d02b8e0317bd56294ac
# Date 2016-09-07 12:34:43 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VMX: correct feature checks for MPX and XSAVES
Their VMCS fields aren't tied to the respective base CPU feature flags
but instead to VMX specific ones.
Note that while the VMCS GUEST_BNDCFGS field exists if either of the
two respective features is available, MPX continues to get exposed to
guests only with both features present.
Also add the so far missing handling of
- GUEST_BNDCFGS in construct_vmcs()
- MSR_IA32_BNDCFGS in vmx_msr_{read,write}_intercept()
and mirror the extra correctness checks during MSR write to
vmx_load_msr().
Reported-by: "Rockosov, Dmitry" <dmitry.rockosov@intel.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: "Rockosov, Dmitry" <dmitry.rockosov@intel.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -168,8 +168,7 @@ static void __init calculate_hvm_feature
*/
if ( cpu_has_vmx )
{
- if ( !(vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) ||
- !(vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS) )
+ if ( !cpu_has_vmx_mpx )
__clear_bit(X86_FEATURE_MPX, hvm_featureset);
if ( !cpu_has_vmx_xsaves )
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1281,6 +1281,8 @@ static int construct_vmcs(struct vcpu *v
__vmwrite(HOST_PAT, host_pat);
__vmwrite(GUEST_PAT, guest_pat);
}
+ if ( cpu_has_vmx_mpx )
+ __vmwrite(GUEST_BNDCFGS, 0);
if ( cpu_has_vmx_xsaves )
__vmwrite(XSS_EXIT_BITMAP, 0);
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -786,14 +786,15 @@ static int vmx_load_vmcs_ctxt(struct vcp
static unsigned int __init vmx_init_msr(void)
{
- return !!cpu_has_mpx + !!cpu_has_xsaves;
+ return (cpu_has_mpx && cpu_has_vmx_mpx) +
+ (cpu_has_xsaves && cpu_has_vmx_xsaves);
}
static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
{
vmx_vmcs_enter(v);
- if ( cpu_has_mpx )
+ if ( cpu_has_mpx && cpu_has_vmx_mpx )
{
__vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val);
if ( ctxt->msr[ctxt->count].val )
@@ -802,7 +803,7 @@ static void vmx_save_msr(struct vcpu *v,
vmx_vmcs_exit(v);
- if ( cpu_has_xsaves )
+ if ( cpu_has_xsaves && cpu_has_vmx_xsaves )
{
ctxt->msr[ctxt->count].val = v->arch.hvm_vcpu.msr_xss;
if ( ctxt->msr[ctxt->count].val )
@@ -822,13 +823,15 @@ static int vmx_load_msr(struct vcpu *v,
switch ( ctxt->msr[i].index )
{
case MSR_IA32_BNDCFGS:
- if ( cpu_has_mpx )
+ if ( cpu_has_mpx && cpu_has_vmx_mpx &&
+ is_canonical_address(ctxt->msr[i].val) &&
+ !(ctxt->msr[i].val & IA32_BNDCFGS_RESERVED) )
__vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val);
else if ( ctxt->msr[i].val )
err = -ENXIO;
break;
case MSR_IA32_XSS:
- if ( cpu_has_xsaves )
+ if ( cpu_has_xsaves && cpu_has_vmx_xsaves )
v->arch.hvm_vcpu.msr_xss = ctxt->msr[i].val;
else
err = -ENXIO;
@@ -2640,6 +2643,11 @@ static int vmx_msr_read_intercept(unsign
case MSR_IA32_DEBUGCTLMSR:
__vmread(GUEST_IA32_DEBUGCTL, msr_content);
break;
+ case MSR_IA32_BNDCFGS:
+ if ( !cpu_has_mpx || !cpu_has_vmx_mpx )
+ goto gp_fault;
+ __vmread(GUEST_BNDCFGS, msr_content);
+ break;
case IA32_FEATURE_CONTROL_MSR:
case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC:
if ( !nvmx_msr_read_intercept(msr, msr_content) )
@@ -2866,6 +2874,13 @@ static int vmx_msr_write_intercept(unsig
break;
}
+ case MSR_IA32_BNDCFGS:
+ if ( !cpu_has_mpx || !cpu_has_vmx_mpx ||
+ !is_canonical_address(msr_content) ||
+ (msr_content & IA32_BNDCFGS_RESERVED) )
+ goto gp_fault;
+ __vmwrite(GUEST_BNDCFGS, msr_content);
+ break;
case IA32_FEATURE_CONTROL_MSR:
case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_TRUE_ENTRY_CTLS:
if ( !nvmx_msr_write_intercept(msr, msr_content) )
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -375,6 +375,9 @@ extern u64 vmx_ept_vpid_cap;
(vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS)
#define cpu_has_vmx_pml \
(vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_PML)
+#define cpu_has_vmx_mpx \
+ ((vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) && \
+ (vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS))
#define cpu_has_vmx_xsaves \
(vmx_secondary_exec_control & SECONDARY_EXEC_XSAVES)
#define cpu_has_vmx_tsc_scaling \
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -56,7 +56,10 @@
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
-#define MSR_IA32_BNDCFGS 0x00000D90
+#define MSR_IA32_BNDCFGS 0x00000d90
+#define IA32_BNDCFGS_ENABLE 0x00000001
+#define IA32_BNDCFGS_PRESERVE 0x00000002
+#define IA32_BNDCFGS_RESERVED 0x00000ffc
#define MSR_IA32_XSS 0x00000da0

View File

@ -0,0 +1,33 @@
References: bsc#995785 CVE-2016-7092 XSA-185
# Commit c844d637d92a75854ea5c8d4e5ca34302a9f623c
# Date 2016-09-08 14:14:53 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/32on64: don't allow recursive page tables from L3
L3 entries are special in PAE mode, and hence can't reasonably be used
for setting up recursive (and hence linear) page table mappings. Since
abuse is possible when the guest in fact gets run on 4-level page
tables, this needs to be excluded explicitly.
This is XSA-185 / CVE-2016-7092.
Reported-by: Jérémie Boutoille <jboutoille@ext.quarkslab.com>
Reported-by: "栾尚聪(好风)" <shangcong.lsc@alibaba-inc.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1123,7 +1123,9 @@ get_page_from_l3e(
rc = get_page_and_type_from_pagenr(
l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1);
- if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
+ if ( unlikely(rc == -EINVAL) &&
+ !is_pv_32bit_domain(d) &&
+ get_l3_linear_pagetable(l3e, pfn, d) )
rc = 0;
return rc;

View File

@ -0,0 +1,67 @@
References: bsc#995789 CVE-2016-7093 XSA-186
# Commit e9575f980df81aeb0e5b6139f485fd6f7bb7f5b6
# Date 2016-09-08 14:15:53 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/emulate: Correct boundary interactions of emulated instructions
This reverts most of c/s 0640ffb6 "x86emul: fix rIP handling".
Experimentally, in long mode processors will execute an instruction stream
which crosses the 64bit -1 -> 0 virtual boundary, whether the instruction
boundary is aligned on the virtual boundary, or is misaligned.
In compatibility mode, Intel processors will execute an instruction stream
which crosses the 32bit -1 -> 0 virtual boundary, while AMD processors raise a
segmentation fault. Xen's segmentation behaviour matches AMD.
For 16bit code, hardware does not ever truncate %ip. %eip is always used and
behaves normally as a 32bit register, including in 16bit protected mode
segments, as well as in Real and Unreal mode.
This is XSA-186 / CVE-2016-7093.
Reported-by: Brian Marcotte <marcotte@panix.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1538,10 +1538,6 @@ x86_emulate(
#endif
}
- /* Truncate rIP to def_ad_bytes (2 or 4) if necessary. */
- if ( def_ad_bytes < sizeof(_regs.eip) )
- _regs.eip &= (1UL << (def_ad_bytes * 8)) - 1;
-
/* Prefix bytes. */
for ( ; ; )
{
@@ -3843,21 +3839,11 @@ x86_emulate(
/* Commit shadow register state. */
_regs.eflags &= ~EFLG_RF;
- switch ( __builtin_expect(def_ad_bytes, sizeof(_regs.eip)) )
- {
- uint16_t ip;
- case 2:
- ip = _regs.eip;
- _regs.eip = ctxt->regs->eip;
- *(uint16_t *)&_regs.eip = ip;
- break;
-#ifdef __x86_64__
- case 4:
- _regs.rip = _regs._eip;
- break;
-#endif
- }
+ /* Zero the upper 32 bits of %rip if not in long mode. */
+ if ( def_ad_bytes < sizeof(_regs.eip) )
+ _regs.eip = (uint32_t)_regs.eip;
+
*ctxt->regs = _regs;
done:

View File

@ -0,0 +1,47 @@
References: bsc#995792 CVE-2016-7094 XSA-187
# Commit a9f3b3bad17d91e2067fc00d51b0302349570d08
# Date 2016-09-08 14:16:26 +0200
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/shadow: Avoid overflowing sh_ctxt->seg_reg[]
hvm_get_seg_reg() does not perform a range check on its input segment, calls
hvm_get_segment_register() and writes straight into sh_ctxt->seg_reg[].
x86_seg_none is outside the bounds of sh_ctxt->seg_reg[], and will hit a BUG()
in {vmx,svm}_get_segment_register().
HVM guests running with shadow paging can end up performing a virtual to
linear translation with x86_seg_none. This is used for addresses which are
already linear. However, none of this is a legitimate pagetable update, so
fail the emulation in such a case.
This is XSA-187 / CVE-2016-7094.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Tim Deegan <tim@xen.org>
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -140,9 +140,18 @@ static int hvm_translate_linear_addr(
struct sh_emulate_ctxt *sh_ctxt,
unsigned long *paddr)
{
- struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt);
+ struct segment_register *reg;
int okay;
+ /*
+ * Can arrive here with non-user segments. However, no such cirucmstance
+ * is part of a legitimate pagetable update, so fail the emulation.
+ */
+ if ( !is_x86_user_segment(seg) )
+ return X86EMUL_UNHANDLEABLE;
+
+ reg = hvm_get_seg_reg(seg, sh_ctxt);
+
okay = hvm_virtual_to_linear_addr(
seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr);

View File

@ -0,0 +1,32 @@
References: bsc#995789
# Commit 7b5cee79dad24e7006059667b02bd7de685d8ee5
# Date 2016-09-08 16:39:46 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
hvm/fep: Allow testing of instructions crossing the -1 -> 0 virtual boundary
The Force Emulation Prefix is named to follow its PV counterpart for cpuid or
rdtsc, but isn't really an instruction prefix. It behaves as a break-out into
Xen, with the purpose of emulating the next instruction in the current state.
It is important to be able to test legal situations which occur in real
hardware, including instructions which cross certain boundaries, and
instructions starting at 0.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3905,6 +3905,10 @@ void hvm_ud_intercept(struct cpu_user_re
{
regs->eip += sizeof(sig);
regs->eflags &= ~X86_EFLAGS_RF;
+
+ /* Zero the upper 32 bits of %rip if not in long mode. */
+ if ( !(hvm_long_mode_enabled(cur) && cs.attr.fields.l) )
+ regs->eip = regs->_eip;
}
}

View File

@ -0,0 +1,203 @@
References: bsc#995792
# Commit 4fa0105d95be6e7145a1f6fd1036ccd43976228c
# Date 2016-09-08 16:39:46 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/segment: Bounds check accesses to emulation ctxt->seg_reg[]
HVM HAP codepaths have space for all segment registers in the seg_reg[]
cache (with x86_seg_none still risking an array overrun), while the shadow
codepaths only have space for the user segments.
Range check the input segment of *_get_seg_reg() against the size of the array
used to cache the results, to avoid overruns in the case that the callers
don't filter their input suitably.
Subsume the is_x86_user_segment(seg) checks from the shadow code, which were
an incomplete attempt at range checking, and are now superseded. Make
hvm_get_seg_reg() static, as it is not used outside of shadow/common.c.
No functional change, but far easier to reason that no overflow is possible.
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tim Deegan <tim@xen.org>
Acked-by: Jan Beulich <jbeulich@suse.com>
# Commit 4c47c47938ea24c73d9459f9f0b6923513772b5d
# Date 2016-09-09 15:31:01 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
xen/x86: Fix build with clang following c/s 4fa0105
https://travis-ci.org/xen-project/xen/jobs/158494027#L2344
Clang complains:
emulate.c:2016:14: error: comparison of unsigned enum expression < 0
is always false [-Werror,-Wtautological-compare]
if ( seg < 0 || seg >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
~~~ ^ ~
Clang is wrong to raise a warning like this. The signed-ness of an enum is
implementation defined in C, and robust code must not assume the choices made
by the compiler.
In this case, dropping the < 0 check creates a latent bug which would result
in an array underflow when compiled with a compiler which chooses a signed
enum.
Work around the bug by explicitly pulling seg into an unsigned integer, and
only perform the upper bounds check.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -534,6 +534,8 @@ static int hvmemul_virtual_to_linear(
*reps = min_t(unsigned long, *reps, max_reps);
reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+ if ( IS_ERR(reg) )
+ return -PTR_ERR(reg);
if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) )
{
@@ -1369,6 +1371,10 @@ static int hvmemul_read_segment(
struct hvm_emulate_ctxt *hvmemul_ctxt =
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+
+ if ( IS_ERR(sreg) )
+ return -PTR_ERR(sreg);
+
memcpy(reg, sreg, sizeof(struct segment_register));
return X86EMUL_OKAY;
}
@@ -1382,6 +1388,9 @@ static int hvmemul_write_segment(
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+ if ( IS_ERR(sreg) )
+ return -PTR_ERR(sreg);
+
memcpy(sreg, reg, sizeof(struct segment_register));
__set_bit(seg, &hvmemul_ctxt->seg_reg_dirty);
@@ -1934,13 +1943,22 @@ void hvm_emulate_writeback(
}
}
+/*
+ * Callers which pass a known in-range x86_segment can rely on the return
+ * pointer being valid. Other callers must explicitly check for errors.
+ */
struct segment_register *hvmemul_get_seg_reg(
enum x86_segment seg,
struct hvm_emulate_ctxt *hvmemul_ctxt)
{
- if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) )
- hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
- return &hvmemul_ctxt->seg_reg[seg];
+ unsigned int idx = seg;
+
+ if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
+ return ERR_PTR(-X86EMUL_UNHANDLEABLE);
+
+ if ( !__test_and_set_bit(idx, &hvmemul_ctxt->seg_reg_accessed) )
+ hvm_get_segment_register(current, idx, &hvmemul_ctxt->seg_reg[idx]);
+ return &hvmemul_ctxt->seg_reg[idx];
}
static const char *guest_x86_mode_to_str(int mode)
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -123,12 +123,22 @@ __initcall(shadow_audit_key_init);
/* x86 emulator support for the shadow code
*/
-struct segment_register *hvm_get_seg_reg(
+/*
+ * Callers which pass a known in-range x86_segment can rely on the return
+ * pointer being valid. Other callers must explicitly check for errors.
+ */
+static struct segment_register *hvm_get_seg_reg(
enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt)
{
- struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg];
- if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) )
- hvm_get_segment_register(current, seg, seg_reg);
+ unsigned int idx = seg;
+ struct segment_register *seg_reg;
+
+ if ( idx >= ARRAY_SIZE(sh_ctxt->seg_reg) )
+ return ERR_PTR(-X86EMUL_UNHANDLEABLE);
+
+ seg_reg = &sh_ctxt->seg_reg[idx];
+ if ( !__test_and_set_bit(idx, &sh_ctxt->valid_seg_regs) )
+ hvm_get_segment_register(current, idx, seg_reg);
return seg_reg;
}
@@ -143,14 +153,9 @@ static int hvm_translate_linear_addr(
struct segment_register *reg;
int okay;
- /*
- * Can arrive here with non-user segments. However, no such cirucmstance
- * is part of a legitimate pagetable update, so fail the emulation.
- */
- if ( !is_x86_user_segment(seg) )
- return X86EMUL_UNHANDLEABLE;
-
reg = hvm_get_seg_reg(seg, sh_ctxt);
+ if ( IS_ERR(reg) )
+ return -PTR_ERR(reg);
okay = hvm_virtual_to_linear_addr(
seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr);
@@ -253,9 +258,6 @@ hvm_emulate_write(enum x86_segment seg,
unsigned long addr;
int rc;
- if ( !is_x86_user_segment(seg) )
- return X86EMUL_UNHANDLEABLE;
-
/* How many emulations could we save if we unshadowed on stack writes? */
if ( seg == x86_seg_ss )
perfc_incr(shadow_fault_emulate_stack);
@@ -283,7 +285,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg
unsigned long addr, old, new;
int rc;
- if ( !is_x86_user_segment(seg) || bytes > sizeof(long) )
+ if ( bytes > sizeof(long) )
return X86EMUL_UNHANDLEABLE;
rc = hvm_translate_linear_addr(
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -740,8 +740,6 @@ const struct x86_emulate_ops *shadow_ini
struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
void shadow_continue_emulation(
struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
-struct segment_register *hvm_get_seg_reg(
- enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt);
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
/**************************************************************************/
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -13,6 +13,7 @@
#define __ASM_X86_HVM_EMULATE_H__
#include <xen/config.h>
+#include <xen/err.h>
#include <asm/hvm/hvm.h>
#include <asm/x86_emulate.h>

View File

@ -0,0 +1,49 @@
References: bsc#964644 CVE-2013-4533
Subject: pxa2xx: avoid buffer overrun on incoming migration
From: Michael S. Tsirkin mst@redhat.com Thu Apr 3 19:51:57 2014 +0300
Date: Mon May 5 22:15:02 2014 +0200:
Git: caa881abe0e01f9931125a0977ec33c5343e4aa7
CVE-2013-4533
s->rx_level is read from the wire and used to determine how many bytes
to subsequently read into s->rx_fifo[]. If s->rx_level exceeds the
length of s->rx_fifo[] the buffer can be overrun with arbitrary data
from the wire.
Fix this by validating rx_level against the size of s->rx_fifo.
Cc: Don Koch <dkoch@verizon.com>
Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Don Koch <dkoch@verizon.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pxa2xx.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pxa2xx.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pxa2xx.c
@@ -847,7 +847,7 @@ static void pxa2xx_ssp_save(QEMUFile *f,
static int pxa2xx_ssp_load(QEMUFile *f, void *opaque, int version_id)
{
struct pxa2xx_ssp_s *s = (struct pxa2xx_ssp_s *) opaque;
- int i;
+ int i, v;
s->enable = qemu_get_be32(f);
@@ -861,7 +861,11 @@ static int pxa2xx_ssp_load(QEMUFile *f,
qemu_get_8s(f, &s->ssrsa);
qemu_get_8s(f, &s->ssacd);
- s->rx_level = qemu_get_byte(f);
+ v = qemu_get_byte(f);
+ if (v < 0 || v > ARRAY_SIZE(s->rx_fifo)) {
+ return -EINVAL;
+ }
+ s->rx_level = v;
s->rx_start = 0;
for (i = 0; i < s->rx_level; i ++)
s->rx_fifo[i] = qemu_get_byte(f);

View File

@ -0,0 +1,56 @@
References: bsc#964452 CVE-2013-4534
Subject: openpic: avoid buffer overrun on incoming migration
From: Michael Roth mdroth@linux.vnet.ibm.com Mon Apr 28 16:08:17 2014 +0300
Date: Mon May 5 22:15:03 2014 +0200:
Git: 73d963c0a75cb99c6aaa3f6f25e427aa0b35a02e
CVE-2013-4534
opp->nb_cpus is read from the wire and used to determine how many
IRQDest elements to read into opp->dst[]. If the value exceeds the
length of opp->dst[], MAX_CPU, opp->dst[] can be overrun with arbitrary
data from the wire.
Fix this by failing migration if the value read from the wire exceeds
MAX_CPU.
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/openpic.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/openpic.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/openpic.c
@@ -36,6 +36,7 @@
#include "ppc_mac.h"
#include "pci.h"
#include "openpic.h"
+#include "qemu/qerror.h"
//#define DEBUG_OPENPIC
@@ -1132,7 +1133,7 @@ static void openpic_load_IRQ_queue(QEMUF
static int openpic_load(QEMUFile* f, void *opaque, int version_id)
{
openpic_t *opp = (openpic_t *)opaque;
- unsigned int i;
+ unsigned int i, nb_cpus;
if (version_id != 1)
return -EINVAL;
@@ -1153,7 +1154,11 @@ static int openpic_load(QEMUFile* f, voi
qemu_get_sbe32s(f, &opp->src[i].pending);
}
- qemu_get_sbe32s(f, &opp->nb_cpus);
+ qemu_get_be32s(f, &nb_cpus);
+ if (opp->nb_cpus != nb_cpus) {
+ return -EINVAL;
+ }
+ assert(nb_cpus > 0 && nb_cpus <= MAX_CPU);
for (i = 0; i < opp->nb_cpus; i++) {
qemu_get_be32s(f, &opp->dst[i].tfrr);

View File

@ -0,0 +1,39 @@
References: bsc#962642 CVE-2013-4537
Subject: ssi-sd: fix buffer overrun on invalid state load
From: Michael S. Tsirkin mst@redhat.com Mon Apr 28 16:08:14 2014 +0300
Date: Mon May 5 22:15:03 2014 +0200:
Git: a9c380db3b8c6af19546a68145c8d1438a09c92b
CVE-2013-4537
s->arglen is taken from wire and used as idx
in ssi_sd_transfer().
Validate it before access.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ssi-sd.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ssi-sd.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ssi-sd.c
@@ -221,8 +221,17 @@ static int ssi_sd_load(QEMUFile *f, void
for (i = 0; i < 5; i++)
s->response[i] = qemu_get_be32(f);
s->arglen = qemu_get_be32(f);
+ if (s->mode == SSI_SD_CMDARG &&
+ (s->arglen < 0 || s->arglen >= ARRAY_SIZE(s->cmdarg))) {
+ return -EINVAL;
+ }
s->response_pos = qemu_get_be32(f);
s->stopping = qemu_get_be32(f);
+ if (s->mode == SSI_SD_RESPONSE &&
+ (s->response_pos < 0 || s->response_pos >= ARRAY_SIZE(s->response) ||
+ (!s->stopping && s->arglen > ARRAY_SIZE(s->response)))) {
+ return -EINVAL;
+ }
return 0;
}

View File

@ -0,0 +1,28 @@
References: bsc#962335 CVE-2013-4538
s->cmd_len is used as an index in ssd0323_transfer() to store a 32-bit field.
Possibly this field might then be supplied by the guest to overwrite a
return address somewhere. The same applies to the row/col fields, which are
indices into the framebuffer array.
To fix, validate after load.
Signed-off-by: Michael S. Tsirkin <address@hidden>
---
hw/display/ssd0323.c | 3 +++
1 file changed, 3 insertions(+)
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ssd0323.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ssd0323.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ssd0323.c
@@ -304,6 +304,9 @@ static int ssd0323_load(QEMUFile *f, voi
return -EINVAL;
s->cmd_len = qemu_get_be32(f);
+ if (s->cmd_len < 0 || s->cmd_len > ARRAY_SIZE(s->cmd_data)) {
+ return -EINVAL;
+ }
s->cmd = qemu_get_be32(f);
for (i = 0; i < 8; i++)
s->cmd_data[i] = qemu_get_be32(f);

View File

@ -0,0 +1,43 @@
Subject: tsc210x: fix buffer overrun on invalid state load
From: Michael S. Tsirkin mst@redhat.com Thu Apr 3 19:52:09 2014 +0300
Date: Mon May 5 22:15:02 2014 +0200:
Git: 5193be3be35f29a35bc465036cd64ad60d43385f
CVE-2013-4539
s->precision, nextprecision, function and nextfunction
come from the wire and are used as array indices:
precision/nextprecision into resolution[] (in TSC_CUT_RESOLUTION),
function/nextfunction into mode_regs[].
Validate after load to avoid buffer overrun.
Cc: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/tsc210x.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/tsc210x.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/tsc210x.c
@@ -1077,9 +1077,21 @@ static int tsc210x_load(QEMUFile *f, voi
s->enabled = qemu_get_byte(f);
s->host_mode = qemu_get_byte(f);
s->function = qemu_get_byte(f);
+ if (s->function < 0 || s->function >= ARRAY_SIZE(mode_regs)) {
+ return -EINVAL;
+ }
s->nextfunction = qemu_get_byte(f);
+ if (s->nextfunction < 0 || s->nextfunction >= ARRAY_SIZE(mode_regs)) {
+ return -EINVAL;
+ }
s->precision = qemu_get_byte(f);
+ if (s->precision < 0 || s->precision >= ARRAY_SIZE(resolution)) {
+ return -EINVAL;
+ }
s->nextprecision = qemu_get_byte(f);
+ if (s->nextprecision < 0 || s->nextprecision >= ARRAY_SIZE(resolution)) {
+ return -EINVAL;
+ }
s->filter = qemu_get_byte(f);
s->pin_func = qemu_get_byte(f);
s->ref = qemu_get_byte(f);

View File

@ -0,0 +1,38 @@
References: bsc#964925
Subject: qcow1: Validate L2 table size (CVE-2014-0222)
From: Kevin Wolf kwolf@redhat.com Thu May 15 16:10:11 2014 +0200
Date: Mon May 19 11:36:49 2014 +0200:
Git: 42eb58179b3b215bb507da3262b682b8a2ec10b5
Too large L2 table sizes cause unbounded allocations. Images actually
created by qemu-img only have 512 byte or 4k L2 tables.
To keep things consistent with cluster sizes, allow ranges between 512
bytes and 64k (in fact, down to 1 entry = 8 bytes is technically
working, but L2 table sizes smaller than a cluster don't make a lot of
sense).
This also means that the number of bytes on the virtual disk that are
described by the same L2 table is limited to at most 8k * 64k or 2^29,
preventively avoiding any integer overflows.
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
Index: xen-4.6.0-testing/tools/blktap2/drivers/block-qcow.c
===================================================================
--- xen-4.6.0-testing.orig/tools/blktap2/drivers/block-qcow.c
+++ xen-4.6.0-testing/tools/blktap2/drivers/block-qcow.c
@@ -909,6 +909,10 @@ int tdqcow_open (td_driver_t *driver, co
if (header.size <= 1 || header.cluster_bits < 9)
goto fail;
+ /* l2_bits specifies number of entries; storing a uint64_t in each entry,
+ * so bytes = num_entries << 3. */
+ if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3)
+ goto fail;
if (header.crypt_method > QCOW_CRYPT_AES)
goto fail;
s->crypt_method_header = header.crypt_method;

View File

@ -0,0 +1,38 @@
References: bsc#877642
Subject: qcow1: Validate L2 table size (CVE-2014-0222)
From: Kevin Wolf kwolf@redhat.com Thu May 15 16:10:11 2014 +0200
Date: Mon May 19 11:36:49 2014 +0200:
Git: 42eb58179b3b215bb507da3262b682b8a2ec10b5
Too large L2 table sizes cause unbounded allocations. Images actually
created by qemu-img only have 512 byte or 4k L2 tables.
To keep things consistent with cluster sizes, allow ranges between 512
bytes and 64k (in fact, down to 1 entry = 8 bytes is technically
working, but L2 table sizes smaller than a cluster don't make a lot of
sense).
This also means that the number of bytes on the virtual disk that are
described by the same L2 table is limited to at most 8k * 64k or 2^29,
preventively avoiding any integer overflows.
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/block-qcow.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/block-qcow.c
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/block-qcow.c
@@ -126,6 +126,10 @@ static int qcow_open(BlockDriverState *b
goto fail;
if (header.size <= 1 || header.cluster_bits < 9)
goto fail;
+ /* l2_bits specifies number of entries; storing a uint64_t in each entry,
+ * so bytes = num_entries << 3. */
+ if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3)
+ goto fail;
if (header.crypt_method > QCOW_CRYPT_AES)
goto fail;
s->crypt_method_header = header.crypt_method;

View File

@ -0,0 +1,36 @@
Subject: slirp: udp: fix NULL pointer dereference because of uninitialized socket
From: Petr Matousek pmatouse@redhat.com Thu Sep 18 08:35:37 2014 +0200
Date: Tue Sep 23 19:15:05 2014 +0100:
Git: 01f7cecf0037997cb0e58ec0d56bf9b5a6f7cb2a
When guest sends udp packet with source port and source addr 0,
uninitialized socket is picked up when looking for matching and already
created udp sockets, and later passed to sosendto() where NULL pointer
dereference is hit during so->slirp->vnetwork_mask.s_addr access.
Fix this by checking that the socket is not just a socket stub.
This is CVE-2014-3640.
Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reported-by: Xavier Mehrenberger <xavier.mehrenberger@airbus.com>
Reported-by: Stephane Duverger <stephane.duverger@eads.net>
Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Message-id: 20140918063537.GX9321@dhcp-25-225.brq.redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/slirp/udp.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/slirp/udp.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/slirp/udp.c
@@ -168,7 +168,7 @@ udp_input(m, iphlen)
* Locate pcb for datagram.
*/
so = udp_last_so;
- if (so->so_lport != uh->uh_sport ||
+ if (so == &slirp->udb || so->so_lport != uh->uh_sport ||
so->so_laddr.s_addr != ip->ip_src.s_addr) {
struct socket *tmp;

View File

@ -0,0 +1,39 @@
References: bsc#932267
Subject: slirp: use less predictable directory name in /tmp for smb config (CVE-2015-4037)
From: Michael Tokarev mjt@tls.msk.ru Thu May 28 14:12:26 2015 +0300
Date: Wed Jun 3 14:21:45 2015 +0300:
Git: 8b8f1c7e9ddb2e88a144638f6527bf70e32343e3
In this version I used mkdtemp(3) which is:
_BSD_SOURCE
|| /* Since glibc 2.10: */
(_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)
(POSIX.1-2008), so should be available on systems we care about.
While at it, reset the resulting directory name within smb structure
on error so cleanup function won't try to remove directory which we
failed to create.
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.c
===================================================================
--- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/net.c
+++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/net.c
@@ -624,9 +624,10 @@ void net_slirp_smb(const char *exported_
}
/* XXX: better tmp dir construction */
- snprintf(smb_dir, sizeof(smb_dir), "/tmp/qemu-smb.%d", getpid());
- if (mkdir(smb_dir, 0700) < 0) {
+ snprintf(smb_dir, sizeof(smb_dir), "/tmp/qemu-smb.XXXXXX");
+ if (!mkdtemp(smb_dir)) {
fprintf(stderr, "qemu: could not create samba server dir '%s'\n", smb_dir);
+ smb_dir[0] = 0;
exit(1);
}
snprintf(smb_conf, sizeof(smb_conf), "%s/%s", smb_dir, "smb.conf");

View File

@ -0,0 +1,54 @@
Subject: ATAPI: STARTSTOPUNIT only eject/load media if powercondition is 0
From: Ronnie Sahlberg ronniesahlberg@gmail.com Tue Jul 31 11:28:26 2012 +1000
Date: Wed Sep 12 15:50:09 2012 +0200:
Git: ce560dcf20c14194db5ef3b9fc1ea592d4e68109
The START STOP UNIT command will only eject/load media if
power condition is zero.
If power condition is !0 then LOEJ and START will be ignored.
From MMC (sbc contains similar wordings too)
The Power Conditions field requests the block device to be placed
in the power condition defined in
Table 558. If this field has a value other than 0h then the Start
and LoEj bits shall be ignored.
Signed-off-by: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
From aa851d30acfbb9580098ac1dc82885530cb8b3c1 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 3 Jun 2015 14:17:46 +0200
Subject: [PATCH 2/3] ide/atapi: Fix START STOP UNIT command completion
The command must be completed on all code paths. START STOP UNIT with
pwrcnd set should succeed without doing anything.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/ide/atapi.c | 1 +
1 file changed, 1 insertion(+)
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ide.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/ide.c
@@ -2098,9 +2098,16 @@ static void ide_atapi_cmd(IDEState *s)
break;
case GPCMD_START_STOP_UNIT:
{
- int start, eject;
+ int start, eject, pwrcnd;
start = packet[4] & 1;
eject = (packet[4] >> 1) & 1;
+ pwrcnd = buf[4] & 0xf0;
+
+ if (pwrcnd) {
+ /* eject/load only happens for power condition == 0 */
+ ide_atapi_cmd_ok(s);
+ return;
+ }
if (eject && !start) {
/* eject the disk */

View File

@ -0,0 +1,30 @@
References: bsc#964947 CVE-2015-5278
Subject: net: avoid infinite loop when receiving packets(CVE-2015-5278)
From: P J P pjp@fedoraproject.org Tue Sep 15 16:46:59 2015 +0530
Date: Tue Sep 15 12:51:14 2015 +0100:
Git: 737d2b3c41d59eb8f94ab7eb419b957938f24943
Ne2000 NIC uses ring buffer of NE2000_MEM_SIZE(49152)
bytes to process network packets. While receiving packets
via ne2000_receive() routine, a local 'index' variable
could exceed the ring buffer size, leading to an infinite
loop situation.
Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: P J P <pjp@fedoraproject.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
@@ -328,7 +328,7 @@ static void ne2000_receive(void *opaque,
if (index <= s->stop)
avail = s->stop - index;
else
- avail = 0;
+ break;
len = size;
if (len > avail)
len = avail;

View File

@ -0,0 +1,31 @@
References: bsc#944697
From: P J P <address@hidden>
While processing transmit descriptors, an infinite loop could occur
if 'bytes' were to become zero; add a check to avoid it.
[The guest can force 'bytes' to 0 by setting the hdr_len and mss
descriptor fields to 0.
--Stefan]
Signed-off-by: P J P <address@hidden>
Signed-off-by: Stefan Hajnoczi <address@hidden>
---
hw/net/e1000.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -470,7 +470,8 @@ process_tx_desc(E1000State *s, struct e1
memmove(tp->data, tp->header, hdr);
tp->size = hdr;
}
- } while (split_size -= bytes);
+ split_size -= bytes;
+ } while (bytes && split_size);
} else if (!tp->tse && tp->cptse) {
// context descriptor TSE is not set, while data descriptor TSE is set
DBGOUT(TXERR, "TCP segmentaion Error\n");

View File

@ -0,0 +1,30 @@
References: bsc#962360 CVE-2015-7512
Backends could provide a packet whose length is greater than buffer
size. Check for this and truncate the packet to avoid rx buffer
overflow in this case.
Cc: Prasad J Pandit <address@hidden>
Cc: address@hidden
Signed-off-by: Jason Wang <address@hidden>
---
hw/net/pcnet.c | 6 ++++++
1 file changed, 6 insertions(+)
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pcnet.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pcnet.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pcnet.c
@@ -1133,6 +1133,12 @@ static void pcnet_receive(void *opaque,
int pktcount = 0;
if (!s->looptest) {
+ if (size > 4092) {
+#ifdef PCNET_DEBUG_RMD
+ fprintf(stderr, "pcnet: truncates rx packet.\n");
+#endif
+ size = 4092;
+ }
memcpy(src, buf, size);
/* no need to compute the CRC */
src[size] = 0;

View File

@ -0,0 +1,59 @@
References: bsc#956832 CVE-2015-8345
Subject: eepro100: Prevent two endless loops
From: Stefan Weil sw@weilnetz.de Fri Nov 20 08:42:33 2015 +0100
Date: Fri Nov 27 10:39:55 2015 +0800:
Git: 00837731d254908a841d69298a4f9f077babaf24
http://lists.nongnu.org/archive/html/qemu-devel/2015-11/msg04592.html
shows an example how an endless loop in function action_command can
be achieved.
During my code review, I noticed a 2nd case which can result in an
endless loop.
Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/eepro100.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/eepro100.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/eepro100.c
@@ -657,6 +657,10 @@ static void eepro100_cu_command(EEPRO100
{
eepro100_tx_t tx;
uint32_t cb_address;
+ /* The loop below won't stop if it gets special handcrafted data.
+ Therefore we limit the number of iterations. */
+ unsigned max_loop_count = 16;
+
switch (val) {
case CU_NOP:
/* No operation. */
@@ -685,6 +689,13 @@ static void eepro100_cu_command(EEPRO100
bool bit_nc = ((command & 0x0010) != 0);
//~ bool bit_sf = ((command & 0x0008) != 0);
uint16_t cmd = command & 0x0007;
+
+ if (max_loop_count-- == 0) {
+ /* Prevent an endless loop. (see goto next_command) */
+ logout("loop in %s:%u\n", __FILE__, __LINE__);
+ break;
+ }
+
s->cu_offset = le32_to_cpu(tx.link);
switch (cmd) {
case CmdNOp:
@@ -726,6 +737,11 @@ static void eepro100_cu_command(EEPRO100
uint32_t tx_buffer_address = ldl_phys(tbd_address);
uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
//~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
+ if (tx_buffer_size == 0) {
+ /* Prevent an endless loop. */
+ logout("loop in %s:%u\n", __FILE__, __LINE__);
+ break;
+ }
tbd_address += 8;
logout
("TBD (simplified mode): buffer address 0x%08x, size 0x%04x\n",

View File

@ -0,0 +1,25 @@
References: bsc#958493 CVE-2015-8504
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1644,15 +1644,15 @@ static void set_pixel_format(VncState *v
}
vs->clientds = vs->serverds;
- vs->clientds.pf.rmax = red_max;
+ vs->clientds.pf.rmax = red_max ? red_max : 0xFF;
count_bits(vs->clientds.pf.rbits, red_max);
vs->clientds.pf.rshift = red_shift;
vs->clientds.pf.rmask = red_max << red_shift;
- vs->clientds.pf.gmax = green_max;
+ vs->clientds.pf.gmax = green_max ? green_max : 0xFF;
count_bits(vs->clientds.pf.gbits, green_max);
vs->clientds.pf.gshift = green_shift;
vs->clientds.pf.gmask = green_max << green_shift;
- vs->clientds.pf.bmax = blue_max;
+ vs->clientds.pf.bmax = blue_max ? blue_max : 0xFF;
count_bits(vs->clientds.pf.bbits, blue_max);
vs->clientds.pf.bshift = blue_shift;
vs->clientds.pf.bmask = blue_max << blue_shift;

View File

@ -0,0 +1,47 @@
References: bsc#961692 CVE-2016-1714
When processing firmware configurations, an OOB r/w access occurs
if 's->cur_entry' is set to be invalid(FW_CFG_INVALID=0xffff).
Add a check to validate 's->cur_entry' to avoid such access.
Reported-by: Donghai Zdh <address@hidden>
Signed-off-by: Prasad J Pandit <address@hidden>
---
hw/nvram/fw_cfg.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
Updated as per review in
-> https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg00398.html
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/fw_cfg.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/fw_cfg.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/hw/fw_cfg.c
@@ -54,11 +54,15 @@ typedef struct _FWCfgState {
static void fw_cfg_write(FWCfgState *s, uint8_t value)
{
int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
- FWCfgEntry *e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
+ FWCfgEntry *e = (s->cur_entry == FW_CFG_INVALID) ? NULL :
+ &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
FW_CFG_DPRINTF("write %d\n", value);
- if (s->cur_entry & FW_CFG_WRITE_CHANNEL && s->cur_offset < e->len) {
+ if (s->cur_entry & FW_CFG_WRITE_CHANNEL
+ && e != NULL
+ && e->callback
+ && s->cur_offset < e->len) {
e->data[s->cur_offset++] = value;
if (s->cur_offset == e->len) {
e->callback(e->callback_opaque, e->data);
@@ -88,7 +92,8 @@ static int fw_cfg_select(FWCfgState *s,
static uint8_t fw_cfg_read(FWCfgState *s)
{
int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
- FWCfgEntry *e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
+ FWCfgEntry *e = (s->cur_entry == FW_CFG_INVALID) ? NULL :
+ &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
uint8_t ret;
if (s->cur_entry == FW_CFG_INVALID || !e->data || s->cur_offset >= e->len)

View File

@ -0,0 +1,94 @@
The start_xmit() and e1000_receive_iov() functions implement DMA transfers
iterating over a set of descriptors that the guest's e1000 driver
prepares:
- the TDLEN and RDLEN registers store the total size of the descriptor
area,
- while the TDH and RDH registers store the offset (in whole tx / rx
descriptors) into the area where the transfer is supposed to start.
Each time a descriptor is processed, the TDH and RDH register is bumped
(as appropriate for the transfer direction).
QEMU already contains logic to deal with bogus transfers submitted by the
guest:
- Normally, the transmit case wants to increase TDH from its initial value
to TDT. (TDT is allowed to be numerically smaller than the initial TDH
value; wrapping at or above TDLEN bytes to zero is normal.) The failsafe
that QEMU currently has here is a check against reaching the original
TDH value again -- a complete wraparound, which should never happen.
- In the receive case RDH is increased from its initial value until
"total_size" bytes have been received; preferably in a single step, or
in "s->rxbuf_size" byte steps, if the latter is smaller. However, null
RX descriptors are skipped without receiving data, while RDH is
incremented just the same. QEMU tries to prevent an infinite loop
(processing only null RX descriptors) by detecting whether RDH assumes
its original value during the loop. (Again, wrapping from RDLEN to 0 is
normal.)
What both directions miss is that the guest could program TDLEN and RDLEN
so low, and the initial TDH and RDH so high, that these registers will
immediately be truncated to zero, and then never reassume their initial
values in the loop -- a full wraparound will never occur.
The condition that expresses this is:
xdh_start >= s->mac_reg[XDLEN] / sizeof(desc)
i.e., TDH or RDH start out after the last whole rx or tx descriptor that
fits into the TDLEN or RDLEN sized area.
This condition could be checked before we enter the loops, but
pci_dma_read() / pci_dma_write() knows how to fill in buffers safely for
bogus DMA addresses, so we just extend the existing failsafes with the
above condition.
Cc: "Michael S. Tsirkin" <address@hidden>
Cc: Petr Matousek <address@hidden>
Cc: Stefano Stabellini <address@hidden>
Cc: Prasad Pandit <address@hidden>
Cc: Michael Roth <address@hidden>
Cc: Jason Wang <address@hidden>
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1296044
Signed-off-by: Laszlo Ersek <address@hidden>
Reviewed-by: Jason Wang <address@hidden>
---
Notes:
Regarding the public posting: we made an honest effort to vet this
vulnerability, and the impact seems low -- no host side reads/writes,
"just" a DoS (infinite loop). We decided the patch could be posted
publicly, for the usual review process. Jason and Prasad checked the
patch in the internal discussion already, but comments, improvements
etc. are clearly welcome. The CVE request is underway. Thanks.
hw/net/e1000.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c
@@ -538,7 +538,8 @@ start_xmit(E1000State *s)
* bogus values to TDT/TDLEN.
* there's nothing too intelligent we could do about this.
*/
- if (s->mac_reg[TDH] == tdh_start) {
+ if (s->mac_reg[TDH] == tdh_start ||
+ tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
break;
@@ -728,7 +729,8 @@ e1000_receive(void *opaque, const uint8_
s->mac_reg[RDH] = 0;
s->check_rxov = 1;
/* see comment in start_xmit; same here */
- if (s->mac_reg[RDH] == rdh_start) {
+ if (s->mac_reg[RDH] == rdh_start ||
+ rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
set_ics(s, 0, E1000_ICS_RXO);

View File

@ -0,0 +1,54 @@
References: bsc#967101 CVE-2016-2391
From d1b07becc481e09225cfe905ec357807ae07f095 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <address@hidden>
Date: Tue, 16 Feb 2016 15:15:04 +0100
Subject: [PATCH] ohci timer fix
Signed-off-by: Gerd Hoffmann <address@hidden>
---
hw/usb/hcd-ohci.c | 31 +++++--------------------------
1 file changed, 5 insertions(+), 26 deletions(-)
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-ohci.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/usb-ohci.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-ohci.c
@@ -1139,16 +1139,6 @@ static void ohci_frame_boundary(void *op
*/
static int ohci_bus_start(OHCIState *ohci)
{
- ohci->eof_timer = qemu_new_timer(vm_clock,
- ohci_frame_boundary,
- ohci);
-
- if (ohci->eof_timer == NULL) {
- fprintf(stderr, "usb-ohci: %s: qemu_new_timer failed\n", ohci->name);
- /* TODO: Signal unrecoverable error */
- return 0;
- }
-
dprintf("usb-ohci: %s: USB Operational\n", ohci->name);
ohci_sof(ohci);
@@ -1159,9 +1149,7 @@ static int ohci_bus_start(OHCIState *ohc
/* Stop sending SOF tokens on the bus */
static void ohci_bus_stop(OHCIState *ohci)
{
- if (ohci->eof_timer)
- qemu_del_timer(ohci->eof_timer);
- ohci->eof_timer = NULL;
+ qemu_del_timer(ohci->eof_timer);
}
/* Sets a flag in a port status register but only set it if the port is
@@ -1654,6 +1642,9 @@ static void usb_ohci_init(OHCIState *ohc
ohci->async_td = 0;
qemu_register_reset(ohci_reset, ohci);
ohci_reset(ohci);
+
+ ohci->eof_timer = qemu_new_timer(vm_clock,
+ ohci_frame_boundary, ohci);
}
typedef struct {

View File

@ -0,0 +1,34 @@
References: bsc#969351 CVE-2016-2841
From: Prasad J Pandit <address@hidden>
Ne2000 NIC uses ring buffer of NE2000_MEM_SIZE(49152)
bytes to process network packets. Registers PSTART & PSTOP
define ring buffer size & location. Setting these registers
to invalid values could lead to infinite loop or OOB r/w
access issues. Add check to avoid it.
Reported-by: Yang Hongke <address@hidden>
Signed-off-by: Prasad J Pandit <address@hidden>
---
hw/net/ne2000.c | 4 ++++
1 file changed, 4 insertions(+)
Update per review:
-> https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg05522.html
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/hw/ne2000.c
@@ -202,6 +202,10 @@ static int ne2000_buffer_full(NE2000Stat
{
int avail, index, boundary;
+ if (s->stop <= s->start) {
+ return 1;
+ }
+
index = s->curpag << 8;
boundary = s->boundary << 8;
if (index < boundary)

View File

@ -0,0 +1,33 @@
References: bsc#980716 CVE-2016-4439
The 53C9X Fast SCSI Controller(FSC) comes with an internal 16-byte
FIFO buffer. It is used to handle command and data transfer. While
writing to this command buffer 's->cmdbuf[TI_BUFSZ=16]', a check
was missing to validate input length. Add check to avoid OOB write
access.
Fixes CVE-2016-4439
Reported-by: Li Qiang <address@hidden>
Signed-off-by: Prasad J Pandit <address@hidden>
---
hw/scsi/esp.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
Index: xen-4.4.4-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
===================================================================
--- xen-4.4.4-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/esp.c
+++ xen-4.4.4-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
@@ -471,7 +471,11 @@ static void esp_mem_writeb(void *opaque,
break;
case ESP_FIFO:
if (s->do_cmd) {
- s->cmdbuf[s->cmdlen++] = val & 0xff;
+ if (s->cmdlen < TI_BUFSZ) {
+ s->cmdbuf[s->cmdlen++] = val & 0xff;
+ } else {
+ ESP_ERROR("fifo overrun\n");
+ }
} else if (s->ti_size == TI_BUFSZ - 1) {
ESP_ERROR("fifo overrun\n");
} else {

View File

@ -0,0 +1,56 @@
References: bsc#980724 CVE-2016-4441
The 53C9X Fast SCSI Controller(FSC) comes with an internal 16-byte
FIFO buffer. It is used to handle command and data transfer.
Routine get_cmd() uses DMA to read scsi commands into this buffer.
Add check to validate DMA length against buffer size to avoid any
overrun.
Fixes CVE-2016-4441
Reported-by: Li Qiang <address@hidden>
Signed-off-by: Prasad J Pandit <address@hidden>
---
hw/scsi/esp.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/esp.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
@@ -162,7 +162,7 @@ static void esp_lower_irq(ESPState *s)
}
}
-static uint32_t get_cmd(ESPState *s, uint8_t *buf)
+static uint32_t get_cmd(ESPState *s, uint8_t *buf, uint8_t buflen)
{
uint32_t dmalen;
int target;
@@ -170,6 +170,9 @@ static uint32_t get_cmd(ESPState *s, uin
target = s->wregs[ESP_WBUSID] & BUSID_DID;
if (s->dma) {
dmalen = s->rregs[ESP_TCLO] | (s->rregs[ESP_TCMID] << 8);
+ if (dmalen > buflen) {
+ return 0;
+ }
s->dma_memory_read(s->dma_opaque, buf, dmalen);
} else {
dmalen = s->ti_size;
@@ -231,14 +234,14 @@ static void handle_satn(ESPState *s)
uint8_t buf[32];
int len;
- len = get_cmd(s, buf);
+ len = get_cmd(s, buf, sizeof(buf));
if (len)
do_cmd(s, buf);
}
static void handle_satn_stop(ESPState *s)
{
- s->cmdlen = get_cmd(s, s->cmdbuf);
+ s->cmdlen = get_cmd(s, s->cmdbuf, sizeof(s->cmdbuf));
if (s->cmdlen) {
DPRINTF("Set ATN & Stop: cmdlen %d\n", s->cmdlen);
s->do_cmd = 1;

View File

@ -0,0 +1,37 @@
References: bsc#982960 CVE-2016-5238
The 53C9X Fast SCSI Controller(FSC) comes with an internal 16-byte
FIFO buffer. It is used to handle command and data transfer.
Routine get_cmd() in non-DMA mode, uses 'ti_size' to read scsi
command into a buffer. Add check to validate command length against
buffer size to avoid any overrun.
Reported-by: Li Qiang <address@hidden>
Signed-off-by: Prasad J Pandit <address@hidden>
---
hw/scsi/esp.c | 3 +++
1 file changed, 3 insertions(+)
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/esp.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
@@ -176,6 +176,9 @@ static uint32_t get_cmd(ESPState *s, uin
s->dma_memory_read(s->dma_opaque, buf, dmalen);
} else {
dmalen = s->ti_size;
+ if (dmalen > TI_BUFSZ) {
+ return 0;
+ }
memcpy(buf, s->ti_buf, dmalen);
buf[0] = 0;
}
@@ -265,7 +268,7 @@ static void write_response(ESPState *s)
} else {
s->ti_size = 2;
s->ti_rptr = 0;
- s->ti_wptr = 0;
+ s->ti_wptr = 2;
s->rregs[ESP_RFLAGS] = 2;
}
esp_raise_irq(s);

View File

@ -0,0 +1,65 @@
References: bsc#983984 CVE-2016-5338
The 53C9X Fast SCSI Controller(FSC) comes with internal 16-byte
FIFO buffers. One is used to handle commands and other is for
information transfer. Three control variables 'ti_rptr',
'ti_wptr' and 'ti_size' are used to control r/w access to the
information transfer buffer ti_buf[TI_BUFSZ=16]. In that,
'ti_rptr' is used as read index, where read occurs.
'ti_wptr' is a write index, where write would occur.
'ti_size' indicates total bytes to be read from the buffer.
While reading/writing to this buffer, index could exceed its
size. Add check to avoid OOB r/w access.
Reported-by: Huawei PSIRT <address@hidden>
Reported-by: Li Qiang <address@hidden>
Signed-off-by: Prasad J Pandit <address@hidden>
---
hw/scsi/esp.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
Update as per:
-> https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg01326.html
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/esp.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
@@ -435,18 +435,17 @@ static uint32_t esp_mem_readb(void *opaq
DPRINTF("read reg[%d]: 0x%2.2x\n", saddr, s->rregs[saddr]);
switch (saddr) {
case ESP_FIFO:
- if (s->ti_size > 0) {
+ if ((s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
+ /* Data out. */
+ ESP_ERROR("PIO data read not implemented\n");
+ s->rregs[ESP_FIFO] = 0;
+ esp_raise_irq(s);
+ } else if (s->ti_rptr < s->ti_wptr) {
s->ti_size--;
- if ((s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
- /* Data out. */
- ESP_ERROR("PIO data read not implemented\n");
- s->rregs[ESP_FIFO] = 0;
- } else {
- s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++];
- }
+ s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++];
esp_raise_irq(s);
}
- if (s->ti_size == 0) {
+ if (s->ti_rptr == s->ti_wptr) {
s->ti_rptr = 0;
s->ti_wptr = 0;
}
@@ -482,7 +481,7 @@ static void esp_mem_writeb(void *opaque,
} else {
ESP_ERROR("fifo overrun\n");
}
- } else if (s->ti_size == TI_BUFSZ - 1) {
+ } else if (s->ti_wptr == TI_BUFSZ - 1) {
ESP_ERROR("fifo overrun\n");
} else {
s->ti_size++;

View File

@ -0,0 +1,73 @@
References: bsc#990843 CVE-2016-6351
Subject: scsi: esp: make cmdbuf big enough for maximum CDB size
From: Prasad J Pandit pjp@fedoraproject.org Thu Jun 16 00:22:35 2016 +0200
Date: Thu Jun 16 18:39:05 2016 +0200:
Git: 926cde5f3e4d2504ed161ed0cb771ac7cad6fd11
While doing DMA read into ESP command buffer 's->cmdbuf', it could
write past the 's->cmdbuf' area, if it was transferring more than 16
bytes. Increase the command buffer size to 32, which is maximum when
's->do_cmd' is set, and add a check on 'len' to avoid OOB access.
Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/esp.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/esp.c
@@ -26,6 +26,8 @@
#include "scsi-disk.h"
#include "scsi.h"
+#include <assert.h>
+
/* debug ESP card */
//#define DEBUG_ESP
@@ -49,6 +51,7 @@ do { printf("ESP ERROR: %s: " fmt, __fun
#define ESP_REGS 16
#define TI_BUFSZ 16
+#define ESP_CMDBUF_SZ 32
typedef struct ESPState ESPState;
@@ -64,7 +67,7 @@ struct ESPState {
uint32_t dma;
SCSIDevice *scsi_dev[ESP_MAX_DEVS];
SCSIDevice *current_dev;
- uint8_t cmdbuf[TI_BUFSZ];
+ uint8_t cmdbuf[ESP_CMDBUF_SZ];
uint32_t cmdlen;
uint32_t do_cmd;
@@ -294,6 +297,8 @@ static void esp_do_dma(ESPState *s)
len = s->dma_left;
if (s->do_cmd) {
DPRINTF("command len %d + %d\n", s->cmdlen, len);
+ assert (s->cmdlen <= sizeof(s->cmdbuf) &&
+ len <= sizeof(s->cmdbuf) - s->cmdlen);
s->dma_memory_read(s->dma_opaque, &s->cmdbuf[s->cmdlen], len);
s->ti_size = 0;
s->cmdlen = 0;
@@ -382,7 +387,7 @@ static void handle_ti(ESPState *s)
s->dma_counter = dmalen;
if (s->do_cmd)
- minlen = (dmalen < 32) ? dmalen : 32;
+ minlen = (dmalen < ESP_CMDBUF_SZ) ? dmalen : ESP_CMDBUF_SZ;
else if (s->ti_size < 0)
minlen = (dmalen < -s->ti_size) ? dmalen : -s->ti_size;
else
@@ -476,7 +481,7 @@ static void esp_mem_writeb(void *opaque,
break;
case ESP_FIFO:
if (s->do_cmd) {
- if (s->cmdlen < TI_BUFSZ) {
+ if (s->cmdlen < ESP_CMDBUF_SZ) {
s->cmdbuf[s->cmdlen++] = val & 0xff;
} else {
ESP_ERROR("fifo overrun\n");

View File

@ -527,64 +527,6 @@ the amount of memory assigned to dom0. Reboot the host for these changes to
take effect.
Adjusting LIBXL_HOTPLUG_TIMEOUT at runtime
------------------------------------------
A domU with a large number of disks may run into the hardcoded
LIBXL_HOTPLUG_TIMEOUT limit, which is 40 seconds. This happens if the
preparation for each disk takes an unexpectedly large amount of time. Then
the sum of all configured disks and the individual preparation time will
be larger than 40 seconds. The hotplug script which does the preparation
takes a lock before doing the actual preparation. Since the hotplug
scripts for each disk are spawned at nearly the same time, each one has
to wait for the lock. Due to this contention, the total execution time
of a script can easily exceed the timeout. In this case libxl will
terminate the script because it has to assume an error condition.
Example:
10 configured disks, each one takes 3 seconds within the critical
section. The total execution time will be 30 seconds, which is still
within the limit. With 5 additional configured disks, the total
execution time will be 45 seconds, which would trigger the timeout.
To handle such a setup without a recompile of libxl, a special key/value
has to be created in xenstore prior to domain creation. This can be done
either manually, or at system startup. A dedicated systemd service file
exists to set the required value. To enable it, run these commands:
/etc/systemd/system # systemctl enable xen-LIBXL_HOTPLUG_TIMEOUT.service
/etc/systemd/system # systemctl start xen-LIBXL_HOTPLUG_TIMEOUT.service
In case the value in this service file needs to be changed, a copy with
the exact same name must be created in the /etc/systemd/system directory:
/etc/systemd/system # cat xen-LIBXL_HOTPLUG_TIMEOUT.service
[Unit]
Description=set global LIBXL_HOTPLUG_TIMEOUT
ConditionPathExists=/proc/xen/capabilities
Requires=xenstored.service
After=xenstored.service
Requires=xen-init-dom0.service
After=xen-init-dom0.service
Before=xencommons.service
[Service]
Type=oneshot
RemainAfterExit=true
ExecStartPre=/bin/grep -q control_d /proc/xen/capabilities
ExecStart=/usr/bin/xenstore-write /libxl/suse/per-device-LIBXL_HOTPLUG_TIMEOUT 10
[Install]
WantedBy=multi-user.target
In this example the per-device value will be set to 10 seconds.
The change for libxl which handles this xenstore value will enable
additional logging if the key is found. That extra logging will show
the execution time of each script.
Troubleshooting
---------------
First try to get Linux running on bare metal before trying with Xen.
@ -697,7 +639,7 @@ please report it back to the xen-devel list:
xen-devel@lists.xen.org
If you find issues with the packaging or setup done by SUSE, please report
it through bugzilla:
https://bugzilla.suse.com
https://bugzilla.novell.com
ENJOY!

View File

@ -0,0 +1,157 @@
From 9ca313aa0824f2d350a7a6c9b1ef6c47e0408f1d Mon Sep 17 00:00:00 2001
From: aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Date: Sat, 23 Aug 2008 23:27:37 +0000
Subject: [PATCH] VNC: Support for ExtendedKeyEvent client message
This patch adds support for the ExtendedKeyEvent client message. This message
allows a client to send raw scan codes directly to the server. If the client
and server are using the same keymap, then it's unnecessary to use the '-k'
option with QEMU when this extension is supported.
This extension is currently only implemented by gtk-vnc based clients
(gvncviewer, virt-manager, vinagre, etc.).
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5076 c046a42c-6fe2-441c-8c8c-71466251a162
---
vnc.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 50 insertions(+), 9 deletions(-)
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1285,35 +1285,22 @@ static void press_key_altgr_down(VncStat
}
}
-static void do_key_event(VncState *vs, int down, uint32_t sym)
+static void do_key_event(VncState *vs, int down, int keycode, int sym, int shift)
{
- int keycode;
int shift_keys = 0;
- int shift = 0;
int keypad = 0;
int altgr = 0;
int altgr_keys = 0;
if (is_graphic_console()) {
- if (sym >= 'A' && sym <= 'Z') {
- sym = sym - 'A' + 'a';
- shift = 1;
- }
- else {
+ if (!shift)
shift = keysym_is_shift(vs->kbd_layout, sym & 0xFFFF);
- }
altgr = keysym_is_altgr(vs->kbd_layout, sym & 0xFFFF);
}
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];
- keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
- if (keycode == 0) {
- fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);
- return;
- }
-
/* QEMU console switch */
switch(keycode) {
case 0x2a: /* Left Shift */
@@ -1445,7 +1432,25 @@ static void do_key_event(VncState *vs, i
static void key_event(VncState *vs, int down, uint32_t sym)
{
- do_key_event(vs, down, sym);
+ int keycode;
+ int shift = 0;
+
+ if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
+ sym = sym - 'A' + 'a';
+ shift = 1;
+ }
+ keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
+ do_key_event(vs, down, keycode, sym, shift);
+}
+
+static void ext_key_event(VncState *vs, int down,
+ uint32_t sym, uint16_t keycode)
+{
+ /* if the user specifies a keyboard layout, always use it */
+ if (keyboard_layout)
+ key_event(vs, down, sym);
+ else
+ do_key_event(vs, down, keycode, sym, 0);
}
static void framebuffer_set_updated(VncState *vs, int x, int y, int w, int h)
@@ -1534,6 +1539,15 @@ static void framebuffer_update_request(V
qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock));
}
+static void send_ext_key_event_ack(VncState *vs)
+{
+ vnc_write_u8(vs, 0);
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1);
+ vnc_framebuffer_update(vs, 0, 0, ds_get_width(vs->ds), ds_get_height(vs->ds), -258);
+ vnc_flush(vs);
+}
+
static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings)
{
int i;
@@ -1562,6 +1576,9 @@ static void set_encodings(VncState *vs,
case -257:
vs->has_pointer_type_change = 1;
break;
+ case -258:
+ send_ext_key_event_ack(vs);
+ break;
case 0x574D5669:
vs->has_WMVi = 1;
default:
@@ -1790,6 +1807,24 @@ static int protocol_client_msg(VncState
client_cut_text(vs, read_u32(data, 4), (char *)(data + 8));
break;
+ case 255:
+ if (len == 1)
+ return 2;
+
+ switch (read_u8(data, 1)) {
+ case 0:
+ if (len == 2)
+ return 12;
+
+ ext_key_event(vs, read_u16(data, 2),
+ read_u32(data, 4), read_u32(data, 8));
+ break;
+ default:
+ printf("Msg: %d\n", read_u16(data, 0));
+ vnc_client_error(vs);
+ break;
+ }
+ break;
default:
printf("Msg: %d\n", data[0]);
vnc_client_error(vs);
@@ -2486,10 +2521,11 @@ void vnc_display_init(DisplayState *ds)
vs->ds = ds;
- if (!keyboard_layout)
- keyboard_layout = "en-us";
+ if (keyboard_layout)
+ vs->kbd_layout = init_keyboard_layout(keyboard_layout);
+ else
+ vs->kbd_layout = init_keyboard_layout("en-us");
- vs->kbd_layout = init_keyboard_layout(keyboard_layout);
if (!vs->kbd_layout)
exit(1);
vs->modifiers_state[0x45] = 1; /* NumLock on - on boot */

View File

@ -0,0 +1,37 @@
From 98abe3b337e69371678859c4cfd19df61aebb0d9 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Sun, 2 Feb 2014 20:42:42 +0100
Subject: aarch64: rename PSR_MODE_ELxx to match linux headers
https://bugs.launchpad.net/linaro-aarch64/+bug/1169164
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/include/public/arch-arm.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
Index: xen-4.7.0-testing/xen/include/public/arch-arm.h
===================================================================
--- xen-4.7.0-testing.orig/xen/include/public/arch-arm.h
+++ xen-4.7.0-testing/xen/include/public/arch-arm.h
@@ -362,13 +362,13 @@ typedef uint64_t xen_callback_t;
/* 64 bit modes */
#define PSR_MODE_BIT 0x10 /* Set iff AArch32 */
-#define PSR_MODE_EL3h 0x0d
-#define PSR_MODE_EL3t 0x0c
-#define PSR_MODE_EL2h 0x09
-#define PSR_MODE_EL2t 0x08
-#define PSR_MODE_EL1h 0x05
-#define PSR_MODE_EL1t 0x04
-#define PSR_MODE_EL0t 0x00
+#define PSR_MODE_EL3h 0x0000000d
+#define PSR_MODE_EL3t 0x0000000c
+#define PSR_MODE_EL2h 0x00000009
+#define PSR_MODE_EL2t 0x00000008
+#define PSR_MODE_EL1h 0x00000005
+#define PSR_MODE_EL1t 0x00000004
+#define PSR_MODE_EL0t 0x00000000
#define PSR_GUEST32_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC)
#define PSR_GUEST64_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h)

45
altgr_2.patch Normal file
View File

@ -0,0 +1,45 @@
When accessing a domU from a Windows VNC client, the altgr key of a Spanish
keyboard doesn't work. According to the logs, the keycodes passed from the
VNC client to the qemu vncserver are wrong. When altgr and "2" are
pressed, the keycodes the vncserver receives are:
ALT_R down,
CTRL_L down,
CTRL_L up,
ALT_R up,
"2" down,
"2" up,
...
Since no altgr modifier is active when "2" down is sent, the character
displayed on screen will be "2" rather than "@".
To solve this problem, there is another patch applied by upstream which
sends an additional altgr modifier before "2" down in the above case.
It works well when the domU is Windows, but on a sles10 sp3 domU it sometimes
displays "@" and sometimes still displays "2", especially when
altgr+2 is pressed repeatedly.
For the sles10 sp3 domU problem, the cause may be that there are too many alt_r (same
keycode as altgr on "es") up and down events and the domU OS cannot handle
them well.
To further address this problem, this patch changes the vncserver so that when
the layout is "es" and it receives an alt_r keysym (which is already abnormal,
since "es" has no alt_r), it treats the alt_r as alt_l. This avoids too many altgr
keycode up and down events and makes sure the intentionally added altgr keycode can take effect.
Signed-off-by: Chunyan Liu <cyliu@novell.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1440,6 +1440,9 @@ static void key_event(VncState *vs, int
int keycode;
int shift = 0;
+ if ( sym == 0xffea && keyboard_layout && !strcmp(keyboard_layout,"es") )
+ sym = 0xffe9;
+
if (sym >= 'A' && sym <= 'Z' && is_graphic_console()) {
sym = sym - 'A' + 'a';
shift = 1;

32
bdrv_default_rwflag.patch Normal file
View File

@ -0,0 +1,32 @@
Subject: modify default read/write flag in bdrv_init.
Signed-off-by: Chunyan Liu <cyliu@novell.com>
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -2626,6 +2626,8 @@ int drive_init(struct drive_opt *arg, in
strncpy(drives_table[nb_drives].serial, serial, sizeof(serial));
nb_drives++;
+ bdrv_flags = BDRV_O_RDWR;
+
switch(type) {
case IF_IDE:
case IF_XEN:
@@ -2639,6 +2641,7 @@ int drive_init(struct drive_opt *arg, in
break;
case MEDIA_CDROM:
bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM);
+ bdrv_flags &= ~BDRV_O_RDWR;
break;
}
break;
@@ -2659,7 +2662,6 @@ int drive_init(struct drive_opt *arg, in
}
if (!file[0])
return -2;
- bdrv_flags = 0;
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
cache = 2; /* always use write-back with snapshot */

129
bdrv_open2_fix_flags.patch Normal file
View File

@ -0,0 +1,129 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block.c
@@ -350,7 +350,7 @@ int bdrv_file_open(BlockDriverState **pb
int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
{
- return bdrv_open2(bs, filename, flags, NULL);
+ return bdrv_open2(bs, filename, flags|BDRV_O_RDWR, NULL);
}
int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
@@ -419,12 +419,13 @@ int bdrv_open2(BlockDriverState *bs, con
}
bs->drv = drv;
bs->opaque = qemu_mallocz(drv->instance_size);
- /* Note: for compatibility, we open disk image files as RDWR, and
- RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
- open_flags = (flags & BDRV_O_ACCESS) | (flags & BDRV_O_CACHE_MASK);
+ open_flags = flags;
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
+ if (!(open_flags & BDRV_O_RDWR))
+ bs->read_only = 1;
+
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/usb-msd.c
@@ -551,7 +551,7 @@ USBDevice *usb_msd_init(const char *file
s = qemu_mallocz(sizeof(MSDState));
bdrv = bdrv_new("usb");
- if (bdrv_open2(bdrv, filename, 0, drv) < 0)
+ if (bdrv_open2(bdrv, filename, BDRV_O_RDWR, drv) < 0)
goto fail;
s->bs = bdrv;
*pbs = bdrv;
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-img.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-img.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-img.c
@@ -32,7 +32,7 @@
#endif
/* Default to cache=writeback as data integrity is not important for qemu-tcg. */
-#define BRDV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_O_FLAGS BDRV_O_CACHE_WB
static void QEMU_NORETURN error(const char *fmt, ...)
{
@@ -185,7 +185,7 @@ static int read_password(char *buf, int
#endif
static BlockDriverState *bdrv_new_open(const char *filename,
- const char *fmt)
+ const char *fmt, int flags)
{
BlockDriverState *bs;
BlockDriver *drv;
@@ -201,7 +201,7 @@ static BlockDriverState *bdrv_new_open(c
} else {
drv = &bdrv_raw;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, flags, drv) < 0) {
error("Could not open '%s'", filename);
}
if (bdrv_is_encrypted(bs)) {
@@ -253,7 +253,7 @@ static int img_create(int argc, char **a
size = 0;
if (base_filename) {
BlockDriverState *bs;
- bs = bdrv_new_open(base_filename, NULL);
+ bs = bdrv_new_open(base_filename, NULL, BDRV_O_RDWR);
bdrv_get_geometry(bs, &size);
size *= 512;
bdrv_delete(bs);
@@ -332,7 +332,7 @@ static int img_commit(int argc, char **a
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
ret = bdrv_commit(bs);
@@ -455,7 +455,8 @@ static int img_convert(int argc, char **
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
- bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt);
+ bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt,
+ BDRV_O_CACHE_WB|BDRV_O_RDONLY);
if (!bs[bs_i])
error("Could not open '%s'", argv[optind + bs_i]);
bdrv_get_geometry(bs[bs_i], &bs_sectors);
@@ -483,7 +484,7 @@ static int img_convert(int argc, char **
}
}
- out_bs = bdrv_new_open(out_filename, out_fmt);
+ out_bs = bdrv_new_open(out_filename, out_fmt, BDRV_O_CACHE_WB|BDRV_O_RDWR);
bs_i = 0;
bs_offset = 0;
@@ -706,7 +707,7 @@ static int img_info(int argc, char **arg
} else {
drv = NULL;
}
- if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_FLAGS|BDRV_O_RDWR, drv) < 0) {
error("Could not open '%s'", filename);
}
bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
@@ -810,7 +811,7 @@ static void img_snapshot(int argc, char
if (!bs)
error("Not enough memory");
- if (bdrv_open2(bs, filename, 0, NULL) < 0) {
+ if (bdrv_open2(bs, filename, BDRV_O_RDWR, NULL) < 0) {
error("Could not open '%s'", filename);
}

51
bdrv_open2_flags_2.patch Normal file
View File

@ -0,0 +1,51 @@
Index: xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.5.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.5.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -134,7 +134,8 @@ static void insert_media(void *opaque)
else
format = &bdrv_raw;
- bdrv_open2(bs, media_filename[i], 0, format);
+ /* Temporary BDRV_O_RDWR */
+ bdrv_open2(bs, media_filename[i], BDRV_O_RDWR, format);
#ifdef CONFIG_STUBDOM
{
char *buf, *backend, *params_path, *params;
@@ -509,7 +510,8 @@ void xenstore_parse_domain_config(int hv
}
for (i = 0; i < num; i++) {
- format = NULL; /* don't know what the format is yet */
+ flags = 0;
+ format = NULL; /* don't know what the format is yet */
/* read the backend path */
xenstore_get_backend_path(&bpath, "vbd", danger_path, hvm_domid, e_danger[i]);
if (bpath == NULL)
@@ -595,6 +597,17 @@ void xenstore_parse_domain_config(int hv
format = &bdrv_raw;
}
+ /* read the mode of the device */
+ if (pasprintf(&buf, "%s/mode", bpath) == -1)
+ continue;
+ free(mode);
+ mode = xs_read(xsh, XBT_NULL, buf, &len);
+
+ if (!strcmp(mode, "r") || !strcmp(mode, "ro"))
+ flags |= BDRV_O_RDONLY;
+ if (!strcmp(mode, "w") || !strcmp(mode, "rw"))
+ flags |= BDRV_O_RDWR;
+
#if 0
/* Phantom VBDs are disabled because the use of paths
* from guest-controlled areas in xenstore is unsafe.
@@ -662,7 +675,7 @@ void xenstore_parse_domain_config(int hv
#ifdef CONFIG_STUBDOM
if (pasprintf(&danger_buf, "%s/device/vbd/%s", danger_path, e_danger[i]) == -1)
continue;
- if (bdrv_open2(bs, danger_buf, BDRV_O_CACHE_WB /* snapshot and write-back */, &bdrv_raw) == 0) {
+ if (bdrv_open2(bs, danger_buf, flags|BDRV_O_CACHE_WB /* snapshot and write-back */, &bdrv_raw) == 0) {
if (pasprintf(&buf, "%s/params", bpath) == -1)
continue;
free(params);

42
blktap.patch Normal file
View File

@ -0,0 +1,42 @@
bug #239173
bug #242953
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -447,9 +447,9 @@ void xenstore_parse_domain_config(int hv
{
char **e_danger = NULL;
char *buf = NULL;
- char *fpath = NULL, *bpath = NULL,
+ char *fpath = NULL, *bpath = NULL, *btype = NULL,
*dev = NULL, *params = NULL, *drv = NULL;
- int i, ret;
+ int i, ret, is_tap;
unsigned int len, num, hd_index, pci_devid = 0;
BlockDriverState *bs;
BlockDriver *format;
@@ -486,6 +486,14 @@ void xenstore_parse_domain_config(int hv
e_danger[i]);
if (bpath == NULL)
continue;
+ /* check to see if type is tap or not */
+ if (pasprintf(&buf, "%s/type", bpath) == -1)
+ continue;
+ free(btype);
+ btype = xs_read(xsh, XBT_NULL, buf, &len);
+ if (btype == NULL)
+ continue;
+ is_tap = !strncmp(btype, "tap", 3);
/* read the name of the device */
if (pasprintf(&buf, "%s/dev", bpath) == -1)
continue;
@@ -760,6 +768,7 @@ void xenstore_parse_domain_config(int hv
free(mode);
free(params);
free(dev);
+ free(btype);
free(bpath);
free(buf);
free(danger_buf);

10
blktap2-no-uninit.patch Normal file
View File

@ -0,0 +1,10 @@
--- xen-4.5.0-testing/tools/blktap2/drivers/Makefile.orig 2015-04-08 11:25:54.974241326 +0200
+++ xen-4.5.0-testing/tools/blktap2/drivers/Makefile 2015-04-08 11:26:10.150411238 +0200
@@ -11,6 +11,7 @@
CFLAGS += -Werror
CFLAGS += -Wno-unused
+CFLAGS += -Wno-error=array-bounds
CFLAGS += -fno-strict-aliasing
CFLAGS += -I$(BLKTAP_ROOT)/include -I$(BLKTAP_ROOT)/drivers
CFLAGS += $(CFLAGS_libxenctrl)

View File

@ -2,34 +2,20 @@
# Usage: block-dmmd [add args | remove args]
#
# the dmmd device syntax (in xl commands/configs) is something like:
# script=block-dmmd,md;/dev/md0;md;/dev/md1;lvm;/dev/vg1/lv1
# the dmmd device syntax (in xm/xl commands/configs) is something like:
# dmmd:md;/dev/md0;md;/dev/md1;lvm;/dev/vg1/lv1
# or
# script=block-dmmd,lvm;/dev/vg1/lv1;lvm;/dev/vg1/lv2;md;/dev/md0
# dmmd:lvm;/dev/vg1/lv1;lvm;/dev/vg1/lv2;md;/dev/md0
# device pairs (type;dev) are processed in order, with the last device
# assigned to the VM
#
# Note - When using the libxl stack, the "script=block-dmmd" option
# is required. See man xl-disk-configuration(5) for more information.
#
# md devices can optionally:
# specify a config file through:
# md;/dev/md100(/var/xen/config/mdadm.conf)
# use an array name (mdadm -N option):
# md;My-MD-name;lvm;/dev/vg1/lv1
# dmmd:md;My-MD-name;lvm;/dev/vg1/lv1
#
# Completely expressive syntax should be similar to:
# "format=raw, vdev=xvdb, access=rw, script=block-dmmd, \
# target=md;/dev/md0(/etc/mdadm.conf);lvm;/dev/vg1/lv1"
#
##
# History:
# 2017-07-10, mlatimer@suse.com:
# Modification to use syslog for progress messages by ldevulder@suse.com
# 2017-06-12, mlatimer@suse.com:
# Merge LVM improvements by loic.devulder@mpsa.com
# Document libxl "script=block-dmmd" syntax in examples
# Remove xm/xend references (e.g. parsed_timeout from xend-config.sxp)
# 2016-05-27, mlatimer@suse.com:
# Merge improvements by loic.devulder@mpsa.com. Highlights include:
# - Re-write and simplification to speed up the script!
@ -65,27 +51,16 @@ typeset -rx LVCHANGE_BIN=/sbin/lvchange
typeset -rx PVSCAN_BIN=/sbin/pvscan
typeset -rx VGSCAN_BIN=/sbin/vgscan
typeset -rx VGCHANGE_BIN=/sbin/vgchange
typeset -rx CLVMD_BIN=/usr/sbin/clvmd
typeset -rx DATE_LOG="date +%F_%T.%N"
typeset -rx DATE_SEC="date +%s"
# Uncomment for debugging purposes
# exec >> /tmp/block-dmmd-$(${DATE_LOG}).log 2>&1
# echo shell-flags: $-
# We check for errors ourselves
set +e
function reload_clvm()
{
# If we are in cluster mode
if ps -e | grep -q [c]lvmd 2>/dev/null; then
# Logging message
log info "Synchronizing cLVM..."
# Synchronize cLVM
${CLVMD_BIN} -R > /dev/null 2>&1 \
|| return 1
fi
return 0
}
function run_mdadm()
{
local mdadm_cmd=$1
@ -139,26 +114,25 @@ function activate_md()
fi
# Logging message
log info "Activating MD device ${dev}..."
echo "[$(${DATE_LOG})] activate MD device ${dev}..." >&2
# Is MD device already active?
# We need to use full path name, aliase is not possible...
if [ -e ${dev_path}/${dev##*/} ]; then
${MDADM_BIN} -Q -D ${dev_path}/${dev##*/} 2>/dev/null \
if [ -e $dev_path/${dev##*/} ]; then
${MDADM_BIN} -Q -D $dev_path/${dev##*/} 2>/dev/null \
| grep -iq state.*\:.*inactive || return 0
fi
# Activate MD device
run_mdadm "-A ${mdadm_opts} ${dev} ${cfg}"
rc=$?
# A return code of 2 can indicate the array configuration was incorrect
if [[ ${rc} == 2 ]]; then
# Logging message
log info "Verifying MD device ${dev} activation..."
echo "[$(${DATE_LOG})] verifying MD device ${dev} activation..." >&2
# If the array is active, return 0, otherwise return an error
${MDADM_BIN} -Q -D ${dev_path}/${dev##*/} &>/dev/null && return 0 \
${MDADM_BIN} -Q -D $dev_path/${dev##*/} &>/dev/null && return 0 \
|| return 1
fi
@ -185,7 +159,7 @@ function deactivate_md()
fi
# Logging message
log info "Deactivating MD device ${dev}..."
echo "[$(${DATE_LOG})] deactivate MD device ${dev}..." >&2
# We need the device name only while deactivating
${MDADM_BIN} -S ${dev_path}/${dev##*/} > /dev/null 2>&1
@ -193,52 +167,46 @@ function deactivate_md()
return $?
}
function lvm_action()
function activate_lvm()
{
local action=$1
local dev=$2
local run_timeout=90
local parsed_timeout
local end_time
# If /etc/xen/xend-config.sxp exists (e.g. SLES11), use
# device-create-timeout, instead of the default setting
if [[ -f /etc/xen/xend-config.sxp ]]; then
parsed_timeout=$(grep -v "^[ \t]*#.*" /etc/xen/xend-config.sxp \
|sed -n 's/(device-create-timeout \+\([0-9]\+\))/\1/p')
if [[ ! -z $parsed_timeout ]]; then
run_timeout=$((${parsed_timeout}*9/10))
fi
fi
# First scan for PVs and VGs
# We need this for using MD device as PV
${PVSCAN_BIN} > /dev/null 2>&1
# ${VGSCAN_BIN} --mknodes > /dev/null 2>&1
# Logging message
log info "${action} LVM device ${dev}..."
echo "[$(${DATE_LOG})] activate LVM device ${dev}..." >&2
# Set end_time for the loop
(( end_time = $(${DATE_SEC}) + run_timeout ))
while true; do
# Action depends on what the user asks
if [[ ${action} == activate ]]; then
# First scan for PVs and VGs
# We need this for using MD device as PV
${PVSCAN_BIN} > /dev/null 2>&1
${LVCHANGE_BIN} -aey $1 > /dev/null 2>&1
${LVCHANGE_BIN} -aey ${dev} > /dev/null 2>&1 \
&& [[ -e ${dev} ]] \
&& return 0
elif [[ ${action} == deactivate ]]; then
${LVCHANGE_BIN} -aen ${dev} > /dev/null 2>&1 \
&& return 0
# If the LV is already deactivated we may be in an infinite loop
# So we need to test if the LV is still present
[[ -e ${dev} ]] || return 0
if [ $? -eq 0 -a -e $1 ]; then
return 0
fi
# It seems that we had a problem during lvchange
# If we are in a cluster the problem may be due to a cLVM locking bug,
# so try to reload it
reload_clvm
sleep 0.1
# If it takes too long we need to return an error
if (( $(${DATE_SEC}) >= end_time )); then
log err "Failed to ${action} $1 within ${run_timeout} seconds"
log err "Failed to activate $1 within ${run_timeout} seconds"
return 1
fi
# Briefly sleep before restarting the loop
sleep 0.1
done
# Normally we should not get here, but if this happens
@ -246,6 +214,23 @@ function lvm_action()
return 1
}
function deactivate_lvm()
{
    # Deactivate the LVM device given as $1 with "lvchange -aen".
    # Returns 0 when lvchange succeeds, 1 otherwise.

    # Logging message
    # This function sets no "dev" variable of its own, so the message
    # names the device through the positional argument.
    echo "[$(${DATE_LOG})] deactivate LVM device $1..." >&2
    ${LVCHANGE_BIN} -aen $1 > /dev/null 2>&1
    if [ $? -eq 0 ]; then
        # We may have to deactivate the VG now, but can ignore errors:
        # ${VGCHANGE_BIN} -an ${1%/*} || :
        # Maybe we need to cleanup the LVM cache:
        # ${VGSCAN_BIN} --mknodes || :
        return 0
    fi
    return 1
}
# Variables
typeset command=$1
typeset BP=100
@ -280,7 +265,7 @@ function activate_dmmd()
return $?
;;
"lvm")
lvm_action activate $2
activate_lvm $2
return $?
;;
esac
@ -298,7 +283,7 @@ function deactivate_dmmd()
return $?
;;
"lvm")
lvm_action deactivate $2
deactivate_lvm $2
return $?
;;
esac

View File

@ -1,138 +0,0 @@
Index: xen-4.19.0-testing/Config.mk
===================================================================
--- xen-4.19.0-testing.orig/Config.mk
+++ xen-4.19.0-testing/Config.mk
@@ -77,7 +77,7 @@ EXTRA_INCLUDES += $(EXTRA_PREFIX)/includ
EXTRA_LIB += $(EXTRA_PREFIX)/lib
endif
-PYTHON ?= python
+PYTHON ?= python3
PYTHON_PREFIX_ARG ?= --prefix="$(prefix)"
# The above requires that prefix contains *no spaces*. This variable is here
# to permit the user to set PYTHON_PREFIX_ARG to '' to workaround this bug:
Index: xen-4.19.0-testing/tools/configure
===================================================================
--- xen-4.19.0-testing.orig/tools/configure
+++ xen-4.19.0-testing/tools/configure
@@ -8297,15 +8297,15 @@ if test x"${PYTHONPATH}" = x"no"
then
as_fn_error $? "Unable to find $PYTHON, please install $PYTHON" "$LINENO" 5
fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python version >= 2.7 " >&5
-printf %s "checking for python version >= 2.7 ... " >&6; }
-`$PYTHON -c 'import sys; sys.exit(eval("sys.version_info < (2, 7)"))'`
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python version >= 3.0 " >&5
+printf %s "checking for python version >= 3.0 ... " >&6; }
+`$PYTHON -c 'import sys; sys.exit(eval("sys.version_info < (3, 0)"))'`
if test "$?" != "0"
then
python_version=`$PYTHON -V 2>&1`
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
- as_fn_error $? "$python_version is too old, minimum required version is 2.7" "$LINENO" 5
+ as_fn_error $? "$python_version is too old, minimum required version is 3.0" "$LINENO" 5
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
printf "%s\n" "yes" >&6; }
Index: xen-4.19.0-testing/tools/libs/light/idl.py
===================================================================
--- xen-4.19.0-testing.orig/tools/libs/light/idl.py
+++ xen-4.19.0-testing/tools/libs/light/idl.py
@@ -271,7 +271,7 @@ class KeyedUnion(Aggregate):
if not isinstance(keyvar_type, Enumeration):
raise ValueError
- kv_kwargs = dict([(x.lstrip('keyvar_'),y) for (x,y) in kwargs.items() if x.startswith('keyvar_')])
+ kv_kwargs = dict([(x.lstrip('keyvar_'),y) for (x,y) in list(kwargs.items()) if x.startswith('keyvar_')])
self.keyvar = Field(keyvar_type, keyvar_name, **kv_kwargs)
@@ -317,7 +317,7 @@ class Array(Type):
kwargs.setdefault('json_parse_type', 'JSON_ARRAY')
Type.__init__(self, namespace=elem_type.namespace, typename=elem_type.rawname + " *", **kwargs)
- lv_kwargs = dict([(x.lstrip('lenvar_'),y) for (x,y) in kwargs.items() if x.startswith('lenvar_')])
+ lv_kwargs = dict([(x.lstrip('lenvar_'),y) for (x,y) in list(kwargs.items()) if x.startswith('lenvar_')])
self.lenvar = Field(integer, lenvar_name, **lv_kwargs)
self.elem_type = elem_type
@@ -353,7 +353,7 @@ def parse(f):
globs = {}
locs = OrderedDict()
- for n,t in globals().items():
+ for n,t in list(globals().items()):
if isinstance(t, Type):
globs[n] = t
elif isinstance(t,type(object)) and issubclass(t, Type):
Index: xen-4.19.0-testing/tools/libs/light/gentest.py
===================================================================
--- xen-4.19.0-testing.orig/tools/libs/light/gentest.py
+++ xen-4.19.0-testing/tools/libs/light/gentest.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
from __future__ import print_function
Index: xen-4.19.0-testing/tools/libs/light/gentypes.py
===================================================================
--- xen-4.19.0-testing.orig/tools/libs/light/gentypes.py
+++ xen-4.19.0-testing/tools/libs/light/gentypes.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
from __future__ import print_function
Index: xen-4.19.0-testing/tools/include/xen-foreign/mkheader.py
===================================================================
--- xen-4.19.0-testing.orig/tools/include/xen-foreign/mkheader.py
+++ xen-4.19.0-testing/tools/include/xen-foreign/mkheader.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
from __future__ import print_function
Index: xen-4.19.0-testing/tools/include/xen-foreign/mkchecker.py
===================================================================
--- xen-4.19.0-testing.orig/tools/include/xen-foreign/mkchecker.py
+++ xen-4.19.0-testing/tools/include/xen-foreign/mkchecker.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
import sys;
from structs import structs, compat_arches;
Index: xen-4.19.0-testing/xen/tools/gen-cpuid.py
===================================================================
--- xen-4.19.0-testing.orig/xen/tools/gen-cpuid.py
+++ xen-4.19.0-testing/xen/tools/gen-cpuid.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# -*- coding: utf-8 -*-
import sys, os, re
Index: xen-4.19.0-testing/xen/tools/compat-build-source.py
===================================================================
--- xen-4.19.0-testing.orig/xen/tools/compat-build-source.py
+++ xen-4.19.0-testing/xen/tools/compat-build-source.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
import re,sys
Index: xen-4.19.0-testing/xen/tools/compat-build-header.py
===================================================================
--- xen-4.19.0-testing.orig/xen/tools/compat-build-header.py
+++ xen-4.19.0-testing/xen/tools/compat-build-header.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
import re,sys

16
capslock_enable.patch Normal file
View File

@ -0,0 +1,16 @@
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1329,6 +1329,11 @@ static void do_key_event(VncState *vs, i
}
break;
case 0x3a: /* CapsLock */
+ if(!down){
+ vs->modifiers_state[keycode] ^= 1;
+ kbd_put_keycode(keycode | 0x80);
+ }
+ return;
case 0x45: /* NumLock */
if (down) {
kbd_put_keycode(keycode & 0x7f);

97
cdrom-removable.patch Normal file
View File

@ -0,0 +1,97 @@
Index: xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.6.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.6.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -18,6 +18,7 @@
#include "exec-all.h"
#include "sysemu.h"
+#include "console.h"
#include "hw.h"
#include "pci.h"
#include "qemu-timer.h"
@@ -604,6 +605,21 @@ void xenstore_parse_domain_config(int hv
#endif
bs = bdrv_new(dev);
+
+ /* if the cdrom is a physical device, put a watch on media-present */
+ if (bdrv_get_type_hint(bs) == BDRV_TYPE_CDROM) {
+ if (drv && !strcmp(drv, "phy")) {
+ if (pasprintf(&buf, "%s/media-present", bpath) != -1) {
+ if (bdrv_is_inserted(bs))
+ xs_write(xsh, XBT_NULL, buf, "1", strlen("1"));
+ else {
+ xs_write(xsh, XBT_NULL, buf, "0", strlen("0"));
+ }
+ xs_watch(xsh, buf, "media-present");
+ }
+ }
+ }
+
/* check if it is a cdrom */
if (danger_type && !strcmp(danger_type, "cdrom")) {
bdrv_set_type_hint(bs, BDRV_TYPE_CDROM);
@@ -1083,6 +1099,50 @@ static void xenstore_process_vcpu_set_ev
return;
}
+static void xenstore_process_media_change_event(char **vec)
+{
+ char *media_present = NULL;
+ unsigned int len;
+
+ media_present = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
+
+ if (media_present) {
+ BlockDriverState *bs;
+ char *buf = NULL, *cp = NULL, *path = NULL, *dev = NULL;
+
+ path = strdup(vec[XS_WATCH_PATH]);
+ cp = strstr(path, "media-present");
+ if (cp){
+ *(cp-1) = '\0';
+ pasprintf(&buf, "%s/dev", path);
+ dev = xs_read(xsh, XBT_NULL, buf, &len);
+ if (dev) {
+ if ( !strncmp(dev, "xvd", 3)) {
+ memmove(dev, dev+1, strlen(dev));
+ dev[0] = 'h';
+ dev[1] = 'd';
+ }
+ bs = bdrv_find(dev);
+ if (!bs) {
+ term_printf("device not found\n");
+ return;
+ }
+ if (strcmp(media_present, "0") == 0 && bs) {
+ bdrv_close(bs);
+ }
+ else if (strcmp(media_present, "1") == 0 &&
+ bs != NULL && bs->drv == NULL) {
+ if (bdrv_open(bs, bs->filename, 0 /* snapshot */) < 0) {
+ fprintf(logfile, "%s() qemu: could not open cdrom disk '%s'\n",
+ __func__, bs->filename);
+ }
+ bs->media_changed = 1;
+ }
+ }
+ }
+ }
+}
+
void xenstore_process_event(void *opaque)
{
char **vec, *offset, *bpath = NULL, *buf = NULL, *drv = NULL, *image = NULL;
@@ -1118,6 +1178,11 @@ void xenstore_process_event(void *opaque
xenstore_watch_callbacks[i].cb(vec[XS_WATCH_TOKEN],
xenstore_watch_callbacks[i].opaque);
+ if (!strcmp(vec[XS_WATCH_TOKEN], "media-present")) {
+ xenstore_process_media_change_event(vec);
+ goto out;
+ }
+
hd_index = drive_name_to_index(vec[XS_WATCH_TOKEN]);
if (hd_index == -1) {
fprintf(stderr,"medium change watch on `%s' -"

View File

@ -1,8 +1,8 @@
Index: xen-4.18.0-testing/tools/libacpi/ssdt_s3.asl
Index: xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
===================================================================
--- xen-4.18.0-testing.orig/tools/libacpi/ssdt_s3.asl
+++ xen-4.18.0-testing/tools/libacpi/ssdt_s3.asl
@@ -7,13 +7,9 @@
--- xen-4.6.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s3.asl
+++ xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s3.asl
@@ -19,13 +19,9 @@
DefinitionBlock ("SSDT_S3.aml", "SSDT", 2, "Xen", "HVM", 0)
{
@ -20,11 +20,11 @@ Index: xen-4.18.0-testing/tools/libacpi/ssdt_s3.asl
+ */
}
Index: xen-4.18.0-testing/tools/libacpi/ssdt_s4.asl
Index: xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
===================================================================
--- xen-4.18.0-testing.orig/tools/libacpi/ssdt_s4.asl
+++ xen-4.18.0-testing/tools/libacpi/ssdt_s4.asl
@@ -7,13 +7,9 @@
--- xen-4.6.0-testing.orig/tools/firmware/hvmloader/acpi/ssdt_s4.asl
+++ xen-4.6.0-testing/tools/firmware/hvmloader/acpi/ssdt_s4.asl
@@ -19,13 +19,9 @@
DefinitionBlock ("SSDT_S4.aml", "SSDT", 2, "Xen", "HVM", 0)
{

View File

@ -1,54 +0,0 @@
References: bsc#1172356
The bug is that virt-manager reports a failure when in fact
the host and guest have added the network interface. The Xen
scripts fail with an error even though the command they run
actually succeeds.
The 'ip' commands seem to abort the script due to a 'set -e' in
xen-script-common.sh with what appears to be an error condition.
However, the command actually succeeds when checked from the
host console or also by inserting a sleep before each ip command
and executing it manually at the command line. This seems to be
an artifact of using 'set -e' everywhere.
Index: xen-4.15.0-testing/tools/hotplug/Linux/xen-network-common.sh
===================================================================
--- xen-4.15.0-testing.orig/tools/hotplug/Linux/xen-network-common.sh
+++ xen-4.15.0-testing/tools/hotplug/Linux/xen-network-common.sh
@@ -90,7 +90,7 @@ _setup_bridge_port() {
local virtual="$2"
# take interface down ...
- ip link set dev ${dev} down
+ (ip link set dev ${dev} down || true)
if [ $virtual -ne 0 ] ; then
# Initialise a dummy MAC address. We choose the numerically
@@ -101,7 +101,7 @@ _setup_bridge_port() {
fi
# ... and configure it
- ip address flush dev ${dev}
+ (ip address flush dev ${dev} || true)
}
setup_physical_bridge_port() {
@@ -136,15 +136,15 @@ add_to_bridge () {
if [ ! -e "/sys/class/net/${bridge}/brif/${dev}" ]; then
log debug "adding $dev to bridge $bridge"
if which brctl >&/dev/null; then
- brctl addif ${bridge} ${dev}
+ (brctl addif ${bridge} ${dev} || true)
else
- ip link set ${dev} master ${bridge}
+ (ip link set ${dev} master ${bridge} || true)
fi
else
log debug "$dev already on bridge $bridge"
fi
- ip link set dev ${dev} up
+ (ip link set dev ${dev} up || true)
}
remove_from_bridge () {

2
init.xen_loop Normal file
View File

@ -0,0 +1,2 @@
# Increase the number of loopback devices available for vm creation
options loop max_loop=64

View File

@ -0,0 +1,32 @@
qcow2 corruption: Fix alloc_cluster_link_l2 (Kevin Wolf)
This patch fixes a qcow2 corruption bug introduced in SVN Rev 5861. L2 tables
are big endian, so entries must be converted before being passed to functions.
This bug is easy to trigger. The following script will create and destroy a
qcow2 image (the header is gone after three loop iterations):
#!/bin/bash
qemu-img create -f qcow2 test.qcow 1M
for i in $(seq 1 10); do
qemu-system-x86_64 -hda test.qcow -monitor stdio > /dev/null 2>&1 <<EOF
savevm test-$i
quit
EOF
done
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/block-qcow2.c
@@ -916,7 +916,7 @@ static int alloc_cluster_link_l2(BlockDr
goto err;
for (i = 0; i < j; i++)
- free_any_clusters(bs, old_cluster[i], 1);
+ free_any_clusters(bs, be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
ret = 0;
err:

View File

@ -0,0 +1,76 @@
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
@@ -1,6 +1,8 @@
#ifndef QEMU_XEN_H
#define QEMU_XEN_H
+#include "hw/boards.h"
+
/* vl.c */
extern int restore;
extern int vga_ram_size;
@@ -65,7 +67,7 @@ void handle_buffered_pio(void);
/* xenstore.c */
void xenstore_init(void);
uint32_t xenstore_read_target(void);
-void xenstore_parse_domain_config(int domid);
+void xenstore_parse_domain_config(int domid, QEMUMachine *machine);
int xenstore_parse_disable_pf_config(void);
int xenstore_fd(void);
void xenstore_process_event(void *opaque);
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -5907,9 +5907,9 @@ int main(int argc, char **argv, char **e
if ((msg = xenbus_read(XBT_NIL, "domid", &domid_s)))
fprintf(stderr,"Can not read our own domid: %s\n", msg);
else
- xenstore_parse_domain_config(atoi(domid_s));
+ xenstore_parse_domain_config(atoi(domid_s), machine);
#else
- xenstore_parse_domain_config(domid);
+ xenstore_parse_domain_config(domid, machine);
#endif /* CONFIG_STUBDOM */
}
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xenstore.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/xenstore.c
@@ -445,7 +445,7 @@ void xenstore_init(void)
}
}
-void xenstore_parse_domain_config(int hvm_domid)
+void xenstore_parse_domain_config(int hvm_domid, QEMUMachine *machine)
{
char **e_danger = NULL;
char *buf = NULL;
@@ -739,11 +739,19 @@ void xenstore_parse_domain_config(int hv
#endif
- drives_table[nb_drives].bdrv = bs;
- drives_table[nb_drives].used = 1;
- media_filename[nb_drives] = strdup(bs->filename);
- nb_drives++;
-
+ if (machine == &xenfv_machine) {
+ drives_table[nb_drives].bdrv = bs;
+ drives_table[nb_drives].used = 1;
+#ifdef CONFIG_STUBDOM
+ media_filename[nb_drives] = strdup(danger_buf);
+#else
+ media_filename[nb_drives] = strdup(bs->filename);
+#endif
+ nb_drives++;
+ } else {
+ qemu_aio_flush();
+ bdrv_close(bs);
+ }
}
#ifdef CONFIG_STUBDOM

98
ioemu-disable-scsi.patch Normal file
View File

@ -0,0 +1,98 @@
---
tools/qemu-xen-traditional-dir-remote/hw/pci.c | 44 ++++++++++++++++
tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c | 2
tools/qemu-xen-traditional-dir-remote/qemu-xen.h | 1
3 files changed, 47 insertions(+)
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pci.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/pci.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/pci.c
@@ -871,6 +871,50 @@ void pci_unplug_netifs(void)
}
}
+void pci_unplug_scsi(void)
+{
+ PCIBus *bus;
+ PCIDevice *dev;
+ PCIIORegion *region;
+ int x;
+ int i;
+
+ /* We only support one PCI bus */
+ for (bus = first_bus; bus; bus = NULL) {
+ for (x = 0; x < 256; x++) {
+ dev = bus->devices[x];
+ if (dev &&
+ dev->config[0xa] == 0 &&
+ dev->config[0xb] == 1
+#ifdef CONFIG_PASSTHROUGH
+ && test_pci_devfn(x) != 1
+#endif
+ ) {
+ /* Found a scsi disk. Remove it from the bus. Note that
+ we don't free it here, since there could still be
+ references to it floating around. There are only
+ ever one or two structures leaked, and it's not
+ worth finding them all. */
+ bus->devices[x] = NULL;
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ region = &dev->io_regions[i];
+ if (region->addr == (uint32_t)-1 ||
+ region->size == 0)
+ continue;
+ fprintf(logfile, "region type %d at [%x,%x).\n",
+ region->type, region->addr,
+ region->addr+region->size);
+ if (region->type == PCI_ADDRESS_SPACE_IO) {
+ isa_unassign_ioport(region->addr, region->size);
+ } else if (region->type == PCI_ADDRESS_SPACE_MEM) {
+ unregister_iomem(region->addr);
+ }
+ }
+ }
+ }
+ }
+}
+
typedef struct {
PCIDevice dev;
PCIBus *bus;
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -156,8 +156,10 @@ static void platform_fixed_ioport_write2
/* Unplug devices. Value is a bitmask of which devices to
unplug, with bit 0 the IDE devices, bit 1 the network
devices, and bit 2 the non-primary-master IDE devices. */
- if (val & UNPLUG_ALL_IDE_DISKS)
+ if (val & UNPLUG_ALL_IDE_DISKS) {
ide_unplug_harddisks();
+ pci_unplug_scsi();
+ }
if (val & UNPLUG_ALL_NICS) {
pci_unplug_netifs();
net_tap_shutdown_all();
@@ -364,6 +364,8 @@ static void suse_platform_ioport_write(v
* If it controlled just disk or just LAN, it would use 8 below. */
fprintf(logfile, "Disconnect IDE hard disk...\n");
ide_unplug_harddisks();
+ fprintf(logfile, "Disconnect SCSI hard disk...\n");
+ pci_unplug_scsi();
fprintf(logfile, "Disconnect netifs...\n");
pci_unplug_netifs();
fprintf(logfile, "Shutdown taps...\n");
Index: xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
===================================================================
--- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
+++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/qemu-xen.h
@@ -47,6 +47,7 @@ void unset_vram_mapping(void *opaque);
#endif
void pci_unplug_netifs(void);
+void pci_unplug_scsi(void);
void destroy_hvm_domain(void);
void unregister_iomem(target_phys_addr_t start);

View File

@ -0,0 +1,84 @@
---
tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c | 46 ++++++++++++++++
1 file changed, 46 insertions(+)
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/xen-hooks.mak
@@ -2,6 +2,9 @@ CPPFLAGS+= -I$(XEN_ROOT)/tools/libs/tool
CPPFLAGS+= -I$(XEN_ROOT)/tools/libs/evtchn/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/libs/gnttab/include
CPPFLAGS+= -DXC_WANT_COMPAT_MAP_FOREIGN_API
+CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc
+CPPFLAGS+= -I$(XEN_ROOT)/tools/libs/call/include
+CPPFLAGS+= -I$(XEN_ROOT)/tools/libs/foreignmemory/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore/include
CPPFLAGS+= -I$(XEN_ROOT)/tools/include
Index: xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
===================================================================
--- xen-4.7.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+++ xen-4.7.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
@@ -30,6 +30,8 @@
#include "qemu-xen.h"
#include "net.h"
#include "xen_platform.h"
+#include "sysemu.h"
+#include <xc_private.h>
#include <assert.h>
#include <xenguest.h>
@@ -335,8 +337,52 @@ static void xen_platform_ioport_writeb(v
}
}
+static uint32_t ioport_base;
+
+static void suse_platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ DECLARE_DOMCTL;
+ int rc;
+
+ if (val == 0)
+ qemu_invalidate_map_cache();
+
+ switch (addr - ioport_base) {
+ case 0:
+ /* FIXME Unknown who makes use of this code! */
+ fprintf(logfile, "Init hypercall page %x, addr %x.\n", val, addr);
+ domctl.domain = (domid_t)domid;
+ domctl.u.hypercall_init.gmfn = val;
+ domctl.cmd = XEN_DOMCTL_hypercall_init;
+ rc = xc_domctl(xc_handle, &domctl);
+ fprintf(logfile, "result -> %d.\n", rc);
+ break;
+ case 4:
+ /* xen-kmp used this since xen-3.0.4, instead of the official protocol from xen-3.3+
+ * pre vmdp 1.7 made use of 4 and 8 depending on how vmdp was configured.
+ * If vmdp was to control both disk and LAN it would use 4.
+ * If it controlled just disk or just LAN, it would use 8 below. */
+ fprintf(logfile, "Disconnect IDE hard disk...\n");
+ ide_unplug_harddisks();
+ fprintf(logfile, "Disconnect netifs...\n");
+ pci_unplug_netifs();
+ fprintf(logfile, "Shutdown taps...\n");
+ net_tap_shutdown_all();
+ fprintf(logfile, "Done.\n");
+ break;
+ default:
+ fprintf(logfile, "Write %x to bad port %x (base %x) on evtchn device.\n",
+ val, addr, ioport_base);
+ break;
+ }
+}
+
static void platform_ioport_map(PCIDevice *pci_dev, int region_num, uint32_t addr, uint32_t size, int type)
{
+ ioport_base = addr;
+
+ register_ioport_write(addr, 16, 4, suse_platform_ioport_write, NULL);
+
PCIXenPlatformState *d = (PCIXenPlatformState *)pci_dev;
register_ioport_write(addr, size, 1, xen_platform_ioport_writeb, d);
register_ioport_read(addr, size, 1, xen_platform_ioport_readb, d);

30
ioemu-vnc-resize.patch Normal file
View File

@ -0,0 +1,30 @@
Index: xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
===================================================================
--- xen-4.6.1-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c
+++ xen-4.6.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c
@@ -1761,6 +1761,25 @@ static int protocol_client_msg(VncState
}
set_encodings(vs, (int32_t *)(data + 4), limit);
+
+ /*
+ * The initialization of a VNC connection can race with xenfb changing
+ * the resolution. This happens when the VNC connection is already
+ * established, but the client has not yet advertised has_resize, so it
+ * won't get notified of the switch.
+ *
+ * Therefore we resend the resolution as soon as the client has sent its
+ * encodings.
+ */
+ if (vs->has_resize) {
+ /* Resize the VNC window */
+ vnc_write_u8(vs, 0); /* msg id */
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1); /* number of rects */
+ vnc_framebuffer_update(vs, 0, 0, vs->serverds.width, vs->serverds.height, -223);
+
+ vnc_flush(vs);
+ }
break;
case 3:
if (len == 1)

View File

@ -0,0 +1,34 @@
Subject: qdev: convert watchdogs
From: Markus Armbruster armbru@redhat.com Fri Aug 21 10:31:34 2009 +0200
Date: Thu Aug 27 20:35:24 2009 -0500:
Git: 09aaa1602f9381c0e0fb539390b1793e51bdfc7b
* THIS IS ONLY THE BUG FIX PART OF THE UPSTREAM PATCH *
Fixes ib700 not to use vm_clock before it is initialized: in
wdt_ib700_init(), called from register_watchdogs(), which runs before
init_timers(). The bug made ib700_write_enable_reg() crash in
qemu_del_timer().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/wdt_ib700.c
@@ -93,6 +93,7 @@ static int ib700_load(QEMUFile *f, void
/* Create and initialize a virtual IB700 during PC creation. */
static void ib700_pc_init(PCIBus *unused)
{
+ timer = qemu_new_timer(vm_clock, ib700_timer_expired, NULL);
register_savevm("ib700_wdt", -1, 0, ib700_save, ib700_load, NULL);
register_ioport_write(0x441, 2, 1, ib700_write_disable_reg, NULL);
@@ -108,5 +109,4 @@ static WatchdogTimerModel model = {
void wdt_ib700_init(void)
{
watchdog_add_model(&model);
- timer = qemu_new_timer(vm_clock, ib700_timer_expired, NULL);
}

View File

@ -0,0 +1,72 @@
Subject: Move watchdog, watchdog_action, give them internal linkage
From: Markus Armbruster armbru@redhat.com Fri Aug 21 10:31:32 2009 +0200
Date: Thu Aug 27 20:30:23 2009 -0500:
Git: 88b3be201acf64e0bd19782bebd533901c951c87
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.c
@@ -26,6 +26,16 @@
#include "sysemu.h"
#include "hw/watchdog.h"
+/* Possible values for action parameter. */
+#define WDT_RESET 1 /* Hard reset. */
+#define WDT_SHUTDOWN 2 /* Shutdown. */
+#define WDT_POWEROFF 3 /* Quit. */
+#define WDT_PAUSE 4 /* Pause. */
+#define WDT_DEBUG 5 /* Prints a message and continues running. */
+#define WDT_NONE 6 /* Do nothing. */
+
+static WatchdogTimerModel *watchdog;
+static int watchdog_action = WDT_RESET;
static LIST_HEAD(watchdog_list, WatchdogTimerModel) watchdog_list;
void watchdog_add_model(WatchdogTimerModel *model)
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/hw/watchdog.h
@@ -27,13 +27,6 @@
extern void wdt_i6300esb_init(void);
extern void wdt_ib700_init(void);
-/* Possible values for action parameter. */
-#define WDT_RESET 1 /* Hard reset. */
-#define WDT_SHUTDOWN 2 /* Shutdown. */
-#define WDT_POWEROFF 3 /* Quit. */
-#define WDT_PAUSE 4 /* Pause. */
-#define WDT_DEBUG 5 /* Prints a message and continues running. */
-#define WDT_NONE 6 /* Do nothing. */
struct WatchdogTimerModel {
LIST_ENTRY(WatchdogTimerModel) entry;
@@ -50,10 +43,6 @@ struct WatchdogTimerModel {
};
typedef struct WatchdogTimerModel WatchdogTimerModel;
-/* in vl.c */
-extern WatchdogTimerModel *watchdog;
-extern int watchdog_action;
-
/* in hw/watchdog.c */
extern int select_watchdog(const char *p);
extern int select_watchdog_action(const char *action);
Index: xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
===================================================================
--- xen-4.2.0-testing.orig/tools/qemu-xen-traditional-dir-remote/vl.c
+++ xen-4.2.0-testing/tools/qemu-xen-traditional-dir-remote/vl.c
@@ -250,8 +250,6 @@ int no_shutdown = 0;
int cursor_hide = 1;
int graphic_rotate = 0;
int daemonize = 0;
-WatchdogTimerModel *watchdog = NULL;
-int watchdog_action = WDT_RESET;
const char *option_rom[MAX_OPTION_ROMS];
int nb_option_roms;
int semihosting_enabled = 0;

Some files were not shown because too many files have changed in this diff Show More